bcp47_spec 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Readme.md +45 -0
- data/lib/bcp47_spec/parser.rb +135 -0
- data/lib/bcp47_spec/version.rb +5 -0
- data/lib/bcp47_spec.rb +24 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1b7ac3631986a8b1fd3df0f12f8a7358ba0523dbdb7d16c5bba5b61a53ae5a02
|
4
|
+
data.tar.gz: 99813e4da3415417889603d1ee9daab2c31b5468ecc1246cb7095140b49791d0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 27717b3a58240861bd7bbfa8ed2ac94370d01039c0c2ae002959fdd5a5e98e4a2e3f67b262120c46347f9ce9d4cfc00cdd9dd2dc39e70f5bca0312979b9f4a6f
|
7
|
+
data.tar.gz: d36beae7a694d77563b09cac2958515edc9f6d630bf54f01786fa13e7f57defc3d5639f42a1b2e429e97d74b59c8b38c9f757efe5a95f51c9b7b191afbc8fe1f
|
data/Readme.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# bcp47_spec [Build Status](https://travis-ci.org/dadah89/bcp47_spec)
|
2
|
+
A subset of the BCP 47 spec: https://tools.ietf.org/html/bcp47
|
3
|
+
|
4
|
+
## Installation
|
5
|
+
|
6
|
+
Add this line to your application's Gemfile:
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
gem 'bcp47_spec'
|
10
|
+
```
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
```
|
14
|
+
$ bundle
|
15
|
+
```
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
```
|
19
|
+
$ gem install bcp47_spec
|
20
|
+
```
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
BCP47.valid?('de-Latn-DE-1996-u-attr-co-phonebk-t-und-cyrl-x-private-test') # true
|
26
|
+
```
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
tag = BCP47.parse('de-Latn-DE-1996-u-attr-co-phonebk-t-und-cyrl-x-private-test')
|
30
|
+
tag.language # de
|
31
|
+
tag.script # Latn
|
32
|
+
tag.region # DE
|
33
|
+
tag.variants # ['1996']
|
34
|
+
tag.extensions # [['t', 'und-cyrl'], ['u', 'attr-co-phonebk']]
|
35
|
+
tag.private # ['private', 'test']
|
36
|
+
```
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
BCP47.parse('blahblahblah') # raises InvalidLanguageTag
|
40
|
+
```
|
41
|
+
|
42
|
+
## TODO
|
43
|
+
|
44
|
+
* Parse private and grandfathered language tags (maybe?)
|
45
|
+
* Parse extensions T (https://tools.ietf.org/html/rfc6497) and U (https://tools.ietf.org/html/rfc6067)
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Opened as nested modules (rather than `module BCP47::Parser`) so that this
# file can be loaded before the top-level BCP47 module body has been evaluated.
module BCP47
  # Regexp-based well-formedness check and splitter for the `langtag`
  # production of BCP 47. Top-level private-use and grandfathered tags are
  # not handled.
  module Parser
    # https://tools.ietf.org/html/bcp47#section-2.1
    # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234

    # Language-Tag  = langtag             ; normal language tags
    #               / privateuse          ; private use tag
    #               / grandfathered       ; grandfathered tags

    # langtag       = language
    #                 ["-" script]
    #                 ["-" region]
    #                 *("-" variant)
    #                 *("-" extension)
    #                 ["-" privateuse]

    # language      = 2*3ALPHA            ; shortest ISO 639 code
    #                 ["-" extlang]       ; sometimes followed by
    #                                     ; extended language subtags
    #               / 4ALPHA              ; or reserved for future use
    #               / 5*8ALPHA            ; or registered language subtag

    # extlang       = 3ALPHA              ; selected ISO 639 codes
    #                 *2("-" 3ALPHA)      ; permanently reserved

    # script        = 4ALPHA              ; ISO 15924 code

    # region        = 2ALPHA              ; ISO 3166-1 code
    #               / 3DIGIT              ; UN M.49 code

    # variant       = 5*8alphanum         ; registered variants
    #               / (DIGIT 3alphanum)

    # extension     = singleton 1*("-" (2*8alphanum))

    #                                     ; Single alphanumerics
    #                                     ; "x" reserved for private use
    # singleton     = DIGIT               ; 0 - 9
    #               / %x41-57             ; A - W
    #               / %x59-5A             ; Y - Z
    #               / %x61-77             ; a - w
    #               / %x79-7A             ; y - z

    # privateuse    = "x" 1*("-" (1*8alphanum))

    # grandfathered = irregular           ; non-redundant tags registered
    #               / regular             ; during the RFC 3066 era

    # irregular     = "en-GB-oed"         ; irregular tags do not match
    #               / "i-ami"             ; the 'langtag' production and
    #               / "i-bnn"             ; would not otherwise be
    #               / "i-default"         ; considered 'well-formed'
    #               / "i-enochian"        ; These tags are all valid,
    #               / "i-hak"             ; but most are deprecated
    #               / "i-klingon"         ; in favor of more modern
    #               / "i-lux"             ; subtags or subtag
    #               / "i-mingo"           ; combination
    #               / "i-navajo"
    #               / "i-pwn"
    #               / "i-tao"
    #               / "i-tay"
    #               / "i-tsu"
    #               / "sgn-BE-FR"
    #               / "sgn-BE-NL"
    #               / "sgn-CH-DE"

    # regular       = "art-lojban"        ; these tags match the 'langtag'
    #               / "cel-gaulish"       ; production, but their subtags
    #               / "no-bok"            ; are not extended language
    #               / "no-nyn"            ; or variant subtags: their meaning
    #               / "zh-guoyu"          ; is defined by their registration
    #               / "zh-hakka"          ; and all of these are deprecated
    #               / "zh-min"            ; in favor of a more modern
    #               / "zh-min-nan"        ; subtag or sequence of subtags
    #               / "zh-xiang"

    # alphanum      = (ALPHA / DIGIT)     ; letters and numbers

    # Simplified check. Not implementing high level privateuse / grandfathered.
    # Should replace with a proper check at some point.
    ALPHANUM = /[a-zA-Z\d]/
    SINGLETON = /[\dA-WY-Za-wy-z]/

    EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/

    LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
    SCRIPT = /[a-zA-Z]{4}/
    REGION = /([a-zA-Z]{2}|\d{3})/
    VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
    # Extension subtags are 2*8alphanum in the grammar above, so digits are
    # allowed (e.g. the "m0" in "und-t-m0-ungegn").
    EXTENSION = /#{SINGLETON}(-#{ALPHANUM}{2,8})+/
    # The introducer is matched in either case, like every other production here.
    PRIVATEUSE = /[xX](-#{ALPHANUM}{1,8})+/

    # A repeated capture group only retains a single iteration (the last one),
    # so repeatable parts such as variants/extensions are captured whole in one
    # named group and broken down into multiple subtags separately afterwards.
    LANGTAG = %r{
      (?<language>#{LANGUAGE})
      (-(?<script>#{SCRIPT}))?
      (-(?<region>#{REGION}))?
      (?<variants>(-#{VARIANT})*)
      (?<extensions>(-#{EXTENSION})*)
      (-(?<private>#{PRIVATEUSE}))?
    }x

    LANGUAGE_TAG = /\A#{LANGTAG}\z/

    class << self
      # Splits a language tag into its components.
      #
      # @param language_tag [#to_s] candidate tag; nil is treated as an empty
      #   string and therefore as not well-formed
      # @return [Hash{String => Object}, nil] nil when the tag does not match
      #   LANGUAGE_TAG; otherwise a hash with the keys
      #   'language', 'script', 'region' (String or nil),
      #   'variants' (sorted Array of String),
      #   'extensions' (Array of [singleton, rest] pairs sorted by singleton) and
      #   'private' (sorted Array of String, without the leading "x")
      def parse(language_tag)
        match = LANGUAGE_TAG.match(language_tag.to_s)
        return unless match

        named_captures(match).tap do |captures|
          captures['variants'] = subtags_after_first(captures['variants'])
          captures['extensions'] = split_extensions(captures['extensions'])
          captures['private'] = subtags_after_first(captures['private'])
        end
      end

      private

      # Returns the named groups of +match+ as a Hash keyed by group name,
      # falling back to a manual build when MatchData#named_captures is missing.
      def named_captures(match)
        return match.named_captures if match.respond_to?(:named_captures)

        match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
      end

      # Splits a hyphen-joined capture and discards its first piece: the empty
      # string in front of the leading "-" of the variants capture
      # ("-1996-fonipa"), or the "x" introducer of the private-use capture
      # ("x-private-test"). nil and "" both yield [].
      def subtags_after_first(raw)
        raw.to_s.split('-').drop(1).sort
      end

      # Breaks the raw extensions capture into [singleton, subtags] pairs.
      def split_extensions(extensions)
        return [] if extensions.to_s.empty?

        # [["u-attr-co-phonebk"], ["t-und-cyrl"]]
        extensions = extensions.scan(/\b(?<ext>#{EXTENSION})\b/)
        # [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
        extensions.flatten.sort.map { |st| st.split('-', 2) }
      end
    end
  end
end
|
data/lib/bcp47_spec.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bcp47_spec/parser'
|
4
|
+
require 'bcp47_spec/version'
|
5
|
+
|
6
|
+
# Public entry points for checking and parsing BCP 47 language tags.
module BCP47
  # Raised by BCP47.parse when the given tag is rejected by the parser.
  class InvalidLanguageTag < StandardError; end

  # Value object holding the components of a parsed language tag.
  Tag = Struct.new(:language, :script, :region, :variants, :extensions, :private)

  class << self
    # @param language_tag [String] candidate language tag
    # @return [Boolean] true when Parser.parse returns a non-nil result
    def valid?(language_tag)
      Parser.parse(language_tag).nil? ? false : true
    end

    # @param language_tag [String] candidate language tag
    # @return [Tag] the components of the tag
    # @raise [InvalidLanguageTag] when Parser.parse returns nothing for the tag
    def parse(language_tag)
      parsed = Parser.parse(language_tag)
      unless parsed
        raise InvalidLanguageTag, "Invalid language tag: #{language_tag}"
      end

      # The parser result is keyed by strings named after the Tag members.
      components = Tag.members.map { |member| parsed[member.to_s] }
      Tag.new(*components)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bcp47_spec
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Igor Justino
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-09-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bump
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: byebug
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: maxitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mocha
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubocop
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: single_cov
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
description:
|
126
|
+
email:
|
127
|
+
executables: []
|
128
|
+
extensions: []
|
129
|
+
extra_rdoc_files: []
|
130
|
+
files:
|
131
|
+
- Readme.md
|
132
|
+
- lib/bcp47_spec.rb
|
133
|
+
- lib/bcp47_spec/parser.rb
|
134
|
+
- lib/bcp47_spec/version.rb
|
135
|
+
homepage: https://github.com/dadah89/bcp47_spec
|
136
|
+
licenses:
|
137
|
+
- MIT
|
138
|
+
metadata: {}
|
139
|
+
post_install_message:
|
140
|
+
rdoc_options: []
|
141
|
+
require_paths:
|
142
|
+
- lib
|
143
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
144
|
+
requirements:
|
145
|
+
- - ">="
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: '0'
|
148
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
requirements: []
|
154
|
+
rubyforge_project:
|
155
|
+
rubygems_version: 2.7.6
|
156
|
+
signing_key:
|
157
|
+
specification_version: 4
|
158
|
+
summary: A subset of the BCP 47 spec.
|
159
|
+
test_files: []
|