bcp47_spec 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b7ac3631986a8b1fd3df0f12f8a7358ba0523dbdb7d16c5bba5b61a53ae5a02
4
- data.tar.gz: 99813e4da3415417889603d1ee9daab2c31b5468ecc1246cb7095140b49791d0
3
+ metadata.gz: 2039159ce3a6ad3a971a0fd8d0119c83ef6727447651cc4eacfe0a2cca9dab15
4
+ data.tar.gz: d3c6f3bf99c68cfe41612d26ca2ce50de63244cc2928c1ec4c6532368cdecb42
5
5
  SHA512:
6
- metadata.gz: 27717b3a58240861bd7bbfa8ed2ac94370d01039c0c2ae002959fdd5a5e98e4a2e3f67b262120c46347f9ce9d4cfc00cdd9dd2dc39e70f5bca0312979b9f4a6f
7
- data.tar.gz: d36beae7a694d77563b09cac2958515edc9f6d630bf54f01786fa13e7f57defc3d5639f42a1b2e429e97d74b59c8b38c9f757efe5a95f51c9b7b191afbc8fe1f
6
+ metadata.gz: 32ad050b2858a0300439ef971602706f387722e52ca8b64be443bc325fd0c8523e33eecbdc63a20c01b583b33da90578a15b16744574749b523813e2618cf4d8
7
+ data.tar.gz: 792e8721f1b5b894f0632e4b64a7734a93a8f8900cc00faae9a0cef4cdc862c6a7fe96014177a048804ab4ff4d1872b8b1c6313cf2392e78eacae72b7eb179ea
@@ -1,135 +1,137 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module BCP47::Parser
4
- # https://tools.ietf.org/html/bcp47#section-2.1
5
- # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234
6
-
7
- # Language-Tag = langtag ; normal language tags
8
- # / privateuse ; private use tag
9
- # / grandfathered ; grandfathered tags
10
-
11
- # langtag = language
12
- # ["-" script]
13
- # ["-" region]
14
- # *("-" variant)
15
- # *("-" extension)
16
- # ["-" privateuse]
17
-
18
- # language = 2*3ALPHA ; shortest ISO 639 code
19
- # ["-" extlang] ; sometimes followed by
20
- # ; extended language subtags
21
- # / 4ALPHA ; or reserved for future use
22
- # / 5*8ALPHA ; or registered language subtag
23
-
24
- # extlang = 3ALPHA ; selected ISO 639 codes
25
- # *2("-" 3ALPHA) ; permanently reserved
26
-
27
- # script = 4ALPHA ; ISO 15924 code
28
-
29
- # region = 2ALPHA ; ISO 3166-1 code
30
- # / 3DIGIT ; UN M.49 code
31
-
32
- # variant = 5*8alphanum ; registered variants
33
- # / (DIGIT 3alphanum)
34
-
35
- # extension = singleton 1*("-" (2*8alphanum))
36
-
37
- # ; Single alphanumerics
38
- # ; "x" reserved for private use
39
- # singleton = DIGIT ; 0 - 9
40
- # / %x41-57 ; A - W
41
- # / %x59-5A ; Y - Z
42
- # / %x61-77 ; a - w
43
- # / %x79-7A ; y - z
44
-
45
- # privateuse = "x" 1*("-" (1*8alphanum))
46
-
47
- # grandfathered = irregular ; non-redundant tags registered
48
- # / regular ; during the RFC 3066 era
49
-
50
- # irregular = "en-GB-oed" ; irregular tags do not match
51
- # / "i-ami" ; the 'langtag' production and
52
- # / "i-bnn" ; would not otherwise be
53
- # / "i-default" ; considered 'well-formed'
54
- # / "i-enochian" ; These tags are all valid,
55
- # / "i-hak" ; but most are deprecated
56
- # / "i-klingon" ; in favor of more modern
57
- # / "i-lux" ; subtags or subtag
58
- # / "i-mingo" ; combination
59
- # / "i-navajo"
60
- # / "i-pwn"
61
- # / "i-tao"
62
- # / "i-tay"
63
- # / "i-tsu"
64
- # / "sgn-BE-FR"
65
- # / "sgn-BE-NL"
66
- # / "sgn-CH-DE"
67
-
68
- # regular = "art-lojban" ; these tags match the 'langtag'
69
- # / "cel-gaulish" ; production, but their subtags
70
- # / "no-bok" ; are not extended language
71
- # / "no-nyn" ; or variant subtags: their meaning
72
- # / "zh-guoyu" ; is defined by their registration
73
- # / "zh-hakka" ; and all of these are deprecated
74
- # / "zh-min" ; in favor of a more modern
75
- # / "zh-min-nan" ; subtag or sequence of subtags
76
- # / "zh-xiang"
77
-
78
- # alphanum = (ALPHA / DIGIT) ; letters and numbers
79
-
80
- # Simplified check. Not implementing high level privateuse / grandfathered.
81
- # Should replace with a proper check at some point.
82
- ALPHANUM = /[a-zA-Z\d]/
83
- SINGLETON = /[\dA-WY-Za-wy-z]/
84
-
85
- EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/
86
-
87
- LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
88
- SCRIPT = /[a-zA-Z]{4}/
89
- REGION = /([a-zA-Z]{2}|\d{3})/
90
- VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
91
- EXTENSION = /#{SINGLETON}(-[a-zA-Z]{2,8})+/
92
- PRIVATEUSE = /x(-#{ALPHANUM}{1,8})+/
93
-
94
- # Ruby .match only keeps the first captured group, so expressions like variants/extensions we need to keep everything
95
- # in one captured group, then break them down in multipe groups separately
96
- LANGTAG = %r{
97
- (?<language>#{LANGUAGE})
98
- (-(?<script>#{SCRIPT}))?
99
- (-(?<region>#{REGION}))?
100
- (?<variants>(-#{VARIANT})*)
101
- (?<extensions>(-#{EXTENSION})*)
102
- (-(?<private>#{PRIVATEUSE}))?
103
- }x
104
-
105
- LANGUAGE_TAG = /\A#{LANGTAG}\z/
106
-
107
- class << self
108
- def parse(language_tag)
109
- return unless match = language_tag.match(LANGUAGE_TAG)
110
-
111
- named_captures(match).tap do |captures|
112
- captures['variants'] = captures['variants'].to_s.empty? ? [] : captures['variants'][/-(.*)/, 1].split('-').sort
113
- captures['extensions'] = split_extensions(captures['extensions'])
114
- captures['private'] = captures['private'].to_s.empty? ? [] : captures['private'][/x-(.*)/, 1].split('-').sort
3
+ module BCP47
4
+ module Parser
5
+ # https://tools.ietf.org/html/bcp47#section-2.1
6
+ # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234
7
+
8
+ # Language-Tag = langtag ; normal language tags
9
+ # / privateuse ; private use tag
10
+ # / grandfathered ; grandfathered tags
11
+
12
+ # langtag = language
13
+ # ["-" script]
14
+ # ["-" region]
15
+ # *("-" variant)
16
+ # *("-" extension)
17
+ # ["-" privateuse]
18
+
19
+ # language = 2*3ALPHA ; shortest ISO 639 code
20
+ # ["-" extlang] ; sometimes followed by
21
+ # ; extended language subtags
22
+ # / 4ALPHA ; or reserved for future use
23
+ # / 5*8ALPHA ; or registered language subtag
24
+
25
+ # extlang = 3ALPHA ; selected ISO 639 codes
26
+ # *2("-" 3ALPHA) ; permanently reserved
27
+
28
+ # script = 4ALPHA ; ISO 15924 code
29
+
30
+ # region = 2ALPHA ; ISO 3166-1 code
31
+ # / 3DIGIT ; UN M.49 code
32
+
33
+ # variant = 5*8alphanum ; registered variants
34
+ # / (DIGIT 3alphanum)
35
+
36
+ # extension = singleton 1*("-" (2*8alphanum))
37
+
38
+ # ; Single alphanumerics
39
+ # ; "x" reserved for private use
40
+ # singleton = DIGIT ; 0 - 9
41
+ # / %x41-57 ; A - W
42
+ # / %x59-5A ; Y - Z
43
+ # / %x61-77 ; a - w
44
+ # / %x79-7A ; y - z
45
+
46
+ # privateuse = "x" 1*("-" (1*8alphanum))
47
+
48
+ # grandfathered = irregular ; non-redundant tags registered
49
+ # / regular ; during the RFC 3066 era
50
+
51
+ # irregular = "en-GB-oed" ; irregular tags do not match
52
+ # / "i-ami" ; the 'langtag' production and
53
+ # / "i-bnn" ; would not otherwise be
54
+ # / "i-default" ; considered 'well-formed'
55
+ # / "i-enochian" ; These tags are all valid,
56
+ # / "i-hak" ; but most are deprecated
57
+ # / "i-klingon" ; in favor of more modern
58
+ # / "i-lux" ; subtags or subtag
59
+ # / "i-mingo" ; combination
60
+ # / "i-navajo"
61
+ # / "i-pwn"
62
+ # / "i-tao"
63
+ # / "i-tay"
64
+ # / "i-tsu"
65
+ # / "sgn-BE-FR"
66
+ # / "sgn-BE-NL"
67
+ # / "sgn-CH-DE"
68
+
69
+ # regular = "art-lojban" ; these tags match the 'langtag'
70
+ # / "cel-gaulish" ; production, but their subtags
71
+ # / "no-bok" ; are not extended language
72
+ # / "no-nyn" ; or variant subtags: their meaning
73
+ # / "zh-guoyu" ; is defined by their registration
74
+ # / "zh-hakka" ; and all of these are deprecated
75
+ # / "zh-min" ; in favor of a more modern
76
+ # / "zh-min-nan" ; subtag or sequence of subtags
77
+ # / "zh-xiang"
78
+
79
+ # alphanum = (ALPHA / DIGIT) ; letters and numbers
80
+
81
+ # Simplified check. Not implementing high level privateuse / grandfathered.
82
+ # Should replace with a proper check at some point.
83
+ ALPHANUM = /[a-zA-Z\d]/
84
+ SINGLETON = /[\dA-WY-Za-wy-z]/
85
+
86
+ EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/
87
+
88
+ LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
89
+ SCRIPT = /[a-zA-Z]{4}/
90
+ REGION = /([a-zA-Z]{2}|\d{3})/
91
+ VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
92
+ EXTENSION = /#{SINGLETON}(-[a-zA-Z]{2,8})+/
93
+ PRIVATEUSE = /x(-#{ALPHANUM}{1,8})+/
94
+
95
+ # Ruby .match only keeps the first captured group, so expressions like variants/extensions we need to keep everything
96
+ # in one captured group, then break them down in multipe groups separately
97
+ LANGTAG = %r{
98
+ (?<language>#{LANGUAGE})
99
+ (-(?<script>#{SCRIPT}))?
100
+ (-(?<region>#{REGION}))?
101
+ (?<variants>(-#{VARIANT})*)
102
+ (?<extensions>(-#{EXTENSION})*)
103
+ (-(?<private>#{PRIVATEUSE}))?
104
+ }x
105
+
106
+ LANGUAGE_TAG = /\A#{LANGTAG}\z/
107
+
108
+ class << self
109
+ def parse(language_tag)
110
+ return unless match = language_tag.match(LANGUAGE_TAG)
111
+
112
+ named_captures(match).tap do |captures|
113
+ captures['variants'] = captures['variants'].to_s.empty? ? [] : captures['variants'][/-(.*)/, 1].split('-').sort
114
+ captures['extensions'] = split_extensions(captures['extensions'])
115
+ captures['private'] = captures['private'].to_s.empty? ? [] : captures['private'][/x-(.*)/, 1].split('-').sort
116
+ end
115
117
  end
116
- end
117
118
 
118
- private
119
+ private
119
120
 
120
- def named_captures(match)
121
- return match.named_captures if match.respond_to?(:named_captures)
121
+ def named_captures(match)
122
+ return match.named_captures if match.respond_to?(:named_captures)
122
123
 
123
- match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
124
- end
124
+ match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
125
+ end
125
126
 
126
- def split_extensions(extensions)
127
- return [] if extensions.to_s.empty?
127
+ def split_extensions(extensions)
128
+ return [] if extensions.to_s.empty?
128
129
 
129
- # [["u-attr-co-phonebk"], ["t-und-cyrl"]]
130
- extensions = extensions.scan(/\b(?<ext>#{EXTENSION})\b/)
131
- # [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
132
- extensions.flatten.sort.map { |st| st.split('-', 2) }
130
+ # [["u-attr-co-phonebk"], ["t-und-cyrl"]]
131
+ extensions = extensions.scan(/\b(?<ext>#{EXTENSION})\b/)
132
+ # [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
133
+ extensions.flatten.sort.map { |st| st.split('-', 2) }
134
+ end
133
135
  end
134
136
  end
135
137
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BCP47
4
- VERSION = '0.2.0'.freeze
4
+ VERSION = '0.2.1'.freeze
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bcp47_spec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Igor Justino