bcp47_spec 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b7ac3631986a8b1fd3df0f12f8a7358ba0523dbdb7d16c5bba5b61a53ae5a02
4
- data.tar.gz: 99813e4da3415417889603d1ee9daab2c31b5468ecc1246cb7095140b49791d0
3
+ metadata.gz: 2039159ce3a6ad3a971a0fd8d0119c83ef6727447651cc4eacfe0a2cca9dab15
4
+ data.tar.gz: d3c6f3bf99c68cfe41612d26ca2ce50de63244cc2928c1ec4c6532368cdecb42
5
5
  SHA512:
6
- metadata.gz: 27717b3a58240861bd7bbfa8ed2ac94370d01039c0c2ae002959fdd5a5e98e4a2e3f67b262120c46347f9ce9d4cfc00cdd9dd2dc39e70f5bca0312979b9f4a6f
7
- data.tar.gz: d36beae7a694d77563b09cac2958515edc9f6d630bf54f01786fa13e7f57defc3d5639f42a1b2e429e97d74b59c8b38c9f757efe5a95f51c9b7b191afbc8fe1f
6
+ metadata.gz: 32ad050b2858a0300439ef971602706f387722e52ca8b64be443bc325fd0c8523e33eecbdc63a20c01b583b33da90578a15b16744574749b523813e2618cf4d8
7
+ data.tar.gz: 792e8721f1b5b894f0632e4b64a7734a93a8f8900cc00faae9a0cef4cdc862c6a7fe96014177a048804ab4ff4d1872b8b1c6313cf2392e78eacae72b7eb179ea
@@ -1,135 +1,137 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module BCP47::Parser
4
- # https://tools.ietf.org/html/bcp47#section-2.1
5
- # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234
6
-
7
- # Language-Tag = langtag ; normal language tags
8
- # / privateuse ; private use tag
9
- # / grandfathered ; grandfathered tags
10
-
11
- # langtag = language
12
- # ["-" script]
13
- # ["-" region]
14
- # *("-" variant)
15
- # *("-" extension)
16
- # ["-" privateuse]
17
-
18
- # language = 2*3ALPHA ; shortest ISO 639 code
19
- # ["-" extlang] ; sometimes followed by
20
- # ; extended language subtags
21
- # / 4ALPHA ; or reserved for future use
22
- # / 5*8ALPHA ; or registered language subtag
23
-
24
- # extlang = 3ALPHA ; selected ISO 639 codes
25
- # *2("-" 3ALPHA) ; permanently reserved
26
-
27
- # script = 4ALPHA ; ISO 15924 code
28
-
29
- # region = 2ALPHA ; ISO 3166-1 code
30
- # / 3DIGIT ; UN M.49 code
31
-
32
- # variant = 5*8alphanum ; registered variants
33
- # / (DIGIT 3alphanum)
34
-
35
- # extension = singleton 1*("-" (2*8alphanum))
36
-
37
- # ; Single alphanumerics
38
- # ; "x" reserved for private use
39
- # singleton = DIGIT ; 0 - 9
40
- # / %x41-57 ; A - W
41
- # / %x59-5A ; Y - Z
42
- # / %x61-77 ; a - w
43
- # / %x79-7A ; y - z
44
-
45
- # privateuse = "x" 1*("-" (1*8alphanum))
46
-
47
- # grandfathered = irregular ; non-redundant tags registered
48
- # / regular ; during the RFC 3066 era
49
-
50
- # irregular = "en-GB-oed" ; irregular tags do not match
51
- # / "i-ami" ; the 'langtag' production and
52
- # / "i-bnn" ; would not otherwise be
53
- # / "i-default" ; considered 'well-formed'
54
- # / "i-enochian" ; These tags are all valid,
55
- # / "i-hak" ; but most are deprecated
56
- # / "i-klingon" ; in favor of more modern
57
- # / "i-lux" ; subtags or subtag
58
- # / "i-mingo" ; combination
59
- # / "i-navajo"
60
- # / "i-pwn"
61
- # / "i-tao"
62
- # / "i-tay"
63
- # / "i-tsu"
64
- # / "sgn-BE-FR"
65
- # / "sgn-BE-NL"
66
- # / "sgn-CH-DE"
67
-
68
- # regular = "art-lojban" ; these tags match the 'langtag'
69
- # / "cel-gaulish" ; production, but their subtags
70
- # / "no-bok" ; are not extended language
71
- # / "no-nyn" ; or variant subtags: their meaning
72
- # / "zh-guoyu" ; is defined by their registration
73
- # / "zh-hakka" ; and all of these are deprecated
74
- # / "zh-min" ; in favor of a more modern
75
- # / "zh-min-nan" ; subtag or sequence of subtags
76
- # / "zh-xiang"
77
-
78
- # alphanum = (ALPHA / DIGIT) ; letters and numbers
79
-
80
- # Simplified check. Not implementing high level privateuse / grandfathered.
81
- # Should replace with a proper check at some point.
82
- ALPHANUM = /[a-zA-Z\d]/
83
- SINGLETON = /[\dA-WY-Za-wy-z]/
84
-
85
- EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/
86
-
87
- LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
88
- SCRIPT = /[a-zA-Z]{4}/
89
- REGION = /([a-zA-Z]{2}|\d{3})/
90
- VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
91
- EXTENSION = /#{SINGLETON}(-[a-zA-Z]{2,8})+/
92
- PRIVATEUSE = /x(-#{ALPHANUM}{1,8})+/
93
-
94
- # Ruby's .match keeps only one capture per group, so for repeatable parts like variants/extensions we keep everything
95
- # in one captured group, then break it down into multiple groups separately
96
- LANGTAG = %r{
97
- (?<language>#{LANGUAGE})
98
- (-(?<script>#{SCRIPT}))?
99
- (-(?<region>#{REGION}))?
100
- (?<variants>(-#{VARIANT})*)
101
- (?<extensions>(-#{EXTENSION})*)
102
- (-(?<private>#{PRIVATEUSE}))?
103
- }x
104
-
105
- LANGUAGE_TAG = /\A#{LANGTAG}\z/
106
-
107
- class << self
108
- def parse(language_tag)
109
- return unless match = language_tag.match(LANGUAGE_TAG)
110
-
111
- named_captures(match).tap do |captures|
112
- captures['variants'] = captures['variants'].to_s.empty? ? [] : captures['variants'][/-(.*)/, 1].split('-').sort
113
- captures['extensions'] = split_extensions(captures['extensions'])
114
- captures['private'] = captures['private'].to_s.empty? ? [] : captures['private'][/x-(.*)/, 1].split('-').sort
3
+ module BCP47
4
+ module Parser
5
+ # https://tools.ietf.org/html/bcp47#section-2.1
6
+ # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234
7
+
8
+ # Language-Tag = langtag ; normal language tags
9
+ # / privateuse ; private use tag
10
+ # / grandfathered ; grandfathered tags
11
+
12
+ # langtag = language
13
+ # ["-" script]
14
+ # ["-" region]
15
+ # *("-" variant)
16
+ # *("-" extension)
17
+ # ["-" privateuse]
18
+
19
+ # language = 2*3ALPHA ; shortest ISO 639 code
20
+ # ["-" extlang] ; sometimes followed by
21
+ # ; extended language subtags
22
+ # / 4ALPHA ; or reserved for future use
23
+ # / 5*8ALPHA ; or registered language subtag
24
+
25
+ # extlang = 3ALPHA ; selected ISO 639 codes
26
+ # *2("-" 3ALPHA) ; permanently reserved
27
+
28
+ # script = 4ALPHA ; ISO 15924 code
29
+
30
+ # region = 2ALPHA ; ISO 3166-1 code
31
+ # / 3DIGIT ; UN M.49 code
32
+
33
+ # variant = 5*8alphanum ; registered variants
34
+ # / (DIGIT 3alphanum)
35
+
36
+ # extension = singleton 1*("-" (2*8alphanum))
37
+
38
+ # ; Single alphanumerics
39
+ # ; "x" reserved for private use
40
+ # singleton = DIGIT ; 0 - 9
41
+ # / %x41-57 ; A - W
42
+ # / %x59-5A ; Y - Z
43
+ # / %x61-77 ; a - w
44
+ # / %x79-7A ; y - z
45
+
46
+ # privateuse = "x" 1*("-" (1*8alphanum))
47
+
48
+ # grandfathered = irregular ; non-redundant tags registered
49
+ # / regular ; during the RFC 3066 era
50
+
51
+ # irregular = "en-GB-oed" ; irregular tags do not match
52
+ # / "i-ami" ; the 'langtag' production and
53
+ # / "i-bnn" ; would not otherwise be
54
+ # / "i-default" ; considered 'well-formed'
55
+ # / "i-enochian" ; These tags are all valid,
56
+ # / "i-hak" ; but most are deprecated
57
+ # / "i-klingon" ; in favor of more modern
58
+ # / "i-lux" ; subtags or subtag
59
+ # / "i-mingo" ; combination
60
+ # / "i-navajo"
61
+ # / "i-pwn"
62
+ # / "i-tao"
63
+ # / "i-tay"
64
+ # / "i-tsu"
65
+ # / "sgn-BE-FR"
66
+ # / "sgn-BE-NL"
67
+ # / "sgn-CH-DE"
68
+
69
+ # regular = "art-lojban" ; these tags match the 'langtag'
70
+ # / "cel-gaulish" ; production, but their subtags
71
+ # / "no-bok" ; are not extended language
72
+ # / "no-nyn" ; or variant subtags: their meaning
73
+ # / "zh-guoyu" ; is defined by their registration
74
+ # / "zh-hakka" ; and all of these are deprecated
75
+ # / "zh-min" ; in favor of a more modern
76
+ # / "zh-min-nan" ; subtag or sequence of subtags
77
+ # / "zh-xiang"
78
+
79
+ # alphanum = (ALPHA / DIGIT) ; letters and numbers
80
+
81
+ # Simplified check. Not implementing high level privateuse / grandfathered.
82
+ # Should replace with a proper check at some point.
83
+ ALPHANUM = /[a-zA-Z\d]/
84
+ SINGLETON = /[\dA-WY-Za-wy-z]/
85
+
86
+ EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/
87
+
88
+ LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
89
+ SCRIPT = /[a-zA-Z]{4}/
90
+ REGION = /([a-zA-Z]{2}|\d{3})/
91
+ VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
92
+ EXTENSION = /#{SINGLETON}(-[a-zA-Z]{2,8})+/
93
+ PRIVATEUSE = /x(-#{ALPHANUM}{1,8})+/
94
+
95
+ # Ruby's .match keeps only one capture per group, so for repeatable parts like variants/extensions we keep everything
96
+ # in one captured group, then break it down into multiple groups separately
97
+ LANGTAG = %r{
98
+ (?<language>#{LANGUAGE})
99
+ (-(?<script>#{SCRIPT}))?
100
+ (-(?<region>#{REGION}))?
101
+ (?<variants>(-#{VARIANT})*)
102
+ (?<extensions>(-#{EXTENSION})*)
103
+ (-(?<private>#{PRIVATEUSE}))?
104
+ }x
105
+
106
+ LANGUAGE_TAG = /\A#{LANGTAG}\z/
107
+
108
+ class << self
109
+ def parse(language_tag)
110
+ return unless match = language_tag.match(LANGUAGE_TAG)
111
+
112
+ named_captures(match).tap do |captures|
113
+ captures['variants'] = captures['variants'].to_s.empty? ? [] : captures['variants'][/-(.*)/, 1].split('-').sort
114
+ captures['extensions'] = split_extensions(captures['extensions'])
115
+ captures['private'] = captures['private'].to_s.empty? ? [] : captures['private'][/x-(.*)/, 1].split('-').sort
116
+ end
115
117
  end
116
- end
117
118
 
118
- private
119
+ private
119
120
 
120
- def named_captures(match)
121
- return match.named_captures if match.respond_to?(:named_captures)
121
+ def named_captures(match)
122
+ return match.named_captures if match.respond_to?(:named_captures)
122
123
 
123
- match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
124
- end
124
+ match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
125
+ end
125
126
 
126
- def split_extensions(extensions)
127
- return [] if extensions.to_s.empty?
127
+ def split_extensions(extensions)
128
+ return [] if extensions.to_s.empty?
128
129
 
129
- # [["u-attr-co-phonebk"], ["t-und-cyrl"]]
130
- extensions = extensions.scan(/\b(?<ext>#{EXTENSION})\b/)
131
- # [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
132
- extensions.flatten.sort.map { |st| st.split('-', 2) }
130
+ # [["u-attr-co-phonebk"], ["t-und-cyrl"]]
131
+ extensions = extensions.scan(/\b(?<ext>#{EXTENSION})\b/)
132
+ # [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
133
+ extensions.flatten.sort.map { |st| st.split('-', 2) }
134
+ end
133
135
  end
134
136
  end
135
137
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BCP47
4
- VERSION = '0.2.0'.freeze
4
+ VERSION = '0.2.1'.freeze
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bcp47_spec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Igor Justino