bcp47_spec 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1b7ac3631986a8b1fd3df0f12f8a7358ba0523dbdb7d16c5bba5b61a53ae5a02
4
+ data.tar.gz: 99813e4da3415417889603d1ee9daab2c31b5468ecc1246cb7095140b49791d0
5
+ SHA512:
6
+ metadata.gz: 27717b3a58240861bd7bbfa8ed2ac94370d01039c0c2ae002959fdd5a5e98e4a2e3f67b262120c46347f9ce9d4cfc00cdd9dd2dc39e70f5bca0312979b9f4a6f
7
+ data.tar.gz: d36beae7a694d77563b09cac2958515edc9f6d630bf54f01786fa13e7f57defc3d5639f42a1b2e429e97d74b59c8b38c9f757efe5a95f51c9b7b191afbc8fe1f
data/Readme.md ADDED
@@ -0,0 +1,45 @@
1
+ # bcp47_spec [![Build Status](https://travis-ci.org/dadah89/bcp47_spec.svg?branch=master)](https://travis-ci.org/dadah89/bcp47_spec)
2
+ A subset of the BCP 47 spec: https://tools.ietf.org/html/bcp47
3
+
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'bcp47_spec'
10
+ ```
11
+
12
+ And then execute:
13
+ ```
14
+ $ bundle
15
+ ```
16
+
17
+ Or install it yourself as:
18
+ ```
19
+ $ gem install bcp47_spec
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ BCP47.valid?('de-Latn-DE-1996-u-attr-co-phonebk-t-und-cyrl-x-private-test') # true
26
+ ```
27
+
28
+ ```ruby
29
+ tag = BCP47.parse('de-Latn-DE-1996-u-attr-co-phonebk-t-und-cyrl-x-private-test')
30
+ tag.language # de
31
+ tag.script # Latn
32
+ tag.region # DE
33
+ tag.variants # ['1996']
34
+ tag.extensions # [['t', 'und-cyrl'], ['u', 'attr-co-phonebk']]
35
+ tag.private # ['private', 'test']
36
+ ```
37
+
38
+ ```ruby
39
+ BCP47.parse('blahblahblah') # raises InvalidLanguageTag
40
+ ```
41
+
42
+ ## TODO
43
+
44
+ * Parse private and grandfathered language tags (maybe?)
45
+ * Parse extensions T (https://tools.ietf.org/html/rfc6497) and U (https://tools.ietf.org/html/rfc6067)
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
module BCP47
  # Regular-expression based parser for the `langtag` production of BCP 47.
  #
  # Stand-alone private-use tags ("x-...") and irregular grandfathered tags
  # (e.g. "i-klingon", "en-GB-oed") do not match the `langtag` production and
  # are therefore rejected. Subtags are only checked for well-formedness; they
  # are not looked up in the IANA registry.
  #
  # The module is opened as `module BCP47; module Parser` (rather than
  # `module BCP47::Parser`) so that this file can be loaded before any other
  # file has defined the BCP47 namespace.
  module Parser
    # https://tools.ietf.org/html/bcp47#section-2.1
    # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234
    #
    #  Language-Tag  = langtag             ; normal language tags
    #                / privateuse          ; private use tag
    #                / grandfathered       ; grandfathered tags
    #
    #  langtag       = language
    #                  ["-" script]
    #                  ["-" region]
    #                  *("-" variant)
    #                  *("-" extension)
    #                  ["-" privateuse]
    #
    #  language      = 2*3ALPHA            ; shortest ISO 639 code
    #                  ["-" extlang]       ; sometimes followed by
    #                                      ; extended language subtags
    #                / 4ALPHA              ; or reserved for future use
    #                / 5*8ALPHA            ; or registered language subtag
    #
    #  extlang       = 3ALPHA              ; selected ISO 639 codes
    #                  *2("-" 3ALPHA)      ; permanently reserved
    #
    #  script        = 4ALPHA              ; ISO 15924 code
    #
    #  region        = 2ALPHA              ; ISO 3166-1 code
    #                / 3DIGIT              ; UN M.49 code
    #
    #  variant       = 5*8alphanum         ; registered variants
    #                / (DIGIT 3alphanum)
    #
    #  extension     = singleton 1*("-" (2*8alphanum))
    #
    #                                      ; Single alphanumerics
    #                                      ; "x" reserved for private use
    #  singleton     = DIGIT               ; 0 - 9
    #                / %x41-57             ; A - W
    #                / %x59-5A             ; Y - Z
    #                / %x61-77             ; a - w
    #                / %x79-7A             ; y - z
    #
    #  privateuse    = "x" 1*("-" (1*8alphanum))
    #
    #  grandfathered = irregular           ; non-redundant tags registered
    #                / regular             ; during the RFC 3066 era
    #
    #  irregular     = "en-GB-oed"         ; irregular tags do not match
    #                / "i-ami"             ; the 'langtag' production and
    #                / "i-bnn"             ; would not otherwise be
    #                / "i-default"         ; considered 'well-formed'
    #                / "i-enochian"        ; These tags are all valid,
    #                / "i-hak"             ; but most are deprecated
    #                / "i-klingon"         ; in favor of more modern
    #                / "i-lux"             ; subtags or subtag
    #                / "i-mingo"           ; combination
    #                / "i-navajo"
    #                / "i-pwn"
    #                / "i-tao"
    #                / "i-tay"
    #                / "i-tsu"
    #                / "sgn-BE-FR"
    #                / "sgn-BE-NL"
    #                / "sgn-CH-DE"
    #
    #  regular       = "art-lojban"        ; these tags match the 'langtag'
    #                / "cel-gaulish"       ; production, but their subtags
    #                / "no-bok"            ; are not extended language
    #                / "no-nyn"            ; or variant subtags: their meaning
    #                / "zh-guoyu"          ; is defined by their registration
    #                / "zh-hakka"          ; and all of these are deprecated
    #                / "zh-min"            ; in favor of a more modern
    #                / "zh-min-nan"        ; subtag or sequence of subtags
    #                / "zh-xiang"
    #
    #  alphanum      = (ALPHA / DIGIT)     ; letters and numbers

    # Simplified check. Top-level privateuse / grandfathered tags are not implemented.
    # Should be replaced with a proper check at some point.
    ALPHANUM = /[a-zA-Z\d]/
    SINGLETON = /[\dA-WY-Za-wy-z]/

    EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/

    LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
    SCRIPT = /[a-zA-Z]{4}/
    REGION = /([a-zA-Z]{2}|\d{3})/
    VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
    # Extension subtags are `2*8alphanum`, so digits must be accepted (e.g. "t-m0-ungegn").
    EXTENSION = /#{SINGLETON}(-#{ALPHANUM}{2,8})+/
    # ABNF string literals are case-insensitive, so both "x" and "X" introduce private use.
    PRIVATEUSE = /[xX](-#{ALPHANUM}{1,8})+/

    # A repeated capture group only retains its last repetition, so for repeatable
    # parts (variants, extensions) the whole run is captured as a single named
    # group here and broken down into individual subtags afterwards.
    LANGTAG = %r{
      (?<language>#{LANGUAGE})
      (-(?<script>#{SCRIPT}))?
      (-(?<region>#{REGION}))?
      (?<variants>(-#{VARIANT})*)
      (?<extensions>(-#{EXTENSION})*)
      (-(?<private>#{PRIVATEUSE}))?
    }x

    LANGUAGE_TAG = /\A#{LANGTAG}\z/

    # Picks the individual "singleton-subtag-..." extensions out of the captured run.
    # Because a named group is present, the unnamed group inside EXTENSION does not capture.
    EXTENSION_SCAN = /\b(?<ext>#{EXTENSION})\b/

    class << self
      # Parses a language tag against the `langtag` grammar.
      #
      # @param language_tag [#to_s] candidate tag, e.g. "de-Latn-DE-1996-x-foo";
      #   nil is treated as an empty (and therefore invalid) tag
      # @return [Hash{String => Object}, nil] nil when the tag is not well-formed,
      #   otherwise a hash with the keys
      #   'language', 'script', 'region' (String or nil),
      #   'variants' (sorted Array of Strings),
      #   'extensions' (Array of [singleton, rest] pairs sorted by singleton) and
      #   'private' (sorted Array of Strings)
      def parse(language_tag)
        match = LANGUAGE_TAG.match(language_tag.to_s)
        return unless match

        named_captures(match).tap do |captures|
          captures['variants'] = split_subtags(captures['variants'])
          captures['extensions'] = split_extensions(captures['extensions'])
          captures['private'] = split_subtags(captures['private'])
        end
      end

      private

      # MatchData#named_captures with a fallback for Rubies that lack it.
      def named_captures(match)
        return match.named_captures if match.respond_to?(:named_captures)

        match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
      end

      # Splits a captured run into sorted subtags, discarding the first element:
      # the empty string before the leading hyphen of "-1996-abcde", or the
      # "x" singleton of "x-private-test".
      def split_subtags(run)
        return [] if run.to_s.empty?

        run.split('-').drop(1).sort
      end

      # "-u-attr-co-phonebk-t-und-cyrl" => [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
      def split_extensions(extensions)
        return [] if extensions.to_s.empty?

        # scan yields [["u-attr-co-phonebk"], ["t-und-cyrl"]]
        extensions.scan(EXTENSION_SCAN).flatten.sort.map { |ext| ext.split('-', 2) }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module BCP47
  # Version string of the bcp47_spec gem; kept frozen so it cannot be mutated.
  VERSION = '0.2.0'.freeze
end
data/lib/bcp47_spec.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bcp47_spec/parser'
4
+ require 'bcp47_spec/version'
5
+
6
# Public entry points of the gem: validation and parsing of language tags.
module BCP47
  # Raised by BCP47.parse when Parser.parse does not recognise the tag.
  class InvalidLanguageTag < StandardError; end

  # Value object returned by BCP47.parse; one member per key of the hash
  # produced by Parser.parse.
  Tag = Struct.new(:language, :script, :region, :variants, :extensions, :private)

  # Tells whether Parser.parse recognises the tag.
  #
  # @param language_tag [String] candidate tag
  # @return [Boolean] false when Parser.parse returns nil, true otherwise
  def self.valid?(language_tag)
    parsed = Parser.parse(language_tag)
    !parsed.nil?
  end

  # Parses the tag into a Tag struct.
  #
  # @param language_tag [String] candidate tag
  # @return [Tag] struct filled from the parser's captures
  # @raise [InvalidLanguageTag] when Parser.parse returns nil (or false)
  def self.parse(language_tag)
    captures = Parser.parse(language_tag)
    raise InvalidLanguageTag, "Invalid language tag: #{language_tag}" unless captures

    # The parser keys its hash by String, while Struct members are Symbols.
    keys = Tag.members.map(&:to_s)
    Tag.new(*captures.values_at(*keys))
  end
end
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bcp47_spec
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Igor Justino
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-09-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bump
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: maxitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mocha
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: single_cov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description:
126
+ email:
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - Readme.md
132
+ - lib/bcp47_spec.rb
133
+ - lib/bcp47_spec/parser.rb
134
+ - lib/bcp47_spec/version.rb
135
+ homepage: https://github.com/dadah89/bcp47_spec
136
+ licenses:
137
+ - MIT
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.7.6
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: A subset of the BCP 47 spec.
159
+ test_files: []