bcp47_spec 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1b7ac3631986a8b1fd3df0f12f8a7358ba0523dbdb7d16c5bba5b61a53ae5a02
4
+ data.tar.gz: 99813e4da3415417889603d1ee9daab2c31b5468ecc1246cb7095140b49791d0
5
+ SHA512:
6
+ metadata.gz: 27717b3a58240861bd7bbfa8ed2ac94370d01039c0c2ae002959fdd5a5e98e4a2e3f67b262120c46347f9ce9d4cfc00cdd9dd2dc39e70f5bca0312979b9f4a6f
7
+ data.tar.gz: d36beae7a694d77563b09cac2958515edc9f6d630bf54f01786fa13e7f57defc3d5639f42a1b2e429e97d74b59c8b38c9f757efe5a95f51c9b7b191afbc8fe1f
data/Readme.md ADDED
@@ -0,0 +1,45 @@
1
+ # bcp47_spec [![Build Status](https://travis-ci.org/dadah89/bcp47_spec.svg?branch=master)](https://travis-ci.org/dadah89/bcp47_spec)
2
+ A subset of the BCP 47 spec: https://tools.ietf.org/html/bcp47
3
+
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'bcp47_spec'
10
+ ```
11
+
12
+ And then execute:
13
+ ```
14
+ $ bundle
15
+ ```
16
+
17
+ Or install it yourself as:
18
+ ```
19
+ $ gem install bcp47_spec
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ BCP47.valid?('de-Latn-DE-1996-u-attr-co-phonebk-t-und-cyrl-x-private-test') # true
26
+ ```
27
+
28
+ ```ruby
29
+ tag = BCP47.parse('de-Latn-DE-1996-u-attr-co-phonebk-t-und-cyrl-x-private-test')
30
+ tag.language # de
31
+ tag.script # Latn
32
+ tag.region # DE
33
+ tag.variants # ['1996']
34
+ tag.extensions # [['t', 'und-cyrl'], ['u', 'attr-co-phonebk']]
35
+ tag.private # ['private', 'test']
36
+ ```
37
+
38
+ ```ruby
39
+ BCP47.parse('blahblahblah') # raises BCP47::InvalidLanguageTag
40
+ ```
41
+
42
+ ## TODO
43
+
44
+ * Parse private and grandfathered language tags (maybe?)
45
+ * Parse extensions T (https://tools.ietf.org/html/rfc6497) and U (https://tools.ietf.org/html/rfc6067)
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
# Nested (rather than compact `module BCP47::Parser`) so this file can be
# loaded before any other file has defined the BCP47 namespace.
module BCP47
  # Regexp-based well-formedness parser for a subset of BCP 47 language tags.
  #
  # Only the `langtag` production is recognised; top-level `privateuse` and
  # `grandfathered` tags are not.
  module Parser
    # https://tools.ietf.org/html/bcp47#section-2.1
    # Augmented BNF for Syntax Specifications: https://tools.ietf.org/html/rfc5234

    # Language-Tag  = langtag             ; normal language tags
    #               / privateuse          ; private use tag
    #               / grandfathered       ; grandfathered tags

    # langtag       = language
    #                 ["-" script]
    #                 ["-" region]
    #                 *("-" variant)
    #                 *("-" extension)
    #                 ["-" privateuse]

    # language      = 2*3ALPHA            ; shortest ISO 639 code
    #                 ["-" extlang]       ; sometimes followed by
    #                                     ; extended language subtags
    #               / 4ALPHA              ; or reserved for future use
    #               / 5*8ALPHA            ; or registered language subtag

    # extlang       = 3ALPHA              ; selected ISO 639 codes
    #                 *2("-" 3ALPHA)      ; permanently reserved

    # script        = 4ALPHA              ; ISO 15924 code

    # region        = 2ALPHA              ; ISO 3166-1 code
    #               / 3DIGIT              ; UN M.49 code

    # variant       = 5*8alphanum         ; registered variants
    #               / (DIGIT 3alphanum)

    # extension     = singleton 1*("-" (2*8alphanum))

    #                                     ; Single alphanumerics
    #                                     ; "x" reserved for private use
    # singleton     = DIGIT               ; 0 - 9
    #               / %x41-57             ; A - W
    #               / %x59-5A             ; Y - Z
    #               / %x61-77             ; a - w
    #               / %x79-7A             ; y - z

    # privateuse    = "x" 1*("-" (1*8alphanum))

    # grandfathered = irregular           ; non-redundant tags registered
    #               / regular             ; during the RFC 3066 era

    # irregular     = "en-GB-oed"         ; irregular tags do not match
    #               / "i-ami"             ; the 'langtag' production and
    #               / "i-bnn"             ; would not otherwise be
    #               / "i-default"         ; considered 'well-formed'
    #               / "i-enochian"        ; These tags are all valid,
    #               / "i-hak"             ; but most are deprecated
    #               / "i-klingon"         ; in favor of more modern
    #               / "i-lux"             ; subtags or subtag
    #               / "i-mingo"           ; combination
    #               / "i-navajo"
    #               / "i-pwn"
    #               / "i-tao"
    #               / "i-tay"
    #               / "i-tsu"
    #               / "sgn-BE-FR"
    #               / "sgn-BE-NL"
    #               / "sgn-CH-DE"

    # regular       = "art-lojban"        ; these tags match the 'langtag'
    #               / "cel-gaulish"       ; production, but their subtags
    #               / "no-bok"            ; are not extended language
    #               / "no-nyn"            ; or variant subtags: their meaning
    #               / "zh-guoyu"          ; is defined by their registration
    #               / "zh-hakka"          ; and all of these are deprecated
    #               / "zh-min"            ; in favor of a more modern
    #               / "zh-min-nan"        ; subtag or sequence of subtags
    #               / "zh-xiang"

    # alphanum      = (ALPHA / DIGIT)     ; letters and numbers

    # Simplified check. Not implementing high level privateuse / grandfathered.
    # Should replace with a proper check at some point.
    ALPHANUM = /[a-zA-Z\d]/
    SINGLETON = /[\dA-WY-Za-wy-z]/

    EXTLANG = /[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2}/

    LANGUAGE = /([a-zA-Z]{2,3}(-#{EXTLANG})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})/
    SCRIPT = /[a-zA-Z]{4}/
    REGION = /([a-zA-Z]{2}|\d{3})/
    VARIANT = /(#{ALPHANUM}{5,8}|\d#{ALPHANUM}{3})/
    # Extension subtags are 2*8alphanum in the ABNF above, so digits are allowed
    # (e.g. "t-m0-names").
    EXTENSION = /#{SINGLETON}(-#{ALPHANUM}{2,8})+/
    # The private-use marker is accepted in either case, like every other
    # subtag here; SINGLETON already excludes both "x" and "X".
    PRIVATEUSE = /[xX](-#{ALPHANUM}{1,8})+/

    # A repeated capture group only retains its last repetition, so repeated
    # parts (variants, extensions) are captured whole in a single named group
    # and broken down into their subtags separately.
    LANGTAG = %r{
      (?<language>#{LANGUAGE})
      (-(?<script>#{SCRIPT}))?
      (-(?<region>#{REGION}))?
      (?<variants>(-#{VARIANT})*)
      (?<extensions>(-#{EXTENSION})*)
      (-(?<private>#{PRIVATEUSE}))?
    }x

    LANGUAGE_TAG = /\A#{LANGTAG}\z/

    class << self
      # Parses +language_tag+ against the `langtag` production.
      #
      # @param language_tag [#to_s] the tag to parse; nil is treated as an
      #   empty string and therefore does not match
      # @return [Hash{String => Object}, nil] nil when the tag does not match;
      #   otherwise a hash with the keys 'language', 'script', 'region'
      #   (String or nil), 'variants' and 'private' (sorted Arrays of subtag
      #   Strings) and 'extensions' (Array of [singleton, rest] pairs, sorted)
      def parse(language_tag)
        match = LANGUAGE_TAG.match(language_tag.to_s)
        return unless match

        named_captures(match).tap do |captures|
          captures['variants'] = subtags(captures['variants'])
          captures['extensions'] = split_extensions(captures['extensions'])
          captures['private'] = subtags(captures['private'])
        end
      end

      private

      # Returns the named captures of +match+ as a Hash keyed by group name,
      # falling back to a manual build when MatchData#named_captures is missing.
      def named_captures(match)
        return match.named_captures if match.respond_to?(:named_captures)

        match.names.each_with_object({}) { |name, acc| acc[name] = match[name] }
      end

      # Splits a captured run into sorted subtags, discarding the leading
      # element: the empty string before the first "-" of a variants run, or
      # the "x" marker of a private-use run. nil/empty input yields [].
      def subtags(captured)
        captured.to_s.split('-').drop(1).sort
      end

      # Breaks the captured extensions run into [singleton, subtags] pairs.
      def split_extensions(extensions)
        return [] if extensions.to_s.empty?

        # [["u-attr-co-phonebk"], ["t-und-cyrl"]]
        extensions = extensions.scan(/\b(?<ext>#{EXTENSION})\b/)
        # [["t", "und-cyrl"], ["u", "attr-co-phonebk"]]
        extensions.flatten.sort.map { |st| st.split('-', 2) }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the bcp47_spec gem.
module BCP47
  # Version of the gem, kept as a plain string literal.
  VERSION = '0.2.0'.freeze
end
data/lib/bcp47_spec.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bcp47_spec/parser'
4
+ require 'bcp47_spec/version'
5
+
6
# Public entry points for validating and parsing language tags.
module BCP47
  # Raised by BCP47.parse when the parser does not recognise the tag.
  class InvalidLanguageTag < StandardError; end

  # Value object holding the parsed parts of a language tag.
  Tag = Struct.new(:language, :script, :region, :variants, :extensions, :private)

  # Tells whether the parser recognises +language_tag+.
  #
  # @return [Boolean] false only when Parser.parse returns nil
  def self.valid?(language_tag)
    parsed = Parser.parse(language_tag)
    !parsed.nil?
  end

  # Parses +language_tag+ into a Tag.
  #
  # @return [Tag] built from the parser result, one value per Struct member
  # @raise [InvalidLanguageTag] when the parser returns nothing for the tag
  def self.parse(language_tag)
    parsed = Parser.parse(language_tag)
    raise InvalidLanguageTag, "Invalid language tag: #{language_tag}" unless parsed

    # The parser result is keyed by strings named after the Struct members.
    values = Tag.members.map { |member| parsed[member.to_s] }
    Tag.new(*values)
  end
end
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bcp47_spec
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Igor Justino
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-09-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bump
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: maxitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mocha
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: single_cov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description:
126
+ email:
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - Readme.md
132
+ - lib/bcp47_spec.rb
133
+ - lib/bcp47_spec/parser.rb
134
+ - lib/bcp47_spec/version.rb
135
+ homepage: https://github.com/dadah89/bcp47_spec
136
+ licenses:
137
+ - MIT
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.7.6
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: A subset of the BCP 47 spec.
159
+ test_files: []