lang 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +46 -0
- data/bin/lang +150 -0
- data/lib/lang/subtags.rb +147 -0
- data/lib/lang/subtags/entry.rb +40 -0
- data/lib/lang/subtags/extlang.rb +19 -0
- data/lib/lang/subtags/grandfathered.rb +9 -0
- data/lib/lang/subtags/language.rb +18 -0
- data/lib/lang/subtags/redundant.rb +9 -0
- data/lib/lang/subtags/region.rb +9 -0
- data/lib/lang/subtags/script.rb +9 -0
- data/lib/lang/subtags/variant.rb +17 -0
- data/lib/lang/tag.rb +141 -0
- data/lib/lang/tag/canonicalization.rb +376 -0
- data/lib/lang/tag/composition.rb +141 -0
- data/lib/lang/tag/filtering.rb +143 -0
- data/lib/lang/tag/grandfathered.rb +36 -0
- data/lib/lang/tag/langtag.rb +437 -0
- data/lib/lang/tag/lookup.rb +77 -0
- data/lib/lang/tag/pattern.rb +31 -0
- data/lib/lang/tag/privateuse.rb +34 -0
- data/lib/lang/version.rb +5 -0
- metadata +108 -0
data/lib/lang/tag.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
require 'lang/tag/pattern'
|
2
|
+
require 'lang/tag/composition'
|
3
|
+
require 'lang/tag/langtag'
|
4
|
+
require 'lang/tag/grandfathered'
|
5
|
+
require 'lang/tag/privateuse'
|
6
|
+
|
7
|
+
module Lang
|
8
|
+
|
9
|
+
# Builds a language tag object out of +thing+ by trying the available
# constructors in a fixed order: Tag::Grandfathered, then Tag::Langtag, then
# Tag::Privateuse. Whenever one of them raises a StandardError the next one
# gets its turn; the value of the first constructor that succeeds is returned.
#
# Raises ArgumentError when the last attempt fails as well.
#
def self.Tag(thing)
  #return thing if Tag::Composition === thing
  begin
    Tag::Grandfathered(thing)
  rescue StandardError
    begin
      Tag::Langtag(thing)
    rescue StandardError
      Tag::Privateuse(thing)
    end
  end
rescue StandardError
  raise ArgumentError, "#{thing.inspect} is not a language tag."
end
|
17
|
+
|
18
|
+
module Tag
|
19
|
+
|
20
|
+
# Root of the error hierarchy defined in this namespace.
class Error < StandardError; end

# Error subclass for a tag component that cannot be accepted
# (raised, for instance, when the language component is missing).
class InvalidComponentError < Error; end
|
25
|
+
|
26
|
+
#--
|
27
|
+
# Grandfathered tags that do not match the 'langtag' production in the
|
28
|
+
# ABNF and would otherwise be invalid are considered 'irregular'
|
29
|
+
# grandfathered tags. With the exception of "en-GB-oed", which is a
|
30
|
+
# variant of "en-GB", each of them, in its entirety, represents a
|
31
|
+
# language.
|
32
|
+
#++
|
33
|
+
|
34
|
+
# Irregular grandfathered tags (lower-cased) mapped to their preferred
# replacement; +nil+ marks a tag that has no replacement.
IRREGULAR = {
  'en-gb-oed'  => nil,
  'i-ami'      => 'ami',
  'i-bnn'      => 'bnn',
  'i-default'  => nil,
  'i-enochian' => nil,
  'i-hak'      => 'hak',
  'i-klingon'  => 'tlh',
  'i-lux'      => 'lb',
  'i-mingo'    => nil,
  'i-navajo'   => 'nv',
  'i-pwn'      => 'pwn',
  'i-tao'      => 'tao',
  'i-tay'      => 'tay',
  'i-tsu'      => 'tsu',
  'sgn-be-fr'  => 'sfb',
  'sgn-be-nl'  => 'vgt',
  'sgn-ch-de'  => 'sgg'
}.freeze
|
53
|
+
|
54
|
+
#--
|
55
|
+
# Grandfathered tags that (appear to) match the 'langtag' production in
|
56
|
+
# Figure 1 are considered 'regular' grandfathered tags. These tags
|
57
|
+
# contain one or more subtags that either do not individually appear in
|
58
|
+
# the registry or appear but with a different semantic meaning: each
|
59
|
+
# tag, in its entirety, represents a language or collection of
|
60
|
+
# languages.
|
61
|
+
#++
|
62
|
+
|
63
|
+
# Every grandfathered tag (lower-cased): the irregular ones plus the regular
# ones listed here, each mapped to its preferred replacement or +nil+.
GRANDFATHERED = IRREGULAR.merge({
  'art-lojban'  => 'jbo',
  'cel-gaulish' => nil,
  'no-bok'      => 'nb',
  'no-nyn'      => 'nn',
  'zh-guoyu'    => 'cmn',
  'zh-hakka'    => 'hak',
  'zh-min'      => nil,
  'zh-min-nan'  => 'nan',
  'zh-xiang'    => 'hsn'
}).freeze
|
74
|
+
|
75
|
+
# Separator placed between subtags.
HYPHEN = '-'.freeze
# The separator in "splitter" form: a Regexp on rubies older than 1.9.1, the
# plain string otherwise (presumably handed to String#split -- confirm
# against the callers).
HYPHEN_SPLITTER = RUBY_VERSION < '1.9.1' ? /-/.freeze : HYPHEN
# Singleton that introduces a private use sequence ('x-...').
PRIVATEUSE = 'x'.freeze

# Whole-string matchers for the individual components. They are anchored
# with \A and \z instead of ^ and $: the latter match at every line boundary,
# so a multi-line snippet such as "junk\nen" used to pass validation.
LANGUAGE_REGEX = /\A(?:#{PATTERN::LANGUAGE})\z/io.freeze
SCRIPT_REGEX = /\A(?:#{PATTERN::SCRIPT})\z/io.freeze
REGION_REGEX = /\A(?:#{PATTERN::REGION})\z/io.freeze
VARIANTS_SEQUENCE_REGEX = /\A(?:#{PATTERN::VARIANT_SEQUENCE}+)\z/io.freeze
EXTENSIONS_SEQUENCE_REGEX = /\A#{PATTERN::EXTENSION_SEQUENCE}+\z/io.freeze
# Splits an extensions sequence in front of each singleton (left unchanged).
EXTENSIONS_SEQUENCE_SPLITTER = /(?:^|-)(?=#{PATTERN::SINGLETON}-)/io.freeze
PRIVATEUSE_REGEX = /\A#{PATTERN::PRIVATEUSE}\z/io.freeze
|
85
|
+
|
86
|
+
# Matches a 'langtag' and captures, in order: language, script, region,
# variants sequence and extensions sequence. The trailing private use part is
# only asserted through a lookahead. Anchored with \A and \z (not ^ and $)
# so that the whole string has to be a tag, not merely one of its lines.
LANGTAG_REGEX = /\A
  (#{PATTERN::LANGUAGE})              (?# shortest ISO 639 code plus extlang or reserved for future use or registered language subtag)
  (?:-(#{PATTERN::SCRIPT}))?          (?# ISO 15924 code)
  (?:-(#{PATTERN::REGION}))?          (?# ISO 3166-1 code or UN M.49 code)
  (#{PATTERN::VARIANT_SEQUENCE}*)?    (?# registered variants)
  (#{PATTERN::EXTENSION_SEQUENCE}*)?  (?# extensions)
  (?=(?:-#{PATTERN::PRIVATEUSE})?\z)  (?# privateuse)
/iox.freeze
|
94
|
+
|
95
|
+
# Non-capturing well-formedness check for a 'langtag'; everything after the
# region is verified inside a single lookahead. Anchored with \A and \z (not
# ^ and $) so that a multi-line string cannot pass because one of its lines
# happens to be well-formed.
LANGTAG_WELLFORMEDNESS_REGEX = /\A
  (?:#{PATTERN::LOOSE_LANGUAGE})      (?# shortest ISO 639 code plus at most 3 extlangs or reserved for future use or registered language subtag)
  (?:-(?:#{PATTERN::SCRIPT}))?        (?# ISO 15924 code)
  (?:-(?:#{PATTERN::REGION}))?        (?# ISO 3166-1 code or UN M.49 code)
  (?=#{PATTERN::VARIANT_SEQUENCE}*    (?# registered variants)
  #{PATTERN::EXTENSION_SEQUENCE}*     (?# extensions)
  (?:-#{PATTERN::PRIVATEUSE})?\z)     (?# privateuse)
/iox.freeze
|
103
|
+
|
104
|
+
class << self

  # Checks if the +String+ passed represents a 'privateuse' language tag.
  # Works case-insensitively. Anything PRIVATEUSE_REGEX does not match
  # yields +false+.
  #
  def privateuse?(snippet)
    PRIVATEUSE_REGEX === snippet
  end

  # Checks if the +String+ passed represents a 'grandfathered' language tag.
  # Works case-insensitively.
  #
  # Returns +false+ (instead of raising NoMethodError) for +nil+ and for any
  # other object that cannot be down-cased, which keeps this predicate in
  # line with +privateuse?+.
  #
  def grandfathered?(snippet)
    return false unless snippet.respond_to?(:downcase)
    GRANDFATHERED.key?(snippet) || GRANDFATHERED.key?(snippet.downcase)
  end

  #--
  # RFC 5646, Section 2.2.9:
  # A tag is considered "well-formed" if it conforms to the ABNF
  # (Section 2.1). Language tags may be well-formed in terms of syntax
  # but not valid in terms of content. However, many operations
  # involving language tags work well without knowing anything about the
  # meaning or validity of the subtags.
  #++

  # Checks if the +String+ passed represents a well-formed language tag,
  # i.e. a privateuse tag, a grandfathered tag or a string matched by
  # LANGTAG_WELLFORMEDNESS_REGEX. Works case-insensitively.
  #
  def wellformed?(snippet)
    privateuse?(snippet) || grandfathered?(snippet) || LANGTAG_WELLFORMEDNESS_REGEX === snippet
  end

end
|
137
|
+
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# EOF
|
@@ -0,0 +1,376 @@
|
|
1
|
+
require 'lang/tag'
|
2
|
+
require 'lang/subtags'
|
3
|
+
|
4
|
+
module Lang #:nodoc:
|
5
|
+
module Tag
|
6
|
+
|
7
|
+
module Canonicalization
|
8
|
+
|
9
|
+
# Handles exceptions that might
|
10
|
+
# appear in canonicalization or validation processes.
|
11
|
+
#
|
12
|
+
# Raised by the canonicalization and validation routines of this module;
# derives from the Error constant visible in the enclosing scope.
class Error < Error; end
|
14
|
+
|
15
|
+
#--
|
16
|
+
# RFC 5646, Section 2.2.1
|
17
|
+
# The subtags in the range 'qaa' through 'qtz' are reserved for
|
18
|
+
# private use in language tags. These subtags correspond to codes
|
19
|
+
# reserved by ISO 639-2 for private use. These codes MAY be used
|
20
|
+
# for non-registered primary language subtags (instead of using
|
21
|
+
# private use subtags following 'x-').
|
22
|
+
#++
|
23
|
+
|
24
|
+
# Matches the private use primary language subtags 'qaa' through 'qtz',
# case-insensitively. Anchored with \A and \z so that only a string that
# consists of exactly one such subtag matches (^ and $ would also accept a
# matching line inside a multi-line string).
PRIVATE_LANGUAGE_REGEX = /\Aq[a-t][a-z]\z/i.freeze
|
25
|
+
|
26
|
+
#--
|
27
|
+
# RFC 5646, Section 3.1.7
|
28
|
+
# Extended language subtags always have a mapping to their
|
29
|
+
# identical primary language subtag. For example, the extended
|
30
|
+
# language subtag 'yue' (Cantonese) can be used to form the tag
|
31
|
+
# "zh-yue". It has a 'Preferred-Value' mapping to the primary
|
32
|
+
# language subtag 'yue', meaning that a tag such as
|
33
|
+
# "zh-yue-Hant-HK" can be canonicalized to "yue-Hant-HK".
|
34
|
+
#++
|
35
|
+
|
36
|
+
# Canonicalizes the language component, applying the rules described
|
37
|
+
# in RFC5646, sections 2.2.1, 2.2.2 and 4.5. Also validates the
|
38
|
+
# language sequence using the 'Prefix' field-value of the extlang.
|
39
|
+
#
|
40
|
+
# Returns +nil+. Raises InvalidComponentError when no language is set and
# Error when a subtag is not registered or an extlang is used with a primary
# language other than its registered prefix.
def canonicalize_language
  raise InvalidComponentError, "Language can not be omitted." unless @language
  decompose_language unless @primary

  if @extlang
    record = Subtags::Extlang(@extlang)
    raise Error, "Extlang #{@extlang.inspect} is not registered." unless record

    # RFC 5646, Section 3.4: an extlang record carries exactly one 'Prefix';
    # the primary language has to be that prefix (compared as given and
    # down-cased).
    required = record.prefix
    if required != @primary && required != @primary.downcase
      raise Error, "Extlang #{@extlang.inspect} requires prefix #{required.inspect}."
    end

    # RFC 5646, Sections 2.2.2 and 4.5: the 'Preferred-Value' of an extlang
    # replaces the whole language-extlang sequence.
    @language = record.preferred_value
    @primary = nil
    @extlang = nil
    dirty
  elsif !(PRIVATE_LANGUAGE_REGEX =~ @primary)
    # Private use languages are left alone; everything else must be known.
    record = Subtags::Language(@primary)
    raise Error, "Language #{@primary.inspect} is not registered." unless record

    replacement = record.preferred_value
    if replacement
      @language = replacement
      @primary = nil
      dirty
    end
  end

  nil
end
|
90
|
+
|
91
|
+
protected :canonicalize_language
|
92
|
+
|
93
|
+
#--
|
94
|
+
# RFC 5646, Section 2.2.3
|
95
|
+
# The script subtags 'Qaaa' through 'Qabx' are reserved for private
|
96
|
+
# use in language tags. These subtags correspond to codes reserved
|
97
|
+
# by ISO 15924 for private use. These codes MAY be used for non-
|
98
|
+
# registered script values. Please refer to Section 4.6 for more
|
99
|
+
# information on private use subtags.
|
100
|
+
#++
|
101
|
+
|
102
|
+
# Matches the private use script subtags 'Qaaa' through 'Qabx',
# case-insensitively. The range covers Qaaa..Qaaz in full and Qaba..Qabx, so
# the last letter is limited to a-x only in the 'Qab?' half (the former
# pattern wrongly rejected 'Qaay' and 'Qaaz'). Anchored with \A and \z so
# that the whole string has to be the subtag.
PRIVATE_SCRIPT_REGEX = /\AQa(?:a[a-z]|b[a-x])\z/i.freeze
|
103
|
+
|
104
|
+
# Replaces the script by the 'Preferred-Value' of its registry record, if it
# has one, and flags the tag as dirty in that case. A missing script or a
# private use script is left untouched.
#
# Returns +nil+. Raises Error when the script is not registered.
def canonicalize_script
  return unless @script
  return if PRIVATE_SCRIPT_REGEX === @script

  record = Subtags::Script(@script)
  raise Error, "Script #{@script.inspect} is not registered." unless record

  replacement = record.preferred_value
  if replacement
    @script = replacement
    dirty
  end

  nil
end
|
114
|
+
|
115
|
+
protected :canonicalize_script
|
116
|
+
|
117
|
+
#--
|
118
|
+
# RFC 5646, Section 2.2.4
|
119
|
+
# The region subtags 'AA', 'QM'-'QZ', 'XA'-'XZ', and 'ZZ' are
|
120
|
+
# reserved for private use in language tags. These subtags
|
121
|
+
# correspond to codes reserved by ISO 3166 for private use. These
|
122
|
+
# codes MAY be used for private use region subtags (instead of
|
123
|
+
# using a private use subtag sequence). Please refer to
|
124
|
+
# Section 4.6 for more information on private use subtags.
|
125
|
+
#++
|
126
|
+
|
127
|
+
# Matches the private use region subtags 'AA', 'QM'-'QZ', 'XA'-'XZ' and
# 'ZZ', case-insensitively. Anchored with \A and \z so that the whole string
# has to be the subtag (^ and $ would also accept a matching line inside a
# multi-line string).
PRIVATE_REGION_REGEX = /\A(?:AA|Q[M-Z]|X[A-Z]|ZZ)\z/i.freeze
|
128
|
+
|
129
|
+
#--
|
130
|
+
# RFC 5646, Section 4.5
|
131
|
+
# Example: Although the tag "en-BU" (English as used in Burma)
|
132
|
+
# maintains its validity, the language tag "en-BU" is not in canonical
|
133
|
+
# form because the 'BU' subtag has a canonical mapping to 'MM'
|
134
|
+
# (Myanmar).
|
135
|
+
#++
|
136
|
+
|
137
|
+
# Replaces the region by the 'Preferred-Value' of its registry record, if it
# has one, and flags the tag as dirty in that case. A missing region or a
# private use region is left untouched.
#
# Returns +nil+. Raises Error when the region is not registered.
def canonicalize_region
  return unless @region
  return if PRIVATE_REGION_REGEX === @region

  record = Subtags::Region(@region)
  raise Error, "Region #{@region.inspect} is not registered." unless record

  replacement = record.preferred_value
  if replacement
    @region = replacement
    dirty
  end

  nil
end
|
147
|
+
|
148
|
+
protected :canonicalize_region
|
149
|
+
|
150
|
+
#--
|
151
|
+
# RFC 5646, Section 3.1.8
|
152
|
+
# The 'Prefix' also indicates when variant subtags make sense when used
|
153
|
+
# together (many that otherwise share a 'Prefix' are mutually
|
154
|
+
# exclusive) and what the relative ordering of variants is supposed to
|
155
|
+
# be. For example, the variant '1994' (Standardized Resian
|
156
|
+
# orthography) has several 'Prefix' fields in the registry ("sl-rozaj",
|
157
|
+
# "sl-rozaj-biske", "sl-rozaj-njiva", "sl-rozaj-osojs", and "sl-rozaj-
|
158
|
+
# solba"). This indicates not only that '1994' is appropriate to use
|
159
|
+
# with each of these five Resian variant subtags ('rozaj', 'biske',
|
160
|
+
# 'njiva', 'osojs', and 'solba'), but also that it SHOULD appear
|
161
|
+
# following any of these variants in a tag. Thus, the language tag
|
162
|
+
# ought to take the form "sl-rozaj-biske-1994", rather than "sl-1994-
|
163
|
+
# rozaj-biske" or "sl-rozaj-1994-biske".
|
164
|
+
#++
|
165
|
+
|
166
|
+
# Takes a 'Prefix' field-value apart: group 1 is the language, group 2 the
# optional script, group 3 the optional region and group 4 whatever follows
# (compared against the preceding variants in canonicalize_variants).
PREFIX_REGEX = /^(#{PATTERN::LANGUAGE})(?:-(#{PATTERN::SCRIPT}))?(?:-(#{PATTERN::REGION}))?(?:-(.+))?$/io.freeze
|
167
|
+
|
168
|
+
# Canonicalizes variants, applying the rules described in RFC 5646,
|
169
|
+
# sections 2.2.5 and 4.5. Also validates the sequence of variants
|
170
|
+
# using 'Prefix' field-values (see RFC 5646, Section 3.1.8).
|
171
|
+
#
|
172
|
+
# Walks the variants in order. Each one has to be registered and, when its
# record lists prefixes, at least one prefix has to agree with the tag built
# so far (language, script, region and the variants already accepted).
# Variants with a 'Preferred-Value' are replaced by it; the variants
# sequence is rebuilt and the tag flagged dirty only if that happened.
#
# Returns +nil+. Raises Error for an unregistered or misplaced variant.
def canonicalize_variants
  return unless @variants_sequence

  accepted = nil     # hyphen-joined names of the variants accepted so far
  replaced = false   # becomes true once a preferred value was substituted

  @variants = variants.map do |variant|
    record = Subtags::Variant(variant)
    raise Error, "Variant #{variant.inspect} is not registered." unless record

    acceptable = !record.prefixes || record.prefixes.any? { |prefix|
      # The match is evaluated for its captures only; a prefix that does
      # not match leaves all four of them nil.
      PREFIX_REGEX === prefix
      language, script, region, preceding = $1, $2, $3, $4
      (preceding.nil? || preceding == accepted) &&
        (region.nil? || @region && (region == @region || region == @region.upcase)) &&
        (script.nil? || @script && (script == @script || script == @script.capitalize)) &&
        (language == @language || language == @language.downcase)
    }

    unless acceptable
      listing = record.prefixes.map { |p| p.inspect }.join(", ")
      raise Error, "Variant #{variant.inspect} requires one of following prefixes: #{listing}."
    end

    if accepted
      accepted << HYPHEN
    else
      accepted = ""
    end
    accepted << record.name

    if record.preferred_value
      replaced = true
      record.preferred_value
    else
      variant
    end
  end

  if replaced
    @variants_sequence = @variants.join(HYPHEN)
    dirty
  end

  nil
end
|
215
|
+
|
216
|
+
protected :canonicalize_variants
|
217
|
+
|
218
|
+
#--
|
219
|
+
# RFC 5646, Section 4.5
|
220
|
+
# Example: The language tag "en-a-aaa-b-ccc-bbb-x-xyz" is in canonical
|
221
|
+
# form, while "en-b-ccc-bbb-a-aaa-X-xyz" is well-formed and potentially
|
222
|
+
# valid (extensions 'a' and 'b' are not defined as of the publication
|
223
|
+
# of this document) but not in canonical form (the extensions are not
|
224
|
+
# in alphabetical order).
|
225
|
+
#++
|
226
|
+
|
227
|
+
# Sorts the extensions of the tag case-insensitively. The extensions
# sequence is split with EXTENSIONS_SEQUENCE_SPLITTER, the pieces are
# ordered by their down-cased text and joined with hyphens again; the
# stored sequence is replaced, and the tag flagged dirty, only when that
# result differs from it.
#
# Returns +nil+.
def canonicalize_extensions
  return unless @extensions_sequence

  pieces = @extensions_sequence.split(EXTENSIONS_SEQUENCE_SPLITTER)
  pieces.sort! { |left, right| left.downcase <=> right.downcase }
  ordered = pieces.join(HYPHEN)

  if @extensions_sequence != ordered
    @extensions_sequence = ordered
    dirty
  end

  nil
end
|
239
|
+
|
240
|
+
protected :canonicalize_extensions
|
241
|
+
|
242
|
+
#--
|
243
|
+
# RFC 5646, Section 3.1.7
|
244
|
+
# For example, the tags "zh-yue-Hant-HK" and "yue-Hant-HK"
|
245
|
+
# are semantically equivalent and ought to be treated as
|
246
|
+
# if they were the same tag.
|
247
|
+
#++
|
248
|
+
|
249
|
+
# Tells whether this tag and +other+ are equal once both have been brought
# into canonical form (compared with ==).
def same?(other)
  mine = canonicalize
  theirs = other.canonicalize
  mine == theirs
end
|
252
|
+
|
253
|
+
# Non-destructive counterpart of canonicalize!: runs it on a +dup+ of the
# receiver and returns that copy.
def canonicalize
  copy = dup
  copy.canonicalize!
  copy
end
|
258
|
+
|
259
|
+
# Brings the tag into canonical form in place, following the steps of
# RFC 5646, Section 4.5. Returns +nil+, except when a redundant tag is
# replaced: then the result of +recompose+ is returned and the remaining
# steps are skipped.
def canonicalize!
  # 1. Extension sequences are ordered into case-insensitive ASCII order
  #    by singleton subtag.
  canonicalize_extensions

  # 2. Redundant or grandfathered tags are replaced by their
  #    'Preferred-Value', if there is one. (A redundant tag is a
  #    grandfathered registration whose individual subtags appear with the
  #    same semantic meaning in the registry, e.g. "zh-Hant".)
  redundant = Subtags::Redundant(composition)
  if redundant && redundant.preferred_value
    return recompose(redundant.preferred_value)
  end

  # 3. Subtags are replaced by their 'Preferred-Value', if there is one.
  #    For extlangs, the original primary language subtag is also replaced
  #    if there is a primary language subtag in the 'Preferred-Value'.
  canonicalize_language
  canonicalize_script
  canonicalize_region
  canonicalize_variants

  nil
end
|
293
|
+
|
294
|
+
# Long-form names for canonicalize! and canonicalize.
alias :to_canonical_form! :canonicalize!
|
295
|
+
alias :to_canonical_form :canonicalize
|
296
|
+
|
297
|
+
#--
|
298
|
+
# RFC 5646, Section 4.5
|
299
|
+
# For example, "hak-CN" (Hakka, China) has the primary language
|
300
|
+
# subtag 'hak', which in turn has an 'extlang' record with a
|
301
|
+
# 'Prefix' 'zh' (Chinese). The extlang form is "zh-hak-CN"
|
302
|
+
# (Chinese, Hakka, China).
|
303
|
+
#++
|
304
|
+
|
305
|
+
# Rewrites the tag in place into its extlang form: the tag is canonicalized
# first and, if the resulting language also has an extlang record, the
# 'Prefix' of that record is put in front of it ("hak-CN" becomes
# "zh-hak-CN").
#
# A language without an extlang record has nothing to prepend, so the tag is
# left in canonical form (this used to fail with NoMethodError on +nil+).
#
# Returns +nil+. Raises whatever canonicalize! raises.
def to_extlang_form!
  canonicalize!
  subtag = Subtags::Extlang(@language)
  return unless subtag

  @primary = subtag.prefix
  @extlang = @language
  @language = "#{@primary}#{HYPHEN}#{@extlang}"
  dirty
  nil
end
|
314
|
+
|
315
|
+
# Non-destructive counterpart of to_extlang_form!: runs it on a +dup+ of the
# receiver and returns that copy.
def to_extlang_form
  copy = dup
  copy.to_extlang_form!
  copy
end
|
320
|
+
|
321
|
+
#--
|
322
|
+
# RFC 5646, Section 4.1
|
323
|
+
# The script subtag SHOULD NOT be used to form language tags unless
|
324
|
+
# the script adds some distinguishing information to the tag.
|
325
|
+
# ...
|
326
|
+
# The field 'Suppress-Script' in the primary or extended language
|
327
|
+
# record in the registry indicates script subtags that do not add
|
328
|
+
# distinguishing information for most applications; this field
|
329
|
+
# defines when users SHOULD NOT include a script subtag with a
|
330
|
+
# particular primary language subtag.
|
331
|
+
#
|
332
|
+
# For example, if an implementation selects content using Basic
|
333
|
+
# Filtering [RFC4647] (originally described in Section 14.4 of
|
334
|
+
# [RFC2616]) and the user requested the language range "en-US",
|
335
|
+
# content labeled "en-Latn-US" will not match the request and thus
|
336
|
+
# not be selected. Therefore, it is important to know when script
|
337
|
+
# subtags will customarily be used and when they ought not be used.
|
338
|
+
#++
|
339
|
+
|
340
|
+
# Drops the script from the tag, in place, when it equals the
# 'Suppress-Script' of the primary language record; the tag is flagged dirty
# in that case. Nothing happens without a script or a language, or when the
# primary language is a private use one.
#
# The script is compared as given and capitalized, the same way
# canonicalize_variants compares scripts, so that "en-latn" is treated like
# "en-Latn" (the comparison used to be case-sensitive).
#
# Returns +nil+. Raises Error when the primary language is not registered.
def suppress_script!
  return unless @script && @language
  decompose_language unless @primary

  return if PRIVATE_LANGUAGE_REGEX === @primary

  subtag = Subtags::Language(@primary)
  raise Error, "Language #{@primary.inspect} is not registered." unless subtag

  suppressed = subtag.suppress_script
  if suppressed && (suppressed == @script || suppressed == @script.capitalize)
    @script = nil
    dirty
  #elsif @extlang
  #  subtag = Subtags::Extlang(@extlang)
  #  raise Error, "Extlang #{@extlang.inspect} is not registered." unless subtag
  #  if subtag.suppress_script && @script == subtag.suppress_script
  #    dirty
  #  end
  end
  nil
end
|
360
|
+
|
361
|
+
# Non-destructive counterpart of suppress_script!: runs it on a +dup+ of the
# receiver and returns that copy.
def suppress_script
  copy = dup
  copy.suppress_script!
  copy
end
|
366
|
+
|
367
|
+
end
|
368
|
+
|
369
|
+
# Mixes the Canonicalization methods into Langtag.
class Langtag
|
370
|
+
include Canonicalization
|
371
|
+
end
|
372
|
+
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
# EOF
|