langtag 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +31 -0
- data/lib/langtag.rb +170 -0
- data/test/langtagTest.txt +309 -0
- data/test/test_langtag.rb +78 -0
- metadata +57 -0
data/README
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
== Langtag version 0.1.0
|
2
|
+
|
3
|
+
=== Overview
|
4
|
+
The Langtag class represents IETF Language Tags
|
5
|
+
as a subclass of String that provides read/write
|
6
|
+
access to specific parts of a language tag, such
|
7
|
+
as language, script, region, and so on, as well
|
8
|
+
as well-formedness checks and some other simple
|
9
|
+
operations.
|
10
|
+
|
11
|
+
=== IETF Language Tags
|
12
|
+
IETF Language Tags are defined by BCP 47
|
13
|
+
(http://www.ietf.org/rfc/bcp/bcp47.txt),
|
14
|
+
which currently consists of RFC 4646
|
15
|
+
(http://www.ietf.org/rfc/rfc4646.txt)
|
16
|
+
and RFC 4647 (http://www.ietf.org/rfc/rfc4647.txt).
|
17
|
+
These documents are the work of the LTRU Working
|
18
|
+
Group (see http://www.ietf.org/html.charters/ltru-charter.html).
|
19
|
+
For further explanatory information on IETF language tags,
|
20
|
+
in particular in a Web context, please also see
|
21
|
+
http://www.w3.org/International/articles/language-tags/.
|
22
|
+
|
23
|
+
=== Future Work
|
24
|
+
- Provide support for various matching/lookup options
|
25
|
+
defined in RFC 4647
|
26
|
+
- Provide support for validation
|
27
|
+
|
28
|
+
=== Copyright
|
29
|
+
Copyright (c) 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp)
|
30
|
+
Licensed under the same terms as Ruby. Absolutely no warranty.
|
31
|
+
(see http://www.ruby-lang.org/en/LICENSE.txt)
|
data/lib/langtag.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
# :include: ../README
|
2
|
+
|
3
|
+
# :stopdoc:
|
4
|
+
# === Helper Functions for Array Class
|
5
|
+
class Array
  # Create a new Array by repeatedly calling &block
  # until the returned value is equal to final (nil by default).
  # The terminating value itself is not stored in the result.
  #
  # Raises ArgumentError when called without a block.
  # Note: loops forever if the block never returns a value equal to final.
  def self.collect(final = nil, &block)
    raise ArgumentError, 'Array.collect requires a block' unless block

    result = []
    # keep "final != value" operand order so final's own comparison is used
    while final != (element = block.call)
      result << element
    end
    result
  end

  # Test uniqueness of elements of an Array.
  # Returns true when no element occurs more than once (true for an empty Array).
  def uniq?
    length == uniq.length
  end
end
|
21
|
+
# :startdoc:
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
gem 'composite'
|
25
|
+
require 'composite'
|
26
|
+
|
27
|
+
# Langtag class, implementing BCP 47 (currently RFC 4646)
# IETF language tags. Provides decomposition of language
# tags into components, and a well-formedness check.
#
# ==== Accessor methods
# Getting: language, script, region, variants, extensions, private.
# Setting: language=, script=, region=, variants=, extensions=, private=.
# Variants and extensions accessors get/set Arrays, the other accessors get/set Strings.
# Because of the way Ruby assignment methods are implemented,
# manipulating variants and extensions with e.g.
#   myLangtag.extensions += ['e-Extension']
# (adding 'e-Extension' as an extension to whatever extensions myLangtag
# already has) is possible. Similarly,
#   myLangtag.extensions -= ['e-Extension']
# will again remove the extension.
class Langtag < String
  include Composite

  # Create a Langtag from the String s and split it into its parts right away.
  def initialize(s)
    super(s)
    decompose
  end

  # the parts of a language tag
  # variants and extensions are arrays, the other parts are strings
  # NOTE(review): `part` comes from the composite gem; presumably it generates
  # the accessors listed in the class documentation -- confirm against that gem.
  part :language, :script, :region, :variants, :extensions, :private

  # Array of irregular language tags
  Irregular = ['en-gb-oed',
               'i-ami', 'i-bnn', 'i-default', 'i-enochian', 'i-hak',
               'i-klingon', 'i-lux', 'i-mingo', 'i-navajo', 'i-pwn',
               'i-tao', 'i-tay', 'i-tsu', 'sgn-be-fr',
               'sgn-be-nl', 'sgn-ch-de']

  # Array of grandfathered language tags
  Grandfathered = Irregular + ['art-lojban', 'cel-gaulish',
                               'no-bok', 'no-nyn', 'zh-cmn', 'zh-cmn-hans', 'zh-cmn-hant',
                               'zh-gan', 'zh-guoyu', 'zh-hakka', 'zh-min', 'zh-min-nan',
                               'zh-wuu', 'zh-xiang', 'zh-yue']

  # Well-formedness of a regular (non-irregular, not private-only) tag,
  # except for repeated extension singletons (checked in decompose).
  # Anchored with \A and \z so that embedded newlines cannot hide junk;
  # notice the i modifier for case insensitive matching.
  WellformedPattern = /\A([a-z]{2,3}                    # shortest ISO 639 language
                          (-[a-z]{3}){0,3}              # with optional extended language subtags
                          |[a-z]{4,8})                  # or reserved or registered language
                       (-[a-z]{4})?                     # optional script
                       (-([a-z]{2}|\d{3}))?             # optional region
                       (-([a-z0-9]{5,8}|\d[a-z0-9]{3}))*  # optional variants
                       (-[a-wyz0-9](-[a-z0-9]{2,8})+)*  # optional extensions
                       (-x(-[a-z0-9]{1,8})+)?           # optional private use part
                       \z/ix

  # Well-formedness of a tag that consists of a private use part only
  # (RFC 4646 privateuse: "x" followed by one or more 1-8 character subtags).
  PrivateOnlyPattern = /\Ax(-[a-z0-9]{1,8})+\z/i

  # returns true if language tag is well-formed, false otherwise
  def wellformed?
    decompose # refresh the parts (and @wellformed) if the string changed
    @wellformed
  end

  # returns true if language tag is grandfathered, false otherwise
  def grandfathered?
    Grandfathered.include? to_str.downcase
  end

  # returns true if language tag is irregular, false otherwise
  def irregular?
    Irregular.include? to_str.downcase
  end

  # changes case to look 'nice' (regions are UPPER-CASE,
  # scripts are Title-Case, everything else is lower case).
  # Parts that are absent (nil) are skipped.
  # Returns self (the result of compose).
  def nicecase!
    @language.downcase! if @language
    @script.capitalize! if @script
    @region.upcase! if @region
    @variants.each { |v| v.downcase! }
    @extensions.each { |e| e.downcase! }
    @private.downcase! if @private
    compose
  end

  # non-destructive variant of nicecase!: returns a nicecased copy
  def nicecase
    Langtag.new(self).nicecase!
  end

  # compose the langtag from parts, joining with '-'
  # flatten first to deal with @variants/@extensions, which are arrays,
  # then compact to remove nil values (mainly internal use)
  def compose
    replace([@language, @script, @region, @variants,
             @extensions, @private].flatten.compact.join('-'))
  end

  # decompose a language tag into parts (mainly internal use)
  # Sets @language, @script, @region, @variants, @extensions, @private
  # and @wellformed from the current string value.
  def decompose
    # check if we really need to decompose again
    return if @saved == to_str

    # initialize everything
    s = @saved = to_str # save for check next time around
    @wellformed = true  # assume well-formed
    @language = @script = @region = @private = nil
    # two separate arrays: sharing one object would couple variants and extensions
    @variants = []
    @extensions = []

    # irregular tags are taken as a whole; they are well-formed by definition
    if irregular?
      @language = s.dup # dup so in-place case changes never touch @saved
      return
    end

    # completely private tags are also taken as a whole (every tag has to
    # have a language part), but still have to follow the privateuse syntax
    if s =~ /\Ax-/i
      @language = s.dup
      @wellformed = false unless s =~ PrivateOnlyPattern
      return
    end

    # check well-formedness with a single regular expression,
    # except for multiple occurrences of the same extension (checked below)
    @wellformed = false unless s =~ WellformedPattern

    # extract language; s keeps whatever follows it
    if s =~ /\A([a-z]{2,3}(-[a-z]{3}){0,3}|[a-z]{4,8})(-|\z)/i
      @language, s = $1, $'
    end

    # extract private use tail; s keeps whatever precedes it
    if s =~ /(\A|-)(x-.*)\z/i
      s, @private = $`, $2
    end

    # extract extensions from the end of s, last one first
    @extensions = Array.collect do
      if s =~ /(\A|-)([a-wyz0-9](-[a-z0-9]{2,8})+)\z/i
        s = $`
        $2
      else
        nil
      end
    end
    @extensions.reverse! # put back in order

    # the same singleton (first subtag of an extension) must not occur twice
    if !((@extensions.collect { |ext| ext[0..1].downcase }).uniq?)
      @wellformed = false
    end

    if s =~ /(\A|-)([a-z]{4})(-|\z)/i # extract script
      @script = $2
    end
    if s =~ /(\A|-)([a-z]{2}|\d{3})(-|\z)/i # extract region
      @region = $2
    end

    # extract variants
    @variants = s.scan(/(\A|-)([a-z0-9]{5,8}|\d[a-z0-9]{3})(?=(-|\z))/i).
                  collect { |match| match[1] }
  end # decompose
end # class Langtag
|
@@ -0,0 +1,309 @@
|
|
1
|
+
WELL-FORMED
|
2
|
+
|
3
|
+
AaBbCcDd
|
4
|
+
AaBbCcDd-x-y-any-x
|
5
|
+
abcd-Latn
|
6
|
+
ab-x-abc-a-a # ditto
|
7
|
+
ab-x-abc-a-a # ditto
|
8
|
+
ab-x-abc-x-abc # anything goes after x
|
9
|
+
ab-x-abc-x-abc # anything goes after x
|
10
|
+
ax-TZ # Not in the registry, but well-formed
|
11
|
+
az-Arab-x-AZE-derbend
|
12
|
+
de-a-value
|
13
|
+
de-CH-1996
|
14
|
+
de-Latg-1996
|
15
|
+
en
|
16
|
+
en-a-bbb-x-a-ccc
|
17
|
+
en-enx
|
18
|
+
en-enx-eny-enz-latn-us
|
19
|
+
en-gb-oed
|
20
|
+
en-Latn
|
21
|
+
en-Latn-001
|
22
|
+
en-Latn-GB-boont-r-extended-sequence-x-private
|
23
|
+
en-Latn-US
|
24
|
+
en-Latn-US-lojban-gaulish
|
25
|
+
en-Latn-US-lojban-gaulish-a-12345678-ABCD-b-ABCDEFGH
|
26
|
+
en-Latn-US-lojban-gaulish-a-12345678-ABCD-b-ABCDEFGH-x-a-b-c-12345678
|
27
|
+
en-US
|
28
|
+
en-US
|
29
|
+
en-US-boont
|
30
|
+
en-x-US
|
31
|
+
es-419
|
32
|
+
es-Latn-CO-x-private
|
33
|
+
fr
|
34
|
+
fra
|
35
|
+
fra-FX
|
36
|
+
fra-Latn # ISO 639 can be 3-letters
|
37
|
+
fr-FR
|
38
|
+
fr-fra # Extended tag
|
39
|
+
fr-Lat # Extended
|
40
|
+
fr-Latn
|
41
|
+
fr-Latn-419
|
42
|
+
fr-Latn-CA
|
43
|
+
fr-Latn-CA
|
44
|
+
fr-Latn-FR
|
45
|
+
fr-shadok # Variant
|
46
|
+
fr-y-myext-myext2
|
47
|
+
i-default # grandfathered
|
48
|
+
i-default # grandfathered
|
49
|
+
i-enochian # Grand fathered
|
50
|
+
i-klingon # grandfathered
|
51
|
+
i-klingon # grandfathered with singleton
|
52
|
+
mn-Cyrl-MN
|
53
|
+
mN-cYrL-Mn
|
54
|
+
no-bok # grandfathered without singleton
|
55
|
+
sl-IT-nedis
|
56
|
+
sl-nedis
|
57
|
+
sr-Latn-CS
|
58
|
+
x-12345678-a
|
59
|
+
x-fr-CH
|
60
|
+
|
61
|
+
#all of the grandfathered codes; first the really goofy ones. Cased oddly for an extra test.
|
62
|
+
En-Gb-Oed
|
63
|
+
I-Ami
|
64
|
+
I-Bnn
|
65
|
+
I-Default
|
66
|
+
I-Enochian
|
67
|
+
I-Hak
|
68
|
+
I-Klingon
|
69
|
+
I-Lux
|
70
|
+
I-Mingo
|
71
|
+
I-Navajo
|
72
|
+
I-Pwn
|
73
|
+
I-Tao
|
74
|
+
I-Tay
|
75
|
+
I-Tsu
|
76
|
+
Sgn-Be-Fr
|
77
|
+
Sgn-Be-Nl
|
78
|
+
Sgn-Ch-De
|
79
|
+
|
80
|
+
#now the ones that are well-formed, but currently invalid
|
81
|
+
art-lojban
|
82
|
+
cel-gaulish
|
83
|
+
en-boont
|
84
|
+
en-scouse
|
85
|
+
no-bok
|
86
|
+
no-nyn
|
87
|
+
zh-cmn
|
88
|
+
zh-cmn-Hans
|
89
|
+
zh-cmn-Hant
|
90
|
+
zh-gan
|
91
|
+
zh-guoyu
|
92
|
+
zh-hakka
|
93
|
+
zh-min
|
94
|
+
zh-min-nan
|
95
|
+
zh-wuu
|
96
|
+
zh-xiang
|
97
|
+
zh-yue
|
98
|
+
|
99
|
+
# Now some randomly generated correct names
|
100
|
+
|
101
|
+
cfR-wOG-g-UkjoqWt8-ii8S04LL-rbBDq0gl-o-qmzs-ifnRSqVz-241T-lVFJq30L-0JWuHsb-C-WMThK-kbEOuA-tIQ-Lfjt-a-c1gdojdJ-7iv-b-NawXDK
|
102
|
+
JP-ubE-JtS-fOa-BOiO
|
103
|
+
BYE-fiX-mKH-BKdy
|
104
|
+
rc-Ajl-jpl-X-Lh-SPB-ANEXM
|
105
|
+
FwtsUTb
|
106
|
+
GH-NgZ-rW
|
107
|
+
x-GF-E7m-v2-V09q
|
108
|
+
qQ-THL-dth
|
109
|
+
TDGbw
|
110
|
+
Uif-eE
|
111
|
+
TDJVhlwx-HEwn-6M9a-4DvI-7WaG-8IRj-7QEk-7yID
|
112
|
+
rOO-yUE-UEY-bbcM
|
113
|
+
yY-jDQ-eDK-NsZ-a-oJQ-eLc-JTMc31-nhr-h-unOGj8-Os7-JMT5jeVq-pE-QK5
|
114
|
+
oJ-57b1OPWP-6MUqYs-Kesk65J-74oa-34Ys
|
115
|
+
vX-PCm-mnT-PsQ
|
116
|
+
eQh-ewFo-7mvP-EbJ0xx-0OEK-I0I8ju6
|
117
|
+
JwdjKtH-WD
|
118
|
+
QxxLG-X-lJNNX6Pu-7OfhbLoa
|
119
|
+
icb-AAk-EFU-dAyc-1rj2-3upg-8VMx-5nml-DF45sBf
|
120
|
+
en-GB-oed
|
121
|
+
oHSgh-x-SpoWfaO8-J-NATtSLZ1
|
122
|
+
Ehl-hpi-Bbb-zu-x-n6rrcoz0
|
123
|
+
UFhQ-GO-X-HULOlod-tkv
|
124
|
+
jVVxZr
|
125
|
+
HkFqh
|
126
|
+
tyg-mMk-YME-Nia-aJab-Ej
|
127
|
+
rI-aqM-gkp-ZNW-NW
|
128
|
+
Hv-acb-248
|
129
|
+
XMXIUasy-Djzc-eW
|
130
|
+
DGKW-oZRC-G-1P-SuP-A-GI2SuR-vX-rnH1Y8-heft
|
131
|
+
X-C5lAw-Hn-XdR7x
|
132
|
+
tsQpkxE-xwuk
|
133
|
+
i-pwn
|
134
|
+
SQrky-AWCe-xo-x-cU-fo2-u1KhUJ
|
135
|
+
aNCzqvs-IVeQ-ZY
|
136
|
+
sj
|
137
|
+
jeMSw
|
138
|
+
UPm-sMd-Dn
|
139
|
+
Hho-sG-GpcoS1-IxGcI
|
140
|
+
EDrfxBz
|
141
|
+
qpW-HBWu-ta
|
142
|
+
AIl-FGV
|
143
|
+
Lqn-bid-DpI
|
144
|
+
Jm
|
145
|
+
xE-Lxs-qu
|
146
|
+
RliJDAg
|
147
|
+
ct-gwQ-SIu
|
148
|
+
csneMbEX-Umid-r-AfHD-gDWov-DfxmF4ew-0ENgU-S-pBN9O4c-9HK-c0ElsKnC
|
149
|
+
jH-BIYY-pT
|
150
|
+
en-GB-oed
|
151
|
+
|
152
|
+
ILL-FORMED
|
153
|
+
|
154
|
+
-a
|
155
|
+
a-
|
156
|
+
a1-Hant-ZH
|
157
|
+
aabbccddE
|
158
|
+
a--b
|
159
|
+
ab-123-abc
|
160
|
+
ab-123-abc
|
161
|
+
ab-123-abcd
|
162
|
+
ab-123-abcd
|
163
|
+
ab-1abc-abc
|
164
|
+
ab-1abc-abc
|
165
|
+
ab-1abc-abcd
|
166
|
+
ab-1abc-abcd
|
167
|
+
ab--ab
|
168
|
+
ab--ab
|
169
|
+
ab-a-b
|
170
|
+
ab-a-b
|
171
|
+
ab-ab-abc
|
172
|
+
ab-ab-abc
|
173
|
+
ab-ab-abcd
|
174
|
+
ab-ab-abcd
|
175
|
+
-ab-abc
|
176
|
+
-ab-abc
|
177
|
+
ab-abc-
|
178
|
+
ab-abc-
|
179
|
+
ab-abc-abc-abc-abc
|
180
|
+
ab-abc-abc-abc-abc
|
181
|
+
ab-abcd-abc
|
182
|
+
ab-abcd-abc
|
183
|
+
ab-abcde-abc
|
184
|
+
ab-abcde-abc
|
185
|
+
ab-abcde-abcd
|
186
|
+
ab-abcde-abcd
|
187
|
+
ab-a-x
|
188
|
+
ab-a-x
|
189
|
+
abcd-efg
|
190
|
+
abcdefghi-012345678
|
191
|
+
abcdefghi-012345678
|
192
|
+
a-foo
|
193
|
+
a-Hant-ZH
|
194
|
+
a-value
|
195
|
+
a-x
|
196
|
+
b-fish
|
197
|
+
en-enx-eny-enz-enw
|
198
|
+
en-UK-oed
|
199
|
+
en-US-Latn
|
200
|
+
f
|
201
|
+
f-Latn
|
202
|
+
fr-Latn-F
|
203
|
+
overlongone
|
204
|
+
tlh-a-b-foo
|
205
|
+
|
206
|
+
i-notexist # grandfathered but not registered: invalid, even if we only test well-formedness
|
207
|
+
|
208
|
+
# the following have multiple singletons
|
209
|
+
ab-a-abc-a-abc
|
210
|
+
en-a-bbb-a-ccc # 'a' appears twice
|
211
|
+
ab-c-abc-r-toto-c-abc # 'c' appears twice
|
212
|
+
|
213
|
+
#mechanically generated ill-formed items
|
214
|
+
EdY-z_H791Xx6_m_kj
|
215
|
+
qWt85_8S0-L_rbBDq0gl_m_O_zsAx_nRS
|
216
|
+
VzyL2
|
217
|
+
T_VFJq-L-0JWuH_u2_VW-hK-kbE
|
218
|
+
u-t
|
219
|
+
Q-f_ZVJXyc-doj_k-i
|
220
|
+
JWB7gNa_K-5GB-25t_W-s-ZbGVwDu1-H3E
|
221
|
+
b-2T-Qob_L-C9v_2CZxK86
|
222
|
+
fQTpX_0_4Vg_L3L_g7VtALh2
|
223
|
+
S-Z-E_J
|
224
|
+
f6wsq-02_i-F
|
225
|
+
9_GcUPq_G
|
226
|
+
QjsIy_9-0-7_Dv2yPV09_D-JXWXM
|
227
|
+
D_se-f-k
|
228
|
+
ON47Wv1_2_W
|
229
|
+
f-z-R_s-ha
|
230
|
+
N3APeiw_195_Bx2-mM-pf-Z-Ip5lXWa-5r
|
231
|
+
IRjxU-E_6kS_D_b1b_H
|
232
|
+
NB-3-5-AyW_FQ-9hB-TrRJg3JV_3C
|
233
|
+
yF-3a_V_FoJQAHeL_Z-Mc-u
|
234
|
+
n_w_bbunOG_1-s-tJMT5je
|
235
|
+
Q-AEWE_X
|
236
|
+
57b1O_k_R6MU_sb
|
237
|
+
hK_65J_i-o_SI-Y
|
238
|
+
wB4B7u_5I2_I_NZPI
|
239
|
+
J24Nb_q_d-zE
|
240
|
+
v6-dHjJmvPS_IEb-x_A-O-i
|
241
|
+
8_8_dl-ZgBr84u-P-E
|
242
|
+
nIn-xD7EVhe_C
|
243
|
+
5_N-6P_x7Of_Lo_6_YX_R
|
244
|
+
0_46Oo0sZ-YNwiU8Wr_d-M-pg1OriV
|
245
|
+
laiY-5
|
246
|
+
K-8Mdd-j_ila0sSpo_aO8_J
|
247
|
+
wNATtSL-Cp4_gPa_fD41_9z
|
248
|
+
H_FGz5V8_n6rrcoz0_1O6d-kH-7-N
|
249
|
+
wDOrnHU-odqJ_vWl
|
250
|
+
gP_qO-I-jH
|
251
|
+
h
|
252
|
+
dJ0hX-o_csBykEhU-F
|
253
|
+
L-Vf7_BV_eRJ5goSF_Kp
|
254
|
+
y-oF-chnavU-H
|
255
|
+
9FkG-8Q-8_v
|
256
|
+
W_l_NDQqI-O_SFSAOVq
|
257
|
+
kDG3fzXw
|
258
|
+
t-nsSp-7-t-mUK2
|
259
|
+
Yw-F
|
260
|
+
1-S_3_l
|
261
|
+
u-v_brn-Y
|
262
|
+
4_ft_3ZPZC5lA_D
|
263
|
+
n_dR-QodsqJnh_e
|
264
|
+
Hwvt-bSwZwj_KL-hxg0m-3_hUG
|
265
|
+
mQHzvcV-UL-o2O_1KhUJQo_G2_uryk3-a
|
266
|
+
b-UTn33HF
|
267
|
+
r-Ep-jY-aFM_N_H
|
268
|
+
K-k-krEZ0gwD_k_ua-9dm3Oy-s_v
|
269
|
+
XS_oS-p
|
270
|
+
EIx_h-zf5
|
271
|
+
p_z-0_i-omQCo3B
|
272
|
+
1_q0N_jo_9
|
273
|
+
0Ai-6-S
|
274
|
+
L-LZEp_HtW
|
275
|
+
Zj-A4JD_2A5Aj7_b-m3
|
276
|
+
x
|
277
|
+
p-qPuXQpp_d-jeKifB-c-7_G-X
|
278
|
+
X94cvJ_A
|
279
|
+
F2D25R_qk_W-w_Okf_kx
|
280
|
+
rc-f
|
281
|
+
D
|
282
|
+
gD_WrDfxmF-wu-E-U4t
|
283
|
+
Z_BN9O4_D9-D_0E_KnCwZF-84b-19
|
284
|
+
T-8_g-u-0_E
|
285
|
+
lXTtys9j_X_A_m-vtNiNMw_X_b-C6Nr
|
286
|
+
V_Ps-4Y-S
|
287
|
+
X5wGEA
|
288
|
+
mIbHFf_ALu4_Jo1Z1
|
289
|
+
ET-TacYx_c
|
290
|
+
Z-Lm5cAP_ri88-d_q_fi8-x
|
291
|
+
rTi2ah-4j_j_4AlxTs6m_8-g9zqncIf-N5
|
292
|
+
FBaLB85_u-0NxhAy-ZU_9c
|
293
|
+
x_j_l-5_aV95_s_tY_jp4
|
294
|
+
PL768_D-m7jNWjfD-Nl_7qvb_bs_8_Vg
|
295
|
+
9-yOc-gbh
|
296
|
+
6DYxZ_SL-S_Ye
|
297
|
+
ZCa-U-muib-6-d-f_oEh_O
|
298
|
+
Qt-S-o8340F_f_aGax-c-jbV0gfK_p
|
299
|
+
WE_SzOI_OGuoBDk-gDp
|
300
|
+
cs-Y_9
|
301
|
+
m1_uj
|
302
|
+
Y-ob_PT
|
303
|
+
li-B
|
304
|
+
f-2-7-9m_f8den_J_T_d
|
305
|
+
p-Os0dua-H_o-u
|
306
|
+
L
|
307
|
+
rby-w
|
308
|
+
|
309
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# some unit tests for the Langtag class
|
2
|
+
|
3
|
+
# Copyright 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp);
|
4
|
+
# available under the same licence as Ruby itself
|
5
|
+
# (see http://www.ruby-lang.org/en/LICENSE.txt)
|
6
|
+
|
7
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
8
|
+
require 'langtag'
|
9
|
+
|
10
|
+
require 'test/unit'
|
11
|
+
class TestLangtag < Test::Unit::TestCase
  # A tag that starts with x- is expected to end up entirely in the
  # language part and not in the private part (the principle used is that
  # every well-formed language tag has to have a language part).
  def test_private_only
    tag = Langtag.new('x-just-private-only')
    assert_equal('x-just-private-only', tag.language)
    assert_nil(tag.script)
    assert_nil(tag.region)
    assert_equal([], tag.variants)
    assert_equal([], tag.extensions)
    assert_nil(tag.private) # this may be somewhat surprising
    assert_equal(true, tag.wellformed?)
  end

  # Decomposition of a long language tag, followed by
  # += and -= manipulation of variants and extensions.
  def test_long
    tag = Langtag.new('de-Latn-ch-p-abc-q-def-x-myself')
    assert_equal('de', tag.language)
    assert_equal('Latn', tag.script)
    assert_equal('ch', tag.region)
    assert_equal([], tag.variants)
    assert_equal(['p-abc', 'q-def'], tag.extensions)
    assert_equal('x-myself', tag.private)

    # adding a variant rewrites the tag string and keeps it well-formed
    tag.variants += ['fonipa']
    assert_equal(['fonipa'], tag.variants)
    assert_equal('de-Latn-ch-fonipa-p-abc-q-def-x-myself', tag)
    assert_equal(true, tag.wellformed?)

    # a second extension with singleton 'p' makes the tag ill-formed ...
    tag.extensions += ['p-again']
    assert_equal(false, tag.wellformed?)

    # ... until the first 'p' extension is taken out again
    tag.extensions -= ['p-abc']
    assert_equal(true, tag.wellformed?)
    assert_equal(['q-def', 'p-again'], tag.extensions)
  end

  # Nice-casing, both destructive and non-destructive
  def test_nice
    shouting = Langtag.new('DE-LATN-CH-P-ABC-Q-DEF-X-MYSELF')
    shouting.nicecase!
    assert_equal('de-Latn-CH-p-abc-q-def-x-myself', shouting)

    quiet = Langtag.new('de-latn-ch-p-abc-q-def-x-myself')
    assert_equal('de-Latn-CH-p-abc-q-def-x-myself', quiet.nicecase)
    assert_equal('Latn', quiet.nicecase.script)
  end

  # Mechanical tests for well-formedness, using file langtagTest.txt downloaded from
  # http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/langtagTest.txt,
  # which includes all kinds of tests, many of them collected from the
  # ltru@ietf.org mailing list.
  # The WELL-FORMED and ILL-FORMED marker lines switch the expected result
  # for the tags that follow them; text after # and blank lines are ignored.
  def test_langtagTest
    expected = false # expectation for the current section of the file
    path = File.join(File.dirname(__FILE__), 'langtagTest.txt')
    File.open(path) do |file|
      file.each_with_index do |line, index|
        tag = line.chomp.sub(/\#.*/, '').strip
        case tag
        when 'WELL-FORMED'
          expected = true
        when 'ILL-FORMED'
          expected = false
        when ''
          next
        else
          assert_equal(expected, Langtag.new(tag).wellformed?,
                       "langtagTest.txt, line: #{index + 1}; tag: #{tag}")
        end
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: langtag
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2007-03-27 00:00:00 +09:00
|
8
|
+
summary: "Support for IETF Language Tags (BCP 47, currently RFC 4646): Wellformedness check, read/write access to parts such as language, script, region, etc."
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: duerst@it.aoyama.ac.jp
|
12
|
+
homepage:
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: langtag
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Martin J. Du"rst
|
31
|
+
files:
|
32
|
+
- lib/langtag.rb
|
33
|
+
- test/langtagTest.txt
|
34
|
+
- test/test_langtag.rb
|
35
|
+
- README
|
36
|
+
test_files:
|
37
|
+
- test/test_langtag.rb
|
38
|
+
rdoc_options: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- README
|
42
|
+
executables: []
|
43
|
+
|
44
|
+
extensions: []
|
45
|
+
|
46
|
+
requirements: []
|
47
|
+
|
48
|
+
dependencies:
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: composite
|
51
|
+
version_requirement:
|
52
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 0.3.0
|
57
|
+
version:
|