lang 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +46 -0
- data/bin/lang +150 -0
- data/lib/lang/subtags.rb +147 -0
- data/lib/lang/subtags/entry.rb +40 -0
- data/lib/lang/subtags/extlang.rb +19 -0
- data/lib/lang/subtags/grandfathered.rb +9 -0
- data/lib/lang/subtags/language.rb +18 -0
- data/lib/lang/subtags/redundant.rb +9 -0
- data/lib/lang/subtags/region.rb +9 -0
- data/lib/lang/subtags/script.rb +9 -0
- data/lib/lang/subtags/variant.rb +17 -0
- data/lib/lang/tag.rb +141 -0
- data/lib/lang/tag/canonicalization.rb +376 -0
- data/lib/lang/tag/composition.rb +141 -0
- data/lib/lang/tag/filtering.rb +143 -0
- data/lib/lang/tag/grandfathered.rb +36 -0
- data/lib/lang/tag/langtag.rb +437 -0
- data/lib/lang/tag/lookup.rb +77 -0
- data/lib/lang/tag/pattern.rb +31 -0
- data/lib/lang/tag/privateuse.rb +34 -0
- data/lib/lang/version.rb +5 -0
- metadata +108 -0
@@ -0,0 +1,141 @@
|
|
1
|
+
module Lang #:nodoc:
|
2
|
+
module Tag
|
3
|
+
|
4
|
+
# Handles abstract compositions of subtags
|
5
|
+
# incl. basic and extended language-ranges.
|
6
|
+
#
|
7
|
+
# ==== Example
|
8
|
+
#
|
9
|
+
# class LanguageRange < Lang::Tag::Composition
|
10
|
+
#
|
11
|
+
# def initialize(thing)
|
12
|
+
# raise TypeError, "Can't convert #{thing.class} into String" unless thing.respond_to?(:to_str)
|
13
|
+
# sequence = thing.to_str
|
14
|
+
# unless /^(?:\*|[a-z]{1,8})(?:-[a-z\d]{1,8}|-\*)*$/i === sequence
|
15
|
+
# raise Error, "#{sequence.inspect} is not a language-range."
|
16
|
+
# end
|
17
|
+
# @sequence = sequence
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# def simplify! # to basic language-range
|
21
|
+
# /^\*-/ === @sequence ? @sequence = '*' : @sequence.gsub!('-*','')
|
22
|
+
# dirty
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
class Composition

  # Wraps +thing+, which has to be convertible to a String via +to_str+.
  # Raises TypeError otherwise.
  #
  def initialize(thing)
    raise TypeError, "Can't convert #{thing.class} into String" unless thing.respond_to?(:to_str)
    @sequence = thing.to_str
  end

  # Returns +true+ if compositions are equal.
  # Allows comparison against +Strings+ (anything responding to +to_str+).
  # The comparison is case-insensitive.
  #
  def ===(other)
    return false unless other.respond_to?(:to_str)
    # #composition is already downcased, so comparing it with the
    # downcased argument covers the exact-match case as well.
    composition == other.to_str.downcase
  end

  # Returns +true+ if Compositions are equal (case-insensitively).
  # +other+ has to be an instance of the receiver's class or of one
  # of its subclasses.
  #
  def ==(other)
    return false unless other.kind_of?(self.class)
    self.composition == other.composition
  end

  # Strict, case-sensitive equality; used together with #hash
  # when compositions serve as Hash keys.
  #
  def eql?(other)
    return false unless other.kind_of?(self.class)
    self.to_s == other.to_s
  end

  # Hash of the case-sensitive string form (consistent with #eql?).
  #
  def hash
    to_s.hash
  end

  # Downcased string form, memoized until #dirty is called.
  #
  def composition
    @composition ||= to_s.downcase
  end

  # Returns the sequence as it is currently stored.
  #
  def to_s
    @sequence
  end

  # Implicit String conversion. Dispatches to #to_s dynamically, so that
  # subclasses overriding #to_s (e.g. to build the sequence lazily)
  # get a matching #to_str. A plain +alias+ would stay bound to this
  # class's #to_s and could return +nil+ for such subclasses.
  #
  def to_str
    to_s
  end

  # Returns the subtags of the sequence, case preserved.
  #
  def to_a
    to_s.split(HYPHEN_SPLITTER)
  end

  # Returns the subtags of the downcased sequence, memoized
  # until #dirty is called.
  #
  def decomposition
    @decomposition ||= composition.split(HYPHEN_SPLITTER)
  end

  private :decomposition

  # Drops the memoized values. Always returns +nil+.
  #
  def dirty
    @composition = nil
    @decomposition = nil
    nil
  end

  private :dirty

  # Duplicates self.
  #
  def dup
    self.class.new(to_s.dup)
  end

  # Returns the length of the string form.
  #
  def length
    to_s.length
  end

  # Returns the number of subtags in self.
  #
  def subtags_count
    to_s.count(HYPHEN) + 1
  end

  #--
  # RFC 5646, Section 2.1.1
  # An implementation can reproduce this format without accessing the
  # registry as follows. All subtags, including extension and private
  # use subtags, use lowercase letters with two exceptions: two-letter
  # and four-letter subtags that neither appear at the start of the tag
  # nor occur after singletons. Such two-letter subtags are all
  # uppercase (as in the tags "en-CA-x-ca" or "sgn-BE-FR") and four-
  # letter subtags are titlecase (as in the tag "az-Latn-x-latn").
  #++

  # Brings the sequence to the recommended letter case.
  # Always returns +nil+.
  #
  # A new String is assigned instead of modifying the stored one in
  # place: the stored String may be the very object the caller passed
  # to the constructor (possibly frozen), and it must not be altered.
  # The memoized #composition stays valid, since only the letter
  # case changes.
  #
  def nicecase!
    @sequence = @sequence.downcase.gsub(/-(?:([a-z\d]{4})|[a-z\d]{2}|[a-z\d]-.*)(?=-|$)/) do |subtag|
      four_letter = Regexp.last_match(1)
      if four_letter
        HYPHEN + four_letter.capitalize # four-letter subtag: titlecase
      elsif subtag.size == 3
        subtag.upcase                   # hyphen + two-letter subtag: uppercase
      else
        subtag                          # singleton and everything after it: lowercase
      end
    end
    nil
  end

  # Returns a nicecased copy of self; the receiver is left untouched.
  #
  def nicecase
    duplicated = self.dup
    duplicated.nicecase!
    duplicated
  end

  def inspect
    sprintf("#<%s:%#0x %s>", self.class.to_s, self.object_id, self.to_s)
  end

end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# EOF
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require 'lang/tag'
|
2
|
+
|
3
|
+
module Lang #:nodoc:
|
4
|
+
module Tag
|
5
|
+
|
6
|
+
# Basic and extended filtering.
|
7
|
+
# RFC 4647, Sections 3.3.1, 3.3.2.
|
8
|
+
#
|
9
|
+
module Filtering

  WILDCARD = '*'.freeze

  #--
  # RFC 4647, Section 3.3.2 ('Extended Filtering')
  #
  # Much like basic filtering, extended filtering selects content with
  # arbitrarily long tags that share the same initial subtags as the
  # language range. In addition, extended filtering selects language
  # tags that contain any intermediate subtags not specified in the
  # language range. For example, the extended language range "de-*-DE"
  # (or its synonym "de-DE") matches all of the following tags:
  #
  # de-DE (German, as used in Germany)
  # de-de (German, as used in Germany)
  # de-Latn-DE (Latin script)
  # de-Latf-DE (Fraktur variant of Latin script)
  # de-DE-x-goethe (private-use subtag)
  # de-Latn-DE-1996 (orthography of 1996)
  # de-Deva-DE (Devanagari script)
  #
  # The same range does not match any of the following tags for the
  # reasons shown:
  #
  # de (missing 'DE')
  # de-x-DE (singleton 'x' occurs before 'DE')
  # de-Deva ('Deva' not equal to 'DE')
  #++

  # Checks if the *extended* language-range (in the shortest notation)
  # passed matches self. The comparison is case-insensitive.
  # Returns +false+ for anything that does not respond to +to_str+
  # and for an empty range.
  #
  # ==== Example
  #   Lang::Tag('de-DE').matched_by_extended_range?('de-*-DE')          #=> true
  #   Lang::Tag('de-DE-x-goethe').matched_by_extended_range?('de-*-DE') #=> true
  #   Lang::Tag('de-Latn-DE').matched_by_extended_range?('de-*-DE')     #=> true
  #   Lang::Tag('de-Latf-DE').matched_by_extended_range?('de-*-DE')     #=> true
  #   Lang::Tag('de-x-DE').matched_by_extended_range?('de-*-DE')        #=> false
  #   Lang::Tag('de-Deva').matched_by_extended_range?('de-*-DE')        #=> false
  #   Lang::Tag('en-US').matched_by_extended_range?('us')               #=> false
  #
  def matched_by_extended_range?(range)
    return false unless range.respond_to?(:to_str)

    subtags   = decomposition
    subranges = range.to_str.downcase.split(HYPHEN_SPLITTER)

    # RFC 4647, 3.3.2, step 2: the first subtag of the range has to be
    # the wildcard or equal to the first subtag of the tag; either way
    # both lists advance by one.
    leading = subranges.first
    return false unless leading == WILDCARD || (leading && leading == subtags.first)

    range_index = 1
    tag_index   = 1

    # RFC 4647, 3.3.2, step 3.
    while (subrange = subranges[range_index])
      subtag = subtags[tag_index]
      if subrange == WILDCARD
        range_index += 1      # A. wildcard: skip it in the range
      elsif subtag.nil?
        return false          # B. tag exhausted, range is not
      elsif subtag == subrange
        range_index += 1      # C. match: advance both lists
        tag_index   += 1
      elsif subtag.size == 1
        return false          # D. singleton in the tag stops the search
      else
        tag_index += 1        # E. skip the intermediate subtag of the tag
      end
    end

    true
  end

  #--
  # RFC 4647, Section 3.3.1 ('Basic Filtering')
  #
  # A language range matches a
  # particular language tag if, in a case-insensitive comparison, it
  # exactly equals the tag, or if it exactly equals a prefix of the tag
  # such that the first character following the prefix is "-". For
  # example, the language-range "de-de" (German as used in Germany)
  # matches the language tag "de-DE-1996" (German as used in Germany,
  # orthography of 1996), but not the language tags "de-Deva" (German as
  # written in the Devanagari script) or "de-Latn-DE" (German, Latin
  # script, as used in Germany).
  #++

  # Checks if the *basic* language-range passed matches self.
  # Accepts a Composition or anything responding to +to_str+;
  # returns +false+ for everything else. The wildcard range
  # matches any tag.
  #
  # ==== Example
  #   tag = Lang::Tag('de-Latn-DE')
  #   tag.matched_by_basic_range?('de-Latn-DE')      #=> true
  #   tag.matched_by_basic_range?('de-Latn')         #=> true
  #   tag.matched_by_basic_range?('*')               #=> true
  #   tag.matched_by_basic_range?('de-La')           #=> false
  #   tag.matched_by_basic_range?('de-de')           #=> false
  #   tag.matched_by_basic_range?('malformedlangtag') #=> false
  #
  def matched_by_basic_range?(range)
    if range.kind_of?(Composition)
      s = range.composition
    elsif range.respond_to?(:to_str)
      s = range.to_str.downcase
    else
      return false
    end

    # The wildcard is honoured for both kinds of argument.
    return true if s == WILDCARD

    composition == s ||
    composition.index(s + HYPHEN) == 0
  end

  alias :has_prefix? :matched_by_basic_range?

end
|
119
|
+
|
120
|
+
#--
|
121
|
+
# Filtering is defined for the language tags only.
|
122
|
+
#
|
123
|
+
# RFC 4647, Section 3.3
|
124
|
+
# Filtering is used to select the set of language tags
|
125
|
+
# that matches a given language priority list.
|
126
|
+
#++
|
127
|
+
|
128
|
+
# Reopens Langtag to mix in the range matching defined in Filtering.
class Langtag
|
129
|
+
include Filtering
|
130
|
+
end
|
131
|
+
|
132
|
+
# Reopens Grandfathered to mix in the range matching defined in Filtering.
class Grandfathered
|
133
|
+
include Filtering
|
134
|
+
end
|
135
|
+
|
136
|
+
# Reopens Privateuse to mix in the range matching defined in Filtering.
class Privateuse
|
137
|
+
include Filtering
|
138
|
+
end
|
139
|
+
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
# EOF
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'lang/tag'
|
2
|
+
|
3
|
+
module Lang #:nodoc:
|
4
|
+
module Tag
|
5
|
+
|
6
|
+
# Conversion helper: hands back +thing+ untouched when it already is a
# Grandfathered, otherwise builds a new Grandfathered from it.
#
def self.Grandfathered(thing)
  Grandfathered === thing ? thing : Grandfathered.new(thing)
end
|
10
|
+
|
11
|
+
# Handles grandfathered registrations.
|
12
|
+
#
|
13
|
+
class Grandfathered < Composition

  # Accepts anything responding to +to_str+ whose string form is
  # recognized by Lang::Tag.grandfathered?.
  # Raises TypeError for non-strings and ArgumentError for
  # sequences that are not grandfathered tags.
  #
  def initialize(thing)
    unless thing.respond_to?(:to_str)
      raise TypeError, "Can't convert #{thing.class} into String"
    end

    candidate = thing.to_str
    if Lang::Tag.grandfathered?(candidate)
      @sequence = candidate
    else
      raise ArgumentError, "#{candidate.inspect} is not a grandfathered language tag"
    end
  end

  # Converts self to a Langtag built from the value stored in
  # GRANDFATHERED under the downcased sequence.
  # Raises Error when no such value is stored.
  #
  def to_langtag
    preferred_value = GRANDFATHERED[@sequence.downcase]
    return Tag::Langtag(preferred_value) if preferred_value
    raise Error, "There is no preferred value for the grandfathered language tag #{@sequence.inspect}."
  end

end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# EOF
|
@@ -0,0 +1,437 @@
|
|
1
|
+
require 'lang/tag'
|
2
|
+
|
3
|
+
module Lang
|
4
|
+
module Tag
|
5
|
+
|
6
|
+
# Conversion helper: hands back +thing+ untouched when it already is a
# Langtag, otherwise builds a new Langtag from it (+nil+ included).
#
def self.Langtag(thing = nil)
  Langtag === thing ? thing : Langtag.new(thing)
end
|
10
|
+
|
11
|
+
# Handles the 'langtag' production
|
12
|
+
# i.e normal language tags.
|
13
|
+
#
|
14
|
+
class Langtag < Composition

  attr_reader :language, :script, :region, :variants_sequence, :extensions_sequence, :privateuse_sequence

  # Builds a langtag from +thing+ (see #recompose). When +thing+ is
  # omitted (or +nil+/+false+) nothing is parsed and all components
  # stay unset.
  #
  def initialize(thing = nil)
    recompose(thing) if thing
  end

  #--
  # RFC 5646, sec. 2.2.1:
  # The primary language subtag is the first subtag in a language tag and
  # cannot be omitted, with two exceptions:
  #
  # The single-character subtag 'x' as the primary subtag indicates
  # that the language tag consists solely of subtags whose meaning is
  # defined by private agreement. For example, in the tag "x-fr-CH",
  # the subtags 'fr' and 'CH' do not represent the French language or
  # the country of Switzerland (or any other value in the IANA
  # registry) unless there is a private agreement in place to do so.
  # See Section 4.6.
  #
  # The single-character subtag 'i' is used by some grandfathered tags
  # (see Section 2.2.8) such as "i-klingon" and "i-bnn". (Other
  # grandfathered tags have a primary language subtag in their first
  # position.)
  #++

  #--
  # RFC 5646, sec. 2.2.2:
  # Extended language subtags are used to identify certain specially
  # selected languages that, for various historical and compatibility
  # reasons, are closely identified with or tagged using an existing
  # primary language subtag. Extended language subtags are always used
  # with their enclosing primary language subtag (indicated with a
  # 'Prefix' field in the registry) when used to form the language tag.
  #++

  # Sets the language component for this langtag.
  # Raises InvalidComponentError when +value+ is missing or does not
  # match LANGUAGE_REGEX.
  #
  def language=(value)
    raise InvalidComponentError, "Primary subtag cannot be omitted." unless value
    sequence = value.to_str
    if LANGUAGE_REGEX !~ sequence
      raise InvalidComponentError,
        "#{value.inspect} does not conform to the 'language' ABNF " \
        "or to the associated rules."
    end
    @language = sequence
    @primary = nil
    @extlang = nil
    dirty
    validate
  end

  # Returns a primary language subtag.
  #
  def primary
    return nil unless @language
    decompose_language unless @primary
    @primary
  end

  # Returns a second component of the extended language, if any.
  #
  def extlang
    return nil unless @language
    decompose_language unless @primary
    @extlang
  end

  # Decomposes a language component into the primary subtag and
  # the remainder. Always returns +nil+.
  #
  def decompose_language
    @primary, @extlang = @language.split(HYPHEN_SPLITTER, 2)
    nil
  end

  protected :decompose_language

  #--
  # RFC 5646, sec. 2.2.3:
  # Script subtags are used to indicate the script or writing system
  # variations that distinguish the written forms of a language or its
  # dialects.
  #++

  # Sets the script component for this langtag.
  # +nil+ removes the component.
  #
  def script=(value)
    subtag = value ? value.to_str : nil
    if subtag && SCRIPT_REGEX !~ subtag
      raise InvalidComponentError, "#{value.inspect} does not conform to the 'script' ABNF."
    end
    @script = subtag
    dirty
    validate
  end

  #--
  # RFC 5646, sec. 2.2.4:
  # Region subtags are used to indicate linguistic variations associated
  # with or appropriate to a specific country, territory, or region.
  # Typically, a region subtag is used to indicate variations such as
  # regional dialects or usage, or region-specific spelling conventions.
  # It can also be used to indicate that content is expressed in a way
  # that is appropriate for use throughout a region, for instance,
  # Spanish content tailored to be useful throughout Latin America.
  #++

  # Sets the region component for this langtag.
  # +nil+ removes the component.
  #
  def region=(value)
    subtag = value ? value.to_str : nil
    if subtag && REGION_REGEX !~ subtag
      raise InvalidComponentError, "#{value.inspect} does not conform to the 'region' ABNF."
    end
    @region = subtag
    dirty
    validate
  end

  #--
  # RFC 5646, sec. 2.2.5:
  # Variant subtags are used to indicate additional, well-recognized
  # variations that define a language or its dialects that are not
  # covered by other available subtags.
  #++

  # Sets the sequence of variants for this langtag.
  # +nil+ removes the component.
  #
  # ==== Example
  #
  #   tag = Lang::Tag('ja')
  #   tag.variants_sequence = 'hepburn-heploc'
  #   tag.variants #=> ['hepburn', 'heploc']
  #   tag.has_variant?('heploc') #=> true
  #   tag.has_variant?('nedis') #=> false
  #
  def variants_sequence=(value)
    sequence = value ? value.to_str : nil
    if sequence && VARIANTS_SEQUENCE_REGEX !~ "#{HYPHEN}#{sequence}"
      raise InvalidComponentError, "#{value.inspect} does not conform to the 'variants' ABNF."
    end
    set_variants_sequence(sequence)
    dirty
    validate
  end

  # Friendly version of the #variants_sequence=.
  # Sets the sequence of variants for this langtag.
  #
  # ==== Example
  #
  #   tag = Lang::Tag('sl')
  #   tag.variants = ['rozaj', 'solba', '1994']
  #   tag.variants_sequence #=> 'rozaj-solba-1994'
  #   tag.variants #=> ['rozaj', 'solba', '1994']
  #
  def variants=(value)
    subtags = Array(value).flatten
    # The list is not cached here: #variants derives it from the
    # stored sequence, so both views always agree (also when an
    # element itself contains several hyphen-separated subtags).
    self.variants_sequence = subtags.empty? ? nil : subtags.join(HYPHEN)
  end

  # Returns a list of variants of this langtag.
  #
  def variants
    return nil unless @variants_sequence
    @variants ||= @variants_sequence.split(HYPHEN_SPLITTER)
  end

  # Stores the sequence of variants without ABNF validation.
  # Raises InvalidComponentError if a variant is repeated
  # (compared case-insensitively). Always returns +nil+.
  #
  def set_variants_sequence(sequence)
    # Array#uniq! returns nil when there was nothing to remove.
    if sequence && sequence.downcase.split(HYPHEN_SPLITTER).uniq!
      raise InvalidComponentError, "#{sequence.inspect} sequence includes repeated variants."
    end
    @variants_sequence = sequence
    @variants = nil
    nil
  end

  protected :set_variants_sequence

  # Checks if self has a variant or a sequence of
  # variants passed. Works case-insensitively.
  #
  def has_variant?(sequence)
    return false unless @variants_sequence
    # The argument is matched literally: it is escaped before being
    # embedded into the pattern, so regexp metacharacters in it cannot
    # produce false positives or a RegexpError.
    /(?:\A|-)#{Regexp.escape(sequence.to_s)}(?:-|\z)/i === @variants_sequence
  end

  #--
  # RFC 5646, sec. 2.2.6:
  # Extensions provide a mechanism for extending language tags for use in
  # various applications. They are intended to identify information that
  # is commonly used in association with languages or language tags but
  # that is not part of language identification.
  #++

  # Sets the sequence of extensions for this langtag.
  # +nil+ removes the component.
  #
  def extensions_sequence=(value)
    sequence = value ? value.to_str : nil
    if sequence && EXTENSIONS_SEQUENCE_REGEX !~ "#{HYPHEN}#{sequence}"
      raise InvalidComponentError, "#{value.inspect} does not conform to the 'extensions' ABNF."
    end
    set_extensions_sequence(sequence)
    dirty
    validate
  end

  # Friendly version of the #extensions_sequence=.
  # Sets the sequence of extensions for this langtag.
  #
  def extensions=(value)
    subtags = Array(value).flatten
    self.extensions_sequence = subtags.empty? ? nil : subtags.join(HYPHEN)
  end

  # Stores the sequence of extensions without ABNF validation and
  # indexes it by downcased singleton.
  # Raises InvalidComponentError if a singleton is repeated.
  # Always returns +nil+.
  #
  def set_extensions_sequence(sequence)
    if sequence
      exthash = {}
      sequence.split(EXTENSIONS_SEQUENCE_SPLITTER).each do |seq|
        # First character is the singleton; the subtags follow the hyphen.
        singleton = seq[0, 1].downcase
        if exthash.key?(singleton)
          raise InvalidComponentError, "#{sequence.inspect} sequence includes repeated singletons."
        end
        exthash[singleton] = seq[2..-1]
      end
      @extensions_sequence = sequence
      @extensions = exthash
    else
      @extensions_sequence = nil
      @extensions = nil
    end
    nil
  end

  protected :set_extensions_sequence

  # Builds an *ordered* list of *downcased* singletons.
  #
  def singletons
    return nil unless @extensions
    @extensions.keys.sort
  end

  # Returns a sequence of subtags for a singleton passed.
  # Works case-insensitively.
  #
  def extension(key)
    return nil unless @extensions
    sequence = @extensions[key] || @extensions[key = key.downcase]
    return sequence unless String === sequence
    @extensions[key] = sequence.split(HYPHEN) # split lazily, on first access
    @extensions[key]
  end

  # Checks if self has a singleton passed.
  # Works case-insensitively.
  #
  def has_singleton?(key)
    return false unless @extensions
    @extensions.key?(key) || @extensions.key?(key.downcase)
  end

  alias :has_extension? :has_singleton?

  #--
  # RFC 5646, sec. 2.2.7:
  # Private use subtags are used to indicate distinctions in language
  # that are important in a given context by private agreement.
  #
  # RFC 5646, sec. 2.2.7:
  # For example, suppose a group of scholars is studying some texts in
  # medieval Greek. They might agree to use some collection of private
  # use subtags to identify different styles of writing in the texts.
  # For example, they might use 'el-x-koine' for documents in the
  # "common" style while using 'el-x-attic' for other documents that
  # mimic the Attic style. These subtags would not be recognized by
  # outside processes or systems, but might be useful in categorizing
  # various texts for study by those in the group.
  #++

  # Returns the list of private use subtags, i.e. the 'privateuse'
  # sequence without its leading singleton.
  #
  def privateuse
    return nil unless @privateuse_sequence
    @privateuse ||= @privateuse_sequence.split(HYPHEN)[1..-1]
  end

  # Friendly version of the #privateuse_sequence=.
  # Sets the 'privateuse' sequence for this langtag.
  #
  # ==== Example
  #
  #   tag = Lang::Tag('de')
  #   tag.privateuse = ['private', 'use', 'sequence']
  #   tag.privateuse_sequence #=> 'x-private-use-sequence'
  #   tag.privateuse #=> ['private', 'use', 'sequence']
  #
  def privateuse=(value)
    subtags = Array(value).flatten
    # The list is not cached here: #privateuse derives it from the
    # stored sequence, so it never contains the leading singleton.
    self.privateuse_sequence = subtags.empty? ? nil : ([PRIVATEUSE] + subtags).join(HYPHEN)
  end

  # Sets the 'privateuse' sequence for this langtag.
  # +nil+ removes the component.
  #
  def privateuse_sequence=(value)
    sequence = value ? value.to_str : nil
    if sequence && Tag::PRIVATEUSE_REGEX !~ sequence
      raise InvalidComponentError, "#{value.inspect} does not conform to the 'privateuse' ABNF."
    end
    @privateuse_sequence = sequence
    @privateuse = nil
    dirty
    validate
  end

  # Drops the cached string form in addition to the values
  # dropped by the superclass.
  #
  def dirty
    @sequence = nil
    super
  end

  private :dirty

  # Suspends validation while the block runs and validates once
  # afterwards. Raises LocalJumpError when called without a block.
  # Always returns +nil+.
  #
  # The previous state of the flag is restored even if the block
  # raises, so a failure inside the block cannot leave validation
  # switched off, and a nested call does not re-enable validation
  # for the enclosing block.
  #
  def defer_validation(&block)
    raise LocalJumpError, "No block given." unless block
    previously_deferred = @validation_deferred
    @validation_deferred = true
    begin
      yield
    ensure
      @validation_deferred = previously_deferred
    end
    validate
    nil
  end

  # Raises InvalidComponentError if the language component is unset.
  # Does nothing while validation is deferred.
  #
  def validate
    return if !!@validation_deferred
    if @language.nil?
      raise InvalidComponentError, "Primary subtag cannot be omitted."
    end
    nil
  end

  private :validate

  # Brings every component to the recommended letter case.
  # Always returns +nil+.
  #
  # Components are replaced by new Strings rather than modified in
  # place: a component may be the very object the caller handed to
  # a setter (possibly frozen), and it must not be altered.
  #
  def nicecase!

    # [ISO639-1] recommends that language codes be written in lowercase ('mn' Mongolian).
    # [ISO15924] recommends that script codes use lowercase with the initial letter capitalized ('Cyrl' Cyrillic).
    # [ISO3166-1] recommends that country codes be capitalized ('MN' Mongolia).

    if @language
      @language = @language.downcase
      @primary = nil
      @extlang = nil
    end

    @script = @script.capitalize if @script
    @region = @region.upcase if @region

    if @variants_sequence
      @variants_sequence = @variants_sequence.downcase
      @variants = nil
    end

    # Re-parsing keeps the per-singleton index in sync with the sequence.
    set_extensions_sequence(@extensions_sequence.downcase) if @extensions_sequence

    if @privateuse_sequence
      @privateuse_sequence = @privateuse_sequence.downcase
      @privateuse = nil
    end

    @sequence = nil
  end

  # Returns the string form, assembling it from the components
  # when no cached value is present.
  #
  def to_s
    return @sequence if @sequence
    sequence = @language ? @language.dup : ''.dup
    [@script, @region, @variants_sequence, @extensions_sequence, @privateuse_sequence].each do |component|
      sequence << HYPHEN << component if component
    end
    @sequence = sequence
  end

  # Parses +thing+ (anything responding to +to_str+) and replaces all
  # components of self with the result. Returns self.
  # Raises TypeError for non-strings and ArgumentError when the
  # string does not match LANGTAG_REGEX.
  #
  def recompose(thing)

    raise TypeError, "Can't convert #{thing.class} into String" unless thing.respond_to?(:to_str)
    tag = thing.to_str

    matched = LANGTAG_REGEX.match(tag)
    unless matched
      raise ArgumentError, "Ill-formed, grandfathered or 'privateuse' language tag: #{thing.inspect}."
    end

    dirty

    @sequence = tag
    @primary = nil
    @extlang = nil
    @language = matched[1]
    @script = matched[2]
    @region = matched[3]
    # Groups 4 and 5 and the post-match carry a leading hyphen; [1..-1] strips it.
    set_variants_sequence matched[4][1..-1]
    set_extensions_sequence matched[5][1..-1]
    @privateuse_sequence = matched.post_match[1..-1]
    @privateuse = nil

    self
  end

end
|
434
|
+
end
|
435
|
+
end
|
436
|
+
|
437
|
+
# EOF
|