lang 0.1.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +46 -0
- data/bin/lang +150 -0
- data/lib/lang/subtags.rb +147 -0
- data/lib/lang/subtags/entry.rb +40 -0
- data/lib/lang/subtags/extlang.rb +19 -0
- data/lib/lang/subtags/grandfathered.rb +9 -0
- data/lib/lang/subtags/language.rb +18 -0
- data/lib/lang/subtags/redundant.rb +9 -0
- data/lib/lang/subtags/region.rb +9 -0
- data/lib/lang/subtags/script.rb +9 -0
- data/lib/lang/subtags/variant.rb +17 -0
- data/lib/lang/tag.rb +141 -0
- data/lib/lang/tag/canonicalization.rb +376 -0
- data/lib/lang/tag/composition.rb +141 -0
- data/lib/lang/tag/filtering.rb +143 -0
- data/lib/lang/tag/grandfathered.rb +36 -0
- data/lib/lang/tag/langtag.rb +437 -0
- data/lib/lang/tag/lookup.rb +77 -0
- data/lib/lang/tag/pattern.rb +31 -0
- data/lib/lang/tag/privateuse.rb +34 -0
- data/lib/lang/version.rb +5 -0
- metadata +108 -0
data/README.rdoc
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
== DESCRIPTION:
|
2
|
+
|
3
|
+
Language tags implementation.
|
4
|
+
|
5
|
+
== FEATURES:
|
6
|
+
|
7
|
+
* RFC 5646 conformance
|
8
|
+
* Basic filtering (RFC 4647)
|
9
|
+
* Extended filtering (RFC 4647)
|
10
|
+
* Canonicalization
|
11
|
+
* Direct work with IANA language subtag registry
|
12
|
+
|
13
|
+
== EXAMPLES:
|
14
|
+
|
15
|
+
See examples directory:
|
16
|
+
http://github.com/SSDany/lang/tree/master/examples
|
17
|
+
|
18
|
+
== INSTALLATION:
|
19
|
+
|
20
|
+
$ gem install lang
|
21
|
+
$ lang update
|
22
|
+
|
23
|
+
== LICENSE:
|
24
|
+
|
25
|
+
(The MIT License)
|
26
|
+
|
27
|
+
Copyright (c) 2010
|
28
|
+
|
29
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
30
|
+
a copy of this software and associated documentation files (the
|
31
|
+
'Software'), to deal in the Software without restriction, including
|
32
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
33
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
34
|
+
permit persons to whom the Software is furnished to do so, subject to
|
35
|
+
the following conditions:
|
36
|
+
|
37
|
+
The above copyright notice and this permission notice shall be
|
38
|
+
included in all copies or substantial portions of the Software.
|
39
|
+
|
40
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
41
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
42
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
43
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
44
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
45
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
46
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/bin/lang
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
$:.unshift(dir) unless $:.include?(dir)
|
5
|
+
require 'lang/subtags'
|
6
|
+
|
7
|
+
require 'net/http'
|
8
|
+
require 'tempfile'
|
9
|
+
|
10
|
+
module Lang
|
11
|
+
module Subtags
|
12
|
+
class Registry
|
13
|
+
|
14
|
+
NAME_REGEX = /^(?:#{SUBTAG}|#{TAG}):\s*([\w-]+)\s*$/io.freeze
|
15
|
+
TYPE_REGEX = /^#{TYPE}:\s*(\w+)\s*$/io.freeze
|
16
|
+
|
17
|
+
# Creates a registry rooted at +path+.
#
# +path+ is expanded to an absolute path and is used as the common prefix
# of the generated files ("<path>.registry", "<path>.indices",
# "<path>.boundaries").
def initialize(path)
  absolute = File.expand_path(path)
  @path = absolute
end
|
20
|
+
|
21
|
+
# Reports whether the registry file ("<path>.registry") is present on disk.
#
# Uses File.exist?: the File.exists? alias is deprecated and no longer
# available in Ruby 3.2 and later.
def exists?
  File.exist?("#{@path}.registry")
end
|
24
|
+
|
25
|
+
# Downloads +uri+ and stores the response body as "<path>.registry".
#
# The directory that holds the registry is created first when the registry
# file does not exist yet. Returns whatever #write returns.
def download(uri)
  target_dir = File.dirname(@path)
  FileUtils.mkdir_p(target_dir) unless exists?

  write("registry") do |temp|
    http(uri) { |chunk| temp << chunk }
  end
end
|
29
|
+
|
30
|
+
# Rebuilds "<path>.indices" and "<path>.boundaries" from the registry file.
#
# Returns false when the registry file is missing, true otherwise.
def build_indices
  return false unless exists?

  STDOUT << "Building indices\n"
  calculate_indices
  calculate_boundaries

  # One fixed-width record per name: the name left-justified to the key
  # width, followed by the right-justified offset and a newline.
  write("indices") do |out|
    @boundaries.each do |row|
      key_width    = row[-2]
      record_width = row[-1]
      layout = "%-#{key_width}s%#{record_width - key_width - 1}d\n"
      @indices[row.first].to_a.sort.each do |subtag, position|
        out << layout % [subtag, position]
      end
    end
  end

  # One colon-separated line per kind.
  write("boundaries") do |out|
    @boundaries.each { |row| out << "#{row.join(":")}\n" }
  end

  STDOUT << "Done\n"
  true
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Writes "<path>.<dest>" by filling a temporary file and moving it into
# place, so the destination is only replaced once the content is complete.
#
# The temporary file (opened in binary mode) is yielded to the block.
# If the block raises, the temporary file is closed and removed, the
# destination is left untouched and the exception propagates.
#
# Ownership and mode of the previous destination file are restored on the
# new one; when there was no previous file, a throw-away probe file created
# in the destination directory supplies them.
#
# Always returns nil.
def write(dest, &block)

  path = "#{@path}.#{dest}"
  temp = Tempfile.new(dest)
  filled = false

  begin
    temp.binmode
    yield(temp) if block_given?
    temp.close
    filled = true
  ensure
    # Do not leave an open, half-written tempfile behind on failure.
    temp.close! unless filled
  end

  # somewhat stolen from ActiveSupport

  begin
    old = File.stat(path)
  rescue Errno::ENOENT
    check = File.join(File.dirname(path), ".permissions_check.#{Thread.current.object_id}.#{Process.pid}.#{rand(1000000)}")
    File.open(check, File::WRONLY | File::CREAT) { }
    old = File.stat(check)
    File.unlink(check)
  end

  FileUtils.mv(temp.path, path)

  begin
    File.chown(old.uid, old.gid, path)
  rescue Errno::EPERM, Errno::EACCES
    # Not allowed to change ownership: the file is already in place,
    # so keep going instead of failing the whole write.
  end
  File.chmod(old.mode, path)
  nil
end
|
81
|
+
|
82
|
+
# Streams the body of +uri+ chunk by chunk to the given block while
# printing progress to STDOUT.
#
# When the response carries a Content-Length header a percentage is shown.
# Without it (the header reads as 0) only the number of bytes received is
# shown, since a percentage would require dividing by zero.
#
# Always returns nil.
def http(uri)
  STDOUT << "Downloading #{uri}\n"
  Net::HTTP.get_response(URI(uri)) do |response|
    total, size = response['Content-Length'].to_i, 0
    response.read_body do |chunk|
      size += chunk.size
      yield(chunk) if block_given?
      if total > 0
        STDOUT << "\r%d%% done (%d of %d)" % [size*100/total, size, total]
      else
        STDOUT << "\r%d bytes received" % size
      end
      STDOUT.flush
    end
  end
  STDOUT << "\n"
  nil
end
|
96
|
+
|
97
|
+
# Computes @boundaries: one row per kind, ordered by the kind's name.
#
# Each row is [kind, offset, last_index, key_width, record_width] where
# * offset       - running total of (records * record_width) over the
#                  preceding kinds,
# * last_index   - number of names of that kind minus one,
# * key_width    - length of the longest name,
# * record_width - key_width + digits of the largest offset + 1.
#
# A kind without any names yields [kind, offset, -1, 0, 1] instead of
# raising on the missing maximum.
#
# Returns true.
def calculate_boundaries
  calculate_indices unless @indices
  offset = 0
  @boundaries = @indices.keys.sort_by { |kind| kind.to_s }.map do |kind|
    segment      = @indices[kind]
    key_width    = segment.keys.map { |subtag| subtag.size }.max || 0
    record_width = segment.values.max.to_s.size + key_width + 1
    boundary     = [kind, offset, segment.size - 1, key_width, record_width]
    offset += segment.size * record_width
    boundary
  end
  true
end
|
113
|
+
|
114
|
+
# Scans "<path>.registry" and fills @indices, a hash of
# kind => { downcased name => byte offset of the Subtag/Tag line }.
#
# The file is read in binary mode and offsets are accumulated in bytes,
# because they are later used as IO#seek positions; counting characters
# would shift every offset after the first non-ASCII line.
#
# Returns true.
def calculate_indices
  count = 0
  kind = nil
  @indices = {}
  File.open("#{@path}.registry", "rb") do |f|
    f.each_line do |l|
      if TYPE_REGEX === l
        kind = $1.to_sym
        @indices[kind] ||= {}
      elsif kind && NAME_REGEX === l
        @indices[kind][$1.downcase] = count
      elsif l == SEPARATOR
        # end of record: a name is only accepted after the next Type line
        kind = nil
      end
      count += l.bytesize
    end
  end
  true
end
|
135
|
+
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# Command line entry point: lang (reindex|update) [registry path prefix]
command = ARGV.shift
known_commands = %w(reindex update)
if !known_commands.include?(command)
  STDERR << "unknown command: #{command.inspect}\n"
  exit 1
end

location = ARGV.shift || Lang::Subtags.registry_path
registry = Lang::Subtags::Registry.new(location)

# "update" always downloads; "reindex" downloads only when no registry
# file is present yet.
needs_download = command == 'update' || !registry.exists?
registry.download("http://www.iana.org/assignments/language-subtag-registry") if needs_download
registry.build_indices
|
149
|
+
|
150
|
+
# EOF
|
data/lib/lang/subtags.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'lang/subtags/entry'
|
3
|
+
require 'lang/subtags/language'
|
4
|
+
require 'lang/subtags/extlang'
|
5
|
+
require 'lang/subtags/script'
|
6
|
+
require 'lang/subtags/region'
|
7
|
+
require 'lang/subtags/variant'
|
8
|
+
require 'lang/subtags/grandfathered'
|
9
|
+
require 'lang/subtags/redundant'
|
10
|
+
|
11
|
+
module Lang #:nodoc:
|
12
|
+
module Subtags
|
13
|
+
|
14
|
+
LOCK = Mutex.new
|
15
|
+
SEPARATOR = "%%\n".freeze
|
16
|
+
TYPE = "Type".freeze
|
17
|
+
SUBTAG = "Subtag".freeze
|
18
|
+
TAG = "Tag".freeze
|
19
|
+
ADDED = "Added".freeze
|
20
|
+
DEPRECATED = "Deprecated".freeze
|
21
|
+
DESCRIPTION = "Description".freeze
|
22
|
+
COMMENTS = "Comments".freeze
|
23
|
+
PREFIX = "Prefix".freeze
|
24
|
+
PREFERRED_VALUE = "Preferred-Value".freeze
|
25
|
+
MACROLANGUAGE = "Macrolanguage".freeze
|
26
|
+
SCOPE = "Scope".freeze
|
27
|
+
SUPPRESS_SCRIPT = "Suppress-Script".freeze
|
28
|
+
CONTINUE_REGEX = /\A\s\s/.freeze
|
29
|
+
|
30
|
+
COLON = ":".freeze
|
31
|
+
COLON_SPLITTER = RUBY_VERSION < '1.9.1' ? /\:/.freeze : COLON
|
32
|
+
|
33
|
+
# Maps a kind symbol (the downcased, unqualified class name) to its Entry
# subclass, and defines one lookup method per subclass, named after the
# unqualified class name, that delegates to #entry.
SYM2CLASS = {}
Entry.subclasses.each do |entry_class|
  meth = entry_class.to_s.gsub(/^.*::/,'')
  kind = meth.downcase.to_sym
  SYM2CLASS[kind] = entry_class
  class_eval(<<-EOS, __FILE__, __LINE__ + 1)
    def #{meth}(s)
      entry(:#{kind},s)
    end
  EOS
end
|
44
|
+
|
45
|
+
# Returns the entry of the given +kind+ for +snippet+, or nil when +kind+
# is not a key of SYM2CLASS.
#
# Results (including nil from load_entry) are cached per class. The cache
# is consulted with +snippet+ as given first and then downcased; entries
# that have to be loaded are stored under the downcased key.
def entry(kind, snippet)
  return nil unless SYM2CLASS.key?(kind)
  klass = SYM2CLASS[kind]
  LOCK.synchronize do
    cache = klass.entries
    key = cache.key?(snippet) ? snippet : snippet.downcase
    if cache.key?(key)
      cache[key]
    else
      cache[key] = load_entry(kind, key)
    end
  end
end
|
56
|
+
|
57
|
+
# Closes the registry and indices files if they have been opened and
# forgets the handles, so the next lookup reopens them.
#
# Files that were never opened are not opened just to be closed.
# Returns nil.
def close
  LOCK.synchronize {
    @registry.close if @registry && !@registry.closed?
    @indices.close if @indices && !@indices.closed?
    @registry = @indices = nil
  }
end
|
63
|
+
|
64
|
+
# Binary search for +snippet+ among the fixed-width records of +kind+ in
# the indices file.
#
# boundaries[kind] supplies [offset, upper, t, r]: the position of the
# first record, the index of the last record, the key width and the record
# width. Each record is a key padded to t characters followed by r - t
# characters holding the offset.
#
# Returns the stored offset as an Integer, or nil when +snippet+ is not
# found or when no boundaries are known for +kind+.
def search(kind, snippet)

  boundary = boundaries[kind]
  return nil unless boundary

  lower = 0
  offset, upper, t, r = *boundary
  target = snippet.ljust(t)

  until upper < lower
    middle = (lower+upper)/2
    indices.seek(offset + middle*r, IO::SEEK_SET)
    value = indices.read(t)
    if value == target
      return indices.read(r-t).to_i
    elsif target < value
      upper = middle-1
    else
      lower = middle+1
    end
  end
  nil
end
|
84
|
+
|
85
|
+
# Builds the entry object of +kind+ for +snippet+ from the registry file.
#
# The position comes from #search; nil is returned when +snippet+ is not
# indexed. Lines are read from that position up to the next record
# separator (or end of file).
#
# A line matching CONTINUE_REGEX is a folded continuation of the preceding
# field, whatever that field is: its stripped text is appended to that
# field's value, separated by a single space. Appending it to the comments
# unconditionally would fail for records whose Description is folded and
# that have no Comments field yet.
def load_entry(kind, snippet)
  amount = search(kind, snippet)
  return nil unless amount
  registry.seek(amount, IO::SEEK_SET)
  thing = SYM2CLASS[kind].new

  # First pass: collect [attribute, value] pairs, unfolding continuations.
  fields = []
  until registry.eof?
    line = registry.readline
    break if line == SEPARATOR

    if CONTINUE_REGEX === line
      fields.last[1] << " " << line.strip unless fields.empty?
      next
    end

    attribute, value = line.split(COLON_SPLITTER,2)
    fields << [attribute, value.to_s.strip]
  end

  # Second pass: hand every complete field to the entry.
  fields.each do |attribute, value|
    case attribute
    when DESCRIPTION      ; thing.add_description(value)
    when PREFIX           ; kind == :variant ? thing.add_prefix(value) : thing.prefix = value
    when SUBTAG,TAG       ; thing.name = value
    when ADDED            ; thing.added_at = value
    when DEPRECATED       ; thing.deprecated_at = value
    when COMMENTS         ; thing.comments = value
    when PREFERRED_VALUE  ; thing.preferred_value = value
    when MACROLANGUAGE    ; thing.macrolanguage = value
    when SCOPE            ; thing.scope = value
    when SUPPRESS_SCRIPT  ; thing.suppress_script = value
    end
  end

  thing
end
|
113
|
+
|
114
|
+
# Common path prefix of the registry data files: "data/language-subtag"
# next to this source file. The value is computed once and memoized.
def registry_path
  return @registry_path if @registry_path
  @registry_path = File.join(File.dirname(__FILE__), "data", "language-subtag")
end
|
117
|
+
|
118
|
+
# Read-only handle on "<registry_path>.registry", opened on first use and
# memoized afterwards.
def registry
  return @registry if @registry
  @registry = File.open("#{registry_path}.registry", File::RDONLY)
end
|
121
|
+
|
122
|
+
# Read-only handle on "<registry_path>.indices", opened on first use and
# memoized afterwards.
def indices
  return @indices if @indices
  @indices = File.open("#{registry_path}.indices", File::RDONLY)
end
|
125
|
+
|
126
|
+
# Returns the table kind => [integers] parsed from
# "<registry_path>.boundaries" (one colon-separated line per kind, the kind
# name first). The table is memoized.
#
# The file is opened with a block so the handle is closed after reading,
# and the result is memoized only after a complete read: if opening or
# reading fails, the error propagates and the next call retries instead of
# returning an empty table.
def boundaries
  return @boundaries if @boundaries
  table = {}
  File.open("#{registry_path}.boundaries", File::RDONLY) do |file|
    file.each_line do |line|
      boundary = line.split(COLON_SPLITTER)
      table[boundary.shift.to_sym] = boundary.map { |b| b.to_i }
    end
  end
  @boundaries = table
end
|
135
|
+
|
136
|
+
extend self
|
137
|
+
|
138
|
+
class << self
|
139
|
+
private :boundaries, :indices, :registry
|
140
|
+
private :load_entry
|
141
|
+
private :search
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# EOF
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Lang #:nodoc:
  module Subtags
    # Base class of the registry records. Every subclass is recorded in
    # Entry.subclasses when it is defined, and each class keeps its own
    # hash of entries.
    class Entry

      attr_accessor :name, :preferred_value, :added_at, :deprecated_at, :comments

      class << self
        # Records +subclass+ in the list returned by #subclasses.
        def inherited(subclass)
          subclasses << subclass
        end

        # Subclasses recorded so far on the receiver (created lazily).
        def subclasses
          @subclasses ||= []
        end

        # Hash of entries held by the receiving class (created lazily).
        def entries
          @entries ||= {}
        end
      end

      # True when a deprecation value has been set.
      def deprecated?
        !@deprecated_at.nil?
      end

      # All description chunks joined with newlines, or nil when no
      # description has been added.
      def description
        return nil unless @descriptions
        @descriptions.join("\n")
      end

      # Appends +chunk+ to the descriptions and returns the list.
      def add_description(chunk)
        (@descriptions ||= []) << chunk
      end

    end
  end
end
|
39
|
+
|
40
|
+
# EOF
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Lang #:nodoc:
  module Subtags
    # Holds data about extlang subtags.
    class Extlang < Entry

      attr_accessor :macrolanguage, :suppress_script, :prefix, :scope

      # Looks up the language entry named by +macrolanguage+;
      # nil when no macrolanguage is set.
      def macro
        return unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
|
18
|
+
|
19
|
+
# EOF
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Lang #:nodoc:
  module Subtags
    # Holds data about primary language subtags.
    class Language < Entry

      attr_accessor :macrolanguage, :suppress_script, :scope

      # Looks up the language entry named by +macrolanguage+;
      # nil when no macrolanguage is set.
      def macro
        return unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
|
17
|
+
|
18
|
+
# EOF
|