lang 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +46 -0
- data/bin/lang +150 -0
- data/lib/lang/subtags.rb +147 -0
- data/lib/lang/subtags/entry.rb +40 -0
- data/lib/lang/subtags/extlang.rb +19 -0
- data/lib/lang/subtags/grandfathered.rb +9 -0
- data/lib/lang/subtags/language.rb +18 -0
- data/lib/lang/subtags/redundant.rb +9 -0
- data/lib/lang/subtags/region.rb +9 -0
- data/lib/lang/subtags/script.rb +9 -0
- data/lib/lang/subtags/variant.rb +17 -0
- data/lib/lang/tag.rb +141 -0
- data/lib/lang/tag/canonicalization.rb +376 -0
- data/lib/lang/tag/composition.rb +141 -0
- data/lib/lang/tag/filtering.rb +143 -0
- data/lib/lang/tag/grandfathered.rb +36 -0
- data/lib/lang/tag/langtag.rb +437 -0
- data/lib/lang/tag/lookup.rb +77 -0
- data/lib/lang/tag/pattern.rb +31 -0
- data/lib/lang/tag/privateuse.rb +34 -0
- data/lib/lang/version.rb +5 -0
- metadata +108 -0
data/README.rdoc
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
== DESCRIPTION:
|
2
|
+
|
3
|
+
Language tags implementation.
|
4
|
+
|
5
|
+
== FEATURES:
|
6
|
+
|
7
|
+
* RFC5646 conformance
|
8
|
+
* Basic filtering (RFC 4647)
|
9
|
+
* Extended filtering (RFC 4647)
|
10
|
+
* Canonicalization
|
11
|
+
* Direct work with IANA language subtag registry
|
12
|
+
|
13
|
+
== EXAMPLES:
|
14
|
+
|
15
|
+
See examples directory:
|
16
|
+
http://github.com/SSDany/lang/tree/master/examples
|
17
|
+
|
18
|
+
== INSTALLATION:
|
19
|
+
|
20
|
+
$ gem install lang
|
21
|
+
$ lang update
|
22
|
+
|
23
|
+
== LICENSE:
|
24
|
+
|
25
|
+
(The MIT License)
|
26
|
+
|
27
|
+
Copyright (c) 2010
|
28
|
+
|
29
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
30
|
+
a copy of this software and associated documentation files (the
|
31
|
+
'Software'), to deal in the Software without restriction, including
|
32
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
33
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
34
|
+
permit persons to whom the Software is furnished to do so, subject to
|
35
|
+
the following conditions:
|
36
|
+
|
37
|
+
The above copyright notice and this permission notice shall be
|
38
|
+
included in all copies or substantial portions of the Software.
|
39
|
+
|
40
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
41
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
42
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
43
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
44
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
45
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
46
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/bin/lang
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
$:.unshift(dir) unless $:.include?(dir)
|
5
|
+
require 'lang/subtags'
|
6
|
+
|
7
|
+
require 'net/http'
|
8
|
+
require 'tempfile'
|
9
|
+
|
10
|
+
module Lang
|
11
|
+
module Subtags
|
12
|
+
class Registry
|
13
|
+
|
14
|
+
NAME_REGEX = /^(?:#{SUBTAG}|#{TAG}):\s*([\w-]+)\s*$/io.freeze
|
15
|
+
TYPE_REGEX = /^#{TYPE}:\s*(\w+)\s*$/io.freeze
|
16
|
+
|
17
|
+
# Creates a registry handle.
#
# path - base path of the registry data files, without extension.
#        It is stored in absolute form; the other methods append
#        ".registry", ".indices" or ".boundaries" to it.
def initialize(path)
  @path = File.expand_path(path)
end
|
20
|
+
|
21
|
+
# Tells whether the downloaded registry file ("<path>.registry") is
# present on disk.
#
# File.exist? is used because the File.exists? alias has been removed
# from recent Ruby versions.
#
# Returns true or false.
def exists?
  File.exist?("#{@path}.registry")
end
|
24
|
+
|
25
|
+
# Fetches the registry from +uri+ and stores it as "<path>.registry".
#
# The target directory is created first when no registry file exists yet.
# The body is streamed chunk by chunk into the file handed out by #write.
#
# Returns whatever #write returns.
def download(uri)
  target_dir = File.dirname(@path)
  FileUtils.mkdir_p(target_dir) unless exists?

  write("registry") do |sink|
    http(uri) { |piece| sink << piece }
  end
end
|
29
|
+
|
30
|
+
# Rebuilds the "<path>.indices" and "<path>.boundaries" files from the
# registry file.
#
# Returns false (and does nothing) when the registry file is missing,
# true once both files have been written.
def build_indices
  return false unless exists?

  STDOUT << "Building indices\n"
  calculate_indices
  calculate_boundaries

  # One fixed-width row per name: the name left-justified to the key
  # width, the offset right-justified in the remaining columns, then "\n".
  write("indices") do |out|
    @boundaries.each do |boundary|
      key_width, row_width = boundary.last(2)
      row = "%-#{key_width}s%#{row_width - key_width - 1}d\n"
      @indices[boundary.first].to_a.sort.each do |name, position|
        out << format(row, name, position)
      end
    end
  end

  # One colon-separated line per kind.
  write("boundaries") do |out|
    @boundaries.each { |boundary| out << "#{boundary.join(":")}\n" }
  end

  STDOUT << "Done\n"
  true
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Atomically (re)writes "<path>.<dest>".
#
# The content is produced by the given block, which receives a binary-mode
# temporary file. Only after the block has finished is the temporary file
# moved over the destination, so a failure inside the block leaves any
# existing file untouched.
#
# dest - extension of the file to write ("registry", "indices", ...).
#
# Returns nil.
def write(dest, &block)
  path = "#{@path}.#{dest}"
  temp = Tempfile.new(dest)

  begin
    temp.binmode
    yield(temp) if block_given?
    temp.close

    # somewhat stolen from ActiveSupport

    begin
      # Ownership and mode to restore: those of the file being replaced...
      old = File.stat(path)
    rescue Errno::ENOENT
      # ...or, for a new file, those of a scratch file created in the
      # destination directory.
      check = File.join(File.dirname(path), ".permissions_check.#{Thread.current.object_id}.#{Process.pid}.#{rand(1000000)}")
      File.open(check, File::WRONLY | File::CREAT) { }
      old = File.stat(check)
      File.unlink(check)
    end

    FileUtils.mv(temp.path, path)

    begin
      File.chown(old.uid, old.gid, path)
    rescue Errno::EPERM, Errno::EACCES
      # Not allowed to change ownership; the data is already in place,
      # so carry on rather than failing the whole write.
    end
    File.chmod(old.mode, path)
  ensure
    # Closes the handle and removes the temporary file if it is still
    # around (i.e. when something above raised before the move).
    temp.close!
  end

  nil
end
|
81
|
+
|
82
|
+
# Downloads +uri+, yielding the body to the block chunk by chunk and
# printing progress to STDOUT.
#
# uri       - URL to fetch (String or URI).
# redirects - how many HTTP redirects may still be followed (default 5).
#
# Redirect responses carrying a Location header are followed; any other
# non-2xx response raises the error produced by Net::HTTPResponse#value,
# so an error page is never handed to the block as if it were data.
# When the server sends no Content-Length, a byte counter is shown
# instead of a percentage (the original divided by zero in that case).
#
# Returns nil.
def http(uri, redirects = 5, &block)
  STDOUT << "Downloading #{uri}\n"
  location = nil

  Net::HTTP.get_response(URI(uri)) do |response|
    if response.is_a?(Net::HTTPRedirection) && response['Location']
      # Remember the target and follow it once this connection is done.
      location = URI.join(uri.to_s, response['Location']).to_s
      next
    end

    response.value # raises unless the response is a 2xx

    total, size = response['Content-Length'].to_i, 0
    response.read_body do |chunk|
      size += chunk.bytesize
      block.call(chunk) if block
      if total > 0
        STDOUT << "\r%d%% done (%d of %d)" % [size*100/total, size, total]
      else
        STDOUT << "\r%d bytes received" % size
      end
      STDOUT.flush
    end
  end

  if location
    raise "too many redirects while downloading #{uri}" if redirects <= 0
    return http(location, redirects - 1, &block)
  end

  STDOUT << "\n"
  nil
end
|
96
|
+
|
97
|
+
# Derives the layout of the index file from @indices and stores it in
# @boundaries (computing @indices first if that has not happened yet).
#
# Each element describes one kind, in order of the kind's name:
#
#   [kind, byte offset of its first row, index of its last row,
#    width of the name column, width of a whole row]
#
# The row width leaves room for the longest name, the widest offset and
# one trailing character.
#
# Returns true.
def calculate_boundaries
  calculate_indices unless @indices

  position = 0
  kinds = @indices.keys.sort_by { |kind| kind.to_s }

  @boundaries = kinds.map do |kind|
    segment   = @indices[kind]
    key_width = segment.keys.map { |s| s.size }.max
    row_width = segment.values.max.to_s.size + key_width + 1

    row = [kind, position, segment.size - 1, key_width, row_width]
    position += segment.size * row_width
    row
  end

  true
end
|
113
|
+
|
114
|
+
# Scans "<path>.registry" and records, for every record, the byte offset
# of its Subtag/Tag line. The result is stored in @indices as
#
#   { kind (Symbol) => { downcased name => byte offset } }
#
# The offsets are later fed to IO#seek, so they must be counted in bytes.
# The file is therefore read in binary mode and advanced by
# String#bytesize; counting characters would drift as soon as the
# registry contains a multi-byte character.
#
# Returns true.
def calculate_indices
  count = 0
  kind = nil
  @indices = {}

  File.open("#{@path}.registry", "rb") do |f|
    f.each_line do |l|
      if TYPE_REGEX === l
        kind = Regexp.last_match(1).to_sym
        @indices[kind] ||= {}
      elsif kind && NAME_REGEX === l
        @indices[kind][Regexp.last_match(1).downcase] = count
      elsif l == SEPARATOR
        # End of record: ignore names until the next Type line.
        kind = nil
      end
      count += l.bytesize
    end
  end

  true
end
|
135
|
+
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# Command-line driver: `lang update [path]` or `lang reindex [path]`.
known_commands = %w(reindex update)
command = ARGV.shift

unless known_commands.include?(command)
  STDERR << "unknown command: #{command.inspect}\n"
  exit 1
end

# An optional second argument overrides the default registry location.
target = ARGV.shift || Lang::Subtags.registry_path
registry = Lang::Subtags::Registry.new(target)

# "update" always downloads; "reindex" downloads only when nothing is there yet.
if command == 'update' || !registry.exists?
  registry.download("http://www.iana.org/assignments/language-subtag-registry")
end
registry.build_indices
|
149
|
+
|
150
|
+
# EOF
|
data/lib/lang/subtags.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'lang/subtags/entry'
|
3
|
+
require 'lang/subtags/language'
|
4
|
+
require 'lang/subtags/extlang'
|
5
|
+
require 'lang/subtags/script'
|
6
|
+
require 'lang/subtags/region'
|
7
|
+
require 'lang/subtags/variant'
|
8
|
+
require 'lang/subtags/grandfathered'
|
9
|
+
require 'lang/subtags/redundant'
|
10
|
+
|
11
|
+
module Lang #:nodoc:
|
12
|
+
module Subtags
|
13
|
+
|
14
|
+
LOCK = Mutex.new
|
15
|
+
SEPARATOR = "%%\n".freeze
|
16
|
+
TYPE = "Type".freeze
|
17
|
+
SUBTAG = "Subtag".freeze
|
18
|
+
TAG = "Tag".freeze
|
19
|
+
ADDED = "Added".freeze
|
20
|
+
DEPRECATED = "Deprecated".freeze
|
21
|
+
DESCRIPTION = "Description".freeze
|
22
|
+
COMMENTS = "Comments".freeze
|
23
|
+
PREFIX = "Prefix".freeze
|
24
|
+
PREFERRED_VALUE = "Preferred-Value".freeze
|
25
|
+
MACROLANGUAGE = "Macrolanguage".freeze
|
26
|
+
SCOPE = "Scope".freeze
|
27
|
+
SUPPRESS_SCRIPT = "Suppress-Script".freeze
|
28
|
+
CONTINUE_REGEX = /\A\s\s/.freeze
|
29
|
+
|
30
|
+
COLON = ":".freeze
|
31
|
+
COLON_SPLITTER = RUBY_VERSION < '1.9.1' ? /\:/.freeze : COLON
|
32
|
+
|
33
|
+
# Maps a kind symbol (the downcased class name) to its Entry subclass.
SYM2CLASS = {}

# For every Entry subclass known at load time, register it in SYM2CLASS
# and define a lookup method named after the class that delegates to
# #entry with the matching kind.
Entry.subclasses.each do |subclass|
  meth = subclass.to_s.gsub(/^.*::/,'')
  kind = meth.downcase.to_sym
  SYM2CLASS[kind] = subclass
  define_method(meth) { |s| entry(kind, s) }
end
|
44
|
+
|
45
|
+
# Looks up the registry entry of the given kind.
#
# kind    - Symbol present in SYM2CLASS.
# snippet - subtag or tag; matched as given first, then downcased.
#
# Results (including misses, stored as nil) are cached per class under
# LOCK, so the registry is consulted once per name.
#
# Returns the entry, or nil when the kind is unknown or nothing is found.
def entry(kind, snippet)
  return nil unless SYM2CLASS.key?(kind)
  klass = SYM2CLASS[kind]

  LOCK.synchronize do
    cache = klass.entries
    next cache[snippet] if cache.key?(snippet)

    folded = snippet.downcase
    next cache[folded] if cache.key?(folded)

    cache[folded] = load_entry(kind, folded)
  end
end
|
56
|
+
|
57
|
+
# Closes the registry and index files if they are open.
#
# The memoized handles are dropped as well, so the next lookup reopens
# the files instead of hitting a closed stream. Files that were never
# opened are left alone, and calling this twice is harmless.
#
# Returns nil.
def close
  LOCK.synchronize {
    @registry.close if @registry && !@registry.closed?
    @indices.close if @indices && !@indices.closed?
    @registry = @indices = nil
  }
  nil
end
|
63
|
+
|
64
|
+
# Binary-searches the index file for +snippet+ among the rows of +kind+.
#
# The boundaries entry of a kind is [offset, upper, t, r]: the byte
# offset of its first row, the index of its last row, the width of the
# name column and the width of a whole row. Rows are fixed-width, so
# row +i+ starts at offset + i*r.
#
# Returns the number stored next to the name (the position of the record
# in the registry file), or nil when the name - or the kind itself - is
# not indexed.
def search(kind, snippet)
  offset, upper, t, r = *boundaries[kind]
  # Kind missing from the boundaries table: nothing to search.
  return nil unless offset && upper && t && r

  lower = 0
  target = snippet.ljust(t)

  until upper < lower
    middle = (lower+upper)/2
    indices.seek(offset + middle*r, IO::SEEK_SET)
    value = indices.read(t)
    if value == target
      # The rest of the row holds the right-justified number.
      return indices.read(r-t).to_i
    elsif target < value
      upper = middle-1
    else
      lower = middle+1
    end
  end

  nil
end
|
84
|
+
|
85
|
+
# Reads one record from the registry file and builds an entry from it.
#
# kind    - Symbol present in SYM2CLASS; selects the entry class.
# snippet - name to look up through #search.
#
# Reading starts at the position returned by #search and stops at the
# next separator line or at end of file. Folded fields (continuation
# lines, i.e. lines matching CONTINUE_REGEX) are unfolded first: each
# continuation is joined to the field it continues with a single space.
# This works for every field, not just Comments, and a continuation with
# no preceding field is ignored instead of raising.
#
# Returns the populated entry, or nil when #search finds nothing.
def load_entry(kind, snippet)
  amount = search(kind, snippet)
  return nil unless amount

  registry.seek(amount, IO::SEEK_SET)
  thing = SYM2CLASS[kind].new

  # Stores one complete (unfolded) field on the entry.
  store = lambda do |attribute, value|
    case attribute
    when DESCRIPTION     ; thing.add_description(value)
    when PREFIX
      if kind == :variant
        thing.add_prefix(value)
      else
        thing.prefix = value
      end
    when SUBTAG,TAG      ; thing.name = value
    when ADDED           ; thing.added_at = value
    when DEPRECATED      ; thing.deprecated_at = value
    when COMMENTS        ; thing.comments = value
    when PREFERRED_VALUE ; thing.preferred_value = value
    when MACROLANGUAGE   ; thing.macrolanguage = value
    when SCOPE           ; thing.scope = value
    when SUPPRESS_SCRIPT ; thing.suppress_script = value
    end
  end

  pending = nil # [attribute, value] of the field still being assembled

  until registry.eof?
    line = registry.readline
    break if line == SEPARATOR

    if CONTINUE_REGEX === line
      extra = line.strip
      pending[1] << " " << extra if pending && !extra.empty?
      next
    end

    # A new field begins, so the previous one is complete.
    store.call(*pending) if pending
    attribute, value = line.split(COLON_SPLITTER, 2)
    pending = value ? [attribute, value.strip] : nil
  end
  store.call(*pending) if pending

  thing
end
|
113
|
+
|
114
|
+
# Base path (without extension) of the registry data files:
# "data/language-subtag" next to this source file. Memoized.
def registry_path
  return @registry_path if @registry_path
  @registry_path = File.join(File.dirname(__FILE__), "data", "language-subtag")
end
|
117
|
+
|
118
|
+
# Read-only handle on "<registry_path>.registry"; opened on first use
# and reused afterwards.
def registry
  return @registry if @registry
  @registry = File.open("#{registry_path}.registry", File::RDONLY)
end
|
121
|
+
|
122
|
+
# Read-only handle on "<registry_path>.indices"; opened on first use
# and reused afterwards.
def indices
  return @indices if @indices
  @indices = File.open("#{registry_path}.indices", File::RDONLY)
end
|
125
|
+
|
126
|
+
# Layout table of the index file, read from "<registry_path>.boundaries".
#
# Every line of that file is "kind:n:n:n:n"; the result maps the kind
# (as a Symbol) to the list of integers that follow it.
#
# The file is read inside a block so its handle is closed again, and the
# table is memoized only after it has been read completely - a failed
# read (e.g. missing file) is therefore retried on the next call rather
# than leaving an empty table behind.
#
# Returns a Hash of Symbol => Array of Integers.
def boundaries
  return @boundaries if @boundaries

  loaded = {}
  File.open("#{registry_path}.boundaries", File::RDONLY) do |file|
    file.each_line do |line|
      kind, *numbers = line.split(COLON_SPLITTER)
      loaded[kind.to_sym] = numbers.map { |b| b.to_i }
    end
  end

  @boundaries = loaded
end
|
135
|
+
|
136
|
+
extend self
|
137
|
+
|
138
|
+
class << self
|
139
|
+
private :boundaries, :indices, :registry
|
140
|
+
private :load_entry
|
141
|
+
private :search
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# EOF
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Lang #:nodoc:
  module Subtags
    # Common base of all registry entry classes: holds the fields shared
    # by every record and keeps track of its subclasses.
    class Entry

      attr_accessor :name, :preferred_value, :added_at, :deprecated_at, :comments

      class << self
        # Hook: remembers every class that inherits from the receiver.
        def inherited(subclass)
          subclasses << subclass
        end

        # Classes recorded by the inherited hook (per receiver).
        def subclasses
          @subclasses ||= []
        end

        # Per-class Hash used as a cache of loaded entries.
        def entries
          @entries ||= {}
        end
      end

      # True when a deprecation date has been set.
      def deprecated?
        @deprecated_at.nil? ? false : true
      end

      # All description chunks joined by newlines, or nil when none
      # has been added.
      def description
        @descriptions && @descriptions.join("\n")
      end

      # Appends a description chunk; returns the list of chunks.
      def add_description(chunk)
        (@descriptions ||= []) << chunk
      end

    end
  end
end
|
39
|
+
|
40
|
+
# EOF
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Lang #:nodoc:
  module Subtags
    # Holds data about extlang subtags.
    class Extlang < Entry

      attr_accessor :macrolanguage, :suppress_script, :prefix, :scope

      # The language entry named by +macrolanguage+, looked up through
      # Subtags.entry; nil when no macrolanguage is set.
      def macro
        return unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
|
18
|
+
|
19
|
+
# EOF
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Lang #:nodoc:
  module Subtags
    # Holds data about primary language subtags.
    class Language < Entry

      attr_accessor :macrolanguage, :suppress_script, :scope

      # The language entry named by +macrolanguage+, looked up through
      # Subtags.entry; nil when no macrolanguage is set.
      def macro
        return unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
|
17
|
+
|
18
|
+
# EOF
|