lang 0.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,46 @@
1
+ == DESCRIPTION:
2
+
3
+ Language tags implementation.
4
+
5
+ == FEATURES:
6
+
7
+ * RFC 5646 conformance
8
+ * Basic filtering (RFC 4647)
9
+ * Extended filtering (RFC 4647)
10
+ * Canonicalization
11
+ * Direct work with IANA language subtag registry
12
+
13
+ == EXAMPLES:
14
+
15
+ See examples directory:
16
+ http://github.com/SSDany/lang/tree/master/examples
17
+
18
+ == INSTALLATION:
19
+
20
+ $ gem install lang
21
+ $ lang update
22
+
23
+ == LICENSE:
24
+
25
+ (The MIT License)
26
+
27
+ Copyright (c) 2010
28
+
29
+ Permission is hereby granted, free of charge, to any person obtaining
30
+ a copy of this software and associated documentation files (the
31
+ 'Software'), to deal in the Software without restriction, including
32
+ without limitation the rights to use, copy, modify, merge, publish,
33
+ distribute, sublicense, and/or sell copies of the Software, and to
34
+ permit persons to whom the Software is furnished to do so, subject to
35
+ the following conditions:
36
+
37
+ The above copyright notice and this permission notice shall be
38
+ included in all copies or substantial portions of the Software.
39
+
40
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
41
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
43
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
44
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
45
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
46
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/bin/lang ADDED
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ $:.unshift(dir) unless $:.include?(dir)
5
+ require 'lang/subtags'
6
+
7
+ require 'net/http'
8
+ require 'tempfile'
9
+
10
+ module Lang
11
+ module Subtags
12
+ class Registry
13
+
14
+ NAME_REGEX = /^(?:#{SUBTAG}|#{TAG}):\s*([\w-]+)\s*$/io.freeze
15
+ TYPE_REGEX = /^#{TYPE}:\s*(\w+)\s*$/io.freeze
16
+
17
# Stores the absolute base path of the registry files.
#
# path - base path without suffix; the other methods of this class append
#        ".registry", ".indices" and ".boundaries" to it.
def initialize(path)
  absolute = File.expand_path(path)
  @path = absolute
end
20
+
21
# Tells whether the downloaded registry file ("#{@path}.registry") is present.
#
# Uses File.exist?; the File.exists? alias was deprecated and later removed
# from Ruby.
def exists?
  File.exist?("#{@path}.registry")
end
24
+
25
# Downloads +uri+ and stores the body as "#{@path}.registry".
#
# The target directory is created first when no registry file exists yet.
# The body is streamed chunk by chunk into the file handed over by #write.
def download(uri)
  target_dir = File.dirname(@path)
  FileUtils.mkdir_p(target_dir) unless exists?
  write("registry") do |temp|
    http(uri) { |chunk| temp << chunk }
  end
end
29
+
30
# Builds the lookup files for an already downloaded registry.
#
# Writes two files next to the registry:
# * "#{@path}.indices"    - fixed-width records, one per name: the name
#                           left-justified, followed by its offset
#                           right-justified, terminated by a newline;
# * "#{@path}.boundaries" - one colon-joined line per kind.
#
# Returns false when the registry file is missing, true otherwise.
def build_indices
  return false unless exists?

  STDOUT << "Building indices\n"
  calculate_indices
  calculate_boundaries

  write("indices") do |temp|
    @boundaries.each do |boundary|
      kind = boundary.first
      key_width, record_width = boundary[-2], boundary[-1]
      # One character of the record is reserved for the trailing newline.
      template = "%-#{key_width}s%#{record_width - key_width - 1}d\n"
      @indices[kind].to_a.sort.each do |name, offset|
        temp << template % [name, offset]
      end
    end
  end

  write("boundaries") do |temp|
    @boundaries.each { |boundary| temp << "#{boundary.join(":")}\n" }
  end

  STDOUT << "Done\n"
  true
end
53
+
54
+ private
55
+
56
# Writes "#{@path}.#{dest}" through a temporary file.
#
# The block receives the (binary-mode) temporary file and fills it. The
# finished file is then moved over the destination and given the ownership
# and mode of the file it replaces. When there is no previous file, a probe
# file created in the destination directory supplies those values.
#
# dest - suffix of the file to write ("registry", "indices", ...).
#
# Returns nil. Exceptions raised by the block propagate; the temporary file
# is closed and removed in that case and the destination is left untouched.
def write(dest, &block)
  path = "#{@path}.#{dest}"
  temp = Tempfile.new(dest)
  finished = false
  begin
    temp.binmode
    yield(temp) if block_given?
    finished = true
  ensure
    # close(true) also unlinks the temporary file; do that only on failure.
    temp.close(!finished)
  end

  # somewhat stolen from ActiveSupport
  begin
    old = File.stat(path)
  rescue Errno::ENOENT
    # No previous file: create a throw-away file to learn which owner and
    # mode a new file gets in this directory.
    check = File.join(File.dirname(path), ".permissions_check.#{Thread.current.object_id}.#{Process.pid}.#{rand(1000000)}")
    File.open(check, File::WRONLY | File::CREAT) { }
    old = File.stat(check)
    File.unlink(check)
  end

  FileUtils.mv(temp.path, path)

  begin
    File.chown(old.uid, old.gid, path)
    File.chmod(old.mode, path)
  rescue Errno::EPERM, Errno::EACCES
    # The data is already in place; not being allowed to restore the former
    # ownership or mode must not turn a successful write into a failure.
  end
  nil
end
81
+
82
# Streams the body of +uri+ to the given block, chunk by chunk, printing
# progress to STDOUT.
#
# uri - String URL to fetch.
#
# Raises the error produced by Net::HTTPResponse#value when the response is
# not a 2xx success, so that an error or redirect page is never handed to
# the caller as registry data.
#
# Returns nil.
def http(uri)
  STDOUT << "Downloading #{uri}\n"
  Net::HTTP.get_response(URI(uri)) do |response|
    response.value
    total = response['Content-Length'].to_i
    size = 0
    response.read_body do |chunk|
      size += chunk.bytesize
      yield(chunk) if block_given?
      # Content-Length may be absent (to_i of nil is 0); a percentage cannot
      # be computed then, so only the byte count is reported.
      if total > 0
        STDOUT << "\r%d%% done (%d of %d)" % [size * 100 / total, size, total]
      else
        STDOUT << "\r%d bytes received" % size
      end
      STDOUT.flush
    end
  end
  STDOUT << "\n"
  nil
end
96
+
97
# Computes @boundaries, one entry per kind (kinds ordered by name):
#
#   [kind, offset, last_index, key_width, record_width]
#
# * offset       - position of the kind's first record, given that every
#                  preceding kind occupies size * record_width characters;
# * last_index   - number of names of that kind minus one;
# * key_width    - length of the longest name;
# * record_width - key_width + digits of the largest offset + 1.
#
# Calls #calculate_indices first when @indices is not set. Returns true.
def calculate_boundaries
  calculate_indices unless @indices
  position = 0
  kinds = @indices.keys.sort_by { |kind| kind.to_s }
  @boundaries = kinds.map do |kind|
    segment = @indices[kind]
    key_width = segment.keys.map { |name| name.size }.max
    record_width = segment.values.max.to_s.size + key_width + 1
    boundary = [kind, position, segment.size - 1, key_width, record_width]
    position += segment.size * record_width
    boundary
  end
  true
end
113
+
114
# Scans "#{@path}.registry" and fills @indices:
#
#   { kind (Symbol) => { downcased name => byte offset of its Subtag/Tag line } }
#
# The offsets are byte offsets because they are later passed to IO#seek.
# The file is read in binary mode and line lengths are taken with
# String#bytesize: counting characters would shift every offset that
# follows a multi-byte character in the registry.
#
# Returns true.
def calculate_indices
  offset = 0
  kind = nil
  @indices = {}
  File.open("#{@path}.registry", "rb") do |f|
    f.each_line do |l|
      if TYPE_REGEX === l
        kind = $1.to_sym
        @indices[kind] ||= {}
      elsif kind && NAME_REGEX === l
        @indices[kind][$1.downcase] = offset
      elsif l == SEPARATOR
        # A new record starts; forget the kind of the previous one.
        kind = nil
      end
      offset += l.bytesize
    end
  end
  true
end
135
+
136
+ end
137
+ end
138
+ end
139
+
140
# Command line entry point:
#
#   lang update  [path]  - download the registry, then rebuild the indices
#   lang reindex [path]  - rebuild the indices (downloads first only when
#                          no registry file exists yet)
#
# +path+ defaults to Lang::Subtags.registry_path.
command = ARGV.shift
unless %w(reindex update).include?(command)
  STDERR << "unknown command: #{command.inspect}\n"
  exit 1
end

registry = Lang::Subtags::Registry.new(ARGV.shift || Lang::Subtags.registry_path)
if command == 'update' || !registry.exists?
  # The registry is fetched over HTTPS: plain-HTTP requests to IANA are
  # answered with a redirect, which Net::HTTP.get_response does not follow.
  registry.download("https://www.iana.org/assignments/language-subtag-registry")
end
registry.build_indices
149
+
150
+ # EOF
@@ -0,0 +1,147 @@
1
+ require 'thread'
2
+ require 'lang/subtags/entry'
3
+ require 'lang/subtags/language'
4
+ require 'lang/subtags/extlang'
5
+ require 'lang/subtags/script'
6
+ require 'lang/subtags/region'
7
+ require 'lang/subtags/variant'
8
+ require 'lang/subtags/grandfathered'
9
+ require 'lang/subtags/redundant'
10
+
11
+ module Lang #:nodoc:
12
+ module Subtags
13
+
14
+ LOCK = Mutex.new
15
+ SEPARATOR = "%%\n".freeze
16
+ TYPE = "Type".freeze
17
+ SUBTAG = "Subtag".freeze
18
+ TAG = "Tag".freeze
19
+ ADDED = "Added".freeze
20
+ DEPRECATED = "Deprecated".freeze
21
+ DESCRIPTION = "Description".freeze
22
+ COMMENTS = "Comments".freeze
23
+ PREFIX = "Prefix".freeze
24
+ PREFERRED_VALUE = "Preferred-Value".freeze
25
+ MACROLANGUAGE = "Macrolanguage".freeze
26
+ SCOPE = "Scope".freeze
27
+ SUPPRESS_SCRIPT = "Suppress-Script".freeze
28
+ CONTINUE_REGEX = /\A\s\s/.freeze
29
+
30
+ COLON = ":".freeze
31
+ COLON_SPLITTER = RUBY_VERSION < '1.9.1' ? /\:/.freeze : COLON
32
+
33
# Maps a kind (downcased class name as a Symbol) to the Entry subclass that
# represents it, and defines one lookup method per subclass, named after the
# class, that delegates to #entry with the matching kind.
SYM2CLASS = {}
Entry.subclasses.each do |subclass|
  short_name = subclass.to_s.gsub(/^.*::/,'')
  kind = short_name.downcase.to_sym
  SYM2CLASS[kind] = subclass
  define_method(short_name) { |s| entry(kind, s) }
end
44
+
45
# Returns the entry of the given kind for +snippet+, loading it on first use.
#
# kind    - a key of SYM2CLASS; any other value yields nil.
# snippet - name to look up; tried as given first, then downcased.
#
# Results (including nil for unknown names) are cached in the entry class
# under the downcased name. The lookup runs under LOCK.
def entry(kind, snippet)
  return nil unless SYM2CLASS.include?(kind)
  cache = SYM2CLASS[kind].entries
  LOCK.synchronize do
    next cache[snippet] if cache.key?(snippet)
    normalized = snippet.downcase
    next cache[normalized] if cache.key?(normalized)
    cache[normalized] = load_entry(kind, normalized)
  end
end
56
+
57
# Closes the registry and indices files if they are open and forgets the
# handles, so that the next lookup reopens them instead of failing on a
# closed file. Files that were never opened are not opened just to be
# closed. Runs under LOCK.
#
# Returns nil.
def close
  LOCK.synchronize do
    [@registry, @indices].each do |io|
      io.close if io && !io.closed?
    end
    @registry = @indices = nil
  end
  nil
end
63
+
64
# Binary search for +snippet+ among the fixed-width records of the indices
# file.
#
# boundaries[kind] supplies [offset, last_index, key_width, record_width].
# Each record is read as key_width characters of key followed by
# (record_width - key_width) characters holding the value.
#
# Returns the Integer stored with the matching key, or nil when the kind
# has no boundary, the name is not present, or the file ends prematurely.
def search(kind, snippet)
  boundary = boundaries[kind]
  return nil unless boundary

  offset, upper, key_width, record_width = *boundary
  target = snippet.ljust(key_width)
  lower = 0

  while lower <= upper
    middle = (lower + upper) / 2
    indices.seek(offset + middle * record_width, IO::SEEK_SET)
    key = indices.read(key_width)
    return nil if key.nil?
    return indices.read(record_width - key_width).to_i if key == target

    if target < key
      upper = middle - 1
    else
      lower = middle + 1
    end
  end
  nil
end
84
+
85
# Reads the registry record for +snippet+ and builds the matching entry.
#
# The record is read from the offset returned by #search up to the next
# SEPARATOR line or the end of the file. Lines matching CONTINUE_REGEX are
# folded continuations: they are appended, separated by a single space, to
# the value of the field they follow, whatever that field is. Lines without
# a colon and fields not listed below are ignored.
#
# Returns an instance of SYM2CLASS[kind], or nil when #search finds nothing.
def load_entry(kind, snippet)
  amount = search(kind, snippet)
  return nil unless amount
  registry.seek(amount, IO::SEEK_SET)
  thing = SYM2CLASS[kind].new

  # First collect the unfolded [attribute, value] pairs of the record ...
  fields = []
  until registry.eof?
    line = registry.readline
    break if line == SEPARATOR

    if CONTINUE_REGEX === line
      fields.last[1] << " " << line.strip unless fields.empty?
    else
      attribute, value = line.split(COLON_SPLITTER, 2)
      next unless value
      fields << [attribute, value.strip]
    end
  end

  # ... then hand every complete value to the entry.
  fields.each do |attribute, value|
    case attribute
    when DESCRIPTION     ; thing.add_description(value)
    when PREFIX          ; kind == :variant ? thing.add_prefix(value) : thing.prefix = value
    when SUBTAG,TAG      ; thing.name = value
    when ADDED           ; thing.added_at = value
    when DEPRECATED      ; thing.deprecated_at = value
    when COMMENTS        ; thing.comments = value
    when PREFERRED_VALUE ; thing.preferred_value = value
    when MACROLANGUAGE   ; thing.macrolanguage = value
    when SCOPE           ; thing.scope = value
    when SUPPRESS_SCRIPT ; thing.suppress_script = value
    end
  end

  thing
end
113
+
114
# Base path (without suffix) of the bundled registry files:
# "data/language-subtag" next to this source file. Memoized.
def registry_path
  return @registry_path if @registry_path
  @registry_path = File.join(File.dirname(__FILE__), "data", "language-subtag")
end
117
+
118
# Read-only handle on "#{registry_path}.registry", opened on first use and
# reused afterwards.
def registry
  return @registry if @registry
  @registry = File.open("#{registry_path}.registry", File::RDONLY)
end
121
+
122
# Read-only handle on "#{registry_path}.indices", opened on first use and
# reused afterwards.
def indices
  return @indices if @indices
  @indices = File.open("#{registry_path}.indices", File::RDONLY)
end
125
+
126
# Loads and memoizes the boundaries table from "#{registry_path}.boundaries".
#
# Every line has the form "kind:n:n:n:n"; the result maps the kind (Symbol)
# to its Integers. Blank lines are skipped.
#
# The file is read with File.foreach, which closes it when done. The table
# is memoized only after it has been read completely, so a failed read is
# retried on the next call instead of leaving an empty table behind.
def boundaries
  return @boundaries if @boundaries

  loaded = {}
  File.foreach("#{registry_path}.boundaries") do |line|
    next if line.strip.empty?
    kind, *numbers = line.split(COLON_SPLITTER)
    loaded[kind.to_sym] = numbers.map { |n| n.to_i }
  end
  @boundaries = loaded
end
135
+
136
+ extend self
137
+
138
# Internal helpers are hidden from callers of the module-level API.
class << self
  private :boundaries, :indices, :registry, :load_entry, :search
end
143
+
144
+ end
145
+ end
146
+
147
+ # EOF
@@ -0,0 +1,40 @@
1
module Lang #:nodoc:
  module Subtags
    # Base class of registry entries. Keeps the attributes shared by all
    # kinds of entries and, per class, the list of direct subclasses and a
    # hash of entries.
    class Entry

      attr_accessor :name, :preferred_value, :added_at, :deprecated_at, :comments

      class << self
        # Registers every direct subclass.
        def inherited(subclass)
          subclasses << subclass
        end

        # Subclasses registered through ::inherited, in definition order.
        def subclasses
          @subclasses ||= []
        end

        # Hash of entries kept separately for each class.
        def entries
          @entries ||= {}
        end
      end

      # True when a deprecation date has been set.
      def deprecated?
        @deprecated_at.nil? ? false : true
      end

      # All description chunks joined with newlines, or nil when there are none.
      def description
        return nil unless @descriptions
        @descriptions.join("\n")
      end

      # Appends one chunk to the description.
      def add_description(chunk)
        (@descriptions ||= []) << chunk
      end

    end
  end
end
39
+
40
+ # EOF
@@ -0,0 +1,19 @@
1
module Lang #:nodoc:
  module Subtags
    # Holds data about extlang subtags.
    class Extlang < Entry

      attr_accessor :macrolanguage, :suppress_script, :prefix, :scope

      # Looks up the :language entry named by +macrolanguage+ through
      # Subtags.entry; nil when no macrolanguage is set.
      def macro
        return nil unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
18
+
19
+ # EOF
@@ -0,0 +1,9 @@
1
module Lang #:nodoc:
  module Subtags
    # Holds data about grandfathered registrations.
    # Adds nothing to Entry; it only gives these records a class of their own.
    class Grandfathered < Entry; end
  end
end
8
+
9
+ # EOF
@@ -0,0 +1,18 @@
1
module Lang #:nodoc:
  module Subtags
    # Holds data about primary language subtags.
    class Language < Entry

      attr_accessor :macrolanguage, :suppress_script, :scope

      # Looks up the :language entry named by +macrolanguage+ through
      # Subtags.entry; nil when no macrolanguage is set.
      def macro
        return nil unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
17
+
18
+ # EOF
@@ -0,0 +1,9 @@
1
module Lang #:nodoc:
  module Subtags
    # Holds data about redundant tags.
    # Adds nothing to Entry; it only gives these records a class of their own.
    class Redundant < Entry; end
  end
end
8
+
9
+ # EOF