lang 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,46 @@
1
+ == DESCRIPTION:
2
+
3
+ Language tags implementation.
4
+
5
+ == FEATURES:
6
+
7
+ * RFC5646 conformance
8
+ * Basic filtering (RFC 4647)
9
+ * Extended filtering (RFC 4647)
10
+ * Canonicalization
11
+ * Works directly with the IANA language subtag registry
12
+
13
+ == EXAMPLES:
14
+
15
+ See examples directory:
16
+ http://github.com/SSDany/lang/tree/master/examples
17
+
18
+ == INSTALLATION:
19
+
20
+ $ gem install lang
21
+ $ lang update
22
+
23
+ == LICENSE:
24
+
25
+ (The MIT License)
26
+
27
+ Copyright (c) 2010
28
+
29
+ Permission is hereby granted, free of charge, to any person obtaining
30
+ a copy of this software and associated documentation files (the
31
+ 'Software'), to deal in the Software without restriction, including
32
+ without limitation the rights to use, copy, modify, merge, publish,
33
+ distribute, sublicense, and/or sell copies of the Software, and to
34
+ permit persons to whom the Software is furnished to do so, subject to
35
+ the following conditions:
36
+
37
+ The above copyright notice and this permission notice shall be
38
+ included in all copies or substantial portions of the Software.
39
+
40
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
41
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
43
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
44
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
45
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
46
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/bin/lang ADDED
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ dir = File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ $:.unshift(dir) unless $:.include?(dir)
5
+ require 'lang/subtags'
6
+
7
+ require 'net/http'
8
+ require 'tempfile'
9
+
10
+ module Lang
11
+ module Subtags
12
+ class Registry
13
+
14
# Matches a "Subtag:" or "Tag:" line of the registry, case-insensitively;
# capture 1 is the subtag/tag name.
NAME_REGEX = %r{^(?:#{SUBTAG}|#{TAG}):\s*([\w-]+)\s*$}io.freeze

# Matches a "Type:" line of the registry, case-insensitively;
# capture 1 is the record kind.
TYPE_REGEX = %r{^#{TYPE}:\s*(\w+)\s*$}io.freeze
16
+
17
# Stores the absolute base path of the registry files.
#
# path - base path without extension; the other methods append
#        ".registry", ".indices" and ".boundaries" to it.
def initialize(path)
  absolute = File.expand_path(path)
  @path = absolute
end
20
+
21
# Reports whether the registry file ("<path>.registry") is present on disk.
#
# Uses File.exist?: the File.exists? alias was deprecated for years and
# removed in Ruby 3.2, where calling it raises NoMethodError.
#
# Returns true or false.
def exists?
  File.exist?("#{@path}.registry")
end
24
+
25
# Fetches +uri+ and stores the body as "<path>.registry".
#
# The target directory is created first when no registry file exists yet.
# Returns whatever #write returns.
def download(uri)
  unless exists?
    FileUtils.mkdir_p(File.dirname(@path))
  end

  write("registry") do |temp|
    http(uri) { |chunk| temp << chunk }
  end
end
29
+
30
# Rebuilds "<path>.indices" and "<path>.boundaries" from the registry file.
#
# Every index row is fixed-width: the name left-justified to the widest
# name of its kind, then the right-justified offset, then a newline.
# Every boundaries row is one colon-joined boundary record.
#
# Returns false when no registry file exists, true otherwise.
def build_indices
  return false unless exists?

  STDOUT << "Building indices\n"
  calculate_indices
  calculate_boundaries

  write("indices") do |out|
    @boundaries.each do |record|
      kind = record.first
      key_width, row_width = record[-2], record[-1]
      row_format = "%-#{key_width}s%#{row_width - key_width - 1}d\n"
      @indices[kind].to_a.sort.each do |name, position|
        out << row_format % [name, position]
      end
    end
  end

  write("boundaries") do |out|
    @boundaries.each { |record| out << "#{record.join(":")}\n" }
  end

  STDOUT << "Done\n"
  true
end
53
+
54
+ private
55
+
56
# Writes "<path>.<dest>" by filling a temporary file and moving it into
# place (approach borrowed from ActiveSupport's atomic write).
#
# dest  - extension of the target file ("registry", "indices", ...).
# block - receives the temporary file opened in binary mode.
#
# The owner and mode of an existing target are restored afterwards; for a
# new target they are taken from a probe file created in the same directory.
# Failing to restore them (EPERM/EACCES) no longer aborts the call, because
# the data has already been moved into place by then. The temporary file is
# always cleaned up, also when the block or the move raises.
#
# Returns nil.
def write(dest, &block)
  path = "#{@path}.#{dest}"
  temp = Tempfile.new(dest)

  begin
    temp.binmode
    yield(temp) if block_given?
    temp.close

    begin
      old = File.stat(path)
    rescue Errno::ENOENT
      # No previous file: learn the default owner/mode from a throwaway file.
      check = File.join(File.dirname(path), ".permissions_check.#{Thread.current.object_id}.#{Process.pid}.#{rand(1000000)}")
      File.open(check, File::WRONLY | File::CREAT) { }
      old = File.stat(check)
      File.unlink(check)
    end

    FileUtils.mv(temp.path, path)

    begin
      File.chown(old.uid, old.gid, path)
      File.chmod(old.mode, path)
    rescue Errno::EPERM, Errno::EACCES
      # Best effort only: the new content is in place either way.
    end
  ensure
    # Closes the handle if still open and removes a leftover temp file.
    temp.close!
  end

  nil
end
81
+
82
# Streams the body of +uri+ over HTTP, yielding each chunk to the block and
# printing progress to STDOUT.
#
# A percentage is shown only when the server sent a Content-Length header.
# Without it (e.g. chunked transfer) the total is 0 and computing a
# percentage would raise ZeroDivisionError, so the running byte count is
# shown instead.
#
# NOTE(review): the response status is not checked and redirects are not
# followed, so the body of an error/redirect response is passed on as-is.
#
# Returns nil.
def http(uri)
  STDOUT << "Downloading #{uri}\n"
  Net::HTTP.get_response(URI(uri)) do |response|
    total = response['Content-Length'].to_i
    size = 0
    response.read_body do |chunk|
      size += chunk.size
      yield(chunk) if block_given?
      if total > 0
        STDOUT << "\r%d%% done (%d of %d)" % [size*100/total, size, total]
      else
        STDOUT << "\r%d bytes received" % size
      end
      STDOUT.flush
    end
  end
  STDOUT << "\n"
  nil
end
96
+
97
# Derives one boundary record per kind from @indices and stores the list
# in @boundaries, ordered by kind name.
#
# Each record is [kind, offset, last_row, key_width, row_width]:
#   offset    - where the kind's rows start in the indices file
#   last_row  - index of the kind's last row (row count - 1)
#   key_width - length of the longest name
#   row_width - key_width + digits of the largest offset + 1 for the newline
#
# Returns true.
def calculate_boundaries
  calculate_indices unless @indices

  position = 0
  kinds = @indices.keys.sort_by { |kind| kind.to_s }
  @boundaries = kinds.map do |kind|
    entries = @indices[kind]
    key_width = entries.keys.map { |key| key.size }.max
    row_width = entries.values.max.to_s.size + key_width + 1
    record = [kind, position, entries.size - 1, key_width, row_width]
    position += entries.size * row_width
    record
  end

  true
end
113
+
114
# Scans "<path>.registry" and fills @indices with, per kind, a hash that maps
# each downcased subtag/tag name to the byte offset of its "Subtag:"/"Tag:"
# line.
#
# The offsets are later passed to IO#seek, which works in bytes, so the
# running position is advanced by String#bytesize. String#size counts
# characters on Ruby 1.9+ and falls behind after the first line that
# contains a multi-byte character.
#
# Returns true.
def calculate_indices
  count = 0
  kind = nil
  @indices = {}
  File.open("#{@path}.registry", File::RDONLY) do |f|
    f.each_line do |l|
      if TYPE_REGEX === l
        kind = $1.to_sym
        @indices[kind] ||= {}
      elsif kind && NAME_REGEX === l
        @indices[kind][$1.downcase] = count
      elsif l == SEPARATOR
        # A record ended; names are ignored until the next "Type:" line.
        kind = nil
      end
      count += l.bytesize
    end
  end
  true
end
135
+
136
+ end
137
+ end
138
+ end
139
+
140
# Command-line driver: lang (reindex|update) [base_path]
#
# "update" always downloads the registry; "reindex" downloads it only when
# no registry file exists yet. The indices are rebuilt in both cases.
command = ARGV.shift
known = %w(reindex update)
unless known.include?(command)
  STDERR << "unknown command: #{command.inspect}\n"
  exit 1
end

base = ARGV.shift || Lang::Subtags.registry_path
registry = Lang::Subtags::Registry.new(base)
if command == 'update' || !registry.exists?
  registry.download("http://www.iana.org/assignments/language-subtag-registry")
end
registry.build_indices
149
+
150
+ # EOF
@@ -0,0 +1,147 @@
1
+ require 'thread'
2
+ require 'lang/subtags/entry'
3
+ require 'lang/subtags/language'
4
+ require 'lang/subtags/extlang'
5
+ require 'lang/subtags/script'
6
+ require 'lang/subtags/region'
7
+ require 'lang/subtags/variant'
8
+ require 'lang/subtags/grandfathered'
9
+ require 'lang/subtags/redundant'
10
+
11
+ module Lang #:nodoc:
12
+ module Subtags
13
+
14
# Serializes cache access and registry reads (see #entry and #close).
LOCK = Mutex.new

# Line that separates two records in the registry file.
SEPARATOR = "%%\n".freeze

# Field names as they appear in the registry file.
TYPE            = 'Type'.freeze
SUBTAG          = 'Subtag'.freeze
TAG             = 'Tag'.freeze
ADDED           = 'Added'.freeze
DEPRECATED      = 'Deprecated'.freeze
DESCRIPTION     = 'Description'.freeze
COMMENTS        = 'Comments'.freeze
PREFIX          = 'Prefix'.freeze
PREFERRED_VALUE = 'Preferred-Value'.freeze
MACROLANGUAGE   = 'Macrolanguage'.freeze
SCOPE           = 'Scope'.freeze
SUPPRESS_SCRIPT = 'Suppress-Script'.freeze

# A line starting with two whitespace characters continues the previous field.
CONTINUE_REGEX = /\A\s\s/.freeze

COLON = ':'.freeze
# Rubies older than 1.9.1 split on a regexp, newer ones on the plain string.
COLON_SPLITTER = RUBY_VERSION < '1.9.1' ? /\:/.freeze : COLON
32
+
33
# Maps a kind symbol (the downcased class name, e.g. :language) to its Entry
# subclass. For every subclass a lookup helper named after the class is also
# defined, so Language(s) is shorthand for entry(:language, s).
SYM2CLASS = {}
Entry.subclasses.each do |klass|
  short_name = klass.to_s.gsub(/^.*::/,'')
  kind = short_name.downcase.to_sym
  SYM2CLASS[kind] = klass
  define_method(short_name) { |s| entry(kind, s) }
end
44
+
45
# Returns the registry entry of the given kind for +snippet+.
#
# kind    - a key of SYM2CLASS; nil is returned for any other kind.
# snippet - subtag or tag; tried verbatim first, then downcased.
#
# Results (including nil for unknown names) are cached per class in
# klass.entries; the lookup and the load run under LOCK.
def entry(kind, snippet)
  return nil unless SYM2CLASS.key?(kind)
  klass = SYM2CLASS[kind]

  LOCK.synchronize do
    cache = klass.entries
    return cache[snippet] if cache.key?(snippet)

    snippet = snippet.downcase
    return cache[snippet] if cache.key?(snippet)

    cache[snippet] = load_entry(kind, snippet)
  end
end
56
+
57
# Closes the registry and indices file handles, if they were opened.
#
# The memoized handles are reset so that the next lookup reopens the files;
# previously the closed handles stayed cached and every later read raised
# IOError. Files that were never opened are no longer opened just to be
# closed, which makes the call safe to repeat.
#
# Returns nil.
def close
  LOCK.synchronize {
    @registry.close if @registry && !@registry.closed?
    @indices.close if @indices && !@indices.closed?
    @registry = @indices = nil
  }
  nil
end
63
+
64
# Binary-searches the fixed-width indices file for +snippet+.
#
# kind    - key into #boundaries, whose record is
#           [offset, last_row, key_width, row_width].
# snippet - name to find; it is padded to key_width before comparing.
#
# Returns the integer stored next to the name, or nil when the name is not
# listed. A kind without a boundary record now also yields nil instead of
# failing with TypeError on the missing widths.
def search(kind, snippet)
  record = boundaries[kind]
  return nil unless record

  lower = 0
  offset, upper, t, r = *record
  target = snippet.ljust(t)

  until upper < lower
    middle = (lower+upper)/2
    indices.seek(offset + middle*r, IO::SEEK_SET)
    value = indices.read(t)
    if value == target
      # The rest of the row holds the right-justified number.
      return indices.read(r-t).to_i
    elsif target < value
      upper = middle-1
    else
      lower = middle+1
    end
  end
  nil
end
84
+
85
# Reads one record from the registry file and builds the matching entry.
#
# kind    - key of SYM2CLASS selecting the entry class.
# snippet - name passed to #search to find the record's position.
#
# Returns the populated entry, or nil when #search finds nothing.
#
# Continuation lines (two leading whitespace characters) are attached to the
# field they follow: Comments text is appended to the comments string exactly
# as before, Description text becomes a further description chunk, and
# continuations of other fields are skipped. Previously every continuation
# was appended to +comments+, which raised NoMethodError when no Comments
# field had been read yet. Lines without a colon are skipped instead of
# failing on nil.
def load_entry(kind, snippet)
  amount = search(kind, snippet)
  return nil unless amount
  registry.seek(amount, IO::SEEK_SET)
  thing = SYM2CLASS[kind].new
  last = nil

  until registry.eof?
    line = registry.readline
    break if line == SEPARATOR

    if CONTINUE_REGEX === line
      # Everything after the two leading whitespace characters.
      rest = line.sub(CONTINUE_REGEX, '')
      if last == COMMENTS && thing.comments
        thing.comments << rest
      elsif last == DESCRIPTION
        thing.add_description(rest.strip)
      end
      next
    end

    attribute, value = line.split(COLON_SPLITTER, 2)
    next if value.nil?
    value.strip!
    last = attribute

    case attribute
    when DESCRIPTION     ; thing.add_description(value)
    when PREFIX          ; kind == :variant ? thing.add_prefix(value) : thing.prefix = value
    when SUBTAG, TAG     ; thing.name = value
    when ADDED           ; thing.added_at = value
    when DEPRECATED      ; thing.deprecated_at = value
    when COMMENTS        ; thing.comments = value
    when PREFERRED_VALUE ; thing.preferred_value = value
    when MACROLANGUAGE   ; thing.macrolanguage = value
    when SCOPE           ; thing.scope = value
    when SUPPRESS_SCRIPT ; thing.suppress_script = value
    end
  end

  thing
end
113
+
114
# Base path (without extension) of the data files: "data/language-subtag"
# next to this source file. Computed once and memoized.
def registry_path
  return @registry_path if @registry_path
  @registry_path = File.join(File.dirname(__FILE__), "data", "language-subtag")
end
117
+
118
# Read-only handle on "<registry_path>.registry", opened on first use and
# memoized.
def registry
  return @registry if @registry
  @registry = File.open("#{registry_path}.registry", File::RDONLY)
end
121
+
122
# Read-only handle on "<registry_path>.indices", opened on first use and
# memoized.
def indices
  return @indices if @indices
  @indices = File.open("#{registry_path}.indices", File::RDONLY)
end
125
+
126
# Parses "<registry_path>.boundaries" into a hash that maps each kind symbol
# to its array of integers (the colon-separated fields after the kind).
#
# The file is opened in block form so the handle is closed after parsing;
# the original left it open. The result is memoized only after a complete
# parse, so a failed open no longer leaves an empty hash cached for all
# later calls.
def boundaries
  return @boundaries if @boundaries

  parsed = {}
  File.open("#{registry_path}.boundaries", File::RDONLY) do |file|
    file.each_line do |line|
      fields = line.split(COLON_SPLITTER)
      parsed[fields.shift.to_sym] = fields.map { |b| b.to_i }
    end
  end
  @boundaries = parsed
end
135
+
136
# Make every method above callable on the module itself ...
extend self

# ... and hide the file-access and lookup internals from outside callers.
class << self
  private :boundaries, :indices, :registry, :load_entry, :search
end
143
+
144
+ end
145
+ end
146
+
147
+ # EOF
@@ -0,0 +1,40 @@
1
module Lang #:nodoc:
  module Subtags
    # Base class for a single registry record. The shared fields live here;
    # each subclass gets its own subclass list and its own entries hash.
    class Entry

      attr_accessor :name
      attr_accessor :preferred_value
      attr_accessor :added_at
      attr_accessor :deprecated_at
      attr_accessor :comments

      class << self
        # Records every class that inherits directly from the receiver.
        def inherited(subclass)
          subclasses << subclass
        end

        # Classes recorded by .inherited for this class.
        def subclasses
          @subclasses ||= []
        end

        # Per-class hash of entries, created on first access.
        def entries
          @entries ||= {}
        end
      end

      # True once a deprecation value has been set.
      def deprecated?
        !deprecated_at.nil?
      end

      # All description chunks joined by newlines; nil when none were added.
      def description
        @descriptions && @descriptions.join("\n")
      end

      # Appends one description chunk and returns the list of chunks.
      def add_description(chunk)
        (@descriptions ||= []) << chunk
      end

    end
  end
end
39
+
40
+ # EOF
@@ -0,0 +1,19 @@
1
module Lang #:nodoc:
  module Subtags
    # Record for an extlang subtag.
    class Extlang < Entry

      attr_accessor :macrolanguage
      attr_accessor :suppress_script
      attr_accessor :prefix
      attr_accessor :scope

      # The language entry named by +macrolanguage+, or nil when none is set.
      def macro
        return nil unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
18
+
19
+ # EOF
@@ -0,0 +1,9 @@
1
module Lang #:nodoc:
  module Subtags
    # Record for a grandfathered registration; adds nothing to Entry.
    class Grandfathered < Entry; end
  end
end
8
+
9
+ # EOF
@@ -0,0 +1,18 @@
1
module Lang #:nodoc:
  module Subtags
    # Record for a primary language subtag.
    class Language < Entry

      attr_accessor :macrolanguage
      attr_accessor :suppress_script
      attr_accessor :scope

      # The language entry named by +macrolanguage+, or nil when none is set.
      def macro
        return nil unless macrolanguage
        Subtags.entry(:language, macrolanguage)
      end

    end
  end
end
17
+
18
+ # EOF
@@ -0,0 +1,9 @@
1
module Lang #:nodoc:
  module Subtags
    # Record for a redundant tag; adds nothing to Entry.
    class Redundant < Entry; end
  end
end
8
+
9
+ # EOF