iso-codes 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ pkg
2
+ doc
data/CHANGELOG ADDED
@@ -0,0 +1 @@
1
+ v0.1.0. initial release
data/README.rdoc ADDED
@@ -0,0 +1,73 @@
1
+ = iso-codes
2
+
3
+ A comprehensive database of ISO language and script codes.
4
+
5
+ == Installation
6
+
7
+ gem install iso-codes
8
+
9
+ == Usage
10
+
11
+ Language codes can be looked up using +find_language+:
12
+
13
+ require 'iso_codes'
14
+
15
+ language = ISOCodes.find_language('lav')
16
+ # ISO 639-3 reference name
17
+ language.reference_name
18
+ # ISO 639-3 identifier
19
+ language.identifier
20
+ # ISO 639-1 (alpha-2) identifier
21
+ language.alpha2
22
+ # ISO 639-2 (alpha-3) identifiers
23
+ language.alpha3_bibliographic
24
+ language.alpha3_terminology
25
+
26
+ === Macrolanguages and individual languages
27
+
28
+ ISO 639-3 defines language identifiers that cover a set of other language
29
+ identifiers. For example, the identifier +lav+ for Latvian corresponds to
30
+ +lvs+ for Standard Latvian and +ltg+ for Latgalian. These identifiers are
31
+ called macrolanguages (see http://www.sil.org/iso639-3/scope.asp#M for a
32
+ more formal definition). See http://www.sil.org/iso639-3/macrolanguages.asp
33
+ for an overview of such identifiers.
34
+
35
+ Macrolanguage identifiers can be looked up in the same way as other
36
+ language codes:
37
+
38
+ language = ISOCodes.find_language('lav')
39
+ language.reference_name
40
+ # => "Latvian"
41
+ language.class
42
+ # => ISOCodes::Macrolanguage
43
+ language.alpha3_bibliographic
44
+ # => "lav"
45
+ language.alpha2
46
+ # => "lv"
47
+
48
+ The individual languages covered by the identifier can be listed:
49
+
50
+ language.individual_languages.map { |l| l.identifier }
51
+ # => ["ltg", "lvs"]
52
+
53
+ For an individual language, the macrolanguage it belongs to, if any, can be looked up:
54
+
55
+ language = ISOCodes.find_language('ltg')
56
+ language.class
57
+ # => ISOCodes::IndividualLanguage
58
+ language.macrolanguage.identifier
59
+ # => "lav"
60
+
61
+ == TODO
62
+
63
+ * ISO 639-1 and ISO 639-2
64
+ * IANA language codes
65
+ * country/region codes (ISO 3166-1 through 3)
66
+ * currencies (ISO 4217:2001 and 2008)
67
+
68
+ If you feel like implementing any of this, please fork the project on
69
+ GitHub.
70
+
71
+ == Copyright
72
+
73
+ Copyright (c) 2010 Marius L. Jøhndal.
data/README.todo ADDED
@@ -0,0 +1,39 @@
1
+ == Coverage
2
+
3
+ The database currently includes codes for the following domains:
4
+
5
+ * languages: two letter (`alpha-2') and three letter (`alpha-3') ISO codes,
6
+ and human readable names in English and, for a subset, in French.
7
+
8
+ * scripts: four letter (`alpha-4') and numeric ISO codes, and human readable
9
+ names in English and French.
10
+
11
+ === Standards
12
+
13
+ * languages:
14
+
15
+ - ISO 639-1
16
+
17
+ - ISO 639-2:1998 Codes for the representation of names of languages -- Part 2: Alpha-3 code
18
+
19
+ Registration authority: http://www.loc.gov/standards/iso639-2/
20
+ Last update: 2008-07-08 (http://www.loc.gov/standards/iso639-2/php/code_changes.php)
21
+
22
+ - ISO 639-3:2007 Codes for the representation of names of languages -- Part 3: Alpha-3 code for comprehensive coverage of languages
23
+
24
+ Registration authority: http://www.sil.org/iso639-3/
25
+ Last update: 2008-07-11
26
+
27
+ - RFC 4646 and RFC 4647 (which together obsoleted RFC 3066 and RFC 1766)
28
+
29
+ Language subtag registry: http://www.iana.org/assignments/language-subtag-registry
30
+ Last update: 2008-07-23
31
+
32
+ * scripts:
33
+
34
+ - ISO 15924:2004 Codes for the representation of names of scripts
35
+
36
+ Registration authority: http://unicode.org/iso15924/
37
+ Last update: 2007-11-26 (http://unicode.org/iso15924/codechanges.html)
38
+
39
+ == License
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
# Build and test tasks for the iso-codes gem.
require 'rubygems'
require 'rake'

# Gem packaging tasks come from Jeweler. When Jeweler cannot be loaded a hint
# is printed and only the test task below is defined.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name              = "iso-codes"
    gem.summary           = "ISO language and script code support"
    gem.description       = "A database of ISO language and script codes."
    gem.authors           = ['Marius L. Jøhndal']
    gem.email             = "mariuslj (at) ifi [dot] uio (dot) no"
    gem.homepage          = "http://github.com/mlj/iso-codes"
    gem.rubyforge_project = "iso-codes"
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'

# Defines the :test task, which runs every file matching test/**/test_*.rb
# with lib/ and test/ added to the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/iso-codes.gemspec ADDED
@@ -0,0 +1,51 @@
1
# -*- encoding: utf-8 -*-
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
#
# The encoding comment has been moved to the first line, the only position
# (apart from directly after a shebang) where Ruby honours it.

# Gem specification for iso-codes 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{iso-codes}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Marius L. J\303\270hndal"]
  s.date = %q{2010-06-29}
  s.description = %q{A database of ISO language and script codes.}
  s.email = %q{mariuslj (at) ifi [dot] uio (dot) no}
  s.extra_rdoc_files = [
    "README.rdoc",
    "README.todo"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "iso-codes.gemspec",
    "lib/iso-639-3-macrolanguages_20100128.tab.gz",
    "lib/iso-639-3_20100330.tab.gz",
    "lib/iso_codes.rb",
    "test/test_iso_codes.rb"
  ]
  s.homepage = %q{http://github.com/mlj/iso-codes}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  # Guarded like required_rubygems_version= above, so the spec still loads on
  # a RubyGems that does not provide this setter.
  s.rubyforge_project = %q{iso-codes} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{ISO language and script code support}
  s.test_files = [
    "test/test_iso_codes.rb"
  ]

  # The generated code also compared Gem::RubyGemsVersion against '1.2.0' here.
  # That constant is not defined by current RubyGems, so evaluating it raised
  # NameError, and both branches of the comparison were empty. Only the
  # specification version assignment had any effect, so only it is kept.
  s.specification_version = 3 if s.respond_to? :specification_version
end
51
+
Binary file
data/lib/iso_codes.rb ADDED
@@ -0,0 +1,178 @@
1
+ #
2
+ # iso_codes.rb - A comprehensive database of ISO codes
3
+ #
4
+ # Written by Marius L. Jøhndal, 2008, 2010.
5
+ #
6
+ require 'zlib'
7
+
8
+ module ISOCodes
9
+ # Version of the ISO 639-3 code set (see
10
+ # http://www.sil.org/iso639-3/download.asp).
11
+ ISO_639_3_VERSION = '20100330'
12
+
13
+ # Version of the ISO 639-3 macrolanguage mappings (see
14
+ # http://www.sil.org/iso639-3/download.asp).
15
+ ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION = '20100128'
16
+
17
+ class Language
18
+ # Returns the ISO 639-3 identifier.
19
+ attr_reader :identifier
20
+
21
+ # Returns the equivalent ISO 639-2 identifier of the bibliographic applications code set, or
22
+ # +nil+ if none is defined.
23
+ attr_reader :alpha3_bibliographic
24
+
25
+ # Returns the equivalent ISO 639-2 identifier of the terminology applications code set, or
26
+ # +nil+ if none is defined.
27
+ attr_reader :alpha3_terminology
28
+
29
+ # Returns the equivalent ISO 639-1 identifier, or +nil+ if none is defined.
30
+ attr_reader :alpha2
31
+
32
+ # Returns the language type. Language type is either <tt>:living</tt>,
33
+ # <tt>:extinct</tt>, <tt>:ancient</tt>, <tt>:historic</tt>,
34
+ # <tt>:constructed</tt>, or <tt>:special</tt>. See http://www.sil.org/iso639-3/types.asp
35
+ # for a description of these.
36
+ attr_reader :language_type
37
+
38
+ # Returns the reference language name.
39
+ attr_reader :reference_name
40
+
41
+ def initialize(identifier, part2b, part2t, part1, language_type, ref_name)
42
+ @identifier = identifier
43
+ @alpha3_bibliographic = part2b
44
+ @alpha3_terminology = part2t
45
+ @alpha2 = part1
46
+ @language_type = language_type
47
+ @reference_name = ref_name
48
+ end
49
+ end
50
+
51
# A language entry whose ISO 639-3 scope is "individual". It may belong to a
# macrolanguage, which is stored by identifier and looked up on demand.
class IndividualLanguage < Language
  # +individual_languages+ must be +nil+ (or +false+); any other value raises
  # a RuntimeError. +macrolanguage+ is the identifier of the enclosing
  # macrolanguage, or +nil+. The remaining arguments are passed on to
  # Language#initialize.
  def initialize(individual_languages, macrolanguage, *language_fields)
    super(*language_fields)

    # FIXME
    if individual_languages
      raise "Individual languages given for individual language"
    end

    @macrolanguage = macrolanguage
  end

  # Looks up and returns the macrolanguage this language is part of, or +nil+
  # if it is not part of any macrolanguage.
  def macrolanguage
    ISOCodes.find_iso_639_3_language(@macrolanguage)
  end
end
65
+
66
# A language entry whose ISO 639-3 scope is "macrolanguage". The individual
# languages it covers are stored by identifier and looked up on demand.
class Macrolanguage < Language
  # +individual_languages+ is an array of ISO 639-3 identifiers, or +nil+ when
  # no individual languages were recorded. +macrolanguage+ must be +nil+ (or
  # +false+); any other value raises a RuntimeError. The remaining arguments
  # are passed on to Language#initialize.
  def initialize(individual_languages, macrolanguage, *rest)
    super(*rest)

    raise "Macrolanguage given for macrolanguage" if macrolanguage # FIXME
    @individual_languages = individual_languages
  end

  # Returns an array of individual languages, otherwise an empty array.
  def individual_languages
    # The stored list is nil when no mapping was recorded for this
    # macrolanguage. Array() turns that into [] so the documented
    # "empty array" result holds instead of a NoMethodError on nil.
    Array(@individual_languages).map { |c| ISOCodes::find_iso_639_3_language(c) }
  end
end
79
+
80
class << self
  # Returns an object describing the language identified by the language
  # code, or +nil+ if the code is unknown.
  def find_language(code)
    find_iso_639_3_language(code)
  end

  # Returns an object describing the language identified by an ISO 639-3
  # identifier, or +nil+ if the identifier is unknown.
  #
  # Each stored entry is laid out as
  # [class, individual languages, macrolanguage, *Language fields].
  def find_iso_639_3_language(code)
    entry = @@iso_639_3[code]
    return nil if entry.nil?

    klass, individual_languages, macrolanguage, *language_fields = entry

    # Special-scope entries are stored without a class. Calling +new+ on nil
    # raised NoMethodError, so describe them with a plain Language instead.
    return Language.new(*language_fields) if klass.nil?

    klass.new(individual_languages, macrolanguage, *language_fields)
  end
end
98
+
99
+ private
100
+
101
+ LANGUAGE_TYPES = {
102
+ 'L' => :living,
103
+ 'E' => :extinct,
104
+ 'A' => :ancient,
105
+ 'H' => :historic,
106
+ 'C' => :constructed,
107
+ 'S' => :special,
108
+ }.freeze
109
+
110
+ DATA_PATH = File.expand_path(File.dirname(__FILE__))
111
+
112
class << self
  # Returns the path of +filename+ inside DATA_PATH.
  def get_data_filename(filename)
    File.join(DATA_PATH, filename)
  end

  # Reads the gzip-compressed data file +filename+ line by line and yields
  # each line, stripped of its line ending, as an array of at most
  # +field_count+ fields split on +delimiter+. The first line is skipped when
  # +skip_first+ is true.
  def read_data_file(filename, field_count, delimiter = /,\s*/, skip_first = false)
    # The block form closes the reader when iteration finishes or raises.
    # The previous non-block call left the file open.
    Zlib::GzipReader.open(get_data_filename(filename)) do |gz|
      gz.each_line do |line|
        if skip_first
          skip_first = false
          next
        end

        yield line.chomp.split(delimiter, field_count)
      end
    end
  end

  # Loads the ISO 639-3 code table and the macrolanguage mappings.
  #
  # Returns a hash from ISO 639-3 identifier to a frozen array laid out as
  # [class, individual language identifiers or nil, macrolanguage identifier
  # or nil, identifier, part2b, part2t, part1, language type, reference name].
  #
  # Raises ArgumentError when a row is malformed or a mapping refers to an
  # identifier that is not in the code table, and RuntimeError when an
  # individual language is mapped to more than one macrolanguage.
  def load_iso_639_3
    data = {}

    read_data_file("iso-639-3_#{ISO_639_3_VERSION}.tab.gz", 8, "\t", true) do |args|
      identifier, part2b, part2t, part1, scope, language_type, ref_name, _comment = args

      # Sanity checks
      raise ArgumentError, "missing identifier" if identifier.nil? || identifier.empty?
      raise ArgumentError, "missing reference name" if ref_name.nil? || ref_name.empty?

      # "when X:" is not valid syntax on Ruby 1.9 and later, so "then" is used.
      klass =
        case scope
        when 'I' then IndividualLanguage
        when 'M' then Macrolanguage
        when 'S' then nil # FIXME
        else raise ArgumentError, "invalid scope"
        end

      language_type = LANGUAGE_TYPES[language_type]
      raise ArgumentError, "invalid language type" if language_type.nil?

      # Empty columns come out of split as "". The Language accessors document
      # +nil+ for codes that are not defined, so normalise them here.
      part2b, part2t, part1 = [part2b, part2t, part1].map do |code|
        code.nil? || code.empty? ? nil : code
      end

      data[identifier] = [klass, nil, nil, identifier, part2b, part2t, part1, language_type, ref_name]
    end

    read_data_file("iso-639-3-macrolanguages_#{ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION}.tab.gz", 3, "\t", true) do |args|
      macrolanguage_identifier, individual_language_identifier, status = args

      case status
      when 'R' then next #FIXME
      when 'A' # accepted status: record the mapping below
      else raise ArgumentError, "invalid status"
      end

      individual = data[individual_language_identifier]
      macro = data[macrolanguage_identifier]

      # Fail with a clear message rather than a NoMethodError on nil when the
      # mapping table mentions an identifier the code table does not contain.
      raise ArgumentError, "unknown individual language #{individual_language_identifier}" if individual.nil?
      raise ArgumentError, "unknown macrolanguage #{macrolanguage_identifier}" if macro.nil?

      # Add macrolanguage to the individual language
      raise "individual language already has a macrolanguage " if individual[2]
      individual[2] = macrolanguage_identifier

      # Add individual language to macrolanguage
      (macro[1] ||= []) << individual_language_identifier
    end

    data.each_value(&:freeze)

    data
  end
end
176
+
177
+ @@iso_639_3 = load_iso_639_3
178
+ end
@@ -0,0 +1,35 @@
1
+ require 'iso_codes'
2
+ require 'test/unit'
3
+
4
# Exercises ISO 639-3 lookups through the public ISOCodes finders.
class ISO6393CodesTestCase < Test::Unit::TestCase
  # A plain lookup exposes the identifier, both alpha-3 codes, the alpha-2
  # code and the reference name.
  def test_find_language
    english = ISOCodes.find_language("eng")

    assert_not_nil english
    assert_equal 'eng', english.identifier
    assert_equal 'eng', english.alpha3_terminology
    assert_equal 'eng', english.alpha3_bibliographic
    assert_equal 'en', english.alpha2
    assert_equal 'English', english.reference_name
  end

  # A macrolanguage lookup returns a Macrolanguage that lists its members.
  def test_find_language_macrolanguage
    arabic = ISOCodes.find_language("ara")

    assert_not_nil arabic
    assert_kind_of ISOCodes::Macrolanguage, arabic

    member_found = arabic.individual_languages.any? { |member| member.identifier == 'arq' }
    assert member_found
  end

  # Mirrors the examples given in the README.
  def test_doc_exx
    latvian = ISOCodes.find_language('lav')
    assert_equal 'Latvian', latvian.reference_name
    assert_equal ISOCodes::Macrolanguage, latvian.class
    assert_equal 'lav', latvian.alpha3_bibliographic
    assert_equal 'lv', latvian.alpha2

    member_identifiers = latvian.individual_languages.map(&:identifier)
    assert_equal ["ltg", "lvs"], member_identifiers

    latgalian = ISOCodes.find_language('ltg')
    assert_equal ISOCodes::IndividualLanguage, latgalian.class
    assert_equal "lav", latgalian.macrolanguage.identifier
  end
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iso-codes
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - "Marius L. J\xC3\xB8hndal"
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-06-29 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: A database of ISO language and script codes.
22
+ email: mariuslj (at) ifi [dot] uio (dot) no
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ - README.todo
30
+ files:
31
+ - .gitignore
32
+ - CHANGELOG
33
+ - README.rdoc
34
+ - Rakefile
35
+ - VERSION
36
+ - iso-codes.gemspec
37
+ - lib/iso-639-3-macrolanguages_20100128.tab.gz
38
+ - lib/iso-639-3_20100330.tab.gz
39
+ - lib/iso_codes.rb
40
+ - test/test_iso_codes.rb
41
+ - README.todo
42
+ has_rdoc: true
43
+ homepage: http://github.com/mlj/iso-codes
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ requirements: []
66
+
67
+ rubyforge_project: iso-codes
68
+ rubygems_version: 1.3.6
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: ISO language and script code support
72
+ test_files:
73
+ - test/test_iso_codes.rb