iso-codes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ pkg
2
+ doc
data/CHANGELOG ADDED
@@ -0,0 +1 @@
1
+ v0.1.0. initial release
data/README.rdoc ADDED
@@ -0,0 +1,73 @@
1
+ = iso-codes
2
+
3
+ A comprehensive database of ISO language and script codes.
4
+
5
+ == Installation
6
+
7
+ gem install iso-codes
8
+
9
+ == Usage
10
+
11
+ Language codes can be looked up using +find_language+:
12
+
13
+ require 'iso_codes'
14
+
15
+ language = ISOCodes.find_language('lav')
16
+ # ISO 639-3 reference name
17
+ language.reference_name
18
+ # ISO 639-3 identifier
19
+ language.identifier
20
+ # ISO 639-1 (alpha-2) identifier
21
+ language.alpha2
22
+ # ISO 639-2 (alpha-3) identifiers
23
+ language.alpha3_bibliographic
24
+ language.alpha3_terminology
25
+
26
+ === Macrolanguages and individual languages
27
+
28
+ ISO 639-3 defines language identifiers that cover a set of other language
29
+ identifiers. For example, the identifier +lav+ for Latvian corresponds to
30
+ +lvs+ for Standard Latvian and +ltg+ for Latgalian. These identifiers are
31
+ called macrolanguages (see http://www.sil.org/iso639-3/scope.asp#M for a
32
+ more formal definition). See http://www.sil.org/iso639-3/macrolanguages.asp
33
+ for an overview of such identifiers.
34
+
35
+ Macrolanguage identifiers can be looked up in the same way as other
36
+ language codes:
37
+
38
+ language = ISOCodes.find_language('lav')
39
+ language.reference_name
40
+ # => "Latvian"
41
+ language.class
42
+ # => ISOCodes::Macrolanguage
43
+ language.alpha3_bibliographic
44
+ # => "lav"
45
+ language.alpha2
46
+ # => "lv"
47
+
48
+ The individual languages covered by the identifier can be listed:
49
+
50
+ language.individual_languages.map { |l| l.identifier }
51
+ # => ["ltg", "lvs"]
52
+
53
+ For an individual language, the macrolanguage it belongs to, if any, can be looked up:
54
+
55
+ language = ISOCodes.find_language('ltg')
56
+ language.class
57
+ # => ISOCodes::IndividualLanguage
58
+ language.macrolanguage.identifier
59
+ # => "lav"
60
+
61
+ == TODO
62
+
63
+ * ISO 639-1 and ISO 639-2
64
+ * IANA language codes
65
+ * country/region codes (ISO 3166-1 through 3)
66
+ * currencies (ISO 4217:2001 and 2008)
67
+
68
+ If you feel like implementing any of this, please fork the project on
69
+ GitHub.
70
+
71
+ == Copyright
72
+
73
+ Copyright (c) 2010 Marius L. Jøhndal.
data/README.todo ADDED
@@ -0,0 +1,39 @@
1
+ == Coverage
2
+
3
+ The database currently includes codes for the following domains:
4
+
5
+ * languages: two-letter (`alpha-2') and three-letter (`alpha-3') ISO codes,
6
+ and human-readable names in English and, for a subset, in French.
7
+
8
+ * scripts: four-letter (`alpha-4') and numeric ISO codes, and human-readable
9
+ names in English and French.
10
+
11
+ === Standards
12
+
13
+ * languages:
14
+
15
+ - ISO 639-1
16
+
17
+ - ISO 639-2:1998 Codes for the representation of names of languages -- Part 2: Alpha-3 code
18
+
19
+ Registration authority: http://www.loc.gov/standards/iso639-2/
20
+ Last update: 2008-07-08 (http://www.loc.gov/standards/iso639-2/php/code_changes.php)
21
+
22
+ - ISO 639-3:2007 Codes for the representation of names of languages -- Part 3: Alpha-3 code for comprehensive coverage of languages
23
+
24
+ Registration authority: http://www.sil.org/iso639-3/
25
+ Last update: 2008-07-11
26
+
27
+ - RFC 4646 and RFC 4647 (which together obsoleted RFC 3066 and RFC 1766)
28
+
29
+ Language subtag registry: http://www.iana.org/assignments/language-subtag-registry
30
+ Last update: 2008-07-23
31
+
32
+ * scripts:
33
+
34
+ - ISO 15924:2004 Codes for the representation of names of scripts
35
+
36
+ Registration authority: http://unicode.org/iso15924/
37
+ Last update: 2007-11-26 (http://unicode.org/iso15924/codechanges.html)
38
+
39
+ == License
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+
7
+ Jeweler::Tasks.new do |p|
8
+ p.name = "iso-codes"
9
+ p.summary = "ISO language and script code support"
10
+ p.description = "A database of ISO language and script codes."
11
+ p.authors = ['Marius L. Jøhndal']
12
+ p.email = "mariuslj (at) ifi [dot] uio (dot) no"
13
+ p.homepage = "http://github.com/mlj/iso-codes"
14
+ p.rubyforge_project = "iso-codes"
15
+ end
16
+ rescue LoadError
17
+ puts "Jeweler not available. Install it with: sudo gem install jeweler"
18
+ end
19
+
20
+ require 'rake/testtask'
21
+ Rake::TestTask.new(:test) do |test|
22
+ test.libs << 'lib' << 'test'
23
+ test.pattern = 'test/**/test_*.rb'
24
+ test.verbose = true
25
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/iso-codes.gemspec ADDED
@@ -0,0 +1,51 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{iso-codes}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Marius L. J\303\270hndal"]
12
+ s.date = %q{2010-06-29}
13
+ s.description = %q{A database of ISO language and script codes.}
14
+ s.email = %q{mariuslj (at) ifi [dot] uio (dot) no}
15
+ s.extra_rdoc_files = [
16
+ "README.rdoc",
17
+ "README.todo"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "CHANGELOG",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "VERSION",
25
+ "iso-codes.gemspec",
26
+ "lib/iso-639-3-macrolanguages_20100128.tab.gz",
27
+ "lib/iso-639-3_20100330.tab.gz",
28
+ "lib/iso_codes.rb",
29
+ "test/test_iso_codes.rb"
30
+ ]
31
+ s.homepage = %q{http://github.com/mlj/iso-codes}
32
+ s.rdoc_options = ["--charset=UTF-8"]
33
+ s.require_paths = ["lib"]
34
+ s.rubyforge_project = %q{iso-codes}
35
+ s.rubygems_version = %q{1.3.6}
36
+ s.summary = %q{ISO language and script code support}
37
+ s.test_files = [
38
+ "test/test_iso_codes.rb"
39
+ ]
40
+
41
+ if s.respond_to? :specification_version then
42
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
43
+ s.specification_version = 3
44
+
45
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
46
+ else
47
+ end
48
+ else
49
+ end
50
+ end
51
+
Binary file
data/lib/iso_codes.rb ADDED
@@ -0,0 +1,178 @@
1
+ #
2
+ # iso_codes.rb - A comprehensive database of ISO codes
3
+ #
4
+ # Written by Marius L. Jøhndal, 2008, 2010.
5
+ #
6
+ require 'zlib'
7
+
8
+ module ISOCodes
9
+ # Version of the ISO 639-3 code set (see
10
+ # http://www.sil.org/iso639-3/download.asp).
11
+ ISO_639_3_VERSION = '20100330'
12
+
13
+ # Version of the ISO 639-3 macrolanguage mappings (see
14
+ # http://www.sil.org/iso639-3/download.asp).
15
+ ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION = '20100128'
16
+
17
+ class Language
18
+ # Returns the ISO 639-3 identifier.
19
+ attr_reader :identifier
20
+
21
+ # Returns the equivalent ISO 639-2 identifier of the bibliographic applications code set, or
22
+ # +nil+ if none is defined.
23
+ attr_reader :alpha3_bibliographic
24
+
25
+ # Returns the equivalent ISO 639-2 identifier of the terminology applications code set, or
26
+ # +nil+ if none is defined.
27
+ attr_reader :alpha3_terminology
28
+
29
+ # Returns the equivalent ISO 639-1 identifier, or +nil+ if none is defined.
30
+ attr_reader :alpha2
31
+
32
+ # Returns the language type. Language type is either <tt>:living</tt>,
33
+ # <tt>:extinct</tt>, <tt>:ancient</tt>, <tt>:historic</tt>,
34
+ # <tt>:constructed</tt>, or <tt>:special</tt>. See http://www.sil.org/iso639-3/types.asp
35
+ # for a description of these.
36
+ attr_reader :language_type
37
+
38
+ # Returns the reference language name.
39
+ attr_reader :reference_name
40
+
41
+ def initialize(identifier, part2b, part2t, part1, language_type, ref_name)
42
+ @identifier = identifier
43
+ @alpha3_bibliographic = part2b
44
+ @alpha3_terminology = part2t
45
+ @alpha2 = part1
46
+ @language_type = language_type
47
+ @reference_name = ref_name
48
+ end
49
+ end
50
+
51
+ class IndividualLanguage < Language
52
+ def initialize(individual_languages, macrolanguage, *rest)
53
+ super(*rest)
54
+
55
+ raise "Individual languages given for individual language" if individual_languages # FIXME
56
+ @macrolanguage = macrolanguage
57
+ end
58
+
59
+ # Returns the macrolanguage it is part of or +nil+ if not part of any
60
+ # macrolanguage.
61
+ def macrolanguage
62
+ ISOCodes::find_iso_639_3_language(@macrolanguage)
63
+ end
64
+ end
65
+
66
+ class Macrolanguage < Language
67
+ def initialize(individual_languages, macrolanguage, *rest)
68
+ super(*rest)
69
+
70
+ raise "Macrolanguage given for macrolanguage" if macrolanguage # FIXME
71
+ @individual_languages = individual_languages
72
+ end
73
+
74
+ # Returns an array of individual languages, otherwise an empty array.
75
+ def individual_languages
76
+ @individual_languages.map { |c| ISOCodes::find_iso_639_3_language(c) }
77
+ end
78
+ end
79
+
80
+ class << self
81
+ # Returns an object describing the language identified by the language
82
+ # code.
83
+ def find_language(code)
84
+ find_iso_639_3_language(code)
85
+ end
86
+
87
+ # Returns an object describing the language identified by an ISO 639-3
88
+ # identifier.
89
+ def find_iso_639_3_language(code)
90
+ if @@iso_639_3.has_key?(code)
91
+ klass, *rest = @@iso_639_3[code]
92
+ klass.new(*rest)
93
+ else
94
+ nil
95
+ end
96
+ end
97
+ end
98
+
99
+ private
100
+
101
+ LANGUAGE_TYPES = {
102
+ 'L' => :living,
103
+ 'E' => :extinct,
104
+ 'A' => :ancient,
105
+ 'H' => :historic,
106
+ 'C' => :constructed,
107
+ 'S' => :special,
108
+ }.freeze
109
+
110
+ DATA_PATH = File.expand_path(File.dirname(__FILE__))
111
+
112
+ class << self
113
+ def get_data_filename(filename)
114
+ File.join(DATA_PATH, filename)
115
+ end
116
+
117
+ def read_data_file(filename, field_count, delimiter = /,\s*/, skip_first = false)
118
+ Zlib::GzipReader.open(get_data_filename(filename)).each_line do |l|
119
+ if skip_first
120
+ skip_first = false
121
+ next
122
+ end
123
+
124
+ yield l.chomp.split(delimiter, field_count)
125
+ end
126
+ end
127
+
128
+ def load_iso_639_3
129
+ data = {}
130
+
131
+ read_data_file("iso-639-3_#{ISO_639_3_VERSION}.tab.gz", 8, "\t", true) do |args|
132
+ identifier, part2b, part2t, part1, scope, language_type, ref_name, comment = args
133
+
134
+ # Sanity checks
135
+ raise ArgumentError, "missing identifier" if identifier.nil?
136
+ raise ArgumentError, "missing reference name" if ref_name.nil?
137
+
138
+ case scope
139
+ when 'I': klass = IndividualLanguage
140
+ when 'M': klass = Macrolanguage
141
+ when 'S': klass = nil # FIXME
142
+ else
143
+ raise ArgumentError, "invalid scope"
144
+ end
145
+
146
+ language_type = LANGUAGE_TYPES[language_type]
147
+ raise ArgumentError, "invalid language type" if language_type.nil?
148
+
149
+ data[identifier] = [klass, nil, nil, identifier, part2b, part2t, part1, language_type, ref_name]
150
+ end
151
+
152
+ read_data_file("iso-639-3-macrolanguages_#{ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION}.tab.gz", 3, "\t", true) do |args|
153
+ macrolanguage_identifier, individual_language_identifier, status = args
154
+
155
+ case status
156
+ when 'R': next #FIXME
157
+ when 'A':
158
+ else
159
+ raise ArgumentError, "invalid status"
160
+ end
161
+
162
+ # Add macrolanguage to the individual language
163
+ raise "individual language already has a macrolanguage " if data[individual_language_identifier][2]
164
+ data[individual_language_identifier][2] = macrolanguage_identifier
165
+
166
+ # Add individual language to macrolanguage
167
+ data[macrolanguage_identifier][1] ||= []
168
+ data[macrolanguage_identifier][1] << individual_language_identifier
169
+ end
170
+
171
+ data.each_pair { |k, v| v.freeze }
172
+
173
+ data
174
+ end
175
+ end
176
+
177
+ @@iso_639_3 = load_iso_639_3
178
+ end
@@ -0,0 +1,35 @@
1
+ require 'iso_codes'
2
+ require 'test/unit'
3
+
4
+ class ISO6393CodesTestCase < Test::Unit::TestCase
5
+ def test_find_language
6
+ l = ISOCodes::find_language("eng")
7
+ assert_not_nil l
8
+ assert_equal 'eng', l.identifier
9
+ assert_equal 'eng', l.alpha3_terminology
10
+ assert_equal 'eng', l.alpha3_bibliographic
11
+ assert_equal 'en', l.alpha2
12
+ assert_equal 'English', l.reference_name
13
+ end
14
+
15
+ def test_find_language_macrolanguage
16
+ l = ISOCodes::find_language("ara")
17
+ assert_not_nil l
18
+ assert_kind_of ISOCodes::Macrolanguage, l
19
+ assert l.individual_languages.any? { |c| c.identifier == 'arq' }
20
+ end
21
+
22
+ def test_doc_exx
23
+ language = ISOCodes.find_language('lav')
24
+ assert_equal 'Latvian', language.reference_name
25
+ assert_equal ISOCodes::Macrolanguage, language.class
26
+ assert_equal 'lav', language.alpha3_bibliographic
27
+ assert_equal 'lv', language.alpha2
28
+
29
+ assert_equal ["ltg", "lvs"], language.individual_languages.map { |l| l.identifier }
30
+
31
+ language = ISOCodes.find_language('ltg')
32
+ assert_equal ISOCodes::IndividualLanguage, language.class
33
+ assert_equal "lav", language.macrolanguage.identifier
34
+ end
35
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iso-codes
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - "Marius L. J\xC3\xB8hndal"
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-06-29 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: A database of ISO language and script codes.
22
+ email: mariuslj (at) ifi [dot] uio (dot) no
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ - README.todo
30
+ files:
31
+ - .gitignore
32
+ - CHANGELOG
33
+ - README.rdoc
34
+ - Rakefile
35
+ - VERSION
36
+ - iso-codes.gemspec
37
+ - lib/iso-639-3-macrolanguages_20100128.tab.gz
38
+ - lib/iso-639-3_20100330.tab.gz
39
+ - lib/iso_codes.rb
40
+ - test/test_iso_codes.rb
41
+ - README.todo
42
+ has_rdoc: true
43
+ homepage: http://github.com/mlj/iso-codes
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ requirements: []
66
+
67
+ rubyforge_project: iso-codes
68
+ rubygems_version: 1.3.6
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: ISO language and script code support
72
+ test_files:
73
+ - test/test_iso_codes.rb