iso-codes 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/CHANGELOG +1 -0
- data/README.rdoc +73 -0
- data/README.todo +39 -0
- data/Rakefile +25 -0
- data/VERSION +1 -0
- data/iso-codes.gemspec +51 -0
- data/lib/iso-639-3-macrolanguages_20100128.tab.gz +0 -0
- data/lib/iso-639-3_20100330.tab.gz +0 -0
- data/lib/iso_codes.rb +178 -0
- data/test/test_iso_codes.rb +35 -0
- metadata +73 -0
data/.gitignore
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
v0.1.0. initial release
|
data/README.rdoc
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
= iso-codes
|
2
|
+
|
3
|
+
A comprehensive database of ISO language and script codes.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
gem install iso-codes
|
8
|
+
|
9
|
+
== Usage
|
10
|
+
|
11
|
+
Language codes can be looked up using +find_language+:
|
12
|
+
|
13
|
+
require 'iso_codes'
|
14
|
+
|
15
|
+
language = ISOCodes.find_language('lav')
|
16
|
+
# ISO 639-3 reference name
|
17
|
+
language.reference_name
|
18
|
+
# ISO 639-3 identifier
|
19
|
+
language.identifier
|
20
|
+
# ISO 639-1 (alpha-2) identifier
|
21
|
+
language.alpha2
|
22
|
+
# ISO 639-2 (alpha-3) identifiers
|
23
|
+
language.alpha3_bibliographic
|
24
|
+
language.alpha3_terminology
|
25
|
+
|
26
|
+
=== Macrolanguages and individual languages
|
27
|
+
|
28
|
+
ISO 639-3 defines language identifiers that cover a set of other language
|
29
|
+
identifiers. For example, the identifier +lav+ for Latvian corresponds to
|
30
|
+
+lvs+ for Standard Latvian and +ltg+ for Latgalian. These identifiers are
|
31
|
+
called macrolanguages (see http://www.sil.org/iso639-3/scope.asp#M for a
|
32
|
+
more formal definition). See http://www.sil.org/iso639-3/macrolanguages.asp
|
33
|
+
for an overview of such identifiers.
|
34
|
+
|
35
|
+
Macrolanguage identifiers can be looked up in the same way as other
|
36
|
+
language codes:
|
37
|
+
|
38
|
+
language = ISOCodes.find_language('lav')
|
39
|
+
language.reference_name
|
40
|
+
# => "Latvian"
|
41
|
+
language.class
|
42
|
+
# => ISOCodes::Macrolanguage
|
43
|
+
language.alpha3_bibliographic
|
44
|
+
# => "lav"
|
45
|
+
language.alpha2
|
46
|
+
# => "lv"
|
47
|
+
|
48
|
+
The individual languages covered by the identifier can be listed:
|
49
|
+
|
50
|
+
>> language.individual_languages.map { |l| l.identifier }
|
51
|
+
# => ["ltg", "lvs"]
|
52
|
+
|
53
|
+
For an individual language, the macrolanguage, if any, can be found:
|
54
|
+
|
55
|
+
language = ISOCodes.find_language('ltg')
|
56
|
+
language.class
|
57
|
+
# => ISOCodes::IndividualLanguage
|
58
|
+
language.macrolanguage.identifier
|
59
|
+
# => "lav"
|
60
|
+
|
61
|
+
== TODO
|
62
|
+
|
63
|
+
* ISO 639-1 and ISO 639-2
|
64
|
+
* IANA language codes
|
65
|
+
* country/region codes (ISO 3166-1 through 3)
|
66
|
+
* currencies (ISO 4217:2001 and 2008)
|
67
|
+
|
68
|
+
If you feel like implementing any of this, please fork the project on
|
69
|
+
github.
|
70
|
+
|
71
|
+
= Copyright
|
72
|
+
|
73
|
+
Copyright (c) 2010 Marius L. Jøhndal.
|
data/README.todo
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
== Coverage
|
2
|
+
|
3
|
+
The database currently includes codes for the following domains:
|
4
|
+
|
5
|
+
* languages: two letter (`alpha-2') and three letter (`alpha-3') ISO codes,
|
6
|
+
and human readable names in English and, for a subset, in French.
|
7
|
+
|
8
|
+
* scripts: four letter (`alpha-4') and numeric ISO codes, and human readable
|
9
|
+
names in English and French.
|
10
|
+
|
11
|
+
=== Standards
|
12
|
+
|
13
|
+
* languages:
|
14
|
+
|
15
|
+
- ISO 639-1
|
16
|
+
|
17
|
+
- ISO 639-2:1998 Codes for the representation of names of languages -- Part 2: Alpha-3 code
|
18
|
+
|
19
|
+
Registration authority: http://www.loc.gov/standards/iso639-2/
|
20
|
+
Last update: 2008-07-08 (http://www.loc.gov/standards/iso639-2/php/code_changes.php)
|
21
|
+
|
22
|
+
- ISO 639-3:2007 Codes for the representation of names of languages -- Part 3: Alpha-3 code for comprehensive coverage of languages
|
23
|
+
|
24
|
+
Registration authority: http://www.sil.org/iso639-3/
|
25
|
+
Last update: 2008-07-11
|
26
|
+
|
27
|
+
- RFC 4646 and RFC 4647 (which together obsoleted RFC 3066 and RFC 1766)
|
28
|
+
|
29
|
+
Language subtag registry: http://www.iana.org/assignments/language-subtag-registry
|
30
|
+
Last update: 2008-07-23
|
31
|
+
|
32
|
+
* scripts:
|
33
|
+
|
34
|
+
- ISO 15924:2004 Codes for the representation of names of scripts
|
35
|
+
|
36
|
+
Registration authority: http://unicode.org/iso15924/
|
37
|
+
Last update: 2007-11-26 (http://unicode.org/iso15924/codechanges.html)
|
38
|
+
|
39
|
+
== License
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Build and test tasks for the iso-codes gem.
require 'rubygems'
require 'rake'

# Packaging tasks are provided by Jeweler. Jeweler is optional: if it cannot
# be loaded a hint is printed and the rest of this file is still evaluated.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gemspec|
    gemspec.name              = "iso-codes"
    gemspec.summary           = "ISO language and script code support"
    gemspec.description       = "A database of ISO language and script codes."
    gemspec.authors           = ['Marius L. Jøhndal']
    gemspec.email             = "mariuslj (at) ifi [dot] uio (dot) no"
    gemspec.homepage          = "http://github.com/mlj/iso-codes"
    gemspec.rubyforge_project = "iso-codes"
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install jeweler"
end

# The :test task runs every file matching test/**/test_*.rb with lib/ and
# test/ added to the load path.
require 'rake/testtask'

Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/iso-codes.gemspec
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for iso-codes 0.1.0: package metadata, the list of
# shipped files (library source, the two gzip-compressed ISO 639-3 data
# tables, tests and documentation) and the RubyGems settings used to build it.
Gem::Specification.new do |s|
  s.name = %q{iso-codes}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Marius L. J\303\270hndal"]
  s.date = %q{2010-06-29}
  s.description = %q{A database of ISO language and script codes.}
  s.email = %q{mariuslj (at) ifi [dot] uio (dot) no}
  s.extra_rdoc_files = [
    "README.rdoc",
    "README.todo"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "iso-codes.gemspec",
    "lib/iso-639-3-macrolanguages_20100128.tab.gz",
    "lib/iso-639-3_20100330.tab.gz",
    "lib/iso_codes.rb",
    "test/test_iso_codes.rb"
  ]
  s.homepage = %q{http://github.com/mlj/iso-codes}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{iso-codes}
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{ISO language and script code support}
  s.test_files = [
    "test/test_iso_codes.rb"
  ]

  # No dependencies are declared. The generated RubyGems version check that
  # used to follow has been dropped: both of its branches were empty, and it
  # read Gem::RubyGemsVersion, a constant newer RubyGems releases no longer
  # define, so merely loading this file could fail with a NameError.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
51
|
+
|
Binary file
|
Binary file
|
data/lib/iso_codes.rb
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
#
|
2
|
+
# iso_codes.rb - A comprehensive database of ISO codes
|
3
|
+
#
|
4
|
+
# Written by Marius L. Jøhndal, 2008, 2010.
|
5
|
+
#
|
6
|
+
require 'zlib'
|
7
|
+
|
8
|
+
module ISOCodes
|
9
|
+
# Version of the ISO 639-3 code set (see
|
10
|
+
# http://www.sil.org/iso639-3/download.asp).
|
11
|
+
ISO_639_3_VERSION = '20100330'
|
12
|
+
|
13
|
+
# Version of the ISO 639-3 macrolanguage mappings (see
|
14
|
+
# http://www.sil.org/iso639-3/download.asp).
|
15
|
+
ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION = '20100128'
|
16
|
+
|
17
|
+
# Description of a single ISO 639-3 language entry: its identifiers in the
# various ISO 639 parts, its language type and its reference name.
#
# Instances are value objects. Two instances of the same class with the same
# ISO 639-3 identifier compare equal and hash alike, so the results of
# separate lookups can be compared, deduplicated and used as hash keys.
class Language
  # Returns the ISO 639-3 identifier.
  attr_reader :identifier

  # Returns the equivalent ISO 639-2 identifier of the bibliographic
  # applications code set, or +nil+ if none is defined.
  attr_reader :alpha3_bibliographic

  # Returns the equivalent ISO 639-2 identifier of the terminology
  # applications code set, or +nil+ if none is defined.
  attr_reader :alpha3_terminology

  # Returns the equivalent ISO 639-1 identifier, or +nil+ if none is defined.
  attr_reader :alpha2

  # Returns the language type. Language type is either <tt>:living</tt>,
  # <tt>:extinct</tt>, <tt>:ancient</tt>, <tt>:historic</tt>,
  # <tt>:constructed</tt>, or <tt>:special</tt>. See
  # http://www.sil.org/iso639-3/types.asp for a description of these.
  attr_reader :language_type

  # Returns the reference language name.
  attr_reader :reference_name

  # Creates a language description.
  #
  # identifier::    ISO 639-3 identifier
  # part2b::        ISO 639-2 bibliographic identifier
  # part2t::        ISO 639-2 terminology identifier
  # part1::         ISO 639-1 identifier
  # language_type:: language type symbol (see #language_type)
  # ref_name::      reference name
  def initialize(identifier, part2b, part2t, part1, language_type, ref_name)
    @identifier = identifier
    @alpha3_bibliographic = part2b
    @alpha3_terminology = part2t
    @alpha2 = part1
    @language_type = language_type
    @reference_name = ref_name
  end

  # Returns +true+ if +other+ is of exactly the same class and carries the
  # same ISO 639-3 identifier.
  def ==(other)
    other.class == self.class && other.identifier == identifier
  end

  alias eql? ==

  # Hash value consistent with #eql?.
  def hash
    [self.class, identifier].hash
  end
end
|
50
|
+
|
51
|
+
# A language entry loaded with ISO 639-3 scope "I" (individual language).
class IndividualLanguage < Language
  # Creates an individual language.
  #
  # individual_languages:: must be +nil+ (or +false+); any other value raises
  # macrolanguage::        identifier of the macrolanguage this language
  #                        belongs to, or +nil+
  # rest::                 arguments passed on to Language#initialize
  def initialize(individual_languages, macrolanguage, *rest)
    super(*rest)

    if individual_languages # FIXME
      raise "Individual languages given for individual language"
    end

    @macrolanguage = macrolanguage
  end

  # Returns the macrolanguage it is part of or +nil+ if not part of any
  # macrolanguage. The macrolanguage is looked up by its stored identifier
  # on every call.
  def macrolanguage
    ISOCodes.find_iso_639_3_language(@macrolanguage)
  end
end
|
65
|
+
|
66
|
+
# A language entry loaded with ISO 639-3 scope "M" (macrolanguage): an
# identifier that covers a set of individual languages.
class Macrolanguage < Language
  # Creates a macrolanguage.
  #
  # individual_languages:: array of identifiers of the covered individual
  #                        languages, or +nil+ if none have been recorded
  # macrolanguage::        must be +nil+ (or +false+); any other value raises
  # rest::                 arguments passed on to Language#initialize
  def initialize(individual_languages, macrolanguage, *rest)
    super(*rest)

    raise "Macrolanguage given for macrolanguage" if macrolanguage # FIXME
    @individual_languages = individual_languages
  end

  # Returns an array with the individual languages covered by this
  # macrolanguage, or an empty array if none have been recorded.
  def individual_languages
    # The identifier list stays +nil+ when no mapping row named this
    # macrolanguage; treat that as "no members" rather than failing on nil.
    (@individual_languages || []).map { |c| ISOCodes.find_iso_639_3_language(c) }
  end
end
|
79
|
+
|
80
|
+
class << self
|
81
|
+
# Looks up a language by its code and returns the object describing it.
# The lookup is delegated to find_iso_639_3_language, so the code is
# interpreted as an ISO 639-3 identifier.
def find_language(code)
  return find_iso_639_3_language(code)
end
|
86
|
+
|
87
|
+
# Returns an object describing the language identified by an ISO 639-3
# identifier, or +nil+ if the identifier is not in the table.
#
# A new object is built from the stored table row on every call.
def find_iso_639_3_language(code)
  return nil unless @@iso_639_3.has_key?(code)

  klass, *rest = @@iso_639_3[code]

  # Special-scope entries are stored without a class. Describe them with a
  # plain Language built from the six descriptive fields; the two leading
  # slots (individual languages, macrolanguage) do not apply to them.
  return Language.new(*rest[2..-1]) if klass.nil?

  klass.new(*rest)
end
|
97
|
+
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
LANGUAGE_TYPES = {
|
102
|
+
'L' => :living,
|
103
|
+
'E' => :extinct,
|
104
|
+
'A' => :ancient,
|
105
|
+
'H' => :historic,
|
106
|
+
'C' => :constructed,
|
107
|
+
'S' => :special,
|
108
|
+
}.freeze
|
109
|
+
|
110
|
+
DATA_PATH = File.expand_path(File.dirname(__FILE__))
|
111
|
+
|
112
|
+
class << self
|
113
|
+
# Returns the path of the data file +filename+ inside DATA_PATH.
def get_data_filename(filename)
  path_parts = [DATA_PATH, filename]
  File.join(*path_parts)
end
|
116
|
+
|
117
|
+
# Reads a gzip-compressed, line-oriented data file and yields every line to
# the given block as an array of fields.
#
# filename::    name of the data file, resolved with get_data_filename
# field_count:: maximum number of fields per line (the limit passed to
#               String#split, so the last field keeps any extra delimiters)
# delimiter::   string or regexp separating the fields
# skip_first::  when true, the first line (a header) is discarded
#
# The file is closed before the method returns, also when the block raises.
# Returns +nil+.
def read_data_file(filename, field_count, delimiter = /,\s*/, skip_first = false)
  # The block form of GzipReader.open closes the reader for us.
  Zlib::GzipReader.open(get_data_filename(filename)) do |gz|
    gz.gets if skip_first # discard the header line

    gz.each_line do |line|
      yield line.chomp.split(delimiter, field_count)
    end
  end

  nil
end
|
127
|
+
|
128
|
+
# Builds the ISO 639-3 lookup table from the two bundled data files.
#
# Returns a Hash that maps each ISO 639-3 identifier to a frozen Array
#
#   [klass, individual_languages, macrolanguage,
#    identifier, part2b, part2t, part1, language_type, ref_name]
#
# where +klass+ is IndividualLanguage, Macrolanguage or +nil+ (special
# scope), +individual_languages+ is an array of member identifiers or +nil+,
# and +macrolanguage+ is the identifier of the covering macrolanguage or
# +nil+. The remaining elements are the arguments of Language#initialize.
#
# Raises ArgumentError if a row has no identifier or reference name, an
# unknown scope, language type or mapping status, or if a mapping row names
# an identifier that is not in the code table. Raises RuntimeError if an
# individual language is mapped to more than one macrolanguage.
def load_iso_639_3
  data = {}

  read_data_file("iso-639-3_#{ISO_639_3_VERSION}.tab.gz", 8, "\t", true) do |args|
    identifier, part2b, part2t, part1, scope, language_type, ref_name, _comment = args

    # Sanity checks
    raise ArgumentError, "missing identifier" if identifier.nil?
    raise ArgumentError, "missing reference name" if ref_name.nil?

    klass = case scope
            when 'I' then IndividualLanguage
            when 'M' then Macrolanguage
            when 'S' then nil # FIXME
            else raise ArgumentError, "invalid scope"
            end

    language_type = LANGUAGE_TYPES[language_type]
    raise ArgumentError, "invalid language type" if language_type.nil?

    # An empty column means "no equivalent code"; store it as nil, which is
    # what the Language readers are documented to return in that case.
    part2b, part2t, part1 = [part2b, part2t, part1].map do |code|
      (code.nil? || code.empty?) ? nil : code
    end

    data[identifier] = [klass, nil, nil, identifier, part2b, part2t, part1, language_type, ref_name]
  end

  read_data_file("iso-639-3-macrolanguages_#{ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION}.tab.gz", 3, "\t", true) do |args|
    macrolanguage_identifier, individual_language_identifier, status = args

    case status
    when 'R' then next # FIXME
    when 'A' then nil
    else raise ArgumentError, "invalid status"
    end

    individual = data[individual_language_identifier]
    raise ArgumentError, "unknown individual language" if individual.nil?

    macrolanguage = data[macrolanguage_identifier]
    raise ArgumentError, "unknown macrolanguage" if macrolanguage.nil?

    # Add macrolanguage to the individual language
    raise "individual language already has a macrolanguage " if individual[2]
    individual[2] = macrolanguage_identifier

    # Add individual language to macrolanguage
    macrolanguage[1] ||= []
    macrolanguage[1] << individual_language_identifier
  end

  data.each_value { |entry| entry.freeze }

  data
end
|
175
|
+
end
|
176
|
+
|
177
|
+
@@iso_639_3 = load_iso_639_3
|
178
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'iso_codes'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for language lookup through ISOCodes.find_language.
class ISO6393CodesTestCase < Test::Unit::TestCase
  # A lookup returns an object exposing the identifiers and reference name.
  def test_find_language
    english = ISOCodes.find_language("eng")

    assert_not_nil english
    assert_equal 'eng', english.identifier
    assert_equal 'eng', english.alpha3_terminology
    assert_equal 'eng', english.alpha3_bibliographic
    assert_equal 'en', english.alpha2
    assert_equal 'English', english.reference_name
  end

  # A macrolanguage lists the individual languages it covers.
  def test_find_language_macrolanguage
    arabic = ISOCodes.find_language("ara")

    assert_not_nil arabic
    assert_kind_of ISOCodes::Macrolanguage, arabic

    member_identifiers = arabic.individual_languages.map { |member| member.identifier }
    assert member_identifiers.include?('arq')
  end

  # The examples given in README.rdoc.
  def test_doc_exx
    latvian = ISOCodes.find_language('lav')
    assert_equal 'Latvian', latvian.reference_name
    assert_equal ISOCodes::Macrolanguage, latvian.class
    assert_equal 'lav', latvian.alpha3_bibliographic
    assert_equal 'lv', latvian.alpha2

    member_identifiers = latvian.individual_languages.map { |member| member.identifier }
    assert_equal ["ltg", "lvs"], member_identifiers

    latgalian = ISOCodes.find_language('ltg')
    assert_equal ISOCodes::IndividualLanguage, latgalian.class
    assert_equal "lav", latgalian.macrolanguage.identifier
  end
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iso-codes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- "Marius L. J\xC3\xB8hndal"
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-06-29 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A database of ISO language and script codes.
|
22
|
+
email: mariuslj (at) ifi [dot] uio (dot) no
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README.rdoc
|
29
|
+
- README.todo
|
30
|
+
files:
|
31
|
+
- .gitignore
|
32
|
+
- CHANGELOG
|
33
|
+
- README.rdoc
|
34
|
+
- Rakefile
|
35
|
+
- VERSION
|
36
|
+
- iso-codes.gemspec
|
37
|
+
- lib/iso-639-3-macrolanguages_20100128.tab.gz
|
38
|
+
- lib/iso-639-3_20100330.tab.gz
|
39
|
+
- lib/iso_codes.rb
|
40
|
+
- test/test_iso_codes.rb
|
41
|
+
- README.todo
|
42
|
+
has_rdoc: true
|
43
|
+
homepage: http://github.com/mlj/iso-codes
|
44
|
+
licenses: []
|
45
|
+
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options:
|
48
|
+
- --charset=UTF-8
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 0
|
57
|
+
version: "0"
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
requirements: []
|
66
|
+
|
67
|
+
rubyforge_project: iso-codes
|
68
|
+
rubygems_version: 1.3.6
|
69
|
+
signing_key:
|
70
|
+
specification_version: 3
|
71
|
+
summary: ISO language and script code support
|
72
|
+
test_files:
|
73
|
+
- test/test_iso_codes.rb
|