iso-codes 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/CHANGELOG +1 -0
- data/README.rdoc +73 -0
- data/README.todo +39 -0
- data/Rakefile +25 -0
- data/VERSION +1 -0
- data/iso-codes.gemspec +51 -0
- data/lib/iso-639-3-macrolanguages_20100128.tab.gz +0 -0
- data/lib/iso-639-3_20100330.tab.gz +0 -0
- data/lib/iso_codes.rb +178 -0
- data/test/test_iso_codes.rb +35 -0
- metadata +73 -0
data/.gitignore
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
v0.1.0. initial release
|
data/README.rdoc
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
= iso-codes
|
2
|
+
|
3
|
+
A comprehensive database of ISO language and script codes.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
gem install iso-codes
|
8
|
+
|
9
|
+
== Usage
|
10
|
+
|
11
|
+
Language codes can be looked up using +find_language+:
|
12
|
+
|
13
|
+
require 'iso_codes'
|
14
|
+
|
15
|
+
language = ISOCodes.find_language('lav')
|
16
|
+
# ISO 639-3 reference name
|
17
|
+
language.reference_name
|
18
|
+
# ISO 639-3 identifier
|
19
|
+
language.identifier
|
20
|
+
# ISO 639-1 (alpha-2) identifier
|
21
|
+
language.alpha2
|
22
|
+
# ISO 639-2 (alpha-3) identifiers
|
23
|
+
language.alpha3_bibliographic
|
24
|
+
language.alpha3_terminology
|
25
|
+
|
26
|
+
=== Macrolanguages and individual languages
|
27
|
+
|
28
|
+
ISO 639-3 defines language identifiers that cover a set of other language
|
29
|
+
identifiers. For example, the identifier +lav+ for Latvian corresponds to
|
30
|
+
+lvs+ for Standard Latvian and +ltg+ for Latgalian. These identifiers are
|
31
|
+
called macrolanguages (see http://www.sil.org/iso639-3/scope.asp#M for a
|
32
|
+
more formal definition). See http://www.sil.org/iso639-3/macrolanguages.asp
|
33
|
+
for an overview of such identifiers.
|
34
|
+
|
35
|
+
Macrolanguage identifiers can be looked up in the same way as other
|
36
|
+
language codes:
|
37
|
+
|
38
|
+
language = ISOCodes.find_language('lav')
|
39
|
+
language.reference_name
|
40
|
+
# => "Latvian"
|
41
|
+
language.class
|
42
|
+
# => ISOCodes::Macrolanguage
|
43
|
+
language.alpha3_bibliographic
|
44
|
+
# => "lav"
|
45
|
+
language.alpha2
|
46
|
+
# => "lv"
|
47
|
+
|
48
|
+
The individual languages covered by the identifier can be listed:
|
49
|
+
|
50
|
+
language.individual_languages.map { |l| l.identifier }
|
51
|
+
# => ["ltg", "lvs"]
|
52
|
+
|
53
|
+
For an individual language the macrolanguage, if any, can be found:
|
54
|
+
|
55
|
+
language = ISOCodes.find_language('ltg')
|
56
|
+
language.class
|
57
|
+
# => ISOCodes::IndividualLanguage
|
58
|
+
language.macrolanguage.identifier
|
59
|
+
# => "lav"
|
60
|
+
|
61
|
+
== TODO
|
62
|
+
|
63
|
+
* ISO 639-1 and ISO 639-2
|
64
|
+
* IANA language codes
|
65
|
+
* country/region codes (ISO 3166-1 through 3)
|
66
|
+
* currencies (ISO 4217:2001 and 2008)
|
67
|
+
|
68
|
+
If you feel like implementing any of this, please fork the project on
|
69
|
+
github.
|
70
|
+
|
71
|
+
= Copyright
|
72
|
+
|
73
|
+
Copyright (c) 2010 Marius L. Jøhndal.
|
data/README.todo
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
== Coverage
|
2
|
+
|
3
|
+
The database currently includes codes for the following domains:
|
4
|
+
|
5
|
+
* languages: two letter (`alpha-2') and three letter (`alpha-3') ISO codes,
|
6
|
+
and human readable names in English and, for a subset, in French.
|
7
|
+
|
8
|
+
* scripts: four letter (`alpha-4') and numeric ISO codes, and human readable
|
9
|
+
names in English and French.
|
10
|
+
|
11
|
+
=== Standards
|
12
|
+
|
13
|
+
* languages:
|
14
|
+
|
15
|
+
- ISO 639-1
|
16
|
+
|
17
|
+
- ISO 639-2:1998 Codes for the representation of names of languages -- Part 2: Alpha-3 code
|
18
|
+
|
19
|
+
Registration authority: http://www.loc.gov/standards/iso639-2/
|
20
|
+
Last update: 2008-07-08 (http://www.loc.gov/standards/iso639-2/php/code_changes.php)
|
21
|
+
|
22
|
+
- ISO 639-3:2007 Codes for the representation of names of languages -- Part 3: Alpha-3 code for comprehensive coverage of languages
|
23
|
+
|
24
|
+
Registration authority: http://www.sil.org/iso639-3/
|
25
|
+
Last update: 2008-07-11
|
26
|
+
|
27
|
+
- RFC 4646 and RFC 4647 (which together obsoleted RFC 3066 and RFC 1766)
|
28
|
+
|
29
|
+
Language subtag registry: http://www.iana.org/assignments/language-subtag-registry
|
30
|
+
Last update: 2008-07-23
|
31
|
+
|
32
|
+
* scripts:
|
33
|
+
|
34
|
+
- ISO 15924:2004 Codes for the representation of names of scripts
|
35
|
+
|
36
|
+
Registration authority: http://unicode.org/iso15924/
|
37
|
+
Last update: 2007-11-26 (http://unicode.org/iso15924/codechanges.html)
|
38
|
+
|
39
|
+
== License
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Build and test tasks for the iso-codes gem.
require 'rubygems'
require 'rake'

# Packaging tasks are provided by Jeweler. Jeweler is optional: when it
# cannot be loaded a hint is printed and the remaining tasks stay usable.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name = "iso-codes"
    gem.summary = "ISO language and script code support"
    gem.description = "A database of ISO language and script codes."
    gem.authors = ['Marius L. Jøhndal']
    gem.email = "mariuslj (at) ifi [dot] uio (dot) no"
    gem.homepage = "http://github.com/mlj/iso-codes"
    gem.rubyforge_project = "iso-codes"
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install jeweler"
end

# `rake test` runs every file matching test/**/test_*.rb with lib/ and
# test/ added to the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/iso-codes.gemspec
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-

# Gem specification for iso-codes: name, version, authorship, the list of
# packaged files and the documentation/test settings.
Gem::Specification.new do |s|
  s.name = %q{iso-codes}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Marius L. J\303\270hndal"]
  s.date = %q{2010-06-29}
  s.description = %q{A database of ISO language and script codes.}
  s.email = %q{mariuslj (at) ifi [dot] uio (dot) no}
  s.extra_rdoc_files = [
    "README.rdoc",
    "README.todo"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "iso-codes.gemspec",
    "lib/iso-639-3-macrolanguages_20100128.tab.gz",
    "lib/iso-639-3_20100330.tab.gz",
    "lib/iso_codes.rb",
    "test/test_iso_codes.rb"
  ]
  s.homepage = %q{http://github.com/mlj/iso-codes}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{iso-codes}
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{ISO language and script code support}
  s.test_files = [
    "test/test_iso_codes.rb"
  ]

  # The gem declares no dependencies, so the generated RubyGems-version
  # conditional had empty branches and only read the legacy constant
  # Gem::RubyGemsVersion. All that remains is the specification format,
  # which is set only where the specification supports it.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
51
|
+
|
Binary file
|
Binary file
|
data/lib/iso_codes.rb
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
#
|
2
|
+
# iso_codes.rb - A comprehensive database of ISO codes
|
3
|
+
#
|
4
|
+
# Written by Marius L. Jøhndal, 2008, 2010.
|
5
|
+
#
|
6
|
+
require 'zlib'
|
7
|
+
|
8
|
+
module ISOCodes
|
9
|
+
# Version of the ISO 639-3 code set (see
|
10
|
+
# http://www.sil.org/iso639-3/download.asp).
|
11
|
+
ISO_639_3_VERSION = '20100330'
|
12
|
+
|
13
|
+
# Version of the ISO 639-3 macrolanguage mappings (see
|
14
|
+
# http://www.sil.org/iso639-3/download.asp).
|
15
|
+
ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION = '20100128'
|
16
|
+
|
17
|
+
# Common description of a language: its ISO 639-3 identifier, the
# equivalent codes in the other parts of ISO 639, its type and its
# reference name. All attributes are read-only and hold exactly what was
# passed to the constructor.
class Language
  # Returns the ISO 639-3 identifier.
  attr_reader :identifier

  # Returns the equivalent ISO 639-2 identifier of the bibliographic
  # applications code set, or +nil+ if none is defined.
  attr_reader :alpha3_bibliographic

  # Returns the equivalent ISO 639-2 identifier of the terminology
  # applications code set, or +nil+ if none is defined.
  attr_reader :alpha3_terminology

  # Returns the equivalent ISO 639-1 identifier, or +nil+ if none is defined.
  attr_reader :alpha2

  # Returns the language type. Language type is either <tt>:living</tt>,
  # <tt>:extinct</tt>, <tt>:ancient</tt>, <tt>:historic</tt>,
  # <tt>:constructed</tt>, or <tt>:special</tt>. See
  # http://www.sil.org/iso639-3/types.asp for a description of these.
  attr_reader :language_type

  # Returns the reference language name.
  attr_reader :reference_name

  # Stores the given values. +part2b+, +part2t+ and +part1+ become
  # +alpha3_bibliographic+, +alpha3_terminology+ and +alpha2+; +ref_name+
  # becomes +reference_name+.
  def initialize(identifier, part2b, part2t, part1, language_type, ref_name)
    @identifier, @language_type, @reference_name = identifier, language_type, ref_name
    @alpha3_bibliographic, @alpha3_terminology, @alpha2 = part2b, part2t, part1
  end
end
|
50
|
+
|
51
|
+
# A language that may belong to a macrolanguage. Only the identifier of
# the macrolanguage is stored; the object itself is looked up on demand.
class IndividualLanguage < Language
  # +individual_languages+ must be +nil+ (or +false+) for an individual
  # language, otherwise a RuntimeError is raised. +macrolanguage+ is the
  # identifier of the covering macrolanguage, or +nil+. The remaining
  # arguments are handed to Language#initialize.
  def initialize(individual_languages, macrolanguage, *rest)
    super(*rest)

    if individual_languages # FIXME
      raise "Individual languages given for individual language"
    end

    @macrolanguage = macrolanguage
  end

  # Returns the macrolanguage it is part of or +nil+ if not part of any
  # macrolanguage.
  def macrolanguage
    ISOCodes.find_iso_639_3_language(@macrolanguage)
  end
end
|
65
|
+
|
66
|
+
# A language identifier that covers a set of individual languages. Only
# the identifiers of the members are stored; the objects are looked up on
# demand.
class Macrolanguage < Language
  # +individual_languages+ is an array with the identifiers of the covered
  # languages, or +nil+ when none are known. +macrolanguage+ must be +nil+
  # (or +false+), otherwise a RuntimeError is raised. The remaining
  # arguments are handed to Language#initialize.
  def initialize(individual_languages, macrolanguage, *rest)
    super(*rest)

    raise "Macrolanguage given for macrolanguage" if macrolanguage # FIXME
    @individual_languages = individual_languages
  end

  # Returns an array of individual languages, otherwise an empty array.
  def individual_languages
    # The identifier list is nil when no mapping was recorded for this
    # macrolanguage; treat that as "no members" instead of calling map on nil.
    (@individual_languages || []).map { |c| ISOCodes::find_iso_639_3_language(c) }
  end
end
|
79
|
+
|
80
|
+
class << self
  # Returns an object describing the language identified by the language
  # code, or +nil+ if the code is unknown.
  def find_language(code)
    find_iso_639_3_language(code)
  end

  # Returns an object describing the language identified by an ISO 639-3
  # identifier, or +nil+ if the identifier is unknown.
  #
  # Each table entry has the form
  # <tt>[class, individual_languages, macrolanguage, *fields]</tt>.
  # Entries of special scope are stored without a class; they are returned
  # as plain Language objects rather than failing on <tt>nil.new</tt>.
  def find_iso_639_3_language(code)
    return nil unless @@iso_639_3.has_key?(code)

    klass, individual_languages, macrolanguage, *fields = @@iso_639_3[code]

    if klass.nil?
      Language.new(*fields)
    else
      klass.new(individual_languages, macrolanguage, *fields)
    end
  end
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
# One-letter language type codes and the symbols they are translated to.
LANGUAGE_TYPES = {
  'L' => :living,
  'E' => :extinct,
  'A' => :ancient,
  'H' => :historic,
  'C' => :constructed,
  'S' => :special
}.freeze

# Absolute path of the directory that contains this file.
DATA_PATH = File.expand_path('..', __FILE__)
|
111
|
+
|
112
|
+
class << self
  # Returns the path of +filename+ inside DATA_PATH.
  def get_data_filename(filename)
    File.join(DATA_PATH, filename)
  end

  # Reads a gzip-compressed data file line by line and yields every line
  # as an array of fields.
  #
  # +filename+ is resolved with get_data_filename. Each line is chomped
  # and split on +delimiter+ into at most +field_count+ fields. When
  # +skip_first+ is true the first line is not yielded.
  def read_data_file(filename, field_count, delimiter = /,\s*/, skip_first = false)
    # The block form closes the reader when the block is left, also when
    # the caller's block raises.
    Zlib::GzipReader.open(get_data_filename(filename)) do |gz|
      gz.each_line do |l|
        if skip_first
          skip_first = false
          next
        end

        yield l.chomp.split(delimiter, field_count)
      end
    end
  end

  # Builds the ISO 639-3 lookup table from the two bundled data files.
  #
  # Returns a hash that maps each identifier to a frozen array
  # <tt>[class, individual_languages, macrolanguage, identifier, part2b,
  # part2t, part1, language_type, ref_name]</tt>. +class+ is
  # IndividualLanguage, Macrolanguage or +nil+ (special scope).
  #
  # Raises ArgumentError if a row lacks an identifier or reference name,
  # has an invalid scope, language type or mapping status, or if a mapping
  # refers to an identifier that is not in the code table.
  def load_iso_639_3
    data = {}

    read_data_file("iso-639-3_#{ISO_639_3_VERSION}.tab.gz", 8, "\t", true) do |args|
      identifier, part2b, part2t, part1, scope, language_type, ref_name, _comment = args

      # Sanity checks
      raise ArgumentError, "missing identifier" if identifier.nil?
      raise ArgumentError, "missing reference name" if ref_name.nil?

      klass = case scope
              when 'I' then IndividualLanguage
              when 'M' then Macrolanguage
              when 'S' then nil # FIXME
              else raise ArgumentError, "invalid scope"
              end

      language_type = LANGUAGE_TYPES[language_type]
      raise ArgumentError, "invalid language type" if language_type.nil?

      # An empty column means that no equivalent code is defined; store
      # nil in that case, as the Language accessors document.
      part2b, part2t, part1 = [part2b, part2t, part1].map do |c|
        (c.nil? || c.empty?) ? nil : c
      end

      data[identifier] = [klass, nil, nil, identifier, part2b, part2t, part1, language_type, ref_name]
    end

    read_data_file("iso-639-3-macrolanguages_#{ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION}.tab.gz", 3, "\t", true) do |args|
      macrolanguage_identifier, individual_language_identifier, status = args

      case status
      when 'R' then next #FIXME
      when 'A' # nothing to do here; the mapping is recorded below
      else
        raise ArgumentError, "invalid status"
      end

      individual = data[individual_language_identifier]
      macro = data[macrolanguage_identifier]

      # Fail with a clear message instead of a NoMethodError on nil when
      # the mapping table mentions an identifier the code table lacks.
      if individual.nil?
        raise ArgumentError, "unknown individual language #{individual_language_identifier}"
      end
      if macro.nil?
        raise ArgumentError, "unknown macrolanguage #{macrolanguage_identifier}"
      end

      # Add macrolanguage to the individual language
      raise "individual language already has a macrolanguage " if individual[2]
      individual[2] = macrolanguage_identifier

      # Add individual language to macrolanguage
      macro[1] ||= []
      macro[1] << individual_language_identifier
    end

    data.each_value { |row| row.freeze }

    data
  end
end
|
176
|
+
|
177
|
+
@@iso_639_3 = load_iso_639_3
|
178
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'iso_codes'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for the ISO 639-3 lookup API, run against the data files bundled
# with the library.
class ISO6393CodesTestCase < Test::Unit::TestCase
  # An ordinary language exposes all of its equivalent codes and its name.
  def test_find_language
    english = ISOCodes.find_language("eng")
    assert_not_nil english
    assert_equal 'eng', english.identifier
    assert_equal 'eng', english.alpha3_terminology
    assert_equal 'eng', english.alpha3_bibliographic
    assert_equal 'en', english.alpha2
    assert_equal 'English', english.reference_name
  end

  # A macrolanguage lists the individual languages it covers.
  def test_find_language_macrolanguage
    arabic = ISOCodes.find_language("ara")
    assert_not_nil arabic
    assert_kind_of ISOCodes::Macrolanguage, arabic

    members = arabic.individual_languages
    assert members.any? { |member| member.identifier == 'arq' }
  end

  # Mirrors the usage examples given in the README.
  def test_doc_exx
    latvian = ISOCodes.find_language('lav')
    assert_equal 'Latvian', latvian.reference_name
    assert_equal ISOCodes::Macrolanguage, latvian.class
    assert_equal 'lav', latvian.alpha3_bibliographic
    assert_equal 'lv', latvian.alpha2

    member_ids = latvian.individual_languages.map { |member| member.identifier }
    assert_equal ["ltg", "lvs"], member_ids

    latgalian = ISOCodes.find_language('ltg')
    assert_equal ISOCodes::IndividualLanguage, latgalian.class
    assert_equal "lav", latgalian.macrolanguage.identifier
  end
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iso-codes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- "Marius L. J\xC3\xB8hndal"
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-06-29 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A database of ISO language and script codes.
|
22
|
+
email: mariuslj (at) ifi [dot] uio (dot) no
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README.rdoc
|
29
|
+
- README.todo
|
30
|
+
files:
|
31
|
+
- .gitignore
|
32
|
+
- CHANGELOG
|
33
|
+
- README.rdoc
|
34
|
+
- Rakefile
|
35
|
+
- VERSION
|
36
|
+
- iso-codes.gemspec
|
37
|
+
- lib/iso-639-3-macrolanguages_20100128.tab.gz
|
38
|
+
- lib/iso-639-3_20100330.tab.gz
|
39
|
+
- lib/iso_codes.rb
|
40
|
+
- test/test_iso_codes.rb
|
41
|
+
- README.todo
|
42
|
+
has_rdoc: true
|
43
|
+
homepage: http://github.com/mlj/iso-codes
|
44
|
+
licenses: []
|
45
|
+
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options:
|
48
|
+
- --charset=UTF-8
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 0
|
57
|
+
version: "0"
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
requirements: []
|
66
|
+
|
67
|
+
rubyforge_project: iso-codes
|
68
|
+
rubygems_version: 1.3.6
|
69
|
+
signing_key:
|
70
|
+
specification_version: 3
|
71
|
+
summary: ISO language and script code support
|
72
|
+
test_files:
|
73
|
+
- test/test_iso_codes.rb
|