iso-codes 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +2 -2
- data/TODO +49 -0
- data/VERSION +1 -1
- data/lib/{iso-639-3_20100330.tab.gz → iso-639-3_20100707.tab.gz} +0 -0
- data/lib/iso_codes.rb +43 -41
- metadata +14 -8
- data/.gitignore +0 -2
data/README.rdoc
CHANGED
@@ -68,6 +68,6 @@ For an individual language the macrolanguage, if any, can be found.
|
|
68
68
|
If you feel like implementing any of this, please fork the project on
|
69
69
|
github.
|
70
70
|
|
71
|
-
|
71
|
+
== Copyright
|
72
72
|
|
73
|
-
Copyright (c) 2010 Marius L. Jøhndal.
|
73
|
+
Copyright (c) 2010, 2011 Marius L. Jøhndal.
|
data/TODO
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
= iso-codes
|
2
|
+
|
3
|
+
These files may be used to download the list of language codes with their language names, for example into a database. To read the files, please note that one line of text contains one entry. An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given), an alpha-2 code (when given), an English name, and a French name of a language are all separated by pipe (|) characters. If one of these elements is not applicable to the entry, the field is left empty, i.e., a pipe (|) character immediately follows the preceding entry. The line terminator is the LF character.
|
4
|
+
|
5
|
+
A comprehensive database of ISO codes.
|
6
|
+
|
7
|
+
|
8
|
+
While most languages are given one code by the standard, twenty of the languages described have two three-letter codes, a "bibliographic" code (ISO 639-2/B), which is derived from the English name for the language and was a necessary legacy feature, and a "terminological" code (ISO 639-2/T), which is derived from the native name for the language. Each of these twenty languages is also included in the ISO 639-1 standard. (There were 22 B codes; scc and scr are now deprecated.)
|
9
|
+
|
10
|
+
In addition, there are codes for special situations:
|
11
|
+
|
12
|
+
* mis is listed as "uncoded languages"
|
13
|
+
* mul (for multiple languages) is applied when several languages are used and it is not practical to specify all the appropriate language codes
|
14
|
+
* The interval from qaa to qtz is reserved and is not used in the standard
|
15
|
+
* und (for undetermined) is used in situations in which a language or languages must be indicated but the language cannot be identified.
|
16
|
+
* zxx is listed in the code list as "no linguistic content" (added 2006-01-11)
|
17
|
+
|
18
|
+
Some ISO 639-2 codes that are commonly used for languages do not precisely represent a particular language or a set of closely related languages (as macrolanguages do). They are regarded as collective languages (or collectives) and are excluded from ISO 639-3.
|
19
|
+
|
20
|
+
|
21
|
+
CREATE TABLE ISO_639-3_Names (
|
22
|
+
Id char(3) NOT NULL, -- The three-letter 639-3 identifier
|
23
|
+
Print_Name varchar(75) NOT NULL, -- One of the names associated with this identifier
|
24
|
+
Inverted_Name varchar(75) NOT NULL) -- The inverted form of this Print_Name form
|
25
|
+
|
26
|
+
CREATE TABLE ISO_639-3_Macrolanguages (
|
27
|
+
M_Id char(3) NOT NULL, -- The identifier for a macrolanguage
|
28
|
+
I_Id char(3) NOT NULL, -- The identifier for an individual language
|
29
|
+
-- that is a member of the macrolanguage
|
30
|
+
I_Status char(1) NOT NULL) -- A (active) or R (retired) indicating the
|
31
|
+
-- status of the individual code element
|
32
|
+
|
33
|
+
* countries and regions:
|
34
|
+
|
35
|
+
- ISO 3166-1 Codes for the representation of names of countries and their subdivisions – Part 1: Country codes,
|
36
|
+
ISO 3166-2 Codes for the representation of names of countries and their subdivisions – Part 2: Country subdivision codes,
|
37
|
+
ISO 3166-3 Codes for the representation of names of countries and their subdivisions – Part 3: Code for formerly used names of countries
|
38
|
+
|
39
|
+
Maintenance agency: http://www.iso.org/iso/country_codes
|
40
|
+
Last update: 2008-04-25 (http://www.iso.org/iso/country_codes/check_what_s_new.htm)
|
41
|
+
|
42
|
+
* currencies:
|
43
|
+
|
44
|
+
- ISO 4217:2008 Codes for the representation of currencies and funds
|
45
|
+
ISO 4217:2001 Codes for the representation of currencies and funds
|
46
|
+
|
47
|
+
Maintenance agency: http://www.bsi-global.com/en/Standards-and-Publications/Industry-Sectors/Services/BSI-Currency-Code-Service/
|
48
|
+
|
49
|
+
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
Binary file
|
data/lib/iso_codes.rb
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
#
|
2
2
|
# iso_codes.rb - A comprehensive database of ISO codes
|
3
3
|
#
|
4
|
-
# Written by Marius L. Jøhndal, 2008, 2010.
|
4
|
+
# Written by Marius L. Jøhndal, 2008, 2010, 2011.
|
5
5
|
#
|
6
6
|
require 'zlib'
|
7
7
|
|
8
8
|
module ISOCodes
|
9
|
-
# Version of the ISO 639-3 code set (see
|
9
|
+
# Version of the ISO 639-3 code set supported (see
|
10
10
|
# http://www.sil.org/iso639-3/download.asp).
|
11
|
-
ISO_639_3_VERSION = '
|
11
|
+
ISO_639_3_VERSION = '20100707'
|
12
12
|
|
13
|
-
# Version of the ISO 639-3 macrolanguage mappings (see
|
13
|
+
# Version of the ISO 639-3 macrolanguage mappings supported (see
|
14
14
|
# http://www.sil.org/iso639-3/download.asp).
|
15
15
|
ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION = '20100128'
|
16
16
|
|
17
17
|
class Language
|
18
|
-
#
|
18
|
+
# ISO 639-3 identifier for the language, or +nil+ if none is defined.
|
19
19
|
attr_reader :identifier
|
20
20
|
|
21
|
-
#
|
22
|
-
# +nil+ if none is defined.
|
21
|
+
# ISO 639-2 identifier of the bibliographic applications code set for
|
22
|
+
# the language, or +nil+ if none is defined.
|
23
23
|
attr_reader :alpha3_bibliographic
|
24
24
|
|
25
|
-
#
|
26
|
-
# +nil+ if none is defined.
|
25
|
+
# ISO 639-2 identifier of the terminology applications code set for the
|
26
|
+
# language, or +nil+ if none is defined.
|
27
27
|
attr_reader :alpha3_terminology
|
28
28
|
|
29
|
-
#
|
29
|
+
# ISO 639-1 identifier for the language, or +nil+ if none is defined.
|
30
30
|
attr_reader :alpha2
|
31
31
|
|
32
|
-
#
|
33
|
-
# <tt>:
|
34
|
-
# <tt>:constructed</tt>, or <tt>:special</tt>. See
|
35
|
-
# for a description
|
32
|
+
# Language type for the language. Language type is either
|
33
|
+
# <tt>:living</tt>, <tt>:extinct</tt>, <tt>:ancient</tt>,
|
34
|
+
# <tt>:historic</tt>, <tt>:constructed</tt>, or <tt>:special</tt>. See
|
35
|
+
# http://www.sil.org/iso639-3/types.asp for a description.
|
36
36
|
attr_reader :language_type
|
37
37
|
|
38
|
-
#
|
38
|
+
# Reference name for the language.
|
39
39
|
attr_reader :reference_name
|
40
40
|
|
41
41
|
def initialize(identifier, part2b, part2t, part1, language_type, ref_name)
|
@@ -56,8 +56,8 @@ module ISOCodes
|
|
56
56
|
@macrolanguage = macrolanguage
|
57
57
|
end
|
58
58
|
|
59
|
-
# Returns the macrolanguage
|
60
|
-
# macrolanguage.
|
59
|
+
# Returns the macrolanguage the language belongs to or +nil+ if not
|
60
|
+
# part of any macrolanguage.
|
61
61
|
def macrolanguage
|
62
62
|
ISOCodes::find_iso_639_3_language(@macrolanguage)
|
63
63
|
end
|
@@ -71,7 +71,8 @@ module ISOCodes
|
|
71
71
|
@individual_languages = individual_languages
|
72
72
|
end
|
73
73
|
|
74
|
-
# Returns an array of individual languages
|
74
|
+
# Returns an array of individual languages or an empty array if no
|
75
|
+
# individual languages are defined.
|
75
76
|
def individual_languages
|
76
77
|
@individual_languages.map { |c| ISOCodes::find_iso_639_3_language(c) }
|
77
78
|
end
|
@@ -79,13 +80,13 @@ module ISOCodes
|
|
79
80
|
|
80
81
|
class << self
|
81
82
|
# Returns an object describing the language identified by the language
|
82
|
-
# code
|
83
|
+
# code +code+.
|
83
84
|
def find_language(code)
|
84
85
|
find_iso_639_3_language(code)
|
85
86
|
end
|
86
87
|
|
87
|
-
# Returns an object describing the language identified by
|
88
|
-
# identifier
|
88
|
+
# Returns an object describing the language identified by the ISO 639-3
|
89
|
+
# identifier +code+.
|
89
90
|
def find_iso_639_3_language(code)
|
90
91
|
if @@iso_639_3.has_key?(code)
|
91
92
|
klass, *rest = @@iso_639_3[code]
|
@@ -98,15 +99,6 @@ module ISOCodes
|
|
98
99
|
|
99
100
|
private
|
100
101
|
|
101
|
-
LANGUAGE_TYPES = {
|
102
|
-
'L' => :living,
|
103
|
-
'E' => :extinct,
|
104
|
-
'A' => :ancient,
|
105
|
-
'H' => :historic,
|
106
|
-
'C' => :constructed,
|
107
|
-
'S' => :special,
|
108
|
-
}.freeze
|
109
|
-
|
110
102
|
DATA_PATH = File.expand_path(File.dirname(__FILE__))
|
111
103
|
|
112
104
|
class << self
|
@@ -114,7 +106,7 @@ module ISOCodes
|
|
114
106
|
File.join(DATA_PATH, filename)
|
115
107
|
end
|
116
108
|
|
117
|
-
def read_data_file(filename, field_count, delimiter
|
109
|
+
def read_data_file(filename, field_count, delimiter, skip_first)
|
118
110
|
Zlib::GzipReader.open(get_data_filename(filename)).each_line do |l|
|
119
111
|
if skip_first
|
120
112
|
skip_first = false
|
@@ -135,16 +127,26 @@ module ISOCodes
|
|
135
127
|
raise ArgumentError, "missing identifier" if identifier.nil?
|
136
128
|
raise ArgumentError, "missing reference name" if ref_name.nil?
|
137
129
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
130
|
+
klass =
|
131
|
+
case scope
|
132
|
+
when 'I': IndividualLanguage
|
133
|
+
when 'M': Macrolanguage
|
134
|
+
when 'S': nil # FIXME
|
135
|
+
else
|
136
|
+
raise ArgumentError, "invalid scope"
|
137
|
+
end
|
138
|
+
|
139
|
+
language_type =
|
140
|
+
case language_type
|
141
|
+
when 'L': :living
|
142
|
+
when 'E': :extinct
|
143
|
+
when 'A': :ancient
|
144
|
+
when 'H': :historic
|
145
|
+
when 'C': :constructed
|
146
|
+
when 'S': :special
|
147
|
+
else
|
148
|
+
raise ArgumentError, "invalid language type"
|
149
|
+
end
|
148
150
|
|
149
151
|
data[identifier] = [klass, nil, nil, identifier, part2b, part2t, part1, language_type, ref_name]
|
150
152
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iso-codes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
- 1
|
8
|
+
- 2
|
8
9
|
- 0
|
9
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- "Marius L. J\xC3\xB8hndal"
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
+
date: 2011-01-19 00:00:00 +00:00
|
18
19
|
default_executable:
|
19
20
|
dependencies: []
|
20
21
|
|
@@ -27,45 +28,50 @@ extensions: []
|
|
27
28
|
extra_rdoc_files:
|
28
29
|
- README.rdoc
|
29
30
|
- README.todo
|
31
|
+
- TODO
|
30
32
|
files:
|
31
|
-
- .gitignore
|
32
33
|
- CHANGELOG
|
33
34
|
- README.rdoc
|
34
35
|
- Rakefile
|
35
36
|
- VERSION
|
36
37
|
- iso-codes.gemspec
|
37
38
|
- lib/iso-639-3-macrolanguages_20100128.tab.gz
|
38
|
-
- lib/iso-639-3_20100330.tab.gz
|
39
|
+
- lib/iso-639-3_20100707.tab.gz
|
39
40
|
- lib/iso_codes.rb
|
40
41
|
- test/test_iso_codes.rb
|
41
42
|
- README.todo
|
43
|
+
- TODO
|
42
44
|
has_rdoc: true
|
43
45
|
homepage: http://github.com/mlj/iso-codes
|
44
46
|
licenses: []
|
45
47
|
|
46
48
|
post_install_message:
|
47
|
-
rdoc_options:
|
48
|
-
|
49
|
+
rdoc_options: []
|
50
|
+
|
49
51
|
require_paths:
|
50
52
|
- lib
|
51
53
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
58
|
+
hash: 3
|
55
59
|
segments:
|
56
60
|
- 0
|
57
61
|
version: "0"
|
58
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
59
64
|
requirements:
|
60
65
|
- - ">="
|
61
66
|
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
62
68
|
segments:
|
63
69
|
- 0
|
64
70
|
version: "0"
|
65
71
|
requirements: []
|
66
72
|
|
67
73
|
rubyforge_project: iso-codes
|
68
|
-
rubygems_version: 1.3.
|
74
|
+
rubygems_version: 1.3.7
|
69
75
|
signing_key:
|
70
76
|
specification_version: 3
|
71
77
|
summary: ISO language and script code support
|
data/.gitignore
DELETED