iso-codes 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -2
- data/TODO +49 -0
- data/VERSION +1 -1
- data/lib/{iso-639-3_20100330.tab.gz → iso-639-3_20100707.tab.gz} +0 -0
- data/lib/iso_codes.rb +43 -41
- metadata +14 -8
- data/.gitignore +0 -2
data/README.rdoc
CHANGED
@@ -68,6 +68,6 @@ For an individual language the macrolanguage, if any, can be found.
|
|
68
68
|
If you feel like implementing any of this, please fork the project on
|
69
69
|
github.
|
70
70
|
|
71
|
-
|
71
|
+
== Copyright
|
72
72
|
|
73
|
-
Copyright (c) 2010 Marius L. Jøhndal.
|
73
|
+
Copyright (c) 2010, 2011 Marius L. Jøhndal.
|
data/TODO
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
= iso-codes
|
2
|
+
|
3
|
+
These files may be used to download the list of language codes with their language names, for example into a database. To read the files, please note that one line of text contains one entry. An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given), an alpha-2 code (when given), an English name, and a French name of a language are all separated by pipe (|) characters. If one of these elements is not applicable to the entry, the field is left empty, i.e., a pipe (|) character immediately follows the preceding entry. The line terminator is the LF character.
|
4
|
+
|
5
|
+
A comprehensive database of ISO codes.
|
6
|
+
|
7
|
+
|
8
|
+
While most languages are given one code by the standard, twenty of the languages described have two three-letter codes, a "bibliographic" code (ISO 639-2/B), which is derived from the English name for the language and was a necessary legacy feature, and a "terminological" code (ISO 639-2/T), which is derived from the native name for the language. Each of these twenty languages is also included in the ISO 639-1 standard. (There were 22 B codes; scc and scr are now deprecated.)
|
9
|
+
|
10
|
+
In addition, there are codes for special situations:
|
11
|
+
|
12
|
+
* mis is listed as "uncoded languages"
|
13
|
+
* mul (for multiple languages) is applied when several languages are used and it is not practical to specify all the appropriate language codes
|
14
|
+
* The interval from qaa to qtz is reserved and is not used in the standard
|
15
|
+
* und (for undetermined) is used in situations in which a language or languages must be indicated but the language cannot be identified.
|
16
|
+
* zxx is listed in the code list as "no linguistic content" (added 2006-01-11)
|
17
|
+
|
18
|
+
Some ISO 639-2 codes that are commonly used for languages do not precisely represent a particular language or some related languages (as the above macrolanguages). They are regarded as collective languages (or collectives) and are excluded from ISO 639-3.
|
19
|
+
|
20
|
+
|
21
|
+
CREATE TABLE ISO_639-3_Names (
|
22
|
+
Id char(3) NOT NULL, -- The three-letter 639-3 identifier
|
23
|
+
Print_Name varchar(75) NOT NULL, -- One of the names associated with this identifier
|
24
|
+
Inverted_Name varchar(75) NOT NULL) -- The inverted form of this Print_Name form
|
25
|
+
|
26
|
+
CREATE TABLE ISO_639-3_Macrolanguages (
|
27
|
+
M_Id char(3) NOT NULL, -- The identifier for a macrolanguage
|
28
|
+
I_Id char(3) NOT NULL, -- The identifier for an individual language
|
29
|
+
-- that is a member of the macrolanguage
|
30
|
+
I_Status char(1) NOT NULL) -- A (active) or R (retired) indicating the
|
31
|
+
-- status of the individual code element
|
32
|
+
|
33
|
+
* countries and regions:
|
34
|
+
|
35
|
+
- ISO 3166-1 Codes for the representation of names of countries and their subdivisions – Part 1: Country codes,
|
36
|
+
ISO 3166-2 Codes for the representation of names of countries and their subdivisions – Part 2: Country subdivision codes,
|
37
|
+
ISO 3166-3 Codes for the representation of names of countries and their subdivisions – Part 3: Code for formerly used names of countries
|
38
|
+
|
39
|
+
Maintenance agency: http://www.iso.org/iso/country_codes
|
40
|
+
Last update: 2008-04-25 (http://www.iso.org/iso/country_codes/check_what_s_new.htm)
|
41
|
+
|
42
|
+
* currencies:
|
43
|
+
|
44
|
+
- ISO 4217:2008 Codes for the representation of currencies and funds
|
45
|
+
ISO 4217:2001 Codes for the representation of currencies and funds
|
46
|
+
|
47
|
+
Maintenance agency: http://www.bsi-global.com/en/Standards-and-Publications/Industry-Sectors/Services/BSI-Currency-Code-Service/
|
48
|
+
|
49
|
+
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
Binary file
|
data/lib/iso_codes.rb
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
#
|
2
2
|
# iso_codes.rb - A comprehensive database of ISO codes
|
3
3
|
#
|
4
|
-
# Written by Marius L. Jøhndal, 2008, 2010.
|
4
|
+
# Written by Marius L. Jøhndal, 2008, 2010, 2011.
|
5
5
|
#
|
6
6
|
require 'zlib'
|
7
7
|
|
8
8
|
module ISOCodes
|
9
|
-
# Version of the ISO 639-3 code set (see
|
9
|
+
# Version of the ISO 639-3 code set supported (see
|
10
10
|
# http://www.sil.org/iso639-3/download.asp).
|
11
|
-
ISO_639_3_VERSION = '20100330'
|
11
|
+
ISO_639_3_VERSION = '20100707'
|
12
12
|
|
13
|
-
# Version of the ISO 639-3 macrolanguage mappings (see
|
13
|
+
# Version of the ISO 639-3 macrolanguage mappings supported (see
|
14
14
|
# http://www.sil.org/iso639-3/download.asp).
|
15
15
|
ISO_639_3_MACROLANGUAGE_MAPPINGS_VERSION = '20100128'
|
16
16
|
|
17
17
|
class Language
|
18
|
-
#
|
18
|
+
# ISO 639-3 identifier for the language, or +nil+ if none is defined.
|
19
19
|
attr_reader :identifier
|
20
20
|
|
21
|
-
#
|
22
|
-
# +nil+ if none is defined.
|
21
|
+
# ISO 639-2 identifier of the bibliographic applications code set for
|
22
|
+
# the language, or +nil+ if none is defined.
|
23
23
|
attr_reader :alpha3_bibliographic
|
24
24
|
|
25
|
-
#
|
26
|
-
# +nil+ if none is defined.
|
25
|
+
# ISO 639-2 identifier of the terminology applications code set for the
|
26
|
+
# language, or +nil+ if none is defined.
|
27
27
|
attr_reader :alpha3_terminology
|
28
28
|
|
29
|
-
#
|
29
|
+
# ISO 639-1 identifier for the language, or +nil+ if none is defined.
|
30
30
|
attr_reader :alpha2
|
31
31
|
|
32
|
-
#
|
33
|
-
# <tt>:
|
34
|
-
# <tt>:constructed</tt>, or <tt>:special</tt>. See
|
35
|
-
# for a description
|
32
|
+
# Language type for the language. Language type is either
|
33
|
+
# <tt>:living</tt>, <tt>:extinct</tt>, <tt>:ancient</tt>,
|
34
|
+
# <tt>:historic</tt>, <tt>:constructed</tt>, or <tt>:special</tt>. See
|
35
|
+
# http://www.sil.org/iso639-3/types.asp for a description.
|
36
36
|
attr_reader :language_type
|
37
37
|
|
38
|
-
#
|
38
|
+
# Reference name for the language.
|
39
39
|
attr_reader :reference_name
|
40
40
|
|
41
41
|
def initialize(identifier, part2b, part2t, part1, language_type, ref_name)
|
@@ -56,8 +56,8 @@ module ISOCodes
|
|
56
56
|
@macrolanguage = macrolanguage
|
57
57
|
end
|
58
58
|
|
59
|
-
# Returns the macrolanguage
|
60
|
-
# macrolanguage.
|
59
|
+
# Returns the macrolanguage the language belongs to or +nil+ if not
|
60
|
+
# part of any macrolanguage.
|
61
61
|
def macrolanguage
|
62
62
|
ISOCodes::find_iso_639_3_language(@macrolanguage)
|
63
63
|
end
|
@@ -71,7 +71,8 @@ module ISOCodes
|
|
71
71
|
@individual_languages = individual_languages
|
72
72
|
end
|
73
73
|
|
74
|
-
# Returns an array of individual languages
|
74
|
+
# Returns an array of individual languages or an empty array if no
|
75
|
+
# individual languages are defined.
|
75
76
|
def individual_languages
|
76
77
|
@individual_languages.map { |c| ISOCodes::find_iso_639_3_language(c) }
|
77
78
|
end
|
@@ -79,13 +80,13 @@ module ISOCodes
|
|
79
80
|
|
80
81
|
class << self
|
81
82
|
# Returns an object describing the language identified by the language
|
82
|
-
# code
|
83
|
+
# code +code+.
|
83
84
|
def find_language(code)
|
84
85
|
find_iso_639_3_language(code)
|
85
86
|
end
|
86
87
|
|
87
|
-
# Returns an object describing the language identified by
|
88
|
-
# identifier
|
88
|
+
# Returns an object describing the language identified by the ISO 639-3
|
89
|
+
# identifier +code+.
|
89
90
|
def find_iso_639_3_language(code)
|
90
91
|
if @@iso_639_3.has_key?(code)
|
91
92
|
klass, *rest = @@iso_639_3[code]
|
@@ -98,15 +99,6 @@ module ISOCodes
|
|
98
99
|
|
99
100
|
private
|
100
101
|
|
101
|
-
LANGUAGE_TYPES = {
|
102
|
-
'L' => :living,
|
103
|
-
'E' => :extinct,
|
104
|
-
'A' => :ancient,
|
105
|
-
'H' => :historic,
|
106
|
-
'C' => :constructed,
|
107
|
-
'S' => :special,
|
108
|
-
}.freeze
|
109
|
-
|
110
102
|
DATA_PATH = File.expand_path(File.dirname(__FILE__))
|
111
103
|
|
112
104
|
class << self
|
@@ -114,7 +106,7 @@ module ISOCodes
|
|
114
106
|
File.join(DATA_PATH, filename)
|
115
107
|
end
|
116
108
|
|
117
|
-
def read_data_file(filename, field_count, delimiter
|
109
|
+
def read_data_file(filename, field_count, delimiter, skip_first)
|
118
110
|
Zlib::GzipReader.open(get_data_filename(filename)).each_line do |l|
|
119
111
|
if skip_first
|
120
112
|
skip_first = false
|
@@ -135,16 +127,26 @@ module ISOCodes
|
|
135
127
|
raise ArgumentError, "missing identifier" if identifier.nil?
|
136
128
|
raise ArgumentError, "missing reference name" if ref_name.nil?
|
137
129
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
130
|
+
klass =
|
131
|
+
case scope
|
132
|
+
when 'I': IndividualLanguage
|
133
|
+
when 'M': Macrolanguage
|
134
|
+
when 'S': nil # FIXME
|
135
|
+
else
|
136
|
+
raise ArgumentError, "invalid scope"
|
137
|
+
end
|
138
|
+
|
139
|
+
language_type =
|
140
|
+
case language_type
|
141
|
+
when 'L': :living
|
142
|
+
when 'E': :extinct
|
143
|
+
when 'A': :ancient
|
144
|
+
when 'H': :historic
|
145
|
+
when 'C': :constructed
|
146
|
+
when 'S': :special
|
147
|
+
else
|
148
|
+
raise ArgumentError, "invalid language type"
|
149
|
+
end
|
148
150
|
|
149
151
|
data[identifier] = [klass, nil, nil, identifier, part2b, part2t, part1, language_type, ref_name]
|
150
152
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iso-codes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
- 1
|
8
|
+
- 2
|
8
9
|
- 0
|
9
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- "Marius L. J\xC3\xB8hndal"
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
+
date: 2011-01-19 00:00:00 +00:00
|
18
19
|
default_executable:
|
19
20
|
dependencies: []
|
20
21
|
|
@@ -27,45 +28,50 @@ extensions: []
|
|
27
28
|
extra_rdoc_files:
|
28
29
|
- README.rdoc
|
29
30
|
- README.todo
|
31
|
+
- TODO
|
30
32
|
files:
|
31
|
-
- .gitignore
|
32
33
|
- CHANGELOG
|
33
34
|
- README.rdoc
|
34
35
|
- Rakefile
|
35
36
|
- VERSION
|
36
37
|
- iso-codes.gemspec
|
37
38
|
- lib/iso-639-3-macrolanguages_20100128.tab.gz
|
38
|
-
- lib/iso-639-3_20100330.tab.gz
|
39
|
+
- lib/iso-639-3_20100707.tab.gz
|
39
40
|
- lib/iso_codes.rb
|
40
41
|
- test/test_iso_codes.rb
|
41
42
|
- README.todo
|
43
|
+
- TODO
|
42
44
|
has_rdoc: true
|
43
45
|
homepage: http://github.com/mlj/iso-codes
|
44
46
|
licenses: []
|
45
47
|
|
46
48
|
post_install_message:
|
47
|
-
rdoc_options:
|
48
|
-
|
49
|
+
rdoc_options: []
|
50
|
+
|
49
51
|
require_paths:
|
50
52
|
- lib
|
51
53
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
58
|
+
hash: 3
|
55
59
|
segments:
|
56
60
|
- 0
|
57
61
|
version: "0"
|
58
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
59
64
|
requirements:
|
60
65
|
- - ">="
|
61
66
|
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
62
68
|
segments:
|
63
69
|
- 0
|
64
70
|
version: "0"
|
65
71
|
requirements: []
|
66
72
|
|
67
73
|
rubyforge_project: iso-codes
|
68
|
-
rubygems_version: 1.3.
|
74
|
+
rubygems_version: 1.3.7
|
69
75
|
signing_key:
|
70
76
|
specification_version: 3
|
71
77
|
summary: ISO language and script code support
|
data/.gitignore
DELETED