iso-639 0.2.10 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/iso-639.rb CHANGED
@@ -1,541 +1,58 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # http://www.loc.gov/standards/iso639-2/ascii_8bits.html
3
+ require 'csv'
4
4
 
5
5
  class ISO_639 < Array
6
- # Redefine `[]` to freeze all strings and arrays.
7
- #
8
- # Ruby 2.3+ uses the `frozen_string_literal` magic comment to freeze all
9
- # strings, while previous versions require the `#map` approach.
10
- def self.[](a3_bib, a3_term, a2, english_name, french_name)
11
- if a3_bib.frozen? # then Ruby 2.3+
12
- super(a3_bib, a3_term, a2, english_name, french_name).freeze
13
- else
14
- super(
15
- a3_bib.freeze,
16
- a3_term.freeze,
17
- a2.freeze,
18
- english_name.freeze,
19
- french_name.freeze
20
- ).freeze
21
- end
22
- end
23
-
24
- # The ISO 639-2 dataset as an array of entries. Each entry is an array with
25
- # the following format:
6
+ # Load the ISO 639-2 dataset as an array of entries. Each entry is an array
7
+ # with the following format:
26
8
  # * [0]: an alpha-3 (bibliographic) code
27
9
  # * [1]: an alpha-3 (terminologic) code (when given)
28
10
  # * [2]: an alpha-2 code (when given)
29
11
  # * [3]: an English name
30
12
  # * [4]: a French name of a language
31
- ISO_639_2 = [
32
- self["aar", "", "aa", "Afar", "afar"],
33
- self["abk", "", "ab", "Abkhazian", "abkhaze"],
34
- self["ace", "", "", "Achinese", "aceh"],
35
- self["ach", "", "", "Acoli", "acoli"],
36
- self["ada", "", "", "Adangme", "adangme"],
37
- self["ady", "", "", "Adyghe; Adygei", "adyghé"],
38
- self["afa", "", "", "Afro-Asiatic languages", "afro-asiatiques, langues"],
39
- self["afh", "", "", "Afrihili", "afrihili"],
40
- self["afr", "", "af", "Afrikaans", "afrikaans"],
41
- self["ain", "", "", "Ainu", "aïnou"],
42
- self["aka", "", "ak", "Akan", "akan"],
43
- self["akk", "", "", "Akkadian", "akkadien"],
44
- self["alb", "sqi", "sq", "Albanian", "albanais"],
45
- self["ale", "", "", "Aleut", "aléoute"],
46
- self["alg", "", "", "Algonquian languages", "algonquines, langues"],
47
- self["alt", "", "", "Southern Altai", "altai du Sud"],
48
- self["amh", "", "am", "Amharic", "amharique"],
49
- self["ang", "", "", "English, Old (ca.450-1100)", "anglo-saxon (ca.450-1100)"],
50
- self["anp", "", "", "Angika", "angika"],
51
- self["apa", "", "", "Apache languages", "apaches, langues"],
52
- self["ara", "", "ar", "Arabic", "arabe"],
53
- self["arc", "", "", "Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)", "araméen d'empire (700-300 BCE)"],
54
- self["arg", "", "an", "Aragonese", "aragonais"],
55
- self["arm", "hye", "hy", "Armenian", "arménien"],
56
- self["arn", "", "", "Mapudungun; Mapuche", "mapudungun; mapuche; mapuce"],
57
- self["arp", "", "", "Arapaho", "arapaho"],
58
- self["art", "", "", "Artificial languages", "artificielles, langues"],
59
- self["arw", "", "", "Arawak", "arawak"],
60
- self["asm", "", "as", "Assamese", "assamais"],
61
- self["ast", "", "", "Asturian; Bable; Leonese; Asturleonese", "asturien; bable; léonais; asturoléonais"],
62
- self["ath", "", "", "Athapascan languages", "athapascanes, langues"],
63
- self["aus", "", "", "Australian languages", "australiennes, langues"],
64
- self["ava", "", "av", "Avaric", "avar"],
65
- self["ave", "", "ae", "Avestan", "avestique"],
66
- self["awa", "", "", "Awadhi", "awadhi"],
67
- self["aym", "", "ay", "Aymara", "aymara"],
68
- self["aze", "", "az", "Azerbaijani", "azéri"],
69
- self["bad", "", "", "Banda languages", "banda, langues"],
70
- self["bai", "", "", "Bamileke languages", "bamiléké, langues"],
71
- self["bak", "", "ba", "Bashkir", "bachkir"],
72
- self["bal", "", "", "Baluchi", "baloutchi"],
73
- self["bam", "", "bm", "Bambara", "bambara"],
74
- self["ban", "", "", "Balinese", "balinais"],
75
- self["baq", "eus", "eu", "Basque", "basque"],
76
- self["bas", "", "", "Basa", "basa"],
77
- self["bat", "", "", "Baltic languages", "baltes, langues"],
78
- self["bej", "", "", "Beja; Bedawiyet", "bedja"],
79
- self["bel", "", "be", "Belarusian", "biélorusse"],
80
- self["bem", "", "", "Bemba", "bemba"],
81
- self["ben", "", "bn", "Bengali", "bengali"],
82
- self["ber", "", "", "Berber languages", "berbères, langues"],
83
- self["bho", "", "", "Bhojpuri", "bhojpuri"],
84
- self["bih", "", "bh", "Bihari languages", "langues biharis"],
85
- self["bik", "", "", "Bikol", "bikol"],
86
- self["bin", "", "", "Bini; Edo", "bini; edo"],
87
- self["bis", "", "bi", "Bislama", "bichlamar"],
88
- self["bla", "", "", "Siksika", "blackfoot"],
89
- self["bnt", "", "", "Bantu (Other)", "bantoues, autres langues"],
90
- self["bos", "", "bs", "Bosnian", "bosniaque"],
91
- self["bra", "", "", "Braj", "braj"],
92
- self["bre", "", "br", "Breton", "breton"],
93
- self["btk", "", "", "Batak languages", "batak, langues"],
94
- self["bua", "", "", "Buriat", "bouriate"],
95
- self["bug", "", "", "Buginese", "bugi"],
96
- self["bul", "", "bg", "Bulgarian", "bulgare"],
97
- self["bur", "mya", "my", "Burmese", "birman"],
98
- self["byn", "", "", "Blin; Bilin", "blin; bilen"],
99
- self["cad", "", "", "Caddo", "caddo"],
100
- self["cai", "", "", "Central American Indian languages", "amérindiennes de L'Amérique centrale, langues"],
101
- self["car", "", "", "Galibi Carib", "karib; galibi; carib"],
102
- self["cat", "", "ca", "Catalan; Valencian", "catalan; valencien"],
103
- self["cau", "", "", "Caucasian languages", "caucasiennes, langues"],
104
- self["ceb", "", "", "Cebuano", "cebuano"],
105
- self["cel", "", "", "Celtic languages", "celtiques, langues; celtes, langues"],
106
- self["cha", "", "ch", "Chamorro", "chamorro"],
107
- self["chb", "", "", "Chibcha", "chibcha"],
108
- self["che", "", "ce", "Chechen", "tchétchène"],
109
- self["chg", "", "", "Chagatai", "djaghataï"],
110
- self["chi", "zho", "zh", "Chinese", "chinois"],
111
- self["chk", "", "", "Chuukese", "chuuk"],
112
- self["chm", "", "", "Mari", "mari"],
113
- self["chn", "", "", "Chinook jargon", "chinook, jargon"],
114
- self["cho", "", "", "Choctaw", "choctaw"],
115
- self["chp", "", "", "Chipewyan; Dene Suline", "chipewyan"],
116
- self["chr", "", "", "Cherokee", "cherokee"],
117
- self["chu", "", "cu", "Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic", "slavon d'église; vieux slave; slavon liturgique; vieux bulgare"],
118
- self["chv", "", "cv", "Chuvash", "tchouvache"],
119
- self["chy", "", "", "Cheyenne", "cheyenne"],
120
- self["cmc", "", "", "Chamic languages", "chames, langues"],
121
- self["cop", "", "", "Coptic", "copte"],
122
- self["cor", "", "kw", "Cornish", "cornique"],
123
- self["cos", "", "co", "Corsican", "corse"],
124
- self["cpe", "", "", "Creoles and pidgins, English based", "créoles et pidgins basés sur l'anglais"],
125
- self["cpf", "", "", "Creoles and pidgins, French-based ", "créoles et pidgins basés sur le français"],
126
- self["cpp", "", "", "Creoles and pidgins, Portuguese-based ", "créoles et pidgins basés sur le portugais"],
127
- self["cre", "", "cr", "Cree", "cree"],
128
- self["crh", "", "", "Crimean Tatar; Crimean Turkish", "tatar de Crimé"],
129
- self["crp", "", "", "Creoles and pidgins ", "créoles et pidgins"],
130
- self["csb", "", "", "Kashubian", "kachoube"],
131
- self["cus", "", "", "Cushitic languages", "couchitiques, langues"],
132
- self["cze", "ces", "cs", "Czech", "tchèque"],
133
- self["dak", "", "", "Dakota", "dakota"],
134
- self["dan", "", "da", "Danish", "danois"],
135
- self["dar", "", "", "Dargwa", "dargwa"],
136
- self["day", "", "", "Land Dayak languages", "dayak, langues"],
137
- self["del", "", "", "Delaware", "delaware"],
138
- self["den", "", "", "Slave (Athapascan)", "esclave (athapascan)"],
139
- self["dgr", "", "", "Dogrib", "dogrib"],
140
- self["din", "", "", "Dinka", "dinka"],
141
- self["div", "", "dv", "Divehi; Dhivehi; Maldivian", "maldivien"],
142
- self["doi", "", "", "Dogri", "dogri"],
143
- self["dra", "", "", "Dravidian languages", "dravidiennes, langues"],
144
- self["dsb", "", "", "Lower Sorbian", "bas-sorabe"],
145
- self["dua", "", "", "Duala", "douala"],
146
- self["dum", "", "", "Dutch, Middle (ca.1050-1350)", "néerlandais moyen (ca. 1050-1350)"],
147
- self["dut", "nld", "nl", "Dutch; Flemish", "néerlandais; flamand"],
148
- self["dyu", "", "", "Dyula", "dioula"],
149
- self["dzo", "", "dz", "Dzongkha", "dzongkha"],
150
- self["efi", "", "", "Efik", "efik"],
151
- self["egy", "", "", "Egyptian (Ancient)", "égyptien"],
152
- self["eka", "", "", "Ekajuk", "ekajuk"],
153
- self["elx", "", "", "Elamite", "élamite"],
154
- self["eng", "", "en", "English", "anglais"],
155
- self["enm", "", "", "English, Middle (1100-1500)", "anglais moyen (1100-1500)"],
156
- self["epo", "", "eo", "Esperanto", "espéranto"],
157
- self["est", "", "et", "Estonian", "estonien"],
158
- self["ewe", "", "ee", "Ewe", "éwé"],
159
- self["ewo", "", "", "Ewondo", "éwondo"],
160
- self["fan", "", "", "Fang", "fang"],
161
- self["fao", "", "fo", "Faroese", "féroïen"],
162
- self["fat", "", "", "Fanti", "fanti"],
163
- self["fij", "", "fj", "Fijian", "fidjien"],
164
- self["fil", "", "", "Filipino; Pilipino", "filipino; pilipino"],
165
- self["fin", "", "fi", "Finnish", "finnois"],
166
- self["fiu", "", "", "Finno-Ugrian languages", "finno-ougriennes, langues"],
167
- self["fon", "", "", "Fon", "fon"],
168
- self["fre", "fra", "fr", "French", "français"],
169
- self["frm", "", "", "French, Middle (ca.1400-1600)", "français moyen (1400-1600)"],
170
- self["fro", "", "", "French, Old (842-ca.1400)", "français ancien (842-ca.1400)"],
171
- self["frr", "", "", "Northern Frisian", "frison septentrional"],
172
- self["frs", "", "", "Eastern Frisian", "frison oriental"],
173
- self["fry", "", "fy", "Western Frisian", "frison occidental"],
174
- self["ful", "", "ff", "Fulah", "peul"],
175
- self["fur", "", "", "Friulian", "frioulan"],
176
- self["gaa", "", "", "Ga", "ga"],
177
- self["gay", "", "", "Gayo", "gayo"],
178
- self["gba", "", "", "Gbaya", "gbaya"],
179
- self["gem", "", "", "Germanic languages", "germaniques, langues"],
180
- self["geo", "kat", "ka", "Georgian", "géorgien"],
181
- self["ger", "deu", "de", "German", "allemand"],
182
- self["gez", "", "", "Geez", "guèze"],
183
- self["gil", "", "", "Gilbertese", "kiribati"],
184
- self["gla", "", "gd", "Gaelic; Scottish Gaelic", "gaélique; gaélique écossais"],
185
- self["gle", "", "ga", "Irish", "irlandais"],
186
- self["glg", "", "gl", "Galician", "galicien"],
187
- self["glv", "", "gv", "Manx", "manx; mannois"],
188
- self["gmh", "", "", "German, Middle High (ca.1050-1500)", "allemand, moyen haut (ca. 1050-1500)"],
189
- self["goh", "", "", "German, Old High (ca.750-1050)", "allemand, vieux haut (ca. 750-1050)"],
190
- self["gon", "", "", "Gondi", "gond"],
191
- self["gor", "", "", "Gorontalo", "gorontalo"],
192
- self["got", "", "", "Gothic", "gothique"],
193
- self["grb", "", "", "Grebo", "grebo"],
194
- self["grc", "", "", "Greek, Ancient (to 1453)", "grec ancien (jusqu'à 1453)"],
195
- self["gre", "ell", "el", "Greek, Modern (1453-)", "grec moderne (après 1453)"],
196
- self["grn", "", "gn", "Guarani", "guarani"],
197
- self["gsw", "", "", "Swiss German; Alemannic; Alsatian", "suisse alémanique; alémanique; alsacien"],
198
- self["guj", "", "gu", "Gujarati", "goudjrati"],
199
- self["gwi", "", "", "Gwich'in", "gwich'in"],
200
- self["hai", "", "", "Haida", "haida"],
201
- self["hat", "", "ht", "Haitian; Haitian Creole", "haïtien; créole haïtien"],
202
- self["hau", "", "ha", "Hausa", "haoussa"],
203
- self["haw", "", "", "Hawaiian", "hawaïen"],
204
- self["heb", "", "he", "Hebrew", "hébreu"],
205
- self["her", "", "hz", "Herero", "herero"],
206
- self["hil", "", "", "Hiligaynon", "hiligaynon"],
207
- self["him", "", "", "Himachali languages; Western Pahari languages", "langues himachalis; langues paharis occidentales"],
208
- self["hin", "", "hi", "Hindi", "hindi"],
209
- self["hit", "", "", "Hittite", "hittite"],
210
- self["hmn", "", "", "Hmong", "hmong"],
211
- self["hmo", "", "ho", "Hiri Motu", "hiri motu"],
212
- self["hrv", "", "hr", "Croatian", "croate"],
213
- self["hsb", "", "", "Upper Sorbian", "haut-sorabe"],
214
- self["hun", "", "hu", "Hungarian", "hongrois"],
215
- self["hup", "", "", "Hupa", "hupa"],
216
- self["iba", "", "", "Iban", "iban"],
217
- self["ibo", "", "ig", "Igbo", "igbo"],
218
- self["ice", "isl", "is", "Icelandic", "islandais"],
219
- self["ido", "", "io", "Ido", "ido"],
220
- self["iii", "", "ii", "Sichuan Yi; Nuosu", "yi de Sichuan"],
221
- self["ijo", "", "", "Ijo languages", "ijo, langues"],
222
- self["iku", "", "iu", "Inuktitut", "inuktitut"],
223
- self["ile", "", "ie", "Interlingue; Occidental", "interlingue"],
224
- self["ilo", "", "", "Iloko", "ilocano"],
225
- self["ina", "", "ia", "Interlingua (International Auxiliary Language Association)", "interlingua (langue auxiliaire internationale)"],
226
- self["inc", "", "", "Indic languages", "indo-aryennes, langues"],
227
- self["ind", "", "id", "Indonesian", "indonésien"],
228
- self["ine", "", "", "Indo-European languages", "indo-européennes, langues"],
229
- self["inh", "", "", "Ingush", "ingouche"],
230
- self["ipk", "", "ik", "Inupiaq", "inupiaq"],
231
- self["ira", "", "", "Iranian languages", "iraniennes, langues"],
232
- self["iro", "", "", "Iroquoian languages", "iroquoises, langues"],
233
- self["ita", "", "it", "Italian", "italien"],
234
- self["jav", "", "jv", "Javanese", "javanais"],
235
- self["jbo", "", "", "Lojban", "lojban"],
236
- self["jpn", "", "ja", "Japanese", "japonais"],
237
- self["jpr", "", "", "Judeo-Persian", "judéo-persan"],
238
- self["jrb", "", "", "Judeo-Arabic", "judéo-arabe"],
239
- self["kaa", "", "", "Kara-Kalpak", "karakalpak"],
240
- self["kab", "", "", "Kabyle", "kabyle"],
241
- self["kac", "", "", "Kachin; Jingpho", "kachin; jingpho"],
242
- self["kal", "", "kl", "Kalaallisut; Greenlandic", "groenlandais"],
243
- self["kam", "", "", "Kamba", "kamba"],
244
- self["kan", "", "kn", "Kannada", "kannada"],
245
- self["kar", "", "", "Karen languages", "karen, langues"],
246
- self["kas", "", "ks", "Kashmiri", "kashmiri"],
247
- self["kau", "", "kr", "Kanuri", "kanouri"],
248
- self["kaw", "", "", "Kawi", "kawi"],
249
- self["kaz", "", "kk", "Kazakh", "kazakh"],
250
- self["kbd", "", "", "Kabardian", "kabardien"],
251
- self["kha", "", "", "Khasi", "khasi"],
252
- self["khi", "", "", "Khoisan languages", "khoïsan, langues"],
253
- self["khm", "", "km", "Central Khmer", "khmer central"],
254
- self["kho", "", "", "Khotanese; Sakan", "khotanais; sakan"],
255
- self["kik", "", "ki", "Kikuyu; Gikuyu", "kikuyu"],
256
- self["kin", "", "rw", "Kinyarwanda", "rwanda"],
257
- self["kir", "", "ky", "Kirghiz; Kyrgyz", "kirghiz"],
258
- self["kmb", "", "", "Kimbundu", "kimbundu"],
259
- self["kok", "", "", "Konkani", "konkani"],
260
- self["kom", "", "kv", "Komi", "kom"],
261
- self["kon", "", "kg", "Kongo", "kongo"],
262
- self["kor", "", "ko", "Korean", "coréen"],
263
- self["kos", "", "", "Kosraean", "kosrae"],
264
- self["kpe", "", "", "Kpelle", "kpellé"],
265
- self["krc", "", "", "Karachay-Balkar", "karatchai balkar"],
266
- self["krl", "", "", "Karelian", "carélien"],
267
- self["kro", "", "", "Kru languages", "krou, langues"],
268
- self["kru", "", "", "Kurukh", "kurukh"],
269
- self["kua", "", "kj", "Kuanyama; Kwanyama", "kuanyama; kwanyama"],
270
- self["kum", "", "", "Kumyk", "koumyk"],
271
- self["kur", "", "ku", "Kurdish", "kurde"],
272
- self["kut", "", "", "Kutenai", "kutenai"],
273
- self["lad", "", "", "Ladino", "judéo-espagnol"],
274
- self["lah", "", "", "Lahnda", "lahnda"],
275
- self["lam", "", "", "Lamba", "lamba"],
276
- self["lao", "", "lo", "Lao", "lao"],
277
- self["lat", "", "la", "Latin", "latin"],
278
- self["lav", "", "lv", "Latvian", "letton"],
279
- self["lez", "", "", "Lezghian", "lezghien"],
280
- self["lim", "", "li", "Limburgan; Limburger; Limburgish", "limbourgeois"],
281
- self["lin", "", "ln", "Lingala", "lingala"],
282
- self["lit", "", "lt", "Lithuanian", "lituanien"],
283
- self["lol", "", "", "Mongo", "mongo"],
284
- self["loz", "", "", "Lozi", "lozi"],
285
- self["ltz", "", "lb", "Luxembourgish; Letzeburgesch", "luxembourgeois"],
286
- self["lua", "", "", "Luba-Lulua", "luba-lulua"],
287
- self["lub", "", "lu", "Luba-Katanga", "luba-katanga"],
288
- self["lug", "", "lg", "Ganda", "ganda"],
289
- self["lui", "", "", "Luiseno", "luiseno"],
290
- self["lun", "", "", "Lunda", "lunda"],
291
- self["luo", "", "", "Luo (Kenya and Tanzania)", "luo (Kenya et Tanzanie)"],
292
- self["lus", "", "", "Lushai", "lushai"],
293
- self["mac", "mkd", "mk", "Macedonian", "macédonien"],
294
- self["mad", "", "", "Madurese", "madourais"],
295
- self["mag", "", "", "Magahi", "magahi"],
296
- self["mah", "", "mh", "Marshallese", "marshall"],
297
- self["mai", "", "", "Maithili", "maithili"],
298
- self["mak", "", "", "Makasar", "makassar"],
299
- self["mal", "", "ml", "Malayalam", "malayalam"],
300
- self["man", "", "", "Mandingo", "mandingue"],
301
- self["mao", "mri", "mi", "Maori", "maori"],
302
- self["map", "", "", "Austronesian languages", "austronésiennes, langues"],
303
- self["mar", "", "mr", "Marathi", "marathe"],
304
- self["mas", "", "", "Masai", "massaï"],
305
- self["may", "msa", "ms", "Malay", "malais"],
306
- self["mdf", "", "", "Moksha", "moksa"],
307
- self["mdr", "", "", "Mandar", "mandar"],
308
- self["men", "", "", "Mende", "mendé"],
309
- self["mga", "", "", "Irish, Middle (900-1200)", "irlandais moyen (900-1200)"],
310
- self["mic", "", "", "Mi'kmaq; Micmac", "mi'kmaq; micmac"],
311
- self["min", "", "", "Minangkabau", "minangkabau"],
312
- self["mis", "", "", "Uncoded languages", "langues non codées"],
313
- self["mkh", "", "", "Mon-Khmer languages", "môn-khmer, langues"],
314
- self["mlg", "", "mg", "Malagasy", "malgache"],
315
- self["mlt", "", "mt", "Maltese", "maltais"],
316
- self["mnc", "", "", "Manchu", "mandchou"],
317
- self["mni", "", "", "Manipuri", "manipuri"],
318
- self["mno", "", "", "Manobo languages", "manobo, langues"],
319
- self["moh", "", "", "Mohawk", "mohawk"],
320
- self["mon", "", "mn", "Mongolian", "mongol"],
321
- self["mos", "", "", "Mossi", "moré"],
322
- self["mul", "", "", "Multiple languages", "multilingue"],
323
- self["mun", "", "", "Munda languages", "mounda, langues"],
324
- self["mus", "", "", "Creek", "muskogee"],
325
- self["mwl", "", "", "Mirandese", "mirandais"],
326
- self["mwr", "", "", "Marwari", "marvari"],
327
- self["myn", "", "", "Mayan languages", "maya, langues"],
328
- self["myv", "", "", "Erzya", "erza"],
329
- self["nah", "", "", "Nahuatl languages", "nahuatl, langues"],
330
- self["nai", "", "", "North American Indian languages", "nord-amérindiennes, langues"],
331
- self["nap", "", "", "Neapolitan", "napolitain"],
332
- self["nau", "", "na", "Nauru", "nauruan"],
333
- self["nav", "", "nv", "Navajo; Navaho", "navaho"],
334
- self["nbl", "", "nr", "Ndebele, South; South Ndebele", "ndébélé du Sud"],
335
- self["nde", "", "nd", "Ndebele, North; North Ndebele", "ndébélé du Nord"],
336
- self["ndo", "", "ng", "Ndonga", "ndonga"],
337
- self["nds", "", "", "Low German; Low Saxon; German, Low; Saxon, Low", "bas allemand; bas saxon; allemand, bas; saxon, bas"],
338
- self["nep", "", "ne", "Nepali", "népalais"],
339
- self["new", "", "", "Nepal Bhasa; Newari", "nepal bhasa; newari"],
340
- self["nia", "", "", "Nias", "nias"],
341
- self["nic", "", "", "Niger-Kordofanian languages", "nigéro-kordofaniennes, langues"],
342
- self["niu", "", "", "Niuean", "niué"],
343
- self["nno", "", "nn", "Norwegian Nynorsk; Nynorsk, Norwegian", "norvégien nynorsk; nynorsk, norvégien"],
344
- self["nob", "", "nb", "Bokmål, Norwegian; Norwegian Bokmål", "norvégien bokmål"],
345
- self["nog", "", "", "Nogai", "nogaï; nogay"],
346
- self["non", "", "", "Norse, Old", "norrois, vieux"],
347
- self["nor", "", "no", "Norwegian", "norvégien"],
348
- self["nqo", "", "", "N'Ko", "n'ko"],
349
- self["nso", "", "", "Pedi; Sepedi; Northern Sotho", "pedi; sepedi; sotho du Nord"],
350
- self["nub", "", "", "Nubian languages", "nubiennes, langues"],
351
- self["nwc", "", "", "Classical Newari; Old Newari; Classical Nepal Bhasa", "newari classique"],
352
- self["nya", "", "ny", "Chichewa; Chewa; Nyanja", "chichewa; chewa; nyanja"],
353
- self["nym", "", "", "Nyamwezi", "nyamwezi"],
354
- self["nyn", "", "", "Nyankole", "nyankolé"],
355
- self["nyo", "", "", "Nyoro", "nyoro"],
356
- self["nzi", "", "", "Nzima", "nzema"],
357
- self["oci", "", "oc", "Occitan (post 1500); Provençal", "occitan (après 1500); provençal"],
358
- self["oji", "", "oj", "Ojibwa", "ojibwa"],
359
- self["ori", "", "or", "Oriya", "oriya"],
360
- self["orm", "", "om", "Oromo", "galla"],
361
- self["osa", "", "", "Osage", "osage"],
362
- self["oss", "", "os", "Ossetian; Ossetic", "ossète"],
363
- self["ota", "", "", "Turkish, Ottoman (1500-1928)", "turc ottoman (1500-1928)"],
364
- self["oto", "", "", "Otomian languages", "otomi, langues"],
365
- self["paa", "", "", "Papuan languages", "papoues, langues"],
366
- self["pag", "", "", "Pangasinan", "pangasinan"],
367
- self["pal", "", "", "Pahlavi", "pahlavi"],
368
- self["pam", "", "", "Pampanga; Kapampangan", "pampangan"],
369
- self["pan", "", "pa", "Panjabi; Punjabi", "pendjabi"],
370
- self["pap", "", "", "Papiamento", "papiamento"],
371
- self["pau", "", "", "Palauan", "palau"],
372
- self["peo", "", "", "Persian, Old (ca.600-400 B.C.)", "perse, vieux (ca. 600-400 av. J.-C.)"],
373
- self["per", "fas", "fa", "Persian", "persan"],
374
- self["phi", "", "", "Philippine languages", "philippines, langues"],
375
- self["phn", "", "", "Phoenician", "phénicien"],
376
- self["pli", "", "pi", "Pali", "pali"],
377
- self["pol", "", "pl", "Polish", "polonais"],
378
- self["pon", "", "", "Pohnpeian", "pohnpei"],
379
- self["por", "", "pt", "Portuguese", "portugais"],
380
- self["pra", "", "", "Prakrit languages", "prâkrit, langues"],
381
- self["pro", "", "", "Provençal, Old (to 1500)", "provençal ancien (jusqu'à 1500)"],
382
- self["pus", "", "ps", "Pushto; Pashto", "pachto"],
383
- self["qaa-qtz", "", "", "Reserved for local use", "réservée à l'usage local"],
384
- self["que", "", "qu", "Quechua", "quechua"],
385
- self["raj", "", "", "Rajasthani", "rajasthani"],
386
- self["rap", "", "", "Rapanui", "rapanui"],
387
- self["rar", "", "", "Rarotongan; Cook Islands Maori", "rarotonga; maori des îles Cook"],
388
- self["roa", "", "", "Romance languages", "romanes, langues"],
389
- self["roh", "", "rm", "Romansh", "romanche"],
390
- self["rom", "", "", "Romany", "tsigane"],
391
- self["rum", "ron", "ro", "Romanian; Moldavian; Moldovan", "roumain; moldave"],
392
- self["run", "", "rn", "Rundi", "rundi"],
393
- self["rup", "", "", "Aromanian; Arumanian; Macedo-Romanian", "aroumain; macédo-roumain"],
394
- self["rus", "", "ru", "Russian", "russe"],
395
- self["sad", "", "", "Sandawe", "sandawe"],
396
- self["sag", "", "sg", "Sango", "sango"],
397
- self["sah", "", "", "Yakut", "iakoute"],
398
- self["sai", "", "", "South American Indian (Other)", "indiennes d'Amérique du Sud, autres langues"],
399
- self["sal", "", "", "Salishan languages", "salishennes, langues"],
400
- self["sam", "", "", "Samaritan Aramaic", "samaritain"],
401
- self["san", "", "sa", "Sanskrit", "sanskrit"],
402
- self["sas", "", "", "Sasak", "sasak"],
403
- self["sat", "", "", "Santali", "santal"],
404
- self["scn", "", "", "Sicilian", "sicilien"],
405
- self["sco", "", "", "Scots", "écossais"],
406
- self["sel", "", "", "Selkup", "selkoupe"],
407
- self["sem", "", "", "Semitic languages", "sémitiques, langues"],
408
- self["sga", "", "", "Irish, Old (to 900)", "irlandais ancien (jusqu'à 900)"],
409
- self["sgn", "", "", "Sign Languages", "langues des signes"],
410
- self["shn", "", "", "Shan", "chan"],
411
- self["sid", "", "", "Sidamo", "sidamo"],
412
- self["sin", "", "si", "Sinhala; Sinhalese", "singhalais"],
413
- self["sio", "", "", "Siouan languages", "sioux, langues"],
414
- self["sit", "", "", "Sino-Tibetan languages", "sino-tibétaines, langues"],
415
- self["sla", "", "", "Slavic languages", "slaves, langues"],
416
- self["slo", "slk", "sk", "Slovak", "slovaque"],
417
- self["slv", "", "sl", "Slovenian", "slovène"],
418
- self["sma", "", "", "Southern Sami", "sami du Sud"],
419
- self["sme", "", "se", "Northern Sami", "sami du Nord"],
420
- self["smi", "", "", "Sami languages", "sames, langues"],
421
- self["smj", "", "", "Lule Sami", "sami de Lule"],
422
- self["smn", "", "", "Inari Sami", "sami d'Inari"],
423
- self["smo", "", "sm", "Samoan", "samoan"],
424
- self["sms", "", "", "Skolt Sami", "sami skolt"],
425
- self["sna", "", "sn", "Shona", "shona"],
426
- self["snd", "", "sd", "Sindhi", "sindhi"],
427
- self["snk", "", "", "Soninke", "soninké"],
428
- self["sog", "", "", "Sogdian", "sogdien"],
429
- self["som", "", "so", "Somali", "somali"],
430
- self["son", "", "", "Songhai languages", "songhai, langues"],
431
- self["sot", "", "st", "Sotho, Southern", "sotho du Sud"],
432
- self["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"],
433
- self["srd", "", "sc", "Sardinian", "sarde"],
434
- self["srn", "", "", "Sranan Tongo", "sranan tongo"],
435
- self["srp", "", "sr", "Serbian", "serbe"],
436
- self["srr", "", "", "Serer", "sérère"],
437
- self["ssa", "", "", "Nilo-Saharan languages", "nilo-sahariennes, langues"],
438
- self["ssw", "", "ss", "Swati", "swati"],
439
- self["suk", "", "", "Sukuma", "sukuma"],
440
- self["sun", "", "su", "Sundanese", "soundanais"],
441
- self["sus", "", "", "Susu", "soussou"],
442
- self["sux", "", "", "Sumerian", "sumérien"],
443
- self["swa", "", "sw", "Swahili", "swahili"],
444
- self["swe", "", "sv", "Swedish", "suédois"],
445
- self["syc", "", "", "Classical Syriac", "syriaque classique"],
446
- self["syr", "", "", "Syriac", "syriaque"],
447
- self["tah", "", "ty", "Tahitian", "tahitien"],
448
- self["tai", "", "", "Tai languages", "tai, langues"],
449
- self["tam", "", "ta", "Tamil", "tamoul"],
450
- self["tat", "", "tt", "Tatar", "tatar"],
451
- self["tel", "", "te", "Telugu", "télougou"],
452
- self["tem", "", "", "Timne", "temne"],
453
- self["ter", "", "", "Tereno", "tereno"],
454
- self["tet", "", "", "Tetum", "tetum"],
455
- self["tgk", "", "tg", "Tajik", "tadjik"],
456
- self["tgl", "", "tl", "Tagalog", "tagalog"],
457
- self["tha", "", "th", "Thai", "thaï"],
458
- self["tib", "bod", "bo", "Tibetan", "tibétain"],
459
- self["tig", "", "", "Tigre", "tigré"],
460
- self["tir", "", "ti", "Tigrinya", "tigrigna"],
461
- self["tiv", "", "", "Tiv", "tiv"],
462
- self["tkl", "", "", "Tokelau", "tokelau"],
463
- self["tlh", "", "", "Klingon; tlhIngan-Hol", "klingon"],
464
- self["tli", "", "", "Tlingit", "tlingit"],
465
- self["tmh", "", "", "Tamashek", "tamacheq"],
466
- self["tog", "", "", "Tonga (Nyasa)", "tonga (Nyasa)"],
467
- self["ton", "", "to", "Tonga (Tonga Islands)", "tongan (Îles Tonga)"],
468
- self["tpi", "", "", "Tok Pisin", "tok pisin"],
469
- self["tsi", "", "", "Tsimshian", "tsimshian"],
470
- self["tsn", "", "tn", "Tswana", "tswana"],
471
- self["tso", "", "ts", "Tsonga", "tsonga"],
472
- self["tuk", "", "tk", "Turkmen", "turkmène"],
473
- self["tum", "", "", "Tumbuka", "tumbuka"],
474
- self["tup", "", "", "Tupi languages", "tupi, langues"],
475
- self["tur", "", "tr", "Turkish", "turc"],
476
- self["tut", "", "", "Altaic languages", "altaïques, langues"],
477
- self["tvl", "", "", "Tuvalu", "tuvalu"],
478
- self["twi", "", "tw", "Twi", "twi"],
479
- self["tyv", "", "", "Tuvinian", "touva"],
480
- self["udm", "", "", "Udmurt", "oudmourte"],
481
- self["uga", "", "", "Ugaritic", "ougaritique"],
482
- self["uig", "", "ug", "Uighur; Uyghur", "ouïgour"],
483
- self["ukr", "", "uk", "Ukrainian", "ukrainien"],
484
- self["umb", "", "", "Umbundu", "umbundu"],
485
- self["und", "", "", "Undetermined", "indéterminée"],
486
- self["urd", "", "ur", "Urdu", "ourdou"],
487
- self["uzb", "", "uz", "Uzbek", "ouszbek"],
488
- self["vai", "", "", "Vai", "vaï"],
489
- self["ven", "", "ve", "Venda", "venda"],
490
- self["vie", "", "vi", "Vietnamese", "vietnamien"],
491
- self["vol", "", "vo", "Volapük", "volapük"],
492
- self["vot", "", "", "Votic", "vote"],
493
- self["wak", "", "", "Wakashan languages", "wakashanes, langues"],
494
- self["wal", "", "", "Walamo", "walamo"],
495
- self["war", "", "", "Waray", "waray"],
496
- self["was", "", "", "Washo", "washo"],
497
- self["wel", "cym", "cy", "Welsh", "gallois"],
498
- self["wen", "", "", "Sorbian languages", "sorabes, langues"],
499
- self["wln", "", "wa", "Walloon", "wallon"],
500
- self["wol", "", "wo", "Wolof", "wolof"],
501
- self["xal", "", "", "Kalmyk; Oirat", "kalmouk; oïrat"],
502
- self["xho", "", "xh", "Xhosa", "xhosa"],
503
- self["yao", "", "", "Yao", "yao"],
504
- self["yap", "", "", "Yapese", "yapois"],
505
- self["yid", "", "yi", "Yiddish", "yiddish"],
506
- self["yor", "", "yo", "Yoruba", "yoruba"],
507
- self["ypk", "", "", "Yupik languages", "yupik, langues"],
508
- self["zap", "", "", "Zapotec", "zapotèque"],
509
- self["zbl", "", "", "Blissymbols; Blissymbolics; Bliss", "symboles Bliss; Bliss"],
510
- self["zen", "", "", "Zenaga", "zenaga"],
511
- self["zha", "", "za", "Zhuang; Chuang", "zhuang; chuang"],
512
- self["znd", "", "", "Zande languages", "zandé, langues"],
513
- self["zul", "", "zu", "Zulu", "zoulou"],
514
- self["zun", "", "", "Zuni", "zuni"],
515
- self["zxx", "", "", "No linguistic content; Not applicable", "pas de contenu linguistique; non applicable"],
516
- self["zza", "", "", "Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki", "zaza; dimili; dimli; kirdki; kirmanjki; zazaki"]
517
- ].freeze
13
+ #
14
+ # Dataset Source:
15
+ # https://www.loc.gov/standards/iso639-2/ascii_8bits.html
16
+ # https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
17
+ ISO_639_2 = lambda do
18
+ dataset = []
19
+
20
+ File.open(
21
+ File.join(File.dirname(__FILE__), 'data', 'ISO-639-2_utf-8.txt'),
22
+ 'r:bom|utf-8'
23
+ ) do |file|
24
+ CSV.new(file, **{ col_sep: '|' }).each do |row|
25
+ dataset << self[*row.map { |v| v || '' }].freeze
26
+ end
27
+ end
28
+
29
+ dataset
30
+ end.call.freeze
518
31
 
519
32
  # An inverted index generated from the ISO_639_2 data. Used for searching
520
33
  # all words and codes in all fields.
521
34
  INVERTED_INDEX = lambda do
522
35
  index = {}
36
+
523
37
  ISO_639_2.each_with_index do |record, i|
524
38
  record.each do |field|
525
39
  downcased = field.downcase
40
+
526
41
  words = (
527
42
  downcased.split(/[[:blank:]]|\(|\)|,|;/) +
528
43
  downcased.split(/;/)
529
44
  )
45
+
530
46
  words.each do |word|
531
47
  unless word.empty?
532
48
  index[word] ||= []
533
- index[word] << i
49
+ index[word] << i
534
50
  end
535
51
  end
536
52
  end
537
53
  end
538
- return index
54
+
55
+ index
539
56
  end.call.freeze
540
57
 
541
58
  # The ISO 639-1 dataset as an array of entries. Each entry is an array with
@@ -555,7 +72,7 @@ class ISO_639 < Array
555
72
  return if code.nil?
556
73
 
557
74
  case code.length
558
- when 3
75
+ when 3, 7
559
76
  ISO_639_2.detect do |entry|
560
77
  entry if [entry.alpha3, entry.alpha3_terminologic].include?(code)
561
78
  end
@@ -565,6 +82,7 @@ class ISO_639 < Array
565
82
  end
566
83
  end
567
84
  end
85
+
568
86
  alias_method :find, :find_by_code
569
87
 
570
88
  # Returns the entry array for a language specified by its English name.
@@ -585,9 +103,11 @@ class ISO_639 < Array
585
103
  # of any kind, or it can be one of the words contained in the English or
586
104
  # French name field.
587
105
  def search(term)
588
- term ||= ''
106
+ term ||= ''
107
+
589
108
  normalized_term = term.downcase.strip
590
109
  indexes = INVERTED_INDEX[normalized_term]
110
+
591
111
  indexes ? ISO_639_2.values_at(*indexes).uniq : []
592
112
  end
593
113
  end
@@ -596,6 +116,7 @@ class ISO_639 < Array
596
116
  def alpha3_bibliographic
597
117
  self[0]
598
118
  end
119
+
599
120
  alias_method :alpha3, :alpha3_bibliographic
600
121
 
601
122
  # The entry's alpha-3 terminologic (when given)