Linguistics 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # linguistics/iso639.rb - A hash of International 2- and 3-letter
4
+ # ISO639-1 and ISO639-2 language codes. Each entry has two keys:
5
+ #
6
+ # [<tt>:codes</tt>]
7
+ # All of the codes known for this language
8
+ # [<tt>:desc</tt>]
9
+ # The English-language description of the language.
10
+ #
11
+
12
### A language-independent framework for adding linguistics functions to Ruby
### classes.
module Linguistics

  # Hash of ISO639 2- and 3-letter language codes. Each code (a String) maps
  # to an entry Hash with two keys:
  #
  # [<tt>:codes</tt>]
  #   Array of all the codes listed for the language.
  # [<tt>:desc</tt>]
  #   The English-language description of the language.
  #
  # Every code that appears on the same data line shares one entry object.
  LanguageCodes = {}

  # Read through the source for this file, capturing everything between the
  # __END__ and __END_DATA__ tokens. Each captured line is a fixed-width
  # record: the first 15 columns hold the codes (separated by "/" or
  # whitespace) and the remaining columns hold the description.
  #
  # Raises a RuntimeError if the same code is listed more than once.
  inDataSection = false
  File::readlines( __FILE__ ).each {|line|
    case line
    when /^__END_DATA__$/
      inDataSection = false

    when /^__END__$/
      inDataSection = true

    else
      next unless inDataSection

      # Splitting on "/" or whitespace can yield empty strings (for example
      # from leading blanks), so filter them out.
      codes = line[0,15].split( %r{/|\s+} ).reject {|code| code.empty?}

      # A blank line yields no codes; skip it rather than failing on a
      # missing description column.
      next if codes.empty?

      # line[15..-1] is nil when the line is shorter than the code columns.
      # strip removes the trailing newline when there is one, so a final line
      # without a newline keeps its last character.
      desc = ( line[15..-1] || '' ).strip

      entry = {
        :desc  => desc,
        :codes => codes,
      }

      codes.each {|code|
        if LanguageCodes.key?( code )
          raise "Duplicate language code #{code}: " \
            "(#{LanguageCodes[code][:desc]})"
        end
        LanguageCodes[ code ] = entry
      }
    end
  }
end
50
+
51
+ __END__
52
+ abk ab Abkhazian
53
+ ace Achinese
54
+ ach Acoli
55
+ ada Adangme
56
+ aar aa Afar
57
+ afh Afrihili
58
+ afr af Afrikaans
59
+ afa Afro-Asiatic (Other)
60
+ aka Akan
61
+ akk Akkadian
62
+ alb/sqi sq Albanian
63
+ ale Aleut
64
+ alg Algonquian languages
65
+ tut Altaic (Other)
66
+ amh am Amharic
67
+ apa Apache languages
68
+ ara ar Arabic
69
+ arc Aramaic
70
+ arp Arapaho
71
+ arn Araucanian
72
+ arw Arawak
73
+ arm/hye hy Armenian
74
+ art Artificial (Other)
75
+ asm as Assamese
76
+ ath Athapascan languages
77
+ map Austronesian (Other)
78
+ ava Avaric
79
+ ave Avestan
80
+ awa Awadhi
81
+ aym ay Aymara
82
+ aze az Azerbaijani
83
+ nah Aztec
84
+ ban Balinese
85
+ bat Baltic (Other)
86
+ bal Baluchi
87
+ bam Bambara
88
+ bai Bamileke languages
89
+ bad Banda
90
+ bnt Bantu (Other)
91
+ bas Basa
92
+ bak ba Bashkir
93
+ baq/eus eu Basque
94
+ bej Beja
95
+ bem Bemba
96
+ ben bn Bengali
97
+ ber Berber (Other)
98
+ bho Bhojpuri
99
+ bih bh Bihari
100
+ bik Bikol
101
+ bin Bini
102
+ bis bi Bislama
103
+ bra Braj
104
+ bre br Breton
105
+ bug Buginese
106
+ bul bg Bulgarian
107
+ bua Buriat
108
+ bur/mya my Burmese
109
+ bel be Byelorussian
110
+ cad Caddo
111
+ car Carib
112
+ cat ca Catalan
113
+ cau Caucasian (Other)
114
+ ceb Cebuano
115
+ cel Celtic (Other)
116
+ cai Central American Indian (Other)
117
+ chg Chagatai
118
+ cha Chamorro
119
+ che Chechen
120
+ chr Cherokee
121
+ chy Cheyenne
122
+ chb Chibcha
123
+ chi/zho zh Chinese
124
+ chn Chinook jargon
125
+ cho Choctaw
126
+ chu Church Slavic
127
+ chv Chuvash
128
+ cop Coptic
129
+ cor Cornish
130
+ cos co Corsican
131
+ cre Cree
132
+ mus Creek
133
+ crp Creoles and Pidgins (Other)
134
+ cpe Creoles and Pidgins, English-based (Other)
135
+ cpf Creoles and Pidgins, French-based (Other)
136
+ cpp Creoles and Pidgins, Portuguese-based (Other)
137
+ cus Cushitic (Other)
138
+ hr Croatian
139
+ ces/cze cs Czech
140
+ dak Dakota
141
+ dan da Danish
142
+ del Delaware
143
+ din Dinka
144
+ div Divehi
145
+ doi Dogri
146
+ dra Dravidian (Other)
147
+ dua Duala
148
+ dut/nla nl Dutch
149
+ dum Dutch, Middle (ca. 1050-1350)
150
+ dyu Dyula
151
+ dzo dz Dzongkha
152
+ efi Efik
153
+ egy Egyptian (Ancient)
154
+ eka Ekajuk
155
+ elx Elamite
156
+ eng en English
157
+ enm English, Middle (ca. 1100-1500)
158
+ ang English, Old (ca. 450-1100)
159
+ esk Eskimo (Other)
160
+ epo eo Esperanto
161
+ est et Estonian
162
+ ewe Ewe
163
+ ewo Ewondo
164
+ fan Fang
165
+ fat Fanti
166
+ fao fo Faroese
167
+ fij fj Fijian
168
+ fin fi Finnish
169
+ fiu Finno-Ugrian (Other)
170
+ fon Fon
171
+ fra/fre fr French
172
+ frm French, Middle (ca. 1400-1600)
173
+ fro French, Old (842- ca. 1400)
174
+ fry fy Frisian
175
+ ful Fulah
176
+ gaa Ga
177
+ gae/gdh Gaelic (Scots)
178
+ glg gl Gallegan
179
+ lug Ganda
180
+ gay Gayo
181
+ gez Geez
182
+ geo/kat ka Georgian
183
+ deu/ger de German
184
+ gmh German, Middle High (ca. 1050-1500)
185
+ goh German, Old High (ca. 750-1050)
186
+ gem Germanic (Other)
187
+ gil Gilbertese
188
+ gon Gondi
189
+ got Gothic
190
+ grb Grebo
191
+ grc Greek, Ancient (to 1453)
192
+ ell/gre el Greek, Modern (1453-)
193
+ kal kl Greenlandic
194
+ grn gn Guarani
195
+ guj gu Gujarati
196
+ hai Haida
197
+ hau ha Hausa
198
+ haw Hawaiian
199
+ heb he Hebrew
200
+ her Herero
201
+ hil Hiligaynon
202
+ him Himachali
203
+ hin hi Hindi
204
+ hmo Hiri Motu
205
+ hun hu Hungarian
206
+ hup Hupa
207
+ iba Iban
208
+ ice/isl is Icelandic
209
+ ibo Igbo
210
+ ijo Ijo
211
+ ilo Iloko
212
+ inc Indic (Other)
213
+ ine Indo-European (Other)
214
+ ind id Indonesian
215
+ ina ia Interlingua (International Auxiliary language Association)
216
+ ile Interlingue
217
+ iku iu Inuktitut
218
+ ipk ik Inupiak
219
+ ira Iranian (Other)
220
+ gai/iri ga Irish
221
+ sga Irish, Old (to 900)
222
+ mga Irish, Middle (900 - 1200)
223
+ iro Iroquoian languages
224
+ ita it Italian
225
+ jpn ja Japanese
226
+ jav/jaw jv/jw Javanese
227
+ jrb Judeo-Arabic
228
+ jpr Judeo-Persian
229
+ kab Kabyle
230
+ kac Kachin
231
+ kam Kamba
232
+ kan kn Kannada
233
+ kau Kanuri
234
+ kaa Kara-Kalpak
235
+ kar Karen
236
+ kas ks Kashmiri
237
+ kaw Kawi
238
+ kaz kk Kazakh
239
+ kha Khasi
240
+ khm km Khmer
241
+ khi Khoisan (Other)
242
+ kho Khotanese
243
+ kik Kikuyu
244
+ kin rw Kinyarwanda
245
+ kir ky Kirghiz
246
+ kom Komi
247
+ kon Kongo
248
+ kok Konkani
249
+ kor ko Korean
250
+ kpe Kpelle
251
+ kro Kru
252
+ kua Kuanyama
253
+ kum Kumyk
254
+ kur ku Kurdish
255
+ kru Kurukh
256
+ kus Kusaie
257
+ kut Kutenai
258
+ lad Ladino
259
+ lah Lahnda
260
+ lam Lamba
261
+ oci oc Langue d'Oc (post 1500)
262
+ lao lo Lao
263
+ lat la Latin
264
+ lav lv Latvian
265
+ ltz Letzeburgesch
266
+ lez Lezghian
267
+ lin ln Lingala
268
+ lit lt Lithuanian
269
+ loz Lozi
270
+ lub Luba-Katanga
271
+ lui Luiseno
272
+ lun Lunda
273
+ luo Luo (Kenya and Tanzania)
274
+ mac/mke mk Macedonian
275
+ mad Madurese
276
+ mag Magahi
277
+ mai Maithili
278
+ mak Makasar
279
+ mlg mg Malagasy
280
+ may/msa ms Malay
281
+ mal ml Malayalam
282
+ mlt mt Maltese
283
+ man Mandingo
284
+ mni Manipuri
285
+ mno Manobo languages
286
+ max Manx
287
+ mao/mri mi Maori
288
+ mar mr Marathi
289
+ chm Mari
290
+ mah Marshall
291
+ mwr Marwari
292
+ mas Masai
293
+ myn Mayan languages
294
+ men Mende
295
+ mic Micmac
296
+ min Minangkabau
297
+ mis Miscellaneous (Other)
298
+ moh Mohawk
299
+ mol mo Moldavian
300
+ mkh Mon-Khmer (Other)
301
+ lol Mongo
302
+ mon mn Mongolian
303
+ mos Mossi
304
+ mul Multiple languages
305
+ mun Munda languages
306
+ nau na Nauru
307
+ nav Navajo
308
+ nde Ndebele, North
309
+ nbl Ndebele, South
310
+ ndo Ndongo
311
+ nep ne Nepali
312
+ new Newari
313
+ nic Niger-Kordofanian (Other)
314
+ ssa Nilo-Saharan (Other)
315
+ niu Niuean
316
+ non Norse, Old
317
+ nai North American Indian (Other)
318
+ nor no Norwegian
319
+ nno Norwegian (Nynorsk)
320
+ nub Nubian languages
321
+ nym Nyamwezi
322
+ nya Nyanja
323
+ nyn Nyankole
324
+ nyo Nyoro
325
+ nzi Nzima
326
+ oji Ojibwa
327
+ ori or Oriya
328
+ orm om Oromo
329
+ osa Osage
330
+ oss Ossetic
331
+ oto Otomian languages
332
+ pal Pahlavi
333
+ pau Palauan
334
+ pli Pali
335
+ pam Pampanga
336
+ pag Pangasinan
337
+ pan pa Panjabi
338
+ pap Papiamento
339
+ paa Papuan-Australian (Other)
340
+ fas/per fa Persian
341
+ peo Persian, Old (ca 600 - 400 B.C.)
342
+ phn Phoenician
343
+ pol pl Polish
344
+ pon Ponape
345
+ por pt Portuguese
346
+ pra Prakrit languages
347
+ pro Provencal, Old (to 1500)
348
+ pus ps Pushto
349
+ que qu Quechua
350
+ roh rm Rhaeto-Romance
351
+ raj Rajasthani
352
+ rar Rarotongan
353
+ roa Romance (Other)
354
+ ron/rum ro Romanian
355
+ rom Romany
356
+ run rn Rundi
357
+ rus ru Russian
358
+ sal Salishan languages
359
+ sam Samaritan Aramaic
360
+ smi Sami languages
361
+ smo sm Samoan
362
+ sad Sandawe
363
+ sag sg Sango
364
+ san sa Sanskrit
365
+ srd Sardinian
366
+ sco Scots
367
+ sel Selkup
368
+ sem Semitic (Other)
369
+ sr Serbian
370
+ scr sh Serbo-Croatian
371
+ srr Serer
372
+ shn Shan
373
+ sna sn Shona
374
+ sid Sidamo
375
+ bla Siksika
376
+ snd sd Sindhi
377
+ sin si Singhalese
378
+ sit Sino-Tibetan (Other)
379
+ sio Siouan languages
380
+ sla Slavic (Other)
381
+ ss Siswati
382
+ slk/slo sk Slovak
383
+ slv sl Slovenian
384
+ sog Sogdian
385
+ som so Somali
386
+ son Songhai
387
+ wen Sorbian languages
388
+ nso Sotho, Northern
389
+ sot st Sotho, Southern
390
+ sai South American Indian (Other)
391
+ esl/spa es Spanish
392
+ suk Sukuma
393
+ sux Sumerian
394
+ sun su Sundanese
395
+ sus Susu
396
+ swa sw Swahili
397
+ ssw Swazi
398
+ sve/swe sv Swedish
399
+ syr Syriac
400
+ tgl tl Tagalog
401
+ tah Tahitian
402
+ tgk tg Tajik
403
+ tmh Tamashek
404
+ tam ta Tamil
405
+ tat tt Tatar
406
+ tel te Telugu
407
+ ter Tereno
408
+ tha th Thai
409
+ bod/tib bo Tibetan
410
+ tig Tigre
411
+ tir ti Tigrinya
412
+ tem Timne
413
+ tiv Tivi
414
+ tli Tlingit
415
+ tog Tonga (Nyasa)
416
+ ton to Tonga (Tonga Islands)
417
+ tru Truk
418
+ tsi Tsimshian
419
+ tso ts Tsonga
420
+ tsn tn Tswana
421
+ tum Tumbuka
422
+ tur tr Turkish
423
+ ota Turkish, Ottoman (1500 - 1928)
424
+ tuk tk Turkmen
425
+ tyv Tuvinian
426
+ twi tw Twi
427
+ uga Ugaritic
428
+ uig ug Uighur
429
+ ukr uk Ukrainian
430
+ umb Umbundu
431
+ und Undetermined
432
+ urd ur Urdu
433
+ uzb uz Uzbek
434
+ vai Vai
435
+ ven Venda
436
+ vie vi Vietnamese
437
+ vol vo Volapük
438
+ vot Votic
439
+ wak Wakashan languages
440
+ wal Walamo
441
+ war Waray
442
+ was Washo
443
+ cym/wel cy Welsh
444
+ wol wo Wolof
445
+ xho xh Xhosa
446
+ sah Yakut
447
+ yao Yao
448
+ yap Yap
449
+ yid yi Yiddish
450
+ yor yo Yoruba
451
+ zap Zapotec
452
+ zen Zenaga
453
+ zha za Zhuang
454
+ zul zu Zulu
455
+ zun Zuni
456
+ __END_DATA__