Linguistics 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # linguistics/iso639.rb - A hash of International 2- and 3-letter
4
+ # ISO639-1 and ISO639-2 language codes. Each entry has two keys:
5
+ #
6
+ # [<tt>:codes</tt>]
7
+ # All of the codes known for this language
8
+ # [<tt>:desc</tt>]
9
+ # The English-language description of the language.
10
+ #
11
+
12
+ ### A language-independent framework for adding linguistics functions to Ruby
13
+ ### classes.
14
module Linguistics

  # Hash of ISO639 2- and 3-letter language codes. Every code String listed
  # in the table at the bottom of this file is a key; the value is an entry
  # Hash with two keys:
  #
  # [<tt>:codes</tt>]
  #   Array of all the codes listed for the language
  # [<tt>:desc</tt>]
  #   The description text from the table
  #
  # All codes that belong to one language map to the same entry object.
  LanguageCodes = {}

  # The table is embedded in this file's own source: it starts after a line
  # consisting solely of the __END__ token and stops at a line consisting
  # solely of the __END_DATA__ token. The source is re-read with
  # File::readlines instead of going through DATA -- presumably because DATA
  # is only set up for the main script, not for a required library.
  #
  # Each table row is fixed-width: the first 15 columns hold the codes,
  # separated by "/" or whitespace, and the rest of the row is the
  # description.
  #
  # Raises a RuntimeError at load time if a code appears twice.
  #
  # NOTE(review): rows are read with the default external encoding; a
  # description containing non-ASCII bytes may need an explicit encoding --
  # confirm against the Ruby versions this must support.
  in_data_section = false
  File::readlines( __FILE__ ).each do |line|
    case line
    when /^__END_DATA__$/
      in_data_section = false

    when /^__END__$/
      in_data_section = true

    else
      next unless in_data_section

      codes = line[0, 15].split( %r{/|\s+} ).reject {|code| code.empty? }

      # Blank rows carry no codes; skip them instead of registering nothing
      # (and instead of calling #strip on a nil description).
      next if codes.empty?

      # A row shorter than 15 columns has no description column at all, so
      # the slice is nil; #to_s turns that into an empty description.
      # #strip also removes the trailing newline, so a final row without
      # one keeps its last character.
      desc = line[15..-1].to_s.strip

      entry = {
        :desc  => desc,
        :codes => codes,
      }

      codes.each do |code|
        if LanguageCodes.key?( code )
          raise "Duplicate language code #{code}: " \
                "(#{LanguageCodes[code][:desc]})"
        end
        LanguageCodes[ code ] = entry
      end
    end
  end
end
50
+
51
+ __END__
52
+ abk ab Abkhazian
53
+ ace Achinese
54
+ ach Acoli
55
+ ada Adangme
56
+ aar aa Afar
57
+ afh Afrihili
58
+ afr af Afrikaans
59
+ afa Afro-Asiatic (Other)
60
+ aka Akan
61
+ akk Akkadian
62
+ alb/sqi sq Albanian
63
+ ale Aleut
64
+ alg Algonquian languages
65
+ tut Altaic (Other)
66
+ amh am Amharic
67
+ apa Apache languages
68
+ ara ar Arabic
69
+ arc Aramaic
70
+ arp Arapaho
71
+ arn Araucanian
72
+ arw Arawak
73
+ arm/hye hy Armenian
74
+ art Artificial (Other)
75
+ asm as Assamese
76
+ ath Athapascan languages
77
+ map Austronesian (Other)
78
+ ava Avaric
79
+ ave Avestan
80
+ awa Awadhi
81
+ aym ay Aymara
82
+ aze az Azerbaijani
83
+ nah Aztec
84
+ ban Balinese
85
+ bat Baltic (Other)
86
+ bal Baluchi
87
+ bam Bambara
88
+ bai Bamileke languages
89
+ bad Banda
90
+ bnt Bantu (Other)
91
+ bas Basa
92
+ bak ba Bashkir
93
+ baq/eus eu Basque
94
+ bej Beja
95
+ bem Bemba
96
+ ben bn Bengali
97
+ ber Berber (Other)
98
+ bho Bhojpuri
99
+ bih bh Bihari
100
+ bik Bikol
101
+ bin Bini
102
+ bis bi Bislama
103
+ bra Braj
104
+ bre br Breton
105
+ bug Buginese
106
+ bul bg Bulgarian
107
+ bua Buriat
108
+ bur/mya my Burmese
109
+ bel be Byelorussian
110
+ cad Caddo
111
+ car Carib
112
+ cat ca Catalan
113
+ cau Caucasian (Other)
114
+ ceb Cebuano
115
+ cel Celtic (Other)
116
+ cai Central American Indian (Other)
117
+ chg Chagatai
118
+ cha Chamorro
119
+ che Chechen
120
+ chr Cherokee
121
+ chy Cheyenne
122
+ chb Chibcha
123
+ chi/zho zh Chinese
124
+ chn Chinook jargon
125
+ cho Choctaw
126
+ chu Church Slavic
127
+ chv Chuvash
128
+ cop Coptic
129
+ cor Cornish
130
+ cos co Corsican
131
+ cre Cree
132
+ mus Creek
133
+ crp Creoles and Pidgins (Other)
134
+ cpe Creoles and Pidgins, English-based (Other)
135
+ cpf Creoles and Pidgins, French-based (Other)
136
+ cpp Creoles and Pidgins, Portuguese-based (Other)
137
+ cus Cushitic (Other)
138
+ hr Croatian
139
+ ces/cze cs Czech
140
+ dak Dakota
141
+ dan da Danish
142
+ del Delaware
143
+ din Dinka
144
+ div Divehi
145
+ doi Dogri
146
+ dra Dravidian (Other)
147
+ dua Duala
148
+ dut/nla nl Dutch
149
+ dum Dutch, Middle (ca. 1050-1350)
150
+ dyu Dyula
151
+ dzo dz Dzongkha
152
+ efi Efik
153
+ egy Egyptian (Ancient)
154
+ eka Ekajuk
155
+ elx Elamite
156
+ eng en English
157
+ enm English, Middle (ca. 1100-1500)
158
+ ang English, Old (ca. 450-1100)
159
+ esk Eskimo (Other)
160
+ epo eo Esperanto
161
+ est et Estonian
162
+ ewe Ewe
163
+ ewo Ewondo
164
+ fan Fang
165
+ fat Fanti
166
+ fao fo Faroese
167
+ fij fj Fijian
168
+ fin fi Finnish
169
+ fiu Finno-Ugrian (Other)
170
+ fon Fon
171
+ fra/fre fr French
172
+ frm French, Middle (ca. 1400-1600)
173
+ fro French, Old (842- ca. 1400)
174
+ fry fy Frisian
175
+ ful Fulah
176
+ gaa Ga
177
+ gae/gdh Gaelic (Scots)
178
+ glg gl Gallegan
179
+ lug Ganda
180
+ gay Gayo
181
+ gez Geez
182
+ geo/kat ka Georgian
183
+ deu/ger de German
184
+ gmh German, Middle High (ca. 1050-1500)
185
+ goh German, Old High (ca. 750-1050)
186
+ gem Germanic (Other)
187
+ gil Gilbertese
188
+ gon Gondi
189
+ got Gothic
190
+ grb Grebo
191
+ grc Greek, Ancient (to 1453)
192
+ ell/gre el Greek, Modern (1453-)
193
+ kal kl Greenlandic
194
+ grn gn Guarani
195
+ guj gu Gujarati
196
+ hai Haida
197
+ hau ha Hausa
198
+ haw Hawaiian
199
+ heb he Hebrew
200
+ her Herero
201
+ hil Hiligaynon
202
+ him Himachali
203
+ hin hi Hindi
204
+ hmo Hiri Motu
205
+ hun hu Hungarian
206
+ hup Hupa
207
+ iba Iban
208
+ ice/isl is Icelandic
209
+ ibo Igbo
210
+ ijo Ijo
211
+ ilo Iloko
212
+ inc Indic (Other)
213
+ ine Indo-European (Other)
214
+ ind id Indonesian
215
+ ina ia Interlingua (International Auxiliary language Association)
216
+ ile Interlingue
217
+ iku iu Inuktitut
218
+ ipk ik Inupiak
219
+ ira Iranian (Other)
220
+ gai/iri ga Irish
221
+ sga Irish, Old (to 900)
222
+ mga Irish, Middle (900 - 1200)
223
+ iro Iroquoian languages
224
+ ita it Italian
225
+ jpn ja Japanese
226
+ jav/jaw jv/jw Javanese
227
+ jrb Judeo-Arabic
228
+ jpr Judeo-Persian
229
+ kab Kabyle
230
+ kac Kachin
231
+ kam Kamba
232
+ kan kn Kannada
233
+ kau Kanuri
234
+ kaa Kara-Kalpak
235
+ kar Karen
236
+ kas ks Kashmiri
237
+ kaw Kawi
238
+ kaz kk Kazakh
239
+ kha Khasi
240
+ khm km Khmer
241
+ khi Khoisan (Other)
242
+ kho Khotanese
243
+ kik Kikuyu
244
+ kin rw Kinyarwanda
245
+ kir ky Kirghiz
246
+ kom Komi
247
+ kon Kongo
248
+ kok Konkani
249
+ kor ko Korean
250
+ kpe Kpelle
251
+ kro Kru
252
+ kua Kuanyama
253
+ kum Kumyk
254
+ kur ku Kurdish
255
+ kru Kurukh
256
+ kus Kusaie
257
+ kut Kutenai
258
+ lad Ladino
259
+ lah Lahnda
260
+ lam Lamba
261
+ oci oc Langue d'Oc (post 1500)
262
+ lao lo Lao
263
+ lat la Latin
264
+ lav lv Latvian
265
+ ltz Letzeburgesch
266
+ lez Lezghian
267
+ lin ln Lingala
268
+ lit lt Lithuanian
269
+ loz Lozi
270
+ lub Luba-Katanga
271
+ lui Luiseno
272
+ lun Lunda
273
+ luo Luo (Kenya and Tanzania)
274
+ mac/mke mk Macedonian
275
+ mad Madurese
276
+ mag Magahi
277
+ mai Maithili
278
+ mak Makasar
279
+ mlg mg Malagasy
280
+ may/msa ms Malay
281
+ mal ml Malayalam
282
+ mlt mt Maltese
283
+ man Mandingo
284
+ mni Manipuri
285
+ mno Manobo languages
286
+ max Manx
287
+ mao/mri mi Maori
288
+ mar mr Marathi
289
+ chm Mari
290
+ mah Marshall
291
+ mwr Marwari
292
+ mas Masai
293
+ myn Mayan languages
294
+ men Mende
295
+ mic Micmac
296
+ min Minangkabau
297
+ mis Miscellaneous (Other)
298
+ moh Mohawk
299
+ mol mo Moldavian
300
+ mkh Mon-Khmer (Other)
301
+ lol Mongo
302
+ mon mn Mongolian
303
+ mos Mossi
304
+ mul Multiple languages
305
+ mun Munda languages
306
+ nau na Nauru
307
+ nav Navajo
308
+ nde Ndebele, North
309
+ nbl Ndebele, South
310
+ ndo Ndongo
311
+ nep ne Nepali
312
+ new Newari
313
+ nic Niger-Kordofanian (Other)
314
+ ssa Nilo-Saharan (Other)
315
+ niu Niuean
316
+ non Norse, Old
317
+ nai North American Indian (Other)
318
+ nor no Norwegian
319
+ nno Norwegian (Nynorsk)
320
+ nub Nubian languages
321
+ nym Nyamwezi
322
+ nya Nyanja
323
+ nyn Nyankole
324
+ nyo Nyoro
325
+ nzi Nzima
326
+ oji Ojibwa
327
+ ori or Oriya
328
+ orm om Oromo
329
+ osa Osage
330
+ oss Ossetic
331
+ oto Otomian languages
332
+ pal Pahlavi
333
+ pau Palauan
334
+ pli Pali
335
+ pam Pampanga
336
+ pag Pangasinan
337
+ pan pa Panjabi
338
+ pap Papiamento
339
+ paa Papuan-Australian (Other)
340
+ fas/per fa Persian
341
+ peo Persian, Old (ca 600 - 400 B.C.)
342
+ phn Phoenician
343
+ pol pl Polish
344
+ pon Ponape
345
+ por pt Portuguese
346
+ pra Prakrit languages
347
+ pro Provencal, Old (to 1500)
348
+ pus ps Pushto
349
+ que qu Quechua
350
+ roh rm Rhaeto-Romance
351
+ raj Rajasthani
352
+ rar Rarotongan
353
+ roa Romance (Other)
354
+ ron/rum ro Romanian
355
+ rom Romany
356
+ run rn Rundi
357
+ rus ru Russian
358
+ sal Salishan languages
359
+ sam Samaritan Aramaic
360
+ smi Sami languages
361
+ smo sm Samoan
362
+ sad Sandawe
363
+ sag sg Sango
364
+ san sa Sanskrit
365
+ srd Sardinian
366
+ sco Scots
367
+ sel Selkup
368
+ sem Semitic (Other)
369
+ sr Serbian
370
+ scr sh Serbo-Croatian
371
+ srr Serer
372
+ shn Shan
373
+ sna sn Shona
374
+ sid Sidamo
375
+ bla Siksika
376
+ snd sd Sindhi
377
+ sin si Singhalese
378
+ sit Sino-Tibetan (Other)
379
+ sio Siouan languages
380
+ sla Slavic (Other)
381
+ ss Siswati
382
+ slk/slo sk Slovak
383
+ slv sl Slovenian
384
+ sog Sogdian
385
+ som so Somali
386
+ son Songhai
387
+ wen Sorbian languages
388
+ nso Sotho, Northern
389
+ sot st Sotho, Southern
390
+ sai South American Indian (Other)
391
+ esl/spa es Spanish
392
+ suk Sukuma
393
+ sux Sumerian
394
+ sun su Sundanese
395
+ sus Susu
396
+ swa sw Swahili
397
+ ssw Swazi
398
+ sve/swe sv Swedish
399
+ syr Syriac
400
+ tgl tl Tagalog
401
+ tah Tahitian
402
+ tgk tg Tajik
403
+ tmh Tamashek
404
+ tam ta Tamil
405
+ tat tt Tatar
406
+ tel te Telugu
407
+ ter Tereno
408
+ tha th Thai
409
+ bod/tib bo Tibetan
410
+ tig Tigre
411
+ tir ti Tigrinya
412
+ tem Timne
413
+ tiv Tivi
414
+ tli Tlingit
415
+ tog to Tonga (Nyasa)
416
+ ton Tonga (Tonga Islands)
417
+ tru Truk
418
+ tsi Tsimshian
419
+ tso ts Tsonga
420
+ tsn tn Tswana
421
+ tum Tumbuka
422
+ tur tr Turkish
423
+ ota Turkish, Ottoman (1500 - 1928)
424
+ tuk tk Turkmen
425
+ tyv Tuvinian
426
+ twi tw Twi
427
+ uga Ugaritic
428
+ uig ug Uighur
429
+ ukr uk Ukrainian
430
+ umb Umbundu
431
+ und Undetermined
432
+ urd ur Urdu
433
+ uzb uz Uzbek
434
+ vai Vai
435
+ ven Venda
436
+ vie vi Vietnamese
437
+ vol vo Volapük
438
+ vot Votic
439
+ wak Wakashan languages
440
+ wal Walamo
441
+ war Waray
442
+ was Washo
443
+ cym/wel cy Welsh
444
+ wol wo Wolof
445
+ xho xh Xhosa
446
+ sah Yakut
447
+ yao Yao
448
+ yap Yap
449
+ yid yi Yiddish
450
+ yor yo Yoruba
451
+ zap Zapotec
452
+ zen Zenaga
453
+ zha za Zhuang
454
+ zul zu Zulu
455
+ zun Zuni
456
+ __END_DATA__