Linguistics 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Artistic +127 -0
- data/ChangeLog +444 -0
- data/MANIFEST +19 -0
- data/README +178 -0
- data/README.english +245 -0
- data/TODO +17 -0
- data/experiments/randobjlist.rb +34 -0
- data/install.rb +154 -0
- data/lib/linguistics/en/infinitive.rb +1149 -0
- data/lib/linguistics/en/linkparser.rb +142 -0
- data/lib/linguistics/en/wordnet.rb +253 -0
- data/lib/linguistics/en.rb +1694 -0
- data/lib/linguistics/iso639.rb +456 -0
- data/lib/linguistics.rb +368 -0
- data/redist/crosscase.rb +298 -0
- data/test.rb +110 -0
- data/tests/en/conjunction.tests.rb +114 -0
- data/tests/en/inflect.tests.rb +1378 -0
- data/tests/lingtestcase.rb +239 -0
- data/tests/use.tests.rb +99 -0
- data/utils.rb +689 -0
- metadata +58 -0
@@ -0,0 +1,456 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# linguistics/iso639.rb - A hash of International 2- and 3-letter
|
4
|
+
# ISO639-1 and ISO639-2 language codes. Each entry has two keys:
|
5
|
+
#
|
6
|
+
# [<tt>:codes</tt>]
|
7
|
+
# All of the codes known for this language
|
8
|
+
# [<tt>:desc</tt>]
|
9
|
+
# The English-language description of the language.
|
10
|
+
#
|
11
|
+
|
12
|
+
### A language-independent framework for adding linguistics functions to Ruby
|
13
|
+
### classes.
|
14
|
+
module Linguistics

  # Hash of ISO639 2- and 3-letter language codes. Every code known for a
  # language (e.g. "alb", "sqi" and "sq") maps to the same entry Hash, which
  # has two keys:
  #
  # [<tt>:codes</tt>]
  #   Array of all of the codes listed for the language.
  # [<tt>:desc</tt>]
  #   The English-language description of the language.
  LanguageCodes = {}

  # Populate LanguageCodes at load time by scanning this file's own source
  # and parsing every line found between the __END__ and __END_DATA__
  # tokens. Each data line is fixed-width: the first 15 columns hold the
  # codes (separated by '/' or whitespace), the remainder is the description.
  #
  # NOTE(review): the data section appears to contain at least one non-ASCII
  # byte (the Volapuk entry). Confirm the file's encoding matches what the
  # running Ruby expects, since the regexp matches below operate on raw lines.
  in_data_section = false
  File.readlines( __FILE__ ).each do |line|
    case line
    when /^__END_DATA__$/
      in_data_section = false

    when /^__END__$/
      in_data_section = true

    else
      next unless in_data_section

      # Strip the line terminator explicitly instead of blindly dropping the
      # last character, so a final line without a newline is not truncated.
      record = line.chomp
      codes = record[ 0, 15 ].split( %r{/|\s+} ).reject {|code| code.empty? }

      # Blank lines (or lines with nothing in the code columns) carry no
      # codes to register, so skip them rather than failing.
      next if codes.empty?

      entry = {
        # A line shorter than 15 columns has no description column at all.
        :desc  => ( record[ 15..-1 ] || '' ).strip,
        :codes => codes,
      }

      codes.each do |code|
        if LanguageCodes.key?( code )
          raise "Duplicate language code #{code}: " \
            "(#{LanguageCodes[code][:desc]})"
        end
        LanguageCodes[ code ] = entry
      end
    end
  end
end
|
50
|
+
|
51
|
+
__END__
|
52
|
+
abk ab Abkhazian
|
53
|
+
ace Achinese
|
54
|
+
ach Acoli
|
55
|
+
ada Adangme
|
56
|
+
aar aa Afar
|
57
|
+
afh Afrihili
|
58
|
+
afr af Afrikaans
|
59
|
+
afa Afro-Asiatic (Other)
|
60
|
+
aka Akan
|
61
|
+
akk Akkadian
|
62
|
+
alb/sqi sq Albanian
|
63
|
+
ale Aleut
|
64
|
+
alg Algonquian languages
|
65
|
+
tut Altaic (Other)
|
66
|
+
amh am Amharic
|
67
|
+
apa Apache languages
|
68
|
+
ara ar Arabic
|
69
|
+
arc Aramaic
|
70
|
+
arp Arapaho
|
71
|
+
arn Araucanian
|
72
|
+
arw Arawak
|
73
|
+
arm/hye hy Armenian
|
74
|
+
art Artificial (Other)
|
75
|
+
asm as Assamese
|
76
|
+
ath Athapascan languages
|
77
|
+
map Austronesian (Other)
|
78
|
+
ava Avaric
|
79
|
+
ave Avestan
|
80
|
+
awa Awadhi
|
81
|
+
aym ay Aymara
|
82
|
+
aze az Azerbaijani
|
83
|
+
nah Aztec
|
84
|
+
ban Balinese
|
85
|
+
bat Baltic (Other)
|
86
|
+
bal Baluchi
|
87
|
+
bam Bambara
|
88
|
+
bai Bamileke languages
|
89
|
+
bad Banda
|
90
|
+
bnt Bantu (Other)
|
91
|
+
bas Basa
|
92
|
+
bak ba Bashkir
|
93
|
+
baq/eus eu Basque
|
94
|
+
bej Beja
|
95
|
+
bem Bemba
|
96
|
+
ben bn Bengali
|
97
|
+
ber Berber (Other)
|
98
|
+
bho Bhojpuri
|
99
|
+
bih bh Bihari
|
100
|
+
bik Bikol
|
101
|
+
bin Bini
|
102
|
+
bis bi Bislama
|
103
|
+
bra Braj
|
104
|
+
bre br Breton
|
105
|
+
bug Buginese
|
106
|
+
bul bg Bulgarian
|
107
|
+
bua Buriat
|
108
|
+
bur/mya my Burmese
|
109
|
+
bel be Byelorussian
|
110
|
+
cad Caddo
|
111
|
+
car Carib
|
112
|
+
cat ca Catalan
|
113
|
+
cau Caucasian (Other)
|
114
|
+
ceb Cebuano
|
115
|
+
cel Celtic (Other)
|
116
|
+
cai Central American Indian (Other)
|
117
|
+
chg Chagatai
|
118
|
+
cha Chamorro
|
119
|
+
che Chechen
|
120
|
+
chr Cherokee
|
121
|
+
chy Cheyenne
|
122
|
+
chb Chibcha
|
123
|
+
chi/zho zh Chinese
|
124
|
+
chn Chinook jargon
|
125
|
+
cho Choctaw
|
126
|
+
chu Church Slavic
|
127
|
+
chv Chuvash
|
128
|
+
cop Coptic
|
129
|
+
cor Cornish
|
130
|
+
cos co Corsican
|
131
|
+
cre Cree
|
132
|
+
mus Creek
|
133
|
+
crp Creoles and Pidgins (Other)
|
134
|
+
cpe Creoles and Pidgins, English-based (Other)
|
135
|
+
cpf Creoles and Pidgins, French-based (Other)
|
136
|
+
cpp Creoles and Pidgins, Portuguese-based (Other)
|
137
|
+
cus Cushitic (Other)
|
138
|
+
hr Croatian
|
139
|
+
ces/cze cs Czech
|
140
|
+
dak Dakota
|
141
|
+
dan da Danish
|
142
|
+
del Delaware
|
143
|
+
din Dinka
|
144
|
+
div Divehi
|
145
|
+
doi Dogri
|
146
|
+
dra Dravidian (Other)
|
147
|
+
dua Duala
|
148
|
+
dut/nla nl Dutch
|
149
|
+
dum Dutch, Middle (ca. 1050-1350)
|
150
|
+
dyu Dyula
|
151
|
+
dzo dz Dzongkha
|
152
|
+
efi Efik
|
153
|
+
egy Egyptian (Ancient)
|
154
|
+
eka Ekajuk
|
155
|
+
elx Elamite
|
156
|
+
eng en English
|
157
|
+
enm English, Middle (ca. 1100-1500)
|
158
|
+
ang English, Old (ca. 450-1100)
|
159
|
+
esk Eskimo (Other)
|
160
|
+
epo eo Esperanto
|
161
|
+
est et Estonian
|
162
|
+
ewe Ewe
|
163
|
+
ewo Ewondo
|
164
|
+
fan Fang
|
165
|
+
fat Fanti
|
166
|
+
fao fo Faroese
|
167
|
+
fij fj Fijian
|
168
|
+
fin fi Finnish
|
169
|
+
fiu Finno-Ugrian (Other)
|
170
|
+
fon Fon
|
171
|
+
fra/fre fr French
|
172
|
+
frm French, Middle (ca. 1400-1600)
|
173
|
+
fro French, Old (842- ca. 1400)
|
174
|
+
fry fy Frisian
|
175
|
+
ful Fulah
|
176
|
+
gaa Ga
|
177
|
+
gae/gdh Gaelic (Scots)
|
178
|
+
glg gl Gallegan
|
179
|
+
lug Ganda
|
180
|
+
gay Gayo
|
181
|
+
gez Geez
|
182
|
+
geo/kat ka Georgian
|
183
|
+
deu/ger de German
|
184
|
+
gmh German, Middle High (ca. 1050-1500)
|
185
|
+
goh German, Old High (ca. 750-1050)
|
186
|
+
gem Germanic (Other)
|
187
|
+
gil Gilbertese
|
188
|
+
gon Gondi
|
189
|
+
got Gothic
|
190
|
+
grb Grebo
|
191
|
+
grc Greek, Ancient (to 1453)
|
192
|
+
ell/gre el Greek, Modern (1453-)
|
193
|
+
kal kl Greenlandic
|
194
|
+
grn gn Guarani
|
195
|
+
guj gu Gujarati
|
196
|
+
hai Haida
|
197
|
+
hau ha Hausa
|
198
|
+
haw Hawaiian
|
199
|
+
heb he Hebrew
|
200
|
+
her Herero
|
201
|
+
hil Hiligaynon
|
202
|
+
him Himachali
|
203
|
+
hin hi Hindi
|
204
|
+
hmo Hiri Motu
|
205
|
+
hun hu Hungarian
|
206
|
+
hup Hupa
|
207
|
+
iba Iban
|
208
|
+
ice/isl is Icelandic
|
209
|
+
ibo Igbo
|
210
|
+
ijo Ijo
|
211
|
+
ilo Iloko
|
212
|
+
inc Indic (Other)
|
213
|
+
ine Indo-European (Other)
|
214
|
+
ind id Indonesian
|
215
|
+
ina ia Interlingua (International Auxiliary language Association)
|
216
|
+
ile Interlingue
|
217
|
+
iku iu Inuktitut
|
218
|
+
ipk ik Inupiak
|
219
|
+
ira Iranian (Other)
|
220
|
+
gai/iri ga Irish
|
221
|
+
sga Irish, Old (to 900)
|
222
|
+
mga Irish, Middle (900 - 1200)
|
223
|
+
iro Iroquoian languages
|
224
|
+
ita it Italian
|
225
|
+
jpn ja Japanese
|
226
|
+
jav/jaw jv/jw Javanese
|
227
|
+
jrb Judeo-Arabic
|
228
|
+
jpr Judeo-Persian
|
229
|
+
kab Kabyle
|
230
|
+
kac Kachin
|
231
|
+
kam Kamba
|
232
|
+
kan kn Kannada
|
233
|
+
kau Kanuri
|
234
|
+
kaa Kara-Kalpak
|
235
|
+
kar Karen
|
236
|
+
kas ks Kashmiri
|
237
|
+
kaw Kawi
|
238
|
+
kaz kk Kazakh
|
239
|
+
kha Khasi
|
240
|
+
khm km Khmer
|
241
|
+
khi Khoisan (Other)
|
242
|
+
kho Khotanese
|
243
|
+
kik Kikuyu
|
244
|
+
kin rw Kinyarwanda
|
245
|
+
kir ky Kirghiz
|
246
|
+
kom Komi
|
247
|
+
kon Kongo
|
248
|
+
kok Konkani
|
249
|
+
kor ko Korean
|
250
|
+
kpe Kpelle
|
251
|
+
kro Kru
|
252
|
+
kua Kuanyama
|
253
|
+
kum Kumyk
|
254
|
+
kur ku Kurdish
|
255
|
+
kru Kurukh
|
256
|
+
kus Kusaie
|
257
|
+
kut Kutenai
|
258
|
+
lad Ladino
|
259
|
+
lah Lahnda
|
260
|
+
lam Lamba
|
261
|
+
oci oc Langue d'Oc (post 1500)
|
262
|
+
lao lo Lao
|
263
|
+
lat la Latin
|
264
|
+
lav lv Latvian
|
265
|
+
ltz Letzeburgesch
|
266
|
+
lez Lezghian
|
267
|
+
lin ln Lingala
|
268
|
+
lit lt Lithuanian
|
269
|
+
loz Lozi
|
270
|
+
lub Luba-Katanga
|
271
|
+
lui Luiseno
|
272
|
+
lun Lunda
|
273
|
+
luo Luo (Kenya and Tanzania)
|
274
|
+
mac/mke mk Macedonian
|
275
|
+
mad Madurese
|
276
|
+
mag Magahi
|
277
|
+
mai Maithili
|
278
|
+
mak Makasar
|
279
|
+
mlg mg Malagasy
|
280
|
+
may/msa ms Malay
|
281
|
+
mal ml Malayalam
|
282
|
+
mlt mt Maltese
|
283
|
+
man Mandingo
|
284
|
+
mni Manipuri
|
285
|
+
mno Manobo languages
|
286
|
+
max Manx
|
287
|
+
mao/mri mi Maori
|
288
|
+
mar mr Marathi
|
289
|
+
chm Mari
|
290
|
+
mah Marshall
|
291
|
+
mwr Marwari
|
292
|
+
mas Masai
|
293
|
+
myn Mayan languages
|
294
|
+
men Mende
|
295
|
+
mic Micmac
|
296
|
+
min Minangkabau
|
297
|
+
mis Miscellaneous (Other)
|
298
|
+
moh Mohawk
|
299
|
+
mol mo Moldavian
|
300
|
+
mkh Mon-Khmer (Other)
|
301
|
+
lol Mongo
|
302
|
+
mon mn Mongolian
|
303
|
+
mos Mossi
|
304
|
+
mul Multiple languages
|
305
|
+
mun Munda languages
|
306
|
+
nau na Nauru
|
307
|
+
nav Navajo
|
308
|
+
nde Ndebele, North
|
309
|
+
nbl Ndebele, South
|
310
|
+
ndo Ndongo
|
311
|
+
nep ne Nepali
|
312
|
+
new Newari
|
313
|
+
nic Niger-Kordofanian (Other)
|
314
|
+
ssa Nilo-Saharan (Other)
|
315
|
+
niu Niuean
|
316
|
+
non Norse, Old
|
317
|
+
nai North American Indian (Other)
|
318
|
+
nor no Norwegian
|
319
|
+
nno Norwegian (Nynorsk)
|
320
|
+
nub Nubian languages
|
321
|
+
nym Nyamwezi
|
322
|
+
nya Nyanja
|
323
|
+
nyn Nyankole
|
324
|
+
nyo Nyoro
|
325
|
+
nzi Nzima
|
326
|
+
oji Ojibwa
|
327
|
+
ori or Oriya
|
328
|
+
orm om Oromo
|
329
|
+
osa Osage
|
330
|
+
oss Ossetic
|
331
|
+
oto Otomian languages
|
332
|
+
pal Pahlavi
|
333
|
+
pau Palauan
|
334
|
+
pli Pali
|
335
|
+
pam Pampanga
|
336
|
+
pag Pangasinan
|
337
|
+
pan pa Panjabi
|
338
|
+
pap Papiamento
|
339
|
+
paa Papuan-Australian (Other)
|
340
|
+
fas/per fa Persian
|
341
|
+
peo Persian, Old (ca 600 - 400 B.C.)
|
342
|
+
phn Phoenician
|
343
|
+
pol pl Polish
|
344
|
+
pon Ponape
|
345
|
+
por pt Portuguese
|
346
|
+
pra Prakrit languages
|
347
|
+
pro Provencal, Old (to 1500)
|
348
|
+
pus ps Pushto
|
349
|
+
que qu Quechua
|
350
|
+
roh rm Rhaeto-Romance
|
351
|
+
raj Rajasthani
|
352
|
+
rar Rarotongan
|
353
|
+
roa Romance (Other)
|
354
|
+
ron/rum ro Romanian
|
355
|
+
rom Romany
|
356
|
+
run rn Rundi
|
357
|
+
rus ru Russian
|
358
|
+
sal Salishan languages
|
359
|
+
sam Samaritan Aramaic
|
360
|
+
smi Sami languages
|
361
|
+
smo sm Samoan
|
362
|
+
sad Sandawe
|
363
|
+
sag sg Sango
|
364
|
+
san sa Sanskrit
|
365
|
+
srd Sardinian
|
366
|
+
sco Scots
|
367
|
+
sel Selkup
|
368
|
+
sem Semitic (Other)
|
369
|
+
sr Serbian
|
370
|
+
scr sh Serbo-Croatian
|
371
|
+
srr Serer
|
372
|
+
shn Shan
|
373
|
+
sna sn Shona
|
374
|
+
sid Sidamo
|
375
|
+
bla Siksika
|
376
|
+
snd sd Sindhi
|
377
|
+
sin si Singhalese
|
378
|
+
sit Sino-Tibetan (Other)
|
379
|
+
sio Siouan languages
|
380
|
+
sla Slavic (Other)
|
381
|
+
ss Siswati
|
382
|
+
slk/slo sk Slovak
|
383
|
+
slv sl Slovenian
|
384
|
+
sog Sogdian
|
385
|
+
som so Somali
|
386
|
+
son Songhai
|
387
|
+
wen Sorbian languages
|
388
|
+
nso Sotho, Northern
|
389
|
+
sot st Sotho, Southern
|
390
|
+
sai South American Indian (Other)
|
391
|
+
esl/spa es Spanish
|
392
|
+
suk Sukuma
|
393
|
+
sux Sumerian
|
394
|
+
sun su Sundanese
|
395
|
+
sus Susu
|
396
|
+
swa sw Swahili
|
397
|
+
ssw Swazi
|
398
|
+
sve/swe sv Swedish
|
399
|
+
syr Syriac
|
400
|
+
tgl tl Tagalog
|
401
|
+
tah Tahitian
|
402
|
+
tgk tg Tajik
|
403
|
+
tmh Tamashek
|
404
|
+
tam ta Tamil
|
405
|
+
tat tt Tatar
|
406
|
+
tel te Telugu
|
407
|
+
ter Tereno
|
408
|
+
tha th Thai
|
409
|
+
bod/tib bo Tibetan
|
410
|
+
tig Tigre
|
411
|
+
tir ti Tigrinya
|
412
|
+
tem Timne
|
413
|
+
tiv Tivi
|
414
|
+
tli Tlingit
|
415
|
+
tog to Tonga (Nyasa)
|
416
|
+
ton Tonga (Tonga Islands)
|
417
|
+
tru Truk
|
418
|
+
tsi Tsimshian
|
419
|
+
tso ts Tsonga
|
420
|
+
tsn tn Tswana
|
421
|
+
tum Tumbuka
|
422
|
+
tur tr Turkish
|
423
|
+
ota Turkish, Ottoman (1500 - 1928)
|
424
|
+
tuk tk Turkmen
|
425
|
+
tyv Tuvinian
|
426
|
+
twi tw Twi
|
427
|
+
uga Ugaritic
|
428
|
+
uig ug Uighur
|
429
|
+
ukr uk Ukrainian
|
430
|
+
umb Umbundu
|
431
|
+
und Undetermined
|
432
|
+
urd ur Urdu
|
433
|
+
uzb uz Uzbek
|
434
|
+
vai Vai
|
435
|
+
ven Venda
|
436
|
+
vie vi Vietnamese
|
437
|
+
vol vo Volapük
|
438
|
+
vot Votic
|
439
|
+
wak Wakashan languages
|
440
|
+
wal Walamo
|
441
|
+
war Waray
|
442
|
+
was Washo
|
443
|
+
cym/wel cy Welsh
|
444
|
+
wol wo Wolof
|
445
|
+
xho xh Xhosa
|
446
|
+
sah Yakut
|
447
|
+
yao Yao
|
448
|
+
yap Yap
|
449
|
+
yid yi Yiddish
|
450
|
+
yor yo Yoruba
|
451
|
+
zap Zapotec
|
452
|
+
zen Zenaga
|
453
|
+
zha za Zhuang
|
454
|
+
zul zu Zulu
|
455
|
+
zun Zuni
|
456
|
+
__END_DATA__
|