odin 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,405 @@
1
+ # From the Ruby Linguistics Project, release 1.0.5
2
+ # abk ab Abkhazian
3
+ # ace Achinese
4
+ # ach Acoli
5
+ # ada Adangme
6
+ # aar aa Afar
7
+ # afh Afrihili
8
+ # afr af Afrikaans
9
+ # afa Afro-Asiatic (Other)
10
+ # aka Akan
11
+ # akk Akkadian
12
+ # alb/sqi sq Albanian
13
+ # ale Aleut
14
+ # alg Algonquian languages
15
+ # tut Altaic (Other)
16
+ # amh am Amharic
17
+ # apa Apache languages
18
+ # ara ar Arabic
19
+ # arc Aramaic
20
+ # arp Arapaho
21
+ # arn Araucanian
22
+ # arw Arawak
23
+ # arm/hye hy Armenian
24
+ # art Artificial (Other)
25
+ # asm as Assamese
26
+ # ath Athapascan languages
27
+ # map Austronesian (Other)
28
+ # ava Avaric
29
+ # ave Avestan
30
+ # awa Awadhi
31
+ # aym ay Aymara
32
+ # aze az Azerbaijani
33
+ # nah Aztec
34
+ # ban Balinese
35
+ # bat Baltic (Other)
36
+ # bal Baluchi
37
+ # bam Bambara
38
+ # bai Bamileke languages
39
+ # bad Banda
40
+ # bnt Bantu (Other)
41
+ # bas Basa
42
+ # bak ba Bashkir
43
+ # baq/eus eu Basque
44
+ # bej Beja
45
+ # bem Bemba
46
+ # ben bn Bengali
47
+ # ber Berber (Other)
48
+ # bho Bhojpuri
49
+ # bih bh Bihari
50
+ # bik Bikol
51
+ # bin Bini
52
+ # bis bi Bislama
53
+ # bra Braj
54
+ # bre br Breton
55
+ # bug Buginese
56
+ # bul bg Bulgarian
57
+ # bua Buriat
58
+ # bur/mya my Burmese
59
+ # bel be Byelorussian
60
+ # cad Caddo
61
+ # car Carib
62
+ # cat ca Catalan
63
+ # cau Caucasian (Other)
64
+ # ceb Cebuano
65
+ # cel Celtic (Other)
66
+ # cai Central American Indian (Other)
67
+ # chg Chagatai
68
+ # cha Chamorro
69
+ # che Chechen
70
+ # chr Cherokee
71
+ # chy Cheyenne
72
+ # chb Chibcha
73
+ # chi/zho zh Chinese
74
+ # chn Chinook jargon
75
+ # cho Choctaw
76
+ # chu Church Slavic
77
+ # chv Chuvash
78
+ # cop Coptic
79
+ # cor Cornish
80
+ # cos co Corsican
81
+ # cre Cree
82
+ # mus Creek
83
+ # crp Creoles and Pidgins (Other)
84
+ # cpe Creoles and Pidgins, English-based (Other)
85
+ # cpf Creoles and Pidgins, French-based (Other)
86
+ # cpp Creoles and Pidgins, Portuguese-based (Other)
87
+ # cus Cushitic (Other)
88
+ # hr Croatian
89
+ # ces/cze cs Czech
90
+ # dak Dakota
91
+ # dan da Danish
92
+ # del Delaware
93
+ # din Dinka
94
+ # div Divehi
95
+ # doi Dogri
96
+ # dra Dravidian (Other)
97
+ # dua Duala
98
+ # dut/nla nl Dutch
99
+ # dum Dutch, Middle (ca. 1050-1350)
100
+ # dyu Dyula
101
+ # dzo dz Dzongkha
102
+ # efi Efik
103
+ # egy Egyptian (Ancient)
104
+ # eka Ekajuk
105
+ # elx Elamite
106
+ # eng en English
107
+ # enm English, Middle (ca. 1100-1500)
108
+ # ang English, Old (ca. 450-1100)
109
+ # esk Eskimo (Other)
110
+ # epo eo Esperanto
111
+ # est et Estonian
112
+ # ewe Ewe
113
+ # ewo Ewondo
114
+ # fan Fang
115
+ # fat Fanti
116
+ # fao fo Faroese
117
+ # fij fj Fijian
118
+ # fin fi Finnish
119
+ # fiu Finno-Ugrian (Other)
120
+ # fon Fon
121
+ # fra/fre fr French
122
+ # frm French, Middle (ca. 1400-1600)
123
+ # fro French, Old (842- ca. 1400)
124
+ # fry fy Frisian
125
+ # ful Fulah
126
+ # gaa Ga
127
+ # gae/gdh Gaelic (Scots)
128
+ # glg gl Gallegan
129
+ # lug Ganda
130
+ # gay Gayo
131
+ # gez Geez
132
+ # geo/kat ka Georgian
133
+ # deu/ger de German
134
+ # gmh German, Middle High (ca. 1050-1500)
135
+ # goh German, Old High (ca. 750-1050)
136
+ # gem Germanic (Other)
137
+ # gil Gilbertese
138
+ # gon Gondi
139
+ # got Gothic
140
+ # grb Grebo
141
+ # grc Greek, Ancient (to 1453)
142
+ # ell/gre el Greek, Modern (1453-)
143
+ # kal kl Greenlandic
144
+ # grn gn Guarani
145
+ # guj gu Gujarati
146
+ # hai Haida
147
+ # hau ha Hausa
148
+ # haw Hawaiian
149
+ # heb he Hebrew
150
+ # her Herero
151
+ # hil Hiligaynon
152
+ # him Himachali
153
+ # hin hi Hindi
154
+ # hmo Hiri Motu
155
+ # hun hu Hungarian
156
+ # hup Hupa
157
+ # iba Iban
158
+ # ice/isl is Icelandic
159
+ # ibo Igbo
160
+ # ijo Ijo
161
+ # ilo Iloko
162
+ # inc Indic (Other)
163
+ # ine Indo-European (Other)
164
+ # ind id Indonesian
165
+ # ina ia Interlingua (International Auxiliary language Association)
166
+ # ile Interlingue
167
+ # iku iu Inuktitut
168
+ # ipk ik Inupiak
169
+ # ira Iranian (Other)
170
+ # gai/iri ga Irish
171
+ # sga Irish, Old (to 900)
172
+ # mga Irish, Middle (900 - 1200)
173
+ # iro Iroquoian languages
174
+ # ita it Italian
175
+ # jpn ja Japanese
176
+ # jav/jaw jv/jw Javanese
177
+ # jrb Judeo-Arabic
178
+ # jpr Judeo-Persian
179
+ # kab Kabyle
180
+ # kac Kachin
181
+ # kam Kamba
182
+ # kan kn Kannada
183
+ # kau Kanuri
184
+ # kaa Kara-Kalpak
185
+ # kar Karen
186
+ # kas ks Kashmiri
187
+ # kaw Kawi
188
+ # kaz kk Kazakh
189
+ # kha Khasi
190
+ # khm km Khmer
191
+ # khi Khoisan (Other)
192
+ # kho Khotanese
193
+ # kik Kikuyu
194
+ # kin rw Kinyarwanda
195
+ # kir ky Kirghiz
196
+ # kom Komi
197
+ # kon Kongo
198
+ # kok Konkani
199
+ # kor ko Korean
200
+ # kpe Kpelle
201
+ # kro Kru
202
+ # kua Kuanyama
203
+ # kum Kumyk
204
+ # kur ku Kurdish
205
+ # kru Kurukh
206
+ # kus Kusaie
207
+ # kut Kutenai
208
+ # lad Ladino
209
+ # lah Lahnda
210
+ # lam Lamba
211
+ # oci oc Langue d'Oc (post 1500)
212
+ # lao lo Lao
213
+ # lat la Latin
214
+ # lav lv Latvian
215
+ # ltz Letzeburgesch
216
+ # lez Lezghian
217
+ # lin ln Lingala
218
+ # lit lt Lithuanian
219
+ # loz Lozi
220
+ # lub Luba-Katanga
221
+ # lui Luiseno
222
+ # lun Lunda
223
+ # luo Luo (Kenya and Tanzania)
224
+ # mac/mke mk Macedonian
225
+ # mad Madurese
226
+ # mag Magahi
227
+ # mai Maithili
228
+ # mak Makasar
229
+ # mlg mg Malagasy
230
+ # may/msa ms Malay
231
+ # mal Malayalam
232
+ # mlt ml Maltese
233
+ # man Mandingo
234
+ # mni Manipuri
235
+ # mno Manobo languages
236
+ # max Manx
237
+ # mao/mri mi Maori
238
+ # mar mr Marathi
239
+ # chm Mari
240
+ # mah Marshall
241
+ # mwr Marwari
242
+ # mas Masai
243
+ # myn Mayan languages
244
+ # men Mende
245
+ # mic Micmac
246
+ # min Minangkabau
247
+ # mis Miscellaneous (Other)
248
+ # moh Mohawk
249
+ # mol mo Moldavian
250
+ # mkh Mon-Khmer (Other)
251
+ # lol Mongo
252
+ # mon mn Mongolian
253
+ # mos Mossi
254
+ # mul Multiple languages
255
+ # mun Munda languages
256
+ # nau na Nauru
257
+ # nav Navajo
258
+ # nde Ndebele, North
259
+ # nbl Ndebele, South
260
+ # ndo Ndongo
261
+ # nep ne Nepali
262
+ # new Newari
263
+ # nic Niger-Kordofanian (Other)
264
+ # ssa Nilo-Saharan (Other)
265
+ # niu Niuean
266
+ # non Norse, Old
267
+ # nai North American Indian (Other)
268
+ # nor no Norwegian
269
+ # nno Norwegian (Nynorsk)
270
+ # nub Nubian languages
271
+ # nym Nyamwezi
272
+ # nya Nyanja
273
+ # nyn Nyankole
274
+ # nyo Nyoro
275
+ # nzi Nzima
276
+ # oji Ojibwa
277
+ # ori or Oriya
278
+ # orm om Oromo
279
+ # osa Osage
280
+ # oss Ossetic
281
+ # oto Otomian languages
282
+ # pal Pahlavi
283
+ # pau Palauan
284
+ # pli Pali
285
+ # pam Pampanga
286
+ # pag Pangasinan
287
+ # pan pa Panjabi
288
+ # pap Papiamento
289
+ # paa Papuan-Australian (Other)
290
+ # fas/per fa Persian
291
+ # peo Persian, Old (ca 600 - 400 B.C.)
292
+ # phn Phoenician
293
+ # pol pl Polish
294
+ # pon Ponape
295
+ # por pt Portuguese
296
+ # pra Prakrit languages
297
+ # pro Provencal, Old (to 1500)
298
+ # pus ps Pushto
299
+ # que qu Quechua
300
+ # roh rm Rhaeto-Romance
301
+ # raj Rajasthani
302
+ # rar Rarotongan
303
+ # roa Romance (Other)
304
+ # ron/rum ro Romanian
305
+ # rom Romany
306
+ # run rn Rundi
307
+ # rus ru Russian
308
+ # sal Salishan languages
309
+ # sam Samaritan Aramaic
310
+ # smi Sami languages
311
+ # smo sm Samoan
312
+ # sad Sandawe
313
+ # sag sg Sango
314
+ # san sa Sanskrit
315
+ # srd Sardinian
316
+ # sco Scots
317
+ # sel Selkup
318
+ # sem Semitic (Other)
319
+ # sr Serbian
320
+ # scr sh Serbo-Croatian
321
+ # srr Serer
322
+ # shn Shan
323
+ # sna sn Shona
324
+ # sid Sidamo
325
+ # bla Siksika
326
+ # snd sd Sindhi
327
+ # sin si Singhalese
328
+ # sit Sino-Tibetan (Other)
329
+ # sio Siouan languages
330
+ # sla Slavic (Other)
331
+ # ss Siswati
332
+ # slk/slo sk Slovak
333
+ # slv sl Slovenian
334
+ # sog Sogdian
335
+ # som so Somali
336
+ # son Songhai
337
+ # wen Sorbian languages
338
+ # nso Sotho, Northern
339
+ # sot st Sotho, Southern
340
+ # sai South American Indian (Other)
341
+ # esl/spa es Spanish
342
+ # suk Sukuma
343
+ # sux Sumerian
344
+ # sun su Sundanese
345
+ # sus Susu
346
+ # swa sw Swahili
347
+ # ssw Swazi
348
+ # sve/swe sv Swedish
349
+ # syr Syriac
350
+ # tgl tl Tagalog
351
+ # tah Tahitian
352
+ # tgk tg Tajik
353
+ # tmh Tamashek
354
+ # tam ta Tamil
355
+ # tat tt Tatar
356
+ # tel te Telugu
357
+ # ter Tereno
358
+ # tha th Thai
359
+ # bod/tib bo Tibetan
360
+ # tig Tigre
361
+ # tir ti Tigrinya
362
+ # tem Timne
363
+ # tiv Tivi
364
+ # tli Tlingit
365
+ # tog to Tonga (Nyasa)
366
+ # ton Tonga (Tonga Islands)
367
+ # tru Truk
368
+ # tsi Tsimshian
369
+ # tso ts Tsonga
370
+ # tsn tn Tswana
371
+ # tum Tumbuka
372
+ # tur tr Turkish
373
+ # ota Turkish, Ottoman (1500 - 1928)
374
+ # tuk tk Turkmen
375
+ # tyv Tuvinian
376
+ # twi tw Twi
377
+ # uga Ugaritic
378
+ # uig ug Uighur
379
+ # ukr uk Ukrainian
380
+ # umb Umbundu
381
+ # und Undetermined
382
+ # urd ur Urdu
383
+ # uzb uz Uzbek
384
+ # vai Vai
385
+ # ven Venda
386
+ # vie vi Vietnamese
387
+ # vol vo Volapük
388
+ # vot Votic
389
+ # wak Wakashan languages
390
+ # wal Walamo
391
+ # war Waray
392
+ # was Washo
393
+ # cym/wel cy Welsh
394
+ # wol wo Wolof
395
+ # xho xh Xhosa
396
+ # sah Yakut
397
+ # yao Yao
398
+ # yap Yap
399
+ # yid yi Yiddish
400
+ # yor yo Yoruba
401
+ # zap Zapotec
402
+ # zen Zenaga
403
+ # zha za Zhuang
404
+ # zul zu Zulu
405
+ # zun Zuni
@@ -0,0 +1,15 @@
1
# Core-class extension: renders (possibly nested) arrays as indented text.
class Array
  # Builds a multi-line string with one line per leaf element.
  #
  # Elements that themselves respond to +inspect_as_tree+ are rendered
  # recursively, one nesting step deeper; every other element is written as
  # its +inspect+ form, prefixed by +level+ spaces and followed by a newline.
  #
  # @param indentation [Integer] spaces added to +level+ for each nesting step
  # @param level [Integer] spaces placed in front of this array's own leaves
  # @return [String] the rendered tree ("" for an empty array)
  def inspect_as_tree(indentation = 4, level = 0)
    each_with_object("") do |element, rendered|
      if element.respond_to?(:inspect_as_tree)
        rendered << element.inspect_as_tree(indentation, level + indentation)
      else
        rendered << "#{" " * level}#{element.inspect}\n"
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require File.dirname(__FILE__)/'..'/'lang'/'en'/'atn.rb' # TODO no "en"
2
+
3
# Sentence parser: splits a string into sentences, loads each sentence's words
# into @words and concatenates whatever +root+ returns for every sentence.
#
# NOTE(review): +root+ is not defined in this class; presumably it is supplied
# by the +English+ mixin -- confirm. +String#sentences+, +#words+, +#quote+ and
# +#bracket+ are likewise expected to be defined elsewhere in the project.
class ATN
  include English

  # @param language [Symbol] only :english is accepted
  # @param dialect [Symbol] accepted but not used by this class
  # @param allow_passive [Boolean] accepted but not used by this class
  # @raise [LanguageException] when +language+ is anything other than :english
  def initialize(language = :english, dialect = :US, allow_passive = true)
    # Nothing to set up for English (the original had `@tree = []` disabled here).
    return if :english == language

    # Raise the exception class itself. The previous `LanguageException("...")`
    # was a method call and therefore failed with NoMethodError.
    # NOTE(review): LanguageException is not defined in this file -- confirm it
    # is provided elsewhere in the project.
    raise LanguageException, "Unsupported language."
  end

  # Parses every sentence of +string+.
  #
  # @param string [String] text to parse; must respond to +sentences+
  # @return [String] concatenation of the +root+ result for each sentence
  def parse(string)
    output = ""
    string.sentences.each do |sentence|
      @words = sentence.words
      output += root
    end

    output
  end

  private

  # Moves to the next word: shifts it off @words and stores it, wrapped in a
  # Word, in @star.
  #
  # @deprecated
  # @raise [FragmentException] when no words are left
  def next_word
    raise FragmentException, "Fragment (consider revising)" if @words.empty?

    @star = Word.new(@words.shift)
  end

  # Returns +marker+ followed by the quoted +word+ wrapped via bracket('(').
  def tag(marker, word)
    marker + word.quote.bracket('(')
  end

  # Returns +marker+ followed by +phrase+ wrapped via bracket('(').
  def tag_phrase(marker, phrase)
    marker + phrase.bracket('(')
  end

  # The following methods probably aren't the most efficient way of doing
  # things (passing blocks around in particular), but they keep the
  # implementation simple.

  # Like #required, but an UngrammaticalException (or subclass) is swallowed
  # and "" is returned instead, because the part of speech is optional.
  def optional(part_of_speech, &block)
    required(part_of_speech, &block)
  rescue UngrammaticalException
    ""
  end

  # Yields the current word (@star) when its part of speech equals
  # +part_of_speech+ and returns the block's result.
  #
  # @raise [UngrammaticalException] when the parts of speech differ
  def required(part_of_speech, &block)
    unless part_of_speech == @star.part_of_speech
      raise UngrammaticalException, "Missing #{part_of_speech.to_s.downcase}: received #{@star.quote}"
    end

    yield @star
  end

  # Like #required_phrase, but an UngrammaticalException (or subclass) is
  # swallowed and "" is returned instead, because the phrase is optional.
  def optional_phrase(&block)
    required_phrase(&block)
  rescue UngrammaticalException
    ""
  end

  # Yields to the block and returns its result, provided words remain.
  #
  # @raise [UngrammaticalException] when @words is empty
  def required_phrase(&block)
    raise UngrammaticalException, "Incomplete phrase" if @words.empty?

    yield
  end
end
79
+
80
# Base error for input the parser rejects; ATN#required and
# ATN#required_phrase raise it directly.
#
# Inherits from StandardError (not Exception) so that a bare `rescue` or
# `rescue StandardError` can handle it, as is conventional for application
# errors. `rescue UngrammaticalException` and `rescue Exception` keep working.
class UngrammaticalException < StandardError; end

# Raised by ATN#next_word when the sentence runs out of words.
class FragmentException < UngrammaticalException; end

# NOTE(review): not raised anywhere in the visible code; presumably signals a
# passive construction when passives are disallowed -- confirm.
class PassiveException < UngrammaticalException; end