odin 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,405 @@
1
+ # From the Ruby Linguistics Project, release 1.0.5
2
+ # abk ab Abkhazian
3
+ # ace Achinese
4
+ # ach Acoli
5
+ # ada Adangme
6
+ # aar aa Afar
7
+ # afh Afrihili
8
+ # afr af Afrikaans
9
+ # afa Afro-Asiatic (Other)
10
+ # aka Akan
11
+ # akk Akkadian
12
+ # alb/sqi sq Albanian
13
+ # ale Aleut
14
+ # alg Algonquian languages
15
+ # tut Altaic (Other)
16
+ # amh am Amharic
17
+ # apa Apache languages
18
+ # ara ar Arabic
19
+ # arc Aramaic
20
+ # arp Arapaho
21
+ # arn Araucanian
22
+ # arw Arawak
23
+ # arm/hye hy Armenian
24
+ # art Artificial (Other)
25
+ # asm as Assamese
26
+ # ath Athapascan languages
27
+ # map Austronesian (Other)
28
+ # ava Avaric
29
+ # ave Avestan
30
+ # awa Awadhi
31
+ # aym ay Aymara
32
+ # aze az Azerbaijani
33
+ # nah Aztec
34
+ # ban Balinese
35
+ # bat Baltic (Other)
36
+ # bal Baluchi
37
+ # bam Bambara
38
+ # bai Bamileke languages
39
+ # bad Banda
40
+ # bnt Bantu (Other)
41
+ # bas Basa
42
+ # bak ba Bashkir
43
+ # baq/eus eu Basque
44
+ # bej Beja
45
+ # bem Bemba
46
+ # ben bn Bengali
47
+ # ber Berber (Other)
48
+ # bho Bhojpuri
49
+ # bih bh Bihari
50
+ # bik Bikol
51
+ # bin Bini
52
+ # bis bi Bislama
53
+ # bra Braj
54
+ # bre br Breton
55
+ # bug Buginese
56
+ # bul bg Bulgarian
57
+ # bua Buriat
58
+ # bur/mya my Burmese
59
+ # bel be Byelorussian
60
+ # cad Caddo
61
+ # car Carib
62
+ # cat ca Catalan
63
+ # cau Caucasian (Other)
64
+ # ceb Cebuano
65
+ # cel Celtic (Other)
66
+ # cai Central American Indian (Other)
67
+ # chg Chagatai
68
+ # cha Chamorro
69
+ # che Chechen
70
+ # chr Cherokee
71
+ # chy Cheyenne
72
+ # chb Chibcha
73
+ # chi/zho zh Chinese
74
+ # chn Chinook jargon
75
+ # cho Choctaw
76
+ # chu Church Slavic
77
+ # chv Chuvash
78
+ # cop Coptic
79
+ # cor Cornish
80
+ # cos co Corsican
81
+ # cre Cree
82
+ # mus Creek
83
+ # crp Creoles and Pidgins (Other)
84
+ # cpe Creoles and Pidgins, English-based (Other)
85
+ # cpf Creoles and Pidgins, French-based (Other)
86
+ # cpp Creoles and Pidgins, Portuguese-based (Other)
87
+ # cus Cushitic (Other)
88
+ # hr Croatian
89
+ # ces/cze cs Czech
90
+ # dak Dakota
91
+ # dan da Danish
92
+ # del Delaware
93
+ # din Dinka
94
+ # div Divehi
95
+ # doi Dogri
96
+ # dra Dravidian (Other)
97
+ # dua Duala
98
+ # dut/nla nl Dutch
99
+ # dum Dutch, Middle (ca. 1050-1350)
100
+ # dyu Dyula
101
+ # dzo dz Dzongkha
102
+ # efi Efik
103
+ # egy Egyptian (Ancient)
104
+ # eka Ekajuk
105
+ # elx Elamite
106
+ # eng en English
107
+ # enm English, Middle (ca. 1100-1500)
108
+ # ang English, Old (ca. 450-1100)
109
+ # esk Eskimo (Other)
110
+ # epo eo Esperanto
111
+ # est et Estonian
112
+ # ewe Ewe
113
+ # ewo Ewondo
114
+ # fan Fang
115
+ # fat Fanti
116
+ # fao fo Faroese
117
+ # fij fj Fijian
118
+ # fin fi Finnish
119
+ # fiu Finno-Ugrian (Other)
120
+ # fon Fon
121
+ # fra/fre fr French
122
+ # frm French, Middle (ca. 1400-1600)
123
+ # fro French, Old (842- ca. 1400)
124
+ # fry fy Frisian
125
+ # ful Fulah
126
+ # gaa Ga
127
+ # gae/gdh Gaelic (Scots)
128
+ # glg gl Gallegan
129
+ # lug Ganda
130
+ # gay Gayo
131
+ # gez Geez
132
+ # geo/kat ka Georgian
133
+ # deu/ger de German
134
+ # gmh German, Middle High (ca. 1050-1500)
135
+ # goh German, Old High (ca. 750-1050)
136
+ # gem Germanic (Other)
137
+ # gil Gilbertese
138
+ # gon Gondi
139
+ # got Gothic
140
+ # grb Grebo
141
+ # grc Greek, Ancient (to 1453)
142
+ # ell/gre el Greek, Modern (1453-)
143
+ # kal kl Greenlandic
144
+ # grn gn Guarani
145
+ # guj gu Gujarati
146
+ # hai Haida
147
+ # hau ha Hausa
148
+ # haw Hawaiian
149
+ # heb he Hebrew
150
+ # her Herero
151
+ # hil Hiligaynon
152
+ # him Himachali
153
+ # hin hi Hindi
154
+ # hmo Hiri Motu
155
+ # hun hu Hungarian
156
+ # hup Hupa
157
+ # iba Iban
158
+ # ice/isl is Icelandic
159
+ # ibo Igbo
160
+ # ijo Ijo
161
+ # ilo Iloko
162
+ # inc Indic (Other)
163
+ # ine Indo-European (Other)
164
+ # ind id Indonesian
165
+ # ina ia Interlingua (International Auxiliary language Association)
166
+ # ile Interlingue
167
+ # iku iu Inuktitut
168
+ # ipk ik Inupiak
169
+ # ira Iranian (Other)
170
+ # gai/iri ga Irish
171
+ # sga Irish, Old (to 900)
172
+ # mga Irish, Middle (900 - 1200)
173
+ # iro Iroquoian languages
174
+ # ita it Italian
175
+ # jpn ja Japanese
176
+ # jav/jaw jv/jw Javanese
177
+ # jrb Judeo-Arabic
178
+ # jpr Judeo-Persian
179
+ # kab Kabyle
180
+ # kac Kachin
181
+ # kam Kamba
182
+ # kan kn Kannada
183
+ # kau Kanuri
184
+ # kaa Kara-Kalpak
185
+ # kar Karen
186
+ # kas ks Kashmiri
187
+ # kaw Kawi
188
+ # kaz kk Kazakh
189
+ # kha Khasi
190
+ # khm km Khmer
191
+ # khi Khoisan (Other)
192
+ # kho Khotanese
193
+ # kik Kikuyu
194
+ # kin rw Kinyarwanda
195
+ # kir ky Kirghiz
196
+ # kom Komi
197
+ # kon Kongo
198
+ # kok Konkani
199
+ # kor ko Korean
200
+ # kpe Kpelle
201
+ # kro Kru
202
+ # kua Kuanyama
203
+ # kum Kumyk
204
+ # kur ku Kurdish
205
+ # kru Kurukh
206
+ # kus Kusaie
207
+ # kut Kutenai
208
+ # lad Ladino
209
+ # lah Lahnda
210
+ # lam Lamba
211
+ # oci oc Langue d'Oc (post 1500)
212
+ # lao lo Lao
213
+ # lat la Latin
214
+ # lav lv Latvian
215
+ # ltz Letzeburgesch
216
+ # lez Lezghian
217
+ # lin ln Lingala
218
+ # lit lt Lithuanian
219
+ # loz Lozi
220
+ # lub Luba-Katanga
221
+ # lui Luiseno
222
+ # lun Lunda
223
+ # luo Luo (Kenya and Tanzania)
224
+ # mac/mke mk Macedonian
225
+ # mad Madurese
226
+ # mag Magahi
227
+ # mai Maithili
228
+ # mak Makasar
229
+ # mlg mg Malagasy
230
+ # may/msa ms Malay
231
+ # mal ml Malayalam
232
+ # mlt mt Maltese
233
+ # man Mandingo
234
+ # mni Manipuri
235
+ # mno Manobo languages
236
+ # max Manx
237
+ # mao/mri mi Maori
238
+ # mar mr Marathi
239
+ # chm Mari
240
+ # mah Marshall
241
+ # mwr Marwari
242
+ # mas Masai
243
+ # myn Mayan languages
244
+ # men Mende
245
+ # mic Micmac
246
+ # min Minangkabau
247
+ # mis Miscellaneous (Other)
248
+ # moh Mohawk
249
+ # mol mo Moldavian
250
+ # mkh Mon-Khmer (Other)
251
+ # lol Mongo
252
+ # mon mn Mongolian
253
+ # mos Mossi
254
+ # mul Multiple languages
255
+ # mun Munda languages
256
+ # nau na Nauru
257
+ # nav Navajo
258
+ # nde Ndebele, North
259
+ # nbl Ndebele, South
260
+ # ndo Ndonga
261
+ # nep ne Nepali
262
+ # new Newari
263
+ # nic Niger-Kordofanian (Other)
264
+ # ssa Nilo-Saharan (Other)
265
+ # niu Niuean
266
+ # non Norse, Old
267
+ # nai North American Indian (Other)
268
+ # nor no Norwegian
269
+ # nno Norwegian (Nynorsk)
270
+ # nub Nubian languages
271
+ # nym Nyamwezi
272
+ # nya Nyanja
273
+ # nyn Nyankole
274
+ # nyo Nyoro
275
+ # nzi Nzima
276
+ # oji Ojibwa
277
+ # ori or Oriya
278
+ # orm om Oromo
279
+ # osa Osage
280
+ # oss Ossetic
281
+ # oto Otomian languages
282
+ # pal Pahlavi
283
+ # pau Palauan
284
+ # pli Pali
285
+ # pam Pampanga
286
+ # pag Pangasinan
287
+ # pan pa Panjabi
288
+ # pap Papiamento
289
+ # paa Papuan-Australian (Other)
290
+ # fas/per fa Persian
291
+ # peo Persian, Old (ca 600 - 400 B.C.)
292
+ # phn Phoenician
293
+ # pol pl Polish
294
+ # pon Ponape
295
+ # por pt Portuguese
296
+ # pra Prakrit languages
297
+ # pro Provencal, Old (to 1500)
298
+ # pus ps Pushto
299
+ # que qu Quechua
300
+ # roh rm Rhaeto-Romance
301
+ # raj Rajasthani
302
+ # rar Rarotongan
303
+ # roa Romance (Other)
304
+ # ron/rum ro Romanian
305
+ # rom Romany
306
+ # run rn Rundi
307
+ # rus ru Russian
308
+ # sal Salishan languages
309
+ # sam Samaritan Aramaic
310
+ # smi Sami languages
311
+ # smo sm Samoan
312
+ # sad Sandawe
313
+ # sag sg Sango
314
+ # san sa Sanskrit
315
+ # srd Sardinian
316
+ # sco Scots
317
+ # sel Selkup
318
+ # sem Semitic (Other)
319
+ # sr Serbian
320
+ # scr sh Serbo-Croatian
321
+ # srr Serer
322
+ # shn Shan
323
+ # sna sn Shona
324
+ # sid Sidamo
325
+ # bla Siksika
326
+ # snd sd Sindhi
327
+ # sin si Singhalese
328
+ # sit Sino-Tibetan (Other)
329
+ # sio Siouan languages
330
+ # sla Slavic (Other)
331
+ # ss Siswati
332
+ # slk/slo sk Slovak
333
+ # slv sl Slovenian
334
+ # sog Sogdian
335
+ # som so Somali
336
+ # son Songhai
337
+ # wen Sorbian languages
338
+ # nso Sotho, Northern
339
+ # sot st Sotho, Southern
340
+ # sai South American Indian (Other)
341
+ # esl/spa es Spanish
342
+ # suk Sukuma
343
+ # sux Sumerian
344
+ # sun su Sundanese
345
+ # sus Susu
346
+ # swa sw Swahili
347
+ # ssw Swazi
348
+ # sve/swe sv Swedish
349
+ # syr Syriac
350
+ # tgl tl Tagalog
351
+ # tah Tahitian
352
+ # tgk tg Tajik
353
+ # tmh Tamashek
354
+ # tam ta Tamil
355
+ # tat tt Tatar
356
+ # tel te Telugu
357
+ # ter Tereno
358
+ # tha th Thai
359
+ # bod/tib bo Tibetan
360
+ # tig Tigre
361
+ # tir ti Tigrinya
362
+ # tem Timne
363
+ # tiv Tivi
364
+ # tli Tlingit
365
+ # tog Tonga (Nyasa)
366
+ # ton to Tonga (Tonga Islands)
367
+ # tru Truk
368
+ # tsi Tsimshian
369
+ # tso ts Tsonga
370
+ # tsn tn Tswana
371
+ # tum Tumbuka
372
+ # tur tr Turkish
373
+ # ota Turkish, Ottoman (1500 - 1928)
374
+ # tuk tk Turkmen
375
+ # tyv Tuvinian
376
+ # twi tw Twi
377
+ # uga Ugaritic
378
+ # uig ug Uighur
379
+ # ukr uk Ukrainian
380
+ # umb Umbundu
381
+ # und Undetermined
382
+ # urd ur Urdu
383
+ # uzb uz Uzbek
384
+ # vai Vai
385
+ # ven Venda
386
+ # vie vi Vietnamese
387
+ # vol vo Volapük
388
+ # vot Votic
389
+ # wak Wakashan languages
390
+ # wal Walamo
391
+ # war Waray
392
+ # was Washo
393
+ # cym/wel cy Welsh
394
+ # wol wo Wolof
395
+ # xho xh Xhosa
396
+ # sah Yakut
397
+ # yao Yao
398
+ # yap Yap
399
+ # yid yi Yiddish
400
+ # yor yo Yoruba
401
+ # zap Zapotec
402
+ # zen Zenaga
403
+ # zha za Zhuang
404
+ # zul zu Zulu
405
+ # zun Zuni
@@ -0,0 +1,15 @@
1
class Array
  # Renders the array as an indented, one-item-per-line string.
  #
  # Items that respond to +inspect_as_tree+ (nested arrays, for instance) are
  # rendered recursively, one +indentation+ step deeper. Every other item is
  # written as its +inspect+ form, prefixed by +level+ spaces and followed by
  # a newline.
  #
  # @param indentation [Integer] number of spaces added per nesting level
  # @param level [Integer] number of spaces placed before items at this depth
  # @return [String] the rendered tree ("" for an empty array)
  def inspect_as_tree(indentation = 4, level = 0)
    each_with_object("") do |element, rendered|
      rendered << if element.respond_to?(:inspect_as_tree)
                    element.inspect_as_tree(indentation, level + indentation)
                  else
                    "#{" " * level}#{element.inspect}\n"
                  end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require File.dirname(__FILE__)/'..'/'lang'/'en'/'atn.rb' # TODO no "en"
2
+
3
# Sentence parser that currently accepts only English.
#
# NOTE(review): "ATN" presumably stands for augmented transition network;
# confirm against the rest of the project.
#
# The grammar itself is not defined in this class: #parse calls +root+, which
# is not defined here and is presumably supplied by the included English
# module. The private helpers below are building blocks for that grammar.
class ATN
  include English

  # @param language [Symbol] only :english is accepted
  # @param dialect [Symbol] accepted but not used by this constructor
  # @param allow_passive [Boolean] accepted but not used by this constructor
  # @raise [LanguageException] for any language other than :english
  def initialize(language = :english, dialect = :US, allow_passive = true)
    # Nothing to set up for English (a `@tree = []` initialisation was
    # commented out here).
    return if :english == language

    # `LanguageException("...")` would be parsed as a call to a *method* named
    # LanguageException and fail with NoMethodError; raise the class instead.
    # LanguageException is not defined in this file and must be loaded from
    # elsewhere.
    raise LanguageException, "Unsupported language."
  end

  # Parses +string+ one sentence at a time.
  #
  # For every element of +string.sentences+, the sentence's +words+ are stored
  # in @words and the result of +root+ is appended to the output.
  #
  # @param string [#sentences] text to parse
  # @return [String] the concatenated per-sentence results of +root+
  def parse(string)
    output = ""
    string.sentences.each do |sentence|
      @words = sentence.words
      output += root
    end

    output
  end

  private

  # Removes the first entry of @words, wraps it in a Word and stores it in
  # @star.
  #
  # @deprecated
  # @return [Word] the new value of @star
  # @raise [FragmentException] when @words is empty
  def next_word
    raise FragmentException, "Fragment (consider revising)" if @words.empty?

    @star = Word.new(@words.shift)
  end

  # Returns +marker+ followed by the quoted, parenthesis-bracketed +word+.
  # +quote+ and +bracket+ are helpers defined outside this file.
  def tag(marker, word)
    marker + word.quote.bracket('(')
  end

  # Returns +marker+ followed by the parenthesis-bracketed +phrase+.
  def tag_phrase(marker, phrase)
    marker + phrase.bracket('(')
  end

  # The following methods probably aren't the most efficient way of doing
  # things (passing around blocks in particular). However, it makes the
  # implementation simpler.

  # Same as #required, but returns "" instead of raising when the current word
  # is not the expected part of speech.
  #
  # @return [Object, String] the block's result, or "" on a mismatch
  def optional(part_of_speech, &block)
    required(part_of_speech, &block)
  rescue UngrammaticalException
    # Ignore it--this is optional
    ""
  end

  # Yields @star to the block when its part of speech equals +part_of_speech+.
  #
  # @return [Object] the block's result
  # @raise [UngrammaticalException] when the parts of speech differ
  def required(part_of_speech, &block)
    unless part_of_speech == @star.part_of_speech
      raise UngrammaticalException,
            "Missing #{part_of_speech.to_s.downcase}: received #{@star.quote}"
    end

    yield @star
  end

  # Same as #required_phrase, but returns "" instead of raising when the
  # phrase is ungrammatical.
  #
  # @return [Object, String] the block's result, or "" on failure
  def optional_phrase(&block)
    required_phrase(&block)
  rescue UngrammaticalException
    # Ignore it--this is optional
    ""
  end

  # Runs the block as long as there are still words left in @words.
  #
  # @return [Object] the block's result
  # @raise [UngrammaticalException] when @words is empty
  def required_phrase(&block)
    raise UngrammaticalException, "Incomplete phrase" if @words.empty?

    yield
  end
end
79
+
80
# Raised when input does not match the grammar. Derives from StandardError
# rather than Exception so that a plain `rescue` can handle it; only
# interpreter-level conditions (signals, exit, out of memory) should sit
# directly under Exception.
class UngrammaticalException < StandardError; end

# Raised when the parser needs another word but none are left.
class FragmentException < UngrammaticalException; end

# NOTE(review): not raised anywhere in this file; presumably signals a passive
# construction when passives are disallowed -- confirm against callers.
class PassiveException < UngrammaticalException; end