odin 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,3 @@
1
+ module Odin # Top-level namespace of the odin gem.
2
+ autoload :VERSION, 'odin/version' # Registers Odin::VERSION so that 'odin/version' is only required the first time the constant is referenced.
3
+ end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the odin gem.
module Odin
  # Version string of the gem. Frozen so that no caller can mutate the
  # shared constant in place (e.g. with << or gsub!).
  VERSION = "0.0.4".freeze
end
@@ -0,0 +1,402 @@
1
+ require File.dirname(__FILE__) + '/noun_inflector.rb'
2
+ require File.dirname(__FILE__) + '/../lang/en/closed_class_words.rb'
3
+ require File.dirname(__FILE__) + '/../lang/en/adjectives.rb'
4
+ require File.dirname(__FILE__) + '/../lang/en/verbs.rb'
5
+
6
+ # Part of speech implementations for the state pattern.
7
+ module Conjuction # Conjunction part of speech. NOTE(review): the name is spelled "Conjuction" (sic); kept as-is because code outside this view may reference it.
8
+ # Intentionally left blank: this module defines no methods or constants of its own.
9
+ end
10
+
11
+ module Determiner # Determiner part of speech.
12
+ # Intentionally left blank: this module defines no methods or constants of its own.
13
+ end
14
+
15
+ module Pronoun # Pronoun part of speech; defines no live methods, everything below is commented out.
16
+ # def singular?
17
+ # @@SingularPronouns.member?(self)
18
+ # end
19
+ #
20
+ # def plural?
21
+ # @@PluralPronouns.member?(self)
22
+ # end
23
+
24
+ # def pluralize
25
+ #
26
+ # end
27
+ end
28
+
29
+ module Preposition # Preposition part of speech.
30
+ # Intentionally left blank: this module defines no methods or constants of its own.
31
+ end
32
+
33
+ module Adjective # Adjective part of speech; all behaviour comes from the included module.
34
+ include CachedAdjectives # NOTE(review): CachedAdjectives is not defined in this view; presumably it comes from lang/en/adjectives.rb required above. Confirm.
35
+ end
36
+
37
# Noun part of speech. Number (singular/plural) is decided by round-tripping
# the receiver through the inflector mixed in from NounInflector.
#
# NOTE(review): NounInflector is not visible here; this code assumes, as the
# original did, that pluralize(word) and singularize(word) each take a word
# and return the inflected form. Confirm against lib/noun_inflector.rb.
module Noun
  include NounInflector

  protected

  # Returns true when the receiver looks plural.
  #
  # The receiver is plural when singularizing and re-pluralizing it gives it
  # back unchanged. When that round trip fails, the receiver is still reported
  # as plural unless it demonstrably round-trips as a singular, so a word the
  # inflector cannot classify is accepted as either number.
  #
  # The previous implementation omitted the `self ==` comparison and returned
  # the (always truthy) pluralized string, so every noun was reported plural.
  def plural?
    return true if self == pluralize(singularize(self))

    # Computed directly instead of calling singular?, which would call back
    # into plural? and never terminate when neither round trip matches.
    self != singularize(pluralize(self))
  end

  # Returns true when the receiver looks singular.
  #
  # Mirror image of plural?: true when pluralizing and re-singularizing gives
  # the receiver back, or when the receiver does not round-trip as a plural.
  def singular?
    return true if self == singularize(pluralize(self))

    # Computed directly for the same non-termination reason as in plural?.
    self != pluralize(singularize(self))
  end
end
69
+
70
+ module Verb # Verb part of speech: mixes in CachedVerbs; every method below is commented out, so no live methods are defined here.
71
+ include CachedVerbs # NOTE(review): CachedVerbs is not defined in this view; presumably it comes from lang/en/verbs.rb required above. Confirm.
72
+
73
+ protected # Currently applies to nothing: only commented-out code follows.
74
+ # def pluralize
75
+ # # TODO
76
+ # "#{self}"
77
+ # end
78
+
79
+ # TODO How can we keep this separate from Nouns?
80
+ # def present_participle?
81
+ # # TODO
82
+ # end
83
+
84
+ # ### The object class for the result returned from calling
85
+ # ### Linguistics::EN::infinitive.
86
+ # class Infinitive < String
87
+ #
88
+ # ### Create and return a new Infinitive object.
89
+ # def initialize( word1, word2, suffix, rule )
90
+ # super( word1 )
91
+ # @word2 = word2
92
+ # @suffix = suffix
93
+ # @rule = rule
94
+ # end
95
+ #
96
+ #
97
+ # ######
98
+ # public
99
+ # ######
100
+ #
101
+ # # The fallback deconjugated form
102
+ # attr_reader :word2
103
+ #
104
+ # # The suffix used to identify the transform rule
105
+ # attr_reader :suffix
106
+ #
107
+ # # The rule used
108
+ # attr_reader :rule
109
+ # end
110
+
111
+ # ###############
112
+ # module_function
113
+ # ###############
114
+ #
115
+ # ### Return the infinitive form of the given word
116
+ # def infinitive( word )
117
+ # word = word.to_s
118
+ # word1 = word2 = suffix = rule = newword = ''
119
+ #
120
+ # if IrregularInfinitives.key?( word )
121
+ # word1 = IrregularInfinitives[ word ]
122
+ # rule = 'irregular'
123
+ # else
124
+ # # Build up $prefix{$suffix} as an array of prefixes, from longest to shortest.
125
+ # prefix, suffix = nil
126
+ # prefixes = Hash::new {|hsh,key| hsh[key] = []}
127
+ #
128
+ # # Build the hash of prefixes for the word
129
+ # 1.upto( word.length ) {|i|
130
+ # prefix = word[0, i]
131
+ # suffix = word[i..-1]
132
+ #
133
+ # (suffix.length - 1).downto( 0 ) {|j|
134
+ # newword = prefix + suffix[0, j]
135
+ # prefixes[ suffix ].push( newword )
136
+ # }
137
+ # }
138
+ #
139
+ # $stderr.puts "prefixes: %p" % prefixes if $DEBUG
140
+ #
141
+ # # Now check for rules covering the prefixes for this word, picking
142
+ # # the first one if one was found.
143
+ # if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
144
+ # rule = InfSuffixRules[ suffix ][:rule]
145
+ # shortestPrefix = InfSuffixRules[ suffix ][:word1]
146
+ # $stderr.puts "Using rule %p (%p) for suffix %p" %
147
+ # [ rule, shortestPrefix, suffix ] if $DEBUG
148
+ #
149
+ # case shortestPrefix
150
+ # when 0
151
+ # word1 = prefixes[ suffix ][ 0 ]
152
+ # word2 = prefixes[ suffix ][ 1 ]
153
+ # $stderr.puts "For sp = 0: word1: %p, word2: %p" %
154
+ # [ word1, word2 ] if $DEBUG
155
+ #
156
+ # when -1
157
+ # word1 = prefixes[ suffix ].last +
158
+ # InfSuffixRules[ suffix ][:suffix1]
159
+ # word2 = ''
160
+ # $stderr.puts "For sp = -1: word1: %p, word2: %p" %
161
+ # [ word1, word2 ] if $DEBUG
162
+ #
163
+ # when -2
164
+ # word1 = prefixes[ suffix ].last +
165
+ # InfSuffixRules[ suffix ][:suffix1]
166
+ # word2 = prefixes[ suffix ].last
167
+ # $stderr.puts "For sp = -2: word1: %p, word2: %p" %
168
+ # [ word1, word2 ] if $DEBUG
169
+ #
170
+ # when -3
171
+ # word1 = prefixes[ suffix ].last +
172
+ # InfSuffixRules[ suffix ][:suffix1]
173
+ # word2 = prefixes[ suffix ].last +
174
+ # InfSuffixRules[ suffix ][:suffix2]
175
+ # $stderr.puts "For sp = -3: word1: %p, word2: %p" %
176
+ # [ word1, word2 ] if $DEBUG
177
+ #
178
+ # when -4
179
+ # word1 = word
180
+ # word2 = ''
181
+ # $stderr.puts "For sp = -4: word1: %p, word2: %p" %
182
+ # [ word1, word2 ] if $DEBUG
183
+ #
184
+ # else
185
+ # raise IndexError,
186
+ # "Couldn't find rule for shortest prefix %p" %
187
+ # shortestPrefix
188
+ # end
189
+ #
190
+ # # Rules 12b and 15: Strip off 'ed' or 'ing'.
191
+ # if rule == '12b' or rule == '15'
192
+ # # Do we have a monosyllable of this form:
193
+ # # o 0+ Consonants
194
+ # # o 1+ Vowel
195
+ # # o 2 Non-wx
196
+ # # Eg: tipped => tipp?
197
+ # # Then return tip and tipp.
198
+ # # Eg: swimming => swimm?
199
+ # # Then return swim and swimm.
200
+ #
201
+ # if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
202
+ # word1 = $1 + $2
203
+ # word2 = $1 + $2 + $2
204
+ # end
205
+ # end
206
+ # end
207
+ # end
208
+ #
209
+ # return Infinitive::new( word1, word2, suffix, rule )
210
+ # end
211
+ end
212
+
213
+ # # From the Ruby Linguistics Project, release 1.0.5
214
+ # # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en.rb
215
+ # # CREDIT: deveiate
216
+ #
217
+ # ### Return the given phrase with the appropriate indefinite article ("a" or
218
+ # ### "an") prepended.
219
+ # def a( phrase, count=nil )
220
+ # md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
221
+ # pre, word, post = md.to_a[1,3]
222
+ # return phrase if word.nil? or word.empty?
223
+ #
224
+ # result = indef_article( word, count )
225
+ # return pre + result + post
226
+ # end
227
+ #
228
+ # ### Returns the given word with a prepended indefinite article, unless
229
+ # ### +count+ is non-nil and not singular.
230
+ # def indef_article( word, count )
231
+ # count ||= Linguistics::num
232
+ # return "#{count} #{word}" if
233
+ # count && /^(#{PL_count_one})$/i !~ count.to_s
234
+ #
235
+ # # Handle user-defined variants
236
+ # # return value if value = ud_match( word, A_a_user_defined )
237
+ #
238
+ # case word
239
+ #
240
+ # # Handle special cases
241
+ # when /^(#{A_explicit_an})/i
242
+ # return "an #{word}"
243
+ #
244
+ # # Handle abbreviations
245
+ # when /^(#{A_abbrev})/x
246
+ # return "an #{word}"
247
+ # when /^[aefhilmnorsx][.-]/i
248
+ # return "an #{word}"
249
+ # when /^[a-z][.-]/i
250
+ # return "a #{word}"
251
+ #
252
+ # # Handle consonants
253
+ # when /^[^aeiouy]/i
254
+ # return "a #{word}"
255
+ #
256
+ # # Handle special vowel-forms
257
+ # when /^e[uw]/i
258
+ # return "a #{word}"
259
+ # when /^onc?e\b/i
260
+ # return "a #{word}"
261
+ # when /^uni([^nmd]|mo)/i
262
+ # return "a #{word}"
263
+ # when /^u[bcfhjkqrst][aeiou]/i
264
+ # return "a #{word}"
265
+ #
266
+ # # Handle vowels
267
+ # when /^[aeiou]/i
268
+ # return "an #{word}"
269
+ #
270
+ # # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
271
+ # when /^(#{A_y_cons})/i
272
+ # return "an #{word}"
273
+ #
274
+ # # Otherwise, guess "a"
275
+ # else
276
+ # return "a #{word}"
277
+ # end
278
+ # end
279
+ #
280
+ # def normalize_count( count, default=2 )
281
+ # return default if count.nil? # Default to plural
282
+ # if /^(#{PL_count_one})$/i =~ count.to_s ||
283
+ # Linguistics::classical? &&
284
+ # /^(#{PL_count_zero})$/ =~ count.to_s
285
+ # return 1
286
+ # else
287
+ # return default
288
+ # end
289
+ # end
290
+ #
291
+ # ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
292
+ # ### examining the <tt>original</tt> input.
293
+ # def postprocess( original, inflected )
294
+ # inflected.sub!( /([^|]+)\|(.+)/ ) {
295
+ # Linguistics::classical? ? $2 : $1
296
+ # }
297
+ #
298
+ # case original
299
+ # when "I"
300
+ # return inflected
301
+ # when /^[A-Z]+$/
302
+ # return inflected.upcase
303
+ # when /^[A-Z]/
304
+ # # Can't use #capitalize, as it will downcase the rest of the string,
305
+ # # too.
306
+ # inflected[0,1] = inflected[0,1].upcase
307
+ # return inflected
308
+ # else
309
+ # return inflected
310
+ # end
311
+ # end
312
+ #
313
+ # ### Return the plural of the given verb +phrase+ if +count+ indicates it
314
+ # ### should be plural.
315
+ # def plural_verb( phrase, count=nil )
316
+ # md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
317
+ # pre, word, post = md.to_a[1,3]
318
+ # return phrase if word.nil? or word.empty?
319
+ #
320
+ # plural = postprocess( word,
321
+ # pluralize_special_verb(word, count) ||
322
+ # pluralize_general_verb(word, count) )
323
+ # return pre + plural + post
324
+ # end
325
+ #
326
+ # def pluralize_special_verb( word, count )
327
+ # count ||= Linguistics::num
328
+ # count = normalize_count( count )
329
+ #
330
+ # return nil if /^(#{PL_count_one})$/i =~ count.to_s
331
+ #
332
+ # # Handle user-defined verbs
333
+ # #if value = ud_match( word, PL_v_user_defined )
334
+ # # return value
335
+ # #end
336
+ #
337
+ # case word
338
+ #
339
+ # # Handle irregular present tense (simple and compound)
340
+ # when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
341
+ # return PL_v_irregular_pres_h[ $1.downcase ] + $2
342
+ #
343
+ # # Handle irregular future, preterite and perfect tenses
344
+ # when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
345
+ # return word
346
+ #
347
+ # # Handle special cases
348
+ # when /^(#{PL_v_special_s})$/, /\s/
349
+ # return nil
350
+ #
351
+ # # Handle standard 3rd person (chop the ...(e)s off single words)
352
+ # when /^(.*)([cs]h|[x]|zz|ss)es$/i
353
+ # return $1 + $2
354
+ # when /^(..+)ies$/i
355
+ # return "#{$1}y"
356
+ # when /^(.+)oes$/i
357
+ # return "#{$1}o"
358
+ # when /^(.*[^s])s$/i
359
+ # return $1
360
+ #
361
+ # # Otherwise, a regular verb (handle elsewhere)
362
+ # else
363
+ # return nil
364
+ # end
365
+ # end
366
+ #
367
+ # ### Pluralize regular verbs
368
+ # def pluralize_general_verb( word, count )
369
+ # count ||= Linguistics::num
370
+ # count = normalize_count( count )
371
+ #
372
+ # return word if /^(#{PL_count_one})$/i =~ count.to_s
373
+ #
374
+ # case word
375
+ #
376
+ # # Handle ambiguous present tenses (simple and compound)
377
+ # when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
378
+ # return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
379
+ #
380
+ # # Handle ambiguous preterite and perfect tenses
381
+ # when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
382
+ # return word
383
+ #
384
+ # # Otherwise, 1st or 2nd person is uninflected
385
+ # else
386
+ # return word
387
+ # end
388
+ # end
389
+ #
390
+ # def present_participle( word )
391
+ # plural = plural_verb( word.to_s, 2 )
392
+ #
393
+ # plural.sub!( /ie$/, 'y' ) or
394
+ # plural.sub!( /ue$/, 'u' ) or
395
+ # plural.sub!( /([auy])e$/, '$1' ) or
396
+ # plural.sub!( /i$/, '' ) or
397
+ # plural.sub!( /([^e])e$/, "\\1" ) or
398
+ # /er$/.match( plural ) or
399
+ # plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
400
+ #
401
+ # return "#{plural}ing"
402
+ # end
@@ -0,0 +1,23 @@
1
# An Array with a cursor: it holds the given elements and tracks which one
# is the "current" element, starting at the first.
class Star < Array
  # Builds a Star holding +elements+ in order, with the cursor on index 0.
  def initialize(*elements)
    @current = 0
    super(elements)
  end

  # Returns the element under the cursor (nil when the Star is empty).
  def current
    self[@current]
  end

  # Moves the cursor to the following element and returns the new index.
  #
  # Raises FragmentException when there is no following element, i.e. the
  # cursor is already on the last element or the Star is empty.
  def next
    raise FragmentException, "Fragment (consider revising)" if last_word?

    @current += 1
  end

  private

  # True when nothing follows the cursor. Uses >= rather than == so that an
  # empty Star (length - 1 == -1) or a cursor already past the end is treated
  # as "at the end" instead of being advanced silently.
  def last_word?
    @current >= length - 1
  end
end
@@ -0,0 +1,99 @@
1
+ require File.dirname(__FILE__) + '/string_bracketing.rb'
2
+ require File.dirname(__FILE__) + '/word.rb'
3
+
4
# Extensions to the core String class used by the grammar checker: sentence
# and word splitting, a few convenience operators, and grammar checking via an
# ATN parser.
class String
  include StringBracketing

  # One or more sentence-ending characters. A run such as "..." or "?!" counts
  # as a single terminator.
  @@ending_punctuation = /[.?!]+/

  # trim is an alias for strip.
  alias :trim :strip

  # Returns a copy of the receiver with every match of +pattern+ removed.
  def -(pattern)
    gsub(pattern, '')
  end

  # Joins the receiver and +str_to_join+ as path segments using File.join.
  def /(str_to_join)
    File.join(self, str_to_join)
  end

  # Returns a lower-cased copy in which every run of ending punctuation is
  # replaced by a single ".", so "." acts as the full stop regardless of which
  # terminator was written ("Hello again..." => "hello again.").
  # TODO how is this used with abbreviations?
  def normalize
    downcase.gsub(@@ending_punctuation, '.')
  end

  # Returns the sentences in the receiver as an Array of Strings, each one
  # including its ending punctuation and with surrounding whitespace trimmed.
  # Text after the last terminator is not returned, and "." in the pattern
  # does not match newlines, so a match never spans a line break.
  def sentences
    scan(/.*?#{@@ending_punctuation}/i).map { |sentence| sentence.trim }
  end

  # Returns the words in the receiver as an Array of Strings. The receiver is
  # split on whitespace and, from each non-empty token, the first run of
  # letters, apostrophes and hyphens is kept.
  # See StringTest#test_whitespace_then_character_string for why empty tokens
  # are skipped (leading whitespace produces one).
  # NOTE(review): a token containing none of those characters (e.g. "123")
  # contributes an empty string; behaviour kept as-is.
  def words
    tokens = split(/\s+/).reject { |token| token.empty? }
    tokens.map { |token| token[/[a-z\'\-]+/i].to_s }
  end

  # Returns every substring of the receiver matched by +pattern+, in order.
  # Whole matches are returned even when +pattern+ contains capture groups.
  def matches_for(pattern)
    gsub(pattern).to_a
  end

  # A sentence is determined to be grammatically correct if
  # a final state in the ATN is reached by the last word in the sentence.
  #
  # Returns true when parsing succeeds and false when the parser raises
  # UngrammaticalException. Other exceptions propagate to the caller.
  def grammatical?(language = :english)
    parse(language)
    true
  rescue UngrammaticalException
    false
  end

  # Checks each sentence of the receiver against the grammar for +language+
  # and returns the results joined with single spaces.
  #
  # Grammatical sentences are kept unchanged. Each ungrammatical sentence is
  # yielded to the block and replaced by the block's return value. When no
  # block is given, ungrammatical sentences are kept unchanged too (this used
  # to raise LocalJumpError).
  #
  # +language+ is now passed on to grammatical?; previously it was ignored and
  # every sentence was checked with the default language.
  # TODO add tests
  # TODO needs "yield message" etc (e.message)
  def check_grammar(language = :english)
    checked = sentences.map do |sentence|
      if sentence.grammatical?(language) || !block_given?
        sentence
      else
        yield sentence
      end
    end

    checked.join(" ")
  end

  private

  # Parses the receiver with the ATN for +language+.
  #
  # ATNs are kept at class level, one per language, to avoid the performance
  # hit of creating one for each string. The earlier single-slot cache kept
  # whichever language was requested first and reused it for every other one.
  def parse(language)
    @@atns ||= {}
    @@atns[language] ||= ATN.new(language)
    @@atns[language].parse(self)
  end
end