odin 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,3 @@
1
# Top-level namespace for the odin gem.
module Odin
  # Register the version constant for lazy loading: 'odin/version' is only
  # required the first time Odin::VERSION is referenced.
  autoload(:VERSION, 'odin/version')
end
@@ -0,0 +1,3 @@
1
# Version information for the odin gem.
module Odin
  # Current release of the gem, as a dotted version string.
  VERSION = "0.0.4"
end
@@ -0,0 +1,402 @@
1
+ require File.dirname(__FILE__) + '/noun_inflector.rb'
2
+ require File.dirname(__FILE__) + '/../lang/en/closed_class_words.rb'
3
+ require File.dirname(__FILE__) + '/../lang/en/adjectives.rb'
4
+ require File.dirname(__FILE__) + '/../lang/en/verbs.rb'
5
+
6
+ # Part of speech implementations for the state pattern.
7
# Marker module for the conjunction part of speech.
# It carries no behaviour of its own (deliberately empty).
module Conjuction
end
10
+
11
# Marker module for the determiner part of speech.
# It carries no behaviour of its own (deliberately empty).
module Determiner
end
14
+
15
# Pronoun part of speech for the state pattern.
# The module currently defines no methods; the number predicates and
# pluralize below are disabled drafts kept for reference.
module Pronoun
  # def singular?
  #   @@SingularPronouns.member?(self)
  # end
  #
  # def plural?
  #   @@PluralPronouns.member?(self)
  # end

  # def pluralize
  #
  # end
end
28
+
29
# Marker module for the preposition part of speech.
# It carries no behaviour of its own (deliberately empty).
module Preposition
end
32
+
33
# Adjective part of speech for the state pattern.
# All behaviour comes from the CachedAdjectives mixin; nothing is defined here.
module Adjective
  include(CachedAdjectives)
end
36
+
37
# Noun part of speech for the state pattern.
#
# Mixes in NounInflector and relies on its pluralize/singularize to decide
# grammatical number by round-tripping the word through both inflections.
module Noun
  include NounInflector

  protected

  # Whether this word should be treated as plural.
  #
  # A word counts as plural when it survives singularize -> pluralize
  # unchanged, or when it does not survive pluralize -> singularize (i.e. it
  # does not look singular). Words that round-trip both ways are therefore
  # reported as both plural and singular.
  #
  # The comparison against self was previously missing, so the (always
  # truthy) pluralized string was returned for every word. The "not singular"
  # half is computed inline rather than via #singular?, because #singular?
  # calls back into this method and the pair would otherwise recurse forever.
  #
  # Returns true or false.
  def plural?
    self == pluralize(singularize(self)) || self != singularize(pluralize(self))
  end

  # Whether this word should be treated as singular.
  #
  # True when the word survives pluralize -> singularize unchanged, or when
  # #plural? rejects it.
  #
  # Returns true or false.
  def singular?
    self == singularize(pluralize(self)) || !plural?
  end
end
69
+
70
# Verb part of speech for the state pattern.
#
# The only live behaviour is the CachedVerbs mixin. Everything else in this
# module is commented-out draft code (an Infinitive result class and an
# infinitive lookup routine that reference Linguistics::EN), kept for reference.
module Verb
  include CachedVerbs

  # Would apply to any methods defined below; at present only commented-out
  # code follows, so no method is affected.
  protected
  # def pluralize
  #   # TODO
  #   "#{self}"
  # end

  # TODO How can we keep this separate from Nouns?
  # def present_participle?
  #   # TODO
  # end

  # ### The object class for the result returned from calling
  # ### Linguistics::EN::infinitive.
  # class Infinitive < String
  #
  #   ### Create and return a new Infinitive object.
  #   def initialize( word1, word2, suffix, rule )
  #     super( word1 )
  #     @word2 = word2
  #     @suffix = suffix
  #     @rule = rule
  #   end
  #
  #
  #   ######
  #   public
  #   ######
  #
  #   # The fallback deconjugated form
  #   attr_reader :word2
  #
  #   # The suffix used to identify the transform rule
  #   attr_reader :suffix
  #
  #   # The rule used
  #   attr_reader :rule
  # end

  # ###############
  # module_function
  # ###############
  #
  # ### Return the infinitive form of the given word
  # def infinitive( word )
  #   word = word.to_s
  #   word1 = word2 = suffix = rule = newword = ''
  #
  #   if IrregularInfinitives.key?( word )
  #     word1 = IrregularInfinitives[ word ]
  #     rule = 'irregular'
  #   else
  #     # Build up $prefix{$suffix} as an array of prefixes, from longest to shortest.
  #     prefix, suffix = nil
  #     prefixes = Hash::new {|hsh,key| hsh[key] = []}
  #
  #     # Build the hash of prefixes for the word
  #     1.upto( word.length ) {|i|
  #       prefix = word[0, i]
  #       suffix = word[i..-1]
  #
  #       (suffix.length - 1).downto( 0 ) {|j|
  #         newword = prefix + suffix[0, j]
  #         prefixes[ suffix ].push( newword )
  #       }
  #     }
  #
  #     $stderr.puts "prefixes: %p" % prefixes if $DEBUG
  #
  #     # Now check for rules covering the prefixes for this word, picking
  #     # the first one if one was found.
  #     if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
  #       rule = InfSuffixRules[ suffix ][:rule]
  #       shortestPrefix = InfSuffixRules[ suffix ][:word1]
  #       $stderr.puts "Using rule %p (%p) for suffix %p" %
  #         [ rule, shortestPrefix, suffix ] if $DEBUG
  #
  #       case shortestPrefix
  #       when 0
  #         word1 = prefixes[ suffix ][ 0 ]
  #         word2 = prefixes[ suffix ][ 1 ]
  #         $stderr.puts "For sp = 0: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -1
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = ''
  #         $stderr.puts "For sp = -1: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -2
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = prefixes[ suffix ].last
  #         $stderr.puts "For sp = -2: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -3
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix2]
  #         $stderr.puts "For sp = -3: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -4
  #         word1 = word
  #         word2 = ''
  #         $stderr.puts "For sp = -4: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       else
  #         raise IndexError,
  #           "Couldn't find rule for shortest prefix %p" %
  #           shortestPrefix
  #       end
  #
  #       # Rules 12b and 15: Strip off 'ed' or 'ing'.
  #       if rule == '12b' or rule == '15'
  #         # Do we have a monosyllable of this form:
  #         # o 0+ Consonants
  #         # o 1+ Vowel
  #         # o 2 Non-wx
  #         # Eg: tipped => tipp?
  #         # Then return tip and tipp.
  #         # Eg: swimming => swimm?
  #         # Then return swim and swimm.
  #
  #         if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
  #           word1 = $1 + $2
  #           word2 = $1 + $2 + $2
  #         end
  #       end
  #     end
  #   end
  #
  #   return Infinitive::new( word1, word2, suffix, rule )
  # end
end
212
+
213
+ # # From the Ruby Linguistics Project, release 1.0.5
214
+ # # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en.rb
215
+ # # CREDIT: deveiate
216
+ #
217
+ # ### Return the given phrase with the appropriate indefinite article ("a" or
218
+ # ### "an") prepended.
219
+ # def a( phrase, count=nil )
220
+ # md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
221
+ # pre, word, post = md.to_a[1,3]
222
+ # return phrase if word.nil? or word.empty?
223
+ #
224
+ # result = indef_article( word, count )
225
+ # return pre + result + post
226
+ # end
227
+ #
228
+ # ### Returns the given word with a prepended indefinite article, unless
229
+ # ### +count+ is non-nil and not singular.
230
+ # def indef_article( word, count )
231
+ # count ||= Linguistics::num
232
+ # return "#{count} #{word}" if
233
+ # count && /^(#{PL_count_one})$/i !~ count.to_s
234
+ #
235
+ # # Handle user-defined variants
236
+ # # return value if value = ud_match( word, A_a_user_defined )
237
+ #
238
+ # case word
239
+ #
240
+ # # Handle special cases
241
+ # when /^(#{A_explicit_an})/i
242
+ # return "an #{word}"
243
+ #
244
+ # # Handle abbreviations
245
+ # when /^(#{A_abbrev})/x
246
+ # return "an #{word}"
247
+ # when /^[aefhilmnorsx][.-]/i
248
+ # return "an #{word}"
249
+ # when /^[a-z][.-]/i
250
+ # return "a #{word}"
251
+ #
252
+ # # Handle consonants
253
+ # when /^[^aeiouy]/i
254
+ # return "a #{word}"
255
+ #
256
+ # # Handle special vowel-forms
257
+ # when /^e[uw]/i
258
+ # return "a #{word}"
259
+ # when /^onc?e\b/i
260
+ # return "a #{word}"
261
+ # when /^uni([^nmd]|mo)/i
262
+ # return "a #{word}"
263
+ # when /^u[bcfhjkqrst][aeiou]/i
264
+ # return "a #{word}"
265
+ #
266
+ # # Handle vowels
267
+ # when /^[aeiou]/i
268
+ # return "an #{word}"
269
+ #
270
+ # # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
271
+ # when /^(#{A_y_cons})/i
272
+ # return "an #{word}"
273
+ #
274
+ # # Otherwise, guess "a"
275
+ # else
276
+ # return "a #{word}"
277
+ # end
278
+ # end
279
+ #
280
+ # def normalize_count( count, default=2 )
281
+ # return default if count.nil? # Default to plural
282
+ # if /^(#{PL_count_one})$/i =~ count.to_s ||
283
+ # Linguistics::classical? &&
284
+ # /^(#{PL_count_zero})$/ =~ count.to_s
285
+ # return 1
286
+ # else
287
+ # return default
288
+ # end
289
+ # end
290
+ #
291
+ # ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
292
+ # ### examining the <tt>original</tt> input.
293
+ # def postprocess( original, inflected )
294
+ # inflected.sub!( /([^|]+)\|(.+)/ ) {
295
+ # Linguistics::classical? ? $2 : $1
296
+ # }
297
+ #
298
+ # case original
299
+ # when "I"
300
+ # return inflected
301
+ # when /^[A-Z]+$/
302
+ # return inflected.upcase
303
+ # when /^[A-Z]/
304
+ # # Can't use #capitalize, as it will downcase the rest of the string,
305
+ # # too.
306
+ # inflected[0,1] = inflected[0,1].upcase
307
+ # return inflected
308
+ # else
309
+ # return inflected
310
+ # end
311
+ # end
312
+ #
313
+ # ### Return the plural of the given verb +phrase+ if +count+ indicates it
314
+ # ### should be plural.
315
+ # def plural_verb( phrase, count=nil )
316
+ # md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
317
+ # pre, word, post = md.to_a[1,3]
318
+ # return phrase if word.nil? or word.empty?
319
+ #
320
+ # plural = postprocess( word,
321
+ # pluralize_special_verb(word, count) ||
322
+ # pluralize_general_verb(word, count) )
323
+ # return pre + plural + post
324
+ # end
325
+ #
326
+ # def pluralize_special_verb( word, count )
327
+ # count ||= Linguistics::num
328
+ # count = normalize_count( count )
329
+ #
330
+ # return nil if /^(#{PL_count_one})$/i =~ count.to_s
331
+ #
332
+ # # Handle user-defined verbs
333
+ # #if value = ud_match( word, PL_v_user_defined )
334
+ # # return value
335
+ # #end
336
+ #
337
+ # case word
338
+ #
339
+ # # Handle irregular present tense (simple and compound)
340
+ # when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
341
+ # return PL_v_irregular_pres_h[ $1.downcase ] + $2
342
+ #
343
+ # # Handle irregular future, preterite and perfect tenses
344
+ # when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
345
+ # return word
346
+ #
347
+ # # Handle special cases
348
+ # when /^(#{PL_v_special_s})$/, /\s/
349
+ # return nil
350
+ #
351
+ # # Handle standard 3rd person (chop the ...(e)s off single words)
352
+ # when /^(.*)([cs]h|[x]|zz|ss)es$/i
353
+ # return $1 + $2
354
+ # when /^(..+)ies$/i
355
+ # return "#{$1}y"
356
+ # when /^(.+)oes$/i
357
+ # return "#{$1}o"
358
+ # when /^(.*[^s])s$/i
359
+ # return $1
360
+ #
361
+ # # Otherwise, a regular verb (handle elsewhere)
362
+ # else
363
+ # return nil
364
+ # end
365
+ # end
366
+ #
367
+ # ### Pluralize regular verbs
368
+ # def pluralize_general_verb( word, count )
369
+ # count ||= Linguistics::num
370
+ # count = normalize_count( count )
371
+ #
372
+ # return word if /^(#{PL_count_one})$/i =~ count.to_s
373
+ #
374
+ # case word
375
+ #
376
+ # # Handle ambiguous present tenses (simple and compound)
377
+ # when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
378
+ # return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
379
+ #
380
+ # # Handle ambiguous preterite and perfect tenses
381
+ # when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
382
+ # return word
383
+ #
384
+ # # Otherwise, 1st or 2nd person is uninflected
385
+ # else
386
+ # return word
387
+ # end
388
+ # end
389
+ #
390
+ # def present_participle( word )
391
+ # plural = plural_verb( word.to_s, 2 )
392
+ #
393
+ # plural.sub!( /ie$/, 'y' ) or
394
+ # plural.sub!( /ue$/, 'u' ) or
395
+ # plural.sub!( /([auy])e$/, '$1' ) or
396
+ # plural.sub!( /i$/, '' ) or
397
+ # plural.sub!( /([^e])e$/, "\\1" ) or
398
+ # /er$/.match( plural ) or
399
+ # plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
400
+ #
401
+ # return "#{plural}ing"
402
+ # end
@@ -0,0 +1,23 @@
1
# An Array with a cursor: holds a sequence of elements and tracks which one
# is "current", advancing one element at a time via #next.
class Star < Array
  # Builds a Star holding the given elements, with the cursor on the first.
  #
  # elements - zero or more objects to store, in order.
  def initialize(*elements)
    @current = 0
    super(elements)
  end

  # Returns the element under the cursor (nil when the Star is empty).
  def current
    self[@current]
  end

  # Moves the cursor forward by one element.
  #
  # Returns the new cursor index.
  # Raises FragmentException (defined elsewhere in the project) when the
  # cursor is already on the last element, or when the Star is empty.
  def next
    raise FragmentException, "Fragment (consider revising)" if last_word?

    @current += 1
  end

  private

  # True when there is no element after the cursor. Uses >= rather than ==
  # so that an empty Star (length - 1 == -1) is also treated as exhausted
  # instead of letting the cursor run past the end.
  def last_word?
    @current >= length - 1
  end
end
@@ -0,0 +1,99 @@
1
+ require File.dirname(__FILE__) + '/string_bracketing.rb'
2
+ require File.dirname(__FILE__) + '/word.rb'
3
+
4
# Grammar-checking extensions to the core String class: small text helpers
# (trim, -, /, normalize), sentence/word splitting, and grammar checks that
# run the string through an ATN (augmented transition network).
class String
  include StringBracketing

  # One or more consecutive sentence-ending characters.
  @@ending_punctuation = /[.?!]+/

  alias :trim :strip

  # Returns a copy of the string with every match of pattern removed.
  def -(pattern)
    gsub(pattern, '')
  end

  # Joins this string and str_to_join as path segments (File.join).
  def /(str_to_join)
    File.join(self, str_to_join)
  end

  # Returns a lower-cased copy in which every run of ending punctuation is
  # collapsed to a single "." (the "full-stop" that marks the end of a
  # sentence regardless of the original mark), e.g.
  # "Hello again..." => "hello again."
  # TODO how is this used with abbreviations?
  def normalize
    downcase.gsub(@@ending_punctuation, '.')
  end

  # Returns an Array of the sentences in the string, each trimmed of
  # surrounding whitespace. A sentence is the shortest run of characters
  # (within a line) that ends in ending punctuation; trailing text without
  # ending punctuation is not included.
  def sentences
    # The interpolated pattern contains no capture groups, so scan yields
    # whole-match strings.
    scan(/.*?#{@@ending_punctuation}/i).map { |sentence| sentence.trim }
  end

  # Returns an Array with one entry per whitespace-separated token: the first
  # run of letters, apostrophes and hyphens found in that token (an empty
  # string when the token has none). Empty tokens produced by leading
  # whitespace are skipped.
  # See StringTest#test_whitespace_then_character_string
  def words
    split(/\s+/).reject { |token| token.empty? }.map do |token|
      token.match(/[a-z\'\-]+/i).to_s
    end
  end

  # Returns an Array of every substring matching pattern.
  def matches_for(pattern)
    # Block-less gsub enumerates whole matches; unlike scan it still yields
    # the full match (not the groups) when pattern contains captures.
    gsub(pattern).to_a
  end

  # A sentence is determined to be grammatically correct if
  # a final state in the ATN is reached by the last word in the sentence.
  #
  # language - language identifier handed to the ATN (default :english).
  #
  # Returns true when parsing succeeds, false when it raises
  # UngrammaticalException.
  def grammatical?(language = :english)
    parse(language)
    true
  rescue UngrammaticalException
    false
  end

  # Checks each sentence of the string in turn.
  #
  # language - language identifier handed to the ATN (default :english).
  #
  # Yields each ungrammatical sentence; the block's return value takes that
  # sentence's place in the result. Grammatical sentences pass through as-is.
  #
  # Returns the resulting sentences joined with single spaces.
  # TODO add tests
  def check_grammar(language = :english)
    checked = sentences.map do |sentence|
      # Pass the requested language through; it was previously dropped, so
      # every sentence was checked against the default language.
      # TODO needs "yield message" etc (e.message)
      sentence.grammatical?(language) ? sentence : yield(sentence)
    end

    checked.join(" ")
  end

  private

  # Parses the string with the ATN for the given language.
  #
  # ATNs are kept at class level to avoid the cost of building one per
  # string, and are cached per language so that a request for a second
  # language does not silently reuse the first language's network.
  def parse(language)
    @@atns ||= {}
    (@@atns[language] ||= ATN.new(language)).parse(self)
  end
end