odin 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/.rvmrc +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +26 -0
- data/HISTORY.md +102 -0
- data/LICENSE.md +10 -0
- data/README.md +46 -0
- data/Rakefile +69 -0
- data/app/controllers/grammar_checker.rb +51 -0
- data/check_grammar.rb +24 -0
- data/configure +9 -0
- data/images/atn_diagram.graffle +0 -0
- data/images/atn_diagram.pdf +0 -0
- data/images/odin-ff6.gif +0 -0
- data/lang/en/adjectives.rb +388 -0
- data/lang/en/atn.rb +102 -0
- data/lang/en/closed_class_words.rb +206 -0
- data/lang/en/data.rb +1086 -0
- data/lang/en/noun_inflections.rb +76 -0
- data/lang/en/noun_inflector_test_cases.rb +235 -0
- data/lang/en/pronoun_inflector_test_cases.rb +14 -0
- data/lang/en/verbs.rb +648 -0
- data/lang/iso639.rb +405 -0
- data/lib/array.rb +15 -0
- data/lib/atn.rb +82 -0
- data/lib/augmented_transition_network.rb +146 -0
- data/lib/dumper.rb +44 -0
- data/lib/noun_inflector.rb +283 -0
- data/lib/odin.rb +3 -0
- data/lib/odin/version.rb +3 -0
- data/lib/parts_of_speech.rb +402 -0
- data/lib/star.rb +23 -0
- data/lib/string.rb +99 -0
- data/lib/string_bracketing.rb +100 -0
- data/lib/word.rb +69 -0
- data/lib/word_net.rb +265 -0
- data/odin.gemspec +27 -0
- data/simple_atn/README.md +45 -0
- data/simple_atn/Rakefile +9 -0
- data/simple_atn/array.rb +15 -0
- data/simple_atn/augmented_transition_network.rb +146 -0
- data/simple_atn/augmented_transition_network_test.rb +113 -0
- data/simple_atn/english.rb +161 -0
- data/simple_atn/string.rb +63 -0
- data/test/fixtures/alice.txt +3594 -0
- data/test/fixtures/art.txt +7 -0
- data/test/fixtures/both.txt +1 -0
- data/test/fixtures/existing.txt +0 -0
- data/test/fixtures/existing.txt.checked.html +0 -0
- data/test/fixtures/grammar_checker.css +4 -0
- data/test/fixtures/grammatical.txt +1 -0
- data/test/fixtures/ungrammatical.txt +1 -0
- data/test/functional/grammar_checker_test.rb +64 -0
- data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
- data/test/test_helper.rb +82 -0
- data/test/unit/atn_test.rb +240 -0
- data/test/unit/noun_inflector_test.rb +249 -0
- data/test/unit/pronoun_inflector_test.rb +17 -0
- data/test/unit/star_test.rb +24 -0
- data/test/unit/string_bracketing_test_module.rb +70 -0
- data/test/unit/string_test.rb +92 -0
- data/test/unit/word_test.rb +15 -0
- metadata +223 -0
data/lib/odin.rb
ADDED
data/lib/odin/version.rb
ADDED
@@ -0,0 +1,402 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/noun_inflector.rb'
|
2
|
+
require File.dirname(__FILE__) + '/../lang/en/closed_class_words.rb'
|
3
|
+
require File.dirname(__FILE__) + '/../lang/en/adjectives.rb'
|
4
|
+
require File.dirname(__FILE__) + '/../lang/en/verbs.rb'
|
5
|
+
|
6
|
+
# Part of speech implementations for the state pattern.
|
7
|
+
# Part-of-speech marker for conjunctions. The module defines no behavior of
# its own.
#
# NOTE(review): the name is misspelled ("Conjuction" rather than
# "Conjunction"). It is kept as-is because code outside this file may refer to
# the constant by this spelling.
module Conjuction
  # Intentionally left blank
end
|
10
|
+
|
11
|
+
# Part-of-speech marker for determiners. The module defines no behavior of
# its own.
module Determiner
  # Intentionally left blank
end
|
14
|
+
|
15
|
+
# Part-of-speech marker for pronouns. No methods are currently defined; the
# commented-out sketch below is kept as a record of the intended number
# predicates.
module Pronoun
  # def singular?
  #   @@SingularPronouns.member?(self)
  # end
  #
  # def plural?
  #   @@PluralPronouns.member?(self)
  # end

  # def pluralize
  #
  # end
end
|
28
|
+
|
29
|
+
# Part-of-speech marker for prepositions. The module defines no behavior of
# its own.
module Preposition
  # Intentionally left blank
end
|
32
|
+
|
33
|
+
# Part of speech for adjectives. All behavior comes from CachedAdjectives,
# which is loaded from lang/en/adjectives.rb by the require at the top of
# this file.
module Adjective
  include CachedAdjectives
end
|
36
|
+
|
37
|
+
# Part of speech for nouns. Mixes in NounInflector and uses its +singularize+
# and +pluralize+ to guess the grammatical number of the receiver.
#
# Both predicates are deliberately permissive: a word that survives both
# round trips (or neither) is reported as plural AND singular, so a word of
# unknown number does not fail an agreement check.
module Noun
  include NounInflector

  protected

  # Returns true when the receiver looks plural.
  #
  # A word counts as plural when singularizing and re-pluralizing it gives the
  # same word back, or when it fails the opposite (singular) round trip.
  #
  # The previous implementation returned the String produced by
  # +pluralize+ (the comparison with +self+ was missing), so it was always
  # truthy. The check is written without calling +singular?+ because the two
  # predicates would otherwise recurse into each other without end.
  #
  # TODO: confirm the intended result for words that fail both round trips.
  def plural?
    self == pluralize(singularize(self)) || self != singularize(pluralize(self))
  end

  # Returns true when the receiver looks singular.
  #
  # A word counts as singular when pluralizing and re-singularizing it gives
  # the same word back, or when it fails the opposite (plural) round trip.
  #
  # TODO: confirm the intended result for words that fail both round trips.
  def singular?
    self == singularize(pluralize(self)) || self != pluralize(singularize(self))
  end
end
|
69
|
+
|
70
|
+
# Part of speech for verbs. All live behavior comes from CachedVerbs, which is
# loaded from lang/en/verbs.rb by the require at the top of this file.
#
# Everything below +protected+ is commented-out reference code (an infinitive
# finder taken from the Ruby Linguistics project) and defines nothing.
module Verb
  include CachedVerbs

  protected
  # def pluralize
  #   # TODO
  #   "#{self}"
  # end

  # TODO How can we keep this separate from Nouns?
  # def present_participle?
  #   # TODO
  # end

  # ### The object class for the result returned from calling
  # ### Linguistics::EN::infinitive.
  # class Infinitive < String
  #
  #   ### Create and return a new Infinitive object.
  #   def initialize( word1, word2, suffix, rule )
  #     super( word1 )
  #     @word2 = word2
  #     @suffix = suffix
  #     @rule = rule
  #   end
  #
  #
  #   ######
  #   public
  #   ######
  #
  #   # The fallback deconjugated form
  #   attr_reader :word2
  #
  #   # The suffix used to identify the transform rule
  #   attr_reader :suffix
  #
  #   # The rule used
  #   attr_reader :rule
  # end

  # ###############
  # module_function
  # ###############
  #
  # ### Return the infinitive form of the given word
  # def infinitive( word )
  #   word = word.to_s
  #   word1 = word2 = suffix = rule = newword = ''
  #
  #   if IrregularInfinitives.key?( word )
  #     word1 = IrregularInfinitives[ word ]
  #     rule  = 'irregular'
  #   else
  #     # Build up $prefix{$suffix} as an array of prefixes, from longest to shortest.
  #     prefix, suffix = nil
  #     prefixes = Hash::new {|hsh,key| hsh[key] = []}
  #
  #     # Build the hash of prefixes for the word
  #     1.upto( word.length ) {|i|
  #       prefix = word[0, i]
  #       suffix = word[i..-1]
  #
  #       (suffix.length - 1).downto( 0 ) {|j|
  #         newword = prefix + suffix[0, j]
  #         prefixes[ suffix ].push( newword )
  #       }
  #     }
  #
  #     $stderr.puts "prefixes: %p" % prefixes if $DEBUG
  #
  #     # Now check for rules covering the prefixes for this word, picking
  #     # the first one if one was found.
  #     if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
  #       rule = InfSuffixRules[ suffix ][:rule]
  #       shortestPrefix = InfSuffixRules[ suffix ][:word1]
  #       $stderr.puts "Using rule %p (%p) for suffix %p" %
  #         [ rule, shortestPrefix, suffix ] if $DEBUG
  #
  #       case shortestPrefix
  #       when 0
  #         word1 = prefixes[ suffix ][ 0 ]
  #         word2 = prefixes[ suffix ][ 1 ]
  #         $stderr.puts "For sp = 0: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -1
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = ''
  #         $stderr.puts "For sp = -1: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -2
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = prefixes[ suffix ].last
  #         $stderr.puts "For sp = -2: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -3
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix2]
  #         $stderr.puts "For sp = -3: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -4
  #         word1 = word
  #         word2 = ''
  #         $stderr.puts "For sp = -4: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       else
  #         raise IndexError,
  #           "Couldn't find rule for shortest prefix %p" %
  #           shortestPrefix
  #       end
  #
  #       # Rules 12b and 15: Strip off 'ed' or 'ing'.
  #       if rule == '12b' or rule == '15'
  #         # Do we have a monosyllable of this form:
  #         # o 0+ Consonants
  #         # o 1+ Vowel
  #         # o 2 Non-wx
  #         # Eg: tipped => tipp?
  #         # Then return tip and tipp.
  #         # Eg: swimming => swimm?
  #         # Then return swim and swimm.
  #
  #         if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
  #           word1 = $1 + $2
  #           word2 = $1 + $2 + $2
  #         end
  #       end
  #     end
  #   end
  #
  #   return Infinitive::new( word1, word2, suffix, rule )
  # end
end
|
212
|
+
|
213
|
+
# # From the Ruby Linguistics Project, release 1.0.5
|
214
|
+
# # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en.rb
|
215
|
+
# # CREDIT: deveiate
|
216
|
+
#
|
217
|
+
# ### Return the given phrase with the appropriate indefinite article ("a" or
|
218
|
+
# ### "an") prepended.
|
219
|
+
# def a( phrase, count=nil )
|
220
|
+
# md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
221
|
+
# pre, word, post = md.to_a[1,3]
|
222
|
+
# return phrase if word.nil? or word.empty?
|
223
|
+
#
|
224
|
+
# result = indef_article( word, count )
|
225
|
+
# return pre + result + post
|
226
|
+
# end
|
227
|
+
#
|
228
|
+
# ### Returns the given word with a prepended indefinite article, unless
|
229
|
+
# ### +count+ is non-nil and not singular.
|
230
|
+
# def indef_article( word, count )
|
231
|
+
# count ||= Linguistics::num
|
232
|
+
# return "#{count} #{word}" if
|
233
|
+
# count && /^(#{PL_count_one})$/i !~ count.to_s
|
234
|
+
#
|
235
|
+
# # Handle user-defined variants
|
236
|
+
# # return value if value = ud_match( word, A_a_user_defined )
|
237
|
+
#
|
238
|
+
# case word
|
239
|
+
#
|
240
|
+
# # Handle special cases
|
241
|
+
# when /^(#{A_explicit_an})/i
|
242
|
+
# return "an #{word}"
|
243
|
+
#
|
244
|
+
# # Handle abbreviations
|
245
|
+
# when /^(#{A_abbrev})/x
|
246
|
+
# return "an #{word}"
|
247
|
+
# when /^[aefhilmnorsx][.-]/i
|
248
|
+
# return "an #{word}"
|
249
|
+
# when /^[a-z][.-]/i
|
250
|
+
# return "a #{word}"
|
251
|
+
#
|
252
|
+
# # Handle consonants
|
253
|
+
# when /^[^aeiouy]/i
|
254
|
+
# return "a #{word}"
|
255
|
+
#
|
256
|
+
# # Handle special vowel-forms
|
257
|
+
# when /^e[uw]/i
|
258
|
+
# return "a #{word}"
|
259
|
+
# when /^onc?e\b/i
|
260
|
+
# return "a #{word}"
|
261
|
+
# when /^uni([^nmd]|mo)/i
|
262
|
+
# return "a #{word}"
|
263
|
+
# when /^u[bcfhjkqrst][aeiou]/i
|
264
|
+
# return "a #{word}"
|
265
|
+
#
|
266
|
+
# # Handle vowels
|
267
|
+
# when /^[aeiou]/i
|
268
|
+
# return "an #{word}"
|
269
|
+
#
|
270
|
+
# # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
|
271
|
+
# when /^(#{A_y_cons})/i
|
272
|
+
# return "an #{word}"
|
273
|
+
#
|
274
|
+
# # Otherwise, guess "a"
|
275
|
+
# else
|
276
|
+
# return "a #{word}"
|
277
|
+
# end
|
278
|
+
# end
|
279
|
+
#
|
280
|
+
# def normalize_count( count, default=2 )
|
281
|
+
# return default if count.nil? # Default to plural
|
282
|
+
# if /^(#{PL_count_one})$/i =~ count.to_s ||
|
283
|
+
# Linguistics::classical? &&
|
284
|
+
# /^(#{PL_count_zero})$/ =~ count.to_s
|
285
|
+
# return 1
|
286
|
+
# else
|
287
|
+
# return default
|
288
|
+
# end
|
289
|
+
# end
|
290
|
+
#
|
291
|
+
# ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
|
292
|
+
# ### examining the <tt>original</tt> input.
|
293
|
+
# def postprocess( original, inflected )
|
294
|
+
# inflected.sub!( /([^|]+)\|(.+)/ ) {
|
295
|
+
# Linguistics::classical? ? $2 : $1
|
296
|
+
# }
|
297
|
+
#
|
298
|
+
# case original
|
299
|
+
# when "I"
|
300
|
+
# return inflected
|
301
|
+
# when /^[A-Z]+$/
|
302
|
+
# return inflected.upcase
|
303
|
+
# when /^[A-Z]/
|
304
|
+
# # Can't use #capitalize, as it will downcase the rest of the string,
|
305
|
+
# # too.
|
306
|
+
# inflected[0,1] = inflected[0,1].upcase
|
307
|
+
# return inflected
|
308
|
+
# else
|
309
|
+
# return inflected
|
310
|
+
# end
|
311
|
+
# end
|
312
|
+
#
|
313
|
+
# ### Return the plural of the given verb +phrase+ if +count+ indicates it
|
314
|
+
# ### should be plural.
|
315
|
+
# def plural_verb( phrase, count=nil )
|
316
|
+
# md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
317
|
+
# pre, word, post = md.to_a[1,3]
|
318
|
+
# return phrase if word.nil? or word.empty?
|
319
|
+
#
|
320
|
+
# plural = postprocess( word,
|
321
|
+
# pluralize_special_verb(word, count) ||
|
322
|
+
# pluralize_general_verb(word, count) )
|
323
|
+
# return pre + plural + post
|
324
|
+
# end
|
325
|
+
#
|
326
|
+
# def pluralize_special_verb( word, count )
|
327
|
+
# count ||= Linguistics::num
|
328
|
+
# count = normalize_count( count )
|
329
|
+
#
|
330
|
+
# return nil if /^(#{PL_count_one})$/i =~ count.to_s
|
331
|
+
#
|
332
|
+
# # Handle user-defined verbs
|
333
|
+
# #if value = ud_match( word, PL_v_user_defined )
|
334
|
+
# # return value
|
335
|
+
# #end
|
336
|
+
#
|
337
|
+
# case word
|
338
|
+
#
|
339
|
+
# # Handle irregular present tense (simple and compound)
|
340
|
+
# when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
|
341
|
+
# return PL_v_irregular_pres_h[ $1.downcase ] + $2
|
342
|
+
#
|
343
|
+
# # Handle irregular future, preterite and perfect tenses
|
344
|
+
# when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
|
345
|
+
# return word
|
346
|
+
#
|
347
|
+
# # Handle special cases
|
348
|
+
# when /^(#{PL_v_special_s})$/, /\s/
|
349
|
+
# return nil
|
350
|
+
#
|
351
|
+
# # Handle standard 3rd person (chop the ...(e)s off single words)
|
352
|
+
# when /^(.*)([cs]h|[x]|zz|ss)es$/i
|
353
|
+
# return $1 + $2
|
354
|
+
# when /^(..+)ies$/i
|
355
|
+
# return "#{$1}y"
|
356
|
+
# when /^(.+)oes$/i
|
357
|
+
# return "#{$1}o"
|
358
|
+
# when /^(.*[^s])s$/i
|
359
|
+
# return $1
|
360
|
+
#
|
361
|
+
# # Otherwise, a regular verb (handle elsewhere)
|
362
|
+
# else
|
363
|
+
# return nil
|
364
|
+
# end
|
365
|
+
# end
|
366
|
+
#
|
367
|
+
# ### Pluralize regular verbs
|
368
|
+
# def pluralize_general_verb( word, count )
|
369
|
+
# count ||= Linguistics::num
|
370
|
+
# count = normalize_count( count )
|
371
|
+
#
|
372
|
+
# return word if /^(#{PL_count_one})$/i =~ count.to_s
|
373
|
+
#
|
374
|
+
# case word
|
375
|
+
#
|
376
|
+
# # Handle ambiguous present tenses (simple and compound)
|
377
|
+
# when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
|
378
|
+
# return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
|
379
|
+
#
|
380
|
+
# # Handle ambiguous preterite and perfect tenses
|
381
|
+
# when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
|
382
|
+
# return word
|
383
|
+
#
|
384
|
+
# # Otherwise, 1st or 2nd person is uninflected
|
385
|
+
# else
|
386
|
+
# return word
|
387
|
+
# end
|
388
|
+
# end
|
389
|
+
#
|
390
|
+
# def present_participle( word )
|
391
|
+
# plural = plural_verb( word.to_s, 2 )
|
392
|
+
#
|
393
|
+
# plural.sub!( /ie$/, 'y' ) or
|
394
|
+
# plural.sub!( /ue$/, 'u' ) or
|
395
|
+
# plural.sub!( /([auy])e$/, '$1' ) or
|
396
|
+
# plural.sub!( /i$/, '' ) or
|
397
|
+
# plural.sub!( /([^e])e$/, "\\1" ) or
|
398
|
+
# /er$/.match( plural ) or
|
399
|
+
# plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
|
400
|
+
#
|
401
|
+
# return "#{plural}ing"
|
402
|
+
# end
|
data/lib/star.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# An Array of elements with a cursor that can only move forward.
#
# NOTE(review): presumably the elements are the words of a sentence being
# parsed (the private helper is called +last_word?+) -- confirm against the
# ATN code that uses this class.
class Star < Array
  # Builds a Star holding +elements+ in order, with the cursor on the first.
  def initialize(*elements)
    @current = 0
    super(elements)
  end

  # Returns the element under the cursor (nil when the Star is empty).
  def current
    self[@current]
  end

  # Advances the cursor by one element and returns the new cursor index.
  #
  # Raises FragmentException when the cursor is already on the last element,
  # or when the Star is empty, because there is nothing left to advance to.
  def next
    raise FragmentException, "Fragment (consider revising)" if last_word?

    @current += 1
  end

  private

  # True when the cursor is on (or somehow beyond) the final element. Uses
  # >= rather than == so that an empty Star, whose last index is -1, reports
  # the end instead of letting the cursor run on indefinitely.
  def last_word?
    @current >= length - 1
  end
end
|
data/lib/string.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/string_bracketing.rb'
|
2
|
+
require File.dirname(__FILE__) + '/word.rb'
|
3
|
+
|
4
|
+
# Extensions to the core String class used by the grammar checker: small
# operator helpers, sentence/word splitting, and grammar checking through an
# ATN parser.
class String
  include StringBracketing

  # One or more sentence-ending punctuation characters.
  @@ending_punctuation = /[.?!]+/

  alias :trim :strip

  # Returns a copy of the string with every match of +pattern+ removed.
  def -(pattern)
    gsub(pattern, '')
  end

  # Joins the receiver and +str_to_join+ as path components via File.join.
  def /(str_to_join)
    File.join(self, str_to_join)
  end

  # Returns a downcased copy in which every run of ending punctuation is
  # replaced by a single ".".
  #
  # We use "." as a "full-stop" character that denotes the end of a sentence;
  # the "+" in the regex collapses runs such as "Hello again..." into
  # "hello again.".
  # TODO how is this used with abbreviations?
  def normalize
    downcase.gsub(/#{@@ending_punctuation}/, '.')
  end

  # Returns an Array of the sentences in the string, each stripped of
  # surrounding whitespace. A sentence is the shortest run of characters that
  # ends in ending punctuation; trailing text without such punctuation is
  # not returned.
  #
  # NOTE(review): "." does not match a newline here, so for a sentence that
  # spans several lines only the part on its final line is returned --
  # confirm whether that is intended.
  def sentences
    scan(/.*?#{@@ending_punctuation}/i).map { |sentence| sentence.trim }
  end

  # Returns an Array with one entry per whitespace-separated token: the first
  # run of letters, apostrophes and hyphens found in that token. A token with
  # no such characters contributes an empty string.
  #
  # Empty tokens (produced by leading whitespace) are skipped; see
  # StringTest#test_whitespace_then_character_string.
  def words
    tokens = split(/\s+/).reject { |token| token.empty? }
    tokens.map { |token| token.match(/[a-z\'\-]+/i).to_s }
  end

  # Returns an Array of every substring that matches +pattern+.
  def matches_for(pattern)
    matches = []
    # gsub is used purely as an iterator over the matches; its result is
    # discarded.
    gsub(pattern) { |match| matches << match }
    matches
  end

  # A sentence is determined to be grammatically correct if
  # a final state in the ATN is reached by the last word in the sentence.
  #
  # Returns true when parsing succeeds and false when the parser raises
  # UngrammaticalException. Any other exception propagates to the caller.
  def grammatical?(language = :english)
    parse(language)
    true
  rescue UngrammaticalException
    false
  end

  # Checks every sentence of the string against the grammar for +language+
  # and returns the results joined by single spaces.
  #
  # Grammatical sentences are kept unchanged. Each ungrammatical sentence is
  # yielded to the block and replaced by the block's return value, so a block
  # is required whenever the text may contain an ungrammatical sentence.
  #
  # TODO add tests
  # TODO yield the parser's error message along with the sentence
  def check_grammar(language = :english)
    checked = sentences.map do |sentence|
      # Pass the language on; it was previously dropped, so every sentence
      # was checked against the default language.
      sentence.grammatical?(language) ? sentence : yield(sentence)
    end

    checked.join(" ")
  end

  private

  # Parses the receiver with the ATN for +language+.
  #
  # ATNs are kept at class level to avoid the cost of building one for each
  # string. They are cached per language so that a request for a second
  # language does not silently reuse the first language's ATN.
  def parse(language)
    @@atns ||= {}
    (@@atns[language] ||= ATN.new(language)).parse(self)
  end
end
|