odin 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/.rvmrc +1 -0
- data/.travis.yml +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +26 -0
- data/HISTORY.md +102 -0
- data/LICENSE.md +10 -0
- data/README.md +46 -0
- data/Rakefile +69 -0
- data/app/controllers/grammar_checker.rb +51 -0
- data/check_grammar.rb +24 -0
- data/configure +9 -0
- data/images/atn_diagram.graffle +0 -0
- data/images/atn_diagram.pdf +0 -0
- data/images/odin-ff6.gif +0 -0
- data/lang/en/adjectives.rb +388 -0
- data/lang/en/atn.rb +102 -0
- data/lang/en/closed_class_words.rb +206 -0
- data/lang/en/data.rb +1086 -0
- data/lang/en/noun_inflections.rb +76 -0
- data/lang/en/noun_inflector_test_cases.rb +235 -0
- data/lang/en/pronoun_inflector_test_cases.rb +14 -0
- data/lang/en/verbs.rb +648 -0
- data/lang/iso639.rb +405 -0
- data/lib/array.rb +15 -0
- data/lib/atn.rb +82 -0
- data/lib/augmented_transition_network.rb +146 -0
- data/lib/dumper.rb +44 -0
- data/lib/noun_inflector.rb +283 -0
- data/lib/odin.rb +3 -0
- data/lib/odin/version.rb +3 -0
- data/lib/parts_of_speech.rb +402 -0
- data/lib/star.rb +23 -0
- data/lib/string.rb +99 -0
- data/lib/string_bracketing.rb +100 -0
- data/lib/word.rb +69 -0
- data/lib/word_net.rb +265 -0
- data/odin.gemspec +27 -0
- data/simple_atn/README.md +45 -0
- data/simple_atn/Rakefile +9 -0
- data/simple_atn/array.rb +15 -0
- data/simple_atn/augmented_transition_network.rb +146 -0
- data/simple_atn/augmented_transition_network_test.rb +113 -0
- data/simple_atn/english.rb +161 -0
- data/simple_atn/string.rb +63 -0
- data/test/fixtures/alice.txt +3594 -0
- data/test/fixtures/art.txt +7 -0
- data/test/fixtures/both.txt +1 -0
- data/test/fixtures/existing.txt +0 -0
- data/test/fixtures/existing.txt.checked.html +0 -0
- data/test/fixtures/grammar_checker.css +4 -0
- data/test/fixtures/grammatical.txt +1 -0
- data/test/fixtures/ungrammatical.txt +1 -0
- data/test/functional/grammar_checker_test.rb +64 -0
- data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
- data/test/test_helper.rb +82 -0
- data/test/unit/atn_test.rb +240 -0
- data/test/unit/noun_inflector_test.rb +249 -0
- data/test/unit/pronoun_inflector_test.rb +17 -0
- data/test/unit/star_test.rb +24 -0
- data/test/unit/string_bracketing_test_module.rb +70 -0
- data/test/unit/string_test.rb +92 -0
- data/test/unit/word_test.rb +15 -0
- metadata +223 -0
data/lib/odin.rb
ADDED
data/lib/odin/version.rb
ADDED
@@ -0,0 +1,402 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/noun_inflector.rb'
|
2
|
+
require File.dirname(__FILE__) + '/../lang/en/closed_class_words.rb'
|
3
|
+
require File.dirname(__FILE__) + '/../lang/en/adjectives.rb'
|
4
|
+
require File.dirname(__FILE__) + '/../lang/en/verbs.rb'
|
5
|
+
|
6
|
+
# Part of speech implementations for the state pattern.
|
7
|
+
# Marker module for the conjunction part of speech. It defines no behaviour of
# its own; it exists so an object can be tagged as a conjunction.
# NOTE(review): the name is spelled "Conjuction" (sic). Other files presumably
# reference this spelling, so confirm all call sites before renaming it.
module Conjuction
  # Intentionally left blank
end
|
10
|
+
|
11
|
+
# Marker module for the determiner part of speech. It defines no behaviour of
# its own; it exists so an object can be tagged as a determiner.
module Determiner
  # Intentionally left blank
end
|
14
|
+
|
15
|
+
# Part-of-speech module for pronouns. It currently defines no methods: the
# number predicates and the pluralizer below are commented-out sketches that
# are kept for reference only and are never executed.
module Pronoun
  # def singular?
  #   @@SingularPronouns.member?(self)
  # end
  #
  # def plural?
  #   @@PluralPronouns.member?(self)
  # end

  # def pluralize
  #
  # end
end
|
28
|
+
|
29
|
+
# Marker module for the preposition part of speech. It defines no behaviour of
# its own; it exists so an object can be tagged as a preposition.
module Preposition
  # Intentionally left blank
end
|
32
|
+
|
33
|
+
# Part-of-speech module for adjectives. All of its behaviour comes from the
# CachedAdjectives mixin.
# NOTE(review): CachedAdjectives is presumably defined in lang/en/adjectives.rb,
# which this file requires — confirm.
module Adjective
  include CachedAdjectives
end
|
36
|
+
|
37
|
+
# Part-of-speech module for nouns. Inflection itself (+singularize+ and
# +pluralize+, each taking the word as an argument) comes from the
# NounInflector mixin; this module adds grammatical-number predicates on top.
#
# Both predicates are "round trip" checks: a word counts as being in a given
# number when inflecting it away from that number and back again reproduces
# the word unchanged.
module Noun
  include NounInflector

  protected

  # Whether this noun should be treated as plural.
  #
  # True when the word survives a singularize -> pluralize round trip, or when
  # it is not recognisably singular (so a word that is neither is assumed to
  # be plural, and a word that is both, such as an invariant noun, is plural
  # as well as singular).
  #
  # The previous implementation returned +pluralize(singularize(self))+
  # itself, i.e. a String, which is always truthy, so every noun was reported
  # as plural. The comparison against +self+ is what was intended.
  #
  # @return [Boolean]
  def plural?
    # TODO: revisit once the inflector can report number directly.
    self == pluralize(singularize(self)) || !singular?
  end

  # Whether this noun should be treated as singular.
  #
  # True when the word survives a pluralize -> singularize round trip.
  #
  # This deliberately does not fall back to +!plural?+: +plural?+ already
  # falls back to +!singular?+, and having each predicate defer to the other
  # recurses without end for a word that round-trips in neither direction.
  # The result is the same as what the old code effectively returned, because
  # its +!plural?+ fallback always evaluated to false.
  #
  # @return [Boolean]
  def singular?
    # TODO: revisit once the inflector can report number directly.
    self == singularize(pluralize(self))
  end
end
|
69
|
+
|
70
|
+
# Part-of-speech module for verbs. The only live behaviour is the CachedVerbs
# mixin; everything after +protected+ is commented-out reference code (an
# infinitive finder that refers to Linguistics::EN) and is never executed.
module Verb
  include CachedVerbs

  protected
  # def pluralize
  #   # TODO
  #   "#{self}"
  # end

  # TODO How can we keep this separate from Nouns?
  # def present_participle?
  #   # TODO
  # end

  # ### The object class for the result returned from calling
  # ### Linguistics::EN::infinitive.
  # class Infinitive < String
  #
  #   ### Create and return a new Infinitive object.
  #   def initialize( word1, word2, suffix, rule )
  #     super( word1 )
  #     @word2 = word2
  #     @suffix = suffix
  #     @rule = rule
  #   end
  #
  #
  #   ######
  #   public
  #   ######
  #
  #   # The fallback deconjugated form
  #   attr_reader :word2
  #
  #   # The suffix used to identify the transform rule
  #   attr_reader :suffix
  #
  #   # The rule used
  #   attr_reader :rule
  # end

  # ###############
  # module_function
  # ###############
  #
  # ### Return the infinitive form of the given word
  # def infinitive( word )
  #   word = word.to_s
  #   word1 = word2 = suffix = rule = newword = ''
  #
  #   if IrregularInfinitives.key?( word )
  #     word1 = IrregularInfinitives[ word ]
  #     rule = 'irregular'
  #   else
  #     # Build up $prefix{$suffix} as an array of prefixes, from longest to shortest.
  #     prefix, suffix = nil
  #     prefixes = Hash::new {|hsh,key| hsh[key] = []}
  #
  #     # Build the hash of prefixes for the word
  #     1.upto( word.length ) {|i|
  #       prefix = word[0, i]
  #       suffix = word[i..-1]
  #
  #       (suffix.length - 1).downto( 0 ) {|j|
  #         newword = prefix + suffix[0, j]
  #         prefixes[ suffix ].push( newword )
  #       }
  #     }
  #
  #     $stderr.puts "prefixes: %p" % prefixes if $DEBUG
  #
  #     # Now check for rules covering the prefixes for this word, picking
  #     # the first one if one was found.
  #     if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
  #       rule = InfSuffixRules[ suffix ][:rule]
  #       shortestPrefix = InfSuffixRules[ suffix ][:word1]
  #       $stderr.puts "Using rule %p (%p) for suffix %p" %
  #         [ rule, shortestPrefix, suffix ] if $DEBUG
  #
  #       case shortestPrefix
  #       when 0
  #         word1 = prefixes[ suffix ][ 0 ]
  #         word2 = prefixes[ suffix ][ 1 ]
  #         $stderr.puts "For sp = 0: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -1
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = ''
  #         $stderr.puts "For sp = -1: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -2
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = prefixes[ suffix ].last
  #         $stderr.puts "For sp = -2: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -3
  #         word1 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix1]
  #         word2 = prefixes[ suffix ].last +
  #           InfSuffixRules[ suffix ][:suffix2]
  #         $stderr.puts "For sp = -3: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       when -4
  #         word1 = word
  #         word2 = ''
  #         $stderr.puts "For sp = -4: word1: %p, word2: %p" %
  #           [ word1, word2 ] if $DEBUG
  #
  #       else
  #         raise IndexError,
  #           "Couldn't find rule for shortest prefix %p" %
  #           shortestPrefix
  #       end
  #
  #       # Rules 12b and 15: Strip off 'ed' or 'ing'.
  #       if rule == '12b' or rule == '15'
  #         # Do we have a monosyllable of this form:
  #         # o 0+ Consonants
  #         # o 1+ Vowel
  #         # o 2 Non-wx
  #         # Eg: tipped => tipp?
  #         # Then return tip and tipp.
  #         # Eg: swimming => swimm?
  #         # Then return swim and swimm.
  #
  #         if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
  #           word1 = $1 + $2
  #           word2 = $1 + $2 + $2
  #         end
  #       end
  #     end
  #   end
  #
  #   return Infinitive::new( word1, word2, suffix, rule )
  # end
end
|
212
|
+
|
213
|
+
# # From the Ruby Linguistics Project, release 1.0.5
|
214
|
+
# # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en.rb
|
215
|
+
# # CREDIT: deveiate
|
216
|
+
#
|
217
|
+
# ### Return the given phrase with the appropriate indefinite article ("a" or
|
218
|
+
# ### "an") prepended.
|
219
|
+
# def a( phrase, count=nil )
|
220
|
+
# md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
221
|
+
# pre, word, post = md.to_a[1,3]
|
222
|
+
# return phrase if word.nil? or word.empty?
|
223
|
+
#
|
224
|
+
# result = indef_article( word, count )
|
225
|
+
# return pre + result + post
|
226
|
+
# end
|
227
|
+
#
|
228
|
+
# ### Returns the given word with a prepended indefinite article, unless
|
229
|
+
# ### +count+ is non-nil and not singular.
|
230
|
+
# def indef_article( word, count )
|
231
|
+
# count ||= Linguistics::num
|
232
|
+
# return "#{count} #{word}" if
|
233
|
+
# count && /^(#{PL_count_one})$/i !~ count.to_s
|
234
|
+
#
|
235
|
+
# # Handle user-defined variants
|
236
|
+
# # return value if value = ud_match( word, A_a_user_defined )
|
237
|
+
#
|
238
|
+
# case word
|
239
|
+
#
|
240
|
+
# # Handle special cases
|
241
|
+
# when /^(#{A_explicit_an})/i
|
242
|
+
# return "an #{word}"
|
243
|
+
#
|
244
|
+
# # Handle abbreviations
|
245
|
+
# when /^(#{A_abbrev})/x
|
246
|
+
# return "an #{word}"
|
247
|
+
# when /^[aefhilmnorsx][.-]/i
|
248
|
+
# return "an #{word}"
|
249
|
+
# when /^[a-z][.-]/i
|
250
|
+
# return "a #{word}"
|
251
|
+
#
|
252
|
+
# # Handle consonants
|
253
|
+
# when /^[^aeiouy]/i
|
254
|
+
# return "a #{word}"
|
255
|
+
#
|
256
|
+
# # Handle special vowel-forms
|
257
|
+
# when /^e[uw]/i
|
258
|
+
# return "a #{word}"
|
259
|
+
# when /^onc?e\b/i
|
260
|
+
# return "a #{word}"
|
261
|
+
# when /^uni([^nmd]|mo)/i
|
262
|
+
# return "a #{word}"
|
263
|
+
# when /^u[bcfhjkqrst][aeiou]/i
|
264
|
+
# return "a #{word}"
|
265
|
+
#
|
266
|
+
# # Handle vowels
|
267
|
+
# when /^[aeiou]/i
|
268
|
+
# return "an #{word}"
|
269
|
+
#
|
270
|
+
# # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
|
271
|
+
# when /^(#{A_y_cons})/i
|
272
|
+
# return "an #{word}"
|
273
|
+
#
|
274
|
+
# # Otherwise, guess "a"
|
275
|
+
# else
|
276
|
+
# return "a #{word}"
|
277
|
+
# end
|
278
|
+
# end
|
279
|
+
#
|
280
|
+
# def normalize_count( count, default=2 )
|
281
|
+
# return default if count.nil? # Default to plural
|
282
|
+
# if /^(#{PL_count_one})$/i =~ count.to_s ||
|
283
|
+
# Linguistics::classical? &&
|
284
|
+
# /^(#{PL_count_zero})$/ =~ count.to_s
|
285
|
+
# return 1
|
286
|
+
# else
|
287
|
+
# return default
|
288
|
+
# end
|
289
|
+
# end
|
290
|
+
#
|
291
|
+
# ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
|
292
|
+
# ### examining the <tt>original</tt> input.
|
293
|
+
# def postprocess( original, inflected )
|
294
|
+
# inflected.sub!( /([^|]+)\|(.+)/ ) {
|
295
|
+
# Linguistics::classical? ? $2 : $1
|
296
|
+
# }
|
297
|
+
#
|
298
|
+
# case original
|
299
|
+
# when "I"
|
300
|
+
# return inflected
|
301
|
+
# when /^[A-Z]+$/
|
302
|
+
# return inflected.upcase
|
303
|
+
# when /^[A-Z]/
|
304
|
+
# # Can't use #capitalize, as it will downcase the rest of the string,
|
305
|
+
# # too.
|
306
|
+
# inflected[0,1] = inflected[0,1].upcase
|
307
|
+
# return inflected
|
308
|
+
# else
|
309
|
+
# return inflected
|
310
|
+
# end
|
311
|
+
# end
|
312
|
+
#
|
313
|
+
# ### Return the plural of the given verb +phrase+ if +count+ indicates it
|
314
|
+
# ### should be plural.
|
315
|
+
# def plural_verb( phrase, count=nil )
|
316
|
+
# md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
317
|
+
# pre, word, post = md.to_a[1,3]
|
318
|
+
# return phrase if word.nil? or word.empty?
|
319
|
+
#
|
320
|
+
# plural = postprocess( word,
|
321
|
+
# pluralize_special_verb(word, count) ||
|
322
|
+
# pluralize_general_verb(word, count) )
|
323
|
+
# return pre + plural + post
|
324
|
+
# end
|
325
|
+
#
|
326
|
+
# def pluralize_special_verb( word, count )
|
327
|
+
# count ||= Linguistics::num
|
328
|
+
# count = normalize_count( count )
|
329
|
+
#
|
330
|
+
# return nil if /^(#{PL_count_one})$/i =~ count.to_s
|
331
|
+
#
|
332
|
+
# # Handle user-defined verbs
|
333
|
+
# #if value = ud_match( word, PL_v_user_defined )
|
334
|
+
# # return value
|
335
|
+
# #end
|
336
|
+
#
|
337
|
+
# case word
|
338
|
+
#
|
339
|
+
# # Handle irregular present tense (simple and compound)
|
340
|
+
# when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
|
341
|
+
# return PL_v_irregular_pres_h[ $1.downcase ] + $2
|
342
|
+
#
|
343
|
+
# # Handle irregular future, preterite and perfect tenses
|
344
|
+
# when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
|
345
|
+
# return word
|
346
|
+
#
|
347
|
+
# # Handle special cases
|
348
|
+
# when /^(#{PL_v_special_s})$/, /\s/
|
349
|
+
# return nil
|
350
|
+
#
|
351
|
+
# # Handle standard 3rd person (chop the ...(e)s off single words)
|
352
|
+
# when /^(.*)([cs]h|[x]|zz|ss)es$/i
|
353
|
+
# return $1 + $2
|
354
|
+
# when /^(..+)ies$/i
|
355
|
+
# return "#{$1}y"
|
356
|
+
# when /^(.+)oes$/i
|
357
|
+
# return "#{$1}o"
|
358
|
+
# when /^(.*[^s])s$/i
|
359
|
+
# return $1
|
360
|
+
#
|
361
|
+
# # Otherwise, a regular verb (handle elsewhere)
|
362
|
+
# else
|
363
|
+
# return nil
|
364
|
+
# end
|
365
|
+
# end
|
366
|
+
#
|
367
|
+
# ### Pluralize regular verbs
|
368
|
+
# def pluralize_general_verb( word, count )
|
369
|
+
# count ||= Linguistics::num
|
370
|
+
# count = normalize_count( count )
|
371
|
+
#
|
372
|
+
# return word if /^(#{PL_count_one})$/i =~ count.to_s
|
373
|
+
#
|
374
|
+
# case word
|
375
|
+
#
|
376
|
+
# # Handle ambiguous present tenses (simple and compound)
|
377
|
+
# when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
|
378
|
+
# return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
|
379
|
+
#
|
380
|
+
# # Handle ambiguous preterite and perfect tenses
|
381
|
+
# when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
|
382
|
+
# return word
|
383
|
+
#
|
384
|
+
# # Otherwise, 1st or 2nd person is uninflected
|
385
|
+
# else
|
386
|
+
# return word
|
387
|
+
# end
|
388
|
+
# end
|
389
|
+
#
|
390
|
+
# def present_participle( word )
|
391
|
+
# plural = plural_verb( word.to_s, 2 )
|
392
|
+
#
|
393
|
+
# plural.sub!( /ie$/, 'y' ) or
|
394
|
+
# plural.sub!( /ue$/, 'u' ) or
|
395
|
+
# plural.sub!( /([auy])e$/, '$1' ) or
|
396
|
+
# plural.sub!( /i$/, '' ) or
|
397
|
+
# plural.sub!( /([^e])e$/, "\\1" ) or
|
398
|
+
# /er$/.match( plural ) or
|
399
|
+
# plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
|
400
|
+
#
|
401
|
+
# return "#{plural}ing"
|
402
|
+
# end
|
data/lib/star.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# An Array of elements with a cursor that can only move forward.
#
# The cursor starts on the first element. +current+ reads the element under
# the cursor and +next+ advances it by one, raising FragmentException instead
# of moving past the final element.
class Star < Array
  # @param elements [Array<Object>] the items to hold, in order
  def initialize(*elements)
    @current = 0
    super(elements)
  end

  # @return [Object, nil] the element under the cursor (nil when empty)
  def current
    self[@current]
  end

  # Advances the cursor to the following element.
  #
  # @return [Integer] the new cursor index
  # @raise [FragmentException] when the cursor is already on the last element,
  #   or when there are no elements at all
  def next
    raise FragmentException, "Fragment (consider revising)" if last_word?

    @current += 1
  end

  private

  # True when there is nothing after the cursor. Uses >= rather than == so
  # that an empty Star (length - 1 == -1) also counts as being at the end;
  # with == the cursor could be advanced past the end indefinitely.
  def last_word?
    @current >= length - 1
  end
end
|
data/lib/string.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/string_bracketing.rb'
|
2
|
+
require File.dirname(__FILE__) + '/word.rb'
|
3
|
+
|
4
|
+
# Core extensions to String used by the grammar checker: small text helpers
# (operators, sentence and word splitting) plus grammar checking that is
# delegated to an ATN parser.
class String
  include StringBracketing

  # One or more sentence-terminating characters ("...", "?!", "." and so on).
  @@ending_punctuation = /[.?!]+/

  alias :trim :strip

  # Removes every occurrence of +pattern+.
  #
  # @param pattern [Regexp, String]
  # @return [String] a new string without the matched text
  def -(pattern)
    gsub(pattern, '')
  end

  # Joins the receiver and +str_to_join+ as path segments via File.join.
  #
  # @param str_to_join [String]
  # @return [String]
  def /(str_to_join)
    File.join(self, str_to_join)
  end

  # Lower-cases the text and replaces each run of ending punctuation with a
  # single ".", which is used as the one "full stop" character marking the end
  # of a sentence regardless of how it was punctuated,
  # e.g. "Hello again..." => "hello again."
  #
  # TODO how is this used with abbreviations?
  #
  # @return [String]
  def normalize
    downcase.gsub(@@ending_punctuation, '.')
  end

  # Splits the text into sentences, each ending in its punctuation and
  # stripped of surrounding whitespace.
  #
  # Text after the last terminator is not returned. Because "." in the
  # pattern does not match a newline, only the part of a sentence that is on
  # the same line as its terminator is captured.
  #
  # @return [Array<String>]
  def sentences
    scan(/.*?#{@@ending_punctuation}/i).map(&:strip)
  end

  # Splits on whitespace and keeps, for each token, its first run of letters,
  # apostrophes and hyphens. A token containing none of those characters
  # contributes an empty string.
  #
  # See StringTest#test_whitespace_then_character_string for why empty tokens
  # (produced by leading whitespace) are dropped before matching.
  #
  # @return [Array<String>]
  def words
    split(/\s+/).reject(&:empty?).map { |token| token.match(/[a-z\'\-]+/i).to_s }
  end

  # Collects the full text of every match of +pattern+.
  #
  # @param pattern [Regexp, String]
  # @return [Array<String>] whole matches, even when the pattern has groups
  def matches_for(pattern)
    matches = []
    # scan yields capture groups when the pattern has any, so read the whole
    # match from the last MatchData instead of the block argument.
    scan(pattern) { matches << Regexp.last_match(0) }
    matches
  end

  # A sentence is determined to be grammatically correct if
  # a final state in the ATN is reached by the last word in the sentence.
  #
  # @param language [Symbol] language passed to the ATN
  # @return [Boolean] false when parsing raises UngrammaticalException
  def grammatical?(language = :english)
    parse(language)
    true
  rescue UngrammaticalException
    false
  end

  # Checks each sentence and rebuilds the text, joined by single spaces.
  # Grammatical sentences are kept as they are; each ungrammatical sentence is
  # replaced by the value the block returns for it. Without a block,
  # ungrammatical sentences are kept unchanged.
  #
  # TODO add tests
  # TODO pass the parser's error message to the block as well
  #
  # @param language [Symbol] language used for every sentence
  # @yieldparam sentence [String] an ungrammatical sentence
  # @yieldreturn [#to_s] the text to use in its place
  # @return [String]
  def check_grammar(language = :english)
    checked = sentences.map do |sentence|
      # The language has to be forwarded; it used to be dropped here, so every
      # sentence was checked against the default language.
      if sentence.grammatical?(language) || !block_given?
        sentence
      else
        yield sentence
      end
    end

    checked.join(" ")
  end

  private

  # Parses the receiver with the ATN for +language+.
  #
  # ATNs are kept at class level to avoid the cost of building one per string.
  # They are cached per language: a single shared instance would keep serving
  # whichever language happened to be requested first.
  def parse(language)
    @@atns ||= {}
    (@@atns[language] ||= ATN.new(language)).parse(self)
  end
end
|