odin 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,146 @@
1
+ require File.dirname(__FILE__) + '/string'
2
+ require File.dirname(__FILE__) + '/array'
3
+
4
# Raised by the AugmentedTransitionNetwork helpers when the words being parsed
# cannot be accepted at the current position.
#
# NOTE: this derives from Exception rather than StandardError, so a bare
# `rescue` will not catch it; it has to be rescued by name.
class Ungrammatical < Exception
end
5
+
6
# A small augmented transition network (ATN) parser.
#
# Network nodes are ordinary methods taking (position, registers). They are
# invoked by name through +send+; for :en they come from the English module
# that is mixed into the instance. A traversal reports its result by calling
# +pop+, which stores it in @star.
class AugmentedTransitionNetwork
  # language - Symbol naming the grammar to load. Only :en is handled: it
  #            requires the sibling 'english' file and extends this instance
  #            with the English module. Any other value loads no grammar.
  def initialize(language = :en)
    if language == :en
      # Deliberately lazy: the grammar file is only loaded when asked for.
      require File.dirname(__FILE__) + '/english'
      extend English
    end
    clear!
  end

  # Traverse the network over +words+, starting at the node +start_node+.
  #
  # words      - the words to parse; a frozen copy is kept, the argument
  #              itself is left untouched.
  # start_node - name of the node method to start from.
  #
  # Returns whatever the traversal stored via +pop+ (nil if nothing was
  # popped). Ungrammatical is not rescued here.
  def parse(words, start_node = :sentence)
    clear!
    @words = words.dup.freeze
    send(start_node, 0, {})
    # The result for the network traversal is located in @star.
    @star
  end

  # Parse +words+ and flatten the result into a space-separated string built
  # from the double-quoted fragments of the result's +inspect+ output.
  #
  # Relies on String#matches_for, which is not defined in this file
  # (presumably supplied by the required 'string' extension -- confirm).
  def parse_to_string(words, start_node = :sentence)
    parsed = parse(words, start_node)
    parsed.inspect.matches_for(/".*?"/).join(" ").gsub("\"", '')
  end

  private

  # Reset the traversal state: no result, no words.
  def clear!
    @star = nil
    @words = []
  end

  # Tag a word or phrase with a functional role.
  #
  # For example, a single word may be labeled :noun. A phrase (multiple
  # words) may be labeled :noun_phrase. (Note that each constituent of a
  # phrase should have a tag as well.)
  #
  # marker       - the tag; becomes the first element of the result.
  # constituents - a collection of constituents, or a single constituent.
  #                nil entries (e.g. empty registers) are left out.
  #
  # Returns a new Array: [marker, *non_nil_constituents].
  def tag(marker, constituents)
    # TODO Tag in a different way? I have to call .last to get the real word...
    #
    # set_register passes a bare word (a String) here by default. Core String
    # has no #each since Ruby 1.9, so anything that cannot be iterated is
    # treated as a single constituent instead of raising NoMethodError.
    items = constituents.respond_to?(:each) ? constituents : [constituents]

    tagged = [marker]
    items.each { |constituent| tagged << constituent unless constituent.nil? }
    tagged
  end

  # TODO
  # def choose_arc(arcs, position, registers)
  #   arcs.each do |arc|
  #     begin
  #       arc.call(position, registers)
  #     rescue Ungrammatical
  #       # Move onto the next one
  #     end
  #   end
  #
  #   raise Ungrammatical
  # end

  # Store a tagged value in +registers+ under +register_name+.
  #
  # extras may carry :tag (defaults to register_name) and :content (defaults
  # to the word at +position+). The stored value is tag(tag, content).
  #
  # Returns the stored value.
  def set_register(register_name, position, registers, extras = {})
    label   = extras[:tag] || register_name
    content = extras[:content] || @words[position]

    registers[register_name] = tag(label, content)
  end

  # Check that the input has been fully consumed.
  #
  # Raises Ungrammatical if there is still a word at +position+; otherwise
  # returns whether +position+ is exactly one past the last word.
  def at_last_word?(position)
    raise Ungrammatical unless @words[position].nil?

    @words.length == position
  end

  # True when there is a word at +position+ and it answers truthily to the
  # predicate "<category>?". The predicate is sent to the word itself, so it
  # must be defined on the word's class (presumably by the project's String
  # extensions -- confirm).
  def in_category?(category, position)
    word = @words[position]
    !word.nil? && word.send("#{category}?")
  end

  # True when the word at +position+ equals +exact_word+.
  #
  # Raises Ungrammatical when +position+ is outside the word list.
  def exact_word?(exact_word, position)
    word = @words[position]
    raise Ungrammatical if word.nil?

    word == exact_word
  end

  # Consume the current word: continue at +node_name+ with the next position
  # and a copy of the registers.
  def follow_arc_to(node_name, position, registers)
    send(node_name, position + 1, registers.dup)
  end

  # Continue at +node_name+ without consuming a word (same position, copied
  # registers).
  def jump_to(node_name, position, registers)
    send(node_name, position, registers.dup)
  end

  # Traverse the subnetwork starting at +node_name+, store its result in a
  # register and continue at another node.
  #
  # extras - must contain :into (the destination register) and :next (the
  #          node to continue with); a RuntimeError is raised otherwise.
  #
  # Note that the destination register is written into the +registers+ hash
  # that was passed in, not into a copy. The position is then advanced by
  # String#number_in_quotes of the result's +inspect+ output; that method is
  # not defined in this file (presumably a count of the quoted words the
  # subnetwork consumed -- confirm against the 'string' extension).
  def push(node_name, position, registers, extras)
    destination_register = extras[:into]
    next_node = extras[:next]

    if destination_register.nil? || next_node.nil?
      raise "You must give :into and :next for the 'extra' hash"
    end

    # Traverse the subnetwork...
    send(node_name, position, registers.dup)

    # The result for the subnetwork traversal is located in @star.
    registers[destination_register] = @star.dup
    position += registers[destination_register].inspect.number_in_quotes

    # Move along to the next node
    send(next_node, position, registers.dup)
  end

  # Report +content+ as the result of the current (sub)network traversal.
  def pop(content)
    @star = content
  end
end
@@ -0,0 +1,44 @@
1
# Helpers for printing structured, human-readable dumps to standard output.
# All helpers are private; mix the module into the object that does the
# dumping.
module Dumper
  private

  # Print +string+ upcased between two "=" rules, run the block, then print a
  # closing rule and a blank line. The rules are five characters longer than
  # +string+.
  def heading(string)
    rule = "=" * (string.length + 5)

    puts rule
    puts string.upcase
    puts rule
    yield
    puts rule
    puts
  end

  # Print +string+ underlined with "-" (two characters longer than the
  # title), run the block, then print a blank line.
  def section(string)
    puts string
    puts "-" * (string.length + 2)
    yield
    puts
  end

  # Prefix every line of +string+ with +character+ repeated +length+ times.
  #
  # Returns a new String; an empty input gives an empty result.
  def indent(string, length = 2, character = " ")
    prefix = character * length
    output = ""

    string.each_line { |line| output << prefix << line }

    output
  end

  # Render a nested array as text: one +inspect+ed element per line, with
  # every nested array indented one level (two spaces) deeper.
  #
  # Returns the rendered String.
  def inspect_tree(tree)
    output = ""

    tree.each do |branch|
      # is_a? rather than comparing the class name, so that Array subclasses
      # are rendered as subtrees too.
      output << (branch.is_a?(Array) ? indent(inspect_tree(branch)) : "#{branch.inspect}\n")
    end

    output
  end
end
@@ -0,0 +1,283 @@
1
+ # From Rails
2
+ require 'singleton'
3
+
4
# The NounInflector transforms words from singular to plural, class names to table names,
# modularized class names to ones without, and class names to foreign keys.
#
# This module defines no inflection rules of its own: the rule lists start out empty and the
# defaults are loaded by the require of lang/en/noun_inflections at the bottom of this file.
# The examples below therefore assume that Rails-style English rules have been registered.
module NounInflector
  # A singleton instance of this class is yielded by NounInflector.inflections, which can then
  # be used to specify additional inflection rules. Examples:
  #
  #   NounInflector.inflections do |inflect|
  #     inflect.plural /^(ox)$/i, '\1en'
  #     inflect.singular /^(ox)en/i, '\1'
  #
  #     inflect.irregular 'octopus', 'octopi'
  #
  #     inflect.uncountable "equipment"
  #   end
  #
  # New rules are added at the top. So in the example above, the irregular rule for octopus will
  # be the first of the pluralization and singularization rules that is run. This guarantees that
  # your rules run before any of the rules that may already have been loaded.
  class Inflections
    include Singleton

    attr_reader :plurals, :singulars, :uncountables

    def initialize
      @plurals, @singulars, @uncountables = [], [], []
    end

    # Specifies a new pluralization rule and its replacement. The rule can either be a string or
    # a regular expression. The replacement should always be a string that may include
    # references to the matched data from the rule.
    def plural(rule, replacement)
      @plurals.insert(0, [rule, replacement])
    end

    # Specifies a new singularization rule and its replacement. The rule can either be a string
    # or a regular expression. The replacement should always be a string that may include
    # references to the matched data from the rule.
    def singular(rule, replacement)
      @singulars.insert(0, [rule, replacement])
    end

    # Specifies a new irregular that applies to both pluralization and singularization at the
    # same time. This can only be used for strings, not regular expressions. You simply pass the
    # irregular in singular and plural form.
    #
    # When both forms start with the same letter, one case-insensitive rule per direction is
    # registered and the matched first letter is reused, preserving its case. Otherwise separate
    # rules are registered for an upper-case and a lower-case first letter.
    #
    # Examples:
    #   irregular 'octopus', 'octopi'
    #   irregular 'person', 'people'
    def irregular(singular, plural)
      singular_head, singular_tail = singular[0, 1], singular[1..-1]
      plural_head, plural_tail = plural[0, 1], plural[1..-1]

      # NOTE: plural(...) and singular(...) below are method calls; the parameters of the same
      # name only shadow them when used without arguments.
      if singular_head.upcase == plural_head.upcase
        plural(Regexp.new("(#{singular_head})#{singular_tail}$", Regexp::IGNORECASE), '\1' + plural_tail)
        singular(Regexp.new("(#{plural_head})#{plural_tail}$", Regexp::IGNORECASE), '\1' + singular_tail)
      else
        # (?i) switches to case-insensitive matching after the first letter only.
        plural(Regexp.new("#{singular_head.upcase}(?i)#{singular_tail}$"), plural_head.upcase + plural_tail)
        plural(Regexp.new("#{singular_head.downcase}(?i)#{singular_tail}$"), plural_head.downcase + plural_tail)
        singular(Regexp.new("#{plural_head.upcase}(?i)#{plural_tail}$"), singular_head.upcase + singular_tail)
        singular(Regexp.new("#{plural_head.downcase}(?i)#{plural_tail}$"), singular_head.downcase + singular_tail)
      end
    end

    # Add uncountable words that should not be inflected.
    #
    # Examples:
    #   uncountable "money"
    #   uncountable "money", "information"
    #   uncountable %w( money information rice )
    def uncountable(*words)
      @uncountables.concat(words.flatten)
    end

    # Clears the loaded inflections within a given scope (default is :all). Give the scope as a
    # symbol of the inflection type, the options are: :plurals, :singulars, :uncountables
    #
    # Examples:
    #   clear :all
    #   clear :plurals
    def clear(scope = :all)
      case scope
      when :all
        @plurals, @singulars, @uncountables = [], [], []
      else
        instance_variable_set "@#{scope}", []
      end
    end
  end

  # Makes every method below callable directly on the module (NounInflector.pluralize(...)).
  extend self

  # Returns the Inflections singleton, or yields it when a block is given (in which case the
  # block's value is returned).
  def inflections
    if block_given?
      yield Inflections.instance
    else
      Inflections.instance
    end
  end

  # Returns the plural form of the word in the string. Empty input and uncountable words are
  # returned unchanged; otherwise the first matching plural rule is applied.
  #
  # Examples
  #   pluralize("post")             #=> "posts"
  #   pluralize("octopus")          #=> "octopi"
  #   pluralize("sheep")            #=> "sheep"
  #   pluralize("words")            #=> "words"
  #   pluralize("the blue mailman") #=> "the blue mailmen"
  #   pluralize("CamelOctopus")     #=> "CamelOctopi"
  def pluralize(word)
    result = word.to_s.dup

    # Test the converted string: the raw argument may be nil or another non-String.
    return result if result.empty? || inflections.uncountables.include?(result.downcase)

    inflections.plurals.each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
    result
  end

  # The reverse of pluralize, returns the singular form of a word in a string.
  #
  # Examples
  #   singularize("posts")            #=> "post"
  #   singularize("octopi")           #=> "octopus"
  #   singularize("sheep")            #=> "sheep"
  #   singularize("word")             #=> "word"
  #   singularize("the blue mailmen") #=> "the blue mailman"
  #   singularize("CamelOctopi")      #=> "CamelOctopus"
  def singularize(word)
    result = word.to_s.dup

    return result if inflections.uncountables.include?(result.downcase)

    inflections.singulars.each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
    result
  end

  # By default, camelize converts strings to UpperCamelCase. If the second argument is false
  # (or nil), or the symbol :lower, camelize produces lowerCamelCase: the first character of
  # the input is kept as it is and the rest is camelized.
  #
  # camelize will also convert '/' to '::' which is useful for converting paths to namespaces
  #
  # Examples
  #   camelize("active_record")                #=> "ActiveRecord"
  #   camelize("active_record", :lower)        #=> "activeRecord"
  #   camelize("active_record/errors")         #=> "ActiveRecord::Errors"
  #   camelize("active_record/errors", false)  #=> "activeRecord::Errors"
  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    word = lower_case_and_underscored_word.to_s
    upper_camel = word.gsub(/\/(.?)/) { "::" + $1.upcase }.gsub(/(^|_)(.)/) { $2.upcase }

    return upper_camel if first_letter_in_uppercase && first_letter_in_uppercase != :lower

    # word[0, 1] instead of String#first, which only exists with ActiveSupport loaded;
    # to_s keeps the empty string from producing "" + nil.
    word[0, 1] + upper_camel[1..-1].to_s
  end

  # Capitalizes all the words and replaces some characters in the string to create
  # a nicer looking title. Titleize is meant for creating pretty output.
  #
  # Examples
  #   titleize("man from the boondocks") #=> "Man From The Boondocks"
  #   titleize("x-men: the last stand")  #=> "X Men: The Last Stand"
  def titleize(word)
    humanize(underscore(word)).gsub(/\b([a-z])/) { $1.capitalize }
  end

  # The reverse of +camelize+. Makes an underscored form from the expression in the string.
  #
  # Changes '::' to '/' to convert namespaces to paths.
  #
  # Examples
  #   underscore("ActiveRecord")         #=> "active_record"
  #   underscore("ActiveRecord::Errors") #=> "active_record/errors"
  def underscore(camel_cased_word)
    camel_cased_word.to_s.gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").
      downcase
  end

  # Replaces underscores with dashes in the string.
  #
  # Example
  #   dasherize("puni_puni") #=> "puni-puni"
  def dasherize(underscored_word)
    underscored_word.tr("_", "-")
  end

  # Capitalizes the first word and turns underscores into spaces and strips a trailing _id.
  # Like titleize, this is meant for creating pretty output.
  #
  # Examples
  #   humanize("employee_salary") #=> "Employee salary"
  #   humanize("author_id")       #=> "Author"
  def humanize(lower_case_and_underscored_word)
    lower_case_and_underscored_word.to_s.gsub(/_id$/, "").gsub(/_/, " ").capitalize
  end

  # Removes the module part from the expression in the string
  #
  # Examples
  #   demodulize("ActiveRecord::CoreExtensions::String::Inflections") #=> "Inflections"
  #   demodulize("Inflections")                                       #=> "Inflections"
  def demodulize(class_name_in_module)
    class_name_in_module.to_s.gsub(/^.*::/, '')
  end

  # Create the name of a table like Rails does for models to table names. The whole
  # underscored name is passed to pluralize, whose rules match at the end of the string.
  #
  # Examples
  #   tableize("RawScaledScorer") #=> "raw_scaled_scorers"
  #   tableize("egg_and_ham")     #=> "egg_and_hams"
  #   tableize("fancyCategory")   #=> "fancy_categories"
  def tableize(class_name)
    pluralize(underscore(class_name))
  end

  # Create a class name from a table name like Rails does for table names to models.
  # Note that this returns a string and not a Class. (To convert to an actual class
  # follow classify with constantize.)
  #
  # Examples
  #   classify("egg_and_hams") #=> "EggAndHam"
  #   classify("post")         #=> "Post"
  def classify(table_name)
    # strip out any leading schema name
    camelize(singularize(table_name.to_s.sub(/.*\./, '')))
  end

  # Creates a foreign key name from a class name.
  # +separate_class_name_and_id_with_underscore+ sets whether
  # the method should put '_' between the name and 'id'.
  #
  # Examples
  #   foreign_key("Message")        #=> "message_id"
  #   foreign_key("Message", false) #=> "messageid"
  #   foreign_key("Admin::Post")    #=> "post_id"
  def foreign_key(class_name, separate_class_name_and_id_with_underscore = true)
    underscore(demodulize(class_name)) + (separate_class_name_and_id_with_underscore ? "_id" : "id")
  end

  # Constantize tries to find a declared constant with the name specified
  # in the string. It raises a NameError when the name is not in CamelCase
  # or is not initialized.
  #
  # Examples
  #   constantize("Module") #=> Module
  #   constantize("Class")  #=> Class
  def constantize(camel_cased_word)
    unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ camel_cased_word
      raise NameError, "#{camel_cased_word.inspect} is not a valid constant name!"
    end

    # Only text that matched the constant-name pattern above is ever evaluated.
    Object.module_eval("::#{$1}", __FILE__, __LINE__)
  end

  # Ordinalize turns a number into an ordinal string used to denote the
  # position in an ordered sequence such as 1st, 2nd, 3rd, 4th.
  #
  # The suffix is chosen from the absolute value, so negative numbers get the
  # same suffix as their positive counterparts.
  #
  # Examples
  #   ordinalize(1)    # => "1st"
  #   ordinalize(2)    # => "2nd"
  #   ordinalize(1002) # => "1002nd"
  #   ordinalize(1003) # => "1003rd"
  #   ordinalize(-1)   # => "-1st"
  def ordinalize(number)
    # Ruby's % follows the sign of the divisor (-1 % 10 == 9), hence abs.
    magnitude = number.to_i.abs

    suffix =
      if (11..13).include?(magnitude % 100)
        "th"
      else
        case magnitude % 10
        when 1 then "st"
        when 2 then "nd"
        when 3 then "rd"
        else "th"
        end
      end

    "#{number}#{suffix}"
  end
end
282
+
283
+ require File.dirname(__FILE__) + '/../lang/en/noun_inflections'