odin 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,45 @@
1
+ simple_atn
2
+ ==========
3
+
4
+ The version of the ATN framework and sample ATN that was produced during prototyping. I'm happy with how it turned out. It should be pretty easy on the eyes. Example function call:
5
+
6
+ push(:noun_phrase, position, registers, :into => :object_of_preposition, :next => :prepositional_phrase__noun_phrase)
7
+
8
+ There's no frontend. Running `rake` (or `ruby augmented_transition_network_test.rb`, if you don't have rake installed) in the root of this directory will run the unit tests. I've left a sample parse of the nonsensical sentence 'the monster in the man grows avocados in the street' uncommented so that you can see some output. You should receive this at the command prompt:
9
+
10
+ Loaded suite augmented_transition_network_test
11
+ Started
12
+ ....
13
+ :sentence
14
+ :noun_phrase
15
+ :determiner
16
+ "the"
17
+ :noun
18
+ "monster"
19
+ :prepositional_phrase
20
+ :preposition
21
+ "in"
22
+ :noun_phrase
23
+ :determiner
24
+ "the"
25
+ :noun
26
+ "man"
27
+ :verb
28
+ "grows"
29
+ :noun_phrase
30
+ :noun
31
+ "avocados"
32
+ :prepositional_phrase
33
+ :preposition
34
+ "in"
35
+ :noun_phrase
36
+ :determiner
37
+ "the"
38
+ :noun
39
+ "street"
40
+ ..
41
+ Finished in 0.02785 seconds.
42
+
43
+ 6 tests, 48 assertions, 0 failures, 0 errors
44
+
45
+ The vocabulary of this ATN is quite limited -- see `string.rb`. I have more in-depth categorization routines in the parent directory, but I didn't see much need to include them here -- doing so would only have taken longer. The grammar the ATN represents isn't especially complicated, either: I spent most of my time paring the earlier code down so that it would be manageable. At the moment, only noun phrases, prepositional phrases, verb phrases, and (of course) sentences are defined. The important part is that it handles non-determinism gracefully. Also of note, the ATN computes the deep structure of passive sentences by the same mechanism as the one presented in class.
@@ -0,0 +1,9 @@
1
# Rake tasks for the simple_atn prototype. Running `rake` with no arguments
# runs the unit tests.
#
# The tests are run through Rake::TestTask rather than the old
# `rake/runtest` helper (Rake.run_tests), which current Rake releases no
# longer provide.
require 'rake/testtask'

task :default => "test"
task :test => ["test:units"]

namespace "test" do
  # test:units -- runs every *_test.rb file found in this directory.
  Rake::TestTask.new(:units) do |t|
    t.pattern = "*_test.rb"
  end
end
@@ -0,0 +1,15 @@
1
class Array
  # Renders the array as an indented tree, one element per line.
  #
  # Elements that respond to +inspect_as_tree+ (nested arrays, for instance)
  # are rendered recursively, +indentation+ columns deeper than this array.
  # Every other element is written as its +inspect+ string, prefixed with
  # +level+ spaces and followed by a newline.
  #
  # indentation - number of extra spaces added per nesting step (default 4).
  # level       - number of spaces placed in front of this array's own
  #               elements (default 0).
  #
  # Returns the tree as a String ("" for an empty array).
  def inspect_as_tree(indentation = 4, level = 0)
    padding = " " * level

    # Build the pieces and join them instead of appending to a "" literal,
    # so the method keeps working when string literals are frozen.
    rendered = map do |item|
      if item.respond_to?(:inspect_as_tree)
        item.inspect_as_tree(indentation, level + indentation)
      else
        "#{padding}#{item.inspect}\n"
      end
    end

    rendered.join
  end
end
@@ -0,0 +1,146 @@
1
+ require File.dirname(__FILE__) + '/string'
2
+ require File.dirname(__FILE__) + '/array'
3
+
4
# Raised when the words being parsed cannot be matched by the grammar.
#
# Derives from StandardError (not Exception directly) because it signals an
# ordinary, expected failure; `rescue Ungrammatical` and
# `assert_raise(Ungrammatical)` behave exactly as before.
class Ungrammatical < StandardError; end
5
+
6
# A small framework for augmented transition network (ATN) parsers.
#
# Grammar nodes are plain methods that take (position, registers) and are
# invoked by name with +send+. For the :en language they come from the
# English module, which is mixed into the instance by #initialize. This class
# supplies the traversal primitives those nodes use: register handling,
# word tests, arc following, and push/pop of sub-networks.
class AugmentedTransitionNetwork
  # language - Symbol naming the grammar to load. :en requires english.rb
  #            from this directory and extends the instance with English;
  #            any other value loads no grammar nodes.
  def initialize(language = :en)
    if :en == language
      require File.dirname(__FILE__) + '/english'
      extend English
    end
    clear!
  end

  # Parses +words+ starting at the node named +start_node+.
  #
  # words      - Array of word Strings; a frozen copy is kept for the parse.
  # start_node - Symbol naming the node method to start from.
  #
  # Returns the structure left in @star by the last #pop.
  # Any exception raised by a node (e.g. Ungrammatical) propagates.
  def parse(words, start_node = :sentence)
    clear!
    @words = words.dup.freeze
    send(start_node, 0, {})
    # The result for the network traversal is located in @star.
    @star
  end

  # Parses +words+ and returns the quoted strings found in the inspected
  # parse result, joined by single spaces with the quotes removed.
  #
  # Relies on String#matches_for, which is not defined in this class
  # (presumably it comes from the string.rb required by this file).
  def parse_to_string(words, start_node = :sentence)
    parsed = parse(words, start_node)
    parsed.inspect.matches_for(/".*?"/).join(" ").gsub("\"", '')
  end

  private

  # Resets the parse state: no result and no words.
  def clear!
    @star = nil
    @words = []
  end

  # Tag a word or phrase with a functional role.
  #
  # For example, a single word may be labeled :noun.
  # A phrase (multiple words) may be labeled :noun_phrase. (Note that each
  # constituent of a phrase should have a tag as well.)
  #
  # marker       - the tag, placed first in the result.
  # constituents - a single word or an Array of constituents. nil entries
  #                (e.g. registers that were never set) are left out.
  #
  # Returns an Array: [marker, *non_nil_constituents].
  def tag(marker, constituents)
    # TODO Tag in a different way? I have to call .last to get the real word...
    #
    # Array() wraps a bare String: set_register passes a single word by
    # default, and String no longer responds to #each on Ruby 1.9+.
    [marker] + Array(constituents).compact
  end

  # TODO
  # def choose_arc(arcs, position, registers)
  #   arcs.each do |arc|
  #     begin
  #       arc.call(position, registers)
  #     rescue Ungrammatical
  #       # Move onto the next one
  #     end
  #   end
  #
  #   raise Ungrammatical
  # end

  # Set a given register in the hash given as an argument. The value that
  # gets assigned to the key is specified in the optional 'extras' hash. By
  # default, the tag is the same as the destination register (register_name)
  # and the content is the word at the given position.
  #
  # extras - Hash with optional :tag and :content entries.
  #
  # Returns the tagged value that was stored.
  def set_register(register_name, position, registers, extras = {})
    # Named +marker+ so the local does not shadow the #tag method.
    marker = extras[:tag] || register_name
    content = extras[:content] || @words[position]

    registers[register_name] = tag(marker, content)
  end

  # Raises Ungrammatical when there is still a word at +position+; otherwise
  # returns whether +position+ is exactly the number of words.
  def at_last_word?(position)
    raise Ungrammatical unless @words[position].nil?

    @words.length == position
  end

  # Returns the result of calling "<category>?" on the word at +position+,
  # or false when there is no word there.
  def in_category?(category, position)
    word = @words[position]
    !word.nil? && word.send("#{category}?")
  end

  # Returns whether the word at +position+ equals +exact_word+.
  # Raises Ungrammatical when +position+ is outside the words.
  def exact_word?(exact_word, position)
    word = @words[position]
    raise Ungrammatical if word.nil?

    word == exact_word
  end

  # Consumes the current word: continues at +node_name+ with the next
  # position and a copy of the registers.
  def follow_arc_to(node_name, position, registers)
    send(node_name, position + 1, registers.dup)
  end

  # Continues at +node_name+ without consuming a word, with a copy of the
  # registers.
  def jump_to(node_name, position, registers)
    send(node_name, position, registers.dup)
  end

  # Traverses the sub-network starting at +node_name+, stores its result in a
  # register, and continues with the next node.
  #
  # extras - Hash; both entries are required:
  #          :into - register that receives the sub-network's result.
  #          :next - node to continue with afterwards.
  #
  # Raises RuntimeError when :into or :next is missing.
  def push(node_name, position, registers, extras)
    # TODO I'm pretty sure there's an easier way to handle the argument hash
    if extras[:into].nil? || extras[:next].nil?
      raise "You must give :into and :next for the 'extra' hash"
    end

    destination_register = extras[:into]
    next_node = extras[:next]

    # Traverse the subnetwork...
    send(node_name, position, registers.dup)

    # The result for the subnetwork traversal is located in @star.
    registers[destination_register] = @star.dup
    # Advance by String#number_in_quotes of the inspected result (defined
    # outside this class; presumably the number of words the sub-network used).
    position += registers[destination_register].inspect.number_in_quotes

    # Move along to the next node
    send(next_node, position, registers.dup)
  end

  # Stores +content+ as the result of the (sub-)network being traversed.
  def pop(content)
    @star = content
  end
end
@@ -0,0 +1,113 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/augmented_transition_network'
3
+
4
# Unit tests for AugmentedTransitionNetwork as created with its default
# language argument.
class AugmentedTransitionNetworkTest < Test::Unit::TestCase
  def setup
    @atn = AugmentedTransitionNetwork.new
  end

  # Active-voice sentences should come back word for word.
  def test_non_passive_parse
    [
      %w{man grows},
      %w{the man grows},
      %w{the old man grows},
      %w{the old smiling man grows},
      %w{the old smiling man grows avocados},
      %w{the old smiling man grows the avocados},
      %w{the old old smiling man grows the avocados},
      %w{monster ate},
      %w{the monster ate},
      %w{the monster ate cookie},
      %w{the monster ate the cookie},
      %w{the monster ate the old cookie}
    ].each do |words|
      assert_equal(words.join(" "), @atn.parse_to_string(words))
    end
  end

  # Passive sentences should be rewritten into their active form.
  def test_passive_parse
    [
      ["monster ate cookie", %w{cookie was eaten by monster}],
      ["the monster ate the cookie", %w{the cookie was eaten by the monster}],
      ["the monster ate the old cookie", %w{the old cookie was eaten by the monster}],
      ["the avocados ate the old man", %w{the old man was eaten by the avocados}]
    ].each do |active, passive|
      assert_equal(active, @atn.parse_to_string(passive))
    end

    # Sample output: prints the parse tree of one passive sentence.
    puts @atn.parse(%w{the old man was eaten by the avocados}).inspect_as_tree
  end

  def test_ungrammatical_parse
    assert_ungrammatical([
      %w{the},
      %w{man},
      %w{the man},
      %w{the old man},
      %w{the old smiling man},
      %w{grows},
      %w{grows avocados},
      %w{grows the avocados},
      %w{was the avocados},
      %w{was eaten the avocados},
      %w{was eaten by the avocados}
      # disabled case: %w{the old man was eaten the avocados}
    ])
  end

  def test_prepositional_phrase
    assert_equal("in the street", @atn.parse_to_string(%w{in the street}, :prepositional_phrase))

    assert_ungrammatical([
      [],
      %w{in},
      %w{in the},
      %w{in the the},
      %w{in the grows}
    ], :prepositional_phrase)
  end

  def test_prepositional_phrase_in_noun_phrase
    [
      %w{man in street},
      %w{man in the street},
      %w{the man in the street}
    ].each do |words|
      assert_equal(words.join(" "), @atn.parse_to_string(words, :noun_phrase))
    end
  end

  def test_prepositional_phrase_in_sentence
    [
      %w{man in street grows},
      %w{man in the street grows},
      %w{the man in the street grows},
      %w{the man in the street grows avocados},
      %w{the man grows avocados in the street},
      %w{the man in the street grows avocados in the street}
    ].each do |words|
      assert_equal(words.join(" "), @atn.parse_to_string(words))
    end

    # NOTE(review): these inputs are parsed from :prepositional_phrase, not
    # from the default :sentence node used by the assertions above. This
    # looks like a copy of the list in test_prepositional_phrase -- confirm
    # whether the sentence start node was intended.
    assert_ungrammatical([
      [],
      %w{in},
      %w{in the},
      %w{in the the},
      %w{in the grows},
      %w{the in the street man in the street grows avocados in the street}
    ], :prepositional_phrase)

    # puts @atn.parse(%w{the monster in the man grows avocados in the street}).inspect
  end

  private

  # Asserts that parsing each word list raises Ungrammatical. Extra
  # arguments (such as a start node) are passed through to #parse.
  def assert_ungrammatical(word_lists, *parse_args)
    word_lists.each do |words|
      assert_raise(Ungrammatical) do
        @atn.parse(words, *parse_args)
      end
    end
  end
end
@@ -0,0 +1,161 @@
1
+ # there might still be some issues with things that don't raise Ungrammatical correctly
2
# Grammar nodes for a small English ATN: sentences, noun phrases, verb
# phrases and prepositional phrases.
#
# Each node is a method taking the current word position and the register
# hash. Nodes are named <network>__<state reached so far>. They rely on
# helpers that are not defined in this module (push, pop, tag, set_register,
# follow_arc_to, jump_to, in_category?, exact_word?, at_last_word?) and on
# the @words instance variable of the object they are mixed into.
module English
  private

  # Start of a sentence: parse a noun phrase into the :subject register.
  def sentence(position, registers)
    push(:noun_phrase, position, registers, :into => :subject, :next => :sentence__subject)
  end

  # After the subject: the next word must be a verb.
  def sentence__subject(position, registers)
    raise Ungrammatical unless in_category?(:verb, position)

    set_register(:verb, position, registers)
    follow_arc_to(:verb_phrase__verb, position, registers)
  end

  # After the verb: either a passive construction ("was" + past participle)
  # or an optional object noun phrase.
  def verb_phrase__verb(position, registers)
    # .last because of the tagging: the word is the last element of the tag.
    passive = in_category?(:past_participle, position) && "was" == registers[:verb].last

    if passive
      # Passive voice: what was parsed as the subject is really the object,
      # and the verb is recorded in its preterite form.
      registers[:object] = registers.delete(:subject)
      set_register(:verb, position, registers, :content => @words[position].preterite)
      return follow_arc_to(:verb_phrase__verb, position, registers)
      # TODO arc 9
    end

    # TODO transitive verb
    begin
      push(:noun_phrase, position, registers, :into => :object, :next => :sentence__verb_phrase)
    rescue Ungrammatical
      # A rescue like this is needed whenever a push is followed by another
      # arc: with no object noun phrase, carry on from the same position.
      jump_to(:sentence__verb_phrase, position, registers)
    end
  end

  # After the verb phrase: finish the sentence when no words remain;
  # otherwise only "by" may follow.
  def sentence__verb_phrase(position, registers)
    # at_last_word? raises Ungrammatical while a word remains at position,
    # which sends control to the rescue clause below.
    pop(tag(:sentence, registers.values_at(:subject, :verb, :object))) if at_last_word?(position)
  rescue Ungrammatical
    raise Ungrammatical unless exact_word?("by", position)

    follow_arc_to(:sentence__by, position, registers)
  end

  # After "by": only valid when there is no subject (the passive branch
  # removed it); the following noun phrase becomes the subject.
  def sentence__by(position, registers)
    raise Ungrammatical unless registers[:subject].nil?

    # Passive voice
    push(:noun_phrase, position, registers, :into => :subject, :next => :sentence__verb_phrase)
  end

  # Start of a noun phrase: an optional determiner, or failing everything
  # else, a pronoun.
  def noun_phrase(position, registers)
    if in_category?(:determiner, position)
      set_register(:determiner, position, registers)
      return follow_arc_to(:noun_phrase__determiner, position, registers)
    end

    begin
      jump_to(:noun_phrase__determiner, position, registers)
    rescue Ungrammatical
      raise Ungrammatical unless in_category?(:pronoun, position)

      set_register(:noun, position, registers)
      follow_arc_to(:noun_phrase__noun, position, registers)
    end
  end

  # After the (optional) determiner: any number of adjectives or present
  # participles, collected in the :adjective register, and then a noun.
  def noun_phrase__determiner(position, registers)
    if in_category?(:adjective, position) || in_category?(:present_participle, position)
      # Nest the previous :adjective register inside the new one so earlier
      # modifiers are kept.
      set_register(:adjective, position, registers, :content => [registers[:adjective], @words[position]])
      follow_arc_to(:noun_phrase__determiner, position, registers)
    elsif in_category?(:noun, position)
      set_register(:noun, position, registers)
      follow_arc_to(:noun_phrase__noun, position, registers)
    else
      raise Ungrammatical
    end
  end

  # TODO: express noun_phrase__determiner as a list of arcs (one Proc per
  # category test above) handed to a choose_arc helper:
  #
  #   arcs = []
  #   arcs << Proc.new do |position, registers|
  #     if in_category?(:noun, position)
  #       set_register(:noun, position, registers)
  #       follow_arc_to(:noun_phrase__noun, position, registers)
  #     end
  #   end
  #   # ... likewise for :adjective and :present_participle ...
  #   choose_arc(arcs, position, registers)

  # After the noun: try to attach a prepositional phrase; when that fails,
  # finish the noun phrase with whatever registers are set.
  def noun_phrase__noun(position, registers)
    push(:prepositional_phrase, position, registers, :into => :prepositional_phrase, :next => :noun_phrase__noun)
  rescue Ungrammatical
    pop(tag(:noun_phrase, registers.values_at(:determiner, :adjective, :noun, :prepositional_phrase)))
  end

  # Start of a prepositional phrase: the word must be a preposition.
  def prepositional_phrase(position, registers)
    raise Ungrammatical unless in_category?(:preposition, position)

    set_register(:preposition, position, registers)
    follow_arc_to(:prepositional_phrase__preposition, position, registers)
  end

  # After the preposition: parse its object noun phrase.
  def prepositional_phrase__preposition(position, registers)
    push(:noun_phrase, position, registers, :into => :object_of_preposition, :next => :prepositional_phrase__noun_phrase)
  end

  # After the object: finish the prepositional phrase.
  def prepositional_phrase__noun_phrase(position, registers)
    pop(tag(:prepositional_phrase, registers.values_at(:preposition, :object_of_preposition)))
  end
end