odin 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,100 @@
1
module StringBracketing
  # TITLE:
  #
  #   String Bracketing Extensions
  #
  # SUMMARY:
  #
  #   String extension methods which enclose or unenclose a string.
  #
  # CREDITS:
  #
  #   - Thomas Sawyer
  #
  # NOTE(review): the methods below look up a BRA2KET constant that is not
  # defined in this module. It is presumably a Hash mapping an opening bracket
  # to its closing partner (e.g. '{' => '}') -- confirm where it is defined.

  # Return a new string embraced by the given brackets.
  # If only +bra+ is given, its partner is looked up in BRA2KET; when no
  # partner is found, +bra+ itself is placed on both sides.
  #
  #   "wrap me".bracket('{')        #=> "{wrap me}"   (assuming BRA2KET['{'] == '}')
  #   "wrap me".bracket('--','!')   #=> "--wrap me!"

  def bracket(bra, ket=nil)
    ket ||= BRA2KET[bra] || bra
    "#{bra}#{self}#{ket}"
  end

  # In-place version of #bracket.

  def bracket!(bra, ket=nil)
    replace(bracket(bra, ket))
  end

  # Return a new string with the given brackets removed.
  # If only +bra+ is given, its partner is looked up in BRA2KET, falling back
  # to +bra+ itself. With no arguments, the first character is looked up in
  # BRA2KET and the outer pair is removed only if the last character is its
  # partner; otherwise an unchanged copy is returned.
  #
  #   "{unwrap me}".unbracket('{')         #=> "unwrap me"
  #   "--unwrap me!".unbracket('--','!')   #=> "unwrap me"

  def unbracket(bra=nil, ket=nil)
    if bra
      ket ||= BRA2KET[bra] || bra
      s = dup
      # Anchor to the whole string (\A / \z) and substitute once, so that only
      # the outermost brackets are stripped, not one per line.
      s.sub!(/\A#{Regexp.escape(bra)}/, '')
      s.sub!(/#{Regexp.escape(ket)}\z/, '')
      return s
    end

    closer = BRA2KET[self[0, 1]]
    return slice(1...-1) if closer && self[-1, 1] == closer

    dup # if nothing else
  end

  # In-place version of #unbracket.

  def unbracket!(bra=nil, ket=nil)
    replace(unbracket(bra, ket))
  end

  # Return a new string embraced by the given kind of quotes.
  # +type+ is compared via +to_s.downcase+: 'd', 'double' or 2 select double
  # quotes, 'b' or 'back' select backticks, anything else (including the
  # default :s, 'single' and 1) selects single quotes.
  #
  #   "quote me".quote      #=> "'quote me'"
  #   "quote me".quote(2)   #=> "\"quote me\""

  def quote(type=:s)
    case type.to_s.downcase
    when 'd', 'double', '2'
      bracket('"')
    when 'b', 'back'
      bracket('`')
    else
      bracket("'")
    end
  end

  # Return a copy with one leading and one trailing quote character
  # (single, double or backtick) removed. The two ends are checked
  # independently, so the quotes need not match.

  def dequote
    s = dup
    quotes = ["'", '"', '`']

    s[0, 1] = '' if quotes.include?(s[0, 1])
    # Re-check the copy rather than the receiver: a one-character quote string
    # is already empty here and has no last character left to remove.
    s[-1, 1] = '' if quotes.include?(s[-1, 1])

    s
  end

  # end of String Bracketing Extensions
end
@@ -0,0 +1,69 @@
1
+ require 'rubygems'
2
+ require 'facets'
3
+
4
+ require File.dirname(__FILE__) + '/parts_of_speech.rb'
5
+
6
+ # We have a separate class for this so that we know there are no spaces
7
+ #
8
+ # Uses the state pattern for parts of speech. (Needs "facets")
9
+ # See http://blog.jayfields.com/2007/08/ruby-state-pattern-using-modules-and.html
10
+ # and http://blog.jayfields.com/2007/08/ruby-calling-methods-of-specific.html
11
# A single word, stored as a String and tagged with a part of speech.
#
# The part of speech is determined at construction time from the normalized
# form of the content, using the word lists referenced as class variables
# below (NOTE(review): presumably supplied by ClosedClassWords and the
# included part-of-speech modules -- confirm in parts_of_speech.rb).
class Word < String
  include ClosedClassWords

  include Adjective
  include Noun
  include Verb

  # Raised when the content given to Word.new holds more than one word.
  # Derives from StandardError so that a plain `rescue` can handle it.
  class FormatException < StandardError; end

  # The part-of-speech module chosen for this word at construction time.
  attr_reader :part_of_speech

  # content - the text of the word; must respond to #words and #normalize.
  #
  # Raises FormatException if content.words reports more than one word.
  def initialize(content)
    if content.words.length > 1
      raise FormatException, "'#{content}' contains more than one word"
    end

    # Keeping a normalized form separate is nice for when we print out the output later.
    @normalized = content.normalize
    @part_of_speech = determine_part_of_speech
    super(content)
  end

  # Delegates to the #plural? implementation of this word's part of speech.
  def plural?
    # TODO
    as(@part_of_speech).plural?
  end

  # Delegates to the #singular? implementation of this word's part of speech.
  def singular?
    # TODO
    as(@part_of_speech).singular?
  end

  # Returns the word in double quotes followed by its part of speech in
  # parentheses.
  def inspect
    "\"#{self}\" (#{@part_of_speech})"
  end

  private

  # Picks the part of speech by checking the normalized word against each
  # word list in turn; the first list containing it wins, and anything
  # unrecognized is treated as a Noun.
  def determine_part_of_speech
    # TODO
    if @@Determiners.member?(@normalized) || @@PossesiveAdjectives.member?(@normalized)
      Determiner
    elsif @@Pronouns.member?(@normalized)
      Pronoun
    elsif @@Prepositions.member?(@normalized)
      Preposition
    elsif @@IrregularVerbs.member?(@normalized) || @@RegularVerbs.member?(@normalized)
      Verb
    elsif @@Adjectives.member?(@normalized)
      Adjective
    elsif @@Conjunctions.member?(@normalized)
      # NOTE(review): constant name is spelled "Conjuction" here; verify it
      # matches the definition in parts_of_speech.rb.
      Conjuction
    else
      # TODO add an error
      Noun
    end
  end
end
@@ -0,0 +1,265 @@
1
+ # # From the Ruby Linguistics Project, release 1.0.5
2
+ # #
3
+ # # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en/wordnet.rb
4
+ # #
5
+ # # #!/usr/bin/ruby
6
+ # #
7
+ # # This file contains functions for finding relations for English words. It
8
+ # # requires the Ruby-WordNet module to be installed; if it is not installed,
9
+ # # calling the functions defined by this file will raise NotImplementedError
10
+ # # exceptions. Requiring this file adds functions and constants to the
11
+ # # Linguistics::EN module.
12
+ # #
13
+ # # == Synopsis
14
+ # #
15
+ # # # Test to be sure the WordNet module loaded okay.
16
+ # # Linguistics::EN.has_wordnet?
17
+ # # # => true
18
+ # #
19
+ # # # Fetch the default synset for the word "balance"
20
+ # # "balance".synset
21
+ # # # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
22
+ # # (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
23
+ # #
24
+ # # # Fetch the synset for the first verb sense of "balance"
25
+ # # "balance".en.synset( :verb )
26
+ # # # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
27
+ # # (verb): "bring into balance or equilibrium; "She has to balance work and her
28
+ # # domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
29
+ # # verbGroups: 2, hypernyms: 1, hyponyms: 5)>
30
+ # #
31
+ # # # Fetch the second noun sense
32
+ # # "balance".en.synset( 2, :noun )
33
+ # # # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
34
+ # # on pull of gravity" (hypernyms: 1, hyponyms: 5)>
35
+ # #
36
+ # # # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
37
+ # # "balance".en.synset( 2, :noun ).hypernyms
38
+ # # # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
39
+ # # instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
40
+ # # hyponyms: 2)>]
41
+ # #
42
+ # # # A simpler way of doing the same thing:
43
+ # # "balance".en.hypernyms( 2, :noun )
44
+ # # # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
45
+ # # instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
46
+ # # hyponyms: 2)>]
47
+ # #
48
+ # # # Fetch the first hypernym's hypernyms
49
+ # # "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
50
+ # # # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
51
+ # # measuring device (noun): "instrument that shows the extent or amount or quantity
52
+ # # or degree of something" (hypernyms: 1, hyponyms: 83)>]
53
+ # #
54
+ # # # Find the synset to which both the second noun sense of "balance" and the
55
+ # # # default sense of "shovel" belong.
56
+ # # ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
57
+ # # # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
58
+ # # artifact (or system of artifacts) that is instrumental in accomplishing some
59
+ # # end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
60
+ # #
61
+ # # # Fetch just the words for the other kinds of "instruments"
62
+ # # "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
63
+ # # # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
64
+ # # "extractor", "instrument of execution", "instrument of punishment", "measuring
65
+ # # instrument", "measuring system", "measuring device", "medical instrument",
66
+ # # "navigational instrument", "optical instrument", "plotter", "scientific
67
+ # # instrument", "sonograph", "surveying instrument", "surveyor's instrument",
68
+ # # "tracer", "weapon", "arm", "weapon system", "whip"]
69
+ # #
70
+ # #
71
+ # # == Authors
72
+ # #
73
+ # # * Michael Granger <ged@FaerieMUD.org>
74
+ # #
75
+ # # == Copyright
76
+ # #
77
+ # # Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
78
+ # #
79
+ # # This module is free software. You may use, modify, and/or redistribute this
80
+ # # software under the terms of the Perl Artistic License. (See
81
+ # # http://language.perl.com/misc/Artistic.html)
82
+ # #
83
+ # # == Version
84
+ # #
85
+ # # $Id: wordnet.rb,v 1.3 2003/09/14 11:28:02 deveiant Exp $
86
+ # #
87
+ #
88
+ # module Linguistics::EN
89
+ #
90
+ # @has_wordnet = false
91
+ # @wn_error = nil
92
+ # @wn_lexicon = nil
93
+ #
94
+ # # Load WordNet and open the lexicon if possible, saving the error that
95
+ # # occurs if anything goes wrong.
96
+ # begin
97
+ # require 'wordnet'
98
+ # @has_wordnet = true
99
+ # rescue LoadError => err
100
+ # @wn_error = err
101
+ # end
102
+ #
103
+ #
104
+ # #################################################################
105
+ # ### M O D U L E M E T H O D S
106
+ # #################################################################
107
+ # class << self
108
+ #
109
+ # ### Returns +true+ if WordNet was loaded okay
110
+ # def has_wordnet? ; @has_wordnet; end
111
+ #
112
+ # ### If #has_wordnet? returns +false+, this can be called to fetch the
113
+ # ### exception which was raised when WordNet was loaded.
114
+ # def wn_error ; @wn_error; end
115
+ #
116
+ # ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
117
+ # ### functions.
118
+ # def wn_lexicon
119
+ # if @wn_error
120
+ # raise NotImplementedError,
121
+ # "WordNet functions are not loaded: %s" %
122
+ # @wn_error.message
123
+ # end
124
+ #
125
+ # @wn_lexicon ||= WordNet::Lexicon::new
126
+ # end
127
+ #
128
+ # ### Make a function that calls the method +meth+ on the synset of an input
129
+ # ### word.
130
+ # def def_synset_function( meth )
131
+ # (class << self; self; end).instance_eval do
132
+ # define_method( meth ) {|*args|
133
+ # word, pos, sense = *args
134
+ # raise ArgumentError,
135
+ # "wrong number of arguments (0 for 1)" unless word
136
+ # sense ||= 1
137
+ #
138
+ # syn = synset( word.to_s, pos, sense )
139
+ # return syn.nil? ? nil : syn.send( meth )
140
+ # }
141
+ # end
142
+ # end
143
+ # end
144
+ #
145
+ #
146
+ #
147
+ # #################################################################
148
+ # ### W O R D N E T I N T E R F A C E
149
+ # #################################################################
150
+ #
151
+ # ###############
152
+ # module_function
153
+ # ###############
154
+ #
155
+ # ### Look up the synset associated with the given word or collocation in the
156
+ # ### WordNet lexicon and return a WordNet::Synset object.
157
+ # def synset( word, pos=nil, sense=1 )
158
+ # lex = Linguistics::EN::wn_lexicon
159
+ # if pos.is_a?( Fixnum )
160
+ # sense = pos
161
+ # pos = nil
162
+ # end
163
+ # postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
164
+ # syn = nil
165
+ #
166
+ # postries.each do |pos|
167
+ # break if syn = lex.lookupSynsets( word.to_s, pos, sense )
168
+ # end
169
+ #
170
+ # return syn
171
+ # end
172
+ #
173
+ #
174
+ # ### Look up all the synsets associated with the given word or collocation in
175
+ # ### the WordNet lexicon and return an Array of WordNet::Synset objects. If
176
+ # ### +pos+ is +nil+, return synsets for all parts of speech.
177
+ # def synsets( word, pos=nil )
178
+ # lex = Linguistics::EN::wn_lexicon
179
+ # postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
180
+ # syns = []
181
+ #
182
+ # postries.each {|pos|
183
+ # syns << lex.lookupSynsets( word.to_s, pos )
184
+ # }
185
+ #
186
+ # return syns.flatten.compact
187
+ # end
188
+ #
189
+ #
190
+ # # Returns definitions and/or example sentences as a String.
191
+ # def_synset_function :gloss
192
+ #
193
+ # # Returns definitions and/or example sentences as an Array.
194
+ # def_synset_function :glosses
195
+ #
196
+ # # Return nouns or verbs that have the same hypernym as the receiver.
197
+ # def_synset_function :coordinates
198
+ #
199
+ # # Returns the Array of synonyms contained in the synset for the receiver.
200
+ # def_synset_function :words
201
+ # def_synset_function :synonyms
202
+ #
203
+ # # Returns the name of the lexicographer file that contains the raw data for
204
+ # # the receiver.
205
+ # def_synset_function :lex_info
206
+ #
207
+ # # :TODO: Finish these comments, and figure out how the hell to get the
208
+ # # methods to show up in RDoc.
209
+ # def_synset_function :frames
210
+ #
211
+ #
212
+ # # Returns the synsets for the receiver's antonyms, if any. Ex:
213
+ # # 'opaque'.en.synset.antonyms
214
+ # # ==> [#<WordNet::Synset:0x010ca614/454927 clear (adjective): "free
215
+ # # from cloudiness; allowing light to pass through; "clear water";
216
+ # # "clear plastic bags"; "clear glass"; "the air is clear and clean""
217
+ # # (similarTos: 6, attributes: 1, derivations: 2, antonyms: 1,
218
+ # # seeAlsos: 1)>]
219
+ # def_synset_function :antonyms
220
+ #
221
+ # def_synset_function :hypernyms
222
+ # def_synset_function :instanceHypernyms
223
+ # def_synset_function :entailment
224
+ # def_synset_function :hyponyms
225
+ # def_synset_function :instanceHyponyms
226
+ # def_synset_function :causes
227
+ # def_synset_function :verbgroups
228
+ # def_synset_function :similarTo
229
+ # def_synset_function :participles
230
+ # def_synset_function :pertainyms
231
+ # def_synset_function :attributes
232
+ # def_synset_function :derivedFrom
233
+ # def_synset_function :seeAlso
234
+ # def_synset_function :functions
235
+ #
236
+ # def_synset_function :meronyms
237
+ # def_synset_function :memberMeronyms
238
+ # def_synset_function :stuffMeronyms
239
+ # def_synset_function :portionMeronyms
240
+ # def_synset_function :componentMeronyms
241
+ # def_synset_function :featureMeronyms
242
+ # def_synset_function :phaseMeronyms
243
+ # def_synset_function :placeMeronyms
244
+ #
245
+ # def_synset_function :holonyms
246
+ # def_synset_function :memberHolonyms
247
+ # def_synset_function :stuffHolonyms
248
+ # def_synset_function :portionHolonyms
249
+ # def_synset_function :componentHolonyms
250
+ # def_synset_function :featureHolonyms
251
+ # def_synset_function :phaseHolonyms
252
+ # def_synset_function :placeHolonyms
253
+ #
254
+ # def_synset_function :domains
255
+ # def_synset_function :categoryDomains
256
+ # def_synset_function :regionDomains
257
+ # def_synset_function :usageDomains
258
+ #
259
+ # def_synset_function :members
260
+ # def_synset_function :categoryMembers
261
+ # def_synset_function :regionMembers
262
+ # def_synset_function :usageMembers
263
+ #
264
+ #
265
+ # end # module Linguistics::EN
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "odin/version"
4
+
5
# Gem specification for odin: identity, dependencies and packaged files.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "odin"
  spec.version     = Odin::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Benjamin Oakes"]
  spec.email       = ["hello@benjaminoakes.com"]
  spec.homepage    = "http://github.com/benjaminoakes/odin"
  spec.summary     = "A parser for human languages."
  spec.description = spec.summary

  spec.rubyforge_project = "odin"

  # Runtime dependencies, declared in the same order as before.
  [
    ['activesupport', '~> 2.0.1'],
    ['english',       '~> 0.1'],
    ['facets',        '~> 2.2.1'],
    ['linguistics',   '~> 1.0.8']
  ].each do |gem_name, requirement|
    spec.add_dependency(gem_name, requirement)
  end

  spec.add_development_dependency('rake', '~> 0.8.7')

  # File lists are taken from whatever git currently tracks.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end