odin 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.gitignore +19 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +2 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +26 -0
  6. data/HISTORY.md +102 -0
  7. data/LICENSE.md +10 -0
  8. data/README.md +46 -0
  9. data/Rakefile +69 -0
  10. data/app/controllers/grammar_checker.rb +51 -0
  11. data/check_grammar.rb +24 -0
  12. data/configure +9 -0
  13. data/images/atn_diagram.graffle +0 -0
  14. data/images/atn_diagram.pdf +0 -0
  15. data/images/odin-ff6.gif +0 -0
  16. data/lang/en/adjectives.rb +388 -0
  17. data/lang/en/atn.rb +102 -0
  18. data/lang/en/closed_class_words.rb +206 -0
  19. data/lang/en/data.rb +1086 -0
  20. data/lang/en/noun_inflections.rb +76 -0
  21. data/lang/en/noun_inflector_test_cases.rb +235 -0
  22. data/lang/en/pronoun_inflector_test_cases.rb +14 -0
  23. data/lang/en/verbs.rb +648 -0
  24. data/lang/iso639.rb +405 -0
  25. data/lib/array.rb +15 -0
  26. data/lib/atn.rb +82 -0
  27. data/lib/augmented_transition_network.rb +146 -0
  28. data/lib/dumper.rb +44 -0
  29. data/lib/noun_inflector.rb +283 -0
  30. data/lib/odin.rb +3 -0
  31. data/lib/odin/version.rb +3 -0
  32. data/lib/parts_of_speech.rb +402 -0
  33. data/lib/star.rb +23 -0
  34. data/lib/string.rb +99 -0
  35. data/lib/string_bracketing.rb +100 -0
  36. data/lib/word.rb +69 -0
  37. data/lib/word_net.rb +265 -0
  38. data/odin.gemspec +27 -0
  39. data/simple_atn/README.md +45 -0
  40. data/simple_atn/Rakefile +9 -0
  41. data/simple_atn/array.rb +15 -0
  42. data/simple_atn/augmented_transition_network.rb +146 -0
  43. data/simple_atn/augmented_transition_network_test.rb +113 -0
  44. data/simple_atn/english.rb +161 -0
  45. data/simple_atn/string.rb +63 -0
  46. data/test/fixtures/alice.txt +3594 -0
  47. data/test/fixtures/art.txt +7 -0
  48. data/test/fixtures/both.txt +1 -0
  49. data/test/fixtures/existing.txt +0 -0
  50. data/test/fixtures/existing.txt.checked.html +0 -0
  51. data/test/fixtures/grammar_checker.css +4 -0
  52. data/test/fixtures/grammatical.txt +1 -0
  53. data/test/fixtures/ungrammatical.txt +1 -0
  54. data/test/functional/grammar_checker_test.rb +64 -0
  55. data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
  56. data/test/test_helper.rb +82 -0
  57. data/test/unit/atn_test.rb +240 -0
  58. data/test/unit/noun_inflector_test.rb +249 -0
  59. data/test/unit/pronoun_inflector_test.rb +17 -0
  60. data/test/unit/star_test.rb +24 -0
  61. data/test/unit/string_bracketing_test_module.rb +70 -0
  62. data/test/unit/string_test.rb +92 -0
  63. data/test/unit/word_test.rb +15 -0
  64. metadata +223 -0
@@ -0,0 +1,100 @@
1
module StringBracketing
  # TITLE:
  #
  #   String Bracketing Extensions
  #
  # SUMMARY:
  #
  #   String extension methods which enclose or unenclose a string.
  #
  # CREDITS:
  #
  #   - Thomas Sawyer
  #
  # NOTE: these methods read a BRA2KET constant (a lookup from an opening
  # bracket to its closing counterpart) that is not defined in this module;
  # it has to be supplied by the code that loads or includes it.

  # Return a new string embraced by the given brackets.
  #
  # If +ket+ is omitted, the closing bracket is looked up in BRA2KET; when
  # +bra+ has no entry there, +bra+ itself is placed on both sides.
  #
  #   "wrap me".bracket('{')        #=> "{wrap me}"
  #   "wrap me".bracket('--','!')   #=> "--wrap me!"
  #   "wrap me".bracket('|')        #=> "|wrap me|"

  def bracket(bra, ket=nil)
    ket ||= BRA2KET[bra] || bra
    "#{bra}#{self}#{ket}"
  end

  # In-place version of #bracket.

  def bracket!(bra, ket=nil)
    replace(bracket(bra, ket))
  end

  # Return a new string with the enclosing brackets removed.
  #
  # With +bra+ given, a leading +bra+ and a trailing +ket+ are each removed
  # if present (+ket+ defaults the same way as in #bracket). Without
  # arguments, the first character is looked up in BRA2KET and both ends are
  # dropped only when the last character is the matching closing bracket.
  # When nothing applies, a copy of the string is returned.
  #
  #   "{unwrap me}".unbracket('{')        #=> "unwrap me"
  #   "--unwrap me!".unbracket('--','!')  #=> "unwrap me"
  #   "(unwrap me)".unbracket             #=> "unwrap me"

  def unbracket(bra=nil, ket=nil)
    if bra
      ket ||= BRA2KET[bra] || bra
      stripped = dup
      # Anchor to the whole string (\A / \z): ^ and $ match at every line
      # boundary and would also strip brackets inside a multi-line string.
      stripped.sub!(/\A#{Regexp.escape(bra)}/, '')
      stripped.sub!(/#{Regexp.escape(ket)}\z/, '')
      return stripped
    end

    closer = BRA2KET[self[0, 1]]
    return slice(1...-1) if closer && self[-1, 1] == closer

    dup # if nothing else
  end

  # In-place version of #unbracket.

  def unbracket!(bra=nil, ket=nil)
    replace(unbracket(bra, ket))
  end

  # Return a new string embraced by the given kind of quotes.
  #
  # +type+ is compared case-insensitively after #to_s: 's'/'single' gives
  # single quotes, 'd'/'double' double quotes, 'b'/'back' backticks. Any
  # other value falls back to single quotes.
  #
  #   "quote me".quote      #=> "'quote me'"
  #   "quote me".quote(:d)  #=> "\"quote me\""

  def quote(type=:s)
    case type.to_s.downcase
    when 'd', 'double'
      bracket('"')
    when 'b', 'back'
      bracket('`')
    else
      # 's', 'single' and every unrecognised type
      bracket("'")
    end
  end

  # Return a copy with one leading and one trailing quote character
  # (', " or `) removed, each only if present. The two ends are treated
  # independently; the quotes need not match.

  def dequote
    quotes = ["'", '"', '`']
    s = dup
    s[0] = '' if quotes.include?(s[0, 1])
    # Test the working copy, not self: a lone quote character is already
    # empty at this point and indexing -1 on it would raise IndexError.
    s[-1] = '' if quotes.include?(s[-1, 1])
    s
  end

  # end of String Bracketing Extensions
end
@@ -0,0 +1,69 @@
1
+ require 'rubygems'
2
+ require 'facets'
3
+
4
+ require File.dirname(__FILE__) + '/parts_of_speech.rb'
5
+
6
+ # We have a separate class for this so that we know there are no spaces
7
+ #
8
+ # Uses the state pattern for parts of speech. (Needs "facets")
9
+ # See http://blog.jayfields.com/2007/08/ruby-state-pattern-using-modules-and.html
10
+ # and http://blog.jayfields.com/2007/08/ruby-calling-methods-of-specific.html
11
class Word < String
  include ClosedClassWords

  include Adjective
  include Noun
  include Verb

  # The part-of-speech module chosen for this word when it was created.
  attr_reader :part_of_speech

  # Build a Word from +content+.
  #
  # Raises Word::FormatException when +content.words+ reports more than one
  # word. (+words+ and +normalize+ are String extensions defined outside
  # this file.)
  def initialize(content)
    if content.words.length > 1
      raise FormatException, "'#{content}' contains more than one word"
    end

    # Keeping a normalized form separate is nice for when we print out the output later.
    @normalized = content.normalize
    @part_of_speech = determine_part_of_speech
    super(content)
  end

  # Delegates to the +plural?+ implementation of this word's part of speech.
  def plural?
    # TODO
    as(@part_of_speech).plural?
  end

  # Delegates to the +singular?+ implementation of this word's part of speech.
  def singular?
    # TODO
    as(@part_of_speech).singular?
  end

  # The quoted word followed by its part of speech in parentheses.
  def inspect
    "\"#{self}\" (#{@part_of_speech})"
  end

  private

  # Pick the part of speech by checking the normalized word against the
  # word lists (class variables expected to come from the included
  # modules), in a fixed priority order. Anything unmatched is treated as
  # a Noun.
  def determine_part_of_speech
    # TODO
    if @@Determiners.member?(@normalized) || @@PossesiveAdjectives.member?(@normalized)
      Determiner
    elsif @@Pronouns.member?(@normalized)
      Pronoun
    elsif @@Prepositions.member?(@normalized)
      Preposition
    elsif @@IrregularVerbs.member?(@normalized) || @@RegularVerbs.member?(@normalized)
      Verb
    elsif @@Adjectives.member?(@normalized)
      Adjective
    elsif @@Conjunctions.member?(@normalized)
      # NOTE(review): "Conjuction" looks like a misspelling of "Conjunction";
      # confirm against the constant's definition before renaming.
      Conjuction
    else
      # TODO add an error
      Noun
    end
  end

  # Raised when a Word is built from text containing more than one word.
  # Derives from StandardError so that a plain `rescue` can handle it.
  class FormatException < StandardError; end
end
@@ -0,0 +1,265 @@
1
+ # # From the Ruby Linguistics Project, release 1.0.5
2
+ # #
3
+ # # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en/wordnet.rb
4
+ # #
5
+ # # #!/usr/bin/ruby
6
+ # #
7
+ # # This file contains functions for finding relations for English words. It
8
+ # # requires the Ruby-WordNet module to be installed; if it is not installed,
9
+ # # calling the functions defined by this file will raise NotImplementedError
10
+ # # exceptions. Requiring this file adds functions and constants to the
11
+ # # Linguistics::EN module.
12
+ # #
13
+ # # == Synopsis
14
+ # #
15
+ # # # Test to be sure the WordNet module loaded okay.
16
+ # # Linguistics::EN.has_wordnet?
17
+ # # # => true
18
+ # #
19
+ # # # Fetch the default synset for the word "balance"
20
+ # # "balance".synset
21
+ # # # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
22
+ # # (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
23
+ # #
24
+ # # # Fetch the synset for the first verb sense of "balance"
25
+ # # "balance".en.synset( :verb )
26
+ # # # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
27
+ # # (verb): "bring into balance or equilibrium; "She has to balance work and her
28
+ # # domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
29
+ # # verbGroups: 2, hypernyms: 1, hyponyms: 5)>
30
+ # #
31
+ # # # Fetch the second noun sense
32
+ # # "balance".en.synset( 2, :noun )
33
+ # # # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
34
+ # # on pull of gravity" (hypernyms: 1, hyponyms: 5)>
35
+ # #
36
+ # # # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
37
+ # # "balance".en.synset( 2, :noun ).hypernyms
38
+ # # # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
39
+ # # instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
40
+ # # hyponyms: 2)>]
41
+ # #
42
+ # # # A simpler way of doing the same thing:
43
+ # # "balance".en.hypernyms( 2, :noun )
44
+ # # # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
45
+ # # instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
46
+ # # hyponyms: 2)>]
47
+ # #
48
+ # # # Fetch the first hypernym's hypernyms
49
+ # # "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
50
+ # # # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
51
+ # # measuring device (noun): "instrument that shows the extent or amount or quantity
52
+ # # or degree of something" (hypernyms: 1, hyponyms: 83)>]
53
+ # #
54
+ # # # Find the synset to which both the second noun sense of "balance" and the
55
+ # # # default sense of "shovel" belong.
56
+ # # ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
57
+ # # # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
58
+ # # artifact (or system of artifacts) that is instrumental in accomplishing some
59
+ # # end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
60
+ # #
61
+ # # # Fetch just the words for the other kinds of "instruments"
62
+ # # "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
63
+ # # # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
64
+ # # "extractor", "instrument of execution", "instrument of punishment", "measuring
65
+ # # instrument", "measuring system", "measuring device", "medical instrument",
66
+ # # "navigational instrument", "optical instrument", "plotter", "scientific
67
+ # # instrument", "sonograph", "surveying instrument", "surveyor's instrument",
68
+ # # "tracer", "weapon", "arm", "weapon system", "whip"]
69
+ # #
70
+ # #
71
+ # # == Authors
72
+ # #
73
+ # # * Michael Granger <ged@FaerieMUD.org>
74
+ # #
75
+ # # == Copyright
76
+ # #
77
+ # # Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
78
+ # #
79
+ # # This module is free software. You may use, modify, and/or redistribute this
80
+ # # software under the terms of the Perl Artistic License. (See
81
+ # # http://language.perl.com/misc/Artistic.html)
82
+ # #
83
+ # # == Version
84
+ # #
85
+ # # $Id: wordnet.rb,v 1.3 2003/09/14 11:28:02 deveiant Exp $
86
+ # #
87
+ #
88
+ # module Linguistics::EN
89
+ #
90
+ # @has_wordnet = false
91
+ # @wn_error = nil
92
+ # @wn_lexicon = nil
93
+ #
94
+ # # Load WordNet and open the lexicon if possible, saving the error that
95
+ # # occurs if anything goes wrong.
96
+ # begin
97
+ # require 'wordnet'
98
+ # @has_wordnet = true
99
+ # rescue LoadError => err
100
+ # @wn_error = err
101
+ # end
102
+ #
103
+ #
104
+ # #################################################################
105
+ # ### M O D U L E M E T H O D S
106
+ # #################################################################
107
+ # class << self
108
+ #
109
+ # ### Returns +true+ if WordNet was loaded okay
110
+ # def has_wordnet? ; @has_wordnet; end
111
+ #
112
+ # ### If #has_wordnet? returns +false+, this can be called to fetch the
113
+ # ### exception which was raised when WordNet was loaded.
114
+ # def wn_error ; @wn_error; end
115
+ #
116
+ # ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
117
+ # ### functions.
118
+ # def wn_lexicon
119
+ # if @wn_error
120
+ # raise NotImplementedError,
121
+ # "WordNet functions are not loaded: %s" %
122
+ # @wn_error.message
123
+ # end
124
+ #
125
+ # @wn_lexicon ||= WordNet::Lexicon::new
126
+ # end
127
+ #
128
+ # ### Make a function that calls the method +meth+ on the synset of an input
129
+ # ### word.
130
+ # def def_synset_function( meth )
131
+ # (class << self; self; end).instance_eval do
132
+ # define_method( meth ) {|*args|
133
+ # word, pos, sense = *args
134
+ # raise ArgumentError,
135
+ # "wrong number of arguments (0 for 1)" unless word
136
+ # sense ||= 1
137
+ #
138
+ # syn = synset( word.to_s, pos, sense )
139
+ # return syn.nil? ? nil : syn.send( meth )
140
+ # }
141
+ # end
142
+ # end
143
+ # end
144
+ #
145
+ #
146
+ #
147
+ # #################################################################
148
+ # ### W O R D N E T I N T E R F A C E
149
+ # #################################################################
150
+ #
151
+ # ###############
152
+ # module_function
153
+ # ###############
154
+ #
155
+ # ### Look up the synset associated with the given word or collocation in the
156
+ # ### WordNet lexicon and return a WordNet::Synset object.
157
+ # def synset( word, pos=nil, sense=1 )
158
+ # lex = Linguistics::EN::wn_lexicon
159
+ # if pos.is_a?( Fixnum )
160
+ # sense = pos
161
+ # pos = nil
162
+ # end
163
+ # postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
164
+ # syn = nil
165
+ #
166
+ # postries.each do |pos|
167
+ # break if syn = lex.lookupSynsets( word.to_s, pos, sense )
168
+ # end
169
+ #
170
+ # return syn
171
+ # end
172
+ #
173
+ #
174
+ # ### Look up all the synsets associated with the given word or collocation in
175
+ # ### the WordNet lexicon and return an Array of WordNet::Synset objects. If
176
+ # ### +pos+ is +nil+, return synsets for all parts of speech.
177
+ # def synsets( word, pos=nil )
178
+ # lex = Linguistics::EN::wn_lexicon
179
+ # postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
180
+ # syns = []
181
+ #
182
+ # postries.each {|pos|
183
+ # syns << lex.lookupSynsets( word.to_s, pos )
184
+ # }
185
+ #
186
+ # return syns.flatten.compact
187
+ # end
188
+ #
189
+ #
190
+ # # Returns definitions and/or example sentences as a String.
191
+ # def_synset_function :gloss
192
+ #
193
+ # # Returns definitions and/or example sentences as an Array.
194
+ # def_synset_function :glosses
195
+ #
196
+ # # Return nouns or verbs that have the same hypernym as the receiver.
197
+ # def_synset_function :coordinates
198
+ #
199
+ # # Returns the Array of synonyms contained in the synset for the receiver.
200
+ # def_synset_function :words
201
+ # def_synset_function :synonyms
202
+ #
203
+ # # Returns the name of the lexicographer file that contains the raw data for
204
+ # # the receiver.
205
+ # def_synset_function :lex_info
206
+ #
207
+ # # :TODO: Finish these comments, and figure out how the hell to get the
208
+ # # methods to show up in RDoc.
209
+ # def_synset_function :frames
210
+ #
211
+ #
212
+ # # Returns the synsets for the receiver's antonyms, if any. Ex:
213
+ # # 'opaque'.en.synset.antonyms
214
+ # # ==> [#<WordNet::Synset:0x010ca614/454927 clear (adjective): "free
215
+ # # from cloudiness; allowing light to pass through; "clear water";
216
+ # # "clear plastic bags"; "clear glass"; "the air is clear and clean""
217
+ # # (similarTos: 6, attributes: 1, derivations: 2, antonyms: 1,
218
+ # # seeAlsos: 1)>]
219
+ # def_synset_function :antonyms
220
+ #
221
+ # def_synset_function :hypernyms
222
+ # def_synset_function :instanceHypernyms
223
+ # def_synset_function :entailment
224
+ # def_synset_function :hyponyms
225
+ # def_synset_function :instanceHyponyms
226
+ # def_synset_function :causes
227
+ # def_synset_function :verbgroups
228
+ # def_synset_function :similarTo
229
+ # def_synset_function :participles
230
+ # def_synset_function :pertainyms
231
+ # def_synset_function :attributes
232
+ # def_synset_function :derivedFrom
233
+ # def_synset_function :seeAlso
234
+ # def_synset_function :functions
235
+ #
236
+ # def_synset_function :meronyms
237
+ # def_synset_function :memberMeronyms
238
+ # def_synset_function :stuffMeronyms
239
+ # def_synset_function :portionMeronyms
240
+ # def_synset_function :componentMeronyms
241
+ # def_synset_function :featureMeronyms
242
+ # def_synset_function :phaseMeronyms
243
+ # def_synset_function :placeMeronyms
244
+ #
245
+ # def_synset_function :holonyms
246
+ # def_synset_function :memberHolonyms
247
+ # def_synset_function :stuffHolonyms
248
+ # def_synset_function :portionHolonyms
249
+ # def_synset_function :componentHolonyms
250
+ # def_synset_function :featureHolonyms
251
+ # def_synset_function :phaseHolonyms
252
+ # def_synset_function :placeHolonyms
253
+ #
254
+ # def_synset_function :domains
255
+ # def_synset_function :categoryDomains
256
+ # def_synset_function :regionDomains
257
+ # def_synset_function :usageDomains
258
+ #
259
+ # def_synset_function :members
260
+ # def_synset_function :categoryMembers
261
+ # def_synset_function :regionMembers
262
+ # def_synset_function :usageMembers
263
+ #
264
+ #
265
+ # end # module Linguistics::EN
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the gem version constant can be read below.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require "odin/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "odin"
  spec.version  = Odin::VERSION
  spec.platform = Gem::Platform::RUBY

  # Author and project details
  spec.authors     = ["Benjamin Oakes"]
  spec.email       = ["hello@benjaminoakes.com"]
  spec.homepage    = "http://github.com/benjaminoakes/odin"
  spec.summary     = "A parser for human languages."
  spec.description = spec.summary

  spec.rubyforge_project = "odin"

  # Runtime dependencies, registered in this order.
  [
    ['activesupport', '~> 2.0.1'],
    ['english',       '~> 0.1'],
    ['facets',        '~> 2.2.1'],
    ['linguistics',   '~> 1.0.8']
  ].each do |gem_name, requirement|
    spec.add_dependency(gem_name, requirement)
  end

  spec.add_development_dependency('rake', '~> 0.8.7')

  # File lists are taken from what git tracks.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end