RubyGems - odin - Versions diffs - 0.0.4 - Mend

odin 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

data/.gitignore +19 -0
data/.rvmrc +1 -0
data/.travis.yml +2 -0
data/Gemfile +4 -0
data/Gemfile.lock +26 -0
data/HISTORY.md +102 -0
data/LICENSE.md +10 -0
data/README.md +46 -0
data/Rakefile +69 -0
data/app/controllers/grammar_checker.rb +51 -0
data/check_grammar.rb +24 -0
data/configure +9 -0
data/images/atn_diagram.graffle +0 -0
data/images/atn_diagram.pdf +0 -0
data/images/odin-ff6.gif +0 -0
data/lang/en/adjectives.rb +388 -0
data/lang/en/atn.rb +102 -0
data/lang/en/closed_class_words.rb +206 -0
data/lang/en/data.rb +1086 -0
data/lang/en/noun_inflections.rb +76 -0
data/lang/en/noun_inflector_test_cases.rb +235 -0
data/lang/en/pronoun_inflector_test_cases.rb +14 -0
data/lang/en/verbs.rb +648 -0
data/lang/iso639.rb +405 -0
data/lib/array.rb +15 -0
data/lib/atn.rb +82 -0
data/lib/augmented_transition_network.rb +146 -0
data/lib/dumper.rb +44 -0
data/lib/noun_inflector.rb +283 -0
data/lib/odin.rb +3 -0
data/lib/odin/version.rb +3 -0
data/lib/parts_of_speech.rb +402 -0
data/lib/star.rb +23 -0
data/lib/string.rb +99 -0
data/lib/string_bracketing.rb +100 -0
data/lib/word.rb +69 -0
data/lib/word_net.rb +265 -0
data/odin.gemspec +27 -0
data/simple_atn/README.md +45 -0
data/simple_atn/Rakefile +9 -0
data/simple_atn/array.rb +15 -0
data/simple_atn/augmented_transition_network.rb +146 -0
data/simple_atn/augmented_transition_network_test.rb +113 -0
data/simple_atn/english.rb +161 -0
data/simple_atn/string.rb +63 -0
data/test/fixtures/alice.txt +3594 -0
data/test/fixtures/art.txt +7 -0
data/test/fixtures/both.txt +1 -0
data/test/fixtures/existing.txt +0 -0
data/test/fixtures/existing.txt.checked.html +0 -0
data/test/fixtures/grammar_checker.css +4 -0
data/test/fixtures/grammatical.txt +1 -0
data/test/fixtures/ungrammatical.txt +1 -0
data/test/functional/grammar_checker_test.rb +64 -0
data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
data/test/test_helper.rb +82 -0
data/test/unit/atn_test.rb +240 -0
data/test/unit/noun_inflector_test.rb +249 -0
data/test/unit/pronoun_inflector_test.rb +17 -0
data/test/unit/star_test.rb +24 -0
data/test/unit/string_bracketing_test_module.rb +70 -0
data/test/unit/string_test.rb +92 -0
data/test/unit/word_test.rb +15 -0
metadata +223 -0

data/lib/string_bracketing.rb ADDED

@@ -0,0 +1,100 @@
+module StringBracketing
+  # TITLE:
+  #
+  #   String Bracketing Extensoins
+  #
+  # SUMMARY:
+  #
+  #   String extension methods which enclose on unenclose a striong.
+  #
+  # CREDITS:
+  #
+  #   - Thomas Sawyer
+  # Return a new string embraced by given brakets.
+  # If only one bracket char is given it will be placed
+  # on either side.
+  #
+  #   "wrap me".bracket('{')        #=> "{wrap me}"
+  #   "wrap me".bracket('--','!')   #=> "--wrap me!"
+  def bracket(bra, ket=nil)
+    #ket = String.bra2ket[$&] if ! ket && /^[\[({<]$/ =~ bra
+    ket = BRA2KET[bra] unless ket
+    "#{bra}#{self}#{ket ? ket : bra}"
+  end
+  # Inplace version of #braket.
+  def bracket!(bra, ket=nil)
+    self.replace(bracket(bra, ket))
+  end
+  # Return a new string embraced by given brakets.
+  # If only one bracket char is given it will be placed
+  # on either side.
+  #
+  #   "{unwrap me}".debracket('{')        #=> "unwrap me"
+  #   "--unwrap me!".debracket('--','!')  #=> "unwrap me!"
+  def unbracket(bra=nil, ket=nil)
+    if bra
+      ket = BRA2KET[bra] unless ket
+      ket = ket ? ket : bra
+      s = self.dup
+      s.gsub!(%r[^#{Regexp.escape(bra)}], '')
+      s.gsub!(%r[#{Regexp.escape(ket)}$], '')
+      return s
+    else
+      if m = BRA2KET[ self[0,1] ]
+        return self.slice(1...-1) if self[-1,1]  == m
+      end
+    end
+    return self.dup  # if nothing else
+  end
+  # Inplace version of #debraket.
+  def unbracket!(bra=nil, ket=nil)
+    self.replace( unbracket(bra, ket) )
+  end
+  # Return a new string embraced by given quotes.
+  # If no quotes are specified, then assumes single quotes.
+  #
+  #   "quote me".quote     #=> "'quote me'"
+  #   "quote me".quote(2)  #=> "\"quote me\""
+  def quote(type=:s)
+    case type.to_s.downcase
+    when 's', 'single'
+      bracket("'")
+    when 'd', 'double'
+      bracket('"')
+    when 'b', 'back'
+      bracket('`')
+    else
+      bracket("'")
+    end
+  end
+  # Remove quotes from string.
+  def dequote
+    s = self.dup
+    case self[0,1]
+    when "'", '"', '`'
+      s[0] = ''
+    end
+    case self[-1,1]
+    when "'", '"', '`'
+      s[-1] = ''
+    end
+    return s
+  end
+  # end of String Bracketing Extensoins
+end

data/lib/word.rb ADDED

@@ -0,0 +1,69 @@
+require 'rubygems'
+require 'facets'
+require File.dirname(__FILE__) + '/parts_of_speech.rb'
+# A single word of text. We have a separate class for this (rather than a
+# plain String) so that we know there are no spaces.
+#
+# Uses the state pattern for parts of speech.  (Needs "facets")
+# See http://blog.jayfields.com/2007/08/ruby-state-pattern-using-modules-and.html
+# and http://blog.jayfields.com/2007/08/ruby-calling-methods-of-specific.html
+class Word < String
+  include ClosedClassWords
+  include Adjective
+  include Noun
+  include Verb
+
+  # Raised by #initialize when the content is more than one word.
+  # Derives from StandardError so that a bare `rescue` catches it.
+  class FormatException < StandardError; end
+
+  # The part-of-speech constant chosen for this word at construction time
+  # (see #determine_part_of_speech).
+  attr_reader :part_of_speech
+
+  # content - the text of the word; must respond to #words and #normalize.
+  #
+  # Raises FormatException if content.words has more than one element.
+  def initialize(content)
+    raise FormatException, "'#{content}' contains more than one word" if content.words.length > 1
+
+    # Keeping a normalized form separate is nice for when we print out the output later.
+    @normalized = content.normalize
+    @part_of_speech = determine_part_of_speech
+    super(content)
+  end
+
+  # Delegates to the #plural? of the word's part of speech.
+  # NOTE(review): `as` is presumably the facets helper that calls a method
+  # as defined by a specific ancestor -- confirm.
+  def plural?
+    # TODO
+    as(@part_of_speech).plural?
+  end
+
+  # Delegates to the #singular? of the word's part of speech (see #plural?).
+  def singular?
+    # TODO
+    as(@part_of_speech).singular?
+  end
+
+  # Returns the word in double quotes followed by its part of speech in
+  # parentheses.
+  def inspect
+    "\"#{self}\" (#{@part_of_speech})"
+  end
+
+  private
+
+  # Classifies @normalized by checking the word lists in a fixed order:
+  # determiners / possessive adjectives, pronouns, prepositions, verbs,
+  # adjectives, conjunctions. Anything not found is treated as a Noun.
+  def determine_part_of_speech
+    # TODO
+    if @@Determiners.member?(@normalized) || @@PossesiveAdjectives.member?(@normalized)
+      Determiner
+    elsif @@Pronouns.member?(@normalized)
+      Pronoun
+    elsif @@Prepositions.member?(@normalized)
+      Preposition
+    elsif @@IrregularVerbs.member?(@normalized) || @@RegularVerbs.member?(@normalized)
+      Verb
+    elsif @@Adjectives.member?(@normalized)
+      Adjective
+    elsif @@Conjunctions.member?(@normalized)
+      # NOTE(review): spelled "Conjuction" here; confirm this matches the
+      # constant actually defined in parts_of_speech.rb.
+      Conjuction
+    else
+      # TODO add an error
+      Noun
+    end
+  end
+end

data/lib/word_net.rb ADDED

@@ -0,0 +1,265 @@
+# # From the Ruby Linguistics Project, release 1.0.5
+# #
+# # http://www.deveiate.org/projects/Linguistics/browser/tags/RELEASE_1_0_5/lib/linguistics/en/wordnet.rb
+# #
+# #   #!/usr/bin/ruby
+#   #
+#   # This file contains functions for finding relations for English words. It
+#   # requires the Ruby-WordNet module to be installed; if it is not installed,
+#   # calling the functions defined by this file will raise NotImplementedError.
+#   # Requiring this file adds functions and constants to the Linguistics::EN
+#   # module.
+#   #
+#   # == Synopsis
+# #
+# #   # Test to be sure the WordNet module loaded okay.
+# #   Linguistics::EN.has_wordnet?
+# #   # => true
+# #
+# #   # Fetch the default synset for the word "balance"
+# #   "balance".synset
+# #   # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
+# #    (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
+# #
+# #   # Fetch the synset for the first verb sense of "balance"
+# #   "balance".en.synset( :verb )
+# #   # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
+# #   (verb): "bring into balance or equilibrium; "She has to balance work and her
+# #   domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
+# #   verbGroups: 2, hypernyms: 1, hyponyms: 5)>
+# #
+# #   # Fetch the second noun sense
+# #   "balance".en.synset( 2, :noun )
+# #   # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
+# #   on pull of gravity" (hypernyms: 1, hyponyms: 5)>
+# #
+# #   # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
+# #   "balance".en.synset( 2, :noun ).hypernyms
+# #   # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
+# #   instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
+# #   hyponyms: 2)>]
+# #
+# #   # A simpler way of doing the same thing:
+# #   "balance".en.hypernyms( 2, :noun )
+# #   # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
+# #   instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
+# #   hyponyms: 2)>]
+# #
+# #   # Fetch the first hypernym's hypernyms
+# #   "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
+# #   # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
+# #   measuring device (noun): "instrument that shows the extent or amount or quantity
+# #   or degree of something" (hypernyms: 1, hyponyms: 83)>]
+# #
+# #   # Find the synset to which both the second noun sense of "balance" and the
+# #   # default sense of "shovel" belong.
+# #   ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
+# #   # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
+# #   artifact (or system of artifacts) that is instrumental in accomplishing some
+# #   end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
+# #
+# #   # Fetch just the words for the other kinds of "instruments"
+# #   "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
+# #   # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
+# #   "extractor", "instrument of execution", "instrument of punishment", "measuring
+# #   instrument", "measuring system", "measuring device", "medical instrument",
+# #   "navigational instrument", "optical instrument", "plotter", "scientific
+# #   instrument", "sonograph", "surveying instrument", "surveyor's instrument",
+# #   "tracer", "weapon", "arm", "weapon system", "whip"]
+# #
+# #
+# # == Authors
+# #
+# # * Michael Granger <ged@FaerieMUD.org>
+# #
+# # == Copyright
+# #
+# # Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
+# #
+# # This module is free software. You may use, modify, and/or redistribute this
+# # software under the terms of the Perl Artistic License. (See
+# # http://language.perl.com/misc/Artistic.html)
+# #
+# # == Version
+# #
+# #  $Id: wordnet.rb,v 1.3 2003/09/14 11:28:02 deveiant Exp $
+# #
+#
+# module Linguistics::EN
+#
+#         @has_wordnet            = false
+#         @wn_error               = nil
+#         @wn_lexicon             = nil
+#
+#         # Load WordNet and open the lexicon if possible, saving the error that
+#         # occurs if anything goes wrong.
+#         begin
+#                 require 'wordnet'
+#                 @has_wordnet = true
+#         rescue LoadError => err
+#                 @wn_error = err
+#         end
+#
+#
+#         #################################################################
+#         ###     M O D U L E   M E T H O D S
+#         #################################################################
+#         class << self
+#
+#                 ### Returns +true+ if WordNet was loaded okay
+#                 def has_wordnet? ; @has_wordnet; end
+#
+#                 ### If #has_wordnet? returns +false+, this can be called to fetch the
+#                 ### exception which was raised when WordNet was loaded.
+#                 def wn_error ; @wn_error; end
+#
+#                 ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
+#                 ### functions.
+#                 def wn_lexicon
+#                         if @wn_error
+#                                 raise NotImplementedError,
+#                                         "WordNet functions are not loaded: %s" %
+#                                         @wn_error.message
+#                         end
+#
+#                         @wn_lexicon ||= WordNet::Lexicon::new
+#                 end
+#
+#                 ### Make a function that calls the method +meth+ on the synset of an input
+#                 ### word.
+#                 def def_synset_function( meth )
+#                         (class << self; self; end).instance_eval do
+#                                 define_method( meth ) {|*args|
+#                                         word, pos, sense = *args
+#                                         raise ArgumentError,
+#                                                 "wrong number of arguments (0 for 1)" unless word
+#                                         sense ||= 1
+#
+#                                         syn = synset( word.to_s, pos, sense )
+#                                         return syn.nil? ? nil : syn.send( meth )
+#                                 }
+#                         end
+#                 end
+#         end
+#
+#
+#
+#         #################################################################
+#         ###     W O R D N E T   I N T E R F A C E
+#         #################################################################
+#
+#         ###############
+#         module_function
+#         ###############
+#
+#         ### Look up the synset associated with the given word or collocation in the
+#         ### WordNet lexicon and return a WordNet::Synset object.
+#         def synset( word, pos=nil, sense=1 )
+#                 lex = Linguistics::EN::wn_lexicon
+#                 if pos.is_a?( Fixnum )
+#                         sense = pos
+#                         pos = nil
+#                 end
+#                 postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
+#                 syn = nil
+#
+#                 postries.each do |pos|
+#                         break if syn = lex.lookupSynsets( word.to_s, pos, sense )
+#                 end
+#
+#                 return syn
+#         end
+#
+#
+#         ### Look up all the synsets associated with the given word or collocation in
+#         ### the WordNet lexicon and return an Array of WordNet::Synset objects. If
+#         ### +pos+ is +nil+, return synsets for all parts of speech.
+#         def synsets( word, pos=nil )
+#                 lex = Linguistics::EN::wn_lexicon
+#                 postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
+#                 syns = []
+#
+#                 postries.each {|pos|
+#                         syns << lex.lookupSynsets( word.to_s, pos )
+#                 }
+#
+#                 return syns.flatten.compact
+#         end
+#
+#
+#         # Returns definitions and/or example sentences as a String.
+#         def_synset_function :gloss
+#
+#         # Returns definitions and/or example sentences as an Array.
+#         def_synset_function :glosses
+#
+#         # Return nouns or verbs that have the same hypernym as the receiver.
+#         def_synset_function :coordinates
+#
+#         # Returns the Array of synonyms contained in the synset for the receiver.
+#         def_synset_function :words
+#         def_synset_function :synonyms
+#
+#         # Returns the name of the lexicographer file that contains the raw data for
+#         # the receiver.
+#         def_synset_function :lex_info
+#
+#         # :TODO: Finish these comments, and figure out how the hell to get the
+#         # methods to show up in RDoc.
+#         def_synset_function :frames
+#
+#
+#         # Returns the synsets for the receiver's antonyms, if any. Ex:
+#         # 'opaque'.en.synset.antonyms
+#         #   ==> [#<WordNet::Synset:0x010ca614/454927 clear (adjective): "free
+#         #        from cloudiness; allowing light to pass through; "clear water";
+#         #        "clear plastic bags"; "clear glass"; "the air is clear and clean""
+#         #        (similarTos: 6, attributes: 1, derivations: 2, antonyms: 1,
+#         #        seeAlsos: 1)>]
+#         def_synset_function :antonyms
+#
+#         def_synset_function :hypernyms
+#     def_synset_function :instanceHypernyms
+#         def_synset_function :entailment
+#         def_synset_function :hyponyms
+#     def_synset_function :instanceHyponyms
+#         def_synset_function :causes
+#         def_synset_function :verbgroups
+#         def_synset_function :similarTo
+#         def_synset_function :participles
+#         def_synset_function :pertainyms
+#         def_synset_function :attributes
+#         def_synset_function :derivedFrom
+#         def_synset_function :seeAlso
+#         def_synset_function :functions
+#
+#         def_synset_function :meronyms
+#         def_synset_function :memberMeronyms
+#         def_synset_function :stuffMeronyms
+#         def_synset_function :portionMeronyms
+#         def_synset_function :componentMeronyms
+#         def_synset_function :featureMeronyms
+#         def_synset_function :phaseMeronyms
+#         def_synset_function :placeMeronyms
+#
+#         def_synset_function :holonyms
+#         def_synset_function :memberHolonyms
+#         def_synset_function :stuffHolonyms
+#         def_synset_function :portionHolonyms
+#         def_synset_function :componentHolonyms
+#         def_synset_function :featureHolonyms
+#         def_synset_function :phaseHolonyms
+#         def_synset_function :placeHolonyms
+#
+#         def_synset_function :domains
+#         def_synset_function :categoryDomains
+#         def_synset_function :regionDomains
+#         def_synset_function :usageDomains
+#
+#         def_synset_function :members
+#         def_synset_function :categoryMembers
+#         def_synset_function :regionMembers
+#         def_synset_function :usageMembers
+#
+#
+# end # module Linguistics::EN

data/odin.gemspec ADDED

@@ -0,0 +1,27 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "odin/version"
+Gem::Specification.new do |s|
+  s.name        = "odin"
+  s.version     = Odin::VERSION
+  s.platform    = Gem::Platform::RUBY
+  s.authors     = ["Benjamin Oakes"]
+  s.email       = ["hello@benjaminoakes.com"]
+  s.homepage    = "http://github.com/benjaminoakes/odin"
+  s.summary     = %q{A parser for human languages.}
+  s.description = s.summary
+  s.rubyforge_project = "odin"
+  s.add_dependency('activesupport', '~> 2.0.1')
+  s.add_dependency('english', '~> 0.1')
+  s.add_dependency('facets', '~> 2.2.1')
+  s.add_dependency('linguistics', '~> 1.0.8')
+  s.add_development_dependency('rake', '~> 0.8.7')
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+end