RubyGems - odin - Versions diffs - 0.0.4 - Mend

odin 0.0.4

Files changed (64) hide show

data/.gitignore +19 -0
data/.rvmrc +1 -0
data/.travis.yml +2 -0
data/Gemfile +4 -0
data/Gemfile.lock +26 -0
data/HISTORY.md +102 -0
data/LICENSE.md +10 -0
data/README.md +46 -0
data/Rakefile +69 -0
data/app/controllers/grammar_checker.rb +51 -0
data/check_grammar.rb +24 -0
data/configure +9 -0
data/images/atn_diagram.graffle +0 -0
data/images/atn_diagram.pdf +0 -0
data/images/odin-ff6.gif +0 -0
data/lang/en/adjectives.rb +388 -0
data/lang/en/atn.rb +102 -0
data/lang/en/closed_class_words.rb +206 -0
data/lang/en/data.rb +1086 -0
data/lang/en/noun_inflections.rb +76 -0
data/lang/en/noun_inflector_test_cases.rb +235 -0
data/lang/en/pronoun_inflector_test_cases.rb +14 -0
data/lang/en/verbs.rb +648 -0
data/lang/iso639.rb +405 -0
data/lib/array.rb +15 -0
data/lib/atn.rb +82 -0
data/lib/augmented_transition_network.rb +146 -0
data/lib/dumper.rb +44 -0
data/lib/noun_inflector.rb +283 -0
data/lib/odin.rb +3 -0
data/lib/odin/version.rb +3 -0
data/lib/parts_of_speech.rb +402 -0
data/lib/star.rb +23 -0
data/lib/string.rb +99 -0
data/lib/string_bracketing.rb +100 -0
data/lib/word.rb +69 -0
data/lib/word_net.rb +265 -0
data/odin.gemspec +27 -0
data/simple_atn/README.md +45 -0
data/simple_atn/Rakefile +9 -0
data/simple_atn/array.rb +15 -0
data/simple_atn/augmented_transition_network.rb +146 -0
data/simple_atn/augmented_transition_network_test.rb +113 -0
data/simple_atn/english.rb +161 -0
data/simple_atn/string.rb +63 -0
data/test/fixtures/alice.txt +3594 -0
data/test/fixtures/art.txt +7 -0
data/test/fixtures/both.txt +1 -0
data/test/fixtures/existing.txt +0 -0
data/test/fixtures/existing.txt.checked.html +0 -0
data/test/fixtures/grammar_checker.css +4 -0
data/test/fixtures/grammatical.txt +1 -0
data/test/fixtures/ungrammatical.txt +1 -0
data/test/functional/grammar_checker_test.rb +64 -0
data/test/integration/en/word_and_noun_inflector_test.rb +29 -0
data/test/test_helper.rb +82 -0
data/test/unit/atn_test.rb +240 -0
data/test/unit/noun_inflector_test.rb +249 -0
data/test/unit/pronoun_inflector_test.rb +17 -0
data/test/unit/star_test.rb +24 -0
data/test/unit/string_bracketing_test_module.rb +70 -0
data/test/unit/string_test.rb +92 -0
data/test/unit/word_test.rb +15 -0
metadata +223 -0

data/lib/augmented_transition_network.rb ADDED

@@ -0,0 +1,146 @@
+require File.dirname(__FILE__) + '/string'
+require File.dirname(__FILE__) + '/array'
+# Raised when the words cannot be parsed along the path currently being tried.
+# Derives from StandardError rather than Exception so that it is an ordinary,
+# application-level error: a bare `rescue` handles it, and it is kept apart from
+# interpreter-level conditions such as Interrupt, SystemExit and NoMemoryError.
+# Existing `rescue Ungrammatical` and `rescue Exception` clauses keep working.
+class Ungrammatical < StandardError; end
+# Walks a transition network over a list of words.  Intermediate results travel from
+# node to node in a "registers" Hash; the result of the most recent pop is kept in @star.
+#
+# A node is an ordinary method taking (position, registers) and is dispatched by name
+# with `send`.  For :en the nodes are presumably supplied by the English module that is
+# mixed in by the constructor (english.rb is not shown here -- confirm there).
+class AugmentedTransitionNetwork
+  # Loads english.rb and mixes English into this instance when language is :en, then
+  # resets the parser state.  Any other value only resets the state.
+  def initialize(language = :en)
+    if :en == language
+      require File.dirname(__FILE__) + '/english'
+      extend English
+    end
+    clear!
+  end
+  # Runs the network over a frozen copy of +words+, entering at +start_node+ with
+  # position 0 and empty registers.  Returns whatever the traversal left in @star.
+  def parse(words, start_node = :sentence)
+    clear!
+    @words = words.dup.freeze
+    send(start_node, 0, {})
+    @star
+  end
+  # Parses like #parse, then joins every double-quoted fragment of the result's inspect
+  # output with spaces and strips the quotes.  Relies on String#matches_for, which is
+  # defined outside this file.
+  def parse_to_string(words, start_node = :sentence)
+    quoted_fragments = parse(words, start_node).inspect.matches_for(/".*?"/)
+    quoted_fragments.join(" ").gsub("\"", '')
+  end
+  private
+    # Forgets the previous result and word list.
+    def clear!
+      @star, @words = nil, []
+    end
+    # Tag a word or phrase with a functional role: returns [marker, *non-nil constituents].
+    #
+    # For example, a single word may be labeled :noun.
+    # A phrase (multiple words) may be labeled :noun_phrase.  (Note that each constituent of a phrase
+    # should have a tag as well.)
+    #
+    # An empty register yields nil, so nil constituents are left out of the tagging.
+    def tag(marker, constituents)
+      # TODO Tag in a different way?  I have to call .last to get the real word...
+      tagged = [marker]
+      constituents.each { |constituent| tagged << constituent unless constituent.nil? }
+      tagged
+    end
+    # TODO
+    # def choose_arc(arcs, position, registers)
+    #   arcs.each do |arc|
+    #     begin
+    #       arc.call(position, registers)
+    #     rescue Ungrammatical
+    #       # Move onto the next one
+    #     end
+    #   end
+    #
+    #   raise Ungrammatical
+    # end
+    # Stores a tagged value under +register_name+ in the given registers Hash.
+    # extras[:tag] overrides the tag (default: the register name) and extras[:content]
+    # overrides the content (default: the word at +position+).
+    def set_register(register_name, position, registers, extras = {})
+      # TODO I'm pretty sure there's an easier way to handle the argument hash
+      label = extras[:tag] || register_name
+      content = extras[:content] || @words[position]
+      registers[register_name] = tag(label, content)
+    end
+    # Raises Ungrammatical when a word exists at +position+; otherwise returns whether
+    # +position+ equals the number of words.
+    def at_last_word?(position)
+      raise Ungrammatical unless @words[position].nil?
+      @words.length == position
+    end
+    # True when there is a word at +position+ and it answers truthily to "<category>?".
+    def in_category?(category, position)
+      candidate = @words[position]
+      !candidate.nil? && candidate.send("#{category}?")
+    end
+    # Compares the word at +position+ with +exact_word+.  Raises Ungrammatical when
+    # +position+ lies outside @words.
+    def exact_word?(exact_word, position)
+      candidate = @words[position]
+      raise Ungrammatical if candidate.nil?
+      candidate == exact_word
+    end
+    # Consumes one word: enters +node_name+ at the next position with a copy of the registers.
+    def follow_arc_to(node_name, position, registers)
+      send(node_name, position + 1, registers.dup)
+    end
+    # Enters +node_name+ at the same position (no word consumed) with a copy of the registers.
+    def jump_to(node_name, position, registers)
+      send(node_name, position, registers.dup)
+    end
+    # Traverses the subnetwork starting at +node_name+, stores a copy of its result in
+    # registers[extras[:into]], advances the position, and continues at extras[:next].
+    # The advance is the result's inspect string run through String#number_in_quotes
+    # (defined outside this file; presumably the count of quoted words -- confirm there).
+    # Raises a RuntimeError when :into or :next is missing.
+    def push(node_name, position, registers, extras)
+      # TODO I'm pretty sure there's an easier way to handle the argument hash
+      destination_register, next_node = extras[:into], extras[:next]
+      if destination_register.nil? || next_node.nil?
+        raise "You must give :into and :next for the 'extra' hash"
+      end
+      # Traverse the subnetwork; its result lands in @star.
+      send(node_name, position, registers.dup)
+      registers[destination_register] = @star.dup
+      position += registers[destination_register].inspect.number_in_quotes
+      # Move along to the next node
+      send(next_node, position, registers.dup)
+    end
+    # Records +content+ as the result of the current (sub)network.
+    def pop(content)
+      @star = content
+    end
+end

data/lib/dumper.rb ADDED

@@ -0,0 +1,44 @@
+# Helpers for printing headed, sectioned and indented text to standard output.
+# Every method is private, so they are callable only from inside an object that
+# mixes this module in.
+module Dumper
+  private
+    # Prints +string+ upcased between two rules of "=" characters, yields to the block,
+    # then prints a closing rule followed by a blank line.  Each rule is
+    # string.length + 5 characters wide.
+    def heading(string)
+      rule = "=" * (string.length + 5)
+      puts rule
+      puts string.upcase
+      puts rule
+      yield
+      puts rule
+      puts
+    end
+    # Prints +string+ underlined with "-" characters (string.length + 2 wide), yields to
+    # the block, then prints a blank line.
+    def section(string)
+      puts string
+      puts "-" * (string.length + 2)
+      yield
+      puts
+    end
+    # Returns a copy of +string+ in which every line is prefixed with +character+
+    # repeated +length+ times.  An empty string yields an empty string.
+    def indent(string, length = 2, character = " ")
+      prefix = character * length
+      string.each_line.map { |line| prefix + line }.join
+    end
+    # Renders a nested array as text, one element per line: leaves are shown with
+    # #inspect, and each nested array is rendered recursively and indented one level.
+    # Nested arrays are detected with is_a?(Array), so Array subclasses are descended
+    # into as well (comparing the class name with "Array" skipped them).
+    def inspect_tree(tree)
+      tree.map do |branch|
+        branch.is_a?(Array) ? indent(inspect_tree(branch)) : "#{branch.inspect}\n"
+      end.join
+    end
+end

data/lib/noun_inflector.rb ADDED

@@ -0,0 +1,283 @@
+# From Rails
+require 'singleton'
+# The NounInflector transforms words from singular to plural, class names to table names, modularized class names to ones without,
+# and class names to foreign keys. The default inflections for pluralization, singularization, and uncountable words are kept
+# in lang/en/noun_inflections.rb, which is loaded at the bottom of this file.
+module NounInflector
+  # A singleton instance of this class is yielded by NounInflector.inflections, which can then be used to specify additional
+  # inflection rules. Examples:
+  #
+  #   NounInflector.inflections do |inflect|
+  #     inflect.plural /^(ox)$/i, '\1\2en'
+  #     inflect.singular /^(ox)en/i, '\1'
+  #
+  #     inflect.irregular 'octopus', 'octopi'
+  #
+  #     inflect.uncountable "equipment"
+  #   end
+  #
+  # New rules are added at the top. So in the example above, the irregular rule for octopus will now be the first of the
+  # pluralization and singularization rules that is run. This guarantees that your rules run before any of the rules that may
+  # already have been loaded.
+  class Inflections
+    include Singleton
+    attr_reader :plurals, :singulars, :uncountables
+    def initialize
+      @plurals, @singulars, @uncountables = [], [], []
+    end
+    # Specifies a new pluralization rule and its replacement. The rule can either be a string or a regular expression.
+    # The replacement should always be a string that may include references to the matched data from the rule.
+    def plural(rule, replacement)
+      @plurals.insert(0, [rule, replacement])
+    end
+    # Specifies a new singularization rule and its replacement. The rule can either be a string or a regular expression.
+    # The replacement should always be a string that may include references to the matched data from the rule.
+    def singular(rule, replacement)
+      @singulars.insert(0, [rule, replacement])
+    end
+    # Specifies a new irregular that applies to both pluralization and singularization at the same time. This can only be used
+    # for strings, not regular expressions. You simply pass the irregular in singular and plural form.
+    #
+    # When both forms start with the same letter (ignoring case) a single case-insensitive rule per direction is
+    # registered and the matched first letter is reused, which preserves its case. Otherwise separate upper-case and
+    # lower-case rules are registered per direction.
+    #
+    # Examples:
+    #   irregular 'octopus', 'octopi'
+    #   irregular 'person', 'people'
+    def irregular(singular, plural)
+      # The parameters shadow the #singular and #plural methods; the calls below carry arguments, so they
+      # still resolve to the methods.
+      if singular[0,1].upcase == plural[0,1].upcase
+        plural(Regexp.new("(#{singular[0,1]})#{singular[1..-1]}$", "i"), '\1' + plural[1..-1])
+        singular(Regexp.new("(#{plural[0,1]})#{plural[1..-1]}$", "i"), '\1' + singular[1..-1])
+      else
+        plural(Regexp.new("#{singular[0,1].upcase}(?i)#{singular[1..-1]}$"), plural[0,1].upcase + plural[1..-1])
+        plural(Regexp.new("#{singular[0,1].downcase}(?i)#{singular[1..-1]}$"), plural[0,1].downcase + plural[1..-1])
+        singular(Regexp.new("#{plural[0,1].upcase}(?i)#{plural[1..-1]}$"), singular[0,1].upcase + singular[1..-1])
+        singular(Regexp.new("#{plural[0,1].downcase}(?i)#{plural[1..-1]}$"), singular[0,1].downcase + singular[1..-1])
+      end
+    end
+    # Adds uncountable words, which are never inflected.
+    #
+    # Examples:
+    #   uncountable "money"
+    #   uncountable "money", "information"
+    #   uncountable %w( money information rice )
+    def uncountable(*words)
+      (@uncountables << words).flatten!
+    end
+    # Clears the loaded inflections within a given scope (default is :all). Give the scope as a symbol of the inflection type,
+    # the options are: :plurals, :singulars, :uncountables
+    #
+    # NOTE: any other scope is not rejected; it resets an instance variable of that name.
+    #
+    # Examples:
+    #   clear :all
+    #   clear :plurals
+    def clear(scope = :all)
+      case scope
+        when :all
+          @plurals, @singulars, @uncountables = [], [], []
+        else
+          instance_variable_set "@#{scope}", []
+      end
+    end
+  end
+  extend self
+  # Yields the Inflections singleton when a block is given; otherwise returns it.
+  def inflections
+    if block_given?
+      yield Inflections.instance
+    else
+      Inflections.instance
+    end
+  end
+  # Returns the plural form of the word in the string. The first pluralization rule that matches is applied.
+  # Empty input (including nil, which stringifies to "") and uncountable words are returned unchanged.
+  #
+  # Examples
+  #   "post".pluralize #=> "posts"
+  #   "octopus".pluralize #=> "octopi"
+  #   "sheep".pluralize #=> "sheep"
+  #   "words".pluralize #=> "words"
+  #   "the blue mailman".pluralize #=> "the blue mailmen"
+  #   "CamelOctopus".pluralize #=> "CamelOctopi"
+  def pluralize(word)
+    result = word.to_s.dup
+    # Test the stringified copy, not the raw argument: +word+ may be nil or another object without #empty?.
+    return result if result.empty? || inflections.uncountables.include?(result.downcase)
+    inflections.plurals.each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
+    result
+  end
+  # The reverse of pluralize, returns the singular form of a word in a string. The first singularization rule
+  # that matches is applied; uncountable words are returned unchanged.
+  #
+  # Examples
+  #   "posts".singularize #=> "post"
+  #   "octopi".singularize #=> "octopus"
+  #   "sheep".singularize #=> "sheep"
+  #   "word".singularize #=> "word"
+  #   "the blue mailmen".singularize #=> "the blue mailman"
+  #   "CamelOctopi".singularize #=> "CamelOctopus"
+  def singularize(word)
+    result = word.to_s.dup
+    return result if inflections.uncountables.include?(result.downcase)
+    inflections.singulars.each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
+    result
+  end
+  # By default, camelize converts strings to UpperCamelCase. If the second argument is
+  # false, nil or :lower then camelize produces lowerCamelCase (the first character of the
+  # input is kept exactly as given).
+  #
+  # camelize will also convert '/' to '::' which is useful for converting paths to namespaces
+  #
+  # Examples
+  #   "active_record".camelize #=> "ActiveRecord"
+  #   "active_record".camelize(:lower) #=> "activeRecord"
+  #   "active_record/errors".camelize #=> "ActiveRecord::Errors"
+  #   "active_record/errors".camelize(:lower) #=> "activeRecord::Errors"
+  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
+    word = lower_case_and_underscored_word.to_s
+    if first_letter_in_uppercase && first_letter_in_uppercase != :lower
+      word.gsub(/\/(.?)/) { "::" + $1.upcase }.gsub(/(^|_)(.)/) { $2.upcase }
+    else
+      # String#first is not part of core Ruby, so slice instead; to_s covers the empty
+      # string, where [1..-1] returns nil.
+      word[0, 1] + camelize(word)[1..-1].to_s
+    end
+  end
+  # Capitalizes all the words and replaces some characters in the string to create
+  # a nicer looking title. Titleize is meant for creating pretty output. It is not
+  # used in the Rails internals.
+  #
+  # titleize is also aliased as titlecase
+  #
+  # Examples
+  #   "man from the boondocks".titleize #=> "Man From The Boondocks"
+  #   "x-men: the last stand".titleize #=> "X Men: The Last Stand"
+  def titleize(word)
+    humanize(underscore(word)).gsub(/\b([a-z])/) { $1.capitalize }
+  end
+  # The reverse of +camelize+. Makes an underscored form from the expression in the string.
+  #
+  # Changes '::' to '/' to convert namespaces to paths.
+  #
+  # Examples
+  #   "ActiveRecord".underscore #=> "active_record"
+  #   "ActiveRecord::Errors".underscore #=> active_record/errors
+  def underscore(camel_cased_word)
+    camel_cased_word.to_s.gsub(/::/, '/').
+      gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+      gsub(/([a-z\d])([A-Z])/,'\1_\2').
+      tr("-", "_").
+      downcase
+  end
+  # Replaces underscores with dashes in the string. The argument is stringified first,
+  # like the other helpers in this module, so symbols are accepted too.
+  #
+  # Example
+  #   "puni_puni" #=> "puni-puni"
+  def dasherize(underscored_word)
+    underscored_word.to_s.gsub(/_/, '-')
+  end
+  # Capitalizes the first word and turns underscores into spaces and strips _id.
+  # Like titleize, this is meant for creating pretty output.
+  #
+  # Examples
+  #   "employee_salary" #=> "Employee salary"
+  #   "author_id" #=> "Author"
+  def humanize(lower_case_and_underscored_word)
+    lower_case_and_underscored_word.to_s.gsub(/_id$/, "").gsub(/_/, " ").capitalize
+  end
+  # Removes the module part from the expression in the string
+  #
+  # Examples
+  #   "ActiveRecord::CoreExtensions::String::Inflections".demodulize #=> "Inflections"
+  #   "Inflections".demodulize #=> "Inflections"
+  def demodulize(class_name_in_module)
+    class_name_in_module.to_s.gsub(/^.*::/, '')
+  end
+  # Create the name of a table like Rails does for models to table names. This method
+  # uses the pluralize method on the last word in the string.
+  #
+  # Examples
+  #   "RawScaledScorer".tableize #=> "raw_scaled_scorers"
+  #   "egg_and_ham".tableize #=> "egg_and_hams"
+  #   "fancyCategory".tableize #=> "fancy_categories"
+  def tableize(class_name)
+    pluralize(underscore(class_name))
+  end
+  # Create a class name from a table name like Rails does for table names to models.
+  # Note that this returns a string and not a Class. (To convert to an actual class
+  # follow classify with constantize.)
+  #
+  # Examples
+  #   "egg_and_hams".classify #=> "EggAndHam"
+  #   "post".classify #=> "Post"
+  def classify(table_name)
+    # strip out any leading schema name
+    camelize(singularize(table_name.to_s.sub(/.*\./, '')))
+  end
+  # Creates a foreign key name from a class name.
+  # +separate_class_name_and_id_with_underscore+ sets whether
+  # the method should put '_' between the name and 'id'.
+  #
+  # Examples
+  #   "Message".foreign_key #=> "message_id"
+  #   "Message".foreign_key(false) #=> "messageid"
+  #   "Admin::Post".foreign_key #=> "post_id"
+  def foreign_key(class_name, separate_class_name_and_id_with_underscore = true)
+    underscore(demodulize(class_name)) + (separate_class_name_and_id_with_underscore ? "_id" : "id")
+  end
+  # Constantize tries to find a declared constant with the name specified
+  # in the string. It raises a NameError when the name is not in CamelCase
+  # or is not initialized.
+  #
+  # Examples
+  #   "Module".constantize #=> Module
+  #   "Class".constantize #=> Class
+  def constantize(camel_cased_word)
+    unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ camel_cased_word
+      raise NameError, "#{camel_cased_word.inspect} is not a valid constant name!"
+    end
+    # Only the captured constant path, already validated by the pattern above, is evaluated.
+    Object.module_eval("::#{$1}", __FILE__, __LINE__)
+  end
+  # Ordinalize turns a number into an ordinal string used to denote the
+  # position in an ordered sequence such as 1st, 2nd, 3rd, 4th.
+  # The suffix is chosen from the absolute value, so negative numbers get the
+  # same suffix as their positive counterparts.
+  #
+  # Examples
+  #   ordinalize(1)     # => "1st"
+  #   ordinalize(2)     # => "2nd"
+  #   ordinalize(1002)  # => "1002nd"
+  #   ordinalize(1003)  # => "1003rd"
+  #   ordinalize(-1)    # => "-1st"
+  def ordinalize(number)
+    # Ruby's % follows the sign of the divisor (-1 % 10 == 9), so reduce the magnitude instead.
+    magnitude = number.to_i.abs
+    if (11..13).include?(magnitude % 100)
+      "#{number}th"
+    else
+      case magnitude % 10
+        when 1; "#{number}st"
+        when 2; "#{number}nd"
+        when 3; "#{number}rd"
+        else    "#{number}th"
+      end
+    end
+  end
+end
+require File.dirname(__FILE__) + '/../lang/en/noun_inflections'