RubyGems - treat - Versions diffs - 0.2.5 → 1.0.0 - Mend

treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (242) hide show

data/LICENSE +3 -3
data/README.md +33 -0
data/files/INFO +1 -0
data/lib/treat.rb +40 -105
data/lib/treat/ai.rb +12 -0
data/lib/treat/ai/classifiers/id3.rb +27 -0
data/lib/treat/categories.rb +82 -35
data/lib/treat/categorizable.rb +44 -0
data/lib/treat/classification.rb +61 -0
data/lib/treat/configurable.rb +115 -0
data/lib/treat/data_set.rb +42 -0
data/lib/treat/dependencies.rb +24 -0
data/lib/treat/downloader.rb +87 -0
data/lib/treat/entities.rb +68 -66
data/lib/treat/entities/abilities.rb +10 -0
data/lib/treat/entities/abilities/buildable.rb +327 -0
data/lib/treat/entities/abilities/checkable.rb +31 -0
data/lib/treat/entities/abilities/copyable.rb +45 -0
data/lib/treat/entities/abilities/countable.rb +51 -0
data/lib/treat/entities/abilities/debuggable.rb +83 -0
data/lib/treat/entities/abilities/delegatable.rb +123 -0
data/lib/treat/entities/abilities/doable.rb +62 -0
data/lib/treat/entities/abilities/exportable.rb +11 -0
data/lib/treat/entities/abilities/iterable.rb +115 -0
data/lib/treat/entities/abilities/magical.rb +83 -0
data/lib/treat/entities/abilities/registrable.rb +74 -0
data/lib/treat/entities/abilities/stringable.rb +91 -0
data/lib/treat/entities/entities.rb +104 -0
data/lib/treat/entities/entity.rb +122 -245
data/lib/treat/exception.rb +4 -4
data/lib/treat/extractors.rb +77 -80
data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
data/lib/treat/extractors/language/what_language.rb +50 -45
data/lib/treat/extractors/name_tag/stanford.rb +55 -0
data/lib/treat/extractors/tf_idf/native.rb +87 -0
data/lib/treat/extractors/time/chronic.rb +55 -0
data/lib/treat/extractors/time/nickel.rb +86 -62
data/lib/treat/extractors/time/ruby.rb +53 -0
data/lib/treat/extractors/topic_words/lda.rb +67 -58
data/lib/treat/extractors/topics/reuters.rb +100 -87
data/lib/treat/formatters.rb +39 -35
data/lib/treat/formatters/readers/abw.rb +49 -29
data/lib/treat/formatters/readers/autoselect.rb +37 -33
data/lib/treat/formatters/readers/doc.rb +19 -13
data/lib/treat/formatters/readers/html.rb +52 -30
data/lib/treat/formatters/readers/image.rb +41 -40
data/lib/treat/formatters/readers/odt.rb +59 -45
data/lib/treat/formatters/readers/pdf.rb +28 -25
data/lib/treat/formatters/readers/txt.rb +12 -15
data/lib/treat/formatters/readers/xml.rb +73 -36
data/lib/treat/formatters/serializers/xml.rb +80 -79
data/lib/treat/formatters/serializers/yaml.rb +19 -18
data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
data/lib/treat/formatters/unserializers/xml.rb +94 -99
data/lib/treat/formatters/unserializers/yaml.rb +20 -19
data/lib/treat/formatters/visualizers/dot.rb +132 -132
data/lib/treat/formatters/visualizers/standoff.rb +52 -44
data/lib/treat/formatters/visualizers/tree.rb +26 -29
data/lib/treat/groupable.rb +153 -0
data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
data/lib/treat/inflectors.rb +50 -45
data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
data/lib/treat/inflectors/declensors/active_support.rb +31 -0
data/lib/treat/inflectors/declensors/english.rb +38 -0
data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
data/lib/treat/inflectors/stemmers/porter.rb +160 -0
data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
data/lib/treat/inflectors/stemmers/uea.rb +28 -0
data/lib/treat/installer.rb +308 -0
data/lib/treat/kernel.rb +105 -27
data/lib/treat/languages.rb +122 -88
data/lib/treat/languages/arabic.rb +15 -15
data/lib/treat/languages/chinese.rb +15 -15
data/lib/treat/languages/dutch.rb +15 -15
data/lib/treat/languages/english.rb +61 -62
data/lib/treat/languages/french.rb +19 -19
data/lib/treat/languages/german.rb +20 -20
data/lib/treat/languages/greek.rb +15 -15
data/lib/treat/languages/italian.rb +16 -16
data/lib/treat/languages/polish.rb +15 -15
data/lib/treat/languages/portuguese.rb +15 -15
data/lib/treat/languages/russian.rb +15 -15
data/lib/treat/languages/spanish.rb +16 -16
data/lib/treat/languages/swedish.rb +16 -16
data/lib/treat/lexicalizers.rb +34 -55
data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
data/lib/treat/linguistics.rb +9 -0
data/lib/treat/linguistics/categories.rb +11 -0
data/lib/treat/linguistics/tags.rb +422 -0
data/lib/treat/loaders/linguistics.rb +30 -0
data/lib/treat/loaders/stanford.rb +27 -0
data/lib/treat/object.rb +1 -0
data/lib/treat/processors.rb +37 -44
data/lib/treat/processors/chunkers/autoselect.rb +16 -0
data/lib/treat/processors/chunkers/html.rb +71 -0
data/lib/treat/processors/chunkers/txt.rb +18 -24
data/lib/treat/processors/parsers/enju.rb +253 -208
data/lib/treat/processors/parsers/stanford.rb +130 -131
data/lib/treat/processors/segmenters/punkt.rb +79 -45
data/lib/treat/processors/segmenters/stanford.rb +46 -48
data/lib/treat/processors/segmenters/tactful.rb +43 -36
data/lib/treat/processors/tokenizers/perl.rb +124 -92
data/lib/treat/processors/tokenizers/ptb.rb +81 -0
data/lib/treat/processors/tokenizers/punkt.rb +48 -42
data/lib/treat/processors/tokenizers/stanford.rb +39 -38
data/lib/treat/processors/tokenizers/tactful.rb +64 -55
data/lib/treat/proxies.rb +52 -35
data/lib/treat/retrievers.rb +26 -16
data/lib/treat/retrievers/indexers/ferret.rb +47 -26
data/lib/treat/retrievers/searchers/ferret.rb +69 -50
data/lib/treat/tree.rb +241 -183
data/spec/collection.rb +123 -0
data/spec/document.rb +93 -0
data/spec/entity.rb +408 -0
data/spec/languages.rb +25 -0
data/spec/phrase.rb +146 -0
data/spec/samples/mathematicians/archimedes.abw +34 -0
data/spec/samples/mathematicians/euler.html +21 -0
data/spec/samples/mathematicians/gauss.pdf +0 -0
data/spec/samples/mathematicians/leibniz.txt +13 -0
data/spec/samples/mathematicians/newton.doc +0 -0
data/spec/sandbox.rb +5 -0
data/spec/token.rb +109 -0
data/spec/treat.rb +52 -0
data/spec/tree.rb +117 -0
data/spec/word.rb +110 -0
data/spec/zone.rb +66 -0
data/tmp/INFO +1 -1
metadata +100 -201
data/INSTALL +0 -1
data/README +0 -3
data/TODO +0 -28
data/lib/economist/half_cocked_basel.txt +0 -16
data/lib/economist/hungarys_troubles.txt +0 -46
data/lib/economist/indias_slowdown.txt +0 -15
data/lib/economist/merkozy_rides_again.txt +0 -24
data/lib/economist/prada_is_not_walmart.txt +0 -9
data/lib/economist/to_infinity_and_beyond.txt +0 -15
data/lib/ferret/_11.cfs +0 -0
data/lib/ferret/_14.cfs +0 -0
data/lib/ferret/_p.cfs +0 -0
data/lib/ferret/_s.cfs +0 -0
data/lib/ferret/_v.cfs +0 -0
data/lib/ferret/_y.cfs +0 -0
data/lib/ferret/segments +0 -0
data/lib/ferret/segments_15 +0 -0
data/lib/treat/buildable.rb +0 -157
data/lib/treat/category.rb +0 -33
data/lib/treat/delegatable.rb +0 -116
data/lib/treat/doable.rb +0 -45
data/lib/treat/entities/collection.rb +0 -14
data/lib/treat/entities/document.rb +0 -12
data/lib/treat/entities/phrases.rb +0 -17
data/lib/treat/entities/tokens.rb +0 -61
data/lib/treat/entities/zones.rb +0 -41
data/lib/treat/extractors/coreferences/stanford.rb +0 -69
data/lib/treat/extractors/date/chronic.rb +0 -32
data/lib/treat/extractors/date/ruby.rb +0 -25
data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
data/lib/treat/extractors/language/language_extractor.rb +0 -27
data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
data/lib/treat/extractors/roles/naive.rb +0 -73
data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
data/lib/treat/extractors/statistics/position_in.rb +0 -14
data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
data/lib/treat/feature.rb +0 -58
data/lib/treat/features.rb +0 -7
data/lib/treat/formatters/visualizers/short_value.rb +0 -29
data/lib/treat/formatters/visualizers/txt.rb +0 -45
data/lib/treat/group.rb +0 -106
data/lib/treat/helpers/linguistics_loader.rb +0 -18
data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
data/lib/treat/inflectors/declensions/english.rb +0 -319
data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
data/lib/treat/inflectors/stem/porter.rb +0 -162
data/lib/treat/inflectors/stem/porter_c.rb +0 -26
data/lib/treat/inflectors/stem/uea.rb +0 -30
data/lib/treat/install.rb +0 -59
data/lib/treat/languages/tags.rb +0 -377
data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
data/lib/treat/lexicalizers/tag/brill.rb +0 -91
data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
data/lib/treat/registrable.rb +0 -28
data/lib/treat/sugar.rb +0 -50
data/lib/treat/viewable.rb +0 -29
data/lib/treat/visitable.rb +0 -28
data/test/profile.rb +0 -2
data/test/tc_entity.rb +0 -117
data/test/tc_extractors.rb +0 -73
data/test/tc_formatters.rb +0 -41
data/test/tc_inflectors.rb +0 -34
data/test/tc_lexicalizers.rb +0 -32
data/test/tc_processors.rb +0 -50
data/test/tc_resources.rb +0 -22
data/test/tc_treat.rb +0 -60
data/test/tc_tree.rb +0 -60
data/test/tests.rb +0 -20
data/test/texts.rb +0 -19
data/test/texts/english/half_cocked_basel.txt +0 -16
data/test/texts/english/hose_and_dry.doc +0 -0
data/test/texts/english/hungarys_troubles.abw +0 -70
data/test/texts/english/long.html +0 -24
data/test/texts/english/long.txt +0 -22
data/test/texts/english/medium.txt +0 -5
data/test/texts/english/republican_nomination.pdf +0 -0
data/test/texts/english/saving_the_euro.odt +0 -0
data/test/texts/english/short.txt +0 -3
data/test/texts/english/zero_sum.html +0 -111

data/lib/treat/formatters/visualizers/standoff.rb CHANGED Viewed

@@ -1,46 +1,54 @@
-module Treat
-  module Formatters
-    module Visualizers
-      # This class allows the visualization of
-      # an entity in standoff format; for example:
-      # (S (NP John) (VP has (VP come))).
-      class Standoff
-        # Default options for the visualizer.
-        DefaultOptions = { :indent => 0 }
-        # A lambda to recursively visualize the children
-        # of an entity.
-        Recurse = lambda do |entity, options|
-          v = ''
-          entity.each { |child| v += visualize(child, options) }
-          v
-        end
-        # Visualize the entity using standoff notation.
-        # This can only be called on sentences and smaller
-        # entities, as it is not a suitable format to
-        # represent larger entities.
-        def self.visualize(entity, options = {})
-          options = DefaultOptions.merge(options)
-          value = '';  spaces = ''
-          options[:indent].times { spaces << '   '}
-          options[:indent] += 1
-          if entity.is_a?(Treat::Entities::Token)
-            value += "#{spaces}(#{entity.tag} #{entity.value})"
-          elsif entity.is_a?(Treat::Entities::Phrase)
-            tag = entity.has?(:tag) ? entity.tag : ''
-            value += ("#{spaces}(#{tag}\n" +
-            "#{Recurse.call(entity, options)})\n")
-          elsif entity.is_a?(Treat::Entities::Sentence)
-            value += ("#{spaces}(S\n" +
-            "#{Recurse.call(entity, options)})\n")
-          else
-            raise 'Standoff format is unsuitable to represent' +
-            ' entities larger than sentences.'
-          end
-          options[:indent] -= 1
-          value.gsub!(")\n)", "))")
-          value
-        end
-      end
+# This class allows the visualization of
+# an entity in standoff format; for example:
+# (S (NP John) (VP has (VP come))).
+class Treat::Formatters::Visualizers::Standoff
+  # Start out with an indent of 0.
+  DefaultOptions = { :indent => 0 }
+  # A lambda to recursively visualize the children
+  # of an entity: it concatenates the result of
+  # visualize(child, options) for every child and
+  # returns the resulting string.
+  Recurse = lambda do |entity, options|
+    v = ''
+    entity.each { |child| v += visualize(child, options) }
+    v
+  end
+  # Visualize the entity using standoff notation.
+  # This can only be called on sentences and smaller
+  # entities, as it is not a suitable format to
+  # represent larger entities.
+  #
+  # Options:
+  #
+  # - :indent => Nesting depth of the entity; three
+  # spaces are emitted per level before the bracket.
+  #
+  # Returns the bracketed representation as a String.
+  # Raises a RuntimeError if the entity is not a
+  # Token, a Phrase or a Sentence.
+  def self.visualize(entity, options = {})
+    # merge returns a new hash, so the caller's
+    # options are never modified by the lines below.
+    options = DefaultOptions.merge(options)
+    value = '';  spaces = ''
+    options[:indent].times { spaces << '   '}
+    # Children are visualized one level deeper.
+    options[:indent] += 1
+    if entity.is_a?(Treat::Entities::Token)
+      val = ptb_escape(entity.value)
+      value += "#{spaces}(#{entity.tag} #{val})"
+    elsif entity.is_a?(Treat::Entities::Phrase)
+      tag = entity.has?(:tag) ? entity.tag : ''
+      value += ("#{spaces}(#{tag}\n" +
+      "#{Recurse.call(entity, options)})\n")
+    elsif entity.is_a?(Treat::Entities::Sentence)
+      value += ("#{spaces}(S\n" +
+      "#{Recurse.call(entity, options)})\n")
+    else
+      raise 'Standoff format is unsuitable to represent' +
+      ' entities larger than sentences.'
+    end
+    options[:indent] -= 1
+    # Pull a closing bracket that ended up alone on
+    # the next line back onto the previous one.
+    value.gsub!(")\n)", "))")
+    value
+  end
+  # Return a copy of val in which every key of
+  # Treat::Linguistics::Tags::PTBEscapeCharacters is
+  # replaced by its associated value. That table is
+  # defined elsewhere; presumably it maps characters
+  # such as brackets to their Penn Treebank escapes
+  # (TODO confirm). The replacement used to be val
+  # itself instead of esc, so nothing was escaped.
+  # A copy is escaped so that visualizing a token
+  # never rewrites the string held by the entity.
+  def self.ptb_escape(val)
+    escaped = val.dup
+    Treat::Linguistics::Tags::
+    PTBEscapeCharacters.each do |char, esc|
+      escaped.gsub!(char, esc)
     end
+    escaped
   end
-end
+end

data/lib/treat/formatters/visualizers/tree.rb CHANGED Viewed

@@ -1,32 +1,29 @@
-module Treat
-  module Formatters
-    module Visualizers
-      # This class generates an ASCII representation
-      # of a tree of entities.
-      class Tree
-        # Default options for the visualizer.
-        DefaultOptions = { :indent => 0 }
-        # Obtain a plain text tree representation
-        # of the entity.
-        def self.visualize(entity, options = {})
-          options = DefaultOptions.merge(options)
-          string = ''
-          if entity.has_children?
-            spacer = '--'
-            spaces = ''
-            options[:indent].times { spaces << '   '}
-            string << "+ #{entity.inspect}\n#{spaces}|"
-            options[:indent] += 1
-            entity.children.each do |child|
-              string = string + "\n" + spaces + '+' +
-              spacer + self.visualize(child, options)
-            end
-            options[:indent] -= 1
-            return string
-          end
-          '> ' + entity.inspect
-        end
+# This class generates an ASCII representation of a tree of
+# entities, one line per entity, built from entity.inspect.
+class Treat::Formatters::Visualizers::Tree
+  # Default options: start out with an indent of 0.
+  DefaultOptions = { :indent => 0 }
+  # Obtain a plain text tree representation of the entity, as a String.
+  # options[:indent] is the nesting depth (three spaces per level).
+  def self.visualize(entity, options = {})
+    options = DefaultOptions.merge(options) # new hash; the caller's options stay untouched
+    string = ''
+    if entity.has_children?
+      spacer = '--'
+      spaces = ''
+      options[:indent].times { spaces << '   '}
+      string << "+ #{entity.inspect}\n#{spaces}|" # header line for a node that has children
+      options[:indent] += 1 # children are rendered one level deeper
+      entity.children.each do |child|
+        string = string + "\n" + spaces + '+' +
+        spacer + self.visualize(child, options)
       end
+      options[:indent] -= 1
+      return string
     end
+    '> ' + entity.inspect # an entity without children is rendered on a single line
   end
-end
+end

data/lib/treat/groupable.rb ADDED Viewed

@@ -0,0 +1,153 @@
+module Treat::Groupable
+  # Lazily load a worker class: map the missing constant to a file, require it, return it.
+  def const_missing(const)
+    bits = self.ancestors[0].to_s.split('::') # namespace parts of the first ancestor's name
+    bits.collect! { |bit| ucc(bit) } # ucc is defined elsewhere; presumably un-camel-cases - TODO confirm
+    file = bits.join('/') + "/#{ucc(const)}"
+    if not File.readable?(Treat.lib + "#{file}.rb")
+      raise Treat::Exception,
+      "File '#{file}.rb' corresponding to " +
+      "requested worker #{self}::#{const} " +
+      "does not exist."
+    else
+      require file
+      if not const_defined?(const) # the file loaded but did not define the constant
+        raise Treat::Exception,
+        "File #{file} does not define " +
+        "#{self}::#{const}."
+      end
+      const_get(const)
+    end
+  end
+  # Cache of worker lists, keyed by the value of ucc(cl(self)) for each group.
+  @@list = {}
+  # Return the names of the workers in the group as symbols. The list is built
+  # once per key by globbing Treat.lib for "treat/*/<key>/*.rb" files.
+  def list
+    mod = ucc(cl(self)) # cl and ucc are defined elsewhere; presumably the group's directory name - TODO confirm
+    if @@list[mod].nil?
+      @@list[mod] = []
+      dirs = Dir[Treat.lib + "treat/*/#{mod}/*.rb"]
+      dirs.each do |file|
+        @@list[mod] <<
+        file.split('/')[-1][0..-4].intern # last path segment minus its final 3 characters (".rb")
+      end
+    end
+    @@list[mod]
+  end
+  # Boolean - is the supplied class one of the group's target entity classes,
+  # or a subclass of one? NOTE(review): the strict argument is never used.
+  def has_target?(target, strict = false)
+    is_target = false
+    self.targets.each do |entity_type|
+      t = cc(entity_type) # cc is defined elsewhere; presumably camel-cases the name - TODO confirm
+      entity_type = Treat::Entities.const_get(t)
+      if target < entity_type ||
+        entity_type == target
+        is_target = true; break
+      end
+    end
+    is_target
+  end
+  # Create a new worker class within the group from a block. The class gets a
+  # singleton method, named after the group's method, that calls the block with
+  # (entity, options); class_name is also appended to the group's cached list.
+  def add(class_name, &block)
+    c = cc(class_name).intern
+    klass = self.const_set(c, Class.new)
+    method = self.method # the group's own 'method' (defined in self.extended), not Object#method
+    @@list[ucc(cl(self))] << class_name
+    klass.send(:define_singleton_method,
+    method) do |entity, options={}|
+      block.call(entity, options)
+    end
+  end
+  # Get constants in this module only: super is called with inherit = false,
+  # so constants defined by parent modules are not returned.
+  def const_get(const)
+    super(const, false)
+  end
+  # Hook run when a module extends Groupable: adds the group's settings and its method resolver.
+  def self.extended(group)
+    group.module_eval do
+      class << self
+        # The type of the group. There are three types:
+        #
+        # - Transformers transform the tree of an entity.
+        # - Annotators compute a value and store it in the entity.
+        # - Computers compute a value and do not store it.
+        attr_accessor :type
+        # The default worker in the group, for language-
+        # independent tasks.
+        attr_accessor :default
+        # The entity types which the group's workers work on.
+        attr_accessor :targets
+        # Presets to automatically generate functions.
+        attr_accessor :presets
+        # The preset option to use with preset functions.
+        attr_accessor :preset_option
+      end
+      # Return the method corresponding to the group.
+      # This method resolves the name of the method
+      # that a group should provide based on the name
+      # of the group. Basically, if the group ends in
+      # -ers, the verb corresponding to the group is
+      # returned (tokenizers -> tokenize, inflectors ->
+      # inflect). Otherwise, the name of the method
+      # is the same as that of the group (encoding ->
+      # encoding, tag -> tag). The result is memoized in @method.
+      @method = nil
+      def self.method
+        return @method if @method
+        m = ucc(cl(self)).dup
+        if  m[-4..-1] == 'zers'
+          if type == :annotator
+            if m[-6] == 'l'
+              m[-5..-1] = '' # ...lizers: drop 'izers' (cardinalizers -> cardinal)
+            else
+              m[-5..-1] = 'y' # otherwise 'izers' becomes 'y' (categorizers -> category)
+            end
+          else
+            m = m[0..-3] # non-annotators only drop 'rs' (tokenizers -> tokenize)
+          end
+          n = m
+        elsif m[-4..-1] == 'iers'
+          m[-4..-1] = 'y' # 'iers' becomes 'y' (classifiers -> classify)
+          n = m
+        elsif m[-3..-1] == 'ers'
+          if ['k', 't', 'm', 'd',
+              'g', 'n', 'x', 'h'].
+              include? m[-4]
+            n = m[0..-4] # after these letters the whole 'ers' is dropped (chunkers -> chunk)
+            if n[-1] == n[-2]
+              n = n[0..-2] # undo a doubled final letter (taggers -> tagg -> tag)
+            end
+          else
+            n = m[0..-3] # otherwise only 'rs' is dropped (parsers -> parse)
+          end
+        elsif m[-3..-1] == 'ors'
+          n = m[0..-4] + 'e' # 'ors' becomes 'e' (conjugators -> conjugate)
+        else
+          n = m
+        end
+        @method = n.intern
+      end
+      # Populate the group's list.
+      group.list
+    end
+  end
+end

data/lib/treat/helpers/decimal_point_escaper.rb ADDED Viewed

@@ -0,0 +1,22 @@
+module Treat::Helpers
+  # Swaps the '.' found between two runs of digits for EscapeChar, and back.
+  # Presumably used to shield decimal numbers from text splitting - TODO confirm.
+  class DecimalPointEscaper
+    EscapeChar = '^^'
+    EscapedEscapeChar = '\^\^'
+    class << self
+      # Replace, in place, the decimal point of every number in s with
+      # EscapeChar. Returns s, or nil when no substitution was made.
+      def escape!(s)
+        s.gsub!(/([0-9]+)\.([0-9]+)/) { "#{$1}#{EscapeChar}#{$2}" }
+      end
+      # Restore, in place, the decimal points hidden by escape!.
+      # Returns s, or nil when no substitution was made.
+      def unescape!(s)
+        hidden = /([0-9]+)#{EscapedEscapeChar}([0-9]+)/
+        s.gsub!(hidden) { "#{$1}.#{$2}" }
+      end
+    end
+  end
+end

data/lib/treat/inflectors.rb CHANGED Viewed

@@ -1,47 +1,52 @@
-module Treat
-  # Algorithms to retrieve the inflections of a word.
-  module Inflectors
-    # Return the stem (*not root form*) of a word.
-    module Stem
-      extend Group
-      self.type = :annotator
-      self.targets = [:word]
-    end
-    # Retrieve the different declensions of a noun (singular, plural).
-    module Declensions
-      extend Group
-      self.type = :annotator
-      self.targets = [:word]
-      self.presets = {
-        :plural => {:count => :plural},
-        :singular => {:count => :singular}
-      }
-    end
-    # Retrieve the different conjugations of a word.
-    module Conjugations
-      extend Group
-      self.type = :annotator
-      self.targets = [:word]
-      self.presets = {
-        :infinitive => {:mode => :infinitive},
-        :present_participle => {:tense => :present, :mode => :participle},
-        :plural_verb => {:count => :plural},
-        :singular_verb => {:count => :singular}
-      }
-    end
-    # Retrieve the full text description of a cardinal number.
-    module CardinalWords
-      extend Group
-      self.type = :annotator
-      self.targets = [:number]
-    end
-    # Retrieve the full text description of an ordinal number.
-    module OrdinalWords
-      extend Group
-      self.type = :annotator
-      self.targets = [:number]
-    end
-    extend Treat::Category
+# Category of worker groups that retrieve the inflections of a
+# word. Each nested module below is one group extending Treat::Groupable.
+module Treat::Inflectors
+  # Return the stem (*not root form*) of a word.
+  module Stemmers
+    extend Treat::Groupable
+    self.type = :annotator
+    self.targets = [:word]
+  end
+  # Retrieve the different declensions of a
+  # noun (singular, plural).
+  module Declensors
+    extend Treat::Groupable
+    self.type = :annotator
+    self.targets = [:word]
+    self.preset_option = :count # presumably the option key each preset below is passed under - TODO confirm
+    self.presets = [:plural, :singular]
+  end
+  # Retrieve the different conjugations of a word
+  # given a mode, tense, person, and/or number.
+  module Conjugators
+    extend Treat::Groupable
+    self.type = :annotator
+    self.targets = [:word]
+    self.preset_option = :form # presumably the option key each preset below is passed under - TODO confirm
+    self.presets = [:infinitive, :present_participle,
+                    :plural_verb, :singular_verb]
   end
-end
+  # Retrieve the full text description of a
+  # cardinal number.
+  module Cardinalizers
+    extend Treat::Groupable
+    self.type = :annotator
+    self.targets = [:number]
+  end
+  # Retrieve the full text description of an
+  # ordinal number.
+  module Ordinalizers
+    extend Treat::Groupable
+    self.type = :annotator
+    self.targets = [:number]
+  end
+  # Make Inflectors categorizable.
+  extend Treat::Categorizable
+end

data/lib/treat/inflectors/cardinalizers/linguistics.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# This module is a wrapper for the functions included
+# in the 'linguistics' gem that describe a
+# number in words in cardinal form.
+#
+# Project website: http://deveiate.org/projects/Linguistics/
+module Treat::Inflectors::Cardinalizers::Linguistics
+  require 'treat/loaders/linguistics'
+  # Return the description of a cardinal number in words.
+  #
+  # Options:
+  #
+  # - :group => Controls how many numbers at a time are
+  # grouped together. Valid values are 0 (normal grouping),
+  # 1 (single-digit grouping, e.g., “one, two, three, four”),
+  # 2 (double-digit grouping, e.g., “twelve, thirty-four”), or
+  # 3 (triple-digit grouping, e.g., “one twenty-three, four”).
+  # - :comma => Set the character/s used to separate word groups.
+  # Defaults to ", ".
+  # - :and => Set the word and/or characters used where ' and '
+  # (the default) is normally used. Setting :and to ' ', for
+  # example, will cause 2556 to be returned as “two-thousand,
+  # five hundred fifty-six” instead of “two-thousand, five
+  # hundred and fifty-six”.
+  # - :zero => Set the word used to represent the numeral 0 in
+  # the result. 'zero' is the default.
+  # - :decimal => Set the translation of any decimal points in
+  # the number; the default is 'point'.
+  # - :asArray => If set to a true value, the number will be returned
+  # as an array of word groups instead of a String.
+  #
+  # The loader is chosen from entity.language; entity.to_s and options go to numwords as given.
+  def self.cardinal(entity, options = {})
+    Treat::Loaders::Linguistics.
+    load(entity.language).
+    numwords(entity.to_s, options)
+  end
+end