treat 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +7 -8
 - data/TODO +16 -13
 - data/examples/keywords.rb +89 -1
 - data/lib/treat/buildable.rb +1 -8
 - data/lib/treat/categories.rb +3 -4
 - data/lib/treat/category.rb +1 -1
 - data/lib/treat/delegatable.rb +1 -1
 - data/lib/treat/detectors/encoding/native.rb +5 -0
 - data/lib/treat/detectors/encoding/r_chardet19.rb +2 -3
 - data/lib/treat/detectors/language/language_detector.rb +4 -0
 - data/lib/treat/detectors/language/what_language.rb +4 -4
 - data/lib/treat/detectors.rb +1 -1
 - data/lib/treat/entities/entity.rb +5 -3
 - data/lib/treat/entities/tokens.rb +14 -5
 - data/lib/treat/entities/zones.rb +4 -0
 - data/lib/treat/entities.rb +7 -5
 - data/lib/treat/extractors/keywords/topics_frequency.rb +40 -0
 - data/lib/treat/extractors/statistics/{frequency.rb → frequency_in.rb} +5 -4
 - data/lib/treat/extractors/statistics/frequency_of.rb +3 -5
 - data/lib/treat/extractors/statistics/{position_in.rb → position_in_parent.rb} +4 -3
 - data/lib/treat/extractors/statistics/tf_idf.rb +36 -0
 - data/lib/treat/extractors/statistics/transition_matrix.rb +25 -25
 - data/lib/treat/extractors/statistics/transition_probability.rb +8 -4
 - data/lib/treat/extractors/time/chronic.rb +8 -0
 - data/lib/treat/extractors/time/native.rb +6 -0
 - data/lib/treat/extractors/time/nickel.rb +31 -23
 - data/lib/treat/extractors/topic_words/lda.rb +21 -16
 - data/lib/treat/extractors/topics/reuters.rb +6 -4
 - data/lib/treat/extractors.rb +7 -7
 - data/lib/treat/formatters/readers/abw.rb +32 -0
 - data/lib/treat/formatters/readers/autoselect.rb +13 -11
 - data/lib/treat/formatters/readers/doc.rb +13 -0
 - data/lib/treat/formatters/readers/gocr.rb +2 -0
 - data/lib/treat/formatters/readers/html.rb +21 -1
 - data/lib/treat/formatters/readers/ocropus.rb +3 -3
 - data/lib/treat/formatters/readers/odt.rb +41 -0
 - data/lib/treat/formatters/readers/pdf.rb +5 -2
 - data/lib/treat/formatters/readers/txt.rb +2 -0
 - data/lib/treat/formatters/serializers/xml.rb +3 -2
 - data/lib/treat/formatters/serializers/yaml.rb +2 -0
 - data/lib/treat/formatters/unserializers/autoselect.rb +7 -1
 - data/lib/treat/formatters/unserializers/xml.rb +6 -1
 - data/lib/treat/formatters/unserializers/yaml.rb +5 -1
 - data/lib/treat/formatters/visualizers/dot.rb +35 -37
 - data/lib/treat/formatters/visualizers/html.rb +1 -0
 - data/lib/treat/formatters/visualizers/inspect.rb +4 -0
 - data/lib/treat/formatters/visualizers/short_value.rb +18 -3
 - data/lib/treat/formatters/visualizers/standoff.rb +11 -6
 - data/lib/treat/formatters/visualizers/tree.rb +5 -1
 - data/lib/treat/formatters/visualizers/txt.rb +6 -1
 - data/lib/treat/formatters.rb +1 -1
 - data/lib/treat/group.rb +4 -3
 - data/lib/treat/inflectors/cardinal_words/linguistics.rb +14 -17
 - data/lib/treat/inflectors/conjugations/linguistics.rb +16 -3
 - data/lib/treat/inflectors/declensions/linguistics.rb +17 -6
 - data/lib/treat/inflectors/ordinal_words/linguistics.rb +9 -10
 - data/lib/treat/inflectors/stem/porter.rb +6 -2
 - data/lib/treat/inflectors/stem/porter_c.rb +4 -1
 - data/lib/treat/inflectors/stem/uea.rb +4 -4
 - data/lib/treat/languages/english/tags.rb +16 -0
 - data/lib/treat/languages/english.rb +4 -1
 - data/lib/treat/lexicalizers/category/from_tag.rb +4 -4
 - data/lib/treat/lexicalizers/linkages/naive.rb +3 -3
 - data/lib/treat/lexicalizers/tag/brill.rb +3 -11
 - data/lib/treat/lexicalizers/tag/lingua.rb +4 -6
 - data/lib/treat/lexicalizers.rb +0 -2
 - data/lib/treat/processors/chunkers/txt.rb +4 -4
 - data/lib/treat/processors/parsers/enju.rb +3 -17
 - data/lib/treat/processors/parsers/stanford.rb +4 -0
 - data/lib/treat/processors/segmenters/punkt.rb +1 -0
 - data/lib/treat/processors/segmenters/stanford.rb +4 -0
 - data/lib/treat/processors/segmenters/tactful.rb +4 -1
 - data/lib/treat/processors/tokenizers/punkt.rb +1 -2
 - data/lib/treat/processors/tokenizers/stanford.rb +4 -0
 - data/lib/treat/processors/tokenizers/tactful.rb +1 -1
 - data/lib/treat/processors.rb +4 -4
 - data/lib/treat/proxies.rb +18 -11
 - data/lib/treat/registrable.rb +12 -5
 - data/lib/treat/sugar.rb +8 -3
 - data/lib/treat/tree.rb +10 -3
 - data/lib/treat.rb +55 -55
 - data/test/tc_entity.rb +7 -7
 - data/test/tc_extractors.rb +6 -4
 - data/test/tc_formatters.rb +0 -4
 - data/test/tests.rb +2 -0
 - data/test/texts.rb +4 -4
 - metadata +48 -56
 - data/examples/texts/bugged_out.txt +0 -26
 - data/examples/texts/half_cocked_basel.txt +0 -16
 - data/examples/texts/hedge_funds.txt +0 -24
 - data/examples/texts/hose_and_dry.txt +0 -19
 - data/examples/texts/hungarys_troubles.txt +0 -46
 - data/examples/texts/indias_slowdown.txt +0 -15
 - data/examples/texts/merkozy_rides_again.txt +0 -24
 - data/examples/texts/prada_is_not_walmart.txt +0 -9
 - data/examples/texts/republican_nomination.txt +0 -26
 - data/examples/texts/to_infinity_and_beyond.txt +0 -15
 - data/lib/treat/entities/text.rb +0 -7
 - data/lib/treat/extractors/key_sentences/topics_frequency.rb +0 -49
 - data/lib/treat/formatters/cleaners/html.rb +0 -17
 
| 
         @@ -2,62 +2,54 @@ module Treat 
     | 
|
| 
       2 
2 
     | 
    
         
             
              module Formatters
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Visualizers
         
     | 
| 
       4 
4 
     | 
    
         
             
                  class Dot
         
     | 
| 
       5 
     | 
    
         
            -
                     
     | 
| 
       6 
     | 
    
         
            -
                    BorderColors = {
         
     | 
| 
       7 
     | 
    
         
            -
                      :verb => "#00AABB",
         
     | 
| 
       8 
     | 
    
         
            -
                      :noun => "#FAD4A7",
         
     | 
| 
       9 
     | 
    
         
            -
                      :adverb => '#103585',
         
     | 
| 
       10 
     | 
    
         
            -
                      :adjective => '#D21D54'
         
     | 
| 
       11 
     | 
    
         
            -
                    }
         
     | 
| 
      
 5 
     | 
    
         
            +
                    DefaultOptions = {colors: {}, :features => :all}
         
     | 
| 
       12 
6 
     | 
    
         
             
                    # Create the top-most graph structure
         
     | 
| 
       13 
7 
     | 
    
         
             
                    # and delegate the creation of the graph
         
     | 
| 
       14 
8 
     | 
    
         
             
                    # nodes to to_dot.
         
     | 
| 
       15 
9 
     | 
    
         
             
                    def self.visualize(entity, options = {})
         
     | 
| 
      
 10 
     | 
    
         
            +
                      options = DefaultOptions.merge(options)
         
     | 
| 
       16 
11 
     | 
    
         
             
                      string = "graph {"
         
     | 
| 
       17 
     | 
    
         
            -
                      string << self.to_dot(entity)
         
     | 
| 
      
 12 
     | 
    
         
            +
                      string << self.to_dot(entity, options)
         
     | 
| 
       18 
13 
     | 
    
         
             
                      string << "\n}"
         
     | 
| 
       19 
14 
     | 
    
         
             
                    end
         
     | 
| 
       20 
15 
     | 
    
         
             
                    # dot -Tpdf test4.dot > test4.pdf
         
     | 
| 
       21 
     | 
    
         
            -
                    def self.to_dot(entity)
         
     | 
| 
      
 16 
     | 
    
         
            +
                    def self.to_dot(entity, options)
         
     | 
| 
      
 17 
     | 
    
         
            +
                      # Id
         
     | 
| 
       22 
18 
     | 
    
         
             
                      string = ''
         
     | 
| 
       23 
     | 
    
         
            -
                       
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                         
     | 
| 
       28 
     | 
    
         
            -
                          label = "label=\"#{entity.value.inspect[1..-2]}\","
         
     | 
| 
       29 
     | 
    
         
            -
                        end
         
     | 
| 
      
 19 
     | 
    
         
            +
                      label = ''
         
     | 
| 
      
 20 
     | 
    
         
            +
                      string = "\n#{entity.id} ["
         
     | 
| 
      
 21 
     | 
    
         
            +
                      # Value
         
     | 
| 
      
 22 
     | 
    
         
            +
                      if entity.is_a?(Treat::Entities::Token)
         
     | 
| 
      
 23 
     | 
    
         
            +
                        label = entity.to_s
         
     | 
| 
       30 
24 
     | 
    
         
             
                      else
         
     | 
| 
       31 
     | 
    
         
            -
                         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
                           
     | 
| 
       34 
     | 
    
         
            -
                        else
         
     | 
| 
       35 
     | 
    
         
            -
                          label = "label=\"#{cc(cl(entity.class))}\","
         
     | 
| 
      
 25 
     | 
    
         
            +
                        label = entity.type.to_s.capitalize + " "
         
     | 
| 
      
 26 
     | 
    
         
            +
                        if entity.is_leaf?
         
     | 
| 
      
 27 
     | 
    
         
            +
                          label = entity.short_value.gsub(' [...]', " [...] \\n")
         
     | 
| 
       36 
28 
     | 
    
         
             
                        end
         
     | 
| 
       37 
29 
     | 
    
         
             
                      end
         
     | 
| 
       38 
     | 
    
         
            -
                       
     | 
| 
      
 30 
     | 
    
         
            +
                      # Features
         
     | 
| 
       39 
31 
     | 
    
         
             
                      if entity.has_features?
         
     | 
| 
       40 
     | 
    
         
            -
                         
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
                           
     | 
| 
       43 
     | 
    
         
            -
                             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
      
 32 
     | 
    
         
            +
                        unless options[:features] == :none
         
     | 
| 
      
 33 
     | 
    
         
            +
                          label << "\\n"
         
     | 
| 
      
 34 
     | 
    
         
            +
                          entity.features.each do |feature, value|
         
     | 
| 
      
 35 
     | 
    
         
            +
                            if options[:features] == :all ||
         
     | 
| 
      
 36 
     | 
    
         
            +
                              options[:features].include?(feature)
         
     | 
| 
      
 37 
     | 
    
         
            +
                              if value.is_a?(Treat::Entities::Entity)
         
     | 
| 
      
 38 
     | 
    
         
            +
                                label << "\\n#{feature}=\\\"*#{value.id}\\\","
         
     | 
| 
      
 39 
     | 
    
         
            +
                              else
         
     | 
| 
      
 40 
     | 
    
         
            +
                                label << "\\n#{feature}=\\\"#{value}\\\","
         
     | 
| 
      
 41 
     | 
    
         
            +
                              end
         
     | 
| 
      
 42 
     | 
    
         
            +
                            end
         
     | 
| 
       46 
43 
     | 
    
         
             
                          end
         
     | 
| 
       47 
44 
     | 
    
         
             
                        end
         
     | 
| 
       48 
     | 
    
         
            -
                        string = string[0..-2]
         
     | 
| 
       49 
     | 
    
         
            -
                        string << "]"
         
     | 
| 
       50 
     | 
    
         
            -
                      else
         
     | 
| 
       51 
     | 
    
         
            -
                        string << "#{label[0..-2]}]"
         
     | 
| 
       52 
45 
     | 
    
         
             
                      end
         
     | 
| 
      
 46 
     | 
    
         
            +
                      label = label[0..-2] if label[-1] == ','
         
     | 
| 
      
 47 
     | 
    
         
            +
                      string << "label=\"#{label}\"]"
         
     | 
| 
      
 48 
     | 
    
         
            +
                      # Parent-child relationships.
         
     | 
| 
       53 
49 
     | 
    
         
             
                      if entity.has_parent?
         
     | 
| 
       54 
50 
     | 
    
         
             
                        string << "\n#{entity.parent.id} -- #{entity.id};"
         
     | 
| 
       55 
51 
     | 
    
         
             
                      end
         
     | 
| 
       56 
     | 
    
         
            -
                       
     | 
| 
       57 
     | 
    
         
            -
                        entity.each do |child|
         
     | 
| 
       58 
     | 
    
         
            -
                          string << self.to_dot(child)
         
     | 
| 
       59 
     | 
    
         
            -
                        end
         
     | 
| 
       60 
     | 
    
         
            -
                      end
         
     | 
| 
      
 52 
     | 
    
         
            +
                      # Edges.
         
     | 
| 
       61 
53 
     | 
    
         
             
                      if entity.has_edges?
         
     | 
| 
       62 
54 
     | 
    
         
             
                        entity.edges.each_pair do |target, type|
         
     | 
| 
       63 
55 
     | 
    
         
             
                          string << "\n#{entity.id} -- #{target}"
         
     | 
| 
         @@ -65,6 +57,12 @@ module Treat 
     | 
|
| 
       65 
57 
     | 
    
         
             
                          string << "arrowhead=\"odiamond\"]"
         
     | 
| 
       66 
58 
     | 
    
         
             
                        end
         
     | 
| 
       67 
59 
     | 
    
         
             
                      end
         
     | 
| 
      
 60 
     | 
    
         
            +
                      # Recurse.
         
     | 
| 
      
 61 
     | 
    
         
            +
                      if entity.has_children?
         
     | 
| 
      
 62 
     | 
    
         
            +
                        entity.each do |child|
         
     | 
| 
      
 63 
     | 
    
         
            +
                          string << self.to_dot(child, options)
         
     | 
| 
      
 64 
     | 
    
         
            +
                        end
         
     | 
| 
      
 65 
     | 
    
         
            +
                      end
         
     | 
| 
       68 
66 
     | 
    
         
             
                      string
         
     | 
| 
       69 
67 
     | 
    
         
             
                    end
         
     | 
| 
       70 
68 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -1,7 +1,11 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Formatters
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Visualizers
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # Handles the call to inspect.
         
     | 
| 
       4 
5 
     | 
    
         
             
                  class Inspect
         
     | 
| 
      
 6 
     | 
    
         
            +
                    # Return a terminal-friendly visualization of an entity.
         
     | 
| 
      
 7 
     | 
    
         
            +
                    # 
         
     | 
| 
      
 8 
     | 
    
         
            +
                    # Options: none.
         
     | 
| 
       5 
9 
     | 
    
         
             
                    def self.visualize(entity, options = {})
         
     | 
| 
       6 
10 
     | 
    
         
             
                      s = "#{entity.class.to_s.split('::')[-1]} (#{entity.id.to_s})"
         
     | 
| 
       7 
11 
     | 
    
         
             
                      unless caller_method == :inspect
         
     | 
| 
         @@ -2,11 +2,26 @@ module Treat 
     | 
|
| 
       2 
2 
     | 
    
         
             
              module Formatters
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Visualizers
         
     | 
| 
       4 
4 
     | 
    
         
             
                  class ShortValue
         
     | 
| 
      
 5 
     | 
    
         
            +
                    # Default options for the visualizer.
         
     | 
| 
      
 6 
     | 
    
         
            +
                    DefaultOptions = { max_words: 6, max_length: 30 }
         
     | 
| 
      
 7 
     | 
    
         
            +
                    # Returns the text value of an entity, shortened
         
     | 
| 
      
 8 
     | 
    
         
            +
                    # with [...] if the value is longer than :max_words
         
     | 
| 
      
 9 
     | 
    
         
            +
                    # or longer than :max_length.
         
     | 
| 
      
 10 
     | 
    
         
            +
                    #
         
     | 
| 
      
 11 
     | 
    
         
            +
                    # Options:
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # - (Integer) :max_words => the maximum number
         
     | 
| 
      
 13 
     | 
    
         
            +
                    # of words in an entity before it is shortened.
         
     | 
| 
      
 14 
     | 
    
         
            +
                    # - (Integer) :max_length => the maximum number
         
     | 
| 
      
 15 
     | 
    
         
            +
                    # of characters in an entity before it is shortened.
         
     | 
| 
       5 
16 
     | 
    
         
             
                    def self.visualize(entity, options = {})
         
     | 
| 
       6 
     | 
    
         
            -
                      options 
     | 
| 
      
 17 
     | 
    
         
            +
                      options = DefaultOptions.merge(options)
         
     | 
| 
       7 
18 
     | 
    
         
             
                      words = entity.to_s.split(' ')
         
     | 
| 
       8 
     | 
    
         
            -
                       
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
      
 19 
     | 
    
         
            +
                      if words.size < options[:max_words] || 
         
     | 
| 
      
 20 
     | 
    
         
            +
                        entity.to_s.length < options[:max_length]
         
     | 
| 
      
 21 
     | 
    
         
            +
                        entity.to_s 
         
     | 
| 
      
 22 
     | 
    
         
            +
                      else
         
     | 
| 
      
 23 
     | 
    
         
            +
                        words[0..2].join(' ') + ' [...] ' + words[-3..-1].join(' ')
         
     | 
| 
      
 24 
     | 
    
         
            +
                      end
         
     | 
| 
       10 
25 
     | 
    
         
             
                    end
         
     | 
| 
       11 
26 
     | 
    
         
             
                  end
         
     | 
| 
       12 
27 
     | 
    
         
             
                end
         
     | 
| 
         @@ -5,24 +5,29 @@ module Treat 
     | 
|
| 
       5 
5 
     | 
    
         
             
                  # an entity in standoff format; for example:
         
     | 
| 
       6 
6 
     | 
    
         
             
                  # (S (NP John) (VP has (VP come))).
         
     | 
| 
       7 
7 
     | 
    
         
             
                  class Standoff
         
     | 
| 
       8 
     | 
    
         
            -
                     
     | 
| 
      
 8 
     | 
    
         
            +
                    # Default options for the visualizer.
         
     | 
| 
      
 9 
     | 
    
         
            +
                    DefaultOptions = { indent: 0 }
         
     | 
| 
      
 10 
     | 
    
         
            +
                    # A lambda to recursively visualize the children
         
     | 
| 
      
 11 
     | 
    
         
            +
                    # of an entity.
         
     | 
| 
      
 12 
     | 
    
         
            +
                    Recurse = lambda do |entity, options|
         
     | 
| 
       9 
13 
     | 
    
         
             
                      v = ''
         
     | 
| 
       10 
14 
     | 
    
         
             
                      entity.each { |child| v += visualize(child, options) }
         
     | 
| 
       11 
15 
     | 
    
         
             
                      v
         
     | 
| 
       12 
16 
     | 
    
         
             
                    end
         
     | 
| 
       13 
17 
     | 
    
         
             
                    # Visualize the entity using standoff notation.
         
     | 
| 
       14 
     | 
    
         
            -
                    # This can only be called on sentences 
     | 
| 
       15 
     | 
    
         
            -
                    # is not a suitable format to  
     | 
| 
       16 
     | 
    
         
            -
                    #  
     | 
| 
      
 18 
     | 
    
         
            +
                    # This can only be called on sentences and smaller
         
     | 
| 
      
 19 
     | 
    
         
            +
                    # entities, as it is not a suitable format to 
         
     | 
| 
      
 20 
     | 
    
         
            +
                    # represent larger entities.
         
     | 
| 
       17 
21 
     | 
    
         
             
                    def self.visualize(entity, options = {})
         
     | 
| 
       18 
     | 
    
         
            -
                      options =  
     | 
| 
      
 22 
     | 
    
         
            +
                      options = DefaultOptions.merge(options)
         
     | 
| 
       19 
23 
     | 
    
         
             
                      value = '';  spaces = ''
         
     | 
| 
       20 
24 
     | 
    
         
             
                      options[:indent].times { spaces << '   '}
         
     | 
| 
       21 
25 
     | 
    
         
             
                      options[:indent] += 1
         
     | 
| 
       22 
26 
     | 
    
         
             
                      if entity.is_a?(Treat::Entities::Token)
         
     | 
| 
       23 
27 
     | 
    
         
             
                        value += "#{spaces}(#{entity.tag} #{entity.value})"
         
     | 
| 
       24 
28 
     | 
    
         
             
                      elsif entity.is_a?(Treat::Entities::Constituent)
         
     | 
| 
       25 
     | 
    
         
            -
                         
     | 
| 
      
 29 
     | 
    
         
            +
                        tag = entity.has?(:tag) ? entity.tag : ''
         
     | 
| 
      
 30 
     | 
    
         
            +
                        value += ("#{spaces}(#{tag}\n" +
         
     | 
| 
       26 
31 
     | 
    
         
             
                        "#{Recurse.call(entity, options)})\n")
         
     | 
| 
       27 
32 
     | 
    
         
             
                      elsif entity.is_a?(Treat::Entities::Sentence)
         
     | 
| 
       28 
33 
     | 
    
         
             
                        value += ("#{spaces}(S\n" +
         
     | 
| 
         @@ -1,11 +1,15 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Formatters
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Visualizers
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # This class generates an ASCII representation
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # of a tree of entities.
         
     | 
| 
       4 
6 
     | 
    
         
             
                  class Tree
         
     | 
| 
      
 7 
     | 
    
         
            +
                    # Default options for the visualizer.
         
     | 
| 
      
 8 
     | 
    
         
            +
                    DefaultOptions = { indent: 0 }
         
     | 
| 
       5 
9 
     | 
    
         
             
                    # Obtain a plain text tree representation 
         
     | 
| 
       6 
10 
     | 
    
         
             
                    # of the entity.
         
     | 
| 
       7 
11 
     | 
    
         
             
                    def self.visualize(entity, options = {})
         
     | 
| 
       8 
     | 
    
         
            -
                      options =  
     | 
| 
      
 12 
     | 
    
         
            +
                      options = DefaultOptions.merge(options)
         
     | 
| 
       9 
13 
     | 
    
         
             
                      string = ''
         
     | 
| 
       10 
14 
     | 
    
         
             
                      if entity.has_children?
         
     | 
| 
       11 
15 
     | 
    
         
             
                        spacer = '--'
         
     | 
| 
         @@ -3,10 +3,15 @@ module Treat 
     | 
|
| 
       3 
3 
     | 
    
         
             
                module Visualizers
         
     | 
| 
       4 
4 
     | 
    
         
             
                  # Creates a plain text visualization of an entity.
         
     | 
| 
       5 
5 
     | 
    
         
             
                  class Txt
         
     | 
| 
      
 6 
     | 
    
         
            +
                    # The default options for the visualizer.
         
     | 
| 
      
 7 
     | 
    
         
            +
                    DefaultOptions = { sep: ' ' }
         
     | 
| 
       6 
8 
     | 
    
         
             
                    # Obtain a plain text visualization of the entity,
         
     | 
| 
       7 
9 
     | 
    
         
             
                    # with no additional information.
         
     | 
| 
      
 10 
     | 
    
         
            +
                    # 
         
     | 
| 
      
 11 
     | 
    
         
            +
                    # Options:
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # (String) :sep => the separator to use between words.
         
     | 
| 
       8 
13 
     | 
    
         
             
                    def self.visualize(entity, options = {})
         
     | 
| 
       9 
     | 
    
         
            -
                      options 
     | 
| 
      
 14 
     | 
    
         
            +
                      options = DefaultOptions.merge(options)
         
     | 
| 
       10 
15 
     | 
    
         
             
                      return entity.value if !entity.has_children?
         
     | 
| 
       11 
16 
     | 
    
         
             
                      value = ''
         
     | 
| 
       12 
17 
     | 
    
         
             
                      entity.each do |child|
         
     | 
    
        data/lib/treat/formatters.rb
    CHANGED
    
    
    
        data/lib/treat/group.rb
    CHANGED
    
    | 
         @@ -61,14 +61,15 @@ module Treat 
     | 
|
| 
       61 
61 
     | 
    
         
             
                  end
         
     | 
| 
       62 
62 
     | 
    
         
             
                  is_target
         
     | 
| 
       63 
63 
     | 
    
         
             
                end
         
     | 
| 
      
 64 
     | 
    
         
            +
                # Cache the list of adaptors to improve performance.
         
     | 
| 
      
 65 
     | 
    
         
            +
                @@list = {}
         
     | 
| 
       64 
66 
     | 
    
         
             
                # Populates once the list of the adaptors in the group
         
     | 
| 
       65 
67 
     | 
    
         
             
                # by crawling the filesystem.
         
     | 
| 
       66 
     | 
    
         
            -
                @@list = {}
         
     | 
| 
       67 
68 
     | 
    
         
             
                def list
         
     | 
| 
       68 
69 
     | 
    
         
             
                  mod = ucc(cl(self))
         
     | 
| 
       69 
70 
     | 
    
         
             
                  if @@list[mod].nil?
         
     | 
| 
       70 
71 
     | 
    
         
             
                    @@list[mod] = []
         
     | 
| 
       71 
     | 
    
         
            -
                    dirs = Dir 
     | 
| 
      
 72 
     | 
    
         
            +
                    dirs = Dir.glob("#{Treat.lib}/treat/*/#{mod}/*.rb")
         
     | 
| 
       72 
73 
     | 
    
         
             
                    dirs.each do |file|
         
     | 
| 
       73 
74 
     | 
    
         
             
                      @@list[mod] <<
         
     | 
| 
       74 
75 
     | 
    
         
             
                      :"#{file.split('/')[-1][0..-4]}"
         
     | 
| 
         @@ -79,7 +80,7 @@ module Treat 
     | 
|
| 
       79 
80 
     | 
    
         
             
                # Get constants in this module, excluding those
         
     | 
| 
       80 
81 
     | 
    
         
             
                # defined by parent modules.
         
     | 
| 
       81 
82 
     | 
    
         
             
                def const_get(const); super(const, false); end
         
     | 
| 
       82 
     | 
    
         
            -
                #  
     | 
| 
      
 83 
     | 
    
         
            +
                # Lazy load the classes in the group.
         
     | 
| 
       83 
84 
     | 
    
         
             
                def const_missing(const)
         
     | 
| 
       84 
85 
     | 
    
         
             
                  bits = self.ancestors[0].to_s.split('::')
         
     | 
| 
       85 
86 
     | 
    
         
             
                  bits.collect! { |bit| ucc(bit) }
         
     | 
| 
         @@ -1,43 +1,40 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Inflectors
         
     | 
| 
       3 
3 
     | 
    
         
             
                module CardinalWords
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # This class is a wrapper for the functions included
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # in the 'linguistics' gem that allow to describe a
         
     | 
| 
      
 6 
     | 
    
         
            +
                  # number in words in cardinal form.
         
     | 
| 
      
 7 
     | 
    
         
            +
                  #
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # Project website: http://deveiate.org/projects/Linguistics/
         
     | 
| 
       4 
9 
     | 
    
         
             
                  class Linguistics
         
     | 
| 
      
 10 
     | 
    
         
            +
                    # Require the 'linguistics' gem.
         
     | 
| 
       5 
11 
     | 
    
         
             
                    silence_warnings { require 'linguistics' }
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # Return the description of a cardinal number in words.
         
     | 
| 
       6 
13 
     | 
    
         
             
                    # 
         
     | 
| 
       7 
14 
     | 
    
         
             
                    # Options:
         
     | 
| 
       8 
15 
     | 
    
         
             
                    #
         
     | 
| 
       9 
     | 
    
         
            -
                    # :group => Controls how many numbers at a time are 
         
     | 
| 
      
 16 
     | 
    
         
            +
                    # - :group => Controls how many numbers at a time are 
         
     | 
| 
       10 
17 
     | 
    
         
             
                    # grouped together. Valid values are 0 (normal grouping), 
         
     | 
| 
       11 
18 
     | 
    
         
             
                    # 1 (single-digit grouping, e.g., “one, two, three, four”), 
         
     | 
| 
       12 
19 
     | 
    
         
             
                    # 2 (double-digit grouping, e.g., “twelve, thirty-four”, or 
         
     | 
| 
       13 
20 
     | 
    
         
             
                    # 3 (triple-digit grouping, e.g., “one twenty-three, four”).
         
     | 
| 
       14 
     | 
    
         
            -
                    # :comma => Set the character/s used to separate word groups. 
         
     | 
| 
      
 21 
     | 
    
         
            +
                    # - :comma => Set the character/s used to separate word groups. 
         
     | 
| 
       15 
22 
     | 
    
         
             
                    # Defaults to ", ".
         
     | 
| 
       16 
     | 
    
         
            -
                    # :and => Set the word and/or characters used where ' and ' 
         
     | 
| 
      
 23 
     | 
    
         
            +
                    # - :and => Set the word and/or characters used where ' and ' 
         
     | 
| 
       17 
24 
     | 
    
         
             
                    # (the default) is normally used. Setting :and to ' ', for 
         
     | 
| 
       18 
25 
     | 
    
         
             
                    # example, will cause 2556 to be returned as “two-thousand, 
         
     | 
| 
       19 
26 
     | 
    
         
             
                    # five hundred fifty-six” instead of “two-thousand, five 
         
     | 
| 
       20 
27 
     | 
    
         
             
                    # hundred and fifty-six”.
         
     | 
| 
       21 
     | 
    
         
            -
                    # :zero => Set the word used to represent the numeral 0 in 
         
     | 
| 
      
 28 
     | 
    
         
            +
                    # - :zero => Set the word used to represent the numeral 0 in 
         
     | 
| 
       22 
29 
     | 
    
         
             
                    # the result. 'zero' is the default.
         
     | 
| 
       23 
     | 
    
         
            -
                    # :decimal => Set the translation of any decimal points in 
         
     | 
| 
      
 30 
     | 
    
         
            +
                    # - :decimal => Set the translation of any decimal points in 
         
     | 
| 
       24 
31 
     | 
    
         
             
                    # the number; the default is 'point'.
         
     | 
| 
       25 
     | 
    
         
            -
                    # :asArray If set to a true value, the number will be returned 
         
     | 
| 
      
 32 
     | 
    
         
            +
                    # - :asArray If set to a true value, the number will be returned 
         
     | 
| 
       26 
33 
     | 
    
         
             
                    # as an array of word groups instead of a String.
         
     | 
| 
       27 
34 
     | 
    
         
             
                    # 
         
     | 
| 
       28 
35 
     | 
    
         
             
                    # More specific options when using :type => :ordinal:
         
     | 
| 
       29 
     | 
    
         
            -
                    # 
         
     | 
| 
       30 
     | 
    
         
            -
                    # 
         
     | 
| 
       31 
36 
     | 
    
         
             
                    def self.cardinal_words(entity, options = {})
         
     | 
| 
       32 
     | 
    
         
            -
                       
     | 
| 
       33 
     | 
    
         
            -
                        l = entity.language.to_s.upcase
         
     | 
| 
       34 
     | 
    
         
            -
                        delegate = nil
         
     | 
| 
       35 
     | 
    
         
            -
                        silence_warnings { delegate = ::Linguistics.const_get(l) }
         
     | 
| 
       36 
     | 
    
         
            -
                      rescue RuntimeError
         
     | 
| 
       37 
     | 
    
         
            -
                        raise "Ruby Linguistics does not have a module " + 
         
     | 
| 
       38 
     | 
    
         
            -
                        " installed for the #{entity.language} language."
         
     | 
| 
       39 
     | 
    
         
            -
                      end
         
     | 
| 
       40 
     | 
    
         
            -
                      silence_warnings { delegate.numwords(entity.to_s, options) }
         
     | 
| 
      
 37 
     | 
    
         
            +
                      silence_warnings { ::Linguistics::EN.numwords(entity.to_s, options) }
         
     | 
| 
       41 
38 
     | 
    
         
             
                    end
         
     | 
| 
       42 
39 
     | 
    
         
             
                  end
         
     | 
| 
       43 
40 
     | 
    
         
             
                end
         
     | 
| 
         @@ -1,15 +1,28 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Inflectors
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Conjugations
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # This class is a wrapper for the functions included
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # in the 'linguistics' gem that allow to conjugate verbs.
         
     | 
| 
      
 6 
     | 
    
         
            +
                  #
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # Project website: http://deveiate.org/projects/Linguistics/
         
     | 
| 
       4 
8 
     | 
    
         
             
                  class Linguistics
         
     | 
| 
       5 
9 
     | 
    
         
             
                    silence_warnings { require 'linguistics' }
         
     | 
| 
       6 
     | 
    
         
            -
                     
     | 
| 
      
 10 
     | 
    
         
            +
                    # Conjugate a verb using ruby linguistics with the specified
         
     | 
| 
      
 11 
     | 
    
         
            +
                    # mode, tense, count and person.
         
     | 
| 
      
 12 
     | 
    
         
            +
                    #
         
     | 
| 
      
 13 
     | 
    
         
            +
                    # Options:
         
     | 
| 
      
 14 
     | 
    
         
            +
                    #
         
     | 
| 
      
 15 
     | 
    
         
            +
                    # - (Symbol) :mode => :infinitive, :indicative, :subjunctive, :participle
         
     | 
| 
      
 16 
     | 
    
         
            +
                    # - (Symbol) :tense => :past, :present, :future
         
     | 
| 
      
 17 
     | 
    
         
            +
                    # - (Symbol) :count => :singular, :plural
         
     | 
| 
      
 18 
     | 
    
         
            +
                    # - (Symbol) :person => :first, :second, :third
         
     | 
| 
      
 19 
     | 
    
         
            +
                    def self.conjugations(entity, parameters)
         
     | 
| 
       7 
20 
     | 
    
         
             
                      begin
         
     | 
| 
       8 
21 
     | 
    
         
             
                        l = entity.language.to_s.upcase
         
     | 
| 
       9 
22 
     | 
    
         
             
                        delegate = nil
         
     | 
| 
       10 
23 
     | 
    
         
             
                        silence_warnings { delegate = ::Linguistics.const_get(l) }
         
     | 
| 
       11 
24 
     | 
    
         
             
                      rescue RuntimeError
         
     | 
| 
       12 
     | 
    
         
            -
                        raise "Ruby Linguistics does not have a module " + 
     | 
| 
      
 25 
     | 
    
         
            +
                        raise "Ruby Linguistics does not have a module " +
         
     | 
| 
       13 
26 
     | 
    
         
             
                        " installed for the #{entity.language} language."
         
     | 
| 
       14 
27 
     | 
    
         
             
                      end
         
     | 
| 
       15 
28 
     | 
    
         
             
                      if parameters[:mode] == :infinitive
         
     | 
| 
         @@ -27,4 +40,4 @@ module Treat 
     | 
|
| 
       27 
40 
     | 
    
         
             
                  end
         
     | 
| 
       28 
41 
     | 
    
         
             
                end
         
     | 
| 
       29 
42 
     | 
    
         
             
              end
         
     | 
| 
       30 
     | 
    
         
            -
            end
         
     | 
| 
      
 43 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,24 +1,35 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Inflectors
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Declensions
         
     | 
| 
       4 
     | 
    
         
            -
                   
     | 
| 
       5 
     | 
    
         
            -
                  #  
     | 
| 
       6 
     | 
    
         
            -
                  #  
     | 
| 
      
 4 
     | 
    
         
            +
                  # This class is a wrapper for the functions included
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # in the 'linguistics' gem that allow to obtain the
         
     | 
| 
      
 6 
     | 
    
         
            +
                  # declensions of a word.
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # 
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # Project website: http://deveiate.org/projects/Linguistics/
         
     | 
| 
       7 
9 
     | 
    
         
             
                  class Linguistics
         
     | 
| 
       8 
     | 
    
         
            -
                     
     | 
| 
      
 10 
     | 
    
         
            +
                    # Require Ruby Linguistics
         
     | 
| 
      
 11 
     | 
    
         
            +
                    silence_warnings { require 'linguistics' }
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # Retrieve a declension of a word using the 'linguistics' gem.
         
     | 
| 
      
 13 
     | 
    
         
            +
                    # 
         
     | 
| 
      
 14 
     | 
    
         
            +
                    # Options:
         
     | 
| 
      
 15 
     | 
    
         
            +
                    #
         
     | 
| 
      
 16 
     | 
    
         
            +
                    # - (Identifier) :count => :singular, :plural
         
     | 
| 
      
 17 
     | 
    
         
            +
                    def self.declensions(entity, options = {})
         
     | 
| 
       9 
18 
     | 
    
         
             
                      begin
         
     | 
| 
       10 
19 
     | 
    
         
             
                        l = entity.language.to_s.upcase
         
     | 
| 
       11 
20 
     | 
    
         
             
                        delegate = nil
         
     | 
| 
       12 
21 
     | 
    
         
             
                        silence_warnings { delegate = ::Linguistics.const_get(l) }
         
     | 
| 
       13 
22 
     | 
    
         
             
                      rescue RuntimeError
         
     | 
| 
       14 
     | 
    
         
            -
                        raise "Ruby Linguistics does not have a module " + 
     | 
| 
      
 23 
     | 
    
         
            +
                        raise "Ruby Linguistics does not have a module " +
         
     | 
| 
       15 
24 
     | 
    
         
             
                        " installed for the #{entity.language} language."
         
     | 
| 
       16 
25 
     | 
    
         
             
                      end
         
     | 
| 
       17 
26 
     | 
    
         
             
                      string = entity.to_s
         
     | 
| 
       18 
27 
     | 
    
         
             
                      if options[:count] == :plural
         
     | 
| 
       19 
28 
     | 
    
         
             
                        if entity.has?(:category) &&
         
     | 
| 
       20 
29 
     | 
    
         
             
                          [:noun, :adjective, :verb].include?(entity.category)
         
     | 
| 
       21 
     | 
    
         
            -
                          silence_warnings  
     | 
| 
      
 30 
     | 
    
         
            +
                          silence_warnings do
         
     | 
| 
      
 31 
     | 
    
         
            +
                            delegate.send(:"plural_#{entity.category}", string)
         
     | 
| 
      
 32 
     | 
    
         
            +
                          end
         
     | 
| 
       22 
33 
     | 
    
         
             
                        else
         
     | 
| 
       23 
34 
     | 
    
         
             
                          silence_warnings { delegate.plural(string) }
         
     | 
| 
       24 
35 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -1,19 +1,18 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Inflectors
         
     | 
| 
       3 
3 
     | 
    
         
             
                module OrdinalWords
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # This class is a wrapper for the functions included
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # in the 'linguistics' gem that allow to describe a
         
     | 
| 
      
 6 
     | 
    
         
            +
                  # number in words in ordinal form.
         
     | 
| 
      
 7 
     | 
    
         
            +
                  #
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # Project website: http://deveiate.org/projects/Linguistics/
         
     | 
| 
       4 
9 
     | 
    
         
             
                  class Linguistics
         
     | 
| 
      
 10 
     | 
    
         
            +
                    # Require Ruby Linguistics.
         
     | 
| 
       5 
11 
     | 
    
         
             
                    silence_warnings { require 'linguistics' }
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # Desribe a number in words in ordinal form, using the
         
     | 
| 
      
 13 
     | 
    
         
            +
                    # 'linguistics' gem.
         
     | 
| 
       6 
14 
     | 
    
         
             
                    def self.ordinal_words(number, options = {})
         
     | 
| 
       7 
     | 
    
         
            -
                       
     | 
| 
       8 
     | 
    
         
            -
                        l = number.language.to_s.upcase
         
     | 
| 
       9 
     | 
    
         
            -
                        delegate = nil
         
     | 
| 
       10 
     | 
    
         
            -
                        silence_warnings { delegate = ::Linguistics.const_get(l) }
         
     | 
| 
       11 
     | 
    
         
            -
                      rescue RuntimeError
         
     | 
| 
       12 
     | 
    
         
            -
                        lang = Treat::Languages.describe(number.language)
         
     | 
| 
       13 
     | 
    
         
            -
                        raise "Ruby Linguistics does not have a module " +
         
     | 
| 
       14 
     | 
    
         
            -
                        " installed for the #{lang} language."
         
     | 
| 
       15 
     | 
    
         
            -
                      end
         
     | 
| 
       16 
     | 
    
         
            -
                      silence_warnings { delegate.ordinate(number.to_s) }
         
     | 
| 
      
 15 
     | 
    
         
            +
                      silence_warnings { ::Linguistics::EN.ordinate(number.to_s) }
         
     | 
| 
       17 
16 
     | 
    
         
             
                    end
         
     | 
| 
       18 
17 
     | 
    
         
             
                  end
         
     | 
| 
       19 
18 
     | 
    
         
             
                end
         
     | 
| 
         @@ -2,16 +2,20 @@ module Treat 
     | 
|
| 
       2 
2 
     | 
    
         
             
              module Inflectors
         
     | 
| 
       3 
3 
     | 
    
         
             
                module Stem
         
     | 
| 
       4 
4 
     | 
    
         
             
                  # Stem a word using a native Ruby implementation of the
         
     | 
| 
       5 
     | 
    
         
            -
                  # Porter stemming algorithm, ported to Ruby from  
     | 
| 
       6 
     | 
    
         
            -
                  # version coded up in Perl.
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # Porter stemming algorithm, ported to Ruby from a 
         
     | 
| 
      
 6 
     | 
    
         
            +
                  # version coded up in Perl. This is a simplified 
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # implementation; for a true and fast Porter stemmer,
         
     | 
| 
      
 8 
     | 
    
         
            +
                  # see Treat::Inflectors::Stem::PorterC.
         
     | 
| 
       7 
9 
     | 
    
         
             
                  #
         
     | 
| 
       8 
10 
     | 
    
         
             
                  # Authored by Ray Pereda (raypereda@hotmail.com).
         
     | 
| 
      
 11 
     | 
    
         
            +
                  # Unknown license.
         
     | 
| 
       9 
12 
     | 
    
         
             
                  #
         
     | 
| 
       10 
13 
     | 
    
         
             
                  # Original paper: Porter, 1980. An algorithm for suffix stripping, 
         
     | 
| 
       11 
14 
     | 
    
         
             
                  # Program, Vol. 14, no. 3, pp 130-137,
         
     | 
| 
       12 
15 
     | 
    
         
             
                  # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
         
     | 
| 
       13 
16 
     | 
    
         
             
                  class Porter
         
     | 
| 
       14 
17 
     | 
    
         
             
                    # Returns the stem of a word using a native Porter stemmer.
         
     | 
| 
      
 18 
     | 
    
         
            +
                    #
         
     | 
| 
       15 
19 
     | 
    
         
             
                    # Options: none.
         
     | 
| 
       16 
20 
     | 
    
         
             
                    def self.stem(word, options = {})
         
     | 
| 
       17 
21 
     | 
    
         
             
                      # Copy the word and convert it to a string.
         
     | 
| 
         @@ -9,10 +9,13 @@ module Treat 
     | 
|
| 
       9 
9 
     | 
    
         
             
                  # Program, Vol. 14, no. 3, pp 130-137,
         
     | 
| 
       10 
10 
     | 
    
         
             
                  # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
         
     | 
| 
       11 
11 
     | 
    
         
             
                  class PorterC
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # Require the 'ruby-stemmer' gem.
         
     | 
| 
       12 
13 
     | 
    
         
             
                    silence_warnings { require 'lingua/stemmer' }
         
     | 
| 
      
 14 
     | 
    
         
            +
                    # Remove a conflict between this gem and the 'engtagger' gem.
         
     | 
| 
       13 
15 
     | 
    
         
             
                    ::LinguaStemmer = ::Lingua
         
     | 
| 
       14 
16 
     | 
    
         
             
                    Object.instance_eval { remove_const :Lingua }
         
     | 
| 
       15 
     | 
    
         
            -
                    # Stem the word using  
     | 
| 
      
 17 
     | 
    
         
            +
                    # Stem the word using a full-blown Porter stemmer in C.
         
     | 
| 
      
 18 
     | 
    
         
            +
                    #
         
     | 
| 
       16 
19 
     | 
    
         
             
                    # Options: none.
         
     | 
| 
       17 
20 
     | 
    
         
             
                    def self.stem(word, options = {})
         
     | 
| 
       18 
21 
     | 
    
         
             
                      silence_warnings { ::LinguaStemmer.stemmer(word.to_s) }
         
     | 
| 
         @@ -9,10 +9,10 @@ module Treat 
     | 
|
| 
       9 
9 
     | 
    
         
             
                  # groups of rules: the first to clean the tokens, and 
         
     | 
| 
       10 
10 
     | 
    
         
             
                  # the second to alter suffixes."
         
     | 
| 
       11 
11 
     | 
    
         
             
                  #
         
     | 
| 
       12 
     | 
    
         
            -
                  # 
     | 
| 
       13 
     | 
    
         
            -
                  # 
     | 
| 
       14 
     | 
    
         
            -
                  # 
     | 
| 
       15 
     | 
    
         
            -
                  # 
     | 
| 
      
 12 
     | 
    
         
            +
                  # Project website: https://github.com/ealdent/uea-stemmer
         
     | 
| 
      
 13 
     | 
    
         
            +
                  # Original paper: Jenkins, Marie-Claire, Smith, Dan, 
         
     | 
| 
      
 14 
     | 
    
         
            +
                  # Conservative stemming for search and indexing, 2005.
         
     | 
| 
      
 15 
     | 
    
         
            +
                  # http://www.uea.ac.uk/polopoly_fs/1.85493!stemmer25feb.pdf
         
     | 
| 
       16 
16 
     | 
    
         
             
                  class UEA
         
     | 
| 
       17 
17 
     | 
    
         
             
                    # Require the 'uea-stemmer' gem.
         
     | 
| 
       18 
18 
     | 
    
         
             
                    silence_warnings { require 'uea-stemmer' }
         
     | 
| 
         @@ -183,6 +183,22 @@ module Treat 
     | 
|
| 
       183 
183 
     | 
    
         
             
                    ['PRT',	'Particle'],
         
     | 
| 
       184 
184 
     | 
    
         
             
                    ['S',	'Sentence']
         
     | 
| 
       185 
185 
     | 
    
         
             
                  ]
         
     | 
| 
      
 186 
     | 
    
         
            +
                  
         
     | 
| 
      
 187 
     | 
    
         
            +
                  # Maps Enju categories to Treat categories.
         
     | 
| 
      
 188 
     | 
    
         
            +
                  EnjuCatToCategory = {
         
     | 
| 
      
 189 
     | 
    
         
            +
                    'ADJ' => :adjective,
         
     | 
| 
      
 190 
     | 
    
         
            +
                    'ADV' => :adverb,
         
     | 
| 
      
 191 
     | 
    
         
            +
                    'CONJ' => :conjunction,
         
     | 
| 
      
 192 
     | 
    
         
            +
                    'COOD' => :conjunction,
         
     | 
| 
      
 193 
     | 
    
         
            +
                    'C' => :complementizer,
         
     | 
| 
      
 194 
     | 
    
         
            +
                    'D' => :determiner,
         
     | 
| 
      
 195 
     | 
    
         
            +
                    'N' => :noun,
         
     | 
| 
      
 196 
     | 
    
         
            +
                    'P' => :preposition,
         
     | 
| 
      
 197 
     | 
    
         
            +
                    'PN' => :punctuation,
         
     | 
| 
      
 198 
     | 
    
         
            +
                    'SC' => :conjunction,
         
     | 
| 
      
 199 
     | 
    
         
            +
                    'V' => :verb,
         
     | 
| 
      
 200 
     | 
    
         
            +
                    'PRT' => :particle
         
     | 
| 
      
 201 
     | 
    
         
            +
                  }
         
     | 
| 
       186 
202 
     | 
    
         | 
| 
       187 
203 
     | 
    
         
             
                  # Description of the xcat in the Enju output specification.
         
     | 
| 
       188 
204 
     | 
    
         
             
                  EnjuXCatDescription = [
         
     | 
| 
         @@ -1,8 +1,10 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Languages
         
     | 
| 
       3 
3 
     | 
    
         
             
                class English
         
     | 
| 
      
 4 
     | 
    
         
            +
                  
         
     | 
| 
       4 
5 
     | 
    
         
             
                  require 'treat/languages/english/tags'
         
     | 
| 
       5 
6 
     | 
    
         
             
                  require 'treat/languages/english/categories'
         
     | 
| 
      
 7 
     | 
    
         
            +
                  
         
     | 
| 
       6 
8 
     | 
    
         
             
                  Extractors = {
         
     | 
| 
       7 
9 
     | 
    
         
             
                    time: [:chronic],
         
     | 
| 
       8 
10 
     | 
    
         
             
                    topics: [:reuters],
         
     | 
| 
         @@ -11,7 +13,7 @@ module Treat 
     | 
|
| 
       11 
13 
     | 
    
         
             
                  }
         
     | 
| 
       12 
14 
     | 
    
         
             
                  Processors = {
         
     | 
| 
       13 
15 
     | 
    
         
             
                    chunkers: [:txt],
         
     | 
| 
       14 
     | 
    
         
            -
                    parsers: [: 
     | 
| 
      
 16 
     | 
    
         
            +
                    parsers: [:stanford, :enju],
         
     | 
| 
       15 
17 
     | 
    
         
             
                    segmenters: [:tactful, :punkt, :stanford],
         
     | 
| 
       16 
18 
     | 
    
         
             
                    tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
         
     | 
| 
       17 
19 
     | 
    
         
             
                  }
         
     | 
| 
         @@ -28,6 +30,7 @@ module Treat 
     | 
|
| 
       28 
30 
     | 
    
         
             
                    ordinal_words: [:linguistics],
         
     | 
| 
       29 
31 
     | 
    
         
             
                    cardinal_words: [:linguistics]
         
     | 
| 
       30 
32 
     | 
    
         
             
                  }
         
     | 
| 
      
 33 
     | 
    
         
            +
                  
         
     | 
| 
       31 
34 
     | 
    
         
             
                end
         
     | 
| 
       32 
35 
     | 
    
         
             
              end
         
     | 
| 
       33 
36 
     | 
    
         
             
            end
         
     | 
| 
         @@ -4,13 +4,12 @@ module Treat 
     | 
|
| 
       4 
4 
     | 
    
         
             
                  # A class that detects the category of a word from its tag,
         
     | 
| 
       5 
5 
     | 
    
         
             
                  # using the default tagger for the language of the entity.
         
     | 
| 
       6 
6 
     | 
    
         
             
                  class FromTag
         
     | 
| 
       7 
     | 
    
         
            -
                    DefaultOptions = { tagger: nil }
         
     | 
| 
       8 
7 
     | 
    
         
             
                    # Find the category of the current entity.
         
     | 
| 
      
 8 
     | 
    
         
            +
                    # 
         
     | 
| 
       9 
9 
     | 
    
         
             
                    # Options:
         
     | 
| 
       10 
     | 
    
         
            -
                    #  
     | 
| 
       11 
     | 
    
         
            -
                    #  
     | 
| 
      
 10 
     | 
    
         
            +
                    # 
         
     | 
| 
      
 11 
     | 
    
         
            +
                    # - (Symbol) :tagger => force the use of a tagger.
         
     | 
| 
       12 
12 
     | 
    
         
             
                    def self.category(entity, options = {})
         
     | 
| 
       13 
     | 
    
         
            -
                      options = DefaultOptions.merge(options)
         
     | 
| 
       14 
13 
     | 
    
         
             
                      tag = options[:tagger].nil? ? entity.tag : entity.tag(options[:tagger])
         
     | 
| 
       15 
14 
     | 
    
         
             
                      lang = Treat::Languages.get(entity.language)
         
     | 
| 
       16 
15 
     | 
    
         
             
                      cat = lang::WordTagToCategory[tag]
         
     | 
| 
         @@ -21,6 +20,7 @@ module Treat 
     | 
|
| 
       21 
20 
     | 
    
         
             
                        if cat.size == 1
         
     | 
| 
       22 
21 
     | 
    
         
             
                          return cat[0]
         
     | 
| 
       23 
22 
     | 
    
         
             
                        else
         
     | 
| 
      
 23 
     | 
    
         
            +
                          entity.set :tag_set, :penn
         
     | 
| 
       24 
24 
     | 
    
         
             
                          if entity.has?(:tag_set)
         
     | 
| 
       25 
25 
     | 
    
         
             
                            if cat[entity.tag_set]
         
     | 
| 
       26 
26 
     | 
    
         
             
                              return cat[entity.tag_set]
         
     | 
| 
         @@ -27,7 +27,7 @@ module Treat 
     | 
|
| 
       27 
27 
     | 
    
         
             
                    end
         
     | 
| 
       28 
28 
     | 
    
         
             
                    # Return the subject of the sentence|verb.
         
     | 
| 
       29 
29 
     | 
    
         
             
                    def self.subject(entity, options)
         
     | 
| 
       30 
     | 
    
         
            -
                      verb = entity.category == :verb ? 
         
     | 
| 
      
 30 
     | 
    
         
            +
                      verb = (entity.has?(:category) && entity.category == :verb) ? 
         
     | 
| 
       31 
31 
     | 
    
         
             
                      main_verb(entity) : entity.main_verb
         
     | 
| 
       32 
32 
     | 
    
         
             
                      args = []
         
     | 
| 
       33 
33 
     | 
    
         
             
                      main_verb.edges.each_pair do |id,edge|
         
     | 
| 
         @@ -37,7 +37,7 @@ module Treat 
     | 
|
| 
       37 
37 
     | 
    
         
             
                    end
         
     | 
| 
       38 
38 
     | 
    
         
             
                    # Return the object of the sentence|verb.
         
     | 
| 
       39 
39 
     | 
    
         
             
                    def self.object(entity, options)
         
     | 
| 
       40 
     | 
    
         
            -
                      verb = entity.category == :verb ? 
         
     | 
| 
      
 40 
     | 
    
         
            +
                      verb = (entity.has?(:category) && entity.category == :verb) ? 
         
     | 
| 
       41 
41 
     | 
    
         
             
                      main_verb(entity) : entity.main_verb
         
     | 
| 
       42 
42 
     | 
    
         
             
                      if verb.voice == 'passive'
         
     | 
| 
       43 
43 
     | 
    
         
             
                        return
         
     | 
| 
         @@ -50,7 +50,7 @@ module Treat 
     | 
|
| 
       50 
50 
     | 
    
         
             
                    end
         
     | 
| 
       51 
51 
     | 
    
         
             
                    # Find the main verb (shallowest verb in the tree).
         
     | 
| 
       52 
52 
     | 
    
         
             
                    def self.main_verb(entity, options)
         
     | 
| 
       53 
     | 
    
         
            -
                      verbs = entity. 
     | 
| 
      
 53 
     | 
    
         
            +
                      verbs = entity.verbs
         
     | 
| 
       54 
54 
     | 
    
         
             
                      if verbs.empty?
         
     | 
| 
       55 
55 
     | 
    
         
             
                        return
         
     | 
| 
       56 
56 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -52,24 +52,16 @@ module Treat 
     | 
|
| 
       52 
52 
     | 
    
         
             
                    @@tagger = nil
         
     | 
| 
       53 
53 
     | 
    
         
             
                    # Hold the user-set options
         
     | 
| 
       54 
54 
     | 
    
         
             
                    @@options = {}
         
     | 
| 
       55 
     | 
    
         
            -
                    # Hold the default options.
         
     | 
| 
       56 
     | 
    
         
            -
                    DefaultOptions =  {
         
     | 
| 
       57 
     | 
    
         
            -
                      lexicon: nil,
         
     | 
| 
       58 
     | 
    
         
            -
                      lexical_rules: nil, 
         
     | 
| 
       59 
     | 
    
         
            -
                      contextual_rules: nil
         
     | 
| 
       60 
     | 
    
         
            -
                    }
         
     | 
| 
       61 
55 
     | 
    
         
             
                    # Tag words using a native Brill tagger.
         
     | 
| 
       62 
56 
     | 
    
         
             
                    #
         
     | 
| 
       63 
     | 
    
         
            -
                    #  
     | 
| 
      
 57 
     | 
    
         
            +
                    # Options:
         
     | 
| 
      
 58 
     | 
    
         
            +
                    # 
         
     | 
| 
       64 
59 
     | 
    
         
             
                    # :lexicon => String (Lexicon file to use)
         
     | 
| 
       65 
60 
     | 
    
         
             
                    # :lexical_rules => String (Lexical rule file to use)
         
     | 
| 
       66 
61 
     | 
    
         
             
                    # :contextual_rules => String (Contextual rules file to use)
         
     | 
| 
       67 
62 
     | 
    
         
             
                    def self.tag(entity, options = {})
         
     | 
| 
       68 
63 
     | 
    
         
             
                      # Reinitialize the tagger if the options have changed.
         
     | 
| 
       69 
     | 
    
         
            -
                      if options != @@options
         
     | 
| 
       70 
     | 
    
         
            -
                        @@options = DefaultOptions.merge(options)
         
     | 
| 
       71 
     | 
    
         
            -
                        @@tagger = nil # Reset the tagger
         
     | 
| 
       72 
     | 
    
         
            -
                      end
         
     | 
| 
      
 64 
     | 
    
         
            +
                      @@tagger = nil if options != @@options
         
     | 
| 
       73 
65 
     | 
    
         
             
                      # Create the tagger if necessary
         
     | 
| 
       74 
66 
     | 
    
         
             
                      @@tagger ||= ::Brill::Tagger.new(options[:lexicon],
         
     | 
| 
       75 
67 
     | 
    
         
             
                      options[:lexical_rules], options[:contextual_rules])
         
     | 
| 
         @@ -24,9 +24,8 @@ module Treat 
     | 
|
| 
       24 
24 
     | 
    
         
             
                    @@options = {}
         
     | 
| 
       25 
25 
     | 
    
         
             
                    # Hold the default options.
         
     | 
| 
       26 
26 
     | 
    
         
             
                    DefaultOptions =  {
         
     | 
| 
       27 
     | 
    
         
            -
                      unknown_word_tag: ' 
     | 
| 
       28 
     | 
    
         
            -
                      relax: false 
     | 
| 
       29 
     | 
    
         
            -
                      debug: false
         
     | 
| 
      
 27 
     | 
    
         
            +
                      unknown_word_tag: 'FW',
         
     | 
| 
      
 28 
     | 
    
         
            +
                      relax: false
         
     | 
| 
       30 
29 
     | 
    
         
             
                    }
         
     | 
| 
       31 
30 
     | 
    
         
             
                    # Tag the word using a probabilistic model taking
         
     | 
| 
       32 
31 
     | 
    
         
             
                    # into account known words found in a lexicon and
         
     | 
| 
         @@ -34,11 +33,10 @@ module Treat 
     | 
|
| 
       34 
33 
     | 
    
         
             
                    #
         
     | 
| 
       35 
34 
     | 
    
         
             
                    # Options:
         
     | 
| 
       36 
35 
     | 
    
         
             
                    # 
         
     | 
| 
       37 
     | 
    
         
            -
                    # 
     | 
| 
      
 36 
     | 
    
         
            +
                    # - (Boolean) :relax => Relax the Hidden Markov Model: 
         
     | 
| 
       38 
37 
     | 
    
         
             
                    #   this may improve accuracy for uncommon words, 
         
     | 
| 
       39 
38 
     | 
    
         
             
                    #   particularly words used polysemously.
         
     | 
| 
       40 
     | 
    
         
            -
                    # 
     | 
| 
       41 
     | 
    
         
            -
                    #   :unknown_word_tag => (String) Tag for unknown words.
         
     | 
| 
      
 39 
     | 
    
         
            +
                    # - (String) :unknown_word_tag => Tag for unknown words.
         
     | 
| 
       42 
40 
     | 
    
         
             
                    def self.tag(entity, options = {})
         
     | 
| 
       43 
41 
     | 
    
         
             
                      # Reinitialize the tagger if the options have changed.
         
     | 
| 
       44 
42 
     | 
    
         
             
                      if options != @@options
         
     |