treat 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
 - data/lib/treat/config/data/core.rb +3 -1
 - data/lib/treat/config/data/languages/agnostic.rb +1 -1
 - data/lib/treat/core/dsl.rb +12 -44
 - data/lib/treat/version.rb +1 -1
 - data/lib/treat/workers/extractors/name_tag/stanford.rb +1 -1
 - data/lib/treat/workers/extractors/topic_words/lda.rb +1 -1
 - data/lib/treat/workers/formatters/readers/autoselect.rb +3 -1
 - data/lib/treat/workers/formatters/readers/html.rb +4 -2
 - data/lib/treat/workers/formatters/serializers/xml.rb +1 -1
 - data/lib/treat/workers/groupable.rb +1 -3
 - data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +3 -2
 - data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +12 -2
 - data/lib/treat/workers/lexicalizers/taggers/brill.rb +2 -1
 - data/lib/treat/workers/lexicalizers/taggers/lingua.rb +3 -1
 - data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -5
 - data/spec/entities/collection.rb +2 -2
 - data/spec/entities/entity.rb +4 -4
 - data/spec/helper.rb +16 -68
 - data/spec/{core → learning}/data_set.rb +0 -0
 - data/spec/{core → learning}/export.rb +0 -0
 - data/spec/{core → learning}/problem.rb +0 -0
 - data/spec/{core → learning}/question.rb +0 -0
 - data/spec/sandbox.rb +14 -3
 - data/spec/workers/agnostic.rb +80 -30
 - data/spec/workers/english.rb +475 -190
 - metadata +6 -11
 - data/files/21552208.html +0 -792
 - data/files/nethttp-cheat-sheet-2940.html +0 -392
 - data/lib/treat/config/data/config.rb +0 -50
 - data/spec/workers/language.rb +0 -280
 - data/spec/workers.rb +0 -28
 
    
        data/README.md
    CHANGED
    
    | 
         @@ -20,6 +20,17 @@ Treat is a toolkit for natural language processing and computational linguistics 
     | 
|
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
            I am actively seeking developers that can help maintain and expand this project. You can find a list of ideas for contributing to the project [here](https://github.com/louismullie/treat/wiki/Contributing).
         
     | 
| 
       22 
22 
     | 
    
         | 
| 
      
 23 
     | 
    
         
            +
            **Authors**
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            Lead developer: @louismullie [[Twitter](https://twitter.com/LouisMullie)]
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            Contributors:
         
     | 
| 
      
 28 
     | 
    
         
            +
            - @bdigital
         
     | 
| 
      
 29 
     | 
    
         
            +
            - @automatedtendencies
         
     | 
| 
      
 30 
     | 
    
         
            +
            - @LeFnord
         
     | 
| 
      
 31 
     | 
    
         
            +
            - @darkphantum
         
     | 
| 
      
 32 
     | 
    
         
            +
            - @whistlerbrk
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
       23 
34 
     | 
    
         
             
            **License**
         
     | 
| 
       24 
35 
     | 
    
         | 
| 
       25 
36 
     | 
    
         
             
            This software is released under the [GPL License](https://github.com/louismullie/treat/wiki/License-Information) and includes software released under the GPL, Ruby, Apache 2.0 and MIT licenses.
         
     | 
| 
         @@ -3,7 +3,7 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
                'nokogiri', 'ferret',
         
     | 
| 
       4 
4 
     | 
    
         
             
                'bson_ext', 'mongo', 'lda-ruby',
         
     | 
| 
       5 
5 
     | 
    
         
             
                'stanford-core-nlp', 'linguistics',
         
     | 
| 
       6 
     | 
    
         
            -
                ' 
     | 
| 
      
 6 
     | 
    
         
            +
                'jruby-readability', 'whatlanguage',
         
     | 
| 
       7 
7 
     | 
    
         
             
                'chronic', 'nickel', 'decisiontree',
         
     | 
| 
       8 
8 
     | 
    
         
             
                'rb-libsvm', 'ruby-fann', 'zip',
         
     | 
| 
       9 
9 
     | 
    
         
             
                'tf-idf-similarity', 'narray'
         
     | 
    
        data/lib/treat/core/dsl.rb
    CHANGED
    
    | 
         @@ -1,53 +1,21 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Treat::Core::DSL
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
              # Message for deprecation of old DSL syntax.
         
     | 
| 
       4 
     | 
    
         
            -
              DeprecationMessage = "The DSL that used " +
         
     | 
| 
       5 
     | 
    
         
            -
              "capitalized entity names is now deprecated. " +
         
     | 
| 
       6 
     | 
    
         
            -
              "Use `include Treat::Core::DSL` along with " +
         
     | 
| 
       7 
     | 
    
         
            -
              "lowercase names from now on." 
         
     | 
| 
       8 
     | 
    
         
            -
              
         
     | 
| 
       9 
3 
     | 
    
         
             
              # Map all classes in Treat::Entities to
         
     | 
| 
       10 
4 
     | 
    
         
             
              # a global builder function (entity, word,
         
     | 
| 
       11 
5 
     | 
    
         
             
              # phrase, punctuation, symbol, list, etc.)
         
     | 
| 
       12 
6 
     | 
    
         
             
              def self.included(base)
         
     | 
| 
       13 
     | 
    
         
            -
                 
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                   
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
                    define_method(mname.capitalize) do |*args|
         
     | 
| 
       26 
     | 
    
         
            -
                      raise DeprecationMessage
         
     | 
| 
       27 
     | 
    
         
            -
                    end
         
     | 
| 
       28 
     | 
    
         
            -
                    old_mm = instance_method(:method_missing)
         
     | 
| 
       29 
     | 
    
         
            -
                    define_method(:method_missing) do |sym,*args,&block|
         
     | 
| 
       30 
     | 
    
         
            -
                      return klass.build(*args) if sym == mname
         
     | 
| 
       31 
     | 
    
         
            -
                      old_mm.bind(self).call(sym,*args,&block)
         
     | 
| 
       32 
     | 
    
         
            -
                    end
         
     | 
| 
       33 
     | 
    
         
            -
                  end
         
     | 
| 
       34 
     | 
    
         
            -
                end
         
     | 
| 
       35 
     | 
    
         
            -
              end
         
     | 
| 
       36 
     | 
    
         
            -
              
         
     | 
| 
       37 
     | 
    
         
            -
              # Map all classes in the Learning module
         
     | 
| 
       38 
     | 
    
         
            -
              # to a global builder function. Defines:
         
     | 
| 
       39 
     | 
    
         
            -
              # dataset, export, feature, tag, problem
         
     | 
| 
       40 
     | 
    
         
            -
              # question.
         
     | 
| 
       41 
     | 
    
         
            -
              def self.sweeten_learning(base, on = true)
         
     | 
| 
       42 
     | 
    
         
            -
                Treat::Learning.constants.each do |kname|
         
     | 
| 
       43 
     | 
    
         
            -
                  mname = kname.downcase
         
     | 
| 
       44 
     | 
    
         
            -
                  klass = Treat::Learning.const_get(kname)
         
     | 
| 
       45 
     | 
    
         
            -
                  base.class_eval do
         
     | 
| 
       46 
     | 
    
         
            -
                    old_mm = instance_method(:method_missing)
         
     | 
| 
       47 
     | 
    
         
            -
                    define_method(:method_missing) do |sym,*args,&block|
         
     | 
| 
       48 
     | 
    
         
            -
                      return klass.new(*args) if sym == mname
         
     | 
| 
       49 
     | 
    
         
            -
                      old_mm.bind(self).call(sym,*args,&block)
         
     | 
| 
       50 
     | 
    
         
            -
                    end
         
     | 
| 
      
 7 
     | 
    
         
            +
                def method_missing(sym,*args,&block)
         
     | 
| 
      
 8 
     | 
    
         
            +
                  @@entities ||= Treat.core.entities.list
         
     | 
| 
      
 9 
     | 
    
         
            +
                  @@learning ||= Treat.core.learning.list
         
     | 
| 
      
 10 
     | 
    
         
            +
                  if @@entities.include?(sym)
         
     | 
| 
      
 11 
     | 
    
         
            +
                    klass = Treat::Entities.const_get(sym.cc)
         
     | 
| 
      
 12 
     | 
    
         
            +
                    return klass.build(*args)
         
     | 
| 
      
 13 
     | 
    
         
            +
                  elsif @@learning.include?(sym)
         
     | 
| 
      
 14 
     | 
    
         
            +
                    klass = Treat::Learning.const_get(sym.cc)
         
     | 
| 
      
 15 
     | 
    
         
            +
                    return klass.new(*args)
         
     | 
| 
      
 16 
     | 
    
         
            +
                  else
         
     | 
| 
      
 17 
     | 
    
         
            +
                    super(sym,*args,&block)
         
     | 
| 
      
 18 
     | 
    
         
            +
                    raise "Uncaught method ended up in Treat DSL."
         
     | 
| 
       51 
19 
     | 
    
         
             
                  end
         
     | 
| 
       52 
20 
     | 
    
         
             
                end
         
     | 
| 
       53 
21 
     | 
    
         
             
              end
         
     | 
    
        data/lib/treat/version.rb
    CHANGED
    
    
| 
         @@ -25,7 +25,7 @@ class Treat::Workers::Extractors::NameTag::Stanford 
     | 
|
| 
       25 
25 
     | 
    
         
             
                isolated_token = entity.is_a?(Treat::Entities::Token)
         
     | 
| 
       26 
26 
     | 
    
         
             
                tokens = isolated_token ? [entity] : entity.tokens
         
     | 
| 
       27 
27 
     | 
    
         | 
| 
       28 
     | 
    
         
            -
                ms = StanfordCoreNLP::Config::Models[:ner][language]
         
     | 
| 
      
 28 
     | 
    
         
            +
                ms = StanfordCoreNLP::Config::Models[:ner][language.intern]
         
     | 
| 
       29 
29 
     | 
    
         
             
                model_path = Treat.libraries.stanford.model_path ||
         
     | 
| 
       30 
30 
     | 
    
         
             
                (Treat.paths.models + '/stanford/')
         
     | 
| 
       31 
31 
     | 
    
         
             
                ms = model_path + '/' + 
         
     | 
| 
         @@ -12,7 +12,9 @@ class Treat::Workers::Formatters::Readers::Autoselect 
     | 
|
| 
       12 
12 
     | 
    
         
             
              #  - (Symbol) :default_to => format to default to.
         
     | 
| 
       13 
13 
     | 
    
         
             
              def self.read(document, options = {})
         
     | 
| 
       14 
14 
     | 
    
         
             
                options = DefaultOptions.merge(options)
         
     | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
      
 15 
     | 
    
         
            +
                fmt = detect_format(document.file, options[:default_to])
         
     | 
| 
      
 16 
     | 
    
         
            +
                Treat::Workers::Formatters::Readers.
         
     | 
| 
      
 17 
     | 
    
         
            +
                const_get(fmt.cc).read(document,options)
         
     | 
| 
       16 
18 
     | 
    
         
             
              end
         
     | 
| 
       17 
19 
     | 
    
         | 
| 
       18 
20 
     | 
    
         
             
              def self.detect_format(filename, default_to = nil)
         
     | 
| 
         @@ -6,12 +6,13 @@ 
     | 
|
| 
       6 
6 
     | 
    
         
             
            # https://github.com/iterationlabs/ruby-readability
         
     | 
| 
       7 
7 
     | 
    
         
             
            class Treat::Workers::Formatters::Readers::HTML
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
              silence_warnings { require ' 
     | 
| 
      
 9 
     | 
    
         
            +
              silence_warnings { require 'jruby-readability' }
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
              # By default, don't backup the original HTML
         
     | 
| 
       12 
12 
     | 
    
         
             
              DefaultOptions = {
         
     | 
| 
       13 
13 
     | 
    
         
             
                :keep_html => false,
         
     | 
| 
       14 
     | 
    
         
            -
                :tags => %w[p div h1 h2 h3 ul ol dl dt li]
         
     | 
| 
      
 14 
     | 
    
         
            +
                :tags => %w[p div h1 h2 h3 ul ol dl dt li img],
         
     | 
| 
      
 15 
     | 
    
         
            +
                
         
     | 
| 
       15 
16 
     | 
    
         
             
              }
         
     | 
| 
       16 
17 
     | 
    
         | 
| 
       17 
18 
     | 
    
         
             
              # Read the HTML document and strip it of its markup.
         
     | 
| 
         @@ -46,6 +47,7 @@ class Treat::Workers::Formatters::Readers::HTML 
     | 
|
| 
       46 
47 
     | 
    
         
             
                  d = Readability::Document.new(html, options)
         
     | 
| 
       47 
48 
     | 
    
         
             
                  document.value = "<h1>#{d.title}</h1>\n" + d.content
         
     | 
| 
       48 
49 
     | 
    
         
             
                  document.set :format, 'html'
         
     | 
| 
      
 50 
     | 
    
         
            +
                  document.set :images, d.images
         
     | 
| 
       49 
51 
     | 
    
         
             
                end
         
     | 
| 
       50 
52 
     | 
    
         | 
| 
       51 
53 
     | 
    
         
             
                document
         
     | 
| 
         @@ -69,9 +69,7 @@ module Treat::Workers::Groupable 
     | 
|
| 
       69 
69 
     | 
    
         | 
| 
       70 
70 
     | 
    
         
             
              # Get constants in this module, excluding by
         
     | 
| 
       71 
71 
     | 
    
         
             
              # default those defined by parent modules.
         
     | 
| 
       72 
     | 
    
         
            -
              def const_get(const)
         
     | 
| 
       73 
     | 
    
         
            -
                super(const, false)
         
     | 
| 
       74 
     | 
    
         
            -
              end
         
     | 
| 
      
 72 
     | 
    
         
            +
              def const_get(const); super(const, false); end
         
     | 
| 
       75 
73 
     | 
    
         | 
| 
       76 
74 
     | 
    
         
             
              # Modify the extended class.
         
     | 
| 
       77 
75 
     | 
    
         
             
              def self.extended(group)
         
     | 
| 
         @@ -28,8 +28,9 @@ class Treat::Workers::Lexicalizers::Categorizers::FromTag 
     | 
|
| 
       28 
28 
     | 
    
         | 
| 
       29 
29 
     | 
    
         
             
                tag = entity.check_has(:tag)
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
       31 
     | 
    
         
            -
                return 'unknown' if tag.nil? || tag == '' 
     | 
| 
       32 
     | 
    
         
            -
                return ' 
     | 
| 
      
 31 
     | 
    
         
            +
                return 'unknown' if tag.nil? || tag == ''
         
     | 
| 
      
 32 
     | 
    
         
            +
                return 'fragment' if tag == 'F'
         
     | 
| 
      
 33 
     | 
    
         
            +
                return 'sentence' if tag == 'S'
         
     | 
| 
       33 
34 
     | 
    
         
             
                return 'number' if entity.type == :number
         
     | 
| 
       34 
35 
     | 
    
         | 
| 
       35 
36 
     | 
    
         
             
                return Ptc[entity.to_s] if entity.type == :punctuation
         
     | 
| 
         @@ -29,9 +29,19 @@ class Treat::Workers::Lexicalizers::Sensers::Wordnet 
     | 
|
| 
       29 
29 
     | 
    
         | 
| 
       30 
30 
     | 
    
         
             
                category = word.check_has(:category)
         
     | 
| 
       31 
31 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
                 
     | 
| 
      
 32 
     | 
    
         
            +
                if !options[:nym] 
         
     | 
| 
       33 
33 
     | 
    
         
             
                  raise Treat::Exception, "You must supply " +
         
     | 
| 
       34 
     | 
    
         
            -
                  "the :nym option ( 
     | 
| 
      
 34 
     | 
    
         
            +
                  "the :nym option ('synonyms', 'hypernyms', etc.)"
         
     | 
| 
      
 35 
     | 
    
         
            +
                end
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                if !options[:nym].is_a?(Symbol)
         
     | 
| 
      
 38 
     | 
    
         
            +
                  options[:nym] == options[:nym].intern
         
     | 
| 
      
 39 
     | 
    
         
            +
                end
         
     | 
| 
      
 40 
     | 
    
         
            +
              
         
     | 
| 
      
 41 
     | 
    
         
            +
                if ![:synonyms, :antonyms,
         
     | 
| 
      
 42 
     | 
    
         
            +
                  :hypernyms, :hyponyms].include?(options[:nym])
         
     | 
| 
      
 43 
     | 
    
         
            +
                  raise Treat::Exception, "You must supply " +
         
     | 
| 
      
 44 
     | 
    
         
            +
                  "a valid :nym option ('synonyms', 'hypernyms', etc.)" 
         
     | 
| 
       35 
45 
     | 
    
         
             
                end
         
     | 
| 
       36 
46 
     | 
    
         | 
| 
       37 
47 
     | 
    
         
             
                unless ['noun', 'adjective', 'verb'].
         
     | 
| 
         @@ -47,7 +47,8 @@ class Treat::Workers::Lexicalizers::Taggers::Brill 
     | 
|
| 
       47 
47 
     | 
    
         | 
| 
       48 
48 
     | 
    
         
             
                return 'S' if entity.is_a?(Treat::Entities::Sentence)
         
     | 
| 
       49 
49 
     | 
    
         
             
                return 'P' if entity.is_a?(Treat::Entities::Phrase)
         
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
      
 50 
     | 
    
         
            +
                return 'F' if entity.is_a?(Treat::Entities::Fragment)
         
     | 
| 
      
 51 
     | 
    
         
            +
                return 'G' if entity.is_a?(Treat::Entities::Group)
         
     | 
| 
       51 
52 
     | 
    
         
             
              end
         
     | 
| 
       52 
53 
     | 
    
         | 
| 
       53 
54 
     | 
    
         
             
            end
         
     | 
| 
         @@ -65,9 +65,11 @@ class Treat::Workers::Lexicalizers::Taggers::Lingua 
     | 
|
| 
       65 
65 
     | 
    
         
             
                  !entity.parent_sentence
         
     | 
| 
       66 
66 
     | 
    
         
             
                    entity.set :tag_set, :penn
         
     | 
| 
       67 
67 
     | 
    
         
             
                end
         
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
      
 68 
     | 
    
         
            +
                
         
     | 
| 
       69 
69 
     | 
    
         
             
                return 'S' if entity.is_a?(Treat::Entities::Sentence)
         
     | 
| 
       70 
70 
     | 
    
         
             
                return 'P' if entity.is_a?(Treat::Entities::Phrase)
         
     | 
| 
      
 71 
     | 
    
         
            +
                return 'F' if entity.is_a?(Treat::Entities::Fragment)
         
     | 
| 
      
 72 
     | 
    
         
            +
                return 'G' if entity.is_a?(Treat::Entities::Group)
         
     | 
| 
       71 
73 
     | 
    
         | 
| 
       72 
74 
     | 
    
         
             
              end
         
     | 
| 
       73 
75 
     | 
    
         | 
| 
         @@ -32,11 +32,10 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford 
     | 
|
| 
       32 
32 
     | 
    
         
             
                  entity.set :tag_set, tag_set
         
     | 
| 
       33 
33 
     | 
    
         
             
                end
         
     | 
| 
       34 
34 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
                if entity.is_a?(Treat::Entities::Sentence)
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
                 
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
                return 'S' if entity.is_a?(Treat::Entities::Sentence)
         
     | 
| 
      
 36 
     | 
    
         
            +
                return 'P' if entity.is_a?(Treat::Entities::Phrase)
         
     | 
| 
      
 37 
     | 
    
         
            +
                return 'F' if entity.is_a?(Treat::Entities::Fragment)
         
     | 
| 
      
 38 
     | 
    
         
            +
                return 'G' if entity.is_a?(Treat::Entities::Group)
         
     | 
| 
       40 
39 
     | 
    
         | 
| 
       41 
40 
     | 
    
         
             
                # Handle options and initialize the tagger.
         
     | 
| 
       42 
41 
     | 
    
         
             
                lang = entity.language
         
     | 
    
        data/spec/entities/collection.rb
    CHANGED
    
    | 
         @@ -15,7 +15,7 @@ describe Treat::Entities::Collection do 
     | 
|
| 
       15 
15 
     | 
    
         
             
                    it "recursively searches the folder for " +
         
     | 
| 
       16 
16 
     | 
    
         
             
                    "files and opens them into a collection of documents" do
         
     | 
| 
       17 
17 
     | 
    
         
             
                      collection = Treat::Entities::Collection.build(@file)
         
     | 
| 
       18 
     | 
    
         
            -
                      collection.size.should eql  
     | 
| 
      
 18 
     | 
    
         
            +
                      collection.size.should eql 6
         
     | 
| 
       19 
19 
     | 
    
         
             
                    end
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -40,7 +40,7 @@ describe Treat::Entities::Collection do 
     | 
|
| 
       40 
40 
     | 
    
         
             
                    f = Treat.paths.spec + 'workers/examples/english/economist'
         
     | 
| 
       41 
41 
     | 
    
         
             
                    c = Treat::Entities::Collection.build(f)
         
     | 
| 
       42 
42 
     | 
    
         
             
                    c << Treat::Entities::Document.new
         
     | 
| 
       43 
     | 
    
         
            -
                    c.size.should eql  
     | 
| 
      
 43 
     | 
    
         
            +
                    c.size.should eql 4
         
     | 
| 
       44 
44 
     | 
    
         
             
                  end
         
     | 
| 
       45 
45 
     | 
    
         | 
| 
       46 
46 
     | 
    
         
             
              end
         
     | 
    
        data/spec/entities/entity.rb
    CHANGED
    
    | 
         @@ -391,11 +391,11 @@ describe Treat::Entities::Entity do 
     | 
|
| 
       391 
391 
     | 
    
         
             
                  context "when language detection is disabled " +
         
     | 
| 
       392 
392 
     | 
    
         
             
                  "(Treat.core.detect is set to false)" do
         
     | 
| 
       393 
393 
     | 
    
         
             
                    it "returns the default language (Treat.core.language.default)" do
         
     | 
| 
       394 
     | 
    
         
            -
             
     | 
| 
       395 
     | 
    
         
            -
                       
     | 
| 
      
 394 
     | 
    
         
            +
                      Treat.core.language.detect = false
         
     | 
| 
      
 395 
     | 
    
         
            +
                      Treat.core.language.default = :test
         
     | 
| 
       396 
396 
     | 
    
         
             
                      s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
         
     | 
| 
       397 
     | 
    
         
            -
                       
     | 
| 
       398 
     | 
    
         
            -
                       
     | 
| 
      
 397 
     | 
    
         
            +
                      s.language.should eql :test
         
     | 
| 
      
 398 
     | 
    
         
            +
                      Treat.core.language.default = :english
         
     | 
| 
       399 
399 
     | 
    
         
             
                    end
         
     | 
| 
       400 
400 
     | 
    
         
             
                  end
         
     | 
| 
       401 
401 
     | 
    
         | 
    
        data/spec/helper.rb
    CHANGED
    
    | 
         @@ -1,13 +1,8 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require_relative '../lib/treat'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            module Treat::Specs
         
     | 
| 
       3 
4 
     | 
    
         | 
| 
       4 
     | 
    
         
            -
              # Require the worker specs.
         
     | 
| 
       5 
     | 
    
         
            -
              require_relative 'workers'
         
     | 
| 
       6 
     | 
    
         
            -
              # Require RSpec library.
         
     | 
| 
       7 
5 
     | 
    
         
             
              require 'rspec'
         
     | 
| 
       8 
     | 
    
         
            -
              # Require Ruby benchmark library.
         
     | 
| 
       9 
     | 
    
         
            -
              require 'benchmark'
         
     | 
| 
       10 
     | 
    
         
            -
              # Require gem to build ASCII tables.
         
     | 
| 
       11 
6 
     | 
    
         | 
| 
       12 
7 
     | 
    
         
             
              # Some configuration options for devel.
         
     | 
| 
       13 
8 
     | 
    
         
             
              Treat.databases.mongo.db = 'treat_test'
         
     | 
| 
         @@ -20,30 +15,11 @@ module Treat::Specs 
     | 
|
| 
       20 
15 
     | 
    
         
             
              Treat.libraries.reuters.model_path =
         
     | 
| 
       21 
16 
     | 
    
         
             
              '/ruby/reuters/'
         
     | 
| 
       22 
17 
     | 
    
         | 
| 
      
 18 
     | 
    
         
            +
              ModuleFiles = ['entities/*.rb', 'learning/*.rb']
         
     | 
| 
      
 19 
     | 
    
         
            +
              
         
     | 
| 
       23 
20 
     | 
    
         
             
              # Provide helper functions for running specs.
         
     | 
| 
       24 
21 
     | 
    
         
             
              class Helper
         
     | 
| 
       25 
22 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
                ModuleFiles = [
         
     | 
| 
       27 
     | 
    
         
            -
                  './spec/core/*.rb', 
         
     | 
| 
       28 
     | 
    
         
            -
                  './spec/entities/*.rb'
         
     | 
| 
       29 
     | 
    
         
            -
                ]
         
     | 
| 
       30 
     | 
    
         
            -
                
         
     | 
| 
       31 
     | 
    
         
            -
                # Run all worker example files as :specs
         
     | 
| 
       32 
     | 
    
         
            -
                # or :benchmarks for the given language.
         
     | 
| 
       33 
     | 
    
         
            -
                def self.run_examples_as(what, language)
         
     | 
| 
       34 
     | 
    
         
            -
                  self.require_language_files(language)
         
     | 
| 
       35 
     | 
    
         
            -
                  Treat::Specs::Workers::Language.
         
     | 
| 
       36 
     | 
    
         
            -
                  list.each { |l| l.new(what).run }
         
     | 
| 
       37 
     | 
    
         
            -
                  RSpec::Core::CommandLine.new([]).run($stderr, $stdout)
         
     | 
| 
       38 
     | 
    
         
            -
                end
         
     | 
| 
       39 
     | 
    
         
            -
                
         
     | 
| 
       40 
     | 
    
         
            -
                # Run specs for the core classes.
         
     | 
| 
       41 
     | 
    
         
            -
                def self.run_core_specs
         
     | 
| 
       42 
     | 
    
         
            -
                  RSpec::Core::Runner.run(
         
     | 
| 
       43 
     | 
    
         
            -
                  ModuleFiles.map { |d| Dir.glob(d) }, 
         
     | 
| 
       44 
     | 
    
         
            -
                  $stderr, $stdout)
         
     | 
| 
       45 
     | 
    
         
            -
                end
         
     | 
| 
       46 
     | 
    
         
            -
                
         
     | 
| 
       47 
23 
     | 
    
         
             
                # Start SimpleCov coverage.
         
     | 
| 
       48 
24 
     | 
    
         
             
                def self.start_coverage
         
     | 
| 
       49 
25 
     | 
    
         
             
                  require 'simplecov'
         
     | 
| 
         @@ -61,56 +37,28 @@ module Treat::Specs 
     | 
|
| 
       61 
37 
     | 
    
         
             
                  end
         
     | 
| 
       62 
38 
     | 
    
         
             
                end
         
     | 
| 
       63 
39 
     | 
    
         | 
| 
      
 40 
     | 
    
         
            +
                # Run specs for the core classes.
         
     | 
| 
      
 41 
     | 
    
         
            +
                def self.run_core_specs
         
     | 
| 
      
 42 
     | 
    
         
            +
                  files = ModuleFiles.map do |d|
         
     | 
| 
      
 43 
     | 
    
         
            +
                    Dir.glob(Treat.paths.spec + d)
         
     | 
| 
      
 44 
     | 
    
         
            +
                  end
         
     | 
| 
      
 45 
     | 
    
         
            +
                  RSpec::Core::Runner.run(files)
         
     | 
| 
      
 46 
     | 
    
         
            +
                end
         
     | 
| 
      
 47 
     | 
    
         
            +
                
         
     | 
| 
       64 
48 
     | 
    
         
             
                # Require language files based on the argument.
         
     | 
| 
       65 
     | 
    
         
            -
                def self. 
     | 
| 
       66 
     | 
    
         
            -
                  # Require the base language class.
         
     | 
| 
       67 
     | 
    
         
            -
                  require_relative 'workers/language'
         
     | 
| 
      
 49 
     | 
    
         
            +
                def self.run_language_specs(lang)
         
     | 
| 
       68 
50 
     | 
    
         
             
                  # If no language supplied, get all languages.
         
     | 
| 
       69 
     | 
    
         
            -
                  if ! 
     | 
| 
      
 51 
     | 
    
         
            +
                  if !lang || lang == ''
         
     | 
| 
       70 
52 
     | 
    
         
             
                    pattern = "./spec/workers/*.rb"
         
     | 
| 
       71 
53 
     | 
    
         
             
                  # Otherwise, get a specific language file.
         
     | 
| 
       72 
54 
     | 
    
         
             
                  else
         
     | 
| 
       73 
     | 
    
         
            -
                    pattern = "./spec/workers/#{ 
     | 
| 
       74 
     | 
    
         
            -
                    # Check if a spec file exists.
         
     | 
| 
      
 55 
     | 
    
         
            +
                    pattern = "./spec/workers/#{lang}.rb"
         
     | 
| 
       75 
56 
     | 
    
         
             
                    unless File.readable?(pattern)
         
     | 
| 
       76 
57 
     | 
    
         
             
                      raise Treat::Exception, 
         
     | 
| 
       77 
     | 
    
         
            -
                      "There are no examples for '#{ 
     | 
| 
       78 
     | 
    
         
            -
                    end
         
     | 
| 
       79 
     | 
    
         
            -
                  end
         
     | 
| 
       80 
     | 
    
         
            -
                  # Require all files matched by the pattern.
         
     | 
| 
       81 
     | 
    
         
            -
                  Dir.glob(pattern).each { |f| require f }
         
     | 
| 
       82 
     | 
    
         
            -
                end
         
     | 
| 
       83 
     | 
    
         
            -
                
         
     | 
| 
       84 
     | 
    
         
            -
                def self.text_table(headings, rows)
         
     | 
| 
       85 
     | 
    
         
            -
                  require 'terminal-table'
         
     | 
| 
       86 
     | 
    
         
            -
                  puts Terminal::Table.new(
         
     | 
| 
       87 
     | 
    
         
            -
                  headings: headings, rows: rows)
         
     | 
| 
       88 
     | 
    
         
            -
                end
         
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
                def self.html_table(headings, rows)
         
     | 
| 
       91 
     | 
    
         
            -
                  require 'fileutils'
         
     | 
| 
       92 
     | 
    
         
            -
                  html = "<table>\n"
         
     | 
| 
       93 
     | 
    
         
            -
                  html += "<tr>\n"
         
     | 
| 
       94 
     | 
    
         
            -
                  headings.each do |heading|
         
     | 
| 
       95 
     | 
    
         
            -
                    html += "<td>" + heading + "</td>\n"
         
     | 
| 
       96 
     | 
    
         
            -
                  end
         
     | 
| 
       97 
     | 
    
         
            -
                  html += "</tr>\n"
         
     | 
| 
       98 
     | 
    
         
            -
                  rows.each do |row|
         
     | 
| 
       99 
     | 
    
         
            -
                    html += "<tr>\n"
         
     | 
| 
       100 
     | 
    
         
            -
                    row.each do |el|
         
     | 
| 
       101 
     | 
    
         
            -
                      html += "<td>#{el}</td>"
         
     | 
| 
      
 58 
     | 
    
         
            +
                      "There are no examples for '#{lang}'."
         
     | 
| 
       102 
59 
     | 
    
         
             
                    end
         
     | 
| 
       103 
     | 
    
         
            -
                    html += "</tr>\n"
         
     | 
| 
       104 
     | 
    
         
            -
                  end
         
     | 
| 
       105 
     | 
    
         
            -
                  self.write_html('benchmark', html)
         
     | 
| 
       106 
     | 
    
         
            -
                end
         
     | 
| 
       107 
     | 
    
         
            -
                
         
     | 
| 
       108 
     | 
    
         
            -
                def self.write_html(dir, html)
         
     | 
| 
       109 
     | 
    
         
            -
                  unless FileTest.directory?(dir) 
         
     | 
| 
       110 
     | 
    
         
            -
                    FileUtils.mkdir('./' + dir) 
         
     | 
| 
       111 
60 
     | 
    
         
             
                  end
         
     | 
| 
       112 
     | 
    
         
            -
                   
     | 
| 
       113 
     | 
    
         
            -
                  File.open(fn, 'w+') { |f| f.write(html) }
         
     | 
| 
      
 61 
     | 
    
         
            +
                  RSpec::Core::Runner.run(Dir.glob(pattern))
         
     | 
| 
       114 
62 
     | 
    
         
             
                end
         
     | 
| 
       115 
63 
     | 
    
         | 
| 
       116 
64 
     | 
    
         
             
              end
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
    
        data/spec/sandbox.rb
    CHANGED
    
    | 
         @@ -4,6 +4,15 @@ require_relative '../lib/treat' 
     | 
|
| 
       4 
4 
     | 
    
         
             
            require 'treat'
         
     | 
| 
       5 
5 
     | 
    
         
             
            include Treat::Core::DSL
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
      
 7 
     | 
    
         
            +
            collection Treat.paths.spec + '/workers/examples/english/economist'
         
     | 
| 
      
 8 
     | 
    
         
            +
            collection.apply :chunk, :segment, :tokenize
         
     | 
| 
      
 9 
     | 
    
         
            +
            puts collection.topic_words.inspect
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            =begin
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            g = group("I was running")
         
     | 
| 
      
 14 
     | 
    
         
            +
            puts g.tag.inspect
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
       7 
16 
     | 
    
         
             
            Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
         
     | 
| 
       8 
17 
     | 
    
         
             
            Treat.libraries.stanford.model_path = '/ruby/treat/models/'
         
     | 
| 
       9 
18 
     | 
    
         | 
| 
         @@ -14,6 +23,8 @@ w = word 
     | 
|
| 
       14 
23 
     | 
    
         
             
            p = phrase 'hello world'
         
     | 
| 
       15 
24 
     | 
    
         
             
            e = email 'louis@gmail.com'
         
     | 
| 
       16 
25 
     | 
    
         | 
| 
      
 26 
     | 
    
         
            +
            d = question(:is_feature, :word)
         
     | 
| 
      
 27 
     | 
    
         
            +
            =end
         
     | 
| 
       17 
28 
     | 
    
         
             
            #d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
         
     | 
| 
       18 
29 
     | 
    
         
             
            #d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
         
     | 
| 
       19 
30 
     | 
    
         
             
            #d.print_tree
         
     | 
| 
         @@ -266,6 +277,6 @@ sect = section title(phra), para 
     | 
|
| 
       266 
277 
     | 
    
         
             
            =begin
         
     | 
| 
       267 
278 
     | 
    
         
             
            puts "beer".plural.inspect
         
     | 
| 
       268 
279 
     | 
    
         
             
            =end
         
     | 
| 
       269 
     | 
    
         
            -
            Treat.core.language.detect = true
         
     | 
| 
       270 
     | 
    
         
            -
            s = sentence "Du hast deiner Frau einen roten Ring gekauft."
         
     | 
| 
       271 
     | 
    
         
            -
            s.apply(:parse,:category).print_tree
         
     | 
| 
      
 280 
     | 
    
         
            +
            # Treat.core.language.detect = true
         
     | 
| 
      
 281 
     | 
    
         
            +
            # s = sentence "Du hast deiner Frau einen roten Ring gekauft."
         
     | 
| 
      
 282 
     | 
    
         
            +
            #s.apply(:parse,:category).print_tree
         
     | 
    
        data/spec/workers/agnostic.rb
    CHANGED
    
    | 
         @@ -1,3 +1,80 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            $workers = Treat.languages.agnostic.workers
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            describe Treat::Workers::Extractors::Language do
         
     | 
| 
      
 4 
     | 
    
         
            +
              before do
         
     | 
| 
      
 5 
     | 
    
         
            +
                @entities = ["Obama and Sarkozy will meet in Berlin."]
         
     | 
| 
      
 6 
     | 
    
         
            +
                @languages = ["english"]
         
     | 
| 
      
 7 
     | 
    
         
            +
              end
         
     | 
| 
      
 8 
     | 
    
         
            +
              context "when called on any textual entity" do
         
     | 
| 
      
 9 
     | 
    
         
            +
                it "returns the language of the entity" do
         
     | 
| 
      
 10 
     | 
    
         
            +
                  # Treat.core.language.detect = true
         
     | 
| 
      
 11 
     | 
    
         
            +
                  $workers.extractors.language.each do |extractor|
         
     | 
| 
      
 12 
     | 
    
         
            +
                    @entities.map(&:language).should eql @languages
         
     | 
| 
      
 13 
     | 
    
         
            +
                  end
         
     | 
| 
      
 14 
     | 
    
         
            +
                  # Treat.core.language.detect = false
         
     | 
| 
      
 15 
     | 
    
         
            +
                end
         
     | 
| 
      
 16 
     | 
    
         
            +
              end
         
     | 
| 
      
 17 
     | 
    
         
            +
            end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            describe Treat::Workers::Formatters::Serializers do
         
     | 
| 
      
 20 
     | 
    
         
            +
              before do
         
     | 
| 
      
 21 
     | 
    
         
            +
                @texts = ["A test entity"]
         
     | 
| 
      
 22 
     | 
    
         
            +
              end
         
     | 
| 
      
 23 
     | 
    
         
            +
              context "when #serialize is called on any textual entity" do
         
     | 
| 
      
 24 
     | 
    
         
            +
                it "serializes the entity to disk and returns a pointer to the location" do
         
     | 
| 
      
 25 
     | 
    
         
            +
                  # m = Treat::Entities::Entity.build
         
     | 
| 
      
 26 
     | 
    
         
            +
                  @texts.map(&:to_entity).map(&:serialize)
         
     | 
| 
      
 27 
     | 
    
         
            +
                  .map(&method(:entity)).map(&:to_s).should eql @texts
         
     | 
| 
      
 28 
     | 
    
         
            +
                end
         
     | 
| 
      
 29 
     | 
    
         
            +
              end
         
     | 
| 
      
 30 
     | 
    
         
            +
            end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            describe Treat::Workers::Formatters::Unserializers do
         
     | 
| 
      
 33 
     | 
    
         
            +
              before do
         
     | 
| 
      
 34 
     | 
    
         
            +
                @texts = ["A te"]
         
     | 
| 
      
 35 
     | 
    
         
            +
              end
         
     | 
| 
      
 36 
     | 
    
         
            +
              context "when #unserialize is called with a selector on any textual entity" do
         
     | 
| 
      
 37 
     | 
    
         
            +
                it "unserializes the file and loads it in the entity" do
         
     | 
| 
      
 38 
     | 
    
         
            +
                  
         
     | 
| 
      
 39 
     | 
    
         
            +
                end
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
            end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            visualize: {
         
     | 
| 
      
 44 
     | 
    
         
            +
              entity: {
         
     | 
| 
      
 45 
     | 
    
         
            +
                examples: {
         
     | 
| 
      
 46 
     | 
    
         
            +
                  standoff: [
         
     | 
| 
      
 47 
     | 
    
         
            +
                    ["I walked to the store.", "(S\n   (PRP I)   (VBD walked)   (TO to)   (DT the)   (NN store)   (. .))\n"]
         
     | 
| 
      
 48 
     | 
    
         
            +
                  ],
         
     | 
| 
      
 49 
     | 
    
         
            +
                  tree: [
         
     | 
| 
      
 50 
     | 
    
         
            +
                    ["I walked to the store.", "+ Sentence (*)  --- \"I walked to the store.\"  ---  {}   --- [] \n|\n+--> Word (*)  --- \"I\"  ---  {}   --- [] \n+--> Word (*)  --- \"walked\"  ---  {}   --- [] \n+--> Word (*)  --- \"to\"  ---  {}   --- [] \n+--> Word (*)  --- \"the\"  ---  {}   --- [] \n+--> Word (*)  --- \"store\"  ---  {}   --- [] \n+--> Punctuation (*)  --- \".\"  ---  {}   --- [] "]
         
     | 
| 
      
 51 
     | 
    
         
            +
                  ],
         
     | 
| 
      
 52 
     | 
    
         
            +
                  dot: [
         
     | 
| 
      
 53 
     | 
    
         
            +
                    ["I walked to the store.", "graph {\n* [label=\"Sentence\\n\\\"I walked to the store.\\\"\",color=\"\"]\n* [label=\"Word\\n\\\"I\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"walked\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"to\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"the\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"store\\\"\",color=\"\"]\n* -- *;\n* [label=\"Punctuation\\n\\\".\\\"\",color=\"\"]\n* -- *;\n}"]
         
     | 
| 
      
 54 
     | 
    
         
            +
                  ]
         
     | 
| 
      
 55 
     | 
    
         
            +
                },
         
     | 
| 
      
 56 
     | 
    
         
            +
                preprocessor: lambda  { |entity| entity.tokenize },
         
     | 
| 
      
 57 
     | 
    
         
            +
                generator: lambda  { |result| result.gsub(/[0-9]+/, '*') }
         
     | 
| 
      
 58 
     | 
    
         
            +
              }
         
     | 
| 
      
 59 
     | 
    
         
            +
            },
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
            describe Treat::Workers::Formatters::Visualizers do
         
     | 
| 
      
 63 
     | 
    
         
            +
              before do
         
     | 
| 
      
 64 
     | 
    
         
            +
                @texts = ["I walked to the store."]
         
     | 
| 
      
 65 
     | 
    
         
            +
              end
         
     | 
| 
      
 66 
     | 
    
         
            +
              describe "when #visualize is called with the :dot worker" do
         
     | 
| 
      
 67 
     | 
    
         
            +
                
         
     | 
| 
      
 68 
     | 
    
         
            +
              end
         
     | 
| 
      
 69 
     | 
    
         
            +
              describe "when #visualize is called with the :tree worker" do
         
     | 
| 
      
 70 
     | 
    
         
            +
                
         
     | 
| 
      
 71 
     | 
    
         
            +
              end
         
     | 
| 
      
 72 
     | 
    
         
            +
              describe "when #visualize is called with the :dot worker" do
         
     | 
| 
      
 73 
     | 
    
         
            +
                
         
     | 
| 
      
 74 
     | 
    
         
            +
              end
         
     | 
| 
      
 75 
     | 
    
         
            +
            end
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
            =begin
         
     | 
| 
       1 
78 
     | 
    
         
             
            class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
         
     | 
| 
       2 
79 
     | 
    
         | 
| 
       3 
80 
     | 
    
         
             
              # TODO: :tf_idf, :keywords, :classifiers
         
     | 
| 
         @@ -5,15 +82,6 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language 
     | 
|
| 
       5 
82 
     | 
    
         | 
| 
       6 
83 
     | 
    
         
             
              Scenarios = {
         
     | 
| 
       7 
84 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
                # Also tests unserialize.
         
     | 
| 
       9 
     | 
    
         
            -
                serialize: {
         
     | 
| 
       10 
     | 
    
         
            -
                  entity: {
         
     | 
| 
       11 
     | 
    
         
            -
                    examples: [
         
     | 
| 
       12 
     | 
    
         
            -
                      ["A test entity.", "A test entity."]
         
     | 
| 
       13 
     | 
    
         
            -
                    ],
         
     | 
| 
       14 
     | 
    
         
            -
                    generator: lambda { |selector| Treat::Entities::Entity.build(selector).to_s }
         
     | 
| 
       15 
     | 
    
         
            -
                  }
         
     | 
| 
       16 
     | 
    
         
            -
                },
         
     | 
| 
       17 
85 
     | 
    
         
             
                classify: {
         
     | 
| 
       18 
86 
     | 
    
         
             
                  entity: {
         
     | 
| 
       19 
87 
     | 
    
         
             
                    examples: [
         
     | 
| 
         @@ -39,23 +107,6 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language 
     | 
|
| 
       39 
107 
     | 
    
         
             
                    end
         
     | 
| 
       40 
108 
     | 
    
         
             
                  }
         
     | 
| 
       41 
109 
     | 
    
         
             
                },
         
     | 
| 
       42 
     | 
    
         
            -
                visualize: {
         
     | 
| 
       43 
     | 
    
         
            -
                  entity: {
         
     | 
| 
       44 
     | 
    
         
            -
                    examples: {
         
     | 
| 
       45 
     | 
    
         
            -
                      standoff: [
         
     | 
| 
       46 
     | 
    
         
            -
                        ["I walked to the store.", "(S\n   (PRP I)   (VBD walked)   (TO to)   (DT the)   (NN store)   (. .))\n"]
         
     | 
| 
       47 
     | 
    
         
            -
                      ],
         
     | 
| 
       48 
     | 
    
         
            -
                      tree: [
         
     | 
| 
       49 
     | 
    
         
            -
                        ["I walked to the store.", "+ Sentence (*)  --- \"I walked to the store.\"  ---  {}   --- [] \n|\n+--> Word (*)  --- \"I\"  ---  {}   --- [] \n+--> Word (*)  --- \"walked\"  ---  {}   --- [] \n+--> Word (*)  --- \"to\"  ---  {}   --- [] \n+--> Word (*)  --- \"the\"  ---  {}   --- [] \n+--> Word (*)  --- \"store\"  ---  {}   --- [] \n+--> Punctuation (*)  --- \".\"  ---  {}   --- [] "]
         
     | 
| 
       50 
     | 
    
         
            -
                      ],
         
     | 
| 
       51 
     | 
    
         
            -
                      dot: [
         
     | 
| 
       52 
     | 
    
         
            -
                        ["I walked to the store.", "graph {\n* [label=\"Sentence\\n\\\"I walked to the store.\\\"\",color=\"\"]\n* [label=\"Word\\n\\\"I\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"walked\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"to\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"the\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"store\\\"\",color=\"\"]\n* -- *;\n* [label=\"Punctuation\\n\\\".\\\"\",color=\"\"]\n* -- *;\n}"]
         
     | 
| 
       53 
     | 
    
         
            -
                      ]
         
     | 
| 
       54 
     | 
    
         
            -
                    },
         
     | 
| 
       55 
     | 
    
         
            -
                    preprocessor: lambda  { |entity| entity.tokenize },
         
     | 
| 
       56 
     | 
    
         
            -
                    generator: lambda  { |result| result.gsub(/[0-9]+/, '*') }
         
     | 
| 
       57 
     | 
    
         
            -
                  }
         
     | 
| 
       58 
     | 
    
         
            -
                },
         
     | 
| 
       59 
110 
     | 
    
         | 
| 
       60 
111 
     | 
    
         
             
            =begin
         
     | 
| 
       61 
112 
     | 
    
         
             
                keywords: {
         
     | 
| 
         @@ -103,8 +154,7 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language 
     | 
|
| 
       103 
154 
     | 
    
         
             
                      preprocessor: lambda { |coll| coll.apply(:index) }
         
     | 
| 
       104 
155 
     | 
    
         
             
                  },
         
     | 
| 
       105 
156 
     | 
    
         
             
                },
         
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
       107 
     | 
    
         
            -
            =begin
         
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
       108 
158 
     | 
    
         
             
                keywords: {
         
     | 
| 
       109 
159 
     | 
    
         
             
                  document: {
         
     | 
| 
       110 
160 
     | 
    
         
             
                    examples: [
         
     | 
| 
         @@ -124,7 +174,7 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language 
     | 
|
| 
       124 
174 
     | 
    
         
             
                    ]
         
     | 
| 
       125 
175 
     | 
    
         
             
                  }
         
     | 
| 
       126 
176 
     | 
    
         
             
                },
         
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
       128 
178 
     | 
    
         
             
                topic_words: {
         
     | 
| 
       129 
179 
     | 
    
         
             
                  collection: {
         
     | 
| 
       130 
180 
     | 
    
         
             
                    examples: [
         
     | 
| 
         @@ -134,4 +184,4 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language 
     | 
|
| 
       134 
184 
     | 
    
         
             
                }
         
     | 
| 
       135 
185 
     | 
    
         
             
              }
         
     | 
| 
       136 
186 
     | 
    
         | 
| 
       137 
     | 
    
         
            -
            end
         
     | 
| 
      
 187 
     | 
    
         
            +
            =end
         
     |