treat 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/treat/ai/classifiers/id3.rb +1 -2
 - data/lib/treat/ai/classifiers/mlp.rb +30 -0
 - data/lib/treat/ai.rb +1 -1
 - data/lib/treat/classification.rb +11 -8
 - data/lib/treat/data_set.rb +9 -0
 - data/lib/treat/dependencies.rb +2 -1
 - data/lib/treat/entities/abilities/buildable.rb +1 -1
 - data/lib/treat/entities/abilities/countable.rb +1 -1
 - data/lib/treat/entities/abilities/iterable.rb +3 -5
 - data/lib/treat/entities/abilities/registrable.rb +10 -0
 - data/lib/treat/entities/entity.rb +0 -10
 - data/lib/treat/extractors/keywords/tf_idf.rb +2 -9
 - data/lib/treat/formatters/readers/autoselect.rb +6 -11
 - data/lib/treat/formatters/serializers/mongo.rb +64 -0
 - data/lib/treat/formatters/serializers/xml.rb +10 -4
 - data/lib/treat/formatters/unserializers/xml.rb +6 -0
 - data/lib/treat/kernel.rb +1 -1
 - data/lib/treat/processors/tokenizers/perl.rb +4 -0
 - data/lib/treat.rb +1 -1
 - data/spec/entity.rb +47 -12
 - data/spec/sandbox.rb +16 -108
 - metadata +6 -5
 
| 
         @@ -14,12 +14,11 @@ class Treat::AI::Classifiers::ID3 
     | 
|
| 
       14 
14 
     | 
    
         
             
                  set.labels.map { |l| l.to_s }, set.items, 
         
     | 
| 
       15 
15 
     | 
    
         
             
                  cl.default, cl.mode)
         
     | 
| 
       16 
16 
     | 
    
         
             
                  dec_tree.train
         
     | 
| 
      
 17 
     | 
    
         
            +
                  @@classifiers[cl] = dec_tree
         
     | 
| 
       17 
18 
     | 
    
         
             
                else
         
     | 
| 
       18 
19 
     | 
    
         
             
                  dec_tree = @@classifiers[cl]
         
     | 
| 
       19 
20 
     | 
    
         
             
                end
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
                cl.export_item(entity, false).inspect
         
     | 
| 
       22 
     | 
    
         
            -
                
         
     | 
| 
       23 
22 
     | 
    
         
             
                dec_tree.predict(
         
     | 
| 
       24 
23 
     | 
    
         
             
                  cl.export_item(entity, false)
         
     | 
| 
       25 
24 
     | 
    
         
             
                )
         
     | 
| 
         @@ -0,0 +1,30 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Currently, this MLP is limited to 1 output.
         
     | 
| 
      
 2 
     | 
    
         
            +
            class Treat::AI::Classifiers::MLP
         
     | 
| 
      
 3 
     | 
    
         
            +
              
         
     | 
| 
      
 4 
     | 
    
         
            +
              require 'ai4r'
         
     | 
| 
      
 5 
     | 
    
         
            +
              
         
     | 
| 
      
 6 
     | 
    
         
            +
              @@mlps = {}
         
     | 
| 
      
 7 
     | 
    
         
            +
              
         
     | 
| 
      
 8 
     | 
    
         
            +
              def self.classify(entity, options = {})
         
     | 
| 
      
 9 
     | 
    
         
            +
                
         
     | 
| 
      
 10 
     | 
    
         
            +
                set = options[:training]
         
     | 
| 
      
 11 
     | 
    
         
            +
                cl = set.classification
         
     | 
| 
      
 12 
     | 
    
         
            +
                  
         
     | 
| 
      
 13 
     | 
    
         
            +
                if !@@mlps[cl]
         
     | 
| 
      
 14 
     | 
    
         
            +
                  net = Ai4r::NeuralNetwork::
         
     | 
| 
      
 15 
     | 
    
         
            +
                  Backpropagation.new([cl.labels.size, 3, 1])
         
     | 
| 
      
 16 
     | 
    
         
            +
                  set.items.each do |item|
         
     | 
| 
      
 17 
     | 
    
         
            +
                    inputs = item[0..-2]
         
     | 
| 
      
 18 
     | 
    
         
            +
                    outputs = [item[-1]]
         
     | 
| 
      
 19 
     | 
    
         
            +
                    net.train(inputs, outputs)
         
     | 
| 
      
 20 
     | 
    
         
            +
                  end
         
     | 
| 
      
 21 
     | 
    
         
            +
                  @@mlps[cl] = net
         
     | 
| 
      
 22 
     | 
    
         
            +
                else
         
     | 
| 
      
 23 
     | 
    
         
            +
                  net = @@mlps[cl]
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                
         
     | 
| 
      
 26 
     | 
    
         
            +
                net.eval(cl.export_item(entity, false))[0]
         
     | 
| 
      
 27 
     | 
    
         
            +
                
         
     | 
| 
      
 28 
     | 
    
         
            +
              end
         
     | 
| 
      
 29 
     | 
    
         
            +
              
         
     | 
| 
      
 30 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/treat/ai.rb
    CHANGED
    
    
    
        data/lib/treat/classification.rb
    CHANGED
    
    | 
         @@ -27,21 +27,25 @@ class Treat::Classification 
     | 
|
| 
       27 
27 
     | 
    
         
             
              end
         
     | 
| 
       28 
28 
     | 
    
         | 
| 
       29 
29 
     | 
    
         
             
              def export_item(e, include_question = true)
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
       30 
31 
     | 
    
         
             
                line = []
         
     | 
| 
       31 
32 
     | 
    
         | 
| 
       32 
33 
     | 
    
         
             
                @features.each do |cmd|
         
     | 
| 
      
 34 
     | 
    
         
            +
                  dflt = nil
         
     | 
| 
       33 
35 
     | 
    
         
             
                  begin
         
     | 
| 
       34 
36 
     | 
    
         
             
                    if cmd.is_a?(Array)
         
     | 
| 
       35 
     | 
    
         
            -
                       
     | 
| 
      
 37 
     | 
    
         
            +
                      if cmd.size == 3
         
     | 
| 
      
 38 
     | 
    
         
            +
                        r = cmd[1].call(e)
         
     | 
| 
      
 39 
     | 
    
         
            +
                        dflt = cmd[2]
         
     | 
| 
      
 40 
     | 
    
         
            +
                        line << (r ? r : dflt)
         
     | 
| 
      
 41 
     | 
    
         
            +
                      elsif cmd.size == 2
         
     | 
| 
      
 42 
     | 
    
         
            +
                        r = e.send(cmd[0])
         
     | 
| 
      
 43 
     | 
    
         
            +
                        dflt = cmd[1]
         
     | 
| 
      
 44 
     | 
    
         
            +
                        line << (r ? r : dflt)
         
     | 
| 
      
 45 
     | 
    
         
            +
                      end
         
     | 
| 
       36 
46 
     | 
    
         
             
                    else
         
     | 
| 
       37 
47 
     | 
    
         
             
                      line << e.send(cmd)
         
     | 
| 
       38 
48 
     | 
    
         
             
                    end
         
     | 
| 
       39 
     | 
    
         
            -
                  rescue Treat::Exception
         
     | 
| 
       40 
     | 
    
         
            -
                    dflt = (
         
     | 
| 
       41 
     | 
    
         
            -
                    (cmd.is_a?(Array) && cmd[2]) ?
         
     | 
| 
       42 
     | 
    
         
            -
                    cmd[2] : nil
         
     | 
| 
       43 
     | 
    
         
            -
                    )
         
     | 
| 
       44 
     | 
    
         
            -
                    line << dflt
         
     | 
| 
       45 
49 
     | 
    
         
             
                  end
         
     | 
| 
       46 
50 
     | 
    
         
             
                end
         
     | 
| 
       47 
51 
     | 
    
         | 
| 
         @@ -53,7 +57,6 @@ class Treat::Classification 
     | 
|
| 
       53 
57 
     | 
    
         
             
                  end
         
     | 
| 
       54 
58 
     | 
    
         
             
                end
         
     | 
| 
       55 
59 
     | 
    
         | 
| 
       56 
     | 
    
         
            -
                line[-1] = '' if line[-1].nil?
         
     | 
| 
       57 
60 
     | 
    
         
             
                line
         
     | 
| 
       58 
61 
     | 
    
         
             
              end
         
     | 
| 
       59 
62 
     | 
    
         | 
    
        data/lib/treat/data_set.rb
    CHANGED
    
    
    
        data/lib/treat/dependencies.rb
    CHANGED
    
    | 
         @@ -11,7 +11,8 @@ class Treat::Dependencies 
     | 
|
| 
       11 
11 
     | 
    
         
             
                ['linguistics', '>= 1.0.9', 'retrieve the inflection of nouns, verbs and numbers in English'],
         
     | 
| 
       12 
12 
     | 
    
         
             
                ['punkt-segmenter', '>= 0.9.1', 'segment texts into sentences'],
         
     | 
| 
       13 
13 
     | 
    
         
             
                ['chronic', '>= 0.6.7', 'detect date and time in text'],
         
     | 
| 
       14 
     | 
    
         
            -
                ['decisiontree', '>= 0.3.0', 'perform decision tree classification of text entities']
         
     | 
| 
      
 14 
     | 
    
         
            +
                ['decisiontree', '>= 0.3.0', 'perform decision tree classification of text entities'],
         
     | 
| 
      
 15 
     | 
    
         
            +
                ['ai4r', '>= 1.11', 'perform different kinds of classification tasks on text entities']
         
     | 
| 
       15 
16 
     | 
    
         
             
              ]
         
     | 
| 
       16 
17 
     | 
    
         | 
| 
       17 
18 
     | 
    
         
             
              Binary = [
         
     | 
| 
         @@ -30,10 +30,8 @@ module Treat::Entities::Abilities::Iterable 
     | 
|
| 
       30 
30 
     | 
    
         
             
                a = []
         
     | 
| 
       31 
31 
     | 
    
         
             
                type = :entity unless type
         
     | 
| 
       32 
32 
     | 
    
         
             
                each_entity(type) do |e|
         
     | 
| 
       33 
     | 
    
         
            -
                   
     | 
| 
       34 
     | 
    
         
            -
                  e 
     | 
| 
       35 
     | 
    
         
            -
                  ([:id, :value, :type].include?(feature) &&
         
     | 
| 
       36 
     | 
    
         
            -
                  e.send(feature) == value)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  r = e.send(feature)
         
     | 
| 
      
 34 
     | 
    
         
            +
                  a << e if r == value
         
     | 
| 
       37 
35 
     | 
    
         
             
                end
         
     | 
| 
       38 
36 
     | 
    
         
             
                a
         
     | 
| 
       39 
37 
     | 
    
         
             
              end
         
     | 
| 
         @@ -51,7 +49,7 @@ module Treat::Entities::Abilities::Iterable 
     | 
|
| 
       51 
49 
     | 
    
         
             
              # Returns an array of the entities with the given
         
     | 
| 
       52 
50 
     | 
    
         
             
              # category.
         
     | 
| 
       53 
51 
     | 
    
         
             
              def entities_with_category(category, type = nil)
         
     | 
| 
       54 
     | 
    
         
            -
                entities_with_feature(:category, type)
         
     | 
| 
      
 52 
     | 
    
         
            +
                entities_with_feature(:category, category, type)
         
     | 
| 
       55 
53 
     | 
    
         
             
              end
         
     | 
| 
       56 
54 
     | 
    
         | 
| 
       57 
55 
     | 
    
         
             
              # Returns the first ancestor of this entity 
         
     | 
| 
         @@ -5,6 +5,16 @@ module Treat::Entities::Abilities::Registrable 
     | 
|
| 
       5 
5 
     | 
    
         
             
              # Registers a token in the @registry hash.
         
     | 
| 
       6 
6 
     | 
    
         
             
              def register(entity)
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
      
 8 
     | 
    
         
            +
                unless @registry
         
     | 
| 
      
 9 
     | 
    
         
            +
                  @count = 0
         
     | 
| 
      
 10 
     | 
    
         
            +
                  @registry = {
         
     | 
| 
      
 11 
     | 
    
         
            +
                    :value => {}, 
         
     | 
| 
      
 12 
     | 
    
         
            +
                    :position => {}, 
         
     | 
| 
      
 13 
     | 
    
         
            +
                    :type => {}, 
         
     | 
| 
      
 14 
     | 
    
         
            +
                    :id => {}
         
     | 
| 
      
 15 
     | 
    
         
            +
                  }
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
                
         
     | 
| 
       8 
18 
     | 
    
         
             
                if entity.is_a?(Treat::Entities::Token) ||
         
     | 
| 
       9 
19 
     | 
    
         
             
                  entity.is_a?(Treat::Entities::Phrase)
         
     | 
| 
       10 
20 
     | 
    
         
             
                  val = entity.to_s.downcase
         
     | 
| 
         @@ -64,18 +64,8 @@ module Treat::Entities 
     | 
|
| 
       64 
64 
     | 
    
         
             
                  super(value, id)
         
     | 
| 
       65 
65 
     | 
    
         
             
                  @type = :entity if self == Entity
         
     | 
| 
       66 
66 
     | 
    
         
             
                  @type ||= ucc(cl(self.class)).intern
         
     | 
| 
       67 
     | 
    
         
            -
                  unless is_a?(Treat::Entities::Token)
         
     | 
| 
       68 
     | 
    
         
            -
                    @count = 0
         
     | 
| 
       69 
     | 
    
         
            -
                    @registry = {
         
     | 
| 
       70 
     | 
    
         
            -
                      :id => {},
         
     | 
| 
       71 
     | 
    
         
            -
                      :value => {},
         
     | 
| 
       72 
     | 
    
         
            -
                      :type => {},
         
     | 
| 
       73 
     | 
    
         
            -
                      :position => {}
         
     | 
| 
       74 
     | 
    
         
            -
                    }
         
     | 
| 
       75 
     | 
    
         
            -
                  end
         
     | 
| 
       76 
67 
     | 
    
         
             
                end
         
     | 
| 
       77 
68 
     | 
    
         | 
| 
       78 
     | 
    
         
            -
                
         
     | 
| 
       79 
69 
     | 
    
         
             
                # Add an entity to the current entity.
         
     | 
| 
       80 
70 
     | 
    
         
             
                # Registers the entity in the root node
         
     | 
| 
       81 
71 
     | 
    
         
             
                # token registry if the entity is a leaf.
         
     | 
| 
         @@ -41,17 +41,10 @@ class Treat::Extractors::Keywords::TfIdf 
     | 
|
| 
       41 
41 
     | 
    
         
             
                entity.each_word do |word|
         
     | 
| 
       42 
42 
     | 
    
         | 
| 
       43 
43 
     | 
    
         
             
                  if keywords.include?(word.to_s)
         
     | 
| 
       44 
     | 
    
         
            -
                    word.set : 
     | 
| 
      
 44 
     | 
    
         
            +
                    word.set :keyword, true
         
     | 
| 
       45 
45 
     | 
    
         
             
                    pp = entity.parent_phrase
         
     | 
| 
       46 
     | 
    
         
            -
                    next unless pp
         
     | 
| 
       47 
     | 
    
         
            -
                    if pp.has? :keyword_count
         
     | 
| 
       48 
     | 
    
         
            -
                      pp.set :keyword_count, 
         
     | 
| 
       49 
     | 
    
         
            -
                      pp.keyword_count + 1
         
     | 
| 
       50 
     | 
    
         
            -
                    else
         
     | 
| 
       51 
     | 
    
         
            -
                      pp.set :keyword_count, 1
         
     | 
| 
       52 
     | 
    
         
            -
                    end
         
     | 
| 
       53 
46 
     | 
    
         
             
                  else
         
     | 
| 
       54 
     | 
    
         
            -
                    word.set : 
     | 
| 
      
 47 
     | 
    
         
            +
                    word.set :keyword, false
         
     | 
| 
       55 
48 
     | 
    
         
             
                  end
         
     | 
| 
       56 
49 
     | 
    
         | 
| 
       57 
50 
     | 
    
         
             
                end
         
     | 
| 
         @@ -15,21 +15,16 @@ class Treat::Formatters::Readers::Autoselect 
     | 
|
| 
       15 
15 
     | 
    
         
             
                document.read(detect_format(document.file, options[:default_to]))
         
     | 
| 
       16 
16 
     | 
    
         
             
              end
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
              def self.detect_format(filename, default_to =  
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
      
 18 
     | 
    
         
            +
              def self.detect_format(filename, default_to = nil)
         
     | 
| 
      
 19 
     | 
    
         
            +
                default_to ||= DefaultOptions[:default_to]
         
     | 
| 
       20 
20 
     | 
    
         
             
                ext = filename.scan(ExtensionRegexp)
         
     | 
| 
       21 
     | 
    
         
            -
                ext = (ext.is_a?(Array) && ext[0] && ext[0][0]) ?
         
     | 
| 
       22 
     | 
    
         
            -
                 
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                format =
         
     | 
| 
       25 
     | 
    
         
            -
                ImageExtensions.include?(ext) ?
         
     | 
| 
       26 
     | 
    
         
            -
                'image' : ext
         
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
                # Humanize extensions.
         
     | 
| 
      
 21 
     | 
    
         
            +
                ext = (ext.is_a?(Array) && ext[0] && ext[0][0]) ? ext[0][0] : ''
         
     | 
| 
      
 22 
     | 
    
         
            +
                
         
     | 
| 
      
 23 
     | 
    
         
            +
                format = ImageExtensions.include?(ext) ? 'image' : ext
         
     | 
| 
       29 
24 
     | 
    
         
             
                format = 'html' if format == 'htm'
         
     | 
| 
       30 
25 
     | 
    
         
             
                format = 'yaml' if format == 'yml'
         
     | 
| 
       31 
26 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
                format = default_to if format == ''
         
     | 
| 
      
 27 
     | 
    
         
            +
                format = default_to if format.to_s == ''
         
     | 
| 
       33 
28 
     | 
    
         | 
| 
       34 
29 
     | 
    
         
             
                format.intern
         
     | 
| 
       35 
30 
     | 
    
         | 
| 
         @@ -0,0 +1,64 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Stores an entity in a Mongo collection.
         
     | 
| 
      
 2 
     | 
    
         
            +
            class Treat::Formatters::Serializers::Mongo
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
              # Reauire the Mongo DB
         
     | 
| 
      
 5 
     | 
    
         
            +
              require 'mongo'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
              # Serialize an entity tree in XML format.
         
     | 
| 
      
 8 
     | 
    
         
            +
              #
         
     | 
| 
      
 9 
     | 
    
         
            +
              # Options:
         
     | 
| 
      
 10 
     | 
    
         
            +
              # - (String) :file => a file to write to.
         
     | 
| 
      
 11 
     | 
    
         
            +
              def self.serialize(entity, options = {})
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                unless options[:database]
         
     | 
| 
      
 14 
     | 
    
         
            +
                  raise Treat::Exception,
         
     | 
| 
      
 15 
     | 
    
         
            +
                  'Must supply the database name.'
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                @@conn ||= Mongo::Connection.new
         
     | 
| 
      
 19 
     | 
    
         
            +
                @@db ||= @@conn[options[:database]]
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                path = []
         
     | 
| 
      
 22 
     | 
    
         
            +
                
         
     | 
| 
      
 23 
     | 
    
         
            +
                entity.each_ancestor do |ancestor|
         
     | 
| 
      
 24 
     | 
    
         
            +
                  path << [ancestor.type, ancestor.id]
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
                
         
     | 
| 
      
 27 
     | 
    
         
            +
                path = path.reverse
         
     | 
| 
      
 28 
     | 
    
         
            +
                
         
     | 
| 
      
 29 
     | 
    
         
            +
                target = @@db
         
     | 
| 
      
 30 
     | 
    
         
            +
                
         
     | 
| 
      
 31 
     | 
    
         
            +
                path.each do |type_id|
         
     | 
| 
      
 32 
     | 
    
         
            +
                  coll = @@db[type_id[0]][type_id[1]]
         
     | 
| 
      
 33 
     | 
    
         
            +
                end
         
     | 
| 
      
 34 
     | 
    
         
            +
                
         
     | 
| 
      
 35 
     | 
    
         
            +
                # Store path
         
     | 
| 
      
 36 
     | 
    
         
            +
                
         
     | 
| 
      
 37 
     | 
    
         
            +
                Treat::Entities.list.each do |type|
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                  type = entity.type.to_s
         
     | 
| 
      
 40 
     | 
    
         
            +
                  type = (type == 'entity') ? 'entities' : (type + 's')
         
     | 
| 
      
 41 
     | 
    
         
            +
                  doc = coll[type]
         
     | 
| 
      
 42 
     | 
    
         
            +
                  
         
     | 
| 
      
 43 
     | 
    
         
            +
                  features = {}
         
     | 
| 
      
 44 
     | 
    
         
            +
                  features['id'] = entity.id
         
     | 
| 
      
 45 
     | 
    
         
            +
                  features['value'] = entity.value
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
                  entity.features.each_pair do |feature, value|
         
     | 
| 
      
 48 
     | 
    
         
            +
                    if value.is_a? Treat::Entities::Entity
         
     | 
| 
      
 49 
     | 
    
         
            +
                      value = value.id
         
     | 
| 
      
 50 
     | 
    
         
            +
                    elsif value.is_a?(Array) || value.is_a?(Hash)
         
     | 
| 
      
 51 
     | 
    
         
            +
                      value = value.inspect
         
     | 
| 
      
 52 
     | 
    
         
            +
                    else
         
     | 
| 
      
 53 
     | 
    
         
            +
                      value = value.to_s
         
     | 
| 
      
 54 
     | 
    
         
            +
                    end
         
     | 
| 
      
 55 
     | 
    
         
            +
                    features[feature.to_s] = value
         
     | 
| 
      
 56 
     | 
    
         
            +
                  end
         
     | 
| 
      
 57 
     | 
    
         
            +
                  
         
     | 
| 
      
 58 
     | 
    
         
            +
                  doc.insert(features)
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
                end
         
     | 
| 
      
 61 
     | 
    
         
            +
                
         
     | 
| 
      
 62 
     | 
    
         
            +
              end
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -8,12 +8,14 @@ class Treat::Formatters::Serializers::XML 
     | 
|
| 
       8 
8 
     | 
    
         
             
              # Options:
         
     | 
| 
       9 
9 
     | 
    
         
             
              # - (String) :file => a file to write to.
         
     | 
| 
       10 
10 
     | 
    
         
             
              def self.serialize(entity, options = {})
         
     | 
| 
       11 
     | 
    
         
            -
                
         
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
                if options[:indent].nil?
         
     | 
| 
      
 12 
     | 
    
         
            +
                  options = options.merge({:indent => 0})
         
     | 
| 
      
 13 
     | 
    
         
            +
                end
         
     | 
| 
       13 
14 
     | 
    
         
             
                indent = options[:indent]
         
     | 
| 
       14 
15 
     | 
    
         
             
                if options[:indent] == 0
         
     | 
| 
       15 
16 
     | 
    
         
             
                  enc = entity.to_s.encoding.to_s.downcase
         
     | 
| 
       16 
     | 
    
         
            -
                  string = "<?xml version=\"1.0\"  
     | 
| 
      
 17 
     | 
    
         
            +
                  string = "<?xml version=\"1.0\" " +
         
     | 
| 
      
 18 
     | 
    
         
            +
                  "encoding=\"#{enc}\" ?>\n<treat>\n"
         
     | 
| 
       17 
19 
     | 
    
         
             
                else
         
     | 
| 
       18 
20 
     | 
    
         
             
                  string = ''
         
     | 
| 
       19 
21 
     | 
    
         
             
                end
         
     | 
| 
         @@ -26,20 +28,24 @@ class Treat::Formatters::Serializers::XML 
     | 
|
| 
       26 
28 
     | 
    
         
             
                    if value.is_a? Treat::Entities::Entity
         
     | 
| 
       27 
29 
     | 
    
         
             
                      attributes << "#{feature}='#{value.id}' "
         
     | 
| 
       28 
30 
     | 
    
         
             
                    else
         
     | 
| 
      
 31 
     | 
    
         
            +
                      value = value.inspect if value.is_a?(Symbol)
         
     | 
| 
       29 
32 
     | 
    
         
             
                      attributes << "#{feature}='#{escape(value)}' "
         
     | 
| 
       30 
33 
     | 
    
         
             
                    end
         
     | 
| 
       31 
34 
     | 
    
         
             
                  end
         
     | 
| 
      
 35 
     | 
    
         
            +
                  ############ To be refactored
         
     | 
| 
       32 
36 
     | 
    
         
             
                  unless entity.dependencies.empty?
         
     | 
| 
       33 
37 
     | 
    
         
             
                    attributes << "dependencies='"
         
     | 
| 
       34 
38 
     | 
    
         
             
                    a = []
         
     | 
| 
       35 
39 
     | 
    
         
             
                    entity.dependencies.each do |dependency|
         
     | 
| 
       36 
     | 
    
         
            -
                      a << ("{target: #{dependency.target},  
     | 
| 
      
 40 
     | 
    
         
            +
                      a << ("{target: #{dependency.target}, "+
         
     | 
| 
      
 41 
     | 
    
         
            +
                      "type: #{dependency.type}, " +
         
     | 
| 
       37 
42 
     | 
    
         
             
                      "directed: #{dependency.directed}, " +
         
     | 
| 
       38 
43 
     | 
    
         
             
                      "direction: #{dependency.direction}}" )
         
     | 
| 
       39 
44 
     | 
    
         
             
                    end
         
     | 
| 
       40 
45 
     | 
    
         
             
                    # Structs.
         
     | 
| 
       41 
46 
     | 
    
         
             
                    attributes << a.join(',') + "'"
         
     | 
| 
       42 
47 
     | 
    
         
             
                  end
         
     | 
| 
      
 48 
     | 
    
         
            +
                  ############ End of ugly code
         
     | 
| 
       43 
49 
     | 
    
         
             
                end
         
     | 
| 
       44 
50 
     | 
    
         
             
                tag = entity.class.to_s.split('::')[-1].downcase
         
     | 
| 
       45 
51 
     | 
    
         
             
                string += "#{spaces}<#{tag}#{attributes}>"
         
     | 
| 
         @@ -36,6 +36,7 @@ module Treat::Formatters::Unserializers::XML 
     | 
|
| 
       36 
36 
     | 
    
         
             
                  id = nil; value = ''
         
     | 
| 
       37 
37 
     | 
    
         
             
                  attributes = {}
         
     | 
| 
       38 
38 
     | 
    
         
             
                  dependencies = []
         
     | 
| 
      
 39 
     | 
    
         
            +
                  
         
     | 
| 
       39 
40 
     | 
    
         
             
                  unless xml_reader.attributes.size == 0
         
     | 
| 
       40 
41 
     | 
    
         
             
                    xml_reader.attributes.each_pair do |k,v|
         
     | 
| 
       41 
42 
     | 
    
         
             
                      if k == 'id'
         
     | 
| 
         @@ -64,6 +65,11 @@ module Treat::Formatters::Unserializers::XML 
     | 
|
| 
       64 
65 
     | 
    
         
             
                      elsif k == 'value'
         
     | 
| 
       65 
66 
     | 
    
         
             
                        value = v
         
     | 
| 
       66 
67 
     | 
    
         
             
                      else
         
     | 
| 
      
 68 
     | 
    
         
            +
                        v = v[1..-1].intern if v[0] == ':'
         
     | 
| 
      
 69 
     | 
    
         
            +
                        v = v.to_i if v =~ /^[0-9]*$/
         
     | 
| 
      
 70 
     | 
    
         
            +
                        v = v.to_f if v =~ /^[0-9\.]*$/
         
     | 
| 
      
 71 
     | 
    
         
            +
                        v = false if v == 'false'
         
     | 
| 
      
 72 
     | 
    
         
            +
                        v = true if v == 'true'
         
     | 
| 
       67 
73 
     | 
    
         
             
                        attributes[k.intern] = v
         
     | 
| 
       68 
74 
     | 
    
         
             
                      end
         
     | 
| 
       69 
75 
     | 
    
         
             
                    end
         
     | 
    
        data/lib/treat/kernel.rb
    CHANGED
    
    | 
         @@ -10,7 +10,7 @@ module Kernel 
     | 
|
| 
       10 
10 
     | 
    
         
             
              # A list of acronyms used in class names within
         
     | 
| 
       11 
11 
     | 
    
         
             
              # the program. These do not CamelCase; they
         
     | 
| 
       12 
12 
     | 
    
         
             
              # CAMELCase.
         
     | 
| 
       13 
     | 
    
         
            -
              Acronyms = %w[xml html txt odt abw doc yaml uea lda pdf ptb dot ai id3 svo]
         
     | 
| 
      
 13 
     | 
    
         
            +
              Acronyms = %w[xml html txt odt abw doc yaml uea lda pdf ptb dot ai id3 svo mlp]
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
              # A cache to optimize camel casing.
         
     | 
| 
       16 
16 
     | 
    
         
             
              @@cc_cache = {}
         
     | 
| 
         @@ -43,6 +43,8 @@ module Treat::Processors::Tokenizers::Perl 
     | 
|
| 
       43 
43 
     | 
    
         | 
| 
       44 
44 
     | 
    
         
             
                # Replace all decimal points by ^^
         
     | 
| 
       45 
45 
     | 
    
         
             
                Treat::Helpers::DecimalPointEscaper.escape!(text)
         
     | 
| 
      
 46 
     | 
    
         
            +
                
         
     | 
| 
      
 47 
     | 
    
         
            +
            =begin
         
     | 
| 
       46 
48 
     | 
    
         | 
| 
       47 
49 
     | 
    
         
             
                # Translate some common extended ascii 
         
     | 
| 
       48 
50 
     | 
    
         
             
                # characters to quotes
         
     | 
| 
         @@ -60,6 +62,8 @@ module Treat::Processors::Tokenizers::Perl 
     | 
|
| 
       60 
62 
     | 
    
         
             
                text.gsub!(/\"(?=\s)/," '' ")
         
     | 
| 
       61 
63 
     | 
    
         
             
                #s{\"} { `` }g;
         
     | 
| 
       62 
64 
     | 
    
         
             
                text.gsub!(/\"(?=\s)/," `` ")
         
     | 
| 
      
 65 
     | 
    
         
            +
            =end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
       63 
67 
     | 
    
         
             
                # Isolate ellipses
         
     | 
| 
       64 
68 
     | 
    
         
             
                # s{\.\.\.}   { ... }g;
         
     | 
| 
       65 
69 
     | 
    
         
             
                text.gsub!(/\.\.\./,' ... ')
         
     | 
    
        data/lib/treat.rb
    CHANGED
    
    
    
        data/spec/entity.rb
    CHANGED
    
    | 
         @@ -113,7 +113,7 @@ describe Treat::Entities::Entity do 
     | 
|
| 
       113 
113 
     | 
    
         
             
              describe "Exportable" do
         
     | 
| 
       114 
114 
     | 
    
         | 
| 
       115 
115 
     | 
    
         
             
                context "when supplied with a classification to export" do
         
     | 
| 
       116 
     | 
    
         
            -
                  classification = Treat::Classification.new(:word, :tag, :is_keyword 
     | 
| 
      
 116 
     | 
    
         
            +
                  classification = Treat::Classification.new(:word, :tag, :is_keyword)
         
     | 
| 
       117 
117 
     | 
    
         
             
                  it "returns a data set with the exported features" do
         
     | 
| 
       118 
118 
     | 
    
         
             
                    ds = @sentence.export(classification)
         
     | 
| 
       119 
119 
     | 
    
         
             
                    ds.classification.should eql classification
         
     | 
| 
         @@ -316,13 +316,14 @@ describe Treat::Entities::Entity do 
     | 
|
| 
       316 
316 
     | 
    
         | 
| 
       317 
317 
     | 
    
         
             
              describe "Formatters" do 
         
     | 
| 
       318 
318 
     | 
    
         | 
| 
      
 319 
     | 
    
         
            +
                
         
     | 
| 
      
 320 
     | 
    
         
            +
                before do 
         
     | 
| 
      
 321 
     | 
    
         
            +
                  @serializers = [:xml, :yaml] # Treat::Languages::All::Serializers
         
     | 
| 
      
 322 
     | 
    
         
            +
                  @txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
         
     | 
| 
      
 323 
     | 
    
         
            +
                end
         
     | 
| 
      
 324 
     | 
    
         
            +
                
         
     | 
| 
       319 
325 
     | 
    
         
             
                describe "#serialize" do
         
     | 
| 
       320 
     | 
    
         
            -
             
     | 
| 
       321 
     | 
    
         
            -
                  before :all do 
         
     | 
| 
       322 
     | 
    
         
            -
                    @serializers = [:xml, :yaml] # Treat::Languages::All::Serializers
         
     | 
| 
       323 
     | 
    
         
            -
                    @txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
         
     | 
| 
       324 
     | 
    
         
            -
                  end
         
     | 
| 
       325 
     | 
    
         
            -
                  
         
     | 
| 
      
 326 
     | 
    
         
            +
             
     | 
| 
       326 
327 
     | 
    
         
             
                  context "when called with a file to save to" do
         
     | 
| 
       327 
328 
     | 
    
         | 
| 
       328 
329 
     | 
    
         
             
                    it "serializes a document to the supplied format" do
         
     | 
| 
         @@ -332,24 +333,58 @@ describe Treat::Entities::Entity do 
     | 
|
| 
       332 
333 
     | 
    
         
             
                        s = Treat::Entities::Paragraph.new(@txt)
         
     | 
| 
       333 
334 
     | 
    
         
             
                        s.do(:segment, :tokenize)
         
     | 
| 
       334 
335 
     | 
    
         
             
                        s.serialize(ser, :file => f)
         
     | 
| 
      
 336 
     | 
    
         
            +
                        File.delete(f)
         
     | 
| 
      
 337 
     | 
    
         
            +
                      end
         
     | 
| 
      
 338 
     | 
    
         
            +
                      
         
     | 
| 
      
 339 
     | 
    
         
            +
                    end
         
     | 
| 
      
 340 
     | 
    
         
            +
                    
         
     | 
| 
      
 341 
     | 
    
         
            +
                  end
         
     | 
| 
      
 342 
     | 
    
         
            +
                  
         
     | 
| 
      
 343 
     | 
    
         
            +
                end
         
     | 
| 
      
 344 
     | 
    
         
            +
                  
         
     | 
| 
      
 345 
     | 
    
         
            +
                describe "#unserialize" do
         
     | 
| 
      
 346 
     | 
    
         
            +
                  
         
     | 
| 
      
 347 
     | 
    
         
            +
                  context "when called with a serialized file" do
         
     | 
| 
      
 348 
     | 
    
         
            +
                    
         
     | 
| 
      
 349 
     | 
    
         
            +
                    it "reconstitutes the original entity" do
         
     | 
| 
      
 350 
     | 
    
         
            +
                      @serializers.each do |ser|
         
     | 
| 
      
 351 
     | 
    
         
            +
                      
         
     | 
| 
      
 352 
     | 
    
         
            +
                        f = Treat.spec + 'test.' + ser.to_s
         
     | 
| 
      
 353 
     | 
    
         
            +
                        s = Treat::Entities::Paragraph.new(@txt)
         
     | 
| 
      
 354 
     | 
    
         
            +
                      
         
     | 
| 
      
 355 
     | 
    
         
            +
                        s.set :test_int, 9
         
     | 
| 
      
 356 
     | 
    
         
            +
                        s.set :test_float, 9.9
         
     | 
| 
      
 357 
     | 
    
         
            +
                        s.set :test_string, 'hello'
         
     | 
| 
      
 358 
     | 
    
         
            +
                        s.set :test_sym, :hello
         
     | 
| 
      
 359 
     | 
    
         
            +
                        s.set :test_bool, false
         
     | 
| 
      
 360 
     | 
    
         
            +
                        
         
     | 
| 
      
 361 
     | 
    
         
            +
                        s.do(:segment, :tokenize)
         
     | 
| 
      
 362 
     | 
    
         
            +
                        
         
     | 
| 
      
 363 
     | 
    
         
            +
                        s.serialize(ser, :file => f)
         
     | 
| 
      
 364 
     | 
    
         
            +
                        
         
     | 
| 
       335 
365 
     | 
    
         
             
                        d = Treat::Entities::Document.build(f)
         
     | 
| 
      
 366 
     | 
    
         
            +
                      
         
     | 
| 
      
 367 
     | 
    
         
            +
                        d.test_int.should eql 9
         
     | 
| 
      
 368 
     | 
    
         
            +
                        d.test_float.should eql 9.9
         
     | 
| 
      
 369 
     | 
    
         
            +
                        d.test_string.should eql 'hello'
         
     | 
| 
      
 370 
     | 
    
         
            +
                        d.test_sym.should eql :hello
         
     | 
| 
      
 371 
     | 
    
         
            +
                        d.test_bool.should eql false
         
     | 
| 
      
 372 
     | 
    
         
            +
                      
         
     | 
| 
       336 
373 
     | 
    
         
             
                        d.to_s.should eql @txt
         
     | 
| 
       337 
374 
     | 
    
         
             
                        d.size.should eql s.size
         
     | 
| 
      
 375 
     | 
    
         
            +
                      
         
     | 
| 
       338 
376 
     | 
    
         
             
                        d.token_count.should eql s.token_count
         
     | 
| 
       339 
377 
     | 
    
         
             
                        d.tokens[0].id.should eql s.tokens[0].id
         
     | 
| 
      
 378 
     | 
    
         
            +
                      
         
     | 
| 
       340 
379 
     | 
    
         
             
                        File.delete(f)
         
     | 
| 
       341 
380 
     | 
    
         
             
                      end
         
     | 
| 
       342 
     | 
    
         
            -
             
     | 
| 
      
 381 
     | 
    
         
            +
                    
         
     | 
| 
       343 
382 
     | 
    
         
             
                    end
         
     | 
| 
       344 
383 
     | 
    
         | 
| 
       345 
384 
     | 
    
         
             
                  end
         
     | 
| 
       346 
385 
     | 
    
         | 
| 
       347 
386 
     | 
    
         
             
                end
         
     | 
| 
       348 
387 
     | 
    
         | 
| 
       349 
     | 
    
         
            -
                describe "#unserialize" do
         
     | 
| 
       350 
     | 
    
         
            -
                  
         
     | 
| 
       351 
     | 
    
         
            -
                end
         
     | 
| 
       352 
     | 
    
         
            -
                
         
     | 
| 
       353 
388 
     | 
    
         
             
              end
         
     | 
| 
       354 
389 
     | 
    
         | 
| 
       355 
390 
     | 
    
         
             
              describe "Extractors" do
         
     | 
    
        data/spec/sandbox.rb
    CHANGED
    
    | 
         @@ -1,116 +1,24 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            =begin
         
     | 
| 
       2 
1 
     | 
    
         
             
            require_relative '../lib/treat'
         
     | 
| 
       3 
2 
     | 
    
         | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
              *sentences.values
         
     | 
| 
       7 
     | 
    
         
            -
              .each_with_index
         
     | 
| 
       8 
     | 
    
         
            -
              .sort.reverse
         
     | 
| 
       9 
     | 
    
         
            -
              .map(&:last)
         
     | 
| 
       10 
     | 
    
         
            -
              .sort.take(n))
         
     | 
| 
       11 
     | 
    
         
            -
              .map(&:first)
         
     | 
| 
       12 
     | 
    
         
            -
            end
         
     | 
| 
      
 3 
     | 
    
         
            +
            s = Sentence "Barack Obama was killed last night."
         
     | 
| 
      
 4 
     | 
    
         
            +
            s.tokenize
         
     | 
| 
       13 
5 
     | 
    
         | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
            puts s.word_with_position(2).inspect
         
     | 
| 
       15 
7 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
                
         
     | 
| 
       18 
     | 
    
         
            -
                Treat.debug = true
         
     | 
| 
       19 
     | 
    
         
            -
                Treat.silence = true
         
     | 
| 
      
 8 
     | 
    
         
            +
            s.word_with_position(2).set :highlighted, 1
         
     | 
| 
       20 
9 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                
         
     | 
| 
       28 
     | 
    
         
            -
                c.each_document do |d|
         
     | 
| 
       29 
     | 
    
         
            -
                  
         
     | 
| 
       30 
     | 
    
         
            -
                   sentences = {}
         
     | 
| 
       31 
     | 
    
         
            -
                   
         
     | 
| 
       32 
     | 
    
         
            -
                  d.each_sentence do |sentence|
         
     | 
| 
       33 
     | 
    
         
            -
                    cx = sentence.classify(:training => context)
         
     | 
| 
       34 
     | 
    
         
            -
                    ct = sentence.classify(:training => content)
         
     | 
| 
       35 
     | 
    
         
            -
                    sentences[sentence] = cx[1] + ct[1]
         
     | 
| 
       36 
     | 
    
         
            -
                  end
         
     | 
| 
      
 10 
     | 
    
         
            +
            cl = Treat::Classification.new(
         
     | 
| 
      
 11 
     | 
    
         
            +
              :word,
         
     | 
| 
      
 12 
     | 
    
         
            +
              [[:position, 0]],
         
     | 
| 
      
 13 
     | 
    
         
            +
              :highlighted,
         
     | 
| 
      
 14 
     | 
    
         
            +
              0
         
     | 
| 
      
 15 
     | 
    
         
            +
            )
         
     | 
| 
       37 
16 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
                  puts d.titles[0].to_s
         
     | 
| 
       40 
     | 
    
         
            -
                  puts
         
     | 
| 
       41 
     | 
    
         
            -
                
         
     | 
| 
       42 
     | 
    
         
            -
                  puts extract(sentences, 5).map { |p| p.to_s }.join(' ')
         
     | 
| 
       43 
     | 
    
         
            -
                  
         
     | 
| 
       44 
     | 
    
         
            -
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
            data_set = s.export(cl)
         
     | 
| 
       45 
18 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
      
 19 
     | 
    
         
            +
            s2 = Sentence ''
         
     | 
| 
      
 20 
     | 
    
         
            +
            w = Word 'Hello'
         
     | 
| 
      
 21 
     | 
    
         
            +
            s2 << w
         
     | 
| 
      
 22 
     | 
    
         
            +
            w.set :position, 2
         
     | 
| 
       47 
23 
     | 
    
         | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                c.do :chunk, :segment, :tokenize, :tag, :name_tag, :tf_idf, :keywords
         
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
                # Topic word count ? Synonyms of keywords ?
         
     | 
| 
       52 
     | 
    
         
            -
                # Time expressions?
         
     | 
| 
       53 
     | 
    
         
            -
                classify_content = Treat::Classification.new(
         
     | 
| 
       54 
     | 
    
         
            -
                  :phrase, 
         
     | 
| 
       55 
     | 
    
         
            -
                  [:word_count, :number_count, 
         
     | 
| 
       56 
     | 
    
         
            -
                  :keyword_count, :name_tag_count], 
         
     | 
| 
       57 
     | 
    
         
            -
                  :has_key_content?
         
     | 
| 
       58 
     | 
    
         
            -
                )
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
                classify_context = Treat::Classification.new(
         
     | 
| 
       61 
     | 
    
         
            -
                  :phrase,
         
     | 
| 
       62 
     | 
    
         
            -
                  [:position,
         
     | 
| 
       63 
     | 
    
         
            -
                  :position_from_end,
         
     | 
| 
       64 
     | 
    
         
            -
                  :type_of_parent_zone,
         
     | 
| 
       65 
     | 
    
         
            -
                  :value_of_first_word,
         
     | 
| 
       66 
     | 
    
         
            -
                  :tag_of_first_word
         
     | 
| 
       67 
     | 
    
         
            -
                  ],
         
     | 
| 
       68 
     | 
    
         
            -
                  :has_key_context?,
         
     | 
| 
       69 
     | 
    
         
            -
                  false,
         
     | 
| 
       70 
     | 
    
         
            -
                  :discrete
         
     | 
| 
       71 
     | 
    
         
            -
                )
         
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
                c.each_sentence do |s|
         
     | 
| 
       74 
     | 
    
         
            -
                  puts s.to_s
         
     | 
| 
       75 
     | 
    
         
            -
                  a = STDIN.gets.to_s.strip
         
     | 
| 
       76 
     | 
    
         
            -
                  if a == ''
         
     | 
| 
       77 
     | 
    
         
            -
                    s.set :has_key_content?, false
         
     | 
| 
       78 
     | 
    
         
            -
                    s.set :has_key_context?, false
         
     | 
| 
       79 
     | 
    
         
            -
                  else
         
     | 
| 
       80 
     | 
    
         
            -
                    s.set :has_key_content?, true
         
     | 
| 
       81 
     | 
    
         
            -
                    s.set :has_key_context?, true
         
     | 
| 
       82 
     | 
    
         
            -
                  end
         
     | 
| 
       83 
     | 
    
         
            -
                end
         
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
                context = c.export(classify_context)
         
     | 
| 
       86 
     | 
    
         
            -
                content = c.export(classify_content)
         
     | 
| 
       87 
     | 
    
         
            -
                
         
     | 
| 
       88 
     | 
    
         
            -
                context.save('economist-context.yml')
         
     | 
| 
       89 
     | 
    
         
            -
                content.save('economist-content.yml')
         
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
                context = Treat::DataSet.open('economist-context.yml')
         
     | 
| 
       92 
     | 
    
         
            -
                content = Treat::DataSet.open('economist-content.yml')
         
     | 
| 
       93 
     | 
    
         
            -
                  
         
     | 
| 
       94 
     | 
    
         
            -
                c.each_document do |d|
         
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
                  sentences = {}
         
     | 
| 
       97 
     | 
    
         
            -
                
         
     | 
| 
       98 
     | 
    
         
            -
                  d.each_sentence do |sentence|
         
     | 
| 
       99 
     | 
    
         
            -
                    cx = sentence.classify(:training => context)
         
     | 
| 
       100 
     | 
    
         
            -
                    ct = sentence.classify(:training => content)
         
     | 
| 
       101 
     | 
    
         
            -
                    sentences[sentence] = cx[1] + ct[1]
         
     | 
| 
       102 
     | 
    
         
            -
                  end
         
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
                  puts
         
     | 
| 
       105 
     | 
    
         
            -
                  puts d.titles[0].to_s
         
     | 
| 
       106 
     | 
    
         
            -
                  puts
         
     | 
| 
       107 
     | 
    
         
            -
                
         
     | 
| 
       108 
     | 
    
         
            -
                  puts extract(sentences, 5).map { |p| p.to_s }.join(' ')
         
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
                end
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
     | 
    
         
            -
             
     | 
| 
       113 
     | 
    
         
            -
              end
         
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
            end
         
     | 
| 
       116 
     | 
    
         
            -
            =end
         
     | 
| 
      
 24 
     | 
    
         
            +
            puts w.classify(:mlp, :training => data_set).inspect
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: treat
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.0.2
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,7 +9,7 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2012-04- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2012-04-14 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: rubyzip
         
     | 
| 
         @@ -75,8 +75,7 @@ dependencies: 
     | 
|
| 
       75 
75 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
       76 
76 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       77 
77 
     | 
    
         
             
                    version: 0.9.2
         
     | 
| 
       78 
     | 
    
         
            -
            description: ! ' Treat is a  
     | 
| 
       79 
     | 
    
         
            -
              and natural language processing. '
         
     | 
| 
      
 78 
     | 
    
         
            +
            description: ! ' Treat is a full-fledged natural language processing toolkit for Ruby. '
         
     | 
| 
       80 
79 
     | 
    
         
             
            email:
         
     | 
| 
       81 
80 
     | 
    
         
             
            - louis.mullie@gmail.com
         
     | 
| 
       82 
81 
     | 
    
         
             
            executables: []
         
     | 
| 
         @@ -84,6 +83,7 @@ extensions: [] 
     | 
|
| 
       84 
83 
     | 
    
         
             
            extra_rdoc_files: []
         
     | 
| 
       85 
84 
     | 
    
         
             
            files:
         
     | 
| 
       86 
85 
     | 
    
         
             
            - lib/treat/ai/classifiers/id3.rb
         
     | 
| 
      
 86 
     | 
    
         
            +
            - lib/treat/ai/classifiers/mlp.rb
         
     | 
| 
       87 
87 
     | 
    
         
             
            - lib/treat/ai.rb
         
     | 
| 
       88 
88 
     | 
    
         
             
            - lib/treat/categories.rb
         
     | 
| 
       89 
89 
     | 
    
         
             
            - lib/treat/categorizable.rb
         
     | 
| 
         @@ -128,6 +128,7 @@ files: 
     | 
|
| 
       128 
128 
     | 
    
         
             
            - lib/treat/formatters/readers/pdf.rb
         
     | 
| 
       129 
129 
     | 
    
         
             
            - lib/treat/formatters/readers/txt.rb
         
     | 
| 
       130 
130 
     | 
    
         
             
            - lib/treat/formatters/readers/xml.rb
         
     | 
| 
      
 131 
     | 
    
         
            +
            - lib/treat/formatters/serializers/mongo.rb
         
     | 
| 
       131 
132 
     | 
    
         
             
            - lib/treat/formatters/serializers/xml.rb
         
     | 
| 
       132 
133 
     | 
    
         
             
            - lib/treat/formatters/serializers/yaml.rb
         
     | 
| 
       133 
134 
     | 
    
         
             
            - lib/treat/formatters/unserializers/autoselect.rb
         
     | 
| 
         @@ -244,5 +245,5 @@ rubyforge_project: 
     | 
|
| 
       244 
245 
     | 
    
         
             
            rubygems_version: 1.8.21
         
     | 
| 
       245 
246 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       246 
247 
     | 
    
         
             
            specification_version: 3
         
     | 
| 
       247 
     | 
    
         
            -
            summary:  
     | 
| 
      
 248 
     | 
    
         
            +
            summary: Text Retrieval, Extraction and Annotation Toolkit.
         
     | 
| 
       248 
249 
     | 
    
         
             
            test_files: []
         
     |