RubyGems - treat - Versions diffs - 1.1.0 → 1.1.1 - Mend

treat 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

data/LICENSE +1 -1
data/README.md +3 -3
data/lib/treat/config.rb +10 -0
data/lib/treat/core/data_set.rb +80 -32
data/lib/treat/core/feature.rb +35 -0
data/lib/treat/core/problem.rb +43 -0
data/lib/treat/core/question.rb +27 -0
data/lib/treat/entities/abilities/buildable.rb +5 -3
data/lib/treat/entities/abilities/exportable.rb +4 -4
data/lib/treat/entities/collection.rb +1 -1
data/lib/treat/entities/document.rb +1 -1
data/lib/treat/entities/group.rb +8 -5
data/lib/treat/entities/section.rb +1 -1
data/lib/treat/entities/token.rb +20 -8
data/lib/treat/entities/zone.rb +6 -5
data/lib/treat/loaders/linguistics.rb +18 -19
data/lib/treat/loaders/stanford.rb +3 -2
data/lib/treat/version.rb +1 -1
data/lib/treat/workers/extractors/language/what_language.rb +53 -57
data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -5
data/lib/treat/workers/formatters/serializers/mongo.rb +33 -27
data/lib/treat/workers/formatters/unserializers/mongo.rb +14 -36
data/lib/treat/workers/learners/classifiers/id3.rb +4 -5
data/lib/treat/workers/learners/classifiers/mlp.rb +1 -1
data/lib/treat/workers.rb +1 -1
data/spec/entity.rb +7 -5
data/spec/phrase.rb +2 -2
data/spec/zone.rb +2 -3
metadata +37 -15
data/bin/stanford/bridge.jar +0 -0
data/bin/stanford/joda-time.jar +0 -0
data/bin/stanford/stanford-corenlp.jar +0 -0
data/bin/stanford/stanford-parser.jar +0 -0
data/bin/stanford/xom.jar +0 -0
data/files/21552208.html +0 -683
data/files/3_2_release_notes.html +0 -766
data/files/nethttp-cheat-sheet-2940.html +0 -395
data/files/weather-central-canada-heat-wave.html +0 -1370
data/lib/treat/core/classification.rb +0 -63
data/lib/treat/core/server.rb +0 -3
data/spec/sandbox.rb +0 -223
data/tmp/english.yaml +0 -10340

data/lib/treat/core/classification.rb DELETED

@@ -1,63 +0,0 @@
-class Treat::Core::Classification
-  attr_reader :types
-  attr_reader :features
-  attr_reader :question
-  attr_reader :labels
-  attr_reader :mode
-  attr_reader :default
-  def initialize(type_or_types, feature_or_features,
-    question, default = false, mode = :continuous)
-    @types, @features,
-    @question, @default =
-    [*type_or_types],
-    [*feature_or_features],
-    question, default
-    @mode = mode
-    @labels = []
-    @features.each do |cmd|
-      if cmd.is_a?(Array)
-        @labels << cmd[0]
-      else
-        @labels << cmd
-      end
-    end
-  end
-  def export_item(e, include_question = true)
-    line = []
-    @features.each do |cmd|
-      dflt = nil
-      begin
-        if cmd.is_a?(Array)
-          if cmd.size == 3
-            r = cmd[1].call(e)
-            dflt = cmd[2]
-            line << (r ? r : dflt)
-          elsif cmd.size == 2
-            r = e.send(cmd[0])
-            dflt = cmd[1]
-            line << (r ? r : dflt)
-          end
-        else
-          line << e.send(cmd)
-        end
-      end
-    end
-    if include_question
-      if e.has?(@question)
-        line << e.get(@question)
-      else
-        line << @default
-      end
-    end
-    line
-  end
-end

data/lib/treat/core/server.rb DELETED

@@ -1,3 +0,0 @@
-module Treat::Core::Server
-  # To implement.
-end

data/spec/sandbox.rb DELETED

@@ -1,223 +0,0 @@
-#encoding: utf-8
-require_relative '../lib/treat'
-require 'ruby-prof'
-Treat.databases.mongo.db = 'test2_treat'
-d = Document 'merkozy_rides_again.txt'
-d.do :chunk, :segment, :tokenize, :category, :tag
-d.serialize :mongo
-Treat::Entities::Document.from_db(:mongo, id: d.id, stop_at: :sentence).print_tree
-=begin
-d = Document 'http://www.cbc.ca/news/canada/story/2012/07/06/weather-central-canada-heat-wave.html'
-d.do :chunk, :segment, :tokenize, :tag, :category
-d.serialize :mongo, db: "test_treat"
-d2 = Treat::Entities::Document.from_db(:mongo, id: d.id)
-puts d2.inspect
-abort
-require 'benchmark'
-Benchmark.bm do |x|
-  x.report "Mongo serialization" do
-    10.times do
-      d.serialize :mongo, db: "test_treat"
-    end
-  end
-  x.report "Mongo deserialization" do
-    1.times do
-      Treat::Entities::Document.from_db(:mongo, id: d.id)
-    end
-  end
-end
-=end
-=begin
-f = Treat.paths.spec + 'samples/mathematicians/leibniz.txt'
-d = Treat::Entities::Document.build(f)
-d.do :chunk, :segment
-d.serialize :mongo, db: 'testing1234'
-d2 = Treat::Entities::Document.from_db(:mongo, db: 'testing1234', id: d.id)
-puts d2.to_s
-puts d2.print_tree
-=end
-=begin
-Treat.databases.mongo.db = 'treat_testing'
-p = Phrase 'this is'
-p.set :tag, 'VP'
-w = Word 'this'
-w.set :category, :determiner
-w2 = Word 'is'
-w2.set :category, 'verb'
-p << w
-p << w2
-p.serialize :mongo
-p2 = Phrase "#{p.id}.mongo"
-p2.print_tree
-=end
-=begin
-entity = Treat::Entities::Entity.create(
-  id: 1,
-  value: 'test',
-  children: [1, 2, 3],
-  features: [a: 'a', b: 'b', c: 'c']
-)
-entity.save
-=end
-w = Word 'hello'
-=begin
-require_relative '../lib/treat/loaders/stanford'
-Treat::Loaders::Stanford.model_path = '/ruby/stanford/models/'
-Treat::Loaders::Stanford.jar_path = '/ruby/stanford/bin/'
-class Treat::Entities::Sentence
-  def long_word_count
-    i = 0
-    each_word do |word|
-      i += 1 if word.syllable_count > 3
-    end
-    i
-  end
-  def flesch_kincaid
-    syllable_count / word_count
-  end
-  def syllable_count
-    c = 0
-    each_word do |word|
-      c += word.syllable_count
-    end
-    c
-  end
-end
-class Treat::Entities::Word
-  def syllable_count
-    w = to_s.downcase
-    return 1 if w.length <= 3
-    w.sub!(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '')
-    w.sub!(/^y/, '')
-    w.scan(/[aeiouy]{1,2}/).size
-  end
-end
-c = Collection Treat.paths.spec + 'samples/kant'
-d = Document Treat.paths.spec + 'samples/kant/kant_enlightnement.txt'
-d.do :chunk, :segment, :tokenize, :tag, :category, :name_tag
-# Position of sentence in containers - clustering??
-d.each_sentence do |s|
-  s.set :section_p, (s.parent_section.position.to_f / s.parent_document.children.size.to_f).round(2)
-  s.set :zone_p, (s.parent_zone.position.to_f / s.parent_section.children.size.to_f).round(2)
-  s.set :sentence_p, (s.position.to_f / s.parent_zone.children.size.to_f).round(2)
-end
-# Part of speech partitionning of the sentence
-d.each_sentence do |s|
-   s.set :noun_density, (s.noun_count.to_f / (s.word_count + 1).to_f).round(2)
-   s.set :verb_density, (s.verb_count.to_f / (s.word_count + 1).to_f).round(2)
-   s.set :adjective_density, (s.adjective_count.to_f / (s.word_count + 1).to_f).round(2)
-   s.set :adverb_density, (s.adverb_count.to_f / (s.word_count + 1).to_f).round(2)
-end
-# Sentence readability -> length and long words.
-d.each_sentence do |s|
-  s.set :word_count, s.word_count
-  s.set :long_word_count, s.long_word_count
-  s.set :flesch_kincaid, s.flesch_kincaid
-end
-# Domain specificity -> named entities according to domain.
-d.each_sentence do |s|
-  s.set :person_count, s.entities_with_feature(:name_tag, 'person').size
-  s.set :time_count, s.entities_with_feature(:name_tag, 'time').size
-  s.set :location_count, s.entities_with_feature(:name_tag, 'location').size
-  s.set :number_count, s.number_count
-  puts s.inspect
-end
-d.each_sentence do |s|
-  if Random.rand() >= 0.5
-    s.set :golden, true
-  else
-    s.set :golden, false
-  end
-end
-golden = []
-not_golden = []
-d.each_sentence do |s|
-  if s.golden
-    golden << s
-  else
-    not_golden << s
-  end
-end
-i = 0
-golden.each do |s|
-  puts s.sentence_p.to_s + ' ' + not_golden[i].sentence_p.to_s
-  i += 1
-end
-=end
-=begin
-d = Document 'http://www.cbc.ca/news/canada/montreal/story/2012/06/04/montreal-magnotta-search.html'
-d.do :chunk, :segment
-d.each_zone do |z|
-  puts '-------' + z.type.to_s
-  z.do tokenize: :ptb
-  z.each_sentence do |s|
-    puts s.to_s
-  end
-  #puts z.to_s
-  puts '-------'
-end
-abort
-Treat::Databases.connect :mongo
-p = Phrase ''
-w = Word 'test'
-p << w
-p.print_tree
-p.serialize :mongo, :db => 'treat'
-p2 = Treat::Workers::Formatters::Unserializers::Mongo.unserialize(Treat::Entities::Phrase.new('', p.id))
-p2.print_tree
-=end