treat 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/LICENSE +1 -1
  2. data/lib/treat/config/core/acronyms.rb +2 -1
  3. data/lib/treat/config/libraries/punkt.rb +1 -0
  4. data/lib/treat/config/libraries/reuters.rb +1 -0
  5. data/lib/treat/core/data_set.rb +125 -66
  6. data/lib/treat/core/export.rb +59 -0
  7. data/lib/treat/core/problem.rb +101 -18
  8. data/lib/treat/core/question.rb +23 -7
  9. data/lib/treat/entities/abilities/iterable.rb +7 -3
  10. data/lib/treat/entities/abilities/stringable.rb +5 -5
  11. data/lib/treat/entities/collection.rb +10 -6
  12. data/lib/treat/entities/entity.rb +1 -1
  13. data/lib/treat/helpers/objtohash.rb +8 -0
  14. data/lib/treat/loaders/stanford.rb +10 -8
  15. data/lib/treat/version.rb +1 -1
  16. data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
  17. data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
  18. data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
  19. data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
  20. data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
  21. data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
  22. data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
  23. data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
  24. data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
  25. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
  26. data/lib/treat/workers/processors/parsers/enju.rb +17 -17
  27. data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
  28. data/spec/collection.rb +3 -3
  29. data/spec/core.rb +430 -21
  30. data/spec/document.rb +1 -1
  31. data/spec/entity.rb +2 -8
  32. data/spec/helper.rb +34 -0
  33. data/spec/phrase.rb +1 -1
  34. data/spec/sandbox.rb +31 -8
  35. data/spec/token.rb +1 -1
  36. data/spec/treat.rb +1 -1
  37. data/spec/word.rb +1 -1
  38. data/spec/zone.rb +1 -1
  39. metadata +9 -8
  40. data/files/3_2_release_notes.html +0 -766
  41. data/files/bc-monty-robinson-sentencing.html +0 -1569
  42. data/files/syria-aleppo-clashes.html +0 -1376
  43. data/lib/treat/core/feature.rb +0 -42
  44. data/lib/treat/core/node.rb +0 -251
  45. data/spec/node.rb +0 -117
@@ -33,14 +33,14 @@ class Treat::Workers::Formatters::Serializers::XML
33
33
  end
34
34
  end
35
35
  ############ To be refactored
36
- unless entity.dependencies.empty?
37
- attributes << "dependencies='"
36
+ unless entity.edges.empty?
37
+ attributes << "edges='"
38
38
  a = []
39
- entity.dependencies.each do |dependency|
40
- a << ("{target: #{dependency.target}, "+
41
- "type: #{dependency.type}, " +
42
- "directed: #{dependency.directed}, " +
43
- "direction: #{dependency.direction}}" )
39
+ entity.edges.each do |edge|
40
+ a << ("{target: #{edge.target}, "+
41
+ "type: #{edge.type}, " +
42
+ "directed: #{edge.directed}, " +
43
+ "direction: #{edge.direction}}" )
44
44
  end
45
45
  # Structs.
46
46
  attributes << a.join(',') + "'"
@@ -22,7 +22,7 @@ module Treat::Workers::Formatters::Unserializers::Mongo
22
22
  entity.type.to_s.capitalize.intern).superclass).downcase
23
23
  supertype = entity.type.to_s if supertype == 'entity'
24
24
  supertypes = supertype + 's'
25
-
25
+ supertypes = 'documents' if entity.type == :collection
26
26
  coll = @@database.collection(supertypes)
27
27
  records = coll.find(selector).to_a
28
28
 
@@ -30,16 +30,24 @@ module Treat::Workers::Formatters::Unserializers::Mongo
30
30
  raise Treat::Exception,
31
31
  "Couldn't find any records using " +
32
32
  "selector #{selector.inspect}."
33
- elsif records.size == 1
34
- self.do_unserialize(
35
- records.first, options)
36
- else
37
- matches = []
33
+ end
34
+
35
+ if entity.type == :document
36
+ if records.size == 1
37
+ self.do_unserialize(
38
+ records.first, options)
39
+ else
40
+ raise Treat::Exception,
41
+ "More than one document matched" +
42
+ "your selector #{selector.inspect}."
43
+ end
44
+ elsif entity.type == :collection
45
+ collection = Treat::Entities::Collection.new
38
46
  records.each do |record|
39
- matches << self.
47
+ collection << self.
40
48
  do_unserialize(record, options)
41
49
  end
42
- matches
50
+ collection
43
51
  end
44
52
 
45
53
  end
@@ -35,13 +35,13 @@ module Treat::Workers::Formatters::Unserializers::XML
35
35
 
36
36
  id = nil; value = ''
37
37
  attributes = {}
38
- dependencies = []
38
+ edges = []
39
39
 
40
40
  unless xml_reader.attributes.size == 0
41
41
  xml_reader.attributes.each_pair do |k,v|
42
42
  if k == 'id'
43
43
  id = v.to_i
44
- elsif k == 'dependencies'
44
+ elsif k == 'edges'
45
45
  a = v.split('--')
46
46
  a.each do |b|
47
47
  c = b.split(';')
@@ -54,7 +54,7 @@ module Treat::Workers::Formatters::Unserializers::XML
54
54
  end
55
55
 
56
56
  target, type, directed, direction = *vals
57
- dependencies << [
57
+ edges << [
58
58
  target.to_i,
59
59
  type,
60
60
  (directed == 'true' ? true : false),
@@ -87,8 +87,8 @@ module Treat::Workers::Formatters::Unserializers::XML
87
87
  end
88
88
  current_element.features = attributes
89
89
  current_element.features = attributes
90
- dependencies.each do |dependency|
91
- target, type, directed, direction = *dependency
90
+ edges.each do |edge|
91
+ target, type, directed, direction = *edge
92
92
  current_element.link(target, type, directed, direction)
93
93
  end
94
94
  else
@@ -106,18 +106,18 @@ class Treat::Workers::Formatters::Visualizers::DOT
106
106
  string << "\n#{entity.parent.id} -- #{entity.id};"
107
107
  end
108
108
  end
109
- # Dependencies.
110
- if entity.has_dependencies?
111
- entity.dependencies.each do |dependency|
109
+ # edges.
110
+ if entity.has_edges?
111
+ entity.edges.each do |edge|
112
112
  dir = ''
113
- if dependency.directed == true
114
- dir = dependency.direction == 1 ? 'forward' : 'back'
113
+ if edge.directed == true
114
+ dir = edge.direction == 1 ? 'forward' : 'back'
115
115
  dir = ",dir=#{dir}"
116
116
  else
117
117
  dir = ",dir=both"
118
118
  end
119
- string << "\n#{entity.id} -- #{dependency.target}"
120
- string << "[label=#{dependency.type}#{dir}]"
119
+ string << "\n#{entity.id} -- #{edge.target}"
120
+ string << "[label=#{edge.type}#{dir}]"
121
121
  end
122
122
  end
123
123
  # Recurse.
@@ -11,8 +11,9 @@ class Treat::Workers::Learners::Classifiers::ID3
11
11
 
12
12
  if !@@classifiers[cl]
13
13
  dec_tree = DecisionTree::ID3Tree.new(
14
- cl.labels.map { |l| l.to_s }, set.items, cl.question.default,
15
- cl.question.type)
14
+ cl.feature_labels.map { |l| l.to_s },
15
+ set.items.map { |i| i[:features]},
16
+ cl.question.default, cl.question.type)
16
17
  dec_tree.train
17
18
  @@classifiers[cl] = dec_tree
18
19
  else
@@ -20,7 +21,7 @@ class Treat::Workers::Learners::Classifiers::ID3
20
21
  dec_tree.graph('testingbitch')
21
22
  end
22
23
  dec_tree.predict(
23
- cl.export_item(entity, false)
24
+ cl.export_features(entity, false)
24
25
  )
25
26
  end
26
27
 
@@ -0,0 +1,53 @@
1
+ class Treat::Workers::Learners::Classifiers::Linear
2
+
3
+ require 'linear'
4
+
5
+ @@classifiers = {}
6
+
7
+ DefaultOptions = {
8
+ bias: 1,
9
+ eps: 0.1,
10
+ solver_type: MCSVM_CS
11
+ }
12
+
13
+ def self.classify(entity, options = {})
14
+
15
+ options = DefaultOptions.merge(options)
16
+ set = options[:training]
17
+ problem = set.problem
18
+
19
+ if !@@classifiers[problem]
20
+ labels = problem.question.labels
21
+ unless labels
22
+ raise Treat::Exception,
23
+ "LibLinear requires that you provide the possible " +
24
+ "labels to assign to classification items when " +
25
+ "specifying the question."
26
+ end
27
+ param = LParameter.new
28
+ param.solver_type = options[:solver_type]
29
+ param.eps = options[:eps]
30
+ bias = options[:bias]
31
+ data = set.items.map do |item|
32
+ self.array_to_hash(item[:features])
33
+ end
34
+ prob = LProblem.new(labels, data, bias)
35
+ @@classifiers[problem] =
36
+ LModel.new(prob, param)
37
+ end
38
+
39
+ @@classifiers[problem].predict(
40
+ self.array_to_hash(problem.
41
+ export_features(entity, false)))
42
+
43
+ end
44
+
45
+ def self.array_to_hash(array)
46
+ hash = {}
47
+ 0.upto(array.length - 1) do |i|
48
+ hash[i] = array[i]
49
+ end
50
+ hash
51
+ end
52
+
53
+ end
@@ -11,11 +11,11 @@ class Treat::Workers::Learners::Classifiers::MLP
11
11
  cl = set.problem
12
12
 
13
13
  if !@@mlps[cl]
14
- net = Ai4r::NeuralNetwork::
15
- Backpropagation.new([cl.labels.size, 3, 1])
14
+ net = Ai4r::NeuralNetwork::Backpropagation.new(
15
+ [cl.feature_labels.size, 3, 1])
16
16
  set.items.each do |item|
17
- inputs = item[0..-2]
18
- outputs = [item[-1]]
17
+ inputs = item[:features][0..-2]
18
+ outputs = [item[:features][-1]]
19
19
  net.train(inputs, outputs)
20
20
  end
21
21
  @@mlps[cl] = net
@@ -23,7 +23,7 @@ class Treat::Workers::Learners::Classifiers::MLP
23
23
  net = @@mlps[cl]
24
24
  end
25
25
 
26
- net.eval(cl.export_item(entity, false))[0]
26
+ net.eval(cl.export_features(entity, false))[0]
27
27
 
28
28
  end
29
29
 
@@ -0,0 +1,31 @@
1
+ class Treat::Workers::Learners::Classifiers::SVM
2
+
3
+ require 'svm'
4
+
5
+ @@classifiers = {}
6
+
7
+ def self.classify(entity, options = {})
8
+
9
+ set = options[:training]
10
+ problem = set.problem
11
+
12
+ if !@@classifiers[problem]
13
+ labels = problem.question.labels
14
+ unless labels
15
+ raise Treat::Exception,
16
+ "LibSVM requires that you provide the possible " +
17
+ "labels to assign to classification items when " +
18
+ "specifying the question."
19
+ end
20
+ data = set.items.map { |item| item[:features] }
21
+ prob = Problem.new(labels, data)
22
+ param = Parameter.new(:kernel_type => LINEAR, :C => 10)
23
+ @@classifiers[problem] = Model.new(prob, param)
24
+ end
25
+
26
+ @@classifiers[problem].predict_probability(
27
+ problem.export_features(entity, false))[0]
28
+
29
+ end
30
+
31
+ end
@@ -53,8 +53,10 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
53
53
  def self.init_tagger(language)
54
54
  Treat::Loaders::Stanford.load(language)
55
55
  model = StanfordCoreNLP::Config::Models[:pos][language]
56
- model = Treat.paths.models + 'stanford/' +
57
- StanfordCoreNLP::Config::ModelFolders[:pos] + model
56
+ model_path = Treat.libraries.stanford.model_path ||
57
+ Treat.paths.models + 'stanford/'
58
+ model = model_path + StanfordCoreNLP::
59
+ Config::ModelFolders[:pos] + model
58
60
  @@taggers[language] ||=
59
61
  StanfordCoreNLP::MaxentTagger.new(model)
60
62
  end
@@ -38,7 +38,7 @@ module Treat::Workers::Processors::Parsers::Enju
38
38
  val = entity.to_s
39
39
 
40
40
  @@id_table = {}
41
- @@dependencies_table = {}
41
+ @@edges_table = {}
42
42
 
43
43
  stdin, stdout = proc
44
44
  text, remove_last = valid_text(val)
@@ -63,7 +63,7 @@ module Treat::Workers::Processors::Parsers::Enju
63
63
  end
64
64
 
65
65
  link_heads(entity)
66
- add_dependencies(entity)
66
+ add_edges(entity)
67
67
  end
68
68
 
69
69
  # Return the process running Enju.
@@ -102,12 +102,12 @@ module Treat::Workers::Processors::Parsers::Enju
102
102
  pd = cd
103
103
  next
104
104
  end
105
- # Get and format attributes and dependencies.
105
+ # Get and format attributes and edges.
106
106
  attributes = reader.attributes
107
107
  id = attributes.delete('id')
108
- new_attr = {}; dependencies = {}
108
+ new_attr = {}; edges = {}
109
109
  unless attributes.size == 0
110
- new_attr, dependencies =
110
+ new_attr, edges =
111
111
  cleanup_attributes(reader.name, attributes)
112
112
  end
113
113
  # Create the appropriate entity for the
@@ -117,17 +117,17 @@ module Treat::Workers::Processors::Parsers::Enju
117
117
  when 'sentence'
118
118
  entity = Treat::Entities::Sentence.new('')
119
119
  @@id_table[id] = entity.id
120
- @@dependencies_table[entity.id] = dependencies
120
+ @@edges_table[entity.id] = edges
121
121
  entity.features = new_attr
122
122
  when 'cons'
123
123
  entity = entity <<
124
124
  Treat::Entities::Phrase.new('')
125
125
  @@id_table[id] = entity.id
126
- @@dependencies_table[entity.id] = dependencies
126
+ @@edges_table[entity.id] = edges
127
127
  entity.features = new_attr
128
128
  when 'tok'
129
129
  tmp_attributes = new_attr
130
- tmp_dependencies = dependencies
130
+ tmp_edges = edges
131
131
  else
132
132
  current_value = reader.value.gsub(/\s+/, "")
133
133
  unless current_value.size == 0
@@ -136,7 +136,7 @@ module Treat::Workers::Processors::Parsers::Enju
136
136
  if entity.is_a?(Treat::Entities::Word)
137
137
  entity.features = tmp_attributes
138
138
  @@id_table[id] = entity.id
139
- @@dependencies_table[entity.id] = tmp_dependencies
139
+ @@edges_table[entity.id] = tmp_edges
140
140
  else
141
141
  # Do something useful here
142
142
  entity.set :tag, 'SYM'
@@ -179,15 +179,15 @@ module Treat::Workers::Processors::Parsers::Enju
179
179
  end
180
180
  end
181
181
 
182
- # Add dependencies a posteriori to a parsed entity.
183
- def self.add_dependencies(entity2)
182
+ # Add edges a posteriori to a parsed entity.
183
+ def self.add_edges(entity2)
184
184
 
185
185
  entity2.each_entity(:word, :phrase) do |entity|
186
- @@dependencies_table.each_pair do |id, dependencies|
187
- next if dependencies.nil?
186
+ @@edges_table.each_pair do |id, edges|
187
+ next if edges.nil?
188
188
  entity = entity2.root.find(id)
189
189
  next if entity.nil?
190
- dependencies.each_pair do |argument, type|
190
+ edges.each_pair do |argument, type|
191
191
  # Skip this argument if we
192
192
  # don't know the target node.
193
193
  next if argument == 'unk'
@@ -205,7 +205,7 @@ module Treat::Workers::Processors::Parsers::Enju
205
205
  def self.cleanup_attributes(name, attributes)
206
206
 
207
207
  new_attr = {}
208
- dependencies = {}
208
+ edges = {}
209
209
  pred = attributes.delete('pred')
210
210
 
211
211
  attributes.each_pair do |attribute2, value|
@@ -214,7 +214,7 @@ module Treat::Workers::Processors::Parsers::Enju
214
214
 
215
215
  if attribute == 'arg1' ||
216
216
  attribute == 'arg2'
217
- dependencies[value] = pred
217
+ edges[value] = pred
218
218
  next
219
219
  end
220
220
 
@@ -256,7 +256,7 @@ module Treat::Workers::Processors::Parsers::Enju
256
256
  new_attr.delete :base
257
257
  end
258
258
 
259
- return new_attr, dependencies
259
+ return new_attr, edges
260
260
 
261
261
  end
262
262
 
@@ -63,7 +63,9 @@ module Treat::Workers::Processors::Segmenters::Punkt
63
63
  if options[:model]
64
64
  model = options[:model]
65
65
  else
66
- model = "#{Treat.paths.models}punkt/#{lang}.yaml"
66
+ model_path = Treat.libraries.punkt.model_path ||
67
+ Treat.paths.models + 'punkt/'
68
+ model = model_path + "#{lang}.yaml"
67
69
  unless File.readable?(model)
68
70
  raise Treat::Exception,
69
71
  "Could not get the language model " +
data/spec/collection.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Collection do
4
4
 
@@ -29,7 +29,7 @@ describe Treat::Entities::Collection do
29
29
  f = Treat.paths.spec + 'samples/test'
30
30
  c = Treat::Entities::Collection.build(f)
31
31
  c << Treat::Entities::Document.new
32
- c.size.should eql 2
32
+ c.size.should eql 1
33
33
  FileUtils.rm_rf(f)
34
34
  end
35
35
  end
@@ -45,7 +45,7 @@ describe Treat::Entities::Collection do
45
45
  it "recursively searches the folder for " +
46
46
  "files and opens them into a collection of documents" do
47
47
  collection = Treat::Entities::Collection.build(@file)
48
- collection.size.should eql 6
48
+ collection.size.should eql 5
49
49
  end
50
50
 
51
51
  end