treat 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. data/LICENSE +1 -1
  2. data/lib/treat/config/core/acronyms.rb +2 -1
  3. data/lib/treat/config/libraries/punkt.rb +1 -0
  4. data/lib/treat/config/libraries/reuters.rb +1 -0
  5. data/lib/treat/core/data_set.rb +125 -66
  6. data/lib/treat/core/export.rb +59 -0
  7. data/lib/treat/core/problem.rb +101 -18
  8. data/lib/treat/core/question.rb +23 -7
  9. data/lib/treat/entities/abilities/iterable.rb +7 -3
  10. data/lib/treat/entities/abilities/stringable.rb +5 -5
  11. data/lib/treat/entities/collection.rb +10 -6
  12. data/lib/treat/entities/entity.rb +1 -1
  13. data/lib/treat/helpers/objtohash.rb +8 -0
  14. data/lib/treat/loaders/stanford.rb +10 -8
  15. data/lib/treat/version.rb +1 -1
  16. data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
  17. data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
  18. data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
  19. data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
  20. data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
  21. data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
  22. data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
  23. data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
  24. data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
  25. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
  26. data/lib/treat/workers/processors/parsers/enju.rb +17 -17
  27. data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
  28. data/spec/collection.rb +3 -3
  29. data/spec/core.rb +430 -21
  30. data/spec/document.rb +1 -1
  31. data/spec/entity.rb +2 -8
  32. data/spec/helper.rb +34 -0
  33. data/spec/phrase.rb +1 -1
  34. data/spec/sandbox.rb +31 -8
  35. data/spec/token.rb +1 -1
  36. data/spec/treat.rb +1 -1
  37. data/spec/word.rb +1 -1
  38. data/spec/zone.rb +1 -1
  39. metadata +9 -8
  40. data/files/3_2_release_notes.html +0 -766
  41. data/files/bc-monty-robinson-sentencing.html +0 -1569
  42. data/files/syria-aleppo-clashes.html +0 -1376
  43. data/lib/treat/core/feature.rb +0 -42
  44. data/lib/treat/core/node.rb +0 -251
  45. data/spec/node.rb +0 -117
@@ -33,14 +33,14 @@ class Treat::Workers::Formatters::Serializers::XML
33
33
  end
34
34
  end
35
35
  ############ To be refactored
36
- unless entity.dependencies.empty?
37
- attributes << "dependencies='"
36
+ unless entity.edges.empty?
37
+ attributes << "edges='"
38
38
  a = []
39
- entity.dependencies.each do |dependency|
40
- a << ("{target: #{dependency.target}, "+
41
- "type: #{dependency.type}, " +
42
- "directed: #{dependency.directed}, " +
43
- "direction: #{dependency.direction}}" )
39
+ entity.edges.each do |edge|
40
+ a << ("{target: #{edge.target}, "+
41
+ "type: #{edge.type}, " +
42
+ "directed: #{edge.directed}, " +
43
+ "direction: #{edge.direction}}" )
44
44
  end
45
45
  # Structs.
46
46
  attributes << a.join(',') + "'"
@@ -22,7 +22,7 @@ module Treat::Workers::Formatters::Unserializers::Mongo
22
22
  entity.type.to_s.capitalize.intern).superclass).downcase
23
23
  supertype = entity.type.to_s if supertype == 'entity'
24
24
  supertypes = supertype + 's'
25
-
25
+ supertypes = 'documents' if entity.type == :collection
26
26
  coll = @@database.collection(supertypes)
27
27
  records = coll.find(selector).to_a
28
28
 
@@ -30,16 +30,24 @@ module Treat::Workers::Formatters::Unserializers::Mongo
30
30
  raise Treat::Exception,
31
31
  "Couldn't find any records using " +
32
32
  "selector #{selector.inspect}."
33
- elsif records.size == 1
34
- self.do_unserialize(
35
- records.first, options)
36
- else
37
- matches = []
33
+ end
34
+
35
+ if entity.type == :document
36
+ if records.size == 1
37
+ self.do_unserialize(
38
+ records.first, options)
39
+ else
40
+ raise Treat::Exception,
41
+ "More than one document matched" +
42
+ "your selector #{selector.inspect}."
43
+ end
44
+ elsif entity.type == :collection
45
+ collection = Treat::Entities::Collection.new
38
46
  records.each do |record|
39
- matches << self.
47
+ collection << self.
40
48
  do_unserialize(record, options)
41
49
  end
42
- matches
50
+ collection
43
51
  end
44
52
 
45
53
  end
@@ -35,13 +35,13 @@ module Treat::Workers::Formatters::Unserializers::XML
35
35
 
36
36
  id = nil; value = ''
37
37
  attributes = {}
38
- dependencies = []
38
+ edges = []
39
39
 
40
40
  unless xml_reader.attributes.size == 0
41
41
  xml_reader.attributes.each_pair do |k,v|
42
42
  if k == 'id'
43
43
  id = v.to_i
44
- elsif k == 'dependencies'
44
+ elsif k == 'edges'
45
45
  a = v.split('--')
46
46
  a.each do |b|
47
47
  c = b.split(';')
@@ -54,7 +54,7 @@ module Treat::Workers::Formatters::Unserializers::XML
54
54
  end
55
55
 
56
56
  target, type, directed, direction = *vals
57
- dependencies << [
57
+ edges << [
58
58
  target.to_i,
59
59
  type,
60
60
  (directed == 'true' ? true : false),
@@ -87,8 +87,8 @@ module Treat::Workers::Formatters::Unserializers::XML
87
87
  end
88
88
  current_element.features = attributes
89
89
  current_element.features = attributes
90
- dependencies.each do |dependency|
91
- target, type, directed, direction = *dependency
90
+ edges.each do |edge|
91
+ target, type, directed, direction = *edge
92
92
  current_element.link(target, type, directed, direction)
93
93
  end
94
94
  else
@@ -106,18 +106,18 @@ class Treat::Workers::Formatters::Visualizers::DOT
106
106
  string << "\n#{entity.parent.id} -- #{entity.id};"
107
107
  end
108
108
  end
109
- # Dependencies.
110
- if entity.has_dependencies?
111
- entity.dependencies.each do |dependency|
109
+ # edges.
110
+ if entity.has_edges?
111
+ entity.edges.each do |edge|
112
112
  dir = ''
113
- if dependency.directed == true
114
- dir = dependency.direction == 1 ? 'forward' : 'back'
113
+ if edge.directed == true
114
+ dir = edge.direction == 1 ? 'forward' : 'back'
115
115
  dir = ",dir=#{dir}"
116
116
  else
117
117
  dir = ",dir=both"
118
118
  end
119
- string << "\n#{entity.id} -- #{dependency.target}"
120
- string << "[label=#{dependency.type}#{dir}]"
119
+ string << "\n#{entity.id} -- #{edge.target}"
120
+ string << "[label=#{edge.type}#{dir}]"
121
121
  end
122
122
  end
123
123
  # Recurse.
@@ -11,8 +11,9 @@ class Treat::Workers::Learners::Classifiers::ID3
11
11
 
12
12
  if !@@classifiers[cl]
13
13
  dec_tree = DecisionTree::ID3Tree.new(
14
- cl.labels.map { |l| l.to_s }, set.items, cl.question.default,
15
- cl.question.type)
14
+ cl.feature_labels.map { |l| l.to_s },
15
+ set.items.map { |i| i[:features]},
16
+ cl.question.default, cl.question.type)
16
17
  dec_tree.train
17
18
  @@classifiers[cl] = dec_tree
18
19
  else
@@ -20,7 +21,7 @@ class Treat::Workers::Learners::Classifiers::ID3
20
21
  dec_tree.graph('testingbitch')
21
22
  end
22
23
  dec_tree.predict(
23
- cl.export_item(entity, false)
24
+ cl.export_features(entity, false)
24
25
  )
25
26
  end
26
27
 
@@ -0,0 +1,53 @@
1
+ class Treat::Workers::Learners::Classifiers::Linear
2
+
3
+ require 'linear'
4
+
5
+ @@classifiers = {}
6
+
7
+ DefaultOptions = {
8
+ bias: 1,
9
+ eps: 0.1,
10
+ solver_type: MCSVM_CS
11
+ }
12
+
13
+ def self.classify(entity, options = {})
14
+
15
+ options = DefaultOptions.merge(options)
16
+ set = options[:training]
17
+ problem = set.problem
18
+
19
+ if !@@classifiers[problem]
20
+ labels = problem.question.labels
21
+ unless labels
22
+ raise Treat::Exception,
23
+ "LibLinear requires that you provide the possible " +
24
+ "labels to assign to classification items when " +
25
+ "specifying the question."
26
+ end
27
+ param = LParameter.new
28
+ param.solver_type = options[:solver_type]
29
+ param.eps = options[:eps]
30
+ bias = options[:bias]
31
+ data = set.items.map do |item|
32
+ self.array_to_hash(item[:features])
33
+ end
34
+ prob = LProblem.new(labels, data, bias)
35
+ @@classifiers[problem] =
36
+ LModel.new(prob, param)
37
+ end
38
+
39
+ @@classifiers[problem].predict(
40
+ self.array_to_hash(problem.
41
+ export_features(entity, false)))
42
+
43
+ end
44
+
45
+ def self.array_to_hash(array)
46
+ hash = {}
47
+ 0.upto(array.length - 1) do |i|
48
+ hash[i] = array[i]
49
+ end
50
+ hash
51
+ end
52
+
53
+ end
@@ -11,11 +11,11 @@ class Treat::Workers::Learners::Classifiers::MLP
11
11
  cl = set.problem
12
12
 
13
13
  if !@@mlps[cl]
14
- net = Ai4r::NeuralNetwork::
15
- Backpropagation.new([cl.labels.size, 3, 1])
14
+ net = Ai4r::NeuralNetwork::Backpropagation.new(
15
+ [cl.feature_labels.size, 3, 1])
16
16
  set.items.each do |item|
17
- inputs = item[0..-2]
18
- outputs = [item[-1]]
17
+ inputs = item[:features][0..-2]
18
+ outputs = [item[:features][-1]]
19
19
  net.train(inputs, outputs)
20
20
  end
21
21
  @@mlps[cl] = net
@@ -23,7 +23,7 @@ class Treat::Workers::Learners::Classifiers::MLP
23
23
  net = @@mlps[cl]
24
24
  end
25
25
 
26
- net.eval(cl.export_item(entity, false))[0]
26
+ net.eval(cl.export_features(entity, false))[0]
27
27
 
28
28
  end
29
29
 
@@ -0,0 +1,31 @@
1
+ class Treat::Workers::Learners::Classifiers::SVM
2
+
3
+ require 'svm'
4
+
5
+ @@classifiers = {}
6
+
7
+ def self.classify(entity, options = {})
8
+
9
+ set = options[:training]
10
+ problem = set.problem
11
+
12
+ if !@@classifiers[problem]
13
+ labels = problem.question.labels
14
+ unless labels
15
+ raise Treat::Exception,
16
+ "LibSVM requires that you provide the possible " +
17
+ "labels to assign to classification items when " +
18
+ "specifying the question."
19
+ end
20
+ data = set.items.map { |item| item[:features] }
21
+ prob = Problem.new(labels, data)
22
+ param = Parameter.new(:kernel_type => LINEAR, :C => 10)
23
+ @@classifiers[problem] = Model.new(prob, param)
24
+ end
25
+
26
+ @@classifiers[problem].predict_probability(
27
+ problem.export_features(entity, false))[0]
28
+
29
+ end
30
+
31
+ end
@@ -53,8 +53,10 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
53
53
  def self.init_tagger(language)
54
54
  Treat::Loaders::Stanford.load(language)
55
55
  model = StanfordCoreNLP::Config::Models[:pos][language]
56
- model = Treat.paths.models + 'stanford/' +
57
- StanfordCoreNLP::Config::ModelFolders[:pos] + model
56
+ model_path = Treat.libraries.stanford.model_path ||
57
+ Treat.paths.models + 'stanford/'
58
+ model = model_path + StanfordCoreNLP::
59
+ Config::ModelFolders[:pos] + model
58
60
  @@taggers[language] ||=
59
61
  StanfordCoreNLP::MaxentTagger.new(model)
60
62
  end
@@ -38,7 +38,7 @@ module Treat::Workers::Processors::Parsers::Enju
38
38
  val = entity.to_s
39
39
 
40
40
  @@id_table = {}
41
- @@dependencies_table = {}
41
+ @@edges_table = {}
42
42
 
43
43
  stdin, stdout = proc
44
44
  text, remove_last = valid_text(val)
@@ -63,7 +63,7 @@ module Treat::Workers::Processors::Parsers::Enju
63
63
  end
64
64
 
65
65
  link_heads(entity)
66
- add_dependencies(entity)
66
+ add_edges(entity)
67
67
  end
68
68
 
69
69
  # Return the process running Enju.
@@ -102,12 +102,12 @@ module Treat::Workers::Processors::Parsers::Enju
102
102
  pd = cd
103
103
  next
104
104
  end
105
- # Get and format attributes and dependencies.
105
+ # Get and format attributes and edges.
106
106
  attributes = reader.attributes
107
107
  id = attributes.delete('id')
108
- new_attr = {}; dependencies = {}
108
+ new_attr = {}; edges = {}
109
109
  unless attributes.size == 0
110
- new_attr, dependencies =
110
+ new_attr, edges =
111
111
  cleanup_attributes(reader.name, attributes)
112
112
  end
113
113
  # Create the appropriate entity for the
@@ -117,17 +117,17 @@ module Treat::Workers::Processors::Parsers::Enju
117
117
  when 'sentence'
118
118
  entity = Treat::Entities::Sentence.new('')
119
119
  @@id_table[id] = entity.id
120
- @@dependencies_table[entity.id] = dependencies
120
+ @@edges_table[entity.id] = edges
121
121
  entity.features = new_attr
122
122
  when 'cons'
123
123
  entity = entity <<
124
124
  Treat::Entities::Phrase.new('')
125
125
  @@id_table[id] = entity.id
126
- @@dependencies_table[entity.id] = dependencies
126
+ @@edges_table[entity.id] = edges
127
127
  entity.features = new_attr
128
128
  when 'tok'
129
129
  tmp_attributes = new_attr
130
- tmp_dependencies = dependencies
130
+ tmp_edges = edges
131
131
  else
132
132
  current_value = reader.value.gsub(/\s+/, "")
133
133
  unless current_value.size == 0
@@ -136,7 +136,7 @@ module Treat::Workers::Processors::Parsers::Enju
136
136
  if entity.is_a?(Treat::Entities::Word)
137
137
  entity.features = tmp_attributes
138
138
  @@id_table[id] = entity.id
139
- @@dependencies_table[entity.id] = tmp_dependencies
139
+ @@edges_table[entity.id] = tmp_edges
140
140
  else
141
141
  # Do something useful here
142
142
  entity.set :tag, 'SYM'
@@ -179,15 +179,15 @@ module Treat::Workers::Processors::Parsers::Enju
179
179
  end
180
180
  end
181
181
 
182
- # Add dependencies a posteriori to a parsed entity.
183
- def self.add_dependencies(entity2)
182
+ # Add edges a posteriori to a parsed entity.
183
+ def self.add_edges(entity2)
184
184
 
185
185
  entity2.each_entity(:word, :phrase) do |entity|
186
- @@dependencies_table.each_pair do |id, dependencies|
187
- next if dependencies.nil?
186
+ @@edges_table.each_pair do |id, edges|
187
+ next if edges.nil?
188
188
  entity = entity2.root.find(id)
189
189
  next if entity.nil?
190
- dependencies.each_pair do |argument, type|
190
+ edges.each_pair do |argument, type|
191
191
  # Skip this argument if we
192
192
  # don't know the target node.
193
193
  next if argument == 'unk'
@@ -205,7 +205,7 @@ module Treat::Workers::Processors::Parsers::Enju
205
205
  def self.cleanup_attributes(name, attributes)
206
206
 
207
207
  new_attr = {}
208
- dependencies = {}
208
+ edges = {}
209
209
  pred = attributes.delete('pred')
210
210
 
211
211
  attributes.each_pair do |attribute2, value|
@@ -214,7 +214,7 @@ module Treat::Workers::Processors::Parsers::Enju
214
214
 
215
215
  if attribute == 'arg1' ||
216
216
  attribute == 'arg2'
217
- dependencies[value] = pred
217
+ edges[value] = pred
218
218
  next
219
219
  end
220
220
 
@@ -256,7 +256,7 @@ module Treat::Workers::Processors::Parsers::Enju
256
256
  new_attr.delete :base
257
257
  end
258
258
 
259
- return new_attr, dependencies
259
+ return new_attr, edges
260
260
 
261
261
  end
262
262
 
@@ -63,7 +63,9 @@ module Treat::Workers::Processors::Segmenters::Punkt
63
63
  if options[:model]
64
64
  model = options[:model]
65
65
  else
66
- model = "#{Treat.paths.models}punkt/#{lang}.yaml"
66
+ model_path = Treat.libraries.punkt.model_path ||
67
+ Treat.paths.models + 'punkt/'
68
+ model = model_path + "#{lang}.yaml"
67
69
  unless File.readable?(model)
68
70
  raise Treat::Exception,
69
71
  "Could not get the language model " +
data/spec/collection.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Collection do
4
4
 
@@ -29,7 +29,7 @@ describe Treat::Entities::Collection do
29
29
  f = Treat.paths.spec + 'samples/test'
30
30
  c = Treat::Entities::Collection.build(f)
31
31
  c << Treat::Entities::Document.new
32
- c.size.should eql 2
32
+ c.size.should eql 1
33
33
  FileUtils.rm_rf(f)
34
34
  end
35
35
  end
@@ -45,7 +45,7 @@ describe Treat::Entities::Collection do
45
45
  it "recursively searches the folder for " +
46
46
  "files and opens them into a collection of documents" do
47
47
  collection = Treat::Entities::Collection.build(@file)
48
- collection.size.should eql 6
48
+ collection.size.should eql 5
49
49
  end
50
50
 
51
51
  end