treat 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/lib/treat/config/core/acronyms.rb +2 -1
- data/lib/treat/config/libraries/punkt.rb +1 -0
- data/lib/treat/config/libraries/reuters.rb +1 -0
- data/lib/treat/core/data_set.rb +125 -66
- data/lib/treat/core/export.rb +59 -0
- data/lib/treat/core/problem.rb +101 -18
- data/lib/treat/core/question.rb +23 -7
- data/lib/treat/entities/abilities/iterable.rb +7 -3
- data/lib/treat/entities/abilities/stringable.rb +5 -5
- data/lib/treat/entities/collection.rb +10 -6
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/helpers/objtohash.rb +8 -0
- data/lib/treat/loaders/stanford.rb +10 -8
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
- data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
- data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
- data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
- data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
- data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
- data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
- data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
- data/lib/treat/workers/processors/parsers/enju.rb +17 -17
- data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
- data/spec/collection.rb +3 -3
- data/spec/core.rb +430 -21
- data/spec/document.rb +1 -1
- data/spec/entity.rb +2 -8
- data/spec/helper.rb +34 -0
- data/spec/phrase.rb +1 -1
- data/spec/sandbox.rb +31 -8
- data/spec/token.rb +1 -1
- data/spec/treat.rb +1 -1
- data/spec/word.rb +1 -1
- data/spec/zone.rb +1 -1
- metadata +9 -8
- data/files/3_2_release_notes.html +0 -766
- data/files/bc-monty-robinson-sentencing.html +0 -1569
- data/files/syria-aleppo-clashes.html +0 -1376
- data/lib/treat/core/feature.rb +0 -42
- data/lib/treat/core/node.rb +0 -251
- data/spec/node.rb +0 -117
data/lib/treat/core/feature.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
# Represents a feature to be used
|
2
|
-
# in a classification task.
|
3
|
-
class Treat::Core::Feature
|
4
|
-
|
5
|
-
# The name of the feature. If no
|
6
|
-
# proc is supplied, this assumes
|
7
|
-
# that the target of your classification
|
8
|
-
# problem responds to the method
|
9
|
-
# corresponding to this name.
|
10
|
-
attr_reader :name
|
11
|
-
# A proc that can be used to perform
|
12
|
-
# calculations before storing a feature.
|
13
|
-
attr_accessor :proc
|
14
|
-
# The default value to be
|
15
|
-
attr_reader :default
|
16
|
-
|
17
|
-
# Initialize a feature for a classification
|
18
|
-
# problem. If two arguments are supplied,
|
19
|
-
# the second argument is assumed to be the
|
20
|
-
# default value. If three arguments are
|
21
|
-
# supplied, the second argument is the
|
22
|
-
# callback to generate the feature, and
|
23
|
-
# the third one is the default value.
|
24
|
-
def initialize(name, proc_or_default = nil, default = nil)
|
25
|
-
@name = name
|
26
|
-
if proc_or_default.is_a?(Proc)
|
27
|
-
@proc, @default =
|
28
|
-
proc_or_default, default
|
29
|
-
else
|
30
|
-
@proc = nil
|
31
|
-
@default = proc_or_default
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
# Custom comparison operator for features.
|
36
|
-
def ==(feature)
|
37
|
-
@name == feature.name &&
|
38
|
-
@proc == feature.proc &&
|
39
|
-
@default == feature.default
|
40
|
-
end
|
41
|
-
|
42
|
-
end
|
data/lib/treat/core/node.rb
DELETED
@@ -1,251 +0,0 @@
|
|
1
|
-
# This module provides an abstract tree structure.
|
2
|
-
module Treat::Core
|
3
|
-
|
4
|
-
# This class is a node for an N-ary tree data structure
|
5
|
-
# with a unique identifier, text value, children, features
|
6
|
-
# (annotations) and dependencies.
|
7
|
-
#
|
8
|
-
# This class was partly based on the 'rubytree' gem.
|
9
|
-
# RubyTree is licensed under the BSD license and can
|
10
|
-
# be found at http://rubytree.rubyforge.org/rdoc/.
|
11
|
-
# I have made several modifications in order to better
|
12
|
-
# suit this library and to avoid ugly monkey patching.
|
13
|
-
class Node
|
14
|
-
|
15
|
-
# A string containing the node's value (or empty).
|
16
|
-
attr_accessor :value
|
17
|
-
|
18
|
-
# A unique identifier for the node.
|
19
|
-
attr_reader :id
|
20
|
-
|
21
|
-
# An array containing the children of this node.
|
22
|
-
attr_reader :children
|
23
|
-
|
24
|
-
# A hash containing the features of this node.
|
25
|
-
attr_accessor :features
|
26
|
-
|
27
|
-
# An array containing the dependencies that link this
|
28
|
-
# node to other nodes.
|
29
|
-
attr_accessor :dependencies
|
30
|
-
|
31
|
-
# A struct for dependencies. # Fix
|
32
|
-
Struct.new('Dependency',
|
33
|
-
:target, :type, :directed, :direction)
|
34
|
-
|
35
|
-
# The parent of the node.
|
36
|
-
attr_accessor :parent
|
37
|
-
|
38
|
-
# Initialize the node with its value and id.
|
39
|
-
# Setup containers for the children, features
|
40
|
-
# and dependencies of this node.
|
41
|
-
def initialize(value, id = nil)
|
42
|
-
@parent = nil
|
43
|
-
@value, @id = value, id
|
44
|
-
@children = []
|
45
|
-
@children_hash = {}
|
46
|
-
@features = {}
|
47
|
-
@dependencies = []
|
48
|
-
end
|
49
|
-
|
50
|
-
# Iterate over each children in the node.
|
51
|
-
# Non-recursive.
|
52
|
-
def each
|
53
|
-
@children.each { |child| yield child }
|
54
|
-
end
|
55
|
-
|
56
|
-
# Boolean - does the node have dependencies?
|
57
|
-
def has_dependencies?; !(@dependencies.size == 0); end
|
58
|
-
|
59
|
-
# Boolean - does the node have children?
|
60
|
-
def has_children?; !(@children.size == 0); end
|
61
|
-
|
62
|
-
# Boolean - does the node have a parent?
|
63
|
-
def has_parent?; !@parent.nil?; end
|
64
|
-
|
65
|
-
# Boolean - does the node have features?
|
66
|
-
def has_features?; !(@features.size == 0); end
|
67
|
-
|
68
|
-
# Does the entity have a feature ?
|
69
|
-
def has_feature?(feature); @features.has_key?(feature); end
|
70
|
-
|
71
|
-
# Boolean - does the node not have a parent?
|
72
|
-
def is_root?; @parent.nil?; end
|
73
|
-
|
74
|
-
# Remove this node from its parent and set as root.
|
75
|
-
def set_as_root!; @parent = nil; self; end
|
76
|
-
|
77
|
-
# Boolean - is this node a leaf ?
|
78
|
-
# This is overriden in leaf classes.
|
79
|
-
def is_leaf?; !has_children?; end
|
80
|
-
|
81
|
-
# Add the nodes to the given child.
|
82
|
-
# This may be used with several nodes,
|
83
|
-
# for example: node << [child1, child2, child3]
|
84
|
-
def <<(nodes)
|
85
|
-
nodes = [nodes] unless nodes.is_a? Array
|
86
|
-
if nodes.include?(nil)
|
87
|
-
raise Treat::Exception,
|
88
|
-
'Trying to add a nil node.'
|
89
|
-
end
|
90
|
-
nodes.each do |node|
|
91
|
-
node.parent = self
|
92
|
-
@children << node
|
93
|
-
@children_hash[node.id] = node
|
94
|
-
end
|
95
|
-
nodes[0]
|
96
|
-
end
|
97
|
-
|
98
|
-
# Retrieve a child node by name or index.
|
99
|
-
def [](name_or_index)
|
100
|
-
if name_or_index == nil
|
101
|
-
raise Treat::Exception,
|
102
|
-
'Non-nil name or index needs to be provided.'
|
103
|
-
end
|
104
|
-
if name_or_index.kind_of?(Integer) &&
|
105
|
-
name_or_index < 1000
|
106
|
-
@children[name_or_index]
|
107
|
-
else
|
108
|
-
@children_hash[name_or_index]
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
# Remove the supplied node or id of a
|
113
|
-
# node from the children.
|
114
|
-
def remove!(ion)
|
115
|
-
return nil unless ion
|
116
|
-
if ion.is_a? Treat::Core::Node
|
117
|
-
@children.delete(ion)
|
118
|
-
@children_hash.delete(ion.id)
|
119
|
-
ion.set_as_root!
|
120
|
-
else
|
121
|
-
@children.delete(@children_hash[ion])
|
122
|
-
@children_hash.delete(ion)
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
# Remove all children.
|
127
|
-
def remove_all!
|
128
|
-
@children.each do |child|
|
129
|
-
child.set_as_root!
|
130
|
-
end
|
131
|
-
@children = []
|
132
|
-
@children_hash = {}
|
133
|
-
self
|
134
|
-
end
|
135
|
-
|
136
|
-
# Return the sibling with position #pos
|
137
|
-
# versus this one.
|
138
|
-
# #pos can be ... -1, 0, 1, ...
|
139
|
-
def sibling(pos)
|
140
|
-
return nil if is_root?
|
141
|
-
id = @parent.children.index(self)
|
142
|
-
@parent.children.at(id + pos)
|
143
|
-
end
|
144
|
-
|
145
|
-
# Return the sibling N positions to
|
146
|
-
# the left of this one.
|
147
|
-
def left(n = 1); sibling(-1*n); end
|
148
|
-
alias :previous_sibling :left
|
149
|
-
|
150
|
-
# Return the sibling N positions to the
|
151
|
-
# right of this one.
|
152
|
-
def right(n = 1); sibling(1*n); end
|
153
|
-
alias :next_sibling :right
|
154
|
-
|
155
|
-
# Return all brothers and sisters of this node.
|
156
|
-
def siblings
|
157
|
-
r = @parent.children.dup
|
158
|
-
r.delete(self)
|
159
|
-
r
|
160
|
-
end
|
161
|
-
|
162
|
-
# Total number of nodes in the subtree,
|
163
|
-
# including this one.
|
164
|
-
def size
|
165
|
-
@children.inject(1) do |sum, node|
|
166
|
-
sum += node.size
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
# Set the feature to the supplied value.
|
171
|
-
def set(feature, value)
|
172
|
-
@features ||= {}
|
173
|
-
@features[feature] = value
|
174
|
-
end
|
175
|
-
|
176
|
-
# Return a feature.
|
177
|
-
def get(feature)
|
178
|
-
return @value if feature == :value
|
179
|
-
return @id if feature == :id
|
180
|
-
@features[feature]
|
181
|
-
end
|
182
|
-
|
183
|
-
# Unset a feature.
|
184
|
-
def unset(*features)
|
185
|
-
if features.size == 1
|
186
|
-
@features.delete(features[0])
|
187
|
-
else
|
188
|
-
features.each do |feature|
|
189
|
-
@features.delete(feature)
|
190
|
-
end
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
# Return the depth of this node in the tree.
|
195
|
-
def depth
|
196
|
-
return 0 if is_root?
|
197
|
-
1 + parent.depth
|
198
|
-
end
|
199
|
-
|
200
|
-
alias :has? :has_feature?
|
201
|
-
|
202
|
-
# Link this node to the target node with
|
203
|
-
# the supplied dependency type.
|
204
|
-
def link(id_or_node, type = nil,
|
205
|
-
directed = true, direction = 1)
|
206
|
-
if id_or_node.is_a?(Treat::Core::Node)
|
207
|
-
id = root.find(id_or_node).id
|
208
|
-
else
|
209
|
-
id = id_or_node
|
210
|
-
end
|
211
|
-
@dependencies.each do |d|
|
212
|
-
return if d.target == id
|
213
|
-
end
|
214
|
-
@dependencies <<
|
215
|
-
Struct::Dependency.new(
|
216
|
-
id, type,
|
217
|
-
directed, direction
|
218
|
-
)
|
219
|
-
end
|
220
|
-
|
221
|
-
# Find the node in the tree with the given id.
|
222
|
-
def find(id_or_node)
|
223
|
-
if id_or_node.is_a?(Treat::Core::Node)
|
224
|
-
id = id_or_node.id
|
225
|
-
else
|
226
|
-
id = id_or_node
|
227
|
-
end
|
228
|
-
if @children_hash[id]
|
229
|
-
return @children_hash[id]
|
230
|
-
end
|
231
|
-
self.each do |child|
|
232
|
-
r = child.find(id)
|
233
|
-
return r if r.is_a? Treat::Core::Node
|
234
|
-
end
|
235
|
-
nil
|
236
|
-
end
|
237
|
-
|
238
|
-
# Find the root of the tree within which
|
239
|
-
# this node is contained.
|
240
|
-
def root
|
241
|
-
return self if !has_parent?
|
242
|
-
ancestor = @parent
|
243
|
-
while ancestor.has_parent?
|
244
|
-
ancestor = ancestor.parent
|
245
|
-
end
|
246
|
-
ancestor
|
247
|
-
end
|
248
|
-
|
249
|
-
end
|
250
|
-
|
251
|
-
end
|
data/spec/node.rb
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
require_relative '../lib/treat'
|
2
|
-
|
3
|
-
describe Treat::Core::Node do
|
4
|
-
|
5
|
-
before :each do
|
6
|
-
@root = Treat::Core::Node.new('root node', 'root')
|
7
|
-
@branch = Treat::Core::Node.new('branch node', 'branch')
|
8
|
-
@sibling = Treat::Core::Node.new('sibling node', 'sibling')
|
9
|
-
@leaf = Treat::Core::Node.new('leaf node', 'leaf')
|
10
|
-
@root << @branch << @leaf
|
11
|
-
@root << @sibling
|
12
|
-
|
13
|
-
@leaf.link(@sibling, 'some dependency')
|
14
|
-
@leaf.set :some_feature, 'value'
|
15
|
-
|
16
|
-
end
|
17
|
-
|
18
|
-
describe "#[]" do
|
19
|
-
it "allows traversal of the tree by node ID" do
|
20
|
-
@root['branch'].should eql @branch
|
21
|
-
@root['branch']['leaf'].should eql @leaf
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
=begin
|
26
|
-
describe "#right, #left" do
|
27
|
-
it "return the right/left sibling from the same parent node"
|
28
|
-
@branch.right.should eql @sibling
|
29
|
-
@sibling.left.should eql @branch
|
30
|
-
end
|
31
|
-
|
32
|
-
describe "#remove!" do
|
33
|
-
it "removes a children by instance or ID and returns it" do
|
34
|
-
@root.remove!(@sibling).should eql @sibling
|
35
|
-
@root.size.should eql 3
|
36
|
-
@root.remove!(@branch.id).should eql @branch
|
37
|
-
@root.size.should eql 2
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "#remove_all!" do
|
42
|
-
it "removes all a node's children"
|
43
|
-
@branch.remove_all!.size.should eql 0
|
44
|
-
end
|
45
|
-
|
46
|
-
=end
|
47
|
-
|
48
|
-
describe "#set(feature, value) and #get(feature)" do
|
49
|
-
it "set and get a feature in the @features hash" do
|
50
|
-
@root.set :foo, true
|
51
|
-
@root.get(:foo).should eql true
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
describe "#size" do
|
56
|
-
it "returns the total number of nodes in the tree" do
|
57
|
-
@root.size.should eql 4
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
describe "#id" do
|
62
|
-
it "returns the unique ID of the node" do
|
63
|
-
@root.id.should eql 'root'
|
64
|
-
@branch.id.should eql 'branch'
|
65
|
-
@leaf.id.should eql 'leaf'
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
describe "#value" do
|
70
|
-
it "contains the string value of the node" do
|
71
|
-
@root.value.should eql 'root node'
|
72
|
-
@branch.value.should eql 'branch node'
|
73
|
-
@leaf.value.should eql 'leaf node'
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
describe "#has_children?" do
|
78
|
-
it "tells whether the node has children or not" do
|
79
|
-
@root.has_children?.should eql true
|
80
|
-
@branch.has_children?.should eql true
|
81
|
-
@leaf.has_children?.should eql false
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
describe "#has_parent?" do
|
86
|
-
it "tells whether the node has a parent or not" do
|
87
|
-
@root.has_parent?.should eql false
|
88
|
-
@branch.has_parent?.should eql true
|
89
|
-
@leaf.has_parent?.should eql true
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
describe "#has_children?" do
|
94
|
-
it "tells whether the node has children or not" do
|
95
|
-
@root.has_children?.should eql true
|
96
|
-
@branch.has_children?.should eql true
|
97
|
-
@leaf.has_children?.should eql false
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
describe "#has_features?" do
|
102
|
-
it "tells whether the node has children or not" do
|
103
|
-
@root.has_features?.should eql false
|
104
|
-
@branch.has_features?.should eql false
|
105
|
-
@leaf.has_features?.should eql true
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
describe "#has_dependencies?" do
|
110
|
-
it "tells whether the node has dependencies or not" do
|
111
|
-
@root.has_dependencies?.should eql false
|
112
|
-
@branch.has_dependencies?.should eql false
|
113
|
-
@leaf.has_dependencies?.should eql true
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
end
|