treat 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. data/LICENSE +1 -1
  2. data/lib/treat/config/core/acronyms.rb +2 -1
  3. data/lib/treat/config/libraries/punkt.rb +1 -0
  4. data/lib/treat/config/libraries/reuters.rb +1 -0
  5. data/lib/treat/core/data_set.rb +125 -66
  6. data/lib/treat/core/export.rb +59 -0
  7. data/lib/treat/core/problem.rb +101 -18
  8. data/lib/treat/core/question.rb +23 -7
  9. data/lib/treat/entities/abilities/iterable.rb +7 -3
  10. data/lib/treat/entities/abilities/stringable.rb +5 -5
  11. data/lib/treat/entities/collection.rb +10 -6
  12. data/lib/treat/entities/entity.rb +1 -1
  13. data/lib/treat/helpers/objtohash.rb +8 -0
  14. data/lib/treat/loaders/stanford.rb +10 -8
  15. data/lib/treat/version.rb +1 -1
  16. data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
  17. data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
  18. data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
  19. data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
  20. data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
  21. data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
  22. data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
  23. data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
  24. data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
  25. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
  26. data/lib/treat/workers/processors/parsers/enju.rb +17 -17
  27. data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
  28. data/spec/collection.rb +3 -3
  29. data/spec/core.rb +430 -21
  30. data/spec/document.rb +1 -1
  31. data/spec/entity.rb +2 -8
  32. data/spec/helper.rb +34 -0
  33. data/spec/phrase.rb +1 -1
  34. data/spec/sandbox.rb +31 -8
  35. data/spec/token.rb +1 -1
  36. data/spec/treat.rb +1 -1
  37. data/spec/word.rb +1 -1
  38. data/spec/zone.rb +1 -1
  39. metadata +9 -8
  40. data/files/3_2_release_notes.html +0 -766
  41. data/files/bc-monty-robinson-sentencing.html +0 -1569
  42. data/files/syria-aleppo-clashes.html +0 -1376
  43. data/lib/treat/core/feature.rb +0 -42
  44. data/lib/treat/core/node.rb +0 -251
  45. data/spec/node.rb +0 -117
data/lib/treat/core/feature.rb DELETED
@@ -1,42 +0,0 @@
1
- # Represents a feature to be used
2
- # in a classification task.
3
- class Treat::Core::Feature
4
-
5
- # The name of the feature. If no
6
- # proc is supplied, this assumes
7
- # that the target of your classification
8
- # problem responds to the method
9
- # corresponding to this name.
10
- attr_reader :name
11
- # A proc that can be used to perform
12
- # calculations before storing a feature.
13
- attr_accessor :proc
14
- # The default value to be
15
- attr_reader :default
16
-
17
- # Initialize a feature for a classification
18
- # problem. If two arguments are supplied,
19
- # the second argument is assumed to be the
20
- # default value. If three arguments are
21
- # supplied, the second argument is the
22
- # callback to generate the feature, and
23
- # the third one is the default value.
24
- def initialize(name, proc_or_default = nil, default = nil)
25
- @name = name
26
- if proc_or_default.is_a?(Proc)
27
- @proc, @default =
28
- proc_or_default, default
29
- else
30
- @proc = nil
31
- @default = proc_or_default
32
- end
33
- end
34
-
35
- # Custom comparison operator for features.
36
- def ==(feature)
37
- @name == feature.name &&
38
- @proc == feature.proc &&
39
- @default == feature.default
40
- end
41
-
42
- end
data/lib/treat/core/node.rb DELETED
@@ -1,251 +0,0 @@
1
- # This module provides an abstract tree structure.
2
- module Treat::Core
3
-
4
- # This class is a node for an N-ary tree data structure
5
- # with a unique identifier, text value, children, features
6
- # (annotations) and dependencies.
7
- #
8
- # This class was partly based on the 'rubytree' gem.
9
- # RubyTree is licensed under the BSD license and can
10
- # be found at http://rubytree.rubyforge.org/rdoc/.
11
- # I have made several modifications in order to better
12
- # suit this library and to avoid ugly monkey patching.
13
- class Node
14
-
15
- # A string containing the node's value (or empty).
16
- attr_accessor :value
17
-
18
- # A unique identifier for the node.
19
- attr_reader :id
20
-
21
- # An array containing the children of this node.
22
- attr_reader :children
23
-
24
- # A hash containing the features of this node.
25
- attr_accessor :features
26
-
27
- # An array containing the dependencies that link this
28
- # node to other nodes.
29
- attr_accessor :dependencies
30
-
31
- # A struct for dependencies. # Fix
32
- Struct.new('Dependency',
33
- :target, :type, :directed, :direction)
34
-
35
- # The parent of the node.
36
- attr_accessor :parent
37
-
38
- # Initialize the node with its value and id.
39
- # Setup containers for the children, features
40
- # and dependencies of this node.
41
- def initialize(value, id = nil)
42
- @parent = nil
43
- @value, @id = value, id
44
- @children = []
45
- @children_hash = {}
46
- @features = {}
47
- @dependencies = []
48
- end
49
-
50
- # Iterate over each children in the node.
51
- # Non-recursive.
52
- def each
53
- @children.each { |child| yield child }
54
- end
55
-
56
- # Boolean - does the node have dependencies?
57
- def has_dependencies?; !(@dependencies.size == 0); end
58
-
59
- # Boolean - does the node have children?
60
- def has_children?; !(@children.size == 0); end
61
-
62
- # Boolean - does the node have a parent?
63
- def has_parent?; !@parent.nil?; end
64
-
65
- # Boolean - does the node have features?
66
- def has_features?; !(@features.size == 0); end
67
-
68
- # Does the entity have a feature ?
69
- def has_feature?(feature); @features.has_key?(feature); end
70
-
71
- # Boolean - does the node not have a parent?
72
- def is_root?; @parent.nil?; end
73
-
74
- # Remove this node from its parent and set as root.
75
- def set_as_root!; @parent = nil; self; end
76
-
77
- # Boolean - is this node a leaf ?
78
- # This is overriden in leaf classes.
79
- def is_leaf?; !has_children?; end
80
-
81
- # Add the nodes to the given child.
82
- # This may be used with several nodes,
83
- # for example: node << [child1, child2, child3]
84
- def <<(nodes)
85
- nodes = [nodes] unless nodes.is_a? Array
86
- if nodes.include?(nil)
87
- raise Treat::Exception,
88
- 'Trying to add a nil node.'
89
- end
90
- nodes.each do |node|
91
- node.parent = self
92
- @children << node
93
- @children_hash[node.id] = node
94
- end
95
- nodes[0]
96
- end
97
-
98
- # Retrieve a child node by name or index.
99
- def [](name_or_index)
100
- if name_or_index == nil
101
- raise Treat::Exception,
102
- 'Non-nil name or index needs to be provided.'
103
- end
104
- if name_or_index.kind_of?(Integer) &&
105
- name_or_index < 1000
106
- @children[name_or_index]
107
- else
108
- @children_hash[name_or_index]
109
- end
110
- end
111
-
112
- # Remove the supplied node or id of a
113
- # node from the children.
114
- def remove!(ion)
115
- return nil unless ion
116
- if ion.is_a? Treat::Core::Node
117
- @children.delete(ion)
118
- @children_hash.delete(ion.id)
119
- ion.set_as_root!
120
- else
121
- @children.delete(@children_hash[ion])
122
- @children_hash.delete(ion)
123
- end
124
- end
125
-
126
- # Remove all children.
127
- def remove_all!
128
- @children.each do |child|
129
- child.set_as_root!
130
- end
131
- @children = []
132
- @children_hash = {}
133
- self
134
- end
135
-
136
- # Return the sibling with position #pos
137
- # versus this one.
138
- # #pos can be ... -1, 0, 1, ...
139
- def sibling(pos)
140
- return nil if is_root?
141
- id = @parent.children.index(self)
142
- @parent.children.at(id + pos)
143
- end
144
-
145
- # Return the sibling N positions to
146
- # the left of this one.
147
- def left(n = 1); sibling(-1*n); end
148
- alias :previous_sibling :left
149
-
150
- # Return the sibling N positions to the
151
- # right of this one.
152
- def right(n = 1); sibling(1*n); end
153
- alias :next_sibling :right
154
-
155
- # Return all brothers and sisters of this node.
156
- def siblings
157
- r = @parent.children.dup
158
- r.delete(self)
159
- r
160
- end
161
-
162
- # Total number of nodes in the subtree,
163
- # including this one.
164
- def size
165
- @children.inject(1) do |sum, node|
166
- sum += node.size
167
- end
168
- end
169
-
170
- # Set the feature to the supplied value.
171
- def set(feature, value)
172
- @features ||= {}
173
- @features[feature] = value
174
- end
175
-
176
- # Return a feature.
177
- def get(feature)
178
- return @value if feature == :value
179
- return @id if feature == :id
180
- @features[feature]
181
- end
182
-
183
- # Unset a feature.
184
- def unset(*features)
185
- if features.size == 1
186
- @features.delete(features[0])
187
- else
188
- features.each do |feature|
189
- @features.delete(feature)
190
- end
191
- end
192
- end
193
-
194
- # Return the depth of this node in the tree.
195
- def depth
196
- return 0 if is_root?
197
- 1 + parent.depth
198
- end
199
-
200
- alias :has? :has_feature?
201
-
202
- # Link this node to the target node with
203
- # the supplied dependency type.
204
- def link(id_or_node, type = nil,
205
- directed = true, direction = 1)
206
- if id_or_node.is_a?(Treat::Core::Node)
207
- id = root.find(id_or_node).id
208
- else
209
- id = id_or_node
210
- end
211
- @dependencies.each do |d|
212
- return if d.target == id
213
- end
214
- @dependencies <<
215
- Struct::Dependency.new(
216
- id, type,
217
- directed, direction
218
- )
219
- end
220
-
221
- # Find the node in the tree with the given id.
222
- def find(id_or_node)
223
- if id_or_node.is_a?(Treat::Core::Node)
224
- id = id_or_node.id
225
- else
226
- id = id_or_node
227
- end
228
- if @children_hash[id]
229
- return @children_hash[id]
230
- end
231
- self.each do |child|
232
- r = child.find(id)
233
- return r if r.is_a? Treat::Core::Node
234
- end
235
- nil
236
- end
237
-
238
- # Find the root of the tree within which
239
- # this node is contained.
240
- def root
241
- return self if !has_parent?
242
- ancestor = @parent
243
- while ancestor.has_parent?
244
- ancestor = ancestor.parent
245
- end
246
- ancestor
247
- end
248
-
249
- end
250
-
251
- end
data/spec/node.rb DELETED
@@ -1,117 +0,0 @@
1
- require_relative '../lib/treat'
2
-
3
- describe Treat::Core::Node do
4
-
5
- before :each do
6
- @root = Treat::Core::Node.new('root node', 'root')
7
- @branch = Treat::Core::Node.new('branch node', 'branch')
8
- @sibling = Treat::Core::Node.new('sibling node', 'sibling')
9
- @leaf = Treat::Core::Node.new('leaf node', 'leaf')
10
- @root << @branch << @leaf
11
- @root << @sibling
12
-
13
- @leaf.link(@sibling, 'some dependency')
14
- @leaf.set :some_feature, 'value'
15
-
16
- end
17
-
18
- describe "#[]" do
19
- it "allows traversal of the tree by node ID" do
20
- @root['branch'].should eql @branch
21
- @root['branch']['leaf'].should eql @leaf
22
- end
23
- end
24
-
25
- =begin
26
- describe "#right, #left" do
27
- it "return the right/left sibling from the same parent node"
28
- @branch.right.should eql @sibling
29
- @sibling.left.should eql @branch
30
- end
31
-
32
- describe "#remove!" do
33
- it "removes a children by instance or ID and returns it" do
34
- @root.remove!(@sibling).should eql @sibling
35
- @root.size.should eql 3
36
- @root.remove!(@branch.id).should eql @branch
37
- @root.size.should eql 2
38
- end
39
- end
40
-
41
- describe "#remove_all!" do
42
- it "removes all a node's children"
43
- @branch.remove_all!.size.should eql 0
44
- end
45
-
46
- =end
47
-
48
- describe "#set(feature, value) and #get(feature)" do
49
- it "set and get a feature in the @features hash" do
50
- @root.set :foo, true
51
- @root.get(:foo).should eql true
52
- end
53
- end
54
-
55
- describe "#size" do
56
- it "returns the total number of nodes in the tree" do
57
- @root.size.should eql 4
58
- end
59
- end
60
-
61
- describe "#id" do
62
- it "returns the unique ID of the node" do
63
- @root.id.should eql 'root'
64
- @branch.id.should eql 'branch'
65
- @leaf.id.should eql 'leaf'
66
- end
67
- end
68
-
69
- describe "#value" do
70
- it "contains the string value of the node" do
71
- @root.value.should eql 'root node'
72
- @branch.value.should eql 'branch node'
73
- @leaf.value.should eql 'leaf node'
74
- end
75
- end
76
-
77
- describe "#has_children?" do
78
- it "tells whether the node has children or not" do
79
- @root.has_children?.should eql true
80
- @branch.has_children?.should eql true
81
- @leaf.has_children?.should eql false
82
- end
83
- end
84
-
85
- describe "#has_parent?" do
86
- it "tells whether the node has a parent or not" do
87
- @root.has_parent?.should eql false
88
- @branch.has_parent?.should eql true
89
- @leaf.has_parent?.should eql true
90
- end
91
- end
92
-
93
- describe "#has_children?" do
94
- it "tells whether the node has children or not" do
95
- @root.has_children?.should eql true
96
- @branch.has_children?.should eql true
97
- @leaf.has_children?.should eql false
98
- end
99
- end
100
-
101
- describe "#has_features?" do
102
- it "tells whether the node has children or not" do
103
- @root.has_features?.should eql false
104
- @branch.has_features?.should eql false
105
- @leaf.has_features?.should eql true
106
- end
107
- end
108
-
109
- describe "#has_dependencies?" do
110
- it "tells whether the node has dependencies or not" do
111
- @root.has_dependencies?.should eql false
112
- @branch.has_dependencies?.should eql false
113
- @leaf.has_dependencies?.should eql true
114
- end
115
- end
116
-
117
- end