treat 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/LICENSE +1 -1
  2. data/lib/treat/config/core/acronyms.rb +2 -1
  3. data/lib/treat/config/libraries/punkt.rb +1 -0
  4. data/lib/treat/config/libraries/reuters.rb +1 -0
  5. data/lib/treat/core/data_set.rb +125 -66
  6. data/lib/treat/core/export.rb +59 -0
  7. data/lib/treat/core/problem.rb +101 -18
  8. data/lib/treat/core/question.rb +23 -7
  9. data/lib/treat/entities/abilities/iterable.rb +7 -3
  10. data/lib/treat/entities/abilities/stringable.rb +5 -5
  11. data/lib/treat/entities/collection.rb +10 -6
  12. data/lib/treat/entities/entity.rb +1 -1
  13. data/lib/treat/helpers/objtohash.rb +8 -0
  14. data/lib/treat/loaders/stanford.rb +10 -8
  15. data/lib/treat/version.rb +1 -1
  16. data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
  17. data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
  18. data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
  19. data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
  20. data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
  21. data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
  22. data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
  23. data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
  24. data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
  25. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
  26. data/lib/treat/workers/processors/parsers/enju.rb +17 -17
  27. data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
  28. data/spec/collection.rb +3 -3
  29. data/spec/core.rb +430 -21
  30. data/spec/document.rb +1 -1
  31. data/spec/entity.rb +2 -8
  32. data/spec/helper.rb +34 -0
  33. data/spec/phrase.rb +1 -1
  34. data/spec/sandbox.rb +31 -8
  35. data/spec/token.rb +1 -1
  36. data/spec/treat.rb +1 -1
  37. data/spec/word.rb +1 -1
  38. data/spec/zone.rb +1 -1
  39. metadata +9 -8
  40. data/files/3_2_release_notes.html +0 -766
  41. data/files/bc-monty-robinson-sentencing.html +0 -1569
  42. data/files/syria-aleppo-clashes.html +0 -1376
  43. data/lib/treat/core/feature.rb +0 -42
  44. data/lib/treat/core/node.rb +0 -251
  45. data/spec/node.rb +0 -117
data/lib/treat/core/feature.rb DELETED
@@ -1,42 +0,0 @@
1
- # Represents a feature to be used
2
- # in a classification task.
3
- class Treat::Core::Feature
4
-
5
- # The name of the feature. If no
6
- # proc is supplied, this assumes
7
- # that the target of your classification
8
- # problem responds to the method
9
- # corresponding to this name.
10
- attr_reader :name
11
- # A proc that can be used to perform
12
- # calculations before storing a feature.
13
- attr_accessor :proc
14
- # The default value to be
15
- attr_reader :default
16
-
17
- # Initialize a feature for a classification
18
- # problem. If two arguments are supplied,
19
- # the second argument is assumed to be the
20
- # default value. If three arguments are
21
- # supplied, the second argument is the
22
- # callback to generate the feature, and
23
- # the third one is the default value.
24
- def initialize(name, proc_or_default = nil, default = nil)
25
- @name = name
26
- if proc_or_default.is_a?(Proc)
27
- @proc, @default =
28
- proc_or_default, default
29
- else
30
- @proc = nil
31
- @default = proc_or_default
32
- end
33
- end
34
-
35
- # Custom comparison operator for features.
36
- def ==(feature)
37
- @name == feature.name &&
38
- @proc == feature.proc &&
39
- @default == feature.default
40
- end
41
-
42
- end
data/lib/treat/core/node.rb DELETED
@@ -1,251 +0,0 @@
1
- # This module provides an abstract tree structure.
2
- module Treat::Core
3
-
4
- # This class is a node for an N-ary tree data structure
5
- # with a unique identifier, text value, children, features
6
- # (annotations) and dependencies.
7
- #
8
- # This class was partly based on the 'rubytree' gem.
9
- # RubyTree is licensed under the BSD license and can
10
- # be found at http://rubytree.rubyforge.org/rdoc/.
11
- # I have made several modifications in order to better
12
- # suit this library and to avoid ugly monkey patching.
13
- class Node
14
-
15
- # A string containing the node's value (or empty).
16
- attr_accessor :value
17
-
18
- # A unique identifier for the node.
19
- attr_reader :id
20
-
21
- # An array containing the children of this node.
22
- attr_reader :children
23
-
24
- # A hash containing the features of this node.
25
- attr_accessor :features
26
-
27
- # An array containing the dependencies that link this
28
- # node to other nodes.
29
- attr_accessor :dependencies
30
-
31
- # A struct for dependencies. # Fix
32
- Struct.new('Dependency',
33
- :target, :type, :directed, :direction)
34
-
35
- # The parent of the node.
36
- attr_accessor :parent
37
-
38
- # Initialize the node with its value and id.
39
- # Setup containers for the children, features
40
- # and dependencies of this node.
41
- def initialize(value, id = nil)
42
- @parent = nil
43
- @value, @id = value, id
44
- @children = []
45
- @children_hash = {}
46
- @features = {}
47
- @dependencies = []
48
- end
49
-
50
- # Iterate over each children in the node.
51
- # Non-recursive.
52
- def each
53
- @children.each { |child| yield child }
54
- end
55
-
56
- # Boolean - does the node have dependencies?
57
- def has_dependencies?; !(@dependencies.size == 0); end
58
-
59
- # Boolean - does the node have children?
60
- def has_children?; !(@children.size == 0); end
61
-
62
- # Boolean - does the node have a parent?
63
- def has_parent?; !@parent.nil?; end
64
-
65
- # Boolean - does the node have features?
66
- def has_features?; !(@features.size == 0); end
67
-
68
- # Does the entity have a feature ?
69
- def has_feature?(feature); @features.has_key?(feature); end
70
-
71
- # Boolean - does the node not have a parent?
72
- def is_root?; @parent.nil?; end
73
-
74
- # Remove this node from its parent and set as root.
75
- def set_as_root!; @parent = nil; self; end
76
-
77
- # Boolean - is this node a leaf ?
78
- # This is overriden in leaf classes.
79
- def is_leaf?; !has_children?; end
80
-
81
- # Add the nodes to the given child.
82
- # This may be used with several nodes,
83
- # for example: node << [child1, child2, child3]
84
- def <<(nodes)
85
- nodes = [nodes] unless nodes.is_a? Array
86
- if nodes.include?(nil)
87
- raise Treat::Exception,
88
- 'Trying to add a nil node.'
89
- end
90
- nodes.each do |node|
91
- node.parent = self
92
- @children << node
93
- @children_hash[node.id] = node
94
- end
95
- nodes[0]
96
- end
97
-
98
- # Retrieve a child node by name or index.
99
- def [](name_or_index)
100
- if name_or_index == nil
101
- raise Treat::Exception,
102
- 'Non-nil name or index needs to be provided.'
103
- end
104
- if name_or_index.kind_of?(Integer) &&
105
- name_or_index < 1000
106
- @children[name_or_index]
107
- else
108
- @children_hash[name_or_index]
109
- end
110
- end
111
-
112
- # Remove the supplied node or id of a
113
- # node from the children.
114
- def remove!(ion)
115
- return nil unless ion
116
- if ion.is_a? Treat::Core::Node
117
- @children.delete(ion)
118
- @children_hash.delete(ion.id)
119
- ion.set_as_root!
120
- else
121
- @children.delete(@children_hash[ion])
122
- @children_hash.delete(ion)
123
- end
124
- end
125
-
126
- # Remove all children.
127
- def remove_all!
128
- @children.each do |child|
129
- child.set_as_root!
130
- end
131
- @children = []
132
- @children_hash = {}
133
- self
134
- end
135
-
136
- # Return the sibling with position #pos
137
- # versus this one.
138
- # #pos can be ... -1, 0, 1, ...
139
- def sibling(pos)
140
- return nil if is_root?
141
- id = @parent.children.index(self)
142
- @parent.children.at(id + pos)
143
- end
144
-
145
- # Return the sibling N positions to
146
- # the left of this one.
147
- def left(n = 1); sibling(-1*n); end
148
- alias :previous_sibling :left
149
-
150
- # Return the sibling N positions to the
151
- # right of this one.
152
- def right(n = 1); sibling(1*n); end
153
- alias :next_sibling :right
154
-
155
- # Return all brothers and sisters of this node.
156
- def siblings
157
- r = @parent.children.dup
158
- r.delete(self)
159
- r
160
- end
161
-
162
- # Total number of nodes in the subtree,
163
- # including this one.
164
- def size
165
- @children.inject(1) do |sum, node|
166
- sum += node.size
167
- end
168
- end
169
-
170
- # Set the feature to the supplied value.
171
- def set(feature, value)
172
- @features ||= {}
173
- @features[feature] = value
174
- end
175
-
176
- # Return a feature.
177
- def get(feature)
178
- return @value if feature == :value
179
- return @id if feature == :id
180
- @features[feature]
181
- end
182
-
183
- # Unset a feature.
184
- def unset(*features)
185
- if features.size == 1
186
- @features.delete(features[0])
187
- else
188
- features.each do |feature|
189
- @features.delete(feature)
190
- end
191
- end
192
- end
193
-
194
- # Return the depth of this node in the tree.
195
- def depth
196
- return 0 if is_root?
197
- 1 + parent.depth
198
- end
199
-
200
- alias :has? :has_feature?
201
-
202
- # Link this node to the target node with
203
- # the supplied dependency type.
204
- def link(id_or_node, type = nil,
205
- directed = true, direction = 1)
206
- if id_or_node.is_a?(Treat::Core::Node)
207
- id = root.find(id_or_node).id
208
- else
209
- id = id_or_node
210
- end
211
- @dependencies.each do |d|
212
- return if d.target == id
213
- end
214
- @dependencies <<
215
- Struct::Dependency.new(
216
- id, type,
217
- directed, direction
218
- )
219
- end
220
-
221
- # Find the node in the tree with the given id.
222
- def find(id_or_node)
223
- if id_or_node.is_a?(Treat::Core::Node)
224
- id = id_or_node.id
225
- else
226
- id = id_or_node
227
- end
228
- if @children_hash[id]
229
- return @children_hash[id]
230
- end
231
- self.each do |child|
232
- r = child.find(id)
233
- return r if r.is_a? Treat::Core::Node
234
- end
235
- nil
236
- end
237
-
238
- # Find the root of the tree within which
239
- # this node is contained.
240
- def root
241
- return self if !has_parent?
242
- ancestor = @parent
243
- while ancestor.has_parent?
244
- ancestor = ancestor.parent
245
- end
246
- ancestor
247
- end
248
-
249
- end
250
-
251
- end
data/spec/node.rb DELETED
@@ -1,117 +0,0 @@
1
- require_relative '../lib/treat'
2
-
3
- describe Treat::Core::Node do
4
-
5
- before :each do
6
- @root = Treat::Core::Node.new('root node', 'root')
7
- @branch = Treat::Core::Node.new('branch node', 'branch')
8
- @sibling = Treat::Core::Node.new('sibling node', 'sibling')
9
- @leaf = Treat::Core::Node.new('leaf node', 'leaf')
10
- @root << @branch << @leaf
11
- @root << @sibling
12
-
13
- @leaf.link(@sibling, 'some dependency')
14
- @leaf.set :some_feature, 'value'
15
-
16
- end
17
-
18
- describe "#[]" do
19
- it "allows traversal of the tree by node ID" do
20
- @root['branch'].should eql @branch
21
- @root['branch']['leaf'].should eql @leaf
22
- end
23
- end
24
-
25
- =begin
26
- describe "#right, #left" do
27
- it "return the right/left sibling from the same parent node"
28
- @branch.right.should eql @sibling
29
- @sibling.left.should eql @branch
30
- end
31
-
32
- describe "#remove!" do
33
- it "removes a children by instance or ID and returns it" do
34
- @root.remove!(@sibling).should eql @sibling
35
- @root.size.should eql 3
36
- @root.remove!(@branch.id).should eql @branch
37
- @root.size.should eql 2
38
- end
39
- end
40
-
41
- describe "#remove_all!" do
42
- it "removes all a node's children"
43
- @branch.remove_all!.size.should eql 0
44
- end
45
-
46
- =end
47
-
48
- describe "#set(feature, value) and #get(feature)" do
49
- it "set and get a feature in the @features hash" do
50
- @root.set :foo, true
51
- @root.get(:foo).should eql true
52
- end
53
- end
54
-
55
- describe "#size" do
56
- it "returns the total number of nodes in the tree" do
57
- @root.size.should eql 4
58
- end
59
- end
60
-
61
- describe "#id" do
62
- it "returns the unique ID of the node" do
63
- @root.id.should eql 'root'
64
- @branch.id.should eql 'branch'
65
- @leaf.id.should eql 'leaf'
66
- end
67
- end
68
-
69
- describe "#value" do
70
- it "contains the string value of the node" do
71
- @root.value.should eql 'root node'
72
- @branch.value.should eql 'branch node'
73
- @leaf.value.should eql 'leaf node'
74
- end
75
- end
76
-
77
- describe "#has_children?" do
78
- it "tells whether the node has children or not" do
79
- @root.has_children?.should eql true
80
- @branch.has_children?.should eql true
81
- @leaf.has_children?.should eql false
82
- end
83
- end
84
-
85
- describe "#has_parent?" do
86
- it "tells whether the node has a parent or not" do
87
- @root.has_parent?.should eql false
88
- @branch.has_parent?.should eql true
89
- @leaf.has_parent?.should eql true
90
- end
91
- end
92
-
93
- describe "#has_children?" do
94
- it "tells whether the node has children or not" do
95
- @root.has_children?.should eql true
96
- @branch.has_children?.should eql true
97
- @leaf.has_children?.should eql false
98
- end
99
- end
100
-
101
- describe "#has_features?" do
102
- it "tells whether the node has children or not" do
103
- @root.has_features?.should eql false
104
- @branch.has_features?.should eql false
105
- @leaf.has_features?.should eql true
106
- end
107
- end
108
-
109
- describe "#has_dependencies?" do
110
- it "tells whether the node has dependencies or not" do
111
- @root.has_dependencies?.should eql false
112
- @branch.has_dependencies?.should eql false
113
- @leaf.has_dependencies?.should eql true
114
- end
115
- end
116
-
117
- end