treat 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/lib/treat/config/core/acronyms.rb +2 -1
- data/lib/treat/config/libraries/punkt.rb +1 -0
- data/lib/treat/config/libraries/reuters.rb +1 -0
- data/lib/treat/core/data_set.rb +125 -66
- data/lib/treat/core/export.rb +59 -0
- data/lib/treat/core/problem.rb +101 -18
- data/lib/treat/core/question.rb +23 -7
- data/lib/treat/entities/abilities/iterable.rb +7 -3
- data/lib/treat/entities/abilities/stringable.rb +5 -5
- data/lib/treat/entities/collection.rb +10 -6
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/helpers/objtohash.rb +8 -0
- data/lib/treat/loaders/stanford.rb +10 -8
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
- data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
- data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
- data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
- data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
- data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
- data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
- data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
- data/lib/treat/workers/processors/parsers/enju.rb +17 -17
- data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
- data/spec/collection.rb +3 -3
- data/spec/core.rb +430 -21
- data/spec/document.rb +1 -1
- data/spec/entity.rb +2 -8
- data/spec/helper.rb +34 -0
- data/spec/phrase.rb +1 -1
- data/spec/sandbox.rb +31 -8
- data/spec/token.rb +1 -1
- data/spec/treat.rb +1 -1
- data/spec/word.rb +1 -1
- data/spec/zone.rb +1 -1
- metadata +9 -8
- data/files/3_2_release_notes.html +0 -766
- data/files/bc-monty-robinson-sentencing.html +0 -1569
- data/files/syria-aleppo-clashes.html +0 -1376
- data/lib/treat/core/feature.rb +0 -42
- data/lib/treat/core/node.rb +0 -251
- data/spec/node.rb +0 -117
data/spec/core.rb
CHANGED
@@ -1,32 +1,441 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
describe Treat::Core::Question do
|
4
|
+
|
5
|
+
describe "#initialize" do
|
6
|
+
context "when supplied with acceptable parameters" do
|
7
|
+
it "should give access to the parameters" do
|
8
|
+
question = Treat::Core::Question.new(
|
9
|
+
:is_keyword, :word, :continuous, 0, [0, 1])
|
10
|
+
question.name.should eql :is_keyword
|
11
|
+
question.target.should eql :word
|
12
|
+
question.type.should eql :continuous
|
13
|
+
question.default.should eql 0
|
14
|
+
question.labels.should eql [0, 1]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
context "when supplied with wrong parameters" do
|
18
|
+
it "should raise an exception" do
|
19
|
+
# Name should be a symbol
|
20
|
+
expect { Treat::Core::Question.new(
|
21
|
+
nil, :sentence) }.to raise_error
|
22
|
+
# Target should be an actual entity type
|
23
|
+
expect { Treat::Core::Question.new(
|
24
|
+
:name, :foo) }.to raise_error
|
25
|
+
# Distribution type should be continuous or discrete
|
26
|
+
expect { Treat::Core::Question.new(
|
27
|
+
:name, :sentence, :nonsense) }.to raise_error
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "#==(question)" do
|
33
|
+
context "when supplied with an equal question" do
|
34
|
+
it "should return true" do
|
35
|
+
Treat::Core::Question.new(
|
36
|
+
:is_keyword, :word).
|
37
|
+
should == Treat::Core::Question.new(
|
38
|
+
:is_keyword, :word)
|
39
|
+
Treat::Core::Question.new(
|
40
|
+
:is_keyword, :word, :continuous).
|
41
|
+
should == Treat::Core::Question.new(
|
42
|
+
:is_keyword, :word, :continuous)
|
43
|
+
Treat::Core::Question.new(
|
44
|
+
:is_keyword, :word, :continuous, [0, 1]).
|
45
|
+
should == Treat::Core::Question.new(
|
46
|
+
:is_keyword, :word, :continuous, [0, 1])
|
47
|
+
end
|
48
|
+
end
|
49
|
+
context "when supplied with a different question" do
|
50
|
+
it "should return false" do
|
51
|
+
Treat::Core::Question.new(
|
52
|
+
:is_keyword, :word).
|
53
|
+
should_not == Treat::Core::Question.new(
|
54
|
+
:is_keyword, :sentence)
|
55
|
+
Treat::Core::Question.new(
|
56
|
+
:is_keyword, :word, :continuous).
|
57
|
+
should_not == Treat::Core::Question.new(
|
58
|
+
:is_keyword, :word, :discrete)
|
59
|
+
Treat::Core::Question.new(
|
60
|
+
:is_keyword, :word, :continuous, [0, 1]).
|
61
|
+
should_not == Treat::Core::Question.new(
|
62
|
+
:is_keyword, :word, :continuous, [1, 0])
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
69
|
+
describe Treat::Core::Export do
|
70
|
+
|
71
|
+
describe "#initialize" do
|
72
|
+
context "when supplied with acceptable parameters" do
|
73
|
+
it "should give access to the parameters" do
|
74
|
+
export = Treat::Core::Export.new(:name, 0, "->(e) { e }")
|
75
|
+
export.name.should eql :name
|
76
|
+
export.default.should eql 0
|
77
|
+
export.proc_string.should eql "->(e) { e }"
|
78
|
+
export.proc.should be_instance_of Proc
|
79
|
+
export.proc.call('x').should eql 'x'
|
80
|
+
end
|
81
|
+
end
|
82
|
+
context "when supplied with wrong parameters" do
|
83
|
+
it "should raise an exception" do
|
84
|
+
# First argument should be a symbol representing the name of the export.
|
85
|
+
expect { Treat::Core::Export.new(nil) }.to raise_error
|
86
|
+
# Third argument, if supplied, should be a string that
|
87
|
+
# evaluates to a proc (NOT a proc/lambda).
|
88
|
+
expect { Treat::Core::Export.new(:name, 0, lambda { x } ) }.to raise_error
|
89
|
+
# Third argument should be proper ruby syntax.
|
90
|
+
expect { Treat::Core::Export.new(:name, 0, "->(e) { ") }.to raise_error
|
91
|
+
# Third argument should evaluate to a proc.
|
92
|
+
expect { Treat::Core::Export.new(:name, 0, "2") }.to raise_error
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "#==(question)" do
|
98
|
+
context "when supplied with an equal question" do
|
99
|
+
it "should return true" do
|
100
|
+
Treat::Core::Export.new(:name).
|
101
|
+
should == Treat::Core::Export.new(:name)
|
102
|
+
Treat::Core::Export.new(:name, 0).
|
103
|
+
should == Treat::Core::Export.new(:name, 0)
|
104
|
+
Treat::Core::Export.new(:name, 0, "->(e) { }").
|
105
|
+
should == Treat::Core::Export.new(:name, 0, "->(e) { }")
|
106
|
+
end
|
107
|
+
end
|
108
|
+
context "when supplied with a different question" do
|
109
|
+
it "should return false" do
|
110
|
+
Treat::Core::Export.new(:name).
|
111
|
+
should_not == Treat::Core::Export.new(:name2)
|
112
|
+
Treat::Core::Export.new(:name, 0).
|
113
|
+
should_not == Treat::Core::Export.new(:name, 1)
|
114
|
+
Treat::Core::Export.new(:name, 0, "->(e) { }").
|
115
|
+
should_not == Treat::Core::Export.new(:name, 0, "->(e) { x }")
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
end
|
2
121
|
|
3
122
|
describe Treat::Core::DataSet do
|
4
|
-
=begin
|
5
123
|
|
124
|
+
before do
|
125
|
+
@question = Treat::Core::Question.new(:is_key_sentence, :sentence, :continuous, 0, [0, 1])
|
126
|
+
@feature = Treat::Core::Feature.new(:word_count, 0)
|
127
|
+
@problem = Treat::Core::Problem.new(@question, @feature)
|
128
|
+
@tag = Treat::Core::Tag.new(:paragraph_length, 0,
|
129
|
+
"->(e) { e.parent_paragraph.word_count }")
|
130
|
+
@paragraph = Treat::Entities::Paragraph.new(
|
131
|
+
"Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
|
132
|
+
@paragraph.do :segment, :tokenize
|
133
|
+
@sentence = @paragraph.sentences[0]
|
134
|
+
@data_set = Treat::Core::DataSet.new(@problem)
|
135
|
+
end
|
136
|
+
|
137
|
+
describe "#initialize" do
|
138
|
+
context "when supplied with a problem" do
|
139
|
+
it "should initialize an empty data set" do
|
140
|
+
data_set = Treat::Core::DataSet.new(@problem)
|
141
|
+
data_set.items.should eql []
|
142
|
+
data_set.problem.should eql @problem
|
143
|
+
end
|
144
|
+
end
|
145
|
+
context "when supplied with an improper argument" do
|
146
|
+
it "should raise an error" do
|
147
|
+
# The argument to initialize should be a Problem.
|
148
|
+
expect { data_set = Treat::Core::DataSet.new("foo") }.to raise_error
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe "#self.build" do
|
154
|
+
|
155
|
+
end
|
156
|
+
|
157
|
+
describe "#==(other_data_set)" do
|
158
|
+
context "when supplied with an equivalent data set" do
|
159
|
+
it "returns true" do
|
160
|
+
data_set1 = Treat::Core::DataSet.new(@problem)
|
161
|
+
data_set2 = Treat::Core::DataSet.new(@problem)
|
162
|
+
data_set1.should == data_set2
|
163
|
+
data_set1 << @sentence
|
164
|
+
data_set2 << @sentence
|
165
|
+
data_set1.should == data_set2
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
context "when supplied with a non-equivalent data set" do
|
170
|
+
it "returns false" do
|
171
|
+
# Get two slightly different problems.
|
172
|
+
question1 = Treat::Core::Question.new(
|
173
|
+
:is_key_sentence, :sentence, :continuous, 0, [0, 1])
|
174
|
+
question2 = Treat::Core::Question.new(
|
175
|
+
:is_key_word, :sentence, :continuous, 0, [1, 1])
|
176
|
+
problem1 = Treat::Core::Problem.new(question1, @feature)
|
177
|
+
problem2 = Treat::Core::Problem.new(question2, @feature)
|
178
|
+
# Then the problems shouldn't be equal anymore.
|
179
|
+
problem1.should_not == problem2
|
180
|
+
# Create data sets with the different problems.
|
181
|
+
data_set1 = Treat::Core::DataSet.new(problem1)
|
182
|
+
data_set2 = Treat::Core::DataSet.new(problem2)
|
183
|
+
# Then the data sets shouldn't be equal anymore.
|
184
|
+
data_set1.should_not == data_set2
|
185
|
+
# Create two data sets with the same problems.
|
186
|
+
data_set1 = Treat::Core::DataSet.new(@problem)
|
187
|
+
data_set2 = Treat::Core::DataSet.new(@problem)
|
188
|
+
# Then these should be equal.
|
189
|
+
data_set1.should == data_set2
|
190
|
+
# But when different items are added
|
191
|
+
data_set1 << Treat::Entities::Sentence.new(
|
192
|
+
"This sentence is not the same as the other.").tokenize
|
193
|
+
data_set2 << Treat::Entities::Sentence.new(
|
194
|
+
"This sentence is similar to the other.").tokenize
|
195
|
+
# They shouldn't be equal anymore.
|
196
|
+
data_set1.should_not == data_set2
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
describe "#merge" do
|
203
|
+
context "when supplied with two data sets refering to the same problem" do
|
204
|
+
it "merges the two together" do
|
205
|
+
# Create two data sets with the same problem.
|
206
|
+
data_set1 = Treat::Core::DataSet.new(@problem)
|
207
|
+
data_set2 = Treat::Core::DataSet.new(@problem)
|
208
|
+
# Add a sentence to each data set.
|
209
|
+
data_set1 << Treat::Entities::Sentence.new(
|
210
|
+
"This sentence is not the same as the other.").tokenize
|
211
|
+
data_set2 << Treat::Entities::Sentence.new(
|
212
|
+
"This sentence is similar to the other.").tokenize
|
213
|
+
# Merge the two data sets together.
|
214
|
+
data_set1.merge(data_set2)
|
215
|
+
# Check if the merge has occured properly.
|
216
|
+
data_set1.items.size.should eql 2
|
217
|
+
data_set1.items[1].should eql data_set2.items[0]
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
context "when supplied with two data sets refering to different problems" do
|
222
|
+
it "raises an error" do
|
223
|
+
# Get two slightly different questions.
|
224
|
+
question1 = Treat::Core::Question.new(
|
225
|
+
:is_key_sentence, :sentence, :continuous, 0, [0, 1])
|
226
|
+
question2 = Treat::Core::Question.new(
|
227
|
+
:is_key_word, :sentence, :continuous, 0, [1, 1])
|
228
|
+
# Create two problems with the different questions.
|
229
|
+
problem1 = Treat::Core::Problem.new(question1, @feature)
|
230
|
+
problem2 = Treat::Core::Problem.new(question2, @feature)
|
231
|
+
# Create two data sets with the different problems.
|
232
|
+
data_set1 = Treat::Core::DataSet.new(problem1)
|
233
|
+
data_set2 = Treat::Core::DataSet.new(problem2)
|
234
|
+
# Add elements to each of the data sets.
|
235
|
+
data_set1 << Treat::Entities::Sentence.new(
|
236
|
+
"This sentence is not the same as the other.").tokenize
|
237
|
+
data_set2 << Treat::Entities::Sentence.new(
|
238
|
+
"This sentence is similar to the other.").tokenize
|
239
|
+
# Try to merge them; but this should fail.
|
240
|
+
expect { data_set1.merge(data_set2) }.to raise_error
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
describe "#<<(entity)" do
|
246
|
+
context "when supplied with a proper entity" do
|
247
|
+
it "exports the features and tags and adds them to the data set" do
|
248
|
+
problem = Treat::Core::Problem.new(@question, @feature, @tag)
|
249
|
+
data_set = Treat::Core::DataSet.new(problem)
|
250
|
+
data_set << @sentence
|
251
|
+
data_set.items.tap { |e| e[0][:id] = 0 }.
|
252
|
+
should eql [{:tags=>[11], :features=>[7, 0], :id=>0}]
|
253
|
+
end
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
describe "#serialize" do
|
258
|
+
context "when asked to use a given adapter" do
|
259
|
+
it "calls the corresponding #to_something method" do
|
260
|
+
|
261
|
+
end
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
describe "#to_marshal, #self.from_marshal" do
|
266
|
+
context "when asked to successively serialize and deserialize data" do
|
267
|
+
it "completes a round trip without losing information" do
|
268
|
+
problem = Treat::Core::Problem.new(@question, @feature, @tag)
|
269
|
+
data_set = Treat::Core::DataSet.new(problem)
|
270
|
+
data_set << @sentence
|
271
|
+
data_set.to_marshal(file: 'test.dump')
|
272
|
+
Treat::Core::DataSet.from_marshal(
|
273
|
+
file: 'test.dump').should == data_set
|
274
|
+
FileUtils.rm('test.dump')
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
describe "#to_mongo" do
|
280
|
+
|
281
|
+
end
|
282
|
+
|
283
|
+
describe "#self.unserialize" do
|
284
|
+
context "when asked to use a given adapter" do
|
285
|
+
it "calls the corresponding #to_something method" do
|
286
|
+
|
287
|
+
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
describe "#self.from_mongo" do
|
292
|
+
|
293
|
+
end
|
294
|
+
|
295
|
+
end
|
296
|
+
|
297
|
+
describe Treat::Core::Problem do
|
298
|
+
|
299
|
+
before do
|
300
|
+
@question = Treat::Core::Question.new(:is_key_sentence,
|
301
|
+
:sentence, :continuous, 0, [0, 1])
|
302
|
+
@feature = Treat::Core::Feature.new(:word_count, 0)
|
303
|
+
@tag = Treat::Core::Tag.new(:paragraph_length, 0,
|
304
|
+
"->(e) { e.parent_paragraph.word_count }")
|
305
|
+
@paragraph = Treat::Entities::Paragraph.new(
|
306
|
+
"Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
|
307
|
+
@paragraph.do :segment, :tokenize
|
308
|
+
@sentence = @paragraph.sentences[0]
|
309
|
+
@hash = {"question"=>{"name"=>:is_key_sentence, "target"=>:sentence,
|
310
|
+
"type"=>:continuous, "default"=>0, "labels"=>[0, 1]}, "features"=>[
|
311
|
+
{"proc_string"=>nil, "default"=>0, "name"=>:word_count, "proc"=>nil}],
|
312
|
+
"tags"=>[{"proc_string"=>"->(e) { e.parent_paragraph.word_count }",
|
313
|
+
"default"=>0, "name"=>:paragraph_length, "proc"=>nil}], "id"=>0}
|
314
|
+
end
|
6
315
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
)
|
316
|
+
describe "#initialize" do
|
317
|
+
context "when supplied with proper arguments" do
|
318
|
+
it "initializes the problem and gives access to parameters" do
|
319
|
+
problem = Treat::Core::Problem.new(@question, @feature, @tag)
|
320
|
+
problem.question.should eql @question
|
321
|
+
problem.features.should eql [@feature]
|
322
|
+
problem.tags.should eql [@tag]
|
323
|
+
problem.feature_labels.should eql [@feature.name]
|
324
|
+
problem.tag_labels.should eql [@tag.name]
|
325
|
+
# ID ??? FIXME
|
326
|
+
end
|
327
|
+
end
|
328
|
+
context "when supplied with unacceptable arguments" do
|
329
|
+
it "raises an error" do
|
330
|
+
# First argument should be instance of Question.
|
331
|
+
expect { Treat::Core::Problem.new('foo') }.to raise_error
|
332
|
+
# Arguments >= 2 should be instances of Export.
|
333
|
+
expect { Treat::Core::Problem.new(@question, 'foo') }.to raise_error
|
334
|
+
# Should have at least one Feature in the arguments.
|
335
|
+
expect { Treat::Core::Problem.new(@question, @tag) }.to raise_error
|
336
|
+
end
|
337
|
+
end
|
338
|
+
end
|
11
339
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
)
|
340
|
+
describe "#==(problem)" do
|
341
|
+
context "when supplied with an equal problem" do
|
342
|
+
it "should return true" do
|
343
|
+
Treat::Core::Problem.new(@question, @feature).
|
344
|
+
should == Treat::Core::Problem.new(@question, @feature)
|
345
|
+
Treat::Core::Problem.new(@question, @feature, @tag).
|
346
|
+
should == Treat::Core::Problem.new(@question, @feature, @tag)
|
347
|
+
end
|
348
|
+
end
|
349
|
+
context "when supplied with a different question" do
|
350
|
+
it "should return false" do
|
351
|
+
question = Treat::Core::Question.new(:is_key_sentence, :sentence)
|
352
|
+
feature = Treat::Core::Feature.new(:word_count, 999)
|
353
|
+
tag = Treat::Core::Tag.new(:paragraph_length, 999)
|
354
|
+
Treat::Core::Problem.new(@question, @feature).
|
355
|
+
should_not == Treat::Core::Problem.new(question, @feature)
|
356
|
+
Treat::Core::Problem.new(@question, @feature).
|
357
|
+
should_not == Treat::Core::Problem.new(@question, feature)
|
358
|
+
Treat::Core::Problem.new(@question, @feature, @tag).
|
359
|
+
should_not == Treat::Core::Problem.new(@question, @feature, tag)
|
360
|
+
end
|
361
|
+
end
|
362
|
+
end
|
16
363
|
|
17
|
-
|
364
|
+
describe "#export_tags(entity)" do
|
365
|
+
context "when called on a problem that has tags" do
|
366
|
+
context "and called with an entity of the proper type" do
|
367
|
+
it "returns an array of the tags" do
|
368
|
+
problem = Treat::Core::Problem.new(@question, @feature, @tag)
|
369
|
+
problem.export_tags(@sentence).should eql [11]
|
370
|
+
end
|
371
|
+
end
|
372
|
+
end
|
373
|
+
context "when called on a problem that doesn't have tags" do
|
374
|
+
it "raises an error" do
|
375
|
+
problem = Treat::Core::Problem.new(@question, @feature)
|
376
|
+
expect { problem.export_tags(@sentence) }.to raise_error
|
377
|
+
end
|
378
|
+
end
|
379
|
+
end
|
18
380
|
|
19
|
-
|
20
|
-
text2 = Paragraph("Welcome here my friend. This is well, a text.")
|
381
|
+
describe "#export_features(entity, include_answer = true)" do
|
21
382
|
|
22
|
-
|
23
|
-
|
383
|
+
context "when called with an entity of the proper type" do
|
384
|
+
context "and include_answer is set to true" do
|
385
|
+
context "and the answer is already set on the entity" do
|
386
|
+
it "returns an array of the exported features, with the answer" do
|
387
|
+
problem = Treat::Core::Problem.new(@question, @feature)
|
388
|
+
@sentence.set :is_key_sentence, 1
|
389
|
+
problem.export_features(@sentence).should eql [7, 1]
|
390
|
+
end
|
391
|
+
end
|
392
|
+
context "and the answer is not already set on the entity" do
|
393
|
+
it "returns an array of the exported features, with the question's default answer" do
|
394
|
+
problem = Treat::Core::Problem.new(@question, @feature)
|
395
|
+
problem.export_features(@sentence).should eql [7, @question.default]
|
396
|
+
end
|
397
|
+
end
|
398
|
+
end
|
399
|
+
context "and include_answer is set to false" do
|
400
|
+
it "returns an array of the exported features, without the answer" do
|
401
|
+
problem = Treat::Core::Problem.new(@question, @feature)
|
402
|
+
problem.export_features(@sentence, false).should eql [7]
|
403
|
+
end
|
404
|
+
end
|
405
|
+
end
|
406
|
+
context "when supplied with an entity that is not of the proper type" do
|
407
|
+
it "raises an error" do
|
408
|
+
problem = Treat::Core::Problem.new(@question, @feature)
|
409
|
+
word = Treat::Entities::Word.new('test')
|
410
|
+
expect { problem.export_features(word) }.to raise_error
|
411
|
+
end
|
412
|
+
end
|
413
|
+
end
|
24
414
|
|
25
|
-
|
26
|
-
|
415
|
+
describe "#to_hash" do
|
416
|
+
context "when called on a problem" do
|
417
|
+
it "returns a hash describing the problem" do
|
418
|
+
Treat::Core::Problem.new(@question, @feature, @tag).
|
419
|
+
to_hash.tap { |e| e['id'] = 0 }.should eql @hash
|
420
|
+
end
|
421
|
+
end
|
422
|
+
end
|
27
423
|
|
28
|
-
|
424
|
+
describe "#self.from_hash" do
|
425
|
+
context "when called with a hash describing a problem" do
|
426
|
+
it "returns a problem based on the hash" do
|
427
|
+
problem = Treat::Core::Problem.from_hash(@hash)
|
428
|
+
problem.question.name.should eql :is_key_sentence
|
429
|
+
problem.question.target.should eql :sentence
|
430
|
+
problem.question.type.should eql :continuous
|
431
|
+
problem.question.default.should eql 0
|
432
|
+
problem.question.labels.should eql [0, 1]
|
433
|
+
problem.features[0].proc_string.should eql nil
|
434
|
+
problem.features[0].default.should eql 0
|
435
|
+
problem.features[0].name.should eql :word_count
|
436
|
+
problem.features[0].proc.should eql nil
|
437
|
+
end
|
438
|
+
end
|
439
|
+
end
|
29
440
|
|
30
|
-
|
31
|
-
=end
|
32
|
-
end
|
441
|
+
end
|
data/spec/document.rb
CHANGED
data/spec/entity.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'helper'
|
2
2
|
|
3
3
|
describe Treat::Entities::Entity do
|
4
4
|
|
@@ -119,13 +119,7 @@ describe Treat::Entities::Entity do
|
|
119
119
|
it "returns a data set with the exported features" do
|
120
120
|
ds = @sentence.export(problem)
|
121
121
|
ds.problem.should eql problem
|
122
|
-
|
123
|
-
ds.entities.should eql @sentence.words.map { |w| w.id }
|
124
|
-
ds.items.should eql [
|
125
|
-
["DT", false], ["JJ", false],
|
126
|
-
["NN", false], ["VBZ", false],
|
127
|
-
["VBG", false]
|
128
|
-
]
|
122
|
+
# MORE TESTS HERE - FIXME
|
129
123
|
end
|
130
124
|
end
|
131
125
|
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
|
3
|
+
SimpleCov.start do
|
4
|
+
|
5
|
+
add_filter '/spec/'
|
6
|
+
add_filter '/config/'
|
7
|
+
|
8
|
+
add_group 'Core', 'treat/core'
|
9
|
+
add_group 'Entities', 'treat/entities'
|
10
|
+
add_group 'Helpers', 'treat/helpers'
|
11
|
+
add_group 'Loaders', 'treat/loaders'
|
12
|
+
add_group 'Workers', 'treat/workers'
|
13
|
+
add_group 'Config', 'config.rb'
|
14
|
+
add_group 'Proxies', 'proxies.rb'
|
15
|
+
add_group 'Treat', 'treat.rb'
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
require_relative '../lib/treat'
|
20
|
+
=begin
|
21
|
+
Treat.core.language.detect = true
|
22
|
+
Treat.core.verbosity.debug = true
|
23
|
+
Treat.paths.files = './files/'
|
24
|
+
Treat.databases.mongo.db = 'gistify'
|
25
|
+
Treat.databases.default.adapter = :mongo
|
26
|
+
Treat.libraries.stanford.model_path =
|
27
|
+
'/ruby/stanford/stanford-core-nlp-all/'
|
28
|
+
Treat.libraries.stanford.jar_path =
|
29
|
+
'/ruby/stanford/stanford-core-nlp-all/'
|
30
|
+
Treat.libraries.punkt.model_path =
|
31
|
+
'/ruby/punkt/'
|
32
|
+
Treat.libraries.reuters.model_path =
|
33
|
+
'/ruby/reuters/'
|
34
|
+
=end
|
data/spec/phrase.rb
CHANGED
data/spec/sandbox.rb
CHANGED
@@ -1,13 +1,36 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'helper'
|
2
|
+
=begin
|
3
|
+
problem = Problem(
|
4
|
+
Question(:is_key_sentence, :sentence, :discrete, 0, [0, 1]),
|
5
|
+
Feature(:word_count, 0),
|
6
|
+
Tag(:number_count, 0)
|
7
|
+
)
|
2
8
|
|
3
|
-
|
9
|
+
problem.id = 70316753228720
|
10
|
+
|
11
|
+
test = Paragraph("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged.")
|
4
12
|
|
5
|
-
|
13
|
+
test.do :segment, :tokenize, :tag, :category
|
6
14
|
|
7
|
-
|
15
|
+
test.sentences[0].set :is_key_sentence, 1
|
16
|
+
test.sentences[1].set :is_key_sentence, 1
|
17
|
+
test.sentences[2].set :is_key_sentence, 0
|
8
18
|
|
9
|
-
|
10
|
-
c << d
|
11
|
-
c << d2
|
19
|
+
ds = test.export(problem)
|
12
20
|
|
13
|
-
|
21
|
+
test.each_sentence do |s|
|
22
|
+
puts s.classify :linear, training: ds
|
23
|
+
end
|
24
|
+
=end
|
25
|
+
=begin
|
26
|
+
Treat.databases.mongo.db = 'testing_ds'
|
27
|
+
ds1 = Treat::Core::DataSet.unserialize :marshal, file: 'test.dump'
|
28
|
+
ds1.serialize :mongo
|
29
|
+
puts ds1.problem.id
|
30
|
+
ds = Treat::Core::DataSet.unserialize :mongo, {problem: ds1.problem.id}
|
31
|
+
puts ds.inspect
|
32
|
+
|
33
|
+
Treat.databases.mongo.db = 'testingyetagain'
|
34
|
+
ds = DataSet(:dump, './all.dump')
|
35
|
+
ds.to_mongo({db: 'test_ds', collection: 'data'})
|
36
|
+
=end
|
data/spec/token.rb
CHANGED
data/spec/treat.rb
CHANGED
data/spec/word.rb
CHANGED
data/spec/zone.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schiphol
|
@@ -122,6 +122,8 @@ files:
|
|
122
122
|
- lib/treat/config/languages/russian.rb
|
123
123
|
- lib/treat/config/languages/spanish.rb
|
124
124
|
- lib/treat/config/languages/swedish.rb
|
125
|
+
- lib/treat/config/libraries/punkt.rb
|
126
|
+
- lib/treat/config/libraries/reuters.rb
|
125
127
|
- lib/treat/config/libraries/stanford.rb
|
126
128
|
- lib/treat/config/linguistics/categories.rb
|
127
129
|
- lib/treat/config/linguistics/punctuation.rb
|
@@ -139,8 +141,7 @@ files:
|
|
139
141
|
- lib/treat/config/workers/retrievers.rb
|
140
142
|
- lib/treat/config.rb
|
141
143
|
- lib/treat/core/data_set.rb
|
142
|
-
- lib/treat/core/
|
143
|
-
- lib/treat/core/node.rb
|
144
|
+
- lib/treat/core/export.rb
|
144
145
|
- lib/treat/core/problem.rb
|
145
146
|
- lib/treat/core/question.rb
|
146
147
|
- lib/treat/core.rb
|
@@ -168,6 +169,7 @@ files:
|
|
168
169
|
- lib/treat/helpers/didyoumean.rb
|
169
170
|
- lib/treat/helpers/escaping.rb
|
170
171
|
- lib/treat/helpers/formatting.rb
|
172
|
+
- lib/treat/helpers/objtohash.rb
|
171
173
|
- lib/treat/helpers/platform.rb
|
172
174
|
- lib/treat/helpers/reflection.rb
|
173
175
|
- lib/treat/helpers/temporary.rb
|
@@ -219,7 +221,9 @@ files:
|
|
219
221
|
- lib/treat/workers/inflectors/stemmers/porter_c.rb
|
220
222
|
- lib/treat/workers/inflectors/stemmers/uea.rb
|
221
223
|
- lib/treat/workers/learners/classifiers/id3.rb
|
224
|
+
- lib/treat/workers/learners/classifiers/linear.rb
|
222
225
|
- lib/treat/workers/learners/classifiers/mlp.rb
|
226
|
+
- lib/treat/workers/learners/classifiers/svm.rb
|
223
227
|
- lib/treat/workers/lexicalizers/categorizers/from_tag.rb
|
224
228
|
- lib/treat/workers/lexicalizers/sensers/wordnet/synset.rb
|
225
229
|
- lib/treat/workers/lexicalizers/sensers/wordnet.rb
|
@@ -247,7 +251,7 @@ files:
|
|
247
251
|
- spec/core.rb
|
248
252
|
- spec/document.rb
|
249
253
|
- spec/entity.rb
|
250
|
-
- spec/
|
254
|
+
- spec/helper.rb
|
251
255
|
- spec/phrase.rb
|
252
256
|
- spec/samples/mathematicians/archimedes.abw
|
253
257
|
- spec/samples/mathematicians/euler.html
|
@@ -260,10 +264,7 @@ files:
|
|
260
264
|
- spec/word.rb
|
261
265
|
- spec/zone.rb
|
262
266
|
- tmp/MANIFEST
|
263
|
-
- files/3_2_release_notes.html
|
264
|
-
- files/bc-monty-robinson-sentencing.html
|
265
267
|
- files/MANIFEST
|
266
|
-
- files/syria-aleppo-clashes.html
|
267
268
|
- README.md
|
268
269
|
- LICENSE
|
269
270
|
homepage: https://github.com/louismullie/treat
|