treat 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/LICENSE +1 -1
  2. data/lib/treat/config/core/acronyms.rb +2 -1
  3. data/lib/treat/config/libraries/punkt.rb +1 -0
  4. data/lib/treat/config/libraries/reuters.rb +1 -0
  5. data/lib/treat/core/data_set.rb +125 -66
  6. data/lib/treat/core/export.rb +59 -0
  7. data/lib/treat/core/problem.rb +101 -18
  8. data/lib/treat/core/question.rb +23 -7
  9. data/lib/treat/entities/abilities/iterable.rb +7 -3
  10. data/lib/treat/entities/abilities/stringable.rb +5 -5
  11. data/lib/treat/entities/collection.rb +10 -6
  12. data/lib/treat/entities/entity.rb +1 -1
  13. data/lib/treat/helpers/objtohash.rb +8 -0
  14. data/lib/treat/loaders/stanford.rb +10 -8
  15. data/lib/treat/version.rb +1 -1
  16. data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
  17. data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
  18. data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
  19. data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
  20. data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
  21. data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
  22. data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
  23. data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
  24. data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
  25. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
  26. data/lib/treat/workers/processors/parsers/enju.rb +17 -17
  27. data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
  28. data/spec/collection.rb +3 -3
  29. data/spec/core.rb +430 -21
  30. data/spec/document.rb +1 -1
  31. data/spec/entity.rb +2 -8
  32. data/spec/helper.rb +34 -0
  33. data/spec/phrase.rb +1 -1
  34. data/spec/sandbox.rb +31 -8
  35. data/spec/token.rb +1 -1
  36. data/spec/treat.rb +1 -1
  37. data/spec/word.rb +1 -1
  38. data/spec/zone.rb +1 -1
  39. metadata +9 -8
  40. data/files/3_2_release_notes.html +0 -766
  41. data/files/bc-monty-robinson-sentencing.html +0 -1569
  42. data/files/syria-aleppo-clashes.html +0 -1376
  43. data/lib/treat/core/feature.rb +0 -42
  44. data/lib/treat/core/node.rb +0 -251
  45. data/spec/node.rb +0 -117
data/spec/core.rb CHANGED
@@ -1,32 +1,441 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
+
3
+ describe Treat::Core::Question do
4
+
5
+ describe "#initialize" do
6
+ context "when supplied with acceptable parameters" do
7
+ it "should give access to the parameters" do
8
+ question = Treat::Core::Question.new(
9
+ :is_keyword, :word, :continuous, 0, [0, 1])
10
+ question.name.should eql :is_keyword
11
+ question.target.should eql :word
12
+ question.type.should eql :continuous
13
+ question.default.should eql 0
14
+ question.labels.should eql [0, 1]
15
+ end
16
+ end
17
+ context "when supplied with wrong parameters" do
18
+ it "should raise an exception" do
19
+ # Name should be a symbol
20
+ expect { Treat::Core::Question.new(
21
+ nil, :sentence) }.to raise_error
22
+ # Target should be an actual entity type
23
+ expect { Treat::Core::Question.new(
24
+ :name, :foo) }.to raise_error
25
+ # Distribution type should be continuous or discrete
26
+ expect { Treat::Core::Question.new(
27
+ :name, :sentence, :nonsense) }.to raise_error
28
+ end
29
+ end
30
+ end
31
+
32
+ describe "#==(question)" do
33
+ context "when supplied with an equal question" do
34
+ it "should return true" do
35
+ Treat::Core::Question.new(
36
+ :is_keyword, :word).
37
+ should == Treat::Core::Question.new(
38
+ :is_keyword, :word)
39
+ Treat::Core::Question.new(
40
+ :is_keyword, :word, :continuous).
41
+ should == Treat::Core::Question.new(
42
+ :is_keyword, :word, :continuous)
43
+ Treat::Core::Question.new(
44
+ :is_keyword, :word, :continuous, [0, 1]).
45
+ should == Treat::Core::Question.new(
46
+ :is_keyword, :word, :continuous, [0, 1])
47
+ end
48
+ end
49
+ context "when supplied with a different question" do
50
+ it "should return false" do
51
+ Treat::Core::Question.new(
52
+ :is_keyword, :word).
53
+ should_not == Treat::Core::Question.new(
54
+ :is_keyword, :sentence)
55
+ Treat::Core::Question.new(
56
+ :is_keyword, :word, :continuous).
57
+ should_not == Treat::Core::Question.new(
58
+ :is_keyword, :word, :discrete)
59
+ Treat::Core::Question.new(
60
+ :is_keyword, :word, :continuous, [0, 1]).
61
+ should_not == Treat::Core::Question.new(
62
+ :is_keyword, :word, :continuous, [1, 0])
63
+ end
64
+ end
65
+ end
66
+
67
+ end
68
+
69
+ describe Treat::Core::Export do
70
+
71
+ describe "#initialize" do
72
+ context "when supplied with acceptable parameters" do
73
+ it "should give access to the parameters" do
74
+ export = Treat::Core::Export.new(:name, 0, "->(e) { e }")
75
+ export.name.should eql :name
76
+ export.default.should eql 0
77
+ export.proc_string.should eql "->(e) { e }"
78
+ export.proc.should be_instance_of Proc
79
+ export.proc.call('x').should eql 'x'
80
+ end
81
+ end
82
+ context "when supplied with wrong parameters" do
83
+ it "should raise an exception" do
84
+ # First argument should be a symbol representing the name of the export.
85
+ expect { Treat::Core::Export.new(nil) }.to raise_error
86
+ # Third argument, if supplied, should be a string that
87
+ # evaluates to a proc (NOT a proc/lambda).
88
+ expect { Treat::Core::Export.new(:name, 0, lambda { x } ) }.to raise_error
89
+ # Third argument should be proper ruby syntax.
90
+ expect { Treat::Core::Export.new(:name, 0, "->(e) { ") }.to raise_error
91
+ # Third argument should evaluate to a proc.
92
+ expect { Treat::Core::Export.new(:name, 0, "2") }.to raise_error
93
+ end
94
+ end
95
+ end
96
+
97
+ describe "#==(question)" do
98
+ context "when supplied with an equal question" do
99
+ it "should return true" do
100
+ Treat::Core::Export.new(:name).
101
+ should == Treat::Core::Export.new(:name)
102
+ Treat::Core::Export.new(:name, 0).
103
+ should == Treat::Core::Export.new(:name, 0)
104
+ Treat::Core::Export.new(:name, 0, "->(e) { }").
105
+ should == Treat::Core::Export.new(:name, 0, "->(e) { }")
106
+ end
107
+ end
108
+ context "when supplied with a different question" do
109
+ it "should return false" do
110
+ Treat::Core::Export.new(:name).
111
+ should_not == Treat::Core::Export.new(:name2)
112
+ Treat::Core::Export.new(:name, 0).
113
+ should_not == Treat::Core::Export.new(:name, 1)
114
+ Treat::Core::Export.new(:name, 0, "->(e) { }").
115
+ should_not == Treat::Core::Export.new(:name, 0, "->(e) { x }")
116
+ end
117
+ end
118
+ end
119
+
120
+ end
2
121
 
3
122
  describe Treat::Core::DataSet do
4
- =begin
5
123
 
124
+ before do
125
+ @question = Treat::Core::Question.new(:is_key_sentence, :sentence, :continuous, 0, [0, 1])
126
+ @feature = Treat::Core::Feature.new(:word_count, 0)
127
+ @problem = Treat::Core::Problem.new(@question, @feature)
128
+ @tag = Treat::Core::Tag.new(:paragraph_length, 0,
129
+ "->(e) { e.parent_paragraph.word_count }")
130
+ @paragraph = Treat::Entities::Paragraph.new(
131
+ "Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
132
+ @paragraph.do :segment, :tokenize
133
+ @sentence = @paragraph.sentences[0]
134
+ @data_set = Treat::Core::DataSet.new(@problem)
135
+ end
136
+
137
+ describe "#initialize" do
138
+ context "when supplied with a problem" do
139
+ it "should initialize an empty data set" do
140
+ data_set = Treat::Core::DataSet.new(@problem)
141
+ data_set.items.should eql []
142
+ data_set.problem.should eql @problem
143
+ end
144
+ end
145
+ context "when supplied with an improper argument" do
146
+ it "should raise an error" do
147
+ # The argument to initialize should be a Problem.
148
+ expect { data_set = Treat::Core::DataSet.new("foo") }.to raise_error
149
+ end
150
+ end
151
+ end
152
+
153
+ describe "#self.build" do
154
+
155
+ end
156
+
157
+ describe "#==(other_data_set)" do
158
+ context "when supplied with an equivalent data set" do
159
+ it "returns true" do
160
+ data_set1 = Treat::Core::DataSet.new(@problem)
161
+ data_set2 = Treat::Core::DataSet.new(@problem)
162
+ data_set1.should == data_set2
163
+ data_set1 << @sentence
164
+ data_set2 << @sentence
165
+ data_set1.should == data_set2
166
+ end
167
+ end
168
+
169
+ context "when supplied with a non-equivalent data set" do
170
+ it "returns false" do
171
+ # Get two slightly different problems.
172
+ question1 = Treat::Core::Question.new(
173
+ :is_key_sentence, :sentence, :continuous, 0, [0, 1])
174
+ question2 = Treat::Core::Question.new(
175
+ :is_key_word, :sentence, :continuous, 0, [1, 1])
176
+ problem1 = Treat::Core::Problem.new(question1, @feature)
177
+ problem2 = Treat::Core::Problem.new(question2, @feature)
178
+ # Then the problems shouldn't be equal anymore.
179
+ problem1.should_not == problem2
180
+ # Create data sets with the different problems.
181
+ data_set1 = Treat::Core::DataSet.new(problem1)
182
+ data_set2 = Treat::Core::DataSet.new(problem2)
183
+ # Then the data sets shouldn't be equal anymore.
184
+ data_set1.should_not == data_set2
185
+ # Create two data sets with the same problems.
186
+ data_set1 = Treat::Core::DataSet.new(@problem)
187
+ data_set2 = Treat::Core::DataSet.new(@problem)
188
+ # Then these should be equal.
189
+ data_set1.should == data_set2
190
+ # But when different items are added
191
+ data_set1 << Treat::Entities::Sentence.new(
192
+ "This sentence is not the same as the other.").tokenize
193
+ data_set2 << Treat::Entities::Sentence.new(
194
+ "This sentence is similar to the other.").tokenize
195
+ # They shouldn't be equal anymore.
196
+ data_set1.should_not == data_set2
197
+ end
198
+ end
199
+
200
+ end
201
+
202
+ describe "#merge" do
203
+ context "when supplied with two data sets refering to the same problem" do
204
+ it "merges the two together" do
205
+ # Create two data sets with the same problem.
206
+ data_set1 = Treat::Core::DataSet.new(@problem)
207
+ data_set2 = Treat::Core::DataSet.new(@problem)
208
+ # Add a sentence to each data set.
209
+ data_set1 << Treat::Entities::Sentence.new(
210
+ "This sentence is not the same as the other.").tokenize
211
+ data_set2 << Treat::Entities::Sentence.new(
212
+ "This sentence is similar to the other.").tokenize
213
+ # Merge the two data sets together.
214
+ data_set1.merge(data_set2)
215
+ # Check if the merge has occured properly.
216
+ data_set1.items.size.should eql 2
217
+ data_set1.items[1].should eql data_set2.items[0]
218
+ end
219
+ end
220
+
221
+ context "when supplied with two data sets refering to different problems" do
222
+ it "raises an error" do
223
+ # Get two slightly different questions.
224
+ question1 = Treat::Core::Question.new(
225
+ :is_key_sentence, :sentence, :continuous, 0, [0, 1])
226
+ question2 = Treat::Core::Question.new(
227
+ :is_key_word, :sentence, :continuous, 0, [1, 1])
228
+ # Create two problems with the different questions.
229
+ problem1 = Treat::Core::Problem.new(question1, @feature)
230
+ problem2 = Treat::Core::Problem.new(question2, @feature)
231
+ # Create two data sets with the different problems.
232
+ data_set1 = Treat::Core::DataSet.new(problem1)
233
+ data_set2 = Treat::Core::DataSet.new(problem2)
234
+ # Add elements to each of the data sets.
235
+ data_set1 << Treat::Entities::Sentence.new(
236
+ "This sentence is not the same as the other.").tokenize
237
+ data_set2 << Treat::Entities::Sentence.new(
238
+ "This sentence is similar to the other.").tokenize
239
+ # Try to merge them; but this should fail.
240
+ expect { data_set1.merge(data_set2) }.to raise_error
241
+ end
242
+ end
243
+ end
244
+
245
+ describe "#<<(entity)" do
246
+ context "when supplied with a proper entity" do
247
+ it "exports the features and tags and adds them to the data set" do
248
+ problem = Treat::Core::Problem.new(@question, @feature, @tag)
249
+ data_set = Treat::Core::DataSet.new(problem)
250
+ data_set << @sentence
251
+ data_set.items.tap { |e| e[0][:id] = 0 }.
252
+ should eql [{:tags=>[11], :features=>[7, 0], :id=>0}]
253
+ end
254
+ end
255
+ end
256
+
257
+ describe "#serialize" do
258
+ context "when asked to use a given adapter" do
259
+ it "calls the corresponding #to_something method" do
260
+
261
+ end
262
+ end
263
+ end
264
+
265
+ describe "#to_marshal, #self.from_marshal" do
266
+ context "when asked to successively serialize and deserialize data" do
267
+ it "completes a round trip without losing information" do
268
+ problem = Treat::Core::Problem.new(@question, @feature, @tag)
269
+ data_set = Treat::Core::DataSet.new(problem)
270
+ data_set << @sentence
271
+ data_set.to_marshal(file: 'test.dump')
272
+ Treat::Core::DataSet.from_marshal(
273
+ file: 'test.dump').should == data_set
274
+ FileUtils.rm('test.dump')
275
+ end
276
+ end
277
+ end
278
+
279
+ describe "#to_mongo" do
280
+
281
+ end
282
+
283
+ describe "#self.unserialize" do
284
+ context "when asked to use a given adapter" do
285
+ it "calls the corresponding #to_something method" do
286
+
287
+ end
288
+ end
289
+ end
290
+
291
+ describe "#self.from_mongo" do
292
+
293
+ end
294
+
295
+ end
296
+
297
+ describe Treat::Core::Problem do
298
+
299
+ before do
300
+ @question = Treat::Core::Question.new(:is_key_sentence,
301
+ :sentence, :continuous, 0, [0, 1])
302
+ @feature = Treat::Core::Feature.new(:word_count, 0)
303
+ @tag = Treat::Core::Tag.new(:paragraph_length, 0,
304
+ "->(e) { e.parent_paragraph.word_count }")
305
+ @paragraph = Treat::Entities::Paragraph.new(
306
+ "Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
307
+ @paragraph.do :segment, :tokenize
308
+ @sentence = @paragraph.sentences[0]
309
+ @hash = {"question"=>{"name"=>:is_key_sentence, "target"=>:sentence,
310
+ "type"=>:continuous, "default"=>0, "labels"=>[0, 1]}, "features"=>[
311
+ {"proc_string"=>nil, "default"=>0, "name"=>:word_count, "proc"=>nil}],
312
+ "tags"=>[{"proc_string"=>"->(e) { e.parent_paragraph.word_count }",
313
+ "default"=>0, "name"=>:paragraph_length, "proc"=>nil}], "id"=>0}
314
+ end
6
315
 
7
- p = Problem(
8
- Question(:is_key_sentence, :sentence, false),
9
- Feature(:word_count, 0)
10
- )
316
+ describe "#initialize" do
317
+ context "when supplied with proper arguments" do
318
+ it "initializes the problem and gives access to parameters" do
319
+ problem = Treat::Core::Problem.new(@question, @feature, @tag)
320
+ problem.question.should eql @question
321
+ problem.features.should eql [@feature]
322
+ problem.tags.should eql [@tag]
323
+ problem.feature_labels.should eql [@feature.name]
324
+ problem.tag_labels.should eql [@tag.name]
325
+ # ID ??? FIXME
326
+ end
327
+ end
328
+ context "when supplied with unacceptable arguments" do
329
+ it "raises an error" do
330
+ # First argument should be instance of Question.
331
+ expect { Treat::Core::Problem.new('foo') }.to raise_error
332
+ # Arguments >= 2 should be instances of Export.
333
+ expect { Treat::Core::Problem.new(@question, 'foo') }.to raise_error
334
+ # Should have at least one Feature in the arguments.
335
+ expect { Treat::Core::Problem.new(@question, @tag) }.to raise_error
336
+ end
337
+ end
338
+ end
11
339
 
12
- p2 = Problem(
13
- Question(:is_key_sentence, :sentence, false),
14
- Feature(:word_count, 0)
15
- )
340
+ describe "#==(problem)" do
341
+ context "when supplied with an equal problem" do
342
+ it "should return true" do
343
+ Treat::Core::Problem.new(@question, @feature).
344
+ should == Treat::Core::Problem.new(@question, @feature)
345
+ Treat::Core::Problem.new(@question, @feature, @tag).
346
+ should == Treat::Core::Problem.new(@question, @feature, @tag)
347
+ end
348
+ end
349
+ context "when supplied with a different question" do
350
+ it "should return false" do
351
+ question = Treat::Core::Question.new(:is_key_sentence, :sentence)
352
+ feature = Treat::Core::Feature.new(:word_count, 999)
353
+ tag = Treat::Core::Tag.new(:paragraph_length, 999)
354
+ Treat::Core::Problem.new(@question, @feature).
355
+ should_not == Treat::Core::Problem.new(question, @feature)
356
+ Treat::Core::Problem.new(@question, @feature).
357
+ should_not == Treat::Core::Problem.new(@question, feature)
358
+ Treat::Core::Problem.new(@question, @feature, @tag).
359
+ should_not == Treat::Core::Problem.new(@question, @feature, tag)
360
+ end
361
+ end
362
+ end
16
363
 
17
- ds = DataSet(p)
364
+ describe "#export_tags(entity)" do
365
+ context "when called on a problem that has tags" do
366
+ context "and called with an entity of the proper type" do
367
+ it "returns an array of the tags" do
368
+ problem = Treat::Core::Problem.new(@question, @feature, @tag)
369
+ problem.export_tags(@sentence).should eql [11]
370
+ end
371
+ end
372
+ end
373
+ context "when called on a problem that doesn't have tags" do
374
+ it "raises an error" do
375
+ problem = Treat::Core::Problem.new(@question, @feature)
376
+ expect { problem.export_tags(@sentence) }.to raise_error
377
+ end
378
+ end
379
+ end
18
380
 
19
- text = Paragraph("Welcome to the zoo! This is a text.")
20
- text2 = Paragraph("Welcome here my friend. This is well, a text.")
381
+ describe "#export_features(entity, include_answer = true)" do
21
382
 
22
- text.do :segment, :tokenize
23
- text2.do :segment, :tokenize
383
+ context "when called with an entity of the proper type" do
384
+ context "and include_answer is set to true" do
385
+ context "and the answer is already set on the entity" do
386
+ it "returns an array of the exported features, with the answer" do
387
+ problem = Treat::Core::Problem.new(@question, @feature)
388
+ @sentence.set :is_key_sentence, 1
389
+ problem.export_features(@sentence).should eql [7, 1]
390
+ end
391
+ end
392
+ context "and the answer is not already set on the entity" do
393
+ it "returns an array of the exported features, with the question's default answer" do
394
+ problem = Treat::Core::Problem.new(@question, @feature)
395
+ problem.export_features(@sentence).should eql [7, @question.default]
396
+ end
397
+ end
398
+ end
399
+ context "and include_answer is set to false" do
400
+ it "returns an array of the exported features, without the answer" do
401
+ problem = Treat::Core::Problem.new(@question, @feature)
402
+ problem.export_features(@sentence, false).should eql [7]
403
+ end
404
+ end
405
+ end
406
+ context "when supplied with an entity that is not of the proper type" do
407
+ it "raises an error" do
408
+ problem = Treat::Core::Problem.new(@question, @feature)
409
+ word = Treat::Entities::Word.new('test')
410
+ expect { problem.export_features(word) }.to raise_error
411
+ end
412
+ end
413
+ end
24
414
 
25
- ds1 = text.export(p)
26
- ds2 = text2.export(p2)
415
+ describe "#to_hash" do
416
+ context "when called on a problem" do
417
+ it "returns a hash describing the problem" do
418
+ Treat::Core::Problem.new(@question, @feature, @tag).
419
+ to_hash.tap { |e| e['id'] = 0 }.should eql @hash
420
+ end
421
+ end
422
+ end
27
423
 
28
- ds1.merge(ds2)
424
+ describe "#self.from_hash" do
425
+ context "when called with a hash describing a problem" do
426
+ it "returns a problem based on the hash" do
427
+ problem = Treat::Core::Problem.from_hash(@hash)
428
+ problem.question.name.should eql :is_key_sentence
429
+ problem.question.target.should eql :sentence
430
+ problem.question.type.should eql :continuous
431
+ problem.question.default.should eql 0
432
+ problem.question.labels.should eql [0, 1]
433
+ problem.features[0].proc_string.should eql nil
434
+ problem.features[0].default.should eql 0
435
+ problem.features[0].name.should eql :word_count
436
+ problem.features[0].proc.should eql nil
437
+ end
438
+ end
439
+ end
29
440
 
30
- puts ds1.inspect
31
- =end
32
- end
441
+ end
data/spec/document.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Document do
4
4
 
data/spec/entity.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Entity do
4
4
 
@@ -119,13 +119,7 @@ describe Treat::Entities::Entity do
119
119
  it "returns a data set with the exported features" do
120
120
  ds = @sentence.export(problem)
121
121
  ds.problem.should eql problem
122
- ds.problem.labels.should eql [:tag]
123
- ds.entities.should eql @sentence.words.map { |w| w.id }
124
- ds.items.should eql [
125
- ["DT", false], ["JJ", false],
126
- ["NN", false], ["VBZ", false],
127
- ["VBG", false]
128
- ]
122
+ # MORE TESTS HERE - FIXME
129
123
  end
130
124
  end
131
125
 
data/spec/helper.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'simplecov'
2
+
3
+ SimpleCov.start do
4
+
5
+ add_filter '/spec/'
6
+ add_filter '/config/'
7
+
8
+ add_group 'Core', 'treat/core'
9
+ add_group 'Entities', 'treat/entities'
10
+ add_group 'Helpers', 'treat/helpers'
11
+ add_group 'Loaders', 'treat/loaders'
12
+ add_group 'Workers', 'treat/workers'
13
+ add_group 'Config', 'config.rb'
14
+ add_group 'Proxies', 'proxies.rb'
15
+ add_group 'Treat', 'treat.rb'
16
+
17
+ end
18
+
19
+ require_relative '../lib/treat'
20
+ =begin
21
+ Treat.core.language.detect = true
22
+ Treat.core.verbosity.debug = true
23
+ Treat.paths.files = './files/'
24
+ Treat.databases.mongo.db = 'gistify'
25
+ Treat.databases.default.adapter = :mongo
26
+ Treat.libraries.stanford.model_path =
27
+ '/ruby/stanford/stanford-core-nlp-all/'
28
+ Treat.libraries.stanford.jar_path =
29
+ '/ruby/stanford/stanford-core-nlp-all/'
30
+ Treat.libraries.punkt.model_path =
31
+ '/ruby/punkt/'
32
+ Treat.libraries.reuters.model_path =
33
+ '/ruby/reuters/'
34
+ =end
data/spec/phrase.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Phrase do
4
4
 
data/spec/sandbox.rb CHANGED
@@ -1,13 +1,36 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
+ =begin
3
+ problem = Problem(
4
+ Question(:is_key_sentence, :sentence, :discrete, 0, [0, 1]),
5
+ Feature(:word_count, 0),
6
+ Tag(:number_count, 0)
7
+ )
2
8
 
3
- Treat.databases.mongo.db = 'testingyetagain'
9
+ problem.id = 70316753228720
10
+
11
+ test = Paragraph("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged.")
4
12
 
5
- d = Document('http://www.cbc.ca/news/world/story/2012/07/21/syria-aleppo-clashes.html')
13
+ test.do :segment, :tokenize, :tag, :category
6
14
 
7
- d2 = Document('http://www.cbc.ca/news/canada/british-columbia/story/2012/07/19/bc-monty-robinson-sentencing.html')
15
+ test.sentences[0].set :is_key_sentence, 1
16
+ test.sentences[1].set :is_key_sentence, 1
17
+ test.sentences[2].set :is_key_sentence, 0
8
18
 
9
- c = Collection('testing')
10
- c << d
11
- c << d2
19
+ ds = test.export(problem)
12
20
 
13
- c.serialize :mongo
21
+ test.each_sentence do |s|
22
+ puts s.classify :linear, training: ds
23
+ end
24
+ =end
25
+ =begin
26
+ Treat.databases.mongo.db = 'testing_ds'
27
+ ds1 = Treat::Core::DataSet.unserialize :marshal, file: 'test.dump'
28
+ ds1.serialize :mongo
29
+ puts ds1.problem.id
30
+ ds = Treat::Core::DataSet.unserialize :mongo, {problem: ds1.problem.id}
31
+ puts ds.inspect
32
+
33
+ Treat.databases.mongo.db = 'testingyetagain'
34
+ ds = DataSet(:dump, './all.dump')
35
+ ds.to_mongo({db: 'test_ds', collection: 'data'})
36
+ =end
data/spec/token.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #encoding: utf-8
2
- require_relative '../lib/treat'
2
+ require_relative 'helper'
3
3
 
4
4
  describe Treat::Entities::Token do
5
5
 
data/spec/treat.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat do
4
4
 
data/spec/word.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Word do
4
4
 
data/spec/zone.rb CHANGED
@@ -1,4 +1,4 @@
1
- require_relative '../lib/treat'
1
+ require_relative 'helper'
2
2
 
3
3
  describe Treat::Entities::Zone do
4
4
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treat
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-23 00:00:00.000000000 Z
12
+ date: 2012-08-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: schiphol
@@ -122,6 +122,8 @@ files:
122
122
  - lib/treat/config/languages/russian.rb
123
123
  - lib/treat/config/languages/spanish.rb
124
124
  - lib/treat/config/languages/swedish.rb
125
+ - lib/treat/config/libraries/punkt.rb
126
+ - lib/treat/config/libraries/reuters.rb
125
127
  - lib/treat/config/libraries/stanford.rb
126
128
  - lib/treat/config/linguistics/categories.rb
127
129
  - lib/treat/config/linguistics/punctuation.rb
@@ -139,8 +141,7 @@ files:
139
141
  - lib/treat/config/workers/retrievers.rb
140
142
  - lib/treat/config.rb
141
143
  - lib/treat/core/data_set.rb
142
- - lib/treat/core/feature.rb
143
- - lib/treat/core/node.rb
144
+ - lib/treat/core/export.rb
144
145
  - lib/treat/core/problem.rb
145
146
  - lib/treat/core/question.rb
146
147
  - lib/treat/core.rb
@@ -168,6 +169,7 @@ files:
168
169
  - lib/treat/helpers/didyoumean.rb
169
170
  - lib/treat/helpers/escaping.rb
170
171
  - lib/treat/helpers/formatting.rb
172
+ - lib/treat/helpers/objtohash.rb
171
173
  - lib/treat/helpers/platform.rb
172
174
  - lib/treat/helpers/reflection.rb
173
175
  - lib/treat/helpers/temporary.rb
@@ -219,7 +221,9 @@ files:
219
221
  - lib/treat/workers/inflectors/stemmers/porter_c.rb
220
222
  - lib/treat/workers/inflectors/stemmers/uea.rb
221
223
  - lib/treat/workers/learners/classifiers/id3.rb
224
+ - lib/treat/workers/learners/classifiers/linear.rb
222
225
  - lib/treat/workers/learners/classifiers/mlp.rb
226
+ - lib/treat/workers/learners/classifiers/svm.rb
223
227
  - lib/treat/workers/lexicalizers/categorizers/from_tag.rb
224
228
  - lib/treat/workers/lexicalizers/sensers/wordnet/synset.rb
225
229
  - lib/treat/workers/lexicalizers/sensers/wordnet.rb
@@ -247,7 +251,7 @@ files:
247
251
  - spec/core.rb
248
252
  - spec/document.rb
249
253
  - spec/entity.rb
250
- - spec/node.rb
254
+ - spec/helper.rb
251
255
  - spec/phrase.rb
252
256
  - spec/samples/mathematicians/archimedes.abw
253
257
  - spec/samples/mathematicians/euler.html
@@ -260,10 +264,7 @@ files:
260
264
  - spec/word.rb
261
265
  - spec/zone.rb
262
266
  - tmp/MANIFEST
263
- - files/3_2_release_notes.html
264
- - files/bc-monty-robinson-sentencing.html
265
267
  - files/MANIFEST
266
- - files/syria-aleppo-clashes.html
267
268
  - README.md
268
269
  - LICENSE
269
270
  homepage: https://github.com/louismullie/treat