treat 2.0.2 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/files/21552208.html +786 -0
- data/files/nethttp-cheat-sheet-2940.html +393 -0
- data/lib/treat/builder.rb +6 -0
- data/lib/treat/config/data/languages/agnostic.rb +2 -2
- data/lib/treat/core/server.rb +1 -0
- data/lib/treat/entities/entity/buildable.rb +1 -1
- data/lib/treat/loaders/linguistics.rb +6 -7
- data/lib/treat/loaders/stanford.rb +45 -11
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/categorizable.rb +30 -32
- data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -24
- data/lib/treat/workers/formatters/readers/html.rb +1 -1
- data/lib/treat/workers/formatters/readers/xml.rb +1 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +1 -1
- data/lib/treat/workers/groupable.rb +2 -1
- data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +3 -3
- data/lib/treat/workers/inflectors/conjugators/linguistics.rb +6 -4
- data/lib/treat/workers/inflectors/declensors/linguistics.rb +11 -18
- data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +4 -4
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +1 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +23 -21
- data/lib/treat/workers/processors/parsers/stanford.rb +10 -20
- data/lib/treat/workers/processors/segmenters/stanford.rb +1 -1
- data/lib/treat/workers/processors/tokenizers/maxent.rb +29 -0
- data/lib/treat/workers/processors/tokenizers/stanford.rb +2 -4
- data/lib/treat.rb +1 -0
- data/spec/helper.rb +8 -6
- data/spec/sandbox.rb +18 -6
- data/spec/workers/agnostic.rb +76 -29
- data/spec/workers/english.rb +23 -73
- data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
- metadata +6 -18
data/spec/workers/agnostic.rb
CHANGED
@@ -1,44 +1,91 @@
|
|
1
|
-
|
1
|
+
class Treat::Specs::Workers::Agnostic
|
2
|
+
|
3
|
+
@@workers = Treat.languages.agnostic.workers
|
2
4
|
|
3
|
-
describe Treat::Workers::Extractors::Language do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
5
|
+
describe Treat::Workers::Extractors::Language do
|
6
|
+
before do
|
7
|
+
@entities = ["Obama and Sarkozy will meet in Berlin."]
|
8
|
+
@languages = ["english"]
|
9
|
+
end
|
10
|
+
context "when called on any textual entity" do
|
11
|
+
it "returns the language of the entity" do
|
12
|
+
# Treat.core.language.detect = true
|
13
|
+
@@workers.extractors.language.each do |extractor|
|
14
|
+
@entities.map(&:language).should eql @languages
|
15
|
+
end
|
16
|
+
# Treat.core.language.detect = false
|
13
17
|
end
|
14
|
-
# Treat.core.language.detect = false
|
15
18
|
end
|
16
19
|
end
|
17
|
-
end
|
18
20
|
|
19
|
-
describe Treat::Workers::
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
describe Treat::Workers::Extractors::TopicWords do
|
22
|
+
|
23
|
+
before do
|
24
|
+
@collections = ["./spec/workers/examples/english/economist"]
|
25
|
+
@topic_words = [["euro", "zone", "european", "mrs", "greece", "chancellor",
|
26
|
+
"berlin", "practice", "german", "germans"], ["bank", "minister", "central",
|
27
|
+
"bajnai", "mr", "hu", "orban", "commission", "hungarian", "government"],
|
28
|
+
["bank", "mr", "central", "bajnai", "prime", "government", "brussels",
|
29
|
+
"responsibility", "national", "independence"], ["mr", "bank", "central",
|
30
|
+
"policies", "prime", "minister", "today", "financial", "government", "funds"],
|
31
|
+
["euro", "merkel", "mr", "zone", "european", "greece", "german", "berlin",
|
32
|
+
"sarkozy", "government"], ["mr", "bajnai", "today", "orban", "government",
|
33
|
+
"forced", "independence", "part", "hand", "minister"], ["sarkozy", "mrs",
|
34
|
+
"zone", "euro", "fiscal", "called", "greece", "merkel", "german", "financial"],
|
35
|
+
["mr", "called", "central", "policies", "financial", "bank", "european",
|
36
|
+
"prime", "minister", "shift"], ["bajnai", "orban", "prime", "mr", "government",
|
37
|
+
"independence", "forced", "commission", "-", "hvg"], ["euro", "sarkozy", "fiscal",
|
38
|
+
"merkel", "mr", "chancellor", "european", "german", "agenda", "soap"], ["mr",
|
39
|
+
"bank", "called", "central", "today", "prime", "government", "minister", "european",
|
40
|
+
"crisis"], ["mr", "fiscal", "mrs", "sarkozy", "merkel", "euro", "summit", "tax",
|
41
|
+
"leaders", "ecb"], ["called", "government", "financial", "policies", "part", "bank",
|
42
|
+
"central", "press", "mr", "president"], ["sarkozy", "merkel", "euro", "mr", "summit",
|
43
|
+
"mrs", "fiscal", "merkozy", "economic", "german"], ["mr", "prime", "minister",
|
44
|
+
"policies", "government", "financial", "crisis", "bank", "called", "part"], ["mr",
|
45
|
+
"bank", "government", "today", "called", "central", "minister", "prime", "issues",
|
46
|
+
"president"], ["mr", "orban", "central", "government", "parliament", "hungarian",
|
47
|
+
"minister", "hu", "personal", "bajnai"], ["government", "called", "central", "european",
|
48
|
+
"today", "bank", "prime", "financial", "part", "deficit"], ["mr", "orban", "government",
|
49
|
+
"hungarian", "bank", "hvg", "minister", "-", "fidesz", "hand"], ["mr", "bank", "european",
|
50
|
+
"minister", "policies", "crisis", "government", "president", "called", "shift"]]
|
51
|
+
end
|
52
|
+
|
53
|
+
context "when #topic_words is called on a chunked, segmented and tokenized collection" do
|
54
|
+
it "annotates the collection with the topic words and returns them" do
|
55
|
+
@@workers.extractors.topic_words.each do |extractor|
|
56
|
+
@collections.map(&method(:collection))
|
57
|
+
.map { |col| col.apply(:chunk,:segment,:tokenize) }
|
58
|
+
map { |col| col.topic_words }.should eql @topic_words
|
59
|
+
end
|
60
|
+
end
|
28
61
|
end
|
29
62
|
end
|
30
|
-
end
|
31
63
|
|
32
|
-
describe Treat::Workers::Formatters::
|
33
|
-
|
34
|
-
|
64
|
+
describe Treat::Workers::Formatters::Serializers do
|
65
|
+
before do
|
66
|
+
@texts = ["A test entity"]
|
67
|
+
end
|
68
|
+
context "when #serialize is called on any textual entity" do
|
69
|
+
it "serializes the entity to disk and returns a pointer to the location" do
|
70
|
+
# m = Treat::Entities::Entity.build
|
71
|
+
@texts.map(&:to_entity).map(&:serialize)
|
72
|
+
.map(&method(:entity)).map(&:to_s).should eql @texts
|
73
|
+
end
|
74
|
+
end
|
35
75
|
end
|
36
|
-
|
37
|
-
|
38
|
-
|
76
|
+
|
77
|
+
describe Treat::Workers::Formatters::Unserializers do
|
78
|
+
before do
|
79
|
+
@texts = ["A te"]
|
80
|
+
end
|
81
|
+
context "when #unserialize is called with a selector on any textual entity" do
|
82
|
+
it "unserializes the file and loads it in the entity" do
|
83
|
+
|
84
|
+
end
|
39
85
|
end
|
40
86
|
end
|
41
87
|
end
|
88
|
+
|
42
89
|
=begin
|
43
90
|
visualize: {
|
44
91
|
entity: {
|
data/spec/workers/english.rb
CHANGED
@@ -1,18 +1,11 @@
|
|
1
1
|
require 'rspec'
|
2
2
|
|
3
3
|
require_relative '../../lib/treat'
|
4
|
-
include Treat::Core::DSL
|
5
4
|
|
6
|
-
|
7
|
-
Treat.libraries.stanford.model_path = '/ruby/stanford/stanford-core-nlp-all/'
|
8
|
-
Treat.libraries.stanford.jar_path = '/ruby/stanford/stanford-core-nlp-all/'
|
9
|
-
Treat.libraries.punkt.model_path = '/ruby/punkt/'
|
10
|
-
Treat.libraries.reuters.model_path = '/ruby/reuters/'
|
11
|
-
=end
|
5
|
+
class Treat::Specs::Workers::English
|
12
6
|
|
13
|
-
|
7
|
+
@@workers = Treat.languages.english.workers
|
14
8
|
|
15
|
-
$workers = Treat.languages.english.workers
|
16
9
|
Treat.core.language.default = 'english'
|
17
10
|
Treat.core.language.detect = false
|
18
11
|
|
@@ -29,7 +22,7 @@ class English
|
|
29
22
|
|
30
23
|
context "when #segment is called on a zone" do
|
31
24
|
it "segments the zone into groups" do
|
32
|
-
|
25
|
+
@@workers.processors.segmenters.each do |segmenter|
|
33
26
|
@zones.map { |zone| zone.segment(segmenter) }
|
34
27
|
.map { |zone| zone.groups.map(&:to_s) }
|
35
28
|
.should eql @groups
|
@@ -72,7 +65,7 @@ class English
|
|
72
65
|
end
|
73
66
|
context "when #tokenize is called on a group" do
|
74
67
|
it "separates the group into tokens" do
|
75
|
-
|
68
|
+
@@workers.processors.tokenizers.each do |tokenizer|
|
76
69
|
@groups.dup.map { |text| group(text).tokenize(tokenizer) }
|
77
70
|
.map { |group| group.tokens.map(&:to_s) }
|
78
71
|
.should eql @tokens
|
@@ -88,7 +81,7 @@ class English
|
|
88
81
|
end
|
89
82
|
context "when #parse is called on a group" do
|
90
83
|
it "tokenizes and parses the group into its syntactical phrases" do
|
91
|
-
|
84
|
+
@@workers.processors.parsers.each do |parser|
|
92
85
|
@groups.dup.map { |text| group(text).parse(parser) }
|
93
86
|
.map { |group| group.phrases.map(&:to_s)}
|
94
87
|
.should eql @phrases
|
@@ -106,7 +99,7 @@ class English
|
|
106
99
|
end
|
107
100
|
context "when #tag is is called on a tokenized group" do
|
108
101
|
it "annotates each token in the group with its tag and returns the tag 'G'" do
|
109
|
-
|
102
|
+
@@workers.lexicalizers.taggers.each do |tagger|
|
110
103
|
@groups.map { |txt| group(txt).tag(tagger) }
|
111
104
|
.all? { |tag| tag == 'G' }.should be_true
|
112
105
|
@groups.map { |txt| group(txt).tokenize }
|
@@ -117,7 +110,7 @@ class English
|
|
117
110
|
end
|
118
111
|
context "when #tag is called on a token" do
|
119
112
|
it "annotates the token with its tag and returns it" do
|
120
|
-
|
113
|
+
@@workers.lexicalizers.taggers.each do |tagger|
|
121
114
|
@tokens.map { |tok| token(tok).tag(tagger) }
|
122
115
|
.should eql @token_tags
|
123
116
|
end
|
@@ -186,7 +179,7 @@ class English
|
|
186
179
|
context "when #synonym is called on a word, or #sense is "+
|
187
180
|
"called on a word with option :nym set to 'hyponyms'" do
|
188
181
|
it "returns the hyponyms of the word" do
|
189
|
-
|
182
|
+
@@workers.lexicalizers.sensers.each do |senser|
|
190
183
|
@words.map { |txt| word(txt) }
|
191
184
|
.map { |wrd| wrd.hyponyms(senser) }.should eql @hyponyms
|
192
185
|
@words.map { |txt| word(txt) }
|
@@ -199,7 +192,7 @@ class English
|
|
199
192
|
context "when #hypernyms is called on a word or #sense is "+
|
200
193
|
"called on a word with option :nym set to 'hyponyms'" do
|
201
194
|
it "returns the hyponyms of the word" do
|
202
|
-
|
195
|
+
@@workers.lexicalizers.sensers.each do |senser|
|
203
196
|
@words.map { |txt| word(txt) }
|
204
197
|
.map { |wrd| wrd.hypernyms(senser) }.should eql @hypernyms
|
205
198
|
@words.map { |txt| word(txt) }
|
@@ -212,7 +205,7 @@ class English
|
|
212
205
|
context "when #antonyms is called on a word or #sense is" +
|
213
206
|
"called on a word with option :nym set to 'antonyms'" do
|
214
207
|
it "returns the hyponyms of the word" do
|
215
|
-
|
208
|
+
@@workers.lexicalizers.sensers.each do |senser|
|
216
209
|
@words.map { |txt| word(txt) }
|
217
210
|
.map { |wrd| wrd.antonyms(senser) }.should eql @antonyms
|
218
211
|
@words.map { |txt| word(txt) }
|
@@ -225,7 +218,7 @@ class English
|
|
225
218
|
context "when #synonyms is called on a word or #sense is" +
|
226
219
|
"called on a word with option :nym set to 'synonyms'" do
|
227
220
|
it "returns the hyponyms of the word" do
|
228
|
-
|
221
|
+
@@workers.lexicalizers.sensers.each do |senser|
|
229
222
|
@words.map { |txt| word(txt) }
|
230
223
|
.map { |wrd| wrd.synonyms(senser) }.should eql @synonyms
|
231
224
|
@words.map { |txt| word(txt) }
|
@@ -251,7 +244,7 @@ class English
|
|
251
244
|
|
252
245
|
context "when #category is called on a tokenized and tagged group" do
|
253
246
|
it "returns a tag corresponding to the group name" do
|
254
|
-
|
247
|
+
@@workers.lexicalizers.categorizers.each do |categorizer|
|
255
248
|
[phrase(@phrase), fragment(@fragment), sentence(@sentence)]
|
256
249
|
.map { |grp| grp.apply(:tag).category(categorizer) }
|
257
250
|
.should eql @group_categories
|
@@ -261,7 +254,7 @@ class English
|
|
261
254
|
|
262
255
|
context "when #category is called called on a tagged token" do
|
263
256
|
it "returns the category corresponding to the token's tag" do
|
264
|
-
|
257
|
+
@@workers.lexicalizers.categorizers.each do |categorizer|
|
265
258
|
@tokens.map { |tok| token(tok).apply(:tag).category(categorizer) }
|
266
259
|
.should eql @token_tags
|
267
260
|
end
|
@@ -281,7 +274,7 @@ class English
|
|
281
274
|
|
282
275
|
context "when #ordinal is called on a number" do
|
283
276
|
it "returns the ordinal form (e.g. 'first') of the number" do
|
284
|
-
|
277
|
+
@@workers.inflectors.ordinalizers.each do |ordinalizer|
|
285
278
|
@numbers.map { |num| number(num) }
|
286
279
|
.map { |num| num.ordinal(ordinalizer) }.should eql @ordinal
|
287
280
|
end
|
@@ -290,7 +283,7 @@ class English
|
|
290
283
|
|
291
284
|
context "when #cardinal is called on a number" do
|
292
285
|
it "returns the cardinal form (e.g. 'second' of the number)" do
|
293
|
-
|
286
|
+
@@workers.inflectors.cardinalizers.each do |cardinalizer|
|
294
287
|
@numbers.map { |num| number(num) }
|
295
288
|
.map { |num| num.cardinal(cardinalizer) }.should eql @cardinal
|
296
289
|
end
|
@@ -306,7 +299,7 @@ class English
|
|
306
299
|
end
|
307
300
|
context "when #stem is called on a word" do
|
308
301
|
it "annotates the word with its stem and returns the stem" do
|
309
|
-
|
302
|
+
@@workers.inflectors.stemmers.each do |stemmer|
|
310
303
|
@words.map { |wrd| wrd.stem(stemmer) }.should eql @stems
|
311
304
|
end
|
312
305
|
end
|
@@ -321,7 +314,7 @@ class English
|
|
321
314
|
|
322
315
|
context "when #name_tag called on a tokenized group" do
|
323
316
|
it "tags each token with its name tag" do
|
324
|
-
|
317
|
+
@@workers.extractors.name_tag.each do |tagger|
|
325
318
|
@groups.map { |grp| grp.tokenize.apply(:name_tag) }
|
326
319
|
.map { |grp| grp.tokens.map { |t| t.get(:name_tag) } }
|
327
320
|
.should eql @tags
|
@@ -339,7 +332,7 @@ class English
|
|
339
332
|
end
|
340
333
|
context "when #topics is called on a chunked, segmented and tokenized document" do
|
341
334
|
it "annotates the document with its general topics and returns them" do
|
342
|
-
|
335
|
+
@@workers.extractors.topics.each do |extractor|
|
343
336
|
@files.map { |f| document(f).apply(:chunk, :segment, :tokenize) }
|
344
337
|
.map { |doc| doc.topics }.should eql @topics
|
345
338
|
end
|
@@ -354,7 +347,7 @@ class English
|
|
354
347
|
end
|
355
348
|
context "when called on a tokenized group representing a time expression" do
|
356
349
|
it "returns the DateTime object corresponding to the time" do
|
357
|
-
|
350
|
+
@@workers.extractors.time.each do |extractor|
|
358
351
|
puts @expressions.map(&:time).inspect
|
359
352
|
@expressions.map(&:time).all? { |time| time
|
360
353
|
.is_a?(DateTime) }.should be_true
|
@@ -365,49 +358,6 @@ class English
|
|
365
358
|
end
|
366
359
|
end
|
367
360
|
|
368
|
-
describe Treat::Workers::Extractors::TopicWords do
|
369
|
-
|
370
|
-
before do
|
371
|
-
@collections = ["./spec/workers/examples/english/economist"]
|
372
|
-
@topic_words = [["euro", "zone", "european", "mrs", "greece", "chancellor",
|
373
|
-
"berlin", "practice", "german", "germans"], ["bank", "minister", "central",
|
374
|
-
"bajnai", "mr", "hu", "orban", "commission", "hungarian", "government"],
|
375
|
-
["bank", "mr", "central", "bajnai", "prime", "government", "brussels",
|
376
|
-
"responsibility", "national", "independence"], ["mr", "bank", "central",
|
377
|
-
"policies", "prime", "minister", "today", "financial", "government", "funds"],
|
378
|
-
["euro", "merkel", "mr", "zone", "european", "greece", "german", "berlin",
|
379
|
-
"sarkozy", "government"], ["mr", "bajnai", "today", "orban", "government",
|
380
|
-
"forced", "independence", "part", "hand", "minister"], ["sarkozy", "mrs",
|
381
|
-
"zone", "euro", "fiscal", "called", "greece", "merkel", "german", "financial"],
|
382
|
-
["mr", "called", "central", "policies", "financial", "bank", "european",
|
383
|
-
"prime", "minister", "shift"], ["bajnai", "orban", "prime", "mr", "government",
|
384
|
-
"independence", "forced", "commission", "-", "hvg"], ["euro", "sarkozy", "fiscal",
|
385
|
-
"merkel", "mr", "chancellor", "european", "german", "agenda", "soap"], ["mr",
|
386
|
-
"bank", "called", "central", "today", "prime", "government", "minister", "european",
|
387
|
-
"crisis"], ["mr", "fiscal", "mrs", "sarkozy", "merkel", "euro", "summit", "tax",
|
388
|
-
"leaders", "ecb"], ["called", "government", "financial", "policies", "part", "bank",
|
389
|
-
"central", "press", "mr", "president"], ["sarkozy", "merkel", "euro", "mr", "summit",
|
390
|
-
"mrs", "fiscal", "merkozy", "economic", "german"], ["mr", "prime", "minister",
|
391
|
-
"policies", "government", "financial", "crisis", "bank", "called", "part"], ["mr",
|
392
|
-
"bank", "government", "today", "called", "central", "minister", "prime", "issues",
|
393
|
-
"president"], ["mr", "orban", "central", "government", "parliament", "hungarian",
|
394
|
-
"minister", "hu", "personal", "bajnai"], ["government", "called", "central", "european",
|
395
|
-
"today", "bank", "prime", "financial", "part", "deficit"], ["mr", "orban", "government",
|
396
|
-
"hungarian", "bank", "hvg", "minister", "-", "fidesz", "hand"], ["mr", "bank", "european",
|
397
|
-
"minister", "policies", "crisis", "government", "president", "called", "shift"]]
|
398
|
-
end
|
399
|
-
|
400
|
-
context "when #topic_words is called on a chunked, segmented and tokenized collection" do
|
401
|
-
it "annotates the collection with the topic words and returns them" do
|
402
|
-
$workers.extractors.topic_words.each do |extractor|
|
403
|
-
@collections.map(&method(:collection))
|
404
|
-
.map { |col| col.apply(:chunk,:segment,:tokenize) }
|
405
|
-
map { |col| col.topic_words }.should eql @topic_words
|
406
|
-
end
|
407
|
-
end
|
408
|
-
end
|
409
|
-
end
|
410
|
-
|
411
361
|
describe Treat::Workers::Inflectors::Conjugators do
|
412
362
|
before do
|
413
363
|
@infinitives = ["run"]
|
@@ -417,7 +367,7 @@ class English
|
|
417
367
|
context "when #present_participle is called on a word or #conjugate " +
|
418
368
|
"is called on a word with option :form set to 'present_participle'" do
|
419
369
|
it "returns the present participle form of the verb" do
|
420
|
-
|
370
|
+
@@workers.inflectors.conjugators.each do |conjugator|
|
421
371
|
@participles.map { |verb| verb
|
422
372
|
.infinitive(conjugator) }
|
423
373
|
.should eql @infinitives
|
@@ -431,7 +381,7 @@ class English
|
|
431
381
|
context "when #infinitive is called on a word or #conjugate is " +
|
432
382
|
"called on a word with option :form set to 'infinitive'" do
|
433
383
|
it "returns the infinitive form of the verb" do
|
434
|
-
|
384
|
+
@@workers.inflectors.conjugators.each do |conjugator|
|
435
385
|
@infinitives.map { |verb| verb
|
436
386
|
.present_participle(conjugator) }
|
437
387
|
.should eql @participles
|
@@ -452,7 +402,7 @@ class English
|
|
452
402
|
context "when #plural is called on a word, or #declense "+
|
453
403
|
"is called on a word with option :count set to 'plural'" do
|
454
404
|
it "returns the plural form of the word" do
|
455
|
-
|
405
|
+
@@workers.inflectors.declensors.each do |declensor|
|
456
406
|
@singulars.map { |word| word.plural(declensor) }
|
457
407
|
.should eql @plurals
|
458
408
|
@singulars.map { |word| word
|
@@ -464,7 +414,7 @@ class English
|
|
464
414
|
context "when #singular is called on a word, or #declense " +
|
465
415
|
"is called on a word with option :count set to 'singular'" do
|
466
416
|
it "returns the singular form of the word" do
|
467
|
-
|
417
|
+
@@workers.inflectors.declensors.each do |declensor|
|
468
418
|
next if declensor == :linguistics
|
469
419
|
@plurals.map { |word| word.singular(declensor) }
|
470
420
|
.should eql @singulars
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
4
|
+
version: 2.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schiphol
|
@@ -75,22 +75,6 @@ dependencies:
|
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: terminal-table
|
80
|
-
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
|
-
requirements:
|
83
|
-
- - ! '>='
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
|
-
type: :development
|
87
|
-
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
|
-
requirements:
|
91
|
-
- - ! '>='
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
version: '0'
|
94
78
|
- !ruby/object:Gem::Dependency
|
95
79
|
name: simplecov
|
96
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,6 +100,7 @@ extra_rdoc_files: []
|
|
116
100
|
files:
|
117
101
|
- bin/MANIFEST
|
118
102
|
- lib/treat/autoload.rb
|
103
|
+
- lib/treat/builder.rb
|
119
104
|
- lib/treat/config/config.rb
|
120
105
|
- lib/treat/config/configurable.rb
|
121
106
|
- lib/treat/config/data/core.rb
|
@@ -244,6 +229,7 @@ files:
|
|
244
229
|
- lib/treat/workers/processors/segmenters/srx.rb
|
245
230
|
- lib/treat/workers/processors/segmenters/stanford.rb
|
246
231
|
- lib/treat/workers/processors/segmenters/tactful.rb
|
232
|
+
- lib/treat/workers/processors/tokenizers/maxent.rb
|
247
233
|
- lib/treat/workers/processors/tokenizers/ptb.rb
|
248
234
|
- lib/treat/workers/processors/tokenizers/punkt.rb
|
249
235
|
- lib/treat/workers/processors/tokenizers/stanford.rb
|
@@ -278,7 +264,9 @@ files:
|
|
278
264
|
- spec/workers/examples/english/test.txt
|
279
265
|
- models/MANIFEST
|
280
266
|
- tmp/MANIFEST
|
267
|
+
- files/21552208.html
|
281
268
|
- files/MANIFEST
|
269
|
+
- files/nethttp-cheat-sheet-2940.html
|
282
270
|
- README.md
|
283
271
|
- LICENSE
|
284
272
|
homepage: https://github.com/louismullie/treat
|