treat 2.0.3 → 2.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/treat/config/data/languages/agnostic.rb +6 -3
- data/lib/treat/config/data/languages/english.rb +1 -1
- data/lib/treat/config/data/workers/extractors.rb +8 -0
- data/lib/treat/loaders/stanford.rb +2 -0
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
- data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
- data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
- data/lib/treat/workers/extractors/time/chronic.rb +6 -41
- data/lib/treat/workers/extractors/time/kronic.rb +20 -0
- data/lib/treat/workers/extractors/time/nickel.rb +0 -15
- data/lib/treat/workers/extractors/time/ruby.rb +2 -33
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
- data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
- data/spec/entities/collection.rb +29 -25
- data/spec/entities/document.rb +45 -44
- data/spec/entities/entity.rb +295 -294
- data/spec/entities/phrase.rb +21 -17
- data/spec/entities/token.rb +43 -40
- data/spec/entities/word.rb +5 -1
- data/spec/entities/zone.rb +26 -22
- data/spec/helper.rb +7 -2
- data/spec/learning/data_set.rb +145 -141
- data/spec/learning/export.rb +46 -42
- data/spec/learning/problem.rb +114 -110
- data/spec/learning/question.rb +46 -42
- data/spec/treat.rb +41 -37
- data/spec/workers/agnostic.rb +2 -2
- data/spec/workers/english.rb +12 -12
- metadata +7 -8
- data/files/21552208.html +0 -786
- data/files/nethttp-cheat-sheet-2940.html +0 -393
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
- data/spec/sandbox.rb +0 -294
- data/spec/workers/examples/english/mathematicians/euler.html +0 -21
data/spec/entities/collection.rb
CHANGED
@@ -1,40 +1,42 @@
|
|
1
|
-
|
1
|
+
module Treat::Specs::Entities
|
2
|
+
|
3
|
+
describe Treat::Entities::Collection do
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
before :all do
|
6
|
+
@file = Treat.paths.spec +
|
7
|
+
'workers/examples/english/mathematicians'
|
8
|
+
end
|
7
9
|
|
8
10
|
|
9
|
-
|
11
|
+
describe "Buildable" do
|
10
12
|
|
11
|
-
|
13
|
+
describe "#build" do
|
12
14
|
|
13
|
-
|
15
|
+
context "when supplied with an existing folder name" do
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
it "recursively searches the folder for " +
|
18
|
+
"files and opens them into a collection of documents" do
|
19
|
+
collection = Treat::Entities::Collection.build(@file)
|
20
|
+
collection.size.should eql 5
|
21
|
+
end
|
20
22
|
|
21
|
-
|
23
|
+
end
|
22
24
|
|
23
|
-
|
25
|
+
context "when supplied a folder name that doesn't exist" do
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
it "creates the directory and opens the collection" do
|
28
|
+
f = Treat.paths.spec + 'workers/examples/english/test'
|
29
|
+
c = Treat::Entities::Collection.build(f)
|
30
|
+
FileTest.directory?(f).should eql true
|
31
|
+
c.should be_an_instance_of Treat::Entities::Collection
|
32
|
+
FileUtils.rm_rf(f)
|
33
|
+
end
|
31
34
|
end
|
32
35
|
end
|
33
|
-
end
|
34
36
|
|
35
|
-
|
37
|
+
end
|
36
38
|
|
37
|
-
|
39
|
+
describe "#<<" do
|
38
40
|
|
39
41
|
it "adds the object to the collection" do
|
40
42
|
f = Treat.paths.spec + 'workers/examples/english/economist'
|
@@ -43,6 +45,8 @@ describe Treat::Entities::Collection do
|
|
43
45
|
c.size.should eql 4
|
44
46
|
end
|
45
47
|
|
48
|
+
end
|
49
|
+
|
46
50
|
end
|
47
51
|
|
48
52
|
end
|
@@ -105,4 +109,4 @@ end
|
|
105
109
|
end
|
106
110
|
|
107
111
|
end
|
108
|
-
=end
|
112
|
+
=end
|
data/spec/entities/document.rb
CHANGED
@@ -1,55 +1,56 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
1
|
+
module Treat::Specs::Entities
|
2
|
+
describe Treat::Entities::Document do
|
3
|
+
|
4
|
+
describe "Buildable" do
|
5
|
+
|
6
|
+
describe "#build" do
|
7
|
+
|
8
|
+
context "when supplied with a readable file name" do
|
9
|
+
it "opens the file and reads its " +
|
10
|
+
"content into a document" do
|
11
|
+
f = Treat.paths.spec +
|
12
|
+
'workers/examples/english/mathematicians/leibniz.txt'
|
13
|
+
d = Treat::Entities::Document.build(f)
|
14
|
+
d.should be_an_instance_of Treat::Entities::Document
|
15
|
+
d.to_s.index('Gottfried Leibniz').should_not eql nil
|
16
|
+
end
|
15
17
|
end
|
16
|
-
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
19
|
+
context "when supplied with a url" do
|
20
|
+
it "downloads the file the URL points to and opens " +
|
21
|
+
"a document with the contents of the file" do
|
22
|
+
url = 'http://www.rubyinside.com/nethttp-cheat-sheet-2940.html'
|
23
|
+
d = Treat::Entities::Document.build(url)
|
24
|
+
d.format.should eql 'html'
|
25
|
+
d.should be_an_instance_of Treat::Entities::Document
|
26
|
+
d.to_s.index('Rubyist').should_not eql nil
|
27
|
+
end
|
26
28
|
end
|
27
|
-
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
30
|
+
context "when supplied with a url with no file extension" do
|
31
|
+
it "downloads the file the URL points to and opens " +
|
32
|
+
"a document with the contents of the file, assuming " +
|
33
|
+
"the downloaded file to be in HTML format" do
|
34
|
+
url = 'http://www.economist.com/node/21552208'
|
35
|
+
d = Treat::Entities::Document.build(url)
|
36
|
+
d.should be_an_instance_of Treat::Entities::Document
|
37
|
+
d.to_s.index('Ronnie Lupe').should_not eql nil
|
38
|
+
end
|
37
39
|
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
40
|
+
|
41
|
+
context "when called with anything else than a " +
|
42
|
+
"readable file name or url" do
|
43
|
+
|
44
|
+
it "raises an exception" do
|
45
|
+
lambda do
|
46
|
+
Treat::Entities::Document.build('nonexistent')
|
47
|
+
end.should raise_error
|
48
|
+
end
|
49
|
+
|
47
50
|
end
|
48
51
|
|
49
52
|
end
|
50
53
|
|
51
54
|
end
|
52
|
-
|
53
55
|
end
|
54
|
-
|
55
|
-
end
|
56
|
+
end
|
data/spec/entities/entity.rb
CHANGED
@@ -1,70 +1,71 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
1
|
+
module Treat::Specs::Entities
|
2
|
+
describe Treat::Entities::Entity do
|
3
|
+
|
4
|
+
before do
|
5
|
+
|
6
|
+
@paragraph = Treat::Entities::Paragraph.new
|
7
|
+
@sentence = Treat::Entities::Sentence.new
|
8
|
+
@noun_phrase = Treat::Entities::Phrase.new
|
9
|
+
@noun_phrase.set :tag, 'NP'
|
10
|
+
@verb_phrase = Treat::Entities::Phrase.new
|
11
|
+
@verb_phrase.set :tag, 'VP'
|
12
|
+
@adj_phrase = Treat::Entities::Phrase.new
|
13
|
+
@adj_phrase.set :tag, 'ADJP'
|
14
|
+
@det = Treat::Entities::Word.new('The')
|
15
|
+
@det.set :category, 'determiner'
|
16
|
+
@det.set :tag, 'DT'
|
17
|
+
@adj = Treat::Entities::Word.new('lazy')
|
18
|
+
@adj.set :category, 'adjective'
|
19
|
+
@adj.set :tag, 'JJ'
|
20
|
+
@noun = Treat::Entities::Word.new('fox')
|
21
|
+
@noun.set :category, 'noun'
|
22
|
+
@noun.set :tag, 'NN'
|
23
|
+
@aux = Treat::Entities::Word.new('is')
|
24
|
+
@aux.set :category, 'verb'
|
25
|
+
@aux.set :tag, 'VBZ'
|
26
|
+
@verb = Treat::Entities::Word.new('running')
|
27
|
+
@verb.set :category, 'verb'
|
28
|
+
@verb.set :tag, 'VBG'
|
29
|
+
@dot = Treat::Entities::Punctuation.new('.')
|
30
|
+
@dot.set :tag, '.'
|
31
|
+
@paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
|
32
|
+
@noun_phrase << [@det, @adj_phrase, @noun]
|
33
|
+
@adj_phrase << @adj
|
34
|
+
@verb_phrase << [@aux, @verb]
|
34
35
|
|
35
|
-
|
36
|
+
end
|
36
37
|
|
37
38
|
|
38
|
-
|
39
|
+
describe "Checkable" do
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
it "checks if an entity has the feature; if not, " +
|
43
|
-
"calls the default worker to get the feature if do_it " +
|
44
|
-
"is set to true; if the entity doesn't have the feature " +
|
45
|
-
" and do_it is set to false, it raises an exception." do
|
46
|
-
|
47
|
-
# NOT PASSING! Dependence on caller method.
|
48
|
-
|
49
|
-
# lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
|
50
|
-
|
51
|
-
end
|
41
|
+
describe "#check_has(feature, do_it = true) " do
|
52
42
|
|
53
|
-
|
43
|
+
it "checks if an entity has the feature; if not, " +
|
44
|
+
"calls the default worker to get the feature if do_it " +
|
45
|
+
"is set to true; if the entity doesn't have the feature " +
|
46
|
+
" and do_it is set to false, it raises an exception." do
|
54
47
|
|
55
|
-
|
48
|
+
# NOT PASSING! Dependence on caller method.
|
56
49
|
|
57
|
-
|
50
|
+
# lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
|
58
51
|
|
59
|
-
|
52
|
+
end
|
60
53
|
|
61
|
-
it "returns the position of the entity in its parent, sarting at 0" do
|
62
|
-
@noun_phrase.position.should eql 0
|
63
|
-
@det.position.should eql 0
|
64
54
|
end
|
65
55
|
|
66
56
|
end
|
67
57
|
|
58
|
+
describe "Countable" do
|
59
|
+
|
60
|
+
describe "#position" do
|
61
|
+
|
62
|
+
it "returns the position of the entity in its parent, sarting at 0" do
|
63
|
+
@noun_phrase.position.should eql 0
|
64
|
+
@det.position.should eql 0
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
68
69
|
=begin
|
69
70
|
|
70
71
|
describe "#frequency" do
|
@@ -88,334 +89,336 @@ describe Treat::Entities::Entity do
|
|
88
89
|
|
89
90
|
=end
|
90
91
|
|
91
|
-
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "Delegatable" do
|
92
95
|
|
93
|
-
|
96
|
+
describe "#self.call_worker" do
|
94
97
|
|
95
|
-
|
98
|
+
it "finds the worker class to " +
|
99
|
+
"perform a task and delegates the task to it " do
|
96
100
|
|
97
|
-
|
98
|
-
|
101
|
+
Treat::Entities::Entity.call_worker(
|
102
|
+
'$'.to_entity, :tag, :lingua,
|
103
|
+
Treat::Workers::Lexicalizers::Taggers, {}).should
|
104
|
+
eql '$'.tag(:lingua)
|
99
105
|
|
100
|
-
|
101
|
-
'$'.to_entity, :tag, :lingua,
|
102
|
-
Treat::Workers::Lexicalizers::Taggers, {}).should
|
103
|
-
eql '$'.tag(:lingua)
|
106
|
+
end
|
104
107
|
|
105
108
|
end
|
106
109
|
|
107
110
|
end
|
108
111
|
|
109
|
-
|
112
|
+
describe "Exportable" do
|
110
113
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
# MORE TESTS HERE - FIXME
|
114
|
+
context "when supplied with a classification to export" do
|
115
|
+
feature = Treat::Learning::Feature.new(:tag)
|
116
|
+
question = Treat::Learning::Question.new(:is_keyword, :word, false, :discrete)
|
117
|
+
problem = Treat::Learning::Problem.new(question, feature)
|
118
|
+
it "returns a data set with the exported features" do
|
119
|
+
ds = @sentence.export(problem)
|
120
|
+
ds.problem.should eql problem
|
121
|
+
# MORE TESTS HERE - FIXME
|
122
|
+
end
|
121
123
|
end
|
122
|
-
end
|
123
124
|
|
124
|
-
|
125
|
+
end
|
125
126
|
|
126
|
-
|
127
|
+
describe "Iterable" do
|
127
128
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
129
|
+
describe "#each { |child| ... }" do
|
130
|
+
it "yields each direct child of a node" do
|
131
|
+
a = []
|
132
|
+
@sentence.each do |child|
|
133
|
+
a << child
|
134
|
+
end
|
135
|
+
a.should eql [@noun_phrase, @verb_phrase, @dot]
|
133
136
|
end
|
134
|
-
a.should eql [@noun_phrase, @verb_phrase, @dot]
|
135
137
|
end
|
136
|
-
end
|
137
138
|
|
138
|
-
|
139
|
+
describe "#each_entity(*entity_types) { |entity| ... }" do
|
139
140
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
141
|
+
context "when called with no arguments" do
|
142
|
+
it "recursively yields each element in " +
|
143
|
+
"the tree, including itself, top-down " +
|
144
|
+
"first then left to right" do
|
144
145
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
146
|
+
a = []
|
147
|
+
@sentence.each_entity do |e|
|
148
|
+
a << e
|
149
|
+
end
|
149
150
|
|
150
|
-
|
151
|
-
|
152
|
-
|
151
|
+
a.should eql [@sentence, @noun_phrase, @det,
|
152
|
+
@adj_phrase, @adj, @noun,
|
153
|
+
@verb_phrase, @aux, @verb, @dot]
|
153
154
|
|
155
|
+
end
|
154
156
|
end
|
155
|
-
end
|
156
157
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
158
|
+
context "when called with one or more entity " +
|
159
|
+
"types supplied as lowercase symbols" do
|
160
|
+
it "recursively yields all elements with the given type(s), "+
|
161
|
+
"including the receiver if it matches on of the types" do
|
162
|
+
a = []
|
163
|
+
@sentence.each_entity(:phrase, :punctuation) do |e|
|
164
|
+
a << e
|
165
|
+
end
|
166
|
+
a.should eql [@noun_phrase,
|
167
|
+
@adj_phrase, @verb_phrase, @dot]
|
164
168
|
end
|
165
|
-
a.should eql [@noun_phrase,
|
166
|
-
@adj_phrase, @verb_phrase, @dot]
|
167
169
|
end
|
168
|
-
end
|
169
170
|
|
171
|
+
end
|
170
172
|
end
|
171
|
-
end
|
172
173
|
|
173
|
-
|
174
|
+
describe "Magical" do
|
174
175
|
|
175
|
-
|
176
|
-
|
176
|
+
describe "#<entity or word type> - e.g. " +
|
177
|
+
"#title, #paragraph, etc. and #adjective, #noun, etc." do
|
178
|
+
|
179
|
+
it "return the first entity with the corresponding " +
|
180
|
+
"type inside another entity, but raises an exception "+
|
181
|
+
"the type occurs more than once in the entity" do
|
182
|
+
@paragraph.sentence.should eql @sentence
|
183
|
+
end
|
177
184
|
|
178
|
-
it "return the first entity with the corresponding " +
|
179
|
-
"type inside another entity, but raises an exception "+
|
180
|
-
"the type occurs more than once in the entity" do
|
181
|
-
@paragraph.sentence.should eql @sentence
|
182
185
|
end
|
183
186
|
|
184
|
-
end
|
185
187
|
|
188
|
+
describe "#<entity or word type>s - e.g. " +
|
189
|
+
"#sections, #words, etc. and #nouns, #adverbs, etc." do
|
186
190
|
|
187
|
-
|
188
|
-
|
191
|
+
it "return an array of the entities with the " +
|
192
|
+
"corresponding type in the subtree of an entity" do
|
193
|
+
@paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
|
194
|
+
end
|
189
195
|
|
190
|
-
it "return an array of the entities with the " +
|
191
|
-
"corresponding type in the subtree of an entity" do
|
192
|
-
@paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
|
193
196
|
end
|
194
197
|
|
195
|
-
|
198
|
+
describe "#each_<entity type> - e.g. " +
|
199
|
+
"#each_sentence, #each_word, etc." do
|
196
200
|
|
197
|
-
|
198
|
-
|
201
|
+
it "yields each of the entities with the " +
|
202
|
+
"corresponding type in the subtree of an entity" do
|
203
|
+
a = []
|
199
204
|
|
200
|
-
|
201
|
-
|
202
|
-
|
205
|
+
@paragraph.each_phrase { |p| a << p }
|
206
|
+
a.should eql [@noun_phrase,
|
207
|
+
@adj_phrase, @verb_phrase]
|
203
208
|
|
204
|
-
|
205
|
-
a.should eql [@noun_phrase,
|
206
|
-
@adj_phrase, @verb_phrase]
|
209
|
+
end
|
207
210
|
|
208
211
|
end
|
209
212
|
|
210
|
-
|
213
|
+
describe "#<entity or word type>_count - e.g. " +
|
214
|
+
"#sentence_count, #paragraph_count, etc. and " +
|
215
|
+
"#noun_count, #verb_count, etc." do
|
211
216
|
|
212
|
-
|
213
|
-
|
214
|
-
|
217
|
+
it "return the number of entities with the " +
|
218
|
+
"corresponding type inside another entity" do
|
219
|
+
@paragraph.sentence_count.should eql 1
|
220
|
+
@paragraph.phrase_count.should eql 3
|
221
|
+
end
|
215
222
|
|
216
|
-
it "return the number of entities with the " +
|
217
|
-
"corresponding type inside another entity" do
|
218
|
-
@paragraph.sentence_count.should eql 1
|
219
|
-
@paragraph.phrase_count.should eql 3
|
220
223
|
end
|
221
224
|
|
222
|
-
|
225
|
+
describe "#<entity or word type>_with_<feature>(value) - " +
|
226
|
+
"e.g. #word_with_id(x) or #adverb_with_value('seemingly')" do
|
223
227
|
|
224
|
-
|
225
|
-
|
228
|
+
it "return the entity with the corresponding type " +
|
229
|
+
"that have [feature] set to the supplied value; raise" +
|
230
|
+
"a warning if there are many entities of that type" do
|
231
|
+
@paragraph.word_with_value('The').should eql @det
|
232
|
+
@paragraph.token_with_tag('.').should eql @dot
|
233
|
+
@sentence.phrase_with_tag('NP').should eql @noun_phrase
|
234
|
+
end
|
226
235
|
|
227
|
-
it "return the entity with the corresponding type " +
|
228
|
-
"that have [feature] set to the supplied value; raise" +
|
229
|
-
"a warning if there are many entities of that type" do
|
230
|
-
@paragraph.word_with_value('The').should eql @det
|
231
|
-
@paragraph.token_with_tag('.').should eql @dot
|
232
|
-
@sentence.phrase_with_tag('NP').should eql @noun_phrase
|
233
236
|
end
|
234
237
|
|
235
|
-
|
238
|
+
describe "#<entity or word type>s_with_<feature>(value) - " +
|
239
|
+
"e.g. #phrases_with_tag('NP'), #nouns_with_value('foo')" do
|
236
240
|
|
237
|
-
|
238
|
-
|
241
|
+
it "return an array of the entities with the " +
|
242
|
+
"corresponding type that have [feature] set to "+
|
243
|
+
"the supplied value" do
|
244
|
+
@paragraph.words_with_value('The').should eql [@det]
|
245
|
+
@paragraph.tokens_with_tag('.').should eql [@dot]
|
246
|
+
@sentence.phrases_with_tag('NP').should eql [@noun_phrase]
|
247
|
+
end
|
239
248
|
|
240
|
-
it "return an array of the entities with the " +
|
241
|
-
"corresponding type that have [feature] set to "+
|
242
|
-
"the supplied value" do
|
243
|
-
@paragraph.words_with_value('The').should eql [@det]
|
244
|
-
@paragraph.tokens_with_tag('.').should eql [@dot]
|
245
|
-
@sentence.phrases_with_tag('NP').should eql [@noun_phrase]
|
246
249
|
end
|
247
250
|
|
248
|
-
|
251
|
+
describe "#parent_<entity type> - e.g. " +
|
252
|
+
"#parent_document, #parent_collection, etc." do
|
249
253
|
|
250
|
-
|
251
|
-
|
254
|
+
it "return the first ancestor of the entity " +
|
255
|
+
"that has the supplied type, or nil if none" do
|
256
|
+
@sentence.parent_paragraph.should eql @paragraph
|
257
|
+
@adj.parent_sentence.should eql @sentence
|
258
|
+
end
|
252
259
|
|
253
|
-
it "return the first ancestor of the entity " +
|
254
|
-
"that has the supplied type, or nil if none" do
|
255
|
-
@sentence.parent_paragraph.should eql @paragraph
|
256
|
-
@adj.parent_sentence.should eql @sentence
|
257
260
|
end
|
258
261
|
|
259
|
-
|
262
|
+
describe "#frequency_in_<entity type> - e.g. " +
|
263
|
+
"#frequency_in_collection, #frequency_in_document, etc." do
|
260
264
|
|
261
|
-
|
262
|
-
|
265
|
+
it "return the frequency of this entity's value " +
|
266
|
+
"in the parent entity with the corresponding type" do
|
267
|
+
@adj.frequency_in_sentence.should eql 1
|
268
|
+
end
|
263
269
|
|
264
|
-
it "return the frequency of this entity's value " +
|
265
|
-
"in the parent entity with the corresponding type" do
|
266
|
-
@adj.frequency_in_sentence.should eql 1
|
267
270
|
end
|
268
271
|
|
269
272
|
end
|
270
273
|
|
271
|
-
|
274
|
+
describe "Stringable" do
|
272
275
|
|
273
|
-
|
276
|
+
describe "#to_string" do
|
277
|
+
it "returns the true text value of the entity " +
|
278
|
+
"or an empty string if it has none" do
|
279
|
+
@paragraph.to_string.should eql ''
|
280
|
+
@noun.to_string.should eql 'fox'
|
281
|
+
end
|
282
|
+
end
|
274
283
|
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
284
|
+
describe "#to_s" do
|
285
|
+
it "returns the string value of the " +
|
286
|
+
"entity or its full subtree" do
|
287
|
+
@paragraph.to_s.should
|
288
|
+
eql 'The lazy fox is running.'
|
289
|
+
@noun.to_s.should eql 'fox'
|
290
|
+
end
|
280
291
|
end
|
281
|
-
end
|
282
292
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
293
|
+
describe "#inspect" do
|
294
|
+
it "returns an informative string " +
|
295
|
+
"concerning the entity" do
|
296
|
+
@paragraph.inspect.should
|
297
|
+
be_an_instance_of String
|
298
|
+
end
|
289
299
|
end
|
290
|
-
end
|
291
300
|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
301
|
+
describe "#short_value" do
|
302
|
+
it "returns a shortened version of the " +
|
303
|
+
"entity's string value" do
|
304
|
+
@paragraph.short_value.should
|
305
|
+
eql 'The lazy fox is running.'
|
306
|
+
end
|
297
307
|
end
|
308
|
+
|
298
309
|
end
|
299
310
|
|
300
|
-
describe "
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
311
|
+
describe "Formatters" do
|
312
|
+
|
313
|
+
|
314
|
+
before do
|
315
|
+
@serializers = Treat.languages.agnostic.
|
316
|
+
workers.formatters.serializers
|
317
|
+
@txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
|
305
318
|
end
|
306
|
-
end
|
307
319
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
+
describe "#serialize" do
|
321
|
+
|
322
|
+
context "when called with a file to save to" do
|
323
|
+
|
324
|
+
it "serializes a document to the supplied format" do
|
325
|
+
|
326
|
+
@serializers.each do |ser|
|
327
|
+
next if ser == :mongo # Fix this!
|
328
|
+
f = Treat.paths.spec + 'test.' + ser.to_s
|
329
|
+
s = Treat::Entities::Paragraph.new(@txt)
|
330
|
+
s.apply(:segment, :tokenize)
|
331
|
+
s.serialize(ser, :file => f)
|
332
|
+
File.delete(f)
|
333
|
+
end
|
320
334
|
|
321
|
-
context "when called with a file to save to" do
|
322
|
-
|
323
|
-
it "serializes a document to the supplied format" do
|
324
|
-
|
325
|
-
@serializers.each do |ser|
|
326
|
-
next if ser == :mongo # Fix this!
|
327
|
-
f = Treat.paths.spec + 'test.' + ser.to_s
|
328
|
-
s = Treat::Entities::Paragraph.new(@txt)
|
329
|
-
s.do(:segment, :tokenize)
|
330
|
-
s.serialize(ser, :file => f)
|
331
|
-
File.delete(f)
|
332
335
|
end
|
333
|
-
|
336
|
+
|
334
337
|
end
|
335
|
-
|
338
|
+
|
336
339
|
end
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
340
|
+
|
341
|
+
describe "#unserialize" do
|
342
|
+
|
343
|
+
context "when called with a serialized file" do
|
344
|
+
|
345
|
+
it "reconstitutes the original entity" do
|
346
|
+
@serializers.each do |ser|
|
347
|
+
next if ser == :mongo # Fix this!
|
348
|
+
|
349
|
+
f = Treat.paths.spec + 'test.' + ser.to_s
|
350
|
+
s = Treat::Entities::Paragraph.new(@txt)
|
351
|
+
|
352
|
+
s.set :test_int, 9
|
353
|
+
s.set :test_float, 9.9
|
354
|
+
s.set :test_string, 'hello'
|
355
|
+
s.set :test_sym, :hello
|
356
|
+
s.set :test_bool, false
|
357
|
+
|
358
|
+
s.apply(:segment, :tokenize)
|
359
|
+
|
360
|
+
s.serialize(ser, file: f)
|
361
|
+
|
362
|
+
d = Treat::Entities::Document.build(f)
|
363
|
+
|
364
|
+
d.test_int.should eql 9
|
365
|
+
d.test_float.should eql 9.9
|
366
|
+
d.test_string.should eql 'hello'
|
367
|
+
d.test_sym.should eql :hello
|
368
|
+
d.test_bool.should eql false
|
369
|
+
|
370
|
+
d.to_s.should eql "The story of the fox." +
|
371
|
+
" The quick brown fox jumped over the lazy dog."
|
372
|
+
d.size.should eql s.size
|
373
|
+
|
374
|
+
d.token_count.should eql s.token_count
|
375
|
+
d.tokens[0].id.should eql s.tokens[0].id
|
376
|
+
|
377
|
+
File.delete(f)
|
378
|
+
|
379
|
+
end
|
380
|
+
|
378
381
|
end
|
379
|
-
|
382
|
+
|
380
383
|
end
|
381
|
-
|
384
|
+
|
382
385
|
end
|
383
|
-
|
386
|
+
|
384
387
|
end
|
385
|
-
|
386
|
-
end
|
387
388
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
389
|
+
describe "Extractors" do
|
390
|
+
|
391
|
+
describe "#language" do
|
392
|
+
context "when language detection is disabled " +
|
393
|
+
"(Treat.core.detect is set to false)" do
|
394
|
+
it "returns the default language (Treat.core.language.default)" do
|
395
|
+
Treat.core.language.detect = false
|
396
|
+
Treat.core.language.default = :test
|
397
|
+
s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
|
398
|
+
s.language.should eql :test
|
399
|
+
Treat.core.language.default = :english
|
400
|
+
end
|
399
401
|
end
|
400
|
-
end
|
401
402
|
|
402
|
-
|
403
|
-
|
403
|
+
context "when language detection is enabled " +
|
404
|
+
"(Treat.detect_language is set to true)" do
|
405
|
+
|
406
|
+
it "guesses the language of the entity" do
|
404
407
|
|
405
|
-
|
408
|
+
Treat.core.language.detect = true
|
409
|
+
a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
|
410
|
+
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
|
411
|
+
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
|
412
|
+
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
|
413
|
+
a.language.should eql :english
|
414
|
+
#b.language.should eql :spanish
|
415
|
+
#c.language.should eql :french
|
416
|
+
#d.language.should eql :german
|
406
417
|
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
|
411
|
-
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
|
412
|
-
a.language.should eql :english
|
413
|
-
#b.language.should eql :spanish
|
414
|
-
#c.language.should eql :french
|
415
|
-
#d.language.should eql :german
|
418
|
+
# Reset default
|
419
|
+
Treat.core.language.detect = false
|
420
|
+
end
|
416
421
|
|
417
|
-
# Reset default
|
418
|
-
Treat.core.language.detect = false
|
419
422
|
end
|
420
423
|
|
421
424
|
end
|
@@ -425,8 +428,6 @@ describe Treat::Entities::Entity do
|
|
425
428
|
end
|
426
429
|
|
427
430
|
end
|
428
|
-
|
429
|
-
|
430
431
|
=begin
|
431
432
|
|
432
433
|
|