treat 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/treat/config/data/languages/agnostic.rb +6 -3
- data/lib/treat/config/data/languages/english.rb +1 -1
- data/lib/treat/config/data/workers/extractors.rb +8 -0
- data/lib/treat/loaders/stanford.rb +2 -0
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
- data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
- data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
- data/lib/treat/workers/extractors/time/chronic.rb +6 -41
- data/lib/treat/workers/extractors/time/kronic.rb +20 -0
- data/lib/treat/workers/extractors/time/nickel.rb +0 -15
- data/lib/treat/workers/extractors/time/ruby.rb +2 -33
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
- data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
- data/spec/entities/collection.rb +29 -25
- data/spec/entities/document.rb +45 -44
- data/spec/entities/entity.rb +295 -294
- data/spec/entities/phrase.rb +21 -17
- data/spec/entities/token.rb +43 -40
- data/spec/entities/word.rb +5 -1
- data/spec/entities/zone.rb +26 -22
- data/spec/helper.rb +7 -2
- data/spec/learning/data_set.rb +145 -141
- data/spec/learning/export.rb +46 -42
- data/spec/learning/problem.rb +114 -110
- data/spec/learning/question.rb +46 -42
- data/spec/treat.rb +41 -37
- data/spec/workers/agnostic.rb +2 -2
- data/spec/workers/english.rb +12 -12
- metadata +7 -8
- data/files/21552208.html +0 -786
- data/files/nethttp-cheat-sheet-2940.html +0 -393
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
- data/spec/sandbox.rb +0 -294
- data/spec/workers/examples/english/mathematicians/euler.html +0 -21
data/spec/entities/collection.rb
CHANGED
@@ -1,40 +1,42 @@
|
|
1
|
-
|
1
|
+
module Treat::Specs::Entities
|
2
|
+
|
3
|
+
describe Treat::Entities::Collection do
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
before :all do
|
6
|
+
@file = Treat.paths.spec +
|
7
|
+
'workers/examples/english/mathematicians'
|
8
|
+
end
|
7
9
|
|
8
10
|
|
9
|
-
|
11
|
+
describe "Buildable" do
|
10
12
|
|
11
|
-
|
13
|
+
describe "#build" do
|
12
14
|
|
13
|
-
|
15
|
+
context "when supplied with an existing folder name" do
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
it "recursively searches the folder for " +
|
18
|
+
"files and opens them into a collection of documents" do
|
19
|
+
collection = Treat::Entities::Collection.build(@file)
|
20
|
+
collection.size.should eql 5
|
21
|
+
end
|
20
22
|
|
21
|
-
|
23
|
+
end
|
22
24
|
|
23
|
-
|
25
|
+
context "when supplied a folder name that doesn't exist" do
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
it "creates the directory and opens the collection" do
|
28
|
+
f = Treat.paths.spec + 'workers/examples/english/test'
|
29
|
+
c = Treat::Entities::Collection.build(f)
|
30
|
+
FileTest.directory?(f).should eql true
|
31
|
+
c.should be_an_instance_of Treat::Entities::Collection
|
32
|
+
FileUtils.rm_rf(f)
|
33
|
+
end
|
31
34
|
end
|
32
35
|
end
|
33
|
-
end
|
34
36
|
|
35
|
-
|
37
|
+
end
|
36
38
|
|
37
|
-
|
39
|
+
describe "#<<" do
|
38
40
|
|
39
41
|
it "adds the object to the collection" do
|
40
42
|
f = Treat.paths.spec + 'workers/examples/english/economist'
|
@@ -43,6 +45,8 @@ describe Treat::Entities::Collection do
|
|
43
45
|
c.size.should eql 4
|
44
46
|
end
|
45
47
|
|
48
|
+
end
|
49
|
+
|
46
50
|
end
|
47
51
|
|
48
52
|
end
|
@@ -105,4 +109,4 @@ end
|
|
105
109
|
end
|
106
110
|
|
107
111
|
end
|
108
|
-
=end
|
112
|
+
=end
|
data/spec/entities/document.rb
CHANGED
@@ -1,55 +1,56 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
1
|
+
module Treat::Specs::Entities
|
2
|
+
describe Treat::Entities::Document do
|
3
|
+
|
4
|
+
describe "Buildable" do
|
5
|
+
|
6
|
+
describe "#build" do
|
7
|
+
|
8
|
+
context "when supplied with a readable file name" do
|
9
|
+
it "opens the file and reads its " +
|
10
|
+
"content into a document" do
|
11
|
+
f = Treat.paths.spec +
|
12
|
+
'workers/examples/english/mathematicians/leibniz.txt'
|
13
|
+
d = Treat::Entities::Document.build(f)
|
14
|
+
d.should be_an_instance_of Treat::Entities::Document
|
15
|
+
d.to_s.index('Gottfried Leibniz').should_not eql nil
|
16
|
+
end
|
15
17
|
end
|
16
|
-
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
19
|
+
context "when supplied with a url" do
|
20
|
+
it "downloads the file the URL points to and opens " +
|
21
|
+
"a document with the contents of the file" do
|
22
|
+
url = 'http://www.rubyinside.com/nethttp-cheat-sheet-2940.html'
|
23
|
+
d = Treat::Entities::Document.build(url)
|
24
|
+
d.format.should eql 'html'
|
25
|
+
d.should be_an_instance_of Treat::Entities::Document
|
26
|
+
d.to_s.index('Rubyist').should_not eql nil
|
27
|
+
end
|
26
28
|
end
|
27
|
-
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
30
|
+
context "when supplied with a url with no file extension" do
|
31
|
+
it "downloads the file the URL points to and opens " +
|
32
|
+
"a document with the contents of the file, assuming " +
|
33
|
+
"the downloaded file to be in HTML format" do
|
34
|
+
url = 'http://www.economist.com/node/21552208'
|
35
|
+
d = Treat::Entities::Document.build(url)
|
36
|
+
d.should be_an_instance_of Treat::Entities::Document
|
37
|
+
d.to_s.index('Ronnie Lupe').should_not eql nil
|
38
|
+
end
|
37
39
|
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
40
|
+
|
41
|
+
context "when called with anything else than a " +
|
42
|
+
"readable file name or url" do
|
43
|
+
|
44
|
+
it "raises an exception" do
|
45
|
+
lambda do
|
46
|
+
Treat::Entities::Document.build('nonexistent')
|
47
|
+
end.should raise_error
|
48
|
+
end
|
49
|
+
|
47
50
|
end
|
48
51
|
|
49
52
|
end
|
50
53
|
|
51
54
|
end
|
52
|
-
|
53
55
|
end
|
54
|
-
|
55
|
-
end
|
56
|
+
end
|
data/spec/entities/entity.rb
CHANGED
@@ -1,70 +1,71 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
1
|
+
module Treat::Specs::Entities
|
2
|
+
describe Treat::Entities::Entity do
|
3
|
+
|
4
|
+
before do
|
5
|
+
|
6
|
+
@paragraph = Treat::Entities::Paragraph.new
|
7
|
+
@sentence = Treat::Entities::Sentence.new
|
8
|
+
@noun_phrase = Treat::Entities::Phrase.new
|
9
|
+
@noun_phrase.set :tag, 'NP'
|
10
|
+
@verb_phrase = Treat::Entities::Phrase.new
|
11
|
+
@verb_phrase.set :tag, 'VP'
|
12
|
+
@adj_phrase = Treat::Entities::Phrase.new
|
13
|
+
@adj_phrase.set :tag, 'ADJP'
|
14
|
+
@det = Treat::Entities::Word.new('The')
|
15
|
+
@det.set :category, 'determiner'
|
16
|
+
@det.set :tag, 'DT'
|
17
|
+
@adj = Treat::Entities::Word.new('lazy')
|
18
|
+
@adj.set :category, 'adjective'
|
19
|
+
@adj.set :tag, 'JJ'
|
20
|
+
@noun = Treat::Entities::Word.new('fox')
|
21
|
+
@noun.set :category, 'noun'
|
22
|
+
@noun.set :tag, 'NN'
|
23
|
+
@aux = Treat::Entities::Word.new('is')
|
24
|
+
@aux.set :category, 'verb'
|
25
|
+
@aux.set :tag, 'VBZ'
|
26
|
+
@verb = Treat::Entities::Word.new('running')
|
27
|
+
@verb.set :category, 'verb'
|
28
|
+
@verb.set :tag, 'VBG'
|
29
|
+
@dot = Treat::Entities::Punctuation.new('.')
|
30
|
+
@dot.set :tag, '.'
|
31
|
+
@paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
|
32
|
+
@noun_phrase << [@det, @adj_phrase, @noun]
|
33
|
+
@adj_phrase << @adj
|
34
|
+
@verb_phrase << [@aux, @verb]
|
34
35
|
|
35
|
-
|
36
|
+
end
|
36
37
|
|
37
38
|
|
38
|
-
|
39
|
+
describe "Checkable" do
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
it "checks if an entity has the feature; if not, " +
|
43
|
-
"calls the default worker to get the feature if do_it " +
|
44
|
-
"is set to true; if the entity doesn't have the feature " +
|
45
|
-
" and do_it is set to false, it raises an exception." do
|
46
|
-
|
47
|
-
# NOT PASSING! Dependence on caller method.
|
48
|
-
|
49
|
-
# lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
|
50
|
-
|
51
|
-
end
|
41
|
+
describe "#check_has(feature, do_it = true) " do
|
52
42
|
|
53
|
-
|
43
|
+
it "checks if an entity has the feature; if not, " +
|
44
|
+
"calls the default worker to get the feature if do_it " +
|
45
|
+
"is set to true; if the entity doesn't have the feature " +
|
46
|
+
" and do_it is set to false, it raises an exception." do
|
54
47
|
|
55
|
-
|
48
|
+
# NOT PASSING! Dependence on caller method.
|
56
49
|
|
57
|
-
|
50
|
+
# lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
|
58
51
|
|
59
|
-
|
52
|
+
end
|
60
53
|
|
61
|
-
it "returns the position of the entity in its parent, sarting at 0" do
|
62
|
-
@noun_phrase.position.should eql 0
|
63
|
-
@det.position.should eql 0
|
64
54
|
end
|
65
55
|
|
66
56
|
end
|
67
57
|
|
58
|
+
describe "Countable" do
|
59
|
+
|
60
|
+
describe "#position" do
|
61
|
+
|
62
|
+
it "returns the position of the entity in its parent, sarting at 0" do
|
63
|
+
@noun_phrase.position.should eql 0
|
64
|
+
@det.position.should eql 0
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
68
69
|
=begin
|
69
70
|
|
70
71
|
describe "#frequency" do
|
@@ -88,334 +89,336 @@ describe Treat::Entities::Entity do
|
|
88
89
|
|
89
90
|
=end
|
90
91
|
|
91
|
-
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "Delegatable" do
|
92
95
|
|
93
|
-
|
96
|
+
describe "#self.call_worker" do
|
94
97
|
|
95
|
-
|
98
|
+
it "finds the worker class to " +
|
99
|
+
"perform a task and delegates the task to it " do
|
96
100
|
|
97
|
-
|
98
|
-
|
101
|
+
Treat::Entities::Entity.call_worker(
|
102
|
+
'$'.to_entity, :tag, :lingua,
|
103
|
+
Treat::Workers::Lexicalizers::Taggers, {}).should
|
104
|
+
eql '$'.tag(:lingua)
|
99
105
|
|
100
|
-
|
101
|
-
'$'.to_entity, :tag, :lingua,
|
102
|
-
Treat::Workers::Lexicalizers::Taggers, {}).should
|
103
|
-
eql '$'.tag(:lingua)
|
106
|
+
end
|
104
107
|
|
105
108
|
end
|
106
109
|
|
107
110
|
end
|
108
111
|
|
109
|
-
|
112
|
+
describe "Exportable" do
|
110
113
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
# MORE TESTS HERE - FIXME
|
114
|
+
context "when supplied with a classification to export" do
|
115
|
+
feature = Treat::Learning::Feature.new(:tag)
|
116
|
+
question = Treat::Learning::Question.new(:is_keyword, :word, false, :discrete)
|
117
|
+
problem = Treat::Learning::Problem.new(question, feature)
|
118
|
+
it "returns a data set with the exported features" do
|
119
|
+
ds = @sentence.export(problem)
|
120
|
+
ds.problem.should eql problem
|
121
|
+
# MORE TESTS HERE - FIXME
|
122
|
+
end
|
121
123
|
end
|
122
|
-
end
|
123
124
|
|
124
|
-
|
125
|
+
end
|
125
126
|
|
126
|
-
|
127
|
+
describe "Iterable" do
|
127
128
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
129
|
+
describe "#each { |child| ... }" do
|
130
|
+
it "yields each direct child of a node" do
|
131
|
+
a = []
|
132
|
+
@sentence.each do |child|
|
133
|
+
a << child
|
134
|
+
end
|
135
|
+
a.should eql [@noun_phrase, @verb_phrase, @dot]
|
133
136
|
end
|
134
|
-
a.should eql [@noun_phrase, @verb_phrase, @dot]
|
135
137
|
end
|
136
|
-
end
|
137
138
|
|
138
|
-
|
139
|
+
describe "#each_entity(*entity_types) { |entity| ... }" do
|
139
140
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
141
|
+
context "when called with no arguments" do
|
142
|
+
it "recursively yields each element in " +
|
143
|
+
"the tree, including itself, top-down " +
|
144
|
+
"first then left to right" do
|
144
145
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
146
|
+
a = []
|
147
|
+
@sentence.each_entity do |e|
|
148
|
+
a << e
|
149
|
+
end
|
149
150
|
|
150
|
-
|
151
|
-
|
152
|
-
|
151
|
+
a.should eql [@sentence, @noun_phrase, @det,
|
152
|
+
@adj_phrase, @adj, @noun,
|
153
|
+
@verb_phrase, @aux, @verb, @dot]
|
153
154
|
|
155
|
+
end
|
154
156
|
end
|
155
|
-
end
|
156
157
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
158
|
+
context "when called with one or more entity " +
|
159
|
+
"types supplied as lowercase symbols" do
|
160
|
+
it "recursively yields all elements with the given type(s), "+
|
161
|
+
"including the receiver if it matches on of the types" do
|
162
|
+
a = []
|
163
|
+
@sentence.each_entity(:phrase, :punctuation) do |e|
|
164
|
+
a << e
|
165
|
+
end
|
166
|
+
a.should eql [@noun_phrase,
|
167
|
+
@adj_phrase, @verb_phrase, @dot]
|
164
168
|
end
|
165
|
-
a.should eql [@noun_phrase,
|
166
|
-
@adj_phrase, @verb_phrase, @dot]
|
167
169
|
end
|
168
|
-
end
|
169
170
|
|
171
|
+
end
|
170
172
|
end
|
171
|
-
end
|
172
173
|
|
173
|
-
|
174
|
+
describe "Magical" do
|
174
175
|
|
175
|
-
|
176
|
-
|
176
|
+
describe "#<entity or word type> - e.g. " +
|
177
|
+
"#title, #paragraph, etc. and #adjective, #noun, etc." do
|
178
|
+
|
179
|
+
it "return the first entity with the corresponding " +
|
180
|
+
"type inside another entity, but raises an exception "+
|
181
|
+
"the type occurs more than once in the entity" do
|
182
|
+
@paragraph.sentence.should eql @sentence
|
183
|
+
end
|
177
184
|
|
178
|
-
it "return the first entity with the corresponding " +
|
179
|
-
"type inside another entity, but raises an exception "+
|
180
|
-
"the type occurs more than once in the entity" do
|
181
|
-
@paragraph.sentence.should eql @sentence
|
182
185
|
end
|
183
186
|
|
184
|
-
end
|
185
187
|
|
188
|
+
describe "#<entity or word type>s - e.g. " +
|
189
|
+
"#sections, #words, etc. and #nouns, #adverbs, etc." do
|
186
190
|
|
187
|
-
|
188
|
-
|
191
|
+
it "return an array of the entities with the " +
|
192
|
+
"corresponding type in the subtree of an entity" do
|
193
|
+
@paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
|
194
|
+
end
|
189
195
|
|
190
|
-
it "return an array of the entities with the " +
|
191
|
-
"corresponding type in the subtree of an entity" do
|
192
|
-
@paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
|
193
196
|
end
|
194
197
|
|
195
|
-
|
198
|
+
describe "#each_<entity type> - e.g. " +
|
199
|
+
"#each_sentence, #each_word, etc." do
|
196
200
|
|
197
|
-
|
198
|
-
|
201
|
+
it "yields each of the entities with the " +
|
202
|
+
"corresponding type in the subtree of an entity" do
|
203
|
+
a = []
|
199
204
|
|
200
|
-
|
201
|
-
|
202
|
-
|
205
|
+
@paragraph.each_phrase { |p| a << p }
|
206
|
+
a.should eql [@noun_phrase,
|
207
|
+
@adj_phrase, @verb_phrase]
|
203
208
|
|
204
|
-
|
205
|
-
a.should eql [@noun_phrase,
|
206
|
-
@adj_phrase, @verb_phrase]
|
209
|
+
end
|
207
210
|
|
208
211
|
end
|
209
212
|
|
210
|
-
|
213
|
+
describe "#<entity or word type>_count - e.g. " +
|
214
|
+
"#sentence_count, #paragraph_count, etc. and " +
|
215
|
+
"#noun_count, #verb_count, etc." do
|
211
216
|
|
212
|
-
|
213
|
-
|
214
|
-
|
217
|
+
it "return the number of entities with the " +
|
218
|
+
"corresponding type inside another entity" do
|
219
|
+
@paragraph.sentence_count.should eql 1
|
220
|
+
@paragraph.phrase_count.should eql 3
|
221
|
+
end
|
215
222
|
|
216
|
-
it "return the number of entities with the " +
|
217
|
-
"corresponding type inside another entity" do
|
218
|
-
@paragraph.sentence_count.should eql 1
|
219
|
-
@paragraph.phrase_count.should eql 3
|
220
223
|
end
|
221
224
|
|
222
|
-
|
225
|
+
describe "#<entity or word type>_with_<feature>(value) - " +
|
226
|
+
"e.g. #word_with_id(x) or #adverb_with_value('seemingly')" do
|
223
227
|
|
224
|
-
|
225
|
-
|
228
|
+
it "return the entity with the corresponding type " +
|
229
|
+
"that have [feature] set to the supplied value; raise" +
|
230
|
+
"a warning if there are many entities of that type" do
|
231
|
+
@paragraph.word_with_value('The').should eql @det
|
232
|
+
@paragraph.token_with_tag('.').should eql @dot
|
233
|
+
@sentence.phrase_with_tag('NP').should eql @noun_phrase
|
234
|
+
end
|
226
235
|
|
227
|
-
it "return the entity with the corresponding type " +
|
228
|
-
"that have [feature] set to the supplied value; raise" +
|
229
|
-
"a warning if there are many entities of that type" do
|
230
|
-
@paragraph.word_with_value('The').should eql @det
|
231
|
-
@paragraph.token_with_tag('.').should eql @dot
|
232
|
-
@sentence.phrase_with_tag('NP').should eql @noun_phrase
|
233
236
|
end
|
234
237
|
|
235
|
-
|
238
|
+
describe "#<entity or word type>s_with_<feature>(value) - " +
|
239
|
+
"e.g. #phrases_with_tag('NP'), #nouns_with_value('foo')" do
|
236
240
|
|
237
|
-
|
238
|
-
|
241
|
+
it "return an array of the entities with the " +
|
242
|
+
"corresponding type that have [feature] set to "+
|
243
|
+
"the supplied value" do
|
244
|
+
@paragraph.words_with_value('The').should eql [@det]
|
245
|
+
@paragraph.tokens_with_tag('.').should eql [@dot]
|
246
|
+
@sentence.phrases_with_tag('NP').should eql [@noun_phrase]
|
247
|
+
end
|
239
248
|
|
240
|
-
it "return an array of the entities with the " +
|
241
|
-
"corresponding type that have [feature] set to "+
|
242
|
-
"the supplied value" do
|
243
|
-
@paragraph.words_with_value('The').should eql [@det]
|
244
|
-
@paragraph.tokens_with_tag('.').should eql [@dot]
|
245
|
-
@sentence.phrases_with_tag('NP').should eql [@noun_phrase]
|
246
249
|
end
|
247
250
|
|
248
|
-
|
251
|
+
describe "#parent_<entity type> - e.g. " +
|
252
|
+
"#parent_document, #parent_collection, etc." do
|
249
253
|
|
250
|
-
|
251
|
-
|
254
|
+
it "return the first ancestor of the entity " +
|
255
|
+
"that has the supplied type, or nil if none" do
|
256
|
+
@sentence.parent_paragraph.should eql @paragraph
|
257
|
+
@adj.parent_sentence.should eql @sentence
|
258
|
+
end
|
252
259
|
|
253
|
-
it "return the first ancestor of the entity " +
|
254
|
-
"that has the supplied type, or nil if none" do
|
255
|
-
@sentence.parent_paragraph.should eql @paragraph
|
256
|
-
@adj.parent_sentence.should eql @sentence
|
257
260
|
end
|
258
261
|
|
259
|
-
|
262
|
+
describe "#frequency_in_<entity type> - e.g. " +
|
263
|
+
"#frequency_in_collection, #frequency_in_document, etc." do
|
260
264
|
|
261
|
-
|
262
|
-
|
265
|
+
it "return the frequency of this entity's value " +
|
266
|
+
"in the parent entity with the corresponding type" do
|
267
|
+
@adj.frequency_in_sentence.should eql 1
|
268
|
+
end
|
263
269
|
|
264
|
-
it "return the frequency of this entity's value " +
|
265
|
-
"in the parent entity with the corresponding type" do
|
266
|
-
@adj.frequency_in_sentence.should eql 1
|
267
270
|
end
|
268
271
|
|
269
272
|
end
|
270
273
|
|
271
|
-
|
274
|
+
describe "Stringable" do
|
272
275
|
|
273
|
-
|
276
|
+
describe "#to_string" do
|
277
|
+
it "returns the true text value of the entity " +
|
278
|
+
"or an empty string if it has none" do
|
279
|
+
@paragraph.to_string.should eql ''
|
280
|
+
@noun.to_string.should eql 'fox'
|
281
|
+
end
|
282
|
+
end
|
274
283
|
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
284
|
+
describe "#to_s" do
|
285
|
+
it "returns the string value of the " +
|
286
|
+
"entity or its full subtree" do
|
287
|
+
@paragraph.to_s.should
|
288
|
+
eql 'The lazy fox is running.'
|
289
|
+
@noun.to_s.should eql 'fox'
|
290
|
+
end
|
280
291
|
end
|
281
|
-
end
|
282
292
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
293
|
+
describe "#inspect" do
|
294
|
+
it "returns an informative string " +
|
295
|
+
"concerning the entity" do
|
296
|
+
@paragraph.inspect.should
|
297
|
+
be_an_instance_of String
|
298
|
+
end
|
289
299
|
end
|
290
|
-
end
|
291
300
|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
301
|
+
describe "#short_value" do
|
302
|
+
it "returns a shortened version of the " +
|
303
|
+
"entity's string value" do
|
304
|
+
@paragraph.short_value.should
|
305
|
+
eql 'The lazy fox is running.'
|
306
|
+
end
|
297
307
|
end
|
308
|
+
|
298
309
|
end
|
299
310
|
|
300
|
-
describe "
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
311
|
+
describe "Formatters" do
|
312
|
+
|
313
|
+
|
314
|
+
before do
|
315
|
+
@serializers = Treat.languages.agnostic.
|
316
|
+
workers.formatters.serializers
|
317
|
+
@txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
|
305
318
|
end
|
306
|
-
end
|
307
319
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
+
describe "#serialize" do
|
321
|
+
|
322
|
+
context "when called with a file to save to" do
|
323
|
+
|
324
|
+
it "serializes a document to the supplied format" do
|
325
|
+
|
326
|
+
@serializers.each do |ser|
|
327
|
+
next if ser == :mongo # Fix this!
|
328
|
+
f = Treat.paths.spec + 'test.' + ser.to_s
|
329
|
+
s = Treat::Entities::Paragraph.new(@txt)
|
330
|
+
s.apply(:segment, :tokenize)
|
331
|
+
s.serialize(ser, :file => f)
|
332
|
+
File.delete(f)
|
333
|
+
end
|
320
334
|
|
321
|
-
context "when called with a file to save to" do
|
322
|
-
|
323
|
-
it "serializes a document to the supplied format" do
|
324
|
-
|
325
|
-
@serializers.each do |ser|
|
326
|
-
next if ser == :mongo # Fix this!
|
327
|
-
f = Treat.paths.spec + 'test.' + ser.to_s
|
328
|
-
s = Treat::Entities::Paragraph.new(@txt)
|
329
|
-
s.do(:segment, :tokenize)
|
330
|
-
s.serialize(ser, :file => f)
|
331
|
-
File.delete(f)
|
332
335
|
end
|
333
|
-
|
336
|
+
|
334
337
|
end
|
335
|
-
|
338
|
+
|
336
339
|
end
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
340
|
+
|
341
|
+
describe "#unserialize" do
|
342
|
+
|
343
|
+
context "when called with a serialized file" do
|
344
|
+
|
345
|
+
it "reconstitutes the original entity" do
|
346
|
+
@serializers.each do |ser|
|
347
|
+
next if ser == :mongo # Fix this!
|
348
|
+
|
349
|
+
f = Treat.paths.spec + 'test.' + ser.to_s
|
350
|
+
s = Treat::Entities::Paragraph.new(@txt)
|
351
|
+
|
352
|
+
s.set :test_int, 9
|
353
|
+
s.set :test_float, 9.9
|
354
|
+
s.set :test_string, 'hello'
|
355
|
+
s.set :test_sym, :hello
|
356
|
+
s.set :test_bool, false
|
357
|
+
|
358
|
+
s.apply(:segment, :tokenize)
|
359
|
+
|
360
|
+
s.serialize(ser, file: f)
|
361
|
+
|
362
|
+
d = Treat::Entities::Document.build(f)
|
363
|
+
|
364
|
+
d.test_int.should eql 9
|
365
|
+
d.test_float.should eql 9.9
|
366
|
+
d.test_string.should eql 'hello'
|
367
|
+
d.test_sym.should eql :hello
|
368
|
+
d.test_bool.should eql false
|
369
|
+
|
370
|
+
d.to_s.should eql "The story of the fox." +
|
371
|
+
" The quick brown fox jumped over the lazy dog."
|
372
|
+
d.size.should eql s.size
|
373
|
+
|
374
|
+
d.token_count.should eql s.token_count
|
375
|
+
d.tokens[0].id.should eql s.tokens[0].id
|
376
|
+
|
377
|
+
File.delete(f)
|
378
|
+
|
379
|
+
end
|
380
|
+
|
378
381
|
end
|
379
|
-
|
382
|
+
|
380
383
|
end
|
381
|
-
|
384
|
+
|
382
385
|
end
|
383
|
-
|
386
|
+
|
384
387
|
end
|
385
|
-
|
386
|
-
end
|
387
388
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
389
|
+
describe "Extractors" do
|
390
|
+
|
391
|
+
describe "#language" do
|
392
|
+
context "when language detection is disabled " +
|
393
|
+
"(Treat.core.detect is set to false)" do
|
394
|
+
it "returns the default language (Treat.core.language.default)" do
|
395
|
+
Treat.core.language.detect = false
|
396
|
+
Treat.core.language.default = :test
|
397
|
+
s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
|
398
|
+
s.language.should eql :test
|
399
|
+
Treat.core.language.default = :english
|
400
|
+
end
|
399
401
|
end
|
400
|
-
end
|
401
402
|
|
402
|
-
|
403
|
-
|
403
|
+
context "when language detection is enabled " +
|
404
|
+
"(Treat.detect_language is set to true)" do
|
405
|
+
|
406
|
+
it "guesses the language of the entity" do
|
404
407
|
|
405
|
-
|
408
|
+
Treat.core.language.detect = true
|
409
|
+
a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
|
410
|
+
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
|
411
|
+
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
|
412
|
+
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
|
413
|
+
a.language.should eql :english
|
414
|
+
#b.language.should eql :spanish
|
415
|
+
#c.language.should eql :french
|
416
|
+
#d.language.should eql :german
|
406
417
|
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
|
411
|
-
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
|
412
|
-
a.language.should eql :english
|
413
|
-
#b.language.should eql :spanish
|
414
|
-
#c.language.should eql :french
|
415
|
-
#d.language.should eql :german
|
418
|
+
# Reset default
|
419
|
+
Treat.core.language.detect = false
|
420
|
+
end
|
416
421
|
|
417
|
-
# Reset default
|
418
|
-
Treat.core.language.detect = false
|
419
422
|
end
|
420
423
|
|
421
424
|
end
|
@@ -425,8 +428,6 @@ describe Treat::Entities::Entity do
|
|
425
428
|
end
|
426
429
|
|
427
430
|
end
|
428
|
-
|
429
|
-
|
430
431
|
=begin
|
431
432
|
|
432
433
|
|