treat 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/lib/treat/config/data/languages/agnostic.rb +6 -3
  2. data/lib/treat/config/data/languages/english.rb +1 -1
  3. data/lib/treat/config/data/workers/extractors.rb +8 -0
  4. data/lib/treat/loaders/stanford.rb +2 -0
  5. data/lib/treat/version.rb +1 -1
  6. data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
  7. data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
  8. data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
  9. data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
  10. data/lib/treat/workers/extractors/time/chronic.rb +6 -41
  11. data/lib/treat/workers/extractors/time/kronic.rb +20 -0
  12. data/lib/treat/workers/extractors/time/nickel.rb +0 -15
  13. data/lib/treat/workers/extractors/time/ruby.rb +2 -33
  14. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
  15. data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
  16. data/spec/entities/collection.rb +29 -25
  17. data/spec/entities/document.rb +45 -44
  18. data/spec/entities/entity.rb +295 -294
  19. data/spec/entities/phrase.rb +21 -17
  20. data/spec/entities/token.rb +43 -40
  21. data/spec/entities/word.rb +5 -1
  22. data/spec/entities/zone.rb +26 -22
  23. data/spec/helper.rb +7 -2
  24. data/spec/learning/data_set.rb +145 -141
  25. data/spec/learning/export.rb +46 -42
  26. data/spec/learning/problem.rb +114 -110
  27. data/spec/learning/question.rb +46 -42
  28. data/spec/treat.rb +41 -37
  29. data/spec/workers/agnostic.rb +2 -2
  30. data/spec/workers/english.rb +12 -12
  31. metadata +7 -8
  32. data/files/21552208.html +0 -786
  33. data/files/nethttp-cheat-sheet-2940.html +0 -393
  34. data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
  35. data/spec/sandbox.rb +0 -294
  36. data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,40 +1,42 @@
1
- describe Treat::Entities::Collection do
1
+ module Treat::Specs::Entities
2
+
3
+ describe Treat::Entities::Collection do
2
4
 
3
- before :all do
4
- @file = Treat.paths.spec +
5
- 'workers/examples/english/mathematicians'
6
- end
5
+ before :all do
6
+ @file = Treat.paths.spec +
7
+ 'workers/examples/english/mathematicians'
8
+ end
7
9
 
8
10
 
9
- describe "Buildable" do
11
+ describe "Buildable" do
10
12
 
11
- describe "#build" do
13
+ describe "#build" do
12
14
 
13
- context "when supplied with an existing folder name" do
15
+ context "when supplied with an existing folder name" do
14
16
 
15
- it "recursively searches the folder for " +
16
- "files and opens them into a collection of documents" do
17
- collection = Treat::Entities::Collection.build(@file)
18
- collection.size.should eql 6
19
- end
17
+ it "recursively searches the folder for " +
18
+ "files and opens them into a collection of documents" do
19
+ collection = Treat::Entities::Collection.build(@file)
20
+ collection.size.should eql 5
21
+ end
20
22
 
21
- end
23
+ end
22
24
 
23
- context "when supplied a folder name that doesn't exist" do
25
+ context "when supplied a folder name that doesn't exist" do
24
26
 
25
- it "creates the directory and opens the collection" do
26
- f = Treat.paths.spec + 'workers/examples/english/test'
27
- c = Treat::Entities::Collection.build(f)
28
- FileTest.directory?(f).should eql true
29
- c.should be_an_instance_of Treat::Entities::Collection
30
- FileUtils.rm_rf(f)
27
+ it "creates the directory and opens the collection" do
28
+ f = Treat.paths.spec + 'workers/examples/english/test'
29
+ c = Treat::Entities::Collection.build(f)
30
+ FileTest.directory?(f).should eql true
31
+ c.should be_an_instance_of Treat::Entities::Collection
32
+ FileUtils.rm_rf(f)
33
+ end
31
34
  end
32
35
  end
33
- end
34
36
 
35
- end
37
+ end
36
38
 
37
- describe "#<<" do
39
+ describe "#<<" do
38
40
 
39
41
  it "adds the object to the collection" do
40
42
  f = Treat.paths.spec + 'workers/examples/english/economist'
@@ -43,6 +45,8 @@ describe Treat::Entities::Collection do
43
45
  c.size.should eql 4
44
46
  end
45
47
 
48
+ end
49
+
46
50
  end
47
51
 
48
52
  end
@@ -105,4 +109,4 @@ end
105
109
  end
106
110
 
107
111
  end
108
- =end
112
+ =end
@@ -1,55 +1,56 @@
1
- describe Treat::Entities::Document do
2
-
3
- describe "Buildable" do
4
-
5
- describe "#build" do
6
-
7
- context "when supplied with a readable file name" do
8
- it "opens the file and reads its " +
9
- "content into a document" do
10
- f = Treat.paths.spec +
11
- 'workers/examples/english/mathematicians/leibniz.txt'
12
- d = Treat::Entities::Document.build(f)
13
- d.should be_an_instance_of Treat::Entities::Document
14
- d.to_s.index('Gottfried Leibniz').should_not eql nil
1
+ module Treat::Specs::Entities
2
+ describe Treat::Entities::Document do
3
+
4
+ describe "Buildable" do
5
+
6
+ describe "#build" do
7
+
8
+ context "when supplied with a readable file name" do
9
+ it "opens the file and reads its " +
10
+ "content into a document" do
11
+ f = Treat.paths.spec +
12
+ 'workers/examples/english/mathematicians/leibniz.txt'
13
+ d = Treat::Entities::Document.build(f)
14
+ d.should be_an_instance_of Treat::Entities::Document
15
+ d.to_s.index('Gottfried Leibniz').should_not eql nil
16
+ end
15
17
  end
16
- end
17
18
 
18
- context "when supplied with a url" do
19
- it "downloads the file the URL points to and opens " +
20
- "a document with the contents of the file" do
21
- url = 'http://www.rubyinside.com/nethttp-cheat-sheet-2940.html'
22
- d = Treat::Entities::Document.build(url)
23
- d.format.should eql 'html'
24
- d.should be_an_instance_of Treat::Entities::Document
25
- d.to_s.index('Rubyist').should_not eql nil
19
+ context "when supplied with a url" do
20
+ it "downloads the file the URL points to and opens " +
21
+ "a document with the contents of the file" do
22
+ url = 'http://www.rubyinside.com/nethttp-cheat-sheet-2940.html'
23
+ d = Treat::Entities::Document.build(url)
24
+ d.format.should eql 'html'
25
+ d.should be_an_instance_of Treat::Entities::Document
26
+ d.to_s.index('Rubyist').should_not eql nil
27
+ end
26
28
  end
27
- end
28
29
 
29
- context "when supplied with a url with no file extension" do
30
- it "downloads the file the URL points to and opens " +
31
- "a document with the contents of the file, assuming " +
32
- "the downloaded file to be in HTML format" do
33
- url = 'http://www.economist.com/node/21552208'
34
- d = Treat::Entities::Document.build(url)
35
- d.should be_an_instance_of Treat::Entities::Document
36
- d.to_s.index('Ronnie Lupe').should_not eql nil
30
+ context "when supplied with a url with no file extension" do
31
+ it "downloads the file the URL points to and opens " +
32
+ "a document with the contents of the file, assuming " +
33
+ "the downloaded file to be in HTML format" do
34
+ url = 'http://www.economist.com/node/21552208'
35
+ d = Treat::Entities::Document.build(url)
36
+ d.should be_an_instance_of Treat::Entities::Document
37
+ d.to_s.index('Ronnie Lupe').should_not eql nil
38
+ end
37
39
  end
38
- end
39
-
40
- context "when called with anything else than a " +
41
- "readable file name or url" do
42
-
43
- it "raises an exception" do
44
- lambda do
45
- Treat::Entities::Document.build('nonexistent')
46
- end.should raise_error
40
+
41
+ context "when called with anything else than a " +
42
+ "readable file name or url" do
43
+
44
+ it "raises an exception" do
45
+ lambda do
46
+ Treat::Entities::Document.build('nonexistent')
47
+ end.should raise_error
48
+ end
49
+
47
50
  end
48
51
 
49
52
  end
50
53
 
51
54
  end
52
-
53
55
  end
54
-
55
- end
56
+ end
@@ -1,70 +1,71 @@
1
- describe Treat::Entities::Entity do
2
-
3
- before do
4
-
5
- @paragraph = Treat::Entities::Paragraph.new
6
- @sentence = Treat::Entities::Sentence.new
7
- @noun_phrase = Treat::Entities::Phrase.new
8
- @noun_phrase.set :tag, 'NP'
9
- @verb_phrase = Treat::Entities::Phrase.new
10
- @verb_phrase.set :tag, 'VP'
11
- @adj_phrase = Treat::Entities::Phrase.new
12
- @adj_phrase.set :tag, 'ADJP'
13
- @det = Treat::Entities::Word.new('The')
14
- @det.set :category, 'determiner'
15
- @det.set :tag, 'DT'
16
- @adj = Treat::Entities::Word.new('lazy')
17
- @adj.set :category, 'adjective'
18
- @adj.set :tag, 'JJ'
19
- @noun = Treat::Entities::Word.new('fox')
20
- @noun.set :category, 'noun'
21
- @noun.set :tag, 'NN'
22
- @aux = Treat::Entities::Word.new('is')
23
- @aux.set :category, 'verb'
24
- @aux.set :tag, 'VBZ'
25
- @verb = Treat::Entities::Word.new('running')
26
- @verb.set :category, 'verb'
27
- @verb.set :tag, 'VBG'
28
- @dot = Treat::Entities::Punctuation.new('.')
29
- @dot.set :tag, '.'
30
- @paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
31
- @noun_phrase << [@det, @adj_phrase, @noun]
32
- @adj_phrase << @adj
33
- @verb_phrase << [@aux, @verb]
1
+ module Treat::Specs::Entities
2
+ describe Treat::Entities::Entity do
3
+
4
+ before do
5
+
6
+ @paragraph = Treat::Entities::Paragraph.new
7
+ @sentence = Treat::Entities::Sentence.new
8
+ @noun_phrase = Treat::Entities::Phrase.new
9
+ @noun_phrase.set :tag, 'NP'
10
+ @verb_phrase = Treat::Entities::Phrase.new
11
+ @verb_phrase.set :tag, 'VP'
12
+ @adj_phrase = Treat::Entities::Phrase.new
13
+ @adj_phrase.set :tag, 'ADJP'
14
+ @det = Treat::Entities::Word.new('The')
15
+ @det.set :category, 'determiner'
16
+ @det.set :tag, 'DT'
17
+ @adj = Treat::Entities::Word.new('lazy')
18
+ @adj.set :category, 'adjective'
19
+ @adj.set :tag, 'JJ'
20
+ @noun = Treat::Entities::Word.new('fox')
21
+ @noun.set :category, 'noun'
22
+ @noun.set :tag, 'NN'
23
+ @aux = Treat::Entities::Word.new('is')
24
+ @aux.set :category, 'verb'
25
+ @aux.set :tag, 'VBZ'
26
+ @verb = Treat::Entities::Word.new('running')
27
+ @verb.set :category, 'verb'
28
+ @verb.set :tag, 'VBG'
29
+ @dot = Treat::Entities::Punctuation.new('.')
30
+ @dot.set :tag, '.'
31
+ @paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
32
+ @noun_phrase << [@det, @adj_phrase, @noun]
33
+ @adj_phrase << @adj
34
+ @verb_phrase << [@aux, @verb]
34
35
 
35
- end
36
+ end
36
37
 
37
38
 
38
- describe "Checkable" do
39
+ describe "Checkable" do
39
40
 
40
- describe "#check_has(feature, do_it = true) " do
41
-
42
- it "checks if an entity has the feature; if not, " +
43
- "calls the default worker to get the feature if do_it " +
44
- "is set to true; if the entity doesn't have the feature " +
45
- " and do_it is set to false, it raises an exception." do
46
-
47
- # NOT PASSING! Dependence on caller method.
48
-
49
- # lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
50
-
51
- end
41
+ describe "#check_has(feature, do_it = true) " do
52
42
 
53
- end
43
+ it "checks if an entity has the feature; if not, " +
44
+ "calls the default worker to get the feature if do_it " +
45
+ "is set to true; if the entity doesn't have the feature " +
46
+ " and do_it is set to false, it raises an exception." do
54
47
 
55
- end
48
+ # NOT PASSING! Dependence on caller method.
56
49
 
57
- describe "Countable" do
50
+ # lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
58
51
 
59
- describe "#position" do
52
+ end
60
53
 
61
- it "returns the position of the entity in its parent, sarting at 0" do
62
- @noun_phrase.position.should eql 0
63
- @det.position.should eql 0
64
54
  end
65
55
 
66
56
  end
67
57
 
58
+ describe "Countable" do
59
+
60
+ describe "#position" do
61
+
62
+ it "returns the position of the entity in its parent, sarting at 0" do
63
+ @noun_phrase.position.should eql 0
64
+ @det.position.should eql 0
65
+ end
66
+
67
+ end
68
+
68
69
  =begin
69
70
 
70
71
  describe "#frequency" do
@@ -88,334 +89,336 @@ describe Treat::Entities::Entity do
88
89
 
89
90
  =end
90
91
 
91
- end
92
+ end
93
+
94
+ describe "Delegatable" do
92
95
 
93
- describe "Delegatable" do
96
+ describe "#self.call_worker" do
94
97
 
95
- describe "#self.call_worker" do
98
+ it "finds the worker class to " +
99
+ "perform a task and delegates the task to it " do
96
100
 
97
- it "finds the worker class to " +
98
- "perform a task and delegates the task to it " do
101
+ Treat::Entities::Entity.call_worker(
102
+ '$'.to_entity, :tag, :lingua,
103
+ Treat::Workers::Lexicalizers::Taggers, {}).should
104
+ eql '$'.tag(:lingua)
99
105
 
100
- Treat::Entities::Entity.call_worker(
101
- '$'.to_entity, :tag, :lingua,
102
- Treat::Workers::Lexicalizers::Taggers, {}).should
103
- eql '$'.tag(:lingua)
106
+ end
104
107
 
105
108
  end
106
109
 
107
110
  end
108
111
 
109
- end
112
+ describe "Exportable" do
110
113
 
111
- describe "Exportable" do
112
-
113
- context "when supplied with a classification to export" do
114
- feature = Treat::Learning::Feature.new(:tag)
115
- question = Treat::Learning::Question.new(:is_keyword, :word, false, :discrete)
116
- problem = Treat::Learning::Problem.new(question, feature)
117
- it "returns a data set with the exported features" do
118
- ds = @sentence.export(problem)
119
- ds.problem.should eql problem
120
- # MORE TESTS HERE - FIXME
114
+ context "when supplied with a classification to export" do
115
+ feature = Treat::Learning::Feature.new(:tag)
116
+ question = Treat::Learning::Question.new(:is_keyword, :word, false, :discrete)
117
+ problem = Treat::Learning::Problem.new(question, feature)
118
+ it "returns a data set with the exported features" do
119
+ ds = @sentence.export(problem)
120
+ ds.problem.should eql problem
121
+ # MORE TESTS HERE - FIXME
122
+ end
121
123
  end
122
- end
123
124
 
124
- end
125
+ end
125
126
 
126
- describe "Iterable" do
127
+ describe "Iterable" do
127
128
 
128
- describe "#each { |child| ... }" do
129
- it "yields each direct child of a node" do
130
- a = []
131
- @sentence.each do |child|
132
- a << child
129
+ describe "#each { |child| ... }" do
130
+ it "yields each direct child of a node" do
131
+ a = []
132
+ @sentence.each do |child|
133
+ a << child
134
+ end
135
+ a.should eql [@noun_phrase, @verb_phrase, @dot]
133
136
  end
134
- a.should eql [@noun_phrase, @verb_phrase, @dot]
135
137
  end
136
- end
137
138
 
138
- describe "#each_entity(*entity_types) { |entity| ... }" do
139
+ describe "#each_entity(*entity_types) { |entity| ... }" do
139
140
 
140
- context "when called with no arguments" do
141
- it "recursively yields each element in " +
142
- "the tree, including itself, top-down " +
143
- "first then left to right" do
141
+ context "when called with no arguments" do
142
+ it "recursively yields each element in " +
143
+ "the tree, including itself, top-down " +
144
+ "first then left to right" do
144
145
 
145
- a = []
146
- @sentence.each_entity do |e|
147
- a << e
148
- end
146
+ a = []
147
+ @sentence.each_entity do |e|
148
+ a << e
149
+ end
149
150
 
150
- a.should eql [@sentence, @noun_phrase, @det,
151
- @adj_phrase, @adj, @noun,
152
- @verb_phrase, @aux, @verb, @dot]
151
+ a.should eql [@sentence, @noun_phrase, @det,
152
+ @adj_phrase, @adj, @noun,
153
+ @verb_phrase, @aux, @verb, @dot]
153
154
 
155
+ end
154
156
  end
155
- end
156
157
 
157
- context "when called with one or more entity " +
158
- "types supplied as lowercase symbols" do
159
- it "recursively yields all elements with the given type(s), "+
160
- "including the receiver if it matches on of the types" do
161
- a = []
162
- @sentence.each_entity(:phrase, :punctuation) do |e|
163
- a << e
158
+ context "when called with one or more entity " +
159
+ "types supplied as lowercase symbols" do
160
+ it "recursively yields all elements with the given type(s), "+
161
+ "including the receiver if it matches on of the types" do
162
+ a = []
163
+ @sentence.each_entity(:phrase, :punctuation) do |e|
164
+ a << e
165
+ end
166
+ a.should eql [@noun_phrase,
167
+ @adj_phrase, @verb_phrase, @dot]
164
168
  end
165
- a.should eql [@noun_phrase,
166
- @adj_phrase, @verb_phrase, @dot]
167
169
  end
168
- end
169
170
 
171
+ end
170
172
  end
171
- end
172
173
 
173
- describe "Magical" do
174
+ describe "Magical" do
174
175
 
175
- describe "#<entity or word type> - e.g. " +
176
- "#title, #paragraph, etc. and #adjective, #noun, etc." do
176
+ describe "#<entity or word type> - e.g. " +
177
+ "#title, #paragraph, etc. and #adjective, #noun, etc." do
178
+
179
+ it "return the first entity with the corresponding " +
180
+ "type inside another entity, but raises an exception "+
181
+ "the type occurs more than once in the entity" do
182
+ @paragraph.sentence.should eql @sentence
183
+ end
177
184
 
178
- it "return the first entity with the corresponding " +
179
- "type inside another entity, but raises an exception "+
180
- "the type occurs more than once in the entity" do
181
- @paragraph.sentence.should eql @sentence
182
185
  end
183
186
 
184
- end
185
187
 
188
+ describe "#<entity or word type>s - e.g. " +
189
+ "#sections, #words, etc. and #nouns, #adverbs, etc." do
186
190
 
187
- describe "#<entity or word type>s - e.g. " +
188
- "#sections, #words, etc. and #nouns, #adverbs, etc." do
191
+ it "return an array of the entities with the " +
192
+ "corresponding type in the subtree of an entity" do
193
+ @paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
194
+ end
189
195
 
190
- it "return an array of the entities with the " +
191
- "corresponding type in the subtree of an entity" do
192
- @paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
193
196
  end
194
197
 
195
- end
198
+ describe "#each_<entity type> - e.g. " +
199
+ "#each_sentence, #each_word, etc." do
196
200
 
197
- describe "#each_<entity type> - e.g. " +
198
- "#each_sentence, #each_word, etc." do
201
+ it "yields each of the entities with the " +
202
+ "corresponding type in the subtree of an entity" do
203
+ a = []
199
204
 
200
- it "yields each of the entities with the " +
201
- "corresponding type in the subtree of an entity" do
202
- a = []
205
+ @paragraph.each_phrase { |p| a << p }
206
+ a.should eql [@noun_phrase,
207
+ @adj_phrase, @verb_phrase]
203
208
 
204
- @paragraph.each_phrase { |p| a << p }
205
- a.should eql [@noun_phrase,
206
- @adj_phrase, @verb_phrase]
209
+ end
207
210
 
208
211
  end
209
212
 
210
- end
213
+ describe "#<entity or word type>_count - e.g. " +
214
+ "#sentence_count, #paragraph_count, etc. and " +
215
+ "#noun_count, #verb_count, etc." do
211
216
 
212
- describe "#<entity or word type>_count - e.g. " +
213
- "#sentence_count, #paragraph_count, etc. and " +
214
- "#noun_count, #verb_count, etc." do
217
+ it "return the number of entities with the " +
218
+ "corresponding type inside another entity" do
219
+ @paragraph.sentence_count.should eql 1
220
+ @paragraph.phrase_count.should eql 3
221
+ end
215
222
 
216
- it "return the number of entities with the " +
217
- "corresponding type inside another entity" do
218
- @paragraph.sentence_count.should eql 1
219
- @paragraph.phrase_count.should eql 3
220
223
  end
221
224
 
222
- end
225
+ describe "#<entity or word type>_with_<feature>(value) - " +
226
+ "e.g. #word_with_id(x) or #adverb_with_value('seemingly')" do
223
227
 
224
- describe "#<entity or word type>_with_<feature>(value) - " +
225
- "e.g. #word_with_id(x) or #adverb_with_value('seemingly')" do
228
+ it "return the entity with the corresponding type " +
229
+ "that have [feature] set to the supplied value; raise" +
230
+ "a warning if there are many entities of that type" do
231
+ @paragraph.word_with_value('The').should eql @det
232
+ @paragraph.token_with_tag('.').should eql @dot
233
+ @sentence.phrase_with_tag('NP').should eql @noun_phrase
234
+ end
226
235
 
227
- it "return the entity with the corresponding type " +
228
- "that have [feature] set to the supplied value; raise" +
229
- "a warning if there are many entities of that type" do
230
- @paragraph.word_with_value('The').should eql @det
231
- @paragraph.token_with_tag('.').should eql @dot
232
- @sentence.phrase_with_tag('NP').should eql @noun_phrase
233
236
  end
234
237
 
235
- end
238
+ describe "#<entity or word type>s_with_<feature>(value) - " +
239
+ "e.g. #phrases_with_tag('NP'), #nouns_with_value('foo')" do
236
240
 
237
- describe "#<entity or word type>s_with_<feature>(value) - " +
238
- "e.g. #phrases_with_tag('NP'), #nouns_with_value('foo')" do
241
+ it "return an array of the entities with the " +
242
+ "corresponding type that have [feature] set to "+
243
+ "the supplied value" do
244
+ @paragraph.words_with_value('The').should eql [@det]
245
+ @paragraph.tokens_with_tag('.').should eql [@dot]
246
+ @sentence.phrases_with_tag('NP').should eql [@noun_phrase]
247
+ end
239
248
 
240
- it "return an array of the entities with the " +
241
- "corresponding type that have [feature] set to "+
242
- "the supplied value" do
243
- @paragraph.words_with_value('The').should eql [@det]
244
- @paragraph.tokens_with_tag('.').should eql [@dot]
245
- @sentence.phrases_with_tag('NP').should eql [@noun_phrase]
246
249
  end
247
250
 
248
- end
251
+ describe "#parent_<entity type> - e.g. " +
252
+ "#parent_document, #parent_collection, etc." do
249
253
 
250
- describe "#parent_<entity type> - e.g. " +
251
- "#parent_document, #parent_collection, etc." do
254
+ it "return the first ancestor of the entity " +
255
+ "that has the supplied type, or nil if none" do
256
+ @sentence.parent_paragraph.should eql @paragraph
257
+ @adj.parent_sentence.should eql @sentence
258
+ end
252
259
 
253
- it "return the first ancestor of the entity " +
254
- "that has the supplied type, or nil if none" do
255
- @sentence.parent_paragraph.should eql @paragraph
256
- @adj.parent_sentence.should eql @sentence
257
260
  end
258
261
 
259
- end
262
+ describe "#frequency_in_<entity type> - e.g. " +
263
+ "#frequency_in_collection, #frequency_in_document, etc." do
260
264
 
261
- describe "#frequency_in_<entity type> - e.g. " +
262
- "#frequency_in_collection, #frequency_in_document, etc." do
265
+ it "return the frequency of this entity's value " +
266
+ "in the parent entity with the corresponding type" do
267
+ @adj.frequency_in_sentence.should eql 1
268
+ end
263
269
 
264
- it "return the frequency of this entity's value " +
265
- "in the parent entity with the corresponding type" do
266
- @adj.frequency_in_sentence.should eql 1
267
270
  end
268
271
 
269
272
  end
270
273
 
271
- end
274
+ describe "Stringable" do
272
275
 
273
- describe "Stringable" do
276
+ describe "#to_string" do
277
+ it "returns the true text value of the entity " +
278
+ "or an empty string if it has none" do
279
+ @paragraph.to_string.should eql ''
280
+ @noun.to_string.should eql 'fox'
281
+ end
282
+ end
274
283
 
275
- describe "#to_string" do
276
- it "returns the true text value of the entity " +
277
- "or an empty string if it has none" do
278
- @paragraph.to_string.should eql ''
279
- @noun.to_string.should eql 'fox'
284
+ describe "#to_s" do
285
+ it "returns the string value of the " +
286
+ "entity or its full subtree" do
287
+ @paragraph.to_s.should
288
+ eql 'The lazy fox is running.'
289
+ @noun.to_s.should eql 'fox'
290
+ end
280
291
  end
281
- end
282
292
 
283
- describe "#to_s" do
284
- it "returns the string value of the " +
285
- "entity or its full subtree" do
286
- @paragraph.to_s.should
287
- eql 'The lazy fox is running.'
288
- @noun.to_s.should eql 'fox'
293
+ describe "#inspect" do
294
+ it "returns an informative string " +
295
+ "concerning the entity" do
296
+ @paragraph.inspect.should
297
+ be_an_instance_of String
298
+ end
289
299
  end
290
- end
291
300
 
292
- describe "#inspect" do
293
- it "returns an informative string " +
294
- "concerning the entity" do
295
- @paragraph.inspect.should
296
- be_an_instance_of String
301
+ describe "#short_value" do
302
+ it "returns a shortened version of the " +
303
+ "entity's string value" do
304
+ @paragraph.short_value.should
305
+ eql 'The lazy fox is running.'
306
+ end
297
307
  end
308
+
298
309
  end
299
310
 
300
- describe "#short_value" do
301
- it "returns a shortened version of the " +
302
- "entity's string value" do
303
- @paragraph.short_value.should
304
- eql 'The lazy fox is running.'
311
+ describe "Formatters" do
312
+
313
+
314
+ before do
315
+ @serializers = Treat.languages.agnostic.
316
+ workers.formatters.serializers
317
+ @txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
305
318
  end
306
- end
307
319
 
308
- end
309
-
310
- describe "Formatters" do
311
-
312
-
313
- before do
314
- @serializers = Treat.languages.agnostic.
315
- workers.formatters.serializers
316
- @txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
317
- end
318
-
319
- describe "#serialize" do
320
+ describe "#serialize" do
321
+
322
+ context "when called with a file to save to" do
323
+
324
+ it "serializes a document to the supplied format" do
325
+
326
+ @serializers.each do |ser|
327
+ next if ser == :mongo # Fix this!
328
+ f = Treat.paths.spec + 'test.' + ser.to_s
329
+ s = Treat::Entities::Paragraph.new(@txt)
330
+ s.apply(:segment, :tokenize)
331
+ s.serialize(ser, :file => f)
332
+ File.delete(f)
333
+ end
320
334
 
321
- context "when called with a file to save to" do
322
-
323
- it "serializes a document to the supplied format" do
324
-
325
- @serializers.each do |ser|
326
- next if ser == :mongo # Fix this!
327
- f = Treat.paths.spec + 'test.' + ser.to_s
328
- s = Treat::Entities::Paragraph.new(@txt)
329
- s.do(:segment, :tokenize)
330
- s.serialize(ser, :file => f)
331
- File.delete(f)
332
335
  end
333
-
336
+
334
337
  end
335
-
338
+
336
339
  end
337
-
338
- end
339
-
340
- describe "#unserialize" do
341
-
342
- context "when called with a serialized file" do
343
-
344
- it "reconstitutes the original entity" do
345
- @serializers.each do |ser|
346
- next if ser == :mongo # Fix this!
347
-
348
- f = Treat.paths.spec + 'test.' + ser.to_s
349
- s = Treat::Entities::Paragraph.new(@txt)
350
-
351
- s.set :test_int, 9
352
- s.set :test_float, 9.9
353
- s.set :test_string, 'hello'
354
- s.set :test_sym, :hello
355
- s.set :test_bool, false
356
-
357
- s.do(:segment, :tokenize)
358
-
359
- s.serialize(ser, file: f)
360
-
361
- d = Treat::Entities::Document.build(f)
362
-
363
- d.test_int.should eql 9
364
- d.test_float.should eql 9.9
365
- d.test_string.should eql 'hello'
366
- d.test_sym.should eql :hello
367
- d.test_bool.should eql false
368
-
369
- d.to_s.should eql "The story of the fox." +
370
- " The quick brown fox jumped over the lazy dog."
371
- d.size.should eql s.size
372
-
373
- d.token_count.should eql s.token_count
374
- d.tokens[0].id.should eql s.tokens[0].id
375
-
376
- File.delete(f)
377
-
340
+
341
+ describe "#unserialize" do
342
+
343
+ context "when called with a serialized file" do
344
+
345
+ it "reconstitutes the original entity" do
346
+ @serializers.each do |ser|
347
+ next if ser == :mongo # Fix this!
348
+
349
+ f = Treat.paths.spec + 'test.' + ser.to_s
350
+ s = Treat::Entities::Paragraph.new(@txt)
351
+
352
+ s.set :test_int, 9
353
+ s.set :test_float, 9.9
354
+ s.set :test_string, 'hello'
355
+ s.set :test_sym, :hello
356
+ s.set :test_bool, false
357
+
358
+ s.apply(:segment, :tokenize)
359
+
360
+ s.serialize(ser, file: f)
361
+
362
+ d = Treat::Entities::Document.build(f)
363
+
364
+ d.test_int.should eql 9
365
+ d.test_float.should eql 9.9
366
+ d.test_string.should eql 'hello'
367
+ d.test_sym.should eql :hello
368
+ d.test_bool.should eql false
369
+
370
+ d.to_s.should eql "The story of the fox." +
371
+ " The quick brown fox jumped over the lazy dog."
372
+ d.size.should eql s.size
373
+
374
+ d.token_count.should eql s.token_count
375
+ d.tokens[0].id.should eql s.tokens[0].id
376
+
377
+ File.delete(f)
378
+
379
+ end
380
+
378
381
  end
379
-
382
+
380
383
  end
381
-
384
+
382
385
  end
383
-
386
+
384
387
  end
385
-
386
- end
387
388
 
388
- describe "Extractors" do
389
-
390
- describe "#language" do
391
- context "when language detection is disabled " +
392
- "(Treat.core.detect is set to false)" do
393
- it "returns the default language (Treat.core.language.default)" do
394
- Treat.core.language.detect = false
395
- Treat.core.language.default = :test
396
- s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
397
- s.language.should eql :test
398
- Treat.core.language.default = :english
389
+ describe "Extractors" do
390
+
391
+ describe "#language" do
392
+ context "when language detection is disabled " +
393
+ "(Treat.core.detect is set to false)" do
394
+ it "returns the default language (Treat.core.language.default)" do
395
+ Treat.core.language.detect = false
396
+ Treat.core.language.default = :test
397
+ s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
398
+ s.language.should eql :test
399
+ Treat.core.language.default = :english
400
+ end
399
401
  end
400
- end
401
402
 
402
- context "when language detection is enabled " +
403
- "(Treat.detect_language is set to true)" do
403
+ context "when language detection is enabled " +
404
+ "(Treat.detect_language is set to true)" do
405
+
406
+ it "guesses the language of the entity" do
404
407
 
405
- it "guesses the language of the entity" do
408
+ Treat.core.language.detect = true
409
+ a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
410
+ b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
411
+ c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
412
+ d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
413
+ a.language.should eql :english
414
+ #b.language.should eql :spanish
415
+ #c.language.should eql :french
416
+ #d.language.should eql :german
406
417
 
407
- Treat.core.language.detect = true
408
- a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
409
- b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
410
- c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
411
- d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
412
- a.language.should eql :english
413
- #b.language.should eql :spanish
414
- #c.language.should eql :french
415
- #d.language.should eql :german
418
+ # Reset default
419
+ Treat.core.language.detect = false
420
+ end
416
421
 
417
- # Reset default
418
- Treat.core.language.detect = false
419
422
  end
420
423
 
421
424
  end
@@ -425,8 +428,6 @@ describe Treat::Entities::Entity do
425
428
  end
426
429
 
427
430
  end
428
-
429
-
430
431
  =begin
431
432
 
432
433