treat 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. data/lib/treat/config/data/languages/agnostic.rb +6 -3
  2. data/lib/treat/config/data/languages/english.rb +1 -1
  3. data/lib/treat/config/data/workers/extractors.rb +8 -0
  4. data/lib/treat/loaders/stanford.rb +2 -0
  5. data/lib/treat/version.rb +1 -1
  6. data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
  7. data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
  8. data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
  9. data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
  10. data/lib/treat/workers/extractors/time/chronic.rb +6 -41
  11. data/lib/treat/workers/extractors/time/kronic.rb +20 -0
  12. data/lib/treat/workers/extractors/time/nickel.rb +0 -15
  13. data/lib/treat/workers/extractors/time/ruby.rb +2 -33
  14. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
  15. data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
  16. data/spec/entities/collection.rb +29 -25
  17. data/spec/entities/document.rb +45 -44
  18. data/spec/entities/entity.rb +295 -294
  19. data/spec/entities/phrase.rb +21 -17
  20. data/spec/entities/token.rb +43 -40
  21. data/spec/entities/word.rb +5 -1
  22. data/spec/entities/zone.rb +26 -22
  23. data/spec/helper.rb +7 -2
  24. data/spec/learning/data_set.rb +145 -141
  25. data/spec/learning/export.rb +46 -42
  26. data/spec/learning/problem.rb +114 -110
  27. data/spec/learning/question.rb +46 -42
  28. data/spec/treat.rb +41 -37
  29. data/spec/workers/agnostic.rb +2 -2
  30. data/spec/workers/english.rb +12 -12
  31. metadata +7 -8
  32. data/files/21552208.html +0 -786
  33. data/files/nethttp-cheat-sheet-2940.html +0 -393
  34. data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
  35. data/spec/sandbox.rb +0 -294
  36. data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,40 +1,42 @@
1
- describe Treat::Entities::Collection do
1
+ module Treat::Specs::Entities
2
+
3
+ describe Treat::Entities::Collection do
2
4
 
3
- before :all do
4
- @file = Treat.paths.spec +
5
- 'workers/examples/english/mathematicians'
6
- end
5
+ before :all do
6
+ @file = Treat.paths.spec +
7
+ 'workers/examples/english/mathematicians'
8
+ end
7
9
 
8
10
 
9
- describe "Buildable" do
11
+ describe "Buildable" do
10
12
 
11
- describe "#build" do
13
+ describe "#build" do
12
14
 
13
- context "when supplied with an existing folder name" do
15
+ context "when supplied with an existing folder name" do
14
16
 
15
- it "recursively searches the folder for " +
16
- "files and opens them into a collection of documents" do
17
- collection = Treat::Entities::Collection.build(@file)
18
- collection.size.should eql 6
19
- end
17
+ it "recursively searches the folder for " +
18
+ "files and opens them into a collection of documents" do
19
+ collection = Treat::Entities::Collection.build(@file)
20
+ collection.size.should eql 5
21
+ end
20
22
 
21
- end
23
+ end
22
24
 
23
- context "when supplied a folder name that doesn't exist" do
25
+ context "when supplied a folder name that doesn't exist" do
24
26
 
25
- it "creates the directory and opens the collection" do
26
- f = Treat.paths.spec + 'workers/examples/english/test'
27
- c = Treat::Entities::Collection.build(f)
28
- FileTest.directory?(f).should eql true
29
- c.should be_an_instance_of Treat::Entities::Collection
30
- FileUtils.rm_rf(f)
27
+ it "creates the directory and opens the collection" do
28
+ f = Treat.paths.spec + 'workers/examples/english/test'
29
+ c = Treat::Entities::Collection.build(f)
30
+ FileTest.directory?(f).should eql true
31
+ c.should be_an_instance_of Treat::Entities::Collection
32
+ FileUtils.rm_rf(f)
33
+ end
31
34
  end
32
35
  end
33
- end
34
36
 
35
- end
37
+ end
36
38
 
37
- describe "#<<" do
39
+ describe "#<<" do
38
40
 
39
41
  it "adds the object to the collection" do
40
42
  f = Treat.paths.spec + 'workers/examples/english/economist'
@@ -43,6 +45,8 @@ describe Treat::Entities::Collection do
43
45
  c.size.should eql 4
44
46
  end
45
47
 
48
+ end
49
+
46
50
  end
47
51
 
48
52
  end
@@ -105,4 +109,4 @@ end
105
109
  end
106
110
 
107
111
  end
108
- =end
112
+ =end
@@ -1,55 +1,56 @@
1
- describe Treat::Entities::Document do
2
-
3
- describe "Buildable" do
4
-
5
- describe "#build" do
6
-
7
- context "when supplied with a readable file name" do
8
- it "opens the file and reads its " +
9
- "content into a document" do
10
- f = Treat.paths.spec +
11
- 'workers/examples/english/mathematicians/leibniz.txt'
12
- d = Treat::Entities::Document.build(f)
13
- d.should be_an_instance_of Treat::Entities::Document
14
- d.to_s.index('Gottfried Leibniz').should_not eql nil
1
+ module Treat::Specs::Entities
2
+ describe Treat::Entities::Document do
3
+
4
+ describe "Buildable" do
5
+
6
+ describe "#build" do
7
+
8
+ context "when supplied with a readable file name" do
9
+ it "opens the file and reads its " +
10
+ "content into a document" do
11
+ f = Treat.paths.spec +
12
+ 'workers/examples/english/mathematicians/leibniz.txt'
13
+ d = Treat::Entities::Document.build(f)
14
+ d.should be_an_instance_of Treat::Entities::Document
15
+ d.to_s.index('Gottfried Leibniz').should_not eql nil
16
+ end
15
17
  end
16
- end
17
18
 
18
- context "when supplied with a url" do
19
- it "downloads the file the URL points to and opens " +
20
- "a document with the contents of the file" do
21
- url = 'http://www.rubyinside.com/nethttp-cheat-sheet-2940.html'
22
- d = Treat::Entities::Document.build(url)
23
- d.format.should eql 'html'
24
- d.should be_an_instance_of Treat::Entities::Document
25
- d.to_s.index('Rubyist').should_not eql nil
19
+ context "when supplied with a url" do
20
+ it "downloads the file the URL points to and opens " +
21
+ "a document with the contents of the file" do
22
+ url = 'http://www.rubyinside.com/nethttp-cheat-sheet-2940.html'
23
+ d = Treat::Entities::Document.build(url)
24
+ d.format.should eql 'html'
25
+ d.should be_an_instance_of Treat::Entities::Document
26
+ d.to_s.index('Rubyist').should_not eql nil
27
+ end
26
28
  end
27
- end
28
29
 
29
- context "when supplied with a url with no file extension" do
30
- it "downloads the file the URL points to and opens " +
31
- "a document with the contents of the file, assuming " +
32
- "the downloaded file to be in HTML format" do
33
- url = 'http://www.economist.com/node/21552208'
34
- d = Treat::Entities::Document.build(url)
35
- d.should be_an_instance_of Treat::Entities::Document
36
- d.to_s.index('Ronnie Lupe').should_not eql nil
30
+ context "when supplied with a url with no file extension" do
31
+ it "downloads the file the URL points to and opens " +
32
+ "a document with the contents of the file, assuming " +
33
+ "the downloaded file to be in HTML format" do
34
+ url = 'http://www.economist.com/node/21552208'
35
+ d = Treat::Entities::Document.build(url)
36
+ d.should be_an_instance_of Treat::Entities::Document
37
+ d.to_s.index('Ronnie Lupe').should_not eql nil
38
+ end
37
39
  end
38
- end
39
-
40
- context "when called with anything else than a " +
41
- "readable file name or url" do
42
-
43
- it "raises an exception" do
44
- lambda do
45
- Treat::Entities::Document.build('nonexistent')
46
- end.should raise_error
40
+
41
+ context "when called with anything else than a " +
42
+ "readable file name or url" do
43
+
44
+ it "raises an exception" do
45
+ lambda do
46
+ Treat::Entities::Document.build('nonexistent')
47
+ end.should raise_error
48
+ end
49
+
47
50
  end
48
51
 
49
52
  end
50
53
 
51
54
  end
52
-
53
55
  end
54
-
55
- end
56
+ end
@@ -1,70 +1,71 @@
1
- describe Treat::Entities::Entity do
2
-
3
- before do
4
-
5
- @paragraph = Treat::Entities::Paragraph.new
6
- @sentence = Treat::Entities::Sentence.new
7
- @noun_phrase = Treat::Entities::Phrase.new
8
- @noun_phrase.set :tag, 'NP'
9
- @verb_phrase = Treat::Entities::Phrase.new
10
- @verb_phrase.set :tag, 'VP'
11
- @adj_phrase = Treat::Entities::Phrase.new
12
- @adj_phrase.set :tag, 'ADJP'
13
- @det = Treat::Entities::Word.new('The')
14
- @det.set :category, 'determiner'
15
- @det.set :tag, 'DT'
16
- @adj = Treat::Entities::Word.new('lazy')
17
- @adj.set :category, 'adjective'
18
- @adj.set :tag, 'JJ'
19
- @noun = Treat::Entities::Word.new('fox')
20
- @noun.set :category, 'noun'
21
- @noun.set :tag, 'NN'
22
- @aux = Treat::Entities::Word.new('is')
23
- @aux.set :category, 'verb'
24
- @aux.set :tag, 'VBZ'
25
- @verb = Treat::Entities::Word.new('running')
26
- @verb.set :category, 'verb'
27
- @verb.set :tag, 'VBG'
28
- @dot = Treat::Entities::Punctuation.new('.')
29
- @dot.set :tag, '.'
30
- @paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
31
- @noun_phrase << [@det, @adj_phrase, @noun]
32
- @adj_phrase << @adj
33
- @verb_phrase << [@aux, @verb]
1
+ module Treat::Specs::Entities
2
+ describe Treat::Entities::Entity do
3
+
4
+ before do
5
+
6
+ @paragraph = Treat::Entities::Paragraph.new
7
+ @sentence = Treat::Entities::Sentence.new
8
+ @noun_phrase = Treat::Entities::Phrase.new
9
+ @noun_phrase.set :tag, 'NP'
10
+ @verb_phrase = Treat::Entities::Phrase.new
11
+ @verb_phrase.set :tag, 'VP'
12
+ @adj_phrase = Treat::Entities::Phrase.new
13
+ @adj_phrase.set :tag, 'ADJP'
14
+ @det = Treat::Entities::Word.new('The')
15
+ @det.set :category, 'determiner'
16
+ @det.set :tag, 'DT'
17
+ @adj = Treat::Entities::Word.new('lazy')
18
+ @adj.set :category, 'adjective'
19
+ @adj.set :tag, 'JJ'
20
+ @noun = Treat::Entities::Word.new('fox')
21
+ @noun.set :category, 'noun'
22
+ @noun.set :tag, 'NN'
23
+ @aux = Treat::Entities::Word.new('is')
24
+ @aux.set :category, 'verb'
25
+ @aux.set :tag, 'VBZ'
26
+ @verb = Treat::Entities::Word.new('running')
27
+ @verb.set :category, 'verb'
28
+ @verb.set :tag, 'VBG'
29
+ @dot = Treat::Entities::Punctuation.new('.')
30
+ @dot.set :tag, '.'
31
+ @paragraph << @sentence << [@noun_phrase, @verb_phrase, @dot]
32
+ @noun_phrase << [@det, @adj_phrase, @noun]
33
+ @adj_phrase << @adj
34
+ @verb_phrase << [@aux, @verb]
34
35
 
35
- end
36
+ end
36
37
 
37
38
 
38
- describe "Checkable" do
39
+ describe "Checkable" do
39
40
 
40
- describe "#check_has(feature, do_it = true) " do
41
-
42
- it "checks if an entity has the feature; if not, " +
43
- "calls the default worker to get the feature if do_it " +
44
- "is set to true; if the entity doesn't have the feature " +
45
- " and do_it is set to false, it raises an exception." do
46
-
47
- # NOT PASSING! Dependence on caller method.
48
-
49
- # lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
50
-
51
- end
41
+ describe "#check_has(feature, do_it = true) " do
52
42
 
53
- end
43
+ it "checks if an entity has the feature; if not, " +
44
+ "calls the default worker to get the feature if do_it " +
45
+ "is set to true; if the entity doesn't have the feature " +
46
+ " and do_it is set to false, it raises an exception." do
54
47
 
55
- end
48
+ # NOT PASSING! Dependence on caller method.
56
49
 
57
- describe "Countable" do
50
+ # lambda { '$'.to_entity.check_has(:tag, false) }.should raise_error Treat::Exception
58
51
 
59
- describe "#position" do
52
+ end
60
53
 
61
- it "returns the position of the entity in its parent, sarting at 0" do
62
- @noun_phrase.position.should eql 0
63
- @det.position.should eql 0
64
54
  end
65
55
 
66
56
  end
67
57
 
58
+ describe "Countable" do
59
+
60
+ describe "#position" do
61
+
62
+ it "returns the position of the entity in its parent, sarting at 0" do
63
+ @noun_phrase.position.should eql 0
64
+ @det.position.should eql 0
65
+ end
66
+
67
+ end
68
+
68
69
  =begin
69
70
 
70
71
  describe "#frequency" do
@@ -88,334 +89,336 @@ describe Treat::Entities::Entity do
88
89
 
89
90
  =end
90
91
 
91
- end
92
+ end
93
+
94
+ describe "Delegatable" do
92
95
 
93
- describe "Delegatable" do
96
+ describe "#self.call_worker" do
94
97
 
95
- describe "#self.call_worker" do
98
+ it "finds the worker class to " +
99
+ "perform a task and delegates the task to it " do
96
100
 
97
- it "finds the worker class to " +
98
- "perform a task and delegates the task to it " do
101
+ Treat::Entities::Entity.call_worker(
102
+ '$'.to_entity, :tag, :lingua,
103
+ Treat::Workers::Lexicalizers::Taggers, {}).should
104
+ eql '$'.tag(:lingua)
99
105
 
100
- Treat::Entities::Entity.call_worker(
101
- '$'.to_entity, :tag, :lingua,
102
- Treat::Workers::Lexicalizers::Taggers, {}).should
103
- eql '$'.tag(:lingua)
106
+ end
104
107
 
105
108
  end
106
109
 
107
110
  end
108
111
 
109
- end
112
+ describe "Exportable" do
110
113
 
111
- describe "Exportable" do
112
-
113
- context "when supplied with a classification to export" do
114
- feature = Treat::Learning::Feature.new(:tag)
115
- question = Treat::Learning::Question.new(:is_keyword, :word, false, :discrete)
116
- problem = Treat::Learning::Problem.new(question, feature)
117
- it "returns a data set with the exported features" do
118
- ds = @sentence.export(problem)
119
- ds.problem.should eql problem
120
- # MORE TESTS HERE - FIXME
114
+ context "when supplied with a classification to export" do
115
+ feature = Treat::Learning::Feature.new(:tag)
116
+ question = Treat::Learning::Question.new(:is_keyword, :word, false, :discrete)
117
+ problem = Treat::Learning::Problem.new(question, feature)
118
+ it "returns a data set with the exported features" do
119
+ ds = @sentence.export(problem)
120
+ ds.problem.should eql problem
121
+ # MORE TESTS HERE - FIXME
122
+ end
121
123
  end
122
- end
123
124
 
124
- end
125
+ end
125
126
 
126
- describe "Iterable" do
127
+ describe "Iterable" do
127
128
 
128
- describe "#each { |child| ... }" do
129
- it "yields each direct child of a node" do
130
- a = []
131
- @sentence.each do |child|
132
- a << child
129
+ describe "#each { |child| ... }" do
130
+ it "yields each direct child of a node" do
131
+ a = []
132
+ @sentence.each do |child|
133
+ a << child
134
+ end
135
+ a.should eql [@noun_phrase, @verb_phrase, @dot]
133
136
  end
134
- a.should eql [@noun_phrase, @verb_phrase, @dot]
135
137
  end
136
- end
137
138
 
138
- describe "#each_entity(*entity_types) { |entity| ... }" do
139
+ describe "#each_entity(*entity_types) { |entity| ... }" do
139
140
 
140
- context "when called with no arguments" do
141
- it "recursively yields each element in " +
142
- "the tree, including itself, top-down " +
143
- "first then left to right" do
141
+ context "when called with no arguments" do
142
+ it "recursively yields each element in " +
143
+ "the tree, including itself, top-down " +
144
+ "first then left to right" do
144
145
 
145
- a = []
146
- @sentence.each_entity do |e|
147
- a << e
148
- end
146
+ a = []
147
+ @sentence.each_entity do |e|
148
+ a << e
149
+ end
149
150
 
150
- a.should eql [@sentence, @noun_phrase, @det,
151
- @adj_phrase, @adj, @noun,
152
- @verb_phrase, @aux, @verb, @dot]
151
+ a.should eql [@sentence, @noun_phrase, @det,
152
+ @adj_phrase, @adj, @noun,
153
+ @verb_phrase, @aux, @verb, @dot]
153
154
 
155
+ end
154
156
  end
155
- end
156
157
 
157
- context "when called with one or more entity " +
158
- "types supplied as lowercase symbols" do
159
- it "recursively yields all elements with the given type(s), "+
160
- "including the receiver if it matches on of the types" do
161
- a = []
162
- @sentence.each_entity(:phrase, :punctuation) do |e|
163
- a << e
158
+ context "when called with one or more entity " +
159
+ "types supplied as lowercase symbols" do
160
+ it "recursively yields all elements with the given type(s), "+
161
+ "including the receiver if it matches on of the types" do
162
+ a = []
163
+ @sentence.each_entity(:phrase, :punctuation) do |e|
164
+ a << e
165
+ end
166
+ a.should eql [@noun_phrase,
167
+ @adj_phrase, @verb_phrase, @dot]
164
168
  end
165
- a.should eql [@noun_phrase,
166
- @adj_phrase, @verb_phrase, @dot]
167
169
  end
168
- end
169
170
 
171
+ end
170
172
  end
171
- end
172
173
 
173
- describe "Magical" do
174
+ describe "Magical" do
174
175
 
175
- describe "#<entity or word type> - e.g. " +
176
- "#title, #paragraph, etc. and #adjective, #noun, etc." do
176
+ describe "#<entity or word type> - e.g. " +
177
+ "#title, #paragraph, etc. and #adjective, #noun, etc." do
178
+
179
+ it "return the first entity with the corresponding " +
180
+ "type inside another entity, but raises an exception "+
181
+ "the type occurs more than once in the entity" do
182
+ @paragraph.sentence.should eql @sentence
183
+ end
177
184
 
178
- it "return the first entity with the corresponding " +
179
- "type inside another entity, but raises an exception "+
180
- "the type occurs more than once in the entity" do
181
- @paragraph.sentence.should eql @sentence
182
185
  end
183
186
 
184
- end
185
187
 
188
+ describe "#<entity or word type>s - e.g. " +
189
+ "#sections, #words, etc. and #nouns, #adverbs, etc." do
186
190
 
187
- describe "#<entity or word type>s - e.g. " +
188
- "#sections, #words, etc. and #nouns, #adverbs, etc." do
191
+ it "return an array of the entities with the " +
192
+ "corresponding type in the subtree of an entity" do
193
+ @paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
194
+ end
189
195
 
190
- it "return an array of the entities with the " +
191
- "corresponding type in the subtree of an entity" do
192
- @paragraph.phrases.should eql [@noun_phrase, @adj_phrase, @verb_phrase]
193
196
  end
194
197
 
195
- end
198
+ describe "#each_<entity type> - e.g. " +
199
+ "#each_sentence, #each_word, etc." do
196
200
 
197
- describe "#each_<entity type> - e.g. " +
198
- "#each_sentence, #each_word, etc." do
201
+ it "yields each of the entities with the " +
202
+ "corresponding type in the subtree of an entity" do
203
+ a = []
199
204
 
200
- it "yields each of the entities with the " +
201
- "corresponding type in the subtree of an entity" do
202
- a = []
205
+ @paragraph.each_phrase { |p| a << p }
206
+ a.should eql [@noun_phrase,
207
+ @adj_phrase, @verb_phrase]
203
208
 
204
- @paragraph.each_phrase { |p| a << p }
205
- a.should eql [@noun_phrase,
206
- @adj_phrase, @verb_phrase]
209
+ end
207
210
 
208
211
  end
209
212
 
210
- end
213
+ describe "#<entity or word type>_count - e.g. " +
214
+ "#sentence_count, #paragraph_count, etc. and " +
215
+ "#noun_count, #verb_count, etc." do
211
216
 
212
- describe "#<entity or word type>_count - e.g. " +
213
- "#sentence_count, #paragraph_count, etc. and " +
214
- "#noun_count, #verb_count, etc." do
217
+ it "return the number of entities with the " +
218
+ "corresponding type inside another entity" do
219
+ @paragraph.sentence_count.should eql 1
220
+ @paragraph.phrase_count.should eql 3
221
+ end
215
222
 
216
- it "return the number of entities with the " +
217
- "corresponding type inside another entity" do
218
- @paragraph.sentence_count.should eql 1
219
- @paragraph.phrase_count.should eql 3
220
223
  end
221
224
 
222
- end
225
+ describe "#<entity or word type>_with_<feature>(value) - " +
226
+ "e.g. #word_with_id(x) or #adverb_with_value('seemingly')" do
223
227
 
224
- describe "#<entity or word type>_with_<feature>(value) - " +
225
- "e.g. #word_with_id(x) or #adverb_with_value('seemingly')" do
228
+ it "return the entity with the corresponding type " +
229
+ "that have [feature] set to the supplied value; raise" +
230
+ "a warning if there are many entities of that type" do
231
+ @paragraph.word_with_value('The').should eql @det
232
+ @paragraph.token_with_tag('.').should eql @dot
233
+ @sentence.phrase_with_tag('NP').should eql @noun_phrase
234
+ end
226
235
 
227
- it "return the entity with the corresponding type " +
228
- "that have [feature] set to the supplied value; raise" +
229
- "a warning if there are many entities of that type" do
230
- @paragraph.word_with_value('The').should eql @det
231
- @paragraph.token_with_tag('.').should eql @dot
232
- @sentence.phrase_with_tag('NP').should eql @noun_phrase
233
236
  end
234
237
 
235
- end
238
+ describe "#<entity or word type>s_with_<feature>(value) - " +
239
+ "e.g. #phrases_with_tag('NP'), #nouns_with_value('foo')" do
236
240
 
237
- describe "#<entity or word type>s_with_<feature>(value) - " +
238
- "e.g. #phrases_with_tag('NP'), #nouns_with_value('foo')" do
241
+ it "return an array of the entities with the " +
242
+ "corresponding type that have [feature] set to "+
243
+ "the supplied value" do
244
+ @paragraph.words_with_value('The').should eql [@det]
245
+ @paragraph.tokens_with_tag('.').should eql [@dot]
246
+ @sentence.phrases_with_tag('NP').should eql [@noun_phrase]
247
+ end
239
248
 
240
- it "return an array of the entities with the " +
241
- "corresponding type that have [feature] set to "+
242
- "the supplied value" do
243
- @paragraph.words_with_value('The').should eql [@det]
244
- @paragraph.tokens_with_tag('.').should eql [@dot]
245
- @sentence.phrases_with_tag('NP').should eql [@noun_phrase]
246
249
  end
247
250
 
248
- end
251
+ describe "#parent_<entity type> - e.g. " +
252
+ "#parent_document, #parent_collection, etc." do
249
253
 
250
- describe "#parent_<entity type> - e.g. " +
251
- "#parent_document, #parent_collection, etc." do
254
+ it "return the first ancestor of the entity " +
255
+ "that has the supplied type, or nil if none" do
256
+ @sentence.parent_paragraph.should eql @paragraph
257
+ @adj.parent_sentence.should eql @sentence
258
+ end
252
259
 
253
- it "return the first ancestor of the entity " +
254
- "that has the supplied type, or nil if none" do
255
- @sentence.parent_paragraph.should eql @paragraph
256
- @adj.parent_sentence.should eql @sentence
257
260
  end
258
261
 
259
- end
262
+ describe "#frequency_in_<entity type> - e.g. " +
263
+ "#frequency_in_collection, #frequency_in_document, etc." do
260
264
 
261
- describe "#frequency_in_<entity type> - e.g. " +
262
- "#frequency_in_collection, #frequency_in_document, etc." do
265
+ it "return the frequency of this entity's value " +
266
+ "in the parent entity with the corresponding type" do
267
+ @adj.frequency_in_sentence.should eql 1
268
+ end
263
269
 
264
- it "return the frequency of this entity's value " +
265
- "in the parent entity with the corresponding type" do
266
- @adj.frequency_in_sentence.should eql 1
267
270
  end
268
271
 
269
272
  end
270
273
 
271
- end
274
+ describe "Stringable" do
272
275
 
273
- describe "Stringable" do
276
+ describe "#to_string" do
277
+ it "returns the true text value of the entity " +
278
+ "or an empty string if it has none" do
279
+ @paragraph.to_string.should eql ''
280
+ @noun.to_string.should eql 'fox'
281
+ end
282
+ end
274
283
 
275
- describe "#to_string" do
276
- it "returns the true text value of the entity " +
277
- "or an empty string if it has none" do
278
- @paragraph.to_string.should eql ''
279
- @noun.to_string.should eql 'fox'
284
+ describe "#to_s" do
285
+ it "returns the string value of the " +
286
+ "entity or its full subtree" do
287
+ @paragraph.to_s.should
288
+ eql 'The lazy fox is running.'
289
+ @noun.to_s.should eql 'fox'
290
+ end
280
291
  end
281
- end
282
292
 
283
- describe "#to_s" do
284
- it "returns the string value of the " +
285
- "entity or its full subtree" do
286
- @paragraph.to_s.should
287
- eql 'The lazy fox is running.'
288
- @noun.to_s.should eql 'fox'
293
+ describe "#inspect" do
294
+ it "returns an informative string " +
295
+ "concerning the entity" do
296
+ @paragraph.inspect.should
297
+ be_an_instance_of String
298
+ end
289
299
  end
290
- end
291
300
 
292
- describe "#inspect" do
293
- it "returns an informative string " +
294
- "concerning the entity" do
295
- @paragraph.inspect.should
296
- be_an_instance_of String
301
+ describe "#short_value" do
302
+ it "returns a shortened version of the " +
303
+ "entity's string value" do
304
+ @paragraph.short_value.should
305
+ eql 'The lazy fox is running.'
306
+ end
297
307
  end
308
+
298
309
  end
299
310
 
300
- describe "#short_value" do
301
- it "returns a shortened version of the " +
302
- "entity's string value" do
303
- @paragraph.short_value.should
304
- eql 'The lazy fox is running.'
311
+ describe "Formatters" do
312
+
313
+
314
+ before do
315
+ @serializers = Treat.languages.agnostic.
316
+ workers.formatters.serializers
317
+ @txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
305
318
  end
306
- end
307
319
 
308
- end
309
-
310
- describe "Formatters" do
311
-
312
-
313
- before do
314
- @serializers = Treat.languages.agnostic.
315
- workers.formatters.serializers
316
- @txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
317
- end
318
-
319
- describe "#serialize" do
320
+ describe "#serialize" do
321
+
322
+ context "when called with a file to save to" do
323
+
324
+ it "serializes a document to the supplied format" do
325
+
326
+ @serializers.each do |ser|
327
+ next if ser == :mongo # Fix this!
328
+ f = Treat.paths.spec + 'test.' + ser.to_s
329
+ s = Treat::Entities::Paragraph.new(@txt)
330
+ s.apply(:segment, :tokenize)
331
+ s.serialize(ser, :file => f)
332
+ File.delete(f)
333
+ end
320
334
 
321
- context "when called with a file to save to" do
322
-
323
- it "serializes a document to the supplied format" do
324
-
325
- @serializers.each do |ser|
326
- next if ser == :mongo # Fix this!
327
- f = Treat.paths.spec + 'test.' + ser.to_s
328
- s = Treat::Entities::Paragraph.new(@txt)
329
- s.do(:segment, :tokenize)
330
- s.serialize(ser, :file => f)
331
- File.delete(f)
332
335
  end
333
-
336
+
334
337
  end
335
-
338
+
336
339
  end
337
-
338
- end
339
-
340
- describe "#unserialize" do
341
-
342
- context "when called with a serialized file" do
343
-
344
- it "reconstitutes the original entity" do
345
- @serializers.each do |ser|
346
- next if ser == :mongo # Fix this!
347
-
348
- f = Treat.paths.spec + 'test.' + ser.to_s
349
- s = Treat::Entities::Paragraph.new(@txt)
350
-
351
- s.set :test_int, 9
352
- s.set :test_float, 9.9
353
- s.set :test_string, 'hello'
354
- s.set :test_sym, :hello
355
- s.set :test_bool, false
356
-
357
- s.do(:segment, :tokenize)
358
-
359
- s.serialize(ser, file: f)
360
-
361
- d = Treat::Entities::Document.build(f)
362
-
363
- d.test_int.should eql 9
364
- d.test_float.should eql 9.9
365
- d.test_string.should eql 'hello'
366
- d.test_sym.should eql :hello
367
- d.test_bool.should eql false
368
-
369
- d.to_s.should eql "The story of the fox." +
370
- " The quick brown fox jumped over the lazy dog."
371
- d.size.should eql s.size
372
-
373
- d.token_count.should eql s.token_count
374
- d.tokens[0].id.should eql s.tokens[0].id
375
-
376
- File.delete(f)
377
-
340
+
341
+ describe "#unserialize" do
342
+
343
+ context "when called with a serialized file" do
344
+
345
+ it "reconstitutes the original entity" do
346
+ @serializers.each do |ser|
347
+ next if ser == :mongo # Fix this!
348
+
349
+ f = Treat.paths.spec + 'test.' + ser.to_s
350
+ s = Treat::Entities::Paragraph.new(@txt)
351
+
352
+ s.set :test_int, 9
353
+ s.set :test_float, 9.9
354
+ s.set :test_string, 'hello'
355
+ s.set :test_sym, :hello
356
+ s.set :test_bool, false
357
+
358
+ s.apply(:segment, :tokenize)
359
+
360
+ s.serialize(ser, file: f)
361
+
362
+ d = Treat::Entities::Document.build(f)
363
+
364
+ d.test_int.should eql 9
365
+ d.test_float.should eql 9.9
366
+ d.test_string.should eql 'hello'
367
+ d.test_sym.should eql :hello
368
+ d.test_bool.should eql false
369
+
370
+ d.to_s.should eql "The story of the fox." +
371
+ " The quick brown fox jumped over the lazy dog."
372
+ d.size.should eql s.size
373
+
374
+ d.token_count.should eql s.token_count
375
+ d.tokens[0].id.should eql s.tokens[0].id
376
+
377
+ File.delete(f)
378
+
379
+ end
380
+
378
381
  end
379
-
382
+
380
383
  end
381
-
384
+
382
385
  end
383
-
386
+
384
387
  end
385
-
386
- end
387
388
 
388
- describe "Extractors" do
389
-
390
- describe "#language" do
391
- context "when language detection is disabled " +
392
- "(Treat.core.detect is set to false)" do
393
- it "returns the default language (Treat.core.language.default)" do
394
- Treat.core.language.detect = false
395
- Treat.core.language.default = :test
396
- s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
397
- s.language.should eql :test
398
- Treat.core.language.default = :english
389
+ describe "Extractors" do
390
+
391
+ describe "#language" do
392
+ context "when language detection is disabled " +
393
+ "(Treat.core.detect is set to false)" do
394
+ it "returns the default language (Treat.core.language.default)" do
395
+ Treat.core.language.detect = false
396
+ Treat.core.language.default = :test
397
+ s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
398
+ s.language.should eql :test
399
+ Treat.core.language.default = :english
400
+ end
399
401
  end
400
- end
401
402
 
402
- context "when language detection is enabled " +
403
- "(Treat.detect_language is set to true)" do
403
+ context "when language detection is enabled " +
404
+ "(Treat.detect_language is set to true)" do
405
+
406
+ it "guesses the language of the entity" do
404
407
 
405
- it "guesses the language of the entity" do
408
+ Treat.core.language.detect = true
409
+ a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
410
+ b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
411
+ c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
412
+ d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
413
+ a.language.should eql :english
414
+ #b.language.should eql :spanish
415
+ #c.language.should eql :french
416
+ #d.language.should eql :german
406
417
 
407
- Treat.core.language.detect = true
408
- a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
409
- b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
410
- c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
411
- d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
412
- a.language.should eql :english
413
- #b.language.should eql :spanish
414
- #c.language.should eql :french
415
- #d.language.should eql :german
418
+ # Reset default
419
+ Treat.core.language.detect = false
420
+ end
416
421
 
417
- # Reset default
418
- Treat.core.language.detect = false
419
422
  end
420
423
 
421
424
  end
@@ -425,8 +428,6 @@ describe Treat::Entities::Entity do
425
428
  end
426
429
 
427
430
  end
428
-
429
-
430
431
  =begin
431
432
 
432
433