treat 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. data/lib/treat/config/data/languages/agnostic.rb +6 -3
  2. data/lib/treat/config/data/languages/english.rb +1 -1
  3. data/lib/treat/config/data/workers/extractors.rb +8 -0
  4. data/lib/treat/loaders/stanford.rb +2 -0
  5. data/lib/treat/version.rb +1 -1
  6. data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
  7. data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
  8. data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
  9. data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
  10. data/lib/treat/workers/extractors/time/chronic.rb +6 -41
  11. data/lib/treat/workers/extractors/time/kronic.rb +20 -0
  12. data/lib/treat/workers/extractors/time/nickel.rb +0 -15
  13. data/lib/treat/workers/extractors/time/ruby.rb +2 -33
  14. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
  15. data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
  16. data/spec/entities/collection.rb +29 -25
  17. data/spec/entities/document.rb +45 -44
  18. data/spec/entities/entity.rb +295 -294
  19. data/spec/entities/phrase.rb +21 -17
  20. data/spec/entities/token.rb +43 -40
  21. data/spec/entities/word.rb +5 -1
  22. data/spec/entities/zone.rb +26 -22
  23. data/spec/helper.rb +7 -2
  24. data/spec/learning/data_set.rb +145 -141
  25. data/spec/learning/export.rb +46 -42
  26. data/spec/learning/problem.rb +114 -110
  27. data/spec/learning/question.rb +46 -42
  28. data/spec/treat.rb +41 -37
  29. data/spec/workers/agnostic.rb +2 -2
  30. data/spec/workers/english.rb +12 -12
  31. metadata +7 -8
  32. data/files/21552208.html +0 -786
  33. data/files/nethttp-cheat-sheet-2940.html +0 -393
  34. data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
  35. data/spec/sandbox.rb +0 -294
  36. data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,52 +1,56 @@
1
- describe Treat::Learning::Export do
1
+ module Treat::Specs::Learning
2
2
 
3
- describe "#initialize" do
4
- context "when supplied with acceptable parameters" do
5
- it "should give access to the parameters" do
6
- export = Treat::Learning::Export.new(:name, 0, "->(e) { e }")
7
- export.name.should eql :name
8
- export.default.should eql 0
9
- export.proc_string.should eql "->(e) { e }"
10
- export.proc.should be_instance_of Proc
11
- export.proc.call('x').should eql 'x'
3
+ describe Treat::Learning::Export do
4
+
5
+ describe "#initialize" do
6
+ context "when supplied with acceptable parameters" do
7
+ it "should give access to the parameters" do
8
+ export = Treat::Learning::Export.new(:name, 0, "->(e) { e }")
9
+ export.name.should eql :name
10
+ export.default.should eql 0
11
+ export.proc_string.should eql "->(e) { e }"
12
+ export.proc.should be_instance_of Proc
13
+ export.proc.call('x').should eql 'x'
14
+ end
12
15
  end
13
- end
14
- context "when supplied with wrong parameters" do
15
- it "should raise an exception" do
16
- # First argument should be a symbol representing the name of the export.
17
- expect { Treat::Learning::Export.new(nil) }.to raise_error
18
- # Third argument, if supplied, should be a string that
19
- # evaluates to a proc (NOT a proc/lambda).
20
- expect { Treat::Learning::Export.new(:name, 0, lambda { x } ) }.to raise_error
21
- # Third argument should be proper ruby syntax.
22
- expect { Treat::Learning::Export.new(:name, 0, "->(e) { ") }.to raise_error
23
- # Third argument should evaluate to a proc.
24
- expect { Treat::Learning::Export.new(:name, 0, "2") }.to raise_error
16
+ context "when supplied with wrong parameters" do
17
+ it "should raise an exception" do
18
+ # First argument should be a symbol representing the name of the export.
19
+ expect { Treat::Learning::Export.new(nil) }.to raise_error
20
+ # Third argument, if supplied, should be a string that
21
+ # evaluates to a proc (NOT a proc/lambda).
22
+ expect { Treat::Learning::Export.new(:name, 0, lambda { x } ) }.to raise_error
23
+ # Third argument should be proper ruby syntax.
24
+ expect { Treat::Learning::Export.new(:name, 0, "->(e) { ") }.to raise_error
25
+ # Third argument should evaluate to a proc.
26
+ expect { Treat::Learning::Export.new(:name, 0, "2") }.to raise_error
27
+ end
25
28
  end
26
29
  end
27
- end
28
30
 
29
- describe "#==(question)" do
30
- context "when supplied with an equal question" do
31
- it "should return true" do
32
- Treat::Learning::Export.new(:name).
33
- should == Treat::Learning::Export.new(:name)
34
- Treat::Learning::Export.new(:name, 0).
35
- should == Treat::Learning::Export.new(:name, 0)
36
- Treat::Learning::Export.new(:name, 0, "->(e) { }").
37
- should == Treat::Learning::Export.new(:name, 0, "->(e) { }")
31
+ describe "#==(question)" do
32
+ context "when supplied with an equal question" do
33
+ it "should return true" do
34
+ Treat::Learning::Export.new(:name).
35
+ should == Treat::Learning::Export.new(:name)
36
+ Treat::Learning::Export.new(:name, 0).
37
+ should == Treat::Learning::Export.new(:name, 0)
38
+ Treat::Learning::Export.new(:name, 0, "->(e) { }").
39
+ should == Treat::Learning::Export.new(:name, 0, "->(e) { }")
40
+ end
38
41
  end
39
- end
40
- context "when supplied with a different question" do
41
- it "should return false" do
42
- Treat::Learning::Export.new(:name).
43
- should_not == Treat::Learning::Export.new(:name2)
44
- Treat::Learning::Export.new(:name, 0).
45
- should_not == Treat::Learning::Export.new(:name, 1)
46
- Treat::Learning::Export.new(:name, 0, "->(e) { }").
47
- should_not == Treat::Learning::Export.new(:name, 0, "->(e) { x }")
42
+ context "when supplied with a different question" do
43
+ it "should return false" do
44
+ Treat::Learning::Export.new(:name).
45
+ should_not == Treat::Learning::Export.new(:name2)
46
+ Treat::Learning::Export.new(:name, 0).
47
+ should_not == Treat::Learning::Export.new(:name, 1)
48
+ Treat::Learning::Export.new(:name, 0, "->(e) { }").
49
+ should_not == Treat::Learning::Export.new(:name, 0, "->(e) { x }")
50
+ end
48
51
  end
49
52
  end
53
+
50
54
  end
51
55
 
52
- end
56
+ end
@@ -1,144 +1,148 @@
1
- describe Treat::Learning::Problem do
1
+ module Treat::Specs::Learning
2
2
 
3
- before do
4
- @question = Treat::Learning::Question.new(:is_key_sentence,
5
- :sentence, 0, :continuous)
6
- @feature = Treat::Learning::Feature.new(:word_count, 0)
7
- @tag = Treat::Learning::Tag.new(:paragraph_length, 0,
8
- "->(e) { e.parent_paragraph.word_count }")
9
- @paragraph = Treat::Entities::Paragraph.new(
10
- "Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
11
- @paragraph.do :segment, :tokenize
12
- @sentence = @paragraph.sentences[0]
13
- @hash = {"question"=>{"name"=>:is_key_sentence, "target"=>:sentence,
14
- "type"=>:continuous, "default"=>0}, "features"=>[
15
- {"proc_string"=>nil, "default"=>0, "name"=>:word_count, "proc"=>nil}],
16
- "tags"=>[{"proc_string"=>"->(e) { e.parent_paragraph.word_count }",
17
- "default"=>0, "name"=>:paragraph_length, "proc"=>nil}], "id"=>0}
18
- end
3
+ describe Treat::Learning::Problem do
19
4
 
20
- describe "#initialize" do
21
- context "when supplied with proper arguments" do
22
- it "initializes the problem and gives access to parameters" do
23
- problem = Treat::Learning::Problem.new(@question, @feature, @tag)
24
- problem.question.should eql @question
25
- problem.features.should eql [@feature]
26
- problem.tags.should eql [@tag]
27
- problem.feature_labels.should eql [@feature.name]
28
- problem.tag_labels.should eql [@tag.name]
29
- # ID ??? FIXME
30
- end
5
+ before do
6
+ @question = Treat::Learning::Question.new(:is_key_sentence,
7
+ :sentence, 0, :continuous)
8
+ @feature = Treat::Learning::Feature.new(:word_count, 0)
9
+ @tag = Treat::Learning::Tag.new(:paragraph_length, 0,
10
+ "->(e) { e.parent_paragraph.word_count }")
11
+ @paragraph = Treat::Entities::Paragraph.new(
12
+ "Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
13
+ @paragraph.apply :segment, :tokenize
14
+ @sentence = @paragraph.sentences[0]
15
+ @hash = {"question"=>{"name"=>:is_key_sentence, "target"=>:sentence,
16
+ "type"=>:continuous, "default"=>0}, "features"=>[
17
+ {"proc_string"=>nil, "default"=>0, "name"=>:word_count, "proc"=>nil}],
18
+ "tags"=>[{"proc_string"=>"->(e) { e.parent_paragraph.word_count }",
19
+ "default"=>0, "name"=>:paragraph_length, "proc"=>nil}], "id"=>0}
31
20
  end
32
- context "when supplied with unacceptable arguments" do
33
- it "raises an error" do
34
- # First argument should be instance of Question.
35
- expect { Treat::Learning::Problem.new('foo') }.to raise_error
36
- # Arguments >= 2 should be instances of Export.
37
- expect { Treat::Learning::Problem.new(@question, 'foo') }.to raise_error
38
- # Should have at least one Feature in the arguments.
39
- expect { Treat::Learning::Problem.new(@question, @tag) }.to raise_error
21
+
22
+ describe "#initialize" do
23
+ context "when supplied with proper arguments" do
24
+ it "initializes the problem and gives access to parameters" do
25
+ problem = Treat::Learning::Problem.new(@question, @feature, @tag)
26
+ problem.question.should eql @question
27
+ problem.features.should eql [@feature]
28
+ problem.tags.should eql [@tag]
29
+ problem.feature_labels.should eql [@feature.name]
30
+ problem.tag_labels.should eql [@tag.name]
31
+ # ID ??? FIXME
32
+ end
33
+ end
34
+ context "when supplied with unacceptable arguments" do
35
+ it "raises an error" do
36
+ # First argument should be instance of Question.
37
+ expect { Treat::Learning::Problem.new('foo') }.to raise_error
38
+ # Arguments >= 2 should be instances of Export.
39
+ expect { Treat::Learning::Problem.new(@question, 'foo') }.to raise_error
40
+ # Should have at least one Feature in the arguments.
41
+ expect { Treat::Learning::Problem.new(@question, @tag) }.to raise_error
42
+ end
40
43
  end
41
44
  end
42
- end
43
45
 
44
- describe "#==(problem)" do
45
- context "when supplied with an equal problem" do
46
- it "should return true" do
47
- Treat::Learning::Problem.new(@question, @feature).
48
- should == Treat::Learning::Problem.new(@question, @feature)
49
- Treat::Learning::Problem.new(@question, @feature, @tag).
50
- should == Treat::Learning::Problem.new(@question, @feature, @tag)
46
+ describe "#==(problem)" do
47
+ context "when supplied with an equal problem" do
48
+ it "should return true" do
49
+ Treat::Learning::Problem.new(@question, @feature).
50
+ should == Treat::Learning::Problem.new(@question, @feature)
51
+ Treat::Learning::Problem.new(@question, @feature, @tag).
52
+ should == Treat::Learning::Problem.new(@question, @feature, @tag)
53
+ end
51
54
  end
52
- end
53
- context "when supplied with a different question" do
54
- it "should return false" do
55
- question = Treat::Learning::Question.new(:is_key_sentence, :sentence)
56
- feature = Treat::Learning::Feature.new(:word_count, 999)
57
- tag = Treat::Learning::Tag.new(:paragraph_length, 999)
58
- Treat::Learning::Problem.new(@question, @feature).
59
- should_not == Treat::Learning::Problem.new(question, @feature)
60
- Treat::Learning::Problem.new(@question, @feature).
61
- should_not == Treat::Learning::Problem.new(@question, feature)
62
- Treat::Learning::Problem.new(@question, @feature, @tag).
63
- should_not == Treat::Learning::Problem.new(@question, @feature, tag)
55
+ context "when supplied with a different question" do
56
+ it "should return false" do
57
+ question = Treat::Learning::Question.new(:is_key_sentence, :sentence)
58
+ feature = Treat::Learning::Feature.new(:word_count, 999)
59
+ tag = Treat::Learning::Tag.new(:paragraph_length, 999)
60
+ Treat::Learning::Problem.new(@question, @feature).
61
+ should_not == Treat::Learning::Problem.new(question, @feature)
62
+ Treat::Learning::Problem.new(@question, @feature).
63
+ should_not == Treat::Learning::Problem.new(@question, feature)
64
+ Treat::Learning::Problem.new(@question, @feature, @tag).
65
+ should_not == Treat::Learning::Problem.new(@question, @feature, tag)
66
+ end
64
67
  end
65
68
  end
66
- end
67
69
 
68
- describe "#export_tags(entity)" do
69
- context "when called on a problem that has tags" do
70
- context "and called with an entity of the proper type" do
71
- it "returns an array of the tags" do
72
- problem = Treat::Learning::Problem.new(@question, @feature, @tag)
73
- problem.export_tags(@sentence).should eql [11]
70
+ describe "#export_tags(entity)" do
71
+ context "when called on a problem that has tags" do
72
+ context "and called with an entity of the proper type" do
73
+ it "returns an array of the tags" do
74
+ problem = Treat::Learning::Problem.new(@question, @feature, @tag)
75
+ problem.export_tags(@sentence).should eql [11]
76
+ end
74
77
  end
75
78
  end
76
- end
77
- context "when called on a problem that doesn't have tags" do
78
- it "raises an error" do
79
- problem = Treat::Learning::Problem.new(@question, @feature)
80
- expect { problem.export_tags(@sentence) }.to raise_error
79
+ context "when called on a problem that doesn't have tags" do
80
+ it "raises an error" do
81
+ problem = Treat::Learning::Problem.new(@question, @feature)
82
+ expect { problem.export_tags(@sentence) }.to raise_error
83
+ end
81
84
  end
82
85
  end
83
- end
84
86
 
85
- describe "#export_features(entity, include_answer = true)" do
87
+ describe "#export_features(entity, include_answer = true)" do
86
88
 
87
- context "when called with an entity of the proper type" do
88
- context "and include_answer is set to true" do
89
- context "and the answer is already set on the entity" do
90
- it "returns an array of the exported features, with the answer" do
91
- problem = Treat::Learning::Problem.new(@question, @feature)
92
- @sentence.set :is_key_sentence, 1
93
- problem.export_features(@sentence).should eql [7, 1]
89
+ context "when called with an entity of the proper type" do
90
+ context "and include_answer is set to true" do
91
+ context "and the answer is already set on the entity" do
92
+ it "returns an array of the exported features, with the answer" do
93
+ problem = Treat::Learning::Problem.new(@question, @feature)
94
+ @sentence.set :is_key_sentence, 1
95
+ problem.export_features(@sentence).should eql [7, 1]
96
+ end
97
+ end
98
+ context "and the answer is not already set on the entity" do
99
+ it "returns an array of the exported features, with the question's default answer" do
100
+ problem = Treat::Learning::Problem.new(@question, @feature)
101
+ problem.export_features(@sentence).should eql [7, @question.default]
102
+ end
94
103
  end
95
104
  end
96
- context "and the answer is not already set on the entity" do
97
- it "returns an array of the exported features, with the question's default answer" do
105
+ context "and include_answer is set to false" do
106
+ it "returns an array of the exported features, without the answer" do
98
107
  problem = Treat::Learning::Problem.new(@question, @feature)
99
- problem.export_features(@sentence).should eql [7, @question.default]
108
+ problem.export_features(@sentence, false).should eql [7]
100
109
  end
101
110
  end
102
111
  end
103
- context "and include_answer is set to false" do
104
- it "returns an array of the exported features, without the answer" do
112
+ context "when supplied with an entity that is not of the proper type" do
113
+ it "raises an error" do
105
114
  problem = Treat::Learning::Problem.new(@question, @feature)
106
- problem.export_features(@sentence, false).should eql [7]
115
+ word = Treat::Entities::Word.new('test')
116
+ expect { problem.export_features(word) }.to raise_error
107
117
  end
108
118
  end
109
119
  end
110
- context "when supplied with an entity that is not of the proper type" do
111
- it "raises an error" do
112
- problem = Treat::Learning::Problem.new(@question, @feature)
113
- word = Treat::Entities::Word.new('test')
114
- expect { problem.export_features(word) }.to raise_error
115
- end
116
- end
117
- end
118
120
 
119
- describe "#to_hash" do
120
- context "when called on a problem" do
121
- it "returns a hash describing the problem" do
122
- Treat::Learning::Problem.new(@question, @feature, @tag).
123
- to_hash.tap { |e| e['id'] = 0 }.should eql @hash
121
+ describe "#to_hash" do
122
+ context "when called on a problem" do
123
+ it "returns a hash describing the problem" do
124
+ Treat::Learning::Problem.new(@question, @feature, @tag).
125
+ to_hash.tap { |e| e['id'] = 0 }.should eql @hash
126
+ end
124
127
  end
125
128
  end
126
- end
127
129
 
128
- describe "#self.from_hash" do
129
- context "when called with a hash describing a problem" do
130
- it "returns a problem based on the hash" do
131
- problem = Treat::Learning::Problem.from_hash(@hash)
132
- problem.question.name.should eql :is_key_sentence
133
- problem.question.target.should eql :sentence
134
- problem.question.type.should eql :continuous
135
- problem.question.default.should eql 0
136
- problem.features[0].proc_string.should eql nil
137
- problem.features[0].default.should eql 0
138
- problem.features[0].name.should eql :word_count
139
- problem.features[0].proc.should eql nil
130
+ describe "#self.from_hash" do
131
+ context "when called with a hash describing a problem" do
132
+ it "returns a problem based on the hash" do
133
+ problem = Treat::Learning::Problem.from_hash(@hash)
134
+ problem.question.name.should eql :is_key_sentence
135
+ problem.question.target.should eql :sentence
136
+ problem.question.type.should eql :continuous
137
+ problem.question.default.should eql 0
138
+ problem.features[0].proc_string.should eql nil
139
+ problem.features[0].default.should eql 0
140
+ problem.features[0].name.should eql :word_count
141
+ problem.features[0].proc.should eql nil
142
+ end
140
143
  end
141
144
  end
145
+
142
146
  end
143
147
 
144
- end
148
+ end
@@ -1,52 +1,56 @@
1
- describe Treat::Learning::Question do
1
+ module Treat::Specs::Learning
2
2
 
3
- describe "#initialize" do
4
- context "when supplied with acceptable parameters" do
5
- it "should give access to the parameters" do
6
- question = Treat::Learning::Question.new(
7
- :is_keyword, :word, 0, :continuous)
8
- question.name.should eql :is_keyword
9
- question.target.should eql :word
10
- question.type.should eql :continuous
11
- question.default.should eql 0
3
+ describe Treat::Learning::Question do
4
+
5
+ describe "#initialize" do
6
+ context "when supplied with acceptable parameters" do
7
+ it "should give access to the parameters" do
8
+ question = Treat::Learning::Question.new(
9
+ :is_keyword, :word, 0, :continuous)
10
+ question.name.should eql :is_keyword
11
+ question.target.should eql :word
12
+ question.type.should eql :continuous
13
+ question.default.should eql 0
14
+ end
12
15
  end
13
- end
14
- context "when supplied with wrong parameters" do
15
- it "should raise an exception" do
16
- # Name should be a symbol
17
- expect { Treat::Learning::Question.new(
18
- nil, :sentence) }.to raise_error
19
- # Target should be an actual entity type
20
- expect { Treat::Learning::Question.new(
21
- :name, :foo) }.to raise_error
22
- # Distribution type should be continuous or discrete
23
- expect { Treat::Learning::Question.new(
24
- :name, :sentence, 0, :nonsense) }.to raise_error
16
+ context "when supplied with wrong parameters" do
17
+ it "should raise an exception" do
18
+ # Name should be a symbol
19
+ expect { Treat::Learning::Question.new(
20
+ nil, :sentence) }.to raise_error
21
+ # Target should be an actual entity type
22
+ expect { Treat::Learning::Question.new(
23
+ :name, :foo) }.to raise_error
24
+ # Distribution type should be continuous or discrete
25
+ expect { Treat::Learning::Question.new(
26
+ :name, :sentence, 0, :nonsense) }.to raise_error
27
+ end
25
28
  end
26
29
  end
27
- end
28
30
 
29
- describe "#==(question)" do
30
- context "when supplied with an equal question" do
31
- it "should return true" do
32
- Treat::Learning::Question.new(
33
- :is_keyword, :word).
34
- should == Treat::Learning::Question.new(
35
- :is_keyword, :word)
31
+ describe "#==(question)" do
32
+ context "when supplied with an equal question" do
33
+ it "should return true" do
34
+ Treat::Learning::Question.new(
35
+ :is_keyword, :word).
36
+ should == Treat::Learning::Question.new(
37
+ :is_keyword, :word)
38
+ end
36
39
  end
37
- end
38
- context "when supplied with a different question" do
39
- it "should return false" do
40
- Treat::Learning::Question.new(
41
- :is_keyword, :word).
42
- should_not == Treat::Learning::Question.new(
43
- :is_keyword, :sentence)
44
- Treat::Learning::Question.new(
45
- :is_keyword, :word, 0, :continuous).
46
- should_not == Treat::Learning::Question.new(
47
- :is_keyword, :word, 0, :discrete)
40
+ context "when supplied with a different question" do
41
+ it "should return false" do
42
+ Treat::Learning::Question.new(
43
+ :is_keyword, :word).
44
+ should_not == Treat::Learning::Question.new(
45
+ :is_keyword, :sentence)
46
+ Treat::Learning::Question.new(
47
+ :is_keyword, :word, 0, :continuous).
48
+ should_not == Treat::Learning::Question.new(
49
+ :is_keyword, :word, 0, :discrete)
50
+ end
48
51
  end
49
52
  end
53
+
50
54
  end
51
55
 
52
- end
56
+ end