treat 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/treat/config/data/languages/agnostic.rb +6 -3
- data/lib/treat/config/data/languages/english.rb +1 -1
- data/lib/treat/config/data/workers/extractors.rb +8 -0
- data/lib/treat/loaders/stanford.rb +2 -0
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
- data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
- data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
- data/lib/treat/workers/extractors/time/chronic.rb +6 -41
- data/lib/treat/workers/extractors/time/kronic.rb +20 -0
- data/lib/treat/workers/extractors/time/nickel.rb +0 -15
- data/lib/treat/workers/extractors/time/ruby.rb +2 -33
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
- data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
- data/spec/entities/collection.rb +29 -25
- data/spec/entities/document.rb +45 -44
- data/spec/entities/entity.rb +295 -294
- data/spec/entities/phrase.rb +21 -17
- data/spec/entities/token.rb +43 -40
- data/spec/entities/word.rb +5 -1
- data/spec/entities/zone.rb +26 -22
- data/spec/helper.rb +7 -2
- data/spec/learning/data_set.rb +145 -141
- data/spec/learning/export.rb +46 -42
- data/spec/learning/problem.rb +114 -110
- data/spec/learning/question.rb +46 -42
- data/spec/treat.rb +41 -37
- data/spec/workers/agnostic.rb +2 -2
- data/spec/workers/english.rb +12 -12
- metadata +7 -8
- data/files/21552208.html +0 -786
- data/files/nethttp-cheat-sheet-2940.html +0 -393
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
- data/spec/sandbox.rb +0 -294
- data/spec/workers/examples/english/mathematicians/euler.html +0 -21
data/spec/learning/export.rb
CHANGED
@@ -1,52 +1,56 @@
|
|
1
|
-
|
1
|
+
module Treat::Specs::Learning
|
2
2
|
|
3
|
-
describe
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
describe Treat::Learning::Export do
|
4
|
+
|
5
|
+
describe "#initialize" do
|
6
|
+
context "when supplied with acceptable parameters" do
|
7
|
+
it "should give access to the parameters" do
|
8
|
+
export = Treat::Learning::Export.new(:name, 0, "->(e) { e }")
|
9
|
+
export.name.should eql :name
|
10
|
+
export.default.should eql 0
|
11
|
+
export.proc_string.should eql "->(e) { e }"
|
12
|
+
export.proc.should be_instance_of Proc
|
13
|
+
export.proc.call('x').should eql 'x'
|
14
|
+
end
|
12
15
|
end
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
16
|
+
context "when supplied with wrong parameters" do
|
17
|
+
it "should raise an exception" do
|
18
|
+
# First argument should be a symbol representing the name of the export.
|
19
|
+
expect { Treat::Learning::Export.new(nil) }.to raise_error
|
20
|
+
# Third argument, if supplied, should be a string that
|
21
|
+
# evaluates to a proc (NOT a proc/lambda).
|
22
|
+
expect { Treat::Learning::Export.new(:name, 0, lambda { x } ) }.to raise_error
|
23
|
+
# Third argument should be proper ruby syntax.
|
24
|
+
expect { Treat::Learning::Export.new(:name, 0, "->(e) { ") }.to raise_error
|
25
|
+
# Third argument should evaluate to a proc.
|
26
|
+
expect { Treat::Learning::Export.new(:name, 0, "2") }.to raise_error
|
27
|
+
end
|
25
28
|
end
|
26
29
|
end
|
27
|
-
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
describe "#==(question)" do
|
32
|
+
context "when supplied with an equal question" do
|
33
|
+
it "should return true" do
|
34
|
+
Treat::Learning::Export.new(:name).
|
35
|
+
should == Treat::Learning::Export.new(:name)
|
36
|
+
Treat::Learning::Export.new(:name, 0).
|
37
|
+
should == Treat::Learning::Export.new(:name, 0)
|
38
|
+
Treat::Learning::Export.new(:name, 0, "->(e) { }").
|
39
|
+
should == Treat::Learning::Export.new(:name, 0, "->(e) { }")
|
40
|
+
end
|
38
41
|
end
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
42
|
+
context "when supplied with a different question" do
|
43
|
+
it "should return false" do
|
44
|
+
Treat::Learning::Export.new(:name).
|
45
|
+
should_not == Treat::Learning::Export.new(:name2)
|
46
|
+
Treat::Learning::Export.new(:name, 0).
|
47
|
+
should_not == Treat::Learning::Export.new(:name, 1)
|
48
|
+
Treat::Learning::Export.new(:name, 0, "->(e) { }").
|
49
|
+
should_not == Treat::Learning::Export.new(:name, 0, "->(e) { x }")
|
50
|
+
end
|
48
51
|
end
|
49
52
|
end
|
53
|
+
|
50
54
|
end
|
51
55
|
|
52
|
-
end
|
56
|
+
end
|
data/spec/learning/problem.rb
CHANGED
@@ -1,144 +1,148 @@
|
|
1
|
-
|
1
|
+
module Treat::Specs::Learning
|
2
2
|
|
3
|
-
|
4
|
-
@question = Treat::Learning::Question.new(:is_key_sentence,
|
5
|
-
:sentence, 0, :continuous)
|
6
|
-
@feature = Treat::Learning::Feature.new(:word_count, 0)
|
7
|
-
@tag = Treat::Learning::Tag.new(:paragraph_length, 0,
|
8
|
-
"->(e) { e.parent_paragraph.word_count }")
|
9
|
-
@paragraph = Treat::Entities::Paragraph.new(
|
10
|
-
"Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
|
11
|
-
@paragraph.do :segment, :tokenize
|
12
|
-
@sentence = @paragraph.sentences[0]
|
13
|
-
@hash = {"question"=>{"name"=>:is_key_sentence, "target"=>:sentence,
|
14
|
-
"type"=>:continuous, "default"=>0}, "features"=>[
|
15
|
-
{"proc_string"=>nil, "default"=>0, "name"=>:word_count, "proc"=>nil}],
|
16
|
-
"tags"=>[{"proc_string"=>"->(e) { e.parent_paragraph.word_count }",
|
17
|
-
"default"=>0, "name"=>:paragraph_length, "proc"=>nil}], "id"=>0}
|
18
|
-
end
|
3
|
+
describe Treat::Learning::Problem do
|
19
4
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
5
|
+
before do
|
6
|
+
@question = Treat::Learning::Question.new(:is_key_sentence,
|
7
|
+
:sentence, 0, :continuous)
|
8
|
+
@feature = Treat::Learning::Feature.new(:word_count, 0)
|
9
|
+
@tag = Treat::Learning::Tag.new(:paragraph_length, 0,
|
10
|
+
"->(e) { e.parent_paragraph.word_count }")
|
11
|
+
@paragraph = Treat::Entities::Paragraph.new(
|
12
|
+
"Ranga and I went to the store. Meanwhile, Ryan was sleeping.")
|
13
|
+
@paragraph.apply :segment, :tokenize
|
14
|
+
@sentence = @paragraph.sentences[0]
|
15
|
+
@hash = {"question"=>{"name"=>:is_key_sentence, "target"=>:sentence,
|
16
|
+
"type"=>:continuous, "default"=>0}, "features"=>[
|
17
|
+
{"proc_string"=>nil, "default"=>0, "name"=>:word_count, "proc"=>nil}],
|
18
|
+
"tags"=>[{"proc_string"=>"->(e) { e.parent_paragraph.word_count }",
|
19
|
+
"default"=>0, "name"=>:paragraph_length, "proc"=>nil}], "id"=>0}
|
31
20
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
21
|
+
|
22
|
+
describe "#initialize" do
|
23
|
+
context "when supplied with proper arguments" do
|
24
|
+
it "initializes the problem and gives access to parameters" do
|
25
|
+
problem = Treat::Learning::Problem.new(@question, @feature, @tag)
|
26
|
+
problem.question.should eql @question
|
27
|
+
problem.features.should eql [@feature]
|
28
|
+
problem.tags.should eql [@tag]
|
29
|
+
problem.feature_labels.should eql [@feature.name]
|
30
|
+
problem.tag_labels.should eql [@tag.name]
|
31
|
+
# ID ??? FIXME
|
32
|
+
end
|
33
|
+
end
|
34
|
+
context "when supplied with unacceptable arguments" do
|
35
|
+
it "raises an error" do
|
36
|
+
# First argument should be instance of Question.
|
37
|
+
expect { Treat::Learning::Problem.new('foo') }.to raise_error
|
38
|
+
# Arguments >= 2 should be instances of Export.
|
39
|
+
expect { Treat::Learning::Problem.new(@question, 'foo') }.to raise_error
|
40
|
+
# Should have at least one Feature in the arguments.
|
41
|
+
expect { Treat::Learning::Problem.new(@question, @tag) }.to raise_error
|
42
|
+
end
|
40
43
|
end
|
41
44
|
end
|
42
|
-
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
describe "#==(problem)" do
|
47
|
+
context "when supplied with an equal problem" do
|
48
|
+
it "should return true" do
|
49
|
+
Treat::Learning::Problem.new(@question, @feature).
|
50
|
+
should == Treat::Learning::Problem.new(@question, @feature)
|
51
|
+
Treat::Learning::Problem.new(@question, @feature, @tag).
|
52
|
+
should == Treat::Learning::Problem.new(@question, @feature, @tag)
|
53
|
+
end
|
51
54
|
end
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
55
|
+
context "when supplied with a different question" do
|
56
|
+
it "should return false" do
|
57
|
+
question = Treat::Learning::Question.new(:is_key_sentence, :sentence)
|
58
|
+
feature = Treat::Learning::Feature.new(:word_count, 999)
|
59
|
+
tag = Treat::Learning::Tag.new(:paragraph_length, 999)
|
60
|
+
Treat::Learning::Problem.new(@question, @feature).
|
61
|
+
should_not == Treat::Learning::Problem.new(question, @feature)
|
62
|
+
Treat::Learning::Problem.new(@question, @feature).
|
63
|
+
should_not == Treat::Learning::Problem.new(@question, feature)
|
64
|
+
Treat::Learning::Problem.new(@question, @feature, @tag).
|
65
|
+
should_not == Treat::Learning::Problem.new(@question, @feature, tag)
|
66
|
+
end
|
64
67
|
end
|
65
68
|
end
|
66
|
-
end
|
67
69
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
70
|
+
describe "#export_tags(entity)" do
|
71
|
+
context "when called on a problem that has tags" do
|
72
|
+
context "and called with an entity of the proper type" do
|
73
|
+
it "returns an array of the tags" do
|
74
|
+
problem = Treat::Learning::Problem.new(@question, @feature, @tag)
|
75
|
+
problem.export_tags(@sentence).should eql [11]
|
76
|
+
end
|
74
77
|
end
|
75
78
|
end
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
79
|
+
context "when called on a problem that doesn't have tags" do
|
80
|
+
it "raises an error" do
|
81
|
+
problem = Treat::Learning::Problem.new(@question, @feature)
|
82
|
+
expect { problem.export_tags(@sentence) }.to raise_error
|
83
|
+
end
|
81
84
|
end
|
82
85
|
end
|
83
|
-
end
|
84
86
|
|
85
|
-
|
87
|
+
describe "#export_features(entity, include_answer = true)" do
|
86
88
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
89
|
+
context "when called with an entity of the proper type" do
|
90
|
+
context "and include_answer is set to true" do
|
91
|
+
context "and the answer is already set on the entity" do
|
92
|
+
it "returns an array of the exported features, with the answer" do
|
93
|
+
problem = Treat::Learning::Problem.new(@question, @feature)
|
94
|
+
@sentence.set :is_key_sentence, 1
|
95
|
+
problem.export_features(@sentence).should eql [7, 1]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
context "and the answer is not already set on the entity" do
|
99
|
+
it "returns an array of the exported features, with the question's default answer" do
|
100
|
+
problem = Treat::Learning::Problem.new(@question, @feature)
|
101
|
+
problem.export_features(@sentence).should eql [7, @question.default]
|
102
|
+
end
|
94
103
|
end
|
95
104
|
end
|
96
|
-
context "and
|
97
|
-
it "returns an array of the exported features,
|
105
|
+
context "and include_answer is set to false" do
|
106
|
+
it "returns an array of the exported features, without the answer" do
|
98
107
|
problem = Treat::Learning::Problem.new(@question, @feature)
|
99
|
-
problem.export_features(@sentence).should eql [7
|
108
|
+
problem.export_features(@sentence, false).should eql [7]
|
100
109
|
end
|
101
110
|
end
|
102
111
|
end
|
103
|
-
context "
|
104
|
-
it "
|
112
|
+
context "when supplied with an entity that is not of the proper type" do
|
113
|
+
it "raises an error" do
|
105
114
|
problem = Treat::Learning::Problem.new(@question, @feature)
|
106
|
-
|
115
|
+
word = Treat::Entities::Word.new('test')
|
116
|
+
expect { problem.export_features(word) }.to raise_error
|
107
117
|
end
|
108
118
|
end
|
109
119
|
end
|
110
|
-
context "when supplied with an entity that is not of the proper type" do
|
111
|
-
it "raises an error" do
|
112
|
-
problem = Treat::Learning::Problem.new(@question, @feature)
|
113
|
-
word = Treat::Entities::Word.new('test')
|
114
|
-
expect { problem.export_features(word) }.to raise_error
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
120
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
121
|
+
describe "#to_hash" do
|
122
|
+
context "when called on a problem" do
|
123
|
+
it "returns a hash describing the problem" do
|
124
|
+
Treat::Learning::Problem.new(@question, @feature, @tag).
|
125
|
+
to_hash.tap { |e| e['id'] = 0 }.should eql @hash
|
126
|
+
end
|
124
127
|
end
|
125
128
|
end
|
126
|
-
end
|
127
129
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
130
|
+
describe "#self.from_hash" do
|
131
|
+
context "when called with a hash describing a problem" do
|
132
|
+
it "returns a problem based on the hash" do
|
133
|
+
problem = Treat::Learning::Problem.from_hash(@hash)
|
134
|
+
problem.question.name.should eql :is_key_sentence
|
135
|
+
problem.question.target.should eql :sentence
|
136
|
+
problem.question.type.should eql :continuous
|
137
|
+
problem.question.default.should eql 0
|
138
|
+
problem.features[0].proc_string.should eql nil
|
139
|
+
problem.features[0].default.should eql 0
|
140
|
+
problem.features[0].name.should eql :word_count
|
141
|
+
problem.features[0].proc.should eql nil
|
142
|
+
end
|
140
143
|
end
|
141
144
|
end
|
145
|
+
|
142
146
|
end
|
143
147
|
|
144
|
-
end
|
148
|
+
end
|
data/spec/learning/question.rb
CHANGED
@@ -1,52 +1,56 @@
|
|
1
|
-
|
1
|
+
module Treat::Specs::Learning
|
2
2
|
|
3
|
-
describe
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
describe Treat::Learning::Question do
|
4
|
+
|
5
|
+
describe "#initialize" do
|
6
|
+
context "when supplied with acceptable parameters" do
|
7
|
+
it "should give access to the parameters" do
|
8
|
+
question = Treat::Learning::Question.new(
|
9
|
+
:is_keyword, :word, 0, :continuous)
|
10
|
+
question.name.should eql :is_keyword
|
11
|
+
question.target.should eql :word
|
12
|
+
question.type.should eql :continuous
|
13
|
+
question.default.should eql 0
|
14
|
+
end
|
12
15
|
end
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
16
|
+
context "when supplied with wrong parameters" do
|
17
|
+
it "should raise an exception" do
|
18
|
+
# Name should be a symbol
|
19
|
+
expect { Treat::Learning::Question.new(
|
20
|
+
nil, :sentence) }.to raise_error
|
21
|
+
# Target should be an actual entity type
|
22
|
+
expect { Treat::Learning::Question.new(
|
23
|
+
:name, :foo) }.to raise_error
|
24
|
+
# Distribution type should be continuous or discrete
|
25
|
+
expect { Treat::Learning::Question.new(
|
26
|
+
:name, :sentence, 0, :nonsense) }.to raise_error
|
27
|
+
end
|
25
28
|
end
|
26
29
|
end
|
27
|
-
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
describe "#==(question)" do
|
32
|
+
context "when supplied with an equal question" do
|
33
|
+
it "should return true" do
|
34
|
+
Treat::Learning::Question.new(
|
35
|
+
:is_keyword, :word).
|
36
|
+
should == Treat::Learning::Question.new(
|
37
|
+
:is_keyword, :word)
|
38
|
+
end
|
36
39
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
40
|
+
context "when supplied with a different question" do
|
41
|
+
it "should return false" do
|
42
|
+
Treat::Learning::Question.new(
|
43
|
+
:is_keyword, :word).
|
44
|
+
should_not == Treat::Learning::Question.new(
|
45
|
+
:is_keyword, :sentence)
|
46
|
+
Treat::Learning::Question.new(
|
47
|
+
:is_keyword, :word, 0, :continuous).
|
48
|
+
should_not == Treat::Learning::Question.new(
|
49
|
+
:is_keyword, :word, 0, :discrete)
|
50
|
+
end
|
48
51
|
end
|
49
52
|
end
|
53
|
+
|
50
54
|
end
|
51
55
|
|
52
|
-
end
|
56
|
+
end
|