treat 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -0
- data/lib/treat/config/data/core.rb +3 -1
- data/lib/treat/config/data/languages/agnostic.rb +1 -1
- data/lib/treat/core/dsl.rb +12 -44
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/name_tag/stanford.rb +1 -1
- data/lib/treat/workers/extractors/topic_words/lda.rb +1 -1
- data/lib/treat/workers/formatters/readers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/readers/html.rb +4 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +1 -1
- data/lib/treat/workers/groupable.rb +1 -3
- data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +3 -2
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +12 -2
- data/lib/treat/workers/lexicalizers/taggers/brill.rb +2 -1
- data/lib/treat/workers/lexicalizers/taggers/lingua.rb +3 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -5
- data/spec/entities/collection.rb +2 -2
- data/spec/entities/entity.rb +4 -4
- data/spec/helper.rb +16 -68
- data/spec/{core → learning}/data_set.rb +0 -0
- data/spec/{core → learning}/export.rb +0 -0
- data/spec/{core → learning}/problem.rb +0 -0
- data/spec/{core → learning}/question.rb +0 -0
- data/spec/sandbox.rb +14 -3
- data/spec/workers/agnostic.rb +80 -30
- data/spec/workers/english.rb +475 -190
- metadata +6 -11
- data/files/21552208.html +0 -792
- data/files/nethttp-cheat-sheet-2940.html +0 -392
- data/lib/treat/config/data/config.rb +0 -50
- data/spec/workers/language.rb +0 -280
- data/spec/workers.rb +0 -28
data/spec/workers/language.rb
DELETED
@@ -1,280 +0,0 @@
|
|
1
|
-
module Treat::Specs::Workers

  # Base class for the per-language worker scenarios.
  #
  # Each subclass is expected to define a +Scenarios+ constant, which is
  # read when an instance is created. An instance runs every worker that
  # is registered for its language either as a benchmark (mode
  # 'benchmark') or as a set of RSpec examples (mode 'spec').
  class Language

    include Treat::Core::DSL

    # Every class that inherits from Language, in definition order.
    @@list = []

    # Headings for the list of workers table.
    BenchmarkHeadings = [
      'Method', 'Worker', 'Description',
      'Reference', 'User time', 'System time',
      'Real time', 'Accuracy'
    ]

    # Default options for #run.
    DefaultOptions = { save_html: true }

    # Workers that are never run by #run_scenarios (FIXME).
    SkippedWorkers = [:mongo, :html, :lda]

    # Target entity types that are never run by
    # #run_scenarios_for_all_workers (FIXME).
    SkippedTargets = [:section]

    # Add the language to the list, and define an initialize method
    # on it that stores the scenarios, the mode and the language name.
    def self.inherited(base)
      super
      @@list << base
      base.class_eval do
        def initialize(mode)
          @scenarios = self.class.const_get(:Scenarios)
          @mode = mode
          @language = self.class.mn.downcase
        end
      end
    end

    # Return the list of registered languages.
    def self.list; @@list; end

    # Runs the benchmarks or spec tasks.
    #
    # In 'benchmark' mode the results are printed as a text table and,
    # if options[:save_html] is set (the default), also passed to the
    # HTML table helper. In any other mode the scenarios are run and
    # nothing is printed.
    def run(options = {})
      options = DefaultOptions.merge(options)
      results = run_scenarios
      return unless @mode == 'benchmark'
      print "\n\nBenchmark for #{@language.capitalize}\n"
      Treat::Specs::Helper.text_table(BenchmarkHeadings, results)
      if options[:save_html]
        Treat::Specs::Helper.html_table(BenchmarkHeadings, results)
      end
    end

    # Run all scenarios for a language, for all of the
    # algorithm categories (e.g. Processors, Extractors).
    # Returns one result per worker that was run.
    def run_scenarios
      categories = Treat.languages[@language].workers
      runner = "run_scenarios_as_#{@mode}s"
      results = []
      categories.members.each do |cat|
        category = categories[cat]
        category.members.each do |grp|
          group = category[grp]
          group_class = Treat::Workers.
          const_get(cat.cc).const_get(grp.cc)
          group.each do |worker|
            next if SkippedWorkers.include?(worker)
            results << send(runner, worker, group_class)
          end
        end
      end
      results
    end

    # Benchmark one worker and return a row for the table, in the
    # same order as BenchmarkHeadings.
    def run_scenarios_as_benchmarks(worker, group)
      info = get_worker_info(worker, group)
      # Declared outside the block so the measured
      # block can hand the accuracy back.
      accuracy = 0
      time = ::Benchmark.measure do
        accuracy = run_scenarios_for_all_workers(
        worker, group, 'benchmark')
      end
      [ group.method.to_s, worker.to_s,
        info[:description].strip,
        info[:reference] || '-',
        time.utime.round(4).to_s,
        time.stime.round(4).to_s,
        time.real.round(4).to_s,
        accuracy ]
    end

    # Run examples as specs on each
    # of the worker's target entities.
    def run_scenarios_as_specs(worker, group)
      run_scenarios_for_all_workers(worker, group, 'spec')
    end

    # Run a scenario (i.e. spec or benchmark) for a worker on every
    # target entity type of its group.
    #
    # Returns the accuracy of the worker as a percentage rounded to
    # two decimals, or 0.0 when no tries were recorded (which avoids
    # the NaN that 0.0 / 0.0 would produce).
    def run_scenarios_for_all_workers(worker, group, mode)
      successes = 0; tries = 0
      runner = "run_worker_#{mode}s"
      group.targets.each do |target|
        next if SkippedTargets.include?(target)
        i, n = send(runner, worker, group, target)
        successes += i; tries += n
      end
      return 0.0 if tries.zero?
      (successes.to_f / tries.to_f * 100).round(2)
    end

    # Run all examples available to test the worker
    # on a given target entity type as benchmarks.
    # Outputs [# successes, # tries]; a missing
    # scenario counts as one failed try.
    def run_worker_benchmarks(worker, group, target)
      scenario = find_scenario(group.method, target)
      return [0, 1] unless scenario
      # Same test as in #run_worker_specs: a Hash of examples only
      # means "one entry per preset" when the group has a preset
      # option; otherwise .run_examples looks it up by worker.
      if scenario[:examples].is_a?(Hash) && group.preset_option
        run_scenario_presets(worker, group, target, scenario)
      else
        Treat::Specs::Workers::Language.
        run_examples(worker, group, target, scenario)
      end
    end

    # Run all examples available to test the worker
    # on a given target entity type as RSpec tests.
    #
    # Builds and registers an RSpec example group. The [i, n]
    # counters that are returned are only updated from inside
    # the example bodies.
    def run_worker_specs(worker, group, target)
      scenario = find_scenario(group.method, target)
      return [0, 1] unless scenario
      does = Treat::Specs::Workers::
      Descriptions[group.method]
      i = 0; n = 0
      rspec_task = RSpec::Core::ExampleGroup.describe(group) do
        context "when it is called on a #{target}" do
          if scenario[:examples].is_a?(Hash) && group.preset_option
            scenario[:examples].each do |preset, preset_examples|
              context "and #{group.preset_option} is set to #{preset}" do
                it(does[preset]) do
                  options = {group.preset_option => preset}
                  bm = scenario.dup; bm[:examples] = preset_examples
                  i2, n2 = Treat::Specs::Workers::Language.
                  run_examples(worker, group, target, bm, options)
                  # Check for accuracy.
                  (i2.to_f/n2.to_f*100).round(2).should eql 100.0
                  i += i2; n += n2
                end
              end
            end
          else
            it(does) do
              i, n = Treat::Specs::Workers::Language.
              run_examples(worker, group, target, scenario)
              # Check for accuracy.
              (i.to_f/n.to_f*100).round(2).should eql 100.0
            end
          end
        end
      end
      rspec_task.register
      [i, n]
    end

    # Run the examples of a scenario against one worker.
    #
    # Each example is [value, expectation, options], where the
    # options may be omitted or given as a Proc that returns them.
    # When scenario[:examples] is a Hash, the examples are looked up
    # by worker name and Treat::Exception is raised if there are none.
    #
    # Returns [# successes, # tries]. Examples that raise a
    # Treat::Exception are reported and not counted; when nothing at
    # all was counted, [1, 1] is returned.
    def self.run_examples(worker, group, target, scenario, options = {})
      i = 0; n = 0
      examples = scenario[:examples]
      generator = scenario[:generator]
      preprocessor = scenario[:preprocessor]
      target_class = Treat::Entities.
      const_get(target.cc)
      if examples.is_a?(Hash)
        unless examples[worker]
          raise Treat::Exception,
          "No example defined for worker #{worker}."
        end
        examples = examples[worker]
      end
      examples.each do |example|
        value, expectation, extra = *example
        entity = target_class.build(value)
        begin
          preprocessor.call(entity) if preprocessor
          extra = extra.call if extra.is_a?(::Proc)
          # Merge into a fresh hash for every example, so that the
          # options of one example never leak into the next one.
          call_options = options.merge(extra || {})
          result = entity.send(group.
          method, worker, call_options)
          if generator
            operand = (group.type ==
            :computer ? result : entity)
            result = generator.call(operand)
          end
        rescue Treat::Exception => e
          puts e.message
          next
        end
        puts result.inspect
        i += 1 if result == expectation
        n += 1
      end
      (i == 0 && n == 0) ? [1, 1] : [i, n]
    end

    # * Helpers * #

    # Given a method and a target,
    # find a scenario for the current
    # language class instance.
    #
    # Prints a warning and returns nil when there is no scenario
    # for the method, or no examples for that target entity type.
    def find_scenario(method, target)
      unless @scenarios[method]
        puts "Warning: there is no scenario for " +
        "method ##{method} called on " +
        "#{target.to_s.plural} in the " +
        "#{@language.capitalize} language."
        return nil
      end
      unless @scenarios[method][target]
        puts "Warning: there is a scenario for " +
        "method ##{method} in the " +
        "#{@language.capitalize} language, " +
        "but there are no examples for target " +
        "entity type '#{target.to_s.plural}'."
        return nil
      end
      @scenarios[method][target]
    end

    # Parse out the description and reference from
    # the Ruby file defining the worker/adapter.
    #
    # Returns a hash with :description and :reference
    # keys; the reference is '-' when none is found.
    def get_worker_info(worker, group)
      bits = group.to_s.split('::')
      bits.collect! { |bit| bit.ucc }
      file = bits.join('/') + "/#{worker}.rb"
      contents = File.read(Treat.paths.lib + file)
      # The header is everything before the class definition. Prefer
      # a "class" keyword that starts a line, so that a word such as
      # "classifier" in the header comment does not cut it short,
      # and never slice with a nil bound.
      cut = contents.index(/^\s*class\b/) ||
      contents.index('class') || contents.length
      head = contents[0...cut]
      parts = head.gsub("\n# ", "\n").gsub('#', '').
      gsub('encoding: utf-8', '').
      gsub(/Authors: (.*)/m, ''). # ouch
      gsub(/License: (.*)/m, '').
      gsub(/Website: (.*)/m, '').
      split('Original paper: ')
      {description: parts[0] || '',
      reference: parts[1] || '-'}
    end

    # Runs a benchmark for each preset.
    # Returns [# successes, # tries] summed over all presets.
    def run_scenario_presets(worker, group, target, scenario)
      i, n = 0, 0
      scenario[:examples].each do |preset, preset_examples|
        options = {group.preset_option => preset}
        sc = scenario.dup; sc[:examples] = preset_examples
        i2, n2 = Treat::Specs::Workers::Language.
        run_examples(worker, group, target, sc, options)
        i += i2; n += n2
      end
      [i, n]
    end

  end

end
|
data/spec/workers.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
module Treat::Specs::Workers

  # Descriptions used to label the worker examples, keyed by the
  # name of the worker method. An entry is either a single string
  # or, for methods that have several variants, a nested hash with
  # one description per variant.
  Descriptions = {

    stem: "returns the stem of the word",

    conjugate: {
      infinitive: "returns the infinitive form of a verb",
      present_participle: "returns the present participle form of a verb"
    },

    declense: {
      plural: "returns the plural form of the word",
      singular: "returns the singular form of the word"
    },

    ordinal: "returns the ordinal form of a number",

    sense: {
      synonyms: "returns the synonyms of the word",
      antonyms: "returns the antonyms of the word",
      hypernyms: "returns the hypernyms of the word",
      hyponyms: "returns the hyponyms of the word"
    },

    tag: "returns the tag of the token",
    category: "returns the category of the number, punctuation or symbol",
    name_tag: "tags the named entity words in the group of words",
    time: "annotates all entities within the group with time information",
    tokenize: "splits the group of words into tokens and adds them as children of the group",
    parse: "parses a group of words into its syntax tree, adding nested phrases and tokens as children of the group",
    topics: "returns a list of general topics the document belongs to",
    segment: "splits a zone into phrases/sentences and adds them as children of the zone"

  }

end
|