treat 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/lib/treat/config/data/core.rb +3 -1
- data/lib/treat/config/data/languages/agnostic.rb +1 -1
- data/lib/treat/core/dsl.rb +12 -44
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/name_tag/stanford.rb +1 -1
- data/lib/treat/workers/extractors/topic_words/lda.rb +1 -1
- data/lib/treat/workers/formatters/readers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/readers/html.rb +4 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +1 -1
- data/lib/treat/workers/groupable.rb +1 -3
- data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +3 -2
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +12 -2
- data/lib/treat/workers/lexicalizers/taggers/brill.rb +2 -1
- data/lib/treat/workers/lexicalizers/taggers/lingua.rb +3 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -5
- data/spec/entities/collection.rb +2 -2
- data/spec/entities/entity.rb +4 -4
- data/spec/helper.rb +16 -68
- data/spec/{core → learning}/data_set.rb +0 -0
- data/spec/{core → learning}/export.rb +0 -0
- data/spec/{core → learning}/problem.rb +0 -0
- data/spec/{core → learning}/question.rb +0 -0
- data/spec/sandbox.rb +14 -3
- data/spec/workers/agnostic.rb +80 -30
- data/spec/workers/english.rb +475 -190
- metadata +6 -11
- data/files/21552208.html +0 -792
- data/files/nethttp-cheat-sheet-2940.html +0 -392
- data/lib/treat/config/data/config.rb +0 -50
- data/spec/workers/language.rb +0 -280
- data/spec/workers.rb +0 -28
data/spec/workers/language.rb
DELETED
@@ -1,280 +0,0 @@
|
|
1
|
-
module Treat::Specs::Workers
|
2
|
-
|
3
|
-
class Language
|
4
|
-
|
5
|
-
include Treat::Core::DSL

# Registry of the classes that inherit from Language; entries are
# appended by .inherited and exposed through .list.
@@list = []

# Column headings for the benchmark results table. The order matches
# the rows returned by #run_scenarios_as_benchmarks.
BenchmarkHeadings = [
  'Method',
  'Worker',
  'Description',
  'Reference',
  'User time',
  'System time',
  'Real time',
  'Accuracy'
]
|
14
|
-
|
15
|
-
# Add the language to the list,
|
16
|
-
# and define an initialize method.
|
17
|
-
# Inheritance hook: records +base+ in the shared registry and gives it
# a constructor taking the run mode.
#
# The generated #initialize stores the subclass's Scenarios constant,
# the mode, and a lower-cased language name obtained from
# self.class.mn (a project extension; presumably the unqualified
# class name -- confirm).
def self.inherited(base)
  @@list.push(base)
  base.class_eval do
    def initialize(mode)
      @scenarios = self.class.const_get(:Scenarios)
      @mode = mode
      @language = self.class.mn.downcase
    end
  end
end
|
27
|
-
|
28
|
-
# Return the list of registered languages.
|
29
|
-
# Accessor for the registry array that .inherited appends to.
def self.list
  @@list
end
|
30
|
-
|
31
|
-
# Default options for #run.
|
32
|
-
# Options used by #run when the caller does not override them.
DefaultOptions = { save_html: true }

# Runs every scenario for this language in the mode given at
# construction time.
#
# In 'benchmark' mode the collected rows are printed as a text table
# and, when options[:save_html] is truthy, also handed to
# Treat::Specs::Helper.html_table. In any other mode the scenarios are
# run and nothing is printed here.
#
# options - Hash merged over DefaultOptions (default: {}).
def run(options = {})
  options = DefaultOptions.merge(options)
  rows = run_scenarios
  return unless @mode == 'benchmark'

  print "\n\nBenchmark for #{@language.capitalize}\n"
  Treat::Specs::Helper.text_table(BenchmarkHeadings, rows)
  Treat::Specs::Helper.html_table(BenchmarkHeadings, rows) if options[:save_html]
end
|
49
|
-
|
50
|
-
# Run all scenarios for a language, for all of the
|
51
|
-
# algorithm categories (e.g. Processors, Extractors).
|
52
|
-
# Walks every worker category and group configured for @language and
# dispatches each worker to "run_scenarios_as_<mode>s".
#
# Returns an Array with one entry per worker that was run.
def run_scenarios
  runner = "run_scenarios_as_#{@mode}s"
  categories = Treat.languages[@language].workers
  rows = []
  categories.members.each do |category_name|
    category = categories[category_name]
    category.members.each do |group_name|
      workers = category[group_name]
      # Resolve e.g. Treat::Workers::<Category>::<Group> from the names.
      group_class = Treat::Workers.const_get(category_name.cc).const_get(group_name.cc)
      workers.each do |worker|
        # FIXME: these three workers are skipped unconditionally.
        next if worker == :mongo || worker == :html || worker == :lda
        rows << send(runner, worker, group_class)
      end
    end
  end
  rows
end
|
77
|
-
|
78
|
-
# Run all benchmarks.
|
79
|
-
# Benchmarks one worker and builds its row for the results table.
#
# worker - worker name.
# group  - worker group class (must respond to #method).
#
# Returns an Array laid out like BenchmarkHeadings: method, worker,
# description, reference, user/system/real time (rounded to four
# decimals, as Strings) and the accuracy reported by
# #run_scenarios_for_all_workers.
def run_scenarios_as_benchmarks(worker, group)
  info = get_worker_info(worker, group)
  accuracy = 0
  timing = ::Benchmark.measure do
    accuracy = run_scenarios_for_all_workers(worker, group, 'benchmark')
  end
  [
    group.method.to_s,
    worker.to_s,
    info[:description].strip,
    info[:reference] || '-',
    timing.utime.round(4).to_s,
    timing.stime.round(4).to_s,
    timing.real.round(4).to_s,
    accuracy
  ]
end
|
97
|
-
|
98
|
-
# Run examples as specs on each
|
99
|
-
# of the worker's target entities.
|
100
|
-
# Spec-mode counterpart of #run_scenarios_as_benchmarks: delegates to
# #run_scenarios_for_all_workers with mode 'spec' and returns its
# result unchanged.
def run_scenarios_as_specs(worker, group)
  mode = 'spec'
  run_scenarios_for_all_workers(worker, group, mode)
end
|
103
|
-
|
104
|
-
# Run a scenario (i.e. spec or benchmark
|
105
|
-
# all workers available to perform a given
|
106
|
-
# method call in a certain language).
|
107
|
-
# Runs a worker against every target entity type of its group and
# aggregates the outcome.
#
# worker - worker name.
# group  - worker group; must respond to #targets.
# mode   - 'spec' or 'benchmark'; selects "run_worker_<mode>s".
#
# Returns the percentage of successful tries as a Float rounded to two
# decimals, or 0.0 when no tries were counted (previously this case
# divided 0.0 by 0.0 and returned NaN).
def run_scenarios_for_all_workers(worker, group, mode)
  runner = "run_worker_#{mode}s"
  successes = 0
  tries = 0
  group.targets.each do |target|
    next if target == :section # FIXME: sections are skipped.
    hits, attempts = send(runner, worker, group, target)
    successes += hits
    tries += attempts
  end
  return 0.0 if tries.zero?

  (successes.to_f / tries * 100).round(2)
end
|
119
|
-
|
120
|
-
# Run all examples available to test the worker
|
121
|
-
# on a given target entity type as benchmarks.
|
122
|
-
# Outputs [# successes, # tries].
|
123
|
-
# Runs every example available for +worker+ on one target entity type
# in benchmark mode.
#
# Returns [successes, tries]; [0, 1] when #find_scenario yields no
# scenario for the group's method and the target.
def run_worker_benchmarks(worker, group, target)
  # find_scenario already returns the scenario for this method/target
  # pair, so the former second lookup through @scenarios was redundant.
  scenario = find_scenario(group.method, target)
  return [0, 1] unless scenario

  successes, tries =
    if scenario[:examples].is_a?(Hash)
      # Examples keyed by preset: run one batch per preset.
      run_scenario_presets(worker, group, target, scenario)
    else
      Treat::Specs::Workers::Language.run_examples(worker, group, target, scenario)
    end
  [successes, tries]
end
|
136
|
-
|
137
|
-
|
138
|
-
# Run all examples available to test the worker
|
139
|
-
# on a given target entity type as RSpec tests.
|
140
|
-
# Registers an RSpec example group that checks +worker+ on one target
# entity type, expecting 100% of the examples to match.
#
# When the scenario's examples are a Hash and the group declares a
# preset option, one example is defined per preset; otherwise a single
# example covers the whole scenario.
#
# Returns [successes, tries]; [0, 1] when there is no scenario.
# NOTE(review): the counters are only updated inside the `it` bodies,
# so they presumably still hold 0 when this method returns -- confirm
# against how RSpec schedules the registered group.
def run_worker_specs(worker, group, target)
  scenario = find_scenario(group.method, target)
  return [0, 1] unless scenario

  wording = Treat::Specs::Workers::Descriptions[group.method]
  successes = 0
  tries = 0
  example_group = RSpec::Core::ExampleGroup.describe(group) do
    context "when it is called on a #{target}" do
      if scenario[:examples].is_a?(Hash) && group.preset_option
        scenario[:examples].each do |preset, preset_examples|
          context "and #{group.preset_option} is set to #{preset}" do
            it wording[preset] do
              preset_options = { group.preset_option => preset }
              preset_scenario = scenario.dup
              preset_scenario[:examples] = preset_examples
              passed, total = Treat::Specs::Workers::Language.run_examples(
                worker, group, target, preset_scenario, preset_options
              )
              (passed.to_f / total.to_f * 100).round(2).should eql 100.0
              successes += passed
              tries += total
            end
          end
        end
      else
        it wording do
          successes, tries = Treat::Specs::Workers::Language.run_examples(
            worker, group, target, scenario
          )
          (successes.to_f / tries.to_f * 100).round(2).should eql 100.0
        end
      end
    end
  end
  example_group.register
  [successes, tries]
end
|
175
|
-
|
176
|
-
# Runs each example of a scenario through a worker and counts how many
# produce the expected result.
#
# worker   - worker name, passed as first argument to the group method.
# group    - worker group; must respond to #method and #type.
# target   - entity type name; resolved to a class under Treat::Entities.
# scenario - Hash with :examples (Array, or Hash keyed by worker) and
#            optional :generator and :preprocessor callables.
# options  - base options passed to every call (default: {}).
#
# Each example is [value, expectation, extra_options]; extra_options
# may be nil, a Hash or a Proc returning one. Extra options apply to
# their own example only: they are merged into a per-call copy instead
# of being accumulated into +options+ for the examples that follow.
#
# Examples raising Treat::Exception are reported with puts and left out
# of both counts. Returns [successes, tries], or [1, 1] when nothing
# was counted at all.
#
# Raises Treat::Exception when :examples is a Hash without an entry for
# +worker+.
def self.run_examples(worker, group, target, scenario, options = {})
  examples = scenario[:examples]
  generator = scenario[:generator]
  preprocessor = scenario[:preprocessor]
  target_class = Treat::Entities.const_get(target.cc)
  if examples.is_a?(Hash)
    unless examples[worker]
      raise Treat::Exception,
            "No example defined for worker #{worker}."
    end
    examples = examples[worker]
  end

  successes = 0
  tries = 0
  examples.each do |example|
    value, expectation, extra = *example
    entity = target_class.build(value)
    begin
      preprocessor.call(entity) if preprocessor
      extra = extra.call if extra.is_a?(::Proc)
      call_options = options.merge(extra || {})
      result = entity.send(group.method, worker, call_options)
      if generator
        # Computers hand their return value to the generator; other
        # worker types hand over the entity they were called on.
        operand = group.type == :computer ? result : entity
        result = generator.call(operand)
      end
    rescue Treat::Exception => e
      puts e.message
      next
    end
    puts result.inspect
    successes += 1 if result == expectation
    tries += 1
  end
  successes.zero? && tries.zero? ? [1, 1] : [successes, tries]
end
|
221
|
-
|
222
|
-
# * Helpers * #
|
223
|
-
|
224
|
-
# Given a method and a target,
|
225
|
-
# find a scenario for the current
|
226
|
-
# language class instance.
|
227
|
-
# Looks up the scenario registered for +method+ on +target+ entities in
# this language's @scenarios table.
#
# Prints a warning and returns nil when there is no scenario for the
# method at all, or when the method has scenarios but none for the
# target. (The second check used to repeat the first condition, so its
# warning could never be printed.)
#
# Returns the scenario, or nil.
def find_scenario(method, target)
  by_target = @scenarios[method]
  unless by_target
    puts "Warning: there is no scenario for " +
         "method ##{method} called on " +
         "#{target.to_s.plural} in the " +
         "#{@language.capitalize} language."
    return nil
  end

  scenario = by_target[target]
  unless scenario
    puts "Warning: there is a scenario for " +
         "method ##{method} in the " +
         "#{@language.capitalize} language, " +
         "but there are no examples for target " +
         "entity type '#{target.to_s.plural}'."
    return nil
  end
  scenario
end
|
245
|
-
|
246
|
-
# Parse out the description and reference from
|
247
|
-
# the Ruby file defining the worker/adapter.
|
248
|
-
# Reads the header comment of the Ruby file defining a worker and
# splits it into a description and a reference.
#
# worker - worker name; used as the file's base name.
# group  - worker group; its to_s is split on '::' and each part is
#          converted with String#ucc (project extension) to build the
#          directory path under Treat.paths.lib.
#
# The header is everything before the first line that starts with the
# `class` keyword. Matching the keyword at the start of a line keeps
# words such as "classifier" inside the comment from cutting the header
# short; a file without such a line is used in full.
#
# Returns { description: String, reference: String }; the reference is
# '-' when the header has no 'Original paper: ' marker.
def get_worker_info(worker, group)
  bits = group.to_s.split('::').map { |bit| bit.ucc }
  file = bits.join('/') + "/#{worker}.rb"
  contents = File.read(Treat.paths.lib + file)
  class_at = contents.index(/^[ \t]*class\b/)
  head = class_at ? contents[0...class_at] : contents
  # Strip comment markers and metadata; the /m patterns drop everything
  # from the matched label to the end of the header.
  parts = head.gsub("\n# ", "\n").gsub('#', '').
          gsub('encoding: utf-8', '').
          gsub(/Authors: (.*)/m, '').
          gsub(/License: (.*)/m, '').
          gsub(/Website: (.*)/m, '').
          split('Original paper: ')
  { description: parts[0] || '', reference: parts[1] || '-' }
end
|
263
|
-
|
264
|
-
# Runs a benchmark for each preset.
|
265
|
-
# Runs the examples of a scenario whose :examples entry is a Hash keyed
# by preset, one batch per preset.
#
# For each preset, a copy of the scenario carrying only that preset's
# examples is passed to Language.run_examples together with
# { group.preset_option => preset } as options. The caller's scenario
# is not modified.
#
# Returns [successes, tries] summed over all presets.
def run_scenario_presets(worker, group, target, scenario)
  successes = 0
  tries = 0
  # The block parameter has its own name so it no longer shadows a
  # local holding the whole examples Hash.
  scenario[:examples].each do |preset, preset_examples|
    options = { group.preset_option => preset }
    preset_scenario = scenario.dup
    preset_scenario[:examples] = preset_examples
    hits, attempts = Treat::Specs::Workers::Language.run_examples(
      worker, group, target, preset_scenario, options
    )
    successes += hits
    tries += attempts
  end
  [successes, tries]
end
|
277
|
-
|
278
|
-
end
|
279
|
-
|
280
|
-
end
|
data/spec/workers.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
module Treat::Specs::Workers
  # Example descriptions keyed by worker method name. A String describes
  # the method as a whole; a nested Hash gives one description per
  # preset value of that method.
  Descriptions = {
    stem: 'returns the stem of the word',
    conjugate: {
      infinitive: 'returns the infinitive form of a verb',
      present_participle: 'returns the present participle form of a verb'
    },
    declense: {
      plural: 'returns the plural form of the word',
      singular: 'returns the singular form of the word'
    },
    ordinal: 'returns the ordinal form of a number',
    sense: {
      synonyms: 'returns the synonyms of the word',
      antonyms: 'returns the antonyms of the word',
      hypernyms: 'returns the hypernyms of the word',
      hyponyms: 'returns the hyponyms of the word'
    },
    tag: 'returns the tag of the token',
    category: 'returns the category of the number, punctuation or symbol',
    name_tag: 'tags the named entity words in the group of words',
    time: 'annotates all entities within the group with time information',
    tokenize: 'splits the group of words into tokens and adds them as children of the group',
    parse: 'parses a group of words into its syntax tree, adding nested phrases and tokens as children of the group',
    topics: 'returns a list of general topics the document belongs to',
    segment: 'splits a zone into phrases/sentences and adds them as children of the zone'
  }
end
|