treat 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,280 +0,0 @@
1
- module Treat::Specs::Workers
2
-
3
- class Language
4
-
5
- include Treat::Core::DSL
6
-
7
# Language subclasses are appended to this list by the
# inherited hook and exposed through Language.list.
@@list = []

# Headings for the list of workers table. Passed as the
# header row to the text and HTML table helpers by #run;
# the column order matches the row built by
# #run_scenarios_as_benchmarks.
BenchmarkHeadings =
['Method', 'Worker', 'Description',
'Reference', 'User time', 'System time',
'Real time', 'Accuracy']
14
-
15
# Inheritance hook: records the new subclass in the shared
# list and gives it a constructor that takes the run mode.
def self.inherited(base)
  @@list.push(base)
  base.class_eval do
    # Keeps the subclass's Scenarios constant and the mode,
    # and derives the language from the class's `mn` helper
    # (project extension, presumably the bare module name
    # -- confirm), downcased.
    def initialize(mode)
      @scenarios = self.class.const_get(:Scenarios)
      @mode = mode
      @language = self.class.mn.downcase
    end
  end
end
27
-
28
# Accessor for the language classes registered so far.
def self.list
  @@list
end
30
-
31
- # Default options for #run.
32
- DefaultOptions = { save_html: true }
33
-
34
# Runs every scenario for this language. In 'benchmark' mode the
# collected rows are printed as a text table and, unless
# :save_html is turned off, also handed to the HTML table helper.
# In any other mode the scenarios are run and nothing is printed.
def run(options = {})
  settings = DefaultOptions.merge(options)
  rows = run_scenarios
  return unless @mode == 'benchmark'

  title = @language.capitalize
  print "\n\nBenchmark for #{title}\n"
  Treat::Specs::Helper.text_table(BenchmarkHeadings, rows)
  Treat::Specs::Helper.html_table(BenchmarkHeadings, rows) if settings[:save_html]
end
49
-
50
# Walks every worker category and group configured for the
# current language and dispatches each worker to the
# mode-specific runner (run_scenarios_as_<mode>s).
# Returns the array of values produced by those runner calls.
def run_scenarios
  categories = Treat.languages[@language].workers
  rows = []
  runner = "run_scenarios_as_#{@mode}s"
  # FIXME: these workers are skipped by every run.
  skipped = [:mongo, :html, :lda]

  categories.members.each do |category_name|
    category = categories[category_name]
    category.members.each do |group_name|
      workers = category[group_name]
      # Resolve the group module from the category and group
      # names (`cc` is a project helper, presumably camel-casing
      # the name -- confirm).
      group_class = Treat::Workers.
        const_get(category_name.cc).
        const_get(group_name.cc)
      workers.each do |worker|
        next if skipped.include?(worker)
        rows << send(runner, worker, group_class)
      end
    end
  end

  rows
end
77
-
78
# Benchmarks one worker and returns a table row:
# [method, worker, description, reference,
#  user time, system time, real time, accuracy].
# Times are rounded to four decimals and rendered as strings.
def run_scenarios_as_benchmarks(worker, group)
  info = get_worker_info(worker, group)
  accuracy = 0
  timing = ::Benchmark.measure do
    accuracy = run_scenarios_for_all_workers(
      worker, group, 'benchmark')
  end
  elapsed = [timing.utime, timing.stime, timing.real].map do |seconds|
    seconds.round(4).to_s
  end

  [group.method.to_s,
   worker.to_s,
   info[:description].strip,
   info[:reference] || '-',
   *elapsed,
   accuracy]
end
97
-
98
# Spec-mode counterpart of the benchmark runner: runs the
# worker's examples on each of its target entities in 'spec' mode.
def run_scenarios_as_specs(worker, group)
  mode = 'spec'
  run_scenarios_for_all_workers(worker, group, mode)
end
103
-
104
# Runs a worker's scenario (as specs or as benchmarks, depending
# on +mode+) on every target entity type of the group and returns
# the worker's accuracy.
#
# worker - the worker to exercise.
# group  - the worker group; must respond to #targets.
# mode   - 'spec' or 'benchmark'; selects run_worker_<mode>s.
#
# Returns the percentage of successful tries, rounded to two
# decimals, or 0.0 when no target was processed (previously this
# case produced NaN from a 0/0 division).
def run_scenarios_for_all_workers(worker, group, mode)
  runner = "run_worker_#{mode}s"
  successes = 0
  tries = 0
  group.targets.each do |target|
    next if target == :section ### FIXME
    hits, attempts = send(runner, worker, group, target)
    successes += hits
    tries += attempts
  end
  # Nothing was tried, so there is no ratio to compute.
  return 0.0 if tries.zero?
  (successes.to_f / tries.to_f * 100).round(2)
end
119
-
120
# Run all examples available to test the worker
# on a given target entity type as benchmarks.
#
# Examples are run once per preset only when the examples are a
# Hash AND the group declares a preset option, the same condition
# #run_worker_specs uses. A Hash of examples for a group without
# a preset option is passed straight to run_examples, which
# treats it as keyed by worker.
#
# Outputs [# successes, # tries]; [0, 1] when no scenario exists.
def run_worker_benchmarks(worker, group, target)
  # find_scenario already returns the scenario for this target,
  # so no second lookup is needed.
  scenario = find_scenario(group.method, target)
  return [0, 1] unless scenario
  if scenario[:examples].is_a?(Hash) && group.preset_option
    hits, attempts = run_scenario_presets(
      worker, group, target, scenario)
  else
    hits, attempts = Treat::Specs::Workers::Language.
      run_examples(worker, group, target, scenario)
  end
  [hits, attempts]
end
136
-
137
-
138
# Turns the worker's examples for one target entity type into an
# RSpec example group and registers it. Each example expects the
# worker to reach 100% accuracy.
#
# Returns [# successes, # tries] as the counters stand when this
# method returns, or [0, 1] when no scenario exists.
# NOTE(review): the counters are only updated inside the `it`
# blocks; if RSpec executes those after `register` returns, the
# pair returned here is still [0, 0] -- confirm.
def run_worker_specs(worker, group, target)
  scenario = find_scenario(group.method, target)
  return [0, 1] unless scenario
  descriptions = Treat::Specs::Workers::
    Descriptions[group.method]
  successes = 0
  tries = 0
  example_group = RSpec::Core::ExampleGroup.describe(group) do
    context "when it is called on a #{target}" do
      if scenario[:examples].is_a?(Hash) && group.preset_option
        # One nested context per preset value.
        scenario[:examples].each do |preset, preset_examples|
          context "and #{group.preset_option} is set to #{preset}" do
            it descriptions[preset] do
              preset_options = { group.preset_option => preset }
              narrowed = scenario.dup
              narrowed[:examples] = preset_examples
              hits, attempts = *Treat::Specs::Workers::Language.
                run_examples(worker, group, target, narrowed, preset_options)
              (hits.to_f / attempts.to_f * 100).round(2).should eql 100.0
              successes += hits
              tries += attempts
            end
          end
        end
      else
        it descriptions do
          successes, tries = Treat::Specs::Workers::Language.
            run_examples(worker, group, target, scenario)
          (successes.to_f / tries.to_f * 100).round(2).should eql 100.0
        end
      end
    end
  end
  example_group.register
  [successes, tries]
end
175
-
176
# Runs every example of a scenario through a worker and counts
# how many produce the expected value.
#
# worker   - the worker to call.
# group    - the worker group; supplies #method (the entity method
#            to invoke) and #type.
# target   - the target entity type; resolved to a class under
#            Treat::Entities via the project `cc` helper.
# scenario - Hash with :examples and optional :generator and
#            :preprocessor callables. :examples is either an Array
#            of [value, expectation, options] triples or a Hash of
#            such arrays keyed by worker.
# options  - base options passed to every call.
#
# Each example's own options (a Hash, or a Proc returning one) are
# merged over the base options for that example only; they no
# longer carry over into the examples that follow.
#
# Returns [# successes, # tries], or [1, 1] when nothing was
# counted. NOTE(review): examples that raise Treat::Exception are
# reported and skipped without counting as a try, so a scenario
# whose examples all raise also yields [1, 1] -- confirm intended.
#
# Raises Treat::Exception when :examples is a Hash without an
# entry for the worker.
def self.run_examples(worker, group, target, scenario, options = {})
  examples = scenario[:examples]
  generator = scenario[:generator]
  preprocessor = scenario[:preprocessor]
  target_class = Treat::Entities.
    const_get(target.cc)
  if examples.is_a?(Hash)
    unless examples[worker]
      raise Treat::Exception,
        "No example defined for worker #{worker}."
    end
    examples = examples[worker]
  end
  successes = 0
  tries = 0
  examples.each do |example|
    value, expectation, example_options = *example
    entity = target_class.build(value)
    begin
      preprocessor.call(entity) if preprocessor
      if example_options.is_a?(::Proc)
        example_options = example_options.call
      end
      # Merge into a fresh Hash so the base options stay
      # untouched for the next example.
      call_options = options.merge(example_options || {})
      result = entity.send(group.method, worker, call_options)
      if generator
        # Computers hand their result to the generator; other
        # worker types hand over the entity they were called on.
        operand = (group.type == :computer ? result : entity)
        result = generator.call(operand)
      end
    rescue Treat::Exception => e
      puts e.message
      next
    end
    puts result.inspect
    successes += 1 if result == expectation
    tries += 1
  end
  tries.zero? ? [1, 1] : [successes, tries]
end
221
-
222
- # * Helpers * #
223
-
224
# Given a method and a target, find a scenario for the
# current language class instance.
#
# method - the worker method name (key into the scenarios).
# target - the target entity type (key into the method's
#          scenarios).
#
# Returns the scenario, or nil after printing a warning when
# either the method has no scenarios at all or it has none for
# this target. The second check used to repeat the first one, so
# the target-specific warning could never be printed.
def find_scenario(method, target)
  by_target = @scenarios[method]
  unless by_target
    puts "Warning: there is no scenario for " +
      "method ##{method} called on " +
      "#{target.to_s.plural} in the " +
      "#{@language.capitalize} language."
    return nil
  end
  scenario = by_target[target]
  unless scenario
    puts "Warning: there is a scenario for " +
      "method ##{method} in the " +
      "#{@language.capitalize} language, " +
      "but there are no examples for target " +
      "entity type '#{target.to_s.plural}'."
    return nil
  end
  scenario
end
245
-
246
# Parse out the description and reference from the header
# comment of the Ruby file defining the worker/adapter.
#
# worker - the worker name; used as the file's base name.
# group  - the worker group; its name, split on '::' and passed
#          through the project `ucc` helper (presumably an
#          underscore-casing helper -- confirm), gives the
#          directory under Treat.paths.lib.
#
# Returns { description:, reference: }. The reference is the text
# after 'Original paper: ' and defaults to '-'.
def get_worker_info(worker, group)
  segments = group.to_s.split('::').map { |bit| bit.ucc }
  file = segments.join('/') + "/#{worker}.rb"
  contents = File.read(Treat.paths.lib + file)
  # The header ends at the first line that opens a class. Match
  # the keyword at the start of a line so that words such as
  # "classifier" inside the header comment do not cut it short,
  # and keep the whole file when no class line is found.
  class_start = contents.index(/^[ \t]*class\b/)
  head = class_start ? contents[0...class_start] : contents
  parts = head.gsub("\n# ", "\n").gsub('#', '').
    gsub('encoding: utf-8', '').
    gsub(/Authors: (.*)/m, ''). # ouch
    gsub(/License: (.*)/m, '').
    gsub(/Website: (.*)/m, '').
    split('Original paper: ')
  { description: parts[0] || '',
    reference: parts[1] || '-' }
end
263
-
264
# Benchmarks the worker once per preset: for every preset value
# in the scenario's examples, runs that preset's examples with
# the group's preset option set to the value, and sums the
# counts. Returns [# successes, # tries].
def run_scenario_presets(worker, group, target, scenario)
  successes = 0
  tries = 0
  scenario[:examples].each do |preset, preset_examples|
    preset_options = { group.preset_option => preset }
    # Copy the scenario so only this preset's examples are run.
    narrowed = scenario.dup
    narrowed[:examples] = preset_examples
    hits, attempts = Treat::Specs::Workers::Language.
      run_examples(worker, group, target, narrowed, preset_options)
    successes += hits
    tries += attempts
  end
  [successes, tries]
end
277
-
278
- end
279
-
280
- end
data/spec/workers.rb DELETED
@@ -1,28 +0,0 @@
1
module Treat::Specs::Workers
  # Spec descriptions, keyed by worker method name. A String
  # value describes the method as a whole; a Hash value holds one
  # description per preset value of that method.
  Descriptions = {
    stem: "returns the stem of the word",
    # Keyed by the requested verb form.
    conjugate: {
      infinitive: "returns the infinitive form of a verb",
      present_participle: "returns the present participle form of a verb"
    },
    # Keyed by the requested grammatical number.
    declense: {
      plural: "returns the plural form of the word",
      singular: "returns the singular form of the word"
    },
    ordinal: "returns the ordinal form of a number",
    # Keyed by the requested lexical relation.
    sense: {
      synonyms: "returns the synonyms of the word",
      antonyms: "returns the antonyms of the word",
      hypernyms: "returns the hypernyms of the word",
      hyponyms:"returns the hyponyms of the word"
    },
    tag: "returns the tag of the token",
    category: "returns the category of the number, punctuation or symbol",
    name_tag: "tags the named entity words in the group of words",
    time: "annotates all entities within the group with time information",
    tokenize: "splits the group of words into tokens and adds them as children of the group",
    parse: "parses a group of words into its syntax tree, adding nested phrases and tokens as children of the group",
    topics: "returns a list of general topics the document belongs to",
    segment: "splits a zone into phrases/sentences and adds them as children of the zone"
  }
end