answersengine 0.10.1 → 0.10.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CODE_OF_CONDUCT.md +1 -1
- data/LICENSE.txt +1 -1
- data/README.md +3 -4
- data/answersengine.gemspec +6 -12
- data/exe/answersengine +3 -2
- data/lib/answersengine.rb +20 -3
- metadata +14 -152
- data/examples/fetchtest/libraries/hello.rb +0 -9
- data/examples/fetchtest/libraries/hello_fail.rb +0 -10
- data/examples/fetchtest/parsers/failed.rb +0 -2
- data/examples/fetchtest/parsers/find_outputs.rb +0 -18
- data/examples/fetchtest/parsers/home.rb +0 -50
- data/examples/fetchtest/parsers/nested_fail.rb +0 -3
- data/examples/fetchtest/parsers/simple.rb +0 -14
- data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
- data/examples/fetchtest/seeders/failed.rb +0 -1
- data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
- data/examples/fetchtest/seeders/seed.rb +0 -28
- data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
- data/lib/answersengine/cli.rb +0 -45
- data/lib/answersengine/cli/env_var.rb +0 -48
- data/lib/answersengine/cli/finisher.rb +0 -40
- data/lib/answersengine/cli/global_page.rb +0 -39
- data/lib/answersengine/cli/job.rb +0 -30
- data/lib/answersengine/cli/job_output.rb +0 -69
- data/lib/answersengine/cli/parser.rb +0 -64
- data/lib/answersengine/cli/scraper.rb +0 -185
- data/lib/answersengine/cli/scraper_deployment.rb +0 -24
- data/lib/answersengine/cli/scraper_export.rb +0 -51
- data/lib/answersengine/cli/scraper_exporter.rb +0 -40
- data/lib/answersengine/cli/scraper_finisher.rb +0 -20
- data/lib/answersengine/cli/scraper_job.rb +0 -75
- data/lib/answersengine/cli/scraper_job_var.rb +0 -48
- data/lib/answersengine/cli/scraper_page.rb +0 -203
- data/lib/answersengine/cli/scraper_var.rb +0 -48
- data/lib/answersengine/cli/seeder.rb +0 -40
- data/lib/answersengine/client.rb +0 -29
- data/lib/answersengine/client/auth_token.rb +0 -50
- data/lib/answersengine/client/backblaze_content.rb +0 -45
- data/lib/answersengine/client/base.rb +0 -55
- data/lib/answersengine/client/deploy_key.rb +0 -21
- data/lib/answersengine/client/env_var.rb +0 -28
- data/lib/answersengine/client/export.rb +0 -10
- data/lib/answersengine/client/global_page.rb +0 -18
- data/lib/answersengine/client/job.rb +0 -64
- data/lib/answersengine/client/job_export.rb +0 -10
- data/lib/answersengine/client/job_log.rb +0 -26
- data/lib/answersengine/client/job_output.rb +0 -19
- data/lib/answersengine/client/job_page.rb +0 -58
- data/lib/answersengine/client/job_stat.rb +0 -16
- data/lib/answersengine/client/scraper.rb +0 -57
- data/lib/answersengine/client/scraper_deployment.rb +0 -18
- data/lib/answersengine/client/scraper_export.rb +0 -22
- data/lib/answersengine/client/scraper_exporter.rb +0 -14
- data/lib/answersengine/client/scraper_finisher.rb +0 -16
- data/lib/answersengine/client/scraper_job.rb +0 -49
- data/lib/answersengine/client/scraper_job_output.rb +0 -19
- data/lib/answersengine/client/scraper_job_page.rb +0 -67
- data/lib/answersengine/client/scraper_job_var.rb +0 -28
- data/lib/answersengine/client/scraper_var.rb +0 -28
- data/lib/answersengine/plugin.rb +0 -6
- data/lib/answersengine/plugin/context_exposer.rb +0 -55
- data/lib/answersengine/scraper.rb +0 -18
- data/lib/answersengine/scraper/executor.rb +0 -373
- data/lib/answersengine/scraper/finisher.rb +0 -18
- data/lib/answersengine/scraper/parser.rb +0 -18
- data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
- data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
- data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
- data/lib/answersengine/scraper/seeder.rb +0 -18
- data/lib/answersengine/version.rb +0 -3
@@ -1,18 +0,0 @@
|
|
1
|
-
require "answersengine/plugin"
|
2
|
-
require "answersengine/scraper/parser"
|
3
|
-
require "answersengine/scraper/seeder"
|
4
|
-
require "answersengine/scraper/finisher"
|
5
|
-
require "answersengine/scraper/executor"
|
6
|
-
require "answersengine/scraper/ruby_parser_executor"
|
7
|
-
require "answersengine/scraper/ruby_seeder_executor"
|
8
|
-
require "answersengine/scraper/ruby_finisher_executor"
|
9
|
-
require "answersengine/client"
|
10
|
-
|
11
|
-
module AnswersEngine
|
12
|
-
module Scraper
|
13
|
-
# def self.list(opts={})
|
14
|
-
# scraper = Client::Scraper.new(opts)
|
15
|
-
# "Listing scrapers #{ENV['ANSWERSENGINE_TOKEN']} for #{scraper.all}"
|
16
|
-
# end
|
17
|
-
end
|
18
|
-
end
|
@@ -1,373 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
module AnswersEngine
|
3
|
-
module Scraper
|
4
|
-
# @abstract
|
5
|
-
class Executor
|
6
|
-
# Max allowed page size when query outputs (see #find_outputs).
|
7
|
-
MAX_FIND_OUTPUTS_PER_PAGE = 500
|
8
|
-
|
9
|
-
attr_accessor :filename, :gid, :job_id
|
10
|
-
|
11
|
-
include AnswersEngine::Plugin::ContextExposer
|
12
|
-
|
13
|
-
# Abstract entry point for running a parser; this base implementation
# always raises.
#
# @param [Boolean] save (false) Unused here.
#
# @raise [RuntimeError] Always.
def exec_parser(save = false)
  raise RuntimeError, "should be implemented in subclass"
end
|
16
|
-
|
17
|
-
# Loads the page to work on: the job page when `job_id` is set, otherwise
# the global page.
#
# @return Whatever `init_job_page` or `init_global_page` returns.
def init_page
  unless job_id
    puts "getting Global Page"
    return init_global_page
  end

  puts "getting Job Page"
  init_job_page
end
|
27
|
-
|
28
|
-
# Fetches the job page identified by `job_id` and `gid`.
#
# @return The response returned by `Client::JobPage#find` when its code is 200.
#
# @raise [RuntimeError] When the response code is anything other than 200.
def init_job_page
  response = Client::JobPage.new.find(job_id, gid)
  raise "Job #{job_id} or GID #{gid} not found. Aborting execution!" unless response.code == 200

  response
end
|
38
|
-
|
39
|
-
# Forwards a parsing update to the job page client.
#
# @param [Hash] options ({}) Must contain `:job_id` and `:gid`; the whole
#   hash is also passed along as the update payload.
#
# @raise [KeyError] When `:job_id` or `:gid` is missing.
def parsing_update(options = {})
  api = Client::JobPage.new
  api.parsing_update(options.fetch(:job_id), options.fetch(:gid), options)
end
|
46
|
-
|
47
|
-
# Forwards a seeding update to the job client.
#
# @param [Hash] options ({}) Must contain `:job_id`; the whole hash is also
#   passed along as the update payload.
#
# @raise [KeyError] When `:job_id` is missing.
def seeding_update(options = {})
  api = Client::Job.new
  api.seeding_update(options.fetch(:job_id), options)
end
|
53
|
-
|
54
|
-
# Forwards a finisher update to the job client.
#
# @param [Hash] options ({}) Must contain `:job_id`; the whole hash is also
#   passed along as the update payload.
#
# @raise [KeyError] When `:job_id` is missing.
def finisher_update(options = {})
  api = Client::Job.new
  api.finisher_update(options.fetch(:job_id), options)
end
|
60
|
-
|
61
|
-
# Looks up the global page for the current `gid`.
#
# @return Whatever `Client::GlobalPage#find` returns.
def init_global_page
  Client::GlobalPage.new.find(gid)
end
|
65
|
-
|
66
|
-
# Downloads the stored content of a global page.
#
# @param gid Global page id passed to `Client::GlobalPage#find_content`.
#
# @return The gunzipped content fetched from the signed URL, or `nil` when
#   the lookup's 'available' entry is falsy.
def get_content(gid)
  content_info = Client::GlobalPage.new.find_content(gid)
  return nil unless content_info['available']

  download_url = content_info['signed_url']
  Client::BackblazeContent.new.get_gunzipped_content(download_url)
end
|
77
|
-
|
78
|
-
# Downloads the stored failed content of a global page.
#
# @param gid Global page id passed to `Client::GlobalPage#find_failed_content`.
#
# @return The gunzipped content fetched from the signed URL, or `nil` when
#   the lookup's 'available' entry is falsy.
def get_failed_content(gid)
  content_info = Client::GlobalPage.new.find_failed_content(gid)
  return nil unless content_info['available']

  download_url = content_info['signed_url']
  Client::BackblazeContent.new.get_gunzipped_content(download_url)
end
|
89
|
-
|
90
|
-
# Get current job id from scraper or default when scraper_name is null.
|
91
|
-
#
|
92
|
-
# @param [String|nil] scraper_name Scraper name.
|
93
|
-
# @param [Integer|nil] default (nil) Default job id when no scraper name.
|
94
|
-
#
|
95
|
-
# @raise [Exception] When scraper name is not null, and scraper doesn't
|
96
|
-
# exist or it has no current job.
|
97
|
-
def get_job_id(scraper_name, default = nil)
  # Without a scraper name there is nothing to look up.
  return default if scraper_name.nil?

  current_job = Client::ScraperJob.new.find(scraper_name)
  found_id = current_job['id']
  # No id in the response: raise with the whole response pretty-printed.
  raise JSON.pretty_generate(current_job) if found_id.nil?

  found_id
end
|
103
|
-
|
104
|
-
# Find outputs by collection and query with pagination.
|
105
|
-
#
|
106
|
-
# @param [String] collection ('default') Collection name.
|
107
|
-
# @param [Hash] query ({}) Filters to query.
|
108
|
-
# @param [Integer] page (1) Page number.
|
109
|
-
# @param [Integer] per_page (100) Page size.
|
110
|
-
# @param [Hash] opts ({}) Configuration options.
|
111
|
-
# @option opts [String|nil] :scraper_name (nil) Scraper name to query
|
112
|
-
# from.
|
113
|
-
# @option opts [Integer|nil] :job_id (nil) Job's id to query from.
|
114
|
-
#
|
115
|
-
# @raise [ArgumentError] +collection+ is not String.
|
116
|
-
# @raise [ArgumentError] +query+ is not a Hash.
|
117
|
-
# @raise [ArgumentError] +page+ is not an Integer greater than 0.
|
118
|
-
# @raise [ArgumentError] +per_page+ is not an Integer between 1 and 500.
|
119
|
-
#
|
120
|
-
# @return [Array]
|
121
|
-
#
|
122
|
-
# @example
|
123
|
-
# find_outputs
|
124
|
-
# @example
|
125
|
-
# find_outputs 'my_collection'
|
126
|
-
# @example
|
127
|
-
# find_outputs 'my_collection', {}
|
128
|
-
# @example
|
129
|
-
# find_outputs 'my_collection', {}, 1
|
130
|
-
# @example
|
131
|
-
# find_outputs 'my_collection', {}, 1, 100
|
132
|
-
# @example Find from another scraper by name
|
133
|
-
# find_outputs 'my_collection', {}, 1, 100, scraper_name: 'my_scraper'
|
134
|
-
# @example Find from another scraper by job_id
|
135
|
-
# find_outputs 'my_collection', {}, 1, 100, job_id: 123
|
136
|
-
#
|
137
|
-
# @note *opts `:job_id` option is prioritized over `:scraper_name` when
|
138
|
-
# both exist. If neither is provided, or both are nil, then the current job
|
139
|
-
# will be used to query instead; this is the default behavior.
|
140
|
-
def find_outputs(collection = 'default', query = {}, page = 1, per_page = 100, opts = {})
  # Reject bad argument types up front so callers get a clear error.
  raise ArgumentError, "collection needs to be a String" unless collection.is_a?(String)
  raise ArgumentError, "query needs to be a Hash, instead of: #{query}" unless query.is_a?(Hash)
  raise ArgumentError, "page needs to be an Integer greater than 0" unless page.is_a?(Integer) && page > 0

  valid_page_size = per_page.is_a?(Integer) && per_page.between?(1, MAX_FIND_OUTPUTS_PER_PAGE)
  unless valid_page_size
    raise ArgumentError, "per_page needs to be an Integer between 1 and #{MAX_FIND_OUTPUTS_PER_PAGE}"
  end

  request_options = { query: query, page: page, per_page: per_page }

  # An explicit :job_id wins; otherwise resolve by scraper name, falling
  # back to the current job.
  target_job_id = opts[:job_id] || get_job_id(opts[:scraper_name], job_id)

  response = Client::JobOutput.new(request_options).all(target_job_id, collection)
  unless response.code == 200
    raise "response_code: #{response.code}|#{response.parsed_response}"
  end

  # A literal 'null' body is returned as an empty list.
  if response.body == 'null'
    []
  else
    response.parsed_response
  end
end
|
167
|
-
|
168
|
-
# Find one output by collection and query with pagination.
|
169
|
-
#
|
170
|
-
# @param [String] collection ('default') Collection name.
|
171
|
-
# @param [Hash] query ({}) Filters to query.
|
172
|
-
# @param [Hash] opts ({}) Configuration options.
|
173
|
-
# @option opts [String|nil] :scraper_name (nil) Scraper name to query
|
174
|
-
# from.
|
175
|
-
# @option opts [Integer|nil] :job_id (nil) Job's id to query from.
|
176
|
-
#
|
177
|
-
# @raise [ArgumentError] +collection+ is not String.
|
178
|
-
# @raise [ArgumentError] +query+ is not a Hash.
|
179
|
-
#
|
180
|
-
# @return [Hash|nil] `Hash` when found, and `nil` when no output is found.
|
181
|
-
#
|
182
|
-
# @example
|
183
|
-
# find_output
|
184
|
-
# @example
|
185
|
-
# find_output 'my_collection'
|
186
|
-
# @example
|
187
|
-
# find_output 'my_collection', {}
|
188
|
-
# @example Find from another scraper by name
|
189
|
-
# find_output 'my_collection', {}, scraper_name: 'my_scraper'
|
190
|
-
# @example Find from another scraper by job_id
|
191
|
-
# find_output 'my_collection', {}, job_id: 123
|
192
|
-
#
|
193
|
-
# @note *opts `:job_id` option is prioritized over `:scraper_name` when
|
194
|
-
# both exist. If neither is provided, or both are nil, then the current job
|
195
|
-
# will be used to query instead; this is the default behavior.
|
196
|
-
def find_output(collection = 'default', query = {}, opts = {})
  # Ask for a single record: page 1 with a page size of 1.
  matches = find_outputs(collection, query, 1, 1, opts)
  return nil unless matches.respond_to?(:first)

  matches.first
end
|
200
|
-
|
201
|
-
# Remove dups by prioritizing the latest dup.
|
202
|
-
#
|
203
|
-
# @param [Array] list List of hashes to dedup.
|
204
|
-
# @param [Hash] key_defaults Key and default value pair hash to use on
|
205
|
-
# uniq validation.
|
206
|
-
#
|
207
|
-
# @return [Integer] Removed duplicated items count.
|
208
|
-
def remove_old_dups!(list, key_defaults)
  size_before = list.count
  uniq_keys = key_defaults.keys
  unique_marker = 0

  # Walk the list back-to-front so the latest duplicate is the one kept.
  list.reverse!
  list.uniq! do |entry|
    # Collect the entry's values for the uniqueness keys, stringifying keys.
    identity = {}
    entry.each do |name, value|
      name = name.to_s
      identity[name] = value if uniq_keys.include?(name)
    end

    # Fill in the defaults for keys that are absent or nil.
    key_defaults.each do |name, fallback|
      identity[name] = fallback if identity[name].nil?
    end

    # An entry still missing a key value is never deduped: give it a
    # one-off counter value as its identity.
    missing_key = uniq_keys.find { |name| identity[name].nil? }
    missing_key.nil? ? identity : (unique_marker += 1)
  end
  list.reverse!

  size_before - list.count
end
|
227
|
-
|
228
|
-
# Remove page dups by prioritizing the latest dup.
|
229
|
-
#
|
230
|
-
# @param [Array] list List of pages to dedup.
|
231
|
-
#
|
232
|
-
# @return [Integer] Removed duplicated items count.
|
233
|
-
#
|
234
|
-
# @note It will not dedup for now as it is hard to build gid.
|
235
|
-
# TODO: Build gid so we can dedup
|
236
|
-
def remove_old_page_dups!(list)
  # 'gid' has a nil default, so pages without a gid are never deduped.
  remove_old_dups!(list, 'gid' => nil)
end
|
242
|
-
|
243
|
-
# Remove dups by prioritizing the latest dup.
|
244
|
-
#
|
245
|
-
# @param [Array] list List of outputs to dedup.
|
246
|
-
#
|
247
|
-
# @return [Integer] Removed duplicated items count.
|
248
|
-
def remove_old_output_dups!(list)
  # Outputs are identified by '_id' plus '_collection'; the collection
  # defaults to 'default', and outputs without an '_id' are never deduped.
  remove_old_dups!(list, '_id' => nil, '_collection' => 'default')
end
|
255
|
-
|
256
|
-
# Sends pages and outputs to the server in slices of 100 records each,
# deduping every slice first. When `save` is falsy the slices are only
# printed and sent with a "<status>_try" status.
#
# @param [Array] pages ([]) Pages to send. Warning: emptied by this call.
# @param [Array] outputs ([]) Outputs to send. Warning: emptied by this call.
# @param status Status sent with every slice.
#
# @raise [RuntimeError] When the server answers a slice with a code other
#   than 200.
#
# @note The "_try" suffix is derived from the untouched +status+ on every
#   slice, so later slices no longer get "_try_try".
def save_pages_and_outputs(pages = [], outputs = [], status)
  total_pages = pages.count
  total_outputs = outputs.count
  records_per_slice = 100

  until pages.empty? && outputs.empty?
    pages_slice = pages.shift(records_per_slice)
    pages_dup_count = remove_old_page_dups! pages_slice
    outputs_slice = outputs.shift(records_per_slice)
    outputs_dup_count = remove_old_output_dups! outputs_slice

    log_msgs = []
    unless pages_slice.empty?
      page_dups_ignored = pages_dup_count > 0 ? " (#{pages_dup_count} dups ignored)" : ''
      log_msgs << "#{pages_slice.count} out of #{total_pages} Pages#{page_dups_ignored}"

      unless save
        puts '----------------------------------------'
        # log_msgs.last already carries the dups-ignored suffix.
        puts "Trying to validate #{log_msgs.last}"
        puts JSON.pretty_generate pages_slice
      end
    end

    unless outputs_slice.empty?
      output_dups_ignored = outputs_dup_count > 0 ? " (#{outputs_dup_count} dups ignored)" : ''
      log_msgs << "#{outputs_slice.count} out of #{total_outputs} Outputs#{output_dups_ignored}"

      unless save
        puts '----------------------------------------'
        puts "Trying to validate #{log_msgs.last}"
        puts JSON.pretty_generate outputs_slice
      end
    end

    # Behave differently if it is a real save. +status+ itself is never
    # reassigned, so every slice gets the same status.
    if save
      puts "Saving #{log_msgs.join(' and ')}."
      slice_status = status
    else
      slice_status = "#{status}_try"
    end

    # Saving to server.
    response = update_to_server(
      job_id: job_id,
      gid: gid,
      pages: pages_slice,
      outputs: outputs_slice,
      status: slice_status)

    if response.code == 200
      puts(save ? "Saved." : "Validation successful")
    elsif save
      puts "Error: Unable to save Pages and/or Outputs to server: #{response.body}"
      raise "Unable to save Pages and/or Outputs to server: #{response.body}"
    else
      puts "Error: Invalid Pages and/or Outputs: #{response.body}"
      raise "Invalid Pages and/or Outputs: #{response.body}"
    end
  end
end
|
323
|
-
|
324
|
-
# Abstract hook called by `save_pages_and_outputs` to send one slice; this
# base implementation always raises.
#
# @param [Hash] opts ({}) Unused here.
#
# @raise [RuntimeError] Always.
def update_to_server(opts = {})
  raise RuntimeError, "Implemented in Subclass"
end
|
327
|
-
|
328
|
-
# Keeps only the backtrace frames that come before the first frame whose
# text matches /gems\/answersengine/i.
#
# @param [Array<String>, nil] backtrace Backtrace lines; `nil` is accepted
#   because `Exception#backtrace` is nil for an exception never raised.
#
# @return [Array<String>] Frames before the first matching frame. Empty when
#   +backtrace+ is nil, when the first frame matches, or when no frame
#   matches.
def clean_backtrace(backtrace)
  return [] if backtrace.nil?

  first_gem_frame = backtrace.index { |frame| frame =~ /gems\/answersengine/i }
  # nil.to_i is 0, so "no match" yields an empty list like the original.
  backtrace.first(first_gem_frame.to_i)
end
|
336
|
-
|
337
|
-
# Abstract hook supplying the status that `save_pages` and `save_outputs`
# pass along; this base implementation always raises.
#
# @raise [NotImplementedError] Always.
def save_type
  raise NotImplementedError, 'Need to implement "save_type" method.'
end
|
340
|
-
|
341
|
-
# Saves pages from an array and clears it.
|
342
|
-
#
|
343
|
-
# @param [Array] pages ([]) Page array to save. Warning: all elements will
|
344
|
-
# be removed from the array.
|
345
|
-
#
|
346
|
-
# @note IMPORTANT: +pages+ array's elements will be removed.
|
347
|
-
def save_pages(pages = [])
  # Pages only: the outputs list is left empty.
  no_outputs = []
  save_pages_and_outputs(pages, no_outputs, save_type)
end
|
350
|
-
|
351
|
-
# Saves outputs from an array and clears it.
|
352
|
-
#
|
353
|
-
# @param [Array] outputs ([]) Output array to save. Warning: all elements
|
354
|
-
# will be removed from the array.
|
355
|
-
#
|
356
|
-
# @note IMPORTANT: +outputs+ array's elements will be removed.
|
357
|
-
def save_outputs(outputs = [])
  # Outputs only: the pages list is left empty.
  no_pages = []
  save_pages_and_outputs(no_pages, outputs, save_type)
end
|
360
|
-
|
361
|
-
# Eval a filename with a custom binding
|
362
|
-
#
|
363
|
-
# @param [String] file_path File path to read.
|
364
|
-
# @param [Binding] context Context binding to evaluate with.
|
365
|
-
#
|
366
|
-
# @note Using this method will allow scripts to contain `return` to
|
367
|
-
# exit the script sooner along some improved security.
|
368
|
-
def eval_with_context(file_path, context)
  script_source = File.read(file_path)
  # The file path is passed as the third argument so errors and backtraces
  # point at the script.
  # NOTE(review): this evaluates the file's contents as Ruby code; callers
  # must only pass trusted script paths.
  eval(script_source, context, file_path)
end
|
371
|
-
end
|
372
|
-
end
|
373
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module AnswersEngine
|
2
|
-
module Scraper
|
3
|
-
# Picks a finisher executor from the script's file extension.
class Finisher
  # Runs a finisher script with the executor matching its extension.
  #
  # @param [String] filename Finisher script path.
  # @param [Integer|nil] job_id (nil) Job id handed to the executor.
  # @param [Boolean] save (false) Passed through to the executor.
  #
  # @return The executor's result for '.rb' files; otherwise a message is
  #   printed and `nil` is returned.
  def self.exec_finisher(filename, job_id = nil, save = false)
    extname = File.extname(filename)
    unless extname == '.rb'
      return puts("Unable to find a finisher executor for file type \"#{extname}\"")
    end

    RubyFinisherExecutor.new(filename: filename, job_id: job_id).exec_finisher(save)
  end
end
|
17
|
-
end
|
18
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module AnswersEngine
|
2
|
-
module Scraper
|
3
|
-
# Picks a parser executor from the script's file extension.
class Parser
  # Runs a parser script against a page with the executor matching the
  # script's extension.
  #
  # @param [String] filename Parser script path.
  # @param gid Page GID handed to the executor.
  # @param [Integer|nil] job_id (nil) Job id handed to the executor.
  # @param [Boolean] save (false) Passed through to the executor.
  # @param [Hash] vars ({}) Variables handed to the executor.
  #
  # @return The executor's result for '.rb' files; otherwise a message is
  #   printed and `nil` is returned.
  def self.exec_parser_page(filename, gid, job_id = nil, save = false, vars = {})
    extname = File.extname(filename)
    unless extname == '.rb'
      return puts("Unable to find a parser executor for file type \"#{extname}\"")
    end

    RubyParserExecutor.new(filename: filename, gid: gid, job_id: job_id, vars: vars).exec_parser(save)
  end
end
|
17
|
-
end
|
18
|
-
end
|
@@ -1,116 +0,0 @@
|
|
1
|
-
module AnswersEngine
|
2
|
-
module Scraper
|
3
|
-
class RubyFinisherExecutor < Executor
|
4
|
-
attr_accessor :save
|
5
|
-
|
6
|
-
# @param [Hash] options ({}) Configuration options.
# @option options [String] :filename Finisher script path (required).
# @option options [Integer|nil] :job_id (nil) Job id.
#
# @raise [RuntimeError] When `:filename` is missing.
def initialize(options = {})
  raise "Filename is required" unless options.key?(:filename)

  @filename = options[:filename]
  @job_id = options[:job_id]
end
|
10
|
-
|
11
|
-
# Names of the executor methods listed for exposure.
# NOTE(review): presumably consumed by the ContextExposer plugin — confirm.
#
# @return [Array<Symbol>] Frozen list of method names.
def self.exposed_methods
  %i[outputs save_outputs find_output find_outputs].freeze
end
|
19
|
-
|
20
|
-
# Runs the finisher script, remembering the save mode in `@save`.
#
# @param [Boolean] save (false) Stored in `@save` and passed on to
#   `eval_finisher_script`; it also selects which message is printed.
#
# @return Whatever `eval_finisher_script` returns.
def exec_finisher(save = false)
  @save = save
  puts(save ? "Executing finisher script" : "Trying finisher script")

  eval_finisher_script(save)
end
|
30
|
-
|
31
|
-
# Evaluates the finisher script and saves the outputs it collected.
#
# The script is evaluated in an isolated binding exposing `outputs` and
# `job_id`. If evaluating the script or saving its outputs raises, the error
# is passed to `handle_error` when +save+ is truthy, then re-raised.
#
# @param [Boolean] save (false) Whether failures are passed to
#   `handle_error` before being re-raised.
#
# @return Whatever `update_finisher_done_status` returns.
def eval_finisher_script(save = false)
  update_finisher_starting_status

  collected_outputs = []
  begin
    script_context = isolated_binding({
      outputs: collected_outputs,
      job_id: job_id
    })
    eval_with_context filename, script_context
  rescue SyntaxError, StandardError => error
    # SyntaxError is not a StandardError, so it must be listed explicitly.
    handle_error(error) if save
    raise error
  end

  puts "=========== Finisher Executed ==========="
  begin
    save_outputs(collected_outputs)
  rescue => error
    handle_error(error) if save
    raise error
  end

  update_finisher_done_status
end
|
63
|
-
|
64
|
-
# Status passed along by `save_outputs` for this executor.
#
# @return [Symbol] Always `:executing`.
def save_type
  :executing
end
|
67
|
-
|
68
|
-
# Sends one slice of outputs to the server through `finisher_update`.
#
# @param [Hash] opts ({}) Slice data; only `:job_id`, `:outputs` and
#   `:status` are forwarded (`:status` is sent as `:finisher_status`).
#
# @return Whatever `finisher_update` returns.
def update_to_server(opts = {})
  payload = {
    job_id: opts[:job_id],
    outputs: opts[:outputs],
    finisher_status: opts[:status]
  }
  finisher_update(payload)
end
|
74
|
-
|
75
|
-
# Reports the `:starting` finisher status to the server. Does nothing when
# `save` is falsy.
#
# @raise [RuntimeError] When the server response code is not 200.
def update_finisher_starting_status
  return unless save

  response = finisher_update(job_id: job_id, finisher_status: :starting)
  unless response.code == 200
    puts "Error: Unable to save Finisher Status to server: #{response.body}"
    raise "Unable to save Finisher Status to server: #{response.body}"
  end

  puts "Finisher Status Updated."
end
|
89
|
-
|
90
|
-
# Reports the `:done` finisher status to the server. Does nothing when
# `save` is falsy.
#
# @raise [RuntimeError] When the server response code is not 200.
def update_finisher_done_status
  return unless save

  response = finisher_update(job_id: job_id, finisher_status: :done)
  unless response.code == 200
    puts "Error: Unable to save Finisher Done Status to server: #{response.body}"
    raise "Unable to save Finisher Done Status to server: #{response.body}"
  end

  puts "Finisher Done."
end
|
104
|
-
|
105
|
-
# Reports a finisher failure to the server with the `:failed` status.
#
# The logged error is a header line (error class, message and job id)
# followed by the frames returned by `clean_backtrace`, one per line.
#
# @param [Exception] e Error raised while running the finisher.
#
# @return Whatever `finisher_update` returns.
def handle_error(e)
  # The closing parenthesis after the job id was missing in the original.
  header = "Finisher #{e.class}: #{e.to_s} (Job:#{job_id})"
  error = [header, clean_backtrace(e.backtrace)].join("\n")

  finisher_update(
    job_id: job_id,
    finisher_status: :failed,
    log_error: error)
end
|
113
|
-
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|