answersengine 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/answersengine/cli/finisher.rb +40 -0
- data/lib/answersengine/cli.rb +5 -1
- data/lib/answersengine/client/auth_token.rb +10 -10
- data/lib/answersengine/client/deploy_key.rb +6 -4
- data/lib/answersengine/client/export.rb +2 -2
- data/lib/answersengine/client/job.rb +17 -5
- data/lib/answersengine/client/job_log.rb +8 -9
- data/lib/answersengine/client/job_page.rb +8 -7
- data/lib/answersengine/client/scraper.rb +8 -7
- data/lib/answersengine/client/scraper_deployment.rb +4 -3
- data/lib/answersengine/client/scraper_export.rb +2 -2
- data/lib/answersengine/client/scraper_exporter.rb +2 -2
- data/lib/answersengine/client/scraper_job.rb +6 -6
- data/lib/answersengine/client/scraper_job_page.rb +12 -11
- data/lib/answersengine/scraper/executor.rb +7 -0
- data/lib/answersengine/scraper/finisher.rb +18 -0
- data/lib/answersengine/scraper/ruby_finisher_executor.rb +110 -0
- data/lib/answersengine/scraper.rb +2 -0
- data/lib/answersengine/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be8e1b2d4a1ca60a2b0f1ccc1fd5c00f33aa2f412214d9294e357ec2bfd353f1
|
4
|
+
data.tar.gz: ec3e082f0b8905a313f43a1d7d6018640c06baa6d06501f62126cab2dbe0af32
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dec73a502a3431a5471f02a2b4093d57ed0cd23528a5ea6ec3915ea67031e724471df15d36499e5217a37ad0f32a51a87e7329d095670baedde6f5186ee2f42
|
7
|
+
data.tar.gz: a2864beba98cdc6660e091649e89fb0756f0edd3528443ba8231ae17d079cd1efa8adab80329271f32b7dc80ab68b0f1ca9d12897dea82ab0284d66bda2566f3
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module AnswersEngine
|
2
|
+
class CLI < Thor
|
3
|
+
class Finisher < Thor
|
4
|
+
desc "try <scraper_name> <finisher_file>", "Tries a finisher file"
|
5
|
+
long_desc <<-LONGDESC
|
6
|
+
Takes a finisher script and tries to execute it without saving anything.\x5
|
7
|
+
<finisher_file>: Finisher script file will be executed.\x5
|
8
|
+
LONGDESC
|
9
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
10
|
+
def try_finisher(scraper_name, finisher_file)
|
11
|
+
if options[:job]
|
12
|
+
job_id = options[:job]
|
13
|
+
else
|
14
|
+
job = Client::ScraperJob.new(options).find(scraper_name)
|
15
|
+
job_id = job['id']
|
16
|
+
end
|
17
|
+
|
18
|
+
puts AnswersEngine::Scraper::Finisher.exec_finisher(finisher_file, job_id, false)
|
19
|
+
end
|
20
|
+
|
21
|
+
desc "exec <scraper_name> <finisher_file>", "Executes a finisher script onto a scraper's current job."
|
22
|
+
long_desc <<-LONGDESC
|
23
|
+
Takes a finisher script and execute it against a job and save outputs into the scraper's current job\x5
|
24
|
+
<finisher_file>: Finisher script file that will be executed on the scraper's current job.\x5
|
25
|
+
LONGDESC
|
26
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
27
|
+
def exec_finisher(scraper_name, finisher_file)
|
28
|
+
if options[:job]
|
29
|
+
job_id = options[:job]
|
30
|
+
else
|
31
|
+
job = Client::ScraperJob.new(options).find(scraper_name)
|
32
|
+
job_id = job['id']
|
33
|
+
end
|
34
|
+
|
35
|
+
puts AnswersEngine::Scraper::Finisher.exec_finisher(finisher_file, job_id, true)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
data/lib/answersengine/cli.rb
CHANGED
@@ -11,6 +11,7 @@ require 'answersengine/cli/scraper_deployment'
|
|
11
11
|
require 'answersengine/cli/scraper'
|
12
12
|
require 'answersengine/cli/parser'
|
13
13
|
require 'answersengine/cli/seeder'
|
14
|
+
require 'answersengine/cli/finisher'
|
14
15
|
|
15
16
|
|
16
17
|
module AnswersEngine
|
@@ -29,5 +30,8 @@ module AnswersEngine
|
|
29
30
|
|
30
31
|
desc "seeder SUBCOMMAND ...ARGS", "for seeding related activities"
|
31
32
|
subcommand "seeder", Seeder
|
33
|
+
|
34
|
+
desc "finisher SUBCOMMAND ...ARGS", "for finisher related activities"
|
35
|
+
subcommand "finisher", Finisher
|
32
36
|
end
|
33
|
-
end
|
37
|
+
end
|
@@ -7,7 +7,8 @@ module AnswersEngine
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def all(opts={})
|
10
|
-
|
10
|
+
params = @options.merge(opts)
|
11
|
+
self.class.get("/auth_tokens", params)
|
11
12
|
end
|
12
13
|
|
13
14
|
def create(role, description, opts={})
|
@@ -15,8 +16,8 @@ module AnswersEngine
|
|
15
16
|
role: role,
|
16
17
|
description: description}
|
17
18
|
|
18
|
-
@options.merge
|
19
|
-
self.class.post("/auth_tokens",
|
19
|
+
params = @options.merge({body: body.to_json})
|
20
|
+
self.class.post("/auth_tokens", params)
|
20
21
|
end
|
21
22
|
|
22
23
|
def create_on_account(account_id, role, description)
|
@@ -24,8 +25,8 @@ module AnswersEngine
|
|
24
25
|
role: role,
|
25
26
|
description: description}
|
26
27
|
|
27
|
-
@options.merge
|
28
|
-
self.class.post("/accounts/#{account_id}/auth_tokens",
|
28
|
+
params = @options.merge({body: body.to_json})
|
29
|
+
self.class.post("/accounts/#{account_id}/auth_tokens", params)
|
29
30
|
end
|
30
31
|
|
31
32
|
def update(token, role, description="", opts={})
|
@@ -33,18 +34,17 @@ module AnswersEngine
|
|
33
34
|
|
34
35
|
body[:role] = role
|
35
36
|
body[:description] = description if description.present?
|
36
|
-
@options.merge
|
37
|
+
params = @options.merge({body: body.to_json})
|
37
38
|
|
38
|
-
self.class.put("/auth_tokens/#{token}",
|
39
|
+
self.class.put("/auth_tokens/#{token}", params)
|
39
40
|
end
|
40
41
|
|
41
42
|
def delete(token, opts={})
|
42
43
|
body = {}
|
43
|
-
@options.merge
|
44
|
+
params = @options.merge({body: body.to_json})
|
44
45
|
|
45
|
-
self.class.delete("/auth_tokens/#{token}",
|
46
|
+
self.class.delete("/auth_tokens/#{token}", params)
|
46
47
|
end
|
47
48
|
end
|
48
49
|
end
|
49
50
|
end
|
50
|
-
|
@@ -3,17 +3,19 @@ module AnswersEngine
|
|
3
3
|
class DeployKey < AnswersEngine::Client::Base
|
4
4
|
|
5
5
|
def find(opts={})
|
6
|
-
|
6
|
+
params = @options.merge(opts)
|
7
|
+
self.class.get("/deploy_key", params)
|
7
8
|
end
|
8
9
|
|
9
10
|
def create(opts={})
|
10
|
-
|
11
|
+
params = @options.merge(opts)
|
12
|
+
self.class.post("/deploy_key", params)
|
11
13
|
end
|
12
14
|
|
13
15
|
def delete(opts={})
|
14
|
-
|
16
|
+
params = @options.merge(opts)
|
17
|
+
self.class.delete("/deploy_key", params)
|
15
18
|
end
|
16
19
|
end
|
17
20
|
end
|
18
21
|
end
|
19
|
-
|
@@ -2,7 +2,8 @@ module AnswersEngine
|
|
2
2
|
module Client
|
3
3
|
class Job < AnswersEngine::Client::Base
|
4
4
|
def all(opts={})
|
5
|
-
|
5
|
+
params = @options.merge(opts)
|
6
|
+
self.class.get("/jobs", params)
|
6
7
|
end
|
7
8
|
|
8
9
|
def find(job_id)
|
@@ -14,9 +15,9 @@ module AnswersEngine
|
|
14
15
|
body[:status] = opts[:status] if opts[:status]
|
15
16
|
body[:standard_worker_count] = opts[:workers] if opts[:workers]
|
16
17
|
body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
|
17
|
-
@options.merge
|
18
|
+
params = @options.merge({body: body.to_json})
|
18
19
|
|
19
|
-
self.class.put("/jobs/#{job_id}",
|
20
|
+
self.class.put("/jobs/#{job_id}", params)
|
20
21
|
end
|
21
22
|
|
22
23
|
def cancel(job_id, opts={})
|
@@ -41,9 +42,20 @@ module AnswersEngine
|
|
41
42
|
body[:seeding_status] = opts.fetch(:seeding_status){ nil }
|
42
43
|
body[:log_error] = opts[:log_error] if opts[:log_error]
|
43
44
|
|
44
|
-
@options.merge
|
45
|
+
params = @options.merge({body: body.to_json})
|
45
46
|
|
46
|
-
self.class.put("/jobs/#{job_id}/seeding_update",
|
47
|
+
self.class.put("/jobs/#{job_id}/seeding_update", params)
|
48
|
+
end
|
49
|
+
|
50
|
+
def finisher_update(job_id, opts={})
|
51
|
+
body = {}
|
52
|
+
body[:outputs] = opts.fetch(:outputs) {[]}
|
53
|
+
body[:finisher_status] = opts.fetch(:finisher_status){ nil }
|
54
|
+
body[:log_error] = opts[:log_error] if opts[:log_error]
|
55
|
+
|
56
|
+
params = @options.merge({body: body.to_json})
|
57
|
+
|
58
|
+
self.class.put("/jobs/#{job_id}/finisher_update", params)
|
47
59
|
end
|
48
60
|
|
49
61
|
end
|
@@ -2,26 +2,25 @@ module AnswersEngine
|
|
2
2
|
module Client
|
3
3
|
class JobLog < AnswersEngine::Client::Base
|
4
4
|
def all_job_page_log(job_id, gid, opts={})
|
5
|
-
@options.merge
|
6
|
-
self.class.get("/jobs/#{job_id}/pages/#{gid}/log",
|
5
|
+
params = @options.merge(opts)
|
6
|
+
self.class.get("/jobs/#{job_id}/pages/#{gid}/log", params)
|
7
7
|
end
|
8
8
|
|
9
9
|
def scraper_all_job_page_log(scraper_name, gid, opts={})
|
10
|
-
@options.merge
|
11
|
-
self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log",
|
10
|
+
params = @options.merge(opts)
|
11
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log", params)
|
12
12
|
end
|
13
13
|
|
14
14
|
def all_job_log(job_id, opts={})
|
15
|
-
@options.merge
|
16
|
-
self.class.get("/jobs/#{job_id}/log",
|
15
|
+
params = @options.merge(opts)
|
16
|
+
self.class.get("/jobs/#{job_id}/log", params)
|
17
17
|
end
|
18
18
|
|
19
19
|
def scraper_all_job_log(scraper_name, opts={})
|
20
|
-
@options.merge
|
21
|
-
self.class.get("/scrapers/#{scraper_name}/current_job/log",
|
20
|
+
params = @options.merge(opts)
|
21
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/log", params)
|
22
22
|
end
|
23
23
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
27
|
-
|
@@ -6,7 +6,8 @@ module AnswersEngine
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def all(job_id, opts={})
|
9
|
-
|
9
|
+
params = @options.merge(opts)
|
10
|
+
self.class.get("/jobs/#{job_id}/pages", params)
|
10
11
|
end
|
11
12
|
|
12
13
|
def update(job_id, gid, opts={})
|
@@ -15,9 +16,9 @@ module AnswersEngine
|
|
15
16
|
body[:priority] = opts[:priority] if opts[:priority]
|
16
17
|
body[:vars] = opts[:vars] if opts[:vars]
|
17
18
|
|
18
|
-
@options.merge
|
19
|
+
params = @options.merge({body: body.to_json})
|
19
20
|
|
20
|
-
self.class.put("/jobs/#{job_id}/pages/#{gid}",
|
21
|
+
self.class.put("/jobs/#{job_id}/pages/#{gid}", params)
|
21
22
|
end
|
22
23
|
|
23
24
|
def enqueue(job_id, method, url, opts={})
|
@@ -36,9 +37,9 @@ module AnswersEngine
|
|
36
37
|
body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
|
37
38
|
body[:cookie] = opts[:cookie] if opts[:cookie]
|
38
39
|
|
39
|
-
@options.merge
|
40
|
+
params = @options.merge({body: body.to_json})
|
40
41
|
|
41
|
-
self.class.post("/jobs/#{job_id}/pages",
|
42
|
+
self.class.post("/jobs/#{job_id}/pages", params)
|
42
43
|
end
|
43
44
|
|
44
45
|
def parsing_update(job_id, gid, opts={})
|
@@ -48,9 +49,9 @@ module AnswersEngine
|
|
48
49
|
body[:parsing_status] = opts.fetch(:parsing_status){ nil }
|
49
50
|
body[:log_error] = opts[:log_error] if opts[:log_error]
|
50
51
|
|
51
|
-
@options.merge
|
52
|
+
params = @options.merge({body: body.to_json})
|
52
53
|
|
53
|
-
self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update",
|
54
|
+
self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
|
54
55
|
end
|
55
56
|
end
|
56
57
|
end
|
@@ -7,7 +7,8 @@ module AnswersEngine
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def all(opts={})
|
10
|
-
|
10
|
+
params = @options.merge opts
|
11
|
+
self.class.get("/scrapers", params)
|
11
12
|
end
|
12
13
|
|
13
14
|
def create(scraper_name, git_repository, opts={})
|
@@ -24,8 +25,8 @@ module AnswersEngine
|
|
24
25
|
body[:cancel_current_job] = opts[:cancel_current_job] if opts[:cancel_current_job]
|
25
26
|
body[:schedule] = opts[:schedule] if opts[:schedule]
|
26
27
|
body[:timezone] = opts[:timezone] if opts[:timezone]
|
27
|
-
@options.merge
|
28
|
-
self.class.post("/scrapers",
|
28
|
+
params = @options.merge({body: body.to_json})
|
29
|
+
self.class.post("/scrapers", params)
|
29
30
|
end
|
30
31
|
|
31
32
|
def update(scraper_name, opts={})
|
@@ -42,15 +43,15 @@ module AnswersEngine
|
|
42
43
|
body[:cancel_current_job] = opts[:cancel_current_job] if opts.has_key?("cancel_current_job") || opts.has_key?(:cancel_current_job)
|
43
44
|
body[:schedule] = opts[:schedule] if opts[:schedule]
|
44
45
|
body[:timezone] = opts[:timezone] if opts[:timezone]
|
45
|
-
@options.merge
|
46
|
+
params = @options.merge({body: body.to_json})
|
46
47
|
|
47
|
-
self.class.put("/scrapers/#{scraper_name}",
|
48
|
+
self.class.put("/scrapers/#{scraper_name}", params)
|
48
49
|
end
|
49
50
|
|
50
51
|
def delete(scraper_name, opts={})
|
51
|
-
|
52
|
+
params = @options.merge(opts)
|
53
|
+
self.class.delete("/scrapers/#{scraper_name}", params)
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
55
57
|
end
|
56
|
-
|
@@ -3,15 +3,16 @@ module AnswersEngine
|
|
3
3
|
class ScraperDeployment < AnswersEngine::Client::Base
|
4
4
|
|
5
5
|
def all(scraper_name, opts={})
|
6
|
-
|
6
|
+
params = @options.merge(opts)
|
7
|
+
self.class.get("/scrapers/#{scraper_name}/deployments", params)
|
7
8
|
end
|
8
9
|
|
9
10
|
|
10
11
|
def deploy(scraper_name, opts={})
|
11
|
-
|
12
|
+
params = @options.merge(opts)
|
13
|
+
self.class.post("/scrapers/#{scraper_name}/deployments", params)
|
12
14
|
end
|
13
15
|
|
14
16
|
end
|
15
17
|
end
|
16
18
|
end
|
17
|
-
|
@@ -2,7 +2,8 @@ module AnswersEngine
|
|
2
2
|
module Client
|
3
3
|
class ScraperExport < AnswersEngine::Client::Base
|
4
4
|
def all(scraper_name, opts={})
|
5
|
-
|
5
|
+
params = @options.merge(opts)
|
6
|
+
self.class.get("/scrapers/#{scraper_name}/exports", params)
|
6
7
|
end
|
7
8
|
|
8
9
|
def find(export_id)
|
@@ -19,4 +20,3 @@ module AnswersEngine
|
|
19
20
|
end
|
20
21
|
end
|
21
22
|
end
|
22
|
-
|
@@ -2,7 +2,8 @@ module AnswersEngine
|
|
2
2
|
module Client
|
3
3
|
class ScraperExporter < AnswersEngine::Client::Base
|
4
4
|
def all(scraper_name, opts={})
|
5
|
-
|
5
|
+
params = @options.merge(opts)
|
6
|
+
self.class.get("/scrapers/#{scraper_name}/exporters", params)
|
6
7
|
end
|
7
8
|
|
8
9
|
def find(scraper_name, exporter_name)
|
@@ -11,4 +12,3 @@ module AnswersEngine
|
|
11
12
|
end
|
12
13
|
end
|
13
14
|
end
|
14
|
-
|
@@ -2,7 +2,8 @@ module AnswersEngine
|
|
2
2
|
module Client
|
3
3
|
class ScraperJob < AnswersEngine::Client::Base
|
4
4
|
def all(scraper_name, opts={})
|
5
|
-
|
5
|
+
params = @options.merge(opts)
|
6
|
+
self.class.get("/scrapers/#{scraper_name}/jobs", params)
|
6
7
|
end
|
7
8
|
|
8
9
|
def create(scraper_name, opts={})
|
@@ -10,8 +11,8 @@ module AnswersEngine
|
|
10
11
|
body[:standard_worker_count] = opts[:workers] if opts[:workers]
|
11
12
|
body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
|
12
13
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
13
|
-
@options.merge
|
14
|
-
self.class.post("/scrapers/#{scraper_name}/jobs",
|
14
|
+
params = @options.merge({body: body.to_json})
|
15
|
+
self.class.post("/scrapers/#{scraper_name}/jobs", params)
|
15
16
|
end
|
16
17
|
|
17
18
|
def find(scraper_name)
|
@@ -24,9 +25,9 @@ module AnswersEngine
|
|
24
25
|
body[:standard_worker_count] = opts[:workers] if opts[:workers]
|
25
26
|
body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
|
26
27
|
body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
|
27
|
-
@options.merge
|
28
|
+
params = @options.merge({body: body.to_json})
|
28
29
|
|
29
|
-
self.class.put("/scrapers/#{scraper_name}/current_job",
|
30
|
+
self.class.put("/scrapers/#{scraper_name}/current_job", params)
|
30
31
|
end
|
31
32
|
|
32
33
|
def cancel(scraper_name, opts={})
|
@@ -46,4 +47,3 @@ module AnswersEngine
|
|
46
47
|
end
|
47
48
|
end
|
48
49
|
end
|
49
|
-
|
@@ -6,7 +6,8 @@ module AnswersEngine
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def all(scraper_name, opts={})
|
9
|
-
|
9
|
+
params = @options.merge(opts)
|
10
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/pages", params)
|
10
11
|
end
|
11
12
|
|
12
13
|
def update(scraper_name, gid, opts={})
|
@@ -15,19 +16,19 @@ module AnswersEngine
|
|
15
16
|
body[:priority] = opts[:priority] if opts[:priority]
|
16
17
|
body[:vars] = opts[:vars] if opts[:vars]
|
17
18
|
|
18
|
-
@options.merge
|
19
|
+
params = @options.merge({body: body.to_json})
|
19
20
|
|
20
|
-
self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}",
|
21
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", params)
|
21
22
|
end
|
22
23
|
|
23
|
-
def refetch(scraper_name, opts
|
24
|
-
|
25
|
-
self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch",
|
24
|
+
def refetch(scraper_name, opts={})
|
25
|
+
params = @options.merge(opts)
|
26
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
|
26
27
|
end
|
27
28
|
|
28
|
-
def reparse(scraper_name, opts
|
29
|
-
|
30
|
-
self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse",
|
29
|
+
def reparse(scraper_name, opts={})
|
30
|
+
params = @options.merge(opts)
|
31
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
|
31
32
|
end
|
32
33
|
|
33
34
|
def enqueue(scraper_name, method, url, opts={})
|
@@ -46,9 +47,9 @@ module AnswersEngine
|
|
46
47
|
body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
|
47
48
|
body[:cookie] = opts[:cookie] if opts[:cookie]
|
48
49
|
|
49
|
-
@options.merge
|
50
|
+
params = @options.merge({body: body.to_json})
|
50
51
|
|
51
|
-
self.class.post("/scrapers/#{scraper_name}/current_job/pages",
|
52
|
+
self.class.post("/scrapers/#{scraper_name}/current_job/pages", params)
|
52
53
|
end
|
53
54
|
|
54
55
|
end
|
@@ -51,6 +51,13 @@ module AnswersEngine
|
|
51
51
|
client.seeding_update(job_id, options)
|
52
52
|
end
|
53
53
|
|
54
|
+
def finisher_update(options={})
|
55
|
+
client = Client::Job.new()
|
56
|
+
job_id = options.fetch(:job_id)
|
57
|
+
|
58
|
+
client.finisher_update(job_id, options)
|
59
|
+
end
|
60
|
+
|
54
61
|
def init_global_page()
|
55
62
|
client = Client::GlobalPage.new()
|
56
63
|
client.find(gid)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module AnswersEngine
|
2
|
+
module Scraper
|
3
|
+
class Finisher
|
4
|
+
|
5
|
+
def self.exec_finisher(filename, job_id=nil, save=false)
|
6
|
+
extname = File.extname(filename)
|
7
|
+
case extname
|
8
|
+
when '.rb'
|
9
|
+
executor = RubyFinisherExecutor.new(filename: filename, job_id: job_id)
|
10
|
+
executor.exec_finisher(save)
|
11
|
+
else
|
12
|
+
puts "Unable to find a finisher executor for file type \"#{extname}\""
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module AnswersEngine
|
2
|
+
module Scraper
|
3
|
+
class RubyFinisherExecutor < Executor
|
4
|
+
attr_accessor :save
|
5
|
+
|
6
|
+
def initialize(options={})
|
7
|
+
@filename = options.fetch(:filename) { raise "Filename is required"}
|
8
|
+
@job_id = options[:job_id]
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.exposed_methods
|
12
|
+
[
|
13
|
+
:outputs,
|
14
|
+
:save_outputs,
|
15
|
+
:find_output,
|
16
|
+
:find_outputs
|
17
|
+
].freeze
|
18
|
+
end
|
19
|
+
|
20
|
+
def exec_finisher(save=false)
|
21
|
+
@save = save
|
22
|
+
if save
|
23
|
+
puts "Executing finisher script"
|
24
|
+
else
|
25
|
+
puts "Trying finisher script"
|
26
|
+
end
|
27
|
+
|
28
|
+
eval_finisher_script(save)
|
29
|
+
end
|
30
|
+
|
31
|
+
def eval_finisher_script(save=false)
|
32
|
+
update_finisher_starting_status
|
33
|
+
|
34
|
+
proc = Proc.new do
|
35
|
+
outputs = []
|
36
|
+
|
37
|
+
begin
|
38
|
+
context = isolated_binding({
|
39
|
+
outputs: outputs,
|
40
|
+
job_id: job_id
|
41
|
+
})
|
42
|
+
eval_with_context filename, context
|
43
|
+
rescue SyntaxError => e
|
44
|
+
handle_error(e) if save
|
45
|
+
raise e
|
46
|
+
rescue => e
|
47
|
+
handle_error(e) if save
|
48
|
+
raise e
|
49
|
+
end
|
50
|
+
|
51
|
+
puts "=========== Finisher Executed ==========="
|
52
|
+
save_outputs(outputs)
|
53
|
+
update_finisher_done_status
|
54
|
+
end
|
55
|
+
proc.call
|
56
|
+
end
|
57
|
+
|
58
|
+
def save_type
|
59
|
+
:executing
|
60
|
+
end
|
61
|
+
|
62
|
+
def update_to_server(opts = {})
|
63
|
+
finisher_update(
|
64
|
+
job_id: opts[:job_id],
|
65
|
+
outputs: opts[:outputs],
|
66
|
+
finisher_status: opts[:status])
|
67
|
+
end
|
68
|
+
|
69
|
+
def update_finisher_starting_status
|
70
|
+
return unless save
|
71
|
+
|
72
|
+
response = finisher_update(
|
73
|
+
job_id: job_id,
|
74
|
+
finisher_status: :starting)
|
75
|
+
|
76
|
+
if response.code == 200
|
77
|
+
puts "Finisher Status Updated."
|
78
|
+
else
|
79
|
+
puts "Error: Unable to save Finisher Status to server: #{response.body}"
|
80
|
+
raise "Unable to save Finisher Status to server: #{response.body}"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def update_finisher_done_status
|
85
|
+
return unless save
|
86
|
+
|
87
|
+
response = finisher_update(
|
88
|
+
job_id: job_id,
|
89
|
+
finisher_status: :done)
|
90
|
+
|
91
|
+
if response.code == 200
|
92
|
+
puts "Finisher Done."
|
93
|
+
else
|
94
|
+
puts "Error: Unable to save Finisher Done Status to server: #{response.body}"
|
95
|
+
raise "Unable to save Finisher Done Status to server: #{response.body}"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
def handle_error(e)
|
100
|
+
error = ["Finisher #{e.class}: #{e.to_s} (Job:#{job_id})",clean_backtrace(e.backtrace)].join("\n")
|
101
|
+
|
102
|
+
finisher_update(
|
103
|
+
job_id: job_id,
|
104
|
+
finisher_status: :failed,
|
105
|
+
log_error: error)
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
require "answersengine/plugin"
|
2
2
|
require "answersengine/scraper/parser"
|
3
3
|
require "answersengine/scraper/seeder"
|
4
|
+
require "answersengine/scraper/finisher"
|
4
5
|
require "answersengine/scraper/executor"
|
5
6
|
require "answersengine/scraper/ruby_parser_executor"
|
6
7
|
require "answersengine/scraper/ruby_seeder_executor"
|
8
|
+
require "answersengine/scraper/ruby_finisher_executor"
|
7
9
|
require "answersengine/client"
|
8
10
|
|
9
11
|
module AnswersEngine
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: answersengine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -189,6 +189,7 @@ files:
|
|
189
189
|
- exe/answersengine
|
190
190
|
- lib/answersengine.rb
|
191
191
|
- lib/answersengine/cli.rb
|
192
|
+
- lib/answersengine/cli/finisher.rb
|
192
193
|
- lib/answersengine/cli/global_page.rb
|
193
194
|
- lib/answersengine/cli/job.rb
|
194
195
|
- lib/answersengine/cli/job_output.rb
|
@@ -224,7 +225,9 @@ files:
|
|
224
225
|
- lib/answersengine/plugin/context_exposer.rb
|
225
226
|
- lib/answersengine/scraper.rb
|
226
227
|
- lib/answersengine/scraper/executor.rb
|
228
|
+
- lib/answersengine/scraper/finisher.rb
|
227
229
|
- lib/answersengine/scraper/parser.rb
|
230
|
+
- lib/answersengine/scraper/ruby_finisher_executor.rb
|
228
231
|
- lib/answersengine/scraper/ruby_parser_executor.rb
|
229
232
|
- lib/answersengine/scraper/ruby_seeder_executor.rb
|
230
233
|
- lib/answersengine/scraper/seeder.rb
|