answersengine 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/answersengine/cli/finisher.rb +40 -0
- data/lib/answersengine/cli.rb +5 -1
- data/lib/answersengine/client/auth_token.rb +10 -10
- data/lib/answersengine/client/deploy_key.rb +6 -4
- data/lib/answersengine/client/export.rb +2 -2
- data/lib/answersengine/client/job.rb +17 -5
- data/lib/answersengine/client/job_log.rb +8 -9
- data/lib/answersengine/client/job_page.rb +8 -7
- data/lib/answersengine/client/scraper.rb +8 -7
- data/lib/answersengine/client/scraper_deployment.rb +4 -3
- data/lib/answersengine/client/scraper_export.rb +2 -2
- data/lib/answersengine/client/scraper_exporter.rb +2 -2
- data/lib/answersengine/client/scraper_job.rb +6 -6
- data/lib/answersengine/client/scraper_job_page.rb +12 -11
- data/lib/answersengine/scraper/executor.rb +7 -0
- data/lib/answersengine/scraper/finisher.rb +18 -0
- data/lib/answersengine/scraper/ruby_finisher_executor.rb +110 -0
- data/lib/answersengine/scraper.rb +2 -0
- data/lib/answersengine/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: be8e1b2d4a1ca60a2b0f1ccc1fd5c00f33aa2f412214d9294e357ec2bfd353f1
+  data.tar.gz: ec3e082f0b8905a313f43a1d7d6018640c06baa6d06501f62126cab2dbe0af32
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5dec73a502a3431a5471f02a2b4093d57ed0cd23528a5ea6ec3915ea67031e724471df15d36499e5217a37ad0f32a51a87e7329d095670baedde6f5186ee2f42
+  data.tar.gz: a2864beba98cdc6660e091649e89fb0756f0edd3528443ba8231ae17d079cd1efa8adab80329271f32b7dc80ab68b0f1ca9d12897dea82ab0284d66bda2566f3
data/lib/answersengine/cli/finisher.rb
ADDED
@@ -0,0 +1,40 @@
+module AnswersEngine
+  class CLI < Thor
+    class Finisher < Thor
+      desc "try <scraper_name> <finisher_file>", "Tries a finisher file"
+      long_desc <<-LONGDESC
+        Takes a finisher script and tries to execute it without saving anything.\x5
+        <seeder_file>: Finisher script file will be executed.\x5
+      LONGDESC
+      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
+      def try_finisher(scraper_name, finisher_file)
+        if options[:job]
+          job_id = options[:job]
+        else
+          job = Client::ScraperJob.new(options).find(scraper_name)
+          job_id = job['id']
+        end
+
+        puts AnswersEngine::Scraper::Finisher.exec_finisher(finisher_file, job_id, false)
+      end
+
+      desc "exec <scraper_name> <finisher_file>", "Executes a finisher script onto a scraper's current job."
+      long_desc <<-LONGDESC
+        Takes a finisher script and execute it against a job and save outputs into the scraper's current job\x5
+        <finisher_file>: Finisher script file that will be executed on the scraper's current job.\x5
+      LONGDESC
+      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
+      def exec_parse(scraper_name, finisher_file)
+        if options[:job]
+          job_id = options[:job]
+        else
+          job = Client::ScraperJob.new(options).find(scraper_name)
+          job_id = job['id']
+        end
+
+        puts AnswersEngine::Scraper::Finisher.exec_finisher(finisher_file, job_id, true)
+      end
+    end
+  end
+
+end
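The two commands differ only in the boolean they pass to the dispatcher: try_finisher runs the script without saving, while exec_parse saves the resulting outputs to the scraper's current job. A rough sketch of the equivalent direct calls outside Thor (the scraper name and file path are made up, and the empty options hash assumes credentials are already configured in the environment):

    # Resolve the scraper's current job, then run a finisher against it.
    job = AnswersEngine::Client::ScraperJob.new({}).find('my-scraper')
    AnswersEngine::Scraper::Finisher.exec_finisher('finisher.rb', job['id'], false) # try: execute only
    AnswersEngine::Scraper::Finisher.exec_finisher('finisher.rb', job['id'], true)  # exec: execute and save outputs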
data/lib/answersengine/cli.rb
CHANGED
@@ -11,6 +11,7 @@ require 'answersengine/cli/scraper_deployment'
 require 'answersengine/cli/scraper'
 require 'answersengine/cli/parser'
 require 'answersengine/cli/seeder'
+require 'answersengine/cli/finisher'
 
 
 module AnswersEngine
@@ -29,5 +30,8 @@ module AnswersEngine
 
     desc "seeder SUBCOMMAND ...ARGS", "for seeding related activities"
     subcommand "seeder", Seeder
+
+    desc "seeder SUBCOMMAND ...ARGS", "for seeding related activities"
+    subcommand "finisher", Finisher
   end
-end
+end
data/lib/answersengine/client/auth_token.rb
CHANGED
@@ -7,7 +7,8 @@ module AnswersEngine
       end
 
       def all(opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/auth_tokens", params)
       end
 
       def create(role, description, opts={})
@@ -15,8 +16,8 @@ module AnswersEngine
                role: role,
                description: description}
 
-        @options.merge
-        self.class.post("/auth_tokens",
+        params = @options.merge({body: body.to_json})
+        self.class.post("/auth_tokens", params)
       end
 
       def create_on_account(account_id, role, description)
@@ -24,8 +25,8 @@ module AnswersEngine
                role: role,
                description: description}
 
-        @options.merge
-        self.class.post("/accounts/#{account_id}/auth_tokens",
+        params = @options.merge({body: body.to_json})
+        self.class.post("/accounts/#{account_id}/auth_tokens", params)
       end
 
       def update(token, role, description="", opts={})
@@ -33,18 +34,17 @@ module AnswersEngine
 
         body[:role] = role
         body[:description] = description if description.present?
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/auth_tokens/#{token}",
+        self.class.put("/auth_tokens/#{token}", params)
       end
 
       def delete(token, opts={})
        body = {}
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.delete("/auth_tokens/#{token}",
+        self.class.delete("/auth_tokens/#{token}", params)
       end
     end
   end
 end
-
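The change repeated across these client classes is the same throughout the release: merge @options with either the caller's opts or a JSON-encoded body into a params hash, then pass that hash to the HTTP call. A hedged sketch of a call site, assuming the client wraps HTTParty (so request options such as :query are honored) and that the API token and endpoint are already configured; the query values are made up:

    auth_tokens = AnswersEngine::Client::AuthToken.new
    # opts is merged into @options and forwarded as the request options
    response = auth_tokens.all(query: { page: 1, per_page: 30 })
    puts response.code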
data/lib/answersengine/client/deploy_key.rb
CHANGED
@@ -3,17 +3,19 @@ module AnswersEngine
     class DeployKey < AnswersEngine::Client::Base
 
       def find(opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/deploy_key", params)
       end
 
       def create(opts={})
-
+        params = @options.merge(opts)
+        self.class.post("/deploy_key", params)
       end
 
       def delete(opts={})
-
+        params = @options.merge(opts)
+        self.class.delete("/deploy_key", params)
       end
     end
   end
 end
-
data/lib/answersengine/client/job.rb
CHANGED
@@ -2,7 +2,8 @@ module AnswersEngine
   module Client
     class Job < AnswersEngine::Client::Base
       def all(opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/jobs", params)
       end
 
       def find(job_id)
@@ -14,9 +15,9 @@ module AnswersEngine
         body[:status] = opts[:status] if opts[:status]
         body[:standard_worker_count] = opts[:workers] if opts[:workers]
         body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/jobs/#{job_id}",
+        self.class.put("/jobs/#{job_id}", params)
       end
 
       def cancel(job_id, opts={})
@@ -41,9 +42,20 @@ module AnswersEngine
         body[:seeding_status] = opts.fetch(:seeding_status){ nil }
         body[:log_error] = opts[:log_error] if opts[:log_error]
 
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/jobs/#{job_id}/seeding_update",
+        self.class.put("/jobs/#{job_id}/seeding_update", params)
+      end
+
+      def finisher_update(job_id, opts={})
+        body = {}
+        body[:outputs] = opts.fetch(:outputs) {[]}
+        body[:finisher_status] = opts.fetch(:finisher_status){ nil }
+        body[:log_error] = opts[:log_error] if opts[:log_error]
+
+        params = @options.merge({body: body.to_json})
+
+        self.class.put("/jobs/#{job_id}/finisher_update", params)
       end
 
     end
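The new finisher_update endpoint mirrors seeding_update: it PUTs a JSON body with outputs, a finisher status, and an optional error log for a given job. A minimal sketch of calling it directly (the job ID and output hash are made up; credentials are assumed to be configured as for the other client calls):

    job_client = AnswersEngine::Client::Job.new
    response = job_client.finisher_update(123,
      outputs: [{ 'total_products' => 42 }],
      finisher_status: :done)
    puts response.code # the executor below treats 200 as success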
data/lib/answersengine/client/job_log.rb
CHANGED
@@ -2,26 +2,25 @@ module AnswersEngine
   module Client
     class JobLog < AnswersEngine::Client::Base
       def all_job_page_log(job_id, gid, opts={})
-        @options.merge
-        self.class.get("/jobs/#{job_id}/pages/#{gid}/log",
+        params = @options.merge(opts)
+        self.class.get("/jobs/#{job_id}/pages/#{gid}/log", params)
       end
 
       def scraper_all_job_page_log(scraper_name, gid, opts={})
-        @options.merge
-        self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log",
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log", params)
       end
 
       def all_job_log(job_id, opts={})
-        @options.merge
-        self.class.get("/jobs/#{job_id}/log",
+        params = @options.merge(opts)
+        self.class.get("/jobs/#{job_id}/log", params)
       end
 
       def scraper_all_job_log(scraper_name, opts={})
-        @options.merge
-        self.class.get("/scrapers/#{scraper_name}/current_job/log",
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/current_job/log", params)
       end
 
     end
   end
 end
-
data/lib/answersengine/client/job_page.rb
CHANGED
@@ -6,7 +6,8 @@ module AnswersEngine
       end
 
       def all(job_id, opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/jobs/#{job_id}/pages", params)
       end
 
       def update(job_id, gid, opts={})
@@ -15,9 +16,9 @@ module AnswersEngine
         body[:priority] = opts[:priority] if opts[:priority]
         body[:vars] = opts[:vars] if opts[:vars]
 
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/jobs/#{job_id}/pages/#{gid}",
+        self.class.put("/jobs/#{job_id}/pages/#{gid}", params)
       end
 
       def enqueue(job_id, method, url, opts={})
@@ -36,9 +37,9 @@ module AnswersEngine
         body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
         body[:cookie] = opts[:cookie] if opts[:cookie]
 
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.post("/jobs/#{job_id}/pages",
+        self.class.post("/jobs/#{job_id}/pages", params)
       end
 
       def parsing_update(job_id, gid, opts={})
@@ -48,9 +49,9 @@ module AnswersEngine
         body[:parsing_status] = opts.fetch(:parsing_status){ nil }
         body[:log_error] = opts[:log_error] if opts[:log_error]
 
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update",
+        self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
       end
     end
   end
 end
data/lib/answersengine/client/scraper.rb
CHANGED
@@ -7,7 +7,8 @@ module AnswersEngine
       end
 
       def all(opts={})
-
+        params = @options.merge opts
+        self.class.get("/scrapers", params)
       end
 
       def create(scraper_name, git_repository, opts={})
@@ -24,8 +25,8 @@ module AnswersEngine
         body[:cancel_current_job] = opts[:cancel_current_job] if opts[:cancel_current_job]
         body[:schedule] = opts[:schedule] if opts[:schedule]
         body[:timezone] = opts[:timezone] if opts[:timezone]
-        @options.merge
-        self.class.post("/scrapers",
+        params = @options.merge({body: body.to_json})
+        self.class.post("/scrapers", params)
       end
 
       def update(scraper_name, opts={})
@@ -42,15 +43,15 @@ module AnswersEngine
         body[:cancel_current_job] = opts[:cancel_current_job] if opts.has_key?("cancel_current_job") || opts.has_key?(:cancel_current_job)
         body[:schedule] = opts[:schedule] if opts[:schedule]
         body[:timezone] = opts[:timezone] if opts[:timezone]
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/scrapers/#{scraper_name}",
+        self.class.put("/scrapers/#{scraper_name}", params)
       end
 
       def delete(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.delete("/scrapers/#{scraper_name}", params)
       end
     end
   end
 end
-
data/lib/answersengine/client/scraper_deployment.rb
CHANGED
@@ -3,15 +3,16 @@ module AnswersEngine
     class ScraperDeployment < AnswersEngine::Client::Base
 
       def all(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/deployments", params)
       end
 
 
       def deploy(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.post("/scrapers/#{scraper_name}/deployments", params)
       end
 
     end
   end
 end
-
data/lib/answersengine/client/scraper_export.rb
CHANGED
@@ -2,7 +2,8 @@ module AnswersEngine
   module Client
     class ScraperExport < AnswersEngine::Client::Base
       def all(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/exports", params)
       end
 
       def find(export_id)
@@ -19,4 +20,3 @@ module AnswersEngine
     end
   end
 end
-
data/lib/answersengine/client/scraper_exporter.rb
CHANGED
@@ -2,7 +2,8 @@ module AnswersEngine
   module Client
     class ScraperExporter < AnswersEngine::Client::Base
       def all(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/exporters", params)
       end
 
       def find(scraper_name, exporter_name)
@@ -11,4 +12,3 @@ module AnswersEngine
     end
   end
 end
-
data/lib/answersengine/client/scraper_job.rb
CHANGED
@@ -2,7 +2,8 @@ module AnswersEngine
   module Client
     class ScraperJob < AnswersEngine::Client::Base
       def all(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/jobs", params)
       end
 
       def create(scraper_name, opts={})
@@ -10,8 +11,8 @@ module AnswersEngine
         body[:standard_worker_count] = opts[:workers] if opts[:workers]
         body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
         body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
-        @options.merge
-        self.class.post("/scrapers/#{scraper_name}/jobs",
+        params = @options.merge({body: body.to_json})
+        self.class.post("/scrapers/#{scraper_name}/jobs", params)
       end
 
       def find(scraper_name)
@@ -24,9 +25,9 @@ module AnswersEngine
         body[:standard_worker_count] = opts[:workers] if opts[:workers]
         body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
         body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/scrapers/#{scraper_name}/current_job",
+        self.class.put("/scrapers/#{scraper_name}/current_job", params)
       end
 
       def cancel(scraper_name, opts={})
@@ -46,4 +47,3 @@ module AnswersEngine
     end
   end
 end
-
data/lib/answersengine/client/scraper_job_page.rb
CHANGED
@@ -6,7 +6,8 @@ module AnswersEngine
       end
 
       def all(scraper_name, opts={})
-
+        params = @options.merge(opts)
+        self.class.get("/scrapers/#{scraper_name}/current_job/pages", params)
       end
 
       def update(scraper_name, gid, opts={})
@@ -15,19 +16,19 @@ module AnswersEngine
         body[:priority] = opts[:priority] if opts[:priority]
         body[:vars] = opts[:vars] if opts[:vars]
 
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}",
+        self.class.put("/scrapers/#{scraper_name}/current_job/pages/#{gid}", params)
       end
 
-      def refetch(scraper_name, opts
-
-        self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch",
+      def refetch(scraper_name, opts={})
+        params = @options.merge(opts)
+        self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
       end
 
-      def reparse(scraper_name, opts
-
-        self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse",
+      def reparse(scraper_name, opts={})
+        params = @options.merge(opts)
+        self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
       end
 
       def enqueue(scraper_name, method, url, opts={})
@@ -46,9 +47,9 @@ module AnswersEngine
         body[:no_redirect] = opts[:no_redirect] if opts[:no_redirect]
         body[:cookie] = opts[:cookie] if opts[:cookie]
 
-        @options.merge
+        params = @options.merge({body: body.to_json})
 
-        self.class.post("/scrapers/#{scraper_name}/current_job/pages",
+        self.class.post("/scrapers/#{scraper_name}/current_job/pages", params)
       end
 
     end
data/lib/answersengine/scraper/executor.rb
CHANGED
@@ -51,6 +51,13 @@ module AnswersEngine
         client.seeding_update(job_id, options)
       end
 
+      def finisher_update(options={})
+        client = Client::Job.new()
+        job_id = options.fetch(:job_id)
+
+        client.finisher_update(job_id, options)
+      end
+
       def init_global_page()
         client = Client::GlobalPage.new()
         client.find(gid)
data/lib/answersengine/scraper/finisher.rb
ADDED
@@ -0,0 +1,18 @@
+module AnswersEngine
+  module Scraper
+    class Finisher
+
+      def self.exec_finisher(filename, job_id=nil, save=false)
+        extname = File.extname(filename)
+        case extname
+        when '.rb'
+          executor = RubyFinisherExecutor.new(filename: filename, job_id: job_id)
+          executor.exec_finisher(save)
+        else
+          puts "Unable to find a finisher executor for file type \"#{extname}\""
+        end
+      end
+
+    end
+  end
+end
data/lib/answersengine/scraper/ruby_finisher_executor.rb
ADDED
@@ -0,0 +1,110 @@
+module AnswersEngine
+  module Scraper
+    class RubyFinisherExecutor < Executor
+      attr_accessor :save
+
+      def initialize(options={})
+        @filename = options.fetch(:filename) { raise "Filename is required"}
+        @job_id = options[:job_id]
+      end
+
+      def self.exposed_methods
+        [
+          :outputs,
+          :save_outputs,
+          :find_output,
+          :find_outputs
+        ].freeze
+      end
+
+      def exec_finisher(save=false)
+        @save = save
+        if save
+          puts "Executing finisher script"
+        else
+          puts "Trying finisher script"
+        end
+
+        eval_finisher_script(save)
+      end
+
+      def eval_finisher_script(save=false)
+        update_finisher_starting_status
+
+        proc = Proc.new do
+          outputs = []
+
+          begin
+            context = isolated_binding({
+              outputs: outputs,
+              job_id: job_id
+            })
+            eval_with_context filename, context
+          rescue SyntaxError => e
+            handle_error(e) if save
+            raise e
+          rescue => e
+            handle_error(e) if save
+            raise e
+          end
+
+          puts "=========== Finisher Executed ==========="
+          save_outputs(outputs)
+          update_finisher_done_status
+        end
+        proc.call
+      end
+
+      def save_type
+        :executing
+      end
+
+      def update_to_server(opts = {})
+        finisher_update(
+          job_id: opts[:job_id],
+          outputs: opts[:outputs],
+          finisher_status: opts[:status])
+      end
+
+      def update_finisher_starting_status
+        return unless save
+
+        response = finisher_update(
+          job_id: job_id,
+          finisher_status: :starting)
+
+        if response.code == 200
+          puts "Finisher Status Updated."
+        else
+          puts "Error: Unable to save Finisher Status to server: #{response.body}"
+          raise "Unable to save Finisher Status to server: #{response.body}"
+        end
+      end
+
+      def update_finisher_done_status
+        return unless save
+
+        response = finisher_update(
+          job_id: job_id,
+          finisher_status: :done)
+
+        if response.code == 200
+          puts "Finisher Done."
+        else
+          puts "Error: Unable to save Finisher Done Status to server: #{response.body}"
+          raise "Unable to save Finisher Done Status to server: #{response.body}"
+        end
+      end
+
+      def handle_error(e)
+        error = ["Finisher #{e.class}: #{e.to_s} (Job:#{job_id}",clean_backtrace(e.backtrace)].join("\n")
+
+        finisher_update(
+          job_id: job_id,
+          finisher_status: :failed,
+          log_error: error)
+      end
+
+    end
+  end
+end
data/lib/answersengine/scraper.rb
CHANGED
@@ -1,9 +1,11 @@
 require "answersengine/plugin"
 require "answersengine/scraper/parser"
 require "answersengine/scraper/seeder"
+require "answersengine/scraper/finisher"
 require "answersengine/scraper/executor"
 require "answersengine/scraper/ruby_parser_executor"
 require "answersengine/scraper/ruby_seeder_executor"
+require "answersengine/scraper/ruby_finisher_executor"
 require "answersengine/client"
 
 module AnswersEngine
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: answersengine
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.5.0
 platform: ruby
 authors:
 - Parama Danoesubroto
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-
+date: 2019-08-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: thor
@@ -189,6 +189,7 @@ files:
 - exe/answersengine
 - lib/answersengine.rb
 - lib/answersengine/cli.rb
+- lib/answersengine/cli/finisher.rb
 - lib/answersengine/cli/global_page.rb
 - lib/answersengine/cli/job.rb
 - lib/answersengine/cli/job_output.rb
@@ -224,7 +225,9 @@ files:
 - lib/answersengine/plugin/context_exposer.rb
 - lib/answersengine/scraper.rb
 - lib/answersengine/scraper/executor.rb
+- lib/answersengine/scraper/finisher.rb
 - lib/answersengine/scraper/parser.rb
+- lib/answersengine/scraper/ruby_finisher_executor.rb
 - lib/answersengine/scraper/ruby_parser_executor.rb
 - lib/answersengine/scraper/ruby_seeder_executor.rb
 - lib/answersengine/scraper/seeder.rb