answersengine 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CODE_OF_CONDUCT.md +1 -1
- data/LICENSE.txt +1 -1
- data/README.md +3 -4
- data/answersengine.gemspec +6 -12
- data/exe/answersengine +3 -2
- data/lib/answersengine.rb +20 -3
- metadata +14 -152
- data/examples/fetchtest/libraries/hello.rb +0 -9
- data/examples/fetchtest/libraries/hello_fail.rb +0 -10
- data/examples/fetchtest/parsers/failed.rb +0 -2
- data/examples/fetchtest/parsers/find_outputs.rb +0 -18
- data/examples/fetchtest/parsers/home.rb +0 -50
- data/examples/fetchtest/parsers/nested_fail.rb +0 -3
- data/examples/fetchtest/parsers/simple.rb +0 -14
- data/examples/fetchtest/seeders/csv_seeder.rb +0 -12
- data/examples/fetchtest/seeders/failed.rb +0 -1
- data/examples/fetchtest/seeders/list_of_urls.csv +0 -5
- data/examples/fetchtest/seeders/seed.rb +0 -28
- data/examples/fetchtest/seeders/test_reset_page.rb +0 -4
- data/lib/answersengine/cli.rb +0 -45
- data/lib/answersengine/cli/env_var.rb +0 -48
- data/lib/answersengine/cli/finisher.rb +0 -40
- data/lib/answersengine/cli/global_page.rb +0 -39
- data/lib/answersengine/cli/job.rb +0 -30
- data/lib/answersengine/cli/job_output.rb +0 -69
- data/lib/answersengine/cli/parser.rb +0 -64
- data/lib/answersengine/cli/scraper.rb +0 -185
- data/lib/answersengine/cli/scraper_deployment.rb +0 -24
- data/lib/answersengine/cli/scraper_export.rb +0 -51
- data/lib/answersengine/cli/scraper_exporter.rb +0 -40
- data/lib/answersengine/cli/scraper_finisher.rb +0 -20
- data/lib/answersengine/cli/scraper_job.rb +0 -75
- data/lib/answersengine/cli/scraper_job_var.rb +0 -48
- data/lib/answersengine/cli/scraper_page.rb +0 -203
- data/lib/answersengine/cli/scraper_var.rb +0 -48
- data/lib/answersengine/cli/seeder.rb +0 -40
- data/lib/answersengine/client.rb +0 -29
- data/lib/answersengine/client/auth_token.rb +0 -50
- data/lib/answersengine/client/backblaze_content.rb +0 -45
- data/lib/answersengine/client/base.rb +0 -55
- data/lib/answersengine/client/deploy_key.rb +0 -21
- data/lib/answersengine/client/env_var.rb +0 -28
- data/lib/answersengine/client/export.rb +0 -10
- data/lib/answersengine/client/global_page.rb +0 -18
- data/lib/answersengine/client/job.rb +0 -64
- data/lib/answersengine/client/job_export.rb +0 -10
- data/lib/answersengine/client/job_log.rb +0 -26
- data/lib/answersengine/client/job_output.rb +0 -19
- data/lib/answersengine/client/job_page.rb +0 -58
- data/lib/answersengine/client/job_stat.rb +0 -16
- data/lib/answersengine/client/scraper.rb +0 -57
- data/lib/answersengine/client/scraper_deployment.rb +0 -18
- data/lib/answersengine/client/scraper_export.rb +0 -22
- data/lib/answersengine/client/scraper_exporter.rb +0 -14
- data/lib/answersengine/client/scraper_finisher.rb +0 -16
- data/lib/answersengine/client/scraper_job.rb +0 -49
- data/lib/answersengine/client/scraper_job_output.rb +0 -19
- data/lib/answersengine/client/scraper_job_page.rb +0 -67
- data/lib/answersengine/client/scraper_job_var.rb +0 -28
- data/lib/answersengine/client/scraper_var.rb +0 -28
- data/lib/answersengine/plugin.rb +0 -6
- data/lib/answersengine/plugin/context_exposer.rb +0 -55
- data/lib/answersengine/scraper.rb +0 -18
- data/lib/answersengine/scraper/executor.rb +0 -373
- data/lib/answersengine/scraper/finisher.rb +0 -18
- data/lib/answersengine/scraper/parser.rb +0 -18
- data/lib/answersengine/scraper/ruby_finisher_executor.rb +0 -116
- data/lib/answersengine/scraper/ruby_parser_executor.rb +0 -200
- data/lib/answersengine/scraper/ruby_seeder_executor.rb +0 -120
- data/lib/answersengine/scraper/seeder.rb +0 -18
- data/lib/answersengine/version.rb +0 -3
@@ -1,24 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for looking at the deployments of a scraper.
    class ScraperDeployment < Thor
      package_name "scraper deployment"

      # Usage banner for help output: executable basename, this group's
      # package name and the command's usage string, separated by spaces.
      def self.banner(command, namespace = nil, subcommand = false)
        [basename, @package_name, command.usage].join(' ')
      end

      # `list`: prints whatever Client::ScraperDeployment#all returns for
      # the given scraper name. The parsed CLI options are handed to the
      # client unchanged.
      desc "list <scraper_name>", "List deployments on a scraper"
      long_desc <<-LONGDESC
        List deployments on a scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        deployments = Client::ScraperDeployment.new(options).all(scraper_name)
        puts deployments.to_s
      end
    end
  end
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for showing, listing and downloading exports.
    class ScraperExport < Thor
      package_name "scraper export"

      # Usage banner for help output: executable basename, this group's
      # package name and the command's usage string.
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      # `show`: prints the result of Client::ScraperExport#find for the id.
      desc "show <export_id>", "Show a scraper's export"
      def show(export_id)
        client = Client::ScraperExport.new(options)
        puts "#{client.find(export_id)}"
      end

      # `list`: with --scraper-name, prints that scraper's exports via
      # Client::ScraperExport#all; otherwise prints Client::Export#all.
      desc "list", "Gets a list of exports"
      long_desc <<-LONGDESC
        List exports.
      LONGDESC
      option :scraper_name, :aliases => :s, type: :string, desc: 'Filter by a specific scraper_name'
      option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list
        if options[:scraper_name]
          client = Client::ScraperExport.new(options)
          puts "#{client.all(options[:scraper_name])}"
        else
          client = Client::Export.new(options)
          puts "#{client.all}"
        end
      end

      # `download`: asks the client for the export's download info, parses
      # the response as JSON and, when it carries a 'signed_url', prints the
      # URL and hands it to the `open` command.
      #
      # Raises JSON::ParserError when the client response is not valid JSON
      # (unchanged from the previous implementation).
      desc "download <export_id>", "Download the exported file"
      def download(export_id)
        client = Client::ScraperExport.new(options)
        result = JSON.parse(client.download(export_id).to_s)
        signed_url = result['signed_url']

        if signed_url
          puts "Download url: \"#{signed_url}\""
          # The URL comes from a remote response. Passing it as a separate
          # argument (instead of interpolating it into a backtick command
          # line) means no shell ever parses it, so quotes, `$(...)` or `;`
          # inside the URL cannot run arbitrary commands.
          # NOTE(review): `open` is the macOS launcher; other platforms would
          # need a different command (e.g. xdg-open) — confirm intended scope.
          system('open', signed_url.to_s)
        else
          puts "Exported file does not exist"
        end
      end
    end
  end
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for a scraper's exporters: show one, start an
    # export, or list them.
    class ScraperExporter < Thor
      package_name "scraper exporter"

      # Usage banner for help output, built from the executable basename,
      # the package name and the command usage.
      def self.banner(command, namespace = nil, subcommand = false)
        [basename, @package_name, command.usage].join(' ')
      end

      # `show`: prints Client::ScraperExporter#find for the scraper/exporter pair.
      desc "show <scraper_name> <exporter_name>", "Show a scraper's exporter"
      def show(scraper_name, exporter_name)
        exporter = Client::ScraperExporter.new(options).find(scraper_name, exporter_name)
        puts exporter.to_s
      end

      # `start`: creates an export. When --job is given the export is created
      # through Client::JobExport using the job id; otherwise through
      # Client::ScraperExport using the scraper name.
      desc "start <scraper_name> <exporter_name>", "Starts an export"
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      def start(scraper_name, exporter_name)
        job_id = options[:job]
        created =
          if job_id
            Client::JobExport.new(options).create(job_id, exporter_name)
          else
            Client::ScraperExport.new(options).create(scraper_name, exporter_name)
          end
        puts created.to_s
      end

      # `list`: prints Client::ScraperExporter#all for the scraper.
      desc "list <scraper_name>", "gets a list of exporters on a scraper"
      long_desc <<-LONGDESC
        List exporters on a scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        exporters = Client::ScraperExporter.new(options).all(scraper_name)
        puts exporters.to_s
      end
    end
  end
end
|
@@ -1,20 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for the finisher of a scraper's current job.
    class ScraperFinisher < Thor
      package_name "scraper finisher"

      # Usage banner for help output, built from the executable basename,
      # the package name and the command usage.
      def self.banner(command, namespace = nil, subcommand = false)
        [basename, @package_name, command.usage].join(' ')
      end

      # `reset`: prints the result of Client::ScraperFinisher#reset for the
      # given scraper name.
      desc "reset <scraper_name>", "Reset finisher on a scraper's current job"
      long_desc <<-LONGDESC
        Reset finisher on a scraper's current job.\x5
      LONGDESC
      def reset(scraper_name)
        outcome = Client::ScraperFinisher.new(options).reset(scraper_name)
        puts outcome.to_s
      end
    end
  end
end
|
@@ -1,75 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for a scraper's jobs. Every command builds a
    # Client::ScraperJob from the parsed options and prints what the
    # matching client call returns.
    class ScraperJob < Thor
      package_name "scraper job"

      # Usage banner for help output, built from the executable basename,
      # the package name and the command usage.
      def self.banner(command, namespace = nil, subcommand = false)
        [basename, @package_name, command.usage].join(' ')
      end

      # `show`: prints Client::ScraperJob#find for the scraper.
      desc "show <scraper_name>", "Show a scraper's current job"
      def show(scraper_name)
        puts Client::ScraperJob.new(options).find(scraper_name).to_s
      end

      # `list`: prints Client::ScraperJob#all for the scraper.
      desc "list <scraper_name>", "gets a list of jobs on a scraper"
      long_desc <<-LONGDESC
        List jobs on a scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        puts Client::ScraperJob.new(options).all(scraper_name).to_s
      end

      # `cancel`: prints Client::ScraperJob#cancel for the scraper.
      desc "cancel <scraper_name>", "cancels a scraper's current job"
      long_desc <<-LONGDESC
        Cancels a scraper's current job
      LONGDESC
      def cancel(scraper_name)
        puts Client::ScraperJob.new(options).cancel(scraper_name).to_s
      end

      # `resume`: prints Client::ScraperJob#resume for the scraper.
      desc "resume <scraper_name>", "resumes a scraper's current job"
      long_desc <<-LONGDESC
        Resumes a scraper's current job
      LONGDESC
      def resume(scraper_name)
        puts Client::ScraperJob.new(options).resume(scraper_name).to_s
      end

      # `pause`: prints Client::ScraperJob#pause for the scraper.
      desc "pause <scraper_name>", "pauses a scraper's current job"
      long_desc <<-LONGDESC
        pauses a scraper's current job
      LONGDESC
      def pause(scraper_name)
        puts Client::ScraperJob.new(options).pause(scraper_name).to_s
      end

      # `update`: forwards the parsed options to Client::ScraperJob#update
      # and prints the result.
      desc "update <scraper_name>", "updates a scraper's current job"
      long_desc <<-LONGDESC
        Updates a scraper's current job.
      LONGDESC
      option :workers, aliases: :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
      option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
      option :proxy_type, desc: 'Set the Proxy type. Default: standard'
      def update(scraper_name)
        puts Client::ScraperJob.new(options).update(scraper_name, options).to_s
      end

      # Nested command group for job variables.
      desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
      subcommand "var", ScraperJobVar
    end
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for the environment variables of a scrape job.
    # Every command builds a Client::ScraperJobVar from the parsed options
    # and prints what the matching client call returns.
    class ScraperJobVar < Thor
      package_name "job var"

      # Usage banner for help output. Unlike the sibling groups, the literal
      # word "scraper" is inserted between the basename and the package name.
      def self.banner(command, namespace = nil, subcommand = false)
        [basename, 'scraper', @package_name, command.usage].join(' ')
      end

      # `list`: prints Client::ScraperJobVar#all for the scraper.
      desc "list <scraper_name>", "List environment variables on the scrape job"
      long_desc <<-LONGDESC
        List all environment variables on the scrape job.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        puts Client::ScraperJobVar.new(options).all(scraper_name).to_s
      end

      # `set`: forwards name, value and the parsed options to
      # Client::ScraperJobVar#set and prints the result.
      desc "set <scraper_name> <var_name> <value>", "Set an environment var on the scrape job"
      long_desc <<-LONGDESC
        Creates an environment variable\x5
        <var_name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your scrape job, otherwise it will be overwritten.\x5
        <value>: Value of variable.\x5
      LONGDESC
      option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
      def set(scraper_name, var_name, value)
        outcome = Client::ScraperJobVar.new(options).set(scraper_name, var_name, value, options)
        puts outcome.to_s
      end

      # `show`: prints Client::ScraperJobVar#find for the variable.
      desc "show <scraper_name> <var_name>", "Show an environment variable on the scrape job"
      def show(scraper_name, var_name)
        puts Client::ScraperJobVar.new(options).find(scraper_name, var_name).to_s
      end

      # `unset`: prints Client::ScraperJobVar#unset for the variable.
      desc "unset <scraper_name> <var_name>", "Deletes an environment variable on the scrape job"
      def unset(scraper_name, var_name)
        puts Client::ScraperJobVar.new(options).unset(scraper_name, var_name).to_s
      end
    end
  end
end
|
@@ -1,203 +0,0 @@
|
|
1
|
-
module AnswersEngine
  class CLI < Thor
    # Thor command group for the pages of a scraper's job: list, add,
    # update, refetch, reparse, show and log.
    #
    # Commands that accept --job talk to the job-scoped client
    # (Client::JobPage / job id); without it they use the scraper-scoped
    # client (Client::ScraperJobPage / scraper name).
    class ScraperPage < Thor
      package_name "scraper page"

      # Usage banner for help output: executable basename, this group's
      # package name and the command's usage string.
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      no_commands do
        # Replaces options[key], when present, with the value parsed from its
        # JSON text.
        #
        # Returns true when the option is absent or parsed successfully.
        # Returns false after printing an error naming the offending option
        # when the text is not valid JSON.
        #
        # Parsing each option on its own keeps the error message accurate: a
        # failure is reported only for the option that actually failed.
        def parse_json_option(key)
          raw = options[key]
          return true unless raw

          options[key] = JSON.parse(raw)
          true
        rescue JSON::ParserError
          puts "Error: #{raw} on #{key} is not a valid JSON"
          false
        end
      end

      # `list`: prints the pages of the job given by --job, or of the
      # scraper's job when --job is absent.
      desc "list <scraper_name>", "List Pages on a scraper's current job"
      long_desc <<-LONGDESC
        List all pages in a scraper's current job.\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      option :page_type, :aliases => :t, type: :string, desc: 'Filter by page_type'
      option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      option :fetch_fail, type: :boolean, desc: 'Returns only pages that fails fetching.'
      option :parse_fail, type: :boolean, desc: 'Returns only pages that fails parsing.'
      def list(scraper_name)
        if options[:job]
          client = Client::JobPage.new(options)
          puts "#{client.all(options[:job])}"
        else
          client = Client::ScraperJobPage.new(options)
          puts "#{client.all(scraper_name)}"
        end
      end

      # `add`: enqueues a page. --headers and --vars are parsed from JSON
      # first; if either is invalid an error is printed for that option and
      # nothing is enqueued.
      desc "add <scraper_name> <url>", "Enqueues a page to a scraper's current job"
      long_desc <<-LONGDESC
        Enqueues a page to a scraper's current job\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      option :method, :aliases => :m, type: :string, desc: 'Set request method. Default: GET'
      option :headers, :aliases => :H, type: :string, banner: :JSON, desc: 'Set request headers. Must be in json format. i.e: {"Foo":"bar"} '
      option :cookie, :aliases => :c, type: :string, desc: 'Set request cookie.'
      option :vars, :aliases => :v, type: :string, banner: :JSON, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
      option :page_type, :aliases => :t, desc: 'Set page type'
      option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
      option :fetch_type, :aliases => :F, desc: 'Set fetch type. Default: http'
      option :body, :aliases => :b, desc: 'Set request body'
      option :force_fetch, :aliases => :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
      option :freshness, :aliases => :s, desc: 'Set how fresh the page cache is. Accepts timestap format.'
      option :ua_type, :aliases => :u, desc: 'Set user agent type. Default: desktop'
      option :no_redirect, :aliases => :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
      def add(scraper_name, url)
        # Previously one rescue wrapped everything and blamed --headers
        # whenever it was set, even if it had parsed fine and the failure was
        # in --vars. Each option is now validated and reported separately.
        return unless parse_json_option(:headers) && parse_json_option(:vars)

        method = options[:method]

        if options[:job]
          client = Client::JobPage.new(options)
          puts "#{client.enqueue(options[:job], method, url, options)}"
        else
          client = Client::ScraperJobPage.new(options)
          puts "#{client.enqueue(scraper_name, method, url, options)}"
        end
      end

      # `update`: updates a page identified by gid. --vars is parsed from
      # JSON first; if invalid an error is printed and no update is sent.
      desc "update <scraper_name> <gid>", "Update a page in a scraper's current job"
      long_desc <<-LONGDESC
        Updates a page in a scraper's current job. Only page_type or page vars is updateable.\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      option :page_type, :aliases => :t, desc: 'Set page type'
      option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
      option :vars, :aliases => :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
      def update(scraper_name, gid)
        return unless parse_json_option(:vars)

        if options[:job]
          client = Client::JobPage.new(options)
          puts "#{client.update(options[:job], gid, options)}"
        else
          client = Client::ScraperJobPage.new(options)
          puts "#{client.update(scraper_name, gid, options)}"
        end
      end

      # `refetch`: requires at least one of --gid, --fetch-fail,
      # --parse-fail or --status; otherwise prints a hint and does nothing.
      desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
      long_desc <<-LONGDESC
        Refetch pages in a scraper's current job. You need to specify either a --gid or --fetch-fail or --parse-fail or --status.\x5
      LONGDESC
      option :gid, :aliases => :g, type: :string, desc: 'Refetch a specific GID'
      option :fetch_fail, type: :boolean, desc: 'Refetches only pages that fails fetching.'
      option :parse_fail, type: :boolean, desc: 'Refetches only pages that fails parsing.'
      option :status, type: :string, desc: 'Refetches only pages with a specific status.'
      def refetch(scraper_name)
        if %i[gid fetch_fail parse_fail status].none? { |key| options.key?(key) }
          puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
          return
        end

        client = Client::ScraperJobPage.new(options)
        puts "#{client.refetch(scraper_name)}"
      end

      # `reparse`: requires at least one of --gid, --parse-fail or --status;
      # otherwise prints a hint and does nothing.
      desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
      long_desc <<-LONGDESC
        Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail or --status.\x5
      LONGDESC
      option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
      option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
      option :status, type: :string, desc: 'Reparse only pages with a specific status.'
      def reparse(scraper_name)
        # NOTE(review): no :vars option is declared for this command in this
        # class, so this is presumably a no-op unless an option is inherited
        # from elsewhere — kept for compatibility; confirm and remove.
        return unless parse_json_option(:vars)

        if %i[gid parse_fail status].none? { |key| options.key?(key) }
          puts "Must specify either a --gid, --parse-fail or --status"
          return
        end

        client = Client::ScraperJobPage.new(options)
        puts "#{client.reparse(scraper_name)}"
      end

      # `show`: prints a single page, looked up by job id when --job is
      # given and by scraper name otherwise.
      desc "show <scraper_name> <gid>", "Show a page in scraper's current job"
      long_desc <<-LONGDESC
        Shows a page in a scraper's current job.\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      def show(scraper_name, gid)
        if options[:job]
          client = Client::JobPage.new(options)
          puts "#{client.find(options[:job], gid)}"
        else
          client = Client::ScraperJobPage.new(options)
          puts "#{client.find(scraper_name, gid)}"
        end
      end

      # `log`: prints log entries for a page. --head, --more and --per-page
      # are moved out of `options` into the request query; --parsing adds a
      # job_type filter. Each Hash entry is printed as
      # "<timestamp> <severity>: <payload>", followed by a hint when the
      # response carries a "more_token".
      desc "log <scraper_name> <gid>", "List log entries related to a job page"
      long_desc <<-LONGDESC
        Shows log related to a page in the job. Defaults to showing the most recent entries\x5
      LONGDESC
      option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
      option :head, :aliases => :H, desc: 'Show the oldest log entries. If not set, newest entries is shown'
      option :parsing, :aliases => :p, type: :boolean, desc: 'Show only log entries related to parsing'
      option :more, :aliases => :m, desc: 'Show next set of log entries. Enter the `More token`'
      option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 5000 per page.'
      def log(scraper_name, gid)
        # The client is built before the options below are deleted, matching
        # the original statement order.
        client = Client::JobLog.new(options)

        query = {}
        query["order"] = options.delete(:head) if options[:head]
        query["job_type"] = "parsing" if options[:parsing]
        query["page_token"] = options.delete(:more) if options[:more]
        query["per_page"] = options.delete(:per_page) if options[:per_page]

        puts "Fetching page logs..."

        result =
          if options[:job]
            client.all_job_page_log(options[:job], gid, {query: query})
          else
            client.scraper_all_job_page_log(scraper_name, gid, {query: query})
          end

        entries = result['entries']
        if entries.nil? || entries.length == 0
          puts "No logs yet, please try again later."
          return
        end

        more_token = result["more_token"]

        entries.each do |entry|
          puts "#{entry["timestamp"]} #{entry["severity"]}: #{entry["payload"]}" if entry.is_a?(Hash)
        end

        puts "to see more entries, add: \"--more #{more_token}\"" unless more_token.nil?
      end
    end
  end
end
|