datahen 0.10.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +7 -0
  4. data/CODE_OF_CONDUCT.md +74 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +29 -0
  8. data/Rakefile +22 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/datahen.gemspec +47 -0
  12. data/examples/fetchtest/libraries/hello.rb +9 -0
  13. data/examples/fetchtest/libraries/hello_fail.rb +10 -0
  14. data/examples/fetchtest/parsers/failed.rb +2 -0
  15. data/examples/fetchtest/parsers/find_outputs.rb +18 -0
  16. data/examples/fetchtest/parsers/home.rb +50 -0
  17. data/examples/fetchtest/parsers/nested_fail.rb +3 -0
  18. data/examples/fetchtest/parsers/simple.rb +14 -0
  19. data/examples/fetchtest/seeders/csv_seeder.rb +12 -0
  20. data/examples/fetchtest/seeders/failed.rb +1 -0
  21. data/examples/fetchtest/seeders/list_of_urls.csv +5 -0
  22. data/examples/fetchtest/seeders/seed.rb +28 -0
  23. data/examples/fetchtest/seeders/test_reset_page.rb +4 -0
  24. data/exe/hen +3 -0
  25. data/lib/datahen.rb +5 -0
  26. data/lib/datahen/cli.rb +45 -0
  27. data/lib/datahen/cli/env_var.rb +48 -0
  28. data/lib/datahen/cli/finisher.rb +40 -0
  29. data/lib/datahen/cli/global_page.rb +39 -0
  30. data/lib/datahen/cli/job.rb +30 -0
  31. data/lib/datahen/cli/job_output.rb +69 -0
  32. data/lib/datahen/cli/parser.rb +64 -0
  33. data/lib/datahen/cli/scraper.rb +185 -0
  34. data/lib/datahen/cli/scraper_deployment.rb +24 -0
  35. data/lib/datahen/cli/scraper_export.rb +51 -0
  36. data/lib/datahen/cli/scraper_exporter.rb +40 -0
  37. data/lib/datahen/cli/scraper_finisher.rb +20 -0
  38. data/lib/datahen/cli/scraper_job.rb +75 -0
  39. data/lib/datahen/cli/scraper_job_var.rb +48 -0
  40. data/lib/datahen/cli/scraper_page.rb +203 -0
  41. data/lib/datahen/cli/scraper_var.rb +48 -0
  42. data/lib/datahen/cli/seeder.rb +40 -0
  43. data/lib/datahen/client.rb +29 -0
  44. data/lib/datahen/client/auth_token.rb +50 -0
  45. data/lib/datahen/client/backblaze_content.rb +45 -0
  46. data/lib/datahen/client/base.rb +69 -0
  47. data/lib/datahen/client/deploy_key.rb +21 -0
  48. data/lib/datahen/client/env_var.rb +28 -0
  49. data/lib/datahen/client/export.rb +10 -0
  50. data/lib/datahen/client/global_page.rb +18 -0
  51. data/lib/datahen/client/job.rb +64 -0
  52. data/lib/datahen/client/job_export.rb +10 -0
  53. data/lib/datahen/client/job_log.rb +26 -0
  54. data/lib/datahen/client/job_output.rb +19 -0
  55. data/lib/datahen/client/job_page.rb +58 -0
  56. data/lib/datahen/client/job_stat.rb +16 -0
  57. data/lib/datahen/client/scraper.rb +57 -0
  58. data/lib/datahen/client/scraper_deployment.rb +18 -0
  59. data/lib/datahen/client/scraper_export.rb +22 -0
  60. data/lib/datahen/client/scraper_exporter.rb +14 -0
  61. data/lib/datahen/client/scraper_finisher.rb +16 -0
  62. data/lib/datahen/client/scraper_job.rb +49 -0
  63. data/lib/datahen/client/scraper_job_output.rb +19 -0
  64. data/lib/datahen/client/scraper_job_page.rb +67 -0
  65. data/lib/datahen/client/scraper_job_var.rb +28 -0
  66. data/lib/datahen/client/scraper_var.rb +28 -0
  67. data/lib/datahen/plugin.rb +6 -0
  68. data/lib/datahen/plugin/context_exposer.rb +55 -0
  69. data/lib/datahen/scraper.rb +18 -0
  70. data/lib/datahen/scraper/executor.rb +373 -0
  71. data/lib/datahen/scraper/finisher.rb +18 -0
  72. data/lib/datahen/scraper/parser.rb +18 -0
  73. data/lib/datahen/scraper/ruby_finisher_executor.rb +116 -0
  74. data/lib/datahen/scraper/ruby_parser_executor.rb +200 -0
  75. data/lib/datahen/scraper/ruby_seeder_executor.rb +120 -0
  76. data/lib/datahen/scraper/seeder.rb +18 -0
  77. data/lib/datahen/version.rb +3 -0
  78. metadata +270 -0
@@ -0,0 +1,24 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for inspecting deployments of a scraper.
    class ScraperDeployment < Thor

      package_name "scraper deployment"

      # Thor banner override so help output reads "hen scraper deployment <usage>".
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "list <scraper_name>", "List deployments on a scraper"
      long_desc <<-LONGDESC
        List deployments on a scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        deployment_client = Client::ScraperDeployment.new(options)
        puts deployment_client.all(scraper_name).to_s
      end
    end
  end

end
@@ -0,0 +1,51 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for inspecting and downloading scraper exports.
    class ScraperExport < Thor
      package_name "scraper export"

      # Thor banner override so help output reads "hen scraper export <usage>".
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "show <export_id>", "Show a scraper's export"
      def show(export_id)
        client = Client::ScraperExport.new(options)
        puts "#{client.find(export_id)}"
      end


      desc "list", "Gets a list of exports"
      long_desc <<-LONGDESC
        List exports.
      LONGDESC
      option :scraper_name, :aliases => :s, type: :string, desc: 'Filter by a specific scraper_name'
      option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list()
        # With --scraper-name we list that scraper's exports; otherwise all exports.
        if options[:scraper_name]
          client = Client::ScraperExport.new(options)
          puts "#{client.all(options[:scraper_name])}"
        else
          client = Client::Export.new(options)
          puts "#{client.all}"
        end
      end

      desc "download <export_id>", "Download the exported file"
      def download(export_id)
        client = Client::ScraperExport.new(options)
        result = JSON.parse(client.download(export_id).to_s)

        if result['signed_url']
          puts "Download url: \"#{result['signed_url']}\""
          # Use the multi-argument form of Kernel#system so the URL is passed
          # as a literal argv entry rather than interpolated into a shell
          # command line (the previous backtick form was vulnerable to shell
          # injection via a crafted signed_url, and its captured output was
          # discarded anyway).
          # NOTE(review): `open` is a macOS launcher command — confirm the
          # intended target platforms; on Linux this silently fails.
          system('open', result['signed_url'])
        else
          puts "Exported file does not exist"
        end
      end

    end
  end

end
@@ -0,0 +1,40 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for managing a scraper's exporters.
    class ScraperExporter < Thor
      package_name "scraper exporter"

      # Thor banner override so help output reads "hen scraper exporter <usage>".
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "show <scraper_name> <exporter_name>", "Show a scraper's exporter"
      def show(scraper_name, exporter_name)
        exporter_client = Client::ScraperExporter.new(options)
        puts exporter_client.find(scraper_name, exporter_name).to_s
      end

      desc "start <scraper_name> <exporter_name>", "Starts an export"
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      def start(scraper_name, exporter_name)
        # --job targets a specific job; otherwise the scraper's current job.
        if options[:job]
          job_client = Client::JobExport.new(options)
          puts job_client.create(options[:job], exporter_name).to_s
        else
          export_client = Client::ScraperExport.new(options)
          puts export_client.create(scraper_name, exporter_name).to_s
        end
      end

      desc "list <scraper_name>", "gets a list of exporters on a scraper"
      long_desc <<-LONGDESC
        List exporters on a scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        exporter_client = Client::ScraperExporter.new(options)
        puts exporter_client.all(scraper_name).to_s
      end
    end
  end

end
@@ -0,0 +1,20 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for controlling a scraper job's finisher.
    class ScraperFinisher < Thor

      package_name "scraper finisher"

      # Thor banner override so help output reads "hen scraper finisher <usage>".
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "reset <scraper_name>", "Reset finisher on a scraper's current job"
      long_desc <<-LONGDESC
        Reset finisher on a scraper's current job.\x5
      LONGDESC
      def reset(scraper_name)
        finisher_client = Client::ScraperFinisher.new(options)
        puts finisher_client.reset(scraper_name).to_s
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for managing a scraper's jobs
    # (show/list/cancel/resume/pause/update plus the nested "var" commands).
    class ScraperJob < Thor
      package_name "scraper job"

      # Thor banner override so help output reads "hen scraper job <usage>".
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "show <scraper_name>", "Show a scraper's current job"
      def show(scraper_name)
        job_client = Client::ScraperJob.new(options)
        puts job_client.find(scraper_name).to_s
      end

      desc "list <scraper_name>", "gets a list of jobs on a scraper"
      long_desc <<-LONGDESC
        List jobs on a scraper.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        job_client = Client::ScraperJob.new(options)
        puts job_client.all(scraper_name).to_s
      end

      desc "cancel <scraper_name>", "cancels a scraper's current job"
      long_desc <<-LONGDESC
        Cancels a scraper's current job
      LONGDESC
      def cancel(scraper_name)
        job_client = Client::ScraperJob.new(options)
        puts job_client.cancel(scraper_name).to_s
      end

      desc "resume <scraper_name>", "resumes a scraper's current job"
      long_desc <<-LONGDESC
        Resumes a scraper's current job
      LONGDESC
      def resume(scraper_name)
        job_client = Client::ScraperJob.new(options)
        puts job_client.resume(scraper_name).to_s
      end

      desc "pause <scraper_name>", "pauses a scraper's current job"
      long_desc <<-LONGDESC
        pauses a scraper's current job
      LONGDESC
      def pause(scraper_name)
        job_client = Client::ScraperJob.new(options)
        puts job_client.pause(scraper_name).to_s
      end

      desc "update <scraper_name>", "updates a scraper's current job"
      long_desc <<-LONGDESC
        Updates a scraper's current job.
      LONGDESC
      option :workers, aliases: :w, type: :numeric, desc: 'Set how many standard workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 1. '
      option :browsers, type: :numeric, desc: 'Set how many browser workers to use. Scraper job must be restarted(paused then resumed, or cancelled then resumed) for it to take effect. Default: 0. '
      option :proxy_type, desc: 'Set the Proxy type. Default: standard'
      def update(scraper_name)
        job_client = Client::ScraperJob.new(options)
        puts job_client.update(scraper_name, options).to_s
      end

      desc "var SUBCOMMAND ...ARGS", "for managing scraper's job variables"
      subcommand "var", ScraperJobVar

    end
  end

end
@@ -0,0 +1,48 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for environment variables scoped to a scrape job.
    class ScraperJobVar < Thor

      package_name "job var"

      # Thor banner override; nested one level deeper than the other groups,
      # so "scraper" is inserted before the package name.
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} scraper #{@package_name} #{command.usage}"
      end

      desc "list <scraper_name>", "List environment variables on the scrape job"
      long_desc <<-LONGDESC
        List all environment variables on the scrape job.
      LONGDESC
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      def list(scraper_name)
        var_client = Client::ScraperJobVar.new(options)
        puts var_client.all(scraper_name).to_s
      end

      desc "set <scraper_name> <var_name> <value>", "Set an environment var on the scrape job"
      long_desc <<-LONGDESC
        Creates an environment variable\x5
        <var_name>: Var name can only consist of alphabets, numbers, underscores. Name must be unique to your scrape job, otherwise it will be overwritten.\x5
        <value>: Value of variable.\x5
      LONGDESC
      option :secret, type: :boolean, desc: 'Set true to make it decrypt the value. Default: false'
      def set(scraper_name, var_name, value)
        var_client = Client::ScraperJobVar.new(options)
        puts var_client.set(scraper_name, var_name, value, options).to_s
      end

      desc "show <scraper_name> <var_name>", "Show an environment variable on the scrape job"
      def show(scraper_name, var_name)
        var_client = Client::ScraperJobVar.new(options)
        puts var_client.find(scraper_name, var_name).to_s
      end

      desc "unset <scraper_name> <var_name>", "Deletes an environment variable on the scrape job"
      def unset(scraper_name, var_name)
        var_client = Client::ScraperJobVar.new(options)
        puts var_client.unset(scraper_name, var_name).to_s
      end
    end
  end

end
@@ -0,0 +1,203 @@
1
module Datahen
  class CLI < Thor
    # CLI subcommand group for pages in a scraper's job: listing, enqueueing,
    # updating, refetching, reparsing, showing, and fetching page logs.
    class ScraperPage < Thor

      package_name "scraper page"

      # Thor banner override so help output reads "hen scraper page <usage>".
      def self.banner(command, namespace = nil, subcommand = false)
        "#{basename} #{@package_name} #{command.usage}"
      end

      desc "list <scraper_name>", "List Pages on a scraper's current job"
      long_desc <<-LONGDESC
        List all pages in a scraper's current job.\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      option :page_type, aliases: :t, type: :string, desc: 'Filter by page_type'
      option :page, aliases: :p, type: :numeric, desc: 'Get the next set of records by page.'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
      option :fetch_fail, type: :boolean, desc: 'Returns only pages that fails fetching.'
      option :parse_fail, type: :boolean, desc: 'Returns only pages that fails parsing.'
      def list(scraper_name)
        # --job targets a specific job; otherwise the scraper's current job.
        if options[:job]
          puts Client::JobPage.new(options).all(options[:job]).to_s
        else
          puts Client::ScraperJobPage.new(options).all(scraper_name).to_s
        end
      end

      desc "add <scraper_name> <url>", "Enqueues a page to a scraper's current job"
      long_desc <<-LONGDESC
        Enqueues a page to a scraper's current job\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      option :method, aliases: :m, type: :string, desc: 'Set request method. Default: GET'
      option :headers, aliases: :H, type: :string, banner: :JSON, desc: 'Set request headers. Must be in json format. i.e: {"Foo":"bar"} '
      option :cookie, aliases: :c, type: :string, desc: 'Set request cookie.'
      option :vars, aliases: :v, type: :string, banner: :JSON, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
      option :page_type, aliases: :t, desc: 'Set page type'
      option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
      option :fetch_type, aliases: :F, desc: 'Set fetch type. Default: http'
      option :body, aliases: :b, desc: 'Set request body'
      option :force_fetch, aliases: :f, type: :boolean, desc: 'Set true to force fetch page that is not within freshness criteria. Default: false'
      option :freshness, aliases: :s, desc: 'Set how fresh the page cache is. Accepts timestap format.'
      option :ua_type, aliases: :u, desc: 'Set user agent type. Default: desktop'
      option :no_redirect, aliases: :n, type: :boolean, desc: 'Set true to not follow redirect. Default: false'
      def add(scraper_name, url)
        # JSON-typed options are decoded in place before hitting the API.
        options[:headers] = JSON.parse(options[:headers]) if options[:headers]
        options[:vars] = JSON.parse(options[:vars]) if options[:vars]
        method = options[:method]

        if options[:job]
          puts Client::JobPage.new(options).enqueue(options[:job], method, url, options).to_s
        else
          puts Client::ScraperJobPage.new(options).enqueue(scraper_name, method, url, options).to_s
        end
      rescue JSON::ParserError
        if options[:headers]
          puts "Error: #{options[:headers]} on headers is not a valid JSON"
        end
        if options[:vars]
          puts "Error: #{options[:vars]} on vars is not a valid JSON"
        end
      end


      desc "update <scraper_name> <gid>", "Update a page in a scraper's current job"
      long_desc <<-LONGDESC
        Updates a page in a scraper's current job. Only page_type or page vars is updateable.\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      option :page_type, aliases: :t, desc: 'Set page type'
      option :priority, type: :numeric, desc: 'Set fetch priority. The higher the value, the sooner the page gets fetched. Default: 0'
      option :vars, aliases: :v, type: :string, desc: 'Set user-defined page variables. Must be in json format. i.e: {"Foo":"bar"}'
      def update(scraper_name, gid)
        options[:vars] = JSON.parse(options[:vars]) if options[:vars]

        if options[:job]
          puts Client::JobPage.new(options).update(options[:job], gid, options).to_s
        else
          puts Client::ScraperJobPage.new(options).update(scraper_name, gid, options).to_s
        end
      rescue JSON::ParserError
        if options[:vars]
          puts "Error: #{options[:vars]} on vars is not a valid JSON"
        end
      end

      desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
      long_desc <<-LONGDESC
        Refetch pages in a scraper's current job. You need to specify either a --gid or --fetch-fail or --parse-fail or --status.\x5
      LONGDESC
      option :gid, aliases: :g, type: :string, desc: 'Refetch a specific GID'
      option :fetch_fail, type: :boolean, desc: 'Refetches only pages that fails fetching.'
      option :parse_fail, type: :boolean, desc: 'Refetches only pages that fails parsing.'
      option :status, type: :string, desc: 'Refetches only pages with a specific status.'
      def refetch(scraper_name)
        # Refuse a bare refetch: at least one selector flag is required.
        if [:gid, :fetch_fail, :parse_fail, :status].none? { |key| options.key?(key) }
          puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
          return
        end
        puts Client::ScraperJobPage.new(options).refetch(scraper_name).to_s
      end

      desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
      long_desc <<-LONGDESC
        Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail or --status.\x5
      LONGDESC
      option :gid, aliases: :g, type: :string, desc: 'Reparse a specific GID'
      option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
      option :status, type: :string, desc: 'Reparse only pages with a specific status.'
      def reparse(scraper_name)
        # NOTE(review): no :vars option is declared on this command, so this
        # decode appears to be dead code carried over from update/add —
        # preserved as-is to keep behavior identical.
        options[:vars] = JSON.parse(options[:vars]) if options[:vars]

        if [:gid, :parse_fail, :status].none? { |key| options.key?(key) }
          puts "Must specify either a --gid, --parse-fail or --status"
          return
        end

        puts Client::ScraperJobPage.new(options).reparse(scraper_name).to_s
      rescue JSON::ParserError
        if options[:vars]
          puts "Error: #{options[:vars]} on vars is not a valid JSON"
        end
      end

      desc "show <scraper_name> <gid>", "Show a page in scraper's current job"
      long_desc <<-LONGDESC
        Shows a page in a scraper's current job.\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      def show(scraper_name, gid)
        if options[:job]
          puts Client::JobPage.new(options).find(options[:job], gid).to_s
        else
          puts Client::ScraperJobPage.new(options).find(scraper_name, gid).to_s
        end
      end

      desc "log <scraper_name> <gid>", "List log entries related to a job page"
      long_desc <<-LONGDESC
        Shows log related to a page in the job. Defaults to showing the most recent entries\x5
      LONGDESC
      option :job, aliases: :j, type: :numeric, desc: 'Set a specific job ID'
      option :head, aliases: :H, desc: 'Show the oldest log entries. If not set, newest entries is shown'
      option :parsing, aliases: :p, type: :boolean, desc: 'Show only log entries related to parsing'
      option :more, aliases: :m, desc: 'Show next set of log entries. Enter the `More token`'
      option :per_page, aliases: :P, type: :numeric, desc: 'Number of records per page. Max 5000 per page.'
      def log(scraper_name, gid)
        log_client = Client::JobLog.new(options)

        # Build the query string; paging/ordering flags are moved out of the
        # options hash (delete) so they only appear in the query.
        query = {}
        query["order"] = options.delete(:head) if options[:head]
        query["job_type"] = "parsing" if options[:parsing]
        query["page_token"] = options.delete(:more) if options[:more]
        query["per_page"] = options.delete(:per_page) if options[:per_page]

        puts "Fetching page logs..."

        result =
          if options[:job]
            log_client.all_job_page_log(options[:job], gid, {query: query})
          else
            log_client.scraper_all_job_page_log(scraper_name, gid, {query: query})
          end

        if result['entries'].nil? || result["entries"].length == 0
          puts "No logs yet, please try again later."
        else
          more_token = result["more_token"]

          result["entries"].each do |entry|
            puts "#{entry["timestamp"]} #{entry["severity"]}: #{entry["payload"]}" if entry.is_a?(Hash)
          end

          unless more_token.nil?
            puts "to see more entries, add: \"--more #{more_token}\""
          end
        end
      end

    end
  end

end