datahen 0.13.7 → 0.14.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c05f6ab973fe74a21e7f68411a66b97444575cc75c5812e99c5e0da4ffe05d56
4
- data.tar.gz: dac1d2be8f6281d3da328abd5f844036765991764bb1e3ffad4b72ee4c18eac6
3
+ metadata.gz: b5f93e47c85c172722c4517fe4af0805d81387a440fb06922fb5d6e941994f66
4
+ data.tar.gz: 1c895d1e6ec3e8415202d581ed9fe48e3c0443f53355b0ea34c0a3418b4e306d
5
5
  SHA512:
6
- metadata.gz: 48c15f9830308488d434dce7b7cf4888795724d88d4ca9de63cc2deca8397798b815145430af2143eb646366bfc4e97f37e563315214258b434276919e724ac7
7
- data.tar.gz: faee0369f81ab45dbb6c62258a6214d530d51d5dcb0f6b6ebd7028bda2c48ccbe630b41006ffcc96dba773a28e84e724b6b1f2266ef7866605e5423f1ced61d2
6
+ metadata.gz: 0bc52173785501b7fe3ad3bd1de448d818033441eefb9a0526f68cb6a17595c4dda91e7f231501c4212892aaccda68e28ea2ac72c90ce5c89a7e78d06db5bf4b
7
+ data.tar.gz: d9fa5425007496e404258504127bedb1a7b786d609d9db15f6cdc485a2922aea5c7e8f25ca124142e0e41daadf22039d79c666f507029c3fc0c8388ca4a221df
@@ -18,12 +18,24 @@ module Datahen
18
18
  puts "#{client.all()}"
19
19
  end
20
20
 
21
- desc "show <job_id>", "Show a job"
21
+ desc "show <job_id>", "Show a job (Defaults to showing data from cached job)"
22
+ option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
22
23
  def show(job_id)
23
24
  client = Client::Job.new(options)
24
- puts "#{client.find(job_id)}"
25
+ puts "#{client.find(job_id, options)}"
25
26
  end
26
27
 
28
+ desc "stats <job_id>", "Get the stat for a job (Defaults to showing data from cached stats)"
29
+ long_desc <<-LONGDESC
30
+ Get stats for a scraper's current job\n
31
+ LONGDESC
32
+ option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
33
+ def stats(job_id)
34
+ client = Client::JobStat.new(options)
35
+ puts "#{client.job_current_stats(job_id, options)}"
36
+ end
37
+
38
+
27
39
  end
28
40
  end
29
41
 
@@ -140,17 +140,18 @@ module Datahen
140
140
  end
141
141
  end
142
142
 
143
- desc "stats <scraper_name>", "Get the current stat for a job"
143
+ desc "stats <scraper_name>", "Get the stat for a current job (Defaults to showing data from cached stats)"
144
144
  long_desc <<-LONGDESC
145
145
  Get stats for a scraper's current job\n
146
146
  LONGDESC
147
147
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
148
+ option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
148
149
  def stats(scraper_name)
149
150
  client = Client::JobStat.new(options)
150
151
  if options[:job]
151
- puts "#{client.job_current_stats(options[:job])}"
152
+ puts "#{client.job_current_stats(options[:job], options)}"
152
153
  else
153
- puts "#{client.scraper_job_current_stats(scraper_name)}"
154
+ puts "#{client.scraper_job_current_stats(scraper_name, options)}"
154
155
  end
155
156
  end
156
157
 
@@ -163,12 +164,13 @@ module Datahen
163
164
  option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
164
165
  option :"limit", type: :numeric, desc: 'Limit stats retrieved'
165
166
  option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
167
+ option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
166
168
  def history(scraper_name)
167
169
  client = Client::JobStat.new(options)
168
170
  if options[:job]
169
- puts "#{client.job_stats_history(options[:job])}"
171
+ puts "#{client.job_stats_history(options[:job], options)}"
170
172
  else
171
- puts "#{client.scraper_job_stats_history(scraper_name)}"
173
+ puts "#{client.scraper_job_stats_history(scraper_name, options)}"
172
174
  end
173
175
  end
174
176
 
@@ -6,10 +6,17 @@ module Datahen
6
6
  "#{basename} #{@package_name} #{command.usage}"
7
7
  end
8
8
 
9
- desc "show <scraper_name>", "Show a scraper's current job"
9
+ desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
10
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
11
+ option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
10
12
  def show(scraper_name)
11
- client = Client::ScraperJob.new(options)
12
- puts "#{client.find(scraper_name)}"
13
+ if options[:job]
14
+ client = Client::Job.new(options)
15
+ puts "#{client.find(options[:job], options)}"
16
+ else
17
+ client = Client::ScraperJob.new(options)
18
+ puts "#{client.find(scraper_name, options)}"
19
+ end
13
20
  end
14
21
 
15
22
 
@@ -230,7 +230,7 @@ module Datahen
230
230
  end
231
231
  end
232
232
 
233
- desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
233
+ desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
234
234
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
235
235
  def failedcontent(scraper_name, gid)
236
236
  result = nil
@@ -5,6 +5,8 @@ module Datahen
5
5
  class Base
6
6
  include HTTParty
7
7
 
8
+ default_timeout 60
9
+
8
10
  def self.env_auth_token
9
11
  ENV['DATAHEN_TOKEN']
10
12
  end
@@ -6,8 +6,12 @@ module Datahen
6
6
  self.class.get("/jobs", params)
7
7
  end
8
8
 
9
- def find(job_id)
10
- self.class.get("/jobs/#{job_id}", @options)
9
+ def find(job_id, opts={})
10
+ if opts[:live]
11
+ self.class.get("/jobs/#{job_id}", @options)
12
+ else
13
+ self.class.get("/cached/jobs/#{job_id}", @options)
14
+ end
11
15
  end
12
16
 
13
17
  def update(job_id, opts={})
@@ -2,20 +2,36 @@ module Datahen
2
2
  module Client
3
3
  class JobStat < Datahen::Client::Base
4
4
 
5
- def job_current_stats(job_id)
6
- self.class.get("/jobs/#{job_id}/stats/current", @options)
5
+ def job_current_stats(job_id, opts={})
6
+ if opts[:live]
7
+ self.class.get("/jobs/#{job_id}/stats/current", @options)
8
+ else
9
+ self.class.get("/cached/jobs/#{job_id}/stats/current", @options)
10
+ end
7
11
  end
8
12
 
9
- def scraper_job_current_stats(scraper_name)
10
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/current", @options)
13
+ def scraper_job_current_stats(scraper_name, opts={})
14
+ if opts[:live]
15
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/current", @options)
16
+ else
17
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/current", @options)
18
+ end
11
19
  end
12
20
 
13
- def job_stats_history(job_id)
14
- self.class.get("/jobs/#{job_id}/stats/history", @options)
21
+ def job_stats_history(job_id, opts={})
22
+ if opts[:live]
23
+ self.class.get("/jobs/#{job_id}/stats/history", @options)
24
+ else
25
+ self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
26
+ end
15
27
  end
16
28
 
17
- def scraper_job_stats_history(scraper_name)
18
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
29
+ def scraper_job_stats_history(scraper_name, opts={})
30
+ if opts[:live]
31
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
32
+ else
33
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
34
+ end
19
35
  end
20
36
 
21
37
  end
@@ -15,8 +15,12 @@ module Datahen
15
15
  self.class.post("/scrapers/#{scraper_name}/jobs", params)
16
16
  end
17
17
 
18
- def find(scraper_name)
19
- self.class.get("/scrapers/#{scraper_name}/current_job", @options)
18
+ def find(scraper_name, opts={})
19
+ if opts[:live]
20
+ self.class.get("/scrapers/#{scraper_name}/current_job", @options)
21
+ else
22
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job", @options)
23
+ end
20
24
  end
21
25
 
22
26
  def update(scraper_name, opts={})
@@ -60,7 +60,12 @@ module Datahen
60
60
 
61
61
  def init_global_page()
62
62
  client = Client::GlobalPage.new()
63
- client.find(gid)
63
+ global_page = client.find(gid)
64
+ unless global_page.code == 200
65
+ raise "GID #{gid} not found. Aborting execution!"
66
+ else
67
+ global_page
68
+ end
64
69
  end
65
70
 
66
71
  def get_content(job_id, gid)
@@ -287,11 +292,12 @@ module Datahen
287
292
  end
288
293
 
289
294
  # behave differently if it is a real save
295
+ save_status = status
290
296
  if save
291
297
  log_msg = "Saving #{log_msgs.join(' and ')}."
292
298
  puts "#{log_msg}"
293
299
  else
294
- status = "#{status}_try"
300
+ save_status = "#{status}_try"
295
301
  end
296
302
 
297
303
  # saving to server
@@ -300,7 +306,7 @@ module Datahen
300
306
  gid: gid,
301
307
  pages: pages_slice,
302
308
  outputs: outputs_slice,
303
- status: status)
309
+ status: save_status)
304
310
 
305
311
  if response.code == 200
306
312
  if save
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.13.7"
2
+ VERSION = "0.14.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.7
4
+ version: 0.14.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-06 00:00:00.000000000 Z
11
+ date: 2020-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -264,7 +264,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
264
264
  - !ruby/object:Gem::Version
265
265
  version: '0'
266
266
  requirements: []
267
- rubygems_version: 3.0.3
267
+ rubygems_version: 3.1.2
268
268
  signing_key:
269
269
  specification_version: 4
270
270
  summary: DataHen toolbelt for developers