datahen 0.13.7 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c05f6ab973fe74a21e7f68411a66b97444575cc75c5812e99c5e0da4ffe05d56
4
- data.tar.gz: dac1d2be8f6281d3da328abd5f844036765991764bb1e3ffad4b72ee4c18eac6
3
+ metadata.gz: b5f93e47c85c172722c4517fe4af0805d81387a440fb06922fb5d6e941994f66
4
+ data.tar.gz: 1c895d1e6ec3e8415202d581ed9fe48e3c0443f53355b0ea34c0a3418b4e306d
5
5
  SHA512:
6
- metadata.gz: 48c15f9830308488d434dce7b7cf4888795724d88d4ca9de63cc2deca8397798b815145430af2143eb646366bfc4e97f37e563315214258b434276919e724ac7
7
- data.tar.gz: faee0369f81ab45dbb6c62258a6214d530d51d5dcb0f6b6ebd7028bda2c48ccbe630b41006ffcc96dba773a28e84e724b6b1f2266ef7866605e5423f1ced61d2
6
+ metadata.gz: 0bc52173785501b7fe3ad3bd1de448d818033441eefb9a0526f68cb6a17595c4dda91e7f231501c4212892aaccda68e28ea2ac72c90ce5c89a7e78d06db5bf4b
7
+ data.tar.gz: d9fa5425007496e404258504127bedb1a7b786d609d9db15f6cdc485a2922aea5c7e8f25ca124142e0e41daadf22039d79c666f507029c3fc0c8388ca4a221df
@@ -18,12 +18,24 @@ module Datahen
18
18
  puts "#{client.all()}"
19
19
  end
20
20
 
21
- desc "show <job_id>", "Show a job"
21
+ desc "show <job_id>", "Show a job (Defaults to showing data from cached job)"
22
+ option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
22
23
  def show(job_id)
23
24
  client = Client::Job.new(options)
24
- puts "#{client.find(job_id)}"
25
+ puts "#{client.find(job_id, options)}"
25
26
  end
26
27
 
28
+ desc "stats <job_id>", "Get the stat for a job (Defaults to showing data from cached stats)"
29
+ long_desc <<-LONGDESC
30
+ Get stats for a scraper's current job\n
31
+ LONGDESC
32
+ option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
33
+ def stats(job_id)
34
+ client = Client::JobStat.new(options)
35
+ puts "#{client.job_current_stats(job_id, options)}"
36
+ end
37
+
38
+
27
39
  end
28
40
  end
29
41
 
@@ -140,17 +140,18 @@ module Datahen
140
140
  end
141
141
  end
142
142
 
143
- desc "stats <scraper_name>", "Get the current stat for a job"
143
+ desc "stats <scraper_name>", "Get the stat for a current job (Defaults to showing data from cached stats)"
144
144
  long_desc <<-LONGDESC
145
145
  Get stats for a scraper's current job\n
146
146
  LONGDESC
147
147
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
148
+ option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
148
149
  def stats(scraper_name)
149
150
  client = Client::JobStat.new(options)
150
151
  if options[:job]
151
- puts "#{client.job_current_stats(options[:job])}"
152
+ puts "#{client.job_current_stats(options[:job], options)}"
152
153
  else
153
- puts "#{client.scraper_job_current_stats(scraper_name)}"
154
+ puts "#{client.scraper_job_current_stats(scraper_name, options)}"
154
155
  end
155
156
  end
156
157
 
@@ -163,12 +164,13 @@ module Datahen
163
164
  option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
164
165
  option :"limit", type: :numeric, desc: 'Limit stats retrieved'
165
166
  option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
167
+ option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
166
168
  def history(scraper_name)
167
169
  client = Client::JobStat.new(options)
168
170
  if options[:job]
169
- puts "#{client.job_stats_history(options[:job])}"
171
+ puts "#{client.job_stats_history(options[:job], options)}"
170
172
  else
171
- puts "#{client.scraper_job_stats_history(scraper_name)}"
173
+ puts "#{client.scraper_job_stats_history(scraper_name, options)}"
172
174
  end
173
175
  end
174
176
 
@@ -6,10 +6,17 @@ module Datahen
6
6
  "#{basename} #{@package_name} #{command.usage}"
7
7
  end
8
8
 
9
- desc "show <scraper_name>", "Show a scraper's current job"
9
+ desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
10
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
11
+ option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
10
12
  def show(scraper_name)
11
- client = Client::ScraperJob.new(options)
12
- puts "#{client.find(scraper_name)}"
13
+ if options[:job]
14
+ client = Client::Job.new(options)
15
+ puts "#{client.find(options[:job], options)}"
16
+ else
17
+ client = Client::ScraperJob.new(options)
18
+ puts "#{client.find(scraper_name, options)}"
19
+ end
13
20
  end
14
21
 
15
22
 
@@ -230,7 +230,7 @@ module Datahen
230
230
  end
231
231
  end
232
232
 
233
- desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
233
+ desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
234
234
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
235
235
  def failedcontent(scraper_name, gid)
236
236
  result = nil
@@ -5,6 +5,8 @@ module Datahen
5
5
  class Base
6
6
  include HTTParty
7
7
 
8
+ default_timeout 60
9
+
8
10
  def self.env_auth_token
9
11
  ENV['DATAHEN_TOKEN']
10
12
  end
@@ -6,8 +6,12 @@ module Datahen
6
6
  self.class.get("/jobs", params)
7
7
  end
8
8
 
9
- def find(job_id)
10
- self.class.get("/jobs/#{job_id}", @options)
9
+ def find(job_id, opts={})
10
+ if opts[:live]
11
+ self.class.get("/jobs/#{job_id}", @options)
12
+ else
13
+ self.class.get("/cached/jobs/#{job_id}", @options)
14
+ end
11
15
  end
12
16
 
13
17
  def update(job_id, opts={})
@@ -2,20 +2,36 @@ module Datahen
2
2
  module Client
3
3
  class JobStat < Datahen::Client::Base
4
4
 
5
- def job_current_stats(job_id)
6
- self.class.get("/jobs/#{job_id}/stats/current", @options)
5
+ def job_current_stats(job_id, opts={})
6
+ if opts[:live]
7
+ self.class.get("/jobs/#{job_id}/stats/current", @options)
8
+ else
9
+ self.class.get("/cached/jobs/#{job_id}/stats/current", @options)
10
+ end
7
11
  end
8
12
 
9
- def scraper_job_current_stats(scraper_name)
10
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/current", @options)
13
+ def scraper_job_current_stats(scraper_name, opts={})
14
+ if opts[:live]
15
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/current", @options)
16
+ else
17
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/current", @options)
18
+ end
11
19
  end
12
20
 
13
- def job_stats_history(job_id)
14
- self.class.get("/jobs/#{job_id}/stats/history", @options)
21
+ def job_stats_history(job_id, opts={})
22
+ if opts[:live]
23
+ self.class.get("/jobs/#{job_id}/stats/history", @options)
24
+ else
25
+ self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
26
+ end
15
27
  end
16
28
 
17
- def scraper_job_stats_history(scraper_name)
18
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
29
+ def scraper_job_stats_history(scraper_name, opts={})
30
+ if opts[:live]
31
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
32
+ else
33
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
34
+ end
19
35
  end
20
36
 
21
37
  end
@@ -15,8 +15,12 @@ module Datahen
15
15
  self.class.post("/scrapers/#{scraper_name}/jobs", params)
16
16
  end
17
17
 
18
- def find(scraper_name)
19
- self.class.get("/scrapers/#{scraper_name}/current_job", @options)
18
+ def find(scraper_name, opts={})
19
+ if opts[:live]
20
+ self.class.get("/scrapers/#{scraper_name}/current_job", @options)
21
+ else
22
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job", @options)
23
+ end
20
24
  end
21
25
 
22
26
  def update(scraper_name, opts={})
@@ -60,7 +60,12 @@ module Datahen
60
60
 
61
61
  def init_global_page()
62
62
  client = Client::GlobalPage.new()
63
- client.find(gid)
63
+ global_page = client.find(gid)
64
+ unless global_page.code == 200
65
+ raise "GID #{gid} not found. Aborting execution!"
66
+ else
67
+ global_page
68
+ end
64
69
  end
65
70
 
66
71
  def get_content(job_id, gid)
@@ -287,11 +292,12 @@ module Datahen
287
292
  end
288
293
 
289
294
  # behave differently if it is a real save
295
+ save_status = status
290
296
  if save
291
297
  log_msg = "Saving #{log_msgs.join(' and ')}."
292
298
  puts "#{log_msg}"
293
299
  else
294
- status = "#{status}_try"
300
+ save_status = "#{status}_try"
295
301
  end
296
302
 
297
303
  # saving to server
@@ -300,7 +306,7 @@ module Datahen
300
306
  gid: gid,
301
307
  pages: pages_slice,
302
308
  outputs: outputs_slice,
303
- status: status)
309
+ status: save_status)
304
310
 
305
311
  if response.code == 200
306
312
  if save
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.13.7"
2
+ VERSION = "0.14.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.7
4
+ version: 0.14.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-06 00:00:00.000000000 Z
11
+ date: 2020-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -264,7 +264,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
264
264
  - !ruby/object:Gem::Version
265
265
  version: '0'
266
266
  requirements: []
267
- rubygems_version: 3.0.3
267
+ rubygems_version: 3.1.2
268
268
  signing_key:
269
269
  specification_version: 4
270
270
  summary: DataHen toolbelt for developers