datahen 0.14.0 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ff2ed2cd4772450c01e3e88248ae89441de709198fdd177d3e572bbc5f0e474
4
- data.tar.gz: 5701717fcba8a05b6f3e027d9bce33a3830fa20dabe3413255779899478cb4ab
3
+ metadata.gz: e81f515b9874f6b9cd2943873e7ce54249d0c4662833e4eb9133bb8827901a3f
4
+ data.tar.gz: 02f860429555067ab0d034c7e46164ee3537844c34f40ea0949300a2de294187
5
5
  SHA512:
6
- metadata.gz: 949ad06a090a4ac8c2ef5b4e053ed4b7668c051be15b6959a2948614e771c25e18774d9ee97fe1f5c03c130986b671a8b26ac253f592a993fa4ad393bcad7673
7
- data.tar.gz: b73cfc6c070314f97cbc7917d571de67031247aac42f3474b2e71d04e8b3d650fc380a0ce3ca65c1d8339bf8743d94b666ecccca4431f7b89df4e7485a03a382
6
+ metadata.gz: 8cffdec9fa56f0d5ee22215b0eb64c0f2de45b1a3f031729f9929625ccf69e270fea2e554a34d583de71e02c7bbc4f0acc82ca6ad5c0f3ea496176277afd8818
7
+ data.tar.gz: 66b108bc587b3381a6202877f7915653a4ee1c1e058eeeab192aa1ba3614d7843c8dbcb67a7f08d57c6131ac03398a20a09ac7f309acdd0c244f06c46aa84d30
@@ -60,7 +60,7 @@ module Datahen
60
60
  desc "show <scraper_name>", "Show a scraper"
61
61
  def show(scraper_name)
62
62
  client = Client::Scraper.new(options)
63
- puts "#{client.find(scraper_name, options)}"
63
+ puts "#{client.find(scraper_name)}"
64
64
  end
65
65
 
66
66
  desc "delete <scraper_name>", "Delete a scraper and related records"
@@ -164,12 +164,14 @@ module Datahen
164
164
  option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
165
165
  option :"limit", type: :numeric, desc: 'Limit stats retrieved'
166
166
  option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
167
+ option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
168
+ option :filter, type: :string, desc: 'Filter results on `day` or `hour`, if not specified will return all records.'
167
169
  def history(scraper_name)
168
170
  client = Client::JobStat.new(options)
169
171
  if options[:job]
170
- puts "#{client.job_stats_history(options[:job])}"
172
+ puts "#{client.job_stats_history(options[:job], options)}"
171
173
  else
172
- puts "#{client.scraper_job_stats_history(scraper_name)}"
174
+ puts "#{client.scraper_job_stats_history(scraper_name, options)}"
173
175
  end
174
176
  end
175
177
 
@@ -7,10 +7,16 @@ module Datahen
7
7
  end
8
8
 
9
9
  desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
10
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
10
11
  option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
11
12
  def show(scraper_name)
12
- client = Client::ScraperJob.new(options)
13
- puts "#{client.find(scraper_name, options)}"
13
+ if options[:job]
14
+ client = Client::Job.new(options)
15
+ puts "#{client.find(options[:job], options)}"
16
+ else
17
+ client = Client::ScraperJob.new(options)
18
+ puts "#{client.find(scraper_name, options)}"
19
+ end
14
20
  end
15
21
 
16
22
 
@@ -230,7 +230,7 @@ module Datahen
230
230
  end
231
231
  end
232
232
 
233
- desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
233
+ desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
234
234
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
235
235
  def failedcontent(scraper_name, gid)
236
236
  result = nil
@@ -5,6 +5,8 @@ module Datahen
5
5
  class Base
6
6
  include HTTParty
7
7
 
8
+ default_timeout 60
9
+
8
10
  def self.env_auth_token
9
11
  ENV['DATAHEN_TOKEN']
10
12
  end
@@ -55,6 +57,7 @@ module Datahen
55
57
  query[:"max-timestamp"] = opts[:"max-timestamp"] if opts[:"max-timestamp"]
56
58
  query[:limit] = opts[:limit] if opts[:limit]
57
59
  query[:order] = opts[:order] if opts[:order]
60
+ query[:filter] = opts[:filter] if opts[:filter]
58
61
 
59
62
  if opts[:query]
60
63
  if opts[:query].is_a?(Hash)
@@ -18,12 +18,20 @@ module Datahen
18
18
  end
19
19
  end
20
20
 
21
- def job_stats_history(job_id)
22
- self.class.get("/jobs/#{job_id}/stats/history", @options)
21
+ def job_stats_history(job_id, opts={})
22
+ if opts[:live]
23
+ self.class.get("/jobs/#{job_id}/stats/history", @options)
24
+ else
25
+ self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
26
+ end
23
27
  end
24
28
 
25
- def scraper_job_stats_history(scraper_name)
26
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
29
+ def scraper_job_stats_history(scraper_name, opts={})
30
+ if opts[:live]
31
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
32
+ else
33
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
34
+ end
27
35
  end
28
36
 
29
37
  end
@@ -60,7 +60,12 @@ module Datahen
60
60
 
61
61
  def init_global_page()
62
62
  client = Client::GlobalPage.new()
63
- client.find(gid)
63
+ global_page = client.find(gid)
64
+ unless global_page.code == 200
65
+ raise "GID #{gid} not found. Aborting execution!"
66
+ else
67
+ global_page
68
+ end
64
69
  end
65
70
 
66
71
  def get_content(job_id, gid)
@@ -287,11 +292,12 @@ module Datahen
287
292
  end
288
293
 
289
294
  # behave differently if it is a real save
295
+ save_status = status
290
296
  if save
291
297
  log_msg = "Saving #{log_msgs.join(' and ')}."
292
298
  puts "#{log_msg}"
293
299
  else
294
- status = "#{status}_try"
300
+ save_status = "#{status}_try"
295
301
  end
296
302
 
297
303
  # saving to server
@@ -300,7 +306,7 @@ module Datahen
300
306
  gid: gid,
301
307
  pages: pages_slice,
302
308
  outputs: outputs_slice,
303
- status: status)
309
+ status: save_status)
304
310
 
305
311
  if response.code == 200
306
312
  if save
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.0"
2
+ VERSION = "0.14.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.14.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-26 00:00:00.000000000 Z
11
+ date: 2020-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor