datahen 0.14.0 → 0.14.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ff2ed2cd4772450c01e3e88248ae89441de709198fdd177d3e572bbc5f0e474
4
- data.tar.gz: 5701717fcba8a05b6f3e027d9bce33a3830fa20dabe3413255779899478cb4ab
3
+ metadata.gz: e81f515b9874f6b9cd2943873e7ce54249d0c4662833e4eb9133bb8827901a3f
4
+ data.tar.gz: 02f860429555067ab0d034c7e46164ee3537844c34f40ea0949300a2de294187
5
5
  SHA512:
6
- metadata.gz: 949ad06a090a4ac8c2ef5b4e053ed4b7668c051be15b6959a2948614e771c25e18774d9ee97fe1f5c03c130986b671a8b26ac253f592a993fa4ad393bcad7673
7
- data.tar.gz: b73cfc6c070314f97cbc7917d571de67031247aac42f3474b2e71d04e8b3d650fc380a0ce3ca65c1d8339bf8743d94b666ecccca4431f7b89df4e7485a03a382
6
+ metadata.gz: 8cffdec9fa56f0d5ee22215b0eb64c0f2de45b1a3f031729f9929625ccf69e270fea2e554a34d583de71e02c7bbc4f0acc82ca6ad5c0f3ea496176277afd8818
7
+ data.tar.gz: 66b108bc587b3381a6202877f7915653a4ee1c1e058eeeab192aa1ba3614d7843c8dbcb67a7f08d57c6131ac03398a20a09ac7f309acdd0c244f06c46aa84d30
@@ -60,7 +60,7 @@ module Datahen
60
60
  desc "show <scraper_name>", "Show a scraper"
61
61
  def show(scraper_name)
62
62
  client = Client::Scraper.new(options)
63
- puts "#{client.find(scraper_name, options)}"
63
+ puts "#{client.find(scraper_name)}"
64
64
  end
65
65
 
66
66
  desc "delete <scraper_name>", "Delete a scraper and related records"
@@ -164,12 +164,14 @@ module Datahen
164
164
  option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
165
165
  option :"limit", type: :numeric, desc: 'Limit stats retrieved'
166
166
  option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
167
+ option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
168
+ option :filter, type: :string, desc: 'Filter results on `day` or `hour`, if not specified will return all records.'
167
169
  def history(scraper_name)
168
170
  client = Client::JobStat.new(options)
169
171
  if options[:job]
170
- puts "#{client.job_stats_history(options[:job])}"
172
+ puts "#{client.job_stats_history(options[:job], options)}"
171
173
  else
172
- puts "#{client.scraper_job_stats_history(scraper_name)}"
174
+ puts "#{client.scraper_job_stats_history(scraper_name, options)}"
173
175
  end
174
176
  end
175
177
 
@@ -7,10 +7,16 @@ module Datahen
7
7
  end
8
8
 
9
9
  desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
10
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
10
11
  option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
11
12
  def show(scraper_name)
12
- client = Client::ScraperJob.new(options)
13
- puts "#{client.find(scraper_name, options)}"
13
+ if options[:job]
14
+ client = Client::Job.new(options)
15
+ puts "#{client.find(options[:job], options)}"
16
+ else
17
+ client = Client::ScraperJob.new(options)
18
+ puts "#{client.find(scraper_name, options)}"
19
+ end
14
20
  end
15
21
 
16
22
 
@@ -230,7 +230,7 @@ module Datahen
230
230
  end
231
231
  end
232
232
 
233
- desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
233
+ desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
234
234
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
235
235
  def failedcontent(scraper_name, gid)
236
236
  result = nil
@@ -5,6 +5,8 @@ module Datahen
5
5
  class Base
6
6
  include HTTParty
7
7
 
8
+ default_timeout 60
9
+
8
10
  def self.env_auth_token
9
11
  ENV['DATAHEN_TOKEN']
10
12
  end
@@ -55,6 +57,7 @@ module Datahen
55
57
  query[:"max-timestamp"] = opts[:"max-timestamp"] if opts[:"max-timestamp"]
56
58
  query[:limit] = opts[:limit] if opts[:limit]
57
59
  query[:order] = opts[:order] if opts[:order]
60
+ query[:filter] = opts[:filter] if opts[:filter]
58
61
 
59
62
  if opts[:query]
60
63
  if opts[:query].is_a?(Hash)
@@ -18,12 +18,20 @@ module Datahen
18
18
  end
19
19
  end
20
20
 
21
- def job_stats_history(job_id)
22
- self.class.get("/jobs/#{job_id}/stats/history", @options)
21
+ def job_stats_history(job_id, opts={})
22
+ if opts[:live]
23
+ self.class.get("/jobs/#{job_id}/stats/history", @options)
24
+ else
25
+ self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
26
+ end
23
27
  end
24
28
 
25
- def scraper_job_stats_history(scraper_name)
26
- self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
29
+ def scraper_job_stats_history(scraper_name, opts={})
30
+ if opts[:live]
31
+ self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
32
+ else
33
+ self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
34
+ end
27
35
  end
28
36
 
29
37
  end
@@ -60,7 +60,12 @@ module Datahen
60
60
 
61
61
  def init_global_page()
62
62
  client = Client::GlobalPage.new()
63
- client.find(gid)
63
+ global_page = client.find(gid)
64
+ unless global_page.code == 200
65
+ raise "GID #{gid} not found. Aborting execution!"
66
+ else
67
+ global_page
68
+ end
64
69
  end
65
70
 
66
71
  def get_content(job_id, gid)
@@ -287,11 +292,12 @@ module Datahen
287
292
  end
288
293
 
289
294
  # behave differently if it is a real save
295
+ save_status = status
290
296
  if save
291
297
  log_msg = "Saving #{log_msgs.join(' and ')}."
292
298
  puts "#{log_msg}"
293
299
  else
294
- status = "#{status}_try"
300
+ save_status = "#{status}_try"
295
301
  end
296
302
 
297
303
  # saving to server
@@ -300,7 +306,7 @@ module Datahen
300
306
  gid: gid,
301
307
  pages: pages_slice,
302
308
  outputs: outputs_slice,
303
- status: status)
309
+ status: save_status)
304
310
 
305
311
  if response.code == 200
306
312
  if save
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.0"
2
+ VERSION = "0.14.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.14.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-26 00:00:00.000000000 Z
11
+ date: 2020-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor