datahen 0.13.7 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datahen/cli/job.rb +14 -2
- data/lib/datahen/cli/scraper.rb +7 -5
- data/lib/datahen/cli/scraper_job.rb +10 -3
- data/lib/datahen/cli/scraper_page.rb +1 -1
- data/lib/datahen/client/base.rb +2 -0
- data/lib/datahen/client/job.rb +6 -2
- data/lib/datahen/client/job_stat.rb +24 -8
- data/lib/datahen/client/scraper_job.rb +6 -2
- data/lib/datahen/scraper/executor.rb +9 -3
- data/lib/datahen/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5f93e47c85c172722c4517fe4af0805d81387a440fb06922fb5d6e941994f66
|
4
|
+
data.tar.gz: 1c895d1e6ec3e8415202d581ed9fe48e3c0443f53355b0ea34c0a3418b4e306d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bc52173785501b7fe3ad3bd1de448d818033441eefb9a0526f68cb6a17595c4dda91e7f231501c4212892aaccda68e28ea2ac72c90ce5c89a7e78d06db5bf4b
|
7
|
+
data.tar.gz: d9fa5425007496e404258504127bedb1a7b786d609d9db15f6cdc485a2922aea5c7e8f25ca124142e0e41daadf22039d79c666f507029c3fc0c8388ca4a221df
|
data/lib/datahen/cli/job.rb
CHANGED
@@ -18,12 +18,24 @@ module Datahen
|
|
18
18
|
puts "#{client.all()}"
|
19
19
|
end
|
20
20
|
|
21
|
-
desc "show <job_id>", "Show a job"
|
21
|
+
desc "show <job_id>", "Show a job (Defaults to showing data from cached job)"
|
22
|
+
option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
|
22
23
|
def show(job_id)
|
23
24
|
client = Client::Job.new(options)
|
24
|
-
puts "#{client.find(job_id)}"
|
25
|
+
puts "#{client.find(job_id, options)}"
|
25
26
|
end
|
26
27
|
|
28
|
+
desc "stats <job_id>", "Get the stat for a job (Defaults to showing data from cached stats)"
|
29
|
+
long_desc <<-LONGDESC
|
30
|
+
Get stats for a scraper's current job\n
|
31
|
+
LONGDESC
|
32
|
+
option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
|
33
|
+
def stats(job_id)
|
34
|
+
client = Client::JobStat.new(options)
|
35
|
+
puts "#{client.job_current_stats(job_id, options)}"
|
36
|
+
end
|
37
|
+
|
38
|
+
|
27
39
|
end
|
28
40
|
end
|
29
41
|
|
data/lib/datahen/cli/scraper.rb
CHANGED
@@ -140,17 +140,18 @@ module Datahen
|
|
140
140
|
end
|
141
141
|
end
|
142
142
|
|
143
|
-
desc "stats <scraper_name>", "Get the
|
143
|
+
desc "stats <scraper_name>", "Get the stat for a current job (Defaults to showing data from cached stats)"
|
144
144
|
long_desc <<-LONGDESC
|
145
145
|
Get stats for a scraper's current job\n
|
146
146
|
LONGDESC
|
147
147
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
148
|
+
option :live, type: :boolean, desc: 'Get data from the live stats, not cached stats.'
|
148
149
|
def stats(scraper_name)
|
149
150
|
client = Client::JobStat.new(options)
|
150
151
|
if options[:job]
|
151
|
-
puts "#{client.job_current_stats(options[:job])}"
|
152
|
+
puts "#{client.job_current_stats(options[:job], options)}"
|
152
153
|
else
|
153
|
-
puts "#{client.scraper_job_current_stats(scraper_name)}"
|
154
|
+
puts "#{client.scraper_job_current_stats(scraper_name, options)}"
|
154
155
|
end
|
155
156
|
end
|
156
157
|
|
@@ -163,12 +164,13 @@ module Datahen
|
|
163
164
|
option :"max-timestamp", type: :string, desc: 'Ending timestamp point in time to query historic stats (inclusive)'
|
164
165
|
option :"limit", type: :numeric, desc: 'Limit stats retrieved'
|
165
166
|
option :"order", type: :numeric, desc: 'Order stats by timestamp [DESC]'
|
167
|
+
option :live, type: :boolean, desc: 'Get data from the live stats history, not cached stats history.'
|
166
168
|
def history(scraper_name)
|
167
169
|
client = Client::JobStat.new(options)
|
168
170
|
if options[:job]
|
169
|
-
puts "#{client.job_stats_history(options[:job])}"
|
171
|
+
puts "#{client.job_stats_history(options[:job], options)}"
|
170
172
|
else
|
171
|
-
puts "#{client.scraper_job_stats_history(scraper_name)}"
|
173
|
+
puts "#{client.scraper_job_stats_history(scraper_name, options)}"
|
172
174
|
end
|
173
175
|
end
|
174
176
|
|
data/lib/datahen/cli/scraper_job.rb
CHANGED
@@ -6,10 +6,17 @@ module Datahen
|
|
6
6
|
"#{basename} #{@package_name} #{command.usage}"
|
7
7
|
end
|
8
8
|
|
9
|
-
desc "show <scraper_name>", "Show a scraper's current job"
|
9
|
+
desc "show <scraper_name>", "Show a scraper's current job (Defaults to showing data from cached job)"
|
10
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
11
|
+
option :live, type: :boolean, desc: 'Get data from the live job, not cached job.'
|
10
12
|
def show(scraper_name)
|
11
|
-
|
12
|
-
|
13
|
+
if options[:job]
|
14
|
+
client = Client::Job.new(options)
|
15
|
+
puts "#{client.find(options[:job], options)}"
|
16
|
+
else
|
17
|
+
client = Client::ScraperJob.new(options)
|
18
|
+
puts "#{client.find(scraper_name, options)}"
|
19
|
+
end
|
13
20
|
end
|
14
21
|
|
15
22
|
|
data/lib/datahen/cli/scraper_page.rb
CHANGED
@@ -230,7 +230,7 @@ module Datahen
|
|
230
230
|
end
|
231
231
|
end
|
232
232
|
|
233
|
-
desc "failedcontent <gid>", "Show a page's failed content in scraper's current job"
|
233
|
+
desc "failedcontent <scraper_name> <gid>", "Show a page's failed content in scraper's current job"
|
234
234
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
235
235
|
def failedcontent(scraper_name, gid)
|
236
236
|
result = nil
|
data/lib/datahen/client/base.rb
CHANGED
data/lib/datahen/client/job.rb
CHANGED
@@ -6,8 +6,12 @@ module Datahen
|
|
6
6
|
self.class.get("/jobs", params)
|
7
7
|
end
|
8
8
|
|
9
|
-
def find(job_id)
|
10
|
-
|
9
|
+
def find(job_id, opts={})
|
10
|
+
if opts[:live]
|
11
|
+
self.class.get("/jobs/#{job_id}", @options)
|
12
|
+
else
|
13
|
+
self.class.get("/cached/jobs/#{job_id}", @options)
|
14
|
+
end
|
11
15
|
end
|
12
16
|
|
13
17
|
def update(job_id, opts={})
|
data/lib/datahen/client/job_stat.rb
CHANGED
@@ -2,20 +2,36 @@ module Datahen
|
|
2
2
|
module Client
|
3
3
|
class JobStat < Datahen::Client::Base
|
4
4
|
|
5
|
-
def job_current_stats(job_id)
|
6
|
-
|
5
|
+
def job_current_stats(job_id, opts={})
|
6
|
+
if opts[:live]
|
7
|
+
self.class.get("/jobs/#{job_id}/stats/current", @options)
|
8
|
+
else
|
9
|
+
self.class.get("/cached/jobs/#{job_id}/stats/current", @options)
|
10
|
+
end
|
7
11
|
end
|
8
12
|
|
9
|
-
def scraper_job_current_stats(scraper_name)
|
10
|
-
|
13
|
+
def scraper_job_current_stats(scraper_name, opts={})
|
14
|
+
if opts[:live]
|
15
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/stats/current", @options)
|
16
|
+
else
|
17
|
+
self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/current", @options)
|
18
|
+
end
|
11
19
|
end
|
12
20
|
|
13
|
-
def job_stats_history(job_id)
|
14
|
-
|
21
|
+
def job_stats_history(job_id, opts={})
|
22
|
+
if opts[:live]
|
23
|
+
self.class.get("/jobs/#{job_id}/stats/history", @options)
|
24
|
+
else
|
25
|
+
self.class.get("/cached/jobs/#{job_id}/stats/history", @options)
|
26
|
+
end
|
15
27
|
end
|
16
28
|
|
17
|
-
def scraper_job_stats_history(scraper_name)
|
18
|
-
|
29
|
+
def scraper_job_stats_history(scraper_name, opts={})
|
30
|
+
if opts[:live]
|
31
|
+
self.class.get("/scrapers/#{scraper_name}/current_job/stats/history", @options)
|
32
|
+
else
|
33
|
+
self.class.get("/cached/scrapers/#{scraper_name}/current_job/stats/history", @options)
|
34
|
+
end
|
19
35
|
end
|
20
36
|
|
21
37
|
end
|
data/lib/datahen/client/scraper_job.rb
CHANGED
@@ -15,8 +15,12 @@ module Datahen
|
|
15
15
|
self.class.post("/scrapers/#{scraper_name}/jobs", params)
|
16
16
|
end
|
17
17
|
|
18
|
-
def find(scraper_name)
|
19
|
-
|
18
|
+
def find(scraper_name, opts={})
|
19
|
+
if opts[:live]
|
20
|
+
self.class.get("/scrapers/#{scraper_name}/current_job", @options)
|
21
|
+
else
|
22
|
+
self.class.get("/cached/scrapers/#{scraper_name}/current_job", @options)
|
23
|
+
end
|
20
24
|
end
|
21
25
|
|
22
26
|
def update(scraper_name, opts={})
|
data/lib/datahen/scraper/executor.rb
CHANGED
@@ -60,7 +60,12 @@ module Datahen
|
|
60
60
|
|
61
61
|
def init_global_page()
|
62
62
|
client = Client::GlobalPage.new()
|
63
|
-
client.find(gid)
|
63
|
+
global_page = client.find(gid)
|
64
|
+
unless global_page.code == 200
|
65
|
+
raise "GID #{gid} not found. Aborting execution!"
|
66
|
+
else
|
67
|
+
global_page
|
68
|
+
end
|
64
69
|
end
|
65
70
|
|
66
71
|
def get_content(job_id, gid)
|
@@ -287,11 +292,12 @@ module Datahen
|
|
287
292
|
end
|
288
293
|
|
289
294
|
# behave differently if it is a real save
|
295
|
+
save_status = status
|
290
296
|
if save
|
291
297
|
log_msg = "Saving #{log_msgs.join(' and ')}."
|
292
298
|
puts "#{log_msg}"
|
293
299
|
else
|
294
|
-
|
300
|
+
save_status = "#{status}_try"
|
295
301
|
end
|
296
302
|
|
297
303
|
# saving to server
|
@@ -300,7 +306,7 @@ module Datahen
|
|
300
306
|
gid: gid,
|
301
307
|
pages: pages_slice,
|
302
308
|
outputs: outputs_slice,
|
303
|
-
status:
|
309
|
+
status: save_status)
|
304
310
|
|
305
311
|
if response.code == 200
|
306
312
|
if save
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.7
|
4
|
+
version: 0.14.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -264,7 +264,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
264
264
|
- !ruby/object:Gem::Version
|
265
265
|
version: '0'
|
266
266
|
requirements: []
|
267
|
-
rubygems_version: 3.
|
267
|
+
rubygems_version: 3.1.2
|
268
268
|
signing_key:
|
269
269
|
specification_version: 4
|
270
270
|
summary: DataHen toolbelt for developers
|