datahen 0.14.9 → 0.14.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datahen/cli/global_page.rb +4 -1
- data/lib/datahen/cli/job.rb +2 -3
- data/lib/datahen/cli/scraper.rb +4 -0
- data/lib/datahen/cli/scraper_export.rb +4 -1
- data/lib/datahen/cli/scraper_job.rb +19 -2
- data/lib/datahen/cli/scraper_page.rb +8 -2
- data/lib/datahen/client/auth_token.rb +1 -1
- data/lib/datahen/client/base.rb +1 -0
- data/lib/datahen/client/job.rb +5 -0
- data/lib/datahen/client/scraper.rb +4 -0
- data/lib/datahen/client/scraper_job.rb +5 -0
- data/lib/datahen/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 206ac0cf7cc5dc64deecff24f1d731f353951390c4c96d3cc6721a4dd0ecb258
|
4
|
+
data.tar.gz: 00d8e528649b2296d585d5008ebbf2de62d100449e5841f4f220147fcebc2484
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2c66c9527f38db20a7f0ed589c560a2dbafdd038e1834b8552305cd665fec8063fab3c6dcd31eab9df07f2bc04ce5682272bea61772f0dfac20cf84f05b4be35
|
7
|
+
data.tar.gz: 6c327375e3e1eba2235194f0b58ab280b5ecbbb36fd0cf4808d9cf89b539240d0fead54ee8a4019bd0f196ddd2c7621b2ace8c1dd2da80ba6b665b1ca41a015f
|
data/lib/datahen/cli/global_page.rb
CHANGED
@@ -15,7 +15,10 @@ module Datahen
|
|
15
15
|
|
16
16
|
if result['available'] == true
|
17
17
|
puts "Preview content url: \"#{result['preview_url']}\""
|
18
|
-
|
18
|
+
begin
|
19
|
+
`open "#{result['preview_url']}"`
|
20
|
+
rescue
|
21
|
+
end
|
19
22
|
else
|
20
23
|
puts "Content does not exist"
|
21
24
|
end
|
data/lib/datahen/cli/job.rb
CHANGED
@@ -6,10 +6,10 @@ module Datahen
|
|
6
6
|
"#{basename} #{@package_name} #{command.usage}"
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
9
|
desc "list", "gets a list of jobs"
|
11
10
|
option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
|
12
11
|
option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
|
12
|
+
option :status, type: :string, desc: 'Returns jobs with a certain status'
|
13
13
|
long_desc <<-LONGDESC
|
14
14
|
List scrape jobs.
|
15
15
|
LONGDESC
|
@@ -34,8 +34,7 @@ module Datahen
|
|
34
34
|
client = Client::JobStat.new(options)
|
35
35
|
puts "#{client.job_current_stats(job_id, options)}"
|
36
36
|
end
|
37
|
-
|
38
|
-
|
37
|
+
|
39
38
|
end
|
40
39
|
end
|
41
40
|
|
data/lib/datahen/cli/scraper.rb
CHANGED
@@ -30,6 +30,8 @@ module Datahen
|
|
30
30
|
option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
|
31
31
|
option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
|
32
32
|
option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
|
33
|
+
option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
|
34
|
+
option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
|
33
35
|
def create(scraper_name, git_repository)
|
34
36
|
# puts "options #{options}"
|
35
37
|
client = Client::Scraper.new(options)
|
@@ -53,6 +55,8 @@ module Datahen
|
|
53
55
|
option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
|
54
56
|
option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
|
55
57
|
option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
|
58
|
+
option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
|
59
|
+
option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
|
56
60
|
def update(scraper_name)
|
57
61
|
client = Client::Scraper.new(options)
|
58
62
|
puts "#{client.update(scraper_name, options)}"
|
data/lib/datahen/cli/scraper_job.rb
CHANGED
@@ -26,6 +26,7 @@ module Datahen
|
|
26
26
|
LONGDESC
|
27
27
|
option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
|
28
28
|
option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
|
29
|
+
option :status, type: :string, desc: 'Returns jobs with a certain status'
|
29
30
|
def list(scraper_name)
|
30
31
|
client = Client::ScraperJob.new(options)
|
31
32
|
puts "#{client.all(scraper_name)}"
|
@@ -47,6 +48,21 @@ module Datahen
|
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
51
|
+
desc "delete <scraper_name>", "delete a scraper's current job"
|
52
|
+
long_desc <<-LONGDESC
|
53
|
+
Delete a scraper's current job
|
54
|
+
LONGDESC
|
55
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
56
|
+
def delete(scraper_name)
|
57
|
+
if options[:job]
|
58
|
+
client = Client::Job.new(options)
|
59
|
+
puts "#{client.delete(options[:job])}"
|
60
|
+
else
|
61
|
+
client = Client::ScraperJob.new(options)
|
62
|
+
puts "#{client.delete(scraper_name)}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
50
66
|
desc "resume <scraper_name>", "resumes a scraper's current job"
|
51
67
|
long_desc <<-LONGDESC
|
52
68
|
Resumes a scraper's current job
|
@@ -67,13 +83,14 @@ module Datahen
|
|
67
83
|
Pauses a scraper's current job
|
68
84
|
LONGDESC
|
69
85
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
86
|
+
option :force, :aliases => :f, type: :boolean, desc: 'Force a job to be paused from a done or cancelled status'
|
70
87
|
def pause(scraper_name)
|
71
88
|
if options[:job]
|
72
89
|
client = Client::Job.new(options)
|
73
|
-
puts "#{client.pause(options[:job])}"
|
90
|
+
puts "#{client.pause(options[:job], options)}"
|
74
91
|
else
|
75
92
|
client = Client::ScraperJob.new(options)
|
76
|
-
puts "#{client.pause(scraper_name)}"
|
93
|
+
puts "#{client.pause(scraper_name, options)}"
|
77
94
|
end
|
78
95
|
end
|
79
96
|
|
data/lib/datahen/cli/scraper_page.rb
CHANGED
@@ -224,7 +224,10 @@ module Datahen
|
|
224
224
|
|
225
225
|
if result['available'] == true
|
226
226
|
puts "Preview content url: \"#{result['preview_url']}\""
|
227
|
-
|
227
|
+
begin
|
228
|
+
`open "#{result['preview_url']}"`
|
229
|
+
rescue
|
230
|
+
end
|
228
231
|
else
|
229
232
|
puts "Content does not exist"
|
230
233
|
end
|
@@ -244,7 +247,10 @@ module Datahen
|
|
244
247
|
|
245
248
|
if result['available'] == true
|
246
249
|
puts "Preview failed content url: \"#{result['preview_url']}\""
|
247
|
-
|
250
|
+
begin
|
251
|
+
`open "#{result['preview_url']}"`
|
252
|
+
rescue
|
253
|
+
end
|
248
254
|
else
|
249
255
|
puts "Failed Content does not exist"
|
250
256
|
end
|
data/lib/datahen/client/base.rb
CHANGED
@@ -58,6 +58,7 @@ module Datahen
|
|
58
58
|
query[:limit] = opts[:limit] if opts[:limit]
|
59
59
|
query[:order] = opts[:order] if opts[:order]
|
60
60
|
query[:filter] = opts[:filter] if opts[:filter]
|
61
|
+
query[:force] = opts[:force] if opts[:force]
|
61
62
|
|
62
63
|
if opts[:query]
|
63
64
|
if opts[:query].is_a?(Hash)
|
data/lib/datahen/client/job.rb
CHANGED
data/lib/datahen/client/scraper.rb
CHANGED
@@ -26,6 +26,8 @@ module Datahen
|
|
26
26
|
body[:schedule] = opts[:schedule] if opts[:schedule]
|
27
27
|
body[:timezone] = opts[:timezone] if opts[:timezone]
|
28
28
|
body[:profile] = opts[:profile] if opts[:profile]
|
29
|
+
body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
|
30
|
+
body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
|
29
31
|
params = @options.merge({body: body.to_json})
|
30
32
|
self.class.post("/scrapers", params)
|
31
33
|
end
|
@@ -45,6 +47,8 @@ module Datahen
|
|
45
47
|
body[:schedule] = opts[:schedule] if opts[:schedule]
|
46
48
|
body[:timezone] = opts[:timezone] if opts[:timezone]
|
47
49
|
body[:profile] = opts[:profile] if opts[:profile]
|
50
|
+
body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
|
51
|
+
body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
|
48
52
|
params = @options.merge({body: body.to_json})
|
49
53
|
|
50
54
|
self.class.put("/scrapers/#{scraper_name}", params)
|
data/lib/datahen/client/scraper_job.rb
CHANGED
@@ -55,6 +55,11 @@ module Datahen
|
|
55
55
|
|
56
56
|
self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
|
57
57
|
end
|
58
|
+
|
59
|
+
def delete(scraper_name, opts={})
|
60
|
+
params = @options.merge(opts)
|
61
|
+
self.class.delete("/scrapers/#{scraper_name}/current_job", params)
|
62
|
+
end
|
58
63
|
end
|
59
64
|
end
|
60
65
|
end
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.9
|
4
|
+
version: 0.14.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -266,7 +266,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
266
266
|
- !ruby/object:Gem::Version
|
267
267
|
version: '0'
|
268
268
|
requirements: []
|
269
|
-
rubygems_version: 3.
|
269
|
+
rubygems_version: 3.1.2
|
270
270
|
signing_key:
|
271
271
|
specification_version: 4
|
272
272
|
summary: DataHen toolbelt for developers
|