datahen 0.14.10 → 0.14.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/datahen/cli/global_page.rb +4 -1
- data/lib/datahen/cli/job.rb +1 -3
- data/lib/datahen/cli/scraper.rb +4 -0
- data/lib/datahen/cli/scraper_export.rb +4 -1
- data/lib/datahen/cli/scraper_job.rb +18 -2
- data/lib/datahen/cli/scraper_page.rb +39 -19
- data/lib/datahen/client/auth_token.rb +1 -1
- data/lib/datahen/client/base.rb +1 -0
- data/lib/datahen/client/job.rb +5 -0
- data/lib/datahen/client/job_page.rb +5 -0
- data/lib/datahen/client/scraper.rb +4 -0
- data/lib/datahen/client/scraper_job.rb +5 -0
- data/lib/datahen/client/scraper_job_page.rb +6 -1
- data/lib/datahen/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3cf1bf50610ab9ba523ca8f9ae1fdb307831cb56018036076f31353b357edfa
|
4
|
+
data.tar.gz: '0199ea98a2f171675168699adc523932418b9822ff156c71786458c6362b6cdb'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d66226573dbd9bd3ef795ce021eb9ee202b21cce1d0b79211093bb9abda6f1982a721bf8ae1fbc9bf84aae6e50d1878bf9d389ee67d685e4d1c440fd88cceeb
|
7
|
+
data.tar.gz: 6ea1a7748cf77ae1cb8f750da71df86120f68e671c84e535e8536ee3324ed71026034a042662d081108ff7b72ccb3f0ef3ab3f049bdb45c92fc0e86ba2e10d46
|
@@ -15,7 +15,10 @@ module Datahen
|
|
15
15
|
|
16
16
|
if result['available'] == true
|
17
17
|
puts "Preview content url: \"#{result['preview_url']}\""
|
18
|
-
|
18
|
+
begin
|
19
|
+
`open "#{result['preview_url']}"`
|
20
|
+
rescue
|
21
|
+
end
|
19
22
|
else
|
20
23
|
puts "Content does not exist"
|
21
24
|
end
|
data/lib/datahen/cli/job.rb
CHANGED
@@ -6,7 +6,6 @@ module Datahen
|
|
6
6
|
"#{basename} #{@package_name} #{command.usage}"
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
9
|
desc "list", "gets a list of jobs"
|
11
10
|
option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
|
12
11
|
option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
|
@@ -35,8 +34,7 @@ module Datahen
|
|
35
34
|
client = Client::JobStat.new(options)
|
36
35
|
puts "#{client.job_current_stats(job_id, options)}"
|
37
36
|
end
|
38
|
-
|
39
|
-
|
37
|
+
|
40
38
|
end
|
41
39
|
end
|
42
40
|
|
data/lib/datahen/cli/scraper.rb
CHANGED
@@ -30,6 +30,8 @@ module Datahen
|
|
30
30
|
option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
|
31
31
|
option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
|
32
32
|
option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
|
33
|
+
option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
|
34
|
+
option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
|
33
35
|
def create(scraper_name, git_repository)
|
34
36
|
# puts "options #{options}"
|
35
37
|
client = Client::Scraper.new(options)
|
@@ -53,6 +55,8 @@ module Datahen
|
|
53
55
|
option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
|
54
56
|
option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
|
55
57
|
option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
|
58
|
+
option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
|
59
|
+
option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
|
56
60
|
def update(scraper_name)
|
57
61
|
client = Client::Scraper.new(options)
|
58
62
|
puts "#{client.update(scraper_name, options)}"
|
@@ -48,6 +48,21 @@ module Datahen
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
+
desc "delete <scraper_name>", "delete a scraper's current job"
|
52
|
+
long_desc <<-LONGDESC
|
53
|
+
Delete a scraper's current job
|
54
|
+
LONGDESC
|
55
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
56
|
+
def delete(scraper_name)
|
57
|
+
if options[:job]
|
58
|
+
client = Client::Job.new(options)
|
59
|
+
puts "#{client.delete(options[:job])}"
|
60
|
+
else
|
61
|
+
client = Client::ScraperJob.new(options)
|
62
|
+
puts "#{client.delete(scraper_name)}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
51
66
|
desc "resume <scraper_name>", "resumes a scraper's current job"
|
52
67
|
long_desc <<-LONGDESC
|
53
68
|
Resumes a scraper's current job
|
@@ -68,13 +83,14 @@ module Datahen
|
|
68
83
|
Pauses a scraper's current job
|
69
84
|
LONGDESC
|
70
85
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
86
|
+
option :force, :aliases => :f, type: :boolean, desc: 'Force a job to be paused from a done or cancelled status'
|
71
87
|
def pause(scraper_name)
|
72
88
|
if options[:job]
|
73
89
|
client = Client::Job.new(options)
|
74
|
-
puts "#{client.pause(options[:job])}"
|
90
|
+
puts "#{client.pause(options[:job], options)}"
|
75
91
|
else
|
76
92
|
client = Client::ScraperJob.new(options)
|
77
|
-
puts "#{client.pause(scraper_name)}"
|
93
|
+
puts "#{client.pause(scraper_name, options)}"
|
78
94
|
end
|
79
95
|
end
|
80
96
|
|
@@ -111,6 +111,7 @@ module Datahen
|
|
111
111
|
puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
|
112
112
|
return
|
113
113
|
end
|
114
|
+
|
114
115
|
if options[:job]
|
115
116
|
client = Client::JobPage.new(options)
|
116
117
|
puts "#{client.refetch(options[:job])}"
|
@@ -129,26 +130,39 @@ module Datahen
|
|
129
130
|
option :status, type: :string, desc: 'Reparse only pages with a specific status.'
|
130
131
|
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
131
132
|
def reparse(scraper_name)
|
132
|
-
|
133
|
-
|
133
|
+
if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status)
|
134
|
+
puts "Must specify either a --gid, --parse-fail or --status"
|
135
|
+
return
|
136
|
+
end
|
134
137
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
138
|
+
if options[:job]
|
139
|
+
client = Client::JobPage.new(options)
|
140
|
+
puts "#{client.reparse(options[:job])}"
|
141
|
+
else
|
142
|
+
client = Client::ScraperJobPage.new(options)
|
143
|
+
puts "#{client.reparse(scraper_name)}"
|
144
|
+
end
|
145
|
+
end
|
139
146
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
+
desc "limbo <scraper_name>", "Move pages on a scraper's current job to limbo"
|
148
|
+
long_desc <<-LONGDESC
|
149
|
+
Move pages in a scraper's current job to limbo. You need to specify either a --gid or --status.\x5
|
150
|
+
LONGDESC
|
151
|
+
option :gid, :aliases => :g, type: :string, desc: 'Move a specific GID to limbo'
|
152
|
+
option :status, type: :string, desc: 'Move pages with a specific status to limbo.'
|
153
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
154
|
+
def limbo(scraper_name)
|
155
|
+
if !options.key?(:gid) && !options.key?(:status)
|
156
|
+
puts "Must specify either a --gid or --status"
|
157
|
+
return
|
158
|
+
end
|
147
159
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
160
|
+
if options[:job]
|
161
|
+
client = Client::JobPage.new(options)
|
162
|
+
puts "#{client.limbo(options[:job])}"
|
163
|
+
else
|
164
|
+
client = Client::ScraperJobPage.new(options)
|
165
|
+
puts "#{client.limbo(scraper_name)}"
|
152
166
|
end
|
153
167
|
end
|
154
168
|
|
@@ -224,7 +238,10 @@ module Datahen
|
|
224
238
|
|
225
239
|
if result['available'] == true
|
226
240
|
puts "Preview content url: \"#{result['preview_url']}\""
|
227
|
-
|
241
|
+
begin
|
242
|
+
`open "#{result['preview_url']}"`
|
243
|
+
rescue
|
244
|
+
end
|
228
245
|
else
|
229
246
|
puts "Content does not exist"
|
230
247
|
end
|
@@ -244,7 +261,10 @@ module Datahen
|
|
244
261
|
|
245
262
|
if result['available'] == true
|
246
263
|
puts "Preview failed content url: \"#{result['preview_url']}\""
|
247
|
-
|
264
|
+
begin
|
265
|
+
`open "#{result['preview_url']}"`
|
266
|
+
rescue
|
267
|
+
end
|
248
268
|
else
|
249
269
|
puts "Failed Content does not exist"
|
250
270
|
end
|
data/lib/datahen/client/base.rb
CHANGED
@@ -58,6 +58,7 @@ module Datahen
|
|
58
58
|
query[:limit] = opts[:limit] if opts[:limit]
|
59
59
|
query[:order] = opts[:order] if opts[:order]
|
60
60
|
query[:filter] = opts[:filter] if opts[:filter]
|
61
|
+
query[:force] = opts[:force] if opts[:force]
|
61
62
|
|
62
63
|
if opts[:query]
|
63
64
|
if opts[:query].is_a?(Hash)
|
data/lib/datahen/client/job.rb
CHANGED
@@ -72,6 +72,11 @@ module Datahen
|
|
72
72
|
params = @options.merge(opts)
|
73
73
|
self.class.put("/jobs/#{job_id}/pages/refetch", params)
|
74
74
|
end
|
75
|
+
|
76
|
+
def limbo(job_id, opts={})
|
77
|
+
params = @options.merge(opts)
|
78
|
+
self.class.put("/jobs/#{job_id}/pages/limbo", params)
|
79
|
+
end
|
75
80
|
end
|
76
81
|
end
|
77
82
|
end
|
@@ -26,6 +26,8 @@ module Datahen
|
|
26
26
|
body[:schedule] = opts[:schedule] if opts[:schedule]
|
27
27
|
body[:timezone] = opts[:timezone] if opts[:timezone]
|
28
28
|
body[:profile] = opts[:profile] if opts[:profile]
|
29
|
+
body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
|
30
|
+
body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
|
29
31
|
params = @options.merge({body: body.to_json})
|
30
32
|
self.class.post("/scrapers", params)
|
31
33
|
end
|
@@ -45,6 +47,8 @@ module Datahen
|
|
45
47
|
body[:schedule] = opts[:schedule] if opts[:schedule]
|
46
48
|
body[:timezone] = opts[:timezone] if opts[:timezone]
|
47
49
|
body[:profile] = opts[:profile] if opts[:profile]
|
50
|
+
body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
|
51
|
+
body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
|
48
52
|
params = @options.merge({body: body.to_json})
|
49
53
|
|
50
54
|
self.class.put("/scrapers/#{scraper_name}", params)
|
@@ -55,6 +55,11 @@ module Datahen
|
|
55
55
|
|
56
56
|
self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
|
57
57
|
end
|
58
|
+
|
59
|
+
def delete(scraper_name, opts={})
|
60
|
+
params = @options.merge(opts)
|
61
|
+
self.class.delete("/scrapers/#{scraper_name}/current_job", params)
|
62
|
+
end
|
58
63
|
end
|
59
64
|
end
|
60
65
|
end
|
@@ -26,7 +26,7 @@ module Datahen
|
|
26
26
|
self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
|
27
27
|
end
|
28
28
|
|
29
|
-
# Deprecated, please use Datahen::Client::
|
29
|
+
# Deprecated, please use Datahen::Client::JobPage#refetch instead.
|
30
30
|
#
|
31
31
|
# @note This method will be removed at some point in the future.
|
32
32
|
def refetch_by_job(job_id, opts={})
|
@@ -39,6 +39,11 @@ module Datahen
|
|
39
39
|
self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
|
40
40
|
end
|
41
41
|
|
42
|
+
def limbo(scraper_name, opts={})
|
43
|
+
params = @options.merge(opts)
|
44
|
+
self.class.put("/scrapers/#{scraper_name}/current_job/pages/limbo", params)
|
45
|
+
end
|
46
|
+
|
42
47
|
def enqueue(scraper_name, method, url, opts={})
|
43
48
|
body = {}
|
44
49
|
body[:method] = method != "" ? method : "GET"
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.10
|
4
|
+
version: 0.14.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -266,7 +266,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
266
266
|
- !ruby/object:Gem::Version
|
267
267
|
version: '0'
|
268
268
|
requirements: []
|
269
|
-
rubygems_version: 3.
|
269
|
+
rubygems_version: 3.0.3
|
270
270
|
signing_key:
|
271
271
|
specification_version: 4
|
272
272
|
summary: DataHen toolbelt for developers
|