datahen 0.14.10 → 0.14.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 68b2f97e2b71ab8e79edacc9fa0b719c1d4b93bac684631ab084e46f44407795
4
- data.tar.gz: 2e4a453502ac9a72c7cbea81b3f36100373adf9894e5c2ca4250cb1fa709c548
3
+ metadata.gz: a3cf1bf50610ab9ba523ca8f9ae1fdb307831cb56018036076f31353b357edfa
4
+ data.tar.gz: '0199ea98a2f171675168699adc523932418b9822ff156c71786458c6362b6cdb'
5
5
  SHA512:
6
- metadata.gz: e73e87d091b590fd1efc4c31d5589b930e0fae458a8b8e5439c47dfae008c1db9186672f49ddcf41e7a0c32e31398b1703b45f1cde81a3a4e397c8f16008a4ea
7
- data.tar.gz: 52fd87fe09082f7ebd3a6e96ce0fc699b8a474ceb867a2187f21fb54431ac91f4cffdaf3a0ede51feb2a18fa9085a13a00048ad83a362d63399f2c4b87739c78
6
+ metadata.gz: 8d66226573dbd9bd3ef795ce021eb9ee202b21cce1d0b79211093bb9abda6f1982a721bf8ae1fbc9bf84aae6e50d1878bf9d389ee67d685e4d1c440fd88cceeb
7
+ data.tar.gz: 6ea1a7748cf77ae1cb8f750da71df86120f68e671c84e535e8536ee3324ed71026034a042662d081108ff7b72ccb3f0ef3ab3f049bdb45c92fc0e86ba2e10d46
@@ -15,7 +15,10 @@ module Datahen
15
15
 
16
16
  if result['available'] == true
17
17
  puts "Preview content url: \"#{result['preview_url']}\""
18
- `open "#{result['preview_url']}"`
18
+ begin
19
+ `open "#{result['preview_url']}"`
20
+ rescue
21
+ end
19
22
  else
20
23
  puts "Content does not exist"
21
24
  end
@@ -6,7 +6,6 @@ module Datahen
6
6
  "#{basename} #{@package_name} #{command.usage}"
7
7
  end
8
8
 
9
-
10
9
  desc "list", "gets a list of jobs"
11
10
  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
12
11
  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
@@ -35,8 +34,7 @@ module Datahen
35
34
  client = Client::JobStat.new(options)
36
35
  puts "#{client.job_current_stats(job_id, options)}"
37
36
  end
38
-
39
-
37
+
40
38
  end
41
39
  end
42
40
 
@@ -30,6 +30,8 @@ module Datahen
30
30
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
31
31
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
32
32
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
33
+ option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
34
+ option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
33
35
  def create(scraper_name, git_repository)
34
36
  # puts "options #{options}"
35
37
  client = Client::Scraper.new(options)
@@ -53,6 +55,8 @@ module Datahen
53
55
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
54
56
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
55
57
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
58
+ option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
59
+ option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
56
60
  def update(scraper_name)
57
61
  client = Client::Scraper.new(options)
58
62
  puts "#{client.update(scraper_name, options)}"
@@ -36,7 +36,10 @@ module Datahen
36
36
 
37
37
  if result['signed_url']
38
38
  puts "Download url: \"#{result['signed_url']}\""
39
- `open "#{result['signed_url']}"`
39
+ begin
40
+ `open "#{result['signed_url']}"`
41
+ rescue
42
+ end
40
43
  else
41
44
  puts "Exported file does not exist"
42
45
  end
@@ -48,6 +48,21 @@ module Datahen
48
48
  end
49
49
  end
50
50
 
51
+ desc "delete <scraper_name>", "delete a scraper's current job"
52
+ long_desc <<-LONGDESC
53
+ Delete a scraper's current job
54
+ LONGDESC
55
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
56
+ def delete(scraper_name)
57
+ if options[:job]
58
+ client = Client::Job.new(options)
59
+ puts "#{client.delete(options[:job])}"
60
+ else
61
+ client = Client::ScraperJob.new(options)
62
+ puts "#{client.delete(scraper_name)}"
63
+ end
64
+ end
65
+
51
66
  desc "resume <scraper_name>", "resumes a scraper's current job"
52
67
  long_desc <<-LONGDESC
53
68
  Resumes a scraper's current job
@@ -68,13 +83,14 @@ module Datahen
68
83
  Pauses a scraper's current job
69
84
  LONGDESC
70
85
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
86
+ option :force, :aliases => :f, type: :boolean, desc: 'Force a job to be paused from a done or cancelled status'
71
87
  def pause(scraper_name)
72
88
  if options[:job]
73
89
  client = Client::Job.new(options)
74
- puts "#{client.pause(options[:job])}"
90
+ puts "#{client.pause(options[:job], options)}"
75
91
  else
76
92
  client = Client::ScraperJob.new(options)
77
- puts "#{client.pause(scraper_name)}"
93
+ puts "#{client.pause(scraper_name, options)}"
78
94
  end
79
95
  end
80
96
 
@@ -111,6 +111,7 @@ module Datahen
111
111
  puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
112
112
  return
113
113
  end
114
+
114
115
  if options[:job]
115
116
  client = Client::JobPage.new(options)
116
117
  puts "#{client.refetch(options[:job])}"
@@ -129,26 +130,39 @@ module Datahen
129
130
  option :status, type: :string, desc: 'Reparse only pages with a specific status.'
130
131
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
131
132
  def reparse(scraper_name)
132
- begin
133
- options[:vars] = JSON.parse(options[:vars]) if options[:vars]
133
+ if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status)
134
+ puts "Must specify either a --gid, --parse-fail or --status"
135
+ return
136
+ end
134
137
 
135
- if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status)
136
- puts "Must specify either a --gid, --parse-fail or --status"
137
- return
138
- end
138
+ if options[:job]
139
+ client = Client::JobPage.new(options)
140
+ puts "#{client.reparse(options[:job])}"
141
+ else
142
+ client = Client::ScraperJobPage.new(options)
143
+ puts "#{client.reparse(scraper_name)}"
144
+ end
145
+ end
139
146
 
140
- if options[:job]
141
- client = Client::JobPage.new(options)
142
- puts "#{client.reparse(options[:job])}"
143
- else
144
- client = Client::ScraperJobPage.new(options)
145
- puts "#{client.reparse(scraper_name)}"
146
- end
147
+ desc "limbo <scraper_name>", "Move pages on a scraper's current job to limbo"
148
+ long_desc <<-LONGDESC
149
+ Move pages in a scraper's current job to limbo. You need to specify either a --gid or --status.\x5
150
+ LONGDESC
151
+ option :gid, :aliases => :g, type: :string, desc: 'Move a specific GID to limbo'
152
+ option :status, type: :string, desc: 'Move pages with a specific status to limbo.'
153
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
154
+ def limbo(scraper_name)
155
+ if !options.key?(:gid) && !options.key?(:status)
156
+ puts "Must specify either a --gid or --status"
157
+ return
158
+ end
147
159
 
148
- rescue JSON::ParserError
149
- if options[:vars]
150
- puts "Error: #{options[:vars]} on vars is not a valid JSON"
151
- end
160
+ if options[:job]
161
+ client = Client::JobPage.new(options)
162
+ puts "#{client.limbo(options[:job])}"
163
+ else
164
+ client = Client::ScraperJobPage.new(options)
165
+ puts "#{client.limbo(scraper_name)}"
152
166
  end
153
167
  end
154
168
 
@@ -224,7 +238,10 @@ module Datahen
224
238
 
225
239
  if result['available'] == true
226
240
  puts "Preview content url: \"#{result['preview_url']}\""
227
- `open "#{result['preview_url']}"`
241
+ begin
242
+ `open "#{result['preview_url']}"`
243
+ rescue
244
+ end
228
245
  else
229
246
  puts "Content does not exist"
230
247
  end
@@ -244,7 +261,10 @@ module Datahen
244
261
 
245
262
  if result['available'] == true
246
263
  puts "Preview failed content url: \"#{result['preview_url']}\""
247
- `open "#{result['preview_url']}"`
264
+ begin
265
+ `open "#{result['preview_url']}"`
266
+ rescue
267
+ end
248
268
  else
249
269
  puts "Failed Content does not exist"
250
270
  end
@@ -16,7 +16,7 @@ module Datahen
16
16
  role: role,
17
17
  description: description}
18
18
 
19
- params = @options.merge({body: body.to_json})
19
+ params = @options.merge({body: body.to_json}).merge(opts)
20
20
  self.class.post("/auth_tokens", params)
21
21
  end
22
22
 
@@ -58,6 +58,7 @@ module Datahen
58
58
  query[:limit] = opts[:limit] if opts[:limit]
59
59
  query[:order] = opts[:order] if opts[:order]
60
60
  query[:filter] = opts[:filter] if opts[:filter]
61
+ query[:force] = opts[:force] if opts[:force]
61
62
 
62
63
  if opts[:query]
63
64
  if opts[:query].is_a?(Hash)
@@ -71,6 +71,11 @@ module Datahen
71
71
  self.class.get("/jobs/#{job_id}/profile", params)
72
72
  end
73
73
 
74
+ def delete(job_id, opts={})
75
+ params = @options.merge(opts)
76
+ self.class.delete("/jobs/#{job_id}", params)
77
+ end
78
+
74
79
  end
75
80
 
76
81
  end
@@ -72,6 +72,11 @@ module Datahen
72
72
  params = @options.merge(opts)
73
73
  self.class.put("/jobs/#{job_id}/pages/refetch", params)
74
74
  end
75
+
76
+ def limbo(job_id, opts={})
77
+ params = @options.merge(opts)
78
+ self.class.put("/jobs/#{job_id}/pages/limbo", params)
79
+ end
75
80
  end
76
81
  end
77
82
  end
@@ -26,6 +26,8 @@ module Datahen
26
26
  body[:schedule] = opts[:schedule] if opts[:schedule]
27
27
  body[:timezone] = opts[:timezone] if opts[:timezone]
28
28
  body[:profile] = opts[:profile] if opts[:profile]
29
+ body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
30
+ body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
29
31
  params = @options.merge({body: body.to_json})
30
32
  self.class.post("/scrapers", params)
31
33
  end
@@ -45,6 +47,8 @@ module Datahen
45
47
  body[:schedule] = opts[:schedule] if opts[:schedule]
46
48
  body[:timezone] = opts[:timezone] if opts[:timezone]
47
49
  body[:profile] = opts[:profile] if opts[:profile]
50
+ body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
51
+ body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
48
52
  params = @options.merge({body: body.to_json})
49
53
 
50
54
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -55,6 +55,11 @@ module Datahen
55
55
 
56
56
  self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
57
57
  end
58
+
59
+ def delete(scraper_name, opts={})
60
+ params = @options.merge(opts)
61
+ self.class.delete("/scrapers/#{scraper_name}/current_job", params)
62
+ end
58
63
  end
59
64
  end
60
65
  end
@@ -26,7 +26,7 @@ module Datahen
26
26
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
27
27
  end
28
28
 
29
- # Deprecated, please use Datahen::Client::JobVar#refetch instead.
29
+ # Deprecated, please use Datahen::Client::JobPage#refetch instead.
30
30
  #
31
31
  # @note This method will be removed at some point in the future.
32
32
  def refetch_by_job(job_id, opts={})
@@ -39,6 +39,11 @@ module Datahen
39
39
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
40
40
  end
41
41
 
42
+ def limbo(scraper_name, opts={})
43
+ params = @options.merge(opts)
44
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/limbo", params)
45
+ end
46
+
42
47
  def enqueue(scraper_name, method, url, opts={})
43
48
  body = {}
44
49
  body[:method] = method != "" ? method : "GET"
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.10"
2
+ VERSION = "0.14.17"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.10
4
+ version: 0.14.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-10 00:00:00.000000000 Z
11
+ date: 2020-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -266,7 +266,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
266
266
  - !ruby/object:Gem::Version
267
267
  version: '0'
268
268
  requirements: []
269
- rubygems_version: 3.1.2
269
+ rubygems_version: 3.0.3
270
270
  signing_key:
271
271
  specification_version: 4
272
272
  summary: DataHen toolbelt for developers