datahen 0.14.10 → 0.14.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 68b2f97e2b71ab8e79edacc9fa0b719c1d4b93bac684631ab084e46f44407795
4
- data.tar.gz: 2e4a453502ac9a72c7cbea81b3f36100373adf9894e5c2ca4250cb1fa709c548
3
+ metadata.gz: a3cf1bf50610ab9ba523ca8f9ae1fdb307831cb56018036076f31353b357edfa
4
+ data.tar.gz: '0199ea98a2f171675168699adc523932418b9822ff156c71786458c6362b6cdb'
5
5
  SHA512:
6
- metadata.gz: e73e87d091b590fd1efc4c31d5589b930e0fae458a8b8e5439c47dfae008c1db9186672f49ddcf41e7a0c32e31398b1703b45f1cde81a3a4e397c8f16008a4ea
7
- data.tar.gz: 52fd87fe09082f7ebd3a6e96ce0fc699b8a474ceb867a2187f21fb54431ac91f4cffdaf3a0ede51feb2a18fa9085a13a00048ad83a362d63399f2c4b87739c78
6
+ metadata.gz: 8d66226573dbd9bd3ef795ce021eb9ee202b21cce1d0b79211093bb9abda6f1982a721bf8ae1fbc9bf84aae6e50d1878bf9d389ee67d685e4d1c440fd88cceeb
7
+ data.tar.gz: 6ea1a7748cf77ae1cb8f750da71df86120f68e671c84e535e8536ee3324ed71026034a042662d081108ff7b72ccb3f0ef3ab3f049bdb45c92fc0e86ba2e10d46
@@ -15,7 +15,10 @@ module Datahen
15
15
 
16
16
  if result['available'] == true
17
17
  puts "Preview content url: \"#{result['preview_url']}\""
18
- `open "#{result['preview_url']}"`
18
+ begin
19
+ `open "#{result['preview_url']}"`
20
+ rescue
21
+ end
19
22
  else
20
23
  puts "Content does not exist"
21
24
  end
@@ -6,7 +6,6 @@ module Datahen
6
6
  "#{basename} #{@package_name} #{command.usage}"
7
7
  end
8
8
 
9
-
10
9
  desc "list", "gets a list of jobs"
11
10
  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
12
11
  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
@@ -35,8 +34,7 @@ module Datahen
35
34
  client = Client::JobStat.new(options)
36
35
  puts "#{client.job_current_stats(job_id, options)}"
37
36
  end
38
-
39
-
37
+
40
38
  end
41
39
  end
42
40
 
@@ -30,6 +30,8 @@ module Datahen
30
30
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
31
31
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
32
32
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
33
+ option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
34
+ option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
33
35
  def create(scraper_name, git_repository)
34
36
  # puts "options #{options}"
35
37
  client = Client::Scraper.new(options)
@@ -53,6 +55,8 @@ module Datahen
53
55
  option :schedule, type: :string, desc: 'Set the schedule of the scraper to run. Must be in CRON format.'
54
56
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
55
57
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
58
+ option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
59
+ option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
56
60
  def update(scraper_name)
57
61
  client = Client::Scraper.new(options)
58
62
  puts "#{client.update(scraper_name, options)}"
@@ -36,7 +36,10 @@ module Datahen
36
36
 
37
37
  if result['signed_url']
38
38
  puts "Download url: \"#{result['signed_url']}\""
39
- `open "#{result['signed_url']}"`
39
+ begin
40
+ `open "#{result['signed_url']}"`
41
+ rescue
42
+ end
40
43
  else
41
44
  puts "Exported file does not exist"
42
45
  end
@@ -48,6 +48,21 @@ module Datahen
48
48
  end
49
49
  end
50
50
 
51
+ desc "delete <scraper_name>", "delete a scraper's current job"
52
+ long_desc <<-LONGDESC
53
+ Delete a scraper's current job
54
+ LONGDESC
55
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
56
+ def delete(scraper_name)
57
+ if options[:job]
58
+ client = Client::Job.new(options)
59
+ puts "#{client.delete(options[:job])}"
60
+ else
61
+ client = Client::ScraperJob.new(options)
62
+ puts "#{client.delete(scraper_name)}"
63
+ end
64
+ end
65
+
51
66
  desc "resume <scraper_name>", "resumes a scraper's current job"
52
67
  long_desc <<-LONGDESC
53
68
  Resumes a scraper's current job
@@ -68,13 +83,14 @@ module Datahen
68
83
  Pauses a scraper's current job
69
84
  LONGDESC
70
85
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
86
+ option :force, :aliases => :f, type: :boolean, desc: 'Force a job to be paused from a done or cancelled status'
71
87
  def pause(scraper_name)
72
88
  if options[:job]
73
89
  client = Client::Job.new(options)
74
- puts "#{client.pause(options[:job])}"
90
+ puts "#{client.pause(options[:job], options)}"
75
91
  else
76
92
  client = Client::ScraperJob.new(options)
77
- puts "#{client.pause(scraper_name)}"
93
+ puts "#{client.pause(scraper_name, options)}"
78
94
  end
79
95
  end
80
96
 
@@ -111,6 +111,7 @@ module Datahen
111
111
  puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
112
112
  return
113
113
  end
114
+
114
115
  if options[:job]
115
116
  client = Client::JobPage.new(options)
116
117
  puts "#{client.refetch(options[:job])}"
@@ -129,26 +130,39 @@ module Datahen
129
130
  option :status, type: :string, desc: 'Reparse only pages with a specific status.'
130
131
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
131
132
  def reparse(scraper_name)
132
- begin
133
- options[:vars] = JSON.parse(options[:vars]) if options[:vars]
133
+ if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status)
134
+ puts "Must specify either a --gid, --parse-fail or --status"
135
+ return
136
+ end
134
137
 
135
- if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status)
136
- puts "Must specify either a --gid, --parse-fail or --status"
137
- return
138
- end
138
+ if options[:job]
139
+ client = Client::JobPage.new(options)
140
+ puts "#{client.reparse(options[:job])}"
141
+ else
142
+ client = Client::ScraperJobPage.new(options)
143
+ puts "#{client.reparse(scraper_name)}"
144
+ end
145
+ end
139
146
 
140
- if options[:job]
141
- client = Client::JobPage.new(options)
142
- puts "#{client.reparse(options[:job])}"
143
- else
144
- client = Client::ScraperJobPage.new(options)
145
- puts "#{client.reparse(scraper_name)}"
146
- end
147
+ desc "limbo <scraper_name>", "Move pages on a scraper's current job to limbo"
148
+ long_desc <<-LONGDESC
149
+ Move pages in a scraper's current job to limbo. You need to specify either a --gid or --status.\x5
150
+ LONGDESC
151
+ option :gid, :aliases => :g, type: :string, desc: 'Move a specific GID to limbo'
152
+ option :status, type: :string, desc: 'Move pages with a specific status to limbo.'
153
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
154
+ def limbo(scraper_name)
155
+ if !options.key?(:gid) && !options.key?(:status)
156
+ puts "Must specify either a --gid or --status"
157
+ return
158
+ end
147
159
 
148
- rescue JSON::ParserError
149
- if options[:vars]
150
- puts "Error: #{options[:vars]} on vars is not a valid JSON"
151
- end
160
+ if options[:job]
161
+ client = Client::JobPage.new(options)
162
+ puts "#{client.limbo(options[:job])}"
163
+ else
164
+ client = Client::ScraperJobPage.new(options)
165
+ puts "#{client.limbo(scraper_name)}"
152
166
  end
153
167
  end
154
168
 
@@ -224,7 +238,10 @@ module Datahen
224
238
 
225
239
  if result['available'] == true
226
240
  puts "Preview content url: \"#{result['preview_url']}\""
227
- `open "#{result['preview_url']}"`
241
+ begin
242
+ `open "#{result['preview_url']}"`
243
+ rescue
244
+ end
228
245
  else
229
246
  puts "Content does not exist"
230
247
  end
@@ -244,7 +261,10 @@ module Datahen
244
261
 
245
262
  if result['available'] == true
246
263
  puts "Preview failed content url: \"#{result['preview_url']}\""
247
- `open "#{result['preview_url']}"`
264
+ begin
265
+ `open "#{result['preview_url']}"`
266
+ rescue
267
+ end
248
268
  else
249
269
  puts "Failed Content does not exist"
250
270
  end
@@ -16,7 +16,7 @@ module Datahen
16
16
  role: role,
17
17
  description: description}
18
18
 
19
- params = @options.merge({body: body.to_json})
19
+ params = @options.merge({body: body.to_json}).merge(opts)
20
20
  self.class.post("/auth_tokens", params)
21
21
  end
22
22
 
@@ -58,6 +58,7 @@ module Datahen
58
58
  query[:limit] = opts[:limit] if opts[:limit]
59
59
  query[:order] = opts[:order] if opts[:order]
60
60
  query[:filter] = opts[:filter] if opts[:filter]
61
+ query[:force] = opts[:force] if opts[:force]
61
62
 
62
63
  if opts[:query]
63
64
  if opts[:query].is_a?(Hash)
@@ -71,6 +71,11 @@ module Datahen
71
71
  self.class.get("/jobs/#{job_id}/profile", params)
72
72
  end
73
73
 
74
+ def delete(job_id, opts={})
75
+ params = @options.merge(opts)
76
+ self.class.delete("/jobs/#{job_id}", params)
77
+ end
78
+
74
79
  end
75
80
 
76
81
  end
@@ -72,6 +72,11 @@ module Datahen
72
72
  params = @options.merge(opts)
73
73
  self.class.put("/jobs/#{job_id}/pages/refetch", params)
74
74
  end
75
+
76
+ def limbo(job_id, opts={})
77
+ params = @options.merge(opts)
78
+ self.class.put("/jobs/#{job_id}/pages/limbo", params)
79
+ end
75
80
  end
76
81
  end
77
82
  end
@@ -26,6 +26,8 @@ module Datahen
26
26
  body[:schedule] = opts[:schedule] if opts[:schedule]
27
27
  body[:timezone] = opts[:timezone] if opts[:timezone]
28
28
  body[:profile] = opts[:profile] if opts[:profile]
29
+ body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
30
+ body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
29
31
  params = @options.merge({body: body.to_json})
30
32
  self.class.post("/scrapers", params)
31
33
  end
@@ -45,6 +47,8 @@ module Datahen
45
47
  body[:schedule] = opts[:schedule] if opts[:schedule]
46
48
  body[:timezone] = opts[:timezone] if opts[:timezone]
47
49
  body[:profile] = opts[:profile] if opts[:profile]
50
+ body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
51
+ body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
48
52
  params = @options.merge({body: body.to_json})
49
53
 
50
54
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -55,6 +55,11 @@ module Datahen
55
55
 
56
56
  self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
57
57
  end
58
+
59
+ def delete(scraper_name, opts={})
60
+ params = @options.merge(opts)
61
+ self.class.delete("/scrapers/#{scraper_name}/current_job", params)
62
+ end
58
63
  end
59
64
  end
60
65
  end
@@ -26,7 +26,7 @@ module Datahen
26
26
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
27
27
  end
28
28
 
29
- # Deprecated, please use Datahen::Client::JobVar#refetch instead.
29
+ # Deprecated, please use Datahen::Client::JobPage#refetch instead.
30
30
  #
31
31
  # @note This method will be removed at some point in the future.
32
32
  def refetch_by_job(job_id, opts={})
@@ -39,6 +39,11 @@ module Datahen
39
39
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
40
40
  end
41
41
 
42
+ def limbo(scraper_name, opts={})
43
+ params = @options.merge(opts)
44
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/limbo", params)
45
+ end
46
+
42
47
  def enqueue(scraper_name, method, url, opts={})
43
48
  body = {}
44
49
  body[:method] = method != "" ? method : "GET"
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.10"
2
+ VERSION = "0.14.17"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.10
4
+ version: 0.14.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-10 00:00:00.000000000 Z
11
+ date: 2020-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -266,7 +266,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
266
266
  - !ruby/object:Gem::Version
267
267
  version: '0'
268
268
  requirements: []
269
- rubygems_version: 3.1.2
269
+ rubygems_version: 3.0.3
270
270
  signing_key:
271
271
  specification_version: 4
272
272
  summary: DataHen toolbelt for developers