datahen 0.14.12 → 0.14.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19a6cfd035a3cd5ba9bd10c1cf34e6baa0590cb2f07664e2bdd3ee2f8e032ebf
4
- data.tar.gz: b7cd0aa204baa98013c5853df68b93eb8562c1b4c202bf65b8a3c931ca2135d1
3
+ metadata.gz: b483263384d6a00e51fd345499ad020d69c9d07c1a77e3521e6b55a4528195de
4
+ data.tar.gz: 167c0a417c402198f13151daf29bddb0f8429cac3876c8a84d6b7ba396b1ac90
5
5
  SHA512:
6
- metadata.gz: 8ac98d022003ea6db219c1a07518f127bed088db30ebca3791569bd643acdf8ddc2ad01ddc553f42976a0a6983dfe2928fceadc2f91ed4464b4c2539939df28b
7
- data.tar.gz: 33a65a6ffe29cb8ab0e75283436909f683354365a07e11ee672fac4311367ec55c31d2db6e8bcc02de0569768169360fb2120d4e6149aae8e0151cabea8b6752
6
+ metadata.gz: 344168b7a8a4cb746347aba1640d7b4e75a901cd5a1e2ab7acd00f6e8eee3d8c297795812259f84a21757b444e653331e44dd9e1efbec372472b6aeb98c56619
7
+ data.tar.gz: 1ef00c32694830740ea477fb4291de9234957ee4b7f7ad02642aa9d5cb91d1b3b1cd6a56542db4cbf84d92d42039546f2e9bfb8a40cf22d103256e8d53b0ac28
@@ -15,7 +15,10 @@ module Datahen
15
15
 
16
16
  if result['available'] == true
17
17
  puts "Preview content url: \"#{result['preview_url']}\""
18
- `open "#{result['preview_url']}"`
18
+ begin
19
+ `open "#{result['preview_url']}"`
20
+ rescue
21
+ end
19
22
  else
20
23
  puts "Content does not exist"
21
24
  end
@@ -6,7 +6,6 @@ module Datahen
6
6
  "#{basename} #{@package_name} #{command.usage}"
7
7
  end
8
8
 
9
-
10
9
  desc "list", "gets a list of jobs"
11
10
  option :page, :aliases => :p, type: :numeric, desc: 'Get the next set of records by page.'
12
11
  option :per_page, :aliases => :P, type: :numeric, desc: 'Number of records per page. Max 500 per page.'
@@ -35,8 +34,7 @@ module Datahen
35
34
  client = Client::JobStat.new(options)
36
35
  puts "#{client.job_current_stats(job_id, options)}"
37
36
  end
38
-
39
-
37
+
40
38
  end
41
39
  end
42
40
 
@@ -31,6 +31,7 @@ module Datahen
31
31
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
32
32
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
33
33
  option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
34
+ option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
34
35
  def create(scraper_name, git_repository)
35
36
  # puts "options #{options}"
36
37
  client = Client::Scraper.new(options)
@@ -55,6 +56,7 @@ module Datahen
55
56
  option :timezone, type: :string, desc: "Set the scheduler's timezone. Must be in IANA Timezone format. Defaults to \"America/Toronto\""
56
57
  option :profile, type: :string, desc: 'Set the profiles (comma separated) to apply to the job. Default: default'
57
58
  option :multiple_jobs, type: :boolean, desc: 'Set true to enable multiple jobs. Default: false'
59
+ option :max_job_count, type: :numeric, desc: 'Set a value to set max number of jobs available. Set -1 for unlimited. Default: 3'
58
60
  def update(scraper_name)
59
61
  client = Client::Scraper.new(options)
60
62
  puts "#{client.update(scraper_name, options)}"
@@ -36,7 +36,10 @@ module Datahen
36
36
 
37
37
  if result['signed_url']
38
38
  puts "Download url: \"#{result['signed_url']}\""
39
- `open "#{result['signed_url']}"`
39
+ begin
40
+ `open "#{result['signed_url']}"`
41
+ rescue
42
+ end
40
43
  else
41
44
  puts "Exported file does not exist"
42
45
  end
@@ -48,6 +48,21 @@ module Datahen
48
48
  end
49
49
  end
50
50
 
51
+ desc "delete <scraper_name>", "delete a scraper's current job"
52
+ long_desc <<-LONGDESC
53
+ Delete a scraper's current job
54
+ LONGDESC
55
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
56
+ def delete(scraper_name)
57
+ if options[:job]
58
+ client = Client::Job.new(options)
59
+ puts "#{client.delete(options[:job])}"
60
+ else
61
+ client = Client::ScraperJob.new(options)
62
+ puts "#{client.delete(scraper_name)}"
63
+ end
64
+ end
65
+
51
66
  desc "resume <scraper_name>", "resumes a scraper's current job"
52
67
  long_desc <<-LONGDESC
53
68
  Resumes a scraper's current job
@@ -99,18 +99,20 @@ module Datahen
99
99
 
100
100
  desc "refetch <scraper_name>", "Refetch Pages on a scraper's current job"
101
101
  long_desc <<-LONGDESC
102
- Refetch pages in a scraper's current job. You need to specify either a --gid or --fetch-fail or --parse-fail or --status.\x5
102
+ Refetch pages in a scraper's current job. You need to specify either a --gid or --fetch-fail or --parse-fail or --status or --page-type.\x5
103
103
  LONGDESC
104
104
  option :gid, :aliases => :g, type: :string, desc: 'Refetch a specific GID'
105
105
  option :fetch_fail, type: :boolean, desc: 'Refetches only pages that fails fetching.'
106
106
  option :parse_fail, type: :boolean, desc: 'Refetches only pages that fails parsing.'
107
107
  option :status, type: :string, desc: 'Refetches only pages with a specific status.'
108
+ option :page_type, type: :string, desc: 'Refetches only pages with a specific page type.'
108
109
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
109
110
  def refetch(scraper_name)
110
- if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status)
111
- puts "Must specify either a --gid, --fetch-fail, --parse-fail or --status"
111
+ if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
112
+ puts "Must specify either a --gid, --fetch-fail, --parse-fail, --status or --page-type"
112
113
  return
113
114
  end
115
+
114
116
  if options[:job]
115
117
  client = Client::JobPage.new(options)
116
118
  puts "#{client.refetch(options[:job])}"
@@ -122,33 +124,47 @@ module Datahen
122
124
 
123
125
  desc "reparse <scraper_name>", "Reparse Pages on a scraper's current job"
124
126
  long_desc <<-LONGDESC
125
- Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail or --status.\x5
127
+ Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail or --status or --page-type.\x5
126
128
  LONGDESC
127
129
  option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
128
130
  option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
129
131
  option :status, type: :string, desc: 'Reparse only pages with a specific status.'
132
+ option :page_type, type: :string, desc: 'Refetches only pages with a specific page type.'
130
133
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
131
134
  def reparse(scraper_name)
132
- begin
133
- options[:vars] = JSON.parse(options[:vars]) if options[:vars]
135
+ if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
136
+ puts "Must specify either a --gid, --parse-fail, --status or --page-type"
137
+ return
138
+ end
134
139
 
135
- if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status)
136
- puts "Must specify either a --gid, --parse-fail or --status"
137
- return
138
- end
140
+ if options[:job]
141
+ client = Client::JobPage.new(options)
142
+ puts "#{client.reparse(options[:job])}"
143
+ else
144
+ client = Client::ScraperJobPage.new(options)
145
+ puts "#{client.reparse(scraper_name)}"
146
+ end
147
+ end
139
148
 
140
- if options[:job]
141
- client = Client::JobPage.new(options)
142
- puts "#{client.reparse(options[:job])}"
143
- else
144
- client = Client::ScraperJobPage.new(options)
145
- puts "#{client.reparse(scraper_name)}"
146
- end
149
+ desc "limbo <scraper_name>", "Move pages on a scraper's current job to limbo"
150
+ long_desc <<-LONGDESC
151
+ Move pages in a scraper's current job to limbo. You need to specify either a --gid or --status.\x5
152
+ LONGDESC
153
+ option :gid, :aliases => :g, type: :string, desc: 'Move a specific GID to limbo'
154
+ option :status, type: :string, desc: 'Move pages with a specific status to limbo.'
155
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
156
+ def limbo(scraper_name)
157
+ if !options.key?(:gid) && !options.key?(:status)
158
+ puts "Must specify either a --gid or --status"
159
+ return
160
+ end
147
161
 
148
- rescue JSON::ParserError
149
- if options[:vars]
150
- puts "Error: #{options[:vars]} on vars is not a valid JSON"
151
- end
162
+ if options[:job]
163
+ client = Client::JobPage.new(options)
164
+ puts "#{client.limbo(options[:job])}"
165
+ else
166
+ client = Client::ScraperJobPage.new(options)
167
+ puts "#{client.limbo(scraper_name)}"
152
168
  end
153
169
  end
154
170
 
@@ -224,7 +240,10 @@ module Datahen
224
240
 
225
241
  if result['available'] == true
226
242
  puts "Preview content url: \"#{result['preview_url']}\""
227
- `open "#{result['preview_url']}"`
243
+ begin
244
+ `open "#{result['preview_url']}"`
245
+ rescue
246
+ end
228
247
  else
229
248
  puts "Content does not exist"
230
249
  end
@@ -244,7 +263,10 @@ module Datahen
244
263
 
245
264
  if result['available'] == true
246
265
  puts "Preview failed content url: \"#{result['preview_url']}\""
247
- `open "#{result['preview_url']}"`
266
+ begin
267
+ `open "#{result['preview_url']}"`
268
+ rescue
269
+ end
248
270
  else
249
271
  puts "Failed Content does not exist"
250
272
  end
@@ -16,7 +16,7 @@ module Datahen
16
16
  role: role,
17
17
  description: description}
18
18
 
19
- params = @options.merge({body: body.to_json})
19
+ params = @options.merge({body: body.to_json}).merge(opts)
20
20
  self.class.post("/auth_tokens", params)
21
21
  end
22
22
 
@@ -71,6 +71,11 @@ module Datahen
71
71
  self.class.get("/jobs/#{job_id}/profile", params)
72
72
  end
73
73
 
74
+ def delete(job_id, opts={})
75
+ params = @options.merge(opts)
76
+ self.class.delete("/jobs/#{job_id}", params)
77
+ end
78
+
74
79
  end
75
80
 
76
81
  end
@@ -72,6 +72,11 @@ module Datahen
72
72
  params = @options.merge(opts)
73
73
  self.class.put("/jobs/#{job_id}/pages/refetch", params)
74
74
  end
75
+
76
+ def limbo(job_id, opts={})
77
+ params = @options.merge(opts)
78
+ self.class.put("/jobs/#{job_id}/pages/limbo", params)
79
+ end
75
80
  end
76
81
  end
77
82
  end
@@ -27,6 +27,7 @@ module Datahen
27
27
  body[:timezone] = opts[:timezone] if opts[:timezone]
28
28
  body[:profile] = opts[:profile] if opts[:profile]
29
29
  body[:multiple_jobs] = opts[:multiple_jobs] if opts[:multiple_jobs]
30
+ body[:max_job_count] = opts[:max_job_count] if opts[:max_job_count]
30
31
  params = @options.merge({body: body.to_json})
31
32
  self.class.post("/scrapers", params)
32
33
  end
@@ -47,6 +48,7 @@ module Datahen
47
48
  body[:timezone] = opts[:timezone] if opts[:timezone]
48
49
  body[:profile] = opts[:profile] if opts[:profile]
49
50
  body[:multiple_jobs] = opts[:multiple_jobs] if opts.has_key?("multiple_jobs") || opts.has_key?(:multiple_jobs)
51
+ body[:max_job_count] = opts[:max_job_count] if opts.has_key?("max_job_count") || opts.has_key?(:max_job_count)
50
52
  params = @options.merge({body: body.to_json})
51
53
 
52
54
  self.class.put("/scrapers/#{scraper_name}", params)
@@ -55,6 +55,11 @@ module Datahen
55
55
 
56
56
  self.class.get("/scrapers/#{scraper_name}/current_job/profile", params)
57
57
  end
58
+
59
+ def delete(scraper_name, opts={})
60
+ params = @options.merge(opts)
61
+ self.class.delete("/scrapers/#{scraper_name}/current_job", params)
62
+ end
58
63
  end
59
64
  end
60
65
  end
@@ -26,7 +26,7 @@ module Datahen
26
26
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/refetch", params)
27
27
  end
28
28
 
29
- # Deprecated, please use Datahen::Client::JobVar#refetch instead.
29
+ # Deprecated, please use Datahen::Client::JobPage#refetch instead.
30
30
  #
31
31
  # @note This method will be removed at some point in the future.
32
32
  def refetch_by_job(job_id, opts={})
@@ -39,6 +39,11 @@ module Datahen
39
39
  self.class.put("/scrapers/#{scraper_name}/current_job/pages/reparse", params)
40
40
  end
41
41
 
42
+ def limbo(scraper_name, opts={})
43
+ params = @options.merge(opts)
44
+ self.class.put("/scrapers/#{scraper_name}/current_job/pages/limbo", params)
45
+ end
46
+
42
47
  def enqueue(scraper_name, method, url, opts={})
43
48
  body = {}
44
49
  body[:method] = method != "" ? method : "GET"
@@ -0,0 +1,6 @@
1
+ require 'datahen/error/safe_terminate_error'
2
+
3
+ module Datahen
4
+ module Error
5
+ end
6
+ end
@@ -0,0 +1,6 @@
1
+ module Datahen
2
+ module Error
3
+ class SafeTerminateError < Exception
4
+ end
5
+ end
6
+ end
@@ -1,3 +1,4 @@
1
+ require "datahen/error"
1
2
  require "datahen/plugin"
2
3
  require "datahen/scraper/parser"
3
4
  require "datahen/scraper/seeder"
@@ -112,7 +112,7 @@ module Datahen
112
112
  raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
113
113
  if page_gid == gid
114
114
  self.refetch_self = true
115
- return
115
+ raise Error::SafeTerminateError
116
116
  end
117
117
  refetch_page page_gid
118
118
  end
@@ -130,7 +130,7 @@ module Datahen
130
130
  raise ArgumentError.new("page_gid needs to be a String.") unless page_gid.is_a?(String)
131
131
  if page_gid == gid
132
132
  self.reparse_self = true
133
- return
133
+ raise Error::SafeTerminateError
134
134
  end
135
135
  reparse_page page_gid
136
136
  end
@@ -153,6 +153,8 @@ module Datahen
153
153
  page: page
154
154
  })
155
155
  eval_with_context filename, context
156
+ rescue Error::SafeTerminateError => e
157
+ # do nothing, this is fine
156
158
  rescue SyntaxError => e
157
159
  handle_error(e) if save
158
160
  raise e
@@ -163,7 +165,7 @@ module Datahen
163
165
 
164
166
  puts "=========== Parsing Executed ==========="
165
167
  begin
166
- save_pages_and_outputs(pages, outputs, :parsing)
168
+ save_pages_and_outputs(pages, outputs, :parsing) unless refetch_self
167
169
  rescue => e
168
170
  handle_error(e) if save
169
171
  raise e
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.12"
2
+ VERSION = "0.14.19"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.12
4
+ version: 0.14.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-01 00:00:00.000000000 Z
11
+ date: 2020-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -233,6 +233,8 @@ files:
233
233
  - lib/datahen/client/scraper_job_page.rb
234
234
  - lib/datahen/client/scraper_job_var.rb
235
235
  - lib/datahen/client/scraper_var.rb
236
+ - lib/datahen/error.rb
237
+ - lib/datahen/error/safe_terminate_error.rb
236
238
  - lib/datahen/plugin.rb
237
239
  - lib/datahen/plugin/context_exposer.rb
238
240
  - lib/datahen/scraper.rb
@@ -251,7 +253,7 @@ metadata:
251
253
  allowed_push_host: https://rubygems.org
252
254
  homepage_uri: https://datahen.com
253
255
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
254
- post_install_message:
256
+ post_install_message:
255
257
  rdoc_options: []
256
258
  require_paths:
257
259
  - lib
@@ -267,7 +269,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
267
269
  version: '0'
268
270
  requirements: []
269
271
  rubygems_version: 3.0.3
270
- signing_key:
272
+ signing_key:
271
273
  specification_version: 4
272
274
  summary: DataHen toolbelt for developers
273
275
  test_files: []