datahen 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datahen/cli/scraper.rb +3 -0
- data/lib/datahen/cli/scraper_resource.rb +35 -0
- data/lib/datahen/cli.rb +1 -0
- data/lib/datahen/client/base.rb +9 -2
- data/lib/datahen/client/job.rb +2 -2
- data/lib/datahen/client/job_output.rb +1 -1
- data/lib/datahen/client/job_page.rb +1 -1
- data/lib/datahen/client/job_resource.rb +11 -0
- data/lib/datahen/client/scraper_resource.rb +11 -0
- data/lib/datahen/client.rb +2 -0
- data/lib/datahen/version.rb +1 -1
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 94937cbe6614336d283523740d1bd237b154a7e084dd3c0757dda96a068cc4c2
|
4
|
+
data.tar.gz: 51552871510232be116af8d0f108e5ee51d6733657abb681b1561d78eb68efff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a37ba23e5e4915540ee5ad656d7c93dbd4d78ab36ad9e7f05ee2019d51061793b5a914e9e5527ce40bdf42ca5f6cb758124503293fb229ccf112b8d1e2f70193
|
7
|
+
data.tar.gz: 10f1cf69238cf4e8fae71e9ae892bc771d392b426d98b7168bbe9fbc9985bfcc3f0cf62dff8f9ba6fbca3484b13f29a4e2513259e0659a1a33b6708b764dccfa
|
data/lib/datahen/cli/scraper.rb
CHANGED
data/lib/datahen/cli/scraper_resource.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Datahen
|
2
|
+
class CLI < Thor
|
3
|
+
class ScraperResource < Thor
|
4
|
+
package_name "scraper resource"
|
5
|
+
def self.banner(command, namespace = nil, subcommand = false)
|
6
|
+
"#{basename} #{@package_name} #{command.usage}"
|
7
|
+
end
|
8
|
+
|
9
|
+
desc "list", "List resources on a scraper's current job"
|
10
|
+
long_desc <<-LONGDESC
|
11
|
+
List all resources in a scraper's current job or given job ID.\x5
|
12
|
+
LONGDESC
|
13
|
+
option :scraper_name, :aliases => :s, type: :string, desc: 'Filter by a specific scraper_name'
|
14
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
15
|
+
option :pod, type: :string, desc: 'Returns only tasks with specific pod.'
|
16
|
+
option :container, type: :string, desc: 'Returns only tasks with specific container.'
|
17
|
+
option :executor, type: :string, desc: 'Returns only tasks with specific executor.'
|
18
|
+
def list()
|
19
|
+
if options[:job]
|
20
|
+
client = Client::JobResource.new(options)
|
21
|
+
puts "#{client.all(options[:job])}"
|
22
|
+
else
|
23
|
+
if options[:scraper_name]
|
24
|
+
client = Client::ScraperResource.new(options)
|
25
|
+
puts "#{client.all(options[:scraper_name])}"
|
26
|
+
else
|
27
|
+
puts 'Must specify either a job ID or a scraper name'
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
data/lib/datahen/cli.rb
CHANGED
@@ -7,6 +7,7 @@ require 'datahen/cli/scraper_job_var'
|
|
7
7
|
require 'datahen/cli/scraper_job'
|
8
8
|
require 'datahen/cli/scraper_finisher'
|
9
9
|
require 'datahen/cli/global_page'
|
10
|
+
require 'datahen/cli/scraper_resource'
|
10
11
|
require 'datahen/cli/scraper_page'
|
11
12
|
require 'datahen/cli/job_output'
|
12
13
|
require 'datahen/cli/job'
|
data/lib/datahen/client/base.rb
CHANGED
@@ -13,6 +13,9 @@ module Datahen
|
|
13
13
|
finisher: nil
|
14
14
|
}
|
15
15
|
|
16
|
+
CHECK_NIL = lambda{|v|v.nil?}
|
17
|
+
CHECK_EMPTY_BODY = lambda{|v|v.body.nil? || v.body.empty?}
|
18
|
+
|
16
19
|
def self.env_auth_token
|
17
20
|
ENV['DATAHEN_TOKEN']
|
18
21
|
end
|
@@ -56,14 +59,14 @@ module Datahen
|
|
56
59
|
target.merge(source.select{|k,v|target.has_key?(k)})
|
57
60
|
end
|
58
61
|
|
59
|
-
def retry
|
62
|
+
def retry(times, delay = nil, err_msg = nil, stream = false, check_nil = CHECK_NIL)
|
60
63
|
limit = times.nil? ? nil : times.to_i
|
61
64
|
delay = delay.nil? ? 5 : delay.to_i
|
62
65
|
count = 0
|
63
66
|
begin
|
64
67
|
val = yield
|
65
68
|
if stream
|
66
|
-
return if val
|
69
|
+
return if check_nil.call(val)
|
67
70
|
if val['error'] != ""
|
68
71
|
raise StandardError.new(val['error'])
|
69
72
|
end
|
@@ -125,6 +128,10 @@ module Datahen
|
|
125
128
|
query[:force] = opts[:force] if opts[:force]
|
126
129
|
query[:action] = opts[:action] if opts[:action]
|
127
130
|
query[:"include-system"] = opts[:"include-system"] if opts[:"include-system"]
|
131
|
+
query[:"pod"] = opts[:"pod"] if opts[:"pod"]
|
132
|
+
query[:"container"] = opts[:"container"] if opts[:"container"]
|
133
|
+
query[:"executor"] = opts[:"executor"] if opts[:"executor"]
|
134
|
+
|
128
135
|
|
129
136
|
if opts[:query]
|
130
137
|
if opts[:query].is_a?(Hash)
|
data/lib/datahen/client/job.rb
CHANGED
@@ -60,7 +60,7 @@ module Datahen
|
|
60
60
|
params = @options.merge({body: body.to_json})
|
61
61
|
|
62
62
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:seeder]
|
63
|
-
self.retry(limit, 5, "Error while updating the seeder.") do
|
63
|
+
self.retry(limit, 5, "Error while updating the seeder.", false, CHECK_EMPTY_BODY) do
|
64
64
|
response = self.class.put("/jobs/#{job_id}/seeding_update", params)
|
65
65
|
if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
|
66
66
|
raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
|
@@ -78,7 +78,7 @@ module Datahen
|
|
78
78
|
params = @options.merge({body: body.to_json})
|
79
79
|
|
80
80
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:finisher]
|
81
|
-
self.retry(limit, 5, "Error while updating the finisher.") do
|
81
|
+
self.retry(limit, 5, "Error while updating the finisher.", false, CHECK_EMPTY_BODY) do
|
82
82
|
response = self.class.put("/jobs/#{job_id}/finisher_update", params)
|
83
83
|
if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/
|
84
84
|
raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
|
data/lib/datahen/client/job_output.rb
CHANGED
@@ -7,7 +7,7 @@ module Datahen
|
|
7
7
|
|
8
8
|
def all(job_id, collection = 'default', opts = {})
|
9
9
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : 0
|
10
|
-
self.retry(limit, 10, "Error while updating the seeder.", true) do
|
10
|
+
self.retry(limit, 10, "Error while updating the seeder.", true, CHECK_EMPTY_BODY) do
|
11
11
|
self.class.get("/jobs/#{job_id}/output/collections/#{collection}/records", @options)
|
12
12
|
end
|
13
13
|
end
|
data/lib/datahen/client/job_page.rb
CHANGED
@@ -63,7 +63,7 @@ module Datahen
|
|
63
63
|
params = @options.merge({body: body.to_json})
|
64
64
|
|
65
65
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:parser]
|
66
|
-
self.retry(limit, 5, "Error while updating the parser.") do
|
66
|
+
self.retry(limit, 5, "Error while updating the parser.", false, CHECK_EMPTY_BODY) do
|
67
67
|
response = self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
|
68
68
|
if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
|
69
69
|
raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
|
data/lib/datahen/client.rb
CHANGED
@@ -26,6 +26,8 @@ require "datahen/client/scraper_job_var"
|
|
26
26
|
require "datahen/client/job_finisher"
|
27
27
|
require "datahen/client/job_task"
|
28
28
|
require "datahen/client/scraper_task"
|
29
|
+
require "datahen/client/job_resource"
|
30
|
+
require "datahen/client/scraper_resource"
|
29
31
|
|
30
32
|
module Datahen
|
31
33
|
module Client
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.2
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -227,6 +227,7 @@ files:
|
|
227
227
|
- lib/datahen/cli/scraper_job.rb
|
228
228
|
- lib/datahen/cli/scraper_job_var.rb
|
229
229
|
- lib/datahen/cli/scraper_page.rb
|
230
|
+
- lib/datahen/cli/scraper_resource.rb
|
230
231
|
- lib/datahen/cli/scraper_task.rb
|
231
232
|
- lib/datahen/cli/scraper_var.rb
|
232
233
|
- lib/datahen/cli/seeder.rb
|
@@ -245,6 +246,7 @@ files:
|
|
245
246
|
- lib/datahen/client/job_log.rb
|
246
247
|
- lib/datahen/client/job_output.rb
|
247
248
|
- lib/datahen/client/job_page.rb
|
249
|
+
- lib/datahen/client/job_resource.rb
|
248
250
|
- lib/datahen/client/job_stat.rb
|
249
251
|
- lib/datahen/client/job_task.rb
|
250
252
|
- lib/datahen/client/job_var.rb
|
@@ -257,6 +259,7 @@ files:
|
|
257
259
|
- lib/datahen/client/scraper_job_output.rb
|
258
260
|
- lib/datahen/client/scraper_job_page.rb
|
259
261
|
- lib/datahen/client/scraper_job_var.rb
|
262
|
+
- lib/datahen/client/scraper_resource.rb
|
260
263
|
- lib/datahen/client/scraper_task.rb
|
261
264
|
- lib/datahen/client/scraper_var.rb
|
262
265
|
- lib/datahen/error.rb
|
@@ -281,7 +284,7 @@ metadata:
|
|
281
284
|
allowed_push_host: https://rubygems.org
|
282
285
|
homepage_uri: https://datahen.com
|
283
286
|
source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
|
284
|
-
post_install_message:
|
287
|
+
post_install_message:
|
285
288
|
rdoc_options: []
|
286
289
|
require_paths:
|
287
290
|
- lib
|
@@ -296,8 +299,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
296
299
|
- !ruby/object:Gem::Version
|
297
300
|
version: '0'
|
298
301
|
requirements: []
|
299
|
-
rubygems_version: 3.
|
300
|
-
signing_key:
|
302
|
+
rubygems_version: 3.2.15
|
303
|
+
signing_key:
|
301
304
|
specification_version: 4
|
302
305
|
summary: DataHen toolbelt for developers
|
303
306
|
test_files: []
|