datahen 1.5.2 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/datahen/cli/scraper.rb +3 -0
- data/lib/datahen/cli/scraper_resource.rb +35 -0
- data/lib/datahen/cli.rb +1 -0
- data/lib/datahen/client/base.rb +9 -2
- data/lib/datahen/client/job.rb +2 -2
- data/lib/datahen/client/job_output.rb +1 -1
- data/lib/datahen/client/job_page.rb +1 -1
- data/lib/datahen/client/job_resource.rb +11 -0
- data/lib/datahen/client/scraper_resource.rb +11 -0
- data/lib/datahen/client.rb +2 -0
- data/lib/datahen/version.rb +1 -1
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 94937cbe6614336d283523740d1bd237b154a7e084dd3c0757dda96a068cc4c2
|
4
|
+
data.tar.gz: 51552871510232be116af8d0f108e5ee51d6733657abb681b1561d78eb68efff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a37ba23e5e4915540ee5ad656d7c93dbd4d78ab36ad9e7f05ee2019d51061793b5a914e9e5527ce40bdf42ca5f6cb758124503293fb229ccf112b8d1e2f70193
|
7
|
+
data.tar.gz: 10f1cf69238cf4e8fae71e9ae892bc771d392b426d98b7168bbe9fbc9985bfcc3f0cf62dff8f9ba6fbca3484b13f29a4e2513259e0659a1a33b6708b764dccfa
|
data/lib/datahen/cli/scraper.rb
CHANGED
data/lib/datahen/cli/scraper_resource.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Datahen
|
2
|
+
class CLI < Thor
|
3
|
+
class ScraperResource < Thor
|
4
|
+
package_name "scraper resource"
|
5
|
+
def self.banner(command, namespace = nil, subcommand = false)
|
6
|
+
"#{basename} #{@package_name} #{command.usage}"
|
7
|
+
end
|
8
|
+
|
9
|
+
desc "list", "List resources on a scraper's current job"
|
10
|
+
long_desc <<-LONGDESC
|
11
|
+
List all resources in a scraper's current job or given job ID.\x5
|
12
|
+
LONGDESC
|
13
|
+
option :scraper_name, :aliases => :s, type: :string, desc: 'Filter by a specific scraper_name'
|
14
|
+
option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
|
15
|
+
option :pod, type: :string, desc: 'Returns only tasks with specific pod.'
|
16
|
+
option :container, type: :string, desc: 'Returns only tasks with specific container.'
|
17
|
+
option :executor, type: :string, desc: 'Returns only tasks with specific executor.'
|
18
|
+
def list()
|
19
|
+
if options[:job]
|
20
|
+
client = Client::JobResource.new(options)
|
21
|
+
puts "#{client.all(options[:job])}"
|
22
|
+
else
|
23
|
+
if options[:scraper_name]
|
24
|
+
client = Client::ScraperResource.new(options)
|
25
|
+
puts "#{client.all(options[:scraper_name])}"
|
26
|
+
else
|
27
|
+
puts 'Must specify either a job ID or a scraper name'
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
data/lib/datahen/cli.rb
CHANGED
@@ -7,6 +7,7 @@ require 'datahen/cli/scraper_job_var'
|
|
7
7
|
require 'datahen/cli/scraper_job'
|
8
8
|
require 'datahen/cli/scraper_finisher'
|
9
9
|
require 'datahen/cli/global_page'
|
10
|
+
require 'datahen/cli/scraper_resource'
|
10
11
|
require 'datahen/cli/scraper_page'
|
11
12
|
require 'datahen/cli/job_output'
|
12
13
|
require 'datahen/cli/job'
|
data/lib/datahen/client/base.rb
CHANGED
@@ -13,6 +13,9 @@ module Datahen
|
|
13
13
|
finisher: nil
|
14
14
|
}
|
15
15
|
|
16
|
+
CHECK_NIL = lambda{|v|v.nil?}
|
17
|
+
CHECK_EMPTY_BODY = lambda{|v|v.body.nil? || v.body.empty?}
|
18
|
+
|
16
19
|
def self.env_auth_token
|
17
20
|
ENV['DATAHEN_TOKEN']
|
18
21
|
end
|
@@ -56,14 +59,14 @@ module Datahen
|
|
56
59
|
target.merge(source.select{|k,v|target.has_key?(k)})
|
57
60
|
end
|
58
61
|
|
59
|
-
def retry
|
62
|
+
def retry(times, delay = nil, err_msg = nil, stream = false, check_nil = CHECK_NIL)
|
60
63
|
limit = times.nil? ? nil : times.to_i
|
61
64
|
delay = delay.nil? ? 5 : delay.to_i
|
62
65
|
count = 0
|
63
66
|
begin
|
64
67
|
val = yield
|
65
68
|
if stream
|
66
|
-
return if val
|
69
|
+
return if check_nil.call(val)
|
67
70
|
if val['error'] != ""
|
68
71
|
raise StandardError.new(val['error'])
|
69
72
|
end
|
@@ -125,6 +128,10 @@ module Datahen
|
|
125
128
|
query[:force] = opts[:force] if opts[:force]
|
126
129
|
query[:action] = opts[:action] if opts[:action]
|
127
130
|
query[:"include-system"] = opts[:"include-system"] if opts[:"include-system"]
|
131
|
+
query[:"pod"] = opts[:"pod"] if opts[:"pod"]
|
132
|
+
query[:"container"] = opts[:"container"] if opts[:"container"]
|
133
|
+
query[:"executor"] = opts[:"executor"] if opts[:"executor"]
|
134
|
+
|
128
135
|
|
129
136
|
if opts[:query]
|
130
137
|
if opts[:query].is_a?(Hash)
|
data/lib/datahen/client/job.rb
CHANGED
@@ -60,7 +60,7 @@ module Datahen
|
|
60
60
|
params = @options.merge({body: body.to_json})
|
61
61
|
|
62
62
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:seeder]
|
63
|
-
self.retry(limit, 5, "Error while updating the seeder.") do
|
63
|
+
self.retry(limit, 5, "Error while updating the seeder.", false, CHECK_EMPTY_BODY) do
|
64
64
|
response = self.class.put("/jobs/#{job_id}/seeding_update", params)
|
65
65
|
if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
|
66
66
|
raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
|
@@ -78,7 +78,7 @@ module Datahen
|
|
78
78
|
params = @options.merge({body: body.to_json})
|
79
79
|
|
80
80
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:finisher]
|
81
|
-
self.retry(limit, 5, "Error while updating the finisher.") do
|
81
|
+
self.retry(limit, 5, "Error while updating the finisher.", false, CHECK_EMPTY_BODY) do
|
82
82
|
response = self.class.put("/jobs/#{job_id}/finisher_update", params)
|
83
83
|
if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/
|
84
84
|
raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
|
data/lib/datahen/client/job_output.rb
CHANGED
@@ -7,7 +7,7 @@ module Datahen
|
|
7
7
|
|
8
8
|
def all(job_id, collection = 'default', opts = {})
|
9
9
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : 0
|
10
|
-
self.retry(limit, 10, "Error while updating the seeder.", true) do
|
10
|
+
self.retry(limit, 10, "Error while updating the seeder.", true, CHECK_EMPTY_BODY) do
|
11
11
|
self.class.get("/jobs/#{job_id}/output/collections/#{collection}/records", @options)
|
12
12
|
end
|
13
13
|
end
|
data/lib/datahen/client/job_page.rb
CHANGED
@@ -63,7 +63,7 @@ module Datahen
|
|
63
63
|
params = @options.merge({body: body.to_json})
|
64
64
|
|
65
65
|
limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:parser]
|
66
|
-
self.retry(limit, 5, "Error while updating the parser.") do
|
66
|
+
self.retry(limit, 5, "Error while updating the parser.", false, CHECK_EMPTY_BODY) do
|
67
67
|
response = self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
|
68
68
|
if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
|
69
69
|
raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
|
data/lib/datahen/client.rb
CHANGED
@@ -26,6 +26,8 @@ require "datahen/client/scraper_job_var"
|
|
26
26
|
require "datahen/client/job_finisher"
|
27
27
|
require "datahen/client/job_task"
|
28
28
|
require "datahen/client/scraper_task"
|
29
|
+
require "datahen/client/job_resource"
|
30
|
+
require "datahen/client/scraper_resource"
|
29
31
|
|
30
32
|
module Datahen
|
31
33
|
module Client
|
data/lib/datahen/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datahen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.2
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Parama Danoesubroto
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -227,6 +227,7 @@ files:
|
|
227
227
|
- lib/datahen/cli/scraper_job.rb
|
228
228
|
- lib/datahen/cli/scraper_job_var.rb
|
229
229
|
- lib/datahen/cli/scraper_page.rb
|
230
|
+
- lib/datahen/cli/scraper_resource.rb
|
230
231
|
- lib/datahen/cli/scraper_task.rb
|
231
232
|
- lib/datahen/cli/scraper_var.rb
|
232
233
|
- lib/datahen/cli/seeder.rb
|
@@ -245,6 +246,7 @@ files:
|
|
245
246
|
- lib/datahen/client/job_log.rb
|
246
247
|
- lib/datahen/client/job_output.rb
|
247
248
|
- lib/datahen/client/job_page.rb
|
249
|
+
- lib/datahen/client/job_resource.rb
|
248
250
|
- lib/datahen/client/job_stat.rb
|
249
251
|
- lib/datahen/client/job_task.rb
|
250
252
|
- lib/datahen/client/job_var.rb
|
@@ -257,6 +259,7 @@ files:
|
|
257
259
|
- lib/datahen/client/scraper_job_output.rb
|
258
260
|
- lib/datahen/client/scraper_job_page.rb
|
259
261
|
- lib/datahen/client/scraper_job_var.rb
|
262
|
+
- lib/datahen/client/scraper_resource.rb
|
260
263
|
- lib/datahen/client/scraper_task.rb
|
261
264
|
- lib/datahen/client/scraper_var.rb
|
262
265
|
- lib/datahen/error.rb
|
@@ -281,7 +284,7 @@ metadata:
|
|
281
284
|
allowed_push_host: https://rubygems.org
|
282
285
|
homepage_uri: https://datahen.com
|
283
286
|
source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
|
284
|
-
post_install_message:
|
287
|
+
post_install_message:
|
285
288
|
rdoc_options: []
|
286
289
|
require_paths:
|
287
290
|
- lib
|
@@ -296,8 +299,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
296
299
|
- !ruby/object:Gem::Version
|
297
300
|
version: '0'
|
298
301
|
requirements: []
|
299
|
-
rubygems_version: 3.
|
300
|
-
signing_key:
|
302
|
+
rubygems_version: 3.2.15
|
303
|
+
signing_key:
|
301
304
|
specification_version: 4
|
302
305
|
summary: DataHen toolbelt for developers
|
303
306
|
test_files: []
|