datahen 1.5.2 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc47c55d814a573f9de29c4725aba0cad212dceae5dfa2d330dc980ad4f64253
4
- data.tar.gz: 508fe6249ef13f07c835297758f31eb8459080b13c4256c9393d0c8c6ea2b171
3
+ metadata.gz: 94937cbe6614336d283523740d1bd237b154a7e084dd3c0757dda96a068cc4c2
4
+ data.tar.gz: 51552871510232be116af8d0f108e5ee51d6733657abb681b1561d78eb68efff
5
5
  SHA512:
6
- metadata.gz: 23c817ae6f20698c95fdae1503bfe9fc6072f617389e979a1389eab746de07632fab432b2a3a5a824c56c9ad3b6a254a1508ecfd7bb691013843e7b93831daf5
7
- data.tar.gz: c3a1df12099bc6bf159ac7689d7c7be36dc15416b6ecc3c347e7ef1ba1e8e844a58d9600f00803c7fd73837915b88674aa8e470e504acc6ac89d6fd0897df632
6
+ metadata.gz: a37ba23e5e4915540ee5ad656d7c93dbd4d78ab36ad9e7f05ee2019d51061793b5a914e9e5527ce40bdf42ca5f6cb758124503293fb229ccf112b8d1e2f70193
7
+ data.tar.gz: 10f1cf69238cf4e8fae71e9ae892bc771d392b426d98b7168bbe9fbc9985bfcc3f0cf62dff8f9ba6fbca3484b13f29a4e2513259e0659a1a33b6708b764dccfa
@@ -252,6 +252,9 @@ module Datahen
252
252
  desc "task SUBCOMMAND ...ARGS", "manage task on a job"
253
253
  subcommand "task", ScraperTask
254
254
 
255
+ desc "resource SUBCOMMAND ...ARGS", "manage resource on a job"
256
+ subcommand "resource", ScraperResource
257
+
255
258
 
256
259
  end
257
260
  end
@@ -0,0 +1,35 @@
1
+ module Datahen
2
+ class CLI < Thor
3
+ class ScraperResource < Thor
4
+ package_name "scraper resource"
5
+ def self.banner(command, namespace = nil, subcommand = false)
6
+ "#{basename} #{@package_name} #{command.usage}"
7
+ end
8
+
9
+ desc "list", "List resources on a scraper's current job"
10
+ long_desc <<-LONGDESC
11
+ List all resources in a scraper's current job or given job ID.\x5
12
+ LONGDESC
13
+ option :scraper_name, :aliases => :s, type: :string, desc: 'Filter by a specific scraper_name'
14
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
15
+ option :pod, type: :string, desc: 'Returns only tasks with specific pod.'
16
+ option :container, type: :string, desc: 'Returns only tasks with specific container.'
17
+ option :executor, type: :string, desc: 'Returns only tasks with specific executor.'
18
+ def list()
19
+ if options[:job]
20
+ client = Client::JobResource.new(options)
21
+ puts "#{client.all(options[:job])}"
22
+ else
23
+ if options[:scraper_name]
24
+ client = Client::ScraperResource.new(options)
25
+ puts "#{client.all(options[:scraper_name])}"
26
+ else
27
+ puts 'Must specify either a job ID or a scraper name'
28
+ end
29
+ end
30
+ end
31
+
32
+ end
33
+ end
34
+
35
+ end
data/lib/datahen/cli.rb CHANGED
@@ -7,6 +7,7 @@ require 'datahen/cli/scraper_job_var'
7
7
  require 'datahen/cli/scraper_job'
8
8
  require 'datahen/cli/scraper_finisher'
9
9
  require 'datahen/cli/global_page'
10
+ require 'datahen/cli/scraper_resource'
10
11
  require 'datahen/cli/scraper_page'
11
12
  require 'datahen/cli/job_output'
12
13
  require 'datahen/cli/job'
@@ -13,6 +13,9 @@ module Datahen
13
13
  finisher: nil
14
14
  }
15
15
 
16
+ CHECK_NIL = lambda{|v|v.nil?}
17
+ CHECK_EMPTY_BODY = lambda{|v|v.body.nil? || v.body.empty?}
18
+
16
19
  def self.env_auth_token
17
20
  ENV['DATAHEN_TOKEN']
18
21
  end
@@ -56,14 +59,14 @@ module Datahen
56
59
  target.merge(source.select{|k,v|target.has_key?(k)})
57
60
  end
58
61
 
59
- def retry times, delay = nil, err_msg = nil, stream = false
62
+ def retry(times, delay = nil, err_msg = nil, stream = false, check_nil = CHECK_NIL)
60
63
  limit = times.nil? ? nil : times.to_i
61
64
  delay = delay.nil? ? 5 : delay.to_i
62
65
  count = 0
63
66
  begin
64
67
  val = yield
65
68
  if stream
66
- return if val.nil?
69
+ return if check_nil.call(val)
67
70
  if val['error'] != ""
68
71
  raise StandardError.new(val['error'])
69
72
  end
@@ -125,6 +128,10 @@ module Datahen
125
128
  query[:force] = opts[:force] if opts[:force]
126
129
  query[:action] = opts[:action] if opts[:action]
127
130
  query[:"include-system"] = opts[:"include-system"] if opts[:"include-system"]
131
+ query[:"pod"] = opts[:"pod"] if opts[:"pod"]
132
+ query[:"container"] = opts[:"container"] if opts[:"container"]
133
+ query[:"executor"] = opts[:"executor"] if opts[:"executor"]
134
+
128
135
 
129
136
  if opts[:query]
130
137
  if opts[:query].is_a?(Hash)
@@ -60,7 +60,7 @@ module Datahen
60
60
  params = @options.merge({body: body.to_json})
61
61
 
62
62
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:seeder]
63
- self.retry(limit, 5, "Error while updating the seeder.") do
63
+ self.retry(limit, 5, "Error while updating the seeder.", false, CHECK_EMPTY_BODY) do
64
64
  response = self.class.put("/jobs/#{job_id}/seeding_update", params)
65
65
  if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
66
66
  raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
@@ -78,7 +78,7 @@ module Datahen
78
78
  params = @options.merge({body: body.to_json})
79
79
 
80
80
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:finisher]
81
- self.retry(limit, 5, "Error while updating the finisher.") do
81
+ self.retry(limit, 5, "Error while updating the finisher.", false, CHECK_EMPTY_BODY) do
82
82
  response = self.class.put("/jobs/#{job_id}/finisher_update", params)
83
83
  if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/
84
84
  raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
@@ -7,7 +7,7 @@ module Datahen
7
7
 
8
8
  def all(job_id, collection = 'default', opts = {})
9
9
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : 0
10
- self.retry(limit, 10, "Error while updating the seeder.", true) do
10
+ self.retry(limit, 10, "Error while updating the seeder.", true, CHECK_EMPTY_BODY) do
11
11
  self.class.get("/jobs/#{job_id}/output/collections/#{collection}/records", @options)
12
12
  end
13
13
  end
@@ -63,7 +63,7 @@ module Datahen
63
63
  params = @options.merge({body: body.to_json})
64
64
 
65
65
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:parser]
66
- self.retry(limit, 5, "Error while updating the parser.") do
66
+ self.retry(limit, 5, "Error while updating the parser.", false, CHECK_EMPTY_BODY) do
67
67
  response = self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
68
68
  if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
69
69
  raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
@@ -0,0 +1,11 @@
1
+ module Datahen
2
+ module Client
3
+ class JobResource < Datahen::Client::Base
4
+ def all(job_id, opts={})
5
+ params = @options.merge(opts)
6
+ self.class.get("/jobs/#{job_id}/resources", params)
7
+ end
8
+ end
9
+
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ module Datahen
2
+ module Client
3
+ class ScraperResource < Datahen::Client::Base
4
+ def all(scraper_name, opts={})
5
+ params = @options.merge(opts)
6
+ self.class.get("/scrapers/#{scraper_name}/resources", params)
7
+ end
8
+ end
9
+
10
+ end
11
+ end
@@ -26,6 +26,8 @@ require "datahen/client/scraper_job_var"
26
26
  require "datahen/client/job_finisher"
27
27
  require "datahen/client/job_task"
28
28
  require "datahen/client/scraper_task"
29
+ require "datahen/client/job_resource"
30
+ require "datahen/client/scraper_resource"
29
31
 
30
32
  module Datahen
31
33
  module Client
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "1.5.2"
2
+ VERSION = "1.6.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.2
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-09 00:00:00.000000000 Z
11
+ date: 2024-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -227,6 +227,7 @@ files:
227
227
  - lib/datahen/cli/scraper_job.rb
228
228
  - lib/datahen/cli/scraper_job_var.rb
229
229
  - lib/datahen/cli/scraper_page.rb
230
+ - lib/datahen/cli/scraper_resource.rb
230
231
  - lib/datahen/cli/scraper_task.rb
231
232
  - lib/datahen/cli/scraper_var.rb
232
233
  - lib/datahen/cli/seeder.rb
@@ -245,6 +246,7 @@ files:
245
246
  - lib/datahen/client/job_log.rb
246
247
  - lib/datahen/client/job_output.rb
247
248
  - lib/datahen/client/job_page.rb
249
+ - lib/datahen/client/job_resource.rb
248
250
  - lib/datahen/client/job_stat.rb
249
251
  - lib/datahen/client/job_task.rb
250
252
  - lib/datahen/client/job_var.rb
@@ -257,6 +259,7 @@ files:
257
259
  - lib/datahen/client/scraper_job_output.rb
258
260
  - lib/datahen/client/scraper_job_page.rb
259
261
  - lib/datahen/client/scraper_job_var.rb
262
+ - lib/datahen/client/scraper_resource.rb
260
263
  - lib/datahen/client/scraper_task.rb
261
264
  - lib/datahen/client/scraper_var.rb
262
265
  - lib/datahen/error.rb
@@ -281,7 +284,7 @@ metadata:
281
284
  allowed_push_host: https://rubygems.org
282
285
  homepage_uri: https://datahen.com
283
286
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
284
- post_install_message:
287
+ post_install_message:
285
288
  rdoc_options: []
286
289
  require_paths:
287
290
  - lib
@@ -296,8 +299,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
296
299
  - !ruby/object:Gem::Version
297
300
  version: '0'
298
301
  requirements: []
299
- rubygems_version: 3.0.3
300
- signing_key:
302
+ rubygems_version: 3.2.15
303
+ signing_key:
301
304
  specification_version: 4
302
305
  summary: DataHen toolbelt for developers
303
306
  test_files: []