datahen 1.5.2 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc47c55d814a573f9de29c4725aba0cad212dceae5dfa2d330dc980ad4f64253
4
- data.tar.gz: 508fe6249ef13f07c835297758f31eb8459080b13c4256c9393d0c8c6ea2b171
3
+ metadata.gz: 94937cbe6614336d283523740d1bd237b154a7e084dd3c0757dda96a068cc4c2
4
+ data.tar.gz: 51552871510232be116af8d0f108e5ee51d6733657abb681b1561d78eb68efff
5
5
  SHA512:
6
- metadata.gz: 23c817ae6f20698c95fdae1503bfe9fc6072f617389e979a1389eab746de07632fab432b2a3a5a824c56c9ad3b6a254a1508ecfd7bb691013843e7b93831daf5
7
- data.tar.gz: c3a1df12099bc6bf159ac7689d7c7be36dc15416b6ecc3c347e7ef1ba1e8e844a58d9600f00803c7fd73837915b88674aa8e470e504acc6ac89d6fd0897df632
6
+ metadata.gz: a37ba23e5e4915540ee5ad656d7c93dbd4d78ab36ad9e7f05ee2019d51061793b5a914e9e5527ce40bdf42ca5f6cb758124503293fb229ccf112b8d1e2f70193
7
+ data.tar.gz: 10f1cf69238cf4e8fae71e9ae892bc771d392b426d98b7168bbe9fbc9985bfcc3f0cf62dff8f9ba6fbca3484b13f29a4e2513259e0659a1a33b6708b764dccfa
@@ -252,6 +252,9 @@ module Datahen
252
252
  desc "task SUBCOMMAND ...ARGS", "manage task on a job"
253
253
  subcommand "task", ScraperTask
254
254
 
255
+ desc "resource SUBCOMMAND ...ARGS", "manage resource on a job"
256
+ subcommand "resource", ScraperResource
257
+
255
258
 
256
259
  end
257
260
  end
@@ -0,0 +1,35 @@
1
+ module Datahen
2
+ class CLI < Thor
3
+ class ScraperResource < Thor
4
+ package_name "scraper resource"
5
+ def self.banner(command, namespace = nil, subcommand = false)
6
+ "#{basename} #{@package_name} #{command.usage}"
7
+ end
8
+
9
+ desc "list", "List resources on a scraper's current job"
10
+ long_desc <<-LONGDESC
11
+ List all resources in a scraper's current job or given job ID.\x5
12
+ LONGDESC
13
+ option :scraper_name, :aliases => :s, type: :string, desc: 'Filter by a specific scraper_name'
14
+ option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
15
+ option :pod, type: :string, desc: 'Returns only tasks with specific pod.'
16
+ option :container, type: :string, desc: 'Returns only tasks with specific container.'
17
+ option :executor, type: :string, desc: 'Returns only tasks with specific executor.'
18
+ def list()
19
+ if options[:job]
20
+ client = Client::JobResource.new(options)
21
+ puts "#{client.all(options[:job])}"
22
+ else
23
+ if options[:scraper_name]
24
+ client = Client::ScraperResource.new(options)
25
+ puts "#{client.all(options[:scraper_name])}"
26
+ else
27
+ puts 'Must specify either a job ID or a scraper name'
28
+ end
29
+ end
30
+ end
31
+
32
+ end
33
+ end
34
+
35
+ end
data/lib/datahen/cli.rb CHANGED
@@ -7,6 +7,7 @@ require 'datahen/cli/scraper_job_var'
7
7
  require 'datahen/cli/scraper_job'
8
8
  require 'datahen/cli/scraper_finisher'
9
9
  require 'datahen/cli/global_page'
10
+ require 'datahen/cli/scraper_resource'
10
11
  require 'datahen/cli/scraper_page'
11
12
  require 'datahen/cli/job_output'
12
13
  require 'datahen/cli/job'
@@ -13,6 +13,9 @@ module Datahen
13
13
  finisher: nil
14
14
  }
15
15
 
16
+ CHECK_NIL = lambda{|v|v.nil?}
17
+ CHECK_EMPTY_BODY = lambda{|v|v.body.nil? || v.body.empty?}
18
+
16
19
  def self.env_auth_token
17
20
  ENV['DATAHEN_TOKEN']
18
21
  end
@@ -56,14 +59,14 @@ module Datahen
56
59
  target.merge(source.select{|k,v|target.has_key?(k)})
57
60
  end
58
61
 
59
- def retry times, delay = nil, err_msg = nil, stream = false
62
+ def retry(times, delay = nil, err_msg = nil, stream = false, check_nil = CHECK_NIL)
60
63
  limit = times.nil? ? nil : times.to_i
61
64
  delay = delay.nil? ? 5 : delay.to_i
62
65
  count = 0
63
66
  begin
64
67
  val = yield
65
68
  if stream
66
- return if val.nil?
69
+ return if check_nil.call(val)
67
70
  if val['error'] != ""
68
71
  raise StandardError.new(val['error'])
69
72
  end
@@ -125,6 +128,10 @@ module Datahen
125
128
  query[:force] = opts[:force] if opts[:force]
126
129
  query[:action] = opts[:action] if opts[:action]
127
130
  query[:"include-system"] = opts[:"include-system"] if opts[:"include-system"]
131
+ query[:"pod"] = opts[:"pod"] if opts[:"pod"]
132
+ query[:"container"] = opts[:"container"] if opts[:"container"]
133
+ query[:"executor"] = opts[:"executor"] if opts[:"executor"]
134
+
128
135
 
129
136
  if opts[:query]
130
137
  if opts[:query].is_a?(Hash)
@@ -60,7 +60,7 @@ module Datahen
60
60
  params = @options.merge({body: body.to_json})
61
61
 
62
62
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:seeder]
63
- self.retry(limit, 5, "Error while updating the seeder.") do
63
+ self.retry(limit, 5, "Error while updating the seeder.", false, CHECK_EMPTY_BODY) do
64
64
  response = self.class.put("/jobs/#{job_id}/seeding_update", params)
65
65
  if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
66
66
  raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
@@ -78,7 +78,7 @@ module Datahen
78
78
  params = @options.merge({body: body.to_json})
79
79
 
80
80
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:finisher]
81
- self.retry(limit, 5, "Error while updating the finisher.") do
81
+ self.retry(limit, 5, "Error while updating the finisher.", false, CHECK_EMPTY_BODY) do
82
82
  response = self.class.put("/jobs/#{job_id}/finisher_update", params)
83
83
  if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/
84
84
  raise CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
@@ -7,7 +7,7 @@ module Datahen
7
7
 
8
8
  def all(job_id, collection = 'default', opts = {})
9
9
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : 0
10
- self.retry(limit, 10, "Error while updating the seeder.", true) do
10
+ self.retry(limit, 10, "Error while updating the seeder.", true, CHECK_EMPTY_BODY) do
11
11
  self.class.get("/jobs/#{job_id}/output/collections/#{collection}/records", @options)
12
12
  end
13
13
  end
@@ -63,7 +63,7 @@ module Datahen
63
63
  params = @options.merge({body: body.to_json})
64
64
 
65
65
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:parser]
66
- self.retry(limit, 5, "Error while updating the parser.") do
66
+ self.retry(limit, 5, "Error while updating the parser.", false, CHECK_EMPTY_BODY) do
67
67
  response = self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
68
68
  if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
69
69
  raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
@@ -0,0 +1,11 @@
1
+ module Datahen
2
+ module Client
3
+ class JobResource < Datahen::Client::Base
4
+ def all(job_id, opts={})
5
+ params = @options.merge(opts)
6
+ self.class.get("/jobs/#{job_id}/resources", params)
7
+ end
8
+ end
9
+
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ module Datahen
2
+ module Client
3
+ class ScraperResource < Datahen::Client::Base
4
+ def all(scraper_name, opts={})
5
+ params = @options.merge(opts)
6
+ self.class.get("/scrapers/#{scraper_name}/resources", params)
7
+ end
8
+ end
9
+
10
+ end
11
+ end
@@ -26,6 +26,8 @@ require "datahen/client/scraper_job_var"
26
26
  require "datahen/client/job_finisher"
27
27
  require "datahen/client/job_task"
28
28
  require "datahen/client/scraper_task"
29
+ require "datahen/client/job_resource"
30
+ require "datahen/client/scraper_resource"
29
31
 
30
32
  module Datahen
31
33
  module Client
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "1.5.2"
2
+ VERSION = "1.6.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.2
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-09 00:00:00.000000000 Z
11
+ date: 2024-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -227,6 +227,7 @@ files:
227
227
  - lib/datahen/cli/scraper_job.rb
228
228
  - lib/datahen/cli/scraper_job_var.rb
229
229
  - lib/datahen/cli/scraper_page.rb
230
+ - lib/datahen/cli/scraper_resource.rb
230
231
  - lib/datahen/cli/scraper_task.rb
231
232
  - lib/datahen/cli/scraper_var.rb
232
233
  - lib/datahen/cli/seeder.rb
@@ -245,6 +246,7 @@ files:
245
246
  - lib/datahen/client/job_log.rb
246
247
  - lib/datahen/client/job_output.rb
247
248
  - lib/datahen/client/job_page.rb
249
+ - lib/datahen/client/job_resource.rb
248
250
  - lib/datahen/client/job_stat.rb
249
251
  - lib/datahen/client/job_task.rb
250
252
  - lib/datahen/client/job_var.rb
@@ -257,6 +259,7 @@ files:
257
259
  - lib/datahen/client/scraper_job_output.rb
258
260
  - lib/datahen/client/scraper_job_page.rb
259
261
  - lib/datahen/client/scraper_job_var.rb
262
+ - lib/datahen/client/scraper_resource.rb
260
263
  - lib/datahen/client/scraper_task.rb
261
264
  - lib/datahen/client/scraper_var.rb
262
265
  - lib/datahen/error.rb
@@ -281,7 +284,7 @@ metadata:
281
284
  allowed_push_host: https://rubygems.org
282
285
  homepage_uri: https://datahen.com
283
286
  source_code_uri: https://github.com/DataHenOfficial/datahen-ruby
284
- post_install_message:
287
+ post_install_message:
285
288
  rdoc_options: []
286
289
  require_paths:
287
290
  - lib
@@ -296,8 +299,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
296
299
  - !ruby/object:Gem::Version
297
300
  version: '0'
298
301
  requirements: []
299
- rubygems_version: 3.0.3
300
- signing_key:
302
+ rubygems_version: 3.2.15
303
+ signing_key:
301
304
  specification_version: 4
302
305
  summary: DataHen toolbelt for developers
303
306
  test_files: []