datahen 1.1.1 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14cf0c4f60c5104527a963f2cb8ee0112db9f939cc2bb95ba8e9a1931f1781c0
4
- data.tar.gz: ca0a1662af06f3ed3344524aa3f1ed2a8e7c32f7dc3d204ae344e4657ecc1b73
3
+ metadata.gz: 7f50f72a9ba32a9da0ecf43c684baa35c6ed84302f705ef8f87272a3ea0c9d0d
4
+ data.tar.gz: 5451f07252a2ba798313c02f2a3183e80d321e95d4b8eacc800b70217dd03077
5
5
  SHA512:
6
- metadata.gz: 4c94feb022e23fc300fa9b7ff90e262d3425f2433ac2cb0935ff6020f96cd75a11c1f04766c7584f94921b2ea0f999f84494cb12daf22c1fe79642918210772a
7
- data.tar.gz: a3591cf025d044f0f28e479f8fa883b20a91c77ecf9bdf92bec944ec2a6b61b1a14941da3e9e7964070f7b32a097874a9f9da3702de488adc87d5def1e4b9769
6
+ metadata.gz: 52d5f2c71f63379503b2f054b3ef2ba587d43b35d5dfa5f20c0a1c0547f62a09a1c805ce7452a37e9d064f5cec1ba3aaf833ce09eb1d6583220fcf9ae88c10a3
7
+ data.tar.gz: 55d3564529cc1c1a3815b936f1b0184d5e62c95af8e2c85e051430b6c38e206a27622d0c4d00e250b4db1f8cdf7c9a0276b1252335d337a888cc614bc3242790
data/exe/hen CHANGED
@@ -1,3 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'datahen'
2
3
  require 'datahen/cli'
3
4
  Datahen::CLI.start
@@ -131,13 +131,14 @@ module Datahen
131
131
  Reparse pages in a scraper's current job. You need to specify either a --gid or --parse-fail or --status or --page-type.\x5
132
132
  LONGDESC
133
133
  option :gid, :aliases => :g, type: :string, desc: 'Reparse a specific GID'
134
+ option :fetch_fail, type: :boolean, desc: 'Reparse only pages that fails fetching.'
134
135
  option :parse_fail, type: :boolean, desc: 'Reparse only pages that fails parsing.'
135
136
  option :status, type: :string, desc: 'Reparse only pages with a specific status.'
136
137
  option :page_type, type: :string, desc: 'Refetches only pages with a specific page type.'
137
138
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
138
139
  def reparse(scraper_name)
139
- if !options.key?(:gid) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
140
- puts "Must specify either a --gid, --parse-fail, --status or --page-type"
140
+ if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
141
+ puts "Must specify either a --gid, --fetch-fail, --parse-fail, --status or --page-type"
141
142
  return
142
143
  end
143
144
 
@@ -155,11 +156,14 @@ module Datahen
155
156
  Move pages in a scraper's current job to limbo. You need to specify either a --gid or --status.\x5
156
157
  LONGDESC
157
158
  option :gid, :aliases => :g, type: :string, desc: 'Move a specific GID to limbo'
159
+ option :fetch_fail, type: :boolean, desc: 'Move pages that fails fetching to limbo.'
160
+ option :parse_fail, type: :boolean, desc: 'Move pages that fails parsing to limbo.'
158
161
  option :status, type: :string, desc: 'Move pages with a specific status to limbo.'
162
+ option :page_type, type: :string, desc: 'Move pages with a specific page type to limbo.'
159
163
  option :job, :aliases => :j, type: :numeric, desc: 'Set a specific job ID'
160
164
  def limbo(scraper_name)
161
- if !options.key?(:gid) && !options.key?(:status)
162
- puts "Must specify either a --gid or --status"
165
+ if !options.key?(:gid) && !options.key?(:fetch_fail) && !options.key?(:parse_fail) && !options.key?(:status) && !options.key?(:page_type)
166
+ puts "Must specify either a --gid, --fetch-fail, --parse-fail, --status or --page-type"
163
167
  return
164
168
  end
165
169
 
data/lib/datahen/cli.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'thor'
2
- require 'datahen/scraper'
2
+ require 'datahen'
3
3
  require 'datahen/cli/scraper_var'
4
4
  require 'datahen/cli/scraper_exporter'
5
5
  require 'datahen/cli/scraper_export'
@@ -44,5 +44,10 @@ module Datahen
44
44
 
45
45
  desc "account SUBCOMMAND ...ARGS", "for account related activities"
46
46
  subcommand "account", Account
47
+
48
+ desc "version", "Shows the gem version"
49
# Prints the gem version constant (VERSION) to standard output.
def version
  puts VERSION
end
47
52
  end
48
53
  end
@@ -21,6 +21,10 @@ module Datahen
21
21
  ENV['DATAHEN_IGNORE_SSL'].to_s.strip == '1'
22
22
  end
23
23
 
24
# Returns a random delay in seconds, truncated to millisecond precision.
#
# @param max_seconds [Numeric] exclusive upper bound of the delay (default 2)
# @return [Float] a value in the range 0.0...max_seconds for positive bounds
def self.random_delay(max_seconds = 2)
  # Scale to milliseconds, drop the fractional part, then scale back.
  (rand * max_seconds * 1000.0).to_i / 1000.0
end
27
+
24
28
  def env_api_url
25
29
  ENV['DATAHEN_API_URL'].nil? ? 'https://app.datahen.com/api/v1' : ENV['DATAHEN_API_URL']
26
30
  end
@@ -58,11 +62,15 @@ module Datahen
58
62
  count = 0
59
63
  begin
60
64
  yield
61
- rescue StandardError => e
62
- STDERR.puts(e.inspect)
65
+ rescue Error::CustomRetryError, StandardError => e
66
+ is_custom_retry = e.is_a? Error::CustomRetryError
67
+ real_delay = is_custom_retry ? e.delay : delay
68
+ err_msg = is_custom_retry ? e.error : e.inspect
69
+
70
+ STDERR.puts(err_msg)
63
71
 
64
72
  # wait before retry (default 5 sec)
65
- sleep(delay) if delay > 0
73
+ sleep(real_delay) if real_delay > 0 # use the per-error delay when one was supplied
66
74
 
67
75
  # raise error when retry limit is reached
68
76
  raise e unless limit.nil? || count < limit
@@ -57,7 +57,11 @@ module Datahen
57
57
 
58
58
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:seeder]
59
59
  self.retry(limit, 5, "Error while updating the seeder.") do
60
- self.class.put("/jobs/#{job_id}/seeding_update", params)
60
+ response = self.class.put("/jobs/#{job_id}/seeding_update", params)
61
+ if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
62
+ raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s) # class is defined under Datahen::Error
63
+ end
64
+ response
61
65
  end
62
66
  end
63
67
 
@@ -71,7 +75,11 @@ module Datahen
71
75
 
72
76
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:finisher]
73
77
  self.retry(limit, 5, "Error while updating the finisher.") do
74
- self.class.put("/jobs/#{job_id}/finisher_update", params)
78
+ response = self.class.put("/jobs/#{job_id}/finisher_update", params)
79
+ if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
80
+ raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s) # class is defined under Datahen::Error
81
+ end
82
+ response
75
83
  end
76
84
  end
77
85
 
@@ -70,7 +70,11 @@ module Datahen
70
70
 
71
71
  limit = opts.has_key?(:retry_limit) ? opts.fetch(:retry_limit) : self.default_retry_limit[:parser]
72
72
  self.retry(limit, 5, "Error while updating the parser.") do
73
- self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
73
+ response = self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", params)
74
+ if response.code == 422 && response.body.to_s =~ /pq:\s*deadlock/i
75
+ raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s)
76
+ end
77
+ response
74
78
  end
75
79
  end
76
80
 
@@ -0,0 +1,12 @@
1
module Datahen
  module Error
    # Error that carries a retry delay together with an error description.
    #
    # Inherits from StandardError so that a bare `rescue` (and
    # `rescue StandardError`) catches it; direct subclasses of Exception
    # bypass those handlers.
    class CustomRetryError < StandardError
      # error: description of the failure (may be nil)
      # delay: delay value supplied by the raiser
      attr_accessor :error, :delay

      # @param delay [Numeric] delay value to expose through #delay
      # @param error [Object, nil] failure description, also used as the
      #   exception message when present
      def initialize(delay, error = nil)
        # Pass the description up so #message is meaningful; with nil the
        # message falls back to the class name.
        super(error)
        self.error = error
        self.delay = delay
      end
    end
  end
end
data/lib/datahen/error.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'datahen/error/safe_terminate_error'
2
+ require 'datahen/error/custom_retry_error'
2
3
 
3
4
  module Datahen
4
5
  module Error
@@ -1,5 +1,3 @@
1
- require "datahen/error"
2
- require "datahen/plugin"
3
1
  require "datahen/scraper/parser"
4
2
  require "datahen/scraper/batch_parser"
5
3
  require "datahen/scraper/seeder"
@@ -8,7 +6,6 @@ require "datahen/scraper/executor"
8
6
  require "datahen/scraper/ruby_parser_executor"
9
7
  require "datahen/scraper/ruby_seeder_executor"
10
8
  require "datahen/scraper/ruby_finisher_executor"
11
- require "datahen/client"
12
9
 
13
10
  module Datahen
14
11
  module Scraper
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "1.1.1"
2
+ VERSION = "1.2.1"
3
3
  end
data/lib/datahen.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  require "datahen/version"
2
+ require "datahen/error"
3
+ require "datahen/plugin"
2
4
  require "datahen/scraper"
5
+ require "datahen/client"
3
6
 
4
7
  module Datahen
5
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-11 00:00:00.000000000 Z
11
+ date: 2022-11-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -257,6 +257,7 @@ files:
257
257
  - lib/datahen/client/scraper_job_var.rb
258
258
  - lib/datahen/client/scraper_var.rb
259
259
  - lib/datahen/error.rb
260
+ - lib/datahen/error/custom_retry_error.rb
260
261
  - lib/datahen/error/safe_terminate_error.rb
261
262
  - lib/datahen/plugin.rb
262
263
  - lib/datahen/plugin/context_exposer.rb