aranha 0.15.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf75a29a21e24f2e85a425b8b9291c058a46eef895a8100992514dd03138b065
4
- data.tar.gz: d52cc8cf889e0d9174f13771e3c9ac15971a1f673dc8e4e15085aab3755e169b
3
+ metadata.gz: cc769d0b380643afe3740301d894fd70e93d3df19d3f6cf4bde1d7633d059d4f
4
+ data.tar.gz: 8d019e0d77e967ca19ff7325dc0081aa19bcdb114ea1812e49e245d121ca57c2
5
5
  SHA512:
6
- metadata.gz: 4844ac22fc5d9b3a16e7b03d8da6e5e5fc82a95dd705a0bbbb11c029f206fdc6fb762d1fd2cd71a2c8f869084597ad3f8d65e88f79f95bf37dfcd02a67cc59c6
7
- data.tar.gz: c768ba132ef0b9afbf9630e62aeb4759ab5bdb09e8d6af2658c59b19360c2a9c11dd3c1abd49431111fa25273c895ede8eb2536d7f63ebd607a1965c9e9a0813
6
+ metadata.gz: 16fd673ddb8be560b62a3dbab2e4e81e4b7b66842d5594ee4eb63639ba7a3bec6a43f239d69918c0417ed7a132069eea9bdf6ac741e41988106cb8f0ec1e0e56
7
+ data.tar.gz: 7b5c39602e3694ffbca23db1578cb87fc7dab222dfe67dd3b87e4c19fc36277efa339f10692bd2fd77c3d3218fd4201e31229d6d37b75ad209b200cc87b1229b
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+
5
module Aranha
  # Wraps a single call to +address.process+ and exposes its outcome: whether it
  # succeeded and, if it failed, whether the failure is one of the listed
  # network-related errors.
  #
  # NOTE(review): this file only requires 'eac_ruby_utils/core_ext'; the
  # ::Aranha::Parsers, ::HTTPClient and ::Net constants referenced below must
  # already be loaded when this class body is evaluated - confirm load order.
  class AddressProcessor
    ARANHA_EXCEPTIONS = [::Aranha::Parsers::InvalidStateException].freeze
    CORE_EXCEPTIONS = [::SocketError].freeze
    ERRNO_EXCEPTIONS = [::Errno::ECONNREFUSED, ::Errno::ECONNRESET].freeze
    HTTPCLIENT_EXCEPTIONS = [
      ::HTTPClient::BadResponseError,
      ::HTTPClient::ConnectTimeoutError,
      ::HTTPClient::ReceiveTimeoutError
    ].freeze
    # NOTE(review): Net::HTTPServerException appears to be a deprecated alias of
    # Net::HTTPClientException on newer Rubies - confirm before renaming.
    NET_EXCEPTIONS = [::Net::HTTPFatalError, ::Net::HTTPServerException, ::Net::OpenTimeout].freeze

    # Every exception class for which .rescuable_error? answers true.
    NETWORK_EXCEPTIONS = ARANHA_EXCEPTIONS + CORE_EXCEPTIONS + ERRNO_EXCEPTIONS +
                         HTTPCLIENT_EXCEPTIONS + NET_EXCEPTIONS

    class << self
      # Tells whether +error+, or any exception in its +cause+ chain, is an
      # instance of one of NETWORK_EXCEPTIONS.
      #
      # @param error [Exception, nil] error to classify; +nil+ means "no error"
      # @return [Boolean]
      def rescuable_error?(error)
        # The end of the cause chain (or no error at all) is never rescuable.
        return false if error.nil?
        return true if NETWORK_EXCEPTIONS.any? { |klass| error.is_a?(klass) }

        # Walk up the cause chain. The previous code called the undefined
        # +network_error?+ here, raising NoMethodError for wrapped errors.
        rescuable_error?(error.cause)
      end
    end

    # Both macros come from eac_ruby_utils. Presumably +enable_simple_cache+
    # exposes +error+ as a memoized wrapper around +error_uncached+, and
    # +common_constructor+ defines the initializer and the +address+ reader -
    # confirm against that gem.
    enable_simple_cache
    common_constructor :address

    # @return [Boolean] true when +error+ is blank
    def successful?
      error.blank?
    end

    # @return [Boolean] whether +error+ is classified as rescuable by
    #   .rescuable_error?
    def rescuable_error?
      self.class.rescuable_error?(error)
    end

    private

    # Runs +address.process+ and converts a raised StandardError into a value.
    #
    # @return [StandardError, nil] the rescued exception, or +nil+ when
    #   processing raised nothing
    def error_uncached
      address.process
      nil
    rescue ::StandardError => e
      e
    end
  end
end
@@ -7,19 +7,6 @@ require 'aranha/manager'
7
7
 
8
8
  module Aranha
9
9
  class Processor
10
- ARANHA_EXCEPTIONS = [::Aranha::Parsers::InvalidStateException].freeze
11
- CORE_EXCEPTIONS = [::SocketError].freeze
12
- ERRNO_EXCEPTIONS = [Errno::ECONNREFUSED, ::Errno::ECONNRESET].freeze
13
- HTTPCLIENT_EXCEPTIONS = [
14
- ::HTTPClient::BadResponseError,
15
- ::HTTPClient::ConnectTimeoutError,
16
- ::HTTPClient::ReceiveTimeoutError
17
- ].freeze
18
- NET_EXCEPTIONS = [::Net::HTTPFatalError, ::Net::HTTPServerException, ::Net::OpenTimeout].freeze
19
-
20
- NETWORK_EXCEPTIONS = ARANHA_EXCEPTIONS + CORE_EXCEPTIONS + ERRNO_EXCEPTIONS +
21
- HTTPCLIENT_EXCEPTIONS + NET_EXCEPTIONS
22
-
23
10
  DEFAULT_MAX_TRIES = 3
24
11
 
25
12
  attr_reader :manager
@@ -58,20 +45,20 @@ module Aranha
58
45
  def process_address(address)
59
46
  manager.log_info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
60
47
  " Unprocessed: #{unprocessed.count}/#{::Aranha::Manager.default.addresses_count})")
61
- begin
62
- address.process
63
- @failed.delete(address.id)
64
- rescue StandardError => e
65
- process_exception(address, e)
48
+ ap = ::Aranha::AddressProcessor.new(address)
49
+ if ap.successful?
50
+ @failed.delete(ap.address.id)
51
+ else
52
+ process_exception(ap)
66
53
  end
67
54
  end
68
55
 
69
# Handles a failed address: re-raises errors that are not rescuable, otherwise
# counts the failure and logs it as a warning.
#
# @param address_processor [#error, #rescuable_error?, #address] outcome of a
#   processing attempt
# @raise [Exception] +address_processor.error+ when it is not rescuable
def process_exception(address_processor)
  raise address_processor.error unless address_processor.rescuable_error?

  address_id = address_processor.address.id
  # First failure for this id starts the counter at zero before incrementing.
  @failed[address_id] ||= 0
  @failed[address_id] += 1
  manager.log_warn(address_processor.error)
end
76
63
 
77
64
  def next_address
@@ -82,12 +69,6 @@ module Aranha
82
69
  ::Aranha::Manager.default.unprocessed_addresses
83
70
  end
84
71
 
85
- def network_exception?(exception)
86
- return true if NETWORK_EXCEPTIONS.any? { |klass| exception.is_a?(klass) }
87
-
88
- exception.cause.present? ? network_exception?(exception.cause) : false
89
- end
90
-
91
72
  def not_try_ids
92
73
  @failed.select { |_k, v| v > @try }.map { |k, _v| k }
93
74
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Aranha
4
- VERSION = '0.15.2'
4
+ VERSION = '0.16.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.2
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-28 00:00:00.000000000 Z
11
+ date: 2021-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aranha-parsers
@@ -96,6 +96,7 @@ files:
96
96
  - MIT-LICENSE
97
97
  - README.rdoc
98
98
  - lib/aranha.rb
99
+ - lib/aranha/address_processor.rb
99
100
  - lib/aranha/default_processor.rb
100
101
  - lib/aranha/manager.rb
101
102
  - lib/aranha/processor.rb
@@ -119,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
120
  - !ruby/object:Gem::Version
120
121
  version: '0'
121
122
  requirements: []
122
- rubygems_version: 3.0.9
123
+ rubygems_version: 3.1.6
123
124
  signing_key:
124
125
  specification_version: 4
125
126
  summary: Ruby utilities for web crawling.