aranha 0.19.2 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aranha/address_processor.rb +0 -3
- data/lib/aranha/default_processor.rb +0 -3
- data/lib/aranha/manager.rb +0 -2
- data/lib/aranha/processor.rb +7 -10
- data/lib/aranha/temporary_errors.rb +2 -10
- data/lib/aranha/temporary_errors_manager.rb +0 -3
- data/lib/aranha/version.rb +1 -1
- data/lib/aranha.rb +2 -2
- metadata +7 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1117421c0ba7abcf375c5dc921d78b71a5073d756934349a3a0ff36fdd73f79
|
4
|
+
data.tar.gz: 04adeb4c58ef52188febab952b5bb98aecdfdbc4b3635ae1efaaf0350cfe1304
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b0eabc26eba8d57f3559aace6e91668354eb3bbd2ddddee1bfdd09ca8c58a85c3b28ddcab51eb6dc0c51dbee581c29f3319b99e4700ccf227f3590761127fef
|
7
|
+
data.tar.gz: b9a46408902c5a3fc5bc16bf0d03797e4fb9caa197aebafb944d52e0716a317b0b28c55ef67a973110aaf3a861660da6d7ec66ab355d33070580dc3fa22a9cb3
|
data/lib/aranha/manager.rb
CHANGED
data/lib/aranha/processor.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'net/http'
|
4
|
-
require 'aranha/parsers/invalid_state_exception'
|
5
|
-
require 'aranha/manager'
|
6
|
-
|
7
3
|
module Aranha
|
8
4
|
class Processor
|
9
5
|
DEFAULT_MAX_TRIES = 3
|
@@ -42,8 +38,9 @@ module Aranha
|
|
42
38
|
end
|
43
39
|
|
44
40
|
def process_address(address)
|
45
|
-
manager.log_info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
|
46
|
-
|
41
|
+
manager.log_info("Processing #{address} (Try: #{@try}/#{max_tries_s}, " \
|
42
|
+
"Unprocessed: #{unprocessed.count}" \
|
43
|
+
"/#{::Aranha::Manager.default.addresses_count})")
|
47
44
|
ap = ::Aranha::AddressProcessor.new(address)
|
48
45
|
if ap.successful?
|
49
46
|
@failed.delete(ap.address.id)
|
@@ -78,10 +75,10 @@ module Aranha
|
|
78
75
|
|
79
76
|
def max_tries
|
80
77
|
@max_tries ||= begin
|
81
|
-
r = Integer(ENV
|
82
|
-
r
|
83
|
-
|
84
|
-
|
78
|
+
r = Integer(ENV.fetch('ARANHA_MAX_TRIES', nil))
|
79
|
+
[r, 0].max
|
80
|
+
rescue ArgumentError, TypeError
|
81
|
+
DEFAULT_MAX_TRIES
|
85
82
|
end
|
86
83
|
end
|
87
84
|
end
|
data/lib/aranha/temporary_errors.rb
CHANGED
@@ -1,20 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'aranha/parsers/invalid_state_exception'
|
4
|
-
require 'aranha/parsers/source_address/fetch_content_error'
|
5
|
-
require 'eac_ruby_utils/core_ext'
|
6
|
-
require 'selenium-webdriver'
|
7
|
-
|
8
3
|
module Aranha
|
9
4
|
module TemporaryErrors
|
10
|
-
ARANHA_ERRORS = [::Aranha::Parsers::InvalidStateException,
|
11
|
-
::Aranha::Parsers::SourceAddress::FetchContentError].freeze
|
12
5
|
CORE_ERRORS = [::SocketError].freeze
|
13
6
|
ERRNO_ERRORS = [Errno::ECONNREFUSED, ::Errno::ECONNRESET].freeze
|
14
|
-
NET_ERRORS = [::Net::HTTPFatalError,
|
15
|
-
SELENIUM_ERRORS = [::Selenium::WebDriver::Error::TimeoutError].freeze
|
7
|
+
NET_ERRORS = [::Net::HTTPFatalError, Net::HTTPClientException, ::Net::OpenTimeout].freeze
|
16
8
|
|
17
|
-
ALL_ERRORS =
|
9
|
+
ALL_ERRORS = CORE_ERRORS + ERRNO_ERRORS + NET_ERRORS
|
18
10
|
|
19
11
|
class << self
|
20
12
|
def errors
|
data/lib/aranha/version.rb
CHANGED
data/lib/aranha.rb
CHANGED
metadata
CHANGED
@@ -1,77 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: aranha-parsers
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.18'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.18'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: aranha-selenium
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0.5'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0.5'
|
41
13
|
- !ruby/object:Gem::Dependency
|
42
14
|
name: eac_ruby_utils
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
44
16
|
requirements:
|
45
17
|
- - "~>"
|
46
18
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
19
|
+
version: '0.128'
|
48
20
|
type: :runtime
|
49
21
|
prerelease: false
|
50
22
|
version_requirements: !ruby/object:Gem::Requirement
|
51
23
|
requirements:
|
52
24
|
- - "~>"
|
53
25
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
26
|
+
version: '0.128'
|
55
27
|
- !ruby/object:Gem::Dependency
|
56
28
|
name: eac_ruby_gem_support
|
57
29
|
requirement: !ruby/object:Gem::Requirement
|
58
30
|
requirements:
|
59
31
|
- - "~>"
|
60
32
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
62
|
-
- - ">="
|
63
|
-
- !ruby/object:Gem::Version
|
64
|
-
version: 0.3.1
|
33
|
+
version: '0.12'
|
65
34
|
type: :development
|
66
35
|
prerelease: false
|
67
36
|
version_requirements: !ruby/object:Gem::Requirement
|
68
37
|
requirements:
|
69
38
|
- - "~>"
|
70
39
|
- !ruby/object:Gem::Version
|
71
|
-
version: '0.
|
72
|
-
- - ">="
|
73
|
-
- !ruby/object:Gem::Version
|
74
|
-
version: 0.3.1
|
40
|
+
version: '0.12'
|
75
41
|
description:
|
76
42
|
email:
|
77
43
|
- eduardobogoni@gmail.com
|
@@ -101,7 +67,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
101
67
|
requirements:
|
102
68
|
- - ">="
|
103
69
|
- !ruby/object:Gem::Version
|
104
|
-
version:
|
70
|
+
version: 2.7.0
|
105
71
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
72
|
requirements:
|
107
73
|
- - ">="
|