aranha 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/views/layouts/aranha/application.html.erb +9 -11
- data/lib/aranha.rb +1 -0
- data/lib/aranha/processor.rb +73 -3
- data/lib/aranha/version.rb +1 -1
- data/lib/tasks/aranha_tasks.rake +4 -0
- data/test/dummy/app/views/layouts/application.html.erb +9 -11
- data/test/dummy/config/application.rb +0 -1
- data/test/integration/navigation_test.rb +0 -1
- metadata +44 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3565d60364c02cd8a739da311d56d7aad7277d9
|
4
|
+
data.tar.gz: 517009c9e93d8d639e21a94dc809c6443f51b00c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56f2404b53245fa4c9cda8095862c0059630518cecd25bf9b1018d5024060845319db12d3675ef7c378508790e279d461970e12227cb6ed1b698518f126e9323
|
7
|
+
data.tar.gz: b7017d7df069c1160d92be0fc76dd76a4f3fb31421128f430ddab7ffe7cece3eaea3ac1f8e7f2a82024498079f54cbfe3e136486751b51455603044fea13fef7
|
data/app/views/layouts/aranha/application.html.erb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
<!DOCTYPE html>
|
2
2
|
<html>
|
3
|
-
<head>
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
</head>
|
9
|
-
<body>
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
</body>
|
3
|
+
<head>
|
4
|
+
<title>Aranha</title>
|
5
|
+
<%= stylesheet_link_tag "aranha/application", media: "all" %>
|
6
|
+
<%= javascript_include_tag "aranha/application" %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
<%= yield %>
|
11
|
+
</body>
|
14
12
|
</html>
|
data/lib/aranha.rb
CHANGED
data/lib/aranha/processor.rb
CHANGED
@@ -1,14 +1,84 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module Aranha
  # Processes every unprocessed Aranha::Address, retrying the ones that fail
  # with a network-level exception.
  #
  # All the work happens in the constructor: expired addresses are cleared,
  # start points are added, and then addresses are processed one at a time
  # until none is left to try. Addresses that raised one of
  # NETWORK_EXCEPTIONS are retried in later passes, up to +max_tries+ passes.
  # Any other exception is re-raised immediately.
  #
  # The number of passes is read from the ARANHA_MAX_TRIES environment
  # variable; a value of zero or less means "retry forever".
  class Processor
    # Exceptions treated as transient failures that are worth retrying.
    NETWORK_EXCEPTIONS = [::HTTPClient::BadResponseError, Errno::ECONNRESET].freeze

    # Number of passes used when ARANHA_MAX_TRIES is unset or not an integer.
    DEFAULT_MAX_TRIES = 3

    # Runs the whole processing cycle.
    #
    # @raise [RuntimeError] when addresses are still failing after the last pass
    def initialize
      ::Aranha::Address.clear_expired
      ::Aranha::Address.add_start_points
      @failed = {} # address id => number of network failures so far
      @try = 0     # index of the current pass
      process_loop
      raise "Addresses failed: #{@failed.count}" if @failed.any?
    end

    private

    # Keeps processing addresses until process_next_address signals the end.
    def process_loop
      Rails.logger.info("Max tries: #{max_tries_s}")
      loop do
        break if process_next_address
      end
    end

    # Processes the next eligible address, if any.
    #
    # @return [Boolean] true when the loop must stop, false to keep going
    def process_next_address
      a = next_address
      if a
        process_address(a)
        false
      elsif @failed.any?
        # Nothing left to try in this pass, but some addresses failed:
        # start a new pass unless the limit was reached.
        @try += 1
        max_tries > 0 && @try >= max_tries
      else
        true
      end
    end

    # Processes one address, recording a network failure instead of aborting.
    def process_address(a)
      Rails.logger.info("Processing #{a} (Try: #{@try}/#{max_tries_s}," \
                        " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
      begin
        a.process
        @failed.delete(a.id)
      rescue StandardError => ex
        process_exception(a, ex)
      end
    end

    # Counts a network failure for the address; re-raises anything else.
    def process_exception(a, ex)
      raise ex unless network_exception?(ex)

      @failed[a.id] = @failed.fetch(a.id, 0) + 1
      Rails.logger.warn(ex)
    end

    # First unprocessed address that is not excluded from the current pass.
    def next_address
      unprocessed.where.not(id: not_try_ids).first
    end

    def unprocessed
      ::Aranha::Address.unprocessed
    end

    def network_exception?(ex)
      NETWORK_EXCEPTIONS.any? { |klass| ex.is_a?(klass) }
    end

    # Ids of addresses whose failure count exceeds the current pass index,
    # i.e. the ones that already failed during this pass.
    def not_try_ids
      @failed.select { |_id, failures| failures > @try }.keys
    end

    # Human-readable form of max_tries for log messages.
    def max_tries_s
      max_tries <= 0 ? 'INF' : max_tries
    end

    # Maximum number of passes, memoized.
    #
    # @return [Integer] 0 for "unlimited", otherwise a positive count
    def max_tries
      @max_tries ||= begin
        r = Integer(ENV['ARANHA_MAX_TRIES'])
        r <= 0 ? 0 : r
      rescue ArgumentError, TypeError
        # ArgumentError: the variable is set but is not an integer literal.
        # TypeError: the variable is unset, so Integer(nil) was called.
        DEFAULT_MAX_TRIES
      end
    end
  end
end
|
data/lib/aranha/version.rb
CHANGED
data/lib/tasks/aranha_tasks.rake
CHANGED
data/test/dummy/app/views/layouts/application.html.erb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
<!DOCTYPE html>
|
2
2
|
<html>
|
3
|
-
<head>
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
</head>
|
9
|
-
<body>
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
</body>
|
3
|
+
<head>
|
4
|
+
<title>Dummy</title>
|
5
|
+
<%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
|
6
|
+
<%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
<%= yield %>
|
11
|
+
</body>
|
14
12
|
</html>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 4.2.10
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: httpclient
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.6'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: sqlite3
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,47 +149,47 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
149
|
version: '0'
|
136
150
|
requirements: []
|
137
151
|
rubyforge_project:
|
138
|
-
rubygems_version: 2.
|
152
|
+
rubygems_version: 2.4.8
|
139
153
|
signing_key:
|
140
154
|
specification_version: 4
|
141
155
|
summary: Rails utilities for web crawling.
|
142
156
|
test_files:
|
143
|
-
- test/
|
144
|
-
- test/dummy/README.rdoc
|
157
|
+
- test/integration/navigation_test.rb
|
145
158
|
- test/dummy/config.ru
|
159
|
+
- test/dummy/README.rdoc
|
160
|
+
- test/dummy/db/schema.rb
|
161
|
+
- test/dummy/app/views/layouts/application.html.erb
|
162
|
+
- test/dummy/app/assets/stylesheets/application.css
|
163
|
+
- test/dummy/app/assets/javascripts/application.js
|
164
|
+
- test/dummy/app/helpers/application_helper.rb
|
165
|
+
- test/dummy/app/controllers/application_controller.rb
|
166
|
+
- test/dummy/bin/bundle
|
167
|
+
- test/dummy/bin/rails
|
168
|
+
- test/dummy/bin/setup
|
169
|
+
- test/dummy/bin/rake
|
170
|
+
- test/dummy/Rakefile
|
171
|
+
- test/dummy/config/environments/production.rb
|
172
|
+
- test/dummy/config/environments/test.rb
|
173
|
+
- test/dummy/config/environments/development.rb
|
174
|
+
- test/dummy/config/application.rb
|
146
175
|
- test/dummy/config/boot.rb
|
147
|
-
- test/dummy/config/database.yml
|
148
|
-
- test/dummy/config/secrets.yml
|
149
176
|
- test/dummy/config/locales/en.yml
|
150
|
-
- test/dummy/config/application.rb
|
151
|
-
- test/dummy/config/environments/development.rb
|
152
|
-
- test/dummy/config/environments/test.rb
|
153
|
-
- test/dummy/config/environments/production.rb
|
154
|
-
- test/dummy/config/environment.rb
|
155
|
-
- test/dummy/config/routes.rb
|
156
|
-
- test/dummy/config/initializers/assets.rb
|
157
|
-
- test/dummy/config/initializers/cookies_serializer.rb
|
158
|
-
- test/dummy/config/initializers/inflections.rb
|
159
177
|
- test/dummy/config/initializers/session_store.rb
|
160
|
-
- test/dummy/config/initializers/wrap_parameters.rb
|
161
|
-
- test/dummy/config/initializers/to_time_preserves_timezone.rb
|
162
178
|
- test/dummy/config/initializers/filter_parameter_logging.rb
|
179
|
+
- test/dummy/config/initializers/wrap_parameters.rb
|
163
180
|
- test/dummy/config/initializers/backtrace_silencers.rb
|
181
|
+
- test/dummy/config/initializers/inflections.rb
|
182
|
+
- test/dummy/config/initializers/to_time_preserves_timezone.rb
|
183
|
+
- test/dummy/config/initializers/assets.rb
|
184
|
+
- test/dummy/config/initializers/cookies_serializer.rb
|
164
185
|
- test/dummy/config/initializers/mime_types.rb
|
165
|
-
- test/dummy/
|
166
|
-
- test/dummy/
|
167
|
-
- test/dummy/
|
168
|
-
- test/dummy/
|
169
|
-
- test/dummy/app/assets/stylesheets/application.css
|
170
|
-
- test/dummy/app/assets/javascripts/application.js
|
186
|
+
- test/dummy/config/secrets.yml
|
187
|
+
- test/dummy/config/database.yml
|
188
|
+
- test/dummy/config/routes.rb
|
189
|
+
- test/dummy/config/environment.rb
|
171
190
|
- test/dummy/public/422.html
|
172
|
-
- test/dummy/public/404.html
|
173
191
|
- test/dummy/public/favicon.ico
|
192
|
+
- test/dummy/public/404.html
|
174
193
|
- test/dummy/public/500.html
|
175
|
-
- test/dummy/bin/bundle
|
176
|
-
- test/dummy/bin/setup
|
177
|
-
- test/dummy/bin/rails
|
178
|
-
- test/dummy/bin/rake
|
179
194
|
- test/aranha_test.rb
|
180
195
|
- test/test_helper.rb
|
181
|
-
- test/integration/navigation_test.rb
|