aranha 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/views/layouts/aranha/application.html.erb +9 -11
- data/lib/aranha.rb +1 -0
- data/lib/aranha/processor.rb +73 -3
- data/lib/aranha/version.rb +1 -1
- data/lib/tasks/aranha_tasks.rake +4 -0
- data/test/dummy/app/views/layouts/application.html.erb +9 -11
- data/test/dummy/config/application.rb +0 -1
- data/test/integration/navigation_test.rb +0 -1
- metadata +44 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3565d60364c02cd8a739da311d56d7aad7277d9
|
4
|
+
data.tar.gz: 517009c9e93d8d639e21a94dc809c6443f51b00c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56f2404b53245fa4c9cda8095862c0059630518cecd25bf9b1018d5024060845319db12d3675ef7c378508790e279d461970e12227cb6ed1b698518f126e9323
|
7
|
+
data.tar.gz: b7017d7df069c1160d92be0fc76dd76a4f3fb31421128f430ddab7ffe7cece3eaea3ac1f8e7f2a82024498079f54cbfe3e136486751b51455603044fea13fef7
|
data/app/views/layouts/aranha/application.html.erb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
<!DOCTYPE html>
|
2
2
|
<html>
|
3
|
-
<head>
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
</head>
|
9
|
-
<body>
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
</body>
|
3
|
+
<head>
|
4
|
+
<title>Aranha</title>
|
5
|
+
<%= stylesheet_link_tag "aranha/application", media: "all" %>
|
6
|
+
<%= javascript_include_tag "aranha/application" %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
<%= yield %>
|
11
|
+
</body>
|
14
12
|
</html>
|
data/lib/aranha.rb
CHANGED
data/lib/aranha/processor.rb
CHANGED
@@ -1,14 +1,84 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module Aranha
|
3
3
|
class Processor
|
4
|
+
NETWORK_EXCEPTIONS = [::HTTPClient::BadResponseError, Errno::ECONNRESET].freeze
|
5
|
+
DEFAULT_MAX_TRIES = 3
|
6
|
+
|
4
7
|
def initialize
|
5
8
|
::Aranha::Address.clear_expired
|
6
9
|
::Aranha::Address.add_start_points
|
10
|
+
@failed = {}
|
11
|
+
@try = 0
|
12
|
+
process_loop
|
13
|
+
raise "Addresses failed: #{@failed.count}" if @failed.any?
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def process_loop
|
19
|
+
Rails.logger.info("Max tries: #{max_tries_s}")
|
7
20
|
loop do
|
8
|
-
|
9
|
-
|
10
|
-
|
21
|
+
break if process_next_address
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def process_next_address
|
26
|
+
a = next_address
|
27
|
+
if a
|
28
|
+
process_address(a)
|
29
|
+
false
|
30
|
+
elsif @failed.any?
|
31
|
+
@try += 1
|
32
|
+
max_tries > 0 && @try >= max_tries
|
33
|
+
else
|
34
|
+
true
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def process_address(a)
|
39
|
+
Rails.logger.info("Processing #{a} (Try: #{@try}/#{max_tries_s}," \
|
40
|
+
" Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
|
41
|
+
begin
|
11
42
|
a.process
|
43
|
+
@failed.delete(a.id)
|
44
|
+
rescue StandardError => ex
|
45
|
+
process_exception(a, ex)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def process_exception(a, ex)
|
50
|
+
raise ex unless network_exception?(ex)
|
51
|
+
@failed[a.id] ||= 0
|
52
|
+
@failed[a.id] += 1
|
53
|
+
Rails.logger.warn(ex)
|
54
|
+
end
|
55
|
+
|
56
|
+
def next_address
|
57
|
+
unprocessed.where.not(id: not_try_ids).first
|
58
|
+
end
|
59
|
+
|
60
|
+
def unprocessed
|
61
|
+
::Aranha::Address.unprocessed
|
62
|
+
end
|
63
|
+
|
64
|
+
def network_exception?(ex)
|
65
|
+
NETWORK_EXCEPTIONS.any? { |klass| ex.is_a?(klass) }
|
66
|
+
end
|
67
|
+
|
68
|
+
def not_try_ids
|
69
|
+
@failed.select { |_k, v| v > @try }.map { |k, _v| k }
|
70
|
+
end
|
71
|
+
|
72
|
+
def max_tries_s
|
73
|
+
max_tries <= 0 ? 'INF' : max_tries
|
74
|
+
end
|
75
|
+
|
76
|
+
def max_tries
|
77
|
+
@max_tries ||= begin
|
78
|
+
r = Integer(ENV['ARANHA_MAX_TRIES'])
|
79
|
+
r <= 0 ? 0 : r
|
80
|
+
rescue ArgumentError
|
81
|
+
DEFAULT_MAX_TRIES
|
12
82
|
end
|
13
83
|
end
|
14
84
|
end
|
data/lib/aranha/version.rb
CHANGED
data/lib/tasks/aranha_tasks.rake
CHANGED
data/test/dummy/app/views/layouts/application.html.erb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
<!DOCTYPE html>
|
2
2
|
<html>
|
3
|
-
<head>
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
</head>
|
9
|
-
<body>
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
</body>
|
3
|
+
<head>
|
4
|
+
<title>Dummy</title>
|
5
|
+
<%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
|
6
|
+
<%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
<%= yield %>
|
11
|
+
</body>
|
14
12
|
</html>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 4.2.10
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: httpclient
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.6'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: sqlite3
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,47 +149,47 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
149
|
version: '0'
|
136
150
|
requirements: []
|
137
151
|
rubyforge_project:
|
138
|
-
rubygems_version: 2.
|
152
|
+
rubygems_version: 2.4.8
|
139
153
|
signing_key:
|
140
154
|
specification_version: 4
|
141
155
|
summary: Rails utilities for web crawling.
|
142
156
|
test_files:
|
143
|
-
- test/
|
144
|
-
- test/dummy/README.rdoc
|
157
|
+
- test/integration/navigation_test.rb
|
145
158
|
- test/dummy/config.ru
|
159
|
+
- test/dummy/README.rdoc
|
160
|
+
- test/dummy/db/schema.rb
|
161
|
+
- test/dummy/app/views/layouts/application.html.erb
|
162
|
+
- test/dummy/app/assets/stylesheets/application.css
|
163
|
+
- test/dummy/app/assets/javascripts/application.js
|
164
|
+
- test/dummy/app/helpers/application_helper.rb
|
165
|
+
- test/dummy/app/controllers/application_controller.rb
|
166
|
+
- test/dummy/bin/bundle
|
167
|
+
- test/dummy/bin/rails
|
168
|
+
- test/dummy/bin/setup
|
169
|
+
- test/dummy/bin/rake
|
170
|
+
- test/dummy/Rakefile
|
171
|
+
- test/dummy/config/environments/production.rb
|
172
|
+
- test/dummy/config/environments/test.rb
|
173
|
+
- test/dummy/config/environments/development.rb
|
174
|
+
- test/dummy/config/application.rb
|
146
175
|
- test/dummy/config/boot.rb
|
147
|
-
- test/dummy/config/database.yml
|
148
|
-
- test/dummy/config/secrets.yml
|
149
176
|
- test/dummy/config/locales/en.yml
|
150
|
-
- test/dummy/config/application.rb
|
151
|
-
- test/dummy/config/environments/development.rb
|
152
|
-
- test/dummy/config/environments/test.rb
|
153
|
-
- test/dummy/config/environments/production.rb
|
154
|
-
- test/dummy/config/environment.rb
|
155
|
-
- test/dummy/config/routes.rb
|
156
|
-
- test/dummy/config/initializers/assets.rb
|
157
|
-
- test/dummy/config/initializers/cookies_serializer.rb
|
158
|
-
- test/dummy/config/initializers/inflections.rb
|
159
177
|
- test/dummy/config/initializers/session_store.rb
|
160
|
-
- test/dummy/config/initializers/wrap_parameters.rb
|
161
|
-
- test/dummy/config/initializers/to_time_preserves_timezone.rb
|
162
178
|
- test/dummy/config/initializers/filter_parameter_logging.rb
|
179
|
+
- test/dummy/config/initializers/wrap_parameters.rb
|
163
180
|
- test/dummy/config/initializers/backtrace_silencers.rb
|
181
|
+
- test/dummy/config/initializers/inflections.rb
|
182
|
+
- test/dummy/config/initializers/to_time_preserves_timezone.rb
|
183
|
+
- test/dummy/config/initializers/assets.rb
|
184
|
+
- test/dummy/config/initializers/cookies_serializer.rb
|
164
185
|
- test/dummy/config/initializers/mime_types.rb
|
165
|
-
- test/dummy/
|
166
|
-
- test/dummy/
|
167
|
-
- test/dummy/
|
168
|
-
- test/dummy/
|
169
|
-
- test/dummy/app/assets/stylesheets/application.css
|
170
|
-
- test/dummy/app/assets/javascripts/application.js
|
186
|
+
- test/dummy/config/secrets.yml
|
187
|
+
- test/dummy/config/database.yml
|
188
|
+
- test/dummy/config/routes.rb
|
189
|
+
- test/dummy/config/environment.rb
|
171
190
|
- test/dummy/public/422.html
|
172
|
-
- test/dummy/public/404.html
|
173
191
|
- test/dummy/public/favicon.ico
|
192
|
+
- test/dummy/public/404.html
|
174
193
|
- test/dummy/public/500.html
|
175
|
-
- test/dummy/bin/bundle
|
176
|
-
- test/dummy/bin/setup
|
177
|
-
- test/dummy/bin/rails
|
178
|
-
- test/dummy/bin/rake
|
179
194
|
- test/aranha_test.rb
|
180
195
|
- test/test_helper.rb
|
181
|
-
- test/integration/navigation_test.rb
|