aranha 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7776940b9f7bcb2d542e4993240fd10c18ff2024
4
- data.tar.gz: d6579b4134a9a4ce5ec98cf335db1d11ab1b7038
3
+ metadata.gz: c3565d60364c02cd8a739da311d56d7aad7277d9
4
+ data.tar.gz: 517009c9e93d8d639e21a94dc809c6443f51b00c
5
5
  SHA512:
6
- metadata.gz: 8d4924f70f27bb9a4809c2c56034fcaba0013b50925e8b204391034fa0cb88d5c2a3dc6de0fc365b8da8db5c81f45458c5b87b34ed058ab037dcc604d4f4fe0b
7
- data.tar.gz: f5774605515bc18a2b57c4c794e1a0d050026dd96d16d13e7eb8683492199a48032281995a773f8b937b8e9c90ae0514fd304759b4a27e830629caefd59e19c4
6
+ metadata.gz: 56f2404b53245fa4c9cda8095862c0059630518cecd25bf9b1018d5024060845319db12d3675ef7c378508790e279d461970e12227cb6ed1b698518f126e9323
7
+ data.tar.gz: b7017d7df069c1160d92be0fc76dd76a4f3fb31421128f430ddab7ffe7cece3eaea3ac1f8e7f2a82024498079f54cbfe3e136486751b51455603044fea13fef7
@@ -1,14 +1,12 @@
1
1
  <!DOCTYPE html>
2
2
  <html>
3
- <head>
4
- <title>Aranha</title>
5
- <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
- <%= javascript_include_tag "aranha/application" %>
7
- <%= csrf_meta_tags %>
8
- </head>
9
- <body>
10
-
11
- <%= yield %>
12
-
13
- </body>
3
+ <head>
4
+ <title>Aranha</title>
5
+ <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
+ <%= javascript_include_tag "aranha/application" %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+ <%= yield %>
11
+ </body>
14
12
  </html>
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'httpclient'
2
3
  require 'active_support/dependencies'
3
4
  require_dependency 'aranha/engine'
4
5
  require_dependency 'active_scaffold'
@@ -1,14 +1,84 @@
1
1
  # frozen_string_literal: true
2
2
  module Aranha
3
3
  class Processor
4
+ NETWORK_EXCEPTIONS = [::HTTPClient::BadResponseError, Errno::ECONNRESET].freeze
5
+ DEFAULT_MAX_TRIES = 3
6
+
4
7
  def initialize
5
8
  ::Aranha::Address.clear_expired
6
9
  ::Aranha::Address.add_start_points
10
+ @failed = {}
11
+ @try = 0
12
+ process_loop
13
+ raise "Addresses failed: #{@failed.count}" if @failed.any?
14
+ end
15
+
16
+ private
17
+
18
+ def process_loop
19
+ Rails.logger.info("Max tries: #{max_tries_s}")
7
20
  loop do
8
- a = ::Aranha::Address.unprocessed.first
9
- break unless a
10
- Rails.logger.info("Processing #{a}")
21
+ break if process_next_address
22
+ end
23
+ end
24
+
25
+ def process_next_address
26
+ a = next_address
27
+ if a
28
+ process_address(a)
29
+ false
30
+ elsif @failed.any?
31
+ @try += 1
32
+ max_tries > 0 && @try >= max_tries
33
+ else
34
+ true
35
+ end
36
+ end
37
+
38
+ def process_address(a)
39
+ Rails.logger.info("Processing #{a} (Try: #{@try}/#{max_tries_s}," \
40
+ " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
41
+ begin
11
42
  a.process
43
+ @failed.delete(a.id)
44
+ rescue StandardError => ex
45
+ process_exception(a, ex)
46
+ end
47
+ end
48
+
49
+ def process_exception(a, ex)
50
+ raise ex unless network_exception?(ex)
51
+ @failed[a.id] ||= 0
52
+ @failed[a.id] += 1
53
+ Rails.logger.warn(ex)
54
+ end
55
+
56
+ def next_address
57
+ unprocessed.where.not(id: not_try_ids).first
58
+ end
59
+
60
+ def unprocessed
61
+ ::Aranha::Address.unprocessed
62
+ end
63
+
64
+ def network_exception?(ex)
65
+ NETWORK_EXCEPTIONS.any? { |klass| ex.is_a?(klass) }
66
+ end
67
+
68
+ def not_try_ids
69
+ @failed.select { |_k, v| v > @try }.map { |k, _v| k }
70
+ end
71
+
72
+ def max_tries_s
73
+ max_tries <= 0 ? 'INF' : max_tries
74
+ end
75
+
76
+ def max_tries
77
+ @max_tries ||= begin
78
+ r = Integer(ENV['ARANHA_MAX_TRIES'])
79
+ r <= 0 ? 0 : r
80
+ rescue ArgumentError
81
+ DEFAULT_MAX_TRIES
12
82
  end
13
83
  end
14
84
  end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Aranha
3
- VERSION = '0.0.1'
3
+ VERSION = '0.0.2'
4
4
  end
@@ -3,4 +3,8 @@ namespace(:aranha) do
3
3
  task process: :environment do
4
4
  ::Aranha::Processor.new
5
5
  end
6
+
7
+ task clear: :environment do
8
+ Rails.logger.info("Addresses deleted: #{::Aranha::Address.destroy_all.count}")
9
+ end
6
10
  end
@@ -1,14 +1,12 @@
1
1
  <!DOCTYPE html>
2
2
  <html>
3
- <head>
4
- <title>Dummy</title>
5
- <%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
6
- <%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
7
- <%= csrf_meta_tags %>
8
- </head>
9
- <body>
10
-
11
- <%= yield %>
12
-
13
- </body>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
6
+ <%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+ <%= yield %>
11
+ </body>
14
12
  </html>
@@ -23,4 +23,3 @@ module Dummy
23
23
  config.active_record.raise_in_transactional_callbacks = true
24
24
  end
25
25
  end
26
-
@@ -5,4 +5,3 @@ class NavigationTest < ActionDispatch::IntegrationTest
5
5
  # assert true
6
6
  # end
7
7
  end
8
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-18 00:00:00.000000000 Z
11
+ date: 2018-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_scaffold
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 4.2.10
41
+ - !ruby/object:Gem::Dependency
42
+ name: httpclient
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '2.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '2.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: sqlite3
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -135,47 +149,47 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
149
  version: '0'
136
150
  requirements: []
137
151
  rubyforge_project:
138
- rubygems_version: 2.6.12
152
+ rubygems_version: 2.4.8
139
153
  signing_key:
140
154
  specification_version: 4
141
155
  summary: Rails utilities for web crawling.
142
156
  test_files:
143
- - test/dummy/Rakefile
144
- - test/dummy/README.rdoc
157
+ - test/integration/navigation_test.rb
145
158
  - test/dummy/config.ru
159
+ - test/dummy/README.rdoc
160
+ - test/dummy/db/schema.rb
161
+ - test/dummy/app/views/layouts/application.html.erb
162
+ - test/dummy/app/assets/stylesheets/application.css
163
+ - test/dummy/app/assets/javascripts/application.js
164
+ - test/dummy/app/helpers/application_helper.rb
165
+ - test/dummy/app/controllers/application_controller.rb
166
+ - test/dummy/bin/bundle
167
+ - test/dummy/bin/rails
168
+ - test/dummy/bin/setup
169
+ - test/dummy/bin/rake
170
+ - test/dummy/Rakefile
171
+ - test/dummy/config/environments/production.rb
172
+ - test/dummy/config/environments/test.rb
173
+ - test/dummy/config/environments/development.rb
174
+ - test/dummy/config/application.rb
146
175
  - test/dummy/config/boot.rb
147
- - test/dummy/config/database.yml
148
- - test/dummy/config/secrets.yml
149
176
  - test/dummy/config/locales/en.yml
150
- - test/dummy/config/application.rb
151
- - test/dummy/config/environments/development.rb
152
- - test/dummy/config/environments/test.rb
153
- - test/dummy/config/environments/production.rb
154
- - test/dummy/config/environment.rb
155
- - test/dummy/config/routes.rb
156
- - test/dummy/config/initializers/assets.rb
157
- - test/dummy/config/initializers/cookies_serializer.rb
158
- - test/dummy/config/initializers/inflections.rb
159
177
  - test/dummy/config/initializers/session_store.rb
160
- - test/dummy/config/initializers/wrap_parameters.rb
161
- - test/dummy/config/initializers/to_time_preserves_timezone.rb
162
178
  - test/dummy/config/initializers/filter_parameter_logging.rb
179
+ - test/dummy/config/initializers/wrap_parameters.rb
163
180
  - test/dummy/config/initializers/backtrace_silencers.rb
181
+ - test/dummy/config/initializers/inflections.rb
182
+ - test/dummy/config/initializers/to_time_preserves_timezone.rb
183
+ - test/dummy/config/initializers/assets.rb
184
+ - test/dummy/config/initializers/cookies_serializer.rb
164
185
  - test/dummy/config/initializers/mime_types.rb
165
- - test/dummy/db/schema.rb
166
- - test/dummy/app/views/layouts/application.html.erb
167
- - test/dummy/app/controllers/application_controller.rb
168
- - test/dummy/app/helpers/application_helper.rb
169
- - test/dummy/app/assets/stylesheets/application.css
170
- - test/dummy/app/assets/javascripts/application.js
186
+ - test/dummy/config/secrets.yml
187
+ - test/dummy/config/database.yml
188
+ - test/dummy/config/routes.rb
189
+ - test/dummy/config/environment.rb
171
190
  - test/dummy/public/422.html
172
- - test/dummy/public/404.html
173
191
  - test/dummy/public/favicon.ico
192
+ - test/dummy/public/404.html
174
193
  - test/dummy/public/500.html
175
- - test/dummy/bin/bundle
176
- - test/dummy/bin/setup
177
- - test/dummy/bin/rails
178
- - test/dummy/bin/rake
179
194
  - test/aranha_test.rb
180
195
  - test/test_helper.rb
181
- - test/integration/navigation_test.rb