aranha 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7776940b9f7bcb2d542e4993240fd10c18ff2024
4
- data.tar.gz: d6579b4134a9a4ce5ec98cf335db1d11ab1b7038
3
+ metadata.gz: c3565d60364c02cd8a739da311d56d7aad7277d9
4
+ data.tar.gz: 517009c9e93d8d639e21a94dc809c6443f51b00c
5
5
  SHA512:
6
- metadata.gz: 8d4924f70f27bb9a4809c2c56034fcaba0013b50925e8b204391034fa0cb88d5c2a3dc6de0fc365b8da8db5c81f45458c5b87b34ed058ab037dcc604d4f4fe0b
7
- data.tar.gz: f5774605515bc18a2b57c4c794e1a0d050026dd96d16d13e7eb8683492199a48032281995a773f8b937b8e9c90ae0514fd304759b4a27e830629caefd59e19c4
6
+ metadata.gz: 56f2404b53245fa4c9cda8095862c0059630518cecd25bf9b1018d5024060845319db12d3675ef7c378508790e279d461970e12227cb6ed1b698518f126e9323
7
+ data.tar.gz: b7017d7df069c1160d92be0fc76dd76a4f3fb31421128f430ddab7ffe7cece3eaea3ac1f8e7f2a82024498079f54cbfe3e136486751b51455603044fea13fef7
@@ -1,14 +1,12 @@
1
1
  <!DOCTYPE html>
2
2
  <html>
3
- <head>
4
- <title>Aranha</title>
5
- <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
- <%= javascript_include_tag "aranha/application" %>
7
- <%= csrf_meta_tags %>
8
- </head>
9
- <body>
10
-
11
- <%= yield %>
12
-
13
- </body>
3
+ <head>
4
+ <title>Aranha</title>
5
+ <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
+ <%= javascript_include_tag "aranha/application" %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+ <%= yield %>
11
+ </body>
14
12
  </html>
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require 'httpclient'
2
3
  require 'active_support/dependencies'
3
4
  require_dependency 'aranha/engine'
4
5
  require_dependency 'active_scaffold'
@@ -1,14 +1,84 @@
1
1
  # frozen_string_literal: true
2
2
  module Aranha
3
3
  class Processor
4
+ NETWORK_EXCEPTIONS = [::HTTPClient::BadResponseError, Errno::ECONNRESET].freeze
5
+ DEFAULT_MAX_TRIES = 3
6
+
4
7
  def initialize
5
8
  ::Aranha::Address.clear_expired
6
9
  ::Aranha::Address.add_start_points
10
+ @failed = {}
11
+ @try = 0
12
+ process_loop
13
+ raise "Addresses failed: #{@failed.count}" if @failed.any?
14
+ end
15
+
16
+ private
17
+
18
+ def process_loop
19
+ Rails.logger.info("Max tries: #{max_tries_s}")
7
20
  loop do
8
- a = ::Aranha::Address.unprocessed.first
9
- break unless a
10
- Rails.logger.info("Processing #{a}")
21
+ break if process_next_address
22
+ end
23
+ end
24
+
25
+ def process_next_address
26
+ a = next_address
27
+ if a
28
+ process_address(a)
29
+ false
30
+ elsif @failed.any?
31
+ @try += 1
32
+ max_tries > 0 && @try >= max_tries
33
+ else
34
+ true
35
+ end
36
+ end
37
+
38
+ def process_address(a)
39
+ Rails.logger.info("Processing #{a} (Try: #{@try}/#{max_tries_s}," \
40
+ " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
41
+ begin
11
42
  a.process
43
+ @failed.delete(a.id)
44
+ rescue StandardError => ex
45
+ process_exception(a, ex)
46
+ end
47
+ end
48
+
49
+ def process_exception(a, ex)
50
+ raise ex unless network_exception?(ex)
51
+ @failed[a.id] ||= 0
52
+ @failed[a.id] += 1
53
+ Rails.logger.warn(ex)
54
+ end
55
+
56
+ def next_address
57
+ unprocessed.where.not(id: not_try_ids).first
58
+ end
59
+
60
+ def unprocessed
61
+ ::Aranha::Address.unprocessed
62
+ end
63
+
64
+ def network_exception?(ex)
65
+ NETWORK_EXCEPTIONS.any? { |klass| ex.is_a?(klass) }
66
+ end
67
+
68
+ def not_try_ids
69
+ @failed.select { |_k, v| v > @try }.map { |k, _v| k }
70
+ end
71
+
72
+ def max_tries_s
73
+ max_tries <= 0 ? 'INF' : max_tries
74
+ end
75
+
76
+ def max_tries
77
+ @max_tries ||= begin
78
+ r = Integer(ENV['ARANHA_MAX_TRIES'])
79
+ r <= 0 ? 0 : r
80
+ rescue ArgumentError
81
+ DEFAULT_MAX_TRIES
12
82
  end
13
83
  end
14
84
  end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Aranha
3
- VERSION = '0.0.1'
3
+ VERSION = '0.0.2'
4
4
  end
@@ -3,4 +3,8 @@ namespace(:aranha) do
3
3
  task process: :environment do
4
4
  ::Aranha::Processor.new
5
5
  end
6
+
7
+ task clear: :environment do
8
+ Rails.logger.info("Addresses deleted: #{::Aranha::Address.destroy_all.count}")
9
+ end
6
10
  end
@@ -1,14 +1,12 @@
1
1
  <!DOCTYPE html>
2
2
  <html>
3
- <head>
4
- <title>Dummy</title>
5
- <%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
6
- <%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
7
- <%= csrf_meta_tags %>
8
- </head>
9
- <body>
10
-
11
- <%= yield %>
12
-
13
- </body>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
6
+ <%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+ <%= yield %>
11
+ </body>
14
12
  </html>
@@ -23,4 +23,3 @@ module Dummy
23
23
  config.active_record.raise_in_transactional_callbacks = true
24
24
  end
25
25
  end
26
-
@@ -5,4 +5,3 @@ class NavigationTest < ActionDispatch::IntegrationTest
5
5
  # assert true
6
6
  # end
7
7
  end
8
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-18 00:00:00.000000000 Z
11
+ date: 2018-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_scaffold
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 4.2.10
41
+ - !ruby/object:Gem::Dependency
42
+ name: httpclient
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '2.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '2.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: sqlite3
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -135,47 +149,47 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
149
  version: '0'
136
150
  requirements: []
137
151
  rubyforge_project:
138
- rubygems_version: 2.6.12
152
+ rubygems_version: 2.4.8
139
153
  signing_key:
140
154
  specification_version: 4
141
155
  summary: Rails utilities for web crawling.
142
156
  test_files:
143
- - test/dummy/Rakefile
144
- - test/dummy/README.rdoc
157
+ - test/integration/navigation_test.rb
145
158
  - test/dummy/config.ru
159
+ - test/dummy/README.rdoc
160
+ - test/dummy/db/schema.rb
161
+ - test/dummy/app/views/layouts/application.html.erb
162
+ - test/dummy/app/assets/stylesheets/application.css
163
+ - test/dummy/app/assets/javascripts/application.js
164
+ - test/dummy/app/helpers/application_helper.rb
165
+ - test/dummy/app/controllers/application_controller.rb
166
+ - test/dummy/bin/bundle
167
+ - test/dummy/bin/rails
168
+ - test/dummy/bin/setup
169
+ - test/dummy/bin/rake
170
+ - test/dummy/Rakefile
171
+ - test/dummy/config/environments/production.rb
172
+ - test/dummy/config/environments/test.rb
173
+ - test/dummy/config/environments/development.rb
174
+ - test/dummy/config/application.rb
146
175
  - test/dummy/config/boot.rb
147
- - test/dummy/config/database.yml
148
- - test/dummy/config/secrets.yml
149
176
  - test/dummy/config/locales/en.yml
150
- - test/dummy/config/application.rb
151
- - test/dummy/config/environments/development.rb
152
- - test/dummy/config/environments/test.rb
153
- - test/dummy/config/environments/production.rb
154
- - test/dummy/config/environment.rb
155
- - test/dummy/config/routes.rb
156
- - test/dummy/config/initializers/assets.rb
157
- - test/dummy/config/initializers/cookies_serializer.rb
158
- - test/dummy/config/initializers/inflections.rb
159
177
  - test/dummy/config/initializers/session_store.rb
160
- - test/dummy/config/initializers/wrap_parameters.rb
161
- - test/dummy/config/initializers/to_time_preserves_timezone.rb
162
178
  - test/dummy/config/initializers/filter_parameter_logging.rb
179
+ - test/dummy/config/initializers/wrap_parameters.rb
163
180
  - test/dummy/config/initializers/backtrace_silencers.rb
181
+ - test/dummy/config/initializers/inflections.rb
182
+ - test/dummy/config/initializers/to_time_preserves_timezone.rb
183
+ - test/dummy/config/initializers/assets.rb
184
+ - test/dummy/config/initializers/cookies_serializer.rb
164
185
  - test/dummy/config/initializers/mime_types.rb
165
- - test/dummy/db/schema.rb
166
- - test/dummy/app/views/layouts/application.html.erb
167
- - test/dummy/app/controllers/application_controller.rb
168
- - test/dummy/app/helpers/application_helper.rb
169
- - test/dummy/app/assets/stylesheets/application.css
170
- - test/dummy/app/assets/javascripts/application.js
186
+ - test/dummy/config/secrets.yml
187
+ - test/dummy/config/database.yml
188
+ - test/dummy/config/routes.rb
189
+ - test/dummy/config/environment.rb
171
190
  - test/dummy/public/422.html
172
- - test/dummy/public/404.html
173
191
  - test/dummy/public/favicon.ico
192
+ - test/dummy/public/404.html
174
193
  - test/dummy/public/500.html
175
- - test/dummy/bin/bundle
176
- - test/dummy/bin/setup
177
- - test/dummy/bin/rails
178
- - test/dummy/bin/rake
179
194
  - test/aranha_test.rb
180
195
  - test/test_helper.rb
181
- - test/integration/navigation_test.rb