aranha 0.12.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/aranha/addresses_controller.rb +1 -3
- data/app/models/aranha/address.rb +15 -9
- data/config/locales/en.yml +6 -0
- data/config/locales/pt-BR.yml +6 -0
- data/db/migrate/20181125042102_add_extra_data_to_aranha_addresses.rb +1 -0
- data/lib/aranha/default_processor.rb +2 -3
- data/lib/aranha/fixtures.rb +1 -0
- data/lib/aranha/fixtures/download.rb +2 -0
- data/lib/aranha/processor.rb +3 -2
- data/lib/aranha/version.rb +1 -1
- metadata +30 -16
- data/Rakefile +0 -36
- data/app/controllers/aranha/application_controller.rb +0 -7
- data/test/aranha_test.rb +0 -9
- data/test/integration/navigation_test.rb +0 -9
- data/test/test_helper.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11b9dd8f6de631eeb2550a1d009a4cc80e8c2309e078179138e7f2bfb4a6813c
|
4
|
+
data.tar.gz: 7a30d214e9a0364798c6fe284e0c08fb4337a00c10f302aa483556c786b27b13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7704cab9261ab592f2199eece35d10d34aa48f49d2fe118ff5b610b0effc2e0486a9479c58912ac607a13d937f3397da4460003e043f80fee50fa6b97f4fb8cf
|
7
|
+
data.tar.gz: e7ac44ed95d0d1c7c29c6bab249fb7f71f5309a9d706fd08d854b41fdb28d2abb261ff0f23b470370f5868d193188bdc99c10dfde065abfd982ffceb94d51a34
|
@@ -1,9 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_dependency 'aranha/application_controller'
|
4
|
-
|
5
3
|
module Aranha
|
6
|
-
class AddressesController < ApplicationController
|
4
|
+
class AddressesController < ::ApplicationController
|
7
5
|
active_scaffold :'aranha/address' do |_conf|
|
8
6
|
end
|
9
7
|
end
|
@@ -1,8 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'eac_ruby_utils/yaml'
|
4
|
+
|
3
5
|
module Aranha
|
4
6
|
class Address < ActiveRecord::Base
|
5
|
-
include ::
|
7
|
+
include ::EacRailsUtils::Models::InequalityQueries
|
6
8
|
|
7
9
|
add_inequality_queries(:created_at)
|
8
10
|
|
@@ -68,23 +70,27 @@ module Aranha
|
|
68
70
|
private
|
69
71
|
|
70
72
|
def instanciate_processor
|
71
|
-
|
72
|
-
processor_instancier.call(url_to_process, YAML.load(extra_data))
|
73
|
-
elsif processor_instancier_arity == 1
|
74
|
-
processor_instancier.call(url_to_process)
|
75
|
-
else
|
76
|
-
raise("#{processor}.initialize should has 1 or 2 or * arguments")
|
77
|
-
end
|
73
|
+
processor_instancier.call(*processor_instancier_arguments)
|
78
74
|
end
|
79
75
|
|
80
76
|
def url_to_process
|
81
|
-
::
|
77
|
+
::EacRubyUtils::Yaml.load_common(url)
|
82
78
|
end
|
83
79
|
|
84
80
|
def processor_instancier
|
85
81
|
processor.constantize.method(:new)
|
86
82
|
end
|
87
83
|
|
84
|
+
def processor_instancier_arguments
|
85
|
+
if processor_instancier_arity == 2 || processor_instancier_arity.negative?
|
86
|
+
[url_to_process, EacRubyUtils::Yaml.load_common(extra_data)]
|
87
|
+
elsif processor_instancier_arity == 1
|
88
|
+
[processor_instancier.call(url_to_process)]
|
89
|
+
else
|
90
|
+
raise("#{processor}.initialize should has 1 or 2 or * arguments")
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
88
94
|
def processor_instancier_arity
|
89
95
|
processor.constantize.instance_method(:initialize).arity
|
90
96
|
end
|
@@ -9,9 +9,8 @@ module Aranha
|
|
9
9
|
class << self
|
10
10
|
def sanitize_uri(uri)
|
11
11
|
return uri if uri.is_a?(Hash)
|
12
|
-
|
13
|
-
|
14
|
-
end
|
12
|
+
|
13
|
+
uri = uri.to_s.gsub(%r{\A/}, 'file:///') unless uri.is_a?(Addressable::URI)
|
15
14
|
Addressable::URI.parse(uri)
|
16
15
|
end
|
17
16
|
end
|
data/lib/aranha/fixtures.rb
CHANGED
@@ -31,6 +31,7 @@ module Aranha
|
|
31
31
|
|
32
32
|
def select_path?(path)
|
33
33
|
return false unless match_prefix_pattern(path)
|
34
|
+
|
34
35
|
!pending || !source_exist?(path)
|
35
36
|
end
|
36
37
|
|
@@ -46,6 +47,7 @@ module Aranha
|
|
46
47
|
Rails.logger.info "Baixando \"#{url}\"..."
|
47
48
|
content = ::Aranha::Parsers::Base.new(url).content
|
48
49
|
raise "Content is blank for \"#{url}\"" if content.blank?
|
50
|
+
|
49
51
|
File.open(target, 'wb') { |file| file.write(content) }
|
50
52
|
end
|
51
53
|
|
data/lib/aranha/processor.rb
CHANGED
@@ -80,6 +80,7 @@ module Aranha
|
|
80
80
|
|
81
81
|
def network_exception?(exception)
|
82
82
|
return true if NETWORK_EXCEPTIONS.any? { |klass| exception.is_a?(klass) }
|
83
|
+
|
83
84
|
exception.cause.present? ? network_exception?(exception.cause) : false
|
84
85
|
end
|
85
86
|
|
@@ -95,8 +96,8 @@ module Aranha
|
|
95
96
|
@max_tries ||= begin
|
96
97
|
r = Integer(ENV['ARANHA_MAX_TRIES'])
|
97
98
|
r <= 0 ? 0 : r
|
98
|
-
|
99
|
-
|
99
|
+
rescue ArgumentError, TypeError
|
100
|
+
DEFAULT_MAX_TRIES
|
100
101
|
end
|
101
102
|
end
|
102
103
|
end
|
data/lib/aranha/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.14.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
33
|
+
version: '0.4'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
40
|
+
version: '0.4'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: aranha-selenium
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +45,9 @@ dependencies:
|
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0.1'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 0.1.2
|
48
51
|
type: :runtime
|
49
52
|
prerelease: false
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -52,6 +55,23 @@ dependencies:
|
|
52
55
|
- - "~>"
|
53
56
|
- !ruby/object:Gem::Version
|
54
57
|
version: '0.1'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 0.1.2
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: eac_rails_utils
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0.11'
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0.11'
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: eac_ruby_utils
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,19 +115,19 @@ dependencies:
|
|
95
115
|
- !ruby/object:Gem::Version
|
96
116
|
version: 4.2.10
|
97
117
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
118
|
+
name: eac_ruby_gem_support
|
99
119
|
requirement: !ruby/object:Gem::Requirement
|
100
120
|
requirements:
|
101
121
|
- - "~>"
|
102
122
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
123
|
+
version: '0.1'
|
104
124
|
type: :development
|
105
125
|
prerelease: false
|
106
126
|
version_requirements: !ruby/object:Gem::Requirement
|
107
127
|
requirements:
|
108
128
|
- - "~>"
|
109
129
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
130
|
+
version: '0.1'
|
111
131
|
- !ruby/object:Gem::Dependency
|
112
132
|
name: sqlite3
|
113
133
|
requirement: !ruby/object:Gem::Requirement
|
@@ -131,14 +151,14 @@ extra_rdoc_files: []
|
|
131
151
|
files:
|
132
152
|
- MIT-LICENSE
|
133
153
|
- README.rdoc
|
134
|
-
- Rakefile
|
135
154
|
- app/assets/javascripts/aranha/application.js
|
136
155
|
- app/assets/stylesheets/aranha/application.css
|
137
156
|
- app/controllers/aranha/addresses_controller.rb
|
138
|
-
- app/controllers/aranha/application_controller.rb
|
139
157
|
- app/helpers/aranha/application_helper.rb
|
140
158
|
- app/models/aranha/address.rb
|
141
159
|
- app/views/layouts/aranha/application.html.erb
|
160
|
+
- config/locales/en.yml
|
161
|
+
- config/locales/pt-BR.yml
|
142
162
|
- config/routes.rb
|
143
163
|
- db/migrate/20171201021251_create_aranha_addresses.rb
|
144
164
|
- db/migrate/20181125042102_add_extra_data_to_aranha_addresses.rb
|
@@ -154,9 +174,6 @@ files:
|
|
154
174
|
- lib/aranha/processor.rb
|
155
175
|
- lib/aranha/version.rb
|
156
176
|
- lib/tasks/aranha_tasks.rake
|
157
|
-
- test/aranha_test.rb
|
158
|
-
- test/integration/navigation_test.rb
|
159
|
-
- test/test_helper.rb
|
160
177
|
homepage:
|
161
178
|
licenses:
|
162
179
|
- MIT
|
@@ -181,7 +198,4 @@ rubygems_version: 2.7.7
|
|
181
198
|
signing_key:
|
182
199
|
specification_version: 4
|
183
200
|
summary: Rails utilities for web crawling.
|
184
|
-
test_files:
|
185
|
-
- test/aranha_test.rb
|
186
|
-
- test/test_helper.rb
|
187
|
-
- test/integration/navigation_test.rb
|
201
|
+
test_files: []
|
data/Rakefile
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
begin
|
4
|
-
require 'bundler/setup'
|
5
|
-
rescue LoadError
|
6
|
-
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
7
|
-
end
|
8
|
-
|
9
|
-
require 'rdoc/task'
|
10
|
-
|
11
|
-
RDoc::Task.new(:rdoc) do |rdoc|
|
12
|
-
rdoc.rdoc_dir = 'rdoc'
|
13
|
-
rdoc.title = 'Aranha'
|
14
|
-
rdoc.options << '--line-numbers'
|
15
|
-
rdoc.rdoc_files.include('README.rdoc')
|
16
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
17
|
-
end
|
18
|
-
|
19
|
-
APP_RAKEFILE = File.expand_path('spec/support/rails_app/Rakefile', __dir__)
|
20
|
-
load 'rails/tasks/engine.rake'
|
21
|
-
|
22
|
-
load 'rails/tasks/statistics.rake'
|
23
|
-
|
24
|
-
Bundler::GemHelper.install_tasks
|
25
|
-
|
26
|
-
require 'rake/testtask'
|
27
|
-
|
28
|
-
begin
|
29
|
-
require 'rspec/core/rake_task'
|
30
|
-
RSpec::Core::RakeTask.new(:spec)
|
31
|
-
task test: :spec
|
32
|
-
rescue LoadError
|
33
|
-
# no rspec available
|
34
|
-
end
|
35
|
-
|
36
|
-
task default: :test
|
data/test/aranha_test.rb
DELETED
data/test/test_helper.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Configure Rails Environment
|
4
|
-
ENV['RAILS_ENV'] = 'test'
|
5
|
-
|
6
|
-
require File.expand_path('../spec/support/rails_apps/config/environment.rb', __dir__)
|
7
|
-
ActiveRecord::Migrator.migrations_paths = [
|
8
|
-
File.expand_path('../spec/support/rails_app/db/migrate', __dir__)
|
9
|
-
]
|
10
|
-
require 'rails/test_help'
|
11
|
-
|
12
|
-
# Filter out Minitest backtrace while allowing backtrace from other libraries
|
13
|
-
# to be shown.
|
14
|
-
Minitest.backtrace_filter = Minitest::BacktraceFilter.new
|
15
|
-
|
16
|
-
# Load support files
|
17
|
-
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
|
18
|
-
|
19
|
-
# Load fixtures from the engine
|
20
|
-
if ActiveSupport::TestCase.respond_to?(:fixture_path=)
|
21
|
-
ActiveSupport::TestCase.fixture_path = File.expand_path('fixtures', __dir__)
|
22
|
-
ActiveSupport::TestCase.fixtures :all
|
23
|
-
end
|