aranha 0.12.1 → 0.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/models/aranha/address.rb +15 -9
- data/db/migrate/20181125042102_add_extra_data_to_aranha_addresses.rb +1 -0
- data/lib/aranha/default_processor.rb +2 -3
- data/lib/aranha/fixtures.rb +1 -0
- data/lib/aranha/fixtures/download.rb +2 -0
- data/lib/aranha/processor.rb +3 -2
- data/lib/aranha/version.rb +1 -1
- metadata +30 -17
- data/Rakefile +0 -36
- data/test/aranha_test.rb +0 -9
- data/test/integration/navigation_test.rb +0 -9
- data/test/test_helper.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69b5f27ae4cdf78c734b545906c670acd789509f0e49dd0d75b1a5083bc036c7
|
4
|
+
data.tar.gz: fb6766cf8e2e9bc13b48666f1a993bbba8635f4cbf99e5178c0a942a1b27f5d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d35371bebef37c10de34ad045f5966c73bb4c7590197c3bb0cabd8c3b75ee574a2911ac631eaa5b85a47887b785ea6441eb6f93579b484a863dbd635d0a314d
|
7
|
+
data.tar.gz: 70491efe2434753c2585fa32cc42ed6719ac7902e347c7fed7ca7fb284b445d0a63ce744ad55b306b49850c6fc660ba021a77694f2c98010cdb0a6c7f459553a
|
data/app/models/aranha/address.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'eac_ruby_utils/yaml'
|
4
|
+
|
3
5
|
module Aranha
|
4
6
|
class Address < ActiveRecord::Base
|
5
|
-
include ::
|
7
|
+
include ::EacRailsUtils::Models::InequalityQueries
|
6
8
|
|
7
9
|
add_inequality_queries(:created_at)
|
8
10
|
|
@@ -68,23 +70,27 @@ module Aranha
|
|
68
70
|
private
|
69
71
|
|
70
72
|
def instanciate_processor
|
71
|
-
|
72
|
-
processor_instancier.call(url_to_process, YAML.load(extra_data))
|
73
|
-
elsif processor_instancier_arity == 1
|
74
|
-
processor_instancier.call(url_to_process)
|
75
|
-
else
|
76
|
-
raise("#{processor}.initialize should has 1 or 2 or * arguments")
|
77
|
-
end
|
73
|
+
processor_instancier.call(*processor_instancier_arguments)
|
78
74
|
end
|
79
75
|
|
80
76
|
def url_to_process
|
81
|
-
::
|
77
|
+
::EacRubyUtils::Yaml.load(url)
|
82
78
|
end
|
83
79
|
|
84
80
|
def processor_instancier
|
85
81
|
processor.constantize.method(:new)
|
86
82
|
end
|
87
83
|
|
84
|
+
def processor_instancier_arguments
|
85
|
+
if processor_instancier_arity == 2 || processor_instancier_arity.negative?
|
86
|
+
[url_to_process, EacRubyUtils::Yaml.load(extra_data)]
|
87
|
+
elsif processor_instancier_arity == 1
|
88
|
+
[processor_instancier.call(url_to_process)]
|
89
|
+
else
|
90
|
+
raise("#{processor}.initialize should has 1 or 2 or * arguments")
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
88
94
|
def processor_instancier_arity
|
89
95
|
processor.constantize.instance_method(:initialize).arity
|
90
96
|
end
|
data/lib/aranha/default_processor.rb
CHANGED
@@ -9,9 +9,8 @@ module Aranha
|
|
9
9
|
class << self
|
10
10
|
def sanitize_uri(uri)
|
11
11
|
return uri if uri.is_a?(Hash)
|
12
|
-
|
13
|
-
|
14
|
-
end
|
12
|
+
|
13
|
+
uri = uri.to_s.gsub(%r{\A/}, 'file:///') unless uri.is_a?(Addressable::URI)
|
15
14
|
Addressable::URI.parse(uri)
|
16
15
|
end
|
17
16
|
end
|
data/lib/aranha/fixtures.rb
CHANGED
@@ -31,6 +31,7 @@ module Aranha
|
|
31
31
|
|
32
32
|
def select_path?(path)
|
33
33
|
return false unless match_prefix_pattern(path)
|
34
|
+
|
34
35
|
!pending || !source_exist?(path)
|
35
36
|
end
|
36
37
|
|
@@ -46,6 +47,7 @@ module Aranha
|
|
46
47
|
Rails.logger.info "Baixando \"#{url}\"..."
|
47
48
|
content = ::Aranha::Parsers::Base.new(url).content
|
48
49
|
raise "Content is blank for \"#{url}\"" if content.blank?
|
50
|
+
|
49
51
|
File.open(target, 'wb') { |file| file.write(content) }
|
50
52
|
end
|
51
53
|
|
data/lib/aranha/processor.rb
CHANGED
@@ -80,6 +80,7 @@ module Aranha
|
|
80
80
|
|
81
81
|
def network_exception?(exception)
|
82
82
|
return true if NETWORK_EXCEPTIONS.any? { |klass| exception.is_a?(klass) }
|
83
|
+
|
83
84
|
exception.cause.present? ? network_exception?(exception.cause) : false
|
84
85
|
end
|
85
86
|
|
@@ -95,8 +96,8 @@ module Aranha
|
|
95
96
|
@max_tries ||= begin
|
96
97
|
r = Integer(ENV['ARANHA_MAX_TRIES'])
|
97
98
|
r <= 0 ? 0 : r
|
98
|
-
|
99
|
-
|
99
|
+
rescue ArgumentError, TypeError
|
100
|
+
DEFAULT_MAX_TRIES
|
100
101
|
end
|
101
102
|
end
|
102
103
|
end
|
data/lib/aranha/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.1
|
4
|
+
version: 0.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
33
|
+
version: '0.4'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
40
|
+
version: '0.4'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: aranha-selenium
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +45,9 @@ dependencies:
|
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0.1'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 0.1.2
|
48
51
|
type: :runtime
|
49
52
|
prerelease: false
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -52,20 +55,37 @@ dependencies:
|
|
52
55
|
- - "~>"
|
53
56
|
- !ruby/object:Gem::Version
|
54
57
|
version: '0.1'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 0.1.2
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: eac_rails_utils
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0.11'
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0.11'
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: eac_ruby_utils
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
58
78
|
requirements:
|
59
79
|
- - "~>"
|
60
80
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
81
|
+
version: '0.35'
|
62
82
|
type: :runtime
|
63
83
|
prerelease: false
|
64
84
|
version_requirements: !ruby/object:Gem::Requirement
|
65
85
|
requirements:
|
66
86
|
- - "~>"
|
67
87
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0.
|
88
|
+
version: '0.35'
|
69
89
|
- !ruby/object:Gem::Dependency
|
70
90
|
name: httpclient
|
71
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,19 +115,19 @@ dependencies:
|
|
95
115
|
- !ruby/object:Gem::Version
|
96
116
|
version: 4.2.10
|
97
117
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
118
|
+
name: eac_ruby_gem_support
|
99
119
|
requirement: !ruby/object:Gem::Requirement
|
100
120
|
requirements:
|
101
121
|
- - "~>"
|
102
122
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
123
|
+
version: '0.1'
|
104
124
|
type: :development
|
105
125
|
prerelease: false
|
106
126
|
version_requirements: !ruby/object:Gem::Requirement
|
107
127
|
requirements:
|
108
128
|
- - "~>"
|
109
129
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
130
|
+
version: '0.1'
|
111
131
|
- !ruby/object:Gem::Dependency
|
112
132
|
name: sqlite3
|
113
133
|
requirement: !ruby/object:Gem::Requirement
|
@@ -131,7 +151,6 @@ extra_rdoc_files: []
|
|
131
151
|
files:
|
132
152
|
- MIT-LICENSE
|
133
153
|
- README.rdoc
|
134
|
-
- Rakefile
|
135
154
|
- app/assets/javascripts/aranha/application.js
|
136
155
|
- app/assets/stylesheets/aranha/application.css
|
137
156
|
- app/controllers/aranha/addresses_controller.rb
|
@@ -155,9 +174,6 @@ files:
|
|
155
174
|
- lib/aranha/processor.rb
|
156
175
|
- lib/aranha/version.rb
|
157
176
|
- lib/tasks/aranha_tasks.rake
|
158
|
-
- test/aranha_test.rb
|
159
|
-
- test/integration/navigation_test.rb
|
160
|
-
- test/test_helper.rb
|
161
177
|
homepage:
|
162
178
|
licenses:
|
163
179
|
- MIT
|
@@ -182,7 +198,4 @@ rubygems_version: 2.7.7
|
|
182
198
|
signing_key:
|
183
199
|
specification_version: 4
|
184
200
|
summary: Rails utilities for web crawling.
|
185
|
-
test_files:
|
186
|
-
- test/aranha_test.rb
|
187
|
-
- test/test_helper.rb
|
188
|
-
- test/integration/navigation_test.rb
|
201
|
+
test_files: []
|
data/Rakefile
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
begin
|
4
|
-
require 'bundler/setup'
|
5
|
-
rescue LoadError
|
6
|
-
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
7
|
-
end
|
8
|
-
|
9
|
-
require 'rdoc/task'
|
10
|
-
|
11
|
-
RDoc::Task.new(:rdoc) do |rdoc|
|
12
|
-
rdoc.rdoc_dir = 'rdoc'
|
13
|
-
rdoc.title = 'Aranha'
|
14
|
-
rdoc.options << '--line-numbers'
|
15
|
-
rdoc.rdoc_files.include('README.rdoc')
|
16
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
17
|
-
end
|
18
|
-
|
19
|
-
APP_RAKEFILE = File.expand_path('spec/support/rails_app/Rakefile', __dir__)
|
20
|
-
load 'rails/tasks/engine.rake'
|
21
|
-
|
22
|
-
load 'rails/tasks/statistics.rake'
|
23
|
-
|
24
|
-
Bundler::GemHelper.install_tasks
|
25
|
-
|
26
|
-
require 'rake/testtask'
|
27
|
-
|
28
|
-
begin
|
29
|
-
require 'rspec/core/rake_task'
|
30
|
-
RSpec::Core::RakeTask.new(:spec)
|
31
|
-
task test: :spec
|
32
|
-
rescue LoadError
|
33
|
-
# no rspec available
|
34
|
-
end
|
35
|
-
|
36
|
-
task default: :test
|
data/test/aranha_test.rb
DELETED
data/test/test_helper.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Configure Rails Environment
|
4
|
-
ENV['RAILS_ENV'] = 'test'
|
5
|
-
|
6
|
-
require File.expand_path('../spec/support/rails_apps/config/environment.rb', __dir__)
|
7
|
-
ActiveRecord::Migrator.migrations_paths = [
|
8
|
-
File.expand_path('../spec/support/rails_app/db/migrate', __dir__)
|
9
|
-
]
|
10
|
-
require 'rails/test_help'
|
11
|
-
|
12
|
-
# Filter out Minitest backtrace while allowing backtrace from other libraries
|
13
|
-
# to be shown.
|
14
|
-
Minitest.backtrace_filter = Minitest::BacktraceFilter.new
|
15
|
-
|
16
|
-
# Load support files
|
17
|
-
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
|
18
|
-
|
19
|
-
# Load fixtures from the engine
|
20
|
-
if ActiveSupport::TestCase.respond_to?(:fixture_path=)
|
21
|
-
ActiveSupport::TestCase.fixture_path = File.expand_path('fixtures', __dir__)
|
22
|
-
ActiveSupport::TestCase.fixtures :all
|
23
|
-
end
|