aranha 0.14.5 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b89c63a38f3c2f658263dde445f10b45628ecd19069d6a78a0d5e838a1a6127
4
- data.tar.gz: 15cfb49d733aba80bd316813d21c587c47c4799af6a60d9a6147bed1e08f132f
3
+ metadata.gz: c5fac0411750b6def655452009d3d091801905c696158499d8b4b26f99ccc426
4
+ data.tar.gz: cb8cd648b8603cfd1f578ba92e5017e37d6e1e19427b176d33e097e413c17baf
5
5
  SHA512:
6
- metadata.gz: fe5b484b9709033c8f87db0f8070c83d5dd328109d2d6bfb12a7ef356ba7268ec45cf728f5554dbd3015e28f60d14be2b81d6ac33f836365024fb4e88f1ed7e1
7
- data.tar.gz: 9456fafe876c0894f810575fe41f078703ce2f9b75930c967ed4ffd9d33b93eeeb80e4b899e9bc12f31612d397fb1d462ab29520e7df4f71956fb64c489628f6
6
+ metadata.gz: c4864a35aa117b9bb00d544c013ae6caaf356b3d674657d5b1d5a3371f644b949b4fa32b6ec97d5f44fa58f65ea71db0499f18208ed885103a4f93d891d41309
7
+ data.tar.gz: c20f17bb1c3d04ce9b7678d2bec181c74dcf3b930202f9c838f5046cd68b326eb660ad490c4ad4c92647f60474eefc705e6e2734a62f3b42541760d689ab9686
@@ -1,3 +1,3 @@
1
1
  = Aranha
2
2
 
3
- Rails utilities for web crawling.
3
+ Ruby utilities for web crawling.
@@ -1,15 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'httpclient'
4
- require 'active_support/dependencies'
5
- require 'active_scaffold'
3
+ require 'eac_ruby_utils/core_ext'
6
4
 
7
5
  module Aranha
8
- require 'aranha/default_processor'
9
- require 'aranha/dom_elements_traverser'
10
- require 'aranha/engine'
11
- require 'aranha/fixtures'
12
- require 'aranha/processor'
13
- require 'aranha/parsers'
14
- require 'aranha/selenium'
6
+ require_sub __FILE__
15
7
  end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/abstract_methods'
4
+
5
+ module Aranha
6
+ class Manager
7
+ include ::EacRubyUtils::AbstractMethods
8
+
9
+ class << self
10
+ attr_accessor :default
11
+ end
12
+
13
+ def addresses_count
14
+ raise_abstract_method(__method__)
15
+ end
16
+
17
+ def add_address(_uri, _processor_class, _extra_data = nil)
18
+ raise_abstract_method(__method__)
19
+ end
20
+
21
+ def add_start_point(uri, processor_class, extra_data = nil)
22
+ start_points_var << ::EacRubyUtils::Struct.new(
23
+ uri: uri, processor_class: processor_class, extra_data: extra_data
24
+ )
25
+ end
26
+
27
+ def clear_expired_addresses
28
+ raise_abstract_method(__method__)
29
+ end
30
+
31
+ def init
32
+ clear_expired_addresses
33
+ start_points_to_addresses
34
+ end
35
+
36
+ def log_info(_message)
37
+ raise_abstract_method(__method__)
38
+ end
39
+
40
+ def log_warn(_message)
41
+ raise_abstract_method(__method__)
42
+ end
43
+
44
+ def start_points
45
+ start_points_var.to_enum
46
+ end
47
+
48
+ def start_points_to_addresses
49
+ start_points_var.each do |sp|
50
+ add_address(sp.uri, sp.processor_class, sp.extra_data)
51
+ end
52
+ end
53
+
54
+ def unprocessed_addresses
55
+ raise_abstract_method(__method__)
56
+ end
57
+
58
+ private
59
+
60
+ def start_points_var
61
+ @start_points_var ||= []
62
+ end
63
+ end
64
+ end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'net/http'
4
+ require 'httpclient'
4
5
  require 'aranha/parsers/invalid_state_exception'
6
+ require 'aranha/manager'
5
7
 
6
8
  module Aranha
7
9
  class Processor
@@ -20,11 +22,13 @@ module Aranha
20
22
 
21
23
  DEFAULT_MAX_TRIES = 3
22
24
 
23
- def initialize
24
- ::Aranha::Address.clear_expired
25
- ::Aranha::Address.add_start_points
25
+ attr_reader :manager
26
+
27
+ def initialize(manager = nil)
28
+ @manager = manager || ::Aranha::Manager.default
26
29
  @failed = {}
27
30
  @try = 0
31
+ self.manager.init
28
32
  process_loop
29
33
  raise "Addresses failed: #{@failed.count}" if @failed.any?
30
34
  end
@@ -32,7 +36,7 @@ module Aranha
32
36
  private
33
37
 
34
38
  def process_loop
35
- Rails.logger.info("Max tries: #{max_tries_s}")
39
+ manager.log_info("Max tries: #{max_tries_s}")
36
40
  loop do
37
41
  break if process_next_address
38
42
  end
@@ -52,8 +56,8 @@ module Aranha
52
56
  end
53
57
 
54
58
  def process_address(address)
55
- Rails.logger.info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
56
- " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
59
+ manager.log_info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
60
+ " Unprocessed: #{unprocessed.count}/#{::Aranha::Manager.default.addresses_count})")
57
61
  begin
58
62
  address.process
59
63
  @failed.delete(address.id)
@@ -67,7 +71,7 @@ module Aranha
67
71
 
68
72
  @failed[address.id] ||= 0
69
73
  @failed[address.id] += 1
70
- Rails.logger.warn(exception)
74
+ manager.log_warn(exception)
71
75
  end
72
76
 
73
77
  def next_address
@@ -75,7 +79,7 @@ module Aranha
75
79
  end
76
80
 
77
81
  def unprocessed
78
- ::Aranha::Address.unprocessed
82
+ ::Aranha::Manager.default.unprocessed_addresses
79
83
  end
80
84
 
81
85
  def network_exception?(exception)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Aranha
4
- VERSION = '0.14.5'
4
+ VERSION = '0.15.0'
5
5
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.5
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-14 00:00:00.000000000 Z
11
+ date: 2020-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: active_scaffold
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 3.4.41.1
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 3.4.41.1
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: aranha-parsers
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -58,34 +44,20 @@ dependencies:
58
44
  - - ">="
59
45
  - !ruby/object:Gem::Version
60
46
  version: 0.1.2
61
- - !ruby/object:Gem::Dependency
62
- name: eac_rails_utils
63
- requirement: !ruby/object:Gem::Requirement
64
- requirements:
65
- - - "~>"
66
- - !ruby/object:Gem::Version
67
- version: '0.11'
68
- type: :runtime
69
- prerelease: false
70
- version_requirements: !ruby/object:Gem::Requirement
71
- requirements:
72
- - - "~>"
73
- - !ruby/object:Gem::Version
74
- version: '0.11'
75
47
  - !ruby/object:Gem::Dependency
76
48
  name: eac_ruby_utils
77
49
  requirement: !ruby/object:Gem::Requirement
78
50
  requirements:
79
51
  - - "~>"
80
52
  - !ruby/object:Gem::Version
81
- version: '0.35'
53
+ version: '0.52'
82
54
  type: :runtime
83
55
  prerelease: false
84
56
  version_requirements: !ruby/object:Gem::Requirement
85
57
  requirements:
86
58
  - - "~>"
87
59
  - !ruby/object:Gem::Version
88
- version: '0.35'
60
+ version: '0.52'
89
61
  - !ruby/object:Gem::Dependency
90
62
  name: httpclient
91
63
  requirement: !ruby/object:Gem::Requirement
@@ -100,20 +72,6 @@ dependencies:
100
72
  - - ">="
101
73
  - !ruby/object:Gem::Version
102
74
  version: '2.6'
103
- - !ruby/object:Gem::Dependency
104
- name: rails
105
- requirement: !ruby/object:Gem::Requirement
106
- requirements:
107
- - - ">="
108
- - !ruby/object:Gem::Version
109
- version: 4.2.11.3
110
- type: :runtime
111
- prerelease: false
112
- version_requirements: !ruby/object:Gem::Requirement
113
- requirements:
114
- - - ">="
115
- - !ruby/object:Gem::Version
116
- version: 4.2.11.3
117
75
  - !ruby/object:Gem::Dependency
118
76
  name: eac_ruby_gem_support
119
77
  requirement: !ruby/object:Gem::Requirement
@@ -128,20 +86,6 @@ dependencies:
128
86
  - - "~>"
129
87
  - !ruby/object:Gem::Version
130
88
  version: '0.1'
131
- - !ruby/object:Gem::Dependency
132
- name: sqlite3
133
- requirement: !ruby/object:Gem::Requirement
134
- requirements:
135
- - - ">="
136
- - !ruby/object:Gem::Version
137
- version: '0'
138
- type: :development
139
- prerelease: false
140
- version_requirements: !ruby/object:Gem::Requirement
141
- requirements:
142
- - - ">="
143
- - !ruby/object:Gem::Version
144
- version: '0'
145
89
  description:
146
90
  email:
147
91
  - eduardobogoni@gmail.com
@@ -151,29 +95,11 @@ extra_rdoc_files: []
151
95
  files:
152
96
  - MIT-LICENSE
153
97
  - README.rdoc
154
- - app/assets/javascripts/aranha/application.js
155
- - app/assets/stylesheets/aranha/application.css
156
- - app/controllers/aranha/addresses_controller.rb
157
- - app/helpers/aranha/application_helper.rb
158
- - app/models/aranha/address.rb
159
- - app/views/layouts/aranha/application.html.erb
160
- - config/locales/en.yml
161
- - config/locales/pt-BR.yml
162
- - config/routes.rb
163
- - db/migrate/20171201021251_create_aranha_addresses.rb
164
- - db/migrate/20181125042102_add_extra_data_to_aranha_addresses.rb
165
98
  - lib/aranha.rb
166
99
  - lib/aranha/default_processor.rb
167
- - lib/aranha/dom_elements_traverser.rb
168
- - lib/aranha/dom_elements_traverser/conditions.rb
169
- - lib/aranha/dom_elements_traverser/cursor.rb
170
- - lib/aranha/dom_elements_traverser/data.rb
171
- - lib/aranha/engine.rb
172
- - lib/aranha/fixtures.rb
173
- - lib/aranha/fixtures/download.rb
100
+ - lib/aranha/manager.rb
174
101
  - lib/aranha/processor.rb
175
102
  - lib/aranha/version.rb
176
- - lib/tasks/aranha_tasks.rake
177
103
  homepage:
178
104
  licenses:
179
105
  - MIT
@@ -196,5 +122,5 @@ requirements: []
196
122
  rubygems_version: 3.0.8
197
123
  signing_key:
198
124
  specification_version: 4
199
- summary: Rails utilities for web crawling.
125
+ summary: Ruby utilities for web crawling.
200
126
  test_files: []
@@ -1,14 +0,0 @@
1
- // This is a manifest file that'll be compiled into application.js, which will include all the files
2
- // listed below.
3
- //
4
- // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
- // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
- //
7
- // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
- // compiled file.
9
- //
10
- // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
- // about supported directives.
12
- //
13
- //= require_tree .
14
- //= require active_scaffold
@@ -1,16 +0,0 @@
1
- /*
2
- * This is a manifest file that'll be compiled into application.css, which will include all the files
3
- * listed below.
4
- *
5
- * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
- * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
- *
8
- * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
- * compiled file so the styles you add here take precedence over styles defined in any styles
10
- * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
- * file per style scope.
12
- *
13
- *= require_tree .
14
- *= require_self
15
- *= require active_scaffold
16
- */
@@ -1,8 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class AddressesController < ::ApplicationController
5
- active_scaffold :'aranha/address' do |_conf|
6
- end
7
- end
8
- end
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- module ApplicationHelper
5
- end
6
- end
@@ -1,98 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/yaml'
4
-
5
- module Aranha
6
- class Address < ActiveRecord::Base
7
- include ::EacRailsUtils::Models::InequalityQueries
8
-
9
- add_inequality_queries(:created_at)
10
-
11
- class << self
12
- def set_start_point(url, processor)
13
- start_points[url] = processor
14
- end
15
-
16
- def add_start_points
17
- ::Rails.logger.info("Start points: #{start_points.count}")
18
- start_points.each do |url, processor|
19
- add(url, processor)
20
- end
21
- end
22
-
23
- def add(url, processor, extra_data = nil)
24
- a = find_or_initialize_by(url: sanitize_url(url))
25
- a.processor = processor
26
- a.extra_data = extra_data.to_yaml
27
- a.save!
28
- end
29
-
30
- def clear_expired
31
- q = by_created_at_lt(Time.zone.now - 12.hours)
32
- Rails.logger.info("Addresses expired: #{q.count}")
33
- q.destroy_all
34
- end
35
-
36
- private
37
-
38
- def sanitize_url(url)
39
- if url.is_a?(Hash)
40
- url.to_yaml
41
- else
42
- url.to_s
43
- end
44
- end
45
-
46
- def start_points
47
- @start_points ||= {}
48
- end
49
- end
50
-
51
- validates :url, presence: true, uniqueness: true
52
- validates :processor, presence: true
53
-
54
- scope :unprocessed, lambda {
55
- where(processed_at: nil)
56
- }
57
-
58
- def to_s
59
- "#{processor}|#{url}"
60
- end
61
-
62
- def process
63
- ActiveRecord::Base.transaction do
64
- instanciate_processor.process
65
- self.processed_at = Time.zone.now
66
- save!
67
- end
68
- end
69
-
70
- private
71
-
72
- def instanciate_processor
73
- processor_instancier.call(*processor_instancier_arguments)
74
- end
75
-
76
- def url_to_process
77
- ::EacRubyUtils::Yaml.load(url)
78
- end
79
-
80
- def processor_instancier
81
- processor.constantize.method(:new)
82
- end
83
-
84
- def processor_instancier_arguments
85
- if processor_instancier_arity == 2 || processor_instancier_arity.negative?
86
- [url_to_process, EacRubyUtils::Yaml.load(extra_data)]
87
- elsif processor_instancier_arity == 1
88
- [processor_instancier.call(url_to_process)]
89
- else
90
- raise("#{processor}.initialize should has 1 or 2 or * arguments")
91
- end
92
- end
93
-
94
- def processor_instancier_arity
95
- processor.constantize.instance_method(:initialize).arity
96
- end
97
- end
98
- end
@@ -1,12 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Aranha</title>
5
- <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
- <%= javascript_include_tag "aranha/application" %>
7
- <%= csrf_meta_tags %>
8
- </head>
9
- <body>
10
- <%= yield %>
11
- </body>
12
- </html>
@@ -1,6 +0,0 @@
1
- en:
2
- activerecord:
3
- models:
4
- aranha/address:
5
- one: Aranha address
6
- other: Aranha addresses
@@ -1,6 +0,0 @@
1
- pt-BR:
2
- activerecord:
3
- models:
4
- aranha/address:
5
- one: Endereço Aranha
6
- other: Endereços Aranha
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- Aranha::Engine.routes.draw do
4
- concern :active_scaffold, ActiveScaffold::Routing::Basic.new(association: true)
5
- resources(:addresses, concerns: :active_scaffold)
6
- end
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class CreateAranhaAddresses < (
4
- Rails.version < '5' ? ActiveRecord::Migration : ActiveRecord::Migration[4.2]
5
- )
6
- def change
7
- create_table :aranha_addresses do |t|
8
- t.string :url
9
- t.string :processor
10
- t.timestamp :processed_at
11
-
12
- t.timestamps null: false
13
- end
14
- end
15
- end
@@ -1,9 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class AddExtraDataToAranhaAddresses < (
4
- Rails.version < '5' ? ActiveRecord::Migration : ActiveRecord::Migration[4.2]
5
- )
6
- def change
7
- add_column :aranha_addresses, :extra_data, :text
8
- end
9
- end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'aranha/dom_elements_traverser/conditions'
4
- require 'aranha/dom_elements_traverser/data'
5
- require 'aranha/dom_elements_traverser/cursor'
6
-
7
- module Aranha
8
- class DomElementsTraverser
9
- include ::Aranha::DomElementsTraverser::Conditions
10
- include ::Aranha::DomElementsTraverser::Cursor
11
- include ::Aranha::DomElementsTraverser::Data
12
-
13
- class << self
14
- def traverse(options, &block)
15
- new(elements_from_options(options), &block)
16
- end
17
-
18
- def empty
19
- new([])
20
- end
21
-
22
- private
23
-
24
- def elements_from_options(options)
25
- options = ::EacRubyUtils::OptionsConsumer.new(options)
26
- elements = nil
27
- options.consume(:children_of) { |v| elements = v.children.to_a }
28
- raise 'None option of [:children_of] defined' unless elements
29
-
30
- options.validate
31
- elements
32
- end
33
- end
34
-
35
- private
36
-
37
- def initialize(elements, &block)
38
- @elements = elements
39
- @index = 0
40
- @data = {}
41
- instance_eval(&block) if block
42
- end
43
- end
44
- end
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class DomElementsTraverser
5
- module Conditions
6
- private
7
-
8
- def match_conditions?(conditions)
9
- raise "No element (Conditions: #{conditions})" unless current
10
-
11
- conditions.all? { |key, value| match_condition?(key, value) }
12
- end
13
-
14
- def match_condition?(key, value)
15
- case key.to_sym
16
- when :text then match_text_condition?(value)
17
- when :name then match_name_condition?(value)
18
- else raise "Unknown key condition: (#{key})"
19
- end
20
- end
21
-
22
- def match_name_condition?(tag_name)
23
- current.name.casecmp(tag_name.to_s).zero?
24
- end
25
-
26
- def match_text_condition?(texts)
27
- texts = [texts.to_s] unless texts.is_a?(Array)
28
- texts.all? { |t| current.text.downcase.include?(t.downcase) }
29
- end
30
- end
31
- end
32
- end
@@ -1,48 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/options_consumer'
4
-
5
- module Aranha
6
- class DomElementsTraverser
7
- module Cursor
8
- private
9
-
10
- def current
11
- @elements[@index]
12
- end
13
-
14
- def skip
15
- @index += 1
16
- end
17
-
18
- def skip_until(options)
19
- oc = ::EacRubyUtils::OptionsConsumer.new(options)
20
- optional = oc.consume(:optional, false)
21
- while current
22
- break if match_conditions?(oc.left_data)
23
-
24
- skip
25
- end
26
- raise "No element found for conditions #{oc.left_data}" unless current || optional
27
-
28
- current
29
- end
30
-
31
- def skip_until_after(conditions)
32
- skip_until(conditions)
33
- skip
34
- current
35
- end
36
-
37
- def if_found(conditions, &block)
38
- marked = @index
39
- skip_until({ optional: true }.merge(conditions))
40
- if current
41
- instance_eval(&block) if block
42
- else
43
- @index = marked
44
- end
45
- end
46
- end
47
- end
48
- end
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class DomElementsTraverser
5
- module Data
6
- def data
7
- @data.dup
8
- end
9
-
10
- private
11
-
12
- def store(key, options = {}, &converter)
13
- validate(options)
14
- value = store_value(options, converter)
15
- @data[key] = value
16
- r = current
17
- skip
18
- r
19
- end
20
-
21
- def store_value(options, converter)
22
- value = if options.key?(:attribute)
23
- current.attribute(options[:attribute]).value
24
- else
25
- current.text.strip
26
- end
27
- converter ? converter.call(value) : value
28
- end
29
-
30
- def validate(options)
31
- return unless options.key?(:validate)
32
- return if match_conditions?(options[:validate])
33
-
34
- raise "Element does not match conditions #{options[:validate]}" \
35
- " (Element: |#{current}|#{current.name}|)"
36
- end
37
- end
38
- end
39
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class Engine < ::Rails::Engine
5
- isolate_namespace Aranha
6
-
7
- initializer :append_migrations do |app|
8
- config.paths['db/migrate'].expanded.each do |expanded_path|
9
- app.config.paths['db/migrate'] << expanded_path
10
- end
11
- end
12
- end
13
- end
@@ -1,7 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- module Fixtures
5
- require 'aranha/fixtures/download'
6
- end
7
- end
@@ -1,72 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'aranha/parsers/base'
4
- require 'aranha/parsers/source_address'
5
- require 'aranha/parsers/source_target_fixtures'
6
-
7
- module Aranha
8
- module Fixtures
9
- class Download
10
- attr_reader :pending
11
-
12
- def initialize(options)
13
- @prefix = options.fetch(:prefix)
14
- @prefix = '' if @prefix.blank?
15
- @download = options.fetch(:download)
16
- @pending = options.fetch(:pending)
17
- end
18
-
19
- def run
20
- url_files.each do |f|
21
- Rails.logger.info(relative_path(f))
22
- download(url(f), target(f)) if @download
23
- end
24
- end
25
-
26
- private
27
-
28
- def url_files
29
- Dir["#{fixtures_root}/**/*.url"].select { |path| select_path?(path) }
30
- end
31
-
32
- def select_path?(path)
33
- return false unless match_prefix_pattern(path)
34
-
35
- !pending || !source_exist?(path)
36
- end
37
-
38
- def match_prefix_pattern(path)
39
- relative_path(path).start_with?(@prefix)
40
- end
41
-
42
- def fixtures_root
43
- Rails.root.to_s
44
- end
45
-
46
- def download(url, target)
47
- Rails.logger.info "Baixando \"#{url}\"..."
48
- content = ::Aranha::Parsers::Base.new(url).content
49
- raise "Content is blank for \"#{url}\"" if content.blank?
50
-
51
- File.open(target, 'wb') { |file| file.write(content) }
52
- end
53
-
54
- def url(file)
55
- ::Aranha::Parsers::SourceAddress.from_file(file)
56
- end
57
-
58
- def target(file)
59
- File.expand_path(File.basename(file, '.url') + '.source.html', File.dirname(file))
60
- end
61
-
62
- def relative_path(path)
63
- path.sub(%r{^#{Regexp.quote(fixtures_root)}/}, '')
64
- end
65
-
66
- def source_exist?(path)
67
- stf = ::Aranha::Parsers::SourceTargetFixtures.new(::File.dirname(path))
68
- stf.source_file(::File.basename(path, '.url')).present?
69
- end
70
- end
71
- end
72
- end
@@ -1,22 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- namespace(:aranha) do
4
- task process: :environment do
5
- ::Aranha::Processor.new
6
- end
7
-
8
- task clear: :environment do
9
- Rails.logger.info("Addresses deleted: #{::Aranha::Address.destroy_all.count}")
10
- end
11
-
12
- namespace :fixtures do
13
- desc 'Download remote content for fixtures.'
14
- task download: :environment do
15
- ::Aranha::Fixtures::Download.new(
16
- prefix: ENV['PREFIX'],
17
- download: ENV['DOWNLOAD'].present?,
18
- pending: ENV['PENDING'].present?
19
- ).run
20
- end
21
- end
22
- end