aranha 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11b9dd8f6de631eeb2550a1d009a4cc80e8c2309e078179138e7f2bfb4a6813c
4
- data.tar.gz: 7a30d214e9a0364798c6fe284e0c08fb4337a00c10f302aa483556c786b27b13
3
+ metadata.gz: c5fac0411750b6def655452009d3d091801905c696158499d8b4b26f99ccc426
4
+ data.tar.gz: cb8cd648b8603cfd1f578ba92e5017e37d6e1e19427b176d33e097e413c17baf
5
5
  SHA512:
6
- metadata.gz: 7704cab9261ab592f2199eece35d10d34aa48f49d2fe118ff5b610b0effc2e0486a9479c58912ac607a13d937f3397da4460003e043f80fee50fa6b97f4fb8cf
7
- data.tar.gz: e7ac44ed95d0d1c7c29c6bab249fb7f71f5309a9d706fd08d854b41fdb28d2abb261ff0f23b470370f5868d193188bdc99c10dfde065abfd982ffceb94d51a34
6
+ metadata.gz: c4864a35aa117b9bb00d544c013ae6caaf356b3d674657d5b1d5a3371f644b949b4fa32b6ec97d5f44fa58f65ea71db0499f18208ed885103a4f93d891d41309
7
+ data.tar.gz: c20f17bb1c3d04ce9b7678d2bec181c74dcf3b930202f9c838f5046cd68b326eb660ad490c4ad4c92647f60474eefc705e6e2734a62f3b42541760d689ab9686
@@ -1,3 +1,3 @@
1
1
  = Aranha
2
2
 
3
- Rails utilities for web crawling.
3
+ Ruby utilities for web crawling.
@@ -1,15 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'httpclient'
4
- require 'active_support/dependencies'
5
- require 'active_scaffold'
3
+ require 'eac_ruby_utils/core_ext'
6
4
 
7
5
  module Aranha
8
- require 'aranha/default_processor'
9
- require 'aranha/dom_elements_traverser'
10
- require 'aranha/engine'
11
- require 'aranha/fixtures'
12
- require 'aranha/processor'
13
- require 'aranha/parsers'
14
- require 'aranha/selenium'
6
+ require_sub __FILE__
15
7
  end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/abstract_methods'
4
+
5
+ module Aranha
6
+ class Manager
7
+ include ::EacRubyUtils::AbstractMethods
8
+
9
+ class << self
10
+ attr_accessor :default
11
+ end
12
+
13
+ def addresses_count
14
+ raise_abstract_method(__method__)
15
+ end
16
+
17
+ def add_address(_uri, _processor_class, _extra_data = nil)
18
+ raise_abstract_method(__method__)
19
+ end
20
+
21
+ def add_start_point(uri, processor_class, extra_data = nil)
22
+ start_points_var << ::EacRubyUtils::Struct.new(
23
+ uri: uri, processor_class: processor_class, extra_data: extra_data
24
+ )
25
+ end
26
+
27
+ def clear_expired_addresses
28
+ raise_abstract_method(__method__)
29
+ end
30
+
31
+ def init
32
+ clear_expired_addresses
33
+ start_points_to_addresses
34
+ end
35
+
36
+ def log_info(_message)
37
+ raise_abstract_method(__method__)
38
+ end
39
+
40
+ def log_warn(_message)
41
+ raise_abstract_method(__method__)
42
+ end
43
+
44
+ def start_points
45
+ start_points_var.to_enum
46
+ end
47
+
48
+ def start_points_to_addresses
49
+ start_points_var.each do |sp|
50
+ add_address(sp.uri, sp.processor_class, sp.extra_data)
51
+ end
52
+ end
53
+
54
+ def unprocessed_addresses
55
+ raise_abstract_method(__method__)
56
+ end
57
+
58
+ private
59
+
60
+ def start_points_var
61
+ @start_points_var ||= []
62
+ end
63
+ end
64
+ end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'net/http'
4
+ require 'httpclient'
4
5
  require 'aranha/parsers/invalid_state_exception'
6
+ require 'aranha/manager'
5
7
 
6
8
  module Aranha
7
9
  class Processor
@@ -20,11 +22,13 @@ module Aranha
20
22
 
21
23
  DEFAULT_MAX_TRIES = 3
22
24
 
23
- def initialize
24
- ::Aranha::Address.clear_expired
25
- ::Aranha::Address.add_start_points
25
+ attr_reader :manager
26
+
27
+ def initialize(manager = nil)
28
+ @manager = manager || ::Aranha::Manager.default
26
29
  @failed = {}
27
30
  @try = 0
31
+ self.manager.init
28
32
  process_loop
29
33
  raise "Addresses failed: #{@failed.count}" if @failed.any?
30
34
  end
@@ -32,7 +36,7 @@ module Aranha
32
36
  private
33
37
 
34
38
  def process_loop
35
- Rails.logger.info("Max tries: #{max_tries_s}")
39
+ manager.log_info("Max tries: #{max_tries_s}")
36
40
  loop do
37
41
  break if process_next_address
38
42
  end
@@ -52,8 +56,8 @@ module Aranha
52
56
  end
53
57
 
54
58
  def process_address(address)
55
- Rails.logger.info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
56
- " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
59
+ manager.log_info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
60
+ " Unprocessed: #{unprocessed.count}/#{::Aranha::Manager.default.addresses_count})")
57
61
  begin
58
62
  address.process
59
63
  @failed.delete(address.id)
@@ -67,7 +71,7 @@ module Aranha
67
71
 
68
72
  @failed[address.id] ||= 0
69
73
  @failed[address.id] += 1
70
- Rails.logger.warn(exception)
74
+ manager.log_warn(exception)
71
75
  end
72
76
 
73
77
  def next_address
@@ -75,7 +79,7 @@ module Aranha
75
79
  end
76
80
 
77
81
  def unprocessed
78
- ::Aranha::Address.unprocessed
82
+ ::Aranha::Manager.default.unprocessed_addresses
79
83
  end
80
84
 
81
85
  def network_exception?(exception)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Aranha
4
- VERSION = '0.14.1'
4
+ VERSION = '0.15.0'
5
5
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.1
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-29 00:00:00.000000000 Z
11
+ date: 2020-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: active_scaffold
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: 3.4.41.1
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: 3.4.41.1
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: aranha-parsers
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -58,34 +44,20 @@ dependencies:
58
44
  - - ">="
59
45
  - !ruby/object:Gem::Version
60
46
  version: 0.1.2
61
- - !ruby/object:Gem::Dependency
62
- name: eac_rails_utils
63
- requirement: !ruby/object:Gem::Requirement
64
- requirements:
65
- - - "~>"
66
- - !ruby/object:Gem::Version
67
- version: '0.11'
68
- type: :runtime
69
- prerelease: false
70
- version_requirements: !ruby/object:Gem::Requirement
71
- requirements:
72
- - - "~>"
73
- - !ruby/object:Gem::Version
74
- version: '0.11'
75
47
  - !ruby/object:Gem::Dependency
76
48
  name: eac_ruby_utils
77
49
  requirement: !ruby/object:Gem::Requirement
78
50
  requirements:
79
51
  - - "~>"
80
52
  - !ruby/object:Gem::Version
81
- version: '0.15'
53
+ version: '0.52'
82
54
  type: :runtime
83
55
  prerelease: false
84
56
  version_requirements: !ruby/object:Gem::Requirement
85
57
  requirements:
86
58
  - - "~>"
87
59
  - !ruby/object:Gem::Version
88
- version: '0.15'
60
+ version: '0.52'
89
61
  - !ruby/object:Gem::Dependency
90
62
  name: httpclient
91
63
  requirement: !ruby/object:Gem::Requirement
@@ -100,20 +72,6 @@ dependencies:
100
72
  - - ">="
101
73
  - !ruby/object:Gem::Version
102
74
  version: '2.6'
103
- - !ruby/object:Gem::Dependency
104
- name: rails
105
- requirement: !ruby/object:Gem::Requirement
106
- requirements:
107
- - - "~>"
108
- - !ruby/object:Gem::Version
109
- version: 4.2.10
110
- type: :runtime
111
- prerelease: false
112
- version_requirements: !ruby/object:Gem::Requirement
113
- requirements:
114
- - - "~>"
115
- - !ruby/object:Gem::Version
116
- version: 4.2.10
117
75
  - !ruby/object:Gem::Dependency
118
76
  name: eac_ruby_gem_support
119
77
  requirement: !ruby/object:Gem::Requirement
@@ -128,20 +86,6 @@ dependencies:
128
86
  - - "~>"
129
87
  - !ruby/object:Gem::Version
130
88
  version: '0.1'
131
- - !ruby/object:Gem::Dependency
132
- name: sqlite3
133
- requirement: !ruby/object:Gem::Requirement
134
- requirements:
135
- - - ">="
136
- - !ruby/object:Gem::Version
137
- version: '0'
138
- type: :development
139
- prerelease: false
140
- version_requirements: !ruby/object:Gem::Requirement
141
- requirements:
142
- - - ">="
143
- - !ruby/object:Gem::Version
144
- version: '0'
145
89
  description:
146
90
  email:
147
91
  - eduardobogoni@gmail.com
@@ -151,29 +95,11 @@ extra_rdoc_files: []
151
95
  files:
152
96
  - MIT-LICENSE
153
97
  - README.rdoc
154
- - app/assets/javascripts/aranha/application.js
155
- - app/assets/stylesheets/aranha/application.css
156
- - app/controllers/aranha/addresses_controller.rb
157
- - app/helpers/aranha/application_helper.rb
158
- - app/models/aranha/address.rb
159
- - app/views/layouts/aranha/application.html.erb
160
- - config/locales/en.yml
161
- - config/locales/pt-BR.yml
162
- - config/routes.rb
163
- - db/migrate/20171201021251_create_aranha_addresses.rb
164
- - db/migrate/20181125042102_add_extra_data_to_aranha_addresses.rb
165
98
  - lib/aranha.rb
166
99
  - lib/aranha/default_processor.rb
167
- - lib/aranha/dom_elements_traverser.rb
168
- - lib/aranha/dom_elements_traverser/conditions.rb
169
- - lib/aranha/dom_elements_traverser/cursor.rb
170
- - lib/aranha/dom_elements_traverser/data.rb
171
- - lib/aranha/engine.rb
172
- - lib/aranha/fixtures.rb
173
- - lib/aranha/fixtures/download.rb
100
+ - lib/aranha/manager.rb
174
101
  - lib/aranha/processor.rb
175
102
  - lib/aranha/version.rb
176
- - lib/tasks/aranha_tasks.rake
177
103
  homepage:
178
104
  licenses:
179
105
  - MIT
@@ -193,9 +119,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
119
  - !ruby/object:Gem::Version
194
120
  version: '0'
195
121
  requirements: []
196
- rubyforge_project:
197
- rubygems_version: 2.7.7
122
+ rubygems_version: 3.0.8
198
123
  signing_key:
199
124
  specification_version: 4
200
- summary: Rails utilities for web crawling.
125
+ summary: Ruby utilities for web crawling.
201
126
  test_files: []
@@ -1,14 +0,0 @@
1
- // This is a manifest file that'll be compiled into application.js, which will include all the files
2
- // listed below.
3
- //
4
- // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
- // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
- //
7
- // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
- // compiled file.
9
- //
10
- // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
- // about supported directives.
12
- //
13
- //= require_tree .
14
- //= require active_scaffold
@@ -1,16 +0,0 @@
1
- /*
2
- * This is a manifest file that'll be compiled into application.css, which will include all the files
3
- * listed below.
4
- *
5
- * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
- * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
- *
8
- * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
- * compiled file so the styles you add here take precedence over styles defined in any styles
10
- * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
- * file per style scope.
12
- *
13
- *= require_tree .
14
- *= require_self
15
- *= require active_scaffold
16
- */
@@ -1,8 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class AddressesController < ::ApplicationController
5
- active_scaffold :'aranha/address' do |_conf|
6
- end
7
- end
8
- end
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- module ApplicationHelper
5
- end
6
- end
@@ -1,98 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/yaml'
4
-
5
- module Aranha
6
- class Address < ActiveRecord::Base
7
- include ::EacRailsUtils::Models::InequalityQueries
8
-
9
- add_inequality_queries(:created_at)
10
-
11
- class << self
12
- def set_start_point(url, processor)
13
- start_points[url] = processor
14
- end
15
-
16
- def add_start_points
17
- ::Rails.logger.info("Start points: #{start_points.count}")
18
- start_points.each do |url, processor|
19
- add(url, processor)
20
- end
21
- end
22
-
23
- def add(url, processor, extra_data = nil)
24
- a = find_or_initialize_by(url: sanitize_url(url))
25
- a.processor = processor
26
- a.extra_data = extra_data.to_yaml
27
- a.save!
28
- end
29
-
30
- def clear_expired
31
- q = by_created_at_lt(Time.zone.now - 12.hours)
32
- Rails.logger.info("Addresses expired: #{q.count}")
33
- q.destroy_all
34
- end
35
-
36
- private
37
-
38
- def sanitize_url(url)
39
- if url.is_a?(Hash)
40
- url.to_yaml
41
- else
42
- url.to_s
43
- end
44
- end
45
-
46
- def start_points
47
- @start_points ||= {}
48
- end
49
- end
50
-
51
- validates :url, presence: true, uniqueness: true
52
- validates :processor, presence: true
53
-
54
- scope :unprocessed, lambda {
55
- where(processed_at: nil)
56
- }
57
-
58
- def to_s
59
- "#{processor}|#{url}"
60
- end
61
-
62
- def process
63
- ActiveRecord::Base.transaction do
64
- instanciate_processor.process
65
- self.processed_at = Time.zone.now
66
- save!
67
- end
68
- end
69
-
70
- private
71
-
72
- def instanciate_processor
73
- processor_instancier.call(*processor_instancier_arguments)
74
- end
75
-
76
- def url_to_process
77
- ::EacRubyUtils::Yaml.load_common(url)
78
- end
79
-
80
- def processor_instancier
81
- processor.constantize.method(:new)
82
- end
83
-
84
- def processor_instancier_arguments
85
- if processor_instancier_arity == 2 || processor_instancier_arity.negative?
86
- [url_to_process, EacRubyUtils::Yaml.load_common(extra_data)]
87
- elsif processor_instancier_arity == 1
88
- [processor_instancier.call(url_to_process)]
89
- else
90
- raise("#{processor}.initialize should has 1 or 2 or * arguments")
91
- end
92
- end
93
-
94
- def processor_instancier_arity
95
- processor.constantize.instance_method(:initialize).arity
96
- end
97
- end
98
- end
@@ -1,12 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Aranha</title>
5
- <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
- <%= javascript_include_tag "aranha/application" %>
7
- <%= csrf_meta_tags %>
8
- </head>
9
- <body>
10
- <%= yield %>
11
- </body>
12
- </html>
@@ -1,6 +0,0 @@
1
- en:
2
- activerecord:
3
- models:
4
- aranha/address:
5
- one: Aranha address
6
- other: Aranha addresses
@@ -1,6 +0,0 @@
1
- pt-BR:
2
- activerecord:
3
- models:
4
- aranha/address:
5
- one: Endereço Aranha
6
- other: Endereços Aranha
@@ -1,5 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- Aranha::Engine.routes.draw do
4
- resources(:addresses) { as_routes }
5
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class CreateAranhaAddresses < ActiveRecord::Migration
4
- def change
5
- create_table :aranha_addresses do |t|
6
- t.string :url
7
- t.string :processor
8
- t.timestamp :processed_at
9
-
10
- t.timestamps null: false
11
- end
12
- end
13
- end
@@ -1,7 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class AddExtraDataToAranhaAddresses < ActiveRecord::Migration
4
- def change
5
- add_column :aranha_addresses, :extra_data, :text
6
- end
7
- end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'aranha/dom_elements_traverser/conditions'
4
- require 'aranha/dom_elements_traverser/data'
5
- require 'aranha/dom_elements_traverser/cursor'
6
-
7
- module Aranha
8
- class DomElementsTraverser
9
- include ::Aranha::DomElementsTraverser::Conditions
10
- include ::Aranha::DomElementsTraverser::Cursor
11
- include ::Aranha::DomElementsTraverser::Data
12
-
13
- class << self
14
- def traverse(options, &block)
15
- new(elements_from_options(options), &block)
16
- end
17
-
18
- def empty
19
- new([])
20
- end
21
-
22
- private
23
-
24
- def elements_from_options(options)
25
- options = ::EacRubyUtils::OptionsConsumer.new(options)
26
- elements = nil
27
- options.consume(:children_of) { |v| elements = v.children.to_a }
28
- raise 'None option of [:children_of] defined' unless elements
29
-
30
- options.validate
31
- elements
32
- end
33
- end
34
-
35
- private
36
-
37
- def initialize(elements, &block)
38
- @elements = elements
39
- @index = 0
40
- @data = {}
41
- instance_eval(&block) if block
42
- end
43
- end
44
- end
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class DomElementsTraverser
5
- module Conditions
6
- private
7
-
8
- def match_conditions?(conditions)
9
- raise "No element (Conditions: #{conditions})" unless current
10
-
11
- conditions.all? { |key, value| match_condition?(key, value) }
12
- end
13
-
14
- def match_condition?(key, value)
15
- case key.to_sym
16
- when :text then match_text_condition?(value)
17
- when :name then match_name_condition?(value)
18
- else raise "Unknown key condition: (#{key})"
19
- end
20
- end
21
-
22
- def match_name_condition?(tag_name)
23
- current.name.casecmp(tag_name.to_s).zero?
24
- end
25
-
26
- def match_text_condition?(texts)
27
- texts = [texts.to_s] unless texts.is_a?(Array)
28
- texts.all? { |t| current.text.downcase.include?(t.downcase) }
29
- end
30
- end
31
- end
32
- end
@@ -1,48 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/options_consumer'
4
-
5
- module Aranha
6
- class DomElementsTraverser
7
- module Cursor
8
- private
9
-
10
- def current
11
- @elements[@index]
12
- end
13
-
14
- def skip
15
- @index += 1
16
- end
17
-
18
- def skip_until(options)
19
- oc = ::EacRubyUtils::OptionsConsumer.new(options)
20
- optional = oc.consume(:optional, false)
21
- while current
22
- break if match_conditions?(oc.left_data)
23
-
24
- skip
25
- end
26
- raise "No element found for conditions #{oc.left_data}" unless current || optional
27
-
28
- current
29
- end
30
-
31
- def skip_until_after(conditions)
32
- skip_until(conditions)
33
- skip
34
- current
35
- end
36
-
37
- def if_found(conditions, &block)
38
- marked = @index
39
- skip_until({ optional: true }.merge(conditions))
40
- if current
41
- instance_eval(&block) if block
42
- else
43
- @index = marked
44
- end
45
- end
46
- end
47
- end
48
- end
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class DomElementsTraverser
5
- module Data
6
- def data
7
- @data.dup
8
- end
9
-
10
- private
11
-
12
- def store(key, options = {}, &converter)
13
- validate(options)
14
- value = store_value(options, converter)
15
- @data[key] = value
16
- r = current
17
- skip
18
- r
19
- end
20
-
21
- def store_value(options, converter)
22
- value = if options.key?(:attribute)
23
- current.attribute(options[:attribute]).value
24
- else
25
- current.text.strip
26
- end
27
- converter ? converter.call(value) : value
28
- end
29
-
30
- def validate(options)
31
- return unless options.key?(:validate)
32
- return if match_conditions?(options[:validate])
33
-
34
- raise "Element does not match conditions #{options[:validate]}" \
35
- " (Element: |#{current}|#{current.name}|)"
36
- end
37
- end
38
- end
39
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- class Engine < ::Rails::Engine
5
- isolate_namespace Aranha
6
-
7
- initializer :append_migrations do |app|
8
- config.paths['db/migrate'].expanded.each do |expanded_path|
9
- app.config.paths['db/migrate'] << expanded_path
10
- end
11
- end
12
- end
13
- end
@@ -1,7 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- module Fixtures
5
- require 'aranha/fixtures/download'
6
- end
7
- end
@@ -1,72 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'aranha/parsers/base'
4
- require 'aranha/parsers/source_address'
5
- require 'aranha/parsers/source_target_fixtures'
6
-
7
- module Aranha
8
- module Fixtures
9
- class Download
10
- attr_reader :pending
11
-
12
- def initialize(options)
13
- @prefix = options.fetch(:prefix)
14
- @prefix = '' if @prefix.blank?
15
- @download = options.fetch(:download)
16
- @pending = options.fetch(:pending)
17
- end
18
-
19
- def run
20
- url_files.each do |f|
21
- Rails.logger.info(relative_path(f))
22
- download(url(f), target(f)) if @download
23
- end
24
- end
25
-
26
- private
27
-
28
- def url_files
29
- Dir["#{fixtures_root}/**/*.url"].select { |path| select_path?(path) }
30
- end
31
-
32
- def select_path?(path)
33
- return false unless match_prefix_pattern(path)
34
-
35
- !pending || !source_exist?(path)
36
- end
37
-
38
- def match_prefix_pattern(path)
39
- relative_path(path).start_with?(@prefix)
40
- end
41
-
42
- def fixtures_root
43
- Rails.root.to_s
44
- end
45
-
46
- def download(url, target)
47
- Rails.logger.info "Baixando \"#{url}\"..."
48
- content = ::Aranha::Parsers::Base.new(url).content
49
- raise "Content is blank for \"#{url}\"" if content.blank?
50
-
51
- File.open(target, 'wb') { |file| file.write(content) }
52
- end
53
-
54
- def url(file)
55
- ::Aranha::Parsers::SourceAddress.from_file(file)
56
- end
57
-
58
- def target(file)
59
- File.expand_path(File.basename(file, '.url') + '.source.html', File.dirname(file))
60
- end
61
-
62
- def relative_path(path)
63
- path.sub(%r{^#{Regexp.quote(fixtures_root)}/}, '')
64
- end
65
-
66
- def source_exist?(path)
67
- stf = ::Aranha::Parsers::SourceTargetFixtures.new(::File.dirname(path))
68
- stf.source_file(::File.basename(path, '.url')).present?
69
- end
70
- end
71
- end
72
- end
@@ -1,22 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- namespace(:aranha) do
4
- task process: :environment do
5
- ::Aranha::Processor.new
6
- end
7
-
8
- task clear: :environment do
9
- Rails.logger.info("Addresses deleted: #{::Aranha::Address.destroy_all.count}")
10
- end
11
-
12
- namespace :fixtures do
13
- desc 'Download remote content for fixtures.'
14
- task download: :environment do
15
- ::Aranha::Fixtures::Download.new(
16
- prefix: ENV['PREFIX'],
17
- download: ENV['DOWNLOAD'].present?,
18
- pending: ENV['PENDING'].present?
19
- ).run
20
- end
21
- end
22
- end