aranha 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -5
  3. data/app/controllers/aranha/addresses_controller.rb +1 -0
  4. data/app/controllers/aranha/application_controller.rb +2 -0
  5. data/app/helpers/aranha/application_helper.rb +2 -0
  6. data/app/models/aranha/address.rb +1 -0
  7. data/config/routes.rb +2 -0
  8. data/db/migrate/20171201021251_create_aranha_addresses.rb +1 -0
  9. data/lib/aranha.rb +6 -0
  10. data/lib/aranha/default_processor.rb +35 -0
  11. data/lib/aranha/dom_elements_traverser.rb +44 -0
  12. data/lib/aranha/dom_elements_traverser/conditions.rb +32 -0
  13. data/lib/aranha/dom_elements_traverser/cursor.rb +46 -0
  14. data/lib/aranha/dom_elements_traverser/data.rb +39 -0
  15. data/lib/aranha/engine.rb +2 -0
  16. data/lib/aranha/parsers/base.rb +79 -0
  17. data/lib/aranha/parsers/html/base.rb +28 -0
  18. data/lib/aranha/parsers/html/item_list.rb +24 -0
  19. data/lib/aranha/parsers/html/node/base.rb +30 -0
  20. data/lib/aranha/parsers/html/node/default.rb +93 -0
  21. data/lib/aranha/processor.rb +16 -14
  22. data/lib/aranha/version.rb +2 -1
  23. data/lib/tasks/aranha_tasks.rake +1 -0
  24. data/test/aranha_test.rb +3 -1
  25. data/test/dummy/Rakefile +3 -1
  26. data/test/dummy/app/controllers/application_controller.rb +2 -0
  27. data/test/dummy/app/helpers/application_helper.rb +2 -0
  28. data/test/dummy/bin/bundle +3 -1
  29. data/test/dummy/bin/rails +3 -1
  30. data/test/dummy/bin/rake +2 -0
  31. data/test/dummy/bin/setup +10 -8
  32. data/test/dummy/config.ru +2 -0
  33. data/test/dummy/config/application.rb +4 -2
  34. data/test/dummy/config/boot.rb +4 -2
  35. data/test/dummy/config/environment.rb +3 -1
  36. data/test/dummy/config/environments/development.rb +2 -0
  37. data/test/dummy/config/environments/production.rb +4 -1
  38. data/test/dummy/config/environments/test.rb +2 -0
  39. data/test/dummy/config/initializers/assets.rb +2 -0
  40. data/test/dummy/config/initializers/backtrace_silencers.rb +6 -2
  41. data/test/dummy/config/initializers/cookies_serializer.rb +2 -0
  42. data/test/dummy/config/initializers/filter_parameter_logging.rb +2 -0
  43. data/test/dummy/config/initializers/inflections.rb +2 -0
  44. data/test/dummy/config/initializers/mime_types.rb +2 -0
  45. data/test/dummy/config/initializers/session_store.rb +2 -0
  46. data/test/dummy/config/initializers/to_time_preserves_timezone.rb +2 -0
  47. data/test/dummy/config/initializers/wrap_parameters.rb +2 -0
  48. data/test/dummy/config/routes.rb +3 -2
  49. data/test/integration/navigation_test.rb +2 -0
  50. data/test/test_helper.rb +4 -3
  51. metadata +55 -31
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db4987f0c4c725a35cddd376afbd950d541491d700e3f392bb4def95d2885a24
4
- data.tar.gz: f5d84a48f26780e4a9c84c623a150ccd1528afdd0c3b39c23580bbe22dbe4896
3
+ metadata.gz: e3050cda5e754315e7b80518851f3ce91aa167e8ca8359084203ed61b20c7aa7
4
+ data.tar.gz: 80525648edf9ba10f99f7ab2bd684a0916ce91ea20892550b8bb6f19d092b7d7
5
5
  SHA512:
6
- metadata.gz: 9ec0aa19303a62e0f260da849f55524d14f539b522b4944f28fb399698a60bc6cb4f2f3c10aaf5782c76ba1d5c5102d1350a6aaf7fd050a2a3969d3ced028ba9
7
- data.tar.gz: f098aeb43bce8afe3a9330667ef0e34805852737857bf54f066c4ad976b48b2b4ed05d8a36d3c560f41640596889a30604e239a0c30e8a4d9bb8b0a165e9b589
6
+ metadata.gz: 1b2036fa6ebc24937ac23fda40a8aacbf0a89c3f233d17ddf82c15ecdf1c26bbe36b39b04eeb2f4fa8bfcaa0170477a46338bb74f8ba356d84c591e59944c17c
7
+ data.tar.gz: 919372af358638d177b31f61dfe65b41a7d7cb81cb50d235b818e63c8de306d102fc0508143dfa6b4da6fffa23ad02d1889f4e364ffc3f5ddb56082552a629a1
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  begin
2
4
  require 'bundler/setup'
3
5
  rescue LoadError
@@ -14,14 +16,11 @@ RDoc::Task.new(:rdoc) do |rdoc|
14
16
  rdoc.rdoc_files.include('lib/**/*.rb')
15
17
  end
16
18
 
17
- APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
19
+ APP_RAKEFILE = File.expand_path('test/dummy/Rakefile', __dir__)
18
20
  load 'rails/tasks/engine.rake'
19
21
 
20
-
21
22
  load 'rails/tasks/statistics.rake'
22
23
 
23
-
24
-
25
24
  Bundler::GemHelper.install_tasks
26
25
 
27
26
  require 'rake/testtask'
@@ -33,5 +32,4 @@ Rake::TestTask.new(:test) do |t|
33
32
  t.verbose = false
34
33
  end
35
34
 
36
-
37
35
  task default: :test
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_dependency 'aranha/application_controller'
3
4
 
4
5
  module Aranha
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Aranha
2
4
  class ApplicationController < ActionController::Base
3
5
  protect_from_forgery with: :exception
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Aranha
2
4
  module ApplicationHelper
3
5
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Aranha
3
4
  class Address < ActiveRecord::Base
4
5
  include ::Eac::InequalityQueries
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Aranha::Engine.routes.draw do
2
4
  resources(:addresses) { as_routes }
3
5
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class CreateAranhaAddresses < ActiveRecord::Migration
3
4
  def change
4
5
  create_table :aranha_addresses do |t|
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require 'httpclient'
3
4
  require 'active_support/dependencies'
4
5
  require_dependency 'aranha/engine'
@@ -7,4 +8,9 @@ require_dependency 'active_scaffold'
7
8
  module Aranha
8
9
  end
9
10
 
11
+ require_dependency 'aranha/default_processor'
10
12
  require_dependency 'aranha/processor'
13
+ require_dependency 'aranha/parsers/base'
14
+ require_dependency 'aranha/parsers/html/base'
15
+ require_dependency 'aranha/parsers/html/item_list'
16
+ require_dependency 'aranha/dom_elements_traverser'
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DefaultProcessor
5
+ attr_reader :source_uri
6
+
7
+ def initialize(source_uri)
8
+ unless source_uri.is_a?(Addressable::URI)
9
+ source_uri = source_uri.to_s.gsub(%r{\A/}, 'file:///')
10
+ end
11
+ @source_uri = Addressable::URI.parse(source_uri)
12
+ end
13
+
14
+ def process
15
+ raise 'Implement method process'
16
+ end
17
+
18
+ protected
19
+
20
+ def target_uri
21
+ source_uri
22
+ end
23
+
24
+ def data
25
+ @data ||= parser_class.new(target_uri).data
26
+ end
27
+
28
+ def parser_class
29
+ r = self.class.name.gsub('::Processors::', '::Parsers::').constantize
30
+ return r unless is_a?(r)
31
+
32
+ raise "Parser can be not the process class: #{r}"
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_dependency 'aranha/dom_elements_traverser/conditions'
4
+ require_dependency 'aranha/dom_elements_traverser/data'
5
+ require_dependency 'aranha/dom_elements_traverser/cursor'
6
+
7
+ module Aranha
8
+ class DomElementsTraverser
9
+ include ::Aranha::DomElementsTraverser::Conditions
10
+ include ::Aranha::DomElementsTraverser::Cursor
11
+ include ::Aranha::DomElementsTraverser::Data
12
+
13
+ class << self
14
+ def traverse(options, &block)
15
+ new(elements_from_options(options), &block)
16
+ end
17
+
18
+ def empty
19
+ new([])
20
+ end
21
+
22
+ private
23
+
24
+ def elements_from_options(options)
25
+ options = ::EacRubyUtils::OptionsConsumer.new(options)
26
+ elements = nil
27
+ options.consume(:children_of) { |v| elements = v.children.to_a }
28
+ raise 'None option of [:children_of] defined' unless elements
29
+
30
+ options.validate
31
+ elements
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def initialize(elements, &block)
38
+ @elements = elements
39
+ @index = 0
40
+ @data = {}
41
+ instance_eval(&block) if block
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DomElementsTraverser
5
+ module Conditions
6
+ private
7
+
8
+ def match_conditions?(conditions)
9
+ raise "No element (Conditions: #{conditions})" unless current
10
+
11
+ conditions.all? { |key, value| match_condition?(key, value) }
12
+ end
13
+
14
+ def match_condition?(key, value)
15
+ case key.to_sym
16
+ when :text then match_text_condition?(value)
17
+ when :name then match_name_condition?(value)
18
+ else raise "Unknown key condition: (#{key})"
19
+ end
20
+ end
21
+
22
+ def match_name_condition?(tag_name)
23
+ current.name.casecmp(tag_name.to_s).zero?
24
+ end
25
+
26
+ def match_text_condition?(texts)
27
+ texts = [texts.to_s] unless texts.is_a?(Array)
28
+ texts.all? { |t| current.text.downcase.include?(t.downcase) }
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DomElementsTraverser
5
+ module Cursor
6
+ private
7
+
8
+ def current
9
+ @elements[@index]
10
+ end
11
+
12
+ def skip
13
+ @index += 1
14
+ end
15
+
16
+ def skip_until(options)
17
+ oc = ::EacRubyUtils::OptionsConsumer.new(options)
18
+ optional = oc.consume(:optional, false)
19
+ while current
20
+ break if match_conditions?(oc.left_data)
21
+
22
+ skip
23
+ end
24
+ raise "No element found for conditions #{oc.left_data}" unless current || optional
25
+
26
+ current
27
+ end
28
+
29
+ def skip_until_after(conditions)
30
+ skip_until(conditions)
31
+ skip
32
+ current
33
+ end
34
+
35
+ def if_found(conditions, &block)
36
+ marked = @index
37
+ skip_until({ optional: true }.merge(conditions))
38
+ if current
39
+ instance_eval(&block) if block
40
+ else
41
+ @index = marked
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DomElementsTraverser
5
+ module Data
6
+ def data
7
+ @data.dup
8
+ end
9
+
10
+ private
11
+
12
+ def store(key, options = {}, &converter)
13
+ validate(options)
14
+ value = store_value(options, converter)
15
+ @data[key] = value
16
+ r = current
17
+ skip
18
+ r
19
+ end
20
+
21
+ def store_value(options, converter)
22
+ value = if options.key?(:attribute)
23
+ current.attribute(options[:attribute]).value
24
+ else
25
+ current.text.strip
26
+ end
27
+ converter ? converter.call(value) : value
28
+ end
29
+
30
+ def validate(options)
31
+ return unless options.key?(:validate)
32
+ return if match_conditions?(options[:validate])
33
+
34
+ raise "Element does not match conditions #{options[:validate]}" \
35
+ " (Element: |#{current}|#{current.name}|)"
36
+ end
37
+ end
38
+ end
39
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Aranha
2
4
  class Engine < ::Rails::Engine
3
5
  isolate_namespace Aranha
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open-uri'
4
+ require 'fileutils'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ class Base
9
+ def initialize(url)
10
+ @url = url
11
+ end
12
+
13
+ def url
14
+ @url.gsub(%r{/+$}, '')
15
+ end
16
+
17
+ def content
18
+ s = content_by_url_type
19
+ log_content(s)
20
+ s
21
+ end
22
+
23
+ private
24
+
25
+ def content_by_url_type
26
+ if @url.is_a?(Hash)
27
+ content_hash
28
+ elsif /^http/ =~ @url
29
+ content_get
30
+ else
31
+ content_file
32
+ end
33
+ end
34
+
35
+ def content_file
36
+ ::File.open(@url.gsub(%r{\Afile://}, ''), &:read)
37
+ end
38
+
39
+ def content_get
40
+ content_get_fetch(@url)
41
+ end
42
+
43
+ def content_get_fetch(uri, limit = 10)
44
+ raise 'too many HTTP redirects' if limit.zero?
45
+
46
+ response = Net::HTTP.get_response(URI(uri))
47
+
48
+ case response
49
+ when Net::HTTPSuccess then
50
+ response.body
51
+ when Net::HTTPRedirection then
52
+ content_get_fetch(response['location'], limit - 1)
53
+ else
54
+ response.value
55
+ end
56
+ end
57
+
58
+ def content_hash
59
+ return content_post if @url[:method] == :post
60
+
61
+ raise "Unknown URL format: #{@url}"
62
+ end
63
+
64
+ def content_post
65
+ HTTPClient.new.post_content(@url[:url], @url[:params].merge(follow_redirect: true))
66
+ end
67
+
68
+ def log_content(content)
69
+ File.open(log_file, 'wb') { |file| file.write(content) }
70
+ end
71
+
72
+ def log_file
73
+ f = Rails.root.join('log', 'parsers', "#{self.class.name.parameterize}.log")
74
+ FileUtils.mkdir_p(File.dirname(f))
75
+ f
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_dependency 'aranha/parsers/base'
4
+ require_dependency 'aranha/parsers/html/node/default'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ module Html
9
+ class Base < ::Aranha::Parsers::Base
10
+ def nokogiri
11
+ @nokogiri ||= Nokogiri::HTML(content, &:noblanks)
12
+ end
13
+
14
+ protected
15
+
16
+ def node_parser_class
17
+ ::Aranha::Parsers::Html::Node::Default
18
+ end
19
+
20
+ private
21
+
22
+ def node_parser
23
+ @node_parser ||= node_parser_class.new(fields)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ module Html
6
+ class ItemList < Base
7
+ def data
8
+ count = 0
9
+ @data ||= nokogiri.xpath(items_xpath).map do |m|
10
+ count += 1
11
+ node_parser.parse(m)
12
+ end
13
+ rescue StandardError => e
14
+ e.message << " / Count: #{count}"
15
+ raise e
16
+ end
17
+
18
+ def items_xpath
19
+ raise "Class #{self.class} has no method \"item_xpath\". Implement it"
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ module Html
6
+ module Node
7
+ class Base
8
+ attr_reader :fields
9
+
10
+ def initialize(fields)
11
+ @fields = fields
12
+ end
13
+
14
+ def parse(node)
15
+ Hash[fields.map { |f| [f[0], parse_field(node, f[2], f[1])] }]
16
+ end
17
+
18
+ private
19
+
20
+ def parse_field(node, xpath, parser_method)
21
+ value_method = "#{parser_method}_value"
22
+ return send(value_method, node, xpath) if respond_to?(value_method)
23
+
24
+ raise "Method \"#{value_method}\" not found in #{self.class}"
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_dependency 'aranha/parsers/html/node/base'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ module Node
9
+ class Default < ::Aranha::Parsers::Html::Node::Base
10
+ def string_value(node, xpath)
11
+ if node.at_xpath(xpath)
12
+ node.at_xpath(xpath).text.to_s.tr("\u00A0", ' ').strip
13
+ else
14
+ ''
15
+ end
16
+ end
17
+
18
+ def quoted_value(node, xpath)
19
+ s = string_value(node, xpath)
20
+ return '' unless s
21
+
22
+ m = /\"([^\"]+)\"/.match(s)
23
+ return m[1] if m
24
+
25
+ ''
26
+ end
27
+
28
+ def integer_value(node, xpath)
29
+ r = string_value(node, xpath)
30
+ return nil if r.blank?
31
+
32
+ m = /\d+/.match(r)
33
+ raise "Integer not found in \"#{r}\"" unless m
34
+
35
+ m[0].to_i
36
+ end
37
+
38
+ def integer_optional_value(node, xpath)
39
+ r = string_value(node, xpath)
40
+ m = /\d+/.match(r)
41
+ m ? m[0].to_i : nil
42
+ end
43
+
44
+ def float_value(node, xpath)
45
+ parse_float(node, xpath, true)
46
+ end
47
+
48
+ def float_optional_value(node, xpath)
49
+ parse_float(node, xpath, false)
50
+ end
51
+
52
+ def array_value(node, xpath)
53
+ r = node.xpath(xpath).map { |n| n.text.strip }
54
+ r.join('|')
55
+ end
56
+
57
+ def join_value(node, xpath)
58
+ m = ''
59
+ node.xpath(xpath).each do |n|
60
+ m << n.text.strip
61
+ end
62
+ m
63
+ end
64
+
65
+ def duration_value(node, xpath)
66
+ m = /(\d+) m/.match(join_value(node, xpath))
67
+ m ? m[1].to_i : nil
68
+ end
69
+
70
+ def regxep(node, xpath, pattern)
71
+ s = string_value(node, xpath)
72
+ m = pattern.match(s)
73
+ return m if m
74
+
75
+ raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
76
+ end
77
+
78
+ private
79
+
80
+ def parse_float(node, xpath, required)
81
+ s = string_value(node, xpath)
82
+ m = /\d+(?:[\.\,](\d+))?/.match(s)
83
+ if m
84
+ m[0].sub(',', '.').to_f
85
+ elsif required
86
+ raise "Float value not found in \"#{s}\""
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require 'net/http'
3
4
 
4
5
  module Aranha
5
6
  class Processor
6
7
  NETWORK_EXCEPTIONS = [::HTTPClient::BadResponseError, Errno::ECONNRESET,
7
- ::Net::HTTPFatalError].freeze
8
+ ::Net::HTTPFatalError, ::HTTPClient::ReceiveTimeoutError].freeze
8
9
  DEFAULT_MAX_TRIES = 3
9
10
 
10
11
  def initialize
@@ -32,28 +33,29 @@ module Aranha
32
33
  false
33
34
  elsif @failed.any?
34
35
  @try += 1
35
- max_tries > 0 && @try >= max_tries
36
+ max_tries.positive? && @try >= max_tries
36
37
  else
37
38
  true
38
39
  end
39
40
  end
40
41
 
41
- def process_address(a)
42
- Rails.logger.info("Processing #{a} (Try: #{@try}/#{max_tries_s}," \
42
+ def process_address(address)
43
+ Rails.logger.info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
43
44
  " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
44
45
  begin
45
- a.process
46
- @failed.delete(a.id)
46
+ address.process
47
+ @failed.delete(address.id)
47
48
  rescue StandardError => ex
48
- process_exception(a, ex)
49
+ process_exception(address, ex)
49
50
  end
50
51
  end
51
52
 
52
- def process_exception(a, ex)
53
- raise ex unless network_exception?(ex)
54
- @failed[a.id] ||= 0
55
- @failed[a.id] += 1
56
- Rails.logger.warn(ex)
53
+ def process_exception(address, exception)
54
+ raise exception unless network_exception?(exception)
55
+
56
+ @failed[address.id] ||= 0
57
+ @failed[address.id] += 1
58
+ Rails.logger.warn(exception)
57
59
  end
58
60
 
59
61
  def next_address
@@ -64,8 +66,8 @@ module Aranha
64
66
  ::Aranha::Address.unprocessed
65
67
  end
66
68
 
67
- def network_exception?(ex)
68
- NETWORK_EXCEPTIONS.any? { |klass| ex.is_a?(klass) }
69
+ def network_exception?(exception)
70
+ NETWORK_EXCEPTIONS.any? { |klass| exception.is_a?(klass) }
69
71
  end
70
72
 
71
73
  def not_try_ids
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Aranha
3
- VERSION = '0.0.4'
4
+ VERSION = '0.1.0'
4
5
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  namespace(:aranha) do
3
4
  task process: :environment do
4
5
  ::Aranha::Processor.new
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'test_helper'
2
4
 
3
5
  class AranhaTest < ActiveSupport::TestCase
4
- test "truth" do
6
+ test 'truth' do
5
7
  assert_kind_of Module, Aranha
6
8
  end
7
9
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Add your own tasks in files placed in lib/tasks ending in .rake,
2
4
  # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
5
 
4
- require File.expand_path('../config/application', __FILE__)
6
+ require File.expand_path('config/application', __dir__)
5
7
 
6
8
  Rails.application.load_tasks
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class ApplicationController < ActionController::Base
2
4
  # Prevent CSRF attacks by raising an exception.
3
5
  # For APIs, you may want to use :null_session instead.
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ApplicationHelper
2
4
  end
@@ -1,3 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
2
+ # frozen_string_literal: true
3
+
4
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
3
5
  load Gem.bin_path('bundler', 'bundle')
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
- APP_PATH = File.expand_path('../../config/application', __FILE__)
2
+ # frozen_string_literal: true
3
+
4
+ APP_PATH = File.expand_path('../config/application', __dir__)
3
5
  require_relative '../config/boot'
4
6
  require 'rails/commands'
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require_relative '../config/boot'
3
5
  require 'rake'
4
6
  Rake.application.run
@@ -1,16 +1,18 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'pathname'
3
5
 
4
6
  # path to your application root.
5
- APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
7
+ APP_ROOT = Pathname.new File.expand_path('..', __dir__)
6
8
 
7
9
  Dir.chdir APP_ROOT do
8
10
  # This script is a starting point to setup your application.
9
11
  # Add necessary setup steps to this file:
10
12
 
11
- puts "== Installing dependencies =="
12
- system "gem install bundler --conservative"
13
- system "bundle check || bundle install"
13
+ puts '== Installing dependencies =='
14
+ system 'gem install bundler --conservative'
15
+ system 'bundle check || bundle install'
14
16
 
15
17
  # puts "\n== Copying sample files =="
16
18
  # unless File.exist?("config/database.yml")
@@ -18,12 +20,12 @@ Dir.chdir APP_ROOT do
18
20
  # end
19
21
 
20
22
  puts "\n== Preparing database =="
21
- system "bin/rake db:setup"
23
+ system 'bin/rake db:setup'
22
24
 
23
25
  puts "\n== Removing old logs and tempfiles =="
24
- system "rm -f log/*"
25
- system "rm -rf tmp/cache"
26
+ system 'rm -f log/*'
27
+ system 'rm -rf tmp/cache'
26
28
 
27
29
  puts "\n== Restarting application server =="
28
- system "touch tmp/restart.txt"
30
+ system 'touch tmp/restart.txt'
29
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # This file is used by Rack-based servers to start the application.
2
4
 
3
5
  require ::File.expand_path('../config/environment', __FILE__)
@@ -1,9 +1,11 @@
1
- require File.expand_path('../boot', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path('boot', __dir__)
2
4
 
3
5
  require 'rails/all'
4
6
 
5
7
  Bundler.require(*Rails.groups)
6
- require "aranha"
8
+ require 'aranha'
7
9
 
8
10
  module Dummy
9
11
  class Application < Rails::Application
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Set up gems listed in the Gemfile.
2
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../../../Gemfile', __FILE__)
4
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../../Gemfile', __dir__)
3
5
 
4
6
  require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])
5
- $LOAD_PATH.unshift File.expand_path('../../../../lib', __FILE__)
7
+ $LOAD_PATH.unshift File.expand_path('../../../lib', __dir__)
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Load the Rails application.
2
- require File.expand_path('../application', __FILE__)
4
+ require File.expand_path('application', __dir__)
3
5
 
4
6
  # Initialize the Rails application.
5
7
  Rails.application.initialize!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Rails.application.configure do
2
4
  # Settings specified here will take precedence over those in config/application.rb.
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Rails.application.configure do
2
4
  # Settings specified here will take precedence over those in config/application.rb.
3
5
 
@@ -35,7 +37,8 @@ Rails.application.configure do
35
37
  # yet still be able to expire them through the digest params.
36
38
  config.assets.digest = true
37
39
 
38
- # `config.assets.precompile` and `config.assets.version` have moved to config/initializers/assets.rb
40
+ # `config.assets.precompile` and `config.assets.version` have moved to
41
+ # config/initializers/assets.rb
39
42
 
40
43
  # Specifies the header that your server uses for sending files.
41
44
  # config.action_dispatch.x_sendfile_header = 'X-Sendfile' # for Apache
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Rails.application.configure do
2
4
  # Settings specified here will take precedence over those in config/application.rb.
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Version of your assets, change this if you want to expire all your assets.
@@ -1,7 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
- # You can add backtrace silencers for libraries that you're using but don't wish to see in your backtraces.
5
+ # You can add backtrace silencers for libraries that you're using but don't wish to see
6
+ # in your backtraces.
4
7
  # Rails.backtrace_cleaner.add_silencer { |line| line =~ /my_noisy_library/ }
5
8
 
6
- # You can also remove all the silencers if you're trying to debug a problem that might stem from framework code.
9
+ # You can also remove all the silencers if you're trying to debug a problem that might
10
+ # stem from framework code.
7
11
  # Rails.backtrace_cleaner.remove_silencers!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  Rails.application.config.action_dispatch.cookies_serializer = :json
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Configure sensitive parameters which will be filtered from the log file.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Add new inflection rules using the following format. Inflections
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Add new mime types for use in respond_to blocks:
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  Rails.application.config.session_store :cookie_store, key: '_dummy_session'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Preserve the timezone of the receiver when calling to `to_time`.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # This file contains settings for ActionController::ParamsWrapper which
@@ -1,4 +1,5 @@
1
- Rails.application.routes.draw do
1
+ # frozen_string_literal: true
2
2
 
3
- mount Aranha::Engine => "/aranha"
3
+ Rails.application.routes.draw do
4
+ mount Aranha::Engine => '/aranha'
4
5
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'test_helper'
2
4
 
3
5
  class NavigationTest < ActionDispatch::IntegrationTest
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  # Configure Rails Environment
3
4
  ENV['RAILS_ENV'] = 'test'
4
5
 
5
- require File.expand_path('../../test/dummy/config/environment.rb', __FILE__)
6
+ require File.expand_path('../test/dummy/config/environment.rb', __dir__)
6
7
  ActiveRecord::Migrator.migrations_paths = [
7
- File.expand_path('../../test/dummy/db/migrate', __FILE__)
8
+ File.expand_path('../test/dummy/db/migrate', __dir__)
8
9
  ]
9
10
  require 'rails/test_help'
10
11
 
@@ -17,6 +18,6 @@ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
17
18
 
18
19
  # Load fixtures from the engine
19
20
  if ActiveSupport::TestCase.respond_to?(:fixture_path=)
20
- ActiveSupport::TestCase.fixture_path = File.expand_path('../fixtures', __FILE__)
21
+ ActiveSupport::TestCase.fixture_path = File.expand_path('fixtures', __dir__)
21
22
  ActiveSupport::TestCase.fixtures :all
22
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-10 00:00:00.000000000 Z
11
+ date: 2018-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_scaffold
@@ -25,19 +25,19 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 3.4.41.1
27
27
  - !ruby/object:Gem::Dependency
28
- name: rails
28
+ name: eac_ruby_utils
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 4.2.10
33
+ version: '0.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 4.2.10
40
+ version: '0.3'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: httpclient
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rails
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 4.2.10
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 4.2.10
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: sqlite3
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -86,7 +100,17 @@ files:
86
100
  - config/routes.rb
87
101
  - db/migrate/20171201021251_create_aranha_addresses.rb
88
102
  - lib/aranha.rb
103
+ - lib/aranha/default_processor.rb
104
+ - lib/aranha/dom_elements_traverser.rb
105
+ - lib/aranha/dom_elements_traverser/conditions.rb
106
+ - lib/aranha/dom_elements_traverser/cursor.rb
107
+ - lib/aranha/dom_elements_traverser/data.rb
89
108
  - lib/aranha/engine.rb
109
+ - lib/aranha/parsers/base.rb
110
+ - lib/aranha/parsers/html/base.rb
111
+ - lib/aranha/parsers/html/item_list.rb
112
+ - lib/aranha/parsers/html/node/base.rb
113
+ - lib/aranha/parsers/html/node/default.rb
90
114
  - lib/aranha/processor.rb
91
115
  - lib/aranha/version.rb
92
116
  - lib/tasks/aranha_tasks.rake
@@ -156,40 +180,40 @@ summary: Rails utilities for web crawling.
156
180
  test_files:
157
181
  - test/dummy/Rakefile
158
182
  - test/dummy/README.rdoc
159
- - test/dummy/bin/rails
160
- - test/dummy/bin/rake
161
- - test/dummy/bin/bundle
162
- - test/dummy/bin/setup
163
- - test/dummy/config/initializers/assets.rb
164
- - test/dummy/config/initializers/to_time_preserves_timezone.rb
165
- - test/dummy/config/initializers/wrap_parameters.rb
166
- - test/dummy/config/initializers/session_store.rb
167
- - test/dummy/config/initializers/cookies_serializer.rb
168
- - test/dummy/config/initializers/inflections.rb
169
- - test/dummy/config/initializers/mime_types.rb
170
- - test/dummy/config/initializers/backtrace_silencers.rb
171
- - test/dummy/config/initializers/filter_parameter_logging.rb
183
+ - test/dummy/config.ru
172
184
  - test/dummy/config/boot.rb
173
- - test/dummy/config/locales/en.yml
174
- - test/dummy/config/secrets.yml
175
- - test/dummy/config/environment.rb
176
185
  - test/dummy/config/database.yml
177
- - test/dummy/config/routes.rb
186
+ - test/dummy/config/secrets.yml
187
+ - test/dummy/config/locales/en.yml
188
+ - test/dummy/config/application.rb
178
189
  - test/dummy/config/environments/development.rb
179
190
  - test/dummy/config/environments/test.rb
180
191
  - test/dummy/config/environments/production.rb
181
- - test/dummy/config/application.rb
182
- - test/dummy/app/assets/stylesheets/application.css
183
- - test/dummy/app/assets/javascripts/application.js
192
+ - test/dummy/config/environment.rb
193
+ - test/dummy/config/routes.rb
194
+ - test/dummy/config/initializers/assets.rb
195
+ - test/dummy/config/initializers/cookies_serializer.rb
196
+ - test/dummy/config/initializers/inflections.rb
197
+ - test/dummy/config/initializers/session_store.rb
198
+ - test/dummy/config/initializers/wrap_parameters.rb
199
+ - test/dummy/config/initializers/to_time_preserves_timezone.rb
200
+ - test/dummy/config/initializers/filter_parameter_logging.rb
201
+ - test/dummy/config/initializers/backtrace_silencers.rb
202
+ - test/dummy/config/initializers/mime_types.rb
203
+ - test/dummy/db/schema.rb
184
204
  - test/dummy/app/views/layouts/application.html.erb
185
- - test/dummy/app/helpers/application_helper.rb
186
205
  - test/dummy/app/controllers/application_controller.rb
187
- - test/dummy/db/schema.rb
188
- - test/dummy/public/favicon.ico
189
- - test/dummy/public/500.html
206
+ - test/dummy/app/helpers/application_helper.rb
207
+ - test/dummy/app/assets/stylesheets/application.css
208
+ - test/dummy/app/assets/javascripts/application.js
190
209
  - test/dummy/public/422.html
191
210
  - test/dummy/public/404.html
192
- - test/dummy/config.ru
193
- - test/test_helper.rb
211
+ - test/dummy/public/favicon.ico
212
+ - test/dummy/public/500.html
213
+ - test/dummy/bin/bundle
214
+ - test/dummy/bin/setup
215
+ - test/dummy/bin/rails
216
+ - test/dummy/bin/rake
194
217
  - test/aranha_test.rb
218
+ - test/test_helper.rb
195
219
  - test/integration/navigation_test.rb