aranha 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -5
  3. data/app/controllers/aranha/addresses_controller.rb +1 -0
  4. data/app/controllers/aranha/application_controller.rb +2 -0
  5. data/app/helpers/aranha/application_helper.rb +2 -0
  6. data/app/models/aranha/address.rb +1 -0
  7. data/config/routes.rb +2 -0
  8. data/db/migrate/20171201021251_create_aranha_addresses.rb +1 -0
  9. data/lib/aranha.rb +6 -0
  10. data/lib/aranha/default_processor.rb +35 -0
  11. data/lib/aranha/dom_elements_traverser.rb +44 -0
  12. data/lib/aranha/dom_elements_traverser/conditions.rb +32 -0
  13. data/lib/aranha/dom_elements_traverser/cursor.rb +46 -0
  14. data/lib/aranha/dom_elements_traverser/data.rb +39 -0
  15. data/lib/aranha/engine.rb +2 -0
  16. data/lib/aranha/parsers/base.rb +79 -0
  17. data/lib/aranha/parsers/html/base.rb +28 -0
  18. data/lib/aranha/parsers/html/item_list.rb +24 -0
  19. data/lib/aranha/parsers/html/node/base.rb +30 -0
  20. data/lib/aranha/parsers/html/node/default.rb +93 -0
  21. data/lib/aranha/processor.rb +16 -14
  22. data/lib/aranha/version.rb +2 -1
  23. data/lib/tasks/aranha_tasks.rake +1 -0
  24. data/test/aranha_test.rb +3 -1
  25. data/test/dummy/Rakefile +3 -1
  26. data/test/dummy/app/controllers/application_controller.rb +2 -0
  27. data/test/dummy/app/helpers/application_helper.rb +2 -0
  28. data/test/dummy/bin/bundle +3 -1
  29. data/test/dummy/bin/rails +3 -1
  30. data/test/dummy/bin/rake +2 -0
  31. data/test/dummy/bin/setup +10 -8
  32. data/test/dummy/config.ru +2 -0
  33. data/test/dummy/config/application.rb +4 -2
  34. data/test/dummy/config/boot.rb +4 -2
  35. data/test/dummy/config/environment.rb +3 -1
  36. data/test/dummy/config/environments/development.rb +2 -0
  37. data/test/dummy/config/environments/production.rb +4 -1
  38. data/test/dummy/config/environments/test.rb +2 -0
  39. data/test/dummy/config/initializers/assets.rb +2 -0
  40. data/test/dummy/config/initializers/backtrace_silencers.rb +6 -2
  41. data/test/dummy/config/initializers/cookies_serializer.rb +2 -0
  42. data/test/dummy/config/initializers/filter_parameter_logging.rb +2 -0
  43. data/test/dummy/config/initializers/inflections.rb +2 -0
  44. data/test/dummy/config/initializers/mime_types.rb +2 -0
  45. data/test/dummy/config/initializers/session_store.rb +2 -0
  46. data/test/dummy/config/initializers/to_time_preserves_timezone.rb +2 -0
  47. data/test/dummy/config/initializers/wrap_parameters.rb +2 -0
  48. data/test/dummy/config/routes.rb +3 -2
  49. data/test/integration/navigation_test.rb +2 -0
  50. data/test/test_helper.rb +4 -3
  51. metadata +55 -31
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db4987f0c4c725a35cddd376afbd950d541491d700e3f392bb4def95d2885a24
4
- data.tar.gz: f5d84a48f26780e4a9c84c623a150ccd1528afdd0c3b39c23580bbe22dbe4896
3
+ metadata.gz: e3050cda5e754315e7b80518851f3ce91aa167e8ca8359084203ed61b20c7aa7
4
+ data.tar.gz: 80525648edf9ba10f99f7ab2bd684a0916ce91ea20892550b8bb6f19d092b7d7
5
5
  SHA512:
6
- metadata.gz: 9ec0aa19303a62e0f260da849f55524d14f539b522b4944f28fb399698a60bc6cb4f2f3c10aaf5782c76ba1d5c5102d1350a6aaf7fd050a2a3969d3ced028ba9
7
- data.tar.gz: f098aeb43bce8afe3a9330667ef0e34805852737857bf54f066c4ad976b48b2b4ed05d8a36d3c560f41640596889a30604e239a0c30e8a4d9bb8b0a165e9b589
6
+ metadata.gz: 1b2036fa6ebc24937ac23fda40a8aacbf0a89c3f233d17ddf82c15ecdf1c26bbe36b39b04eeb2f4fa8bfcaa0170477a46338bb74f8ba356d84c591e59944c17c
7
+ data.tar.gz: 919372af358638d177b31f61dfe65b41a7d7cb81cb50d235b818e63c8de306d102fc0508143dfa6b4da6fffa23ad02d1889f4e364ffc3f5ddb56082552a629a1
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  begin
2
4
  require 'bundler/setup'
3
5
  rescue LoadError
@@ -14,14 +16,11 @@ RDoc::Task.new(:rdoc) do |rdoc|
14
16
  rdoc.rdoc_files.include('lib/**/*.rb')
15
17
  end
16
18
 
17
- APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
19
+ APP_RAKEFILE = File.expand_path('test/dummy/Rakefile', __dir__)
18
20
  load 'rails/tasks/engine.rake'
19
21
 
20
-
21
22
  load 'rails/tasks/statistics.rake'
22
23
 
23
-
24
-
25
24
  Bundler::GemHelper.install_tasks
26
25
 
27
26
  require 'rake/testtask'
@@ -33,5 +32,4 @@ Rake::TestTask.new(:test) do |t|
33
32
  t.verbose = false
34
33
  end
35
34
 
36
-
37
35
  task default: :test
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_dependency 'aranha/application_controller'
3
4
 
4
5
  module Aranha
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Aranha
2
4
  class ApplicationController < ActionController::Base
3
5
  protect_from_forgery with: :exception
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Aranha
2
4
  module ApplicationHelper
3
5
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Aranha
3
4
  class Address < ActiveRecord::Base
4
5
  include ::Eac::InequalityQueries
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Aranha::Engine.routes.draw do
2
4
  resources(:addresses) { as_routes }
3
5
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class CreateAranhaAddresses < ActiveRecord::Migration
3
4
  def change
4
5
  create_table :aranha_addresses do |t|
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require 'httpclient'
3
4
  require 'active_support/dependencies'
4
5
  require_dependency 'aranha/engine'
@@ -7,4 +8,9 @@ require_dependency 'active_scaffold'
7
8
  module Aranha
8
9
  end
9
10
 
11
+ require_dependency 'aranha/default_processor'
10
12
  require_dependency 'aranha/processor'
13
+ require_dependency 'aranha/parsers/base'
14
+ require_dependency 'aranha/parsers/html/base'
15
+ require_dependency 'aranha/parsers/html/item_list'
16
+ require_dependency 'aranha/dom_elements_traverser'
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DefaultProcessor
5
+ attr_reader :source_uri
6
+
7
+ def initialize(source_uri)
8
+ unless source_uri.is_a?(Addressable::URI)
9
+ source_uri = source_uri.to_s.gsub(%r{\A/}, 'file:///')
10
+ end
11
+ @source_uri = Addressable::URI.parse(source_uri)
12
+ end
13
+
14
+ def process
15
+ raise 'Implement method process'
16
+ end
17
+
18
+ protected
19
+
20
+ def target_uri
21
+ source_uri
22
+ end
23
+
24
+ def data
25
+ @data ||= parser_class.new(target_uri).data
26
+ end
27
+
28
+ def parser_class
29
+ r = self.class.name.gsub('::Processors::', '::Parsers::').constantize
30
+ return r unless is_a?(r)
31
+
32
+ raise "Parser can be not the process class: #{r}"
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_dependency 'aranha/dom_elements_traverser/conditions'
4
+ require_dependency 'aranha/dom_elements_traverser/data'
5
+ require_dependency 'aranha/dom_elements_traverser/cursor'
6
+
7
+ module Aranha
8
+ class DomElementsTraverser
9
+ include ::Aranha::DomElementsTraverser::Conditions
10
+ include ::Aranha::DomElementsTraverser::Cursor
11
+ include ::Aranha::DomElementsTraverser::Data
12
+
13
+ class << self
14
+ def traverse(options, &block)
15
+ new(elements_from_options(options), &block)
16
+ end
17
+
18
+ def empty
19
+ new([])
20
+ end
21
+
22
+ private
23
+
24
+ def elements_from_options(options)
25
+ options = ::EacRubyUtils::OptionsConsumer.new(options)
26
+ elements = nil
27
+ options.consume(:children_of) { |v| elements = v.children.to_a }
28
+ raise 'None option of [:children_of] defined' unless elements
29
+
30
+ options.validate
31
+ elements
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def initialize(elements, &block)
38
+ @elements = elements
39
+ @index = 0
40
+ @data = {}
41
+ instance_eval(&block) if block
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DomElementsTraverser
5
+ module Conditions
6
+ private
7
+
8
+ def match_conditions?(conditions)
9
+ raise "No element (Conditions: #{conditions})" unless current
10
+
11
+ conditions.all? { |key, value| match_condition?(key, value) }
12
+ end
13
+
14
+ def match_condition?(key, value)
15
+ case key.to_sym
16
+ when :text then match_text_condition?(value)
17
+ when :name then match_name_condition?(value)
18
+ else raise "Unknown key condition: (#{key})"
19
+ end
20
+ end
21
+
22
+ def match_name_condition?(tag_name)
23
+ current.name.casecmp(tag_name.to_s).zero?
24
+ end
25
+
26
+ def match_text_condition?(texts)
27
+ texts = [texts.to_s] unless texts.is_a?(Array)
28
+ texts.all? { |t| current.text.downcase.include?(t.downcase) }
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DomElementsTraverser
5
+ module Cursor
6
+ private
7
+
8
+ def current
9
+ @elements[@index]
10
+ end
11
+
12
+ def skip
13
+ @index += 1
14
+ end
15
+
16
+ def skip_until(options)
17
+ oc = ::EacRubyUtils::OptionsConsumer.new(options)
18
+ optional = oc.consume(:optional, false)
19
+ while current
20
+ break if match_conditions?(oc.left_data)
21
+
22
+ skip
23
+ end
24
+ raise "No element found for conditions #{oc.left_data}" unless current || optional
25
+
26
+ current
27
+ end
28
+
29
+ def skip_until_after(conditions)
30
+ skip_until(conditions)
31
+ skip
32
+ current
33
+ end
34
+
35
+ def if_found(conditions, &block)
36
+ marked = @index
37
+ skip_until({ optional: true }.merge(conditions))
38
+ if current
39
+ instance_eval(&block) if block
40
+ else
41
+ @index = marked
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ class DomElementsTraverser
5
+ module Data
6
+ def data
7
+ @data.dup
8
+ end
9
+
10
+ private
11
+
12
+ def store(key, options = {}, &converter)
13
+ validate(options)
14
+ value = store_value(options, converter)
15
+ @data[key] = value
16
+ r = current
17
+ skip
18
+ r
19
+ end
20
+
21
+ def store_value(options, converter)
22
+ value = if options.key?(:attribute)
23
+ current.attribute(options[:attribute]).value
24
+ else
25
+ current.text.strip
26
+ end
27
+ converter ? converter.call(value) : value
28
+ end
29
+
30
+ def validate(options)
31
+ return unless options.key?(:validate)
32
+ return if match_conditions?(options[:validate])
33
+
34
+ raise "Element does not match conditions #{options[:validate]}" \
35
+ " (Element: |#{current}|#{current.name}|)"
36
+ end
37
+ end
38
+ end
39
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Aranha
2
4
  class Engine < ::Rails::Engine
3
5
  isolate_namespace Aranha
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open-uri'
4
+ require 'fileutils'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ class Base
9
+ def initialize(url)
10
+ @url = url
11
+ end
12
+
13
+ def url
14
+ @url.gsub(%r{/+$}, '')
15
+ end
16
+
17
+ def content
18
+ s = content_by_url_type
19
+ log_content(s)
20
+ s
21
+ end
22
+
23
+ private
24
+
25
+ def content_by_url_type
26
+ if @url.is_a?(Hash)
27
+ content_hash
28
+ elsif /^http/ =~ @url
29
+ content_get
30
+ else
31
+ content_file
32
+ end
33
+ end
34
+
35
+ def content_file
36
+ ::File.open(@url.gsub(%r{\Afile://}, ''), &:read)
37
+ end
38
+
39
+ def content_get
40
+ content_get_fetch(@url)
41
+ end
42
+
43
+ def content_get_fetch(uri, limit = 10)
44
+ raise 'too many HTTP redirects' if limit.zero?
45
+
46
+ response = Net::HTTP.get_response(URI(uri))
47
+
48
+ case response
49
+ when Net::HTTPSuccess then
50
+ response.body
51
+ when Net::HTTPRedirection then
52
+ content_get_fetch(response['location'], limit - 1)
53
+ else
54
+ response.value
55
+ end
56
+ end
57
+
58
+ def content_hash
59
+ return content_post if @url[:method] == :post
60
+
61
+ raise "Unknown URL format: #{@url}"
62
+ end
63
+
64
+ def content_post
65
+ HTTPClient.new.post_content(@url[:url], @url[:params].merge(follow_redirect: true))
66
+ end
67
+
68
+ def log_content(content)
69
+ File.open(log_file, 'wb') { |file| file.write(content) }
70
+ end
71
+
72
+ def log_file
73
+ f = Rails.root.join('log', 'parsers', "#{self.class.name.parameterize}.log")
74
+ FileUtils.mkdir_p(File.dirname(f))
75
+ f
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_dependency 'aranha/parsers/base'
4
+ require_dependency 'aranha/parsers/html/node/default'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ module Html
9
+ class Base < ::Aranha::Parsers::Base
10
+ def nokogiri
11
+ @nokogiri ||= Nokogiri::HTML(content, &:noblanks)
12
+ end
13
+
14
+ protected
15
+
16
+ def node_parser_class
17
+ ::Aranha::Parsers::Html::Node::Default
18
+ end
19
+
20
+ private
21
+
22
+ def node_parser
23
+ @node_parser ||= node_parser_class.new(fields)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ module Html
6
+ class ItemList < Base
7
+ def data
8
+ count = 0
9
+ @data ||= nokogiri.xpath(items_xpath).map do |m|
10
+ count += 1
11
+ node_parser.parse(m)
12
+ end
13
+ rescue StandardError => e
14
+ e.message << " / Count: #{count}"
15
+ raise e
16
+ end
17
+
18
+ def items_xpath
19
+ raise "Class #{self.class} has no method \"item_xpath\". Implement it"
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ module Html
6
+ module Node
7
+ class Base
8
+ attr_reader :fields
9
+
10
+ def initialize(fields)
11
+ @fields = fields
12
+ end
13
+
14
+ def parse(node)
15
+ Hash[fields.map { |f| [f[0], parse_field(node, f[2], f[1])] }]
16
+ end
17
+
18
+ private
19
+
20
+ def parse_field(node, xpath, parser_method)
21
+ value_method = "#{parser_method}_value"
22
+ return send(value_method, node, xpath) if respond_to?(value_method)
23
+
24
+ raise "Method \"#{value_method}\" not found in #{self.class}"
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_dependency 'aranha/parsers/html/node/base'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ module Node
9
+ class Default < ::Aranha::Parsers::Html::Node::Base
10
+ def string_value(node, xpath)
11
+ if node.at_xpath(xpath)
12
+ node.at_xpath(xpath).text.to_s.tr("\u00A0", ' ').strip
13
+ else
14
+ ''
15
+ end
16
+ end
17
+
18
+ def quoted_value(node, xpath)
19
+ s = string_value(node, xpath)
20
+ return '' unless s
21
+
22
+ m = /\"([^\"]+)\"/.match(s)
23
+ return m[1] if m
24
+
25
+ ''
26
+ end
27
+
28
+ def integer_value(node, xpath)
29
+ r = string_value(node, xpath)
30
+ return nil if r.blank?
31
+
32
+ m = /\d+/.match(r)
33
+ raise "Integer not found in \"#{r}\"" unless m
34
+
35
+ m[0].to_i
36
+ end
37
+
38
+ def integer_optional_value(node, xpath)
39
+ r = string_value(node, xpath)
40
+ m = /\d+/.match(r)
41
+ m ? m[0].to_i : nil
42
+ end
43
+
44
+ def float_value(node, xpath)
45
+ parse_float(node, xpath, true)
46
+ end
47
+
48
+ def float_optional_value(node, xpath)
49
+ parse_float(node, xpath, false)
50
+ end
51
+
52
+ def array_value(node, xpath)
53
+ r = node.xpath(xpath).map { |n| n.text.strip }
54
+ r.join('|')
55
+ end
56
+
57
+ def join_value(node, xpath)
58
+ m = ''
59
+ node.xpath(xpath).each do |n|
60
+ m << n.text.strip
61
+ end
62
+ m
63
+ end
64
+
65
+ def duration_value(node, xpath)
66
+ m = /(\d+) m/.match(join_value(node, xpath))
67
+ m ? m[1].to_i : nil
68
+ end
69
+
70
+ def regxep(node, xpath, pattern)
71
+ s = string_value(node, xpath)
72
+ m = pattern.match(s)
73
+ return m if m
74
+
75
+ raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
76
+ end
77
+
78
+ private
79
+
80
+ def parse_float(node, xpath, required)
81
+ s = string_value(node, xpath)
82
+ m = /\d+(?:[\.\,](\d+))?/.match(s)
83
+ if m
84
+ m[0].sub(',', '.').to_f
85
+ elsif required
86
+ raise "Float value not found in \"#{s}\""
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require 'net/http'
3
4
 
4
5
  module Aranha
5
6
  class Processor
6
7
  NETWORK_EXCEPTIONS = [::HTTPClient::BadResponseError, Errno::ECONNRESET,
7
- ::Net::HTTPFatalError].freeze
8
+ ::Net::HTTPFatalError, ::HTTPClient::ReceiveTimeoutError].freeze
8
9
  DEFAULT_MAX_TRIES = 3
9
10
 
10
11
  def initialize
@@ -32,28 +33,29 @@ module Aranha
32
33
  false
33
34
  elsif @failed.any?
34
35
  @try += 1
35
- max_tries > 0 && @try >= max_tries
36
+ max_tries.positive? && @try >= max_tries
36
37
  else
37
38
  true
38
39
  end
39
40
  end
40
41
 
41
- def process_address(a)
42
- Rails.logger.info("Processing #{a} (Try: #{@try}/#{max_tries_s}," \
42
+ def process_address(address)
43
+ Rails.logger.info("Processing #{address} (Try: #{@try}/#{max_tries_s}," \
43
44
  " Unprocessed: #{unprocessed.count}/#{Aranha::Address.count})")
44
45
  begin
45
- a.process
46
- @failed.delete(a.id)
46
+ address.process
47
+ @failed.delete(address.id)
47
48
  rescue StandardError => ex
48
- process_exception(a, ex)
49
+ process_exception(address, ex)
49
50
  end
50
51
  end
51
52
 
52
- def process_exception(a, ex)
53
- raise ex unless network_exception?(ex)
54
- @failed[a.id] ||= 0
55
- @failed[a.id] += 1
56
- Rails.logger.warn(ex)
53
+ def process_exception(address, exception)
54
+ raise exception unless network_exception?(exception)
55
+
56
+ @failed[address.id] ||= 0
57
+ @failed[address.id] += 1
58
+ Rails.logger.warn(exception)
57
59
  end
58
60
 
59
61
  def next_address
@@ -64,8 +66,8 @@ module Aranha
64
66
  ::Aranha::Address.unprocessed
65
67
  end
66
68
 
67
- def network_exception?(ex)
68
- NETWORK_EXCEPTIONS.any? { |klass| ex.is_a?(klass) }
69
+ def network_exception?(exception)
70
+ NETWORK_EXCEPTIONS.any? { |klass| exception.is_a?(klass) }
69
71
  end
70
72
 
71
73
  def not_try_ids
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Aranha
3
- VERSION = '0.0.4'
4
+ VERSION = '0.1.0'
4
5
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  namespace(:aranha) do
3
4
  task process: :environment do
4
5
  ::Aranha::Processor.new
@@ -1,7 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'test_helper'
2
4
 
3
5
  class AranhaTest < ActiveSupport::TestCase
4
- test "truth" do
6
+ test 'truth' do
5
7
  assert_kind_of Module, Aranha
6
8
  end
7
9
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Add your own tasks in files placed in lib/tasks ending in .rake,
2
4
  # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
5
 
4
- require File.expand_path('../config/application', __FILE__)
6
+ require File.expand_path('config/application', __dir__)
5
7
 
6
8
  Rails.application.load_tasks
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class ApplicationController < ActionController::Base
2
4
  # Prevent CSRF attacks by raising an exception.
3
5
  # For APIs, you may want to use :null_session instead.
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ApplicationHelper
2
4
  end
@@ -1,3 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
2
+ # frozen_string_literal: true
3
+
4
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
3
5
  load Gem.bin_path('bundler', 'bundle')
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
- APP_PATH = File.expand_path('../../config/application', __FILE__)
2
+ # frozen_string_literal: true
3
+
4
+ APP_PATH = File.expand_path('../config/application', __dir__)
3
5
  require_relative '../config/boot'
4
6
  require 'rails/commands'
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require_relative '../config/boot'
3
5
  require 'rake'
4
6
  Rake.application.run
@@ -1,16 +1,18 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require 'pathname'
3
5
 
4
6
  # path to your application root.
5
- APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
7
+ APP_ROOT = Pathname.new File.expand_path('..', __dir__)
6
8
 
7
9
  Dir.chdir APP_ROOT do
8
10
  # This script is a starting point to setup your application.
9
11
  # Add necessary setup steps to this file:
10
12
 
11
- puts "== Installing dependencies =="
12
- system "gem install bundler --conservative"
13
- system "bundle check || bundle install"
13
+ puts '== Installing dependencies =='
14
+ system 'gem install bundler --conservative'
15
+ system 'bundle check || bundle install'
14
16
 
15
17
  # puts "\n== Copying sample files =="
16
18
  # unless File.exist?("config/database.yml")
@@ -18,12 +20,12 @@ Dir.chdir APP_ROOT do
18
20
  # end
19
21
 
20
22
  puts "\n== Preparing database =="
21
- system "bin/rake db:setup"
23
+ system 'bin/rake db:setup'
22
24
 
23
25
  puts "\n== Removing old logs and tempfiles =="
24
- system "rm -f log/*"
25
- system "rm -rf tmp/cache"
26
+ system 'rm -f log/*'
27
+ system 'rm -rf tmp/cache'
26
28
 
27
29
  puts "\n== Restarting application server =="
28
- system "touch tmp/restart.txt"
30
+ system 'touch tmp/restart.txt'
29
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # This file is used by Rack-based servers to start the application.
2
4
 
3
5
  require ::File.expand_path('../config/environment', __FILE__)
@@ -1,9 +1,11 @@
1
- require File.expand_path('../boot', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path('boot', __dir__)
2
4
 
3
5
  require 'rails/all'
4
6
 
5
7
  Bundler.require(*Rails.groups)
6
- require "aranha"
8
+ require 'aranha'
7
9
 
8
10
  module Dummy
9
11
  class Application < Rails::Application
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Set up gems listed in the Gemfile.
2
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../../../Gemfile', __FILE__)
4
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../../Gemfile', __dir__)
3
5
 
4
6
  require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])
5
- $LOAD_PATH.unshift File.expand_path('../../../../lib', __FILE__)
7
+ $LOAD_PATH.unshift File.expand_path('../../../lib', __dir__)
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Load the Rails application.
2
- require File.expand_path('../application', __FILE__)
4
+ require File.expand_path('application', __dir__)
3
5
 
4
6
  # Initialize the Rails application.
5
7
  Rails.application.initialize!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Rails.application.configure do
2
4
  # Settings specified here will take precedence over those in config/application.rb.
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Rails.application.configure do
2
4
  # Settings specified here will take precedence over those in config/application.rb.
3
5
 
@@ -35,7 +37,8 @@ Rails.application.configure do
35
37
  # yet still be able to expire them through the digest params.
36
38
  config.assets.digest = true
37
39
 
38
- # `config.assets.precompile` and `config.assets.version` have moved to config/initializers/assets.rb
40
+ # `config.assets.precompile` and `config.assets.version` have moved to
41
+ # config/initializers/assets.rb
39
42
 
40
43
  # Specifies the header that your server uses for sending files.
41
44
  # config.action_dispatch.x_sendfile_header = 'X-Sendfile' # for Apache
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Rails.application.configure do
2
4
  # Settings specified here will take precedence over those in config/application.rb.
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Version of your assets, change this if you want to expire all your assets.
@@ -1,7 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
- # You can add backtrace silencers for libraries that you're using but don't wish to see in your backtraces.
5
+ # You can add backtrace silencers for libraries that you're using but don't wish to see
6
+ # in your backtraces.
4
7
  # Rails.backtrace_cleaner.add_silencer { |line| line =~ /my_noisy_library/ }
5
8
 
6
- # You can also remove all the silencers if you're trying to debug a problem that might stem from framework code.
9
+ # You can also remove all the silencers if you're trying to debug a problem that might
10
+ # stem from framework code.
7
11
  # Rails.backtrace_cleaner.remove_silencers!
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  Rails.application.config.action_dispatch.cookies_serializer = :json
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Configure sensitive parameters which will be filtered from the log file.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Add new inflection rules using the following format. Inflections
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Add new mime types for use in respond_to blocks:
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  Rails.application.config.session_store :cookie_store, key: '_dummy_session'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # Preserve the timezone of the receiver when calling to `to_time`.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Be sure to restart your server when you modify this file.
2
4
 
3
5
  # This file contains settings for ActionController::ParamsWrapper which
@@ -1,4 +1,5 @@
1
- Rails.application.routes.draw do
1
+ # frozen_string_literal: true
2
2
 
3
- mount Aranha::Engine => "/aranha"
3
+ Rails.application.routes.draw do
4
+ mount Aranha::Engine => '/aranha'
4
5
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'test_helper'
2
4
 
3
5
  class NavigationTest < ActionDispatch::IntegrationTest
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  # Configure Rails Environment
3
4
  ENV['RAILS_ENV'] = 'test'
4
5
 
5
- require File.expand_path('../../test/dummy/config/environment.rb', __FILE__)
6
+ require File.expand_path('../test/dummy/config/environment.rb', __dir__)
6
7
  ActiveRecord::Migrator.migrations_paths = [
7
- File.expand_path('../../test/dummy/db/migrate', __FILE__)
8
+ File.expand_path('../test/dummy/db/migrate', __dir__)
8
9
  ]
9
10
  require 'rails/test_help'
10
11
 
@@ -17,6 +18,6 @@ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
17
18
 
18
19
  # Load fixtures from the engine
19
20
  if ActiveSupport::TestCase.respond_to?(:fixture_path=)
20
- ActiveSupport::TestCase.fixture_path = File.expand_path('../fixtures', __FILE__)
21
+ ActiveSupport::TestCase.fixture_path = File.expand_path('fixtures', __dir__)
21
22
  ActiveSupport::TestCase.fixtures :all
22
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-10 00:00:00.000000000 Z
11
+ date: 2018-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_scaffold
@@ -25,19 +25,19 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 3.4.41.1
27
27
  - !ruby/object:Gem::Dependency
28
- name: rails
28
+ name: eac_ruby_utils
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 4.2.10
33
+ version: '0.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 4.2.10
40
+ version: '0.3'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: httpclient
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rails
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 4.2.10
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 4.2.10
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: sqlite3
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -86,7 +100,17 @@ files:
86
100
  - config/routes.rb
87
101
  - db/migrate/20171201021251_create_aranha_addresses.rb
88
102
  - lib/aranha.rb
103
+ - lib/aranha/default_processor.rb
104
+ - lib/aranha/dom_elements_traverser.rb
105
+ - lib/aranha/dom_elements_traverser/conditions.rb
106
+ - lib/aranha/dom_elements_traverser/cursor.rb
107
+ - lib/aranha/dom_elements_traverser/data.rb
89
108
  - lib/aranha/engine.rb
109
+ - lib/aranha/parsers/base.rb
110
+ - lib/aranha/parsers/html/base.rb
111
+ - lib/aranha/parsers/html/item_list.rb
112
+ - lib/aranha/parsers/html/node/base.rb
113
+ - lib/aranha/parsers/html/node/default.rb
90
114
  - lib/aranha/processor.rb
91
115
  - lib/aranha/version.rb
92
116
  - lib/tasks/aranha_tasks.rake
@@ -156,40 +180,40 @@ summary: Rails utilities for web crawling.
156
180
  test_files:
157
181
  - test/dummy/Rakefile
158
182
  - test/dummy/README.rdoc
159
- - test/dummy/bin/rails
160
- - test/dummy/bin/rake
161
- - test/dummy/bin/bundle
162
- - test/dummy/bin/setup
163
- - test/dummy/config/initializers/assets.rb
164
- - test/dummy/config/initializers/to_time_preserves_timezone.rb
165
- - test/dummy/config/initializers/wrap_parameters.rb
166
- - test/dummy/config/initializers/session_store.rb
167
- - test/dummy/config/initializers/cookies_serializer.rb
168
- - test/dummy/config/initializers/inflections.rb
169
- - test/dummy/config/initializers/mime_types.rb
170
- - test/dummy/config/initializers/backtrace_silencers.rb
171
- - test/dummy/config/initializers/filter_parameter_logging.rb
183
+ - test/dummy/config.ru
172
184
  - test/dummy/config/boot.rb
173
- - test/dummy/config/locales/en.yml
174
- - test/dummy/config/secrets.yml
175
- - test/dummy/config/environment.rb
176
185
  - test/dummy/config/database.yml
177
- - test/dummy/config/routes.rb
186
+ - test/dummy/config/secrets.yml
187
+ - test/dummy/config/locales/en.yml
188
+ - test/dummy/config/application.rb
178
189
  - test/dummy/config/environments/development.rb
179
190
  - test/dummy/config/environments/test.rb
180
191
  - test/dummy/config/environments/production.rb
181
- - test/dummy/config/application.rb
182
- - test/dummy/app/assets/stylesheets/application.css
183
- - test/dummy/app/assets/javascripts/application.js
192
+ - test/dummy/config/environment.rb
193
+ - test/dummy/config/routes.rb
194
+ - test/dummy/config/initializers/assets.rb
195
+ - test/dummy/config/initializers/cookies_serializer.rb
196
+ - test/dummy/config/initializers/inflections.rb
197
+ - test/dummy/config/initializers/session_store.rb
198
+ - test/dummy/config/initializers/wrap_parameters.rb
199
+ - test/dummy/config/initializers/to_time_preserves_timezone.rb
200
+ - test/dummy/config/initializers/filter_parameter_logging.rb
201
+ - test/dummy/config/initializers/backtrace_silencers.rb
202
+ - test/dummy/config/initializers/mime_types.rb
203
+ - test/dummy/db/schema.rb
184
204
  - test/dummy/app/views/layouts/application.html.erb
185
- - test/dummy/app/helpers/application_helper.rb
186
205
  - test/dummy/app/controllers/application_controller.rb
187
- - test/dummy/db/schema.rb
188
- - test/dummy/public/favicon.ico
189
- - test/dummy/public/500.html
206
+ - test/dummy/app/helpers/application_helper.rb
207
+ - test/dummy/app/assets/stylesheets/application.css
208
+ - test/dummy/app/assets/javascripts/application.js
190
209
  - test/dummy/public/422.html
191
210
  - test/dummy/public/404.html
192
- - test/dummy/config.ru
193
- - test/test_helper.rb
211
+ - test/dummy/public/favicon.ico
212
+ - test/dummy/public/500.html
213
+ - test/dummy/bin/bundle
214
+ - test/dummy/bin/setup
215
+ - test/dummy/bin/rails
216
+ - test/dummy/bin/rake
194
217
  - test/aranha_test.rb
218
+ - test/test_helper.rb
195
219
  - test/integration/navigation_test.rb