aranha 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fae3e1f9e295a208be700db6357172330192e9e4c0586ef0fff9a78db59001a3
4
- data.tar.gz: fb90fe66bdca6283dd61b4effd5d9265c7d8c99d5efbb21b22e8519a75aa4cb7
3
+ metadata.gz: 86615d49ac9b469d088f1fe3404db6efc98bdc19a0f0cd484bd968d1a68e090c
4
+ data.tar.gz: 32f5a13f4b3e5d3d17867aa2f911f85cda43b9ed4fc1d55ce7b29fe8fb3e8c2e
5
5
  SHA512:
6
- metadata.gz: 9a9cef98927abcc0ffbca46aed279e757aadbf4959604a0855094e19585364de1eca69ad209c27acbe959614a33856d322e9046005b4aeeb5404d064159c20d3
7
- data.tar.gz: 680fcb07943ee059df08c17c51dcc8ebec377be0e153be7118118c5c566ae195c860712e79248e940ca652f21f3bfeff91b37a5375a381bbc62c6c7ea3e72052
6
+ metadata.gz: 77697e1e182931f7bda03c402bef371f6f89d8cf963960210f80b5d82c665a912d96e0030c30be13d4c10d7c8159cc8d003571ee694e77df08e35d13b7b55838
7
+ data.tar.gz: 48a89f0b5b20c5ba77d18fe1744b7f3ba1cb964e2de020b2ea38b21a851224834bae19cefd346e9e070408e0c510f7c7cfca11a44e33dcb45f26addf435c9dfd
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'addressable'
4
+
3
5
  module Aranha
4
6
  class DefaultProcessor
5
7
  attr_reader :source_uri, :extra_data
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'eac_ruby_utils/options_consumer'
4
+
3
5
  module Aranha
4
6
  class DomElementsTraverser
5
7
  module Cursor
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_dependency 'aranha/dom_elements_traverser/conditions'
4
- require_dependency 'aranha/dom_elements_traverser/data'
5
- require_dependency 'aranha/dom_elements_traverser/cursor'
3
+ require 'aranha/dom_elements_traverser/conditions'
4
+ require 'aranha/dom_elements_traverser/data'
5
+ require 'aranha/dom_elements_traverser/cursor'
6
6
 
7
7
  module Aranha
8
8
  class DomElementsTraverser
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'aranha/parsers/base'
3
4
  require 'aranha/parsers/source_address'
5
+ require 'aranha/parsers/spec/source_target_fixtures'
4
6
 
5
7
  module Aranha
6
8
  module Fixtures
@@ -29,7 +31,7 @@ module Aranha
29
31
 
30
32
  def select_path?(path)
31
33
  return false unless match_prefix_pattern(path)
32
- !pending || !::File.exist?(target(path))
34
+ !pending || !source_exist?(path)
33
35
  end
34
36
 
35
37
  def match_prefix_pattern(path)
@@ -56,6 +58,11 @@ module Aranha
56
58
  def relative_path(path)
57
59
  path.sub(%r{^#{Regexp.quote(fixtures_root)}/}, '')
58
60
  end
61
+
62
+ def source_exist?(path)
63
+ stf = ::Aranha::Spec::SourceTargetFixtures.new(::File.dirname(path))
64
+ stf.source_file(::File.basename(path, '.url')).present?
65
+ end
59
66
  end
60
67
  end
61
68
  end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Fixtures
4
+ require 'aranha/fixtures/download'
5
+ end
6
+ end
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative '../base'
4
- require_relative 'node/default'
3
+ require 'nokogiri'
4
+ require 'aranha/parsers/base'
5
+ require 'aranha/parsers/html/node/default'
5
6
 
6
7
  module Aranha
7
8
  module Parsers
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
3
+ require 'aranha/parsers/html/base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
3
+ require 'aranha/parsers/html/base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
@@ -13,8 +13,7 @@ module Aranha
13
13
  node_parser.parse(m)
14
14
  end
15
15
  rescue StandardError => e
16
- e.message << " / Count: #{count}"
17
- raise e
16
+ raise StandardError, "#{e.message} (Count: #{count})"
18
17
  end
19
18
 
20
19
  def items_xpath
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
3
+ require 'aranha/parsers/html/node/base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Parsers
4
+ module Html
5
+ module Node
6
+ require 'aranha/parsers/html/node/base'
7
+ require 'aranha/parsers/html/node/default'
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Parsers
4
+ module Html
5
+ require 'aranha/parsers/html/base'
6
+ require 'aranha/parsers/html/item'
7
+ require 'aranha/parsers/html/item_list'
8
+ require 'aranha/parsers/html/node'
9
+ end
10
+ end
11
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'addressable'
3
4
  require 'net/http'
4
5
 
5
6
  module Aranha
@@ -7,6 +8,10 @@ module Aranha
7
8
  class SourceAddress
8
9
  class HttpGet
9
10
  class << self
11
+ def location_uri(source_uri, location)
12
+ ::Addressable::URI.join(source_uri, location).to_s
13
+ end
14
+
10
15
  def valid_source?(source)
11
16
  source.to_s =~ %r{\Ahttps?://}
12
17
  end
@@ -45,7 +50,7 @@ module Aranha
45
50
  when Net::HTTPSuccess then
46
51
  response.body
47
52
  when Net::HTTPRedirection then
48
- content_fetch(response['location'], limit - 1)
53
+ content_fetch(self.class.location_uri(uri, response['location']), limit - 1)
49
54
  else
50
55
  response.value
51
56
  end
@@ -34,8 +34,6 @@ module Aranha
34
34
  r
35
35
  end
36
36
 
37
- private
38
-
39
37
  def target_file(basename)
40
38
  fixture_file(basename, 'target')
41
39
  end
@@ -44,6 +42,8 @@ module Aranha
44
42
  fixture_file(basename, 'source')
45
43
  end
46
44
 
45
+ private
46
+
47
47
  def fixture_file(basename, suffix)
48
48
  prefix = "#{basename}.#{suffix}"
49
49
  Dir.foreach(fixtures_directory) do |item|
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Parsers
4
+ require 'aranha/parsers/base'
5
+ require 'aranha/parsers/html'
6
+ require 'aranha/parsers/invalid_state_exception'
7
+ require 'aranha/parsers/source_address'
8
+ end
9
+ end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'net/http'
4
- require_dependency 'aranha/parsers/invalid_state_exception'
4
+ require 'aranha/parsers/invalid_state_exception'
5
5
 
6
6
  module Aranha
7
7
  class Processor
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'active_support/core_ext/hash/indifferent_access'
4
+ require 'aranha/selenium/driver_factory/base'
4
5
  require 'aranha/selenium/driver_factory/chrome'
5
6
  require 'aranha/selenium/driver_factory/firefox'
6
7
 
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Selenium
4
+ require 'aranha/selenium/driver_factory'
5
+ require 'aranha/selenium/session'
6
+ end
7
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Aranha
4
- VERSION = '0.9.0'
4
+ VERSION = '0.9.1'
5
5
  end
data/lib/aranha.rb CHANGED
@@ -2,18 +2,14 @@
2
2
 
3
3
  require 'httpclient'
4
4
  require 'active_support/dependencies'
5
- require_dependency 'aranha/engine'
6
- require_dependency 'active_scaffold'
5
+ require 'active_scaffold'
7
6
 
8
7
  module Aranha
8
+ require 'aranha/default_processor'
9
+ require 'aranha/dom_elements_traverser'
10
+ require 'aranha/engine'
11
+ require 'aranha/fixtures'
12
+ require 'aranha/processor'
13
+ require 'aranha/parsers'
14
+ require 'aranha/selenium'
9
15
  end
10
-
11
- require_dependency 'aranha/default_processor'
12
- require_dependency 'aranha/fixtures/download'
13
- require_dependency 'aranha/processor'
14
- require_dependency 'aranha/parsers/base'
15
- require_dependency 'aranha/parsers/html/base'
16
- require_dependency 'aranha/parsers/html/item_list'
17
- require_dependency 'aranha/parsers/invalid_state_exception'
18
- require_dependency 'aranha/dom_elements_traverser'
19
- require_dependency 'aranha/selenium/driver_factory'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-11 00:00:00.000000000 Z
11
+ date: 2019-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_scaffold
@@ -141,11 +141,15 @@ files:
141
141
  - lib/aranha/dom_elements_traverser/cursor.rb
142
142
  - lib/aranha/dom_elements_traverser/data.rb
143
143
  - lib/aranha/engine.rb
144
+ - lib/aranha/fixtures.rb
144
145
  - lib/aranha/fixtures/download.rb
146
+ - lib/aranha/parsers.rb
145
147
  - lib/aranha/parsers/base.rb
148
+ - lib/aranha/parsers/html.rb
146
149
  - lib/aranha/parsers/html/base.rb
147
150
  - lib/aranha/parsers/html/item.rb
148
151
  - lib/aranha/parsers/html/item_list.rb
152
+ - lib/aranha/parsers/html/node.rb
149
153
  - lib/aranha/parsers/html/node/base.rb
150
154
  - lib/aranha/parsers/html/node/default.rb
151
155
  - lib/aranha/parsers/invalid_state_exception.rb
@@ -157,6 +161,7 @@ files:
157
161
  - lib/aranha/parsers/spec/source_target_fixtures.rb
158
162
  - lib/aranha/parsers/spec/source_target_fixtures_example.rb
159
163
  - lib/aranha/processor.rb
164
+ - lib/aranha/selenium.rb
160
165
  - lib/aranha/selenium/driver_factory.rb
161
166
  - lib/aranha/selenium/driver_factory/base.rb
162
167
  - lib/aranha/selenium/driver_factory/chrome.rb
@@ -194,5 +199,5 @@ specification_version: 4
194
199
  summary: Rails utilities for web crawling.
195
200
  test_files:
196
201
  - test/aranha_test.rb
197
- - test/test_helper.rb
198
202
  - test/integration/navigation_test.rb
203
+ - test/test_helper.rb