aranha 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fae3e1f9e295a208be700db6357172330192e9e4c0586ef0fff9a78db59001a3
4
- data.tar.gz: fb90fe66bdca6283dd61b4effd5d9265c7d8c99d5efbb21b22e8519a75aa4cb7
3
+ metadata.gz: 86615d49ac9b469d088f1fe3404db6efc98bdc19a0f0cd484bd968d1a68e090c
4
+ data.tar.gz: 32f5a13f4b3e5d3d17867aa2f911f85cda43b9ed4fc1d55ce7b29fe8fb3e8c2e
5
5
  SHA512:
6
- metadata.gz: 9a9cef98927abcc0ffbca46aed279e757aadbf4959604a0855094e19585364de1eca69ad209c27acbe959614a33856d322e9046005b4aeeb5404d064159c20d3
7
- data.tar.gz: 680fcb07943ee059df08c17c51dcc8ebec377be0e153be7118118c5c566ae195c860712e79248e940ca652f21f3bfeff91b37a5375a381bbc62c6c7ea3e72052
6
+ metadata.gz: 77697e1e182931f7bda03c402bef371f6f89d8cf963960210f80b5d82c665a912d96e0030c30be13d4c10d7c8159cc8d003571ee694e77df08e35d13b7b55838
7
+ data.tar.gz: 48a89f0b5b20c5ba77d18fe1744b7f3ba1cb964e2de020b2ea38b21a851224834bae19cefd346e9e070408e0c510f7c7cfca11a44e33dcb45f26addf435c9dfd
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'addressable'
4
+
3
5
  module Aranha
4
6
  class DefaultProcessor
5
7
  attr_reader :source_uri, :extra_data
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'eac_ruby_utils/options_consumer'
4
+
3
5
  module Aranha
4
6
  class DomElementsTraverser
5
7
  module Cursor
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_dependency 'aranha/dom_elements_traverser/conditions'
4
- require_dependency 'aranha/dom_elements_traverser/data'
5
- require_dependency 'aranha/dom_elements_traverser/cursor'
3
+ require 'aranha/dom_elements_traverser/conditions'
4
+ require 'aranha/dom_elements_traverser/data'
5
+ require 'aranha/dom_elements_traverser/cursor'
6
6
 
7
7
  module Aranha
8
8
  class DomElementsTraverser
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'aranha/parsers/base'
3
4
  require 'aranha/parsers/source_address'
5
+ require 'aranha/parsers/spec/source_target_fixtures'
4
6
 
5
7
  module Aranha
6
8
  module Fixtures
@@ -29,7 +31,7 @@ module Aranha
29
31
 
30
32
  def select_path?(path)
31
33
  return false unless match_prefix_pattern(path)
32
- !pending || !::File.exist?(target(path))
34
+ !pending || !source_exist?(path)
33
35
  end
34
36
 
35
37
  def match_prefix_pattern(path)
@@ -56,6 +58,11 @@ module Aranha
56
58
  def relative_path(path)
57
59
  path.sub(%r{^#{Regexp.quote(fixtures_root)}/}, '')
58
60
  end
61
+
62
+ def source_exist?(path)
63
+ stf = ::Aranha::Spec::SourceTargetFixtures.new(::File.dirname(path))
64
+ stf.source_file(::File.basename(path, '.url')).present?
65
+ end
59
66
  end
60
67
  end
61
68
  end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Fixtures
4
+ require 'aranha/fixtures/download'
5
+ end
6
+ end
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative '../base'
4
- require_relative 'node/default'
3
+ require 'nokogiri'
4
+ require 'aranha/parsers/base'
5
+ require 'aranha/parsers/html/node/default'
5
6
 
6
7
  module Aranha
7
8
  module Parsers
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
3
+ require 'aranha/parsers/html/base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
3
+ require 'aranha/parsers/html/base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
@@ -13,8 +13,7 @@ module Aranha
13
13
  node_parser.parse(m)
14
14
  end
15
15
  rescue StandardError => e
16
- e.message << " / Count: #{count}"
17
- raise e
16
+ raise StandardError, "#{e.message} (Count: #{count})"
18
17
  end
19
18
 
20
19
  def items_xpath
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
3
+ require 'aranha/parsers/html/node/base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Parsers
4
+ module Html
5
+ module Node
6
+ require 'aranha/parsers/html/node/base'
7
+ require 'aranha/parsers/html/node/default'
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Parsers
4
+ module Html
5
+ require 'aranha/parsers/html/base'
6
+ require 'aranha/parsers/html/item'
7
+ require 'aranha/parsers/html/item_list'
8
+ require 'aranha/parsers/html/node'
9
+ end
10
+ end
11
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'addressable'
3
4
  require 'net/http'
4
5
 
5
6
  module Aranha
@@ -7,6 +8,10 @@ module Aranha
7
8
  class SourceAddress
8
9
  class HttpGet
9
10
  class << self
11
+ def location_uri(source_uri, location)
12
+ ::Addressable::URI.join(source_uri, location).to_s
13
+ end
14
+
10
15
  def valid_source?(source)
11
16
  source.to_s =~ %r{\Ahttps?://}
12
17
  end
@@ -45,7 +50,7 @@ module Aranha
45
50
  when Net::HTTPSuccess then
46
51
  response.body
47
52
  when Net::HTTPRedirection then
48
- content_fetch(response['location'], limit - 1)
53
+ content_fetch(self.class.location_uri(uri, response['location']), limit - 1)
49
54
  else
50
55
  response.value
51
56
  end
@@ -34,8 +34,6 @@ module Aranha
34
34
  r
35
35
  end
36
36
 
37
- private
38
-
39
37
  def target_file(basename)
40
38
  fixture_file(basename, 'target')
41
39
  end
@@ -44,6 +42,8 @@ module Aranha
44
42
  fixture_file(basename, 'source')
45
43
  end
46
44
 
45
+ private
46
+
47
47
  def fixture_file(basename, suffix)
48
48
  prefix = "#{basename}.#{suffix}"
49
49
  Dir.foreach(fixtures_directory) do |item|
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Parsers
4
+ require 'aranha/parsers/base'
5
+ require 'aranha/parsers/html'
6
+ require 'aranha/parsers/invalid_state_exception'
7
+ require 'aranha/parsers/source_address'
8
+ end
9
+ end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'net/http'
4
- require_dependency 'aranha/parsers/invalid_state_exception'
4
+ require 'aranha/parsers/invalid_state_exception'
5
5
 
6
6
  module Aranha
7
7
  class Processor
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'active_support/core_ext/hash/indifferent_access'
4
+ require 'aranha/selenium/driver_factory/base'
4
5
  require 'aranha/selenium/driver_factory/chrome'
5
6
  require 'aranha/selenium/driver_factory/firefox'
6
7
 
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ module Selenium
4
+ require 'aranha/selenium/driver_factory'
5
+ require 'aranha/selenium/session'
6
+ end
7
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Aranha
4
- VERSION = '0.9.0'
4
+ VERSION = '0.9.1'
5
5
  end
data/lib/aranha.rb CHANGED
@@ -2,18 +2,14 @@
2
2
 
3
3
  require 'httpclient'
4
4
  require 'active_support/dependencies'
5
- require_dependency 'aranha/engine'
6
- require_dependency 'active_scaffold'
5
+ require 'active_scaffold'
7
6
 
8
7
  module Aranha
8
+ require 'aranha/default_processor'
9
+ require 'aranha/dom_elements_traverser'
10
+ require 'aranha/engine'
11
+ require 'aranha/fixtures'
12
+ require 'aranha/processor'
13
+ require 'aranha/parsers'
14
+ require 'aranha/selenium'
9
15
  end
10
-
11
- require_dependency 'aranha/default_processor'
12
- require_dependency 'aranha/fixtures/download'
13
- require_dependency 'aranha/processor'
14
- require_dependency 'aranha/parsers/base'
15
- require_dependency 'aranha/parsers/html/base'
16
- require_dependency 'aranha/parsers/html/item_list'
17
- require_dependency 'aranha/parsers/invalid_state_exception'
18
- require_dependency 'aranha/dom_elements_traverser'
19
- require_dependency 'aranha/selenium/driver_factory'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-11 00:00:00.000000000 Z
11
+ date: 2019-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: active_scaffold
@@ -141,11 +141,15 @@ files:
141
141
  - lib/aranha/dom_elements_traverser/cursor.rb
142
142
  - lib/aranha/dom_elements_traverser/data.rb
143
143
  - lib/aranha/engine.rb
144
+ - lib/aranha/fixtures.rb
144
145
  - lib/aranha/fixtures/download.rb
146
+ - lib/aranha/parsers.rb
145
147
  - lib/aranha/parsers/base.rb
148
+ - lib/aranha/parsers/html.rb
146
149
  - lib/aranha/parsers/html/base.rb
147
150
  - lib/aranha/parsers/html/item.rb
148
151
  - lib/aranha/parsers/html/item_list.rb
152
+ - lib/aranha/parsers/html/node.rb
149
153
  - lib/aranha/parsers/html/node/base.rb
150
154
  - lib/aranha/parsers/html/node/default.rb
151
155
  - lib/aranha/parsers/invalid_state_exception.rb
@@ -157,6 +161,7 @@ files:
157
161
  - lib/aranha/parsers/spec/source_target_fixtures.rb
158
162
  - lib/aranha/parsers/spec/source_target_fixtures_example.rb
159
163
  - lib/aranha/processor.rb
164
+ - lib/aranha/selenium.rb
160
165
  - lib/aranha/selenium/driver_factory.rb
161
166
  - lib/aranha/selenium/driver_factory/base.rb
162
167
  - lib/aranha/selenium/driver_factory/chrome.rb
@@ -194,5 +199,5 @@ specification_version: 4
194
199
  summary: Rails utilities for web crawling.
195
200
  test_files:
196
201
  - test/aranha_test.rb
197
- - test/test_helper.rb
198
202
  - test/integration/navigation_test.rb
203
+ - test/test_helper.rb