aranha 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aranha/default_processor.rb +2 -0
- data/lib/aranha/dom_elements_traverser/cursor.rb +2 -0
- data/lib/aranha/dom_elements_traverser.rb +3 -3
- data/lib/aranha/fixtures/download.rb +8 -1
- data/lib/aranha/fixtures.rb +6 -0
- data/lib/aranha/parsers/html/base.rb +3 -2
- data/lib/aranha/parsers/html/item.rb +1 -1
- data/lib/aranha/parsers/html/item_list.rb +2 -3
- data/lib/aranha/parsers/html/node/default.rb +1 -1
- data/lib/aranha/parsers/html/node.rb +11 -0
- data/lib/aranha/parsers/html.rb +11 -0
- data/lib/aranha/parsers/source_address/http_get.rb +6 -1
- data/lib/aranha/parsers/spec/source_target_fixtures.rb +2 -2
- data/lib/aranha/parsers.rb +9 -0
- data/lib/aranha/processor.rb +1 -1
- data/lib/aranha/selenium/driver_factory.rb +1 -0
- data/lib/aranha/selenium.rb +7 -0
- data/lib/aranha/version.rb +1 -1
- data/lib/aranha.rb +8 -12
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86615d49ac9b469d088f1fe3404db6efc98bdc19a0f0cd484bd968d1a68e090c
|
4
|
+
data.tar.gz: 32f5a13f4b3e5d3d17867aa2f911f85cda43b9ed4fc1d55ce7b29fe8fb3e8c2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77697e1e182931f7bda03c402bef371f6f89d8cf963960210f80b5d82c665a912d96e0030c30be13d4c10d7c8159cc8d003571ee694e77df08e35d13b7b55838
|
7
|
+
data.tar.gz: 48a89f0b5b20c5ba77d18fe1744b7f3ba1cb964e2de020b2ea38b21a851224834bae19cefd346e9e070408e0c510f7c7cfca11a44e33dcb45f26addf435c9dfd
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require 'aranha/dom_elements_traverser/conditions'
|
4
|
+
require 'aranha/dom_elements_traverser/data'
|
5
|
+
require 'aranha/dom_elements_traverser/cursor'
|
6
6
|
|
7
7
|
module Aranha
|
8
8
|
class DomElementsTraverser
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'aranha/parsers/base'
|
3
4
|
require 'aranha/parsers/source_address'
|
5
|
+
require 'aranha/parsers/spec/source_target_fixtures'
|
4
6
|
|
5
7
|
module Aranha
|
6
8
|
module Fixtures
|
@@ -29,7 +31,7 @@ module Aranha
|
|
29
31
|
|
30
32
|
def select_path?(path)
|
31
33
|
return false unless match_prefix_pattern(path)
|
32
|
-
!pending ||
|
34
|
+
!pending || !source_exist?(path)
|
33
35
|
end
|
34
36
|
|
35
37
|
def match_prefix_pattern(path)
|
@@ -56,6 +58,11 @@ module Aranha
|
|
56
58
|
def relative_path(path)
|
57
59
|
path.sub(%r{^#{Regexp.quote(fixtures_root)}/}, '')
|
58
60
|
end
|
61
|
+
|
62
|
+
def source_exist?(path)
|
63
|
+
stf = ::Aranha::Spec::SourceTargetFixtures.new(::File.dirname(path))
|
64
|
+
stf.source_file(::File.basename(path, '.url')).present?
|
65
|
+
end
|
59
66
|
end
|
60
67
|
end
|
61
68
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require 'aranha/parsers/html/base'
|
4
4
|
|
5
5
|
module Aranha
|
6
6
|
module Parsers
|
@@ -13,8 +13,7 @@ module Aranha
|
|
13
13
|
node_parser.parse(m)
|
14
14
|
end
|
15
15
|
rescue StandardError => e
|
16
|
-
e.message
|
17
|
-
raise e
|
16
|
+
raise StandardError, "#{e.message} (Count: #{count})"
|
18
17
|
end
|
19
18
|
|
20
19
|
def items_xpath
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'addressable'
|
3
4
|
require 'net/http'
|
4
5
|
|
5
6
|
module Aranha
|
@@ -7,6 +8,10 @@ module Aranha
|
|
7
8
|
class SourceAddress
|
8
9
|
class HttpGet
|
9
10
|
class << self
|
11
|
+
def location_uri(source_uri, location)
|
12
|
+
::Addressable::URI.join(source_uri, location).to_s
|
13
|
+
end
|
14
|
+
|
10
15
|
def valid_source?(source)
|
11
16
|
source.to_s =~ %r{\Ahttps?://}
|
12
17
|
end
|
@@ -45,7 +50,7 @@ module Aranha
|
|
45
50
|
when Net::HTTPSuccess then
|
46
51
|
response.body
|
47
52
|
when Net::HTTPRedirection then
|
48
|
-
content_fetch(response['location'], limit - 1)
|
53
|
+
content_fetch(self.class.location_uri(uri, response['location']), limit - 1)
|
49
54
|
else
|
50
55
|
response.value
|
51
56
|
end
|
@@ -34,8 +34,6 @@ module Aranha
|
|
34
34
|
r
|
35
35
|
end
|
36
36
|
|
37
|
-
private
|
38
|
-
|
39
37
|
def target_file(basename)
|
40
38
|
fixture_file(basename, 'target')
|
41
39
|
end
|
@@ -44,6 +42,8 @@ module Aranha
|
|
44
42
|
fixture_file(basename, 'source')
|
45
43
|
end
|
46
44
|
|
45
|
+
private
|
46
|
+
|
47
47
|
def fixture_file(basename, suffix)
|
48
48
|
prefix = "#{basename}.#{suffix}"
|
49
49
|
Dir.foreach(fixtures_directory) do |item|
|
data/lib/aranha/processor.rb
CHANGED
data/lib/aranha/version.rb
CHANGED
data/lib/aranha.rb
CHANGED
@@ -2,18 +2,14 @@
|
|
2
2
|
|
3
3
|
require 'httpclient'
|
4
4
|
require 'active_support/dependencies'
|
5
|
-
|
6
|
-
require_dependency 'active_scaffold'
|
5
|
+
require 'active_scaffold'
|
7
6
|
|
8
7
|
module Aranha
|
8
|
+
require 'aranha/default_processor'
|
9
|
+
require 'aranha/dom_elements_traverser'
|
10
|
+
require 'aranha/engine'
|
11
|
+
require 'aranha/fixtures'
|
12
|
+
require 'aranha/processor'
|
13
|
+
require 'aranha/parsers'
|
14
|
+
require 'aranha/selenium'
|
9
15
|
end
|
10
|
-
|
11
|
-
require_dependency 'aranha/default_processor'
|
12
|
-
require_dependency 'aranha/fixtures/download'
|
13
|
-
require_dependency 'aranha/processor'
|
14
|
-
require_dependency 'aranha/parsers/base'
|
15
|
-
require_dependency 'aranha/parsers/html/base'
|
16
|
-
require_dependency 'aranha/parsers/html/item_list'
|
17
|
-
require_dependency 'aranha/parsers/invalid_state_exception'
|
18
|
-
require_dependency 'aranha/dom_elements_traverser'
|
19
|
-
require_dependency 'aranha/selenium/driver_factory'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -141,11 +141,15 @@ files:
|
|
141
141
|
- lib/aranha/dom_elements_traverser/cursor.rb
|
142
142
|
- lib/aranha/dom_elements_traverser/data.rb
|
143
143
|
- lib/aranha/engine.rb
|
144
|
+
- lib/aranha/fixtures.rb
|
144
145
|
- lib/aranha/fixtures/download.rb
|
146
|
+
- lib/aranha/parsers.rb
|
145
147
|
- lib/aranha/parsers/base.rb
|
148
|
+
- lib/aranha/parsers/html.rb
|
146
149
|
- lib/aranha/parsers/html/base.rb
|
147
150
|
- lib/aranha/parsers/html/item.rb
|
148
151
|
- lib/aranha/parsers/html/item_list.rb
|
152
|
+
- lib/aranha/parsers/html/node.rb
|
149
153
|
- lib/aranha/parsers/html/node/base.rb
|
150
154
|
- lib/aranha/parsers/html/node/default.rb
|
151
155
|
- lib/aranha/parsers/invalid_state_exception.rb
|
@@ -157,6 +161,7 @@ files:
|
|
157
161
|
- lib/aranha/parsers/spec/source_target_fixtures.rb
|
158
162
|
- lib/aranha/parsers/spec/source_target_fixtures_example.rb
|
159
163
|
- lib/aranha/processor.rb
|
164
|
+
- lib/aranha/selenium.rb
|
160
165
|
- lib/aranha/selenium/driver_factory.rb
|
161
166
|
- lib/aranha/selenium/driver_factory/base.rb
|
162
167
|
- lib/aranha/selenium/driver_factory/chrome.rb
|
@@ -194,5 +199,5 @@ specification_version: 4
|
|
194
199
|
summary: Rails utilities for web crawling.
|
195
200
|
test_files:
|
196
201
|
- test/aranha_test.rb
|
197
|
-
- test/test_helper.rb
|
198
202
|
- test/integration/navigation_test.rb
|
203
|
+
- test/test_helper.rb
|