aranha 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aranha/default_processor.rb +2 -0
- data/lib/aranha/dom_elements_traverser/cursor.rb +2 -0
- data/lib/aranha/dom_elements_traverser.rb +3 -3
- data/lib/aranha/fixtures/download.rb +8 -1
- data/lib/aranha/fixtures.rb +6 -0
- data/lib/aranha/parsers/html/base.rb +3 -2
- data/lib/aranha/parsers/html/item.rb +1 -1
- data/lib/aranha/parsers/html/item_list.rb +2 -3
- data/lib/aranha/parsers/html/node/default.rb +1 -1
- data/lib/aranha/parsers/html/node.rb +11 -0
- data/lib/aranha/parsers/html.rb +11 -0
- data/lib/aranha/parsers/source_address/http_get.rb +6 -1
- data/lib/aranha/parsers/spec/source_target_fixtures.rb +2 -2
- data/lib/aranha/parsers.rb +9 -0
- data/lib/aranha/processor.rb +1 -1
- data/lib/aranha/selenium/driver_factory.rb +1 -0
- data/lib/aranha/selenium.rb +7 -0
- data/lib/aranha/version.rb +1 -1
- data/lib/aranha.rb +8 -12
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86615d49ac9b469d088f1fe3404db6efc98bdc19a0f0cd484bd968d1a68e090c
|
4
|
+
data.tar.gz: 32f5a13f4b3e5d3d17867aa2f911f85cda43b9ed4fc1d55ce7b29fe8fb3e8c2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77697e1e182931f7bda03c402bef371f6f89d8cf963960210f80b5d82c665a912d96e0030c30be13d4c10d7c8159cc8d003571ee694e77df08e35d13b7b55838
|
7
|
+
data.tar.gz: 48a89f0b5b20c5ba77d18fe1744b7f3ba1cb964e2de020b2ea38b21a851224834bae19cefd346e9e070408e0c510f7c7cfca11a44e33dcb45f26addf435c9dfd
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require 'aranha/dom_elements_traverser/conditions'
|
4
|
+
require 'aranha/dom_elements_traverser/data'
|
5
|
+
require 'aranha/dom_elements_traverser/cursor'
|
6
6
|
|
7
7
|
module Aranha
|
8
8
|
class DomElementsTraverser
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'aranha/parsers/base'
|
3
4
|
require 'aranha/parsers/source_address'
|
5
|
+
require 'aranha/parsers/spec/source_target_fixtures'
|
4
6
|
|
5
7
|
module Aranha
|
6
8
|
module Fixtures
|
@@ -29,7 +31,7 @@ module Aranha
|
|
29
31
|
|
30
32
|
def select_path?(path)
|
31
33
|
return false unless match_prefix_pattern(path)
|
32
|
-
!pending ||
|
34
|
+
!pending || !source_exist?(path)
|
33
35
|
end
|
34
36
|
|
35
37
|
def match_prefix_pattern(path)
|
@@ -56,6 +58,11 @@ module Aranha
|
|
56
58
|
def relative_path(path)
|
57
59
|
path.sub(%r{^#{Regexp.quote(fixtures_root)}/}, '')
|
58
60
|
end
|
61
|
+
|
62
|
+
def source_exist?(path)
|
63
|
+
stf = ::Aranha::Spec::SourceTargetFixtures.new(::File.dirname(path))
|
64
|
+
stf.source_file(::File.basename(path, '.url')).present?
|
65
|
+
end
|
59
66
|
end
|
60
67
|
end
|
61
68
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require 'aranha/parsers/html/base'
|
4
4
|
|
5
5
|
module Aranha
|
6
6
|
module Parsers
|
@@ -13,8 +13,7 @@ module Aranha
|
|
13
13
|
node_parser.parse(m)
|
14
14
|
end
|
15
15
|
rescue StandardError => e
|
16
|
-
e.message
|
17
|
-
raise e
|
16
|
+
raise StandardError, "#{e.message} (Count: #{count})"
|
18
17
|
end
|
19
18
|
|
20
19
|
def items_xpath
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'addressable'
|
3
4
|
require 'net/http'
|
4
5
|
|
5
6
|
module Aranha
|
@@ -7,6 +8,10 @@ module Aranha
|
|
7
8
|
class SourceAddress
|
8
9
|
class HttpGet
|
9
10
|
class << self
|
11
|
+
def location_uri(source_uri, location)
|
12
|
+
::Addressable::URI.join(source_uri, location).to_s
|
13
|
+
end
|
14
|
+
|
10
15
|
def valid_source?(source)
|
11
16
|
source.to_s =~ %r{\Ahttps?://}
|
12
17
|
end
|
@@ -45,7 +50,7 @@ module Aranha
|
|
45
50
|
when Net::HTTPSuccess then
|
46
51
|
response.body
|
47
52
|
when Net::HTTPRedirection then
|
48
|
-
content_fetch(response['location'], limit - 1)
|
53
|
+
content_fetch(self.class.location_uri(uri, response['location']), limit - 1)
|
49
54
|
else
|
50
55
|
response.value
|
51
56
|
end
|
@@ -34,8 +34,6 @@ module Aranha
|
|
34
34
|
r
|
35
35
|
end
|
36
36
|
|
37
|
-
private
|
38
|
-
|
39
37
|
def target_file(basename)
|
40
38
|
fixture_file(basename, 'target')
|
41
39
|
end
|
@@ -44,6 +42,8 @@ module Aranha
|
|
44
42
|
fixture_file(basename, 'source')
|
45
43
|
end
|
46
44
|
|
45
|
+
private
|
46
|
+
|
47
47
|
def fixture_file(basename, suffix)
|
48
48
|
prefix = "#{basename}.#{suffix}"
|
49
49
|
Dir.foreach(fixtures_directory) do |item|
|
data/lib/aranha/processor.rb
CHANGED
data/lib/aranha/version.rb
CHANGED
data/lib/aranha.rb
CHANGED
@@ -2,18 +2,14 @@
|
|
2
2
|
|
3
3
|
require 'httpclient'
|
4
4
|
require 'active_support/dependencies'
|
5
|
-
|
6
|
-
require_dependency 'active_scaffold'
|
5
|
+
require 'active_scaffold'
|
7
6
|
|
8
7
|
module Aranha
|
8
|
+
require 'aranha/default_processor'
|
9
|
+
require 'aranha/dom_elements_traverser'
|
10
|
+
require 'aranha/engine'
|
11
|
+
require 'aranha/fixtures'
|
12
|
+
require 'aranha/processor'
|
13
|
+
require 'aranha/parsers'
|
14
|
+
require 'aranha/selenium'
|
9
15
|
end
|
10
|
-
|
11
|
-
require_dependency 'aranha/default_processor'
|
12
|
-
require_dependency 'aranha/fixtures/download'
|
13
|
-
require_dependency 'aranha/processor'
|
14
|
-
require_dependency 'aranha/parsers/base'
|
15
|
-
require_dependency 'aranha/parsers/html/base'
|
16
|
-
require_dependency 'aranha/parsers/html/item_list'
|
17
|
-
require_dependency 'aranha/parsers/invalid_state_exception'
|
18
|
-
require_dependency 'aranha/dom_elements_traverser'
|
19
|
-
require_dependency 'aranha/selenium/driver_factory'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -141,11 +141,15 @@ files:
|
|
141
141
|
- lib/aranha/dom_elements_traverser/cursor.rb
|
142
142
|
- lib/aranha/dom_elements_traverser/data.rb
|
143
143
|
- lib/aranha/engine.rb
|
144
|
+
- lib/aranha/fixtures.rb
|
144
145
|
- lib/aranha/fixtures/download.rb
|
146
|
+
- lib/aranha/parsers.rb
|
145
147
|
- lib/aranha/parsers/base.rb
|
148
|
+
- lib/aranha/parsers/html.rb
|
146
149
|
- lib/aranha/parsers/html/base.rb
|
147
150
|
- lib/aranha/parsers/html/item.rb
|
148
151
|
- lib/aranha/parsers/html/item_list.rb
|
152
|
+
- lib/aranha/parsers/html/node.rb
|
149
153
|
- lib/aranha/parsers/html/node/base.rb
|
150
154
|
- lib/aranha/parsers/html/node/default.rb
|
151
155
|
- lib/aranha/parsers/invalid_state_exception.rb
|
@@ -157,6 +161,7 @@ files:
|
|
157
161
|
- lib/aranha/parsers/spec/source_target_fixtures.rb
|
158
162
|
- lib/aranha/parsers/spec/source_target_fixtures_example.rb
|
159
163
|
- lib/aranha/processor.rb
|
164
|
+
- lib/aranha/selenium.rb
|
160
165
|
- lib/aranha/selenium/driver_factory.rb
|
161
166
|
- lib/aranha/selenium/driver_factory/base.rb
|
162
167
|
- lib/aranha/selenium/driver_factory/chrome.rb
|
@@ -194,5 +199,5 @@ specification_version: 4
|
|
194
199
|
summary: Rails utilities for web crawling.
|
195
200
|
test_files:
|
196
201
|
- test/aranha_test.rb
|
197
|
-
- test/test_helper.rb
|
198
202
|
- test/integration/navigation_test.rb
|
203
|
+
- test/test_helper.rb
|