aranha 0.10.1 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aranha/selenium/driver_factory/base.rb +4 -0
- data/lib/aranha/selenium/driver_factory/chrome.rb +1 -0
- data/lib/aranha/selenium/driver_factory/firefox.rb +1 -0
- data/lib/aranha/version.rb +1 -1
- metadata +17 -20
- data/lib/aranha/parsers.rb +0 -9
- data/lib/aranha/parsers/base.rb +0 -58
- data/lib/aranha/parsers/html.rb +0 -11
- data/lib/aranha/parsers/html/base.rb +0 -47
- data/lib/aranha/parsers/html/item.rb +0 -23
- data/lib/aranha/parsers/html/item_list.rb +0 -25
- data/lib/aranha/parsers/html/node.rb +0 -11
- data/lib/aranha/parsers/html/node/base.rb +0 -30
- data/lib/aranha/parsers/html/node/default.rb +0 -93
- data/lib/aranha/parsers/invalid_state_exception.rb +0 -8
- data/lib/aranha/parsers/source_address.rb +0 -55
- data/lib/aranha/parsers/source_address/file.rb +0 -31
- data/lib/aranha/parsers/source_address/hash_http_get.rb +0 -25
- data/lib/aranha/parsers/source_address/hash_http_post.rb +0 -45
- data/lib/aranha/parsers/source_address/http_get.rb +0 -61
- data/lib/aranha/parsers/spec/source_target_fixtures.rb +0 -67
- data/lib/aranha/parsers/spec/source_target_fixtures_example.rb +0 -61
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dea7f9194a220cad9e2f168f4fa9a32b17353f45233eef8d0698788535317228
|
4
|
+
data.tar.gz: 40fa4399386bd6b1f2e52fc914cbb8fb436099f29c5493f404aa219b40cfb586
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1955d0bbaa9fffbe63c24814bad2e7f97a1c960e1b4e5041ca1775fc10a12e8eb3653c9d73c706011805de7af4608cad3a2f5a10548ae3434d91a199eb0b9255
|
7
|
+
data.tar.gz: 532b2cd21a97ae55873aa996815947cf3759212b0279081dac374a23d2f9f390b059b422dd9ee5e2a2627e0e66de524702ffd817c0910063b8c376b22e47b1f3
|
@@ -19,6 +19,7 @@ module Aranha
|
|
19
19
|
r.add_argument('--headless') if headless?
|
20
20
|
r.add_argument('--disable-popup-blocking')
|
21
21
|
r.add_argument('--disable-translate')
|
22
|
+
r.add_argument("user-agent=#{user_agent}") if user_agent.present?
|
22
23
|
r.add_preference(:download, prompt_for_download: false, default_directory: downloads_dir)
|
23
24
|
r
|
24
25
|
end
|
data/lib/aranha/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 3.4.41.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: aranha-parsers
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: eac_ruby_utils
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,23 +163,6 @@ files:
|
|
149
163
|
- lib/aranha/engine.rb
|
150
164
|
- lib/aranha/fixtures.rb
|
151
165
|
- lib/aranha/fixtures/download.rb
|
152
|
-
- lib/aranha/parsers.rb
|
153
|
-
- lib/aranha/parsers/base.rb
|
154
|
-
- lib/aranha/parsers/html.rb
|
155
|
-
- lib/aranha/parsers/html/base.rb
|
156
|
-
- lib/aranha/parsers/html/item.rb
|
157
|
-
- lib/aranha/parsers/html/item_list.rb
|
158
|
-
- lib/aranha/parsers/html/node.rb
|
159
|
-
- lib/aranha/parsers/html/node/base.rb
|
160
|
-
- lib/aranha/parsers/html/node/default.rb
|
161
|
-
- lib/aranha/parsers/invalid_state_exception.rb
|
162
|
-
- lib/aranha/parsers/source_address.rb
|
163
|
-
- lib/aranha/parsers/source_address/file.rb
|
164
|
-
- lib/aranha/parsers/source_address/hash_http_get.rb
|
165
|
-
- lib/aranha/parsers/source_address/hash_http_post.rb
|
166
|
-
- lib/aranha/parsers/source_address/http_get.rb
|
167
|
-
- lib/aranha/parsers/spec/source_target_fixtures.rb
|
168
|
-
- lib/aranha/parsers/spec/source_target_fixtures_example.rb
|
169
166
|
- lib/aranha/processor.rb
|
170
167
|
- lib/aranha/selenium.rb
|
171
168
|
- lib/aranha/selenium/driver_factory.rb
|
@@ -205,5 +202,5 @@ specification_version: 4
|
|
205
202
|
summary: Rails utilities for web crawling.
|
206
203
|
test_files:
|
207
204
|
- test/aranha_test.rb
|
208
|
-
- test/test_helper.rb
|
209
205
|
- test/integration/navigation_test.rb
|
206
|
+
- test/test_helper.rb
|
data/lib/aranha/parsers.rb
DELETED
data/lib/aranha/parsers/base.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'open-uri'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'aranha/parsers/source_address'
|
6
|
-
|
7
|
-
module Aranha
|
8
|
-
module Parsers
|
9
|
-
class Base
|
10
|
-
LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
|
11
|
-
|
12
|
-
attr_reader :source_address
|
13
|
-
|
14
|
-
def initialize(url)
|
15
|
-
@source_address = ::Aranha::Parsers::SourceAddress.new(url)
|
16
|
-
log_content(source_address.serialize, '-source-address')
|
17
|
-
end
|
18
|
-
|
19
|
-
delegate :url, to: :source_address
|
20
|
-
|
21
|
-
def content
|
22
|
-
s = source_address.content
|
23
|
-
log_content(s)
|
24
|
-
s
|
25
|
-
end
|
26
|
-
|
27
|
-
private
|
28
|
-
|
29
|
-
def log_content(content, suffix = '')
|
30
|
-
path = log_file(suffix)
|
31
|
-
|
32
|
-
return unless path
|
33
|
-
File.open(path, 'wb') { |file| file.write(content) }
|
34
|
-
end
|
35
|
-
|
36
|
-
def log_file(suffix)
|
37
|
-
dir = log_parsers_dir
|
38
|
-
return nil unless dir
|
39
|
-
f = ::File.join(dir, "#{self.class.name.parameterize}#{suffix}.log")
|
40
|
-
FileUtils.mkdir_p(File.dirname(f))
|
41
|
-
f
|
42
|
-
end
|
43
|
-
|
44
|
-
def log_parsers_dir
|
45
|
-
return ENV[LOG_DIR_ENVVAR] if ENV[LOG_DIR_ENVVAR]
|
46
|
-
return ::Rails.root.join('log', 'parsers') if rails_root_exist?
|
47
|
-
nil
|
48
|
-
end
|
49
|
-
|
50
|
-
def rails_root_exist?
|
51
|
-
::Rails.root
|
52
|
-
true
|
53
|
-
rescue NameError
|
54
|
-
return false
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
data/lib/aranha/parsers/html.rb
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'aranha/parsers/base'
|
5
|
-
require 'aranha/parsers/html/node/default'
|
6
|
-
|
7
|
-
module Aranha
|
8
|
-
module Parsers
|
9
|
-
module Html
|
10
|
-
class Base < ::Aranha::Parsers::Base
|
11
|
-
class << self
|
12
|
-
def fields
|
13
|
-
@fields ||= []
|
14
|
-
@fields.dup
|
15
|
-
end
|
16
|
-
|
17
|
-
def field(name, type, xpath)
|
18
|
-
@fields ||= []
|
19
|
-
@fields << Field.new(name, type, xpath)
|
20
|
-
end
|
21
|
-
|
22
|
-
Field = Struct.new(:name, :type, :xpath)
|
23
|
-
end
|
24
|
-
|
25
|
-
def nokogiri
|
26
|
-
@nokogiri ||= Nokogiri::HTML(content, &:noblanks)
|
27
|
-
end
|
28
|
-
|
29
|
-
protected
|
30
|
-
|
31
|
-
def node_parser_class
|
32
|
-
::Aranha::Parsers::Html::Node::Default
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
def node_parser
|
38
|
-
@node_parser ||= node_parser_class.new(fields)
|
39
|
-
end
|
40
|
-
|
41
|
-
def fields
|
42
|
-
self.class.fields.map { |f| [f.name, f.type, f.xpath] }
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'aranha/parsers/html/base'
|
4
|
-
|
5
|
-
module Aranha
|
6
|
-
module Parsers
|
7
|
-
module Html
|
8
|
-
class Item < Base
|
9
|
-
def data
|
10
|
-
@data ||= node_parser.parse(item_node)
|
11
|
-
end
|
12
|
-
|
13
|
-
def item_node
|
14
|
-
@item_node ||= begin
|
15
|
-
r = item_xpath ? nokogiri.at_xpath(item_xpath) : nokogiri
|
16
|
-
raise "Item node not found (Item xpath: #{item_xpath})" unless r
|
17
|
-
r
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'aranha/parsers/html/base'
|
4
|
-
|
5
|
-
module Aranha
|
6
|
-
module Parsers
|
7
|
-
module Html
|
8
|
-
class ItemList < Base
|
9
|
-
def data
|
10
|
-
count = 0
|
11
|
-
@data ||= nokogiri.xpath(items_xpath).map do |m|
|
12
|
-
count += 1
|
13
|
-
node_parser.parse(m)
|
14
|
-
end
|
15
|
-
rescue StandardError => e
|
16
|
-
raise StandardError, "#{e.message} (Count: #{count})"
|
17
|
-
end
|
18
|
-
|
19
|
-
def items_xpath
|
20
|
-
raise "Class #{self.class} has no method \"item_xpath\". Implement it"
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
@@ -1,30 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Aranha
|
4
|
-
module Parsers
|
5
|
-
module Html
|
6
|
-
module Node
|
7
|
-
class Base
|
8
|
-
attr_reader :fields
|
9
|
-
|
10
|
-
def initialize(fields)
|
11
|
-
@fields = fields
|
12
|
-
end
|
13
|
-
|
14
|
-
def parse(node)
|
15
|
-
Hash[fields.map { |f| [f[0], parse_field(node, f[2], f[1])] }]
|
16
|
-
end
|
17
|
-
|
18
|
-
private
|
19
|
-
|
20
|
-
def parse_field(node, xpath, parser_method)
|
21
|
-
value_method = "#{parser_method}_value"
|
22
|
-
return send(value_method, node, xpath) if respond_to?(value_method)
|
23
|
-
|
24
|
-
raise "Method \"#{value_method}\" not found in #{self.class}"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
@@ -1,93 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'aranha/parsers/html/node/base'
|
4
|
-
|
5
|
-
module Aranha
|
6
|
-
module Parsers
|
7
|
-
module Html
|
8
|
-
module Node
|
9
|
-
class Default < ::Aranha::Parsers::Html::Node::Base
|
10
|
-
def string_value(node, xpath)
|
11
|
-
if node.at_xpath(xpath)
|
12
|
-
node.at_xpath(xpath).text.to_s.tr("\u00A0", ' ').strip
|
13
|
-
else
|
14
|
-
''
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def quoted_value(node, xpath)
|
19
|
-
s = string_value(node, xpath)
|
20
|
-
return '' unless s
|
21
|
-
|
22
|
-
m = /\"([^\"]+)\"/.match(s)
|
23
|
-
return m[1] if m
|
24
|
-
|
25
|
-
''
|
26
|
-
end
|
27
|
-
|
28
|
-
def integer_value(node, xpath)
|
29
|
-
r = string_value(node, xpath)
|
30
|
-
return nil if r.blank?
|
31
|
-
|
32
|
-
m = /\d+/.match(r)
|
33
|
-
raise "Integer not found in \"#{r}\"" unless m
|
34
|
-
|
35
|
-
m[0].to_i
|
36
|
-
end
|
37
|
-
|
38
|
-
def integer_optional_value(node, xpath)
|
39
|
-
r = string_value(node, xpath)
|
40
|
-
m = /\d+/.match(r)
|
41
|
-
m ? m[0].to_i : nil
|
42
|
-
end
|
43
|
-
|
44
|
-
def float_value(node, xpath)
|
45
|
-
parse_float(node, xpath, true)
|
46
|
-
end
|
47
|
-
|
48
|
-
def float_optional_value(node, xpath)
|
49
|
-
parse_float(node, xpath, false)
|
50
|
-
end
|
51
|
-
|
52
|
-
def array_value(node, xpath)
|
53
|
-
r = node.xpath(xpath).map { |n| n.text.strip }
|
54
|
-
r.join('|')
|
55
|
-
end
|
56
|
-
|
57
|
-
def join_value(node, xpath)
|
58
|
-
m = ''
|
59
|
-
node.xpath(xpath).each do |n|
|
60
|
-
m << n.text.strip
|
61
|
-
end
|
62
|
-
m
|
63
|
-
end
|
64
|
-
|
65
|
-
def duration_value(node, xpath)
|
66
|
-
m = /(\d+) m/.match(join_value(node, xpath))
|
67
|
-
m ? m[1].to_i : nil
|
68
|
-
end
|
69
|
-
|
70
|
-
def regxep(node, xpath, pattern)
|
71
|
-
s = string_value(node, xpath)
|
72
|
-
m = pattern.match(s)
|
73
|
-
return m if m
|
74
|
-
|
75
|
-
raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
|
76
|
-
end
|
77
|
-
|
78
|
-
private
|
79
|
-
|
80
|
-
def parse_float(node, xpath, required)
|
81
|
-
s = string_value(node, xpath)
|
82
|
-
m = /\d+(?:[\.\,](\d+))?/.match(s)
|
83
|
-
if m
|
84
|
-
m[0].sub(',', '.').to_f
|
85
|
-
elsif required
|
86
|
-
raise "Float value not found in \"#{s}\""
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
@@ -1,55 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'yaml'
|
4
|
-
require 'active_support/core_ext/module/delegation'
|
5
|
-
require 'aranha/parsers/source_address/hash_http_get'
|
6
|
-
require 'aranha/parsers/source_address/hash_http_post'
|
7
|
-
require 'aranha/parsers/source_address/http_get'
|
8
|
-
require 'aranha/parsers/source_address/file'
|
9
|
-
|
10
|
-
module Aranha
|
11
|
-
module Parsers
|
12
|
-
class SourceAddress
|
13
|
-
class << self
|
14
|
-
SUBS = [
|
15
|
-
::Aranha::Parsers::SourceAddress::HashHttpGet,
|
16
|
-
::Aranha::Parsers::SourceAddress::HashHttpPost,
|
17
|
-
::Aranha::Parsers::SourceAddress::HttpGet,
|
18
|
-
::Aranha::Parsers::SourceAddress::File
|
19
|
-
].freeze
|
20
|
-
|
21
|
-
def detect_sub(source)
|
22
|
-
return source.sub if source.is_a?(self)
|
23
|
-
SUBS.each do |sub|
|
24
|
-
return sub.new(source) if sub.valid_source?(source)
|
25
|
-
end
|
26
|
-
raise "No content fetcher found for source \"#{source}\""
|
27
|
-
end
|
28
|
-
|
29
|
-
def deserialize(string)
|
30
|
-
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string))
|
31
|
-
end
|
32
|
-
|
33
|
-
def from_file(path)
|
34
|
-
deserialize(::File.read(path))
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
attr_reader :sub
|
39
|
-
|
40
|
-
def initialize(source)
|
41
|
-
@sub = self.class.detect_sub(source)
|
42
|
-
end
|
43
|
-
|
44
|
-
delegate :content, :url, to: :sub
|
45
|
-
|
46
|
-
def to_s
|
47
|
-
sub.url
|
48
|
-
end
|
49
|
-
|
50
|
-
def serialize
|
51
|
-
sub.serialize.strip + "\n"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'aranha/parsers/source_address/http_get'
|
4
|
-
|
5
|
-
module Aranha
|
6
|
-
module Parsers
|
7
|
-
class SourceAddress
|
8
|
-
class File < ::Aranha::Parsers::SourceAddress::HttpGet
|
9
|
-
SCHEME = 'file://'
|
10
|
-
|
11
|
-
class << self
|
12
|
-
def valid_source?(source)
|
13
|
-
source.to_s.start_with?(SCHEME + '/', '/')
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def initialize(source)
|
18
|
-
super source.to_s.gsub(/\A#{Regexp.quote(SCHEME)}/, '')
|
19
|
-
end
|
20
|
-
|
21
|
-
def url
|
22
|
-
"#{SCHEME}#{source}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def content
|
26
|
-
::File.open(source, &:read)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'aranha/parsers/source_address/hash_http_post'
|
4
|
-
|
5
|
-
module Aranha
|
6
|
-
module Parsers
|
7
|
-
class SourceAddress
|
8
|
-
class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpPost
|
9
|
-
class << self
|
10
|
-
def valid_source?(source)
|
11
|
-
source.is_a?(::Hash) &&
|
12
|
-
source.with_indifferent_access[:method].to_s.downcase.strip == 'get'
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def content
|
17
|
-
HTTPClient.new.get_content(
|
18
|
-
source[:url],
|
19
|
-
source[:params]
|
20
|
-
)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'active_support/core_ext/hash/indifferent_access'
|
4
|
-
require 'httpclient'
|
5
|
-
require 'yaml'
|
6
|
-
|
7
|
-
module Aranha
|
8
|
-
module Parsers
|
9
|
-
class SourceAddress
|
10
|
-
class HashHttpPost
|
11
|
-
class << self
|
12
|
-
def valid_source?(source)
|
13
|
-
source.is_a?(::Hash) &&
|
14
|
-
source.with_indifferent_access[:method].to_s.downcase.strip == 'post'
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
attr_reader :source
|
19
|
-
|
20
|
-
def initialize(source)
|
21
|
-
@source = source.with_indifferent_access
|
22
|
-
end
|
23
|
-
|
24
|
-
def ==(other)
|
25
|
-
self.class == other.class && source == other.source
|
26
|
-
end
|
27
|
-
|
28
|
-
def url
|
29
|
-
source.fetch(:url)
|
30
|
-
end
|
31
|
-
|
32
|
-
def serialize
|
33
|
-
source.to_yaml
|
34
|
-
end
|
35
|
-
|
36
|
-
def content
|
37
|
-
HTTPClient.new.post_content(
|
38
|
-
source[:url],
|
39
|
-
source[:params].merge(follow_redirect: true)
|
40
|
-
)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'addressable'
|
4
|
-
require 'net/http'
|
5
|
-
|
6
|
-
module Aranha
|
7
|
-
module Parsers
|
8
|
-
class SourceAddress
|
9
|
-
class HttpGet
|
10
|
-
class << self
|
11
|
-
def location_uri(source_uri, location)
|
12
|
-
::Addressable::URI.join(source_uri, location).to_s
|
13
|
-
end
|
14
|
-
|
15
|
-
def valid_source?(source)
|
16
|
-
source.to_s =~ %r{\Ahttps?://}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
attr_reader :source
|
21
|
-
|
22
|
-
def initialize(source)
|
23
|
-
@source = source.to_s
|
24
|
-
end
|
25
|
-
|
26
|
-
def ==(other)
|
27
|
-
self.class == other.class && source == other.source
|
28
|
-
end
|
29
|
-
|
30
|
-
def url
|
31
|
-
source
|
32
|
-
end
|
33
|
-
|
34
|
-
def content
|
35
|
-
content_fetch(url)
|
36
|
-
end
|
37
|
-
|
38
|
-
def serialize
|
39
|
-
url
|
40
|
-
end
|
41
|
-
|
42
|
-
private
|
43
|
-
|
44
|
-
def content_fetch(uri, limit = 10)
|
45
|
-
raise 'too many HTTP redirects' if limit.zero?
|
46
|
-
|
47
|
-
response = Net::HTTP.get_response(URI(uri))
|
48
|
-
|
49
|
-
case response
|
50
|
-
when Net::HTTPSuccess then
|
51
|
-
response.body
|
52
|
-
when Net::HTTPRedirection then
|
53
|
-
content_fetch(self.class.location_uri(uri, response['location']), limit - 1)
|
54
|
-
else
|
55
|
-
response.value
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
module Aranha
|
7
|
-
module Spec
|
8
|
-
# Lists pairs of source/target files in a directory.
|
9
|
-
class SourceTargetFixtures
|
10
|
-
class << self
|
11
|
-
def source_target_basename(file)
|
12
|
-
m = /^(.+)\.(?:source|target)(?:\..+)?$/.match(File.basename(file))
|
13
|
-
m ? m[1] : nil
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
attr_reader :fixtures_directory
|
18
|
-
|
19
|
-
def initialize(fixtures_directory)
|
20
|
-
@fixtures_directory = fixtures_directory
|
21
|
-
end
|
22
|
-
|
23
|
-
def source_target_files
|
24
|
-
sources_targets_basenames.map do |basename|
|
25
|
-
OpenStruct.new(source: source_file(basename), target: target_file(basename))
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def source_files
|
30
|
-
r = []
|
31
|
-
source_target_files.each do |st|
|
32
|
-
r << st.source if st.source
|
33
|
-
end
|
34
|
-
r
|
35
|
-
end
|
36
|
-
|
37
|
-
def target_file(basename)
|
38
|
-
fixture_file(basename, 'target')
|
39
|
-
end
|
40
|
-
|
41
|
-
def source_file(basename)
|
42
|
-
fixture_file(basename, 'source')
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
|
-
def fixture_file(basename, suffix)
|
48
|
-
prefix = "#{basename}.#{suffix}"
|
49
|
-
Dir.foreach(fixtures_directory) do |item|
|
50
|
-
next if item == '.' || item == '..'
|
51
|
-
return File.expand_path(item, fixtures_directory) if item.starts_with?(prefix)
|
52
|
-
end
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
|
56
|
-
def sources_targets_basenames
|
57
|
-
basenames = Set.new
|
58
|
-
Dir.foreach(fixtures_directory) do |item|
|
59
|
-
next if item == '.' || item == '..'
|
60
|
-
b = self.class.source_target_basename(item)
|
61
|
-
basenames << b if b.present?
|
62
|
-
end
|
63
|
-
basenames
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'source_target_fixtures'
|
4
|
-
|
5
|
-
RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable Metrics/BlockLength
|
6
|
-
let(:spec_file) { spec_file }
|
7
|
-
|
8
|
-
it 'fixtures directory should exist' do
|
9
|
-
expect(::File.directory?(fixtures_dir)).to be true
|
10
|
-
end
|
11
|
-
|
12
|
-
context 'in fixtures directory' do
|
13
|
-
it 'should have at least one file' do
|
14
|
-
expect(source_target_fixtures.source_target_files.count).to be > 0
|
15
|
-
end
|
16
|
-
|
17
|
-
if ENV['WRITE_TARGET_FIXTURES']
|
18
|
-
it 'should write target data for all files' do
|
19
|
-
source_target_fixtures.source_files.each do |source_file|
|
20
|
-
sd = sort_results(source_data(source_file))
|
21
|
-
basename = ::Aranha::Spec::SourceTargetFixtures.source_target_basename(source_file)
|
22
|
-
target_file = File.expand_path("../#{basename}.target.yaml", source_file)
|
23
|
-
File.write(target_file, sd.to_yaml)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
else
|
27
|
-
it 'should parse data for all files' do
|
28
|
-
source_target_fixtures.source_target_files.each do |st|
|
29
|
-
assert_source_target_complete(st)
|
30
|
-
sd = source_data(st.source)
|
31
|
-
td = YAML.load_file(st.target)
|
32
|
-
expect(sort_results(sd)).to eq(sort_results(td))
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def source_target_fixtures
|
39
|
-
@source_target_fixtures ||= ::Aranha::Spec::SourceTargetFixtures.new(fixtures_dir)
|
40
|
-
end
|
41
|
-
|
42
|
-
def assert_source_target_complete(st)
|
43
|
-
expect(st.source).to(be_truthy, "Source not found (Target: #{st.target})")
|
44
|
-
expect(st.target).to(be_truthy, "Target not found (Source: #{st.source})")
|
45
|
-
end
|
46
|
-
|
47
|
-
def source_data(source_file)
|
48
|
-
described_class.new(source_file).data
|
49
|
-
end
|
50
|
-
|
51
|
-
def fixtures_dir
|
52
|
-
::File.join(
|
53
|
-
::File.dirname(spec_file),
|
54
|
-
::File.basename(spec_file, '.*') + '_files'
|
55
|
-
)
|
56
|
-
end
|
57
|
-
|
58
|
-
def sort_results(r)
|
59
|
-
r
|
60
|
-
end
|
61
|
-
end
|