aranha-parsers 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aranha/parsers/html/item_list.rb +5 -1
- data/lib/aranha/parsers/source_address/hash_http_base.rb +86 -0
- data/lib/aranha/parsers/source_address/hash_http_get.rb +3 -15
- data/lib/aranha/parsers/source_address/hash_http_post.rb +3 -35
- data/lib/aranha/parsers/version.rb +1 -1
- metadata +11 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 955f0df6b2a59762cbdd597dd17ae6608a880ee74f4547449ad06bbef4cf9f09
|
4
|
+
data.tar.gz: 95e891f8db0aa252f3e385956e783c48e08e42e68b3e6666491356eb2e60618a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5e7266770218b0698edb3111db90d797a24bb3e0f5ee54d5728a5810a87559a22a3a9f3d368bb28fc83fda8c6cbc04c99cab44c50ee25ea76e7550ea4753249
|
7
|
+
data.tar.gz: 66873b24e21f3c3e43a4ae49b82997c6e401a4e500274e2fc76e4ba91b013eae1a4f6b8641880f94896f1dab40bf23c86b24e226c14c2b92afe024d2d18aa230
|
@@ -10,11 +10,15 @@ module Aranha
|
|
10
10
|
items_data
|
11
11
|
end
|
12
12
|
|
13
|
+
def item_data(item)
|
14
|
+
item
|
15
|
+
end
|
16
|
+
|
13
17
|
def items_data
|
14
18
|
count = 0
|
15
19
|
@data ||= nokogiri.xpath(items_xpath).map do |m|
|
16
20
|
count += 1
|
17
|
-
node_parser.parse(m)
|
21
|
+
item_data(node_parser.parse(m))
|
18
22
|
end
|
19
23
|
rescue StandardError => e
|
20
24
|
raise StandardError, "#{e.message} (Count: #{count})"
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aranha/parsers/source_address/fetch_content_error'
|
4
|
+
require 'aranha/parsers/source_address/hash_http_base'
|
5
|
+
require 'eac_ruby_utils/core_ext'
|
6
|
+
require 'faraday_middleware'
|
7
|
+
require 'yaml'
|
8
|
+
|
9
|
+
module Aranha
|
10
|
+
module Parsers
|
11
|
+
class SourceAddress
|
12
|
+
class HashHttpBase
|
13
|
+
class << self
|
14
|
+
def http_method
|
15
|
+
const_get 'HTTP_METHOD'
|
16
|
+
end
|
17
|
+
|
18
|
+
def valid_source?(source)
|
19
|
+
source.is_a?(::Hash) &&
|
20
|
+
source.with_indifferent_access[:method].to_s.downcase.strip == http_method.to_s
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
DEFAULT_BODY = ''
|
25
|
+
DEFAULT_FOLLOW_REDIRECT = true
|
26
|
+
DEFAULT_HEADERS = {}.freeze
|
27
|
+
DEFAULT_PARAMS = {}.freeze
|
28
|
+
|
29
|
+
common_constructor :source do
|
30
|
+
self.source = source.with_indifferent_access
|
31
|
+
end
|
32
|
+
compare_by :source
|
33
|
+
|
34
|
+
def body
|
35
|
+
param(:body, DEFAULT_BODY)
|
36
|
+
end
|
37
|
+
|
38
|
+
def follow_redirect?
|
39
|
+
param(:follow_redirect, DEFAULT_FOLLOW_REDIRECT)
|
40
|
+
end
|
41
|
+
|
42
|
+
def headers
|
43
|
+
param(:headers, DEFAULT_HEADERS)
|
44
|
+
end
|
45
|
+
|
46
|
+
def url
|
47
|
+
source.fetch(:url)
|
48
|
+
end
|
49
|
+
|
50
|
+
def serialize
|
51
|
+
source.to_yaml
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [Faraday]
|
55
|
+
def faraday_connection
|
56
|
+
::Faraday.new do |f|
|
57
|
+
f.response :follow_redirects if follow_redirect?
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def faraday_request
|
62
|
+
faraday_connection.send(self.class.http_method, url) do |req|
|
63
|
+
headers.if_present { |v| req.headers = v }
|
64
|
+
body.if_present { |v| req.body = v }
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def content
|
69
|
+
req = faraday_request
|
70
|
+
return req.body if req.status == 200
|
71
|
+
|
72
|
+
raise ::Aranha::Parsers::SourceAddress::FetchContentError,
|
73
|
+
"Get #{url} returned #{req.status.to_i}"
|
74
|
+
end
|
75
|
+
|
76
|
+
def param(key, default_value)
|
77
|
+
source[key] || params[key] || default_value
|
78
|
+
end
|
79
|
+
|
80
|
+
def params
|
81
|
+
source[:params].if_present(DEFAULT_PARAMS)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
@@ -1,24 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'aranha/parsers/source_address/hash_http_post'
|
3
|
+
require 'aranha/parsers/source_address/hash_http_base'
|
4
4
|
|
5
5
|
module Aranha
|
6
6
|
module Parsers
|
7
7
|
class SourceAddress
|
8
|
-
class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpPost
|
9
|
-
|
10
|
-
def valid_source?(source)
|
11
|
-
source.is_a?(::Hash) &&
|
12
|
-
source.with_indifferent_access[:method].to_s.downcase.strip == 'get'
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def content
|
17
|
-
HTTPClient.new.get_content(
|
18
|
-
source[:url],
|
19
|
-
source[:params]
|
20
|
-
)
|
21
|
-
end
|
8
|
+
class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpBase
|
9
|
+
HTTP_METHOD = :get
|
22
10
|
end
|
23
11
|
end
|
24
12
|
end
|
@@ -1,44 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
4
|
-
require 'httpclient'
|
5
|
-
require 'yaml'
|
3
|
+
require 'aranha/parsers/source_address/hash_http_base'
|
6
4
|
|
7
5
|
module Aranha
|
8
6
|
module Parsers
|
9
7
|
class SourceAddress
|
10
|
-
class HashHttpPost
|
11
|
-
|
12
|
-
def valid_source?(source)
|
13
|
-
source.is_a?(::Hash) &&
|
14
|
-
source.with_indifferent_access[:method].to_s.downcase.strip == 'post'
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
attr_reader :source
|
19
|
-
|
20
|
-
def initialize(source)
|
21
|
-
@source = source.with_indifferent_access
|
22
|
-
end
|
23
|
-
|
24
|
-
def ==(other)
|
25
|
-
self.class == other.class && source == other.source
|
26
|
-
end
|
27
|
-
|
28
|
-
def url
|
29
|
-
source.fetch(:url)
|
30
|
-
end
|
31
|
-
|
32
|
-
def serialize
|
33
|
-
source.to_yaml
|
34
|
-
end
|
35
|
-
|
36
|
-
def content
|
37
|
-
HTTPClient.new.post_content(
|
38
|
-
source[:url],
|
39
|
-
source[:params].merge(follow_redirect: true)
|
40
|
-
)
|
41
|
-
end
|
8
|
+
class HashHttpPost < ::Aranha::Parsers::SourceAddress::HashHttpBase
|
9
|
+
HTTP_METHOD = :post
|
42
10
|
end
|
43
11
|
end
|
44
12
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha-parsers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Esquilo Azul Company
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -58,14 +58,20 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
61
|
+
version: '0.92'
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 0.92.1
|
62
65
|
type: :runtime
|
63
66
|
prerelease: false
|
64
67
|
version_requirements: !ruby/object:Gem::Requirement
|
65
68
|
requirements:
|
66
69
|
- - "~>"
|
67
70
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0.
|
71
|
+
version: '0.92'
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 0.92.1
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: faraday_middleware
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,26 +86,6 @@ dependencies:
|
|
80
86
|
- - ">="
|
81
87
|
- !ruby/object:Gem::Version
|
82
88
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: httpclient
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '2.8'
|
90
|
-
- - ">="
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
version: 2.8.3
|
93
|
-
type: :runtime
|
94
|
-
prerelease: false
|
95
|
-
version_requirements: !ruby/object:Gem::Requirement
|
96
|
-
requirements:
|
97
|
-
- - "~>"
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
version: '2.8'
|
100
|
-
- - ">="
|
101
|
-
- !ruby/object:Gem::Version
|
102
|
-
version: 2.8.3
|
103
89
|
- !ruby/object:Gem::Dependency
|
104
90
|
name: nokogiri
|
105
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,6 +169,7 @@ files:
|
|
183
169
|
- lib/aranha/parsers/source_address.rb
|
184
170
|
- lib/aranha/parsers/source_address/fetch_content_error.rb
|
185
171
|
- lib/aranha/parsers/source_address/file.rb
|
172
|
+
- lib/aranha/parsers/source_address/hash_http_base.rb
|
186
173
|
- lib/aranha/parsers/source_address/hash_http_get.rb
|
187
174
|
- lib/aranha/parsers/source_address/hash_http_post.rb
|
188
175
|
- lib/aranha/parsers/source_address/http_get.rb
|