aranha-parsers 0.11.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea4e3ac094b66f5e1b02e6af4f102c752a869e0a234354b0badb3b2d666368c7
4
- data.tar.gz: 95de53aed0f9e7157894515f3b3c881c740c6a9d4e2e787d4c5bbb2bb4432294
3
+ metadata.gz: 2ec41f5c85e077409458835a5cbd8d244a2f74ee4edb5e3c8d02ae9a3919f66b
4
+ data.tar.gz: 1a64b3f50994e1ed230b36a90861895faea86875963693ff95256d49d563ec09
5
5
  SHA512:
6
- metadata.gz: 176470a9a8163f44654485f96285c254ecf8d6661f3b435f7af0bc6ccf076c8081cb0554f7b31d77a751f2ce9caa4592454d7b3b5de4cd522a008c2798ecdc7d
7
- data.tar.gz: ebc4b32f65dc83d6ba681a0af28babcd8375f4b4c9585985080fc5bcdb5f7bae653979bf21377f10075862c1ab7fa589cca4c1ce0b254b134f1d8d9e7c1f9d3e
6
+ metadata.gz: cfca3d50399e38f7eccdf8173e8f630ce7b05f255619bc603a65a1a0ca34c5534582b791e1d6e67a5dd91536e053dcb99df1acf32270e7b7ca12724e7fbbe302
7
+ data.tar.gz: 1deb6e809fe03032f681f79194e058709e822b31771293f6c796ab22ccc543836201771fccfc03ab6e1f3421370eefa1e28787a613972e248cb0ba6d3814bf40
@@ -12,7 +12,7 @@ module Aranha
12
12
  def from_content(content)
13
13
  ::EacRubyUtils::Fs::Temp.on_file do |path|
14
14
  ::File.open(path.to_s, 'w:UTF-8') do |f|
15
- f.write content.force_encoding('UTF-8')
15
+ f.write content.dup.force_encoding('UTF-8')
16
16
  end
17
17
  r = new(path.to_path)
18
18
  r.content
@@ -10,11 +10,15 @@ module Aranha
10
10
  items_data
11
11
  end
12
12
 
13
+ def item_data(item)
14
+ item
15
+ end
16
+
13
17
  def items_data
14
18
  count = 0
15
19
  @data ||= nokogiri.xpath(items_xpath).map do |m|
16
20
  count += 1
17
- node_parser.parse(m)
21
+ item_data(node_parser.parse(m))
18
22
  end
19
23
  rescue StandardError => e
20
24
  raise StandardError, "#{e.message} (Count: #{count})"
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/source_address/fetch_content_error'
4
+ require 'aranha/parsers/source_address/hash_http_base'
5
+ require 'eac_ruby_utils/core_ext'
6
+ require 'faraday_middleware'
7
+ require 'yaml'
8
+
9
+ module Aranha
10
+ module Parsers
11
+ class SourceAddress
12
+ class HashHttpBase
13
+ class << self
14
+ def http_method
15
+ const_get 'HTTP_METHOD'
16
+ end
17
+
18
+ def valid_source?(source)
19
+ source.is_a?(::Hash) &&
20
+ source.with_indifferent_access[:method].to_s.downcase.strip == http_method.to_s
21
+ end
22
+ end
23
+
24
+ DEFAULT_BODY = ''
25
+ DEFAULT_FOLLOW_REDIRECT = true
26
+ DEFAULT_HEADERS = {}.freeze
27
+ DEFAULT_PARAMS = {}.freeze
28
+
29
+ enable_simple_cache
30
+
31
+ common_constructor :source do
32
+ self.source = source.with_indifferent_access
33
+ end
34
+ compare_by :source
35
+
36
+ def body
37
+ param(:body, DEFAULT_BODY)
38
+ end
39
+
40
+ def follow_redirect?
41
+ param(:follow_redirect, DEFAULT_FOLLOW_REDIRECT)
42
+ end
43
+
44
+ def headers
45
+ param(:headers, DEFAULT_HEADERS)
46
+ end
47
+
48
+ def url
49
+ source.fetch(:url)
50
+ end
51
+
52
+ def serialize
53
+ source.to_yaml
54
+ end
55
+
56
+ # @return [Faraday]
57
+ def faraday_connection
58
+ ::Faraday.new do |f|
59
+ f.response :follow_redirects if follow_redirect?
60
+ end
61
+ end
62
+
63
+ def content
64
+ req = faraday_request
65
+ return req.body if req.status == 200
66
+
67
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError,
68
+ "Get #{url} returned #{req.status.to_i}"
69
+ end
70
+
71
+ def param(key, default_value)
72
+ source[key] || params[key] || default_value
73
+ end
74
+
75
+ def params
76
+ source[:params].if_present(DEFAULT_PARAMS)
77
+ end
78
+
79
+ private
80
+
81
+ def faraday_request_uncached
82
+ faraday_connection.send(self.class.http_method, url) do |req|
83
+ headers.if_present { |v| req.headers = v }
84
+ body.if_present { |v| req.body = v }
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end
90
+ end
@@ -1,24 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'aranha/parsers/source_address/hash_http_post'
3
+ require 'aranha/parsers/source_address/hash_http_base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
7
7
  class SourceAddress
8
- class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpPost
9
- class << self
10
- def valid_source?(source)
11
- source.is_a?(::Hash) &&
12
- source.with_indifferent_access[:method].to_s.downcase.strip == 'get'
13
- end
14
- end
15
-
16
- def content
17
- HTTPClient.new.get_content(
18
- source[:url],
19
- source[:params]
20
- )
21
- end
8
+ class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpBase
9
+ HTTP_METHOD = :get
22
10
  end
23
11
  end
24
12
  end
@@ -1,44 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/core_ext/hash/indifferent_access'
4
- require 'httpclient'
5
- require 'yaml'
3
+ require 'aranha/parsers/source_address/hash_http_base'
6
4
 
7
5
  module Aranha
8
6
  module Parsers
9
7
  class SourceAddress
10
- class HashHttpPost
11
- class << self
12
- def valid_source?(source)
13
- source.is_a?(::Hash) &&
14
- source.with_indifferent_access[:method].to_s.downcase.strip == 'post'
15
- end
16
- end
17
-
18
- attr_reader :source
19
-
20
- def initialize(source)
21
- @source = source.with_indifferent_access
22
- end
23
-
24
- def ==(other)
25
- self.class == other.class && source == other.source
26
- end
27
-
28
- def url
29
- source.fetch(:url)
30
- end
31
-
32
- def serialize
33
- source.to_yaml
34
- end
35
-
36
- def content
37
- HTTPClient.new.post_content(
38
- source[:url],
39
- source[:params].merge(follow_redirect: true)
40
- )
41
- end
8
+ class HashHttpPost < ::Aranha::Parsers::SourceAddress::HashHttpBase
9
+ HTTP_METHOD = :post
42
10
  end
43
11
  end
44
12
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.11.0'
5
+ VERSION = '0.13.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-04-06 00:00:00.000000000 Z
11
+ date: 2022-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -58,14 +58,20 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.74'
61
+ version: '0.92'
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 0.92.1
62
65
  type: :runtime
63
66
  prerelease: false
64
67
  version_requirements: !ruby/object:Gem::Requirement
65
68
  requirements:
66
69
  - - "~>"
67
70
  - !ruby/object:Gem::Version
68
- version: '0.74'
71
+ version: '0.92'
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 0.92.1
69
75
  - !ruby/object:Gem::Dependency
70
76
  name: faraday_middleware
71
77
  requirement: !ruby/object:Gem::Requirement
@@ -80,26 +86,6 @@ dependencies:
80
86
  - - ">="
81
87
  - !ruby/object:Gem::Version
82
88
  version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: httpclient
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: '2.8'
90
- - - ">="
91
- - !ruby/object:Gem::Version
92
- version: 2.8.3
93
- type: :runtime
94
- prerelease: false
95
- version_requirements: !ruby/object:Gem::Requirement
96
- requirements:
97
- - - "~>"
98
- - !ruby/object:Gem::Version
99
- version: '2.8'
100
- - - ">="
101
- - !ruby/object:Gem::Version
102
- version: 2.8.3
103
89
  - !ruby/object:Gem::Dependency
104
90
  name: nokogiri
105
91
  requirement: !ruby/object:Gem::Requirement
@@ -183,6 +169,7 @@ files:
183
169
  - lib/aranha/parsers/source_address.rb
184
170
  - lib/aranha/parsers/source_address/fetch_content_error.rb
185
171
  - lib/aranha/parsers/source_address/file.rb
172
+ - lib/aranha/parsers/source_address/hash_http_base.rb
186
173
  - lib/aranha/parsers/source_address/hash_http_get.rb
187
174
  - lib/aranha/parsers/source_address/hash_http_post.rb
188
175
  - lib/aranha/parsers/source_address/http_get.rb