aranha-parsers 0.17.0 → 0.18.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c023094e51e5188e1979f5ab406b595a93b58006c656ccf7bb55609d1b828472
4
- data.tar.gz: 1dab0c5d8cb5b000949c3484db3f30494cd893e1f82f194627ae622ece43a457
3
+ metadata.gz: cb5cd7c64c21a8805a583f01c75efd58268003a8c8cc695ef809f938e79dc3ed
4
+ data.tar.gz: '078397e90586fe403b39dc8821e30f36efcf86fa55d31058bd94d18a42cb8eb8'
5
5
  SHA512:
6
- metadata.gz: 9a3da7fef40ee10d63806ae268939285b3eb7778ff2d4fe5570ce305ddb7b2ee9d07abc5cd9f31c7e79b484fa807bb57aa2dd04bd5a46f76251cf5f677576ba9
7
- data.tar.gz: f96f07f064028911ae1723f97e1cb8543fe44f2e2813e35b70048894788a3a47130e464cef662fa60d75ba07d030eade04fba54b48a04fb6df6c20553622ef32
6
+ metadata.gz: d017b16cb135ad2968fb35e83c086d3e5bdfbc59a8be5281303f26d8fbdbdff6697d9d1f029d949c69bb6341ae7eb25509d924e82a99c26e0391df7322a6a095
7
+ data.tar.gz: ada16700b30a9456f1140debf88f17020699d27ecc1dc6bcafbac43a1569d3bfe19415109b6f74688214bb94033dcb14ec2fc251fb3ae8060f5a45e62f8902c0
@@ -9,20 +9,25 @@ module Aranha
9
9
  module Parsers
10
10
  class Base
11
11
  class << self
12
+ # @deprecated Use {#from_string} instead.
13
+ # @param content [String]
14
+ # @return [Aranha::Parsers::Base]
12
15
  def from_content(content)
16
+ from_string(content)
17
+ end
18
+
19
+ # @param string [String]
20
+ # @return [Aranha::Parsers::Base]
21
+ def from_string(string)
13
22
  ::EacRubyUtils::Fs::Temp.on_file do |path|
14
23
  ::File.open(path.to_s, 'w:UTF-8') do |f|
15
- f.write content.dup.force_encoding('UTF-8')
24
+ f.write string.dup.force_encoding('UTF-8')
16
25
  end
17
26
  r = new(path.to_path)
18
27
  r.content
19
28
  r
20
29
  end
21
30
  end
22
-
23
- def parse_content(content)
24
- from_content(content).data
25
- end
26
31
  end
27
32
 
28
33
  LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
@@ -19,6 +19,20 @@ module Aranha
19
19
  @fields << Field.new(name, type, xpath)
20
20
  end
21
21
 
22
+ # @param node [Nokogiri::XML::Node]
23
+ # @return [Aranha::Parsers::Html::Base]
24
+ def from_node(node)
25
+ from_string(node.to_html)
26
+ end
27
+
28
+ # @param haystack [String]
29
+ # @param needle [String]
30
+ # @return [String]
31
+ def xpath_ends_with(haystack, needle)
32
+ "substring(#{haystack}, string-length(#{haystack}) - string-length(#{needle}) + 1) " \
33
+ "= #{needle}"
34
+ end
35
+
22
36
  Field = Struct.new(:name, :type, :xpath)
23
37
  end
24
38
 
@@ -27,16 +27,16 @@ module Aranha
27
27
  raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
28
28
  end
29
29
 
30
+ # @param node [Nokogiri::XML::Node]
31
+ # @param xpath [String]
32
+ # @return [String]
30
33
  def string_value(node, xpath)
31
- if node.at_xpath(xpath)
32
- sanitize_string(node.at_xpath(xpath).text)
33
- else
34
- ''
35
- end
34
+ found = node_value(node, xpath)
35
+ found ? sanitize_string(found.text) : ''
36
36
  end
37
37
 
38
38
  def string_recursive_value(node, xpath, required = true)
39
- root = node.at_xpath(xpath)
39
+ root = node_value(node, xpath)
40
40
  if root.blank?
41
41
  return nil unless required
42
42
 
@@ -11,13 +11,13 @@ module Aranha
11
11
  require_sub __FILE__, include_modules: true
12
12
 
13
13
  def array_value(node, xpath)
14
- r = node.xpath(xpath).map { |n| n.text.strip }
14
+ r = node_set_value(node, xpath).map { |n| n.text.strip }
15
15
  r.join('|')
16
16
  end
17
17
 
18
18
  def join_value(node, xpath)
19
19
  m = ''
20
- node.xpath(xpath).each do |n|
20
+ node_set_value(node, xpath).each do |n|
21
21
  m << n.text.strip
22
22
  end
23
23
  m
@@ -27,6 +27,20 @@ module Aranha
27
27
  m = /(\d+) m/.match(join_value(node, xpath))
28
28
  m ? m[1].to_i : nil
29
29
  end
30
+
31
+ # @param node [Nokogiri::XML::Node]
32
+ # @param xpath [String]
33
+ # @return [Nokogiri::XML::NodeSet]
34
+ def node_set_value(node, xpath)
35
+ node.xpath(xpath)
36
+ end
37
+
38
+ # @param node [Nokogiri::XML::Node]
39
+ # @param xpath [String]
40
+ # @return [Nokogiri::XML::Node]
41
+ def node_value(node, xpath)
42
+ node.at_xpath(xpath)
43
+ end
30
44
  end
31
45
  end
32
46
  end
@@ -2,10 +2,9 @@
2
2
 
3
3
  require 'aranha/parsers/source_address/fetch_content_error'
4
4
  require 'aranha/parsers/source_address/hash_http_base'
5
+ require 'eac_envs/http/error'
6
+ require 'eac_envs/http/request'
5
7
  require 'eac_ruby_utils/core_ext'
6
- require 'faraday'
7
- require 'faraday/follow_redirects'
8
- require 'faraday/gzip'
9
8
  require 'yaml'
10
9
 
11
10
  module Aranha
@@ -55,21 +54,11 @@ module Aranha
55
54
  source.to_yaml
56
55
  end
57
56
 
58
- # @return [Faraday]
59
- def faraday_connection
60
- ::Faraday.new do |f|
61
- f.request :gzip
62
- f.response :follow_redirects if follow_redirect?
63
- end
64
- end
65
-
66
57
  def content
67
- req = faraday_request
68
- return req.body if req.status == 200
69
-
70
- raise ::Aranha::Parsers::SourceAddress::FetchContentError.new(
71
- "Get #{url} returned #{req.status.to_i}", req
72
- )
58
+ request = http_request
59
+ request.response.body_str
60
+ rescue ::EacEnvs::Http::Error => e
61
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError, e.message, request
73
62
  end
74
63
 
75
64
  def param(key, default_value)
@@ -82,11 +71,13 @@ module Aranha
82
71
 
83
72
  private
84
73
 
85
- def faraday_request_uncached
86
- faraday_connection.send(self.class.http_method, url) do |req|
87
- headers.if_present { |v| req.headers = v }
88
- body.if_present { |v| req.body = v }
89
- end
74
+ # @return [EacEnvs::Http::Request]
75
+ def http_request
76
+ r = ::EacEnvs::Http::Request.new.verb(self.class.http_method).url(url)
77
+ r = headers.if_present(r) { |v| r.headers(v) }
78
+ r = body.if_present(r) { |v| r.body(v) }
79
+ r = r.follow_redirect(true) if follow_redirect?
80
+ r
90
81
  end
91
82
  end
92
83
  end
@@ -2,8 +2,8 @@
2
2
 
3
3
  require 'addressable'
4
4
  require 'aranha/parsers/source_address/fetch_content_error'
5
- require 'faraday'
6
- require 'faraday/retry'
5
+ require 'eac_envs/http/error'
6
+ require 'eac_envs/http/request'
7
7
 
8
8
  module Aranha
9
9
  module Parsers
@@ -39,16 +39,10 @@ module Aranha
39
39
  end
40
40
 
41
41
  def content
42
- conn = ::Faraday.new do |f|
43
- f.request :retry # retry transient failures
44
- f.response :follow_redirects # follow redirects
45
- end
46
- c = conn.get(url)
47
- return c.body if c.status == 200
48
-
49
- raise ::Aranha::Parsers::SourceAddress::FetchContentError.new(
50
- "Get #{url} returned #{c.status.to_i}", c
51
- )
42
+ request = ::EacEnvs::Http::Request.new.url(url).retry(true).follow_redirect(true)
43
+ request.response.body_str
44
+ rescue ::EacEnvs::Http::Error => e
45
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError, e.message, request
52
46
  end
53
47
 
54
48
  def serialize
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.17.0'
5
+ VERSION = '0.18.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-20 00:00:00.000000000 Z
11
+ date: 2023-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: '2.8'
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: 2.8.1
36
+ version: 2.8.4
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,48 +43,17 @@ dependencies:
43
43
  version: '2.8'
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.8.1
46
+ version: 2.8.4
47
47
  - !ruby/object:Gem::Dependency
48
- name: eac_ruby_utils
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0.112'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '0.112'
61
- - !ruby/object:Gem::Dependency
62
- name: faraday
48
+ name: eac_envs-http
63
49
  requirement: !ruby/object:Gem::Requirement
64
50
  requirements:
65
51
  - - "~>"
66
52
  - !ruby/object:Gem::Version
67
- version: '2.7'
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- version: 2.7.4
71
- type: :runtime
72
- prerelease: false
73
- version_requirements: !ruby/object:Gem::Requirement
74
- requirements:
75
- - - "~>"
76
- - !ruby/object:Gem::Version
77
- version: '2.7'
53
+ version: '0.3'
78
54
  - - ">="
79
55
  - !ruby/object:Gem::Version
80
- version: 2.7.4
81
- - !ruby/object:Gem::Dependency
82
- name: faraday-follow_redirects
83
- requirement: !ruby/object:Gem::Requirement
84
- requirements:
85
- - - "~>"
86
- - !ruby/object:Gem::Version
87
- version: '0.3'
56
+ version: 0.3.1
88
57
  type: :runtime
89
58
  prerelease: false
90
59
  version_requirements: !ruby/object:Gem::Requirement
@@ -92,34 +61,23 @@ dependencies:
92
61
  - - "~>"
93
62
  - !ruby/object:Gem::Version
94
63
  version: '0.3'
95
- - !ruby/object:Gem::Dependency
96
- name: faraday-gzip
97
- requirement: !ruby/object:Gem::Requirement
98
- requirements:
99
- - - "~>"
100
- - !ruby/object:Gem::Version
101
- version: '0.1'
102
- type: :runtime
103
- prerelease: false
104
- version_requirements: !ruby/object:Gem::Requirement
105
- requirements:
106
- - - "~>"
64
+ - - ">="
107
65
  - !ruby/object:Gem::Version
108
- version: '0.1'
66
+ version: 0.3.1
109
67
  - !ruby/object:Gem::Dependency
110
- name: faraday-retry
68
+ name: eac_ruby_utils
111
69
  requirement: !ruby/object:Gem::Requirement
112
70
  requirements:
113
71
  - - "~>"
114
72
  - !ruby/object:Gem::Version
115
- version: '2.1'
73
+ version: '0.116'
116
74
  type: :runtime
117
75
  prerelease: false
118
76
  version_requirements: !ruby/object:Gem::Requirement
119
77
  requirements:
120
78
  - - "~>"
121
79
  - !ruby/object:Gem::Version
122
- version: '2.1'
80
+ version: '0.116'
123
81
  - !ruby/object:Gem::Dependency
124
82
  name: nokogiri
125
83
  requirement: !ruby/object:Gem::Requirement
@@ -129,7 +87,7 @@ dependencies:
129
87
  version: '1.14'
130
88
  - - ">="
131
89
  - !ruby/object:Gem::Version
132
- version: 1.14.2
90
+ version: 1.14.4
133
91
  type: :runtime
134
92
  prerelease: false
135
93
  version_requirements: !ruby/object:Gem::Requirement
@@ -139,7 +97,7 @@ dependencies:
139
97
  version: '1.14'
140
98
  - - ">="
141
99
  - !ruby/object:Gem::Version
142
- version: 1.14.2
100
+ version: 1.14.4
143
101
  - !ruby/object:Gem::Dependency
144
102
  name: ofx-parser
145
103
  requirement: !ruby/object:Gem::Requirement