aranha-parsers 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c023094e51e5188e1979f5ab406b595a93b58006c656ccf7bb55609d1b828472
4
- data.tar.gz: 1dab0c5d8cb5b000949c3484db3f30494cd893e1f82f194627ae622ece43a457
3
+ metadata.gz: cb5cd7c64c21a8805a583f01c75efd58268003a8c8cc695ef809f938e79dc3ed
4
+ data.tar.gz: '078397e90586fe403b39dc8821e30f36efcf86fa55d31058bd94d18a42cb8eb8'
5
5
  SHA512:
6
- metadata.gz: 9a3da7fef40ee10d63806ae268939285b3eb7778ff2d4fe5570ce305ddb7b2ee9d07abc5cd9f31c7e79b484fa807bb57aa2dd04bd5a46f76251cf5f677576ba9
7
- data.tar.gz: f96f07f064028911ae1723f97e1cb8543fe44f2e2813e35b70048894788a3a47130e464cef662fa60d75ba07d030eade04fba54b48a04fb6df6c20553622ef32
6
+ metadata.gz: d017b16cb135ad2968fb35e83c086d3e5bdfbc59a8be5281303f26d8fbdbdff6697d9d1f029d949c69bb6341ae7eb25509d924e82a99c26e0391df7322a6a095
7
+ data.tar.gz: ada16700b30a9456f1140debf88f17020699d27ecc1dc6bcafbac43a1569d3bfe19415109b6f74688214bb94033dcb14ec2fc251fb3ae8060f5a45e62f8902c0
@@ -9,20 +9,25 @@ module Aranha
9
9
  module Parsers
10
10
  class Base
11
11
  class << self
12
+ # @deprecated Use {#from_string} instead.
13
+ # @param content [String]
14
+ # @return [Aranha::Parsers::Base]
12
15
  def from_content(content)
16
+ from_string(content)
17
+ end
18
+
19
+ # @param string [String]
20
+ # @return [Aranha::Parsers::Base]
21
+ def from_string(string)
13
22
  ::EacRubyUtils::Fs::Temp.on_file do |path|
14
23
  ::File.open(path.to_s, 'w:UTF-8') do |f|
15
- f.write content.dup.force_encoding('UTF-8')
24
+ f.write string.dup.force_encoding('UTF-8')
16
25
  end
17
26
  r = new(path.to_path)
18
27
  r.content
19
28
  r
20
29
  end
21
30
  end
22
-
23
- def parse_content(content)
24
- from_content(content).data
25
- end
26
31
  end
27
32
 
28
33
  LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
@@ -19,6 +19,20 @@ module Aranha
19
19
  @fields << Field.new(name, type, xpath)
20
20
  end
21
21
 
22
+ # @param node [Nokogiri::XML::Node]
23
+ # @return [Aranha::Parsers::Html::Base]
24
+ def from_node(node)
25
+ from_string(node.to_html)
26
+ end
27
+
28
+ # @param haystack [String]
29
+ # @param needle [String]
30
+ # @return [String]
31
+ def xpath_ends_with(haystack, needle)
32
+ "substring(#{haystack}, string-length(#{haystack}) - string-length(#{needle}) + 1) " \
33
+ "= #{needle}"
34
+ end
35
+
22
36
  Field = Struct.new(:name, :type, :xpath)
23
37
  end
24
38
 
@@ -27,16 +27,16 @@ module Aranha
27
27
  raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
28
28
  end
29
29
 
30
+ # @param node [Nokogiri::XML::Node]
31
+ # @param xpath [String]
32
+ # @return [String]
30
33
  def string_value(node, xpath)
31
- if node.at_xpath(xpath)
32
- sanitize_string(node.at_xpath(xpath).text)
33
- else
34
- ''
35
- end
34
+ found = node_value(node, xpath)
35
+ found ? sanitize_string(found.text) : ''
36
36
  end
37
37
 
38
38
  def string_recursive_value(node, xpath, required = true)
39
- root = node.at_xpath(xpath)
39
+ root = node_value(node, xpath)
40
40
  if root.blank?
41
41
  return nil unless required
42
42
 
@@ -11,13 +11,13 @@ module Aranha
11
11
  require_sub __FILE__, include_modules: true
12
12
 
13
13
  def array_value(node, xpath)
14
- r = node.xpath(xpath).map { |n| n.text.strip }
14
+ r = node_set_value(node, xpath).map { |n| n.text.strip }
15
15
  r.join('|')
16
16
  end
17
17
 
18
18
  def join_value(node, xpath)
19
19
  m = ''
20
- node.xpath(xpath).each do |n|
20
+ node_set_value(node, xpath).each do |n|
21
21
  m << n.text.strip
22
22
  end
23
23
  m
@@ -27,6 +27,20 @@ module Aranha
27
27
  m = /(\d+) m/.match(join_value(node, xpath))
28
28
  m ? m[1].to_i : nil
29
29
  end
30
+
31
+ # @param node [Nokogiri::XML::Node]
32
+ # @param xpath [String]
33
+ # @return [Nokogiri::XML::NodeSet]
34
+ def node_set_value(node, xpath)
35
+ node.xpath(xpath)
36
+ end
37
+
38
+ # @param node [Nokogiri::XML::Node]
39
+ # @param xpath [String]
40
+ # @return [Nokogiri::XML::Node]
41
+ def node_value(node, xpath)
42
+ node.at_xpath(xpath)
43
+ end
30
44
  end
31
45
  end
32
46
  end
@@ -2,10 +2,9 @@
2
2
 
3
3
  require 'aranha/parsers/source_address/fetch_content_error'
4
4
  require 'aranha/parsers/source_address/hash_http_base'
5
+ require 'eac_envs/http/error'
6
+ require 'eac_envs/http/request'
5
7
  require 'eac_ruby_utils/core_ext'
6
- require 'faraday'
7
- require 'faraday/follow_redirects'
8
- require 'faraday/gzip'
9
8
  require 'yaml'
10
9
 
11
10
  module Aranha
@@ -55,21 +54,11 @@ module Aranha
55
54
  source.to_yaml
56
55
  end
57
56
 
58
- # @return [Faraday]
59
- def faraday_connection
60
- ::Faraday.new do |f|
61
- f.request :gzip
62
- f.response :follow_redirects if follow_redirect?
63
- end
64
- end
65
-
66
57
  def content
67
- req = faraday_request
68
- return req.body if req.status == 200
69
-
70
- raise ::Aranha::Parsers::SourceAddress::FetchContentError.new(
71
- "Get #{url} returned #{req.status.to_i}", req
72
- )
58
+ request = http_request
59
+ request.response.body_str
60
+ rescue ::EacEnvs::Http::Error => e
61
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError, e.message, request
73
62
  end
74
63
 
75
64
  def param(key, default_value)
@@ -82,11 +71,13 @@ module Aranha
82
71
 
83
72
  private
84
73
 
85
- def faraday_request_uncached
86
- faraday_connection.send(self.class.http_method, url) do |req|
87
- headers.if_present { |v| req.headers = v }
88
- body.if_present { |v| req.body = v }
89
- end
74
+ # @return [EacEnvs::Http::Request]
75
+ def http_request
76
+ r = ::EacEnvs::Http::Request.new.verb(self.class.http_method).url(url)
77
+ r = headers.if_present(r) { |v| r.headers(v) }
78
+ r = body.if_present(r) { |v| r.body(v) }
79
+ r = r.follow_redirect(true) if follow_redirect?
80
+ r
90
81
  end
91
82
  end
92
83
  end
@@ -2,8 +2,8 @@
2
2
 
3
3
  require 'addressable'
4
4
  require 'aranha/parsers/source_address/fetch_content_error'
5
- require 'faraday'
6
- require 'faraday/retry'
5
+ require 'eac_envs/http/error'
6
+ require 'eac_envs/http/request'
7
7
 
8
8
  module Aranha
9
9
  module Parsers
@@ -39,16 +39,10 @@ module Aranha
39
39
  end
40
40
 
41
41
  def content
42
- conn = ::Faraday.new do |f|
43
- f.request :retry # retry transient failures
44
- f.response :follow_redirects # follow redirects
45
- end
46
- c = conn.get(url)
47
- return c.body if c.status == 200
48
-
49
- raise ::Aranha::Parsers::SourceAddress::FetchContentError.new(
50
- "Get #{url} returned #{c.status.to_i}", c
51
- )
42
+ request = ::EacEnvs::Http::Request.new.url(url).retry(true).follow_redirect(true)
43
+ request.response.body_str
44
+ rescue ::EacEnvs::Http::Error => e
45
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError, e.message, request
52
46
  end
53
47
 
54
48
  def serialize
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.17.0'
5
+ VERSION = '0.18.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-20 00:00:00.000000000 Z
11
+ date: 2023-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: '2.8'
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: 2.8.1
36
+ version: 2.8.4
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,48 +43,17 @@ dependencies:
43
43
  version: '2.8'
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.8.1
46
+ version: 2.8.4
47
47
  - !ruby/object:Gem::Dependency
48
- name: eac_ruby_utils
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0.112'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '0.112'
61
- - !ruby/object:Gem::Dependency
62
- name: faraday
48
+ name: eac_envs-http
63
49
  requirement: !ruby/object:Gem::Requirement
64
50
  requirements:
65
51
  - - "~>"
66
52
  - !ruby/object:Gem::Version
67
- version: '2.7'
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- version: 2.7.4
71
- type: :runtime
72
- prerelease: false
73
- version_requirements: !ruby/object:Gem::Requirement
74
- requirements:
75
- - - "~>"
76
- - !ruby/object:Gem::Version
77
- version: '2.7'
53
+ version: '0.3'
78
54
  - - ">="
79
55
  - !ruby/object:Gem::Version
80
- version: 2.7.4
81
- - !ruby/object:Gem::Dependency
82
- name: faraday-follow_redirects
83
- requirement: !ruby/object:Gem::Requirement
84
- requirements:
85
- - - "~>"
86
- - !ruby/object:Gem::Version
87
- version: '0.3'
56
+ version: 0.3.1
88
57
  type: :runtime
89
58
  prerelease: false
90
59
  version_requirements: !ruby/object:Gem::Requirement
@@ -92,34 +61,23 @@ dependencies:
92
61
  - - "~>"
93
62
  - !ruby/object:Gem::Version
94
63
  version: '0.3'
95
- - !ruby/object:Gem::Dependency
96
- name: faraday-gzip
97
- requirement: !ruby/object:Gem::Requirement
98
- requirements:
99
- - - "~>"
100
- - !ruby/object:Gem::Version
101
- version: '0.1'
102
- type: :runtime
103
- prerelease: false
104
- version_requirements: !ruby/object:Gem::Requirement
105
- requirements:
106
- - - "~>"
64
+ - - ">="
107
65
  - !ruby/object:Gem::Version
108
- version: '0.1'
66
+ version: 0.3.1
109
67
  - !ruby/object:Gem::Dependency
110
- name: faraday-retry
68
+ name: eac_ruby_utils
111
69
  requirement: !ruby/object:Gem::Requirement
112
70
  requirements:
113
71
  - - "~>"
114
72
  - !ruby/object:Gem::Version
115
- version: '2.1'
73
+ version: '0.116'
116
74
  type: :runtime
117
75
  prerelease: false
118
76
  version_requirements: !ruby/object:Gem::Requirement
119
77
  requirements:
120
78
  - - "~>"
121
79
  - !ruby/object:Gem::Version
122
- version: '2.1'
80
+ version: '0.116'
123
81
  - !ruby/object:Gem::Dependency
124
82
  name: nokogiri
125
83
  requirement: !ruby/object:Gem::Requirement
@@ -129,7 +87,7 @@ dependencies:
129
87
  version: '1.14'
130
88
  - - ">="
131
89
  - !ruby/object:Gem::Version
132
- version: 1.14.2
90
+ version: 1.14.4
133
91
  type: :runtime
134
92
  prerelease: false
135
93
  version_requirements: !ruby/object:Gem::Requirement
@@ -139,7 +97,7 @@ dependencies:
139
97
  version: '1.14'
140
98
  - - ">="
141
99
  - !ruby/object:Gem::Version
142
- version: 1.14.2
100
+ version: 1.14.4
143
101
  - !ruby/object:Gem::Dependency
144
102
  name: ofx-parser
145
103
  requirement: !ruby/object:Gem::Requirement