aranha-parsers 0.8.3 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee60e5eb7ad164eaac294af5fe274611cc72adb14645c59f12db7fe483dd551f
4
- data.tar.gz: 280ba737d37a5cd22d741b92961d3ee15c72b15a43f76604f4c9f2bc92e4731e
3
+ metadata.gz: f6ec27959167dfbae56e1fb00bfdbedad5bfe022a0708e493c623ca259df5c5d
4
+ data.tar.gz: c87db56022bfdf05f3f5cc99f58fbba4f0e73a5e8ddb7683d2569aabbf7bf28a
5
5
  SHA512:
6
- metadata.gz: 743b98993e76af5d18f0c58b4a197b0bf62a18771be883a6b29ce8551c3d236ebaa81acda3bd4200d4c3e8f6ab9f9db434d047bc80ae2ecd040fbf7cf706826f
7
- data.tar.gz: 1086dbe158f755c8716dbee97058c5cd6a72673b9769f92e9c11206bc072637131a0d698907ce292ac6fbe864e33d5702abc0aceb044d16b8f034d648199570e
6
+ metadata.gz: 9116a522005c0a01b650bc889ce1a4c70f7096b478febc2e1b24f6596a4d94c77e750d81e616bd70b6ca557a1c61e56d55fcfe2066ff4f62aea3a9a14237ed01
7
+ data.tar.gz: 5080f643d1da251f05afb094a1c7ac9c5cbbe140a15f0d05b77ec87591e700b148ba4766615ee1853284d24674e2720149f25e6858b485e0123ebd3add74c4db
@@ -11,7 +11,9 @@ module Aranha
11
11
  class << self
12
12
  def from_content(content)
13
13
  ::EacRubyUtils::Fs::Temp.on_file do |path|
14
- path.write(content)
14
+ ::File.open(path.to_s, 'w:UTF-8') do |f|
15
+ f.write content.force_encoding('UTF-8')
16
+ end
15
17
  r = new(path.to_path)
16
18
  r.content
17
19
  r
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/html/node/base'
4
+ require 'eac_ruby_utils/core_ext'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ module Html
9
+ module Node
10
+ class Default < ::Aranha::Parsers::Html::Node::Base
11
+ module NumericSupport
12
+ def integer_value(node, xpath)
13
+ r = string_value(node, xpath)
14
+ return nil if r.blank?
15
+
16
+ m = /\d+/.match(r)
17
+ raise "Integer not found in \"#{r}\"" unless m
18
+
19
+ m[0].to_i
20
+ end
21
+
22
+ def integer_optional_value(node, xpath)
23
+ r = string_value(node, xpath)
24
+ m = /\d+/.match(r)
25
+ m ? m[0].to_i : nil
26
+ end
27
+
28
+ def float_value(node, xpath)
29
+ parse_float(node, xpath, true)
30
+ end
31
+
32
+ def float_optional_value(node, xpath)
33
+ parse_float(node, xpath, false)
34
+ end
35
+
36
+ def us_decimal_value(node, xpath)
37
+ parse_us_decimal(node, xpath, true)
38
+ end
39
+
40
+ def us_decimal_optional_value(node, xpath)
41
+ parse_us_decimal(node, xpath, false)
42
+ end
43
+
44
+ private
45
+
46
+ def parse_float(node, xpath, required)
47
+ s = string_value(node, xpath)
48
+ m = /\d+(?:[\.\,](\d+))?/.match(s)
49
+ if m
50
+ m[0].delete('.').tr(',', '.').to_f
51
+ elsif required
52
+ raise "Float value not found in \"#{s}\""
53
+ end
54
+ end
55
+
56
+ def parse_us_decimal(node, xpath, required)
57
+ s = string_value(node, xpath)
58
+ m = /\d+(?:[\.\,](\d+))?/.match(s)
59
+ if m
60
+ m[0].delete(',').to_f
61
+ elsif required
62
+ raise "US decimal value not found in \"#{s}\""
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/html/node/base'
4
+ require 'eac_ruby_utils/core_ext'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ module Html
9
+ module Node
10
+ class Default < ::Aranha::Parsers::Html::Node::Base
11
+ module StringSupport
12
+ def quoted_value(node, xpath)
13
+ s = string_value(node, xpath)
14
+ return '' unless s
15
+
16
+ m = /\"([^\"]+)\"/.match(s)
17
+ return m[1] if m
18
+
19
+ ''
20
+ end
21
+
22
+ def regxep(node, xpath, pattern)
23
+ s = string_value(node, xpath)
24
+ m = pattern.match(s)
25
+ return m if m
26
+
27
+ raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
28
+ end
29
+
30
+ def string_value(node, xpath)
31
+ if node.at_xpath(xpath)
32
+ sanitize_string(node.at_xpath(xpath).text)
33
+ else
34
+ ''
35
+ end
36
+ end
37
+
38
+ def string_recursive_value(node, xpath, required = true)
39
+ root = node.at_xpath(xpath)
40
+ if root.blank?
41
+ return nil unless required
42
+
43
+ raise "No node found (Xpath: #{xpath})"
44
+ end
45
+ result = string_recursive(root)
46
+ return result if result.present?
47
+ return nil unless required
48
+
49
+ raise "String blank (Xpath: #{xpath})"
50
+ end
51
+
52
+ def string_recursive_optional_value(node, xpath)
53
+ string_recursive_value(node, xpath, false)
54
+ end
55
+
56
+ private
57
+
58
+ def sanitize_string(obj)
59
+ obj.to_s.tr("\u00A0", ' ').strip
60
+ end
61
+
62
+ def string_recursive(node)
63
+ return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)
64
+
65
+ s = ''
66
+ node.children.each do |child|
67
+ child_s = string_recursive(child)
68
+ s += ' ' + child_s if child_s.present?
69
+ end
70
+ sanitize_string(s)
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
@@ -1,71 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'aranha/parsers/html/node/base'
4
+ require 'eac_ruby_utils/core_ext'
4
5
 
5
6
  module Aranha
6
7
  module Parsers
7
8
  module Html
8
9
  module Node
9
10
  class Default < ::Aranha::Parsers::Html::Node::Base
10
- def string_value(node, xpath)
11
- if node.at_xpath(xpath)
12
- sanitize_string(node.at_xpath(xpath).text)
13
- else
14
- ''
15
- end
16
- end
17
-
18
- def string_recursive_value(node, xpath, required = true)
19
- root = node.at_xpath(xpath)
20
- if root.blank?
21
- return nil unless required
22
-
23
- raise "No node found (Xpath: #{xpath})"
24
- end
25
- result = string_recursive(root)
26
- return result if result.present?
27
- return nil unless required
28
-
29
- raise "String blank (Xpath: #{xpath})"
30
- end
31
-
32
- def string_recursive_optional_value(node, xpath)
33
- string_recursive_value(node, xpath, false)
34
- end
35
-
36
- def quoted_value(node, xpath)
37
- s = string_value(node, xpath)
38
- return '' unless s
39
-
40
- m = /\"([^\"]+)\"/.match(s)
41
- return m[1] if m
42
-
43
- ''
44
- end
45
-
46
- def integer_value(node, xpath)
47
- r = string_value(node, xpath)
48
- return nil if r.blank?
49
-
50
- m = /\d+/.match(r)
51
- raise "Integer not found in \"#{r}\"" unless m
52
-
53
- m[0].to_i
54
- end
55
-
56
- def integer_optional_value(node, xpath)
57
- r = string_value(node, xpath)
58
- m = /\d+/.match(r)
59
- m ? m[0].to_i : nil
60
- end
61
-
62
- def float_value(node, xpath)
63
- parse_float(node, xpath, true)
64
- end
65
-
66
- def float_optional_value(node, xpath)
67
- parse_float(node, xpath, false)
68
- end
11
+ require_sub __FILE__, include_modules: true
69
12
 
70
13
  def array_value(node, xpath)
71
14
  r = node.xpath(xpath).map { |n| n.text.strip }
@@ -84,41 +27,6 @@ module Aranha
84
27
  m = /(\d+) m/.match(join_value(node, xpath))
85
28
  m ? m[1].to_i : nil
86
29
  end
87
-
88
- def regxep(node, xpath, pattern)
89
- s = string_value(node, xpath)
90
- m = pattern.match(s)
91
- return m if m
92
-
93
- raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
94
- end
95
-
96
- private
97
-
98
- def parse_float(node, xpath, required)
99
- s = string_value(node, xpath)
100
- m = /\d+(?:[\.\,](\d+))?/.match(s)
101
- if m
102
- m[0].delete('.').tr(',', '.').to_f
103
- elsif required
104
- raise "Float value not found in \"#{s}\""
105
- end
106
- end
107
-
108
- def sanitize_string(obj)
109
- obj.to_s.tr("\u00A0", ' ').strip
110
- end
111
-
112
- def string_recursive(node)
113
- return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)
114
-
115
- s = ''
116
- node.children.each do |child|
117
- child_s = string_recursive(child)
118
- s += ' ' + child_s if child_s.present?
119
- end
120
- sanitize_string(s)
121
- end
122
30
  end
123
31
  end
124
32
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/base'
4
+ require 'json'
5
+
6
+ module Aranha
7
+ module Parsers
8
+ module Json
9
+ class Base < ::Aranha::Parsers::Base
10
+ def data
11
+ default_data
12
+ end
13
+
14
+ def default_data
15
+ ::JSON.parse(content)
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Json
8
+ require_sub __FILE__
9
+ end
10
+ end
11
+ end
@@ -7,7 +7,7 @@ module Aranha
7
7
  module Rspec
8
8
  module Setup
9
9
  def self.extended(_setup_obj)
10
- require 'aranha/parsers/rspec/source_target_fixtures_example'
10
+ require 'aranha/parsers/rspec/shared_examples/source_target_fixtures'
11
11
  end
12
12
  end
13
13
  end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ class SourceAddress
6
+ class FetchContentError < ::RuntimeError
7
+ end
8
+ end
9
+ end
10
+ end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'addressable'
4
4
  require 'curb'
5
+ require 'aranha/parsers/source_address/fetch_content_error'
6
+ require 'faraday_middleware'
5
7
 
6
8
  module Aranha
7
9
  module Parsers
@@ -31,13 +33,21 @@ module Aranha
31
33
  source
32
34
  end
33
35
 
36
+ def final_url
37
+ content unless @final_url
38
+ @final_url
39
+ end
40
+
34
41
  def content
35
- c = ::Curl::Easy.new(url)
36
- c.follow_location = true
37
- raise "Curl perform failed (URL: #{url})" unless c.perform
38
- return c.body_str if c.status.to_i == 200
42
+ conn = ::Faraday.new do |f|
43
+ f.request :retry # retry transient failures
44
+ f.response :follow_redirects # follow redirects
45
+ end
46
+ c = conn.get(url)
47
+ return c.body if c.status == 200
39
48
 
40
- raise "Get #{url} returned #{c.status.to_i}"
49
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError,
50
+ "Get #{url} returned #{c.status.to_i}"
41
51
  end
42
52
 
43
53
  def serialize
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.8.3'
5
+ VERSION = '0.10.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-15 00:00:00.000000000 Z
11
+ date: 2021-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.74'
69
+ - !ruby/object:Gem::Dependency
70
+ name: faraday_middleware
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: httpclient
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -86,6 +100,26 @@ dependencies:
86
100
  - - ">="
87
101
  - !ruby/object:Gem::Version
88
102
  version: 2.8.3
103
+ - !ruby/object:Gem::Dependency
104
+ name: nokogiri
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.12'
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 1.12.4
113
+ type: :runtime
114
+ prerelease: false
115
+ version_requirements: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '1.12'
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 1.12.4
89
123
  - !ruby/object:Gem::Dependency
90
124
  name: ofx-parser
91
125
  requirement: !ruby/object:Gem::Requirement
@@ -136,13 +170,18 @@ files:
136
170
  - lib/aranha/parsers/html/node.rb
137
171
  - lib/aranha/parsers/html/node/base.rb
138
172
  - lib/aranha/parsers/html/node/default.rb
173
+ - lib/aranha/parsers/html/node/default/numeric_support.rb
174
+ - lib/aranha/parsers/html/node/default/string_support.rb
139
175
  - lib/aranha/parsers/invalid_state_exception.rb
176
+ - lib/aranha/parsers/json.rb
177
+ - lib/aranha/parsers/json/base.rb
140
178
  - lib/aranha/parsers/patches.rb
141
179
  - lib/aranha/parsers/patches/ofx_parser.rb
142
180
  - lib/aranha/parsers/rspec.rb
143
181
  - lib/aranha/parsers/rspec/setup.rb
144
- - lib/aranha/parsers/rspec/source_target_fixtures_example.rb
182
+ - lib/aranha/parsers/rspec/shared_examples/source_target_fixtures.rb
145
183
  - lib/aranha/parsers/source_address.rb
184
+ - lib/aranha/parsers/source_address/fetch_content_error.rb
146
185
  - lib/aranha/parsers/source_address/file.rb
147
186
  - lib/aranha/parsers/source_address/hash_http_get.rb
148
187
  - lib/aranha/parsers/source_address/hash_http_post.rb