aranha-parsers 0.20.0 → 0.22.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa915be6016406435fb47dc3eecb8901579b8c6d5b32b72a4e90b546f9213ad0
4
- data.tar.gz: 30ad5a0bd9abaf92282de9441feb949f0d83f32293cb11040f197c89246f37d4
3
+ metadata.gz: d2ddf8d96b3eb46a3690802194d7bac810d77955dc2f79e6c0329c80ef326592
4
+ data.tar.gz: 7caeceb58462533b58c9976ed6ab942f995a8756d4db00ff50f61fd730f98735
5
5
  SHA512:
6
- metadata.gz: 4d6c8edf3a17ceec908482b6dd9ecce69a35ab6b178e5daf44c7f55a0b3cfe2ed326ad275f02d7856a36b881e7a5261a7575fe7f1972842854b1c2c2e20b1490
7
- data.tar.gz: aa3804298dbc0540306cccc4f9546392092de04d052ab7af9c4dac17b3bc126735ae6ec79c1f9fdc062b6003a35975f71c7acb773a3aac3f79a6df01f9e50d9b
6
+ metadata.gz: 394353da9e1e5859005fcb65104629535162e9a89d2acf70c515f71358e2fca7b1291b6210549f4f1f9d64577aaef6a947670f8ae1903af4f16af1ade346c4ca
7
+ data.tar.gz: 0bba5dff7e92e219a978f4708ccaa083a208671c0a810ad204da7397760e633645228aa2ddb718d33e508ca76ca8cb3da6f7c3f1b043a5d6d241c6d3cf2b8e37
@@ -54,7 +54,7 @@ module Aranha
54
54
  def log_content(content, suffix = '')
55
55
  path = log_file(suffix)
56
56
 
57
- File.open(path, 'wb') { |file| file.write(content) } if path
57
+ File.binwrite(path, content) if path
58
58
 
59
59
  content
60
60
  end
@@ -70,7 +70,7 @@ module Aranha
70
70
 
71
71
  def log_parsers_dir
72
72
  return ENV[LOG_DIR_ENVVAR] if ENV[LOG_DIR_ENVVAR]
73
- return ::Rails.root.join('log', 'parsers') if rails_root_exist?
73
+ return ::Rails.root.join('log/parsers') if rails_root_exist?
74
74
 
75
75
  nil
76
76
  end
@@ -25,6 +25,13 @@ module Aranha
25
25
  from_string(node.to_html)
26
26
  end
27
27
 
28
+ # @param node [String]
29
+ # @param klass [String]
30
+ # @return [String]
31
+ def xpath_contains_class(klass, node = '@class')
32
+ "contains(concat(' ', normalize-space(#{node}), ' '), ' #{klass} ')"
33
+ end
34
+
28
35
  # @param haystack [String]
29
36
  # @param needle [String]
30
37
  # @return [String]
@@ -12,11 +12,11 @@ module Aranha
12
12
  end
13
13
 
14
14
  def parse(node)
15
- fields.map do |f|
15
+ fields.to_h do |f|
16
16
  [f[0], parse_field(node, f[2], f[1])]
17
17
  rescue StandardError => e
18
18
  raise StandardError, "#{e.message}\nFields: #{f}"
19
- end.to_h
19
+ end
20
20
  end
21
21
 
22
22
  private
@@ -102,7 +102,7 @@ module Aranha
102
102
  m[0].delete(delimiter).to_f
103
103
  elsif required
104
104
  raise "decimal [Separator=\"#{separator}, Delimiter=\"#{delimiter}\"] value not " \
105
- "found in \"#{s}\""
105
+ "found in \"#{s}\""
106
106
  end
107
107
  end
108
108
 
@@ -13,7 +13,7 @@ module Aranha
13
13
  s = string_value(node, xpath)
14
14
  return '' unless s
15
15
 
16
- m = /\"([^\"]+)\"/.match(s)
16
+ m = /"([^\"]+)"/.match(s)
17
17
  return m[1] if m
18
18
 
19
19
  ''
@@ -35,7 +35,7 @@ module Aranha
35
35
  found ? sanitize_string(found.text) : ''
36
36
  end
37
37
 
38
- def string_recursive_value(node, xpath, required = true)
38
+ def string_recursive_value(node, xpath, required = true) # rubocop:disable Style/OptionalBooleanParameter
39
39
  root = node_value(node, xpath)
40
40
  if root.blank?
41
41
  return nil unless required
@@ -65,7 +65,7 @@ module Aranha
65
65
  s = ''
66
66
  node.children.each do |child|
67
67
  child_s = string_recursive(child)
68
- s += ' ' + child_s if child_s.present?
68
+ s += " #{child_s}" if child_s.present?
69
69
  end
70
70
  sanitize_string(s)
71
71
  end
@@ -34,5 +34,5 @@ end
34
34
 
35
35
  require 'eac_ruby_utils/patch'
36
36
 
37
- ::EacRubyUtils.patch(::OfxParser::OfxParser, ::Aranha::Parsers::Patches::OfxParser::OfxParser)
38
- ::EacRubyUtils.patch(::OfxParser::Transaction, ::Aranha::Parsers::Patches::OfxParser::Transaction)
37
+ EacRubyUtils.patch(OfxParser::OfxParser, Aranha::Parsers::Patches::OfxParser::OfxParser)
38
+ EacRubyUtils.patch(OfxParser::Transaction, Aranha::Parsers::Patches::OfxParser::Transaction)
@@ -10,7 +10,7 @@ module Aranha
10
10
 
11
11
  class << self
12
12
  def valid_source?(source)
13
- source.to_s.start_with?(SCHEME + '/', '/')
13
+ source.to_s.start_with?("#{SCHEME}/", '/')
14
14
  end
15
15
  end
16
16
 
@@ -23,7 +23,7 @@ module Aranha
23
23
  end
24
24
 
25
25
  def content
26
- ::File.open(source, &:read)
26
+ ::File.read(source)
27
27
  end
28
28
  end
29
29
  end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ class SourceAddress
8
+ class HashHttpBase
9
+ class Headers
10
+ acts_as_immutable
11
+ immutable_accessor :value, type: :hash
12
+
13
+ # @!method to_h()
14
+ # @return [Hash]
15
+ delegate :to_h, to: :values
16
+
17
+ # @param other_values [Array, Hash]
18
+ # @return [Aranha::Parsers::SourceAddress::HashHttpBase::Headers]
19
+ def merge(other_values)
20
+ if other_values.is_a?(::Hash)
21
+ values(values.merge(other_values))
22
+ elsif other_values.is_a?(::Enumerable)
23
+ merge(other_values.to_h { |v| merge_array_item_to_h(v) })
24
+ else
25
+ raise ::ArgumentError,
26
+ "\"other_values\"=\"#{other_values}\" should be a Array or a Hash"
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ # @param item [Array]
33
+ # @return [Array]
34
+ def merge_array_item_to_h(item)
35
+ 2.times.inject(Array(item)) do |a, e|
36
+ e >= a.count ? a + [nil] : a
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -26,6 +26,8 @@ module Aranha
26
26
  DEFAULT_FOLLOW_REDIRECT = true
27
27
  DEFAULT_HEADERS = {}.freeze
28
28
  DEFAULT_PARAMS = {}.freeze
29
+ USER_AGENT_KEY = 'user-agent'
30
+ USER_AGENT_VALUE = 'aranha-parsers'
29
31
 
30
32
  enable_simple_cache
31
33
 
@@ -73,12 +75,26 @@ module Aranha
73
75
 
74
76
  # @return [EacEnvs::Http::Request]
75
77
  def http_request
76
- r = ::EacEnvs::Http::Request.new.verb(self.class.http_method).url(url)
77
- r = headers.if_present(r) { |v| r.headers(v) }
78
+ r = initial_http_request
79
+ r = headers.if_present(r) { |v| r.headers(initial_headers.merge(v).to_h) }
78
80
  r = body.if_present(r) { |v| r.body_data(v) }
79
81
  r = r.follow_redirect(true) if follow_redirect?
80
82
  r
81
83
  end
84
+
85
+ # @return [Aranha::Parsers::SourceAddress::HashHttpBase::Headers]
86
+ def initial_headers
87
+ ::Aranha::Parsers::SourceAddress::HashHttpBase::Headers.new
88
+ .value(USER_AGENT_KEY, USER_AGENT_VALUE)
89
+ end
90
+
91
+ # @return [EacEnvs::Http::Request]
92
+ def initial_http_request
93
+ ::EacEnvs::Http::Request.new.verb(self.class.http_method).url(url)
94
+ .headers(initial_headers.to_h)
95
+ end
96
+
97
+ require_sub __FILE__
82
98
  end
83
99
  end
84
100
  end
@@ -40,6 +40,7 @@ module Aranha
40
40
 
41
41
  def content
42
42
  request = ::EacEnvs::Http::Request.new.url(url).retry(true).follow_redirect(true)
43
+ .header('user-agent', self.class.name)
43
44
  request.response.body_str
44
45
  rescue ::EacEnvs::Http::Error => e
45
46
  raise ::Aranha::Parsers::SourceAddress::FetchContentError, e.message, request
@@ -44,7 +44,7 @@ module Aranha
44
44
  end
45
45
 
46
46
  def serialize
47
- sub.serialize.strip + "\n"
47
+ "#{sub.serialize.strip}\n"
48
48
  end
49
49
 
50
50
  private
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.20.0'
5
+ VERSION = '0.22.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.22.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-02 00:00:00.000000000 Z
11
+ date: 2023-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: '2.8'
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: 2.8.4
36
+ version: 2.8.5
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,47 +43,47 @@ dependencies:
43
43
  version: '2.8'
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.8.4
46
+ version: 2.8.5
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: eac_envs-http
49
49
  requirement: !ruby/object:Gem::Requirement
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '0.3'
53
+ version: '0.4'
54
54
  - - ">="
55
55
  - !ruby/object:Gem::Version
56
- version: 0.3.2
56
+ version: 0.4.1
57
57
  type: :runtime
58
58
  prerelease: false
59
59
  version_requirements: !ruby/object:Gem::Requirement
60
60
  requirements:
61
61
  - - "~>"
62
62
  - !ruby/object:Gem::Version
63
- version: '0.3'
63
+ version: '0.4'
64
64
  - - ">="
65
65
  - !ruby/object:Gem::Version
66
- version: 0.3.2
66
+ version: 0.4.1
67
67
  - !ruby/object:Gem::Dependency
68
68
  name: eac_ruby_utils
69
69
  requirement: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - "~>"
72
72
  - !ruby/object:Gem::Version
73
- version: '0.117'
73
+ version: '0.119'
74
74
  - - ">="
75
75
  - !ruby/object:Gem::Version
76
- version: 0.117.1
76
+ version: 0.119.2
77
77
  type: :runtime
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
81
  - - "~>"
82
82
  - !ruby/object:Gem::Version
83
- version: '0.117'
83
+ version: '0.119'
84
84
  - - ">="
85
85
  - !ruby/object:Gem::Version
86
- version: 0.117.1
86
+ version: 0.119.2
87
87
  - !ruby/object:Gem::Dependency
88
88
  name: nokogiri
89
89
  requirement: !ruby/object:Gem::Requirement
@@ -93,7 +93,7 @@ dependencies:
93
93
  version: '1.15'
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: 1.15.2
96
+ version: 1.15.4
97
97
  type: :runtime
98
98
  prerelease: false
99
99
  version_requirements: !ruby/object:Gem::Requirement
@@ -103,7 +103,7 @@ dependencies:
103
103
  version: '1.15'
104
104
  - - ">="
105
105
  - !ruby/object:Gem::Version
106
- version: 1.15.2
106
+ version: 1.15.4
107
107
  - !ruby/object:Gem::Dependency
108
108
  name: ofx-parser
109
109
  requirement: !ruby/object:Gem::Requirement
@@ -124,14 +124,14 @@ dependencies:
124
124
  requirements:
125
125
  - - "~>"
126
126
  - !ruby/object:Gem::Version
127
- version: 0.5.1
127
+ version: '0.9'
128
128
  type: :development
129
129
  prerelease: false
130
130
  version_requirements: !ruby/object:Gem::Requirement
131
131
  requirements:
132
132
  - - "~>"
133
133
  - !ruby/object:Gem::Version
134
- version: 0.5.1
134
+ version: '0.9'
135
135
  description:
136
136
  email:
137
137
  executables: []
@@ -156,19 +156,14 @@ files:
156
156
  - lib/aranha/parsers/json/base.rb
157
157
  - lib/aranha/parsers/patches.rb
158
158
  - lib/aranha/parsers/patches/ofx_parser.rb
159
- - lib/aranha/parsers/rspec.rb
160
- - lib/aranha/parsers/rspec/setup.rb
161
- - lib/aranha/parsers/rspec/shared_examples/source_target_fixtures.rb
162
- - lib/aranha/parsers/rspec/source_target_fixtures_controller.rb
163
159
  - lib/aranha/parsers/source_address.rb
164
160
  - lib/aranha/parsers/source_address/fetch_content_error.rb
165
161
  - lib/aranha/parsers/source_address/file.rb
166
162
  - lib/aranha/parsers/source_address/hash_http_base.rb
163
+ - lib/aranha/parsers/source_address/hash_http_base/headers.rb
167
164
  - lib/aranha/parsers/source_address/hash_http_get.rb
168
165
  - lib/aranha/parsers/source_address/hash_http_post.rb
169
166
  - lib/aranha/parsers/source_address/http_get.rb
170
- - lib/aranha/parsers/source_target_fixtures.rb
171
- - lib/aranha/parsers/source_target_fixtures/source_target_file.rb
172
167
  - lib/aranha/parsers/version.rb
173
168
  homepage:
174
169
  licenses: []
@@ -181,7 +176,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
181
176
  requirements:
182
177
  - - ">="
183
178
  - !ruby/object:Gem::Version
184
- version: '0'
179
+ version: '2.7'
185
180
  required_rubygems_version: !ruby/object:Gem::Requirement
186
181
  requirements:
187
182
  - - ">="
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/core_ext'
4
-
5
- module Aranha
6
- module Parsers
7
- module Rspec
8
- module Setup
9
- def self.extended(_setup_obj)
10
- require 'aranha/parsers/rspec/shared_examples/source_target_fixtures'
11
- end
12
- end
13
- end
14
- end
15
- end
@@ -1,74 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'aranha/parsers/rspec/source_target_fixtures_controller'
4
- require 'aranha/parsers/source_target_fixtures'
5
- require 'yaml'
6
-
7
- RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable Metrics/BlockLength
8
- fixtures_controller = ::Aranha::Parsers::Rspec::SourceTargetFixturesController
9
- .new(self, spec_file)
10
-
11
- let(:fixtures_controller) { fixtures_controller }
12
- let(:spec_file) { spec_file }
13
-
14
- it 'fixtures directory should exist' do
15
- expect(::File.directory?(fixtures_controller.fixtures_dir)).to be true
16
- end
17
-
18
- context 'with fixtures directory' do
19
- it 'has at least one file' do
20
- expect(source_target_fixtures.source_target_files.count).to be > 0 # rubocop:disable Style/NumericPredicate
21
- end
22
-
23
- fixtures_controller.source_target_fixtures.source_target_files.each do |st|
24
- context "when source file is \"#{::File.basename(st.source)}\"" do
25
- if fixtures_controller.write_target_fixtures?
26
- it 'writes target data' do
27
- sd = sort_results(source_data(st.source))
28
- basename = ::Aranha::Parsers::SourceTargetFixtures.source_target_basename(st.source)
29
- target_file = File.expand_path("../#{basename}.target#{target_file_extname}", st.source)
30
- File.write(target_file, target_content(sd))
31
- end
32
- else
33
- it 'parses data' do
34
- assert_source_target_complete(st)
35
- sd = source_data(st.source)
36
- td = target_data(st.target)
37
- expect(sort_results(sd)).to eq(sort_results(td))
38
- end
39
- end
40
- end
41
- end
42
- end
43
-
44
- delegate :source_target_fixtures, to: :fixtures_controller
45
-
46
- def assert_source_target_complete(source_target)
47
- expect(source_target.source).to(be_truthy, "Source not found (Target: #{source_target.target})")
48
- expect(source_target.target).to(be_truthy, "Target not found (Source: #{source_target.source})")
49
- end
50
-
51
- def source_data(source_file)
52
- instance = described_class.new(source_file)
53
- return instance.data if instance.respond_to?(:data)
54
-
55
- raise "#{instance} has no \"data\" method. You need to implement \"#{instance}.data\" or " \
56
- "\"#{self}.source_data(source_file)\""
57
- end
58
-
59
- def sort_results(results)
60
- results
61
- end
62
-
63
- def target_data(target_file)
64
- ::YAML.load_file(target_file)
65
- end
66
-
67
- def target_content(data)
68
- data.to_yaml
69
- end
70
-
71
- def target_file_extname
72
- '.yaml'
73
- end
74
- end
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/core_ext'
4
-
5
- module Aranha
6
- module Parsers
7
- module Rspec
8
- class SourceTargetFixturesController
9
- enable_simple_cache
10
- common_constructor :example, :spec_file
11
-
12
- def default_fixtures_dir
13
- ::File.join(
14
- ::File.dirname(spec_file),
15
- ::File.basename(spec_file, '.*') + '_files'
16
- )
17
- end
18
-
19
- def fixtures_dir
20
- if example.respond_to?(:fixtures_dir)
21
- example.fixtures_dir
22
- else
23
- default_fixtures_dir
24
- end
25
- end
26
-
27
- def write_target_fixtures?
28
- ENV['WRITE_TARGET_FIXTURES'].to_bool
29
- end
30
-
31
- private
32
-
33
- def source_target_fixtures_uncached
34
- ::Aranha::Parsers::SourceTargetFixtures.new(fixtures_dir)
35
- end
36
- end
37
- end
38
- end
39
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'eac_ruby_utils/core_ext'
4
-
5
- module Aranha
6
- module Parsers
7
- module Rspec
8
- require_sub __FILE__
9
- end
10
- end
11
- end
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Aranha
4
- module Parsers
5
- class SourceTargetFixtures
6
- class SourceTargetFile
7
- common_constructor :owner, :basename
8
-
9
- def source
10
- owner.source_file(basename)
11
- end
12
-
13
- def target
14
- owner.target_file(basename)
15
- end
16
- end
17
- end
18
- end
19
- end
@@ -1,78 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'active_support/core_ext/object'
4
- require 'active_support/core_ext/string'
5
-
6
- module Aranha
7
- module Parsers
8
- # Lists pairs of source/target files in a directory.
9
- class SourceTargetFixtures
10
- require_sub __FILE__
11
-
12
- class << self
13
- def source_target_basename(file)
14
- m = /^(.+)\.(?:source|target)(?:\..+)?$/.match(File.basename(file))
15
- m ? m[1] : nil
16
- end
17
- end
18
-
19
- attr_reader :fixtures_directory
20
-
21
- def initialize(fixtures_directory)
22
- @fixtures_directory = fixtures_directory
23
- end
24
-
25
- def source_target_files
26
- sources_targets_basenames.map do |basename|
27
- ::Aranha::Parsers::SourceTargetFixtures::SourceTargetFile.new(self, basename)
28
- end
29
- end
30
-
31
- def source_files
32
- r = []
33
- source_target_files.each do |st|
34
- r << st.source if st.source
35
- end
36
- r
37
- end
38
-
39
- def target_files
40
- r = []
41
- source_target_files.each do |st|
42
- r << st.target if st.target
43
- end
44
- r
45
- end
46
-
47
- def target_file(basename)
48
- fixture_file(basename, 'target')
49
- end
50
-
51
- def source_file(basename)
52
- fixture_file(basename, 'source')
53
- end
54
-
55
- private
56
-
57
- def fixture_file(basename, suffix)
58
- prefix = "#{basename}.#{suffix}"
59
- Dir.foreach(fixtures_directory) do |item|
60
- next if %w[. ..].include?(item)
61
- return File.expand_path(item, fixtures_directory) if item.starts_with?(prefix)
62
- end
63
- nil
64
- end
65
-
66
- def sources_targets_basenames
67
- basenames = Set.new
68
- Dir.foreach(fixtures_directory) do |item|
69
- next if %w[. ..].include?(item)
70
-
71
- b = self.class.source_target_basename(item)
72
- basenames << b if b.present?
73
- end
74
- basenames
75
- end
76
- end
77
- end
78
- end