aranha-parsers 0.17.1 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62f1b799c069d10df8548cab6f80af270b4efe96fd515ed9bb51b52dd41a1fb0
4
- data.tar.gz: 4277d1ede3d4d766de0578e043e35a0c34483f7e286d2ffaa130e18865793682
3
+ metadata.gz: fbd2b941128c7e9295e385c75bf10d9a4074dc41fb70c6814eed11c78f469f6e
4
+ data.tar.gz: 5ead55e4c16b3afa84edcd72050994fc9b503036a4ffcb500dc69193513abdd7
5
5
  SHA512:
6
- metadata.gz: b6bf24b5aac72c3ff02b7b619bdfca517e86a54e96c5a9b4a8edf718837a8cbd4ec5ce1fe941df85a50701e9277753f81ea29e9313d6f8bd9f69126bbede5643
7
- data.tar.gz: 01feebca673e547d9983eaf6f16f5a13224311db0ca8c77672290d88b19664c6fe7146ce8094fd7fa4a45721980e48c8c3250867921ff07616b543743db00676
6
+ metadata.gz: 5372a04c8b487c9820815974b5c3ccc679ded34cee791a0038af7b28fe4089a3a76f216a819f101fa573955bf96f6f1da21f5917a4402aeef994011861ce703f
7
+ data.tar.gz: 9f745e276a8514c9c2acf071555367650d72a1749e51970cb5c76c29c96550a14a9d4780a859935eb2996315168c37824ba9e6bff5a7768fbbafb582f743b1e8
@@ -9,20 +9,25 @@ module Aranha
9
9
  module Parsers
10
10
  class Base
11
11
  class << self
12
+ # @deprecated Use {#from_string} instead.
13
+ # @param content [String]
14
+ # @return [Aranha::Parsers::Base]
12
15
  def from_content(content)
16
+ from_string(content)
17
+ end
18
+
19
+ # @param string [String]
20
+ # @return [Aranha::Parsers::Base]
21
+ def from_string(string)
13
22
  ::EacRubyUtils::Fs::Temp.on_file do |path|
14
23
  ::File.open(path.to_s, 'w:UTF-8') do |f|
15
- f.write content.dup.force_encoding('UTF-8')
24
+ f.write string.dup.force_encoding('UTF-8')
16
25
  end
17
26
  r = new(path.to_path)
18
27
  r.content
19
28
  r
20
29
  end
21
30
  end
22
-
23
- def parse_content(content)
24
- from_content(content).data
25
- end
26
31
  end
27
32
 
28
33
  LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
@@ -19,6 +19,20 @@ module Aranha
19
19
  @fields << Field.new(name, type, xpath)
20
20
  end
21
21
 
22
+ # @param node [Nokogiri::XML::Node]
23
+ # @return [Aranha::Parsers::Html::Base]
24
+ def from_node(node)
25
+ from_string(node.to_html)
26
+ end
27
+
28
+ # @param haystack [String]
29
+ # @param needle [String]
30
+ # @return [String]
31
+ def xpath_ends_with(haystack, needle)
32
+ "substring(#{haystack}, string-length(#{haystack}) - string-length(#{needle}) + 1) " \
33
+ "= #{needle}"
34
+ end
35
+
22
36
  Field = Struct.new(:name, :type, :xpath)
23
37
  end
24
38
 
@@ -27,16 +27,16 @@ module Aranha
27
27
  raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
28
28
  end
29
29
 
30
+ # @param node [Nokogiri::XML::Node]
31
+ # @param xpath [String]
32
+ # @return [String]
30
33
  def string_value(node, xpath)
31
- if node.at_xpath(xpath)
32
- sanitize_string(node.at_xpath(xpath).text)
33
- else
34
- ''
35
- end
34
+ found = node_value(node, xpath)
35
+ found ? sanitize_string(found.text) : ''
36
36
  end
37
37
 
38
38
  def string_recursive_value(node, xpath, required = true)
39
- root = node.at_xpath(xpath)
39
+ root = node_value(node, xpath)
40
40
  if root.blank?
41
41
  return nil unless required
42
42
 
@@ -10,14 +10,9 @@ module Aranha
10
10
  class Default < ::Aranha::Parsers::Html::Node::Base
11
11
  require_sub __FILE__, include_modules: true
12
12
 
13
- def array_value(node, xpath)
14
- r = node.xpath(xpath).map { |n| n.text.strip }
15
- r.join('|')
16
- end
17
-
18
13
  def join_value(node, xpath)
19
14
  m = ''
20
- node.xpath(xpath).each do |n|
15
+ node_set_value(node, xpath).each do |n|
21
16
  m << n.text.strip
22
17
  end
23
18
  m
@@ -27,6 +22,20 @@ module Aranha
27
22
  m = /(\d+) m/.match(join_value(node, xpath))
28
23
  m ? m[1].to_i : nil
29
24
  end
25
+
26
+ # @param node [Nokogiri::XML::Node]
27
+ # @param xpath [String]
28
+ # @return [Nokogiri::XML::NodeSet]
29
+ def node_set_value(node, xpath)
30
+ node.xpath(xpath)
31
+ end
32
+
33
+ # @param node [Nokogiri::XML::Node]
34
+ # @param xpath [String]
35
+ # @return [Nokogiri::XML::Node]
36
+ def node_value(node, xpath)
37
+ node.at_xpath(xpath)
38
+ end
30
39
  end
31
40
  end
32
41
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.17.1'
5
+ VERSION = '0.19.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 0.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-08 00:00:00.000000000 Z
11
+ date: 2023-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: '2.8'
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: 2.8.3
36
+ version: 2.8.4
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '2.8'
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.8.3
46
+ version: 2.8.4
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: eac_envs-http
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -51,6 +51,9 @@ dependencies:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
53
  version: '0.3'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 0.3.2
54
57
  type: :runtime
55
58
  prerelease: false
56
59
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,40 +61,43 @@ dependencies:
58
61
  - - "~>"
59
62
  - !ruby/object:Gem::Version
60
63
  version: '0.3'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 0.3.2
61
67
  - !ruby/object:Gem::Dependency
62
68
  name: eac_ruby_utils
63
69
  requirement: !ruby/object:Gem::Requirement
64
70
  requirements:
65
71
  - - "~>"
66
72
  - !ruby/object:Gem::Version
67
- version: '0.112'
73
+ version: '0.117'
68
74
  type: :runtime
69
75
  prerelease: false
70
76
  version_requirements: !ruby/object:Gem::Requirement
71
77
  requirements:
72
78
  - - "~>"
73
79
  - !ruby/object:Gem::Version
74
- version: '0.112'
80
+ version: '0.117'
75
81
  - !ruby/object:Gem::Dependency
76
82
  name: nokogiri
77
83
  requirement: !ruby/object:Gem::Requirement
78
84
  requirements:
79
85
  - - "~>"
80
86
  - !ruby/object:Gem::Version
81
- version: '1.14'
87
+ version: '1.15'
82
88
  - - ">="
83
89
  - !ruby/object:Gem::Version
84
- version: 1.14.2
90
+ version: 1.15.1
85
91
  type: :runtime
86
92
  prerelease: false
87
93
  version_requirements: !ruby/object:Gem::Requirement
88
94
  requirements:
89
95
  - - "~>"
90
96
  - !ruby/object:Gem::Version
91
- version: '1.14'
97
+ version: '1.15'
92
98
  - - ">="
93
99
  - !ruby/object:Gem::Version
94
- version: 1.14.2
100
+ version: 1.15.1
95
101
  - !ruby/object:Gem::Dependency
96
102
  name: ofx-parser
97
103
  requirement: !ruby/object:Gem::Requirement