aranha-parsers 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb5cd7c64c21a8805a583f01c75efd58268003a8c8cc695ef809f938e79dc3ed
|
4
|
+
data.tar.gz: '078397e90586fe403b39dc8821e30f36efcf86fa55d31058bd94d18a42cb8eb8'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d017b16cb135ad2968fb35e83c086d3e5bdfbc59a8be5281303f26d8fbdbdff6697d9d1f029d949c69bb6341ae7eb25509d924e82a99c26e0391df7322a6a095
|
7
|
+
data.tar.gz: ada16700b30a9456f1140debf88f17020699d27ecc1dc6bcafbac43a1569d3bfe19415109b6f74688214bb94033dcb14ec2fc251fb3ae8060f5a45e62f8902c0
|
data/lib/aranha/parsers/base.rb
CHANGED
@@ -9,20 +9,25 @@ module Aranha
|
|
9
9
|
module Parsers
|
10
10
|
class Base
|
11
11
|
class << self
|
12
|
+
# @deprecated Use {#from_string} instead.
|
13
|
+
# @param content [String]
|
14
|
+
# @return [Aranha::Parsers::Base]
|
12
15
|
def from_content(content)
|
16
|
+
from_string(content)
|
17
|
+
end
|
18
|
+
|
19
|
+
# @param string [String]
|
20
|
+
# @return [Aranha::Parsers::Base]
|
21
|
+
def from_string(string)
|
13
22
|
::EacRubyUtils::Fs::Temp.on_file do |path|
|
14
23
|
::File.open(path.to_s, 'w:UTF-8') do |f|
|
15
|
-
f.write
|
24
|
+
f.write string.dup.force_encoding('UTF-8')
|
16
25
|
end
|
17
26
|
r = new(path.to_path)
|
18
27
|
r.content
|
19
28
|
r
|
20
29
|
end
|
21
30
|
end
|
22
|
-
|
23
|
-
def parse_content(content)
|
24
|
-
from_content(content).data
|
25
|
-
end
|
26
31
|
end
|
27
32
|
|
28
33
|
LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
|
@@ -19,6 +19,20 @@ module Aranha
|
|
19
19
|
@fields << Field.new(name, type, xpath)
|
20
20
|
end
|
21
21
|
|
22
|
+
# @param node [Nokogiri::XML::Node]
|
23
|
+
# @return [Aranha::Parsers::Html::Base]
|
24
|
+
def from_node(node)
|
25
|
+
from_string(node.to_html)
|
26
|
+
end
|
27
|
+
|
28
|
+
# @param haystack [String]
|
29
|
+
# @param needle [String]
|
30
|
+
# @return [String]
|
31
|
+
def xpath_ends_with(haystack, needle)
|
32
|
+
"substring(#{haystack}, string-length(#{haystack}) - string-length(#{needle}) + 1) " \
|
33
|
+
"= #{needle}"
|
34
|
+
end
|
35
|
+
|
22
36
|
Field = Struct.new(:name, :type, :xpath)
|
23
37
|
end
|
24
38
|
|
@@ -27,16 +27,16 @@ module Aranha
|
|
27
27
|
raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
|
28
28
|
end
|
29
29
|
|
30
|
+
# @param node [Nokogiri::XML::Node]
|
31
|
+
# @param xpath [String]
|
32
|
+
# @return [String]
|
30
33
|
def string_value(node, xpath)
|
31
|
-
|
32
|
-
|
33
|
-
else
|
34
|
-
''
|
35
|
-
end
|
34
|
+
found = node_value(node, xpath)
|
35
|
+
found ? sanitize_string(found.text) : ''
|
36
36
|
end
|
37
37
|
|
38
38
|
def string_recursive_value(node, xpath, required = true)
|
39
|
-
root = node
|
39
|
+
root = node_value(node, xpath)
|
40
40
|
if root.blank?
|
41
41
|
return nil unless required
|
42
42
|
|
@@ -11,13 +11,13 @@ module Aranha
|
|
11
11
|
require_sub __FILE__, include_modules: true
|
12
12
|
|
13
13
|
def array_value(node, xpath)
|
14
|
-
r = node
|
14
|
+
r = node_set_value(node, xpath).map { |n| n.text.strip }
|
15
15
|
r.join('|')
|
16
16
|
end
|
17
17
|
|
18
18
|
def join_value(node, xpath)
|
19
19
|
m = ''
|
20
|
-
node
|
20
|
+
node_set_value(node, xpath).each do |n|
|
21
21
|
m << n.text.strip
|
22
22
|
end
|
23
23
|
m
|
@@ -27,6 +27,20 @@ module Aranha
|
|
27
27
|
m = /(\d+) m/.match(join_value(node, xpath))
|
28
28
|
m ? m[1].to_i : nil
|
29
29
|
end
|
30
|
+
|
31
|
+
# @param node [Nokogiri::XML::Node]
|
32
|
+
# @param xpath [String]
|
33
|
+
# @return [Nokogiri::XML::NodeSet]
|
34
|
+
def node_set_value(node, xpath)
|
35
|
+
node.xpath(xpath)
|
36
|
+
end
|
37
|
+
|
38
|
+
# @param node [Nokogiri::XML::Node]
|
39
|
+
# @param xpath [String]
|
40
|
+
# @return [Nokogiri::XML::Node]
|
41
|
+
def node_value(node, xpath)
|
42
|
+
node.at_xpath(xpath)
|
43
|
+
end
|
30
44
|
end
|
31
45
|
end
|
32
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha-parsers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Esquilo Azul Company
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: '2.8'
|
34
34
|
- - ">="
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: 2.8.
|
36
|
+
version: 2.8.4
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '2.8'
|
44
44
|
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: 2.8.
|
46
|
+
version: 2.8.4
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: eac_envs-http
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -51,6 +51,9 @@ dependencies:
|
|
51
51
|
- - "~>"
|
52
52
|
- !ruby/object:Gem::Version
|
53
53
|
version: '0.3'
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 0.3.1
|
54
57
|
type: :runtime
|
55
58
|
prerelease: false
|
56
59
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,20 +61,23 @@ dependencies:
|
|
58
61
|
- - "~>"
|
59
62
|
- !ruby/object:Gem::Version
|
60
63
|
version: '0.3'
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 0.3.1
|
61
67
|
- !ruby/object:Gem::Dependency
|
62
68
|
name: eac_ruby_utils
|
63
69
|
requirement: !ruby/object:Gem::Requirement
|
64
70
|
requirements:
|
65
71
|
- - "~>"
|
66
72
|
- !ruby/object:Gem::Version
|
67
|
-
version: '0.
|
73
|
+
version: '0.116'
|
68
74
|
type: :runtime
|
69
75
|
prerelease: false
|
70
76
|
version_requirements: !ruby/object:Gem::Requirement
|
71
77
|
requirements:
|
72
78
|
- - "~>"
|
73
79
|
- !ruby/object:Gem::Version
|
74
|
-
version: '0.
|
80
|
+
version: '0.116'
|
75
81
|
- !ruby/object:Gem::Dependency
|
76
82
|
name: nokogiri
|
77
83
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,7 +87,7 @@ dependencies:
|
|
81
87
|
version: '1.14'
|
82
88
|
- - ">="
|
83
89
|
- !ruby/object:Gem::Version
|
84
|
-
version: 1.14.
|
90
|
+
version: 1.14.4
|
85
91
|
type: :runtime
|
86
92
|
prerelease: false
|
87
93
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -91,7 +97,7 @@ dependencies:
|
|
91
97
|
version: '1.14'
|
92
98
|
- - ">="
|
93
99
|
- !ruby/object:Gem::Version
|
94
|
-
version: 1.14.
|
100
|
+
version: 1.14.4
|
95
101
|
- !ruby/object:Gem::Dependency
|
96
102
|
name: ofx-parser
|
97
103
|
requirement: !ruby/object:Gem::Requirement
|