xml_row_finder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +1 -0
- data/lib/xml_row_finder.rb +73 -0
- data.tar.gz.sig +0 -0
- metadata +90 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cfc152e03ec51b624550b4b4bf031eae3e7205c42e1dccbdd4d45219d684860d
|
4
|
+
data.tar.gz: 77be8ae4da89aded4ec73a653e9fe1e3d9815a144a29a37a73158b97174da984
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e83c208f4dfeac7827b9f8ca453baba4f19ff4be9d4341bbd15f46695948afd7af42ae5ad0c71efb994b58832c46b9cf836fcb2d115ffb126a49ebfc26065772
|
7
|
+
data.tar.gz: 24335ada2416257aa9caebd29b687562e92fe2d0ca2ac71f59824448c98eee018e15f95ee60352ed326440a50cb5445d3fb5bd4cbb1a0c47eed58ffc85ff12ec
|
checksums.yaml.gz.sig
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
;�Ac.���n�g�r;�%�6,z{ma��j��~�zJJ����Ù4�r�c{���E���i�lDp��:R=[W�mJ����=#M�b�F���n�@�E��O�F���_FK�-��4Z�*wT��x�RH;�L�G�d�&��Y�遲�Lo&��7�Eב-$:�1�؏��U���)y��"�v{��/nD*:%����^�kz�8�n�%m14Ȯ�I�c\�YGNu�軀�J���{����Xs���=�@V;���M���ʺ���p�p����������Kb<#�J6��}�k���X��~��Hc�b���Ҏ���=���L��N��Գ*k�Rw��������B������jd�F�`WXx����J
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: xml_row_finder.rb
|
4
|
+
|
5
|
+
require 'rexle'
|
6
|
+
require 'rexml'
|
7
|
+
include REXML
|
8
|
+
|
9
|
+
|
10
|
+
# Scans an XML/XHTML string for element paths that occur more than once
# (candidate "rows") and builds one XPath string per detected branch.
#
# The result is available through #to_a once the object is constructed.
#
class XMLRowFinder

  # Array of XPath strings produced by #initialize, one per branch found.
  attr_reader :to_a

  # s     - the XML/XHTML source text; it is parsed by both Rexle and REXML.
  # debug - when true, each intermediate XPath is printed to stdout.
  #
  def initialize(s, debug: false)

    @debug = debug
    doc = Rexle.new(s)

    # currently using REXML for the final XPath lookup since there is a bug
    # in Rexle when attempting it. The document is parsed once here, rather
    # than once per row, because the source string never changes.
    #
    doc2 = REXML::Document.new(s)

    paths = []

    doc.root.each_recursive do |e|
      # NOTE(review): presumably clears the element's attributes so that the
      # backtracked XPath carries no attribute predicates - confirm in Rexle.
      e.attributes.delete
      paths << e.backtrack.to_xpath
    end

    # keep only the paths which occur more than once, counted in a single
    # pass instead of calling Array#count for every element
    #
    occurrences = Hash.new(0)
    paths.each { |path| occurrences[path] += 1 }

    a2 = paths.select { |path| occurrences[path] > 1 }
              .map { |path| path.split('/') }
              .uniq

    # remove parent nodes on the same branch
    #
    # A path is dropped when the entry that follows it is its direct child.
    # The last entry has no follower and is therefore always kept.
    #
    a2 = a2.reject.with_index do |path, i|
      follower = a2[i + 1]
      follower && path == follower[0..-2]
    end

    # remove elements from rows which only exist once in the document
    #
    # The same element names recur across rows, so the result of each
    # document-wide lookup is remembered.
    #
    repeated = Hash.new do |cache, name|
      cache[name] = doc.root.xpath('//' + name).length >= 2
    end

    a3 = a2.map { |row| row.select { |name| repeated[name] } }

    # add parent node to the row as a reference for the xpath
    #
    # NOTE(review): the slice yields nil when no element was removed from the
    # row (row as long as its path), which makes the next step fail - confirm
    # whether such input is possible.
    #
    a4 = a3.map.with_index do |row, i|
      a2[i][-(row.length + 1)..-1]
    end

    # find the parent node attributes
    #
    @to_a = a4.map do |col|

      xpath = "//%s[%s]" % [col[0], col[1..-1].join('/')]
      puts 'xpath: ' + xpath.inspect if @debug

      # first element matching the parent name which contains the row path
      r = REXML::XPath.first(doc2, xpath)
      xpath_a = BacktrackXPath.new(r).to_xpath

      if col.length >= 3
        "%s/%s[%s]" % [xpath_a, col[1], col[2..-1].join('/')]
      else
        "%s/%s" % [xpath_a, col[1]]
      end

    end

  end

end
|
73
|
+
|
data.tar.gz.sig
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml_row_finder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTIxMjAxOTQyWhcN
|
15
|
+
MjMwMTIxMjAxOTQyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC+EN7D
|
17
|
+
60MdhiAKWBY7jv7itPYA77Jw8h9gWR1Bh3w684JRc4UHIl9d5vjC3h1mdslJq3iQ
|
18
|
+
RZzllFz8os9f5d9CFjbIsbRW/oTshuYcAgVJCPIXq3+1GjTTasFHztvYI/y5tQfe
|
19
|
+
CAMCaH//0u1c+K/XLHG1r5UrMIO0vsaDV3jtYEsHpTxFOg/b1922sIGsw2O35+hL
|
20
|
+
0Pjxlvl/EG9jIYlf+XRdNJAYNTu52YgFZ+uzAj2T8xxxHb7TuEtt3l2y6FKGiSpz
|
21
|
+
8qJzzfPfYcsbpbpXpbv2XYk+gSGgBJTOgp5KWu2/IdZq33EgVBvQYA6xov9hy5Ls
|
22
|
+
mvr0xm5zeeC5CjRPwBK1ZxCQ5Nntf4fF57qGeg9YhlaH2sRtQXHrAevQ6Cxax0oJ
|
23
|
+
3lOgU0RBH7zz17ItVFxb7Bd9teOsrZQfwAemV6WQPsfnYPpI3uKR5OLZlUdVlz8V
|
24
|
+
xnV4maNpknMfarx8f6s9Hj8AYB66K6ro6z1GzsQKj9QYmH+8VqlaK/rjSD8CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNuKD1Egp
|
26
|
+
48/WyUVF8QxmbWYzP+gwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAdTGz4gEgE4eU7tGyhFBeLvQv7RM4arJXFsxxISap
|
29
|
+
HNr2CyeckvJAkmWWNRQS/8G1QRxMMEIJCJw1mlKAUMYR7gAs8AiqdTwC/Q0WNr3/
|
30
|
+
AyUJaDXJ37SPT429sUNoJ9n/0/ChqwH6A4xU/S+owwNtvzBzy4S34vITDy1F5yeF
|
31
|
+
Uuy0aMjAqZGCQNOzVe0lbC0QCxm1OTRdZ4hGFdn5M0lvhPZXjWpLU2Ha5rnFyDyI
|
32
|
+
pluXqAxZwcOmUS3whRdEy20CRSIxKnznEpikm9Xc92RU5k/xhEsxzQgFA3jkGjs6
|
33
|
+
52cQoCLdtaNELCj3WqC+q9TCvPY8j3JRJqCz37+Lp/asof/5/OlX1k8iVUMWRACw
|
34
|
+
qPdaJGkE6iXpro2Nfvj/069UeXNUGSlwROMB/YoDkbamR/+UFIkBXdTQmRnoos6z
|
35
|
+
a/pAsvo0jT6QTnSB7xzsx8LSFDT5tfHKR9Dcn1Y3R06fsh02JvwxaSAMgDBM2aFb
|
36
|
+
2A7/BQ1hD7SU82VTxB1gFIHl
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2022-01-21 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rexle
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.5'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.5.14
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '1.5'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.5.14
|
60
|
+
description:
|
61
|
+
email: digital.robertson@gmail.com
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/xml_row_finder.rb
|
67
|
+
homepage: https://github.com/jrobertson/xml_row_finder
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.2.22
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Attempts to find repeating rows in XHTML and returns the associated xpath.
|
90
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|