xml_row_finder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +1 -0
- data/lib/xml_row_finder.rb +73 -0
- data.tar.gz.sig +0 -0
- metadata +90 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cfc152e03ec51b624550b4b4bf031eae3e7205c42e1dccbdd4d45219d684860d
|
4
|
+
data.tar.gz: 77be8ae4da89aded4ec73a653e9fe1e3d9815a144a29a37a73158b97174da984
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e83c208f4dfeac7827b9f8ca453baba4f19ff4be9d4341bbd15f46695948afd7af42ae5ad0c71efb994b58832c46b9cf836fcb2d115ffb126a49ebfc26065772
|
7
|
+
data.tar.gz: 24335ada2416257aa9caebd29b687562e92fe2d0ca2ac71f59824448c98eee018e15f95ee60352ed326440a50cb5445d3fb5bd4cbb1a0c47eed58ffc85ff12ec
|
checksums.yaml.gz.sig
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
;�Ac.���n�g�r;�%�6,z{ma��j��~�zJJ����Ù4�r�c{���E���i�lDp��:R=[W�mJ����=#M�b�F���n�@�E��O�F���_FK�-��4Z�*wT��x�RH;�L�G�d�&��Y�遲�Lo&��7�Eב-$:�1�؏��U���)y��"�v{��/nD*:%����^�kz�8�n�%m14Ȯ�I�c\�YGNu�軀�J���{����Xs���=�@V;���M���ʺ���p�p����������Kb<#�J6��}�k���X��~��Hc�b���Ҏ���=���L��N��Գ*k�Rw��������B������jd�F�`WXx����J
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: xml_row_finder.rb
|
4
|
+
|
5
|
+
require 'rexle'
|
6
|
+
require 'rexml'
|
7
|
+
include REXML
|
8
|
+
|
9
|
+
|
10
|
+
# Scans an XML/XHTML string for element paths which occur more than once
# (i.e. candidate "rows") and exposes an XPath string for each via #to_a.
#
class XMLRowFinder

  # Array of XPath strings built by #initialize, one per repeating branch
  # that was found in the document.
  attr_reader :to_a

  # s     - String containing the XML/XHTML markup. It is parsed once by
  #         Rexle (to discover the repeating paths) and once by REXML
  #         (to resolve the parent node of each row).
  # debug - when true, each intermediate XPath is printed to stdout.
  #
  def initialize(s, debug: false)

    @debug = debug
    doc = Rexle.new(s)

    # collect the backtracked xpath of every element in the document
    #
    paths = []

    doc.root.each_recursive do |e|
      # NOTE(review): delete is called with no arguments; presumably this
      # strips every attribute so the generated xpath carries no attribute
      # predicates -- confirm against Rexle's attributes implementation
      e.attributes.delete
      paths << e.backtrack.to_xpath
    end

    # keep the xpaths seen more than once (first-seen order, de-duplicated),
    # each split into its node names. A frequency hash replaces a count()
    # per element, which rescanned the whole list every time.
    #
    frequency = paths.each_with_object(Hash.new(0)) { |path, h| h[path] += 1 }
    a2 = paths.select { |path| frequency[path] > 1 }
              .map { |path| path.split('/') }
              .uniq

    # remove parent nodes on the same branch
    #
    # An entry is dropped when the entry that follows it is the same path
    # plus one extra trailing node. The last entry has no successor and is
    # always kept. A non-mutating reject is used so the look-ahead always
    # reads the untouched list.
    #
    last_index = a2.length - 1

    a2 = a2.reject.with_index do |path, i|
      i < last_index && path == a2[i + 1][0..-2]
    end

    # remove elements from rows which only exist once in the document
    #
    # The number of '//name' matches is cached per node name since the same
    # name usually appears in several rows.
    #
    occurrences = Hash.new do |cache, name|
      cache[name] = doc.root.xpath('//' + name).length
    end

    a3 = a2.map do |row|
      row.reject { |name| occurrences[name] < 2 }
    end

    # add parent node to the row as a reference for the xpath
    #
    a4 = a2.zip(a3).map do |path, row|
      path[-(row.length + 1)..-1]
    end

    # currently using REXML for this XPath since there is a bug in
    # Rexle when attempting the following
    #
    # The REXML document is parsed once here; it does not change between
    # rows, so there is no need to re-parse it for every row.
    #
    doc2 = REXML::Document.new(s)

    # find the parent node attributes
    #
    @to_a = a4.map do |col|

      xpath = "//%s[%s]" % [col[0], col[1..-1].join('/')]
      puts 'xpath: ' + xpath.inspect if @debug

      # NOTE(review): XPath.first returns nil when nothing matches; whether
      # BacktrackXPath tolerates a nil node is not visible here -- confirm
      r = REXML::XPath.first(doc2, xpath)
      xpath_a = BacktrackXPath.new(r).to_xpath

      if col.length >= 3
        "%s/%s[%s]" % [xpath_a, col[1], col[2..-1].join('/')]
      else
        "%s/%s" % [xpath_a, col[1]]
      end

    end

  end

end
|
73
|
+
|
data.tar.gz.sig
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml_row_finder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTIxMjAxOTQyWhcN
|
15
|
+
MjMwMTIxMjAxOTQyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC+EN7D
|
17
|
+
60MdhiAKWBY7jv7itPYA77Jw8h9gWR1Bh3w684JRc4UHIl9d5vjC3h1mdslJq3iQ
|
18
|
+
RZzllFz8os9f5d9CFjbIsbRW/oTshuYcAgVJCPIXq3+1GjTTasFHztvYI/y5tQfe
|
19
|
+
CAMCaH//0u1c+K/XLHG1r5UrMIO0vsaDV3jtYEsHpTxFOg/b1922sIGsw2O35+hL
|
20
|
+
0Pjxlvl/EG9jIYlf+XRdNJAYNTu52YgFZ+uzAj2T8xxxHb7TuEtt3l2y6FKGiSpz
|
21
|
+
8qJzzfPfYcsbpbpXpbv2XYk+gSGgBJTOgp5KWu2/IdZq33EgVBvQYA6xov9hy5Ls
|
22
|
+
mvr0xm5zeeC5CjRPwBK1ZxCQ5Nntf4fF57qGeg9YhlaH2sRtQXHrAevQ6Cxax0oJ
|
23
|
+
3lOgU0RBH7zz17ItVFxb7Bd9teOsrZQfwAemV6WQPsfnYPpI3uKR5OLZlUdVlz8V
|
24
|
+
xnV4maNpknMfarx8f6s9Hj8AYB66K6ro6z1GzsQKj9QYmH+8VqlaK/rjSD8CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNuKD1Egp
|
26
|
+
48/WyUVF8QxmbWYzP+gwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAdTGz4gEgE4eU7tGyhFBeLvQv7RM4arJXFsxxISap
|
29
|
+
HNr2CyeckvJAkmWWNRQS/8G1QRxMMEIJCJw1mlKAUMYR7gAs8AiqdTwC/Q0WNr3/
|
30
|
+
AyUJaDXJ37SPT429sUNoJ9n/0/ChqwH6A4xU/S+owwNtvzBzy4S34vITDy1F5yeF
|
31
|
+
Uuy0aMjAqZGCQNOzVe0lbC0QCxm1OTRdZ4hGFdn5M0lvhPZXjWpLU2Ha5rnFyDyI
|
32
|
+
pluXqAxZwcOmUS3whRdEy20CRSIxKnznEpikm9Xc92RU5k/xhEsxzQgFA3jkGjs6
|
33
|
+
52cQoCLdtaNELCj3WqC+q9TCvPY8j3JRJqCz37+Lp/asof/5/OlX1k8iVUMWRACw
|
34
|
+
qPdaJGkE6iXpro2Nfvj/069UeXNUGSlwROMB/YoDkbamR/+UFIkBXdTQmRnoos6z
|
35
|
+
a/pAsvo0jT6QTnSB7xzsx8LSFDT5tfHKR9Dcn1Y3R06fsh02JvwxaSAMgDBM2aFb
|
36
|
+
2A7/BQ1hD7SU82VTxB1gFIHl
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2022-01-21 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rexle
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.5'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.5.14
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '1.5'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.5.14
|
60
|
+
description:
|
61
|
+
email: digital.robertson@gmail.com
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/xml_row_finder.rb
|
67
|
+
homepage: https://github.com/jrobertson/xml_row_finder
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.2.22
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Attempts to find repeating rows in XHTML and returns the associated xpath.
|
90
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|