xml_col_finder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +3 -0
- data/lib/xml_col_finder.rb +125 -0
- data.tar.gz.sig +1 -0
- metadata +91 -0
- metadata.gz.sig +3 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7ad99ebebeb440fb2a0229c49d2ab7e993ea086375bb4cf48264b07897e4c9cc
|
4
|
+
data.tar.gz: f4d6ba7b029cbfff03f147ed4c0da760651f34fa5fca46d675435ed1d78226fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d7daa28d76d6e6f4d1ffc57041cbd4b24b96352bd10b85fe80d65eaa071e8c7e97e77b0ecb877f89e67445ce93aa138a652175da2509a0e853ff13839bfbb48d
|
7
|
+
data.tar.gz: c689924e6f815fa8f4398fb54b8142b4d1ad135f391d0c2836b4a93395c8a58e31b70f42ae05bf8c1b977fde6cc3d8a1c041ce29a3c86f0ef400720f6684f212
|
data/lib/xml_col_finder.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: xml_col_finder.rb
|
4
|
+
|
5
|
+
require 'rexle'
|
6
|
+
|
7
|
+
|
8
|
+
# Groups the text-bearing elements of an XML document by the xpath branch
# they share, and exposes the result (with xpaths made relative to their
# group) through #to_a.
#
class XMLColFinder

  # The value of truncate_xpath(group_by_xpath(...)).flatten(1), built once
  # in #initialize.
  attr_reader :to_a

  # s     - XML source handed to Rexle.new.
  # debug - when true, the grouping/truncation steps print trace output.
  #
  # NOTE(review): BacktrackXPath is not required in this file; presumably
  # it is loaded by 'rexle' - confirm.
  #
  def initialize(s, debug: false)

    @debug = debug
    doc = Rexle.new(s)

    a = []
    doc.root.each_recursive do |node|

      text = node.text
      next unless text

      xpath = BacktrackXPath.new(node, ignore_id: true).to_xpath
      a << [xpath.split('/'), text]

    end

    h = group_by_xpath(a)
    @to_a = truncate_xpath(h).flatten(1)

  end

  # Groups xpath by matching branches
  #
  # a - Array of [path, value] pairs, where path is an Array of xpath
  #     segments and value is the text (or, on recursive passes, the
  #     records already grouped under that path).
  #
  # Returns a Hash mapping the deepest branch shared by more than one
  # record to an Array of [joined_path, value] pairs. While more than one
  # group remains, the groups themselves are grouped again, which nests
  # the result one level deeper per pass.
  #
  def group_by_xpath(a)

    # How many records pass through each cumulative branch.
    counts = Hash.new(0)

    a.each do |path, _value|

      stickypath = ''
      path.each do |name|
        stickypath += '/' + name
        counts[stickypath] += 1
      end

    end

    groups = {}

    a.each do |path, value|

      stickypath = ''

      path.each do |name|

        branch = stickypath + '/' + name
        puts "h[#{branch}]: #{counts[branch].inspect}" if @debug

        # Stop at the first segment that only this record passes through.
        break unless counts[branch] > 1

        stickypath = branch

      end

      # Always file the record, even when every segment of its path is
      # shared (e.g. two elements with the same xpath); previously such
      # records were silently dropped.
      (groups[stickypath] ||= []) << [path.join('/'), value]

    end

    return groups unless groups.length > 1

    group_by_xpath(groups.map { |k, v| [k.split('/'), v] })

  end

  # Rewrites every xpath in a grouped structure so that it is relative to
  # the xpath of the record which contains it.
  #
  # records - Hash or Array of [xpath, value] pairs as produced by
  #           group_by_xpath; value is either the element text or an
  #           Array of nested [xpath, value] pairs.
  # offset  - number of leading characters (counted after leading slashes
  #           are removed) already represented by the enclosing records.
  #
  # Returns an Array of [relative_xpath, value] pairs mirroring the input.
  #
  def truncate_xpath(records, offset=0)

    records.map do |key, value|

      new_key = relative_xpath(key, offset)
      puts 'new_key: ' + new_key.inspect if @debug

      # A leaf pairs an xpath with its text; there is nothing to descend
      # into (calling #map on the text used to raise NoMethodError).
      next [new_key, value] unless value.is_a?(Array)

      child_offset = offset + new_key.length

      new_value = value.map do |k2, v2|

        puts 'k2: ' + k2.inspect if @debug
        new_k2 = relative_xpath(k2, child_offset)
        puts 'new_k2: ' + new_k2.inspect if @debug
        puts 'v2: ' + v2.inspect if @debug

        v2b = if v2.is_a?(Array)
          truncate_xpath(v2, child_offset + new_k2.length)
        else
          v2
        end

        [new_k2, v2b]

      end

      [new_key, new_value]

    end

  end

  private

  # Strips the leading slashes from xpath and returns what follows the
  # first offset characters ('' when the xpath is shorter than offset).
  #
  def relative_xpath(xpath, offset)
    xpath.sub(%r{\A/+}, '')[offset..-1].to_s
  end

end
|
data.tar.gz.sig
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
���U�@��Lgqz%��}�%�t?|B���y�e�SK�=�� c�z��V���.���Fuc:�]�����^���C9��Wp�}H��Va.�AO��bW�����1�T!'*x�]֖*��J�C~?=��8��30k\������eMO�?����^�9�Y]y05#;^u�uw��>�&��q����.ۓwke�
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xml_col_finder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTIyMTk1ODA3WhcN
|
15
|
+
MjMwMTIyMTk1ODA3WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxKE/6
|
17
|
+
Ck1Fv2iEIgj1LOhztmbL7FJoc2a2tQMh9lgF3KT62xRAO/eMVFHz9DvH1QdmVjOH
|
18
|
+
avRuav7MCgJrUV/CT3A00XXDPr1nzJ0iA9eXbTz2lkeFNSBvm9Y/76nnly+7unUO
|
19
|
+
WwUjbjXxWfjkWMI1p9udfVp4QBxvR4Epk8xuDXOTR4TM0minuOvIevXaYdwgs79w
|
20
|
+
xi6d/ZfvqErAMEgHGOXP1lK9OPGNZxDzzhBDTze4uVNCcWPtIKEkrok3G4XgHLH9
|
21
|
+
1LBo0gkHWxxXDbzuRP7jgtt+XFCFg5LyRSYwdsINiDJ15LTxbe05B9h164V6rQe8
|
22
|
+
xIz1FsSapF473bET74YdHI83jej/VDPdFpuPRYSc4RhvPlgbn5ZaCdXTIqwTpyZl
|
23
|
+
eQW8BiR29jv0oG9r+n7LcVgUiIhz2yooFGRR5PhPIVl9Qb5o2ucU7RLDK2eEW5mX
|
24
|
+
gs1gQ9IukkqJCe5pyCu5Zpf7K79QZ5FLSvA+uH9WmQN980q2kn8zdacxL7ECAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUT1pa36Fp
|
26
|
+
N8zQtxF/c1EZqtcdMkgwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEACW1/6uWRV3yjgzYAprp4sq7NPlNxICkGTTspaV83
|
29
|
+
HqyyuQdQ8eUaeM+AnbSpFj2vzoB4fCWnRKnragV2BnfaWzOnMUaxxMBpSt4Dgnyo
|
30
|
+
RhEdlx9toP1WXmL3t7A7E6b5Zcx0ZQ/Q0mR84hJc3o6RR55a2ppHzmdkhMTuFY/T
|
31
|
+
UsN4pysbba4mMJDjwfnoRwWx/3AZl0G/LnaUoOCvCc6lbF5dRlY9dTAvuSZBNb+I
|
32
|
+
If7MfD7Qn8DRM/Zegrz3jtoTyELy7T9MYHerGNDhWKiEIOK6UZOxRc0QEomye5PF
|
33
|
+
wD4JT9wgu+sDjUa9lk6GnpIlfRMttYeEQDNvRJbjnoNy6nfA/30z/W3VPkQAuGvg
|
34
|
+
djJnf2LzCY4CjrZLIg5ifmI2H6YYw1zCI3KtBFXvZSIhFLNC2xhmsEoB9pqpz14n
|
35
|
+
GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
|
36
|
+
ShsxXxzmzIrRENmpBp3tyR3k
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2022-01-22 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rexle
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.5'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.5.14
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '1.5'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.5.14
|
60
|
+
description:
|
61
|
+
email: digital.robertson@gmail.com
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/xml_col_finder.rb
|
67
|
+
homepage: https://github.com/jrobertson/xml_col_finder
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 2.7.10
|
88
|
+
signing_key:
|
89
|
+
specification_version: 4
|
90
|
+
summary: Attempts to return the relative xpath for each element containing text.
|
91
|
+
test_files: []
|
metadata.gz.sig
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
)�~ �T����䑘�0c�$���[X-��]���q�B��k^��X��1J���'���0 r4�_-��4"Ck��H��Mf�����</������[��� f���M^!,�9ۋ�n.M-�..��_+�G����A7d�uÇ)�M� ��K��;�O���K��ˬ����b\j���?�A�#�VU�ȜJ`J����$^���Ɣ���j��6�v��m��G(B/#�Xvc6@Z_3�s�AU��M?���I����FJ�'�Z
|
2
|
+
eF�r��5$*̸4\�o�n��(B���p= yF�����,��B�ߧ��?H2��.
|
3
|
+
�w؇��XQfg���7��an��C����&z��>cX���r��CQr�_���2�P�"�
|