xml_col_finder 0.1.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/xml_col_finder.rb +175 -31
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b98fdd5a28880c8560256ae8a01f5e0a64a3067f2c0974f5c0e668243ce7ad92
|
4
|
+
data.tar.gz: c1177f458579fb3fd7a9d975512d14f5b0e2395ea4cbaa21dc40253b564b78f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73daefd3b3bb3873e21848aad39fd7d9bb570229c8369bb71920bce81474d312b6eb83a9647cf03d66bc4b095788a72dee9925a68d8de8e78554f82155afbdea
|
7
|
+
data.tar.gz: be011c43afe08767e860c436024c65d8b86e507f610ffd7caad367f034cd76dfbfaf5c39882eef9ee6524cc2f90d1de9c564db297f4899df01f3e40e42840885
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/xml_col_finder.rb
CHANGED
@@ -24,11 +24,80 @@ class XMLColFinder
|
|
24
24
|
|
25
25
|
end
|
26
26
|
|
27
|
+
#@to_a = a
|
27
28
|
h = group_by_xpath(a)
|
28
29
|
@to_a = truncate_xpath(h).flatten(1)
|
29
30
|
|
30
31
|
end
|
31
32
|
|
33
|
+
def to_code(nametip: true)
|
34
|
+
|
35
|
+
@nametip = nametip
|
36
|
+
@tags = {}
|
37
|
+
|
38
|
+
xpath, remaining = @to_a
|
39
|
+
|
40
|
+
eid = getid(xpath)
|
41
|
+
linex = formatline('doc', eid, xpath)
|
42
|
+
a = scan(remaining, eid)
|
43
|
+
|
44
|
+
lines = a.flatten.compact.prepend linex
|
45
|
+
lines.join("\n").lines\
|
46
|
+
.map {|line| line =~ /.text$/ ? 'puts ' + line : line }.join
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)
|
53
|
+
|
54
|
+
if eid then
|
55
|
+
|
56
|
+
nametip = @nametip && tail.is_a?(String)
|
57
|
+
klass = nametip ? key.scan(/@class=['"]([^'"]+)/).last : nil
|
58
|
+
|
59
|
+
line = if klass then
|
60
|
+
desc = klass[0][/^[^\-]+/].gsub(/(?=[A-Z])/,' ').downcase
|
61
|
+
desc += " (e.g. %s)" % [tail.length < 50 ? tail : tail[0..46] + '...']
|
62
|
+
"\n# " + desc + "\n"
|
63
|
+
else
|
64
|
+
''
|
65
|
+
end
|
66
|
+
|
67
|
+
line += "%s = %s.element(\"%s\")" % [eid, pid, key]
|
68
|
+
if tail.is_a? String
|
69
|
+
line += '.text'
|
70
|
+
#line += "\n" if nametip
|
71
|
+
end
|
72
|
+
|
73
|
+
else
|
74
|
+
line = index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
|
75
|
+
end
|
76
|
+
|
77
|
+
return line
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
def getid(rawtag)
|
82
|
+
|
83
|
+
rawtagx = rawtag.split('/').last[/\w+/]
|
84
|
+
|
85
|
+
tag = case rawtagx.to_sym
|
86
|
+
when :a
|
87
|
+
'link'
|
88
|
+
else
|
89
|
+
rawtagx
|
90
|
+
end
|
91
|
+
|
92
|
+
if @tags.include?(tag) then
|
93
|
+
@tags[tag] =~ /\d+$/ ? @tags[tag].succ! : @tags[tag] += '1'
|
94
|
+
else
|
95
|
+
@tags[tag] = tag
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
99
|
+
|
100
|
+
|
32
101
|
# Groups xpath by matching branches
|
33
102
|
#
|
34
103
|
def group_by_xpath(a)
|
@@ -48,39 +117,39 @@ class XMLColFinder
|
|
48
117
|
|
49
118
|
h2 = {}
|
50
119
|
|
51
|
-
|
52
|
-
while n < a.length
|
120
|
+
a.each do |path,txt|
|
53
121
|
|
54
|
-
path, txt = a[n]
|
55
122
|
stickypath = ''
|
56
123
|
|
57
|
-
|
124
|
+
n = 0
|
125
|
+
while n < path.length
|
58
126
|
|
59
|
-
|
60
|
-
puts "h[+ stickypath + '/' + name]: " \
|
61
|
-
+ h[stickypath + '/' + name].inspect
|
62
|
-
end
|
127
|
+
name = path[n]
|
63
128
|
|
64
|
-
if h[stickypath + '/' + name] > 1
|
129
|
+
if h[stickypath + '/' + name] > 1
|
65
130
|
|
66
131
|
stickypath += '/' + name
|
67
|
-
|
132
|
+
|
133
|
+
if (n == path.length - 1) then
|
134
|
+
|
135
|
+
h2[stickypath.sub(/^\//,'')] ||= []
|
136
|
+
h2[stickypath.sub(/^\//,'')] << txt
|
137
|
+
|
138
|
+
end
|
68
139
|
|
69
140
|
else
|
70
141
|
|
71
|
-
|
72
|
-
h2[stickypath
|
142
|
+
|
143
|
+
h2[stickypath.sub(/^\//,'')] ||= []
|
144
|
+
h2[stickypath.sub(/^\//,'')] << [path.join('/'), txt]
|
73
145
|
break
|
74
146
|
|
75
147
|
end
|
76
|
-
|
148
|
+
n += 1
|
77
149
|
end
|
78
150
|
|
79
|
-
n += 1
|
80
|
-
|
81
151
|
end
|
82
152
|
|
83
|
-
|
84
153
|
if h2.length > 1 then
|
85
154
|
|
86
155
|
a2 = h2.map {|k,v| [k.split('/'), v]}
|
@@ -91,32 +160,107 @@ class XMLColFinder
|
|
91
160
|
return h2
|
92
161
|
|
93
162
|
end
|
163
|
+
|
94
164
|
end
|
95
165
|
|
96
|
-
def
|
166
|
+
def scan(a, eid='doc', pid=eid.clone)
|
167
|
+
|
168
|
+
#puts 'a: ' + a.inspect if @debug
|
169
|
+
|
170
|
+
a.map do |row|
|
171
|
+
|
172
|
+
head, tail = row
|
173
|
+
|
174
|
+
if head.is_a? Array then
|
97
175
|
|
98
|
-
|
176
|
+
hline = scan(row, eid, pid)
|
99
177
|
|
100
|
-
|
101
|
-
len = new_key.length
|
102
|
-
#puts len.inspect
|
103
|
-
puts 'new_key: ' + new_key.inspect if @debug
|
178
|
+
elsif head
|
104
179
|
|
105
|
-
|
180
|
+
if head[0] == '/' then
|
106
181
|
|
107
|
-
|
108
|
-
|
109
|
-
new_len = new_k2.length
|
110
|
-
puts 'new_k2: ' + new_k2.inspect if @debug
|
111
|
-
puts 'v2: ' + v2.inspect if @debug
|
182
|
+
key = head[1..-1]
|
183
|
+
puts 'key: ' + key.inspect if @debug
|
112
184
|
|
113
|
-
|
185
|
+
eid = getid(key)
|
114
186
|
|
115
|
-
|
187
|
+
hline = if tail.is_a? Array and tail.all? {|x| x.is_a? String } then
|
188
|
+
@prev_xpath = true
|
189
|
+
"%s = %s.xpath(\"%s\")" % [eid, pid, key]
|
190
|
+
else
|
191
|
+
@prev_xpath = false
|
192
|
+
formatline(pid, eid, key, tail)
|
193
|
+
end
|
116
194
|
|
195
|
+
else
|
196
|
+
|
197
|
+
hline = formatline(pid=eid)
|
198
|
+
end
|
117
199
|
end
|
118
200
|
|
119
|
-
|
201
|
+
if tail.is_a? Array then
|
202
|
+
|
203
|
+
if tail.compact[0].is_a? Array then
|
204
|
+
|
205
|
+
puts 'tail: ' + tail.inspect if @debug
|
206
|
+
|
207
|
+
tline = scan(tail, eid)
|
208
|
+
|
209
|
+
elsif tail.all? {|x| x.is_a? String} and tail[0][0] != '/'
|
210
|
+
puts '_tail: ' + tail.inspect if @debug
|
211
|
+
tline = tail.map.with_index do |x,i|
|
212
|
+
formatline(pid=eid, index: i)
|
213
|
+
end
|
214
|
+
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
[hline, tline]
|
219
|
+
end
|
220
|
+
|
221
|
+
end
|
222
|
+
|
223
|
+
|
224
|
+
def truncate_xpath(records, offset=0)
|
225
|
+
|
226
|
+
records.map do |record|
|
227
|
+
|
228
|
+
next unless record
|
229
|
+
if record.is_a? Array and (record[0].is_a? Array or record.length > 2) then
|
230
|
+
|
231
|
+
truncate_xpath(record, offset)
|
232
|
+
|
233
|
+
else
|
234
|
+
|
235
|
+
if record.is_a? String then
|
236
|
+
|
237
|
+
value = record
|
238
|
+
puts 'valuex: ' + value.inspect if @debug
|
239
|
+
value
|
240
|
+
|
241
|
+
else
|
242
|
+
|
243
|
+
key, value = record
|
244
|
+
puts 'key: ' + key.inspect if @debug
|
245
|
+
|
246
|
+
new_key = key[offset..-1]
|
247
|
+
puts 'new_key: ' + new_key.inspect if @debug
|
248
|
+
|
249
|
+
next unless new_key
|
250
|
+
len = new_key.length
|
251
|
+
|
252
|
+
if value.is_a? Array then
|
253
|
+
|
254
|
+
[new_key, truncate_xpath(value, offset+len)]
|
255
|
+
|
256
|
+
else
|
257
|
+
|
258
|
+
puts 'value: ' + value.inspect if @debug
|
259
|
+
[new_key, value]
|
260
|
+
|
261
|
+
end
|
262
|
+
end
|
263
|
+
end
|
120
264
|
|
121
265
|
end
|
122
266
|
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xml_col_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
|
36
36
|
ShsxXxzmzIrRENmpBp3tyR3k
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-01-
|
38
|
+
date: 2022-01-24 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: rexle
|
metadata.gz.sig
CHANGED
Binary file
|