xml_col_finder 0.1.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ad99ebebeb440fb2a0229c49d2ab7e993ea086375bb4cf48264b07897e4c9cc
4
- data.tar.gz: f4d6ba7b029cbfff03f147ed4c0da760651f34fa5fca46d675435ed1d78226fd
3
+ metadata.gz: b98fdd5a28880c8560256ae8a01f5e0a64a3067f2c0974f5c0e668243ce7ad92
4
+ data.tar.gz: c1177f458579fb3fd7a9d975512d14f5b0e2395ea4cbaa21dc40253b564b78f3
5
5
  SHA512:
6
- metadata.gz: d7daa28d76d6e6f4d1ffc57041cbd4b24b96352bd10b85fe80d65eaa071e8c7e97e77b0ecb877f89e67445ce93aa138a652175da2509a0e853ff13839bfbb48d
7
- data.tar.gz: c689924e6f815fa8f4398fb54b8142b4d1ad135f391d0c2836b4a93395c8a58e31b70f42ae05bf8c1b977fde6cc3d8a1c041ce29a3c86f0ef400720f6684f212
6
+ metadata.gz: 73daefd3b3bb3873e21848aad39fd7d9bb570229c8369bb71920bce81474d312b6eb83a9647cf03d66bc4b095788a72dee9925a68d8de8e78554f82155afbdea
7
+ data.tar.gz: be011c43afe08767e860c436024c65d8b86e507f610ffd7caad367f034cd76dfbfaf5c39882eef9ee6524cc2f90d1de9c564db297f4899df01f3e40e42840885
checksums.yaml.gz.sig CHANGED
Binary file
@@ -24,11 +24,80 @@ class XMLColFinder
24
24
 
25
25
  end
26
26
 
27
+ #@to_a = a
27
28
  h = group_by_xpath(a)
28
29
  @to_a = truncate_xpath(h).flatten(1)
29
30
 
30
31
  end
31
32
 
33
# Builds the generated code listing as a single String.
#
# Resets the per-run state (@nametip, @tags), takes the root xpath and the
# remaining rows from @to_a, and assembles one line per lookup:
# the root element line from formatline('doc', ...) comes first, followed by
# the flattened, nil-free output of scan.
#
# nametip - stored in @nametip; formatline reads it to decide whether to
#           emit descriptive comment lines (default: true).
#
# Returns a String in which every line ending in ".text" is prefixed
# with "puts ".
def to_code(nametip: true)

  @nametip = nametip
  @tags = {}

  xpath, remaining = @to_a

  eid = getid(xpath)
  linex = formatline('doc', eid, xpath)
  a = scan(remaining, eid)

  lines = a.flatten.compact.prepend linex

  # Entries may themselves contain newlines (comment + code), so re-split the
  # joined text into physical lines before deciding which ones to print.
  # The dot is escaped so only a literal ".text" call is matched; an
  # unescaped dot would also match words such as "context".
  lines.join("\n").lines.map do |line|
    line.match?(/\.text$/) ? 'puts ' + line : line
  end.join

end
49
+
50
+ private
51
+
52
# Formats one line of generated code.
#
# pid   - identifier of the parent variable the lookup is made on.
# eid   - identifier to assign the looked-up element to; when nil, only a
#         text read on pid is produced.
# key   - xpath passed to element() in the generated line.
# tail  - when it is a String the generated line ends in ".text" and, if
#         @nametip is set, the String is used as the sample value in the
#         descriptive comment.
# index - optional Integer; with no eid, produces "pid[index].text".
#
# Returns the generated line as a String. When a name tip is produced the
# String starts with "\n# <description> (e.g. <sample>)\n".
def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)

  unless eid
    return index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
  end

  xpath = key.to_s
  has_text = tail.is_a?(String)
  line = ''

  if @nametip && has_text

    klass = xpath.scan(/@class=['"]([^'"]+)/).last
    # Only the part of the class name before the first hyphen is used; a
    # class value that starts with a hyphen yields no name, so no tip.
    name = klass && klass[0][/^[^\-]+/]

    if name
      # camelCase -> "camel case"; strip avoids a doubled space when the
      # name starts with a capital letter.
      desc = name.gsub(/(?=[A-Z])/, ' ').downcase.strip
      # Collapse whitespace so a multi-line sample cannot escape the comment.
      sample = tail.gsub(/\s+/, ' ').strip
      sample = sample[0..46] + '...' unless sample.length < 50
      line = "\n# %s (e.g. %s)\n" % [desc, sample]
    end

  end

  # inspect yields the same double-quoted literal as before for plain keys,
  # and escapes embedded double quotes so the generated line stays valid.
  line += "%s = %s.element(%s)" % [eid, pid, xpath.inspect]
  line += '.text' if has_text

  line
end
79
+
80
+
81
# Returns an identifier for the element named by the last step of rawtag.
#
# The name is the first run of word characters in the last '/'-separated
# segment; the tag "a" is renamed to "link". The first request for a tag
# returns the bare name; later requests return the previously issued id
# with "1" appended, or its successor when it already ends in digits
# (div, div1, div2, ...). The last id issued per tag is kept in @tags.
#
# rawtag - xpath String, e.g. "body/div[@class='x']".
#
# Returns a String. Raises ArgumentError when no tag name can be extracted.
#
# NOTE(review): a tag whose name already ends in digits (e.g. "h1") moves
# straight to its successor ("h2") on the second request, which can clash
# with a real "h2" tag - confirm whether that matters to callers.
def getid(rawtag)

  rawtagx = rawtag.split('/').last[/\w+/]
  raise ArgumentError, "no tag name found in #{rawtag.inspect}" unless rawtagx

  tag = rawtagx == 'a' ? 'link' : rawtagx

  previous = @tags[tag]

  # Always build a new String: the stored id is the very object that was
  # returned to an earlier caller, so mutating it in place (succ!) would
  # silently rename an id that has already been handed out.
  @tags[tag] = if previous.nil?
    tag
  elsif previous.match?(/\d+$/)
    previous.succ
  else
    previous + '1'
  end

end
99
+
100
+
32
101
  # Groups xpath by matching branches
33
102
  #
34
103
  def group_by_xpath(a)
@@ -48,39 +117,39 @@ class XMLColFinder
48
117
 
49
118
  h2 = {}
50
119
 
51
- n = 0
52
- while n < a.length
120
+ a.each do |path,txt|
53
121
 
54
- path, txt = a[n]
55
122
  stickypath = ''
56
123
 
57
- path.each do |name|
124
+ n = 0
125
+ while n < path.length
58
126
 
59
- if @debug then
60
- puts "h[+ stickypath + '/' + name]: " \
61
- + h[stickypath + '/' + name].inspect
62
- end
127
+ name = path[n]
63
128
 
64
- if h[stickypath + '/' + name] > 1 then
129
+ if h[stickypath + '/' + name] > 1
65
130
 
66
131
  stickypath += '/' + name
67
- next
132
+
133
+ if (n == path.length - 1) then
134
+
135
+ h2[stickypath.sub(/^\//,'')] ||= []
136
+ h2[stickypath.sub(/^\//,'')] << txt
137
+
138
+ end
68
139
 
69
140
  else
70
141
 
71
- h2[stickypath] ||= []
72
- h2[stickypath] << [path.join('/'), txt]
142
+
143
+ h2[stickypath.sub(/^\//,'')] ||= []
144
+ h2[stickypath.sub(/^\//,'')] << [path.join('/'), txt]
73
145
  break
74
146
 
75
147
  end
76
-
148
+ n += 1
77
149
  end
78
150
 
79
- n += 1
80
-
81
151
  end
82
152
 
83
-
84
153
  if h2.length > 1 then
85
154
 
86
155
  a2 = h2.map {|k,v| [k.split('/'), v]}
@@ -91,32 +160,107 @@ class XMLColFinder
91
160
  return h2
92
161
 
93
162
  end
163
+
94
164
  end
95
165
 
96
- def truncate_xpath(records, offset=0)
166
# Walks the nested rows in a and produces the generated code lines for them.
#
# a   - Array of rows; each row is destructured as [head, tail].
# eid - identifier of the current element (default 'doc'); it is reassigned
#       whenever a row introduces a new element and carries over to the
#       following rows.
# pid - identifier the lookups are made on (default: a copy of eid); it is
#       reassigned to eid when a plain text read is emitted and likewise
#       carries over to the following rows.
#
# Returns an Array with one [hline, tline] pair per row, where hline is the
# line (or nested result) for the head and tline the lines for the tail;
# either may be nil.
def scan(a, eid='doc', pid=eid.clone)

  #puts 'a: ' + a.inspect if @debug

  a.map do |row|

    head, tail = row

    if head.is_a? Array then

      # the row is itself a list of rows
      hline = scan(row, eid, pid)

    elsif head

      if head[0] == '/' then

        # a head starting with '/' is an xpath for a new element
        key = head[1..-1]
        puts 'key: ' + key.inspect if @debug

        eid = getid(key)

        # a tail made only of Strings is fetched with xpath() and read by
        # index further down; anything else is a single element lookup
        hline = if tail.is_a?(Array) && tail.all? {|x| x.is_a? String } then
          @prev_xpath = true
          "%s = %s.xpath(\"%s\")" % [eid, pid, key]
        else
          @prev_xpath = false
          formatline(pid, eid, key, tail)
        end

      else

        # plain text head: read it from the current element, which also
        # becomes the parent for the rows that follow
        pid = eid
        hline = formatline(pid)
      end
    end

    if tail.is_a? Array then

      if tail.compact[0].is_a? Array then

        puts 'tail: ' + tail.inspect if @debug

        tline = scan(tail, eid)

      # an empty tail has no first item to inspect, so it is skipped here
      # rather than failing on tail[0][0]
      elsif !tail.empty? && tail.all? {|x| x.is_a? String} && tail[0][0] != '/'
        puts '_tail: ' + tail.inspect if @debug

        # the indexed reads are made on the current element, which also
        # becomes the parent for the rows that follow
        pid = eid
        tline = tail.map.with_index do |_x, i|
          formatline(pid, index: i)
        end

      end
    end

    [hline, tline]
  end

end
222
+
223
+
224
+ def truncate_xpath(records, offset=0)
225
+
226
+ records.map do |record|
227
+
228
+ next unless record
229
+ if record.is_a? Array and (record[0].is_a? Array or record.length > 2) then
230
+
231
+ truncate_xpath(record, offset)
232
+
233
+ else
234
+
235
+ if record.is_a? String then
236
+
237
+ value = record
238
+ puts 'valuex: ' + value.inspect if @debug
239
+ value
240
+
241
+ else
242
+
243
+ key, value = record
244
+ puts 'key: ' + key.inspect if @debug
245
+
246
+ new_key = key[offset..-1]
247
+ puts 'new_key: ' + new_key.inspect if @debug
248
+
249
+ next unless new_key
250
+ len = new_key.length
251
+
252
+ if value.is_a? Array then
253
+
254
+ [new_key, truncate_xpath(value, offset+len)]
255
+
256
+ else
257
+
258
+ puts 'value: ' + value.inspect if @debug
259
+ [new_key, value]
260
+
261
+ end
262
+ end
263
+ end
120
264
 
121
265
  end
122
266
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_col_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
36
36
  ShsxXxzmzIrRENmpBp3tyR3k
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-22 00:00:00.000000000 Z
38
+ date: 2022-01-24 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
metadata.gz.sig CHANGED
Binary file