xml_col_finder 0.1.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45d703949948a3a22ed318a5297cf621eecd664e842ac1f654c497018e4b9ec1
4
- data.tar.gz: f46900d22268f63f35e525bdc70156e0cf9bf68da7faf292facbe682c9094bc8
3
+ metadata.gz: 4229c7009179e903163ef10f64f0917a7f67e29d588172d184bed94f716123cf
4
+ data.tar.gz: f543fcbacf5ca517995d28fc6050671b48f18c8ae9df539aad6133486d7b74f5
5
5
  SHA512:
6
- metadata.gz: c5da937ee341a1b26a587113633c51912c7f61f7409eebd0197c241860c799c266be394ed247009cbb6158cf4ca5f3de45ed4a46380b49736d25cecf86828f52
7
- data.tar.gz: 2bfc5dee7af91f61327d71fafe91a73ea0e6060e8953dd193e6a9e1b6f839db9a0d33574cc3bb76f2c42824817e47d135edd866b1fb76108496c50d8ce4f7366
6
+ metadata.gz: 22cd4abb94f055f100d0f56993de883fa87d1bcb2e59ef9edaf0f3f46846f49aa2c73f7e0a38f4779d40314ad5a93bfe91b22738d347bf5866afc1b2ed58c105
7
+ data.tar.gz: 10c48be3a91d355aa826ebaf1e5b6329e2f3ec7acabc795e8800eb4758fe0abed45cfaee761a91190f72b4407e7b0a0a9accc19b563ca770057a98aa7dfb148b
checksums.yaml.gz.sig CHANGED
Binary file
@@ -12,10 +12,10 @@ class XMLColFinder
12
12
  def initialize(s, debug: false)
13
13
 
14
14
  @debug = debug
15
- doc = Rexle.new(s)
15
+ @doc = Rexle.new(s)
16
16
 
17
17
  a = []
18
- doc.root.each_recursive do |node|
18
+ @doc.root.each_recursive do |node|
19
19
 
20
20
  if node.text then
21
21
  a << [BacktrackXPath.new(node, ignore_id: true).to_xpath.split('/'),
@@ -30,8 +30,83 @@ class XMLColFinder
30
30
 
31
31
  end
32
32
 
33
# Generates Ruby source code which looks up, and prints, the text values
# found in the scanned document.
#
# nametip - when true, formatline precedes each text lookup with a comment
#           showing a sample of the text that was matched
#
# Returns the generated code as a String. Every line ending in `.text`
# is prefixed with `puts ` so that running the code prints the value.
#
def to_code(nametip: true)

  @nametip = nametip
  @tags = {}   # ids issued by getid; reset so every run numbers from scratch

  xpath, remaining = @to_a

  eid = getid(xpath)
  first_line = formatline('doc', eid, xpath)
  lines = scan(remaining, eid).flatten.compact.prepend(first_line)

  # Entries may themselves hold several lines (comment + statement), so
  # join and re-split before deciding which lines get a `puts`.
  lines.join("\n").lines.map do |line|

    # The dot is escaped and comment lines are skipped; otherwise a name
    # tip such as "# e.g. plain text" would be turned into "puts # e.g. ..."
    if line.match?(/\.text$/) && !line.start_with?('#')
      'puts ' + line
    else
      line
    end

  end.join

end
49
+
50
# Returns the Rexle document which was built from the markup passed to
# the constructor.
#
def to_doc
  @doc
end
53
+
33
54
  private
34
55
 
56
# Builds one line (optionally preceded by a comment) of generated code.
#
# pid   - name of the variable the lookup is performed on
# eid   - name of the variable being assigned; when nil no lookup is
#         generated and the line simply reads the text of pid
# key   - XPath passed to #element (required whenever eid is given)
# tail  - when a String, it is the text found at key: `.text` is appended
#         and, if name tips are enabled, used as the sample in the comment
# index - (only used when eid is nil) reads pid[index].text instead of
#         pid.text
#
# Returns the generated code as a String. The key argument is never modified.
#
def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)

  unless eid
    return index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
  end

  has_text = tail.is_a?(String)
  comment = ''

  if @nametip && has_text

    # keep the sample short enough for a one-line comment
    sample = tail.length < 50 ? tail : tail[0..46] + '...'

    # the last @class in the XPath belongs to the innermost element; only
    # the part of the class name before the first hyphen is used
    klass = key.scan(/@class=['"]([^'"]+)/).last
    name = klass && klass[0][/^[^\-]+/]   # nil when the class starts with '-'

    comment = if name
      # camelCase -> "camel case"; strip the space a leading capital adds
      desc = name.gsub(/(?=[A-Z])/, ' ').downcase.strip
      "\n# %s (e.g. %s)\n" % [desc, sample]
    else
      "\n# e.g. %s\n" % [sample]
    end

  end

  # Rexle XPath bug solution! (an empty class predicate is removed).
  # A copy is made so the caller's string, which may be frozen, is untouched.
  xpath = key.gsub("[@class='']", '')

  line = comment + ("%s = %s.element(\"%s\")" % [eid, pid, xpath])
  line += '.text' if has_text

  line

end
86
+
87
+
88
# Returns a variable name for the element addressed by an XPath, unique
# among the names issued since @tags was last reset.
#
# rawtag - an XPath; only the first run of word characters in its last
#          path segment (the element name) is used
#
# The tags `a` and `p` are renamed to `link` and `para`
# (NOTE(review): presumably to avoid terse names such as `p`, which
# shadows Kernel#p - confirm the original intent).
#
# The first request for a tag returns the tag itself; later requests
# return tag1, tag2, ... Tags already ending in a digit are numbered as
# h1_1, h1_2, ... so that a repeated `h1` can never be handed the id `h2`.
#
# A new String is returned on every call, so ids already handed out are
# never altered by later calls. @tags maps each tag to the number of ids
# issued for it.
#
def getid(rawtag)

  # fall back to a generic name when the segment has no word characters
  name = rawtag.to_s.split('/').last.to_s[/\w+/] || 'node'

  tag = case name
        when 'a' then 'link'
        when 'p' then 'para'
        else name
        end

  @tags ||= {}
  seen = @tags[tag].to_i
  @tags[tag] = seen + 1

  return tag.dup if seen.zero?

  tag.match?(/\d\z/) ? "#{tag}_#{seen}" : "#{tag}#{seen}"

end
108
+
109
+
35
110
  # Groups xpath by matching branches
36
111
  #
37
112
  def group_by_xpath(a)
@@ -73,8 +148,6 @@ class XMLColFinder
73
148
 
74
149
  else
75
150
 
76
- puts "path.join('/'): " + path.join('/').inspect
77
- puts 'txt:' + txt.inspect
78
151
 
79
152
  h2[stickypath.sub(/^\//,'')] ||= []
80
153
  h2[stickypath.sub(/^\//,'')] << [path.join('/'), txt]
@@ -99,6 +172,64 @@ class XMLColFinder
99
172
 
100
173
  end
101
174
 
175
# Walks the grouped XPath structure, producing the generated code lines.
#
# a   - an Array of rows; each row is destructured as [head, tail]
# eid - name of the variable most recently assigned
# pid - name of the variable the next lookup is performed on
#
# Returns an Array of [hline, tline] pairs, one per row. Either member
# may be nil, a String, or a nested Array; callers are expected to
# flatten and compact the result.
#
# Note: eid and pid are deliberately shared by all rows of one call, so
# an assignment made while handling one row is seen by the rows after it.
#
def scan(a, eid='doc', pid=eid.clone)

  a.map do |row|

    head, tail = row

    if head.is_a? Array

      # the row is itself a list of rows
      hline = scan(row, eid, pid)

    elsif head

      if head[0] == '/'

        # a relative XPath: assign the element(s) to a new variable
        key = head[1..-1]
        puts 'key: ' + key.inspect if @debug

        eid = getid(key)

        hline = if tail.is_a?(Array) && tail.all? { |x| x.is_a? String }
          # several text values share this path: fetch them all
          @prev_xpath = true
          "%s = %s.xpath(\"%s\")" % [eid, pid, key]
        else
          @prev_xpath = false
          formatline(pid, eid, key, tail)
        end

      else

        # plain text: read it from the current element, which also
        # becomes the parent for the rows that follow
        pid = eid
        hline = formatline(pid)

      end

    end

    if tail.is_a? Array

      if tail.compact[0].is_a? Array

        puts 'tail: ' + tail.inspect if @debug
        tline = scan(tail, eid)

      # tail[0].to_s guards against an empty tail, for which all? is true
      elsif tail.all? { |x| x.is_a? String } &&
            !tail[0].to_s.start_with?('/')

        puts '_tail: ' + tail.inspect if @debug

        tline = tail.each_index.map do |i|
          pid = eid
          formatline(pid, index: i)
        end

      end

    end

    [hline, tline]

  end

end
231
+
232
+
102
233
  def truncate_xpath(records, offset=0)
103
234
 
104
235
  records.map do |record|
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_col_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
36
36
  ShsxXxzmzIrRENmpBp3tyR3k
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-23 00:00:00.000000000 Z
38
+ date: 2022-01-24 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
metadata.gz.sig CHANGED
Binary file