xml_col_finder 0.1.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45d703949948a3a22ed318a5297cf621eecd664e842ac1f654c497018e4b9ec1
4
- data.tar.gz: f46900d22268f63f35e525bdc70156e0cf9bf68da7faf292facbe682c9094bc8
3
+ metadata.gz: 4229c7009179e903163ef10f64f0917a7f67e29d588172d184bed94f716123cf
4
+ data.tar.gz: f543fcbacf5ca517995d28fc6050671b48f18c8ae9df539aad6133486d7b74f5
5
5
  SHA512:
6
- metadata.gz: c5da937ee341a1b26a587113633c51912c7f61f7409eebd0197c241860c799c266be394ed247009cbb6158cf4ca5f3de45ed4a46380b49736d25cecf86828f52
7
- data.tar.gz: 2bfc5dee7af91f61327d71fafe91a73ea0e6060e8953dd193e6a9e1b6f839db9a0d33574cc3bb76f2c42824817e47d135edd866b1fb76108496c50d8ce4f7366
6
+ metadata.gz: 22cd4abb94f055f100d0f56993de883fa87d1bcb2e59ef9edaf0f3f46846f49aa2c73f7e0a38f4779d40314ad5a93bfe91b22738d347bf5866afc1b2ed58c105
7
+ data.tar.gz: 10c48be3a91d355aa826ebaf1e5b6329e2f3ec7acabc795e8800eb4758fe0abed45cfaee761a91190f72b4407e7b0a0a9accc19b563ca770057a98aa7dfb148b
checksums.yaml.gz.sig CHANGED
Binary file
@@ -12,10 +12,10 @@ class XMLColFinder
12
12
  def initialize(s, debug: false)
13
13
 
14
14
  @debug = debug
15
- doc = Rexle.new(s)
15
+ @doc = Rexle.new(s)
16
16
 
17
17
  a = []
18
- doc.root.each_recursive do |node|
18
+ @doc.root.each_recursive do |node|
19
19
 
20
20
  if node.text then
21
21
  a << [BacktrackXPath.new(node, ignore_id: true).to_xpath.split('/'),
@@ -30,8 +30,83 @@ class XMLColFinder
30
30
 
31
31
  end
32
32
 
33
# Generates Ruby source code which reads the discovered columns back out of
# the document. The generated code expects a variable named `doc` to be in
# scope.
#
# nametip - when true, a "# e.g. ..." comment containing sample text is
#           placed above each text lookup (see formatline).
#
# Returns the generated code as a String. Every statement ending in a
# `.text` call is prefixed with `puts `.
#
def to_code(nametip: true)

  @nametip = nametip
  @tags = {}  # names handed out by getid; reset for every run

  # @to_a is presumably populated by initialize as [root xpath, child rows]
  xpath, remaining = @to_a

  eid = getid(xpath)
  linex = formatline('doc', eid, xpath)
  a = scan(remaining, eid)

  lines = a.flatten.compact.prepend linex

  lines.join("\n").lines.map do |line|

    # Only real statements ending in a literal ".text" are printed. Comment
    # lines are skipped, otherwise a name tip such as "# e.g. Some text"
    # would be turned into "puts # e.g. Some text".
    line.match?(/\A(?!\s*#).*\.text$/) ? 'puts ' + line : line

  end.join

end
49
+
50
# Returns the Rexle document that was built from the XML passed to the
# constructor.
#
def to_doc
  @doc
end
53
+
33
54
  private
34
55
 
56
# Builds one statement of generated code, optionally preceded by a name-tip
# comment.
#
# pid   - name of the variable the lookup is made on
# eid   - name of the variable the element is assigned to; when nil only a
#         text lookup on pid is produced
# key   - XPath handed to element(); required whenever eid is given
# tail  - when a String it is treated as the element's sample text: ".text"
#         is appended to the statement and, if @nametip is set, a comment
#         showing the sample is placed above it
# index - when eid is nil, produces "pid[index].text" instead of "pid.text"
#
# Returns the statement as a String.
#
def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)

  unless eid
    return index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
  end

  has_text = tail.is_a?(String)
  line = ''

  if @nametip && has_text

    # Collapse whitespace so multi-line text cannot spill out of the comment
    # and end up as bare (invalid) code, then cap the sample at 50 chars.
    sample = tail.gsub(/\s+/, ' ').strip
    sample = sample[0..46] + '...' unless sample.length < 50

    # The last class attribute in the XPath, up to its first hyphen, is used
    # as a human readable label, e.g. "productTitle-main" -> "product title".
    klass = key.scan(/@class=['"]([^'"]+)/).last
    stem = klass && klass[0][/^[^\-]+/]  # nil when the class starts with "-"

    line = if stem
      "\n# %s (e.g. %s)\n" % [stem.gsub(/(?=[A-Z])/, ' ').downcase, sample]
    else
      "\n# e.g. %s\n" % [sample]
    end

  end

  # Rexle XPath bug solution! Empty class predicates are dropped. This works
  # on a copy so the caller's (possibly frozen) string is left untouched.
  xpath = key.gsub("[@class='']", '')

  line += "%s = %s.element(\"%s\")" % [eid, pid, xpath]
  line += '.text' if has_text

  line
end
86
+
87
+
88
# Returns a variable name for the element an XPath points at.
#
# The name is the tag of the last XPath step ("a" becomes "link" and "p"
# becomes "para"). Repeat requests for the same tag are numbered: div, div1,
# div2 ... A tag which itself ends in a digit gets an underscore before the
# counter (h1, h1_1, h1_2) so it cannot turn into the name of another tag
# such as h2.
#
# rawtag - XPath String (one or more steps, predicates allowed)
#
# Every call returns a fresh String, so names handed out earlier are never
# modified by later calls. @tags records, per tag, how many times it has been
# requested beyond the first.
#
def getid(rawtag)

  # Predicates are removed first so that a "/" inside an attribute value
  # (e.g. @href='/a/b') is not mistaken for a step separator.
  step = rawtag.to_s.gsub(/\[[^\]]*\]/, '').split('/').last

  # fall back to a generic name when the step has no word characters ("*")
  rawtagx = step.to_s[/\w+/] || 'node'

  tag = case rawtagx
  when 'a'
    'link'
  when 'p'
    'para'
  else
    rawtagx
  end

  count = @tags.key?(tag) ? @tags[tag] + 1 : 0
  @tags[tag] = count

  return tag.dup if count.zero?

  tag.match?(/\d\z/) ? "#{tag}_#{count}" : "#{tag}#{count}"

end
108
+
109
+
35
110
  # Groups xpath by matching branches
36
111
  #
37
112
  def group_by_xpath(a)
@@ -73,8 +148,6 @@ class XMLColFinder
73
148
 
74
149
  else
75
150
 
76
- puts "path.join('/'): " + path.join('/').inspect
77
- puts 'txt:' + txt.inspect
78
151
 
79
152
  h2[stickypath.sub(/^\//,'')] ||= []
80
153
  h2[stickypath.sub(/^\//,'')] << [path.join('/'), txt]
@@ -99,6 +172,64 @@ class XMLColFinder
99
172
 
100
173
  end
101
174
 
175
# Walks the nested rows of grouped XPaths and produces the generated code
# lines for them.
#
# a   - Array of rows. A row is [head, tail] where head is either a nested
#       Array of rows, an XPath String starting with "/", or a plain text
#       String, and tail holds the child rows or text values.
# eid - name of the variable most recently assigned
# pid - name of the variable lookups are made on (defaults to a copy of eid)
#
# Returns a nested Array of Strings and nils, one [head line, tail lines]
# pair per row; to_code flattens and compacts it.
#
def scan(a, eid='doc', pid=eid.clone)

  #puts 'a: ' + a.inspect if @debug

  a.map do |row|

    head, tail = row
    hline = tline = nil

    if head.is_a? Array

      hline = scan(row, eid, pid)

    elsif head

      if head[0] == '/'

        key = head[1..-1]
        puts 'key: ' + key.inspect if @debug

        # eid is a method-level variable, so the new name is also seen by
        # the rows which follow this one
        eid = getid(key)

        # a tail made up solely of Strings is fetched with xpath() rather
        # than element()
        @prev_xpath = tail.is_a?(Array) && tail.all? {|x| x.is_a? String }

        hline = if @prev_xpath
          "%s = %s.xpath(\"%s\")" % [eid, pid, key]
        else
          formatline(pid, eid, key, tail)
        end

      else

        # plain text row: read the text of the current element, which also
        # becomes the parent for the rows that follow
        pid = eid
        hline = formatline(pid)

      end
    end

    if tail.is_a? Array

      if tail.compact[0].is_a? Array

        puts 'tail: ' + tail.inspect if @debug

        tline = scan(tail, eid)

      elsif !tail.empty? && tail.all? {|x| x.is_a? String } && tail[0][0] != '/'

        # the empty? guard stops an empty tail reaching tail[0][0] (nil)
        puts '_tail: ' + tail.inspect if @debug

        pid = eid
        tline = tail.each_index.map {|i| formatline(pid, index: i) }

      end
    end

    [hline, tline]
  end

end
231
+
232
+
102
233
  def truncate_xpath(records, offset=0)
103
234
 
104
235
  records.map do |record|
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_col_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
36
36
  ShsxXxzmzIrRENmpBp3tyR3k
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-23 00:00:00.000000000 Z
38
+ date: 2022-01-24 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
metadata.gz.sig CHANGED
Binary file