rexle 1.5.10 → 1.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexle.rb +374 -349
- data.tar.gz.sig +0 -0
- metadata +11 -11
- metadata.gz.sig +0 -0
data/lib/rexle.rb
CHANGED
@@ -13,32 +13,40 @@ require 'backtrack-xpath'
|
|
13
13
|
|
14
14
|
# modifications:
|
15
15
|
|
16
|
-
#
|
16
|
+
# 15-Jan-2022: bug fix: Related to previous bug fix; Detecting
|
17
|
+
# Attributes::Value is now performed instead of
|
18
|
+
# a String object
|
19
|
+
# 14-Jan-2022: bug fix: Related to previous bug fix; Unescape is
|
20
|
+
# now only applied to objects of type Attributes::Value
|
21
|
+
# 01-Jan-2022: bug fix: Attribute values are no longer unescaped when
|
22
|
+
# called from Rexle#xml
|
23
|
+
# 03-Apr-2021: bug fix: Using *to_a* a CDATA element if present is now output
|
24
|
+
# 20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
|
17
25
|
# 11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
|
18
26
|
# 30-Jul-2020: minor improvement: #plaintext now unescapes &amp; to &
|
19
27
|
# 11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
|
20
28
|
# 23-Apr-2020: feature: Added public method *plaintext*.
|
21
29
|
# 04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
|
22
|
-
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
23
|
-
# 09-Jul-2019: minor improvement: A comment tag now has a
|
30
|
+
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
31
|
+
# 09-Jul-2019: minor improvement: A comment tag now has a
|
24
32
|
# new line when pretty printed
|
25
33
|
# 02-Feb-2019: feature: A comment tag can now have nested elements
|
26
34
|
# 03-Nov-2018: feature: Debug messages can now use coloured text
|
27
35
|
# 02-Oct-2018: feature: Added Rexle::Elements#last
|
28
|
-
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
29
|
-
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
36
|
+
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
37
|
+
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
30
38
|
# xpath contains an *and* operator
|
31
|
-
# 14-Sep-2017: improvement: An *and* operator can now be
|
39
|
+
# 14-Sep-2017: improvement: An *and* operator can now be
|
32
40
|
# used between xpath statements
|
33
41
|
# 10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id]
|
34
|
-
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
35
|
-
# track of the working document when elements are passed to
|
42
|
+
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
43
|
+
# track of the working document when elements are passed to
|
36
44
|
# different documents
|
37
45
|
# bug fix: Element prefixes are now only processed if they exist
|
38
|
-
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
46
|
+
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
39
47
|
# Rexle::Element#next_sibling and Rexle::Element#previous_sibling bugs
|
40
|
-
# 25-Feb-2017: improvement:
|
41
|
-
# An input rexle array can now have an empty array for
|
48
|
+
# 25-Feb-2017: improvement:
|
49
|
+
# An input rexle array can now have an empty array for
|
42
50
|
# children e.g. doc = Rexle.new(["records", {}, "", []])
|
43
51
|
# 25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer
|
44
52
|
|
@@ -52,7 +60,14 @@ module XMLhelper
|
|
52
60
|
scan_print(children).join.force_encoding("utf-8")
|
53
61
|
|
54
62
|
a = self.root.attributes.to_a.map do |k,v|
|
55
|
-
|
63
|
+
|
64
|
+
val = if v.is_a?(Array) then
|
65
|
+
v.join(' ')
|
66
|
+
else
|
67
|
+
v.is_a?(Attributes::Value) ? v.to_s(unescape: false) : v
|
68
|
+
end
|
69
|
+
|
70
|
+
"%s='%s'" % [k, val]
|
56
71
|
end
|
57
72
|
|
58
73
|
xml = "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : \
|
@@ -60,7 +75,7 @@ module XMLhelper
|
|
60
75
|
|
61
76
|
if self.instructions and declaration then
|
62
77
|
processing_instructions() + xml
|
63
|
-
else
|
78
|
+
else
|
64
79
|
xml
|
65
80
|
end
|
66
81
|
end
|
@@ -69,43 +84,43 @@ module XMLhelper
|
|
69
84
|
|
70
85
|
body = pretty_print(children,2).join
|
71
86
|
|
72
|
-
a = self.root.attributes.to_a.map do |k,v|
|
73
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
87
|
+
a = self.root.attributes.to_a.map do |k,v|
|
88
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s)]
|
74
89
|
end
|
75
|
-
|
76
|
-
ind = "\n "
|
90
|
+
|
91
|
+
ind = "\n "
|
77
92
|
xml = "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : \
|
78
93
|
' ' + a.join(' '), ind, body, "\n", self.root.name]
|
79
94
|
|
80
95
|
if self.instructions and declaration then
|
81
96
|
processing_instructions("") + xml
|
82
|
-
else
|
97
|
+
else
|
83
98
|
xml
|
84
99
|
end
|
85
100
|
end
|
86
|
-
|
87
|
-
def inspect()
|
101
|
+
|
102
|
+
def inspect()
|
88
103
|
"#<Rexle:%s>" % [self.object_id]
|
89
104
|
end
|
90
105
|
|
91
106
|
def processing_instructions(s='')
|
92
107
|
self.instructions.map do |instruction|
|
93
|
-
"<?%s?>\n" % instruction.join(' ')
|
108
|
+
"<?%s?>\n" % instruction.join(' ')
|
94
109
|
end.join s
|
95
110
|
end
|
96
111
|
|
97
112
|
def scan_print(nodes)
|
98
113
|
|
99
114
|
r2 = nodes.map do |x|
|
100
|
-
|
115
|
+
|
101
116
|
r = if x.is_a? Rexle::Element then
|
102
117
|
|
103
|
-
a = x.attributes.to_a.map do |k,v|
|
118
|
+
a = x.attributes.to_a.map do |k,v|
|
104
119
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
105
120
|
end
|
106
121
|
|
107
122
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
108
|
-
|
123
|
+
|
109
124
|
non_self_closing_tags = %w(script textarea iframe div object a)
|
110
125
|
|
111
126
|
if (x.children and x.children.length > 0 \
|
@@ -119,20 +134,20 @@ module XMLhelper
|
|
119
134
|
else
|
120
135
|
out = ["<%s/>" % tag]
|
121
136
|
end
|
122
|
-
|
137
|
+
|
123
138
|
elsif x.is_a? String then x
|
124
|
-
elsif x.is_a? Rexle::CData then x.print
|
125
|
-
elsif x.is_a? Rexle::Comment then x.print
|
126
|
-
|
139
|
+
elsif x.is_a? Rexle::CData then x.print
|
140
|
+
elsif x.is_a? Rexle::Comment then x.print
|
141
|
+
|
127
142
|
end
|
128
143
|
|
129
144
|
r
|
130
145
|
end
|
131
|
-
|
146
|
+
|
132
147
|
r2
|
133
148
|
|
134
149
|
end
|
135
|
-
|
150
|
+
|
136
151
|
def scan_to_a(nodes)
|
137
152
|
|
138
153
|
nodes.inject([]) do |r,x|
|
@@ -141,6 +156,10 @@ module XMLhelper
|
|
141
156
|
|
142
157
|
a = [String.new(x.name), Hash.new(x.attributes), x.value.to_s]
|
143
158
|
|
159
|
+
if x.cdatas.any? then
|
160
|
+
a.concat x.cdatas.map {|cdata| ['![', {}, cdata] }
|
161
|
+
end
|
162
|
+
|
144
163
|
(a.concat(scan_to_a(x.children))) if x.children.length > 1
|
145
164
|
r << a
|
146
165
|
elsif x.is_a? String then
|
@@ -151,7 +170,7 @@ module XMLhelper
|
|
151
170
|
end
|
152
171
|
|
153
172
|
end
|
154
|
-
|
173
|
+
|
155
174
|
|
156
175
|
|
157
176
|
def pretty_print(nodes, indent='0')
|
@@ -164,13 +183,13 @@ module XMLhelper
|
|
164
183
|
|
165
184
|
if x.is_a? Rexle::Element then
|
166
185
|
|
167
|
-
a = x.attributes.to_a.map do |k,v|
|
186
|
+
a = x.attributes.to_a.map do |k,v|
|
168
187
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
169
188
|
end
|
170
189
|
a ||= []
|
171
190
|
|
172
191
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
173
|
-
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
192
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
174
193
|
|
175
194
|
if (x.value and x.value.length > 0) \
|
176
195
|
or (x.children and x.children.length > 0 \
|
@@ -178,13 +197,13 @@ module XMLhelper
|
|
178
197
|
x.name == 'script' or x.name == 'textarea' or \
|
179
198
|
x.name == 'iframe' then
|
180
199
|
|
181
|
-
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
200
|
+
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
182
201
|
("\n" + ' ' * indent) : ''
|
183
|
-
|
202
|
+
|
184
203
|
out = ["%s<%s>%s" % [start, tag, ind1]]
|
185
|
-
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
204
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
186
205
|
ind2 = (ind1 and ind1.length > 0) ? ("\n" + ' ' * (indent - 1)) : ''
|
187
|
-
out << "%s</%s>" % [ind2, x.name]
|
206
|
+
out << "%s</%s>" % [ind2, x.name]
|
188
207
|
else
|
189
208
|
|
190
209
|
out = ["%s<%s/>" % [start, tag]]
|
@@ -192,8 +211,8 @@ module XMLhelper
|
|
192
211
|
|
193
212
|
|
194
213
|
elsif x.is_a? String then x.sub(/^[\n\s]+$/,'')
|
195
|
-
elsif x.is_a? Rexle::CData then x.print
|
196
|
-
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
214
|
+
elsif x.is_a? Rexle::CData then x.print
|
215
|
+
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
197
216
|
|
198
217
|
end
|
199
218
|
end
|
@@ -208,17 +227,17 @@ class Rexle
|
|
208
227
|
|
209
228
|
attr_reader :prefixes, :doctype
|
210
229
|
attr_accessor :instructions
|
211
|
-
|
230
|
+
|
212
231
|
def initialize(x=nil, rexle: self, debug: false)
|
213
232
|
|
214
233
|
@rexle, @debug = rexle, debug
|
215
234
|
$debug = @debug
|
216
|
-
|
235
|
+
|
217
236
|
puts 'inside Rexle'.debug if debug
|
218
|
-
|
237
|
+
|
219
238
|
super()
|
220
239
|
|
221
|
-
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
240
|
+
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
222
241
|
@doctype = :xml
|
223
242
|
|
224
243
|
# what type of input is it? Is it a string, array
|
@@ -228,11 +247,11 @@ class Rexle
|
|
228
247
|
Array: proc {|x| x},
|
229
248
|
RexleParser: ->(x){ parse_rexle(x)}
|
230
249
|
}
|
231
|
-
|
250
|
+
|
232
251
|
doc_node = ['doc', Attributes.new]
|
233
|
-
|
252
|
+
|
234
253
|
@a = procs[x.class.to_s.to_sym].call(x)
|
235
|
-
|
254
|
+
|
236
255
|
@doc = scan_element(*(doc_node << @a))
|
237
256
|
|
238
257
|
# fetch the namespaces
|
@@ -243,78 +262,78 @@ class Rexle
|
|
243
262
|
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
244
263
|
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
245
264
|
end
|
246
|
-
|
265
|
+
|
247
266
|
end
|
248
267
|
|
249
268
|
end
|
250
|
-
|
269
|
+
|
251
270
|
def clone()
|
252
271
|
Rexle.new self.to_a
|
253
272
|
end
|
254
|
-
|
273
|
+
|
255
274
|
def at_css(selector)
|
256
275
|
@doc.root.element RexleCSS.new(selector).to_xpath
|
257
|
-
end
|
258
|
-
|
276
|
+
end
|
277
|
+
|
259
278
|
def css(selector)
|
260
|
-
|
261
|
-
a = selector.split(',').flat_map do |x|
|
279
|
+
|
280
|
+
a = selector.split(',').flat_map do |x|
|
262
281
|
@doc.root.xpath RexleCSS.new(x).to_xpath
|
263
282
|
end
|
264
|
-
|
283
|
+
|
265
284
|
return a
|
266
285
|
end
|
267
|
-
|
286
|
+
|
268
287
|
def xpath(path, &blk)
|
269
288
|
@doc.xpath(path, &blk)
|
270
|
-
end
|
289
|
+
end
|
271
290
|
|
272
291
|
class Element
|
273
292
|
include XMLhelper
|
274
|
-
|
293
|
+
|
275
294
|
class Value < String
|
276
|
-
|
295
|
+
|
277
296
|
def initialize(value)
|
278
297
|
super(value)
|
279
298
|
end
|
280
|
-
|
299
|
+
|
281
300
|
def <(val2)
|
282
301
|
self.to_f < val2.to_f
|
283
|
-
end
|
284
|
-
|
302
|
+
end
|
303
|
+
|
285
304
|
def >(val2)
|
286
305
|
self.to_f > val2.to_f
|
287
|
-
end
|
288
|
-
end
|
289
|
-
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
290
309
|
class Attribute
|
291
|
-
|
310
|
+
|
292
311
|
attr_reader :value
|
293
|
-
|
312
|
+
|
294
313
|
def initialize(value)
|
295
314
|
@value = value
|
296
315
|
end
|
297
|
-
|
316
|
+
|
298
317
|
def to_f()
|
299
318
|
@value.to_f
|
300
|
-
end
|
301
|
-
|
319
|
+
end
|
320
|
+
|
302
321
|
def to_i()
|
303
322
|
@value.to_i
|
304
323
|
end
|
305
|
-
|
324
|
+
|
306
325
|
alias to_s value
|
307
|
-
|
326
|
+
|
308
327
|
end
|
309
|
-
|
328
|
+
|
310
329
|
attr_accessor :name, :value, :parent
|
311
330
|
attr_reader :child_elements, :doc_id, :instructions
|
312
|
-
|
331
|
+
|
313
332
|
alias original_clone clone
|
314
333
|
|
315
334
|
def initialize(name=nil, value: nil, attributes: Attributes.new, rexle: self)
|
316
335
|
|
317
|
-
@rexle = rexle
|
336
|
+
@rexle = rexle
|
318
337
|
super()
|
319
338
|
|
320
339
|
@name, @attributes = name.to_s, attributes
|
@@ -324,11 +343,11 @@ class Rexle
|
|
324
343
|
self.add_text value if value
|
325
344
|
|
326
345
|
end
|
327
|
-
|
346
|
+
|
328
347
|
def backtrack(use_attributes: true)
|
329
348
|
BacktrackXPath.new(self, use_attributes: use_attributes)
|
330
349
|
end
|
331
|
-
|
350
|
+
|
332
351
|
def cdata?()
|
333
352
|
self.is_a? CData
|
334
353
|
end
|
@@ -336,8 +355,8 @@ class Rexle
|
|
336
355
|
def contains(raw_args)
|
337
356
|
|
338
357
|
path, raw_val = raw_args.split(',',2)
|
339
|
-
val = raw_val.strip[/^["']?.*["']?$/]
|
340
|
-
|
358
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
359
|
+
|
341
360
|
anode = query_xpath(path)
|
342
361
|
|
343
362
|
return [false] if anode.nil? or anode.empty?
|
@@ -346,21 +365,21 @@ class Rexle
|
|
346
365
|
r = [a.grep(/#{val.sub(/^["'](.*)["']$/,'\1')}/).length > 0]
|
347
366
|
|
348
367
|
r.any?
|
349
|
-
end
|
350
|
-
|
368
|
+
end
|
369
|
+
|
351
370
|
def count(path)
|
352
371
|
length = query_xpath(path).flatten.compact.length
|
353
372
|
length
|
354
373
|
end
|
355
|
-
|
374
|
+
|
356
375
|
def current()
|
357
376
|
self
|
358
377
|
end
|
359
378
|
|
360
379
|
def at_css(selector)
|
361
380
|
self.root.element RexleCSS.new(selector).to_xpath
|
362
|
-
end
|
363
|
-
|
381
|
+
end
|
382
|
+
|
364
383
|
def css(selector)
|
365
384
|
|
366
385
|
selector.split(',')\
|
@@ -371,76 +390,76 @@ class Rexle
|
|
371
390
|
def lowercase(s)
|
372
391
|
|
373
392
|
end
|
374
|
-
|
375
|
-
def max(path)
|
393
|
+
|
394
|
+
def max(path)
|
376
395
|
a = query_xpath(path).flatten.select{|x| x.is_a? String or x.is_a? Rexle::Element::Attribute}.map(&:to_i)
|
377
|
-
a.max
|
396
|
+
a.max
|
378
397
|
end
|
379
|
-
|
398
|
+
|
380
399
|
def name()
|
381
|
-
|
400
|
+
|
382
401
|
if @rexle and @rexle.respond_to? :prefixes then
|
383
|
-
|
402
|
+
|
384
403
|
if @rexle.prefixes.is_a? Array then
|
385
|
-
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
404
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
386
405
|
end
|
387
|
-
|
406
|
+
|
388
407
|
prefix ? @name.sub(prefix + ':', '') : @name
|
389
|
-
|
408
|
+
|
390
409
|
else
|
391
410
|
@name
|
392
411
|
end
|
393
|
-
|
412
|
+
|
394
413
|
end
|
395
|
-
|
396
|
-
def next_element()
|
414
|
+
|
415
|
+
def next_element()
|
397
416
|
|
398
417
|
id = self.object_id
|
399
|
-
a = self.parent.elements
|
418
|
+
a = self.parent.elements
|
400
419
|
|
401
420
|
i = a.index {|x| x.object_id == id} + 2
|
402
421
|
a[i] if i < a.length + 1
|
403
|
-
|
422
|
+
|
404
423
|
end
|
405
|
-
|
424
|
+
|
406
425
|
alias next_sibling next_element
|
407
|
-
|
426
|
+
|
408
427
|
def not(bool)
|
409
428
|
|
410
429
|
r = self.xpath(bool).any?
|
411
430
|
|
412
431
|
!r
|
413
432
|
end
|
414
|
-
|
415
|
-
def previous_element()
|
416
|
-
|
433
|
+
|
434
|
+
def previous_element()
|
435
|
+
|
417
436
|
id = self.object_id
|
418
|
-
a = self.parent.elements
|
437
|
+
a = self.parent.elements
|
419
438
|
i = a.index {|x| x.object_id == id}
|
420
439
|
|
421
|
-
a[i] if i > 0
|
440
|
+
a[i] if i > 0
|
422
441
|
|
423
442
|
end
|
424
|
-
|
443
|
+
|
425
444
|
alias previous_sibling previous_element
|
426
|
-
|
445
|
+
|
427
446
|
def xpath(path, rlist=[], &blk)
|
428
|
-
|
447
|
+
|
429
448
|
#@log.debug 'inside xpath ' + path.inspect
|
430
449
|
|
431
450
|
r = filter_xpath(path, rlist=[], &blk)
|
432
451
|
#@log.debug 'after filter_xpath : ' + r.inspect
|
433
|
-
|
452
|
+
|
434
453
|
if r.is_a?(Array) then
|
435
|
-
|
454
|
+
|
436
455
|
Recordset.new(r.compact)
|
437
|
-
|
456
|
+
|
438
457
|
else
|
439
458
|
r
|
440
459
|
end
|
441
|
-
|
460
|
+
|
442
461
|
end
|
443
|
-
|
462
|
+
|
444
463
|
def filter_xpath(raw_path, rlist=[], &blk)
|
445
464
|
#@log.debug 'inside filter_xpath : ' + raw_path.inspect
|
446
465
|
path = String.new raw_path
|
@@ -450,25 +469,25 @@ class Rexle
|
|
450
469
|
#fn_match = path.match(/^(\w+)\(/)
|
451
470
|
#@log.debug 'fn_match : ' + fn_match.inspect
|
452
471
|
end_fn_match = path.slice!(/\[\w+\(\)\]$/)
|
453
|
-
|
472
|
+
|
454
473
|
if end_fn_match then
|
455
|
-
|
474
|
+
|
456
475
|
m = end_fn_match[1..-4]
|
457
476
|
#@log.debug 'its a function'
|
458
477
|
[method(m.to_sym).call(xpath path)]
|
459
|
-
|
460
|
-
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
478
|
+
|
479
|
+
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
461
480
|
|
462
481
|
procs = {
|
463
482
|
|
464
|
-
Array: proc { |x|
|
465
|
-
if block_given? then
|
466
|
-
x.flatten(1)
|
483
|
+
Array: proc { |x|
|
484
|
+
if block_given? then
|
485
|
+
x.flatten(1)
|
467
486
|
else
|
468
487
|
rs = x.flatten
|
469
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
488
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
470
489
|
end
|
471
|
-
},
|
490
|
+
},
|
472
491
|
String: proc {|x| x},
|
473
492
|
Hash: proc {|x| x},
|
474
493
|
TrueClass: proc{|x| x},
|
@@ -477,25 +496,25 @@ class Rexle
|
|
477
496
|
}
|
478
497
|
bucket = []
|
479
498
|
raw_results = path.split('|').map do |xp|
|
480
|
-
query_xpath(xp.strip, bucket, &blk)
|
499
|
+
query_xpath(xp.strip, bucket, &blk)
|
481
500
|
end
|
482
|
-
|
501
|
+
|
483
502
|
results = raw_results
|
484
503
|
|
485
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
486
|
-
|
504
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
505
|
+
|
487
506
|
elsif fn_match.nil?
|
488
|
-
|
507
|
+
|
489
508
|
procs = {
|
490
509
|
|
491
|
-
Array: proc { |x|
|
492
|
-
if block_given? then
|
493
|
-
x.flatten(1)
|
510
|
+
Array: proc { |x|
|
511
|
+
if block_given? then
|
512
|
+
x.flatten(1)
|
494
513
|
else
|
495
514
|
rs = x.flatten
|
496
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
515
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
497
516
|
end
|
498
|
-
},
|
517
|
+
},
|
499
518
|
String: proc {|x| x},
|
500
519
|
Hash: proc {|x| x},
|
501
520
|
TrueClass: proc{|x| x},
|
@@ -503,23 +522,23 @@ class Rexle
|
|
503
522
|
:"Rexle::Element" => proc {|x| [x]}
|
504
523
|
}
|
505
524
|
bucket = []
|
506
|
-
|
525
|
+
|
507
526
|
results = if path =~ /[\[]|\(/ then
|
508
527
|
|
509
528
|
raw_results = path.split(/\|/).map do |xp|
|
510
|
-
query_xpath(xp.strip, bucket, &blk)
|
529
|
+
query_xpath(xp.strip, bucket, &blk)
|
511
530
|
end
|
512
531
|
|
513
532
|
raw_results.flatten.index(true) ? [true] : []
|
514
|
-
|
533
|
+
|
515
534
|
else
|
516
535
|
raw_results = path.split(/ *(?:\||\band\b) */).map do |xp|
|
517
|
-
query_xpath(xp.strip, bucket, &blk)
|
518
|
-
end
|
536
|
+
query_xpath(xp.strip, bucket, &blk)
|
537
|
+
end
|
519
538
|
|
520
539
|
if path =~ / and / then
|
521
540
|
|
522
|
-
raw_results.flatten.select {|x| x == true or x == false}
|
541
|
+
raw_results.flatten.select {|x| x == true or x == false}
|
523
542
|
|
524
543
|
else
|
525
544
|
raw_results.flatten.index(true) ? [true] : []
|
@@ -528,12 +547,12 @@ class Rexle
|
|
528
547
|
|
529
548
|
return results if !path[/[><]/] and results.any?
|
530
549
|
results = raw_results # .flatten.select {|x| x}
|
531
|
-
|
532
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
550
|
+
|
551
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
533
552
|
|
534
553
|
else
|
535
|
-
|
536
|
-
m, xpath_value, index = fn_match.captures
|
554
|
+
|
555
|
+
m, xpath_value, index = fn_match.captures
|
537
556
|
|
538
557
|
if m == 'text' then
|
539
558
|
a = texts()
|
@@ -547,14 +566,14 @@ class Rexle
|
|
547
566
|
|
548
567
|
end
|
549
568
|
|
550
|
-
end
|
551
|
-
|
569
|
+
end
|
570
|
+
|
552
571
|
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
553
572
|
|
554
573
|
#@log.debug 'query_xpath : ' + raw_xpath_value.inspect
|
555
574
|
#@log.debug '++ ' + self.xml.inspect
|
556
575
|
|
557
|
-
flag_func = false
|
576
|
+
flag_func = false
|
558
577
|
|
559
578
|
xpath_value = raw_xpath_value.sub('child::','./')
|
560
579
|
|
@@ -578,38 +597,38 @@ class Rexle
|
|
578
597
|
|
579
598
|
raw_condition = raw_condition ? raw_condition + '/' + remaining_path \
|
580
599
|
: remaining_path
|
581
|
-
remaining_path = ''
|
600
|
+
remaining_path = ''
|
582
601
|
end
|
583
602
|
|
584
|
-
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
603
|
+
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
585
604
|
|
586
605
|
if r then
|
587
606
|
a_path = raw_path.split(/(?=\/\/)/,2)
|
588
607
|
else
|
589
608
|
a_path = raw_path.split('/',2)
|
590
609
|
end
|
591
|
-
|
610
|
+
|
592
611
|
condition = raw_condition if a_path.length <= 1 #and not raw_condition[/^\[\w+\(.*\)\]$/]
|
593
612
|
|
594
613
|
if raw_path[0,2] == '//' then
|
595
614
|
s = ''
|
596
|
-
elsif raw_path == 'text()'
|
615
|
+
elsif raw_path == 'text()'
|
597
616
|
|
598
617
|
a_path.shift
|
599
618
|
#return @value
|
600
619
|
return self.texts
|
601
620
|
else
|
602
621
|
|
603
|
-
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
604
|
-
|
622
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
623
|
+
|
605
624
|
return @attributes if attribute == '*'
|
606
|
-
|
625
|
+
|
607
626
|
if attribute and @attributes and \
|
608
627
|
@attributes.has_key?(attribute.to_sym) then
|
609
628
|
return [Attribute.new(@attributes[attribute.to_sym])]
|
610
629
|
end
|
611
630
|
s = a_path.shift
|
612
|
-
end
|
631
|
+
end
|
613
632
|
|
614
633
|
# isolate the xpath to return just the path to the current element
|
615
634
|
|
@@ -625,7 +644,7 @@ class Rexle
|
|
625
644
|
if element_name and element_name[/^\d/] then
|
626
645
|
element_name = nil
|
627
646
|
end
|
628
|
-
|
647
|
+
|
629
648
|
condition = raw_xpath_value if element_name.nil?
|
630
649
|
|
631
650
|
else
|
@@ -661,11 +680,11 @@ class Rexle
|
|
661
680
|
r4 = attribute_search(attr_search, self, self.attributes)
|
662
681
|
return r4
|
663
682
|
end
|
664
|
-
|
665
|
-
|
683
|
+
|
684
|
+
|
666
685
|
return_elements = []
|
667
|
-
|
668
|
-
|
686
|
+
|
687
|
+
|
669
688
|
|
670
689
|
if raw_path[0,2] == '//' then
|
671
690
|
|
@@ -680,25 +699,25 @@ class Rexle
|
|
680
699
|
|
681
700
|
if element_name.is_a? String then
|
682
701
|
ename, raw_selector = (element_name.split('::',2)).reverse
|
683
|
-
|
702
|
+
|
684
703
|
selector = case raw_selector
|
685
704
|
when 'following-sibling' then 1
|
686
705
|
when 'preceding-sibling' then -1
|
687
706
|
end
|
688
|
-
|
707
|
+
|
689
708
|
else
|
690
709
|
ename = element_name
|
691
|
-
end
|
710
|
+
end
|
692
711
|
|
693
712
|
if ename == '..' then
|
694
|
-
|
713
|
+
|
695
714
|
remaining_xpath = raw_path[/\.\.\/(.*)/,1]
|
696
715
|
# select the parent element
|
697
716
|
|
698
717
|
r2 = self.parent.xpath(remaining_xpath)
|
699
718
|
|
700
719
|
return r2
|
701
|
-
|
720
|
+
|
702
721
|
elsif ename == '.'
|
703
722
|
|
704
723
|
remaining_xpath = raw_path[1..-1]
|
@@ -707,41 +726,41 @@ class Rexle
|
|
707
726
|
if xpath_value.length > 0 and xpath_value =~ /\[/ then
|
708
727
|
|
709
728
|
r = eval(attr_search.sub(/^h/,'self.attributes'))
|
710
|
-
return self if r
|
729
|
+
return self if r
|
711
730
|
|
712
731
|
else
|
713
732
|
return self
|
714
733
|
end
|
715
734
|
else
|
716
735
|
return self.xpath(remaining_xpath)
|
717
|
-
end
|
736
|
+
end
|
718
737
|
|
719
738
|
elsif element_name.nil?
|
720
739
|
puts ('attr_search: ' + attr_search.inspect).debug if $debug
|
721
|
-
return eval attr_search
|
740
|
+
return eval attr_search
|
722
741
|
else
|
723
742
|
|
724
743
|
if raw_selector.nil? and ename != element_part then
|
725
744
|
|
726
745
|
right_cond = element_part[/#{ename}(.*)/,1]
|
727
746
|
|
728
|
-
end
|
747
|
+
end
|
729
748
|
|
730
749
|
return_elements = @child_elements.map.with_index.select do |x, i|
|
731
750
|
|
732
751
|
next unless x.is_a? Rexle::Element
|
733
752
|
|
734
753
|
#x.name == ename or (ename == '*')
|
735
|
-
|
754
|
+
|
736
755
|
r10 = ((x.name == ename) or (ename == '*'))
|
737
756
|
|
738
|
-
|
757
|
+
|
739
758
|
|
740
759
|
end
|
741
|
-
|
760
|
+
|
742
761
|
if right_cond then
|
743
|
-
|
744
|
-
|
762
|
+
|
763
|
+
|
745
764
|
r12 = return_elements.map do |x, i|
|
746
765
|
|
747
766
|
if x.text then
|
@@ -751,15 +770,15 @@ class Rexle
|
|
751
770
|
else
|
752
771
|
false
|
753
772
|
end
|
754
|
-
|
773
|
+
|
755
774
|
end
|
756
|
-
|
775
|
+
|
757
776
|
return r12
|
758
|
-
|
759
|
-
end
|
760
|
-
|
777
|
+
|
778
|
+
end
|
779
|
+
|
761
780
|
if selector then
|
762
|
-
ne = return_elements.inject([]) do |r,x|
|
781
|
+
ne = return_elements.inject([]) do |r,x|
|
763
782
|
i = x.last + selector
|
764
783
|
if i >= 0 then
|
765
784
|
r << i
|
@@ -770,17 +789,17 @@ class Rexle
|
|
770
789
|
|
771
790
|
return_elements = ne.map {|x| [@child_elements[x], x] if x}
|
772
791
|
end
|
773
|
-
|
792
|
+
|
774
793
|
|
775
794
|
end
|
776
795
|
end
|
777
|
-
|
796
|
+
|
778
797
|
if return_elements.length > 0 then
|
779
798
|
|
780
799
|
if (a_path + [remaining_path]).join.empty? then
|
781
800
|
|
782
801
|
# pass in a block to the filter if it is function contains?
|
783
|
-
rlist = return_elements.map.with_index do |x,i|
|
802
|
+
rlist = return_elements.map.with_index do |x,i|
|
784
803
|
r5 = filter(x, i+1, attr_search, &blk)
|
785
804
|
|
786
805
|
r5
|
@@ -790,9 +809,9 @@ class Rexle
|
|
790
809
|
|
791
810
|
else
|
792
811
|
|
793
|
-
rlist << return_elements.map.with_index do |x,i|
|
812
|
+
rlist << return_elements.map.with_index do |x,i|
|
794
813
|
|
795
|
-
rtn_element = filter(x, i+1, attr_search) do |e|
|
814
|
+
rtn_element = filter(x, i+1, attr_search) do |e|
|
796
815
|
|
797
816
|
r = e.xpath(a_path.join('/') + raw_condition.to_s \
|
798
817
|
+ remaining_path, &blk)
|
@@ -838,7 +857,7 @@ class Rexle
|
|
838
857
|
rlist,&blk)
|
839
858
|
end
|
840
859
|
end
|
841
|
-
|
860
|
+
|
842
861
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) \
|
843
862
|
or (rlist.length > 1 and rlist[0].is_a? Array)
|
844
863
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
@@ -855,21 +874,21 @@ class Rexle
|
|
855
874
|
elsif item.is_a? Rexle::CData then
|
856
875
|
@child_elements << item
|
857
876
|
elsif item.is_a? Rexle::Comment then
|
858
|
-
@child_elements << item
|
877
|
+
@child_elements << item
|
859
878
|
elsif item.is_a? Rexle::Element then
|
860
879
|
|
861
880
|
@child_elements << item
|
862
881
|
# add a reference from this element (the parent) to the child
|
863
882
|
item.parent = self
|
864
|
-
item
|
865
|
-
|
883
|
+
item
|
884
|
+
|
866
885
|
elsif item.is_a? Rexle then
|
867
886
|
self.add_element(item.root)
|
868
887
|
end
|
869
888
|
|
870
|
-
end
|
889
|
+
end
|
871
890
|
|
872
|
-
def add(item)
|
891
|
+
def add(item)
|
873
892
|
|
874
893
|
if item.is_a? Rexle::Element then
|
875
894
|
|
@@ -895,13 +914,13 @@ class Rexle
|
|
895
914
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
896
915
|
else
|
897
916
|
self.xml
|
898
|
-
end
|
917
|
+
end
|
899
918
|
end
|
900
919
|
|
901
920
|
def add_attribute(*x)
|
902
|
-
|
921
|
+
|
903
922
|
proc_hash = lambda {|x| Hash[*x]}
|
904
|
-
|
923
|
+
|
905
924
|
procs = {
|
906
925
|
Hash: lambda {|x| x[0] || {}},
|
907
926
|
String: proc_hash,
|
@@ -919,57 +938,57 @@ class Rexle
|
|
919
938
|
def add_text(s)
|
920
939
|
|
921
940
|
self.child_elements << s
|
922
|
-
self
|
941
|
+
self
|
923
942
|
end
|
924
|
-
|
925
|
-
def attribute(key)
|
926
|
-
|
943
|
+
|
944
|
+
def attribute(key)
|
945
|
+
|
927
946
|
key = key.to_sym if key.is_a? String
|
928
|
-
|
947
|
+
|
929
948
|
if @attributes[key].is_a? String then
|
930
|
-
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
949
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
931
950
|
else
|
932
951
|
@attributes[key]
|
933
952
|
end
|
934
|
-
end
|
935
|
-
|
936
|
-
def attributes() @attributes end
|
937
|
-
|
953
|
+
end
|
954
|
+
|
955
|
+
def attributes() @attributes end
|
956
|
+
|
938
957
|
def cdatas()
|
939
958
|
self.children.inject([]){|r,x| x.is_a?(Rexle::CData) ? r << x.to_s : r }
|
940
959
|
end
|
941
|
-
|
960
|
+
|
942
961
|
def children()
|
943
962
|
|
944
963
|
r = @child_elements
|
945
|
-
|
964
|
+
|
946
965
|
def r.is_an_empty_string?()
|
947
966
|
self.length == 1 and self.first == ''
|
948
|
-
end
|
949
|
-
|
967
|
+
end
|
968
|
+
|
950
969
|
return r
|
951
|
-
end
|
970
|
+
end
|
952
971
|
|
953
972
|
def children=(a) @child_elements = a if a.is_a? Array end
|
954
|
-
|
973
|
+
|
955
974
|
def deep_clone() Rexle.new(self.xml).root end
|
956
|
-
|
957
|
-
def clone()
|
958
|
-
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
975
|
+
|
976
|
+
def clone()
|
977
|
+
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
959
978
|
end
|
960
|
-
|
979
|
+
|
961
980
|
def delete(obj=nil)
|
962
981
|
|
963
982
|
if obj then
|
964
983
|
|
965
984
|
if obj.is_a? String then
|
966
|
-
|
985
|
+
|
967
986
|
self.xpath(obj).each {|e| e.delete; e = nil}
|
968
|
-
|
987
|
+
|
969
988
|
else
|
970
989
|
|
971
990
|
i = @child_elements.index(obj)
|
972
|
-
[@child_elements].each{|x| x.delete_at i} if i
|
991
|
+
[@child_elements].each{|x| x.delete_at i} if i
|
973
992
|
end
|
974
993
|
else
|
975
994
|
|
@@ -991,50 +1010,50 @@ class Rexle
|
|
991
1010
|
String: proc {|x| @child_elements[x]}
|
992
1011
|
}
|
993
1012
|
|
994
|
-
procs[s.class.to_s.to_sym].call(s)
|
1013
|
+
procs[s.class.to_s.to_sym].call(s)
|
995
1014
|
end
|
996
1015
|
|
997
1016
|
def doc_root() @rexle.root end
|
998
1017
|
def each(&blk) self.children.each(&blk) end
|
999
1018
|
def each_recursive(&blk) recursive_scan(self.children,&blk) end
|
1000
1019
|
alias traverse each_recursive
|
1001
|
-
def has_elements?() !self.elements.empty? end
|
1002
|
-
def insert_after(node) insert(node, 1) end
|
1020
|
+
def has_elements?() !self.elements.empty? end
|
1021
|
+
def insert_after(node) insert(node, 1) end
|
1003
1022
|
def insert_before(node) insert(node) end
|
1004
1023
|
def last(a) a.last end
|
1005
|
-
def map(&blk) self.children.map(&blk) end
|
1006
|
-
|
1024
|
+
def map(&blk) self.children.map(&blk) end
|
1025
|
+
|
1007
1026
|
def plaintext()
|
1008
1027
|
CGI.unescapeHTML xml().gsub(/<\/?[^>]+>/,'').gsub('&nbsp;',' ')\
|
1009
1028
|
.gsub(/\n\s+/,' ')
|
1010
1029
|
end
|
1011
|
-
|
1012
|
-
def root() self end
|
1030
|
+
|
1031
|
+
def root() self end
|
1013
1032
|
|
1014
1033
|
def text(s='')
|
1015
|
-
|
1016
|
-
return self.value if s.empty?
|
1017
|
-
|
1034
|
+
|
1035
|
+
return self.value if s.empty?
|
1036
|
+
|
1018
1037
|
e = self.element(s)
|
1019
1038
|
return e if e.is_a? String
|
1020
|
-
|
1039
|
+
|
1021
1040
|
e.value if e
|
1022
1041
|
end
|
1023
|
-
|
1042
|
+
|
1024
1043
|
def texts()
|
1025
1044
|
|
1026
1045
|
r = @child_elements.select do |x|
|
1027
1046
|
x.is_a? String or x.is_a? Rexle::CData
|
1028
1047
|
end
|
1029
|
-
|
1048
|
+
|
1030
1049
|
r.map do |x|
|
1031
1050
|
def x.unescape()
|
1032
1051
|
s = self.to_s.clone
|
1033
1052
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1034
1053
|
s
|
1035
|
-
end
|
1054
|
+
end
|
1036
1055
|
end
|
1037
|
-
|
1056
|
+
|
1038
1057
|
return r
|
1039
1058
|
end
|
1040
1059
|
|
@@ -1042,20 +1061,20 @@ class Rexle
|
|
1042
1061
|
|
1043
1062
|
r = @child_elements.first
|
1044
1063
|
return nil unless r.is_a? String
|
1045
|
-
|
1064
|
+
|
1046
1065
|
def r.unescape()
|
1047
1066
|
s = self.clone
|
1048
1067
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1049
1068
|
s
|
1050
|
-
end
|
1051
|
-
|
1069
|
+
end
|
1070
|
+
|
1052
1071
|
return r
|
1053
1072
|
end
|
1054
|
-
|
1073
|
+
|
1055
1074
|
def value=(raw_s)
|
1056
1075
|
|
1057
1076
|
val = Value.new(raw_s.to_s.clone)
|
1058
|
-
|
1077
|
+
|
1059
1078
|
escape_chars = %w(& &amp; ' &apos; < &lt; > &gt;).each_slice(2).to_a
|
1060
1079
|
escape_chars.each{|x| val.gsub!(*x)}
|
1061
1080
|
|
@@ -1065,9 +1084,15 @@ class Rexle
|
|
1065
1084
|
end
|
1066
1085
|
|
1067
1086
|
alias text= value=
|
1068
|
-
|
1087
|
+
|
1069
1088
|
# Serialises this element to a nested array:
#   [name, attributes-hash, *cdata-entries, *children]
# where each CDATA entry has the form ['![', {}, cdata] and the children
# are converted by #scan_to_a.
def to_a()

  # NOTE(review): Hash.new(obj) builds an EMPTY hash whose default value is
  # obj; it does not copy the attributes. Kept as is because callers may
  # depend on the current output -- confirm whether a copy was intended.
  e = [String.new(self.name), Hash.new(self.attributes)]

  cdata_nodes = self.cdatas
  e.concat(cdata_nodes.map {|cdata| ['![', {}, cdata] }) if cdata_nodes.any?

  [*e, *scan_to_a(self.children)]
end
|
1073
1098
|
|
@@ -1077,10 +1102,10 @@ class Rexle
|
|
1077
1102
|
Hash: lambda {|x|
|
1078
1103
|
o = {pretty: false}.merge(x)
|
1079
1104
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
1080
|
-
|
1105
|
+
|
1081
1106
|
method(msg).call(self.children)
|
1082
1107
|
},
|
1083
|
-
String: lambda {|x|
|
1108
|
+
String: lambda {|x|
|
1084
1109
|
r = self.element(x)
|
1085
1110
|
r ? r.xml : ''
|
1086
1111
|
}
|
@@ -1093,18 +1118,18 @@ class Rexle
|
|
1093
1118
|
end
|
1094
1119
|
|
1095
1120
|
# Puts +item+ at the front of this element's child list, points the
# item's +parent+ at this element, and returns the item.
def prepend(item)
  @child_elements.unshift(item)

  # add a reference from this element (the parent) to the child
  item.parent = self

  item
end
|
1128
|
+
|
1104
1129
|
alias to_s xml
|
1105
1130
|
|
1106
1131
|
private
|
1107
|
-
|
1132
|
+
|
1108
1133
|
def insert(node,offset=0)
|
1109
1134
|
|
1110
1135
|
i = parent.child_elements.index(self)
|
@@ -1116,7 +1141,7 @@ class Rexle
|
|
1116
1141
|
node.instance_variable_set(:@doc_id, self.doc_root.object_id)
|
1117
1142
|
|
1118
1143
|
self
|
1119
|
-
end
|
1144
|
+
end
|
1120
1145
|
|
1121
1146
|
def format_condition(condition)
|
1122
1147
|
|
@@ -1141,21 +1166,21 @@ class Rexle
|
|
1141
1166
|
elsif raw_items[0][/^not\(/]
|
1142
1167
|
|
1143
1168
|
return raw_items[0]
|
1144
|
-
|
1169
|
+
|
1145
1170
|
else
|
1146
1171
|
|
1147
1172
|
andor_items = raw_items.map.with_index\
|
1148
1173
|
.select{|x,i| x[/\band\b|\bor\b/]}\
|
1149
1174
|
.map{|x| [x.last, x.last + 1]}.flatten
|
1150
|
-
|
1175
|
+
|
1151
1176
|
indices = [0] + andor_items + [raw_items.length]
|
1152
1177
|
|
1153
1178
|
if raw_items[0][0] == '@' then
|
1154
1179
|
|
1155
1180
|
raw_items.each{|x| x.gsub!(/^@/,'')}
|
1156
|
-
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1181
|
+
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1157
1182
|
|
1158
|
-
items = cons_items.map do |x|
|
1183
|
+
items = cons_items.map do |x|
|
1159
1184
|
|
1160
1185
|
if x.length >= 3 then
|
1161
1186
|
if x[0] != 'class' then
|
@@ -1174,17 +1199,17 @@ class Rexle
|
|
1174
1199
|
else
|
1175
1200
|
|
1176
1201
|
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1177
|
-
|
1178
|
-
items = cons_items.map do |x|
|
1202
|
+
|
1203
|
+
items = cons_items.map do |x|
|
1179
1204
|
|
1180
1205
|
if x.length >= 3 then
|
1181
1206
|
|
1182
1207
|
x[1] = '==' if x[1] == '='
|
1183
1208
|
if x[0] != '.' then
|
1184
1209
|
if x[0][/\//] then
|
1185
|
-
|
1210
|
+
|
1186
1211
|
path, value = x.values_at(0,-1)
|
1187
|
-
|
1212
|
+
|
1188
1213
|
if x[0][/@\w+$/] then
|
1189
1214
|
"r = e.xpath('#{path}').first; r and r.value == #{value}"
|
1190
1215
|
else
|
@@ -1208,16 +1233,16 @@ class Rexle
|
|
1208
1233
|
|
1209
1234
|
|
1210
1235
|
end
|
1211
|
-
|
1236
|
+
|
1212
1237
|
def scan_match(node, path)
|
1213
|
-
|
1238
|
+
|
1214
1239
|
if path == '//' then
|
1215
|
-
return [node, node.text,
|
1240
|
+
return [node, node.text,
|
1216
1241
|
node.elements.map {|x| scan_match x, path}]
|
1217
1242
|
end
|
1218
|
-
|
1243
|
+
|
1219
1244
|
r = []
|
1220
|
-
xpath2 = path[2..-1]
|
1245
|
+
xpath2 = path[2..-1]
|
1221
1246
|
#jr150316 xpath2.sub!(/^\*\//,'')
|
1222
1247
|
#jr150316xpath2.sub!(/^\*/,self.name)
|
1223
1248
|
#jr150316xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
@@ -1240,17 +1265,17 @@ class Rexle
|
|
1240
1265
|
end
|
1241
1266
|
a
|
1242
1267
|
end
|
1243
|
-
|
1244
|
-
|
1268
|
+
|
1269
|
+
|
1245
1270
|
def filter(raw_element, i, attr_search, &blk)
|
1246
|
-
|
1271
|
+
|
1247
1272
|
x, index = raw_element
|
1248
1273
|
e = @child_elements[index]
|
1249
1274
|
|
1250
1275
|
return unless e.is_a? Rexle::Element
|
1251
1276
|
name, value = e.name, e.value if e.is_a? Rexle::Element
|
1252
1277
|
|
1253
|
-
h = x.attributes # <-- fetch the attributes
|
1278
|
+
h = x.attributes # <-- fetch the attributes
|
1254
1279
|
|
1255
1280
|
if attr_search then
|
1256
1281
|
|
@@ -1266,21 +1291,21 @@ class Rexle
|
|
1266
1291
|
def attribute_search(attr_search, e, h, i=nil, &blk)
|
1267
1292
|
|
1268
1293
|
r2 = if attr_search.is_a? Integer then
|
1269
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
1294
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
1270
1295
|
elsif attr_search[/i\s(?:<|>|==|%)\s\d+/] and eval(attr_search) then
|
1271
|
-
block_given? ? blk.call(e) : e
|
1296
|
+
block_given? ? blk.call(e) : e
|
1272
1297
|
elsif h and !h.empty? and attr_search[/^h\[/] and eval(attr_search) then
|
1273
1298
|
block_given? ? blk.call(e) : e
|
1274
|
-
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1299
|
+
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1275
1300
|
next unless x.is_a? Rexle::Element
|
1276
1301
|
name, attributes, value = x.name, x.attributes, x.value.to_s
|
1277
1302
|
b = eval(attr_search)
|
1278
1303
|
b}.length > 0
|
1279
1304
|
|
1280
1305
|
block_given? ? blk.call(e) : e
|
1281
|
-
|
1282
|
-
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1283
|
-
block_given? ? blk.call(e) : e
|
1306
|
+
|
1307
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1308
|
+
block_given? ? blk.call(e) : e
|
1284
1309
|
elsif attr_search[/^e\.value/]
|
1285
1310
|
|
1286
1311
|
v = attr_search[/[^\s]+$/]
|
@@ -1294,92 +1319,92 @@ class Rexle
|
|
1294
1319
|
block_given? ? blk.call(e) : e
|
1295
1320
|
elsif attr_search[/^\w*\(/] and e.element(attr_search)
|
1296
1321
|
block_given? ? blk.call(e) : e
|
1297
|
-
end
|
1322
|
+
end
|
1298
1323
|
|
1299
1324
|
r2
|
1300
1325
|
end
|
1301
|
-
|
1326
|
+
|
1302
1327
|
# Depth-first walk over +nodes+: the block is called for every
# Rexle::Element encountered (parent before its descendants); any other
# kind of node is skipped. Returns +nodes+.
def recursive_scan(nodes, &blk)

  nodes.each do |node|
    next unless node.is_a? Rexle::Element

    blk.call(node)

    kids = node.children
    recursive_scan(kids, &blk) if kids.length > 0
  end
end
|
1312
|
-
|
1337
|
+
|
1313
1338
|
end # -- end of element --
|
1314
|
-
|
1339
|
+
|
1315
1340
|
|
1316
1341
|
# Wrapper around the raw text of a CDATA section.
class CData

  # val - the text held by the section (defaults to an empty string).
  def initialize(val='')
    @value = val
  end

  # Returns a new CData wrapping the same value.
  def clone()
    CData.new(@value)
  end

  # Same as calling #inspect on the wrapped value.
  def inspect()
    @value.inspect
  end

  # Returns the value wrapped in CDATA delimiters.
  def print()
    "<![CDATA[%s]]>" % @value
  end

  # Returns the wrapped value itself.
  def to_s()
    @value
  end

  # Returns a copy of the value with &lt; &gt; &amp; and &apos; decoded.
  # The wrapped value is not modified. The entity names are written out in
  # full; a table whose two columns are identical would change nothing.
  def unescape()
    # dup (not clone) so a frozen value never blocks the in-place rewrite
    s = @value.dup
    %w(&lt; < &gt; > &amp; & &apos; ').each_slice(2) {|pair| s.gsub!(*pair)}
    s
  end

end
|
1345
|
-
|
1346
|
-
|
1370
|
+
|
1371
|
+
|
1347
1372
|
# An XML comment. The original text is kept in @value; content is also
# accumulated in an internal Element named '_' so that further text and
# elements can be added to the comment.
class Comment

  # val - the initial comment text (defaults to an empty string).
  def initialize(val='')
    # relies on Element#add_text returning the element it was called on
    @e = Element.new('_').add_text val
    @value = val
  end

  # Adds the element +e2+ to the comment's internal element.
  def add_element(e2)
    @e.add e2
  end

  # Adds the text +t+ to the comment's internal element.
  def add_text(t)
    @e.add_text t
  end

  # Returns the original comment text (unquoted).
  def inspect()
    @value
  end

  # Returns the comment markup: every text node found beneath the internal
  # element, joined together and wrapped in <!-- -->.
  def print()
    "<!--%s-->" % @e.root.xpath('//./text()').join
  end

  # Returns the text nodes of the internal element.
  def texts()
    @e.texts
  end

  # Returns the original comment text.
  def to_s()
    @value
  end
end
|
1379
|
-
|
1404
|
+
|
1380
1405
|
class Elements
|
1381
1406
|
include Enumerable
|
1382
|
-
|
1407
|
+
|
1383
1408
|
def initialize(elements=[])
|
1384
1409
|
super()
|
1385
1410
|
@elements = elements
|
@@ -1391,63 +1416,63 @@ class Rexle
|
|
1391
1416
|
i = raw_i - 1
|
1392
1417
|
@elements[i]
|
1393
1418
|
end
|
1394
|
-
|
1419
|
+
|
1395
1420
|
def each(&blk) @elements.each(&blk) end
|
1396
1421
|
def empty?() @elements.empty? end
|
1397
|
-
|
1422
|
+
|
1398
1423
|
def index(e=nil, &blk)
|
1399
|
-
|
1424
|
+
|
1400
1425
|
if block_given? then
|
1401
1426
|
@elements.index(&blk)
|
1402
1427
|
else
|
1403
1428
|
@elements.index e
|
1404
1429
|
end
|
1405
1430
|
end
|
1406
|
-
|
1431
|
+
|
1407
1432
|
def last() @elements.last end
|
1408
1433
|
def length() @elements.length end
|
1409
1434
|
def to_a() @elements end
|
1410
|
-
|
1435
|
+
|
1411
1436
|
end # -- end of elements --
|
1412
1437
|
|
1413
1438
|
|
1414
1439
|
# Builds the document from +x+ or, when +x+ is omitted, from the array
# returned by the given block.
#
# x - a String (parsed with #parse_string) or an Array already in the
#     nested-array form (used as is).
#
# The nested array is stored in @a and the element tree, wrapped in a
# 'doc' node, in @doc. Returns self.
#
# Raises ArgumentError when +x+ is neither a String nor an Array.
#
# The source is converted exactly once and the block form is supported:
# the converter table used to be consulted a second time, which re-parsed
# string input and failed with NoMethodError when only a block was given.
def parse(x=nil)

  a = if x then
    case x
    when String then parse_string(x)
    when Array  then x
    else
      raise ArgumentError, "cannot parse an object of type #{x.class}"
    end
  else
    yield
  end

  @a = a
  @doc = scan_element('doc', Attributes.new, @a)

  self
end
|
1434
1459
|
|
1435
1460
|
# Forwards +x+ to @doc.attribute and returns the result.
# NOTE(review): this calls #attribute, not #add_attribute, on the
# document node -- confirm that is intended.
def add_attribute(x)
  @doc.attribute(x)
end
|
1436
1461
|
# Returns whatever @doc.attribute yields for +key+.
def attribute(key)
  @doc.attribute(key)
end
|
1437
1462
|
# Returns the attributes of the document node (@doc).
def attributes()
  @doc.attributes
end
|
1438
|
-
|
1439
|
-
# Adds +element+ to the document and returns it.
#
# With no document yet (@doc is nil) a new 'doc' node is built around
# +element.to_a+. With an existing document the element is added to the
# document node, unless the document already has a first child element,
# in which case a RuntimeError is raised.
#
# The guard inspects @doc.elements.first (the same lookup #root uses).
# Testing @doc.root cannot work: an element's #root returns the element
# itself, so that test is always true and every call on an existing
# document would raise.
def add_element(element)

  if @doc then
    raise 'attempted adding second root element to document' if @doc.elements.first
    @doc.add_element(element)
  else
    @doc = scan_element('doc', Attributes.new, element.to_a)
  end

  element
end
|
1450
|
-
|
1475
|
+
|
1451
1476
|
# Intentionally empty at document level: accepts +s+, does nothing and
# returns nil.
def add_text(s)
end
|
1452
1477
|
|
1453
1478
|
alias add add_element
|
@@ -1457,26 +1482,26 @@ class Rexle
|
|
1457
1482
|
@doc.xpath(xpath).each {|e| e.delete; e = nil }
|
1458
1483
|
|
1459
1484
|
end
|
1460
|
-
|
1485
|
+
|
1461
1486
|
alias remove delete
|
1462
1487
|
|
1463
|
-
# Returns the first result of running +xpath+ through #xpath.
def element(xpath)
  self.xpath(xpath).first
end
|
1464
1489
|
# Forwards the optional selector +s+ to @doc.elements and returns the result.
def elements(s=nil)
  @doc.elements(s)
end
|
1465
1490
|
# Returns the name reported by @doc.root.
def name()
  @doc.root.name
end
|
1466
1491
|
# Returns the nested-array form of the document held in @a.
def to_a()
  @a
end
|
1467
|
-
|
1468
|
-
# Returns the document as XML by passing +options+ to #xml, or the
# placeholder '<UNDEFINED/>' when no document has been built (@doc is nil).
def to_s(options={})
  return '<UNDEFINED/>' unless @doc

  self.xml(options)
end
|
1472
|
-
|
1497
|
+
|
1473
1498
|
# Forwards +xpath+ to @doc.text and returns the result.
def text(xpath)
  @doc.text(xpath)
end
|
1474
|
-
# Returns the first element beneath the document node (@doc).
def root()
  @doc.elements.first
end
|
1477
1502
|
|
1478
|
-
# Writes the document's XML (as returned by #xml) to +f+, an object that
# responds to #write, and returns the result of that call.
def write(f)
  f.write(xml)
end
|
1481
1506
|
|
1482
1507
|
def xml(options={})
|
@@ -1505,14 +1530,14 @@ class Rexle
|
|
1505
1530
|
private
|
1506
1531
|
|
1507
1532
|
# Parses +x+ with RexleParser, stores the parser's +instructions+ in
# @instructions, and returns the parser's nested-array output.
def parse_rexle(x)
  parser = RexleParser.new(x)
  tree = parser.to_a

  @instructions = parser.instructions

  tree
end
|
1515
|
-
|
1540
|
+
|
1516
1541
|
def parse_string(x)
|
1517
1542
|
|
1518
1543
|
# check if the XML string is a dynarex document
|
@@ -1528,50 +1553,50 @@ class Rexle
|
|
1528
1553
|
'polyrex' => proc {|x| parse_rexle(x)}
|
1529
1554
|
}
|
1530
1555
|
other_parser = procs[recordx_type]
|
1531
|
-
|
1556
|
+
|
1532
1557
|
if other_parser then
|
1533
|
-
|
1558
|
+
|
1534
1559
|
begin
|
1535
1560
|
other_parser.call(x)
|
1536
1561
|
rescue
|
1537
1562
|
parse_rexle x
|
1538
1563
|
end
|
1539
|
-
|
1564
|
+
|
1540
1565
|
else
|
1541
|
-
|
1566
|
+
|
1542
1567
|
parse_rexle x
|
1543
|
-
|
1544
|
-
end
|
1545
|
-
|
1568
|
+
|
1569
|
+
end
|
1570
|
+
|
1546
1571
|
else
|
1547
1572
|
|
1548
1573
|
parse_rexle x
|
1549
|
-
|
1574
|
+
|
1550
1575
|
end
|
1551
1576
|
else
|
1552
1577
|
|
1553
1578
|
parse_rexle x
|
1554
|
-
|
1579
|
+
|
1555
1580
|
end
|
1556
1581
|
|
1557
1582
|
end
|
1558
|
-
|
1583
|
+
|
1559
1584
|
def scan_element(name=nil, attributes=nil, *children)
|
1560
|
-
|
1585
|
+
|
1561
1586
|
return unless name
|
1562
|
-
|
1587
|
+
|
1563
1588
|
return Rexle::CData.new(children.first) if name == '!['
|
1564
1589
|
return Rexle::Comment.new(children.first) if name == '!-'
|
1565
1590
|
|
1566
|
-
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1591
|
+
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1567
1592
|
|
1568
1593
|
if children then
|
1569
1594
|
|
1570
1595
|
children.each do |x4|
|
1571
|
-
|
1596
|
+
|
1572
1597
|
|
1573
1598
|
if x4.is_a? Array then
|
1574
|
-
element.add_element scan_element(*x4)
|
1599
|
+
element.add_element scan_element(*x4)
|
1575
1600
|
elsif x4.is_a? String then
|
1576
1601
|
|
1577
1602
|
e = if x4.is_a? String then
|
@@ -1580,22 +1605,22 @@ class Rexle
|
|
1580
1605
|
elsif x4.name == '![' then
|
1581
1606
|
|
1582
1607
|
Rexle::CData.new(x4)
|
1583
|
-
|
1608
|
+
|
1584
1609
|
elsif x4.name == '!-' then
|
1585
1610
|
|
1586
1611
|
Rexle::Comment.new(x4)
|
1587
|
-
|
1612
|
+
|
1588
1613
|
end
|
1589
1614
|
|
1590
1615
|
element.add_element e
|
1591
1616
|
end
|
1592
1617
|
end
|
1593
1618
|
end
|
1594
|
-
|
1619
|
+
|
1595
1620
|
return element
|
1596
1621
|
end
|
1597
1622
|
|
1598
|
-
|
1623
|
+
|
1599
1624
|
# scan a rexml doc
|
1600
1625
|
#
|
1601
1626
|
def scan_doc(node)
|
@@ -1603,28 +1628,28 @@ class Rexle
|
|
1603
1628
|
attributes = node.attributes.inject({}){|r,x| r.merge(Hash[*x])}
|
1604
1629
|
[node.name, node.text.to_s, attributes, *children]
|
1605
1630
|
end
|
1606
|
-
|
1631
|
+
|
1607
1632
|
# An Array of nodes that can be wrapped in a fresh Rexle document and
# queried with XPath.
class Recordset < Array

  # a - the initial contents, passed straight to Array#initialize.
  def initialize(a)
    super(a)
  end

  # Builds a new Rexle document whose root element is named +root+ and
  # whose children are the #to_a form of every item in the set.
  def to_doc(root: 'root')
    rows = self.map(&:to_a)
    Rexle.new([root, {}, *rows])
  end

  # Runs +xpath+ against the root of a document freshly built by #to_doc.
  def xpath(xpath)
    self.to_doc.root.xpath(xpath)
  end

  # Like #xpath, but calls #element on the freshly built document's root.
  def element(xpath)
    self.to_doc.root.element(xpath)
  end

end
|
1654
|
+
|
1630
1655
|
end
|