rexle 1.5.11 → 1.5.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexle.rb +358 -356
- data.tar.gz.sig +0 -0
- metadata +11 -11
- metadata.gz.sig +0 -0
data/lib/rexle.rb
CHANGED
@@ -13,6 +13,8 @@ require 'backtrack-xpath'
|
|
13
13
|
|
14
14
|
# modifications:
|
15
15
|
|
16
|
+
# 01-Jan-2022: bug fix: Attribute values are no longer unescaped when
|
17
|
+
# called from Rexle#xml
|
16
18
|
# 03-Apr-2021: bug fix: When using *to_a*, a CDATA element (if present) is now output
|
17
19
|
# 20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
|
18
20
|
# 11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
|
@@ -20,26 +22,26 @@ require 'backtrack-xpath'
|
|
20
22
|
# 11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
|
21
23
|
# 23-Apr-2020: feature: Added public method *plaintext*.
|
22
24
|
# 04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
|
23
|
-
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
24
|
-
# 09-Jul-2019: minor improvement: A comment tag now has a
|
25
|
+
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
26
|
+
# 09-Jul-2019: minor improvement: A comment tag now has a
|
25
27
|
# new line when pretty printed
|
26
28
|
# 02-Feb-2019: feature: A comment tag can now have nested elements
|
27
29
|
# 03-Nov-2018: feature: Debug messages can now use coloured text
|
28
30
|
# 02-Oct-2018: feature: Added Rexle::Elements#last
|
29
|
-
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
30
|
-
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
31
|
+
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
32
|
+
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
31
33
|
# xpath contains an *and* operator
|
32
|
-
# 14-Sep-2017: improvement: An *and* operator can now be
|
34
|
+
# 14-Sep-2017: improvement: An *and* operator can now be
|
33
35
|
# used between xpath statements
|
34
36
|
# 10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id]
|
35
|
-
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
36
|
-
# track of the working document when elements are passed to
|
37
|
+
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
38
|
+
# track of the working document when elements are passed to
|
37
39
|
# different documents
|
38
40
|
# bug fix: Element prefixes are now only processed if they exist
|
39
|
-
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
41
|
+
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
40
42
|
# Rexle::Element#next_sibling and Rexle::Element#previous_sibling bugs
|
41
|
-
# 25-Feb-2017: improvement:
|
42
|
-
# An input rexle array can now have an empty array for
|
43
|
+
# 25-Feb-2017: improvement:
|
44
|
+
# An input rexle array can now have an empty array for
|
43
45
|
# children e.g. doc = Rexle.new(["records", {}, "", []])
|
44
46
|
# 25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer
|
45
47
|
|
@@ -53,7 +55,7 @@ module XMLhelper
|
|
53
55
|
scan_print(children).join.force_encoding("utf-8")
|
54
56
|
|
55
57
|
a = self.root.attributes.to_a.map do |k,v|
|
56
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s)]
|
58
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s(unescape: false))]
|
57
59
|
end
|
58
60
|
|
59
61
|
xml = "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : \
|
@@ -61,7 +63,7 @@ module XMLhelper
|
|
61
63
|
|
62
64
|
if self.instructions and declaration then
|
63
65
|
processing_instructions() + xml
|
64
|
-
else
|
66
|
+
else
|
65
67
|
xml
|
66
68
|
end
|
67
69
|
end
|
@@ -70,43 +72,43 @@ module XMLhelper
|
|
70
72
|
|
71
73
|
body = pretty_print(children,2).join
|
72
74
|
|
73
|
-
a = self.root.attributes.to_a.map do |k,v|
|
74
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
75
|
+
a = self.root.attributes.to_a.map do |k,v|
|
76
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s(unescape: false))]
|
75
77
|
end
|
76
|
-
|
77
|
-
ind = "\n "
|
78
|
+
|
79
|
+
ind = "\n "
|
78
80
|
xml = "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : \
|
79
81
|
' ' + a.join(' '), ind, body, "\n", self.root.name]
|
80
82
|
|
81
83
|
if self.instructions and declaration then
|
82
84
|
processing_instructions("") + xml
|
83
|
-
else
|
85
|
+
else
|
84
86
|
xml
|
85
87
|
end
|
86
88
|
end
|
87
|
-
|
88
|
-
def inspect()
|
89
|
+
|
90
|
+
def inspect()
|
89
91
|
"#<Rexle:%s>" % [self.object_id]
|
90
92
|
end
|
91
93
|
|
92
94
|
def processing_instructions(s='')
|
93
95
|
self.instructions.map do |instruction|
|
94
|
-
"<?%s?>\n" % instruction.join(' ')
|
96
|
+
"<?%s?>\n" % instruction.join(' ')
|
95
97
|
end.join s
|
96
98
|
end
|
97
99
|
|
98
100
|
def scan_print(nodes)
|
99
101
|
|
100
102
|
r2 = nodes.map do |x|
|
101
|
-
|
103
|
+
|
102
104
|
r = if x.is_a? Rexle::Element then
|
103
105
|
|
104
|
-
a = x.attributes.to_a.map do |k,v|
|
106
|
+
a = x.attributes.to_a.map do |k,v|
|
105
107
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
106
108
|
end
|
107
109
|
|
108
110
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
109
|
-
|
111
|
+
|
110
112
|
non_self_closing_tags = %w(script textarea iframe div object a)
|
111
113
|
|
112
114
|
if (x.children and x.children.length > 0 \
|
@@ -120,31 +122,31 @@ module XMLhelper
|
|
120
122
|
else
|
121
123
|
out = ["<%s/>" % tag]
|
122
124
|
end
|
123
|
-
|
125
|
+
|
124
126
|
elsif x.is_a? String then x
|
125
|
-
elsif x.is_a? Rexle::CData then x.print
|
126
|
-
elsif x.is_a? Rexle::Comment then x.print
|
127
|
-
|
127
|
+
elsif x.is_a? Rexle::CData then x.print
|
128
|
+
elsif x.is_a? Rexle::Comment then x.print
|
129
|
+
|
128
130
|
end
|
129
131
|
|
130
132
|
r
|
131
133
|
end
|
132
|
-
|
134
|
+
|
133
135
|
r2
|
134
136
|
|
135
137
|
end
|
136
|
-
|
138
|
+
|
137
139
|
def scan_to_a(nodes)
|
138
140
|
|
139
141
|
nodes.inject([]) do |r,x|
|
140
142
|
|
141
143
|
if x.is_a? Rexle::Element then
|
142
144
|
|
143
|
-
a = [String.new(x.name), Hash.new(x.attributes), x.value.to_s]
|
145
|
+
a = [String.new(x.name), Hash.new(x.attributes), x.value.to_s]
|
144
146
|
|
145
|
-
if x.cdatas.any? then
|
147
|
+
if x.cdatas.any? then
|
146
148
|
a.concat x.cdatas.map {|cdata| ['![', {}, cdata] }
|
147
|
-
end
|
149
|
+
end
|
148
150
|
|
149
151
|
(a.concat(scan_to_a(x.children))) if x.children.length > 1
|
150
152
|
r << a
|
@@ -156,7 +158,7 @@ module XMLhelper
|
|
156
158
|
end
|
157
159
|
|
158
160
|
end
|
159
|
-
|
161
|
+
|
160
162
|
|
161
163
|
|
162
164
|
def pretty_print(nodes, indent='0')
|
@@ -169,13 +171,13 @@ module XMLhelper
|
|
169
171
|
|
170
172
|
if x.is_a? Rexle::Element then
|
171
173
|
|
172
|
-
a = x.attributes.to_a.map do |k,v|
|
174
|
+
a = x.attributes.to_a.map do |k,v|
|
173
175
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
174
176
|
end
|
175
177
|
a ||= []
|
176
178
|
|
177
179
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
178
|
-
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
180
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
179
181
|
|
180
182
|
if (x.value and x.value.length > 0) \
|
181
183
|
or (x.children and x.children.length > 0 \
|
@@ -183,13 +185,13 @@ module XMLhelper
|
|
183
185
|
x.name == 'script' or x.name == 'textarea' or \
|
184
186
|
x.name == 'iframe' then
|
185
187
|
|
186
|
-
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
188
|
+
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
187
189
|
("\n" + ' ' * indent) : ''
|
188
|
-
|
190
|
+
|
189
191
|
out = ["%s<%s>%s" % [start, tag, ind1]]
|
190
|
-
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
192
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
191
193
|
ind2 = (ind1 and ind1.length > 0) ? ("\n" + ' ' * (indent - 1)) : ''
|
192
|
-
out << "%s</%s>" % [ind2, x.name]
|
194
|
+
out << "%s</%s>" % [ind2, x.name]
|
193
195
|
else
|
194
196
|
|
195
197
|
out = ["%s<%s/>" % [start, tag]]
|
@@ -197,8 +199,8 @@ module XMLhelper
|
|
197
199
|
|
198
200
|
|
199
201
|
elsif x.is_a? String then x.sub(/^[\n\s]+$/,'')
|
200
|
-
elsif x.is_a? Rexle::CData then x.print
|
201
|
-
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
202
|
+
elsif x.is_a? Rexle::CData then x.print
|
203
|
+
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
202
204
|
|
203
205
|
end
|
204
206
|
end
|
@@ -213,17 +215,17 @@ class Rexle
|
|
213
215
|
|
214
216
|
attr_reader :prefixes, :doctype
|
215
217
|
attr_accessor :instructions
|
216
|
-
|
218
|
+
|
217
219
|
def initialize(x=nil, rexle: self, debug: false)
|
218
220
|
|
219
221
|
@rexle, @debug = rexle, debug
|
220
222
|
$debug = @debug
|
221
|
-
|
223
|
+
|
222
224
|
puts 'inside Rexle'.debug if debug
|
223
|
-
|
225
|
+
|
224
226
|
super()
|
225
227
|
|
226
|
-
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
228
|
+
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
227
229
|
@doctype = :xml
|
228
230
|
|
229
231
|
# what type of input is it? Is it a string, array
|
@@ -233,11 +235,11 @@ class Rexle
|
|
233
235
|
Array: proc {|x| x},
|
234
236
|
RexleParser: ->(x){ parse_rexle(x)}
|
235
237
|
}
|
236
|
-
|
238
|
+
|
237
239
|
doc_node = ['doc', Attributes.new]
|
238
|
-
|
240
|
+
|
239
241
|
@a = procs[x.class.to_s.to_sym].call(x)
|
240
|
-
|
242
|
+
|
241
243
|
@doc = scan_element(*(doc_node << @a))
|
242
244
|
|
243
245
|
# fetch the namespaces
|
@@ -248,78 +250,78 @@ class Rexle
|
|
248
250
|
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
249
251
|
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
250
252
|
end
|
251
|
-
|
253
|
+
|
252
254
|
end
|
253
255
|
|
254
256
|
end
|
255
|
-
|
257
|
+
|
256
258
|
def clone()
|
257
259
|
Rexle.new self.to_a
|
258
260
|
end
|
259
|
-
|
261
|
+
|
260
262
|
def at_css(selector)
|
261
263
|
@doc.root.element RexleCSS.new(selector).to_xpath
|
262
|
-
end
|
263
|
-
|
264
|
+
end
|
265
|
+
|
264
266
|
def css(selector)
|
265
|
-
|
266
|
-
a = selector.split(',').flat_map do |x|
|
267
|
+
|
268
|
+
a = selector.split(',').flat_map do |x|
|
267
269
|
@doc.root.xpath RexleCSS.new(x).to_xpath
|
268
270
|
end
|
269
|
-
|
271
|
+
|
270
272
|
return a
|
271
273
|
end
|
272
|
-
|
274
|
+
|
273
275
|
def xpath(path, &blk)
|
274
276
|
@doc.xpath(path, &blk)
|
275
|
-
end
|
277
|
+
end
|
276
278
|
|
277
279
|
class Element
|
278
280
|
include XMLhelper
|
279
|
-
|
281
|
+
|
280
282
|
class Value < String
|
281
|
-
|
283
|
+
|
282
284
|
def initialize(value)
|
283
285
|
super(value)
|
284
286
|
end
|
285
|
-
|
287
|
+
|
286
288
|
def <(val2)
|
287
289
|
self.to_f < val2.to_f
|
288
|
-
end
|
289
|
-
|
290
|
+
end
|
291
|
+
|
290
292
|
def >(val2)
|
291
293
|
self.to_f > val2.to_f
|
292
|
-
end
|
293
|
-
end
|
294
|
-
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
295
297
|
class Attribute
|
296
|
-
|
298
|
+
|
297
299
|
attr_reader :value
|
298
|
-
|
300
|
+
|
299
301
|
def initialize(value)
|
300
302
|
@value = value
|
301
303
|
end
|
302
|
-
|
304
|
+
|
303
305
|
def to_f()
|
304
306
|
@value.to_f
|
305
|
-
end
|
306
|
-
|
307
|
+
end
|
308
|
+
|
307
309
|
def to_i()
|
308
310
|
@value.to_i
|
309
311
|
end
|
310
|
-
|
312
|
+
|
311
313
|
alias to_s value
|
312
|
-
|
314
|
+
|
313
315
|
end
|
314
|
-
|
316
|
+
|
315
317
|
attr_accessor :name, :value, :parent
|
316
318
|
attr_reader :child_elements, :doc_id, :instructions
|
317
|
-
|
319
|
+
|
318
320
|
alias original_clone clone
|
319
321
|
|
320
322
|
def initialize(name=nil, value: nil, attributes: Attributes.new, rexle: self)
|
321
323
|
|
322
|
-
@rexle = rexle
|
324
|
+
@rexle = rexle
|
323
325
|
super()
|
324
326
|
|
325
327
|
@name, @attributes = name.to_s, attributes
|
@@ -329,11 +331,11 @@ class Rexle
|
|
329
331
|
self.add_text value if value
|
330
332
|
|
331
333
|
end
|
332
|
-
|
334
|
+
|
333
335
|
def backtrack(use_attributes: true)
|
334
336
|
BacktrackXPath.new(self, use_attributes: use_attributes)
|
335
337
|
end
|
336
|
-
|
338
|
+
|
337
339
|
def cdata?()
|
338
340
|
self.is_a? CData
|
339
341
|
end
|
@@ -341,8 +343,8 @@ class Rexle
|
|
341
343
|
def contains(raw_args)
|
342
344
|
|
343
345
|
path, raw_val = raw_args.split(',',2)
|
344
|
-
val = raw_val.strip[/^["']?.*["']?$/]
|
345
|
-
|
346
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
347
|
+
|
346
348
|
anode = query_xpath(path)
|
347
349
|
|
348
350
|
return [false] if anode.nil? or anode.empty?
|
@@ -351,21 +353,21 @@ class Rexle
|
|
351
353
|
r = [a.grep(/#{val.sub(/^["'](.*)["']$/,'\1')}/).length > 0]
|
352
354
|
|
353
355
|
r.any?
|
354
|
-
end
|
355
|
-
|
356
|
+
end
|
357
|
+
|
356
358
|
def count(path)
|
357
359
|
length = query_xpath(path).flatten.compact.length
|
358
360
|
length
|
359
361
|
end
|
360
|
-
|
362
|
+
|
361
363
|
def current()
|
362
364
|
self
|
363
365
|
end
|
364
366
|
|
365
367
|
def at_css(selector)
|
366
368
|
self.root.element RexleCSS.new(selector).to_xpath
|
367
|
-
end
|
368
|
-
|
369
|
+
end
|
370
|
+
|
369
371
|
def css(selector)
|
370
372
|
|
371
373
|
selector.split(',')\
|
@@ -376,76 +378,76 @@ class Rexle
|
|
376
378
|
def lowercase(s)
|
377
379
|
|
378
380
|
end
|
379
|
-
|
380
|
-
def max(path)
|
381
|
+
|
382
|
+
def max(path)
|
381
383
|
a = query_xpath(path).flatten.select{|x| x.is_a? String or x.is_a? Rexle::Element::Attribute}.map(&:to_i)
|
382
|
-
a.max
|
384
|
+
a.max
|
383
385
|
end
|
384
|
-
|
386
|
+
|
385
387
|
def name()
|
386
|
-
|
388
|
+
|
387
389
|
if @rexle and @rexle.respond_to? :prefixes then
|
388
|
-
|
390
|
+
|
389
391
|
if @rexle.prefixes.is_a? Array then
|
390
|
-
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
392
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
391
393
|
end
|
392
|
-
|
394
|
+
|
393
395
|
prefix ? @name.sub(prefix + ':', '') : @name
|
394
|
-
|
396
|
+
|
395
397
|
else
|
396
398
|
@name
|
397
399
|
end
|
398
|
-
|
400
|
+
|
399
401
|
end
|
400
|
-
|
401
|
-
def next_element()
|
402
|
+
|
403
|
+
def next_element()
|
402
404
|
|
403
405
|
id = self.object_id
|
404
|
-
a = self.parent.elements
|
406
|
+
a = self.parent.elements
|
405
407
|
|
406
408
|
i = a.index {|x| x.object_id == id} + 2
|
407
409
|
a[i] if i < a.length + 1
|
408
|
-
|
410
|
+
|
409
411
|
end
|
410
|
-
|
412
|
+
|
411
413
|
alias next_sibling next_element
|
412
|
-
|
414
|
+
|
413
415
|
def not(bool)
|
414
416
|
|
415
417
|
r = self.xpath(bool).any?
|
416
418
|
|
417
419
|
!r
|
418
420
|
end
|
419
|
-
|
420
|
-
def previous_element()
|
421
|
-
|
421
|
+
|
422
|
+
def previous_element()
|
423
|
+
|
422
424
|
id = self.object_id
|
423
|
-
a = self.parent.elements
|
425
|
+
a = self.parent.elements
|
424
426
|
i = a.index {|x| x.object_id == id}
|
425
427
|
|
426
|
-
a[i] if i > 0
|
428
|
+
a[i] if i > 0
|
427
429
|
|
428
430
|
end
|
429
|
-
|
431
|
+
|
430
432
|
alias previous_sibling previous_element
|
431
|
-
|
433
|
+
|
432
434
|
def xpath(path, rlist=[], &blk)
|
433
|
-
|
435
|
+
|
434
436
|
#@log.debug 'inside xpath ' + path.inspect
|
435
437
|
|
436
438
|
r = filter_xpath(path, rlist=[], &blk)
|
437
439
|
#@log.debug 'after filter_xpath : ' + r.inspect
|
438
|
-
|
440
|
+
|
439
441
|
if r.is_a?(Array) then
|
440
|
-
|
442
|
+
|
441
443
|
Recordset.new(r.compact)
|
442
|
-
|
444
|
+
|
443
445
|
else
|
444
446
|
r
|
445
447
|
end
|
446
|
-
|
448
|
+
|
447
449
|
end
|
448
|
-
|
450
|
+
|
449
451
|
def filter_xpath(raw_path, rlist=[], &blk)
|
450
452
|
#@log.debug 'inside filter_xpath : ' + raw_path.inspect
|
451
453
|
path = String.new raw_path
|
@@ -455,25 +457,25 @@ class Rexle
|
|
455
457
|
#fn_match = path.match(/^(\w+)\(/)
|
456
458
|
#@log.debug 'fn_match : ' + fn_match.inspect
|
457
459
|
end_fn_match = path.slice!(/\[\w+\(\)\]$/)
|
458
|
-
|
460
|
+
|
459
461
|
if end_fn_match then
|
460
|
-
|
462
|
+
|
461
463
|
m = end_fn_match[1..-4]
|
462
464
|
#@log.debug 'its a function'
|
463
465
|
[method(m.to_sym).call(xpath path)]
|
464
|
-
|
465
|
-
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
466
|
+
|
467
|
+
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
466
468
|
|
467
469
|
procs = {
|
468
470
|
|
469
|
-
Array: proc { |x|
|
470
|
-
if block_given? then
|
471
|
-
x.flatten(1)
|
471
|
+
Array: proc { |x|
|
472
|
+
if block_given? then
|
473
|
+
x.flatten(1)
|
472
474
|
else
|
473
475
|
rs = x.flatten
|
474
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
476
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
475
477
|
end
|
476
|
-
},
|
478
|
+
},
|
477
479
|
String: proc {|x| x},
|
478
480
|
Hash: proc {|x| x},
|
479
481
|
TrueClass: proc{|x| x},
|
@@ -482,25 +484,25 @@ class Rexle
|
|
482
484
|
}
|
483
485
|
bucket = []
|
484
486
|
raw_results = path.split('|').map do |xp|
|
485
|
-
query_xpath(xp.strip, bucket, &blk)
|
487
|
+
query_xpath(xp.strip, bucket, &blk)
|
486
488
|
end
|
487
|
-
|
489
|
+
|
488
490
|
results = raw_results
|
489
491
|
|
490
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
491
|
-
|
492
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
493
|
+
|
492
494
|
elsif fn_match.nil?
|
493
|
-
|
495
|
+
|
494
496
|
procs = {
|
495
497
|
|
496
|
-
Array: proc { |x|
|
497
|
-
if block_given? then
|
498
|
-
x.flatten(1)
|
498
|
+
Array: proc { |x|
|
499
|
+
if block_given? then
|
500
|
+
x.flatten(1)
|
499
501
|
else
|
500
502
|
rs = x.flatten
|
501
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
503
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
502
504
|
end
|
503
|
-
},
|
505
|
+
},
|
504
506
|
String: proc {|x| x},
|
505
507
|
Hash: proc {|x| x},
|
506
508
|
TrueClass: proc{|x| x},
|
@@ -508,23 +510,23 @@ class Rexle
|
|
508
510
|
:"Rexle::Element" => proc {|x| [x]}
|
509
511
|
}
|
510
512
|
bucket = []
|
511
|
-
|
513
|
+
|
512
514
|
results = if path =~ /[\[]|\(/ then
|
513
515
|
|
514
516
|
raw_results = path.split(/\|/).map do |xp|
|
515
|
-
query_xpath(xp.strip, bucket, &blk)
|
517
|
+
query_xpath(xp.strip, bucket, &blk)
|
516
518
|
end
|
517
519
|
|
518
520
|
raw_results.flatten.index(true) ? [true] : []
|
519
|
-
|
521
|
+
|
520
522
|
else
|
521
523
|
raw_results = path.split(/ *(?:\||\band\b) */).map do |xp|
|
522
|
-
query_xpath(xp.strip, bucket, &blk)
|
523
|
-
end
|
524
|
+
query_xpath(xp.strip, bucket, &blk)
|
525
|
+
end
|
524
526
|
|
525
527
|
if path =~ / and / then
|
526
528
|
|
527
|
-
raw_results.flatten.select {|x| x == true or x == false}
|
529
|
+
raw_results.flatten.select {|x| x == true or x == false}
|
528
530
|
|
529
531
|
else
|
530
532
|
raw_results.flatten.index(true) ? [true] : []
|
@@ -533,12 +535,12 @@ class Rexle
|
|
533
535
|
|
534
536
|
return results if !path[/[><]/] and results.any?
|
535
537
|
results = raw_results # .flatten.select {|x| x}
|
536
|
-
|
537
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
538
|
+
|
539
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
538
540
|
|
539
541
|
else
|
540
|
-
|
541
|
-
m, xpath_value, index = fn_match.captures
|
542
|
+
|
543
|
+
m, xpath_value, index = fn_match.captures
|
542
544
|
|
543
545
|
if m == 'text' then
|
544
546
|
a = texts()
|
@@ -552,14 +554,14 @@ class Rexle
|
|
552
554
|
|
553
555
|
end
|
554
556
|
|
555
|
-
end
|
556
|
-
|
557
|
+
end
|
558
|
+
|
557
559
|
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
558
560
|
|
559
561
|
#@log.debug 'query_xpath : ' + raw_xpath_value.inspect
|
560
562
|
#@log.debug '++ ' + self.xml.inspect
|
561
563
|
|
562
|
-
flag_func = false
|
564
|
+
flag_func = false
|
563
565
|
|
564
566
|
xpath_value = raw_xpath_value.sub('child::','./')
|
565
567
|
|
@@ -583,38 +585,38 @@ class Rexle
|
|
583
585
|
|
584
586
|
raw_condition = raw_condition ? raw_condition + '/' + remaining_path \
|
585
587
|
: remaining_path
|
586
|
-
remaining_path = ''
|
588
|
+
remaining_path = ''
|
587
589
|
end
|
588
590
|
|
589
|
-
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
591
|
+
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
590
592
|
|
591
593
|
if r then
|
592
594
|
a_path = raw_path.split(/(?=\/\/)/,2)
|
593
595
|
else
|
594
596
|
a_path = raw_path.split('/',2)
|
595
597
|
end
|
596
|
-
|
598
|
+
|
597
599
|
condition = raw_condition if a_path.length <= 1 #and not raw_condition[/^\[\w+\(.*\)\]$/]
|
598
600
|
|
599
601
|
if raw_path[0,2] == '//' then
|
600
602
|
s = ''
|
601
|
-
elsif raw_path == 'text()'
|
603
|
+
elsif raw_path == 'text()'
|
602
604
|
|
603
605
|
a_path.shift
|
604
606
|
#return @value
|
605
607
|
return self.texts
|
606
608
|
else
|
607
609
|
|
608
|
-
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
609
|
-
|
610
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
611
|
+
|
610
612
|
return @attributes if attribute == '*'
|
611
|
-
|
613
|
+
|
612
614
|
if attribute and @attributes and \
|
613
615
|
@attributes.has_key?(attribute.to_sym) then
|
614
616
|
return [Attribute.new(@attributes[attribute.to_sym])]
|
615
617
|
end
|
616
618
|
s = a_path.shift
|
617
|
-
end
|
619
|
+
end
|
618
620
|
|
619
621
|
# isolate the xpath to return just the path to the current element
|
620
622
|
|
@@ -630,7 +632,7 @@ class Rexle
|
|
630
632
|
if element_name and element_name[/^\d/] then
|
631
633
|
element_name = nil
|
632
634
|
end
|
633
|
-
|
635
|
+
|
634
636
|
condition = raw_xpath_value if element_name.nil?
|
635
637
|
|
636
638
|
else
|
@@ -666,11 +668,11 @@ class Rexle
|
|
666
668
|
r4 = attribute_search(attr_search, self, self.attributes)
|
667
669
|
return r4
|
668
670
|
end
|
669
|
-
|
670
|
-
|
671
|
+
|
672
|
+
|
671
673
|
return_elements = []
|
672
|
-
|
673
|
-
|
674
|
+
|
675
|
+
|
674
676
|
|
675
677
|
if raw_path[0,2] == '//' then
|
676
678
|
|
@@ -685,25 +687,25 @@ class Rexle
|
|
685
687
|
|
686
688
|
if element_name.is_a? String then
|
687
689
|
ename, raw_selector = (element_name.split('::',2)).reverse
|
688
|
-
|
690
|
+
|
689
691
|
selector = case raw_selector
|
690
692
|
when 'following-sibling' then 1
|
691
693
|
when 'preceding-sibling' then -1
|
692
694
|
end
|
693
|
-
|
695
|
+
|
694
696
|
else
|
695
697
|
ename = element_name
|
696
|
-
end
|
698
|
+
end
|
697
699
|
|
698
700
|
if ename == '..' then
|
699
|
-
|
701
|
+
|
700
702
|
remaining_xpath = raw_path[/\.\.\/(.*)/,1]
|
701
703
|
# select the parent element
|
702
704
|
|
703
705
|
r2 = self.parent.xpath(remaining_xpath)
|
704
706
|
|
705
707
|
return r2
|
706
|
-
|
708
|
+
|
707
709
|
elsif ename == '.'
|
708
710
|
|
709
711
|
remaining_xpath = raw_path[1..-1]
|
@@ -712,41 +714,41 @@ class Rexle
|
|
712
714
|
if xpath_value.length > 0 and xpath_value =~ /\[/ then
|
713
715
|
|
714
716
|
r = eval(attr_search.sub(/^h/,'self.attributes'))
|
715
|
-
return self if r
|
717
|
+
return self if r
|
716
718
|
|
717
719
|
else
|
718
720
|
return self
|
719
721
|
end
|
720
722
|
else
|
721
723
|
return self.xpath(remaining_xpath)
|
722
|
-
end
|
724
|
+
end
|
723
725
|
|
724
726
|
elsif element_name.nil?
|
725
727
|
puts ('attr_search: ' + attr_search.inspect).debug if $debug
|
726
|
-
return eval attr_search
|
728
|
+
return eval attr_search
|
727
729
|
else
|
728
730
|
|
729
731
|
if raw_selector.nil? and ename != element_part then
|
730
732
|
|
731
733
|
right_cond = element_part[/#{ename}(.*)/,1]
|
732
734
|
|
733
|
-
end
|
735
|
+
end
|
734
736
|
|
735
737
|
return_elements = @child_elements.map.with_index.select do |x, i|
|
736
738
|
|
737
739
|
next unless x.is_a? Rexle::Element
|
738
740
|
|
739
741
|
#x.name == ename or (ename == '*')
|
740
|
-
|
742
|
+
|
741
743
|
r10 = ((x.name == ename) or (ename == '*'))
|
742
744
|
|
743
|
-
|
745
|
+
|
744
746
|
|
745
747
|
end
|
746
|
-
|
748
|
+
|
747
749
|
if right_cond then
|
748
|
-
|
749
|
-
|
750
|
+
|
751
|
+
|
750
752
|
r12 = return_elements.map do |x, i|
|
751
753
|
|
752
754
|
if x.text then
|
@@ -756,15 +758,15 @@ class Rexle
|
|
756
758
|
else
|
757
759
|
false
|
758
760
|
end
|
759
|
-
|
761
|
+
|
760
762
|
end
|
761
|
-
|
763
|
+
|
762
764
|
return r12
|
763
|
-
|
764
|
-
end
|
765
|
-
|
765
|
+
|
766
|
+
end
|
767
|
+
|
766
768
|
if selector then
|
767
|
-
ne = return_elements.inject([]) do |r,x|
|
769
|
+
ne = return_elements.inject([]) do |r,x|
|
768
770
|
i = x.last + selector
|
769
771
|
if i >= 0 then
|
770
772
|
r << i
|
@@ -775,17 +777,17 @@ class Rexle
|
|
775
777
|
|
776
778
|
return_elements = ne.map {|x| [@child_elements[x], x] if x}
|
777
779
|
end
|
778
|
-
|
780
|
+
|
779
781
|
|
780
782
|
end
|
781
783
|
end
|
782
|
-
|
784
|
+
|
783
785
|
if return_elements.length > 0 then
|
784
786
|
|
785
787
|
if (a_path + [remaining_path]).join.empty? then
|
786
788
|
|
787
789
|
# pass in a block to the filter if it is function contains?
|
788
|
-
rlist = return_elements.map.with_index do |x,i|
|
790
|
+
rlist = return_elements.map.with_index do |x,i|
|
789
791
|
r5 = filter(x, i+1, attr_search, &blk)
|
790
792
|
|
791
793
|
r5
|
@@ -795,9 +797,9 @@ class Rexle
|
|
795
797
|
|
796
798
|
else
|
797
799
|
|
798
|
-
rlist << return_elements.map.with_index do |x,i|
|
800
|
+
rlist << return_elements.map.with_index do |x,i|
|
799
801
|
|
800
|
-
rtn_element = filter(x, i+1, attr_search) do |e|
|
802
|
+
rtn_element = filter(x, i+1, attr_search) do |e|
|
801
803
|
|
802
804
|
r = e.xpath(a_path.join('/') + raw_condition.to_s \
|
803
805
|
+ remaining_path, &blk)
|
@@ -843,7 +845,7 @@ class Rexle
|
|
843
845
|
rlist,&blk)
|
844
846
|
end
|
845
847
|
end
|
846
|
-
|
848
|
+
|
847
849
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) \
|
848
850
|
or (rlist.length > 1 and rlist[0].is_a? Array)
|
849
851
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
@@ -860,21 +862,21 @@ class Rexle
|
|
860
862
|
elsif item.is_a? Rexle::CData then
|
861
863
|
@child_elements << item
|
862
864
|
elsif item.is_a? Rexle::Comment then
|
863
|
-
@child_elements << item
|
865
|
+
@child_elements << item
|
864
866
|
elsif item.is_a? Rexle::Element then
|
865
867
|
|
866
868
|
@child_elements << item
|
867
869
|
# add a reference from this element (the parent) to the child
|
868
870
|
item.parent = self
|
869
|
-
item
|
870
|
-
|
871
|
+
item
|
872
|
+
|
871
873
|
elsif item.is_a? Rexle then
|
872
874
|
self.add_element(item.root)
|
873
875
|
end
|
874
876
|
|
875
|
-
end
|
877
|
+
end
|
876
878
|
|
877
|
-
def add(item)
|
879
|
+
def add(item)
|
878
880
|
|
879
881
|
if item.is_a? Rexle::Element then
|
880
882
|
|
@@ -900,13 +902,13 @@ class Rexle
|
|
900
902
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
901
903
|
else
|
902
904
|
self.xml
|
903
|
-
end
|
905
|
+
end
|
904
906
|
end
|
905
907
|
|
906
908
|
def add_attribute(*x)
|
907
|
-
|
909
|
+
|
908
910
|
proc_hash = lambda {|x| Hash[*x]}
|
909
|
-
|
911
|
+
|
910
912
|
procs = {
|
911
913
|
Hash: lambda {|x| x[0] || {}},
|
912
914
|
String: proc_hash,
|
@@ -924,57 +926,57 @@ class Rexle
|
|
924
926
|
def add_text(s)
|
925
927
|
|
926
928
|
self.child_elements << s
|
927
|
-
self
|
929
|
+
self
|
928
930
|
end
|
929
|
-
|
930
|
-
def attribute(key)
|
931
|
-
|
931
|
+
|
932
|
+
def attribute(key)
|
933
|
+
|
932
934
|
key = key.to_sym if key.is_a? String
|
933
|
-
|
935
|
+
|
934
936
|
if @attributes[key].is_a? String then
|
935
|
-
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
937
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
936
938
|
else
|
937
939
|
@attributes[key]
|
938
940
|
end
|
939
|
-
end
|
940
|
-
|
941
|
-
def attributes() @attributes end
|
942
|
-
|
941
|
+
end
|
942
|
+
|
943
|
+
def attributes() @attributes end
|
944
|
+
|
943
945
|
def cdatas()
|
944
946
|
self.children.inject([]){|r,x| x.is_a?(Rexle::CData) ? r << x.to_s : r }
|
945
947
|
end
|
946
|
-
|
948
|
+
|
947
949
|
def children()
|
948
950
|
|
949
951
|
r = @child_elements
|
950
|
-
|
952
|
+
|
951
953
|
def r.is_an_empty_string?()
|
952
954
|
self.length == 1 and self.first == ''
|
953
|
-
end
|
954
|
-
|
955
|
+
end
|
956
|
+
|
955
957
|
return r
|
956
|
-
end
|
958
|
+
end
|
957
959
|
|
958
960
|
def children=(a) @child_elements = a if a.is_a? Array end
|
959
|
-
|
961
|
+
|
960
962
|
def deep_clone() Rexle.new(self.xml).root end
|
961
|
-
|
962
|
-
def clone()
|
963
|
-
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
963
|
+
|
964
|
+
def clone()
|
965
|
+
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
964
966
|
end
|
965
|
-
|
967
|
+
|
966
968
|
def delete(obj=nil)
|
967
969
|
|
968
970
|
if obj then
|
969
971
|
|
970
972
|
if obj.is_a? String then
|
971
|
-
|
973
|
+
|
972
974
|
self.xpath(obj).each {|e| e.delete; e = nil}
|
973
|
-
|
975
|
+
|
974
976
|
else
|
975
977
|
|
976
978
|
i = @child_elements.index(obj)
|
977
|
-
[@child_elements].each{|x| x.delete_at i} if i
|
979
|
+
[@child_elements].each{|x| x.delete_at i} if i
|
978
980
|
end
|
979
981
|
else
|
980
982
|
|
@@ -996,50 +998,50 @@ class Rexle
|
|
996
998
|
String: proc {|x| @child_elements[x]}
|
997
999
|
}
|
998
1000
|
|
999
|
-
procs[s.class.to_s.to_sym].call(s)
|
1001
|
+
procs[s.class.to_s.to_sym].call(s)
|
1000
1002
|
end
|
1001
1003
|
|
1002
1004
|
def doc_root() @rexle.root end
|
1003
1005
|
def each(&blk) self.children.each(&blk) end
|
1004
1006
|
def each_recursive(&blk) recursive_scan(self.children,&blk) end
|
1005
1007
|
alias traverse each_recursive
|
1006
|
-
def has_elements?() !self.elements.empty? end
|
1007
|
-
def insert_after(node) insert(node, 1) end
|
1008
|
+
def has_elements?() !self.elements.empty? end
|
1009
|
+
def insert_after(node) insert(node, 1) end
|
1008
1010
|
def insert_before(node) insert(node) end
|
1009
1011
|
def last(a) a.last end
|
1010
|
-
def map(&blk) self.children.map(&blk) end
|
1011
|
-
|
1012
|
+
def map(&blk) self.children.map(&blk) end
|
1013
|
+
|
1012
1014
|
def plaintext()
|
1013
1015
|
CGI.unescapeHTML xml().gsub(/<\/?[^>]+>/,'').gsub('&nbsp;',' ')\
|
1014
1016
|
.gsub(/\n\s+/,' ')
|
1015
1017
|
end
|
1016
|
-
|
1017
|
-
def root() self end
|
1018
|
+
|
1019
|
+
def root() self end
|
1018
1020
|
|
1019
1021
|
def text(s='')
|
1020
|
-
|
1021
|
-
return self.value if s.empty?
|
1022
|
-
|
1022
|
+
|
1023
|
+
return self.value if s.empty?
|
1024
|
+
|
1023
1025
|
e = self.element(s)
|
1024
1026
|
return e if e.is_a? String
|
1025
|
-
|
1027
|
+
|
1026
1028
|
e.value if e
|
1027
1029
|
end
|
1028
|
-
|
1030
|
+
|
1029
1031
|
def texts()
|
1030
1032
|
|
1031
1033
|
r = @child_elements.select do |x|
|
1032
1034
|
x.is_a? String or x.is_a? Rexle::CData
|
1033
1035
|
end
|
1034
|
-
|
1036
|
+
|
1035
1037
|
r.map do |x|
|
1036
1038
|
def x.unescape()
|
1037
1039
|
s = self.to_s.clone
|
1038
1040
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1039
1041
|
s
|
1040
|
-
end
|
1042
|
+
end
|
1041
1043
|
end
|
1042
|
-
|
1044
|
+
|
1043
1045
|
return r
|
1044
1046
|
end
|
1045
1047
|
|
@@ -1047,20 +1049,20 @@ class Rexle
|
|
1047
1049
|
|
1048
1050
|
r = @child_elements.first
|
1049
1051
|
return nil unless r.is_a? String
|
1050
|
-
|
1052
|
+
|
1051
1053
|
def r.unescape()
|
1052
1054
|
s = self.clone
|
1053
1055
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1054
1056
|
s
|
1055
|
-
end
|
1056
|
-
|
1057
|
+
end
|
1058
|
+
|
1057
1059
|
return r
|
1058
1060
|
end
|
1059
|
-
|
1061
|
+
|
1060
1062
|
def value=(raw_s)
|
1061
1063
|
|
1062
1064
|
val = Value.new(raw_s.to_s.clone)
|
1063
|
-
|
1065
|
+
|
1064
1066
|
escape_chars = %w(& &amp; ' &apos; < &lt; > &gt;).each_slice(2).to_a
|
1065
1067
|
escape_chars.each{|x| val.gsub!(*x)}
|
1066
1068
|
|
@@ -1070,15 +1072,15 @@ class Rexle
|
|
1070
1072
|
end
|
1071
1073
|
|
1072
1074
|
alias text= value=
|
1073
|
-
|
1075
|
+
|
1074
1076
|
def to_a()
|
1075
|
-
|
1077
|
+
|
1076
1078
|
e = [String.new(self.name), Hash.new(self.attributes)]
|
1077
|
-
|
1078
|
-
if self.cdatas.any? then
|
1079
|
+
|
1080
|
+
if self.cdatas.any? then
|
1079
1081
|
e.concat self.cdatas.map {|cdata| ['![', {}, cdata] }
|
1080
|
-
end
|
1081
|
-
|
1082
|
+
end
|
1083
|
+
|
1082
1084
|
[*e, *scan_to_a(self.children)]
|
1083
1085
|
end
|
1084
1086
|
|
@@ -1088,10 +1090,10 @@ class Rexle
|
|
1088
1090
|
Hash: lambda {|x|
|
1089
1091
|
o = {pretty: false}.merge(x)
|
1090
1092
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
1091
|
-
|
1093
|
+
|
1092
1094
|
method(msg).call(self.children)
|
1093
1095
|
},
|
1094
|
-
String: lambda {|x|
|
1096
|
+
String: lambda {|x|
|
1095
1097
|
r = self.element(x)
|
1096
1098
|
r ? r.xml : ''
|
1097
1099
|
}
|
@@ -1104,18 +1106,18 @@ class Rexle
|
|
1104
1106
|
end
|
1105
1107
|
|
1106
1108
|
# Inserts +item+ at the front of @child_elements, points the item's
# parent reference at this element, and returns the item.
def prepend(item)
  @child_elements.insert(0, item)

  # the child keeps a back-reference to this element (its parent)
  item.tap { |child| child.parent = self }
end
|
1116
|
+
|
1115
1117
|
alias to_s xml
|
1116
1118
|
|
1117
1119
|
private
|
1118
|
-
|
1120
|
+
|
1119
1121
|
def insert(node,offset=0)
|
1120
1122
|
|
1121
1123
|
i = parent.child_elements.index(self)
|
@@ -1127,7 +1129,7 @@ class Rexle
|
|
1127
1129
|
node.instance_variable_set(:@doc_id, self.doc_root.object_id)
|
1128
1130
|
|
1129
1131
|
self
|
1130
|
-
end
|
1132
|
+
end
|
1131
1133
|
|
1132
1134
|
def format_condition(condition)
|
1133
1135
|
|
@@ -1152,21 +1154,21 @@ class Rexle
|
|
1152
1154
|
elsif raw_items[0][/^not\(/]
|
1153
1155
|
|
1154
1156
|
return raw_items[0]
|
1155
|
-
|
1157
|
+
|
1156
1158
|
else
|
1157
1159
|
|
1158
1160
|
andor_items = raw_items.map.with_index\
|
1159
1161
|
.select{|x,i| x[/\band\b|\bor\b/]}\
|
1160
1162
|
.map{|x| [x.last, x.last + 1]}.flatten
|
1161
|
-
|
1163
|
+
|
1162
1164
|
indices = [0] + andor_items + [raw_items.length]
|
1163
1165
|
|
1164
1166
|
if raw_items[0][0] == '@' then
|
1165
1167
|
|
1166
1168
|
raw_items.each{|x| x.gsub!(/^@/,'')}
|
1167
|
-
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1169
|
+
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1168
1170
|
|
1169
|
-
items = cons_items.map do |x|
|
1171
|
+
items = cons_items.map do |x|
|
1170
1172
|
|
1171
1173
|
if x.length >= 3 then
|
1172
1174
|
if x[0] != 'class' then
|
@@ -1185,17 +1187,17 @@ class Rexle
|
|
1185
1187
|
else
|
1186
1188
|
|
1187
1189
|
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1188
|
-
|
1189
|
-
items = cons_items.map do |x|
|
1190
|
+
|
1191
|
+
items = cons_items.map do |x|
|
1190
1192
|
|
1191
1193
|
if x.length >= 3 then
|
1192
1194
|
|
1193
1195
|
x[1] = '==' if x[1] == '='
|
1194
1196
|
if x[0] != '.' then
|
1195
1197
|
if x[0][/\//] then
|
1196
|
-
|
1198
|
+
|
1197
1199
|
path, value = x.values_at(0,-1)
|
1198
|
-
|
1200
|
+
|
1199
1201
|
if x[0][/@\w+$/] then
|
1200
1202
|
"r = e.xpath('#{path}').first; r and r.value == #{value}"
|
1201
1203
|
else
|
@@ -1219,16 +1221,16 @@ class Rexle
|
|
1219
1221
|
|
1220
1222
|
|
1221
1223
|
end
|
1222
|
-
|
1224
|
+
|
1223
1225
|
def scan_match(node, path)
|
1224
|
-
|
1226
|
+
|
1225
1227
|
if path == '//' then
|
1226
|
-
return [node, node.text,
|
1228
|
+
return [node, node.text,
|
1227
1229
|
node.elements.map {|x| scan_match x, path}]
|
1228
1230
|
end
|
1229
|
-
|
1231
|
+
|
1230
1232
|
r = []
|
1231
|
-
xpath2 = path[2..-1]
|
1233
|
+
xpath2 = path[2..-1]
|
1232
1234
|
#jr150316 xpath2.sub!(/^\*\//,'')
|
1233
1235
|
#jr150316xpath2.sub!(/^\*/,self.name)
|
1234
1236
|
#jr150316xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
@@ -1251,17 +1253,17 @@ class Rexle
|
|
1251
1253
|
end
|
1252
1254
|
a
|
1253
1255
|
end
|
1254
|
-
|
1255
|
-
|
1256
|
+
|
1257
|
+
|
1256
1258
|
def filter(raw_element, i, attr_search, &blk)
|
1257
|
-
|
1259
|
+
|
1258
1260
|
x, index = raw_element
|
1259
1261
|
e = @child_elements[index]
|
1260
1262
|
|
1261
1263
|
return unless e.is_a? Rexle::Element
|
1262
1264
|
name, value = e.name, e.value if e.is_a? Rexle::Element
|
1263
1265
|
|
1264
|
-
h = x.attributes # <-- fetch the attributes
|
1266
|
+
h = x.attributes # <-- fetch the attributes
|
1265
1267
|
|
1266
1268
|
if attr_search then
|
1267
1269
|
|
@@ -1277,21 +1279,21 @@ class Rexle
|
|
1277
1279
|
def attribute_search(attr_search, e, h, i=nil, &blk)
|
1278
1280
|
|
1279
1281
|
r2 = if attr_search.is_a? Integer then
|
1280
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
1282
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
1281
1283
|
elsif attr_search[/i\s(?:<|>|==|%)\s\d+/] and eval(attr_search) then
|
1282
|
-
block_given? ? blk.call(e) : e
|
1284
|
+
block_given? ? blk.call(e) : e
|
1283
1285
|
elsif h and !h.empty? and attr_search[/^h\[/] and eval(attr_search) then
|
1284
1286
|
block_given? ? blk.call(e) : e
|
1285
|
-
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1287
|
+
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1286
1288
|
next unless x.is_a? Rexle::Element
|
1287
1289
|
name, attributes, value = x.name, x.attributes, x.value.to_s
|
1288
1290
|
b = eval(attr_search)
|
1289
1291
|
b}.length > 0
|
1290
1292
|
|
1291
1293
|
block_given? ? blk.call(e) : e
|
1292
|
-
|
1293
|
-
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1294
|
-
block_given? ? blk.call(e) : e
|
1294
|
+
|
1295
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1296
|
+
block_given? ? blk.call(e) : e
|
1295
1297
|
elsif attr_search[/^e\.value/]
|
1296
1298
|
|
1297
1299
|
v = attr_search[/[^\s]+$/]
|
@@ -1305,92 +1307,92 @@ class Rexle
|
|
1305
1307
|
block_given? ? blk.call(e) : e
|
1306
1308
|
elsif attr_search[/^\w*\(/] and e.element(attr_search)
|
1307
1309
|
block_given? ? blk.call(e) : e
|
1308
|
-
end
|
1310
|
+
end
|
1309
1311
|
|
1310
1312
|
r2
|
1311
1313
|
end
|
1312
|
-
|
1314
|
+
|
1313
1315
|
# Walks +nodes+ depth-first, calling +blk+ with every Rexle::Element
# before descending into its children. Non-element nodes are skipped.
# Returns +nodes+.
def recursive_scan(nodes, &blk)
  nodes.each do |node|
    next unless node.is_a?(Rexle::Element)

    blk.call(node)
    recursive_scan(node.children, &blk) unless node.children.length.zero?
  end
end
|
1323
|
-
|
1325
|
+
|
1324
1326
|
end # -- end of element --
|
1325
|
-
|
1327
|
+
|
1326
1328
|
|
1327
1329
|
# Wraps the raw content of a CDATA section.
class CData

  # entity => character pairs used by #unescape. '&amp;' comes last so
  # that '&amp;lt;' becomes '&lt;' instead of being unescaped twice.
  ENTITIES = [['&lt;', '<'], ['&gt;', '>'], ['&apos;', "'"], ['&amp;', '&']].freeze

  def initialize(val='')
    @value = val
  end

  # Returns a new CData holding its own copy of the value, so changing
  # the copy's text cannot alter this object.
  def clone()
    CData.new(@value.clone)
  end

  def inspect()
    @value.inspect
  end

  # Returns the value wrapped in CDATA markup.
  def print()
    "<![CDATA[%s]]>" % @value
  end

  def to_s()
    @value
  end

  # Returns a copy of the value with the XML entities in ENTITIES
  # converted back to literal characters; @value itself is not modified.
  def unescape()
    ENTITIES.inject(@value.clone) { |s, (from, to)| s.gsub(from, to) }
  end

end
|
1356
|
-
|
1357
|
-
|
1358
|
+
|
1359
|
+
|
1358
1360
|
# A comment node. The initial text is kept in @value, while an internal
# Element named '_' collects the text and any elements added later.
class Comment

  def initialize(val='')
    @value = val
    @e = Element.new('_').add_text(val)
  end

  # Adds element +e2+ to the internal container.
  def add_element(e2) @e.add(e2) end

  # Adds text +t+ to the internal container.
  def add_text(t) @e.add_text(t) end

  # Returns the initial comment text (unquoted).
  def inspect() @value end

  # Returns the comment markup, built from the text nodes that the
  # '//./text()' query finds in the internal container.
  def print()
    body = @e.root.xpath('//./text()').join
    "<!--%s-->" % body
  end

  # Returns the text nodes of the internal container.
  def texts() @e.texts end

  # Returns the initial comment text.
  def to_s() @value end

end
|
1390
|
-
|
1392
|
+
|
1391
1393
|
class Elements
|
1392
1394
|
include Enumerable
|
1393
|
-
|
1395
|
+
|
1394
1396
|
def initialize(elements=[])
|
1395
1397
|
super()
|
1396
1398
|
@elements = elements
|
@@ -1402,63 +1404,63 @@ class Rexle
|
|
1402
1404
|
i = raw_i - 1
|
1403
1405
|
@elements[i]
|
1404
1406
|
end
|
1405
|
-
|
1407
|
+
|
1406
1408
|
# Yields each wrapped element in order (delegates to @elements).
def each(&blk)
  @elements.each(&blk)
end

# True when no elements are wrapped.
def empty?()
  @elements.empty?
end
|
1408
|
-
|
1410
|
+
|
1409
1411
|
# Returns the zero-based position of the first element for which the
# block is true or, without a block, of the first element equal to +e+.
# Returns nil when nothing matches.
def index(e=nil, &blk)
  block_given? ? @elements.index(&blk) : @elements.index(e)
end
|
1417
|
-
|
1419
|
+
|
1418
1420
|
# Returns the final wrapped element (nil when there are none).
def last()
  @elements.last
end

# Returns the number of wrapped elements.
def length()
  @elements.length
end

# Returns the underlying array itself, not a copy.
def to_a()
  @elements
end
|
1421
|
-
|
1423
|
+
|
1422
1424
|
end # -- end of elements --
|
1423
1425
|
|
1424
1426
|
|
1425
1427
|
# Builds the document tree from +x+ and returns self.
#
# x - a String (converted with parse_string) or an already nested Array
#     (used as-is). When +x+ is nil the nested array is taken from the
#     block's return value instead.
#
# The nested array is stored in @a and the tree produced by scan_element
# (wrapped in a 'doc' node) in @doc.
def parse(x=nil)

  a = if x then
    procs = {
      String: proc {|x| parse_string(x)},
      Array: proc {|x| x}
    }
    procs[x.class.to_s.to_sym].call(x)
  else
    yield
  end

  doc_node = ['doc',Attributes.new]
  # Reuse the array obtained above: converting a second time doubled the
  # work for strings and failed outright for the block form, where no
  # converter table exists.
  @a = a
  @doc = scan_element(*(doc_node << @a))

  self
end
|
1445
1447
|
|
1446
1448
|
# NOTE(review): this forwards to @doc.attribute, the same call that
# #attribute makes, rather than to an add_attribute method -- confirm
# that this is intended.
def add_attribute(x)
  @doc.attribute(x)
end

# Forwards the lookup of +key+ to the document node.
def attribute(key)
  @doc.attribute(key)
end

# Returns the document node's attributes.
def attributes()
  @doc.attributes
end
|
1449
|
-
|
1450
|
-
# Adds +element+ to the document and returns it.
#
# When a document node already exists and reports a root, a RuntimeError
# is raised. When none exists yet, one is built from element.to_a via
# scan_element.
def add_element(element)

  if @doc then
    raise 'attempted adding second root element to document' if @doc.root
    # Past the guard above @doc.root is nil, so the element has to be
    # attached to the document node itself; sending add_element to the
    # missing root could only fail with NoMethodError.
    @doc.add_element(element)
  else
    doc_node = ['doc', Attributes.new, element.to_a]
    @doc = scan_element(*doc_node)
  end
  element
end
|
1461
|
-
|
1463
|
+
|
1462
1464
|
# No-op: the argument +s+ is ignored and nil is returned.
def add_text(s) end
|
1463
1465
|
|
1464
1466
|
alias add add_element
|
@@ -1468,26 +1470,26 @@ class Rexle
|
|
1468
1470
|
@doc.xpath(xpath).each {|e| e.delete; e = nil }
|
1469
1471
|
|
1470
1472
|
end
|
1471
|
-
|
1473
|
+
|
1472
1474
|
alias remove delete
|
1473
1475
|
|
1474
|
-
# Returns the first result of running +xpath+ through #xpath.
def element(xpath)
  self.xpath(xpath).first
end

# Forwards to the document node's #elements with the optional +s+.
def elements(s=nil)
  @doc.elements(s)
end

# Returns the name of the document node's root.
def name()
  @doc.root.name
end

# Returns the nested array stored in @a (the same object, not a copy).
def to_a()
  @a
end
|
1478
|
-
|
1479
|
-
# Returns the result of #xml called with +options+, or the placeholder
# '<UNDEFINED/>' when no document node has been built yet.
def to_s(options={})
  if @doc
    xml(options)
  else
    '<UNDEFINED/>'
  end
end
|
1483
|
-
|
1485
|
+
|
1484
1486
|
# Forwards the +xpath+ text lookup to the document node.
def text(xpath)
  @doc.text(xpath)
end

# Returns the first element held by the document node.
def root()
  top_level = @doc.elements
  top_level.first
end

# Writes the output of #xml to +f+ and returns whatever f.write returns.
def write(f)
  f.write(xml)
end
|
1492
1494
|
|
1493
1495
|
def xml(options={})
|
@@ -1516,14 +1518,14 @@ class Rexle
|
|
1516
1518
|
private
|
1517
1519
|
|
1518
1520
|
# Runs +x+ through RexleParser, keeps the parser's instructions in
# @instructions, and returns the parser's array form of the input.
def parse_rexle(x)
  parser = RexleParser.new(x)
  parsed = parser.to_a
  @instructions = parser.instructions

  parsed
end
|
1526
|
-
|
1528
|
+
|
1527
1529
|
def parse_string(x)
|
1528
1530
|
|
1529
1531
|
# check if the XML string is a dynarex document
|
@@ -1539,50 +1541,50 @@ class Rexle
|
|
1539
1541
|
'polyrex' => proc {|x| parse_rexle(x)}
|
1540
1542
|
}
|
1541
1543
|
other_parser = procs[recordx_type]
|
1542
|
-
|
1544
|
+
|
1543
1545
|
if other_parser then
|
1544
|
-
|
1546
|
+
|
1545
1547
|
begin
|
1546
1548
|
other_parser.call(x)
|
1547
1549
|
rescue
|
1548
1550
|
parse_rexle x
|
1549
1551
|
end
|
1550
|
-
|
1552
|
+
|
1551
1553
|
else
|
1552
|
-
|
1554
|
+
|
1553
1555
|
parse_rexle x
|
1554
|
-
|
1555
|
-
end
|
1556
|
-
|
1556
|
+
|
1557
|
+
end
|
1558
|
+
|
1557
1559
|
else
|
1558
1560
|
|
1559
1561
|
parse_rexle x
|
1560
|
-
|
1562
|
+
|
1561
1563
|
end
|
1562
1564
|
else
|
1563
1565
|
|
1564
1566
|
parse_rexle x
|
1565
|
-
|
1567
|
+
|
1566
1568
|
end
|
1567
1569
|
|
1568
1570
|
end
|
1569
|
-
|
1571
|
+
|
1570
1572
|
def scan_element(name=nil, attributes=nil, *children)
|
1571
|
-
|
1573
|
+
|
1572
1574
|
return unless name
|
1573
|
-
|
1575
|
+
|
1574
1576
|
return Rexle::CData.new(children.first) if name == '!['
|
1575
1577
|
return Rexle::Comment.new(children.first) if name == '!-'
|
1576
1578
|
|
1577
|
-
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1579
|
+
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1578
1580
|
|
1579
1581
|
if children then
|
1580
1582
|
|
1581
1583
|
children.each do |x4|
|
1582
|
-
|
1584
|
+
|
1583
1585
|
|
1584
1586
|
if x4.is_a? Array then
|
1585
|
-
element.add_element scan_element(*x4)
|
1587
|
+
element.add_element scan_element(*x4)
|
1586
1588
|
elsif x4.is_a? String then
|
1587
1589
|
|
1588
1590
|
e = if x4.is_a? String then
|
@@ -1591,22 +1593,22 @@ class Rexle
|
|
1591
1593
|
elsif x4.name == '![' then
|
1592
1594
|
|
1593
1595
|
Rexle::CData.new(x4)
|
1594
|
-
|
1596
|
+
|
1595
1597
|
elsif x4.name == '!-' then
|
1596
1598
|
|
1597
1599
|
Rexle::Comment.new(x4)
|
1598
|
-
|
1600
|
+
|
1599
1601
|
end
|
1600
1602
|
|
1601
1603
|
element.add_element e
|
1602
1604
|
end
|
1603
1605
|
end
|
1604
1606
|
end
|
1605
|
-
|
1607
|
+
|
1606
1608
|
return element
|
1607
1609
|
end
|
1608
1610
|
|
1609
|
-
|
1611
|
+
|
1610
1612
|
# scan a rexml doc
|
1611
1613
|
#
|
1612
1614
|
def scan_doc(node)
|
@@ -1614,28 +1616,28 @@ class Rexle
|
|
1614
1616
|
attributes = node.attributes.inject({}){|r,x| r.merge(Hash[*x])}
|
1615
1617
|
[node.name, node.text.to_s, attributes, *children]
|
1616
1618
|
end
|
1617
|
-
|
1619
|
+
|
1618
1620
|
# An Array of records that can be turned into a Rexle document and
# queried with xpath expressions.
class Recordset < Array

  def initialize(a)
    super(a)
  end

  # Builds a new Rexle document whose root element is named +root+ and
  # whose children are the #to_a form of every record.
  def to_doc(root: 'root')
    rows = map { |record| record.to_a }
    Rexle.new([root, {}] + rows)
  end

  # Runs +xpath+ against the root of a freshly built document.
  def xpath(xpath)
    to_doc.root.xpath(xpath)
  end

  # Returns the root's #element result for +xpath+, again using a
  # freshly built document.
  def element(xpath)
    to_doc.root.element(xpath)
  end

end
|
1642
|
+
|
1641
1643
|
end
|