rexle 1.5.10 → 1.5.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexle.rb +374 -349
- data.tar.gz.sig +0 -0
- metadata +11 -11
- metadata.gz.sig +0 -0
data/lib/rexle.rb
CHANGED
@@ -13,32 +13,40 @@ require 'backtrack-xpath'
|
|
13
13
|
|
14
14
|
# modifications:
|
15
15
|
|
16
|
-
#
|
16
|
+
# 15-Jan-2022: bug fix: Related to previous bug fix; Detecting
|
17
|
+
# Attributes::Value is now performed instead of
|
18
|
+
# a String object
|
19
|
+
# 14-Jan-2022: bug fix: Related to previous bug fix; Unescape is
|
20
|
+
# now only applied to objects of type Attributes::Value
|
21
|
+
# 01-Jan-2022: bug fix: Attribute values are no longer unescaped when
|
22
|
+
# called from Rexle#xml
|
23
|
+
# 03-Apr-2021: bug fix: Using *to_a* a CDATA element if present is now output
|
24
|
+
# 20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
|
17
25
|
# 11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
|
18
26
|
# 30-Jul-2020: minor improvement: #plaintext now unescapes &amp; to &
|
19
27
|
# 11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
|
20
28
|
# 23-Apr-2020: feature: Added public method *plaintext*.
|
21
29
|
# 04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
|
22
|
-
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
23
|
-
# 09-Jul-2019: minor improvement: A comment tag now has a
|
30
|
+
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
31
|
+
# 09-Jul-2019: minor improvement: A comment tag now has a
|
24
32
|
# new line when pretty printed
|
25
33
|
# 02-Feb-2019: feature: A comment tag can now have nested elements
|
26
34
|
# 03-Nov-2018: feature: Debug messages can now use coloured text
|
27
35
|
# 02-Oct-2018: feature: Added Rexle::Elements#last
|
28
|
-
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
29
|
-
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
36
|
+
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
37
|
+
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
30
38
|
# xpath contains an *and* operator
|
31
|
-
# 14-Sep-2017: improvement: An *and* operator can now be
|
39
|
+
# 14-Sep-2017: improvement: An *and* operator can now be
|
32
40
|
# used between xpath statements
|
33
41
|
# 10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id]
|
34
|
-
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
35
|
-
# track of the working document when elements are passed to
|
42
|
+
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
43
|
+
# track of the working document when elements are passed to
|
36
44
|
# different documents
|
37
45
|
# bug fix: Element prefixes are now only processed if they exist
|
38
|
-
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
46
|
+
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
39
47
|
# Rexle::Element#next_sibling and Rexle::Element#previous_sibling bugs
|
40
|
-
# 25-Feb-2017: improvement:
|
41
|
-
# An input rexle array can now have an empty array for
|
48
|
+
# 25-Feb-2017: improvement:
|
49
|
+
# An input rexle array can now have an empty array for
|
42
50
|
# children e.g. doc = Rexle.new(["records", {}, "", []])
|
43
51
|
# 25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer
|
44
52
|
|
@@ -52,7 +60,14 @@ module XMLhelper
|
|
52
60
|
scan_print(children).join.force_encoding("utf-8")
|
53
61
|
|
54
62
|
a = self.root.attributes.to_a.map do |k,v|
|
55
|
-
|
63
|
+
|
64
|
+
val = if v.is_a?(Array) then
|
65
|
+
v.join(' ')
|
66
|
+
else
|
67
|
+
v.is_a?(Attributes::Value) ? v.to_s(unescape: false) : v
|
68
|
+
end
|
69
|
+
|
70
|
+
"%s='%s'" % [k, val]
|
56
71
|
end
|
57
72
|
|
58
73
|
xml = "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : \
|
@@ -60,7 +75,7 @@ module XMLhelper
|
|
60
75
|
|
61
76
|
if self.instructions and declaration then
|
62
77
|
processing_instructions() + xml
|
63
|
-
else
|
78
|
+
else
|
64
79
|
xml
|
65
80
|
end
|
66
81
|
end
|
@@ -69,43 +84,43 @@ module XMLhelper
|
|
69
84
|
|
70
85
|
body = pretty_print(children,2).join
|
71
86
|
|
72
|
-
a = self.root.attributes.to_a.map do |k,v|
|
73
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
87
|
+
a = self.root.attributes.to_a.map do |k,v|
|
88
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s)]
|
74
89
|
end
|
75
|
-
|
76
|
-
ind = "\n "
|
90
|
+
|
91
|
+
ind = "\n "
|
77
92
|
xml = "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : \
|
78
93
|
' ' + a.join(' '), ind, body, "\n", self.root.name]
|
79
94
|
|
80
95
|
if self.instructions and declaration then
|
81
96
|
processing_instructions("") + xml
|
82
|
-
else
|
97
|
+
else
|
83
98
|
xml
|
84
99
|
end
|
85
100
|
end
|
86
|
-
|
87
|
-
def inspect()
|
101
|
+
|
102
|
+
def inspect()
|
88
103
|
"#<Rexle:%s>" % [self.object_id]
|
89
104
|
end
|
90
105
|
|
91
106
|
def processing_instructions(s='')
|
92
107
|
self.instructions.map do |instruction|
|
93
|
-
"<?%s?>\n" % instruction.join(' ')
|
108
|
+
"<?%s?>\n" % instruction.join(' ')
|
94
109
|
end.join s
|
95
110
|
end
|
96
111
|
|
97
112
|
def scan_print(nodes)
|
98
113
|
|
99
114
|
r2 = nodes.map do |x|
|
100
|
-
|
115
|
+
|
101
116
|
r = if x.is_a? Rexle::Element then
|
102
117
|
|
103
|
-
a = x.attributes.to_a.map do |k,v|
|
118
|
+
a = x.attributes.to_a.map do |k,v|
|
104
119
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
105
120
|
end
|
106
121
|
|
107
122
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
108
|
-
|
123
|
+
|
109
124
|
non_self_closing_tags = %w(script textarea iframe div object a)
|
110
125
|
|
111
126
|
if (x.children and x.children.length > 0 \
|
@@ -119,20 +134,20 @@ module XMLhelper
|
|
119
134
|
else
|
120
135
|
out = ["<%s/>" % tag]
|
121
136
|
end
|
122
|
-
|
137
|
+
|
123
138
|
elsif x.is_a? String then x
|
124
|
-
elsif x.is_a? Rexle::CData then x.print
|
125
|
-
elsif x.is_a? Rexle::Comment then x.print
|
126
|
-
|
139
|
+
elsif x.is_a? Rexle::CData then x.print
|
140
|
+
elsif x.is_a? Rexle::Comment then x.print
|
141
|
+
|
127
142
|
end
|
128
143
|
|
129
144
|
r
|
130
145
|
end
|
131
|
-
|
146
|
+
|
132
147
|
r2
|
133
148
|
|
134
149
|
end
|
135
|
-
|
150
|
+
|
136
151
|
def scan_to_a(nodes)
|
137
152
|
|
138
153
|
nodes.inject([]) do |r,x|
|
@@ -141,6 +156,10 @@ module XMLhelper
|
|
141
156
|
|
142
157
|
a = [String.new(x.name), Hash.new(x.attributes), x.value.to_s]
|
143
158
|
|
159
|
+
if x.cdatas.any? then
|
160
|
+
a.concat x.cdatas.map {|cdata| ['![', {}, cdata] }
|
161
|
+
end
|
162
|
+
|
144
163
|
(a.concat(scan_to_a(x.children))) if x.children.length > 1
|
145
164
|
r << a
|
146
165
|
elsif x.is_a? String then
|
@@ -151,7 +170,7 @@ module XMLhelper
|
|
151
170
|
end
|
152
171
|
|
153
172
|
end
|
154
|
-
|
173
|
+
|
155
174
|
|
156
175
|
|
157
176
|
def pretty_print(nodes, indent='0')
|
@@ -164,13 +183,13 @@ module XMLhelper
|
|
164
183
|
|
165
184
|
if x.is_a? Rexle::Element then
|
166
185
|
|
167
|
-
a = x.attributes.to_a.map do |k,v|
|
186
|
+
a = x.attributes.to_a.map do |k,v|
|
168
187
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
169
188
|
end
|
170
189
|
a ||= []
|
171
190
|
|
172
191
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
173
|
-
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
192
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
174
193
|
|
175
194
|
if (x.value and x.value.length > 0) \
|
176
195
|
or (x.children and x.children.length > 0 \
|
@@ -178,13 +197,13 @@ module XMLhelper
|
|
178
197
|
x.name == 'script' or x.name == 'textarea' or \
|
179
198
|
x.name == 'iframe' then
|
180
199
|
|
181
|
-
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
200
|
+
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
182
201
|
("\n" + ' ' * indent) : ''
|
183
|
-
|
202
|
+
|
184
203
|
out = ["%s<%s>%s" % [start, tag, ind1]]
|
185
|
-
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
204
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
186
205
|
ind2 = (ind1 and ind1.length > 0) ? ("\n" + ' ' * (indent - 1)) : ''
|
187
|
-
out << "%s</%s>" % [ind2, x.name]
|
206
|
+
out << "%s</%s>" % [ind2, x.name]
|
188
207
|
else
|
189
208
|
|
190
209
|
out = ["%s<%s/>" % [start, tag]]
|
@@ -192,8 +211,8 @@ module XMLhelper
|
|
192
211
|
|
193
212
|
|
194
213
|
elsif x.is_a? String then x.sub(/^[\n\s]+$/,'')
|
195
|
-
elsif x.is_a? Rexle::CData then x.print
|
196
|
-
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
214
|
+
elsif x.is_a? Rexle::CData then x.print
|
215
|
+
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
197
216
|
|
198
217
|
end
|
199
218
|
end
|
@@ -208,17 +227,17 @@ class Rexle
|
|
208
227
|
|
209
228
|
attr_reader :prefixes, :doctype
|
210
229
|
attr_accessor :instructions
|
211
|
-
|
230
|
+
|
212
231
|
def initialize(x=nil, rexle: self, debug: false)
|
213
232
|
|
214
233
|
@rexle, @debug = rexle, debug
|
215
234
|
$debug = @debug
|
216
|
-
|
235
|
+
|
217
236
|
puts 'inside Rexle'.debug if debug
|
218
|
-
|
237
|
+
|
219
238
|
super()
|
220
239
|
|
221
|
-
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
240
|
+
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
222
241
|
@doctype = :xml
|
223
242
|
|
224
243
|
# what type of input is it? Is it a string, array
|
@@ -228,11 +247,11 @@ class Rexle
|
|
228
247
|
Array: proc {|x| x},
|
229
248
|
RexleParser: ->(x){ parse_rexle(x)}
|
230
249
|
}
|
231
|
-
|
250
|
+
|
232
251
|
doc_node = ['doc', Attributes.new]
|
233
|
-
|
252
|
+
|
234
253
|
@a = procs[x.class.to_s.to_sym].call(x)
|
235
|
-
|
254
|
+
|
236
255
|
@doc = scan_element(*(doc_node << @a))
|
237
256
|
|
238
257
|
# fetch the namespaces
|
@@ -243,78 +262,78 @@ class Rexle
|
|
243
262
|
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
244
263
|
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
245
264
|
end
|
246
|
-
|
265
|
+
|
247
266
|
end
|
248
267
|
|
249
268
|
end
|
250
|
-
|
269
|
+
|
251
270
|
def clone()
|
252
271
|
Rexle.new self.to_a
|
253
272
|
end
|
254
|
-
|
273
|
+
|
255
274
|
def at_css(selector)
|
256
275
|
@doc.root.element RexleCSS.new(selector).to_xpath
|
257
|
-
end
|
258
|
-
|
276
|
+
end
|
277
|
+
|
259
278
|
def css(selector)
|
260
|
-
|
261
|
-
a = selector.split(',').flat_map do |x|
|
279
|
+
|
280
|
+
a = selector.split(',').flat_map do |x|
|
262
281
|
@doc.root.xpath RexleCSS.new(x).to_xpath
|
263
282
|
end
|
264
|
-
|
283
|
+
|
265
284
|
return a
|
266
285
|
end
|
267
|
-
|
286
|
+
|
268
287
|
def xpath(path, &blk)
|
269
288
|
@doc.xpath(path, &blk)
|
270
|
-
end
|
289
|
+
end
|
271
290
|
|
272
291
|
class Element
|
273
292
|
include XMLhelper
|
274
|
-
|
293
|
+
|
275
294
|
class Value < String
|
276
|
-
|
295
|
+
|
277
296
|
def initialize(value)
|
278
297
|
super(value)
|
279
298
|
end
|
280
|
-
|
299
|
+
|
281
300
|
def <(val2)
|
282
301
|
self.to_f < val2.to_f
|
283
|
-
end
|
284
|
-
|
302
|
+
end
|
303
|
+
|
285
304
|
def >(val2)
|
286
305
|
self.to_f > val2.to_f
|
287
|
-
end
|
288
|
-
end
|
289
|
-
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
290
309
|
class Attribute
|
291
|
-
|
310
|
+
|
292
311
|
attr_reader :value
|
293
|
-
|
312
|
+
|
294
313
|
def initialize(value)
|
295
314
|
@value = value
|
296
315
|
end
|
297
|
-
|
316
|
+
|
298
317
|
def to_f()
|
299
318
|
@value.to_f
|
300
|
-
end
|
301
|
-
|
319
|
+
end
|
320
|
+
|
302
321
|
def to_i()
|
303
322
|
@value.to_i
|
304
323
|
end
|
305
|
-
|
324
|
+
|
306
325
|
alias to_s value
|
307
|
-
|
326
|
+
|
308
327
|
end
|
309
|
-
|
328
|
+
|
310
329
|
attr_accessor :name, :value, :parent
|
311
330
|
attr_reader :child_elements, :doc_id, :instructions
|
312
|
-
|
331
|
+
|
313
332
|
alias original_clone clone
|
314
333
|
|
315
334
|
def initialize(name=nil, value: nil, attributes: Attributes.new, rexle: self)
|
316
335
|
|
317
|
-
@rexle = rexle
|
336
|
+
@rexle = rexle
|
318
337
|
super()
|
319
338
|
|
320
339
|
@name, @attributes = name.to_s, attributes
|
@@ -324,11 +343,11 @@ class Rexle
|
|
324
343
|
self.add_text value if value
|
325
344
|
|
326
345
|
end
|
327
|
-
|
346
|
+
|
328
347
|
def backtrack(use_attributes: true)
|
329
348
|
BacktrackXPath.new(self, use_attributes: use_attributes)
|
330
349
|
end
|
331
|
-
|
350
|
+
|
332
351
|
def cdata?()
|
333
352
|
self.is_a? CData
|
334
353
|
end
|
@@ -336,8 +355,8 @@ class Rexle
|
|
336
355
|
def contains(raw_args)
|
337
356
|
|
338
357
|
path, raw_val = raw_args.split(',',2)
|
339
|
-
val = raw_val.strip[/^["']?.*["']?$/]
|
340
|
-
|
358
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
359
|
+
|
341
360
|
anode = query_xpath(path)
|
342
361
|
|
343
362
|
return [false] if anode.nil? or anode.empty?
|
@@ -346,21 +365,21 @@ class Rexle
|
|
346
365
|
r = [a.grep(/#{val.sub(/^["'](.*)["']$/,'\1')}/).length > 0]
|
347
366
|
|
348
367
|
r.any?
|
349
|
-
end
|
350
|
-
|
368
|
+
end
|
369
|
+
|
351
370
|
def count(path)
|
352
371
|
length = query_xpath(path).flatten.compact.length
|
353
372
|
length
|
354
373
|
end
|
355
|
-
|
374
|
+
|
356
375
|
def current()
|
357
376
|
self
|
358
377
|
end
|
359
378
|
|
360
379
|
def at_css(selector)
|
361
380
|
self.root.element RexleCSS.new(selector).to_xpath
|
362
|
-
end
|
363
|
-
|
381
|
+
end
|
382
|
+
|
364
383
|
def css(selector)
|
365
384
|
|
366
385
|
selector.split(',')\
|
@@ -371,76 +390,76 @@ class Rexle
|
|
371
390
|
def lowercase(s)
|
372
391
|
|
373
392
|
end
|
374
|
-
|
375
|
-
def max(path)
|
393
|
+
|
394
|
+
def max(path)
|
376
395
|
a = query_xpath(path).flatten.select{|x| x.is_a? String or x.is_a? Rexle::Element::Attribute}.map(&:to_i)
|
377
|
-
a.max
|
396
|
+
a.max
|
378
397
|
end
|
379
|
-
|
398
|
+
|
380
399
|
def name()
|
381
|
-
|
400
|
+
|
382
401
|
if @rexle and @rexle.respond_to? :prefixes then
|
383
|
-
|
402
|
+
|
384
403
|
if @rexle.prefixes.is_a? Array then
|
385
|
-
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
404
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
386
405
|
end
|
387
|
-
|
406
|
+
|
388
407
|
prefix ? @name.sub(prefix + ':', '') : @name
|
389
|
-
|
408
|
+
|
390
409
|
else
|
391
410
|
@name
|
392
411
|
end
|
393
|
-
|
412
|
+
|
394
413
|
end
|
395
|
-
|
396
|
-
def next_element()
|
414
|
+
|
415
|
+
def next_element()
|
397
416
|
|
398
417
|
id = self.object_id
|
399
|
-
a = self.parent.elements
|
418
|
+
a = self.parent.elements
|
400
419
|
|
401
420
|
i = a.index {|x| x.object_id == id} + 2
|
402
421
|
a[i] if i < a.length + 1
|
403
|
-
|
422
|
+
|
404
423
|
end
|
405
|
-
|
424
|
+
|
406
425
|
alias next_sibling next_element
|
407
|
-
|
426
|
+
|
408
427
|
def not(bool)
|
409
428
|
|
410
429
|
r = self.xpath(bool).any?
|
411
430
|
|
412
431
|
!r
|
413
432
|
end
|
414
|
-
|
415
|
-
def previous_element()
|
416
|
-
|
433
|
+
|
434
|
+
def previous_element()
|
435
|
+
|
417
436
|
id = self.object_id
|
418
|
-
a = self.parent.elements
|
437
|
+
a = self.parent.elements
|
419
438
|
i = a.index {|x| x.object_id == id}
|
420
439
|
|
421
|
-
a[i] if i > 0
|
440
|
+
a[i] if i > 0
|
422
441
|
|
423
442
|
end
|
424
|
-
|
443
|
+
|
425
444
|
alias previous_sibling previous_element
|
426
|
-
|
445
|
+
|
427
446
|
def xpath(path, rlist=[], &blk)
|
428
|
-
|
447
|
+
|
429
448
|
#@log.debug 'inside xpath ' + path.inspect
|
430
449
|
|
431
450
|
r = filter_xpath(path, rlist=[], &blk)
|
432
451
|
#@log.debug 'after filter_xpath : ' + r.inspect
|
433
|
-
|
452
|
+
|
434
453
|
if r.is_a?(Array) then
|
435
|
-
|
454
|
+
|
436
455
|
Recordset.new(r.compact)
|
437
|
-
|
456
|
+
|
438
457
|
else
|
439
458
|
r
|
440
459
|
end
|
441
|
-
|
460
|
+
|
442
461
|
end
|
443
|
-
|
462
|
+
|
444
463
|
def filter_xpath(raw_path, rlist=[], &blk)
|
445
464
|
#@log.debug 'inside filter_xpath : ' + raw_path.inspect
|
446
465
|
path = String.new raw_path
|
@@ -450,25 +469,25 @@ class Rexle
|
|
450
469
|
#fn_match = path.match(/^(\w+)\(/)
|
451
470
|
#@log.debug 'fn_match : ' + fn_match.inspect
|
452
471
|
end_fn_match = path.slice!(/\[\w+\(\)\]$/)
|
453
|
-
|
472
|
+
|
454
473
|
if end_fn_match then
|
455
|
-
|
474
|
+
|
456
475
|
m = end_fn_match[1..-4]
|
457
476
|
#@log.debug 'its a function'
|
458
477
|
[method(m.to_sym).call(xpath path)]
|
459
|
-
|
460
|
-
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
478
|
+
|
479
|
+
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
461
480
|
|
462
481
|
procs = {
|
463
482
|
|
464
|
-
Array: proc { |x|
|
465
|
-
if block_given? then
|
466
|
-
x.flatten(1)
|
483
|
+
Array: proc { |x|
|
484
|
+
if block_given? then
|
485
|
+
x.flatten(1)
|
467
486
|
else
|
468
487
|
rs = x.flatten
|
469
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
488
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
470
489
|
end
|
471
|
-
},
|
490
|
+
},
|
472
491
|
String: proc {|x| x},
|
473
492
|
Hash: proc {|x| x},
|
474
493
|
TrueClass: proc{|x| x},
|
@@ -477,25 +496,25 @@ class Rexle
|
|
477
496
|
}
|
478
497
|
bucket = []
|
479
498
|
raw_results = path.split('|').map do |xp|
|
480
|
-
query_xpath(xp.strip, bucket, &blk)
|
499
|
+
query_xpath(xp.strip, bucket, &blk)
|
481
500
|
end
|
482
|
-
|
501
|
+
|
483
502
|
results = raw_results
|
484
503
|
|
485
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
486
|
-
|
504
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
505
|
+
|
487
506
|
elsif fn_match.nil?
|
488
|
-
|
507
|
+
|
489
508
|
procs = {
|
490
509
|
|
491
|
-
Array: proc { |x|
|
492
|
-
if block_given? then
|
493
|
-
x.flatten(1)
|
510
|
+
Array: proc { |x|
|
511
|
+
if block_given? then
|
512
|
+
x.flatten(1)
|
494
513
|
else
|
495
514
|
rs = x.flatten
|
496
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
515
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
497
516
|
end
|
498
|
-
},
|
517
|
+
},
|
499
518
|
String: proc {|x| x},
|
500
519
|
Hash: proc {|x| x},
|
501
520
|
TrueClass: proc{|x| x},
|
@@ -503,23 +522,23 @@ class Rexle
|
|
503
522
|
:"Rexle::Element" => proc {|x| [x]}
|
504
523
|
}
|
505
524
|
bucket = []
|
506
|
-
|
525
|
+
|
507
526
|
results = if path =~ /[\[]|\(/ then
|
508
527
|
|
509
528
|
raw_results = path.split(/\|/).map do |xp|
|
510
|
-
query_xpath(xp.strip, bucket, &blk)
|
529
|
+
query_xpath(xp.strip, bucket, &blk)
|
511
530
|
end
|
512
531
|
|
513
532
|
raw_results.flatten.index(true) ? [true] : []
|
514
|
-
|
533
|
+
|
515
534
|
else
|
516
535
|
raw_results = path.split(/ *(?:\||\band\b) */).map do |xp|
|
517
|
-
query_xpath(xp.strip, bucket, &blk)
|
518
|
-
end
|
536
|
+
query_xpath(xp.strip, bucket, &blk)
|
537
|
+
end
|
519
538
|
|
520
539
|
if path =~ / and / then
|
521
540
|
|
522
|
-
raw_results.flatten.select {|x| x == true or x == false}
|
541
|
+
raw_results.flatten.select {|x| x == true or x == false}
|
523
542
|
|
524
543
|
else
|
525
544
|
raw_results.flatten.index(true) ? [true] : []
|
@@ -528,12 +547,12 @@ class Rexle
|
|
528
547
|
|
529
548
|
return results if !path[/[><]/] and results.any?
|
530
549
|
results = raw_results # .flatten.select {|x| x}
|
531
|
-
|
532
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
550
|
+
|
551
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
533
552
|
|
534
553
|
else
|
535
|
-
|
536
|
-
m, xpath_value, index = fn_match.captures
|
554
|
+
|
555
|
+
m, xpath_value, index = fn_match.captures
|
537
556
|
|
538
557
|
if m == 'text' then
|
539
558
|
a = texts()
|
@@ -547,14 +566,14 @@ class Rexle
|
|
547
566
|
|
548
567
|
end
|
549
568
|
|
550
|
-
end
|
551
|
-
|
569
|
+
end
|
570
|
+
|
552
571
|
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
553
572
|
|
554
573
|
#@log.debug 'query_xpath : ' + raw_xpath_value.inspect
|
555
574
|
#@log.debug '++ ' + self.xml.inspect
|
556
575
|
|
557
|
-
flag_func = false
|
576
|
+
flag_func = false
|
558
577
|
|
559
578
|
xpath_value = raw_xpath_value.sub('child::','./')
|
560
579
|
|
@@ -578,38 +597,38 @@ class Rexle
|
|
578
597
|
|
579
598
|
raw_condition = raw_condition ? raw_condition + '/' + remaining_path \
|
580
599
|
: remaining_path
|
581
|
-
remaining_path = ''
|
600
|
+
remaining_path = ''
|
582
601
|
end
|
583
602
|
|
584
|
-
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
603
|
+
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
585
604
|
|
586
605
|
if r then
|
587
606
|
a_path = raw_path.split(/(?=\/\/)/,2)
|
588
607
|
else
|
589
608
|
a_path = raw_path.split('/',2)
|
590
609
|
end
|
591
|
-
|
610
|
+
|
592
611
|
condition = raw_condition if a_path.length <= 1 #and not raw_condition[/^\[\w+\(.*\)\]$/]
|
593
612
|
|
594
613
|
if raw_path[0,2] == '//' then
|
595
614
|
s = ''
|
596
|
-
elsif raw_path == 'text()'
|
615
|
+
elsif raw_path == 'text()'
|
597
616
|
|
598
617
|
a_path.shift
|
599
618
|
#return @value
|
600
619
|
return self.texts
|
601
620
|
else
|
602
621
|
|
603
|
-
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
604
|
-
|
622
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
623
|
+
|
605
624
|
return @attributes if attribute == '*'
|
606
|
-
|
625
|
+
|
607
626
|
if attribute and @attributes and \
|
608
627
|
@attributes.has_key?(attribute.to_sym) then
|
609
628
|
return [Attribute.new(@attributes[attribute.to_sym])]
|
610
629
|
end
|
611
630
|
s = a_path.shift
|
612
|
-
end
|
631
|
+
end
|
613
632
|
|
614
633
|
# isolate the xpath to return just the path to the current element
|
615
634
|
|
@@ -625,7 +644,7 @@ class Rexle
|
|
625
644
|
if element_name and element_name[/^\d/] then
|
626
645
|
element_name = nil
|
627
646
|
end
|
628
|
-
|
647
|
+
|
629
648
|
condition = raw_xpath_value if element_name.nil?
|
630
649
|
|
631
650
|
else
|
@@ -661,11 +680,11 @@ class Rexle
|
|
661
680
|
r4 = attribute_search(attr_search, self, self.attributes)
|
662
681
|
return r4
|
663
682
|
end
|
664
|
-
|
665
|
-
|
683
|
+
|
684
|
+
|
666
685
|
return_elements = []
|
667
|
-
|
668
|
-
|
686
|
+
|
687
|
+
|
669
688
|
|
670
689
|
if raw_path[0,2] == '//' then
|
671
690
|
|
@@ -680,25 +699,25 @@ class Rexle
|
|
680
699
|
|
681
700
|
if element_name.is_a? String then
|
682
701
|
ename, raw_selector = (element_name.split('::',2)).reverse
|
683
|
-
|
702
|
+
|
684
703
|
selector = case raw_selector
|
685
704
|
when 'following-sibling' then 1
|
686
705
|
when 'preceding-sibling' then -1
|
687
706
|
end
|
688
|
-
|
707
|
+
|
689
708
|
else
|
690
709
|
ename = element_name
|
691
|
-
end
|
710
|
+
end
|
692
711
|
|
693
712
|
if ename == '..' then
|
694
|
-
|
713
|
+
|
695
714
|
remaining_xpath = raw_path[/\.\.\/(.*)/,1]
|
696
715
|
# select the parent element
|
697
716
|
|
698
717
|
r2 = self.parent.xpath(remaining_xpath)
|
699
718
|
|
700
719
|
return r2
|
701
|
-
|
720
|
+
|
702
721
|
elsif ename == '.'
|
703
722
|
|
704
723
|
remaining_xpath = raw_path[1..-1]
|
@@ -707,41 +726,41 @@ class Rexle
|
|
707
726
|
if xpath_value.length > 0 and xpath_value =~ /\[/ then
|
708
727
|
|
709
728
|
r = eval(attr_search.sub(/^h/,'self.attributes'))
|
710
|
-
return self if r
|
729
|
+
return self if r
|
711
730
|
|
712
731
|
else
|
713
732
|
return self
|
714
733
|
end
|
715
734
|
else
|
716
735
|
return self.xpath(remaining_xpath)
|
717
|
-
end
|
736
|
+
end
|
718
737
|
|
719
738
|
elsif element_name.nil?
|
720
739
|
puts ('attr_search: ' + attr_search.inspect).debug if $debug
|
721
|
-
return eval attr_search
|
740
|
+
return eval attr_search
|
722
741
|
else
|
723
742
|
|
724
743
|
if raw_selector.nil? and ename != element_part then
|
725
744
|
|
726
745
|
right_cond = element_part[/#{ename}(.*)/,1]
|
727
746
|
|
728
|
-
end
|
747
|
+
end
|
729
748
|
|
730
749
|
return_elements = @child_elements.map.with_index.select do |x, i|
|
731
750
|
|
732
751
|
next unless x.is_a? Rexle::Element
|
733
752
|
|
734
753
|
#x.name == ename or (ename == '*')
|
735
|
-
|
754
|
+
|
736
755
|
r10 = ((x.name == ename) or (ename == '*'))
|
737
756
|
|
738
|
-
|
757
|
+
|
739
758
|
|
740
759
|
end
|
741
|
-
|
760
|
+
|
742
761
|
if right_cond then
|
743
|
-
|
744
|
-
|
762
|
+
|
763
|
+
|
745
764
|
r12 = return_elements.map do |x, i|
|
746
765
|
|
747
766
|
if x.text then
|
@@ -751,15 +770,15 @@ class Rexle
|
|
751
770
|
else
|
752
771
|
false
|
753
772
|
end
|
754
|
-
|
773
|
+
|
755
774
|
end
|
756
|
-
|
775
|
+
|
757
776
|
return r12
|
758
|
-
|
759
|
-
end
|
760
|
-
|
777
|
+
|
778
|
+
end
|
779
|
+
|
761
780
|
if selector then
|
762
|
-
ne = return_elements.inject([]) do |r,x|
|
781
|
+
ne = return_elements.inject([]) do |r,x|
|
763
782
|
i = x.last + selector
|
764
783
|
if i >= 0 then
|
765
784
|
r << i
|
@@ -770,17 +789,17 @@ class Rexle
|
|
770
789
|
|
771
790
|
return_elements = ne.map {|x| [@child_elements[x], x] if x}
|
772
791
|
end
|
773
|
-
|
792
|
+
|
774
793
|
|
775
794
|
end
|
776
795
|
end
|
777
|
-
|
796
|
+
|
778
797
|
if return_elements.length > 0 then
|
779
798
|
|
780
799
|
if (a_path + [remaining_path]).join.empty? then
|
781
800
|
|
782
801
|
# pass in a block to the filter if it is function contains?
|
783
|
-
rlist = return_elements.map.with_index do |x,i|
|
802
|
+
rlist = return_elements.map.with_index do |x,i|
|
784
803
|
r5 = filter(x, i+1, attr_search, &blk)
|
785
804
|
|
786
805
|
r5
|
@@ -790,9 +809,9 @@ class Rexle
|
|
790
809
|
|
791
810
|
else
|
792
811
|
|
793
|
-
rlist << return_elements.map.with_index do |x,i|
|
812
|
+
rlist << return_elements.map.with_index do |x,i|
|
794
813
|
|
795
|
-
rtn_element = filter(x, i+1, attr_search) do |e|
|
814
|
+
rtn_element = filter(x, i+1, attr_search) do |e|
|
796
815
|
|
797
816
|
r = e.xpath(a_path.join('/') + raw_condition.to_s \
|
798
817
|
+ remaining_path, &blk)
|
@@ -838,7 +857,7 @@ class Rexle
|
|
838
857
|
rlist,&blk)
|
839
858
|
end
|
840
859
|
end
|
841
|
-
|
860
|
+
|
842
861
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) \
|
843
862
|
or (rlist.length > 1 and rlist[0].is_a? Array)
|
844
863
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
@@ -855,21 +874,21 @@ class Rexle
|
|
855
874
|
elsif item.is_a? Rexle::CData then
|
856
875
|
@child_elements << item
|
857
876
|
elsif item.is_a? Rexle::Comment then
|
858
|
-
@child_elements << item
|
877
|
+
@child_elements << item
|
859
878
|
elsif item.is_a? Rexle::Element then
|
860
879
|
|
861
880
|
@child_elements << item
|
862
881
|
# add a reference from this element (the parent) to the child
|
863
882
|
item.parent = self
|
864
|
-
item
|
865
|
-
|
883
|
+
item
|
884
|
+
|
866
885
|
elsif item.is_a? Rexle then
|
867
886
|
self.add_element(item.root)
|
868
887
|
end
|
869
888
|
|
870
|
-
end
|
889
|
+
end
|
871
890
|
|
872
|
-
def add(item)
|
891
|
+
def add(item)
|
873
892
|
|
874
893
|
if item.is_a? Rexle::Element then
|
875
894
|
|
@@ -895,13 +914,13 @@ class Rexle
|
|
895
914
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
896
915
|
else
|
897
916
|
self.xml
|
898
|
-
end
|
917
|
+
end
|
899
918
|
end
|
900
919
|
|
901
920
|
def add_attribute(*x)
|
902
|
-
|
921
|
+
|
903
922
|
proc_hash = lambda {|x| Hash[*x]}
|
904
|
-
|
923
|
+
|
905
924
|
procs = {
|
906
925
|
Hash: lambda {|x| x[0] || {}},
|
907
926
|
String: proc_hash,
|
@@ -919,57 +938,57 @@ class Rexle
|
|
919
938
|
def add_text(s)
|
920
939
|
|
921
940
|
self.child_elements << s
|
922
|
-
self
|
941
|
+
self
|
923
942
|
end
|
924
|
-
|
925
|
-
def attribute(key)
|
926
|
-
|
943
|
+
|
944
|
+
def attribute(key)
|
945
|
+
|
927
946
|
key = key.to_sym if key.is_a? String
|
928
|
-
|
947
|
+
|
929
948
|
if @attributes[key].is_a? String then
|
930
|
-
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
949
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
931
950
|
else
|
932
951
|
@attributes[key]
|
933
952
|
end
|
934
|
-
end
|
935
|
-
|
936
|
-
def attributes() @attributes end
|
937
|
-
|
953
|
+
end
|
954
|
+
|
955
|
+
def attributes() @attributes end
|
956
|
+
|
938
957
|
def cdatas()
|
939
958
|
self.children.inject([]){|r,x| x.is_a?(Rexle::CData) ? r << x.to_s : r }
|
940
959
|
end
|
941
|
-
|
960
|
+
|
942
961
|
def children()
|
943
962
|
|
944
963
|
r = @child_elements
|
945
|
-
|
964
|
+
|
946
965
|
def r.is_an_empty_string?()
|
947
966
|
self.length == 1 and self.first == ''
|
948
|
-
end
|
949
|
-
|
967
|
+
end
|
968
|
+
|
950
969
|
return r
|
951
|
-
end
|
970
|
+
end
|
952
971
|
|
953
972
|
def children=(a) @child_elements = a if a.is_a? Array end
|
954
|
-
|
973
|
+
|
955
974
|
def deep_clone() Rexle.new(self.xml).root end
|
956
|
-
|
957
|
-
def clone()
|
958
|
-
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
975
|
+
|
976
|
+
def clone()
|
977
|
+
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
959
978
|
end
|
960
|
-
|
979
|
+
|
961
980
|
def delete(obj=nil)
|
962
981
|
|
963
982
|
if obj then
|
964
983
|
|
965
984
|
if obj.is_a? String then
|
966
|
-
|
985
|
+
|
967
986
|
self.xpath(obj).each {|e| e.delete; e = nil}
|
968
|
-
|
987
|
+
|
969
988
|
else
|
970
989
|
|
971
990
|
i = @child_elements.index(obj)
|
972
|
-
[@child_elements].each{|x| x.delete_at i} if i
|
991
|
+
[@child_elements].each{|x| x.delete_at i} if i
|
973
992
|
end
|
974
993
|
else
|
975
994
|
|
@@ -991,50 +1010,50 @@ class Rexle
|
|
991
1010
|
String: proc {|x| @child_elements[x]}
|
992
1011
|
}
|
993
1012
|
|
994
|
-
procs[s.class.to_s.to_sym].call(s)
|
1013
|
+
procs[s.class.to_s.to_sym].call(s)
|
995
1014
|
end
|
996
1015
|
|
997
1016
|
def doc_root() @rexle.root end
|
998
1017
|
def each(&blk) self.children.each(&blk) end
|
999
1018
|
def each_recursive(&blk) recursive_scan(self.children,&blk) end
|
1000
1019
|
alias traverse each_recursive
|
1001
|
-
def has_elements?() !self.elements.empty? end
|
1002
|
-
def insert_after(node) insert(node, 1) end
|
1020
|
+
def has_elements?() !self.elements.empty? end
|
1021
|
+
def insert_after(node) insert(node, 1) end
|
1003
1022
|
def insert_before(node) insert(node) end
|
1004
1023
|
def last(a) a.last end
|
1005
|
-
def map(&blk) self.children.map(&blk) end
|
1006
|
-
|
1024
|
+
def map(&blk) self.children.map(&blk) end
|
1025
|
+
|
1007
1026
|
def plaintext()
|
1008
1027
|
CGI.unescapeHTML xml().gsub(/<\/?[^>]+>/,'').gsub('&nbsp;',' ')\
|
1009
1028
|
.gsub(/\n\s+/,' ')
|
1010
1029
|
end
|
1011
|
-
|
1012
|
-
def root() self end
|
1030
|
+
|
1031
|
+
def root() self end
|
1013
1032
|
|
1014
1033
|
def text(s='')
|
1015
|
-
|
1016
|
-
return self.value if s.empty?
|
1017
|
-
|
1034
|
+
|
1035
|
+
return self.value if s.empty?
|
1036
|
+
|
1018
1037
|
e = self.element(s)
|
1019
1038
|
return e if e.is_a? String
|
1020
|
-
|
1039
|
+
|
1021
1040
|
e.value if e
|
1022
1041
|
end
|
1023
|
-
|
1042
|
+
|
1024
1043
|
def texts()
|
1025
1044
|
|
1026
1045
|
r = @child_elements.select do |x|
|
1027
1046
|
x.is_a? String or x.is_a? Rexle::CData
|
1028
1047
|
end
|
1029
|
-
|
1048
|
+
|
1030
1049
|
r.map do |x|
|
1031
1050
|
def x.unescape()
|
1032
1051
|
s = self.to_s.clone
|
1033
1052
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1034
1053
|
s
|
1035
|
-
end
|
1054
|
+
end
|
1036
1055
|
end
|
1037
|
-
|
1056
|
+
|
1038
1057
|
return r
|
1039
1058
|
end
|
1040
1059
|
|
@@ -1042,20 +1061,20 @@ class Rexle
|
|
1042
1061
|
|
1043
1062
|
r = @child_elements.first
|
1044
1063
|
return nil unless r.is_a? String
|
1045
|
-
|
1064
|
+
|
1046
1065
|
def r.unescape()
|
1047
1066
|
s = self.clone
|
1048
1067
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1049
1068
|
s
|
1050
|
-
end
|
1051
|
-
|
1069
|
+
end
|
1070
|
+
|
1052
1071
|
return r
|
1053
1072
|
end
|
1054
|
-
|
1073
|
+
|
1055
1074
|
def value=(raw_s)
|
1056
1075
|
|
1057
1076
|
val = Value.new(raw_s.to_s.clone)
|
1058
|
-
|
1077
|
+
|
1059
1078
|
escape_chars = %w(& &amp; ' &apos; < &lt; > &gt;).each_slice(2).to_a
|
1060
1079
|
escape_chars.each{|x| val.gsub!(*x)}
|
1061
1080
|
|
@@ -1065,9 +1084,15 @@ class Rexle
|
|
1065
1084
|
end
|
1066
1085
|
|
1067
1086
|
alias text= value=
|
1068
|
-
|
1087
|
+
|
1069
1088
|
def to_a()
|
1089
|
+
|
1070
1090
|
e = [String.new(self.name), Hash.new(self.attributes)]
|
1091
|
+
|
1092
|
+
if self.cdatas.any? then
|
1093
|
+
e.concat self.cdatas.map {|cdata| ['![', {}, cdata] }
|
1094
|
+
end
|
1095
|
+
|
1071
1096
|
[*e, *scan_to_a(self.children)]
|
1072
1097
|
end
|
1073
1098
|
|
@@ -1077,10 +1102,10 @@ class Rexle
|
|
1077
1102
|
Hash: lambda {|x|
|
1078
1103
|
o = {pretty: false}.merge(x)
|
1079
1104
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
1080
|
-
|
1105
|
+
|
1081
1106
|
method(msg).call(self.children)
|
1082
1107
|
},
|
1083
|
-
String: lambda {|x|
|
1108
|
+
String: lambda {|x|
|
1084
1109
|
r = self.element(x)
|
1085
1110
|
r ? r.xml : ''
|
1086
1111
|
}
|
@@ -1093,18 +1118,18 @@ class Rexle
|
|
1093
1118
|
end
|
1094
1119
|
|
1095
1120
|
def prepend(item)
|
1096
|
-
|
1121
|
+
|
1097
1122
|
@child_elements.unshift item
|
1098
|
-
|
1123
|
+
|
1099
1124
|
# add a reference from this element (the parent) to the child
|
1100
1125
|
item.parent = self
|
1101
|
-
item
|
1102
|
-
end
|
1103
|
-
|
1126
|
+
item
|
1127
|
+
end
|
1128
|
+
|
1104
1129
|
alias to_s xml
|
1105
1130
|
|
1106
1131
|
private
|
1107
|
-
|
1132
|
+
|
1108
1133
|
def insert(node,offset=0)
|
1109
1134
|
|
1110
1135
|
i = parent.child_elements.index(self)
|
@@ -1116,7 +1141,7 @@ class Rexle
|
|
1116
1141
|
node.instance_variable_set(:@doc_id, self.doc_root.object_id)
|
1117
1142
|
|
1118
1143
|
self
|
1119
|
-
end
|
1144
|
+
end
|
1120
1145
|
|
1121
1146
|
def format_condition(condition)
|
1122
1147
|
|
@@ -1141,21 +1166,21 @@ class Rexle
|
|
1141
1166
|
elsif raw_items[0][/^not\(/]
|
1142
1167
|
|
1143
1168
|
return raw_items[0]
|
1144
|
-
|
1169
|
+
|
1145
1170
|
else
|
1146
1171
|
|
1147
1172
|
andor_items = raw_items.map.with_index\
|
1148
1173
|
.select{|x,i| x[/\band\b|\bor\b/]}\
|
1149
1174
|
.map{|x| [x.last, x.last + 1]}.flatten
|
1150
|
-
|
1175
|
+
|
1151
1176
|
indices = [0] + andor_items + [raw_items.length]
|
1152
1177
|
|
1153
1178
|
if raw_items[0][0] == '@' then
|
1154
1179
|
|
1155
1180
|
raw_items.each{|x| x.gsub!(/^@/,'')}
|
1156
|
-
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1181
|
+
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1157
1182
|
|
1158
|
-
items = cons_items.map do |x|
|
1183
|
+
items = cons_items.map do |x|
|
1159
1184
|
|
1160
1185
|
if x.length >= 3 then
|
1161
1186
|
if x[0] != 'class' then
|
@@ -1174,17 +1199,17 @@ class Rexle
|
|
1174
1199
|
else
|
1175
1200
|
|
1176
1201
|
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1177
|
-
|
1178
|
-
items = cons_items.map do |x|
|
1202
|
+
|
1203
|
+
items = cons_items.map do |x|
|
1179
1204
|
|
1180
1205
|
if x.length >= 3 then
|
1181
1206
|
|
1182
1207
|
x[1] = '==' if x[1] == '='
|
1183
1208
|
if x[0] != '.' then
|
1184
1209
|
if x[0][/\//] then
|
1185
|
-
|
1210
|
+
|
1186
1211
|
path, value = x.values_at(0,-1)
|
1187
|
-
|
1212
|
+
|
1188
1213
|
if x[0][/@\w+$/] then
|
1189
1214
|
"r = e.xpath('#{path}').first; r and r.value == #{value}"
|
1190
1215
|
else
|
@@ -1208,16 +1233,16 @@ class Rexle
|
|
1208
1233
|
|
1209
1234
|
|
1210
1235
|
end
|
1211
|
-
|
1236
|
+
|
1212
1237
|
def scan_match(node, path)
|
1213
|
-
|
1238
|
+
|
1214
1239
|
if path == '//' then
|
1215
|
-
return [node, node.text,
|
1240
|
+
return [node, node.text,
|
1216
1241
|
node.elements.map {|x| scan_match x, path}]
|
1217
1242
|
end
|
1218
|
-
|
1243
|
+
|
1219
1244
|
r = []
|
1220
|
-
xpath2 = path[2..-1]
|
1245
|
+
xpath2 = path[2..-1]
|
1221
1246
|
#jr150316 xpath2.sub!(/^\*\//,'')
|
1222
1247
|
#jr150316xpath2.sub!(/^\*/,self.name)
|
1223
1248
|
#jr150316xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
@@ -1240,17 +1265,17 @@ class Rexle
|
|
1240
1265
|
end
|
1241
1266
|
a
|
1242
1267
|
end
|
1243
|
-
|
1244
|
-
|
1268
|
+
|
1269
|
+
|
1245
1270
|
def filter(raw_element, i, attr_search, &blk)
|
1246
|
-
|
1271
|
+
|
1247
1272
|
x, index = raw_element
|
1248
1273
|
e = @child_elements[index]
|
1249
1274
|
|
1250
1275
|
return unless e.is_a? Rexle::Element
|
1251
1276
|
name, value = e.name, e.value if e.is_a? Rexle::Element
|
1252
1277
|
|
1253
|
-
h = x.attributes # <-- fetch the attributes
|
1278
|
+
h = x.attributes # <-- fetch the attributes
|
1254
1279
|
|
1255
1280
|
if attr_search then
|
1256
1281
|
|
@@ -1266,21 +1291,21 @@ class Rexle
|
|
1266
1291
|
def attribute_search(attr_search, e, h, i=nil, &blk)
|
1267
1292
|
|
1268
1293
|
r2 = if attr_search.is_a? Integer then
|
1269
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
1294
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
1270
1295
|
elsif attr_search[/i\s(?:<|>|==|%)\s\d+/] and eval(attr_search) then
|
1271
|
-
block_given? ? blk.call(e) : e
|
1296
|
+
block_given? ? blk.call(e) : e
|
1272
1297
|
elsif h and !h.empty? and attr_search[/^h\[/] and eval(attr_search) then
|
1273
1298
|
block_given? ? blk.call(e) : e
|
1274
|
-
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1299
|
+
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1275
1300
|
next unless x.is_a? Rexle::Element
|
1276
1301
|
name, attributes, value = x.name, x.attributes, x.value.to_s
|
1277
1302
|
b = eval(attr_search)
|
1278
1303
|
b}.length > 0
|
1279
1304
|
|
1280
1305
|
block_given? ? blk.call(e) : e
|
1281
|
-
|
1282
|
-
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1283
|
-
block_given? ? blk.call(e) : e
|
1306
|
+
|
1307
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1308
|
+
block_given? ? blk.call(e) : e
|
1284
1309
|
elsif attr_search[/^e\.value/]
|
1285
1310
|
|
1286
1311
|
v = attr_search[/[^\s]+$/]
|
@@ -1294,92 +1319,92 @@ class Rexle
|
|
1294
1319
|
block_given? ? blk.call(e) : e
|
1295
1320
|
elsif attr_search[/^\w*\(/] and e.element(attr_search)
|
1296
1321
|
block_given? ? blk.call(e) : e
|
1297
|
-
end
|
1322
|
+
end
|
1298
1323
|
|
1299
1324
|
r2
|
1300
1325
|
end
|
1301
|
-
|
1326
|
+
|
1302
1327
|
def recursive_scan(nodes, &blk)
|
1303
|
-
|
1328
|
+
|
1304
1329
|
nodes.each do |x|
|
1305
1330
|
|
1306
1331
|
if x.is_a? Rexle::Element then
|
1307
1332
|
blk.call(x)
|
1308
1333
|
recursive_scan(x.children, &blk) if x.children.length > 0
|
1309
|
-
end
|
1334
|
+
end
|
1310
1335
|
end
|
1311
1336
|
end
|
1312
|
-
|
1337
|
+
|
1313
1338
|
end # -- end of element --
|
1314
|
-
|
1339
|
+
|
1315
1340
|
|
1316
1341
|
class CData
|
1317
|
-
|
1342
|
+
|
1318
1343
|
def initialize(val='')
|
1319
1344
|
@value = val
|
1320
1345
|
end
|
1321
|
-
|
1346
|
+
|
1322
1347
|
def clone()
|
1323
1348
|
CData.new(@value)
|
1324
1349
|
end
|
1325
|
-
|
1350
|
+
|
1326
1351
|
def inspect()
|
1327
1352
|
@value.inspect
|
1328
1353
|
end
|
1329
|
-
|
1354
|
+
|
1330
1355
|
def print()
|
1331
1356
|
"<![CDATA[%s]]>" % @value
|
1332
1357
|
end
|
1333
|
-
|
1358
|
+
|
1334
1359
|
def to_s()
|
1335
1360
|
@value
|
1336
1361
|
end
|
1337
|
-
|
1362
|
+
|
1338
1363
|
def unescape()
|
1339
1364
|
s = @value.clone
|
1340
1365
|
%w(< < > > & & ' ').each_slice(2){|x| s.gsub!(*x)}
|
1341
1366
|
s
|
1342
|
-
end
|
1343
|
-
|
1367
|
+
end
|
1368
|
+
|
1344
1369
|
end
|
1345
|
-
|
1346
|
-
|
1370
|
+
|
1371
|
+
|
1347
1372
|
class Comment
|
1348
|
-
|
1373
|
+
|
1349
1374
|
|
1350
1375
|
def initialize(val='')
|
1351
1376
|
@e = Element.new('_').add_text val
|
1352
1377
|
@value = val
|
1353
1378
|
end
|
1354
|
-
|
1379
|
+
|
1355
1380
|
def add_element(e2)
|
1356
1381
|
@e.add e2
|
1357
1382
|
end
|
1358
|
-
|
1383
|
+
|
1359
1384
|
def add_text(t)
|
1360
1385
|
@e.add_text t
|
1361
1386
|
end
|
1362
|
-
|
1387
|
+
|
1363
1388
|
def inspect()
|
1364
1389
|
@value
|
1365
1390
|
end
|
1366
|
-
|
1391
|
+
|
1367
1392
|
def print()
|
1368
1393
|
"<!--%s-->" % @e.root.xpath('//./text()').join
|
1369
1394
|
end
|
1370
|
-
|
1395
|
+
|
1371
1396
|
def texts()
|
1372
1397
|
@e.texts
|
1373
1398
|
end
|
1374
|
-
|
1399
|
+
|
1375
1400
|
def to_s()
|
1376
1401
|
@value
|
1377
1402
|
end
|
1378
1403
|
end
|
1379
|
-
|
1404
|
+
|
1380
1405
|
class Elements
|
1381
1406
|
include Enumerable
|
1382
|
-
|
1407
|
+
|
1383
1408
|
def initialize(elements=[])
|
1384
1409
|
super()
|
1385
1410
|
@elements = elements
|
@@ -1391,63 +1416,63 @@ class Rexle
|
|
1391
1416
|
i = raw_i - 1
|
1392
1417
|
@elements[i]
|
1393
1418
|
end
|
1394
|
-
|
1419
|
+
|
1395
1420
|
def each(&blk) @elements.each(&blk) end
|
1396
1421
|
def empty?() @elements.empty? end
|
1397
|
-
|
1422
|
+
|
1398
1423
|
def index(e=nil, &blk)
|
1399
|
-
|
1424
|
+
|
1400
1425
|
if block_given? then
|
1401
1426
|
@elements.index(&blk)
|
1402
1427
|
else
|
1403
1428
|
@elements.index e
|
1404
1429
|
end
|
1405
1430
|
end
|
1406
|
-
|
1431
|
+
|
1407
1432
|
def last() @elements.last end
|
1408
1433
|
def length() @elements.length end
|
1409
1434
|
def to_a() @elements end
|
1410
|
-
|
1435
|
+
|
1411
1436
|
end # -- end of elements --
|
1412
1437
|
|
1413
1438
|
|
1414
1439
|
def parse(x=nil)
|
1415
|
-
|
1440
|
+
|
1416
1441
|
a = []
|
1417
|
-
|
1442
|
+
|
1418
1443
|
if x then
|
1419
1444
|
procs = {
|
1420
1445
|
String: proc {|x| parse_string(x)},
|
1421
1446
|
Array: proc {|x| x}
|
1422
1447
|
}
|
1423
1448
|
a = procs[x.class.to_s.to_sym].call(x)
|
1424
|
-
else
|
1449
|
+
else
|
1425
1450
|
a = yield
|
1426
1451
|
end
|
1427
|
-
|
1452
|
+
|
1428
1453
|
doc_node = ['doc',Attributes.new]
|
1429
1454
|
@a = procs[x.class.to_s.to_sym].call(x)
|
1430
1455
|
@doc = scan_element(*(doc_node << @a))
|
1431
|
-
|
1456
|
+
|
1432
1457
|
self
|
1433
1458
|
end
|
1434
1459
|
|
1435
1460
|
def add_attribute(x) @doc.attribute(x) end
|
1436
1461
|
def attribute(key) @doc.attribute(key) end
|
1437
1462
|
def attributes() @doc.attributes end
|
1438
|
-
|
1439
|
-
def add_element(element)
|
1440
1463
|
|
1441
|
-
|
1464
|
+
def add_element(element)
|
1465
|
+
|
1466
|
+
if @doc then
|
1442
1467
|
raise 'attempted adding second root element to document' if @doc.root
|
1443
|
-
@doc.root.add_element(element)
|
1468
|
+
@doc.root.add_element(element)
|
1444
1469
|
else
|
1445
|
-
doc_node = ['doc', Attributes.new, element.to_a]
|
1446
|
-
@doc = scan_element(*doc_node)
|
1470
|
+
doc_node = ['doc', Attributes.new, element.to_a]
|
1471
|
+
@doc = scan_element(*doc_node)
|
1447
1472
|
end
|
1448
1473
|
element
|
1449
1474
|
end
|
1450
|
-
|
1475
|
+
|
1451
1476
|
def add_text(s) end
|
1452
1477
|
|
1453
1478
|
alias add add_element
|
@@ -1457,26 +1482,26 @@ class Rexle
|
|
1457
1482
|
@doc.xpath(xpath).each {|e| e.delete; e = nil }
|
1458
1483
|
|
1459
1484
|
end
|
1460
|
-
|
1485
|
+
|
1461
1486
|
alias remove delete
|
1462
1487
|
|
1463
|
-
def element(xpath) self.xpath(xpath).first end
|
1488
|
+
def element(xpath) self.xpath(xpath).first end
|
1464
1489
|
def elements(s=nil) @doc.elements(s) end
|
1465
1490
|
def name() @doc.root.name end
|
1466
1491
|
def to_a() @a end
|
1467
|
-
|
1468
|
-
def to_s(options={})
|
1492
|
+
|
1493
|
+
def to_s(options={})
|
1469
1494
|
return '<UNDEFINED/>' unless @doc
|
1470
|
-
self.xml options
|
1495
|
+
self.xml options
|
1471
1496
|
end
|
1472
|
-
|
1497
|
+
|
1473
1498
|
def text(xpath) @doc.text(xpath) end
|
1474
|
-
def root()
|
1475
|
-
@doc.elements.first
|
1499
|
+
def root()
|
1500
|
+
@doc.elements.first
|
1476
1501
|
end
|
1477
1502
|
|
1478
|
-
def write(f)
|
1479
|
-
f.write xml
|
1503
|
+
def write(f)
|
1504
|
+
f.write xml
|
1480
1505
|
end
|
1481
1506
|
|
1482
1507
|
def xml(options={})
|
@@ -1505,14 +1530,14 @@ class Rexle
|
|
1505
1530
|
private
|
1506
1531
|
|
1507
1532
|
def parse_rexle(x)
|
1508
|
-
|
1533
|
+
|
1509
1534
|
rp = RexleParser.new(x)
|
1510
1535
|
a = rp.to_a
|
1511
1536
|
|
1512
1537
|
@instructions = rp.instructions
|
1513
|
-
return a
|
1538
|
+
return a
|
1514
1539
|
end
|
1515
|
-
|
1540
|
+
|
1516
1541
|
def parse_string(x)
|
1517
1542
|
|
1518
1543
|
# check if the XML string is a dynarex document
|
@@ -1528,50 +1553,50 @@ class Rexle
|
|
1528
1553
|
'polyrex' => proc {|x| parse_rexle(x)}
|
1529
1554
|
}
|
1530
1555
|
other_parser = procs[recordx_type]
|
1531
|
-
|
1556
|
+
|
1532
1557
|
if other_parser then
|
1533
|
-
|
1558
|
+
|
1534
1559
|
begin
|
1535
1560
|
other_parser.call(x)
|
1536
1561
|
rescue
|
1537
1562
|
parse_rexle x
|
1538
1563
|
end
|
1539
|
-
|
1564
|
+
|
1540
1565
|
else
|
1541
|
-
|
1566
|
+
|
1542
1567
|
parse_rexle x
|
1543
|
-
|
1544
|
-
end
|
1545
|
-
|
1568
|
+
|
1569
|
+
end
|
1570
|
+
|
1546
1571
|
else
|
1547
1572
|
|
1548
1573
|
parse_rexle x
|
1549
|
-
|
1574
|
+
|
1550
1575
|
end
|
1551
1576
|
else
|
1552
1577
|
|
1553
1578
|
parse_rexle x
|
1554
|
-
|
1579
|
+
|
1555
1580
|
end
|
1556
1581
|
|
1557
1582
|
end
|
1558
|
-
|
1583
|
+
|
1559
1584
|
def scan_element(name=nil, attributes=nil, *children)
|
1560
|
-
|
1585
|
+
|
1561
1586
|
return unless name
|
1562
|
-
|
1587
|
+
|
1563
1588
|
return Rexle::CData.new(children.first) if name == '!['
|
1564
1589
|
return Rexle::Comment.new(children.first) if name == '!-'
|
1565
1590
|
|
1566
|
-
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1591
|
+
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1567
1592
|
|
1568
1593
|
if children then
|
1569
1594
|
|
1570
1595
|
children.each do |x4|
|
1571
|
-
|
1596
|
+
|
1572
1597
|
|
1573
1598
|
if x4.is_a? Array then
|
1574
|
-
element.add_element scan_element(*x4)
|
1599
|
+
element.add_element scan_element(*x4)
|
1575
1600
|
elsif x4.is_a? String then
|
1576
1601
|
|
1577
1602
|
e = if x4.is_a? String then
|
@@ -1580,22 +1605,22 @@ class Rexle
|
|
1580
1605
|
elsif x4.name == '![' then
|
1581
1606
|
|
1582
1607
|
Rexle::CData.new(x4)
|
1583
|
-
|
1608
|
+
|
1584
1609
|
elsif x4.name == '!-' then
|
1585
1610
|
|
1586
1611
|
Rexle::Comment.new(x4)
|
1587
|
-
|
1612
|
+
|
1588
1613
|
end
|
1589
1614
|
|
1590
1615
|
element.add_element e
|
1591
1616
|
end
|
1592
1617
|
end
|
1593
1618
|
end
|
1594
|
-
|
1619
|
+
|
1595
1620
|
return element
|
1596
1621
|
end
|
1597
1622
|
|
1598
|
-
|
1623
|
+
|
1599
1624
|
# scan a rexml doc
|
1600
1625
|
#
|
1601
1626
|
def scan_doc(node)
|
@@ -1603,28 +1628,28 @@ class Rexle
|
|
1603
1628
|
attributes = node.attributes.inject({}){|r,x| r.merge(Hash[*x])}
|
1604
1629
|
[node.name, node.text.to_s, attributes, *children]
|
1605
1630
|
end
|
1606
|
-
|
1631
|
+
|
1607
1632
|
class Recordset < Array
|
1608
1633
|
|
1609
1634
|
def initialize(a)
|
1610
1635
|
super(a)
|
1611
1636
|
end
|
1612
|
-
|
1637
|
+
|
1613
1638
|
def to_doc(root: 'root')
|
1614
|
-
|
1639
|
+
|
1615
1640
|
recordset = self.map(&:to_a)
|
1616
1641
|
Rexle.new([root,{}, *recordset])
|
1617
|
-
|
1642
|
+
|
1618
1643
|
end
|
1619
|
-
|
1644
|
+
|
1620
1645
|
def xpath(xpath)
|
1621
1646
|
self.to_doc.root.xpath(xpath)
|
1622
1647
|
end
|
1623
|
-
|
1648
|
+
|
1624
1649
|
def element(xpath)
|
1625
1650
|
self.to_doc.root.element(xpath)
|
1626
1651
|
end
|
1627
1652
|
|
1628
|
-
end
|
1629
|
-
|
1653
|
+
end
|
1654
|
+
|
1630
1655
|
end
|