rexle 1.5.9 → 1.5.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexle.rb +372 -349
- data.tar.gz.sig +0 -0
- metadata +35 -34
- metadata.gz.sig +0 -0
data/lib/rexle.rb
CHANGED
@@ -13,31 +13,37 @@ require 'backtrack-xpath'
|
|
13
13
|
|
14
14
|
# modifications:
|
15
15
|
|
16
|
+
# 14-Jan-2022: bug fix: Related to previous bug fix; Unescape is
|
17
|
+
# now only applied to objects of type Attributes::Value
|
18
|
+
# 01-Jan-2022: bug fix: Attribute values are no longer unescaped when
|
19
|
+
# called from Rexle#xml
|
20
|
+
# 03-Apr-2021: bug fix: When using *to_a*, a CDATA element, if present, is now output
|
21
|
+
# 20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
|
16
22
|
# 11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
|
17
23
|
# 30-Jul-2020: minor improvement: #plaintext now unescapes &amp; to &
|
18
24
|
# 11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
|
19
25
|
# 23-Apr-2020: feature: Added public method *plaintext*.
|
20
26
|
# 04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
|
21
|
-
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
22
|
-
# 09-Jul-2019: minor improvement: A comment tag now has a
|
27
|
+
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
28
|
+
# 09-Jul-2019: minor improvement: A comment tag now has a
|
23
29
|
# new line when pretty printed
|
24
30
|
# 02-Feb-2019: feature: A comment tag can now have nested elements
|
25
31
|
# 03-Nov-2018: feature: Debug messages can now use coloured text
|
26
32
|
# 02-Oct-2018: feature: Added Rexle::Elements#last
|
27
|
-
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
28
|
-
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
33
|
+
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
34
|
+
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
29
35
|
# xpath contains an *and* operator
|
30
|
-
# 14-Sep-2017: improvement: An *and* operator can now be
|
36
|
+
# 14-Sep-2017: improvement: An *and* operator can now be
|
31
37
|
# used between xpath statements
|
32
38
|
# 10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id]
|
33
|
-
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
34
|
-
# track of the working document when elements are passed to
|
39
|
+
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
40
|
+
# track of the working document when elements are passed to
|
35
41
|
# different documents
|
36
42
|
# bug fix: Element prefixes are now only processed if they exist
|
37
|
-
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
43
|
+
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
38
44
|
# Rexle::Element#next_sibling and Rexle::Element#previous_sibling bugs
|
39
|
-
# 25-Feb-2017: improvement:
|
40
|
-
# An input rexle array can now have an empty array for
|
45
|
+
# 25-Feb-2017: improvement:
|
46
|
+
# An input rexle array can now have an empty array for
|
41
47
|
# children e.g. doc = Rexle.new(["records", {}, "", []])
|
42
48
|
# 25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer
|
43
49
|
|
@@ -51,7 +57,14 @@ module XMLhelper
|
|
51
57
|
scan_print(children).join.force_encoding("utf-8")
|
52
58
|
|
53
59
|
a = self.root.attributes.to_a.map do |k,v|
|
54
|
-
|
60
|
+
|
61
|
+
val = if v.is_a?(Array) then
|
62
|
+
v.join(' ')
|
63
|
+
else
|
64
|
+
v.is_a?(String) ? v : v.to_s(unescape: false)
|
65
|
+
end
|
66
|
+
|
67
|
+
"%s='%s'" % [k, val]
|
55
68
|
end
|
56
69
|
|
57
70
|
xml = "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : \
|
@@ -59,7 +72,7 @@ module XMLhelper
|
|
59
72
|
|
60
73
|
if self.instructions and declaration then
|
61
74
|
processing_instructions() + xml
|
62
|
-
else
|
75
|
+
else
|
63
76
|
xml
|
64
77
|
end
|
65
78
|
end
|
@@ -68,43 +81,43 @@ module XMLhelper
|
|
68
81
|
|
69
82
|
body = pretty_print(children,2).join
|
70
83
|
|
71
|
-
a = self.root.attributes.to_a.map do |k,v|
|
72
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
84
|
+
a = self.root.attributes.to_a.map do |k,v|
|
85
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s)]
|
73
86
|
end
|
74
|
-
|
75
|
-
ind = "\n "
|
87
|
+
|
88
|
+
ind = "\n "
|
76
89
|
xml = "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : \
|
77
90
|
' ' + a.join(' '), ind, body, "\n", self.root.name]
|
78
91
|
|
79
92
|
if self.instructions and declaration then
|
80
93
|
processing_instructions("") + xml
|
81
|
-
else
|
94
|
+
else
|
82
95
|
xml
|
83
96
|
end
|
84
97
|
end
|
85
|
-
|
86
|
-
def inspect()
|
98
|
+
|
99
|
+
def inspect()
|
87
100
|
"#<Rexle:%s>" % [self.object_id]
|
88
101
|
end
|
89
102
|
|
90
103
|
def processing_instructions(s='')
|
91
104
|
self.instructions.map do |instruction|
|
92
|
-
"<?%s?>\n" % instruction.join(' ')
|
105
|
+
"<?%s?>\n" % instruction.join(' ')
|
93
106
|
end.join s
|
94
107
|
end
|
95
108
|
|
96
109
|
def scan_print(nodes)
|
97
110
|
|
98
111
|
r2 = nodes.map do |x|
|
99
|
-
|
112
|
+
|
100
113
|
r = if x.is_a? Rexle::Element then
|
101
114
|
|
102
|
-
a = x.attributes.to_a.map do |k,v|
|
115
|
+
a = x.attributes.to_a.map do |k,v|
|
103
116
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
104
117
|
end
|
105
118
|
|
106
119
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
107
|
-
|
120
|
+
|
108
121
|
non_self_closing_tags = %w(script textarea iframe div object a)
|
109
122
|
|
110
123
|
if (x.children and x.children.length > 0 \
|
@@ -118,20 +131,20 @@ module XMLhelper
|
|
118
131
|
else
|
119
132
|
out = ["<%s/>" % tag]
|
120
133
|
end
|
121
|
-
|
134
|
+
|
122
135
|
elsif x.is_a? String then x
|
123
|
-
elsif x.is_a? Rexle::CData then x.print
|
124
|
-
elsif x.is_a? Rexle::Comment then x.print
|
125
|
-
|
136
|
+
elsif x.is_a? Rexle::CData then x.print
|
137
|
+
elsif x.is_a? Rexle::Comment then x.print
|
138
|
+
|
126
139
|
end
|
127
140
|
|
128
141
|
r
|
129
142
|
end
|
130
|
-
|
143
|
+
|
131
144
|
r2
|
132
145
|
|
133
146
|
end
|
134
|
-
|
147
|
+
|
135
148
|
def scan_to_a(nodes)
|
136
149
|
|
137
150
|
nodes.inject([]) do |r,x|
|
@@ -140,6 +153,10 @@ module XMLhelper
|
|
140
153
|
|
141
154
|
a = [String.new(x.name), Hash.new(x.attributes), x.value.to_s]
|
142
155
|
|
156
|
+
if x.cdatas.any? then
|
157
|
+
a.concat x.cdatas.map {|cdata| ['![', {}, cdata] }
|
158
|
+
end
|
159
|
+
|
143
160
|
(a.concat(scan_to_a(x.children))) if x.children.length > 1
|
144
161
|
r << a
|
145
162
|
elsif x.is_a? String then
|
@@ -150,7 +167,7 @@ module XMLhelper
|
|
150
167
|
end
|
151
168
|
|
152
169
|
end
|
153
|
-
|
170
|
+
|
154
171
|
|
155
172
|
|
156
173
|
def pretty_print(nodes, indent='0')
|
@@ -163,13 +180,13 @@ module XMLhelper
|
|
163
180
|
|
164
181
|
if x.is_a? Rexle::Element then
|
165
182
|
|
166
|
-
a = x.attributes.to_a.map do |k,v|
|
183
|
+
a = x.attributes.to_a.map do |k,v|
|
167
184
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
168
185
|
end
|
169
186
|
a ||= []
|
170
187
|
|
171
188
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
172
|
-
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
189
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
173
190
|
|
174
191
|
if (x.value and x.value.length > 0) \
|
175
192
|
or (x.children and x.children.length > 0 \
|
@@ -177,13 +194,13 @@ module XMLhelper
|
|
177
194
|
x.name == 'script' or x.name == 'textarea' or \
|
178
195
|
x.name == 'iframe' then
|
179
196
|
|
180
|
-
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
197
|
+
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
181
198
|
("\n" + ' ' * indent) : ''
|
182
|
-
|
199
|
+
|
183
200
|
out = ["%s<%s>%s" % [start, tag, ind1]]
|
184
|
-
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
201
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
185
202
|
ind2 = (ind1 and ind1.length > 0) ? ("\n" + ' ' * (indent - 1)) : ''
|
186
|
-
out << "%s</%s>" % [ind2, x.name]
|
203
|
+
out << "%s</%s>" % [ind2, x.name]
|
187
204
|
else
|
188
205
|
|
189
206
|
out = ["%s<%s/>" % [start, tag]]
|
@@ -191,8 +208,8 @@ module XMLhelper
|
|
191
208
|
|
192
209
|
|
193
210
|
elsif x.is_a? String then x.sub(/^[\n\s]+$/,'')
|
194
|
-
elsif x.is_a? Rexle::CData then x.print
|
195
|
-
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
211
|
+
elsif x.is_a? Rexle::CData then x.print
|
212
|
+
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
196
213
|
|
197
214
|
end
|
198
215
|
end
|
@@ -207,17 +224,17 @@ class Rexle
|
|
207
224
|
|
208
225
|
attr_reader :prefixes, :doctype
|
209
226
|
attr_accessor :instructions
|
210
|
-
|
227
|
+
|
211
228
|
def initialize(x=nil, rexle: self, debug: false)
|
212
229
|
|
213
230
|
@rexle, @debug = rexle, debug
|
214
231
|
$debug = @debug
|
215
|
-
|
232
|
+
|
216
233
|
puts 'inside Rexle'.debug if debug
|
217
|
-
|
234
|
+
|
218
235
|
super()
|
219
236
|
|
220
|
-
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
237
|
+
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
221
238
|
@doctype = :xml
|
222
239
|
|
223
240
|
# what type of input is it? Is it a string, array
|
@@ -227,11 +244,11 @@ class Rexle
|
|
227
244
|
Array: proc {|x| x},
|
228
245
|
RexleParser: ->(x){ parse_rexle(x)}
|
229
246
|
}
|
230
|
-
|
247
|
+
|
231
248
|
doc_node = ['doc', Attributes.new]
|
232
|
-
|
249
|
+
|
233
250
|
@a = procs[x.class.to_s.to_sym].call(x)
|
234
|
-
|
251
|
+
|
235
252
|
@doc = scan_element(*(doc_node << @a))
|
236
253
|
|
237
254
|
# fetch the namespaces
|
@@ -242,78 +259,78 @@ class Rexle
|
|
242
259
|
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
243
260
|
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
244
261
|
end
|
245
|
-
|
262
|
+
|
246
263
|
end
|
247
264
|
|
248
265
|
end
|
249
|
-
|
266
|
+
|
250
267
|
def clone()
|
251
268
|
Rexle.new self.to_a
|
252
269
|
end
|
253
|
-
|
270
|
+
|
254
271
|
def at_css(selector)
|
255
272
|
@doc.root.element RexleCSS.new(selector).to_xpath
|
256
|
-
end
|
257
|
-
|
273
|
+
end
|
274
|
+
|
258
275
|
def css(selector)
|
259
|
-
|
260
|
-
a = selector.split(',').flat_map do |x|
|
276
|
+
|
277
|
+
a = selector.split(',').flat_map do |x|
|
261
278
|
@doc.root.xpath RexleCSS.new(x).to_xpath
|
262
279
|
end
|
263
|
-
|
280
|
+
|
264
281
|
return a
|
265
282
|
end
|
266
|
-
|
283
|
+
|
267
284
|
def xpath(path, &blk)
|
268
285
|
@doc.xpath(path, &blk)
|
269
|
-
end
|
286
|
+
end
|
270
287
|
|
271
288
|
class Element
|
272
289
|
include XMLhelper
|
273
|
-
|
290
|
+
|
274
291
|
class Value < String
|
275
|
-
|
292
|
+
|
276
293
|
def initialize(value)
|
277
294
|
super(value)
|
278
295
|
end
|
279
|
-
|
296
|
+
|
280
297
|
def <(val2)
|
281
298
|
self.to_f < val2.to_f
|
282
|
-
end
|
283
|
-
|
299
|
+
end
|
300
|
+
|
284
301
|
def >(val2)
|
285
302
|
self.to_f > val2.to_f
|
286
|
-
end
|
287
|
-
end
|
288
|
-
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
289
306
|
class Attribute
|
290
|
-
|
307
|
+
|
291
308
|
attr_reader :value
|
292
|
-
|
309
|
+
|
293
310
|
def initialize(value)
|
294
311
|
@value = value
|
295
312
|
end
|
296
|
-
|
313
|
+
|
297
314
|
def to_f()
|
298
315
|
@value.to_f
|
299
|
-
end
|
300
|
-
|
316
|
+
end
|
317
|
+
|
301
318
|
def to_i()
|
302
319
|
@value.to_i
|
303
320
|
end
|
304
|
-
|
321
|
+
|
305
322
|
alias to_s value
|
306
|
-
|
323
|
+
|
307
324
|
end
|
308
|
-
|
325
|
+
|
309
326
|
attr_accessor :name, :value, :parent
|
310
327
|
attr_reader :child_elements, :doc_id, :instructions
|
311
|
-
|
328
|
+
|
312
329
|
alias original_clone clone
|
313
330
|
|
314
331
|
def initialize(name=nil, value: nil, attributes: Attributes.new, rexle: self)
|
315
332
|
|
316
|
-
@rexle = rexle
|
333
|
+
@rexle = rexle
|
317
334
|
super()
|
318
335
|
|
319
336
|
@name, @attributes = name.to_s, attributes
|
@@ -323,11 +340,11 @@ class Rexle
|
|
323
340
|
self.add_text value if value
|
324
341
|
|
325
342
|
end
|
326
|
-
|
343
|
+
|
327
344
|
def backtrack(use_attributes: true)
|
328
345
|
BacktrackXPath.new(self, use_attributes: use_attributes)
|
329
346
|
end
|
330
|
-
|
347
|
+
|
331
348
|
def cdata?()
|
332
349
|
self.is_a? CData
|
333
350
|
end
|
@@ -335,8 +352,8 @@ class Rexle
|
|
335
352
|
def contains(raw_args)
|
336
353
|
|
337
354
|
path, raw_val = raw_args.split(',',2)
|
338
|
-
val = raw_val.strip[/^["']?.*["']?$/]
|
339
|
-
|
355
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
356
|
+
|
340
357
|
anode = query_xpath(path)
|
341
358
|
|
342
359
|
return [false] if anode.nil? or anode.empty?
|
@@ -345,21 +362,21 @@ class Rexle
|
|
345
362
|
r = [a.grep(/#{val.sub(/^["'](.*)["']$/,'\1')}/).length > 0]
|
346
363
|
|
347
364
|
r.any?
|
348
|
-
end
|
349
|
-
|
365
|
+
end
|
366
|
+
|
350
367
|
def count(path)
|
351
368
|
length = query_xpath(path).flatten.compact.length
|
352
369
|
length
|
353
370
|
end
|
354
|
-
|
371
|
+
|
355
372
|
def current()
|
356
373
|
self
|
357
374
|
end
|
358
375
|
|
359
376
|
def at_css(selector)
|
360
377
|
self.root.element RexleCSS.new(selector).to_xpath
|
361
|
-
end
|
362
|
-
|
378
|
+
end
|
379
|
+
|
363
380
|
def css(selector)
|
364
381
|
|
365
382
|
selector.split(',')\
|
@@ -370,76 +387,76 @@ class Rexle
|
|
370
387
|
def lowercase(s)
|
371
388
|
|
372
389
|
end
|
373
|
-
|
374
|
-
def max(path)
|
390
|
+
|
391
|
+
def max(path)
|
375
392
|
a = query_xpath(path).flatten.select{|x| x.is_a? String or x.is_a? Rexle::Element::Attribute}.map(&:to_i)
|
376
|
-
a.max
|
393
|
+
a.max
|
377
394
|
end
|
378
|
-
|
395
|
+
|
379
396
|
def name()
|
380
|
-
|
397
|
+
|
381
398
|
if @rexle and @rexle.respond_to? :prefixes then
|
382
|
-
|
399
|
+
|
383
400
|
if @rexle.prefixes.is_a? Array then
|
384
|
-
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
401
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
385
402
|
end
|
386
|
-
|
403
|
+
|
387
404
|
prefix ? @name.sub(prefix + ':', '') : @name
|
388
|
-
|
405
|
+
|
389
406
|
else
|
390
407
|
@name
|
391
408
|
end
|
392
|
-
|
409
|
+
|
393
410
|
end
|
394
|
-
|
395
|
-
def next_element()
|
411
|
+
|
412
|
+
def next_element()
|
396
413
|
|
397
414
|
id = self.object_id
|
398
|
-
a = self.parent.elements
|
415
|
+
a = self.parent.elements
|
399
416
|
|
400
417
|
i = a.index {|x| x.object_id == id} + 2
|
401
418
|
a[i] if i < a.length + 1
|
402
|
-
|
419
|
+
|
403
420
|
end
|
404
|
-
|
421
|
+
|
405
422
|
alias next_sibling next_element
|
406
|
-
|
423
|
+
|
407
424
|
def not(bool)
|
408
425
|
|
409
426
|
r = self.xpath(bool).any?
|
410
427
|
|
411
428
|
!r
|
412
429
|
end
|
413
|
-
|
414
|
-
def previous_element()
|
415
|
-
|
430
|
+
|
431
|
+
def previous_element()
|
432
|
+
|
416
433
|
id = self.object_id
|
417
|
-
a = self.parent.elements
|
434
|
+
a = self.parent.elements
|
418
435
|
i = a.index {|x| x.object_id == id}
|
419
436
|
|
420
|
-
a[i] if i > 0
|
437
|
+
a[i] if i > 0
|
421
438
|
|
422
439
|
end
|
423
|
-
|
440
|
+
|
424
441
|
alias previous_sibling previous_element
|
425
|
-
|
442
|
+
|
426
443
|
def xpath(path, rlist=[], &blk)
|
427
|
-
|
444
|
+
|
428
445
|
#@log.debug 'inside xpath ' + path.inspect
|
429
446
|
|
430
447
|
r = filter_xpath(path, rlist=[], &blk)
|
431
448
|
#@log.debug 'after filter_xpath : ' + r.inspect
|
432
|
-
|
449
|
+
|
433
450
|
if r.is_a?(Array) then
|
434
|
-
|
451
|
+
|
435
452
|
Recordset.new(r.compact)
|
436
|
-
|
453
|
+
|
437
454
|
else
|
438
455
|
r
|
439
456
|
end
|
440
|
-
|
457
|
+
|
441
458
|
end
|
442
|
-
|
459
|
+
|
443
460
|
def filter_xpath(raw_path, rlist=[], &blk)
|
444
461
|
#@log.debug 'inside filter_xpath : ' + raw_path.inspect
|
445
462
|
path = String.new raw_path
|
@@ -449,25 +466,25 @@ class Rexle
|
|
449
466
|
#fn_match = path.match(/^(\w+)\(/)
|
450
467
|
#@log.debug 'fn_match : ' + fn_match.inspect
|
451
468
|
end_fn_match = path.slice!(/\[\w+\(\)\]$/)
|
452
|
-
|
469
|
+
|
453
470
|
if end_fn_match then
|
454
|
-
|
471
|
+
|
455
472
|
m = end_fn_match[1..-4]
|
456
473
|
#@log.debug 'its a function'
|
457
474
|
[method(m.to_sym).call(xpath path)]
|
458
|
-
|
459
|
-
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
475
|
+
|
476
|
+
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
460
477
|
|
461
478
|
procs = {
|
462
479
|
|
463
|
-
Array: proc { |x|
|
464
|
-
if block_given? then
|
465
|
-
x.flatten(1)
|
480
|
+
Array: proc { |x|
|
481
|
+
if block_given? then
|
482
|
+
x.flatten(1)
|
466
483
|
else
|
467
484
|
rs = x.flatten
|
468
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
485
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
469
486
|
end
|
470
|
-
},
|
487
|
+
},
|
471
488
|
String: proc {|x| x},
|
472
489
|
Hash: proc {|x| x},
|
473
490
|
TrueClass: proc{|x| x},
|
@@ -476,25 +493,25 @@ class Rexle
|
|
476
493
|
}
|
477
494
|
bucket = []
|
478
495
|
raw_results = path.split('|').map do |xp|
|
479
|
-
query_xpath(xp.strip, bucket, &blk)
|
496
|
+
query_xpath(xp.strip, bucket, &blk)
|
480
497
|
end
|
481
|
-
|
498
|
+
|
482
499
|
results = raw_results
|
483
500
|
|
484
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
485
|
-
|
501
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
502
|
+
|
486
503
|
elsif fn_match.nil?
|
487
|
-
|
504
|
+
|
488
505
|
procs = {
|
489
506
|
|
490
|
-
Array: proc { |x|
|
491
|
-
if block_given? then
|
492
|
-
x.flatten(1)
|
507
|
+
Array: proc { |x|
|
508
|
+
if block_given? then
|
509
|
+
x.flatten(1)
|
493
510
|
else
|
494
511
|
rs = x.flatten
|
495
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
512
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
496
513
|
end
|
497
|
-
},
|
514
|
+
},
|
498
515
|
String: proc {|x| x},
|
499
516
|
Hash: proc {|x| x},
|
500
517
|
TrueClass: proc{|x| x},
|
@@ -502,23 +519,23 @@ class Rexle
|
|
502
519
|
:"Rexle::Element" => proc {|x| [x]}
|
503
520
|
}
|
504
521
|
bucket = []
|
505
|
-
|
522
|
+
|
506
523
|
results = if path =~ /[\[]|\(/ then
|
507
524
|
|
508
525
|
raw_results = path.split(/\|/).map do |xp|
|
509
|
-
query_xpath(xp.strip, bucket, &blk)
|
526
|
+
query_xpath(xp.strip, bucket, &blk)
|
510
527
|
end
|
511
528
|
|
512
529
|
raw_results.flatten.index(true) ? [true] : []
|
513
|
-
|
530
|
+
|
514
531
|
else
|
515
532
|
raw_results = path.split(/ *(?:\||\band\b) */).map do |xp|
|
516
|
-
query_xpath(xp.strip, bucket, &blk)
|
517
|
-
end
|
533
|
+
query_xpath(xp.strip, bucket, &blk)
|
534
|
+
end
|
518
535
|
|
519
536
|
if path =~ / and / then
|
520
537
|
|
521
|
-
raw_results.flatten.select {|x| x == true or x == false}
|
538
|
+
raw_results.flatten.select {|x| x == true or x == false}
|
522
539
|
|
523
540
|
else
|
524
541
|
raw_results.flatten.index(true) ? [true] : []
|
@@ -527,12 +544,12 @@ class Rexle
|
|
527
544
|
|
528
545
|
return results if !path[/[><]/] and results.any?
|
529
546
|
results = raw_results # .flatten.select {|x| x}
|
530
|
-
|
531
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
547
|
+
|
548
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
532
549
|
|
533
550
|
else
|
534
|
-
|
535
|
-
m, xpath_value, index = fn_match.captures
|
551
|
+
|
552
|
+
m, xpath_value, index = fn_match.captures
|
536
553
|
|
537
554
|
if m == 'text' then
|
538
555
|
a = texts()
|
@@ -546,14 +563,14 @@ class Rexle
|
|
546
563
|
|
547
564
|
end
|
548
565
|
|
549
|
-
end
|
550
|
-
|
566
|
+
end
|
567
|
+
|
551
568
|
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
552
569
|
|
553
570
|
#@log.debug 'query_xpath : ' + raw_xpath_value.inspect
|
554
571
|
#@log.debug '++ ' + self.xml.inspect
|
555
572
|
|
556
|
-
flag_func = false
|
573
|
+
flag_func = false
|
557
574
|
|
558
575
|
xpath_value = raw_xpath_value.sub('child::','./')
|
559
576
|
|
@@ -577,38 +594,38 @@ class Rexle
|
|
577
594
|
|
578
595
|
raw_condition = raw_condition ? raw_condition + '/' + remaining_path \
|
579
596
|
: remaining_path
|
580
|
-
remaining_path = ''
|
597
|
+
remaining_path = ''
|
581
598
|
end
|
582
599
|
|
583
|
-
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
600
|
+
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
584
601
|
|
585
602
|
if r then
|
586
603
|
a_path = raw_path.split(/(?=\/\/)/,2)
|
587
604
|
else
|
588
605
|
a_path = raw_path.split('/',2)
|
589
606
|
end
|
590
|
-
|
607
|
+
|
591
608
|
condition = raw_condition if a_path.length <= 1 #and not raw_condition[/^\[\w+\(.*\)\]$/]
|
592
609
|
|
593
610
|
if raw_path[0,2] == '//' then
|
594
611
|
s = ''
|
595
|
-
elsif raw_path == 'text()'
|
612
|
+
elsif raw_path == 'text()'
|
596
613
|
|
597
614
|
a_path.shift
|
598
615
|
#return @value
|
599
616
|
return self.texts
|
600
617
|
else
|
601
618
|
|
602
|
-
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
603
|
-
|
619
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
620
|
+
|
604
621
|
return @attributes if attribute == '*'
|
605
|
-
|
622
|
+
|
606
623
|
if attribute and @attributes and \
|
607
624
|
@attributes.has_key?(attribute.to_sym) then
|
608
625
|
return [Attribute.new(@attributes[attribute.to_sym])]
|
609
626
|
end
|
610
627
|
s = a_path.shift
|
611
|
-
end
|
628
|
+
end
|
612
629
|
|
613
630
|
# isolate the xpath to return just the path to the current element
|
614
631
|
|
@@ -624,7 +641,7 @@ class Rexle
|
|
624
641
|
if element_name and element_name[/^\d/] then
|
625
642
|
element_name = nil
|
626
643
|
end
|
627
|
-
|
644
|
+
|
628
645
|
condition = raw_xpath_value if element_name.nil?
|
629
646
|
|
630
647
|
else
|
@@ -660,11 +677,11 @@ class Rexle
|
|
660
677
|
r4 = attribute_search(attr_search, self, self.attributes)
|
661
678
|
return r4
|
662
679
|
end
|
663
|
-
|
664
|
-
|
680
|
+
|
681
|
+
|
665
682
|
return_elements = []
|
666
|
-
|
667
|
-
|
683
|
+
|
684
|
+
|
668
685
|
|
669
686
|
if raw_path[0,2] == '//' then
|
670
687
|
|
@@ -679,25 +696,25 @@ class Rexle
|
|
679
696
|
|
680
697
|
if element_name.is_a? String then
|
681
698
|
ename, raw_selector = (element_name.split('::',2)).reverse
|
682
|
-
|
699
|
+
|
683
700
|
selector = case raw_selector
|
684
701
|
when 'following-sibling' then 1
|
685
702
|
when 'preceding-sibling' then -1
|
686
703
|
end
|
687
|
-
|
704
|
+
|
688
705
|
else
|
689
706
|
ename = element_name
|
690
|
-
end
|
707
|
+
end
|
691
708
|
|
692
709
|
if ename == '..' then
|
693
|
-
|
710
|
+
|
694
711
|
remaining_xpath = raw_path[/\.\.\/(.*)/,1]
|
695
712
|
# select the parent element
|
696
713
|
|
697
714
|
r2 = self.parent.xpath(remaining_xpath)
|
698
715
|
|
699
716
|
return r2
|
700
|
-
|
717
|
+
|
701
718
|
elsif ename == '.'
|
702
719
|
|
703
720
|
remaining_xpath = raw_path[1..-1]
|
@@ -706,41 +723,41 @@ class Rexle
|
|
706
723
|
if xpath_value.length > 0 and xpath_value =~ /\[/ then
|
707
724
|
|
708
725
|
r = eval(attr_search.sub(/^h/,'self.attributes'))
|
709
|
-
return self if r
|
726
|
+
return self if r
|
710
727
|
|
711
728
|
else
|
712
729
|
return self
|
713
730
|
end
|
714
731
|
else
|
715
732
|
return self.xpath(remaining_xpath)
|
716
|
-
end
|
733
|
+
end
|
717
734
|
|
718
735
|
elsif element_name.nil?
|
719
736
|
puts ('attr_search: ' + attr_search.inspect).debug if $debug
|
720
|
-
return eval attr_search
|
737
|
+
return eval attr_search
|
721
738
|
else
|
722
739
|
|
723
740
|
if raw_selector.nil? and ename != element_part then
|
724
741
|
|
725
742
|
right_cond = element_part[/#{ename}(.*)/,1]
|
726
743
|
|
727
|
-
end
|
744
|
+
end
|
728
745
|
|
729
746
|
return_elements = @child_elements.map.with_index.select do |x, i|
|
730
747
|
|
731
748
|
next unless x.is_a? Rexle::Element
|
732
749
|
|
733
750
|
#x.name == ename or (ename == '*')
|
734
|
-
|
751
|
+
|
735
752
|
r10 = ((x.name == ename) or (ename == '*'))
|
736
753
|
|
737
|
-
|
754
|
+
|
738
755
|
|
739
756
|
end
|
740
|
-
|
757
|
+
|
741
758
|
if right_cond then
|
742
|
-
|
743
|
-
|
759
|
+
|
760
|
+
|
744
761
|
r12 = return_elements.map do |x, i|
|
745
762
|
|
746
763
|
if x.text then
|
@@ -750,15 +767,15 @@ class Rexle
|
|
750
767
|
else
|
751
768
|
false
|
752
769
|
end
|
753
|
-
|
770
|
+
|
754
771
|
end
|
755
|
-
|
772
|
+
|
756
773
|
return r12
|
757
|
-
|
758
|
-
end
|
759
|
-
|
774
|
+
|
775
|
+
end
|
776
|
+
|
760
777
|
if selector then
|
761
|
-
ne = return_elements.inject([]) do |r,x|
|
778
|
+
ne = return_elements.inject([]) do |r,x|
|
762
779
|
i = x.last + selector
|
763
780
|
if i >= 0 then
|
764
781
|
r << i
|
@@ -769,17 +786,17 @@ class Rexle
|
|
769
786
|
|
770
787
|
return_elements = ne.map {|x| [@child_elements[x], x] if x}
|
771
788
|
end
|
772
|
-
|
789
|
+
|
773
790
|
|
774
791
|
end
|
775
792
|
end
|
776
|
-
|
793
|
+
|
777
794
|
if return_elements.length > 0 then
|
778
795
|
|
779
796
|
if (a_path + [remaining_path]).join.empty? then
|
780
797
|
|
781
798
|
# pass in a block to the filter if it is function contains?
|
782
|
-
rlist = return_elements.map.with_index do |x,i|
|
799
|
+
rlist = return_elements.map.with_index do |x,i|
|
783
800
|
r5 = filter(x, i+1, attr_search, &blk)
|
784
801
|
|
785
802
|
r5
|
@@ -789,9 +806,9 @@ class Rexle
|
|
789
806
|
|
790
807
|
else
|
791
808
|
|
792
|
-
rlist << return_elements.map.with_index do |x,i|
|
809
|
+
rlist << return_elements.map.with_index do |x,i|
|
793
810
|
|
794
|
-
rtn_element = filter(x, i+1, attr_search) do |e|
|
811
|
+
rtn_element = filter(x, i+1, attr_search) do |e|
|
795
812
|
|
796
813
|
r = e.xpath(a_path.join('/') + raw_condition.to_s \
|
797
814
|
+ remaining_path, &blk)
|
@@ -837,7 +854,7 @@ class Rexle
|
|
837
854
|
rlist,&blk)
|
838
855
|
end
|
839
856
|
end
|
840
|
-
|
857
|
+
|
841
858
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) \
|
842
859
|
or (rlist.length > 1 and rlist[0].is_a? Array)
|
843
860
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
@@ -854,21 +871,21 @@ class Rexle
|
|
854
871
|
elsif item.is_a? Rexle::CData then
|
855
872
|
@child_elements << item
|
856
873
|
elsif item.is_a? Rexle::Comment then
|
857
|
-
@child_elements << item
|
874
|
+
@child_elements << item
|
858
875
|
elsif item.is_a? Rexle::Element then
|
859
876
|
|
860
877
|
@child_elements << item
|
861
878
|
# add a reference from this element (the parent) to the child
|
862
879
|
item.parent = self
|
863
|
-
item
|
864
|
-
|
880
|
+
item
|
881
|
+
|
865
882
|
elsif item.is_a? Rexle then
|
866
883
|
self.add_element(item.root)
|
867
884
|
end
|
868
885
|
|
869
|
-
end
|
886
|
+
end
|
870
887
|
|
871
|
-
def add(item)
|
888
|
+
def add(item)
|
872
889
|
|
873
890
|
if item.is_a? Rexle::Element then
|
874
891
|
|
@@ -894,13 +911,13 @@ class Rexle
|
|
894
911
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
895
912
|
else
|
896
913
|
self.xml
|
897
|
-
end
|
914
|
+
end
|
898
915
|
end
|
899
916
|
|
900
917
|
def add_attribute(*x)
|
901
|
-
|
918
|
+
|
902
919
|
proc_hash = lambda {|x| Hash[*x]}
|
903
|
-
|
920
|
+
|
904
921
|
procs = {
|
905
922
|
Hash: lambda {|x| x[0] || {}},
|
906
923
|
String: proc_hash,
|
@@ -918,57 +935,57 @@ class Rexle
|
|
918
935
|
def add_text(s)
|
919
936
|
|
920
937
|
self.child_elements << s
|
921
|
-
self
|
938
|
+
self
|
922
939
|
end
|
923
|
-
|
924
|
-
def attribute(key)
|
925
|
-
|
940
|
+
|
941
|
+
def attribute(key)
|
942
|
+
|
926
943
|
key = key.to_sym if key.is_a? String
|
927
|
-
|
944
|
+
|
928
945
|
if @attributes[key].is_a? String then
|
929
|
-
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
946
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
930
947
|
else
|
931
948
|
@attributes[key]
|
932
949
|
end
|
933
|
-
end
|
934
|
-
|
935
|
-
def attributes() @attributes end
|
936
|
-
|
950
|
+
end
|
951
|
+
|
952
|
+
def attributes() @attributes end
|
953
|
+
|
937
954
|
def cdatas()
|
938
955
|
self.children.inject([]){|r,x| x.is_a?(Rexle::CData) ? r << x.to_s : r }
|
939
956
|
end
|
940
|
-
|
957
|
+
|
941
958
|
def children()
|
942
959
|
|
943
960
|
r = @child_elements
|
944
|
-
|
961
|
+
|
945
962
|
def r.is_an_empty_string?()
|
946
963
|
self.length == 1 and self.first == ''
|
947
|
-
end
|
948
|
-
|
964
|
+
end
|
965
|
+
|
949
966
|
return r
|
950
|
-
end
|
967
|
+
end
|
951
968
|
|
952
969
|
def children=(a) @child_elements = a if a.is_a? Array end
|
953
|
-
|
970
|
+
|
954
971
|
def deep_clone() Rexle.new(self.xml).root end
|
955
|
-
|
956
|
-
def clone()
|
957
|
-
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
972
|
+
|
973
|
+
def clone()
|
974
|
+
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
958
975
|
end
|
959
|
-
|
976
|
+
|
960
977
|
def delete(obj=nil)
|
961
978
|
|
962
979
|
if obj then
|
963
980
|
|
964
981
|
if obj.is_a? String then
|
965
|
-
|
982
|
+
|
966
983
|
self.xpath(obj).each {|e| e.delete; e = nil}
|
967
|
-
|
984
|
+
|
968
985
|
else
|
969
986
|
|
970
987
|
i = @child_elements.index(obj)
|
971
|
-
[@child_elements].each{|x| x.delete_at i} if i
|
988
|
+
[@child_elements].each{|x| x.delete_at i} if i
|
972
989
|
end
|
973
990
|
else
|
974
991
|
|
@@ -990,50 +1007,50 @@ class Rexle
|
|
990
1007
|
String: proc {|x| @child_elements[x]}
|
991
1008
|
}
|
992
1009
|
|
993
|
-
procs[s.class.to_s.to_sym].call(s)
|
1010
|
+
procs[s.class.to_s.to_sym].call(s)
|
994
1011
|
end
|
995
1012
|
|
996
1013
|
def doc_root() @rexle.root end
|
997
1014
|
def each(&blk) self.children.each(&blk) end
|
998
1015
|
def each_recursive(&blk) recursive_scan(self.children,&blk) end
|
999
1016
|
alias traverse each_recursive
|
1000
|
-
def has_elements?() !self.elements.empty? end
|
1001
|
-
def insert_after(node) insert(node, 1) end
|
1017
|
+
def has_elements?() !self.elements.empty? end
|
1018
|
+
def insert_after(node) insert(node, 1) end
|
1002
1019
|
def insert_before(node) insert(node) end
|
1003
1020
|
def last(a) a.last end
|
1004
|
-
def map(&blk) self.children.map(&blk) end
|
1005
|
-
|
1021
|
+
def map(&blk) self.children.map(&blk) end
|
1022
|
+
|
1006
1023
|
def plaintext()
|
1007
1024
|
CGI.unescapeHTML xml().gsub(/<\/?[^>]+>/,'').gsub('&nbsp;',' ')\
|
1008
1025
|
.gsub(/\n\s+/,' ')
|
1009
1026
|
end
|
1010
|
-
|
1011
|
-
def root() self end
|
1027
|
+
|
1028
|
+
def root() self end
|
1012
1029
|
|
1013
1030
|
def text(s='')
|
1014
|
-
|
1015
|
-
return self.value if s.empty?
|
1016
|
-
|
1031
|
+
|
1032
|
+
return self.value if s.empty?
|
1033
|
+
|
1017
1034
|
e = self.element(s)
|
1018
1035
|
return e if e.is_a? String
|
1019
|
-
|
1036
|
+
|
1020
1037
|
e.value if e
|
1021
1038
|
end
|
1022
|
-
|
1039
|
+
|
1023
1040
|
def texts()
|
1024
1041
|
|
1025
1042
|
r = @child_elements.select do |x|
|
1026
1043
|
x.is_a? String or x.is_a? Rexle::CData
|
1027
1044
|
end
|
1028
|
-
|
1045
|
+
|
1029
1046
|
r.map do |x|
|
1030
1047
|
def x.unescape()
|
1031
1048
|
s = self.to_s.clone
|
1032
1049
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1033
1050
|
s
|
1034
|
-
end
|
1051
|
+
end
|
1035
1052
|
end
|
1036
|
-
|
1053
|
+
|
1037
1054
|
return r
|
1038
1055
|
end
|
1039
1056
|
|
@@ -1041,20 +1058,20 @@ class Rexle
|
|
1041
1058
|
|
1042
1059
|
r = @child_elements.first
|
1043
1060
|
return nil unless r.is_a? String
|
1044
|
-
|
1061
|
+
|
1045
1062
|
def r.unescape()
|
1046
1063
|
s = self.clone
|
1047
1064
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1048
1065
|
s
|
1049
|
-
end
|
1050
|
-
|
1066
|
+
end
|
1067
|
+
|
1051
1068
|
return r
|
1052
1069
|
end
|
1053
|
-
|
1070
|
+
|
1054
1071
|
def value=(raw_s)
|
1055
1072
|
|
1056
1073
|
val = Value.new(raw_s.to_s.clone)
|
1057
|
-
|
1074
|
+
|
1058
1075
|
escape_chars = %w(& &amp; ' &apos; < &lt; > &gt;).each_slice(2).to_a
|
1059
1076
|
escape_chars.each{|x| val.gsub!(*x)}
|
1060
1077
|
|
@@ -1064,9 +1081,15 @@ class Rexle
|
|
1064
1081
|
end
|
1065
1082
|
|
1066
1083
|
alias text= value=
|
1067
|
-
|
1084
|
+
|
1068
1085
|
def to_a()
|
1086
|
+
|
1069
1087
|
e = [String.new(self.name), Hash.new(self.attributes)]
|
1088
|
+
|
1089
|
+
if self.cdatas.any? then
|
1090
|
+
e.concat self.cdatas.map {|cdata| ['![', {}, cdata] }
|
1091
|
+
end
|
1092
|
+
|
1070
1093
|
[*e, *scan_to_a(self.children)]
|
1071
1094
|
end
|
1072
1095
|
|
@@ -1076,10 +1099,10 @@ class Rexle
|
|
1076
1099
|
Hash: lambda {|x|
|
1077
1100
|
o = {pretty: false}.merge(x)
|
1078
1101
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
1079
|
-
|
1102
|
+
|
1080
1103
|
method(msg).call(self.children)
|
1081
1104
|
},
|
1082
|
-
String: lambda {|x|
|
1105
|
+
String: lambda {|x|
|
1083
1106
|
r = self.element(x)
|
1084
1107
|
r ? r.xml : ''
|
1085
1108
|
}
|
@@ -1092,18 +1115,18 @@ class Rexle
|
|
1092
1115
|
end
|
1093
1116
|
|
1094
1117
|
def prepend(item)
|
1095
|
-
|
1118
|
+
|
1096
1119
|
@child_elements.unshift item
|
1097
|
-
|
1120
|
+
|
1098
1121
|
# add a reference from this element (the parent) to the child
|
1099
1122
|
item.parent = self
|
1100
|
-
item
|
1101
|
-
end
|
1102
|
-
|
1123
|
+
item
|
1124
|
+
end
|
1125
|
+
|
1103
1126
|
alias to_s xml
|
1104
1127
|
|
1105
1128
|
private
|
1106
|
-
|
1129
|
+
|
1107
1130
|
def insert(node,offset=0)
|
1108
1131
|
|
1109
1132
|
i = parent.child_elements.index(self)
|
@@ -1115,7 +1138,7 @@ class Rexle
|
|
1115
1138
|
node.instance_variable_set(:@doc_id, self.doc_root.object_id)
|
1116
1139
|
|
1117
1140
|
self
|
1118
|
-
end
|
1141
|
+
end
|
1119
1142
|
|
1120
1143
|
def format_condition(condition)
|
1121
1144
|
|
@@ -1140,21 +1163,21 @@ class Rexle
|
|
1140
1163
|
elsif raw_items[0][/^not\(/]
|
1141
1164
|
|
1142
1165
|
return raw_items[0]
|
1143
|
-
|
1166
|
+
|
1144
1167
|
else
|
1145
1168
|
|
1146
1169
|
andor_items = raw_items.map.with_index\
|
1147
1170
|
.select{|x,i| x[/\band\b|\bor\b/]}\
|
1148
1171
|
.map{|x| [x.last, x.last + 1]}.flatten
|
1149
|
-
|
1172
|
+
|
1150
1173
|
indices = [0] + andor_items + [raw_items.length]
|
1151
1174
|
|
1152
1175
|
if raw_items[0][0] == '@' then
|
1153
1176
|
|
1154
1177
|
raw_items.each{|x| x.gsub!(/^@/,'')}
|
1155
|
-
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1178
|
+
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1156
1179
|
|
1157
|
-
items = cons_items.map do |x|
|
1180
|
+
items = cons_items.map do |x|
|
1158
1181
|
|
1159
1182
|
if x.length >= 3 then
|
1160
1183
|
if x[0] != 'class' then
|
@@ -1173,17 +1196,17 @@ class Rexle
|
|
1173
1196
|
else
|
1174
1197
|
|
1175
1198
|
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1176
|
-
|
1177
|
-
items = cons_items.map do |x|
|
1199
|
+
|
1200
|
+
items = cons_items.map do |x|
|
1178
1201
|
|
1179
1202
|
if x.length >= 3 then
|
1180
1203
|
|
1181
1204
|
x[1] = '==' if x[1] == '='
|
1182
1205
|
if x[0] != '.' then
|
1183
1206
|
if x[0][/\//] then
|
1184
|
-
|
1207
|
+
|
1185
1208
|
path, value = x.values_at(0,-1)
|
1186
|
-
|
1209
|
+
|
1187
1210
|
if x[0][/@\w+$/] then
|
1188
1211
|
"r = e.xpath('#{path}').first; r and r.value == #{value}"
|
1189
1212
|
else
|
@@ -1207,16 +1230,16 @@ class Rexle
|
|
1207
1230
|
|
1208
1231
|
|
1209
1232
|
end
|
1210
|
-
|
1233
|
+
|
1211
1234
|
def scan_match(node, path)
|
1212
|
-
|
1235
|
+
|
1213
1236
|
if path == '//' then
|
1214
|
-
return [node, node.text,
|
1237
|
+
return [node, node.text,
|
1215
1238
|
node.elements.map {|x| scan_match x, path}]
|
1216
1239
|
end
|
1217
|
-
|
1240
|
+
|
1218
1241
|
r = []
|
1219
|
-
xpath2 = path[2..-1]
|
1242
|
+
xpath2 = path[2..-1]
|
1220
1243
|
#jr150316 xpath2.sub!(/^\*\//,'')
|
1221
1244
|
#jr150316xpath2.sub!(/^\*/,self.name)
|
1222
1245
|
#jr150316xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
@@ -1239,17 +1262,17 @@ class Rexle
|
|
1239
1262
|
end
|
1240
1263
|
a
|
1241
1264
|
end
|
1242
|
-
|
1243
|
-
|
1265
|
+
|
1266
|
+
|
1244
1267
|
def filter(raw_element, i, attr_search, &blk)
|
1245
|
-
|
1268
|
+
|
1246
1269
|
x, index = raw_element
|
1247
1270
|
e = @child_elements[index]
|
1248
1271
|
|
1249
1272
|
return unless e.is_a? Rexle::Element
|
1250
1273
|
name, value = e.name, e.value if e.is_a? Rexle::Element
|
1251
1274
|
|
1252
|
-
h = x.attributes # <-- fetch the attributes
|
1275
|
+
h = x.attributes # <-- fetch the attributes
|
1253
1276
|
|
1254
1277
|
if attr_search then
|
1255
1278
|
|
@@ -1265,21 +1288,21 @@ class Rexle
|
|
1265
1288
|
def attribute_search(attr_search, e, h, i=nil, &blk)
|
1266
1289
|
|
1267
1290
|
r2 = if attr_search.is_a? Integer then
|
1268
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
1291
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
1269
1292
|
elsif attr_search[/i\s(?:<|>|==|%)\s\d+/] and eval(attr_search) then
|
1270
|
-
block_given? ? blk.call(e) : e
|
1293
|
+
block_given? ? blk.call(e) : e
|
1271
1294
|
elsif h and !h.empty? and attr_search[/^h\[/] and eval(attr_search) then
|
1272
1295
|
block_given? ? blk.call(e) : e
|
1273
|
-
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1296
|
+
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1274
1297
|
next unless x.is_a? Rexle::Element
|
1275
1298
|
name, attributes, value = x.name, x.attributes, x.value.to_s
|
1276
1299
|
b = eval(attr_search)
|
1277
1300
|
b}.length > 0
|
1278
1301
|
|
1279
1302
|
block_given? ? blk.call(e) : e
|
1280
|
-
|
1281
|
-
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1282
|
-
block_given? ? blk.call(e) : e
|
1303
|
+
|
1304
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1305
|
+
block_given? ? blk.call(e) : e
|
1283
1306
|
elsif attr_search[/^e\.value/]
|
1284
1307
|
|
1285
1308
|
v = attr_search[/[^\s]+$/]
|
@@ -1293,92 +1316,92 @@ class Rexle
|
|
1293
1316
|
block_given? ? blk.call(e) : e
|
1294
1317
|
elsif attr_search[/^\w*\(/] and e.element(attr_search)
|
1295
1318
|
block_given? ? blk.call(e) : e
|
1296
|
-
end
|
1319
|
+
end
|
1297
1320
|
|
1298
1321
|
r2
|
1299
1322
|
end
|
1300
|
-
|
1323
|
+
|
1301
1324
|
def recursive_scan(nodes, &blk)
|
1302
|
-
|
1325
|
+
|
1303
1326
|
nodes.each do |x|
|
1304
1327
|
|
1305
1328
|
if x.is_a? Rexle::Element then
|
1306
1329
|
blk.call(x)
|
1307
1330
|
recursive_scan(x.children, &blk) if x.children.length > 0
|
1308
|
-
end
|
1331
|
+
end
|
1309
1332
|
end
|
1310
1333
|
end
|
1311
|
-
|
1334
|
+
|
1312
1335
|
end # -- end of element --
|
1313
|
-
|
1336
|
+
|
1314
1337
|
|
1315
1338
|
class CData
|
1316
|
-
|
1339
|
+
|
1317
1340
|
def initialize(val='')
|
1318
1341
|
@value = val
|
1319
1342
|
end
|
1320
|
-
|
1343
|
+
|
1321
1344
|
def clone()
|
1322
1345
|
CData.new(@value)
|
1323
1346
|
end
|
1324
|
-
|
1347
|
+
|
1325
1348
|
def inspect()
|
1326
1349
|
@value.inspect
|
1327
1350
|
end
|
1328
|
-
|
1351
|
+
|
1329
1352
|
def print()
|
1330
1353
|
"<![CDATA[%s]]>" % @value
|
1331
1354
|
end
|
1332
|
-
|
1355
|
+
|
1333
1356
|
def to_s()
|
1334
1357
|
@value
|
1335
1358
|
end
|
1336
|
-
|
1359
|
+
|
1337
1360
|
def unescape()
|
1338
1361
|
s = @value.clone
|
1339
1362
|
%w(< < > > & & ' ').each_slice(2){|x| s.gsub!(*x)}
|
1340
1363
|
s
|
1341
|
-
end
|
1342
|
-
|
1364
|
+
end
|
1365
|
+
|
1343
1366
|
end
|
1344
|
-
|
1345
|
-
|
1367
|
+
|
1368
|
+
|
1346
1369
|
class Comment
|
1347
|
-
|
1370
|
+
|
1348
1371
|
|
1349
1372
|
def initialize(val='')
|
1350
1373
|
@e = Element.new('_').add_text val
|
1351
1374
|
@value = val
|
1352
1375
|
end
|
1353
|
-
|
1376
|
+
|
1354
1377
|
def add_element(e2)
|
1355
1378
|
@e.add e2
|
1356
1379
|
end
|
1357
|
-
|
1380
|
+
|
1358
1381
|
def add_text(t)
|
1359
1382
|
@e.add_text t
|
1360
1383
|
end
|
1361
|
-
|
1384
|
+
|
1362
1385
|
def inspect()
|
1363
1386
|
@value
|
1364
1387
|
end
|
1365
|
-
|
1388
|
+
|
1366
1389
|
def print()
|
1367
1390
|
"<!--%s-->" % @e.root.xpath('//./text()').join
|
1368
1391
|
end
|
1369
|
-
|
1392
|
+
|
1370
1393
|
def texts()
|
1371
1394
|
@e.texts
|
1372
1395
|
end
|
1373
|
-
|
1396
|
+
|
1374
1397
|
def to_s()
|
1375
1398
|
@value
|
1376
1399
|
end
|
1377
1400
|
end
|
1378
|
-
|
1401
|
+
|
1379
1402
|
class Elements
|
1380
1403
|
include Enumerable
|
1381
|
-
|
1404
|
+
|
1382
1405
|
def initialize(elements=[])
|
1383
1406
|
super()
|
1384
1407
|
@elements = elements
|
@@ -1390,63 +1413,63 @@ class Rexle
|
|
1390
1413
|
i = raw_i - 1
|
1391
1414
|
@elements[i]
|
1392
1415
|
end
|
1393
|
-
|
1416
|
+
|
1394
1417
|
def each(&blk) @elements.each(&blk) end
|
1395
1418
|
def empty?() @elements.empty? end
|
1396
|
-
|
1419
|
+
|
1397
1420
|
def index(e=nil, &blk)
|
1398
|
-
|
1421
|
+
|
1399
1422
|
if block_given? then
|
1400
1423
|
@elements.index(&blk)
|
1401
1424
|
else
|
1402
1425
|
@elements.index e
|
1403
1426
|
end
|
1404
1427
|
end
|
1405
|
-
|
1428
|
+
|
1406
1429
|
def last() @elements.last end
|
1407
1430
|
def length() @elements.length end
|
1408
1431
|
def to_a() @elements end
|
1409
|
-
|
1432
|
+
|
1410
1433
|
end # -- end of elements --
|
1411
1434
|
|
1412
1435
|
|
1413
1436
|
def parse(x=nil)
|
1414
|
-
|
1437
|
+
|
1415
1438
|
a = []
|
1416
|
-
|
1439
|
+
|
1417
1440
|
if x then
|
1418
1441
|
procs = {
|
1419
1442
|
String: proc {|x| parse_string(x)},
|
1420
1443
|
Array: proc {|x| x}
|
1421
1444
|
}
|
1422
1445
|
a = procs[x.class.to_s.to_sym].call(x)
|
1423
|
-
else
|
1446
|
+
else
|
1424
1447
|
a = yield
|
1425
1448
|
end
|
1426
|
-
|
1449
|
+
|
1427
1450
|
doc_node = ['doc',Attributes.new]
|
1428
1451
|
@a = procs[x.class.to_s.to_sym].call(x)
|
1429
1452
|
@doc = scan_element(*(doc_node << @a))
|
1430
|
-
|
1453
|
+
|
1431
1454
|
self
|
1432
1455
|
end
|
1433
1456
|
|
1434
1457
|
def add_attribute(x) @doc.attribute(x) end
|
1435
1458
|
def attribute(key) @doc.attribute(key) end
|
1436
1459
|
def attributes() @doc.attributes end
|
1437
|
-
|
1438
|
-
def add_element(element)
|
1439
1460
|
|
1440
|
-
|
1461
|
+
def add_element(element)
|
1462
|
+
|
1463
|
+
if @doc then
|
1441
1464
|
raise 'attempted adding second root element to document' if @doc.root
|
1442
|
-
@doc.root.add_element(element)
|
1465
|
+
@doc.root.add_element(element)
|
1443
1466
|
else
|
1444
|
-
doc_node = ['doc', Attributes.new, element.to_a]
|
1445
|
-
@doc = scan_element(*doc_node)
|
1467
|
+
doc_node = ['doc', Attributes.new, element.to_a]
|
1468
|
+
@doc = scan_element(*doc_node)
|
1446
1469
|
end
|
1447
1470
|
element
|
1448
1471
|
end
|
1449
|
-
|
1472
|
+
|
1450
1473
|
def add_text(s) end
|
1451
1474
|
|
1452
1475
|
alias add add_element
|
@@ -1456,26 +1479,26 @@ class Rexle
|
|
1456
1479
|
@doc.xpath(xpath).each {|e| e.delete; e = nil }
|
1457
1480
|
|
1458
1481
|
end
|
1459
|
-
|
1482
|
+
|
1460
1483
|
alias remove delete
|
1461
1484
|
|
1462
|
-
def element(xpath) self.xpath(xpath).first end
|
1485
|
+
def element(xpath) self.xpath(xpath).first end
|
1463
1486
|
def elements(s=nil) @doc.elements(s) end
|
1464
1487
|
def name() @doc.root.name end
|
1465
1488
|
def to_a() @a end
|
1466
|
-
|
1467
|
-
def to_s(options={})
|
1489
|
+
|
1490
|
+
def to_s(options={})
|
1468
1491
|
return '<UNDEFINED/>' unless @doc
|
1469
|
-
self.xml options
|
1492
|
+
self.xml options
|
1470
1493
|
end
|
1471
|
-
|
1494
|
+
|
1472
1495
|
def text(xpath) @doc.text(xpath) end
|
1473
|
-
def root()
|
1474
|
-
@doc.elements.first
|
1496
|
+
def root()
|
1497
|
+
@doc.elements.first
|
1475
1498
|
end
|
1476
1499
|
|
1477
|
-
def write(f)
|
1478
|
-
f.write xml
|
1500
|
+
def write(f)
|
1501
|
+
f.write xml
|
1479
1502
|
end
|
1480
1503
|
|
1481
1504
|
def xml(options={})
|
@@ -1488,7 +1511,7 @@ class Rexle
|
|
1488
1511
|
|
1489
1512
|
if o[:declaration] == true then
|
1490
1513
|
|
1491
|
-
unless @instructions.assoc 'xml' then
|
1514
|
+
unless @instructions and @instructions.assoc 'xml' then
|
1492
1515
|
@instructions.unshift ["xml","version='1.0' encoding='UTF-8'"]
|
1493
1516
|
end
|
1494
1517
|
end
|
@@ -1504,14 +1527,14 @@ class Rexle
|
|
1504
1527
|
private
|
1505
1528
|
|
1506
1529
|
def parse_rexle(x)
|
1507
|
-
|
1530
|
+
|
1508
1531
|
rp = RexleParser.new(x)
|
1509
1532
|
a = rp.to_a
|
1510
1533
|
|
1511
1534
|
@instructions = rp.instructions
|
1512
|
-
return a
|
1535
|
+
return a
|
1513
1536
|
end
|
1514
|
-
|
1537
|
+
|
1515
1538
|
def parse_string(x)
|
1516
1539
|
|
1517
1540
|
# check if the XML string is a dynarex document
|
@@ -1527,50 +1550,50 @@ class Rexle
|
|
1527
1550
|
'polyrex' => proc {|x| parse_rexle(x)}
|
1528
1551
|
}
|
1529
1552
|
other_parser = procs[recordx_type]
|
1530
|
-
|
1553
|
+
|
1531
1554
|
if other_parser then
|
1532
|
-
|
1555
|
+
|
1533
1556
|
begin
|
1534
1557
|
other_parser.call(x)
|
1535
1558
|
rescue
|
1536
1559
|
parse_rexle x
|
1537
1560
|
end
|
1538
|
-
|
1561
|
+
|
1539
1562
|
else
|
1540
|
-
|
1563
|
+
|
1541
1564
|
parse_rexle x
|
1542
|
-
|
1543
|
-
end
|
1544
|
-
|
1565
|
+
|
1566
|
+
end
|
1567
|
+
|
1545
1568
|
else
|
1546
1569
|
|
1547
1570
|
parse_rexle x
|
1548
|
-
|
1571
|
+
|
1549
1572
|
end
|
1550
1573
|
else
|
1551
1574
|
|
1552
1575
|
parse_rexle x
|
1553
|
-
|
1576
|
+
|
1554
1577
|
end
|
1555
1578
|
|
1556
1579
|
end
|
1557
|
-
|
1580
|
+
|
1558
1581
|
def scan_element(name=nil, attributes=nil, *children)
|
1559
|
-
|
1582
|
+
|
1560
1583
|
return unless name
|
1561
|
-
|
1584
|
+
|
1562
1585
|
return Rexle::CData.new(children.first) if name == '!['
|
1563
1586
|
return Rexle::Comment.new(children.first) if name == '!-'
|
1564
1587
|
|
1565
|
-
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1588
|
+
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1566
1589
|
|
1567
1590
|
if children then
|
1568
1591
|
|
1569
1592
|
children.each do |x4|
|
1570
|
-
|
1593
|
+
|
1571
1594
|
|
1572
1595
|
if x4.is_a? Array then
|
1573
|
-
element.add_element scan_element(*x4)
|
1596
|
+
element.add_element scan_element(*x4)
|
1574
1597
|
elsif x4.is_a? String then
|
1575
1598
|
|
1576
1599
|
e = if x4.is_a? String then
|
@@ -1579,22 +1602,22 @@ class Rexle
|
|
1579
1602
|
elsif x4.name == '![' then
|
1580
1603
|
|
1581
1604
|
Rexle::CData.new(x4)
|
1582
|
-
|
1605
|
+
|
1583
1606
|
elsif x4.name == '!-' then
|
1584
1607
|
|
1585
1608
|
Rexle::Comment.new(x4)
|
1586
|
-
|
1609
|
+
|
1587
1610
|
end
|
1588
1611
|
|
1589
1612
|
element.add_element e
|
1590
1613
|
end
|
1591
1614
|
end
|
1592
1615
|
end
|
1593
|
-
|
1616
|
+
|
1594
1617
|
return element
|
1595
1618
|
end
|
1596
1619
|
|
1597
|
-
|
1620
|
+
|
1598
1621
|
# scan a rexml doc
|
1599
1622
|
#
|
1600
1623
|
def scan_doc(node)
|
@@ -1602,28 +1625,28 @@ class Rexle
|
|
1602
1625
|
attributes = node.attributes.inject({}){|r,x| r.merge(Hash[*x])}
|
1603
1626
|
[node.name, node.text.to_s, attributes, *children]
|
1604
1627
|
end
|
1605
|
-
|
1628
|
+
|
1606
1629
|
class Recordset < Array
|
1607
1630
|
|
1608
1631
|
def initialize(a)
|
1609
1632
|
super(a)
|
1610
1633
|
end
|
1611
|
-
|
1634
|
+
|
1612
1635
|
def to_doc(root: 'root')
|
1613
|
-
|
1636
|
+
|
1614
1637
|
recordset = self.map(&:to_a)
|
1615
1638
|
Rexle.new([root,{}, *recordset])
|
1616
|
-
|
1639
|
+
|
1617
1640
|
end
|
1618
|
-
|
1641
|
+
|
1619
1642
|
def xpath(xpath)
|
1620
1643
|
self.to_doc.root.xpath(xpath)
|
1621
1644
|
end
|
1622
|
-
|
1645
|
+
|
1623
1646
|
def element(xpath)
|
1624
1647
|
self.to_doc.root.element(xpath)
|
1625
1648
|
end
|
1626
1649
|
|
1627
|
-
end
|
1628
|
-
|
1650
|
+
end
|
1651
|
+
|
1629
1652
|
end
|