rexle 1.5.8 → 1.5.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexle.rb +365 -350
- data.tar.gz.sig +0 -0
- metadata +35 -34
- metadata.gz.sig +1 -1
data/lib/rexle.rb
CHANGED
@@ -13,30 +13,35 @@ require 'backtrack-xpath'
|
|
13
13
|
|
14
14
|
# modifications:
|
15
15
|
|
16
|
+
# 01-Jan-2022: bug fix: Attribute values are no longer unescaped when
|
17
|
+
# called from Rexle#xml
|
18
|
+
# 03-Apr-2021: bug fix: When using *to_a*, a CDATA element, if present, is now output
|
19
|
+
# 20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
|
20
|
+
# 11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
|
16
21
|
# 30-Jul-2020: minor improvement: #plaintext now unescapes &amp; to &
|
17
22
|
# 11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
|
18
23
|
# 23-Apr-2020: feature: Added public method *plaintext*.
|
19
24
|
# 04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
|
20
|
-
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
21
|
-
# 09-Jul-2019: minor improvement: A comment tag now has a
|
25
|
+
# 18-Sep-2019: minor bug fix: &apos is now unescaped properly
|
26
|
+
# 09-Jul-2019: minor improvement: A comment tag now has a
|
22
27
|
# new line when pretty printed
|
23
28
|
# 02-Feb-2019: feature: A comment tag can now have nested elements
|
24
29
|
# 03-Nov-2018: feature: Debug messages can now use coloured text
|
25
30
|
# 02-Oct-2018: feature: Added Rexle::Elements#last
|
26
|
-
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
27
|
-
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
31
|
+
# 18-Jan-2018: bug fix: An Element's attributes are now cloned too
|
32
|
+
# 16-Sep-2017: improvement: Multiple results are now returned if the
|
28
33
|
# xpath contains an *and* operator
|
29
|
-
# 14-Sep-2017: improvement: An *and* operator can now be
|
34
|
+
# 14-Sep-2017: improvement: An *and* operator can now be
|
30
35
|
# used between xpath statements
|
31
36
|
# 10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id]
|
32
|
-
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
33
|
-
# track of the working document when elements are passed to
|
37
|
+
# 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
|
38
|
+
# track of the working document when elements are passed to
|
34
39
|
# different documents
|
35
40
|
# bug fix: Element prefixes are now only processed if they exist
|
36
|
-
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
41
|
+
# 13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the
|
37
42
|
# Rexle::Element#next_sibling and Rexle::Element#previous_sibling bugs
|
38
|
-
# 25-Feb-2017: improvement:
|
39
|
-
# An input rexle array can now have an empty array for
|
43
|
+
# 25-Feb-2017: improvement:
|
44
|
+
# An input rexle array can now have an empty array for
|
40
45
|
# children e.g. doc = Rexle.new(["records", {}, "", []])
|
41
46
|
# 25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer
|
42
47
|
|
@@ -50,7 +55,7 @@ module XMLhelper
|
|
50
55
|
scan_print(children).join.force_encoding("utf-8")
|
51
56
|
|
52
57
|
a = self.root.attributes.to_a.map do |k,v|
|
53
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s)]
|
58
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s(unescape: false))]
|
54
59
|
end
|
55
60
|
|
56
61
|
xml = "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : \
|
@@ -58,7 +63,7 @@ module XMLhelper
|
|
58
63
|
|
59
64
|
if self.instructions and declaration then
|
60
65
|
processing_instructions() + xml
|
61
|
-
else
|
66
|
+
else
|
62
67
|
xml
|
63
68
|
end
|
64
69
|
end
|
@@ -67,43 +72,43 @@ module XMLhelper
|
|
67
72
|
|
68
73
|
body = pretty_print(children,2).join
|
69
74
|
|
70
|
-
a = self.root.attributes.to_a.map do |k,v|
|
71
|
-
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
75
|
+
a = self.root.attributes.to_a.map do |k,v|
|
76
|
+
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v.to_s(unescape: false))]
|
72
77
|
end
|
73
|
-
|
74
|
-
ind = "\n "
|
78
|
+
|
79
|
+
ind = "\n "
|
75
80
|
xml = "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : \
|
76
81
|
' ' + a.join(' '), ind, body, "\n", self.root.name]
|
77
82
|
|
78
83
|
if self.instructions and declaration then
|
79
84
|
processing_instructions("") + xml
|
80
|
-
else
|
85
|
+
else
|
81
86
|
xml
|
82
87
|
end
|
83
88
|
end
|
84
|
-
|
85
|
-
def inspect()
|
89
|
+
|
90
|
+
def inspect()
|
86
91
|
"#<Rexle:%s>" % [self.object_id]
|
87
92
|
end
|
88
93
|
|
89
94
|
def processing_instructions(s='')
|
90
95
|
self.instructions.map do |instruction|
|
91
|
-
"<?%s?>\n" % instruction.join(' ')
|
96
|
+
"<?%s?>\n" % instruction.join(' ')
|
92
97
|
end.join s
|
93
98
|
end
|
94
99
|
|
95
100
|
def scan_print(nodes)
|
96
101
|
|
97
102
|
r2 = nodes.map do |x|
|
98
|
-
|
103
|
+
|
99
104
|
r = if x.is_a? Rexle::Element then
|
100
105
|
|
101
|
-
a = x.attributes.to_a.map do |k,v|
|
106
|
+
a = x.attributes.to_a.map do |k,v|
|
102
107
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
103
108
|
end
|
104
109
|
|
105
110
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
106
|
-
|
111
|
+
|
107
112
|
non_self_closing_tags = %w(script textarea iframe div object a)
|
108
113
|
|
109
114
|
if (x.children and x.children.length > 0 \
|
@@ -117,20 +122,20 @@ module XMLhelper
|
|
117
122
|
else
|
118
123
|
out = ["<%s/>" % tag]
|
119
124
|
end
|
120
|
-
|
125
|
+
|
121
126
|
elsif x.is_a? String then x
|
122
|
-
elsif x.is_a? Rexle::CData then x.print
|
123
|
-
elsif x.is_a? Rexle::Comment then x.print
|
124
|
-
|
127
|
+
elsif x.is_a? Rexle::CData then x.print
|
128
|
+
elsif x.is_a? Rexle::Comment then x.print
|
129
|
+
|
125
130
|
end
|
126
131
|
|
127
132
|
r
|
128
133
|
end
|
129
|
-
|
134
|
+
|
130
135
|
r2
|
131
136
|
|
132
137
|
end
|
133
|
-
|
138
|
+
|
134
139
|
def scan_to_a(nodes)
|
135
140
|
|
136
141
|
nodes.inject([]) do |r,x|
|
@@ -139,6 +144,10 @@ module XMLhelper
|
|
139
144
|
|
140
145
|
a = [String.new(x.name), Hash.new(x.attributes), x.value.to_s]
|
141
146
|
|
147
|
+
if x.cdatas.any? then
|
148
|
+
a.concat x.cdatas.map {|cdata| ['![', {}, cdata] }
|
149
|
+
end
|
150
|
+
|
142
151
|
(a.concat(scan_to_a(x.children))) if x.children.length > 1
|
143
152
|
r << a
|
144
153
|
elsif x.is_a? String then
|
@@ -149,7 +158,7 @@ module XMLhelper
|
|
149
158
|
end
|
150
159
|
|
151
160
|
end
|
152
|
-
|
161
|
+
|
153
162
|
|
154
163
|
|
155
164
|
def pretty_print(nodes, indent='0')
|
@@ -162,13 +171,13 @@ module XMLhelper
|
|
162
171
|
|
163
172
|
if x.is_a? Rexle::Element then
|
164
173
|
|
165
|
-
a = x.attributes.to_a.map do |k,v|
|
174
|
+
a = x.attributes.to_a.map do |k,v|
|
166
175
|
"%s='%s'" % [k,(v.is_a?(Array) ? v.join(' ') : v)]
|
167
176
|
end
|
168
177
|
a ||= []
|
169
178
|
|
170
179
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
171
|
-
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
180
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
172
181
|
|
173
182
|
if (x.value and x.value.length > 0) \
|
174
183
|
or (x.children and x.children.length > 0 \
|
@@ -176,13 +185,13 @@ module XMLhelper
|
|
176
185
|
x.name == 'script' or x.name == 'textarea' or \
|
177
186
|
x.name == 'iframe' then
|
178
187
|
|
179
|
-
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
188
|
+
ind1 = (x.children and x.children.grep(Rexle::Element).length > 0) ?
|
180
189
|
("\n" + ' ' * indent) : ''
|
181
|
-
|
190
|
+
|
182
191
|
out = ["%s<%s>%s" % [start, tag, ind1]]
|
183
|
-
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
192
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
184
193
|
ind2 = (ind1 and ind1.length > 0) ? ("\n" + ' ' * (indent - 1)) : ''
|
185
|
-
out << "%s</%s>" % [ind2, x.name]
|
194
|
+
out << "%s</%s>" % [ind2, x.name]
|
186
195
|
else
|
187
196
|
|
188
197
|
out = ["%s<%s/>" % [start, tag]]
|
@@ -190,8 +199,8 @@ module XMLhelper
|
|
190
199
|
|
191
200
|
|
192
201
|
elsif x.is_a? String then x.sub(/^[\n\s]+$/,'')
|
193
|
-
elsif x.is_a? Rexle::CData then x.print
|
194
|
-
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
202
|
+
elsif x.is_a? Rexle::CData then x.print
|
203
|
+
elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
|
195
204
|
|
196
205
|
end
|
197
206
|
end
|
@@ -206,17 +215,17 @@ class Rexle
|
|
206
215
|
|
207
216
|
attr_reader :prefixes, :doctype
|
208
217
|
attr_accessor :instructions
|
209
|
-
|
218
|
+
|
210
219
|
def initialize(x=nil, rexle: self, debug: false)
|
211
220
|
|
212
221
|
@rexle, @debug = rexle, debug
|
213
222
|
$debug = @debug
|
214
|
-
|
223
|
+
|
215
224
|
puts 'inside Rexle'.debug if debug
|
216
|
-
|
225
|
+
|
217
226
|
super()
|
218
227
|
|
219
|
-
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
228
|
+
@instructions = [["xml", "version='1.0' encoding='UTF-8'"]]
|
220
229
|
@doctype = :xml
|
221
230
|
|
222
231
|
# what type of input is it? Is it a string, array
|
@@ -226,11 +235,11 @@ class Rexle
|
|
226
235
|
Array: proc {|x| x},
|
227
236
|
RexleParser: ->(x){ parse_rexle(x)}
|
228
237
|
}
|
229
|
-
|
238
|
+
|
230
239
|
doc_node = ['doc', Attributes.new]
|
231
|
-
|
240
|
+
|
232
241
|
@a = procs[x.class.to_s.to_sym].call(x)
|
233
|
-
|
242
|
+
|
234
243
|
@doc = scan_element(*(doc_node << @a))
|
235
244
|
|
236
245
|
# fetch the namespaces
|
@@ -241,78 +250,78 @@ class Rexle
|
|
241
250
|
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
242
251
|
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
243
252
|
end
|
244
|
-
|
253
|
+
|
245
254
|
end
|
246
255
|
|
247
256
|
end
|
248
|
-
|
257
|
+
|
249
258
|
def clone()
|
250
259
|
Rexle.new self.to_a
|
251
260
|
end
|
252
|
-
|
261
|
+
|
253
262
|
def at_css(selector)
|
254
263
|
@doc.root.element RexleCSS.new(selector).to_xpath
|
255
|
-
end
|
256
|
-
|
264
|
+
end
|
265
|
+
|
257
266
|
def css(selector)
|
258
|
-
|
259
|
-
a = selector.split(',').flat_map do |x|
|
267
|
+
|
268
|
+
a = selector.split(',').flat_map do |x|
|
260
269
|
@doc.root.xpath RexleCSS.new(x).to_xpath
|
261
270
|
end
|
262
|
-
|
271
|
+
|
263
272
|
return a
|
264
273
|
end
|
265
|
-
|
274
|
+
|
266
275
|
def xpath(path, &blk)
|
267
276
|
@doc.xpath(path, &blk)
|
268
|
-
end
|
277
|
+
end
|
269
278
|
|
270
279
|
class Element
|
271
280
|
include XMLhelper
|
272
|
-
|
281
|
+
|
273
282
|
class Value < String
|
274
|
-
|
283
|
+
|
275
284
|
def initialize(value)
|
276
285
|
super(value)
|
277
286
|
end
|
278
|
-
|
287
|
+
|
279
288
|
def <(val2)
|
280
289
|
self.to_f < val2.to_f
|
281
|
-
end
|
282
|
-
|
290
|
+
end
|
291
|
+
|
283
292
|
def >(val2)
|
284
293
|
self.to_f > val2.to_f
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
288
297
|
class Attribute
|
289
|
-
|
298
|
+
|
290
299
|
attr_reader :value
|
291
|
-
|
300
|
+
|
292
301
|
def initialize(value)
|
293
302
|
@value = value
|
294
303
|
end
|
295
|
-
|
304
|
+
|
296
305
|
def to_f()
|
297
306
|
@value.to_f
|
298
|
-
end
|
299
|
-
|
307
|
+
end
|
308
|
+
|
300
309
|
def to_i()
|
301
310
|
@value.to_i
|
302
311
|
end
|
303
|
-
|
312
|
+
|
304
313
|
alias to_s value
|
305
|
-
|
314
|
+
|
306
315
|
end
|
307
|
-
|
316
|
+
|
308
317
|
attr_accessor :name, :value, :parent
|
309
318
|
attr_reader :child_elements, :doc_id, :instructions
|
310
|
-
|
319
|
+
|
311
320
|
alias original_clone clone
|
312
321
|
|
313
322
|
def initialize(name=nil, value: nil, attributes: Attributes.new, rexle: self)
|
314
323
|
|
315
|
-
@rexle = rexle
|
324
|
+
@rexle = rexle
|
316
325
|
super()
|
317
326
|
|
318
327
|
@name, @attributes = name.to_s, attributes
|
@@ -322,11 +331,11 @@ class Rexle
|
|
322
331
|
self.add_text value if value
|
323
332
|
|
324
333
|
end
|
325
|
-
|
334
|
+
|
326
335
|
def backtrack(use_attributes: true)
|
327
336
|
BacktrackXPath.new(self, use_attributes: use_attributes)
|
328
337
|
end
|
329
|
-
|
338
|
+
|
330
339
|
def cdata?()
|
331
340
|
self.is_a? CData
|
332
341
|
end
|
@@ -334,8 +343,8 @@ class Rexle
|
|
334
343
|
def contains(raw_args)
|
335
344
|
|
336
345
|
path, raw_val = raw_args.split(',',2)
|
337
|
-
val = raw_val.strip[/^["']?.*["']?$/]
|
338
|
-
|
346
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
347
|
+
|
339
348
|
anode = query_xpath(path)
|
340
349
|
|
341
350
|
return [false] if anode.nil? or anode.empty?
|
@@ -344,21 +353,21 @@ class Rexle
|
|
344
353
|
r = [a.grep(/#{val.sub(/^["'](.*)["']$/,'\1')}/).length > 0]
|
345
354
|
|
346
355
|
r.any?
|
347
|
-
end
|
348
|
-
|
356
|
+
end
|
357
|
+
|
349
358
|
def count(path)
|
350
359
|
length = query_xpath(path).flatten.compact.length
|
351
360
|
length
|
352
361
|
end
|
353
|
-
|
362
|
+
|
354
363
|
def current()
|
355
364
|
self
|
356
365
|
end
|
357
366
|
|
358
367
|
def at_css(selector)
|
359
368
|
self.root.element RexleCSS.new(selector).to_xpath
|
360
|
-
end
|
361
|
-
|
369
|
+
end
|
370
|
+
|
362
371
|
def css(selector)
|
363
372
|
|
364
373
|
selector.split(',')\
|
@@ -369,76 +378,76 @@ class Rexle
|
|
369
378
|
def lowercase(s)
|
370
379
|
|
371
380
|
end
|
372
|
-
|
373
|
-
def max(path)
|
381
|
+
|
382
|
+
def max(path)
|
374
383
|
a = query_xpath(path).flatten.select{|x| x.is_a? String or x.is_a? Rexle::Element::Attribute}.map(&:to_i)
|
375
|
-
a.max
|
384
|
+
a.max
|
376
385
|
end
|
377
|
-
|
386
|
+
|
378
387
|
def name()
|
379
|
-
|
388
|
+
|
380
389
|
if @rexle and @rexle.respond_to? :prefixes then
|
381
|
-
|
390
|
+
|
382
391
|
if @rexle.prefixes.is_a? Array then
|
383
|
-
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
392
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] }
|
384
393
|
end
|
385
|
-
|
394
|
+
|
386
395
|
prefix ? @name.sub(prefix + ':', '') : @name
|
387
|
-
|
396
|
+
|
388
397
|
else
|
389
398
|
@name
|
390
399
|
end
|
391
|
-
|
400
|
+
|
392
401
|
end
|
393
|
-
|
394
|
-
def next_element()
|
402
|
+
|
403
|
+
def next_element()
|
395
404
|
|
396
405
|
id = self.object_id
|
397
|
-
a = self.parent.elements
|
406
|
+
a = self.parent.elements
|
398
407
|
|
399
408
|
i = a.index {|x| x.object_id == id} + 2
|
400
409
|
a[i] if i < a.length + 1
|
401
|
-
|
410
|
+
|
402
411
|
end
|
403
|
-
|
412
|
+
|
404
413
|
alias next_sibling next_element
|
405
|
-
|
414
|
+
|
406
415
|
def not(bool)
|
407
416
|
|
408
417
|
r = self.xpath(bool).any?
|
409
418
|
|
410
419
|
!r
|
411
420
|
end
|
412
|
-
|
413
|
-
def previous_element()
|
414
|
-
|
421
|
+
|
422
|
+
def previous_element()
|
423
|
+
|
415
424
|
id = self.object_id
|
416
|
-
a = self.parent.elements
|
425
|
+
a = self.parent.elements
|
417
426
|
i = a.index {|x| x.object_id == id}
|
418
427
|
|
419
|
-
a[i] if i > 0
|
428
|
+
a[i] if i > 0
|
420
429
|
|
421
430
|
end
|
422
|
-
|
431
|
+
|
423
432
|
alias previous_sibling previous_element
|
424
|
-
|
433
|
+
|
425
434
|
def xpath(path, rlist=[], &blk)
|
426
|
-
|
435
|
+
|
427
436
|
#@log.debug 'inside xpath ' + path.inspect
|
428
437
|
|
429
438
|
r = filter_xpath(path, rlist=[], &blk)
|
430
439
|
#@log.debug 'after filter_xpath : ' + r.inspect
|
431
|
-
|
440
|
+
|
432
441
|
if r.is_a?(Array) then
|
433
|
-
|
442
|
+
|
434
443
|
Recordset.new(r.compact)
|
435
|
-
|
444
|
+
|
436
445
|
else
|
437
446
|
r
|
438
447
|
end
|
439
|
-
|
448
|
+
|
440
449
|
end
|
441
|
-
|
450
|
+
|
442
451
|
def filter_xpath(raw_path, rlist=[], &blk)
|
443
452
|
#@log.debug 'inside filter_xpath : ' + raw_path.inspect
|
444
453
|
path = String.new raw_path
|
@@ -448,25 +457,25 @@ class Rexle
|
|
448
457
|
#fn_match = path.match(/^(\w+)\(/)
|
449
458
|
#@log.debug 'fn_match : ' + fn_match.inspect
|
450
459
|
end_fn_match = path.slice!(/\[\w+\(\)\]$/)
|
451
|
-
|
460
|
+
|
452
461
|
if end_fn_match then
|
453
|
-
|
462
|
+
|
454
463
|
m = end_fn_match[1..-4]
|
455
464
|
#@log.debug 'its a function'
|
456
465
|
[method(m.to_sym).call(xpath path)]
|
457
|
-
|
458
|
-
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
466
|
+
|
467
|
+
elsif (fn_match and fn_match.captures.first[/^(attribute|@)/])
|
459
468
|
|
460
469
|
procs = {
|
461
470
|
|
462
|
-
Array: proc { |x|
|
463
|
-
if block_given? then
|
464
|
-
x.flatten(1)
|
471
|
+
Array: proc { |x|
|
472
|
+
if block_given? then
|
473
|
+
x.flatten(1)
|
465
474
|
else
|
466
475
|
rs = x.flatten
|
467
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
476
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
468
477
|
end
|
469
|
-
},
|
478
|
+
},
|
470
479
|
String: proc {|x| x},
|
471
480
|
Hash: proc {|x| x},
|
472
481
|
TrueClass: proc{|x| x},
|
@@ -475,25 +484,25 @@ class Rexle
|
|
475
484
|
}
|
476
485
|
bucket = []
|
477
486
|
raw_results = path.split('|').map do |xp|
|
478
|
-
query_xpath(xp.strip, bucket, &blk)
|
487
|
+
query_xpath(xp.strip, bucket, &blk)
|
479
488
|
end
|
480
|
-
|
489
|
+
|
481
490
|
results = raw_results
|
482
491
|
|
483
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
484
|
-
|
492
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
493
|
+
|
485
494
|
elsif fn_match.nil?
|
486
|
-
|
495
|
+
|
487
496
|
procs = {
|
488
497
|
|
489
|
-
Array: proc { |x|
|
490
|
-
if block_given? then
|
491
|
-
x.flatten(1)
|
498
|
+
Array: proc { |x|
|
499
|
+
if block_given? then
|
500
|
+
x.flatten(1)
|
492
501
|
else
|
493
502
|
rs = x.flatten
|
494
|
-
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
503
|
+
rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id)
|
495
504
|
end
|
496
|
-
},
|
505
|
+
},
|
497
506
|
String: proc {|x| x},
|
498
507
|
Hash: proc {|x| x},
|
499
508
|
TrueClass: proc{|x| x},
|
@@ -501,23 +510,23 @@ class Rexle
|
|
501
510
|
:"Rexle::Element" => proc {|x| [x]}
|
502
511
|
}
|
503
512
|
bucket = []
|
504
|
-
|
513
|
+
|
505
514
|
results = if path =~ /[\[]|\(/ then
|
506
515
|
|
507
516
|
raw_results = path.split(/\|/).map do |xp|
|
508
|
-
query_xpath(xp.strip, bucket, &blk)
|
517
|
+
query_xpath(xp.strip, bucket, &blk)
|
509
518
|
end
|
510
519
|
|
511
520
|
raw_results.flatten.index(true) ? [true] : []
|
512
|
-
|
521
|
+
|
513
522
|
else
|
514
523
|
raw_results = path.split(/ *(?:\||\band\b) */).map do |xp|
|
515
|
-
query_xpath(xp.strip, bucket, &blk)
|
516
|
-
end
|
524
|
+
query_xpath(xp.strip, bucket, &blk)
|
525
|
+
end
|
517
526
|
|
518
527
|
if path =~ / and / then
|
519
528
|
|
520
|
-
raw_results.flatten.select {|x| x == true or x == false}
|
529
|
+
raw_results.flatten.select {|x| x == true or x == false}
|
521
530
|
|
522
531
|
else
|
523
532
|
raw_results.flatten.index(true) ? [true] : []
|
@@ -526,12 +535,12 @@ class Rexle
|
|
526
535
|
|
527
536
|
return results if !path[/[><]/] and results.any?
|
528
537
|
results = raw_results # .flatten.select {|x| x}
|
529
|
-
|
530
|
-
procs[results.class.to_s.to_sym].call(results) if results
|
538
|
+
|
539
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
531
540
|
|
532
541
|
else
|
533
|
-
|
534
|
-
m, xpath_value, index = fn_match.captures
|
542
|
+
|
543
|
+
m, xpath_value, index = fn_match.captures
|
535
544
|
|
536
545
|
if m == 'text' then
|
537
546
|
a = texts()
|
@@ -545,14 +554,14 @@ class Rexle
|
|
545
554
|
|
546
555
|
end
|
547
556
|
|
548
|
-
end
|
549
|
-
|
557
|
+
end
|
558
|
+
|
550
559
|
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
551
560
|
|
552
561
|
#@log.debug 'query_xpath : ' + raw_xpath_value.inspect
|
553
562
|
#@log.debug '++ ' + self.xml.inspect
|
554
563
|
|
555
|
-
flag_func = false
|
564
|
+
flag_func = false
|
556
565
|
|
557
566
|
xpath_value = raw_xpath_value.sub('child::','./')
|
558
567
|
|
@@ -576,38 +585,38 @@ class Rexle
|
|
576
585
|
|
577
586
|
raw_condition = raw_condition ? raw_condition + '/' + remaining_path \
|
578
587
|
: remaining_path
|
579
|
-
remaining_path = ''
|
588
|
+
remaining_path = ''
|
580
589
|
end
|
581
590
|
|
582
|
-
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
591
|
+
r = raw_path[/^([^\/]+)(?=\/\/)/,1]
|
583
592
|
|
584
593
|
if r then
|
585
594
|
a_path = raw_path.split(/(?=\/\/)/,2)
|
586
595
|
else
|
587
596
|
a_path = raw_path.split('/',2)
|
588
597
|
end
|
589
|
-
|
598
|
+
|
590
599
|
condition = raw_condition if a_path.length <= 1 #and not raw_condition[/^\[\w+\(.*\)\]$/]
|
591
600
|
|
592
601
|
if raw_path[0,2] == '//' then
|
593
602
|
s = ''
|
594
|
-
elsif raw_path == 'text()'
|
603
|
+
elsif raw_path == 'text()'
|
595
604
|
|
596
605
|
a_path.shift
|
597
606
|
#return @value
|
598
607
|
return self.texts
|
599
608
|
else
|
600
609
|
|
601
|
-
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
602
|
-
|
610
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
611
|
+
|
603
612
|
return @attributes if attribute == '*'
|
604
|
-
|
613
|
+
|
605
614
|
if attribute and @attributes and \
|
606
615
|
@attributes.has_key?(attribute.to_sym) then
|
607
616
|
return [Attribute.new(@attributes[attribute.to_sym])]
|
608
617
|
end
|
609
618
|
s = a_path.shift
|
610
|
-
end
|
619
|
+
end
|
611
620
|
|
612
621
|
# isolate the xpath to return just the path to the current element
|
613
622
|
|
@@ -623,7 +632,7 @@ class Rexle
|
|
623
632
|
if element_name and element_name[/^\d/] then
|
624
633
|
element_name = nil
|
625
634
|
end
|
626
|
-
|
635
|
+
|
627
636
|
condition = raw_xpath_value if element_name.nil?
|
628
637
|
|
629
638
|
else
|
@@ -659,11 +668,11 @@ class Rexle
|
|
659
668
|
r4 = attribute_search(attr_search, self, self.attributes)
|
660
669
|
return r4
|
661
670
|
end
|
662
|
-
|
663
|
-
|
671
|
+
|
672
|
+
|
664
673
|
return_elements = []
|
665
|
-
|
666
|
-
|
674
|
+
|
675
|
+
|
667
676
|
|
668
677
|
if raw_path[0,2] == '//' then
|
669
678
|
|
@@ -678,25 +687,25 @@ class Rexle
|
|
678
687
|
|
679
688
|
if element_name.is_a? String then
|
680
689
|
ename, raw_selector = (element_name.split('::',2)).reverse
|
681
|
-
|
690
|
+
|
682
691
|
selector = case raw_selector
|
683
692
|
when 'following-sibling' then 1
|
684
693
|
when 'preceding-sibling' then -1
|
685
694
|
end
|
686
|
-
|
695
|
+
|
687
696
|
else
|
688
697
|
ename = element_name
|
689
|
-
end
|
698
|
+
end
|
690
699
|
|
691
700
|
if ename == '..' then
|
692
|
-
|
701
|
+
|
693
702
|
remaining_xpath = raw_path[/\.\.\/(.*)/,1]
|
694
703
|
# select the parent element
|
695
704
|
|
696
705
|
r2 = self.parent.xpath(remaining_xpath)
|
697
706
|
|
698
707
|
return r2
|
699
|
-
|
708
|
+
|
700
709
|
elsif ename == '.'
|
701
710
|
|
702
711
|
remaining_xpath = raw_path[1..-1]
|
@@ -705,41 +714,41 @@ class Rexle
|
|
705
714
|
if xpath_value.length > 0 and xpath_value =~ /\[/ then
|
706
715
|
|
707
716
|
r = eval(attr_search.sub(/^h/,'self.attributes'))
|
708
|
-
return self if r
|
717
|
+
return self if r
|
709
718
|
|
710
719
|
else
|
711
720
|
return self
|
712
721
|
end
|
713
722
|
else
|
714
723
|
return self.xpath(remaining_xpath)
|
715
|
-
end
|
724
|
+
end
|
716
725
|
|
717
726
|
elsif element_name.nil?
|
718
727
|
puts ('attr_search: ' + attr_search.inspect).debug if $debug
|
719
|
-
return eval attr_search
|
728
|
+
return eval attr_search
|
720
729
|
else
|
721
730
|
|
722
731
|
if raw_selector.nil? and ename != element_part then
|
723
732
|
|
724
733
|
right_cond = element_part[/#{ename}(.*)/,1]
|
725
734
|
|
726
|
-
end
|
735
|
+
end
|
727
736
|
|
728
737
|
return_elements = @child_elements.map.with_index.select do |x, i|
|
729
738
|
|
730
739
|
next unless x.is_a? Rexle::Element
|
731
740
|
|
732
741
|
#x.name == ename or (ename == '*')
|
733
|
-
|
742
|
+
|
734
743
|
r10 = ((x.name == ename) or (ename == '*'))
|
735
744
|
|
736
|
-
|
745
|
+
|
737
746
|
|
738
747
|
end
|
739
|
-
|
748
|
+
|
740
749
|
if right_cond then
|
741
|
-
|
742
|
-
|
750
|
+
|
751
|
+
|
743
752
|
r12 = return_elements.map do |x, i|
|
744
753
|
|
745
754
|
if x.text then
|
@@ -749,15 +758,15 @@ class Rexle
|
|
749
758
|
else
|
750
759
|
false
|
751
760
|
end
|
752
|
-
|
761
|
+
|
753
762
|
end
|
754
|
-
|
763
|
+
|
755
764
|
return r12
|
756
|
-
|
757
|
-
end
|
758
|
-
|
765
|
+
|
766
|
+
end
|
767
|
+
|
759
768
|
if selector then
|
760
|
-
ne = return_elements.inject([]) do |r,x|
|
769
|
+
ne = return_elements.inject([]) do |r,x|
|
761
770
|
i = x.last + selector
|
762
771
|
if i >= 0 then
|
763
772
|
r << i
|
@@ -768,17 +777,17 @@ class Rexle
|
|
768
777
|
|
769
778
|
return_elements = ne.map {|x| [@child_elements[x], x] if x}
|
770
779
|
end
|
771
|
-
|
780
|
+
|
772
781
|
|
773
782
|
end
|
774
783
|
end
|
775
|
-
|
784
|
+
|
776
785
|
if return_elements.length > 0 then
|
777
786
|
|
778
787
|
if (a_path + [remaining_path]).join.empty? then
|
779
788
|
|
780
789
|
# pass in a block to the filter if it is function contains?
|
781
|
-
rlist = return_elements.map.with_index do |x,i|
|
790
|
+
rlist = return_elements.map.with_index do |x,i|
|
782
791
|
r5 = filter(x, i+1, attr_search, &blk)
|
783
792
|
|
784
793
|
r5
|
@@ -788,9 +797,9 @@ class Rexle
|
|
788
797
|
|
789
798
|
else
|
790
799
|
|
791
|
-
rlist << return_elements.map.with_index do |x,i|
|
800
|
+
rlist << return_elements.map.with_index do |x,i|
|
792
801
|
|
793
|
-
rtn_element = filter(x, i+1, attr_search) do |e|
|
802
|
+
rtn_element = filter(x, i+1, attr_search) do |e|
|
794
803
|
|
795
804
|
r = e.xpath(a_path.join('/') + raw_condition.to_s \
|
796
805
|
+ remaining_path, &blk)
|
@@ -836,7 +845,7 @@ class Rexle
|
|
836
845
|
rlist,&blk)
|
837
846
|
end
|
838
847
|
end
|
839
|
-
|
848
|
+
|
840
849
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) \
|
841
850
|
or (rlist.length > 1 and rlist[0].is_a? Array)
|
842
851
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
@@ -853,21 +862,21 @@ class Rexle
|
|
853
862
|
elsif item.is_a? Rexle::CData then
|
854
863
|
@child_elements << item
|
855
864
|
elsif item.is_a? Rexle::Comment then
|
856
|
-
@child_elements << item
|
865
|
+
@child_elements << item
|
857
866
|
elsif item.is_a? Rexle::Element then
|
858
867
|
|
859
868
|
@child_elements << item
|
860
869
|
# add a reference from this element (the parent) to the child
|
861
870
|
item.parent = self
|
862
|
-
item
|
863
|
-
|
871
|
+
item
|
872
|
+
|
864
873
|
elsif item.is_a? Rexle then
|
865
874
|
self.add_element(item.root)
|
866
875
|
end
|
867
876
|
|
868
|
-
end
|
877
|
+
end
|
869
878
|
|
870
|
-
def add(item)
|
879
|
+
def add(item)
|
871
880
|
|
872
881
|
if item.is_a? Rexle::Element then
|
873
882
|
|
@@ -893,13 +902,13 @@ class Rexle
|
|
893
902
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
894
903
|
else
|
895
904
|
self.xml
|
896
|
-
end
|
905
|
+
end
|
897
906
|
end
|
898
907
|
|
899
908
|
def add_attribute(*x)
|
900
|
-
|
909
|
+
|
901
910
|
proc_hash = lambda {|x| Hash[*x]}
|
902
|
-
|
911
|
+
|
903
912
|
procs = {
|
904
913
|
Hash: lambda {|x| x[0] || {}},
|
905
914
|
String: proc_hash,
|
@@ -917,57 +926,57 @@ class Rexle
|
|
917
926
|
def add_text(s)
|
918
927
|
|
919
928
|
self.child_elements << s
|
920
|
-
self
|
929
|
+
self
|
921
930
|
end
|
922
|
-
|
923
|
-
def attribute(key)
|
924
|
-
|
931
|
+
|
932
|
+
def attribute(key)
|
933
|
+
|
925
934
|
key = key.to_sym if key.is_a? String
|
926
|
-
|
935
|
+
|
927
936
|
if @attributes[key].is_a? String then
|
928
|
-
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
937
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
929
938
|
else
|
930
939
|
@attributes[key]
|
931
940
|
end
|
932
|
-
end
|
933
|
-
|
934
|
-
def attributes() @attributes end
|
935
|
-
|
941
|
+
end
|
942
|
+
|
943
|
+
def attributes() @attributes end
|
944
|
+
|
936
945
|
def cdatas()
|
937
946
|
self.children.inject([]){|r,x| x.is_a?(Rexle::CData) ? r << x.to_s : r }
|
938
947
|
end
|
939
|
-
|
948
|
+
|
940
949
|
def children()
|
941
950
|
|
942
951
|
r = @child_elements
|
943
|
-
|
952
|
+
|
944
953
|
def r.is_an_empty_string?()
|
945
954
|
self.length == 1 and self.first == ''
|
946
|
-
end
|
947
|
-
|
955
|
+
end
|
956
|
+
|
948
957
|
return r
|
949
|
-
end
|
958
|
+
end
|
950
959
|
|
951
960
|
def children=(a) @child_elements = a if a.is_a? Array end
|
952
|
-
|
961
|
+
|
953
962
|
def deep_clone() Rexle.new(self.xml).root end
|
954
|
-
|
955
|
-
def clone()
|
956
|
-
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
963
|
+
|
964
|
+
def clone()
|
965
|
+
Element.new(@name, attributes: Marshal.load( Marshal.dump(@attributes)))
|
957
966
|
end
|
958
|
-
|
967
|
+
|
959
968
|
def delete(obj=nil)
|
960
969
|
|
961
970
|
if obj then
|
962
971
|
|
963
972
|
if obj.is_a? String then
|
964
|
-
|
973
|
+
|
965
974
|
self.xpath(obj).each {|e| e.delete; e = nil}
|
966
|
-
|
975
|
+
|
967
976
|
else
|
968
977
|
|
969
978
|
i = @child_elements.index(obj)
|
970
|
-
[@child_elements].each{|x| x.delete_at i} if i
|
979
|
+
[@child_elements].each{|x| x.delete_at i} if i
|
971
980
|
end
|
972
981
|
else
|
973
982
|
|
@@ -989,50 +998,50 @@ class Rexle
|
|
989
998
|
String: proc {|x| @child_elements[x]}
|
990
999
|
}
|
991
1000
|
|
992
|
-
procs[s.class.to_s.to_sym].call(s)
|
1001
|
+
procs[s.class.to_s.to_sym].call(s)
|
993
1002
|
end
|
994
1003
|
|
995
1004
|
def doc_root() @rexle.root end
|
996
1005
|
def each(&blk) self.children.each(&blk) end
|
997
1006
|
def each_recursive(&blk) recursive_scan(self.children,&blk) end
|
998
1007
|
alias traverse each_recursive
|
999
|
-
def has_elements?() !self.elements.empty? end
|
1000
|
-
def insert_after(node) insert(node, 1) end
|
1008
|
+
def has_elements?() !self.elements.empty? end
|
1009
|
+
def insert_after(node) insert(node, 1) end
|
1001
1010
|
def insert_before(node) insert(node) end
|
1002
1011
|
def last(a) a.last end
|
1003
|
-
def map(&blk) self.children.map(&blk) end
|
1004
|
-
|
1012
|
+
def map(&blk) self.children.map(&blk) end
|
1013
|
+
|
1005
1014
|
def plaintext()
|
1006
|
-
xml().gsub(/<\/?[^>]+>/,'').gsub('&
|
1015
|
+
CGI.unescapeHTML xml().gsub(/<\/?[^>]+>/,'').gsub('&nbsp;',' ')\
|
1007
1016
|
.gsub(/\n\s+/,' ')
|
1008
1017
|
end
|
1009
|
-
|
1010
|
-
def root() self end
|
1018
|
+
|
1019
|
+
def root() self end
|
1011
1020
|
|
1012
1021
|
def text(s='')
|
1013
|
-
|
1014
|
-
return self.value if s.empty?
|
1015
|
-
|
1022
|
+
|
1023
|
+
return self.value if s.empty?
|
1024
|
+
|
1016
1025
|
e = self.element(s)
|
1017
1026
|
return e if e.is_a? String
|
1018
|
-
|
1027
|
+
|
1019
1028
|
e.value if e
|
1020
1029
|
end
|
1021
|
-
|
1030
|
+
|
1022
1031
|
def texts()
|
1023
1032
|
|
1024
1033
|
r = @child_elements.select do |x|
|
1025
1034
|
x.is_a? String or x.is_a? Rexle::CData
|
1026
1035
|
end
|
1027
|
-
|
1036
|
+
|
1028
1037
|
r.map do |x|
|
1029
1038
|
def x.unescape()
|
1030
1039
|
s = self.to_s.clone
|
1031
1040
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1032
1041
|
s
|
1033
|
-
end
|
1042
|
+
end
|
1034
1043
|
end
|
1035
|
-
|
1044
|
+
|
1036
1045
|
return r
|
1037
1046
|
end
|
1038
1047
|
|
@@ -1040,20 +1049,20 @@ class Rexle
|
|
1040
1049
|
|
1041
1050
|
r = @child_elements.first
|
1042
1051
|
return nil unless r.is_a? String
|
1043
|
-
|
1052
|
+
|
1044
1053
|
def r.unescape()
|
1045
1054
|
s = self.clone
|
1046
1055
|
%w(&lt; < &gt; > &amp; & &apos; ').each_slice(2){|x| s.gsub!(*x)}
|
1047
1056
|
s
|
1048
|
-
end
|
1049
|
-
|
1057
|
+
end
|
1058
|
+
|
1050
1059
|
return r
|
1051
1060
|
end
|
1052
|
-
|
1061
|
+
|
1053
1062
|
def value=(raw_s)
|
1054
1063
|
|
1055
1064
|
val = Value.new(raw_s.to_s.clone)
|
1056
|
-
|
1065
|
+
|
1057
1066
|
escape_chars = %w(& &amp; ' &apos; < &lt; > &gt;).each_slice(2).to_a
|
1058
1067
|
escape_chars.each{|x| val.gsub!(*x)}
|
1059
1068
|
|
@@ -1063,9 +1072,15 @@ class Rexle
|
|
1063
1072
|
end
|
1064
1073
|
|
1065
1074
|
alias text= value=
|
1066
|
-
|
1075
|
+
|
1067
1076
|
def to_a()
|
1077
|
+
|
1068
1078
|
e = [String.new(self.name), Hash.new(self.attributes)]
|
1079
|
+
|
1080
|
+
if self.cdatas.any? then
|
1081
|
+
e.concat self.cdatas.map {|cdata| ['![', {}, cdata] }
|
1082
|
+
end
|
1083
|
+
|
1069
1084
|
[*e, *scan_to_a(self.children)]
|
1070
1085
|
end
|
1071
1086
|
|
@@ -1075,10 +1090,10 @@ class Rexle
|
|
1075
1090
|
Hash: lambda {|x|
|
1076
1091
|
o = {pretty: false}.merge(x)
|
1077
1092
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
1078
|
-
|
1093
|
+
|
1079
1094
|
method(msg).call(self.children)
|
1080
1095
|
},
|
1081
|
-
String: lambda {|x|
|
1096
|
+
String: lambda {|x|
|
1082
1097
|
r = self.element(x)
|
1083
1098
|
r ? r.xml : ''
|
1084
1099
|
}
|
@@ -1091,18 +1106,18 @@ class Rexle
|
|
1091
1106
|
end
|
1092
1107
|
|
1093
1108
|
def prepend(item)
|
1094
|
-
|
1109
|
+
|
1095
1110
|
@child_elements.unshift item
|
1096
|
-
|
1111
|
+
|
1097
1112
|
# add a reference from this element (the parent) to the child
|
1098
1113
|
item.parent = self
|
1099
|
-
item
|
1100
|
-
end
|
1101
|
-
|
1114
|
+
item
|
1115
|
+
end
|
1116
|
+
|
1102
1117
|
alias to_s xml
|
1103
1118
|
|
1104
1119
|
private
|
1105
|
-
|
1120
|
+
|
1106
1121
|
def insert(node,offset=0)
|
1107
1122
|
|
1108
1123
|
i = parent.child_elements.index(self)
|
@@ -1114,7 +1129,7 @@ class Rexle
|
|
1114
1129
|
node.instance_variable_set(:@doc_id, self.doc_root.object_id)
|
1115
1130
|
|
1116
1131
|
self
|
1117
|
-
end
|
1132
|
+
end
|
1118
1133
|
|
1119
1134
|
def format_condition(condition)
|
1120
1135
|
|
@@ -1139,21 +1154,21 @@ class Rexle
|
|
1139
1154
|
elsif raw_items[0][/^not\(/]
|
1140
1155
|
|
1141
1156
|
return raw_items[0]
|
1142
|
-
|
1157
|
+
|
1143
1158
|
else
|
1144
1159
|
|
1145
1160
|
andor_items = raw_items.map.with_index\
|
1146
1161
|
.select{|x,i| x[/\band\b|\bor\b/]}\
|
1147
1162
|
.map{|x| [x.last, x.last + 1]}.flatten
|
1148
|
-
|
1163
|
+
|
1149
1164
|
indices = [0] + andor_items + [raw_items.length]
|
1150
1165
|
|
1151
1166
|
if raw_items[0][0] == '@' then
|
1152
1167
|
|
1153
1168
|
raw_items.each{|x| x.gsub!(/^@/,'')}
|
1154
|
-
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1169
|
+
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1155
1170
|
|
1156
|
-
items = cons_items.map do |x|
|
1171
|
+
items = cons_items.map do |x|
|
1157
1172
|
|
1158
1173
|
if x.length >= 3 then
|
1159
1174
|
if x[0] != 'class' then
|
@@ -1172,17 +1187,17 @@ class Rexle
|
|
1172
1187
|
else
|
1173
1188
|
|
1174
1189
|
cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)}
|
1175
|
-
|
1176
|
-
items = cons_items.map do |x|
|
1190
|
+
|
1191
|
+
items = cons_items.map do |x|
|
1177
1192
|
|
1178
1193
|
if x.length >= 3 then
|
1179
1194
|
|
1180
1195
|
x[1] = '==' if x[1] == '='
|
1181
1196
|
if x[0] != '.' then
|
1182
1197
|
if x[0][/\//] then
|
1183
|
-
|
1198
|
+
|
1184
1199
|
path, value = x.values_at(0,-1)
|
1185
|
-
|
1200
|
+
|
1186
1201
|
if x[0][/@\w+$/] then
|
1187
1202
|
"r = e.xpath('#{path}').first; r and r.value == #{value}"
|
1188
1203
|
else
|
@@ -1206,16 +1221,16 @@ class Rexle
|
|
1206
1221
|
|
1207
1222
|
|
1208
1223
|
end
|
1209
|
-
|
1224
|
+
|
1210
1225
|
def scan_match(node, path)
|
1211
|
-
|
1226
|
+
|
1212
1227
|
if path == '//' then
|
1213
|
-
return [node, node.text,
|
1228
|
+
return [node, node.text,
|
1214
1229
|
node.elements.map {|x| scan_match x, path}]
|
1215
1230
|
end
|
1216
|
-
|
1231
|
+
|
1217
1232
|
r = []
|
1218
|
-
xpath2 = path[2..-1]
|
1233
|
+
xpath2 = path[2..-1]
|
1219
1234
|
#jr150316 xpath2.sub!(/^\*\//,'')
|
1220
1235
|
#jr150316xpath2.sub!(/^\*/,self.name)
|
1221
1236
|
#jr150316xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
@@ -1238,17 +1253,17 @@ class Rexle
|
|
1238
1253
|
end
|
1239
1254
|
a
|
1240
1255
|
end
|
1241
|
-
|
1242
|
-
|
1256
|
+
|
1257
|
+
|
1243
1258
|
def filter(raw_element, i, attr_search, &blk)
|
1244
|
-
|
1259
|
+
|
1245
1260
|
x, index = raw_element
|
1246
1261
|
e = @child_elements[index]
|
1247
1262
|
|
1248
1263
|
return unless e.is_a? Rexle::Element
|
1249
1264
|
name, value = e.name, e.value if e.is_a? Rexle::Element
|
1250
1265
|
|
1251
|
-
h = x.attributes # <-- fetch the attributes
|
1266
|
+
h = x.attributes # <-- fetch the attributes
|
1252
1267
|
|
1253
1268
|
if attr_search then
|
1254
1269
|
|
@@ -1264,21 +1279,21 @@ class Rexle
|
|
1264
1279
|
def attribute_search(attr_search, e, h, i=nil, &blk)
|
1265
1280
|
|
1266
1281
|
r2 = if attr_search.is_a? Integer then
|
1267
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
1282
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
1268
1283
|
elsif attr_search[/i\s(?:<|>|==|%)\s\d+/] and eval(attr_search) then
|
1269
|
-
block_given? ? blk.call(e) : e
|
1284
|
+
block_given? ? blk.call(e) : e
|
1270
1285
|
elsif h and !h.empty? and attr_search[/^h\[/] and eval(attr_search) then
|
1271
1286
|
block_given? ? blk.call(e) : e
|
1272
|
-
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1287
|
+
elsif attr_search[/^\(name ==/] and e.child_elements.select {|x|
|
1273
1288
|
next unless x.is_a? Rexle::Element
|
1274
1289
|
name, attributes, value = x.name, x.attributes, x.value.to_s
|
1275
1290
|
b = eval(attr_search)
|
1276
1291
|
b}.length > 0
|
1277
1292
|
|
1278
1293
|
block_given? ? blk.call(e) : e
|
1279
|
-
|
1280
|
-
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1281
|
-
block_given? ? blk.call(e) : e
|
1294
|
+
|
1295
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
1296
|
+
block_given? ? blk.call(e) : e
|
1282
1297
|
elsif attr_search[/^e\.value/]
|
1283
1298
|
|
1284
1299
|
v = attr_search[/[^\s]+$/]
|
@@ -1292,92 +1307,92 @@ class Rexle
|
|
1292
1307
|
block_given? ? blk.call(e) : e
|
1293
1308
|
elsif attr_search[/^\w*\(/] and e.element(attr_search)
|
1294
1309
|
block_given? ? blk.call(e) : e
|
1295
|
-
end
|
1310
|
+
end
|
1296
1311
|
|
1297
1312
|
r2
|
1298
1313
|
end
|
1299
|
-
|
1314
|
+
|
1300
1315
|
def recursive_scan(nodes, &blk)
|
1301
|
-
|
1316
|
+
|
1302
1317
|
nodes.each do |x|
|
1303
1318
|
|
1304
1319
|
if x.is_a? Rexle::Element then
|
1305
1320
|
blk.call(x)
|
1306
1321
|
recursive_scan(x.children, &blk) if x.children.length > 0
|
1307
|
-
end
|
1322
|
+
end
|
1308
1323
|
end
|
1309
1324
|
end
|
1310
|
-
|
1325
|
+
|
1311
1326
|
end # -- end of element --
|
1312
|
-
|
1327
|
+
|
1313
1328
|
|
1314
1329
|
class CData
|
1315
|
-
|
1330
|
+
|
1316
1331
|
def initialize(val='')
|
1317
1332
|
@value = val
|
1318
1333
|
end
|
1319
|
-
|
1334
|
+
|
1320
1335
|
def clone()
|
1321
1336
|
CData.new(@value)
|
1322
1337
|
end
|
1323
|
-
|
1338
|
+
|
1324
1339
|
def inspect()
|
1325
1340
|
@value.inspect
|
1326
1341
|
end
|
1327
|
-
|
1342
|
+
|
1328
1343
|
def print()
|
1329
1344
|
"<![CDATA[%s]]>" % @value
|
1330
1345
|
end
|
1331
|
-
|
1346
|
+
|
1332
1347
|
def to_s()
|
1333
1348
|
@value
|
1334
1349
|
end
|
1335
|
-
|
1350
|
+
|
1336
1351
|
def unescape()
|
1337
1352
|
s = @value.clone
|
1338
1353
|
%w(< < > > & & ' ').each_slice(2){|x| s.gsub!(*x)}
|
1339
1354
|
s
|
1340
|
-
end
|
1341
|
-
|
1355
|
+
end
|
1356
|
+
|
1342
1357
|
end
|
1343
|
-
|
1344
|
-
|
1358
|
+
|
1359
|
+
|
1345
1360
|
class Comment
|
1346
|
-
|
1361
|
+
|
1347
1362
|
|
1348
1363
|
def initialize(val='')
|
1349
1364
|
@e = Element.new('_').add_text val
|
1350
1365
|
@value = val
|
1351
1366
|
end
|
1352
|
-
|
1367
|
+
|
1353
1368
|
def add_element(e2)
|
1354
1369
|
@e.add e2
|
1355
1370
|
end
|
1356
|
-
|
1371
|
+
|
1357
1372
|
def add_text(t)
|
1358
1373
|
@e.add_text t
|
1359
1374
|
end
|
1360
|
-
|
1375
|
+
|
1361
1376
|
def inspect()
|
1362
1377
|
@value
|
1363
1378
|
end
|
1364
|
-
|
1379
|
+
|
1365
1380
|
def print()
|
1366
1381
|
"<!--%s-->" % @e.root.xpath('//./text()').join
|
1367
1382
|
end
|
1368
|
-
|
1383
|
+
|
1369
1384
|
def texts()
|
1370
1385
|
@e.texts
|
1371
1386
|
end
|
1372
|
-
|
1387
|
+
|
1373
1388
|
def to_s()
|
1374
1389
|
@value
|
1375
1390
|
end
|
1376
1391
|
end
|
1377
|
-
|
1392
|
+
|
1378
1393
|
class Elements
|
1379
1394
|
include Enumerable
|
1380
|
-
|
1395
|
+
|
1381
1396
|
def initialize(elements=[])
|
1382
1397
|
super()
|
1383
1398
|
@elements = elements
|
@@ -1389,63 +1404,63 @@ class Rexle
|
|
1389
1404
|
i = raw_i - 1
|
1390
1405
|
@elements[i]
|
1391
1406
|
end
|
1392
|
-
|
1407
|
+
|
1393
1408
|
def each(&blk) @elements.each(&blk) end
|
1394
1409
|
def empty?() @elements.empty? end
|
1395
|
-
|
1410
|
+
|
1396
1411
|
def index(e=nil, &blk)
|
1397
|
-
|
1412
|
+
|
1398
1413
|
if block_given? then
|
1399
1414
|
@elements.index(&blk)
|
1400
1415
|
else
|
1401
1416
|
@elements.index e
|
1402
1417
|
end
|
1403
1418
|
end
|
1404
|
-
|
1419
|
+
|
1405
1420
|
def last() @elements.last end
|
1406
1421
|
def length() @elements.length end
|
1407
1422
|
def to_a() @elements end
|
1408
|
-
|
1423
|
+
|
1409
1424
|
end # -- end of elements --
|
1410
1425
|
|
1411
1426
|
|
1412
1427
|
def parse(x=nil)
|
1413
|
-
|
1428
|
+
|
1414
1429
|
a = []
|
1415
|
-
|
1430
|
+
|
1416
1431
|
if x then
|
1417
1432
|
procs = {
|
1418
1433
|
String: proc {|x| parse_string(x)},
|
1419
1434
|
Array: proc {|x| x}
|
1420
1435
|
}
|
1421
1436
|
a = procs[x.class.to_s.to_sym].call(x)
|
1422
|
-
else
|
1437
|
+
else
|
1423
1438
|
a = yield
|
1424
1439
|
end
|
1425
|
-
|
1440
|
+
|
1426
1441
|
doc_node = ['doc',Attributes.new]
|
1427
1442
|
@a = procs[x.class.to_s.to_sym].call(x)
|
1428
1443
|
@doc = scan_element(*(doc_node << @a))
|
1429
|
-
|
1444
|
+
|
1430
1445
|
self
|
1431
1446
|
end
|
1432
1447
|
|
1433
1448
|
def add_attribute(x) @doc.attribute(x) end
|
1434
1449
|
def attribute(key) @doc.attribute(key) end
|
1435
1450
|
def attributes() @doc.attributes end
|
1436
|
-
|
1437
|
-
def add_element(element)
|
1438
1451
|
|
1439
|
-
|
1452
|
+
def add_element(element)
|
1453
|
+
|
1454
|
+
if @doc then
|
1440
1455
|
raise 'attempted adding second root element to document' if @doc.root
|
1441
|
-
@doc.root.add_element(element)
|
1456
|
+
@doc.root.add_element(element)
|
1442
1457
|
else
|
1443
|
-
doc_node = ['doc', Attributes.new, element.to_a]
|
1444
|
-
@doc = scan_element(*doc_node)
|
1458
|
+
doc_node = ['doc', Attributes.new, element.to_a]
|
1459
|
+
@doc = scan_element(*doc_node)
|
1445
1460
|
end
|
1446
1461
|
element
|
1447
1462
|
end
|
1448
|
-
|
1463
|
+
|
1449
1464
|
def add_text(s) end
|
1450
1465
|
|
1451
1466
|
alias add add_element
|
@@ -1455,26 +1470,26 @@ class Rexle
|
|
1455
1470
|
@doc.xpath(xpath).each {|e| e.delete; e = nil }
|
1456
1471
|
|
1457
1472
|
end
|
1458
|
-
|
1473
|
+
|
1459
1474
|
alias remove delete
|
1460
1475
|
|
1461
|
-
def element(xpath) self.xpath(xpath).first end
|
1476
|
+
def element(xpath) self.xpath(xpath).first end
|
1462
1477
|
def elements(s=nil) @doc.elements(s) end
|
1463
1478
|
def name() @doc.root.name end
|
1464
1479
|
def to_a() @a end
|
1465
|
-
|
1466
|
-
def to_s(options={})
|
1480
|
+
|
1481
|
+
def to_s(options={})
|
1467
1482
|
return '<UNDEFINED/>' unless @doc
|
1468
|
-
self.xml options
|
1483
|
+
self.xml options
|
1469
1484
|
end
|
1470
|
-
|
1485
|
+
|
1471
1486
|
def text(xpath) @doc.text(xpath) end
|
1472
|
-
def root()
|
1473
|
-
@doc.elements.first
|
1487
|
+
def root()
|
1488
|
+
@doc.elements.first
|
1474
1489
|
end
|
1475
1490
|
|
1476
|
-
def write(f)
|
1477
|
-
f.write xml
|
1491
|
+
def write(f)
|
1492
|
+
f.write xml
|
1478
1493
|
end
|
1479
1494
|
|
1480
1495
|
def xml(options={})
|
@@ -1487,7 +1502,7 @@ class Rexle
|
|
1487
1502
|
|
1488
1503
|
if o[:declaration] == true then
|
1489
1504
|
|
1490
|
-
unless @instructions.assoc 'xml' then
|
1505
|
+
unless @instructions and @instructions.assoc 'xml' then
|
1491
1506
|
@instructions.unshift ["xml","version='1.0' encoding='UTF-8'"]
|
1492
1507
|
end
|
1493
1508
|
end
|
@@ -1503,14 +1518,14 @@ class Rexle
|
|
1503
1518
|
private
|
1504
1519
|
|
1505
1520
|
def parse_rexle(x)
|
1506
|
-
|
1521
|
+
|
1507
1522
|
rp = RexleParser.new(x)
|
1508
1523
|
a = rp.to_a
|
1509
1524
|
|
1510
1525
|
@instructions = rp.instructions
|
1511
|
-
return a
|
1526
|
+
return a
|
1512
1527
|
end
|
1513
|
-
|
1528
|
+
|
1514
1529
|
def parse_string(x)
|
1515
1530
|
|
1516
1531
|
# check if the XML string is a dynarex document
|
@@ -1526,50 +1541,50 @@ class Rexle
|
|
1526
1541
|
'polyrex' => proc {|x| parse_rexle(x)}
|
1527
1542
|
}
|
1528
1543
|
other_parser = procs[recordx_type]
|
1529
|
-
|
1544
|
+
|
1530
1545
|
if other_parser then
|
1531
|
-
|
1546
|
+
|
1532
1547
|
begin
|
1533
1548
|
other_parser.call(x)
|
1534
1549
|
rescue
|
1535
1550
|
parse_rexle x
|
1536
1551
|
end
|
1537
|
-
|
1552
|
+
|
1538
1553
|
else
|
1539
|
-
|
1554
|
+
|
1540
1555
|
parse_rexle x
|
1541
|
-
|
1542
|
-
end
|
1543
|
-
|
1556
|
+
|
1557
|
+
end
|
1558
|
+
|
1544
1559
|
else
|
1545
1560
|
|
1546
1561
|
parse_rexle x
|
1547
|
-
|
1562
|
+
|
1548
1563
|
end
|
1549
1564
|
else
|
1550
1565
|
|
1551
1566
|
parse_rexle x
|
1552
|
-
|
1567
|
+
|
1553
1568
|
end
|
1554
1569
|
|
1555
1570
|
end
|
1556
|
-
|
1571
|
+
|
1557
1572
|
def scan_element(name=nil, attributes=nil, *children)
|
1558
|
-
|
1573
|
+
|
1559
1574
|
return unless name
|
1560
|
-
|
1575
|
+
|
1561
1576
|
return Rexle::CData.new(children.first) if name == '!['
|
1562
1577
|
return Rexle::Comment.new(children.first) if name == '!-'
|
1563
1578
|
|
1564
|
-
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1579
|
+
element = Rexle::Element.new(name, attributes: attributes, rexle: @rexle)
|
1565
1580
|
|
1566
1581
|
if children then
|
1567
1582
|
|
1568
1583
|
children.each do |x4|
|
1569
|
-
|
1584
|
+
|
1570
1585
|
|
1571
1586
|
if x4.is_a? Array then
|
1572
|
-
element.add_element scan_element(*x4)
|
1587
|
+
element.add_element scan_element(*x4)
|
1573
1588
|
elsif x4.is_a? String then
|
1574
1589
|
|
1575
1590
|
e = if x4.is_a? String then
|
@@ -1578,22 +1593,22 @@ class Rexle
|
|
1578
1593
|
elsif x4.name == '![' then
|
1579
1594
|
|
1580
1595
|
Rexle::CData.new(x4)
|
1581
|
-
|
1596
|
+
|
1582
1597
|
elsif x4.name == '!-' then
|
1583
1598
|
|
1584
1599
|
Rexle::Comment.new(x4)
|
1585
|
-
|
1600
|
+
|
1586
1601
|
end
|
1587
1602
|
|
1588
1603
|
element.add_element e
|
1589
1604
|
end
|
1590
1605
|
end
|
1591
1606
|
end
|
1592
|
-
|
1607
|
+
|
1593
1608
|
return element
|
1594
1609
|
end
|
1595
1610
|
|
1596
|
-
|
1611
|
+
|
1597
1612
|
# scan a rexml doc
|
1598
1613
|
#
|
1599
1614
|
def scan_doc(node)
|
@@ -1601,28 +1616,28 @@ class Rexle
|
|
1601
1616
|
attributes = node.attributes.inject({}){|r,x| r.merge(Hash[*x])}
|
1602
1617
|
[node.name, node.text.to_s, attributes, *children]
|
1603
1618
|
end
|
1604
|
-
|
1619
|
+
|
1605
1620
|
class Recordset < Array
|
1606
1621
|
|
1607
1622
|
def initialize(a)
|
1608
1623
|
super(a)
|
1609
1624
|
end
|
1610
|
-
|
1625
|
+
|
1611
1626
|
def to_doc(root: 'root')
|
1612
|
-
|
1627
|
+
|
1613
1628
|
recordset = self.map(&:to_a)
|
1614
1629
|
Rexle.new([root,{}, *recordset])
|
1615
|
-
|
1630
|
+
|
1616
1631
|
end
|
1617
|
-
|
1632
|
+
|
1618
1633
|
def xpath(xpath)
|
1619
1634
|
self.to_doc.root.xpath(xpath)
|
1620
1635
|
end
|
1621
|
-
|
1636
|
+
|
1622
1637
|
def element(xpath)
|
1623
1638
|
self.to_doc.root.element(xpath)
|
1624
1639
|
end
|
1625
1640
|
|
1626
|
-
end
|
1627
|
-
|
1641
|
+
end
|
1642
|
+
|
1628
1643
|
end
|