rexle 0.9.4 → 0.9.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rexle.rb +418 -157
- metadata +30 -27
data/lib/rexle.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/ruby
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
# file: rexle.rb
|
4
4
|
|
@@ -6,20 +6,48 @@ require 'rexml/document'
|
|
6
6
|
require 'rexleparser'
|
7
7
|
require 'dynarex-parser'
|
8
8
|
require 'polyrex-parser'
|
9
|
+
require 'cgi'
|
9
10
|
include REXML
|
10
11
|
|
12
|
+
# modifications:
|
13
|
+
# 20-Oct-2012: feature: added Rexle::Element#texts which is the equivalent
|
14
|
+
# of REXML::Element#texts
|
15
|
+
# 10-Sep-2012: bug fix: Removed code from method pretty_print in order to
|
16
|
+
# get the XML displayed properly
|
17
|
+
# 23-Aug-2012: feature: implemented xpath function contains()
|
18
|
+
# 17-Aug-2012: bug fix: pretty print now ignores text containing empty space
|
19
|
+
# 16-Aug-2012: the current element's text (if its not empty) is now returned
|
20
|
+
# from its children method
|
21
|
+
# 15-Aug-2012: feature: xpath containing child:: now supported
|
22
|
+
# 13-Aug-2012: bug fix: xpath can now handle the name() function
|
23
|
+
# 11-Aug-2012: bug fix: separated the max() method from 1 line into 3
|
24
|
+
# and that fixed it
|
25
|
+
# 08-Aug-2012: feature: added Element#insert_before and Element#insert_after
|
26
|
+
# 19-Jul-2012: Changed children to elements where appropriate
|
27
|
+
# 15-Jul-2012: bug fix: self.root.value is no longer appended
|
28
|
+
# to the body if there are no child elements
|
29
|
+
# 19-Jun-2012: a bug fix for .//*[@class]
|
30
|
+
# 17-Jun-2012: a couple of new xpath things are supported '.' and '|'
|
31
|
+
# 15-Apr-2012: bug fix: New element names are typecast as string
|
32
|
+
# 16-Mar-2012: bug fix: Element names which contain a colon can now be selected
|
33
|
+
# in the xpath.
|
34
|
+
# 22-Feb-2012: bug resolution: Deactivated the PolyrexParser; using RexleParser instead
|
35
|
+
# 14-Jan-2012: Implemented Rexle::Elements#each
|
36
|
+
# 21-Dec-2011: Bug fix: xpath modified to allow querying from the actual
|
37
|
+
# root rather than the 1st child element from the root
|
38
|
+
|
11
39
|
module XMLhelper
|
12
40
|
|
13
41
|
def doc_print(children)
|
14
|
-
|
15
|
-
body = children.
|
42
|
+
|
43
|
+
body = (children.nil? or children.empty? or children.is_an_empty_string? ) ? '' : scan_print(children).join
|
16
44
|
a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
17
45
|
"<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), body, self.root.name]
|
18
46
|
end
|
19
47
|
|
20
48
|
def doc_pretty_print(children)
|
21
49
|
|
22
|
-
body =
|
50
|
+
body = pretty_print(children,2).join
|
23
51
|
a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
24
52
|
ind = "\n "
|
25
53
|
"<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), ind, body, "\n", self.root.name]
|
@@ -28,16 +56,27 @@ module XMLhelper
|
|
28
56
|
def scan_print(nodes)
|
29
57
|
|
30
58
|
nodes.map do |x|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
59
|
+
|
60
|
+
if x.is_a? Rexle::Element then
|
61
|
+
if x.name.chr != '!' then
|
62
|
+
a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
63
|
+
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
64
|
+
|
65
|
+
if x.value.length > 0 or (x.children.length > 0 and not x.children.is_an_empty_string?) then
|
66
|
+
out = ["<%s>" % tag]
|
67
|
+
#out << x.value unless x.value.nil? || x.value.empty?
|
68
|
+
out << scan_print(x.children)
|
69
|
+
out << "</%s>" % x.name
|
70
|
+
else
|
71
|
+
out = ["<%s/>" % tag]
|
72
|
+
end
|
73
|
+
elsif x.name == '!-' then
|
74
|
+
"<!--%s-->" % x.value
|
75
|
+
else
|
76
|
+
"<![CDATA[%s]]>" % x.value
|
77
|
+
end
|
78
|
+
elsif x.is_a? String then
|
79
|
+
x
|
41
80
|
end
|
42
81
|
end
|
43
82
|
|
@@ -45,21 +84,30 @@ module XMLhelper
|
|
45
84
|
|
46
85
|
def pretty_print(nodes, indent='0')
|
47
86
|
indent = indent.to_i
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
87
|
+
|
88
|
+
nodes.select(){|x| x.is_a? Rexle::Element or x.strip.length > 0}
|
89
|
+
.map.with_index do |x, i|
|
90
|
+
|
91
|
+
if x.is_a? Rexle::Element then
|
92
|
+
unless x.name == '![' then
|
93
|
+
#return ["<%s/>" % x.name] if x.value = ''
|
94
|
+
a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
95
|
+
a ||= []
|
96
|
+
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
97
|
+
|
98
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
99
|
+
ind1 = x.children.grep(Rexle::Element).length > 0 ?
|
100
|
+
("\n" + ' ' * indent) : ''
|
101
|
+
out = ["%s<%s>%s" % [start, tag, ind1]]
|
102
|
+
|
103
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
104
|
+
ind2 = ind1.length > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
105
|
+
out << "%s</%s>" % [ind2, x.name]
|
106
|
+
else
|
107
|
+
"<![CDATA[%s]]>" % x.value
|
108
|
+
end
|
109
|
+
elsif x.is_a? String then
|
110
|
+
x.sub(/^[\n\s]+$/,'')
|
63
111
|
end
|
64
112
|
end
|
65
113
|
|
@@ -70,108 +118,224 @@ end
|
|
70
118
|
class Rexle
|
71
119
|
include XMLhelper
|
72
120
|
|
121
|
+
attr_reader :prefixes
|
122
|
+
|
123
|
+
def self.version()
|
124
|
+
'0.9.xx'
|
125
|
+
end
|
126
|
+
|
73
127
|
def initialize(x=nil)
|
74
128
|
super()
|
75
129
|
|
130
|
+
# what type of input is it? Is it a string, array, or REXML doc?
|
76
131
|
if x then
|
77
132
|
procs = {
|
78
|
-
|
79
|
-
|
80
|
-
|
133
|
+
String: proc {|x| parse_string(x)},
|
134
|
+
Array: proc {|x| x},
|
135
|
+
:"REXML::Document" => proc {|x| scan_doc x.root}
|
81
136
|
}
|
137
|
+
|
138
|
+
doc_node = ['doc','',{}]
|
139
|
+
|
82
140
|
|
83
|
-
a = procs[x.class.to_s.to_sym].call(x)
|
84
|
-
@doc = scan_element(*a)
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
def xpath(path, &blk)
|
90
|
-
|
91
|
-
# is it a function
|
92
|
-
fn_match = path.match(/^(\w+)\(([^\)]+)\)$/)
|
141
|
+
@a = procs[x.class.to_s.to_sym].call(x)
|
142
|
+
@doc = scan_element(*(doc_node << @a))
|
143
|
+
|
144
|
+
# fetch the namespaces
|
145
|
+
@prefixes = []
|
146
|
+
if @doc.root.attributes then
|
93
147
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
Array: proc {|x| block_given? ? x : x.flatten },
|
98
|
-
String: proc {|x| x},
|
99
|
-
:"Rexle::Element" => proc {|x| [x]}
|
100
|
-
}
|
101
|
-
bucket = []
|
102
|
-
result = @doc.xpath(path, bucket, &blk)
|
148
|
+
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
149
|
+
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
150
|
+
end
|
103
151
|
|
104
|
-
procs[result.class.to_s.to_sym].call(result)
|
105
|
-
|
106
|
-
else
|
107
|
-
m, xpath_value = fn_match.captures
|
108
|
-
method(m.to_sym).call(xpath_value)
|
109
152
|
end
|
110
153
|
|
111
154
|
end
|
155
|
+
|
156
|
+
def xpath(path, &blk)
|
157
|
+
@doc.xpath(path, &blk)
|
158
|
+
end
|
112
159
|
|
113
160
|
class Element
|
114
161
|
include XMLhelper
|
115
162
|
|
116
163
|
attr_accessor :name, :value, :parent
|
117
|
-
attr_reader :child_lookup
|
164
|
+
attr_reader :child_lookup, :child_elements
|
165
|
+
|
166
|
+
alias original_clone clone
|
118
167
|
|
119
|
-
def initialize(name=nil, value='', attributes={})
|
168
|
+
def initialize(name=nil, value='', attributes={}, rexle=nil)
|
169
|
+
@rexle = rexle
|
120
170
|
super()
|
121
|
-
@name, @value, @attributes = name, value, attributes
|
171
|
+
@name, @value, @attributes = name.to_s, value, attributes
|
122
172
|
raise "Element name must not be blank" unless name
|
123
173
|
@child_elements = []
|
124
174
|
@child_lookup = []
|
125
175
|
end
|
126
176
|
|
127
|
-
def
|
177
|
+
def contains(raw_args)
|
178
|
+
path, raw_val = raw_args.split(',',2)
|
179
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
180
|
+
|
181
|
+
anode = query_xpath(path)
|
182
|
+
return unless anode
|
183
|
+
a = scan_contents(anode.first)
|
184
|
+
|
185
|
+
[a.grep(/#{val}/).length > 0]
|
186
|
+
end
|
187
|
+
|
188
|
+
def count(path)
|
189
|
+
length = query_xpath(path).flatten.compact.length
|
190
|
+
length
|
191
|
+
end
|
192
|
+
|
193
|
+
def max(path)
|
194
|
+
a = query_xpath(path).flatten.compact.map(&:to_i)
|
195
|
+
a.max
|
196
|
+
end
|
197
|
+
|
198
|
+
def name()
|
199
|
+
if @rexle then
|
200
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] } if @rexle.prefixes.is_a? Array
|
201
|
+
prefix ? @name.sub(prefix + ':', '') : @name
|
202
|
+
else
|
203
|
+
@name
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def xpath(path, rlist=[], &blk)
|
208
|
+
r = filter_xpath(path, rlist=[], &blk)
|
209
|
+
r.is_a?(Array) ? r.compact : r
|
210
|
+
end
|
211
|
+
|
212
|
+
def filter_xpath(path, rlist=[], &blk)
|
128
213
|
|
129
|
-
|
130
|
-
|
131
|
-
|
214
|
+
# is it a function
|
215
|
+
fn_match = path.match(/^(\w+)\(["']?([^\)]*)["']?\)$/)
|
216
|
+
|
217
|
+
# Array: proc {|x| x.flatten.compact},
|
218
|
+
if (fn_match and fn_match.captures.first[/^(attribute|@)/]) or fn_match.nil? then
|
219
|
+
procs = {
|
220
|
+
Array: proc {|x| block_given? ? x : x.flatten.uniq },
|
221
|
+
String: proc {|x| x},
|
222
|
+
Hash: proc {|x| x},
|
223
|
+
TrueClass: proc{|x| x},
|
224
|
+
FalseClass: proc{|x| x},
|
225
|
+
:"Rexle::Element" => proc {|x| [x]}
|
226
|
+
}
|
227
|
+
bucket = []
|
228
|
+
raw_results = path.split('|').map do |xp|
|
229
|
+
query_xpath(xp, bucket, &blk)
|
230
|
+
end
|
231
|
+
|
232
|
+
#results = raw_results.inject(&:+)
|
233
|
+
results = raw_results.last
|
234
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
235
|
+
|
236
|
+
else
|
237
|
+
m, xpath_value = fn_match.captures
|
238
|
+
xpath_value.empty? ? method(m.to_sym).call : method(m.to_sym).call(xpath_value)
|
239
|
+
end
|
240
|
+
|
241
|
+
end
|
242
|
+
|
243
|
+
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
244
|
+
|
245
|
+
#remove any pre'fixes
|
246
|
+
#@rexle.prefixes.each {|x| xpath_value.sub!(x + ':','') }
|
247
|
+
flag_func = false
|
248
|
+
|
249
|
+
xpath_value = raw_xpath_value.sub('child::','./')
|
250
|
+
#xpath_value.sub!(/\.\/(?=[\/])/,'')
|
251
|
+
|
252
|
+
if xpath_value[/^[\w\/]+\s*=.*/] then
|
253
|
+
flag_func = true
|
254
|
+
|
255
|
+
xpath_value.sub!(/^\w+\s*=.*/,'.[\0]')
|
256
|
+
xpath_value.sub!(/\/([\w]+\s*=.*)/,'[\1]')
|
132
257
|
|
258
|
+
#result = self.element xpath_value
|
259
|
+
#return [(result.is_a?(Rexle::Element) ? true : false)]
|
260
|
+
end
|
261
|
+
|
262
|
+
#xpath_value.sub!(/^attribute::/,'*/attribute::')
|
263
|
+
raw_path, raw_condition = xpath_value.sub(/^\.?\/(?!\/)/,'')\
|
264
|
+
.match(/([^\[]+)(\[[^\]]+\])?/).captures
|
265
|
+
|
266
|
+
remaining_path = ($').to_s
|
267
|
+
|
268
|
+
r = raw_path[/([^\/]+)(?=\/\/)/,1]
|
269
|
+
if r then
|
270
|
+
a_path = raw_path.split(/(?=\/\/)/,2)
|
271
|
+
else
|
272
|
+
a_path = raw_path.split('/',2)
|
273
|
+
end
|
274
|
+
|
133
275
|
condition = raw_condition if a_path.length <= 1
|
134
276
|
|
135
277
|
if raw_path[0,2] == '//' then
|
136
|
-
s =
|
137
|
-
|
138
|
-
elsif raw_path == 'text()' then
|
278
|
+
s = ''
|
279
|
+
elsif raw_path == 'text()'
|
139
280
|
a_path.shift
|
140
281
|
return @value
|
141
282
|
else
|
142
|
-
|
143
|
-
|
144
|
-
|
283
|
+
|
284
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
285
|
+
|
286
|
+
return @attributes if attribute == '*'
|
287
|
+
return [@attributes[attribute.to_sym]] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
|
145
288
|
s = a_path.shift
|
146
289
|
end
|
147
290
|
|
148
291
|
# isolate the xpath to return just the path to the current element
|
149
|
-
elmnt_path = s[/^([\w\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
150
|
-
element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
|
151
292
|
|
293
|
+
elmnt_path = s[/^([\w:\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
294
|
+
element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
|
295
|
+
|
152
296
|
if element_part then
|
153
297
|
unless element_part[/^@/] then
|
154
|
-
element_name = element_part
|
298
|
+
element_name = element_part[/^[\w:\*\.]+/]
|
155
299
|
else
|
156
|
-
|
300
|
+
|
301
|
+
condition = xpath_value[/^\[/] ? xpath_value : element_part
|
157
302
|
element_name = nil
|
158
303
|
end
|
159
304
|
|
160
305
|
end
|
161
306
|
|
307
|
+
#element_name ||= '*'
|
162
308
|
raw_condition = '' if condition
|
309
|
+
attr_search = format_condition(condition) if condition and condition.length > 0
|
310
|
+
|
311
|
+
attr_search2 = xpath_value[/^\[(.*)\]$/,1]
|
312
|
+
if attr_search2 then
|
313
|
+
r4 = attribute_search(attr_search, self, self.attributes)
|
314
|
+
return r4
|
315
|
+
end
|
316
|
+
|
317
|
+
return_elements = []
|
163
318
|
|
164
|
-
|
319
|
+
if raw_path[0,2] == '//' then
|
165
320
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
321
|
+
regex = /\[(\d+)\]/
|
322
|
+
n = xpath_value[regex,1]
|
323
|
+
xpath_value.slice!(regex)
|
324
|
+
|
325
|
+
rs = scan_match(self, xpath_value).flatten.compact
|
326
|
+
return n ? rs[n.to_i-1] : rs
|
327
|
+
|
328
|
+
elsif (raw_path == '.' or raw_path == self.name) and attr_search.nil? then
|
329
|
+
return [self]
|
330
|
+
else
|
331
|
+
|
332
|
+
return_elements = @child_lookup.map.with_index.select do |x|
|
333
|
+
(x[0][0] == element_name || element_name == '.') or \
|
334
|
+
(element_name == '*' && x[0].is_a?(Array))
|
335
|
+
end
|
170
336
|
|
171
|
-
return_elements = @child_lookup.map.with_index.select do |x|
|
172
|
-
x[0][0] == element_name or element_name == '*'
|
173
337
|
end
|
174
|
-
|
338
|
+
|
175
339
|
if return_elements.length > 0 then
|
176
340
|
|
177
341
|
if (a_path + [remaining_path]).join.empty? then
|
@@ -180,10 +344,13 @@ class Rexle
|
|
180
344
|
else
|
181
345
|
|
182
346
|
rlist << return_elements.map.with_index do |x,i|
|
347
|
+
|
183
348
|
rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(a_path.join('/') + raw_condition.to_s + remaining_path, &blk); (r || e) }
|
184
349
|
next if rtn_element.nil? or (rtn_element.is_a? Array and rtn_element.empty?)
|
185
350
|
|
186
|
-
if rtn_element.is_a?
|
351
|
+
if rtn_element.is_a? Hash then
|
352
|
+
rtn_element
|
353
|
+
elsif rtn_element.is_a? Array then
|
187
354
|
rtn_element
|
188
355
|
elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) and not(rtn_element[0].is_a? String))
|
189
356
|
rtn_element
|
@@ -192,15 +359,17 @@ class Rexle
|
|
192
359
|
end
|
193
360
|
end
|
194
361
|
#
|
362
|
+
|
195
363
|
rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
|
196
364
|
|
197
365
|
end
|
366
|
+
|
198
367
|
rlist.compact! if rlist.is_a? Array
|
199
368
|
|
200
369
|
else
|
201
370
|
|
202
371
|
# strip off the 1st element from the XPath
|
203
|
-
new_xpath = xpath_value[
|
372
|
+
new_xpath = xpath_value[/^\/\/[\w:]+\/(.*)/,1]
|
204
373
|
|
205
374
|
if new_xpath then
|
206
375
|
self.xpath(new_xpath + raw_condition.to_s + remaining_path, rlist,&blk)
|
@@ -209,19 +378,31 @@ class Rexle
|
|
209
378
|
|
210
379
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) or (rlist.length > 1 and rlist[0].is_a? Array)
|
211
380
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
381
|
+
rlist = (rlist.length > 0 ? true : false) if flag_func == true
|
212
382
|
rlist
|
213
383
|
end
|
214
384
|
|
215
385
|
def add_element(item)
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
386
|
+
if item.is_a? Rexle::Element then
|
387
|
+
@child_lookup << [item.name, item.attributes, item.value]
|
388
|
+
@child_elements << item
|
389
|
+
# add a reference from this element (the parent) to the child
|
390
|
+
item.parent = self
|
391
|
+
item
|
392
|
+
elsif item.is_a? String then
|
393
|
+
@child_lookup << item
|
394
|
+
@child_elements << item
|
395
|
+
elsif item.is_a? Rexle then
|
396
|
+
self.add_element(item.root)
|
397
|
+
end
|
221
398
|
end
|
222
399
|
|
223
400
|
def inspect()
|
401
|
+
if self.xml.length > 30 then
|
224
402
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
403
|
+
else
|
404
|
+
self.xml
|
405
|
+
end
|
225
406
|
end
|
226
407
|
|
227
408
|
alias add add_element
|
@@ -240,11 +421,29 @@ class Rexle
|
|
240
421
|
end
|
241
422
|
|
242
423
|
def add_text(s) @value = s; self end
|
243
|
-
|
424
|
+
|
425
|
+
def attribute(key)
|
426
|
+
key = key.to_sym if key.is_a? String
|
427
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
428
|
+
end
|
429
|
+
|
244
430
|
def attributes() @attributes end
|
245
|
-
|
246
|
-
def children
|
247
|
-
|
431
|
+
|
432
|
+
def children()
|
433
|
+
return unless @value
|
434
|
+
r = (@value.empty? ? [] : [@value]) + @child_elements
|
435
|
+
def r.is_an_empty_string?()
|
436
|
+
self.length == 1 and self.first == ''
|
437
|
+
end
|
438
|
+
|
439
|
+
return r
|
440
|
+
end
|
441
|
+
|
442
|
+
def children=(a) @child_elements = a end
|
443
|
+
|
444
|
+
def deep_clone() Rexle.new(self.xml).root end
|
445
|
+
def clone() Element.new(@name, @value, @attributes) end
|
446
|
+
|
248
447
|
def delete(obj=nil)
|
249
448
|
if obj then
|
250
449
|
i = @child_elements.index(obj)
|
@@ -254,17 +453,30 @@ class Rexle
|
|
254
453
|
end
|
255
454
|
end
|
256
455
|
|
257
|
-
def element(s)
|
456
|
+
def element(s)
|
457
|
+
r = self.xpath(s)
|
458
|
+
r.is_a?(Array) ? r.first : r
|
459
|
+
end
|
258
460
|
|
259
461
|
def elements(s=nil)
|
260
462
|
procs = {
|
261
|
-
NilClass: proc {Elements.new(@child_elements)},
|
463
|
+
NilClass: proc {Elements.new(@child_elements.select{|x| x.is_a? Rexle::Element })},
|
262
464
|
String: proc {|x| @child_elements[x]}
|
263
465
|
}
|
466
|
+
|
264
467
|
procs[s.class.to_s.to_sym].call(s)
|
265
468
|
end
|
266
469
|
|
267
|
-
def
|
470
|
+
def doc_root() @rexle.root end
|
471
|
+
def each(&blk)
|
472
|
+
@child_elements.each(&blk) #unless @child_elements.empty?
|
473
|
+
end
|
474
|
+
def has_elements?() !self.elements.empty? end
|
475
|
+
|
476
|
+
def insert_after(node) insert(node, 1) end
|
477
|
+
def insert_before(node) insert(node) end
|
478
|
+
|
479
|
+
def root() self end #@rexle.root end
|
268
480
|
|
269
481
|
def text(s='')
|
270
482
|
|
@@ -274,6 +486,7 @@ class Rexle
|
|
274
486
|
e = self.element(s)
|
275
487
|
result = e.value if e
|
276
488
|
end
|
489
|
+
result = CGI.unescape_html result.to_s
|
277
490
|
|
278
491
|
def result.unescape()
|
279
492
|
s = self.clone
|
@@ -283,10 +496,14 @@ class Rexle
|
|
283
496
|
|
284
497
|
result
|
285
498
|
end
|
499
|
+
|
500
|
+
def texts()
|
501
|
+
[@value] + @child_elements.select {|x| x.is_a? String}
|
502
|
+
end
|
286
503
|
|
287
504
|
def value=(raw_s)
|
288
505
|
|
289
|
-
@value = raw_s.to_s.clone
|
506
|
+
@value = String.new(raw_s.to_s.clone)
|
290
507
|
escape_chars = %w(& &amp; < &lt; > &gt;).each_slice(2).to_a
|
291
508
|
escape_chars.each{|x| @value.gsub!(*x)}
|
292
509
|
|
@@ -306,17 +523,27 @@ class Rexle
|
|
306
523
|
method(msg).call(self.children)
|
307
524
|
end
|
308
525
|
|
526
|
+
alias to_s xml
|
309
527
|
|
310
528
|
private
|
529
|
+
|
530
|
+
def insert(node,offset=0)
|
531
|
+
i = parent.child_elements.index(self)
|
532
|
+
return unless i
|
533
|
+
parent.child_elements.insert(i+offset,node)
|
534
|
+
parent.child_lookup.insert(i+offset, [node.name, node.attributes, node.value])
|
535
|
+
self
|
536
|
+
end
|
311
537
|
|
312
538
|
def format_condition(condition)
|
313
|
-
|
314
|
-
raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=<>]+|position\(\)|[@\w
|
539
|
+
|
540
|
+
raw_items = condition[1..-1].scan(/\'[^\']*\'|\"[^\"]*\"|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/&;]+/)
|
315
541
|
|
316
542
|
if raw_items[0][/^\d+$/] then
|
317
543
|
return raw_items[0].to_i
|
318
544
|
elsif raw_items[0] == 'position()' then
|
319
|
-
|
545
|
+
rrr = "i %s %s" % [raw_items[1].gsub('&lt;','<').gsub('&gt;','>'), raw_items[-1]]
|
546
|
+
return rrr
|
320
547
|
else
|
321
548
|
|
322
549
|
andor_items = raw_items.map.with_index.select{|x,i| x[/\band\b|\bor\b/]}.map{|x| [x.last, x.last + 1]}.flatten
|
@@ -331,9 +558,10 @@ class Rexle
|
|
331
558
|
|
332
559
|
if x.length >= 3 then
|
333
560
|
x[1] = '==' if x[1] == '='
|
334
|
-
"h[
|
561
|
+
"h[:'%s'] %s %s" % x
|
335
562
|
else
|
336
|
-
|
563
|
+
|
564
|
+
x.join[/^(and|or)$/] ? x : ("h[:'%s']" % x)
|
337
565
|
end
|
338
566
|
end
|
339
567
|
|
@@ -344,6 +572,7 @@ class Rexle
|
|
344
572
|
items = cons_items.map do |x|
|
345
573
|
|
346
574
|
if x.length >= 3 then
|
575
|
+
|
347
576
|
x[1] = '==' if x[1] == '='
|
348
577
|
if x[0] != '.' then
|
349
578
|
if x[0][/\//] then
|
@@ -351,7 +580,7 @@ class Rexle
|
|
351
580
|
|
352
581
|
"e.xpath('#{path}').first.value == #{value}"
|
353
582
|
else
|
354
|
-
"(name == '%s' and value %s %s)" % [x[0], x[1], x[2]]
|
583
|
+
"(name == '%s' and value %s '%s')" % [x[0], x[1], x[2].sub(/^['"](.*)['"]$/,'\1')]
|
355
584
|
end
|
356
585
|
else
|
357
586
|
"e.value %s %s" % [x[1], x[2]]
|
@@ -360,7 +589,7 @@ class Rexle
|
|
360
589
|
x
|
361
590
|
end
|
362
591
|
end
|
363
|
-
|
592
|
+
|
364
593
|
return items.join(' ')
|
365
594
|
end
|
366
595
|
end
|
@@ -368,59 +597,47 @@ class Rexle
|
|
368
597
|
|
369
598
|
end
|
370
599
|
|
371
|
-
|
600
|
+
|
601
|
+
def scan_match(node, path)
|
372
602
|
|
373
|
-
|
603
|
+
r = []
|
604
|
+
xpath2 = path[2..-1]
|
605
|
+
xpath2.sub!(/^\*\//,'')
|
606
|
+
xpath2.sub!(/^\*/,self.name)
|
607
|
+
xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
608
|
+
|
374
609
|
|
375
|
-
|
610
|
+
r << node.xpath(xpath2)
|
611
|
+
r << node.elements.map {|n| scan_match(n, path) if n.is_a? Rexle::Element}
|
612
|
+
r
|
613
|
+
end
|
376
614
|
|
615
|
+
# used by xpath function contains()
|
616
|
+
#
|
617
|
+
def scan_contents(node)
|
377
618
|
|
378
|
-
|
379
|
-
|
380
|
-
if attr_search then
|
381
|
-
rlist << x if h and eval(attr_search)
|
382
|
-
else
|
383
|
-
rlist << x
|
384
|
-
end
|
385
|
-
end
|
386
|
-
else
|
619
|
+
a = []
|
620
|
+
a << node.text
|
387
621
|
|
388
|
-
|
389
|
-
|
390
|
-
if h and h.has_key? attribute.to_sym then
|
391
|
-
rlist << h[attribute.to_sym]
|
392
|
-
end
|
393
|
-
else
|
394
|
-
rlist << x if h and eval(attr_search)
|
395
|
-
end
|
396
|
-
end
|
397
|
-
|
398
|
-
x.xpath('//' + element.to_s + condition.to_s, rlist) unless x.children.empty?
|
622
|
+
node.elements.each do |child|
|
623
|
+
a.concat scan_contents(child)
|
399
624
|
end
|
400
|
-
|
625
|
+
a
|
401
626
|
end
|
402
|
-
|
627
|
+
|
628
|
+
|
403
629
|
def filter(raw_element, i, attr_search, &blk)
|
404
630
|
|
405
631
|
x = raw_element
|
406
632
|
e = @child_elements[x.last]
|
407
|
-
h = x[0][1] # <-- fetch the attributes
|
408
633
|
|
634
|
+
return unless e.is_a? Rexle::Element
|
635
|
+
name, value = e.name, e.value if e.is_a? Rexle::Element
|
636
|
+
|
637
|
+
h = x[0][1] # <-- fetch the attributes
|
638
|
+
|
409
639
|
if attr_search then
|
410
|
-
|
411
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
412
|
-
elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
|
413
|
-
block_given? ? blk.call(e) : e
|
414
|
-
elsif h and attr_search[/^h\[/] and eval(attr_search)
|
415
|
-
block_given? ? blk.call(e) : e
|
416
|
-
elsif attr_search[/^\(name ==/] and \
|
417
|
-
e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
|
418
|
-
block_given? ? blk.call(e) : e
|
419
|
-
elsif attr_search[/^e\.value/] and eval(attr_search)
|
420
|
-
block_given? ? blk.call(e) : e
|
421
|
-
elsif attr_search[/^e\.xpath/] and eval(attr_search)
|
422
|
-
block_given? ? blk.call(e) : e
|
423
|
-
end
|
640
|
+
attribute_search(attr_search,e, h, i, &blk)
|
424
641
|
else
|
425
642
|
|
426
643
|
block_given? ? blk.call(e) : e
|
@@ -428,9 +645,28 @@ class Rexle
|
|
428
645
|
|
429
646
|
end
|
430
647
|
|
648
|
+
def attribute_search(attr_search, e, h, i=nil, &blk)
|
649
|
+
if attr_search.is_a? Fixnum then
|
650
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
651
|
+
elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
|
652
|
+
block_given? ? blk.call(e) : e
|
653
|
+
elsif h and attr_search[/^h\[/] and eval(attr_search)
|
654
|
+
block_given? ? blk.call(e) : e
|
655
|
+
elsif attr_search[/^\(name ==/] and e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
|
656
|
+
block_given? ? blk.call(e) : e
|
657
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
658
|
+
block_given? ? blk.call(e) : e
|
659
|
+
elsif attr_search[/^e\.value/] and eval(attr_search)
|
660
|
+
block_given? ? blk.call(e) : e
|
661
|
+
elsif attr_search[/^e\.xpath/] and eval(attr_search)
|
662
|
+
block_given? ? blk.call(e) : e
|
663
|
+
end
|
664
|
+
end
|
431
665
|
end # -- end of element --
|
432
666
|
|
433
667
|
class Elements
|
668
|
+
include Enumerable
|
669
|
+
|
434
670
|
def initialize(elements=[])
|
435
671
|
super()
|
436
672
|
@elements = elements
|
@@ -439,6 +675,10 @@ class Rexle
|
|
439
675
|
def [](i)
|
440
676
|
@elements[i-1]
|
441
677
|
end
|
678
|
+
|
679
|
+
def each(&blk) @elements.each(&blk) end
|
680
|
+
def to_a() @elements end
|
681
|
+
|
442
682
|
end # -- end of elements --
|
443
683
|
|
444
684
|
|
@@ -448,16 +688,17 @@ class Rexle
|
|
448
688
|
|
449
689
|
if x then
|
450
690
|
procs = {
|
451
|
-
|
452
|
-
|
453
|
-
|
691
|
+
String: proc {|x| parse_string(x)},
|
692
|
+
Array: proc {|x| x},
|
693
|
+
:"REXML::Document" => proc {|x| scan_doc x.root}
|
454
694
|
}
|
455
695
|
a = procs[x.class.to_s.to_sym].call(x)
|
456
696
|
else
|
457
697
|
a = yield
|
458
698
|
end
|
459
|
-
|
460
|
-
@
|
699
|
+
doc_node = ['doc','',{}]
|
700
|
+
@a = procs[x.class.to_s.to_sym].call(x)
|
701
|
+
@doc = scan_element(*(doc_node << @a))
|
461
702
|
self
|
462
703
|
end
|
463
704
|
|
@@ -465,15 +706,22 @@ class Rexle
|
|
465
706
|
def attribute(key) @doc.attribute(key) end
|
466
707
|
def attributes() @doc.attributes end
|
467
708
|
def add_element(element) @doc.root.add_element(element) end
|
709
|
+
def add_text(s) end
|
468
710
|
|
469
711
|
alias add add_element
|
470
712
|
|
471
|
-
def delete(xpath)
|
472
|
-
|
713
|
+
def delete(xpath)
|
714
|
+
e = @doc.element(xpath)
|
715
|
+
e.delete if e
|
716
|
+
end
|
717
|
+
|
718
|
+
def element(xpath) self.xpath(xpath).first end
|
473
719
|
def elements(s=nil) @doc.elements(s) end
|
720
|
+
def name() @doc.root.name end
|
721
|
+
def to_a() @a end
|
474
722
|
def to_s(options={}) self.xml options end
|
475
723
|
def text(xpath) @doc.text(xpath) end
|
476
|
-
def root() @doc end
|
724
|
+
def root() @doc.elements.first end
|
477
725
|
|
478
726
|
def write(f)
|
479
727
|
f.write xml
|
@@ -482,9 +730,11 @@ class Rexle
|
|
482
730
|
def xml(options={})
|
483
731
|
o = {pretty: false, declaration: true}.merge(options)
|
484
732
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
733
|
+
|
485
734
|
r = ''
|
486
735
|
r = "<?xml version='1.0' encoding='UTF-8'?>\n" if o[:declaration] == true
|
487
736
|
r << method(msg).call(self.root.children)
|
737
|
+
|
488
738
|
r
|
489
739
|
end
|
490
740
|
|
@@ -501,26 +751,37 @@ class Rexle
|
|
501
751
|
if recordx_type then
|
502
752
|
procs = {
|
503
753
|
'dynarex' => proc {|x| DynarexParser.new(x).to_a},
|
504
|
-
'polyrex' => proc {|x| PolyrexParser.new(x).to_a}
|
754
|
+
'polyrex' => proc {|x| PolyrexParser.new(x).to_a},
|
755
|
+
'polyrex' => proc {|x| RexleParser.new(x).to_a}
|
505
756
|
}
|
506
757
|
procs[recordx_type].call(x)
|
507
758
|
else
|
759
|
+
|
508
760
|
RexleParser.new(x).to_a
|
509
761
|
end
|
510
762
|
else
|
763
|
+
|
511
764
|
RexleParser.new(x).to_a
|
512
765
|
end
|
513
766
|
|
514
767
|
end
|
768
|
+
|
769
|
+
def scan_element(name, value=nil, attributes=nil, *children)
|
515
770
|
|
516
|
-
|
517
|
-
|
518
|
-
|
771
|
+
element = Element.new(name, value, attributes, self)
|
772
|
+
|
773
|
+
if children then
|
774
|
+
children.each do |x|
|
775
|
+
if x.is_a? Array then
|
776
|
+
element.add_element scan_element(*x)
|
777
|
+
elsif x.is_a? String
|
778
|
+
element.add_element x
|
779
|
+
end
|
780
|
+
end
|
781
|
+
end
|
519
782
|
return element
|
520
783
|
end
|
521
784
|
|
522
|
-
def count(path) @doc.xpath(path).flatten.compact.length end
|
523
|
-
def max(path) @doc.xpath(path).map(&:to_i).max end
|
524
785
|
|
525
786
|
# scan a rexml doc
|
526
787
|
#
|
metadata
CHANGED
@@ -1,57 +1,61 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease:
|
5
|
+
version: 0.9.5
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
-
|
7
|
+
authors:
|
8
|
+
- James Robertson
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
12
|
|
12
|
-
date:
|
13
|
-
default_executable:
|
13
|
+
date: 2012-10-20 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rexleparser
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
23
|
version: "0"
|
24
|
-
|
24
|
+
type: :runtime
|
25
|
+
version_requirements: *id001
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: dynarex-parser
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
30
31
|
requirements:
|
31
32
|
- - ">="
|
32
33
|
- !ruby/object:Gem::Version
|
33
34
|
version: "0"
|
34
|
-
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id002
|
35
37
|
- !ruby/object:Gem::Dependency
|
36
38
|
name: polyrex-parser
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
40
42
|
requirements:
|
41
43
|
- - ">="
|
42
44
|
- !ruby/object:Gem::Version
|
43
45
|
version: "0"
|
44
|
-
version:
|
45
|
-
- !ruby/object:Gem::Dependency
|
46
|
-
name: nokogiri
|
47
46
|
type: :runtime
|
48
|
-
|
49
|
-
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rexle-builder
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
50
53
|
requirements:
|
51
54
|
- - ">="
|
52
55
|
- !ruby/object:Gem::Version
|
53
56
|
version: "0"
|
54
|
-
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id004
|
55
59
|
description:
|
56
60
|
email:
|
57
61
|
executables: []
|
@@ -62,7 +66,6 @@ extra_rdoc_files: []
|
|
62
66
|
|
63
67
|
files:
|
64
68
|
- lib/rexle.rb
|
65
|
-
has_rdoc: true
|
66
69
|
homepage:
|
67
70
|
licenses: []
|
68
71
|
|
@@ -72,23 +75,23 @@ rdoc_options: []
|
|
72
75
|
require_paths:
|
73
76
|
- lib
|
74
77
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
75
79
|
requirements:
|
76
80
|
- - ">="
|
77
81
|
- !ruby/object:Gem::Version
|
78
82
|
version: "0"
|
79
|
-
version:
|
80
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
81
85
|
requirements:
|
82
86
|
- - ">="
|
83
87
|
- !ruby/object:Gem::Version
|
84
88
|
version: "0"
|
85
|
-
version:
|
86
89
|
requirements: []
|
87
90
|
|
88
91
|
rubyforge_project:
|
89
|
-
rubygems_version: 1.
|
92
|
+
rubygems_version: 1.8.23
|
90
93
|
signing_key:
|
91
94
|
specification_version: 3
|
92
|
-
summary:
|
95
|
+
summary: Rexle is a simple XML parser written purely in Ruby
|
93
96
|
test_files: []
|
94
97
|
|