rexle 0.9.4 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rexle.rb +418 -157
- metadata +30 -27
data/lib/rexle.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/ruby
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
# file: rexle.rb
|
4
4
|
|
@@ -6,20 +6,48 @@ require 'rexml/document'
|
|
6
6
|
require 'rexleparser'
|
7
7
|
require 'dynarex-parser'
|
8
8
|
require 'polyrex-parser'
|
9
|
+
require 'cgi'
|
9
10
|
include REXML
|
10
11
|
|
12
|
+
# modifications:
|
13
|
+
# 20-Oct-2012: feature: added Rexle::Element#texts which is the equivalent
|
14
|
+
# of REXML::Element#texts
|
15
|
+
# 10-Sep-2012: bug fix: Removed code from method pretty_print in order to
|
16
|
+
# get the XML displayed properly
|
17
|
+
# 23-Aug-2012: feature: implemented xpath function contains()
|
18
|
+
# 17-Aug-2012: bug fix: pretty print now ignores text containing empty space
|
19
|
+
# 16-Aug-2012: the current element's text (if it's not empty) is now returned
|
20
|
+
# from its children method
|
21
|
+
# 15-Aug-2012: feature: xpath containing child:: now supported
|
22
|
+
# 13-Aug-2012: bug fix: xpath can now handle the name() function
|
23
|
+
# 11-Aug-2012: bug fix: separated the max() method from 1 line into 3
|
24
|
+
# and that fixed it
|
25
|
+
# 08-Aug-2012: feature: added Element#insert_before and Element#insert_after
|
26
|
+
# 19-Jul-2012: Changed children to elements where appropriate
|
27
|
+
# 15-Jul-2012: bug fix: self.root.value is no longer appended
|
28
|
+
# to the body if there are no child elements
|
29
|
+
# 19-Jun-2012: a bug fix for .//*[@class]
|
30
|
+
# 17-Jun-2012: a couple of new xpath things are supported '.' and '|'
|
31
|
+
# 15-Apr-2012: bug fix: New element names are typecast as string
|
32
|
+
# 16-Mar-2012: bug fix: Element names which contain a colon can now be selected
|
33
|
+
# in the xpath.
|
34
|
+
# 22-Feb-2012: bug resolution: Deactivated the PolyrexParser; using RexleParser instead
|
35
|
+
# 14-Jan-2012: Implemented Rexle::Elements#each
|
36
|
+
# 21-Dec-2011: Bug fix: xpath modified to allow querying from the actual
|
37
|
+
# root rather than the 1st child element from the root
|
38
|
+
|
11
39
|
module XMLhelper
|
12
40
|
|
13
41
|
def doc_print(children)
|
14
|
-
|
15
|
-
body = children.
|
42
|
+
|
43
|
+
body = (children.nil? or children.empty? or children.is_an_empty_string? ) ? '' : scan_print(children).join
|
16
44
|
a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
17
45
|
"<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), body, self.root.name]
|
18
46
|
end
|
19
47
|
|
20
48
|
def doc_pretty_print(children)
|
21
49
|
|
22
|
-
body =
|
50
|
+
body = pretty_print(children,2).join
|
23
51
|
a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
24
52
|
ind = "\n "
|
25
53
|
"<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), ind, body, "\n", self.root.name]
|
@@ -28,16 +56,27 @@ module XMLhelper
|
|
28
56
|
def scan_print(nodes)
|
29
57
|
|
30
58
|
nodes.map do |x|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
59
|
+
|
60
|
+
if x.is_a? Rexle::Element then
|
61
|
+
if x.name.chr != '!' then
|
62
|
+
a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
63
|
+
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
64
|
+
|
65
|
+
if x.value.length > 0 or (x.children.length > 0 and not x.children.is_an_empty_string?) then
|
66
|
+
out = ["<%s>" % tag]
|
67
|
+
#out << x.value unless x.value.nil? || x.value.empty?
|
68
|
+
out << scan_print(x.children)
|
69
|
+
out << "</%s>" % x.name
|
70
|
+
else
|
71
|
+
out = ["<%s/>" % tag]
|
72
|
+
end
|
73
|
+
elsif x.name == '!-' then
|
74
|
+
"<!--%s-->" % x.value
|
75
|
+
else
|
76
|
+
"<![CDATA[%s]]>" % x.value
|
77
|
+
end
|
78
|
+
elsif x.is_a? String then
|
79
|
+
x
|
41
80
|
end
|
42
81
|
end
|
43
82
|
|
@@ -45,21 +84,30 @@ module XMLhelper
|
|
45
84
|
|
46
85
|
def pretty_print(nodes, indent='0')
|
47
86
|
indent = indent.to_i
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
87
|
+
|
88
|
+
nodes.select(){|x| x.is_a? Rexle::Element or x.strip.length > 0}
|
89
|
+
.map.with_index do |x, i|
|
90
|
+
|
91
|
+
if x.is_a? Rexle::Element then
|
92
|
+
unless x.name == '![' then
|
93
|
+
#return ["<%s/>" % x.name] if x.value = ''
|
94
|
+
a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
95
|
+
a ||= []
|
96
|
+
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
97
|
+
|
98
|
+
start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
99
|
+
ind1 = x.children.grep(Rexle::Element).length > 0 ?
|
100
|
+
("\n" + ' ' * indent) : ''
|
101
|
+
out = ["%s<%s>%s" % [start, tag, ind1]]
|
102
|
+
|
103
|
+
out << pretty_print(x.children, (indent + 1).to_s.clone)
|
104
|
+
ind2 = ind1.length > 0 ? ("\n" + ' ' * (indent - 1)) : ''
|
105
|
+
out << "%s</%s>" % [ind2, x.name]
|
106
|
+
else
|
107
|
+
"<![CDATA[%s]]>" % x.value
|
108
|
+
end
|
109
|
+
elsif x.is_a? String then
|
110
|
+
x.sub(/^[\n\s]+$/,'')
|
63
111
|
end
|
64
112
|
end
|
65
113
|
|
@@ -70,108 +118,224 @@ end
|
|
70
118
|
class Rexle
|
71
119
|
include XMLhelper
|
72
120
|
|
121
|
+
attr_reader :prefixes
|
122
|
+
|
123
|
+
def self.version()
|
124
|
+
'0.9.xx'
|
125
|
+
end
|
126
|
+
|
73
127
|
def initialize(x=nil)
|
74
128
|
super()
|
75
129
|
|
130
|
+
# what type of input is it? Is it a string, array, or REXML doc?
|
76
131
|
if x then
|
77
132
|
procs = {
|
78
|
-
|
79
|
-
|
80
|
-
|
133
|
+
String: proc {|x| parse_string(x)},
|
134
|
+
Array: proc {|x| x},
|
135
|
+
:"REXML::Document" => proc {|x| scan_doc x.root}
|
81
136
|
}
|
137
|
+
|
138
|
+
doc_node = ['doc','',{}]
|
139
|
+
|
82
140
|
|
83
|
-
a = procs[x.class.to_s.to_sym].call(x)
|
84
|
-
@doc = scan_element(*a)
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
def xpath(path, &blk)
|
90
|
-
|
91
|
-
# is it a function
|
92
|
-
fn_match = path.match(/^(\w+)\(([^\)]+)\)$/)
|
141
|
+
@a = procs[x.class.to_s.to_sym].call(x)
|
142
|
+
@doc = scan_element(*(doc_node << @a))
|
143
|
+
|
144
|
+
# fetch the namespaces
|
145
|
+
@prefixes = []
|
146
|
+
if @doc.root.attributes then
|
93
147
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
Array: proc {|x| block_given? ? x : x.flatten },
|
98
|
-
String: proc {|x| x},
|
99
|
-
:"Rexle::Element" => proc {|x| [x]}
|
100
|
-
}
|
101
|
-
bucket = []
|
102
|
-
result = @doc.xpath(path, bucket, &blk)
|
148
|
+
xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
|
149
|
+
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
150
|
+
end
|
103
151
|
|
104
|
-
procs[result.class.to_s.to_sym].call(result)
|
105
|
-
|
106
|
-
else
|
107
|
-
m, xpath_value = fn_match.captures
|
108
|
-
method(m.to_sym).call(xpath_value)
|
109
152
|
end
|
110
153
|
|
111
154
|
end
|
155
|
+
|
156
|
+
def xpath(path, &blk)
|
157
|
+
@doc.xpath(path, &blk)
|
158
|
+
end
|
112
159
|
|
113
160
|
class Element
|
114
161
|
include XMLhelper
|
115
162
|
|
116
163
|
attr_accessor :name, :value, :parent
|
117
|
-
attr_reader :child_lookup
|
164
|
+
attr_reader :child_lookup, :child_elements
|
165
|
+
|
166
|
+
alias original_clone clone
|
118
167
|
|
119
|
-
def initialize(name=nil, value='', attributes={})
|
168
|
+
def initialize(name=nil, value='', attributes={}, rexle=nil)
|
169
|
+
@rexle = rexle
|
120
170
|
super()
|
121
|
-
@name, @value, @attributes = name, value, attributes
|
171
|
+
@name, @value, @attributes = name.to_s, value, attributes
|
122
172
|
raise "Element name must not be blank" unless name
|
123
173
|
@child_elements = []
|
124
174
|
@child_lookup = []
|
125
175
|
end
|
126
176
|
|
127
|
-
def
|
177
|
+
def contains(raw_args)
|
178
|
+
path, raw_val = raw_args.split(',',2)
|
179
|
+
val = raw_val.strip[/^["']?.*["']?$/]
|
180
|
+
|
181
|
+
anode = query_xpath(path)
|
182
|
+
return unless anode
|
183
|
+
a = scan_contents(anode.first)
|
184
|
+
|
185
|
+
[a.grep(/#{val}/).length > 0]
|
186
|
+
end
|
187
|
+
|
188
|
+
def count(path)
|
189
|
+
length = query_xpath(path).flatten.compact.length
|
190
|
+
length
|
191
|
+
end
|
192
|
+
|
193
|
+
def max(path)
|
194
|
+
a = query_xpath(path).flatten.compact.map(&:to_i)
|
195
|
+
a.max
|
196
|
+
end
|
197
|
+
|
198
|
+
def name()
|
199
|
+
if @rexle then
|
200
|
+
prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] } if @rexle.prefixes.is_a? Array
|
201
|
+
prefix ? @name.sub(prefix + ':', '') : @name
|
202
|
+
else
|
203
|
+
@name
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def xpath(path, rlist=[], &blk)
|
208
|
+
r = filter_xpath(path, rlist=[], &blk)
|
209
|
+
r.is_a?(Array) ? r.compact : r
|
210
|
+
end
|
211
|
+
|
212
|
+
def filter_xpath(path, rlist=[], &blk)
|
128
213
|
|
129
|
-
|
130
|
-
|
131
|
-
|
214
|
+
# is it a function
|
215
|
+
fn_match = path.match(/^(\w+)\(["']?([^\)]*)["']?\)$/)
|
216
|
+
|
217
|
+
# Array: proc {|x| x.flatten.compact},
|
218
|
+
if (fn_match and fn_match.captures.first[/^(attribute|@)/]) or fn_match.nil? then
|
219
|
+
procs = {
|
220
|
+
Array: proc {|x| block_given? ? x : x.flatten.uniq },
|
221
|
+
String: proc {|x| x},
|
222
|
+
Hash: proc {|x| x},
|
223
|
+
TrueClass: proc{|x| x},
|
224
|
+
FalseClass: proc{|x| x},
|
225
|
+
:"Rexle::Element" => proc {|x| [x]}
|
226
|
+
}
|
227
|
+
bucket = []
|
228
|
+
raw_results = path.split('|').map do |xp|
|
229
|
+
query_xpath(xp, bucket, &blk)
|
230
|
+
end
|
231
|
+
|
232
|
+
#results = raw_results.inject(&:+)
|
233
|
+
results = raw_results.last
|
234
|
+
procs[results.class.to_s.to_sym].call(results) if results
|
235
|
+
|
236
|
+
else
|
237
|
+
m, xpath_value = fn_match.captures
|
238
|
+
xpath_value.empty? ? method(m.to_sym).call : method(m.to_sym).call(xpath_value)
|
239
|
+
end
|
240
|
+
|
241
|
+
end
|
242
|
+
|
243
|
+
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
244
|
+
|
245
|
+
#remove any prefixes
|
246
|
+
#@rexle.prefixes.each {|x| xpath_value.sub!(x + ':','') }
|
247
|
+
flag_func = false
|
248
|
+
|
249
|
+
xpath_value = raw_xpath_value.sub('child::','./')
|
250
|
+
#xpath_value.sub!(/\.\/(?=[\/])/,'')
|
251
|
+
|
252
|
+
if xpath_value[/^[\w\/]+\s*=.*/] then
|
253
|
+
flag_func = true
|
254
|
+
|
255
|
+
xpath_value.sub!(/^\w+\s*=.*/,'.[\0]')
|
256
|
+
xpath_value.sub!(/\/([\w]+\s*=.*)/,'[\1]')
|
132
257
|
|
258
|
+
#result = self.element xpath_value
|
259
|
+
#return [(result.is_a?(Rexle::Element) ? true : false)]
|
260
|
+
end
|
261
|
+
|
262
|
+
#xpath_value.sub!(/^attribute::/,'*/attribute::')
|
263
|
+
raw_path, raw_condition = xpath_value.sub(/^\.?\/(?!\/)/,'')\
|
264
|
+
.match(/([^\[]+)(\[[^\]]+\])?/).captures
|
265
|
+
|
266
|
+
remaining_path = ($').to_s
|
267
|
+
|
268
|
+
r = raw_path[/([^\/]+)(?=\/\/)/,1]
|
269
|
+
if r then
|
270
|
+
a_path = raw_path.split(/(?=\/\/)/,2)
|
271
|
+
else
|
272
|
+
a_path = raw_path.split('/',2)
|
273
|
+
end
|
274
|
+
|
133
275
|
condition = raw_condition if a_path.length <= 1
|
134
276
|
|
135
277
|
if raw_path[0,2] == '//' then
|
136
|
-
s =
|
137
|
-
|
138
|
-
elsif raw_path == 'text()' then
|
278
|
+
s = ''
|
279
|
+
elsif raw_path == 'text()'
|
139
280
|
a_path.shift
|
140
281
|
return @value
|
141
282
|
else
|
142
|
-
|
143
|
-
|
144
|
-
|
283
|
+
|
284
|
+
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
285
|
+
|
286
|
+
return @attributes if attribute == '*'
|
287
|
+
return [@attributes[attribute.to_sym]] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
|
145
288
|
s = a_path.shift
|
146
289
|
end
|
147
290
|
|
148
291
|
# isolate the xpath to return just the path to the current element
|
149
|
-
elmnt_path = s[/^([\w\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
150
|
-
element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
|
151
292
|
|
293
|
+
elmnt_path = s[/^([\w:\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
294
|
+
element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
|
295
|
+
|
152
296
|
if element_part then
|
153
297
|
unless element_part[/^@/] then
|
154
|
-
element_name = element_part
|
298
|
+
element_name = element_part[/^[\w:\*\.]+/]
|
155
299
|
else
|
156
|
-
|
300
|
+
|
301
|
+
condition = xpath_value[/^\[/] ? xpath_value : element_part
|
157
302
|
element_name = nil
|
158
303
|
end
|
159
304
|
|
160
305
|
end
|
161
306
|
|
307
|
+
#element_name ||= '*'
|
162
308
|
raw_condition = '' if condition
|
309
|
+
attr_search = format_condition(condition) if condition and condition.length > 0
|
310
|
+
|
311
|
+
attr_search2 = xpath_value[/^\[(.*)\]$/,1]
|
312
|
+
if attr_search2 then
|
313
|
+
r4 = attribute_search(attr_search, self, self.attributes)
|
314
|
+
return r4
|
315
|
+
end
|
316
|
+
|
317
|
+
return_elements = []
|
163
318
|
|
164
|
-
|
319
|
+
if raw_path[0,2] == '//' then
|
165
320
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
321
|
+
regex = /\[(\d+)\]/
|
322
|
+
n = xpath_value[regex,1]
|
323
|
+
xpath_value.slice!(regex)
|
324
|
+
|
325
|
+
rs = scan_match(self, xpath_value).flatten.compact
|
326
|
+
return n ? rs[n.to_i-1] : rs
|
327
|
+
|
328
|
+
elsif (raw_path == '.' or raw_path == self.name) and attr_search.nil? then
|
329
|
+
return [self]
|
330
|
+
else
|
331
|
+
|
332
|
+
return_elements = @child_lookup.map.with_index.select do |x|
|
333
|
+
(x[0][0] == element_name || element_name == '.') or \
|
334
|
+
(element_name == '*' && x[0].is_a?(Array))
|
335
|
+
end
|
170
336
|
|
171
|
-
return_elements = @child_lookup.map.with_index.select do |x|
|
172
|
-
x[0][0] == element_name or element_name == '*'
|
173
337
|
end
|
174
|
-
|
338
|
+
|
175
339
|
if return_elements.length > 0 then
|
176
340
|
|
177
341
|
if (a_path + [remaining_path]).join.empty? then
|
@@ -180,10 +344,13 @@ class Rexle
|
|
180
344
|
else
|
181
345
|
|
182
346
|
rlist << return_elements.map.with_index do |x,i|
|
347
|
+
|
183
348
|
rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(a_path.join('/') + raw_condition.to_s + remaining_path, &blk); (r || e) }
|
184
349
|
next if rtn_element.nil? or (rtn_element.is_a? Array and rtn_element.empty?)
|
185
350
|
|
186
|
-
if rtn_element.is_a?
|
351
|
+
if rtn_element.is_a? Hash then
|
352
|
+
rtn_element
|
353
|
+
elsif rtn_element.is_a? Array then
|
187
354
|
rtn_element
|
188
355
|
elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) and not(rtn_element[0].is_a? String))
|
189
356
|
rtn_element
|
@@ -192,15 +359,17 @@ class Rexle
|
|
192
359
|
end
|
193
360
|
end
|
194
361
|
#
|
362
|
+
|
195
363
|
rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
|
196
364
|
|
197
365
|
end
|
366
|
+
|
198
367
|
rlist.compact! if rlist.is_a? Array
|
199
368
|
|
200
369
|
else
|
201
370
|
|
202
371
|
# strip off the 1st element from the XPath
|
203
|
-
new_xpath = xpath_value[
|
372
|
+
new_xpath = xpath_value[/^\/\/[\w:]+\/(.*)/,1]
|
204
373
|
|
205
374
|
if new_xpath then
|
206
375
|
self.xpath(new_xpath + raw_condition.to_s + remaining_path, rlist,&blk)
|
@@ -209,19 +378,31 @@ class Rexle
|
|
209
378
|
|
210
379
|
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) or (rlist.length > 1 and rlist[0].is_a? Array)
|
211
380
|
rlist = [rlist] if rlist.is_a? Rexle::Element
|
381
|
+
rlist = (rlist.length > 0 ? true : false) if flag_func == true
|
212
382
|
rlist
|
213
383
|
end
|
214
384
|
|
215
385
|
def add_element(item)
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
386
|
+
if item.is_a? Rexle::Element then
|
387
|
+
@child_lookup << [item.name, item.attributes, item.value]
|
388
|
+
@child_elements << item
|
389
|
+
# add a reference from this element (the parent) to the child
|
390
|
+
item.parent = self
|
391
|
+
item
|
392
|
+
elsif item.is_a? String then
|
393
|
+
@child_lookup << item
|
394
|
+
@child_elements << item
|
395
|
+
elsif item.is_a? Rexle then
|
396
|
+
self.add_element(item.root)
|
397
|
+
end
|
221
398
|
end
|
222
399
|
|
223
400
|
def inspect()
|
401
|
+
if self.xml.length > 30 then
|
224
402
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
403
|
+
else
|
404
|
+
self.xml
|
405
|
+
end
|
225
406
|
end
|
226
407
|
|
227
408
|
alias add add_element
|
@@ -240,11 +421,29 @@ class Rexle
|
|
240
421
|
end
|
241
422
|
|
242
423
|
def add_text(s) @value = s; self end
|
243
|
-
|
424
|
+
|
425
|
+
def attribute(key)
|
426
|
+
key = key.to_sym if key.is_a? String
|
427
|
+
@attributes[key].gsub('&lt;','<').gsub('&gt;','>')
|
428
|
+
end
|
429
|
+
|
244
430
|
def attributes() @attributes end
|
245
|
-
|
246
|
-
def children
|
247
|
-
|
431
|
+
|
432
|
+
def children()
|
433
|
+
return unless @value
|
434
|
+
r = (@value.empty? ? [] : [@value]) + @child_elements
|
435
|
+
def r.is_an_empty_string?()
|
436
|
+
self.length == 1 and self.first == ''
|
437
|
+
end
|
438
|
+
|
439
|
+
return r
|
440
|
+
end
|
441
|
+
|
442
|
+
def children=(a) @child_elements = a end
|
443
|
+
|
444
|
+
def deep_clone() Rexle.new(self.xml).root end
|
445
|
+
def clone() Element.new(@name, @value, @attributes) end
|
446
|
+
|
248
447
|
def delete(obj=nil)
|
249
448
|
if obj then
|
250
449
|
i = @child_elements.index(obj)
|
@@ -254,17 +453,30 @@ class Rexle
|
|
254
453
|
end
|
255
454
|
end
|
256
455
|
|
257
|
-
def element(s)
|
456
|
+
def element(s)
|
457
|
+
r = self.xpath(s)
|
458
|
+
r.is_a?(Array) ? r.first : r
|
459
|
+
end
|
258
460
|
|
259
461
|
def elements(s=nil)
|
260
462
|
procs = {
|
261
|
-
NilClass: proc {Elements.new(@child_elements)},
|
463
|
+
NilClass: proc {Elements.new(@child_elements.select{|x| x.is_a? Rexle::Element })},
|
262
464
|
String: proc {|x| @child_elements[x]}
|
263
465
|
}
|
466
|
+
|
264
467
|
procs[s.class.to_s.to_sym].call(s)
|
265
468
|
end
|
266
469
|
|
267
|
-
def
|
470
|
+
def doc_root() @rexle.root end
|
471
|
+
def each(&blk)
|
472
|
+
@child_elements.each(&blk) #unless @child_elements.empty?
|
473
|
+
end
|
474
|
+
def has_elements?() !self.elements.empty? end
|
475
|
+
|
476
|
+
def insert_after(node) insert(node, 1) end
|
477
|
+
def insert_before(node) insert(node) end
|
478
|
+
|
479
|
+
def root() self end #@rexle.root end
|
268
480
|
|
269
481
|
def text(s='')
|
270
482
|
|
@@ -274,6 +486,7 @@ class Rexle
|
|
274
486
|
e = self.element(s)
|
275
487
|
result = e.value if e
|
276
488
|
end
|
489
|
+
result = CGI.unescape_html result.to_s
|
277
490
|
|
278
491
|
def result.unescape()
|
279
492
|
s = self.clone
|
@@ -283,10 +496,14 @@ class Rexle
|
|
283
496
|
|
284
497
|
result
|
285
498
|
end
|
499
|
+
|
500
|
+
def texts()
|
501
|
+
[@value] + @child_elements.select {|x| x.is_a? String}
|
502
|
+
end
|
286
503
|
|
287
504
|
def value=(raw_s)
|
288
505
|
|
289
|
-
@value = raw_s.to_s.clone
|
506
|
+
@value = String.new(raw_s.to_s.clone)
|
290
507
|
escape_chars = %w(& &amp; < &lt; > &gt;).each_slice(2).to_a
|
291
508
|
escape_chars.each{|x| @value.gsub!(*x)}
|
292
509
|
|
@@ -306,17 +523,27 @@ class Rexle
|
|
306
523
|
method(msg).call(self.children)
|
307
524
|
end
|
308
525
|
|
526
|
+
alias to_s xml
|
309
527
|
|
310
528
|
private
|
529
|
+
|
530
|
+
def insert(node,offset=0)
|
531
|
+
i = parent.child_elements.index(self)
|
532
|
+
return unless i
|
533
|
+
parent.child_elements.insert(i+offset,node)
|
534
|
+
parent.child_lookup.insert(i+offset, [node.name, node.attributes, node.value])
|
535
|
+
self
|
536
|
+
end
|
311
537
|
|
312
538
|
def format_condition(condition)
|
313
|
-
|
314
|
-
raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=<>]+|position\(\)|[@\w
|
539
|
+
|
540
|
+
raw_items = condition[1..-1].scan(/\'[^\']*\'|\"[^\"]*\"|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/&;]+/)
|
315
541
|
|
316
542
|
if raw_items[0][/^\d+$/] then
|
317
543
|
return raw_items[0].to_i
|
318
544
|
elsif raw_items[0] == 'position()' then
|
319
|
-
|
545
|
+
rrr = "i %s %s" % [raw_items[1].gsub('&lt;','<').gsub('&gt;','>'), raw_items[-1]]
|
546
|
+
return rrr
|
320
547
|
else
|
321
548
|
|
322
549
|
andor_items = raw_items.map.with_index.select{|x,i| x[/\band\b|\bor\b/]}.map{|x| [x.last, x.last + 1]}.flatten
|
@@ -331,9 +558,10 @@ class Rexle
|
|
331
558
|
|
332
559
|
if x.length >= 3 then
|
333
560
|
x[1] = '==' if x[1] == '='
|
334
|
-
"h[
|
561
|
+
"h[:'%s'] %s %s" % x
|
335
562
|
else
|
336
|
-
|
563
|
+
|
564
|
+
x.join[/^(and|or)$/] ? x : ("h[:'%s']" % x)
|
337
565
|
end
|
338
566
|
end
|
339
567
|
|
@@ -344,6 +572,7 @@ class Rexle
|
|
344
572
|
items = cons_items.map do |x|
|
345
573
|
|
346
574
|
if x.length >= 3 then
|
575
|
+
|
347
576
|
x[1] = '==' if x[1] == '='
|
348
577
|
if x[0] != '.' then
|
349
578
|
if x[0][/\//] then
|
@@ -351,7 +580,7 @@ class Rexle
|
|
351
580
|
|
352
581
|
"e.xpath('#{path}').first.value == #{value}"
|
353
582
|
else
|
354
|
-
"(name == '%s' and value %s %s)" % [x[0], x[1], x[2]]
|
583
|
+
"(name == '%s' and value %s '%s')" % [x[0], x[1], x[2].sub(/^['"](.*)['"]$/,'\1')]
|
355
584
|
end
|
356
585
|
else
|
357
586
|
"e.value %s %s" % [x[1], x[2]]
|
@@ -360,7 +589,7 @@ class Rexle
|
|
360
589
|
x
|
361
590
|
end
|
362
591
|
end
|
363
|
-
|
592
|
+
|
364
593
|
return items.join(' ')
|
365
594
|
end
|
366
595
|
end
|
@@ -368,59 +597,47 @@ class Rexle
|
|
368
597
|
|
369
598
|
end
|
370
599
|
|
371
|
-
|
600
|
+
|
601
|
+
def scan_match(node, path)
|
372
602
|
|
373
|
-
|
603
|
+
r = []
|
604
|
+
xpath2 = path[2..-1]
|
605
|
+
xpath2.sub!(/^\*\//,'')
|
606
|
+
xpath2.sub!(/^\*/,self.name)
|
607
|
+
xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
|
608
|
+
|
374
609
|
|
375
|
-
|
610
|
+
r << node.xpath(xpath2)
|
611
|
+
r << node.elements.map {|n| scan_match(n, path) if n.is_a? Rexle::Element}
|
612
|
+
r
|
613
|
+
end
|
376
614
|
|
615
|
+
# used by xpath function contains()
|
616
|
+
#
|
617
|
+
def scan_contents(node)
|
377
618
|
|
378
|
-
|
379
|
-
|
380
|
-
if attr_search then
|
381
|
-
rlist << x if h and eval(attr_search)
|
382
|
-
else
|
383
|
-
rlist << x
|
384
|
-
end
|
385
|
-
end
|
386
|
-
else
|
619
|
+
a = []
|
620
|
+
a << node.text
|
387
621
|
|
388
|
-
|
389
|
-
|
390
|
-
if h and h.has_key? attribute.to_sym then
|
391
|
-
rlist << h[attribute.to_sym]
|
392
|
-
end
|
393
|
-
else
|
394
|
-
rlist << x if h and eval(attr_search)
|
395
|
-
end
|
396
|
-
end
|
397
|
-
|
398
|
-
x.xpath('//' + element.to_s + condition.to_s, rlist) unless x.children.empty?
|
622
|
+
node.elements.each do |child|
|
623
|
+
a.concat scan_contents(child)
|
399
624
|
end
|
400
|
-
|
625
|
+
a
|
401
626
|
end
|
402
|
-
|
627
|
+
|
628
|
+
|
403
629
|
def filter(raw_element, i, attr_search, &blk)
|
404
630
|
|
405
631
|
x = raw_element
|
406
632
|
e = @child_elements[x.last]
|
407
|
-
h = x[0][1] # <-- fetch the attributes
|
408
633
|
|
634
|
+
return unless e.is_a? Rexle::Element
|
635
|
+
name, value = e.name, e.value if e.is_a? Rexle::Element
|
636
|
+
|
637
|
+
h = x[0][1] # <-- fetch the attributes
|
638
|
+
|
409
639
|
if attr_search then
|
410
|
-
|
411
|
-
block_given? ? blk.call(e) : e if i == attr_search
|
412
|
-
elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
|
413
|
-
block_given? ? blk.call(e) : e
|
414
|
-
elsif h and attr_search[/^h\[/] and eval(attr_search)
|
415
|
-
block_given? ? blk.call(e) : e
|
416
|
-
elsif attr_search[/^\(name ==/] and \
|
417
|
-
e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
|
418
|
-
block_given? ? blk.call(e) : e
|
419
|
-
elsif attr_search[/^e\.value/] and eval(attr_search)
|
420
|
-
block_given? ? blk.call(e) : e
|
421
|
-
elsif attr_search[/^e\.xpath/] and eval(attr_search)
|
422
|
-
block_given? ? blk.call(e) : e
|
423
|
-
end
|
640
|
+
attribute_search(attr_search,e, h, i, &blk)
|
424
641
|
else
|
425
642
|
|
426
643
|
block_given? ? blk.call(e) : e
|
@@ -428,9 +645,28 @@ class Rexle
|
|
428
645
|
|
429
646
|
end
|
430
647
|
|
648
|
+
def attribute_search(attr_search, e, h, i=nil, &blk)
|
649
|
+
if attr_search.is_a? Fixnum then
|
650
|
+
block_given? ? blk.call(e) : e if i == attr_search
|
651
|
+
elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
|
652
|
+
block_given? ? blk.call(e) : e
|
653
|
+
elsif h and attr_search[/^h\[/] and eval(attr_search)
|
654
|
+
block_given? ? blk.call(e) : e
|
655
|
+
elsif attr_search[/^\(name ==/] and e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
|
656
|
+
block_given? ? blk.call(e) : e
|
657
|
+
elsif attr_search[/^\(name ==/] and eval(attr_search)
|
658
|
+
block_given? ? blk.call(e) : e
|
659
|
+
elsif attr_search[/^e\.value/] and eval(attr_search)
|
660
|
+
block_given? ? blk.call(e) : e
|
661
|
+
elsif attr_search[/^e\.xpath/] and eval(attr_search)
|
662
|
+
block_given? ? blk.call(e) : e
|
663
|
+
end
|
664
|
+
end
|
431
665
|
end # -- end of element --
|
432
666
|
|
433
667
|
class Elements
|
668
|
+
include Enumerable
|
669
|
+
|
434
670
|
def initialize(elements=[])
|
435
671
|
super()
|
436
672
|
@elements = elements
|
@@ -439,6 +675,10 @@ class Rexle
|
|
439
675
|
def [](i)
|
440
676
|
@elements[i-1]
|
441
677
|
end
|
678
|
+
|
679
|
+
def each(&blk) @elements.each(&blk) end
|
680
|
+
def to_a() @elements end
|
681
|
+
|
442
682
|
end # -- end of elements --
|
443
683
|
|
444
684
|
|
@@ -448,16 +688,17 @@ class Rexle
|
|
448
688
|
|
449
689
|
if x then
|
450
690
|
procs = {
|
451
|
-
|
452
|
-
|
453
|
-
|
691
|
+
String: proc {|x| parse_string(x)},
|
692
|
+
Array: proc {|x| x},
|
693
|
+
:"REXML::Document" => proc {|x| scan_doc x.root}
|
454
694
|
}
|
455
695
|
a = procs[x.class.to_s.to_sym].call(x)
|
456
696
|
else
|
457
697
|
a = yield
|
458
698
|
end
|
459
|
-
|
460
|
-
@
|
699
|
+
doc_node = ['doc','',{}]
|
700
|
+
@a = procs[x.class.to_s.to_sym].call(x)
|
701
|
+
@doc = scan_element(*(doc_node << @a))
|
461
702
|
self
|
462
703
|
end
|
463
704
|
|
@@ -465,15 +706,22 @@ class Rexle
|
|
465
706
|
def attribute(key) @doc.attribute(key) end
|
466
707
|
def attributes() @doc.attributes end
|
467
708
|
def add_element(element) @doc.root.add_element(element) end
|
709
|
+
def add_text(s) end
|
468
710
|
|
469
711
|
alias add add_element
|
470
712
|
|
471
|
-
def delete(xpath)
|
472
|
-
|
713
|
+
def delete(xpath)
|
714
|
+
e = @doc.element(xpath)
|
715
|
+
e.delete if e
|
716
|
+
end
|
717
|
+
|
718
|
+
def element(xpath) self.xpath(xpath).first end
|
473
719
|
def elements(s=nil) @doc.elements(s) end
|
720
|
+
def name() @doc.root.name end
|
721
|
+
def to_a() @a end
|
474
722
|
def to_s(options={}) self.xml options end
|
475
723
|
def text(xpath) @doc.text(xpath) end
|
476
|
-
def root() @doc end
|
724
|
+
def root() @doc.elements.first end
|
477
725
|
|
478
726
|
def write(f)
|
479
727
|
f.write xml
|
@@ -482,9 +730,11 @@ class Rexle
|
|
482
730
|
def xml(options={})
|
483
731
|
o = {pretty: false, declaration: true}.merge(options)
|
484
732
|
msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
|
733
|
+
|
485
734
|
r = ''
|
486
735
|
r = "<?xml version='1.0' encoding='UTF-8'?>\n" if o[:declaration] == true
|
487
736
|
r << method(msg).call(self.root.children)
|
737
|
+
|
488
738
|
r
|
489
739
|
end
|
490
740
|
|
@@ -501,26 +751,37 @@ class Rexle
|
|
501
751
|
if recordx_type then
|
502
752
|
procs = {
|
503
753
|
'dynarex' => proc {|x| DynarexParser.new(x).to_a},
|
504
|
-
'polyrex' => proc {|x| PolyrexParser.new(x).to_a}
|
754
|
+
'polyrex' => proc {|x| PolyrexParser.new(x).to_a},
|
755
|
+
'polyrex' => proc {|x| RexleParser.new(x).to_a}
|
505
756
|
}
|
506
757
|
procs[recordx_type].call(x)
|
507
758
|
else
|
759
|
+
|
508
760
|
RexleParser.new(x).to_a
|
509
761
|
end
|
510
762
|
else
|
763
|
+
|
511
764
|
RexleParser.new(x).to_a
|
512
765
|
end
|
513
766
|
|
514
767
|
end
|
768
|
+
|
769
|
+
def scan_element(name, value=nil, attributes=nil, *children)
|
515
770
|
|
516
|
-
|
517
|
-
|
518
|
-
|
771
|
+
element = Element.new(name, value, attributes, self)
|
772
|
+
|
773
|
+
if children then
|
774
|
+
children.each do |x|
|
775
|
+
if x.is_a? Array then
|
776
|
+
element.add_element scan_element(*x)
|
777
|
+
elsif x.is_a? String
|
778
|
+
element.add_element x
|
779
|
+
end
|
780
|
+
end
|
781
|
+
end
|
519
782
|
return element
|
520
783
|
end
|
521
784
|
|
522
|
-
def count(path) @doc.xpath(path).flatten.compact.length end
|
523
|
-
def max(path) @doc.xpath(path).map(&:to_i).max end
|
524
785
|
|
525
786
|
# scan a rexml doc
|
526
787
|
#
|
metadata
CHANGED
@@ -1,57 +1,61 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease:
|
5
|
+
version: 0.9.5
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
-
|
7
|
+
authors:
|
8
|
+
- James Robertson
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
12
|
|
12
|
-
date:
|
13
|
-
default_executable:
|
13
|
+
date: 2012-10-20 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rexleparser
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
23
|
version: "0"
|
24
|
-
|
24
|
+
type: :runtime
|
25
|
+
version_requirements: *id001
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: dynarex-parser
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
30
31
|
requirements:
|
31
32
|
- - ">="
|
32
33
|
- !ruby/object:Gem::Version
|
33
34
|
version: "0"
|
34
|
-
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id002
|
35
37
|
- !ruby/object:Gem::Dependency
|
36
38
|
name: polyrex-parser
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
40
42
|
requirements:
|
41
43
|
- - ">="
|
42
44
|
- !ruby/object:Gem::Version
|
43
45
|
version: "0"
|
44
|
-
version:
|
45
|
-
- !ruby/object:Gem::Dependency
|
46
|
-
name: nokogiri
|
47
46
|
type: :runtime
|
48
|
-
|
49
|
-
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rexle-builder
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
50
53
|
requirements:
|
51
54
|
- - ">="
|
52
55
|
- !ruby/object:Gem::Version
|
53
56
|
version: "0"
|
54
|
-
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id004
|
55
59
|
description:
|
56
60
|
email:
|
57
61
|
executables: []
|
@@ -62,7 +66,6 @@ extra_rdoc_files: []
|
|
62
66
|
|
63
67
|
files:
|
64
68
|
- lib/rexle.rb
|
65
|
-
has_rdoc: true
|
66
69
|
homepage:
|
67
70
|
licenses: []
|
68
71
|
|
@@ -72,23 +75,23 @@ rdoc_options: []
|
|
72
75
|
require_paths:
|
73
76
|
- lib
|
74
77
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
75
79
|
requirements:
|
76
80
|
- - ">="
|
77
81
|
- !ruby/object:Gem::Version
|
78
82
|
version: "0"
|
79
|
-
version:
|
80
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
81
85
|
requirements:
|
82
86
|
- - ">="
|
83
87
|
- !ruby/object:Gem::Version
|
84
88
|
version: "0"
|
85
|
-
version:
|
86
89
|
requirements: []
|
87
90
|
|
88
91
|
rubyforge_project:
|
89
|
-
rubygems_version: 1.
|
92
|
+
rubygems_version: 1.8.23
|
90
93
|
signing_key:
|
91
94
|
specification_version: 3
|
92
|
-
summary:
|
95
|
+
summary: Rexle is a simple XML parser written purely in Ruby
|
93
96
|
test_files: []
|
94
97
|
|