rexle 0.9.27 → 0.9.28
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rexle.rb +36 -28
- metadata +2 -2
data/lib/rexle.rb
CHANGED
@@ -10,6 +10,7 @@ require 'cgi'
|
|
10
10
|
include REXML
|
11
11
|
|
12
12
|
# modifications:
|
13
|
+
# 17-Jun-2012: a couple of new xpath things are supported '.' and '|'
|
13
14
|
# 15-Apr-2012: bug fix: New element names are typecast as string
|
14
15
|
# 16-Mar-2012: bug fix: Element names which contain a colon can now be selected
|
15
16
|
# in the xpath.
|
@@ -78,6 +79,7 @@ module XMLhelper
|
|
78
79
|
if x.is_a? Rexle::Element then
|
79
80
|
unless x.name == '![' then
|
80
81
|
a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
|
82
|
+
a ||= []
|
81
83
|
tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
|
82
84
|
|
83
85
|
ind1 = x.children.length > 0 ? ("\n" + ' ' * indent) : ''
|
@@ -105,7 +107,6 @@ class Rexle
|
|
105
107
|
|
106
108
|
attr_reader :prefixes
|
107
109
|
|
108
|
-
def self.version() "1.1.1" end
|
109
110
|
def initialize(x=nil)
|
110
111
|
super()
|
111
112
|
|
@@ -118,10 +119,11 @@ class Rexle
|
|
118
119
|
}
|
119
120
|
|
120
121
|
doc_node = ['doc','',{}]
|
122
|
+
|
121
123
|
|
122
124
|
@a = procs[x.class.to_s.to_sym].call(x)
|
123
125
|
@doc = scan_element(*(doc_node << @a))
|
124
|
-
|
126
|
+
|
125
127
|
# fetch the namespaces
|
126
128
|
@prefixes = []
|
127
129
|
if @doc.root.attributes then
|
@@ -130,9 +132,8 @@ class Rexle
|
|
130
132
|
@prefixes = xmlns.keys.map{|x| x[/\w+$/]}
|
131
133
|
end
|
132
134
|
|
133
|
-
|
134
135
|
end
|
135
|
-
|
136
|
+
|
136
137
|
end
|
137
138
|
|
138
139
|
def xpath(path, &blk)
|
@@ -185,7 +186,7 @@ class Rexle
|
|
185
186
|
# Array: proc {|x| x.flatten.compact},
|
186
187
|
if (fn_match and fn_match.captures.first[/^(attribute|@)/]) or fn_match.nil? then
|
187
188
|
procs = {
|
188
|
-
Array: proc {|x| block_given? ? x : x.flatten },
|
189
|
+
Array: proc {|x| block_given? ? x : x.flatten.uniq },
|
189
190
|
String: proc {|x| x},
|
190
191
|
TrueClass: proc{|x| x},
|
191
192
|
FalseClass: proc{|x| x},
|
@@ -193,9 +194,12 @@ class Rexle
|
|
193
194
|
}
|
194
195
|
bucket = []
|
195
196
|
raw_results = path.split('|').map do |xp|
|
196
|
-
query_xpath(xp, bucket, &blk)
|
197
|
-
|
198
|
-
|
197
|
+
r3 = query_xpath(xp, bucket, &blk)
|
198
|
+
r3
|
199
|
+
end
|
200
|
+
|
201
|
+
#results = raw_results.inject(&:+)
|
202
|
+
results = raw_results.last
|
199
203
|
procs[results.class.to_s.to_sym].call(results)
|
200
204
|
|
201
205
|
else
|
@@ -203,7 +207,7 @@ class Rexle
|
|
203
207
|
method(m.to_sym).call(xpath_value)
|
204
208
|
end
|
205
209
|
|
206
|
-
end
|
210
|
+
end
|
207
211
|
|
208
212
|
def query_xpath(raw_xpath_value, rlist=[], &blk)
|
209
213
|
|
@@ -212,17 +216,21 @@ class Rexle
|
|
212
216
|
flag_func = false
|
213
217
|
|
214
218
|
xpath_value = raw_xpath_value.sub(/^\[/,'*[')
|
219
|
+
#xpath_value.sub!(/\.\/(?=[\/])/,'')
|
215
220
|
|
216
221
|
if xpath_value[/^[\w\/]+\s*=.*/] then
|
217
222
|
flag_func = true
|
218
|
-
|
219
|
-
xpath_value
|
223
|
+
|
224
|
+
xpath_value.sub!(/^\w+\s*=.*/,'.[\0]')
|
225
|
+
xpath_value.sub!(/\/([\w]+\s*=.*)/,'[\1]')
|
226
|
+
|
220
227
|
#result = self.element xpath_value
|
221
228
|
#return [(result.is_a?(Rexle::Element) ? true : false)]
|
222
229
|
end
|
223
230
|
|
224
231
|
#xpath_value.sub!(/^attribute::/,'*/attribute::')
|
225
|
-
raw_path, raw_condition = xpath_value.sub(
|
232
|
+
raw_path, raw_condition = xpath_value.sub(/^\.?\/(?!\/)/,'')\
|
233
|
+
.match(/([^\[]+)(\[[^\]]+\])?/).captures
|
226
234
|
|
227
235
|
remaining_path = ($').to_s
|
228
236
|
|
@@ -242,9 +250,7 @@ class Rexle
|
|
242
250
|
return @value
|
243
251
|
else
|
244
252
|
attribute = xpath_value[/^(attribute::|@)(.*)/,2]
|
245
|
-
|
246
253
|
return [@attributes[attribute.to_sym]] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
|
247
|
-
|
248
254
|
s = a_path.shift
|
249
255
|
end
|
250
256
|
|
@@ -268,19 +274,20 @@ class Rexle
|
|
268
274
|
attr_search = format_condition(condition) if condition and condition.length > 0
|
269
275
|
|
270
276
|
return_elements = []
|
271
|
-
|
272
|
-
if raw_path[0,2] == '//'
|
277
|
+
|
278
|
+
if raw_path[0,2] == '//' then
|
273
279
|
rs = scan_match(self, xpath_value).flatten.compact
|
274
280
|
return rs
|
275
|
-
elsif raw_path == '.' then
|
276
|
-
return [self]
|
281
|
+
elsif (raw_path == '.' or raw_path == self.name) and attr_search.nil? then
|
282
|
+
return [self]
|
277
283
|
else
|
278
284
|
|
279
|
-
return_elements = @child_lookup.map.with_index.select do |x|
|
280
285
|
|
281
|
-
|
282
|
-
|
283
|
-
|
286
|
+
return_elements = @child_lookup.map.with_index.select do |x|
|
287
|
+
|
288
|
+
(x[0][0] == element_name || element_name == '.') or \
|
289
|
+
(element_name == '*' && x[0].is_a?(Array))
|
290
|
+
end
|
284
291
|
|
285
292
|
end
|
286
293
|
|
@@ -306,6 +313,7 @@ class Rexle
|
|
306
313
|
end
|
307
314
|
end
|
308
315
|
#
|
316
|
+
|
309
317
|
rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
|
310
318
|
|
311
319
|
end
|
@@ -316,7 +324,7 @@ class Rexle
|
|
316
324
|
|
317
325
|
# strip off the 1st element from the XPath
|
318
326
|
new_xpath = xpath_value[/^\/\/[\w:]+\/(.*)/,1]
|
319
|
-
|
327
|
+
|
320
328
|
if new_xpath then
|
321
329
|
self.xpath(new_xpath + raw_condition.to_s + remaining_path, rlist,&blk)
|
322
330
|
end
|
@@ -344,7 +352,6 @@ class Rexle
|
|
344
352
|
end
|
345
353
|
|
346
354
|
def inspect()
|
347
|
-
|
348
355
|
if self.xml.length > 30 then
|
349
356
|
"%s ... </>" % self.xml[/<[^>]+>/]
|
350
357
|
else
|
@@ -425,7 +432,7 @@ class Rexle
|
|
425
432
|
|
426
433
|
def value=(raw_s)
|
427
434
|
|
428
|
-
@value = raw_s.to_s.clone
|
435
|
+
@value = String.new(raw_s.to_s.clone)
|
429
436
|
escape_chars = %w(& &amp; < &lt; > &gt;).each_slice(2).to_a
|
430
437
|
escape_chars.each{|x| @value.gsub!(*x)}
|
431
438
|
|
@@ -451,7 +458,6 @@ class Rexle
|
|
451
458
|
|
452
459
|
def format_condition(condition)
|
453
460
|
|
454
|
-
#raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=]+|[@\w\.\/]+/)
|
455
461
|
raw_items = condition[1..-1].scan(/\'[^\']*\'|\"[^\"]*\"|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/]+/)
|
456
462
|
|
457
463
|
if raw_items[0][/^\d+$/] then
|
@@ -637,14 +643,16 @@ class Rexle
|
|
637
643
|
if recordx_type then
|
638
644
|
procs = {
|
639
645
|
'dynarex' => proc {|x| DynarexParser.new(x).to_a},
|
640
|
-
|
646
|
+
'polyrex' => proc {|x| PolyrexParser.new(x).to_a},
|
641
647
|
'polyrex' => proc {|x| RexleParser.new(x).to_a}
|
642
648
|
}
|
643
649
|
procs[recordx_type].call(x)
|
644
650
|
else
|
651
|
+
|
645
652
|
RexleParser.new(x).to_a
|
646
653
|
end
|
647
654
|
else
|
655
|
+
|
648
656
|
RexleParser.new(x).to_a
|
649
657
|
end
|
650
658
|
|
@@ -675,4 +683,4 @@ class Rexle
|
|
675
683
|
[node.name, node.text.to_s, attributes, *children]
|
676
684
|
end
|
677
685
|
|
678
|
-
end
|
686
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: rexle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.9.27
|
5
|
+
version: 0.9.28
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- James Robertson
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-06-
|
13
|
+
date: 2012-06-17 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rexleparser
|