rexle 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rexle.rb +29 -20
- metadata +1 -1
data/lib/rexle.rb
CHANGED
@@ -24,18 +24,20 @@ class Rexle
|
|
24
24
|
|
25
25
|
end
|
26
26
|
|
27
|
-
def xpath(path)
|
27
|
+
def xpath(path, &blk)
|
28
28
|
|
29
29
|
# is it a function
|
30
30
|
fn_match = path.match(/^(\w+)\(([^\)]+)\)$/)
|
31
31
|
|
32
|
+
# Array: proc {|x| x.flatten.compact},
|
32
33
|
if fn_match.nil? then
|
33
34
|
procs = {
|
34
|
-
Array: proc {|x| x.flatten
|
35
|
-
String: proc {|x| x}
|
35
|
+
Array: proc {|x| block_given? ? x : x.flatten },
|
36
|
+
String: proc {|x| x},
|
37
|
+
:"Rexle::Element" => proc {|x| [x]}
|
36
38
|
}
|
37
39
|
bucket = []
|
38
|
-
result = @doc.xpath(path, bucket)
|
40
|
+
result = @doc.xpath(path, bucket, &blk)
|
39
41
|
|
40
42
|
procs[result.class.to_s.to_sym].call(result)
|
41
43
|
|
@@ -57,23 +59,23 @@ class Rexle
|
|
57
59
|
@child_lookup = []
|
58
60
|
end
|
59
61
|
|
60
|
-
def xpath(xpath_value, rlist=[])
|
62
|
+
def xpath(xpath_value, rlist=[], &blk)
|
61
63
|
|
62
|
-
|
64
|
+
a_path = xpath_value.split('/')
|
63
65
|
|
64
66
|
if xpath_value[0,2] == '//' then
|
65
|
-
s =
|
67
|
+
s = a_path[2]
|
66
68
|
elsif xpath_value == 'text()' then
|
67
|
-
|
69
|
+
a_path.shift
|
68
70
|
return @value
|
69
71
|
else
|
70
72
|
attribute = xpath_value[/^attribute::(.*)/,1]
|
71
73
|
return @attributes[attribute] if attribute and @attributes and @attributes.has_key?(attribute)
|
72
74
|
|
73
|
-
s =
|
75
|
+
s = a_path.shift
|
74
76
|
end
|
75
77
|
|
76
|
-
elmnt_path = s[/^(\w+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
78
|
+
elmnt_path = s[/^([\w\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
77
79
|
element_name, condition = elmnt_path.match(/(^[^\[]+)(\[[^\]]+\])?/).captures
|
78
80
|
|
79
81
|
attr_search = format_attributes(condition) if condition
|
@@ -86,22 +88,28 @@ class Rexle
|
|
86
88
|
|
87
89
|
if return_elements.length > 0 then
|
88
90
|
|
89
|
-
if
|
90
|
-
rlist = return_elements.map.with_index {|x,i| filter(x, i+1, attr_search)}
|
91
|
+
if a_path.empty? then
|
92
|
+
rlist = return_elements.map.with_index {|x,i| filter(x, i+1, attr_search, &blk)}
|
93
|
+
rlist = rlist[0] if rlist.length <= 1
|
91
94
|
else
|
92
95
|
|
93
96
|
rlist << return_elements.map.with_index do |x,i|
|
94
|
-
rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(
|
95
|
-
next
|
97
|
+
rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(a_path.join('/'), &blk); (r || e) }
|
98
|
+
next if rtn_element.nil? or (rtn_element.is_a? Array and rtn_element.empty?)
|
96
99
|
|
97
100
|
if rtn_element.is_a? Array then
|
98
|
-
rtn_element
|
99
|
-
elsif (rtn_element.is_a? String) || not(rtn_element[0].is_a? String)
|
101
|
+
rtn_element
|
102
|
+
elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) and not(rtn_element[0].is_a? String))
|
103
|
+
rtn_element
|
104
|
+
elsif rtn_element.is_a? Rexle::Element
|
100
105
|
rtn_element
|
101
106
|
end
|
102
107
|
end
|
108
|
+
#
|
109
|
+
rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
|
103
110
|
|
104
111
|
end
|
112
|
+
rlist.compact! if rlist.is_a? Array
|
105
113
|
|
106
114
|
else
|
107
115
|
|
@@ -109,11 +117,13 @@ class Rexle
|
|
109
117
|
new_xpath = xpath_value[/^\/\/\w+\/(.*)/,1]
|
110
118
|
|
111
119
|
if new_xpath then
|
112
|
-
self.xpath(new_xpath, rlist)
|
120
|
+
self.xpath(new_xpath, rlist,&blk)
|
113
121
|
end
|
114
122
|
end
|
115
123
|
|
116
124
|
#a.shift # added by jr 171110
|
125
|
+
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) or (rlist.length > 1 and rlist[0].is_a? Array)
|
126
|
+
rlist = [rlist] if rlist.is_a? Rexle::Element
|
117
127
|
rlist
|
118
128
|
end
|
119
129
|
|
@@ -215,7 +225,6 @@ class Rexle
|
|
215
225
|
e = @child_elements[x.last]
|
216
226
|
h = x[0][1] # <-- fetch the attributes
|
217
227
|
|
218
|
-
|
219
228
|
if attr_search then
|
220
229
|
if attr_search.is_a? Fixnum then
|
221
230
|
block_given? ? blk.call(e) : e if i == attr_search
|
@@ -311,10 +320,10 @@ class Rexle
|
|
311
320
|
|
312
321
|
# scan a rexml doc
|
313
322
|
#
|
314
|
-
def scan_doc(node)
|
323
|
+
def scan_doc(node)a = rexle.xpath("records/url"){|e| %w(full_url short_url).map{|x| e.text(x)}}
|
315
324
|
children = node.elements.map {|child| scan_doc child}
|
316
325
|
attributes = node.attributes.inject({}){|r,x| r.merge(Hash[*x])}
|
317
326
|
[node.name, node.text.to_s, attributes, *children]
|
318
327
|
end
|
319
328
|
|
320
|
-
end
|
329
|
+
end
|