rexle 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rexle.rb +29 -20
- metadata +1 -1
data/lib/rexle.rb
CHANGED
|
@@ -24,18 +24,20 @@ class Rexle
|
|
|
24
24
|
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def xpath(path)
|
|
27
|
+
def xpath(path, &blk)
|
|
28
28
|
|
|
29
29
|
# is it a function
|
|
30
30
|
fn_match = path.match(/^(\w+)\(([^\)]+)\)$/)
|
|
31
31
|
|
|
32
|
+
# Array: proc {|x| x.flatten.compact},
|
|
32
33
|
if fn_match.nil? then
|
|
33
34
|
procs = {
|
|
34
|
-
Array: proc {|x| x.flatten
|
|
35
|
-
String: proc {|x| x}
|
|
35
|
+
Array: proc {|x| block_given? ? x : x.flatten },
|
|
36
|
+
String: proc {|x| x},
|
|
37
|
+
:"Rexle::Element" => proc {|x| [x]}
|
|
36
38
|
}
|
|
37
39
|
bucket = []
|
|
38
|
-
result = @doc.xpath(path, bucket)
|
|
40
|
+
result = @doc.xpath(path, bucket, &blk)
|
|
39
41
|
|
|
40
42
|
procs[result.class.to_s.to_sym].call(result)
|
|
41
43
|
|
|
@@ -57,23 +59,23 @@ class Rexle
|
|
|
57
59
|
@child_lookup = []
|
|
58
60
|
end
|
|
59
61
|
|
|
60
|
-
def xpath(xpath_value, rlist=[])
|
|
62
|
+
def xpath(xpath_value, rlist=[], &blk)
|
|
61
63
|
|
|
62
|
-
|
|
64
|
+
a_path = xpath_value.split('/')
|
|
63
65
|
|
|
64
66
|
if xpath_value[0,2] == '//' then
|
|
65
|
-
s =
|
|
67
|
+
s = a_path[2]
|
|
66
68
|
elsif xpath_value == 'text()' then
|
|
67
|
-
|
|
69
|
+
a_path.shift
|
|
68
70
|
return @value
|
|
69
71
|
else
|
|
70
72
|
attribute = xpath_value[/^attribute::(.*)/,1]
|
|
71
73
|
return @attributes[attribute] if attribute and @attributes and @attributes.has_key?(attribute)
|
|
72
74
|
|
|
73
|
-
s =
|
|
75
|
+
s = a_path.shift
|
|
74
76
|
end
|
|
75
77
|
|
|
76
|
-
elmnt_path = s[/^(\w+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
|
78
|
+
elmnt_path = s[/^([\w\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
|
|
77
79
|
element_name, condition = elmnt_path.match(/(^[^\[]+)(\[[^\]]+\])?/).captures
|
|
78
80
|
|
|
79
81
|
attr_search = format_attributes(condition) if condition
|
|
@@ -86,22 +88,28 @@ class Rexle
|
|
|
86
88
|
|
|
87
89
|
if return_elements.length > 0 then
|
|
88
90
|
|
|
89
|
-
if
|
|
90
|
-
rlist = return_elements.map.with_index {|x,i| filter(x, i+1, attr_search)}
|
|
91
|
+
if a_path.empty? then
|
|
92
|
+
rlist = return_elements.map.with_index {|x,i| filter(x, i+1, attr_search, &blk)}
|
|
93
|
+
rlist = rlist[0] if rlist.length <= 1
|
|
91
94
|
else
|
|
92
95
|
|
|
93
96
|
rlist << return_elements.map.with_index do |x,i|
|
|
94
|
-
rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(
|
|
95
|
-
next
|
|
97
|
+
rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(a_path.join('/'), &blk); (r || e) }
|
|
98
|
+
next if rtn_element.nil? or (rtn_element.is_a? Array and rtn_element.empty?)
|
|
96
99
|
|
|
97
100
|
if rtn_element.is_a? Array then
|
|
98
|
-
rtn_element
|
|
99
|
-
elsif (rtn_element.is_a? String) || not(rtn_element[0].is_a? String)
|
|
101
|
+
rtn_element
|
|
102
|
+
elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) and not(rtn_element[0].is_a? String))
|
|
103
|
+
rtn_element
|
|
104
|
+
elsif rtn_element.is_a? Rexle::Element
|
|
100
105
|
rtn_element
|
|
101
106
|
end
|
|
102
107
|
end
|
|
108
|
+
#
|
|
109
|
+
rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
|
|
103
110
|
|
|
104
111
|
end
|
|
112
|
+
rlist.compact! if rlist.is_a? Array
|
|
105
113
|
|
|
106
114
|
else
|
|
107
115
|
|
|
@@ -109,11 +117,13 @@ class Rexle
|
|
|
109
117
|
new_xpath = xpath_value[/^\/\/\w+\/(.*)/,1]
|
|
110
118
|
|
|
111
119
|
if new_xpath then
|
|
112
|
-
self.xpath(new_xpath, rlist)
|
|
120
|
+
self.xpath(new_xpath, rlist,&blk)
|
|
113
121
|
end
|
|
114
122
|
end
|
|
115
123
|
|
|
116
124
|
#a.shift # added by jr 171110
|
|
125
|
+
rlist = rlist.flatten(1) unless not(rlist.is_a? Array) or (rlist.length > 1 and rlist[0].is_a? Array)
|
|
126
|
+
rlist = [rlist] if rlist.is_a? Rexle::Element
|
|
117
127
|
rlist
|
|
118
128
|
end
|
|
119
129
|
|
|
@@ -215,7 +225,6 @@ class Rexle
|
|
|
215
225
|
e = @child_elements[x.last]
|
|
216
226
|
h = x[0][1] # <-- fetch the attributes
|
|
217
227
|
|
|
218
|
-
|
|
219
228
|
if attr_search then
|
|
220
229
|
if attr_search.is_a? Fixnum then
|
|
221
230
|
block_given? ? blk.call(e) : e if i == attr_search
|
|
@@ -311,10 +320,10 @@ class Rexle
|
|
|
311
320
|
|
|
312
321
|
# scan a rexml doc
|
|
313
322
|
#
|
|
314
|
-
def scan_doc(node)
|
|
323
|
+
# Scan a rexml doc: recursively convert +node+ and its descendants into
# nested arrays.
#
# @param node [#name, #text, #attributes, #elements] the node to convert;
#   presumably a REXML::Element -- any object answering these four
#   messages is handled the same way.
# @return [Array] [name, text, attributes_hash, *child_arrays], where each
#   child array has the same layout. A nil text becomes "" via #to_s.
def scan_doc(node)
  # Children are converted first so they can be splatted onto the tail.
  children = node.elements.map { |child| scan_doc(child) }
  # Each attribute is yielded as a name/value pair; fold them into one Hash.
  attributes = node.attributes.inject({}) { |r, x| r.merge(Hash[*x]) }
  [node.name, node.text.to_s, attributes, *children]
end
|
|
319
328
|
|
|
320
|
-
end
|
|
329
|
+
end
|