xml_row_finder 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/xml_row_finder.rb +8 -8
- data.tar.gz.sig +0 -0
- metadata +1 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 805ba05e167f14ca224f83611d91ada484c31b7c95b329468f4e2fce47302e0e
|
4
|
+
data.tar.gz: f70f9dd37c9e9f4be41d2d0c5d4408c70049d810273c84eeec1b451c40ad0df4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2edc584e01ae048ffb4d7b6ee88d4b08a1dd79ce9b593cecf0b24f9fe606809eb1ae50f6e85d5dee70f75a172dcce4eced9e2d3934f90bb23b77321c00fa04e6
|
7
|
+
data.tar.gz: 00ddeb4c7b968e5e31acf8e3accd7d466d9d9941e0d53d7892a9ffb6b82965b2b97f48d7d3cb32c6bb5013466888a12e384a3bd27c52486bddf79b75178758dc
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/xml_row_finder.rb
CHANGED
@@ -13,7 +13,7 @@ class XMLRowFinder
|
|
13
13
|
|
14
14
|
@debug = debug
|
15
15
|
|
16
|
-
|
16
|
+
doc = if raws =~ /^http/ then
|
17
17
|
|
18
18
|
nki = Nokorexi.new(url=raws) do |doc1|
|
19
19
|
doc1.xpath('//*[@onclick]').each do |e|
|
@@ -32,9 +32,11 @@ class XMLRowFinder
|
|
32
32
|
Rexle.new(raws)
|
33
33
|
end
|
34
34
|
|
35
|
+
@doc = Rexle.new(doc.xml)
|
36
|
+
|
35
37
|
a = []
|
36
38
|
|
37
|
-
|
39
|
+
doc.root.each_recursive do |e|
|
38
40
|
e.attributes.delete
|
39
41
|
a << e.backtrack.to_xpath
|
40
42
|
end
|
@@ -52,7 +54,7 @@ class XMLRowFinder
|
|
52
54
|
|
53
55
|
# using Nokogiri since Rexle has a bug with xpath predicates
|
54
56
|
#
|
55
|
-
@doc2 = Nokogiri::XML(
|
57
|
+
@doc2 = Nokogiri::XML(doc.root.xml)
|
56
58
|
|
57
59
|
a5 = a4[0..-2].map do |xpath2|
|
58
60
|
[@doc2.xpath(xpath2).length, xpath2]
|
@@ -65,11 +67,11 @@ class XMLRowFinder
|
|
65
67
|
# find the container element
|
66
68
|
xpath = @xpath[/^[^\[]+/]
|
67
69
|
axpath = xpath.split('/')
|
68
|
-
e =
|
70
|
+
e = doc.element xpath
|
69
71
|
|
70
72
|
until (e.xml.include? last_row) do
|
71
73
|
axpath.pop
|
72
|
-
e =
|
74
|
+
e = doc.element axpath.join('/')
|
73
75
|
end
|
74
76
|
|
75
77
|
@cont_xpath = axpath.join('/')
|
@@ -77,10 +79,9 @@ class XMLRowFinder
|
|
77
79
|
end
|
78
80
|
|
79
81
|
# returns the container element for all rows
|
80
|
-
# object returned: Rexle::Element
|
81
82
|
#
|
82
83
|
def body()
|
83
|
-
@doc.element
|
84
|
+
Rexle.new(@doc.element(@cont_xpath).xml)
|
84
85
|
end
|
85
86
|
|
86
87
|
# returns the xpath pointing to the container element for all rows
|
@@ -89,7 +90,6 @@ class XMLRowFinder
|
|
89
90
|
@cont_xpath
|
90
91
|
end
|
91
92
|
|
92
|
-
|
93
93
|
# returns rows
|
94
94
|
# object returned: An array of Nokogiri XML Element objects
|
95
95
|
#
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
metadata.gz.sig
CHANGED
Binary file
|