spider 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +6 -0
- data/README +4 -4
- data/doc/classes/Net/HTTPRedirection.html +144 -0
- data/doc/classes/Net/HTTPResponse.html +166 -0
- data/doc/classes/Net/HTTPSuccess.html +144 -0
- data/doc/classes/NilClass.html +144 -0
- data/doc/classes/Spider.html +12 -12
- data/doc/classes/SpiderInstance.html +109 -32
- data/doc/created.rid +1 -1
- data/doc/files/README.html +5 -5
- data/doc/files/lib/spider_rb.html +5 -5
- data/doc/fr_class_index.html +0 -1
- data/doc/fr_file_index.html +1 -0
- data/doc/fr_method_index.html +5 -2
- data/lib/spider.rb +100 -58
- data/spec/spider_instance_spec.rb +115 -30
- data/spider.gemspec +1 -1
- data/test_server/client.rb +4 -4
- metadata +7 -2
data/CHANGES
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
2007-10-31:
|
2
|
+
* Add `setup' and `teardown' handlers.
|
3
|
+
* Can set the headers for a HTTP request.
|
4
|
+
* Changed :any to :every .
|
5
|
+
* Changed the arguments to the :every, :success, :failure, and code handler.
|
6
|
+
|
1
7
|
2007-10-23:
|
2
8
|
* URLs without a page component but with a query component.
|
3
9
|
* HTTP Redirect.
|
data/README
CHANGED
@@ -10,17 +10,17 @@ scraping, collecting, and looping so that you can just handle the data.
|
|
10
10
|
end
|
11
11
|
|
12
12
|
# Handle 404s.
|
13
|
-
s.on 404 do |a_url,
|
13
|
+
s.on 404 do |a_url, resp, prior_url|
|
14
14
|
puts "URL not found: #{a_url}"
|
15
15
|
end
|
16
16
|
|
17
17
|
# Handle 2xx.
|
18
|
-
s.on :success do |a_url,
|
19
|
-
puts "body: #{body}"
|
18
|
+
s.on :success do |a_url, resp, prior_url|
|
19
|
+
puts "body: #{resp.body}"
|
20
20
|
end
|
21
21
|
|
22
22
|
# Handle everything.
|
23
|
-
s.on :
|
23
|
+
s.on :every do |a_url, resp, prior_url|
|
24
24
|
puts "URL returned anything: #{a_url} with this code #{resp.code}"
|
25
25
|
end
|
26
26
|
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Net::HTTPRedirection</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Net::HTTPRedirection</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/spider_rb.html">
|
59
|
+
lib/spider.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000008">redirect?</a>
|
90
|
+
</div>
|
91
|
+
</div>
|
92
|
+
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
<!-- if includes -->
|
97
|
+
|
98
|
+
<div id="section">
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
<!-- if method_list -->
|
108
|
+
<div id="methods">
|
109
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
110
|
+
|
111
|
+
<div id="method-M000008" class="method-detail">
|
112
|
+
<a name="M000008"></a>
|
113
|
+
|
114
|
+
<div class="method-heading">
|
115
|
+
<a href="#M000008" class="method-signature">
|
116
|
+
<span class="method-name">redirect?</span><span class="method-args">()</span>
|
117
|
+
</a>
|
118
|
+
</div>
|
119
|
+
|
120
|
+
<div class="method-description">
|
121
|
+
<p><a class="source-toggle" href="#"
|
122
|
+
onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
|
123
|
+
<div class="method-source-code" id="M000008-source">
|
124
|
+
<pre>
|
125
|
+
<span class="ruby-comment cmt"># File lib/spider.rb, line 41</span>
|
126
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">redirect?</span>; <span class="ruby-keyword kw">true</span>; <span class="ruby-keyword kw">end</span>
|
127
|
+
</pre>
|
128
|
+
</div>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
|
133
|
+
</div>
|
134
|
+
|
135
|
+
|
136
|
+
</div>
|
137
|
+
|
138
|
+
|
139
|
+
<div id="validator-badges">
|
140
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
141
|
+
</div>
|
142
|
+
|
143
|
+
</body>
|
144
|
+
</html>
|
@@ -0,0 +1,166 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Net::HTTPResponse</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Net::HTTPResponse</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/spider_rb.html">
|
59
|
+
lib/spider.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000011">redirect?</a>
|
90
|
+
<a href="#M000010">success?</a>
|
91
|
+
</div>
|
92
|
+
</div>
|
93
|
+
|
94
|
+
</div>
|
95
|
+
|
96
|
+
|
97
|
+
<!-- if includes -->
|
98
|
+
|
99
|
+
<div id="section">
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
<!-- if method_list -->
|
109
|
+
<div id="methods">
|
110
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
111
|
+
|
112
|
+
<div id="method-M000011" class="method-detail">
|
113
|
+
<a name="M000011"></a>
|
114
|
+
|
115
|
+
<div class="method-heading">
|
116
|
+
<a href="#M000011" class="method-signature">
|
117
|
+
<span class="method-name">redirect?</span><span class="method-args">()</span>
|
118
|
+
</a>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div class="method-description">
|
122
|
+
<p><a class="source-toggle" href="#"
|
123
|
+
onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
|
124
|
+
<div class="method-source-code" id="M000011-source">
|
125
|
+
<pre>
|
126
|
+
<span class="ruby-comment cmt"># File lib/spider.rb, line 35</span>
|
127
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">redirect?</span>; <span class="ruby-keyword kw">false</span>; <span class="ruby-keyword kw">end</span>
|
128
|
+
</pre>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
</div>
|
132
|
+
|
133
|
+
<div id="method-M000010" class="method-detail">
|
134
|
+
<a name="M000010"></a>
|
135
|
+
|
136
|
+
<div class="method-heading">
|
137
|
+
<a href="#M000010" class="method-signature">
|
138
|
+
<span class="method-name">success?</span><span class="method-args">()</span>
|
139
|
+
</a>
|
140
|
+
</div>
|
141
|
+
|
142
|
+
<div class="method-description">
|
143
|
+
<p><a class="source-toggle" href="#"
|
144
|
+
onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
|
145
|
+
<div class="method-source-code" id="M000010-source">
|
146
|
+
<pre>
|
147
|
+
<span class="ruby-comment cmt"># File lib/spider.rb, line 34</span>
|
148
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">success?</span>; <span class="ruby-keyword kw">false</span>; <span class="ruby-keyword kw">end</span>
|
149
|
+
</pre>
|
150
|
+
</div>
|
151
|
+
</div>
|
152
|
+
</div>
|
153
|
+
|
154
|
+
|
155
|
+
</div>
|
156
|
+
|
157
|
+
|
158
|
+
</div>
|
159
|
+
|
160
|
+
|
161
|
+
<div id="validator-badges">
|
162
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
163
|
+
</div>
|
164
|
+
|
165
|
+
</body>
|
166
|
+
</html>
|
@@ -0,0 +1,144 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Net::HTTPSuccess</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Net::HTTPSuccess</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/spider_rb.html">
|
59
|
+
lib/spider.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000009">success?</a>
|
90
|
+
</div>
|
91
|
+
</div>
|
92
|
+
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
<!-- if includes -->
|
97
|
+
|
98
|
+
<div id="section">
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
<!-- if method_list -->
|
108
|
+
<div id="methods">
|
109
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
110
|
+
|
111
|
+
<div id="method-M000009" class="method-detail">
|
112
|
+
<a name="M000009"></a>
|
113
|
+
|
114
|
+
<div class="method-heading">
|
115
|
+
<a href="#M000009" class="method-signature">
|
116
|
+
<span class="method-name">success?</span><span class="method-args">()</span>
|
117
|
+
</a>
|
118
|
+
</div>
|
119
|
+
|
120
|
+
<div class="method-description">
|
121
|
+
<p><a class="source-toggle" href="#"
|
122
|
+
onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
|
123
|
+
<div class="method-source-code" id="M000009-source">
|
124
|
+
<pre>
|
125
|
+
<span class="ruby-comment cmt"># File lib/spider.rb, line 38</span>
|
126
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">success?</span>; <span class="ruby-keyword kw">true</span>; <span class="ruby-keyword kw">end</span>
|
127
|
+
</pre>
|
128
|
+
</div>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
|
133
|
+
</div>
|
134
|
+
|
135
|
+
|
136
|
+
</div>
|
137
|
+
|
138
|
+
|
139
|
+
<div id="validator-badges">
|
140
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
141
|
+
</div>
|
142
|
+
|
143
|
+
</body>
|
144
|
+
</html>
|