spider 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES CHANGED
@@ -1,3 +1,9 @@
1
+ 2007-10-31:
2
+ * Add `setup' and `teardown' handlers.
3
+ * Can set the headers for an HTTP request.
4
+ * Changed :any to :every.
5
+ * Changed the arguments to the :every, :success, :failure, and code handlers.
6
+
1
7
  2007-10-23:
2
8
  * URLs without a page component but with a query component.
3
9
  * HTTP Redirect.
data/README CHANGED
@@ -10,17 +10,17 @@ scraping, collecting, and looping so that you can just handle the data.
10
10
  end
11
11
 
12
12
  # Handle 404s.
13
- s.on 404 do |a_url, err_code|
13
+ s.on 404 do |a_url, resp, prior_url|
14
14
  puts "URL not found: #{a_url}"
15
15
  end
16
16
 
17
17
  # Handle 2xx.
18
- s.on :success do |a_url, code, headers, body|
19
- puts "body: #{body}"
18
+ s.on :success do |a_url, resp, prior_url|
19
+ puts "body: #{resp.body}"
20
20
  end
21
21
 
22
22
  # Handle everything.
23
- s.on :any do |a_url, resp|
23
+ s.on :every do |a_url, resp, prior_url|
24
24
  puts "URL returned anything: #{a_url} with this code #{resp.code}"
25
25
  end
26
26
  end
@@ -0,0 +1,144 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Net::HTTPRedirection</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Net::HTTPRedirection</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/spider_rb.html">
59
+ lib/spider.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+
82
+
83
+ </div>
84
+
85
+ <div id="method-list">
86
+ <h3 class="section-bar">Methods</h3>
87
+
88
+ <div class="name-list">
89
+ <a href="#M000008">redirect?</a>&nbsp;&nbsp;
90
+ </div>
91
+ </div>
92
+
93
+ </div>
94
+
95
+
96
+ <!-- if includes -->
97
+
98
+ <div id="section">
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+ <!-- if method_list -->
108
+ <div id="methods">
109
+ <h3 class="section-bar">Public Instance methods</h3>
110
+
111
+ <div id="method-M000008" class="method-detail">
112
+ <a name="M000008"></a>
113
+
114
+ <div class="method-heading">
115
+ <a href="#M000008" class="method-signature">
116
+ <span class="method-name">redirect?</span><span class="method-args">()</span>
117
+ </a>
118
+ </div>
119
+
120
+ <div class="method-description">
121
+ <p><a class="source-toggle" href="#"
122
+ onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
123
+ <div class="method-source-code" id="M000008-source">
124
+ <pre>
125
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 41</span>
126
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">redirect?</span>; <span class="ruby-keyword kw">true</span>; <span class="ruby-keyword kw">end</span>
127
+ </pre>
128
+ </div>
129
+ </div>
130
+ </div>
131
+
132
+
133
+ </div>
134
+
135
+
136
+ </div>
137
+
138
+
139
+ <div id="validator-badges">
140
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
141
+ </div>
142
+
143
+ </body>
144
+ </html>
@@ -0,0 +1,166 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Net::HTTPResponse</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Net::HTTPResponse</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/spider_rb.html">
59
+ lib/spider.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+
82
+
83
+ </div>
84
+
85
+ <div id="method-list">
86
+ <h3 class="section-bar">Methods</h3>
87
+
88
+ <div class="name-list">
89
+ <a href="#M000011">redirect?</a>&nbsp;&nbsp;
90
+ <a href="#M000010">success?</a>&nbsp;&nbsp;
91
+ </div>
92
+ </div>
93
+
94
+ </div>
95
+
96
+
97
+ <!-- if includes -->
98
+
99
+ <div id="section">
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+ <!-- if method_list -->
109
+ <div id="methods">
110
+ <h3 class="section-bar">Public Instance methods</h3>
111
+
112
+ <div id="method-M000011" class="method-detail">
113
+ <a name="M000011"></a>
114
+
115
+ <div class="method-heading">
116
+ <a href="#M000011" class="method-signature">
117
+ <span class="method-name">redirect?</span><span class="method-args">()</span>
118
+ </a>
119
+ </div>
120
+
121
+ <div class="method-description">
122
+ <p><a class="source-toggle" href="#"
123
+ onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
124
+ <div class="method-source-code" id="M000011-source">
125
+ <pre>
126
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 35</span>
127
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">redirect?</span>; <span class="ruby-keyword kw">false</span>; <span class="ruby-keyword kw">end</span>
128
+ </pre>
129
+ </div>
130
+ </div>
131
+ </div>
132
+
133
+ <div id="method-M000010" class="method-detail">
134
+ <a name="M000010"></a>
135
+
136
+ <div class="method-heading">
137
+ <a href="#M000010" class="method-signature">
138
+ <span class="method-name">success?</span><span class="method-args">()</span>
139
+ </a>
140
+ </div>
141
+
142
+ <div class="method-description">
143
+ <p><a class="source-toggle" href="#"
144
+ onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
145
+ <div class="method-source-code" id="M000010-source">
146
+ <pre>
147
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 34</span>
148
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">success?</span>; <span class="ruby-keyword kw">false</span>; <span class="ruby-keyword kw">end</span>
149
+ </pre>
150
+ </div>
151
+ </div>
152
+ </div>
153
+
154
+
155
+ </div>
156
+
157
+
158
+ </div>
159
+
160
+
161
+ <div id="validator-badges">
162
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
163
+ </div>
164
+
165
+ </body>
166
+ </html>
@@ -0,0 +1,144 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Net::HTTPSuccess</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Net::HTTPSuccess</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/spider_rb.html">
59
+ lib/spider.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+
82
+
83
+ </div>
84
+
85
+ <div id="method-list">
86
+ <h3 class="section-bar">Methods</h3>
87
+
88
+ <div class="name-list">
89
+ <a href="#M000009">success?</a>&nbsp;&nbsp;
90
+ </div>
91
+ </div>
92
+
93
+ </div>
94
+
95
+
96
+ <!-- if includes -->
97
+
98
+ <div id="section">
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+ <!-- if method_list -->
108
+ <div id="methods">
109
+ <h3 class="section-bar">Public Instance methods</h3>
110
+
111
+ <div id="method-M000009" class="method-detail">
112
+ <a name="M000009"></a>
113
+
114
+ <div class="method-heading">
115
+ <a href="#M000009" class="method-signature">
116
+ <span class="method-name">success?</span><span class="method-args">()</span>
117
+ </a>
118
+ </div>
119
+
120
+ <div class="method-description">
121
+ <p><a class="source-toggle" href="#"
122
+ onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
123
+ <div class="method-source-code" id="M000009-source">
124
+ <pre>
125
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 38</span>
126
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">success?</span>; <span class="ruby-keyword kw">true</span>; <span class="ruby-keyword kw">end</span>
127
+ </pre>
128
+ </div>
129
+ </div>
130
+ </div>
131
+
132
+
133
+ </div>
134
+
135
+
136
+ </div>
137
+
138
+
139
+ <div id="validator-badges">
140
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
141
+ </div>
142
+
143
+ </body>
144
+ </html>