spider 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES CHANGED
@@ -1,9 +1,17 @@
1
+ 2007-10-23:
2
+ * URLs without a page component but with a query component.
3
+ * HTTP Redirect.
4
+ * HTTPS.
5
+ * Version 0.2.1 .
6
+
1
7
  2007-10-22:
2
8
  * Use RSpec to ensure that it mostly works.
3
9
  * Use WEBrick to create a small test server for additional testing.
4
10
  * Completely re-do the API to prepare for future expansion.
5
11
  * Add the ability to apply each URL to a series of custom allowed?-like
6
12
  matchers.
13
+ * BSD license.
14
+ * Version 0.2.0 .
7
15
 
8
16
  2007-03-30:
9
17
  * Clean up the documentation.
@@ -93,7 +93,7 @@ links, and doing it all over again.
93
93
  <h3 class="section-bar">Methods</h3>
94
94
 
95
95
  <div class="name-list">
96
- <a href="#M000003">start_at</a>&nbsp;&nbsp;
96
+ <a href="#M000004">start_at</a>&nbsp;&nbsp;
97
97
  </div>
98
98
  </div>
99
99
 
@@ -115,11 +115,11 @@ links, and doing it all over again.
115
115
  <div id="methods">
116
116
  <h3 class="section-bar">Public Class methods</h3>
117
117
 
118
- <div id="method-M000003" class="method-detail">
119
- <a name="M000003"></a>
118
+ <div id="method-M000004" class="method-detail">
119
+ <a name="M000004"></a>
120
120
 
121
121
  <div class="method-heading">
122
- <a href="#M000003" class="method-signature">
122
+ <a href="#M000004" class="method-signature">
123
123
  <span class="method-name">start_at</span><span class="method-args">(a_url, &amp;block)</span>
124
124
  </a>
125
125
  </div>
@@ -150,10 +150,10 @@ define the rules and handlers for the discovered Web pages.
150
150
  end
151
151
  </pre>
152
152
  <p><a class="source-toggle" href="#"
153
- onclick="toggleCode('M000003-source');return false;">[Source]</a></p>
154
- <div class="method-source-code" id="M000003-source">
153
+ onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
154
+ <div class="method-source-code" id="M000004-source">
155
155
  <pre>
156
- <span class="ruby-comment cmt"># File lib/spider.rb, line 55</span>
156
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 68</span>
157
157
  <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
158
158
  <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
159
159
  <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>([<span class="ruby-identifier">a_url</span>], [], <span class="ruby-identifier">rules</span>, [])
@@ -88,6 +88,7 @@
88
88
  <div class="name-list">
89
89
  <a href="#M000001">add_url_check</a>&nbsp;&nbsp;
90
90
  <a href="#M000002">on</a>&nbsp;&nbsp;
91
+ <a href="#M000003">remove_trailing_slash</a>&nbsp;&nbsp;
91
92
  </div>
92
93
  </div>
93
94
 
@@ -135,7 +136,7 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
135
136
  onclick="toggleCode('M000001-source');return false;">[Source]</a></p>
136
137
  <div class="method-source-code" id="M000001-source">
137
138
  <pre>
138
- <span class="ruby-comment cmt"># File lib/spider.rb, line 81</span>
139
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 94</span>
139
140
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
140
141
  <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
141
142
  <span class="ruby-keyword kw">end</span>
@@ -195,7 +196,7 @@ For example:
195
196
  onclick="toggleCode('M000002-source');return false;">[Source]</a></p>
196
197
  <div class="method-source-code" id="M000002-source">
197
198
  <pre>
198
- <span class="ruby-comment cmt"># File lib/spider.rb, line 118</span>
199
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 131</span>
199
200
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
200
201
  <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
201
202
  <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
@@ -214,6 +215,29 @@ For example:
214
215
  </div>
215
216
  </div>
216
217
 
218
+ <div id="method-M000003" class="method-detail">
219
+ <a name="M000003"></a>
220
+
221
+ <div class="method-heading">
222
+ <a href="#M000003" class="method-signature">
223
+ <span class="method-name">remove_trailing_slash</span><span class="method-args">(s)</span>
224
+ </a>
225
+ </div>
226
+
227
+ <div class="method-description">
228
+ <p><a class="source-toggle" href="#"
229
+ onclick="toggleCode('M000003-source');return false;">[Source]</a></p>
230
+ <div class="method-source-code" id="M000003-source">
231
+ <pre>
232
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 257</span>
233
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_trailing_slash</span>(<span class="ruby-identifier">s</span>)
234
+ <span class="ruby-identifier">s</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">%r{/*$}</span>,<span class="ruby-value str">''</span>)
235
+ <span class="ruby-keyword kw">end</span>
236
+ </pre>
237
+ </div>
238
+ </div>
239
+ </div>
240
+
217
241
 
218
242
  </div>
219
243
 
@@ -1 +1 @@
1
- Mon, 22 Oct 2007 07:35:00 -0400
1
+ Tue, 23 Oct 2007 23:14:46 -0400
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Mon Oct 22 07:19:31 -0400 2007</td>
59
+ <td>Tue Oct 23 23:11:42 -0400 2007</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -125,6 +125,7 @@ href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589">blade.
125
125
  open-uri&nbsp;&nbsp;
126
126
  uri&nbsp;&nbsp;
127
127
  net/http&nbsp;&nbsp;
128
+ net/https&nbsp;&nbsp;
128
129
  </div>
129
130
  </div>
130
131
 
@@ -20,7 +20,6 @@
20
20
  <div id="index">
21
21
  <h1 class="section-bar">Files</h1>
22
22
  <div id="index-entries">
23
- <a href="files/README.html">README</a><br />
24
23
  <a href="files/lib/spider_rb.html">lib/spider.rb</a><br />
25
24
  </div>
26
25
  </div>
@@ -22,7 +22,8 @@
22
22
  <div id="index-entries">
23
23
  <a href="classes/SpiderInstance.html#M000001">add_url_check (SpiderInstance)</a><br />
24
24
  <a href="classes/SpiderInstance.html#M000002">on (SpiderInstance)</a><br />
25
- <a href="classes/Spider.html#M000003">start_at (Spider)</a><br />
25
+ <a href="classes/SpiderInstance.html#M000003">remove_trailing_slash (SpiderInstance)</a><br />
26
+ <a href="classes/Spider.html#M000004">start_at (Spider)</a><br />
26
27
  </div>
27
28
  </div>
28
29
  </body>
@@ -27,13 +27,18 @@ require 'robot_rules'
27
27
  require 'open-uri'
28
28
  require 'uri'
29
29
  require 'net/http'
30
+ require 'net/https'
30
31
 
31
32
  class Net::HTTPResponse #:nodoc:
32
33
  def success?; false; end
34
+ def redirect?; false; end
33
35
  end
34
36
  class Net::HTTPSuccess #:nodoc:
35
37
  def success?; true; end
36
38
  end
39
+ class Net::HTTPRedirection #:nodoc:
40
+ def redirect?; true; end
41
+ end
37
42
 
38
43
  # A spidering library for Ruby. Handles robots.txt, scraping, finding more
39
44
  # links, and doing it all over again.
@@ -188,15 +193,16 @@ class SpiderInstance
188
193
  def get_page(parsed_url, &block) #:nodoc:
189
194
  @seen << parsed_url
190
195
  begin
191
- Net::HTTP.start(parsed_url.host, parsed_url.port) do |http|
192
- r = http.request(Net::HTTP::Get.new(parsed_url.path))
193
- if r.is_a?(Net::HTTPRedirection)
194
- get_page(URI.parse(r['Location']), block)
195
- else
196
- block.call(r)
197
- end
196
+ http = Net::HTTP.new(parsed_url.host, parsed_url.port)
197
+ http.use_ssl = parsed_url.scheme == 'https'
198
+ # Uses start because http.finish cannot be called.
199
+ r = http.start {|h| h.request(Net::HTTP::Get.new(parsed_url.request_uri))}
200
+ if r.redirect?
201
+ get_page(URI.parse(r['Location']), &block)
202
+ else
203
+ block.call(r)
198
204
  end
199
- rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError, Exception =>e
205
+ rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError => e
200
206
  p e
201
207
  nil
202
208
  end
@@ -207,7 +213,7 @@ class SpiderInstance
207
213
  if resp.success?
208
214
  cb_branch = @callbacks[:success]
209
215
  cb_branch[:any].call(a_url, resp.code, resp, resp.body) if cb_branch[:any]
210
- cb_branch[resp.code].call(a_url, resp.code, resp.headers, resp.body) if cb_branch[resp.code]
216
+ cb_branch[resp.code].call(a_url, resp.code, resp, resp.body) if cb_branch[resp.code]
211
217
  else
212
218
  cb_branch = @callbacks[:failure]
213
219
  cb_branch[:any].call(a_url, resp.code) if cb_branch[:any]
@@ -1,15 +1,84 @@
1
1
  require 'rubygems'
2
2
  require 'spec'
3
+ require 'webrick'
4
+ require 'webrick/https'
3
5
  require File.dirname(__FILE__)+'/../lib/spider'
4
6
 
5
7
  Spec::Runner.configure { |c| c.mock_with :mocha }
6
8
 
9
+ class QueryServlet < WEBrick::HTTPServlet::AbstractServlet
10
+ def do_GET(req, res)
11
+ res['Content-type'] = 'text/plain'
12
+ res.body = "response\n"
13
+ end
14
+ end
15
+
16
+ def null_logger
17
+ l = stub
18
+ [:log, :fatal, :error, :warn , :info, :debug].each do |k|
19
+ l.stubs(k)
20
+ l.stubs("#{k}?".to_sym)
21
+ end
22
+ l
23
+ end
24
+
7
25
  describe 'SpiderInstance' do
26
+ # Bug reported by John Nagro, using the example source http://eons.com/
27
+ # had to change line 192; uses request_uri now instead of path.
28
+ it 'should handle query URLs without a path' do
29
+ u = 'http://localhost:8888?s=1'
30
+ u_p = URI.parse(u)
31
+ @block_called = false
32
+ server = WEBrick::HTTPServer.new(:Port => 8888, :Logger => null_logger,
33
+ :AccessLog => [])
34
+ server.mount('/', QueryServlet)
35
+ Thread.new {server.start}
36
+ si = SpiderInstance.new([u])
37
+ si.get_page(u_p) do
38
+ @block_called = true
39
+ end
40
+ server.shutdown
41
+ @block_called.should be_true
42
+ end
43
+
44
+ # This solves a problem reported by John Nagro.
45
+ it 'should handle redirects' do
46
+ u = 'http://example.com/'
47
+ u_p = URI.parse(u)
48
+ @redirect_handled = false
49
+ mock_redirect_http
50
+ si = SpiderInstance.new([u])
51
+ si.get_page(u_p) do
52
+ @redirect_handled = true
53
+ end
54
+ @redirect_handled.should be_true
55
+ end
56
+
57
+ it 'should handle HTTPS' do
58
+ u = 'https://localhost:10443/'
59
+ u_p = URI.parse(u)
60
+ @page_called = false
61
+ server = WEBrick::HTTPServer.new(:Port => 10443,
62
+ :Logger => null_logger,
63
+ :AccessLog => [],
64
+ :SSLEnable => true,
65
+ :SSLCertName => [["O", "ruby-lang.org"], ["OU", "sample"], ["CN", WEBrick::Utils::getservername]],
66
+ :SSLComment => 'Comment of some sort')
67
+ server.mount('/', QueryServlet)
68
+ Thread.new {server.start}
69
+ si = SpiderInstance.new([u])
70
+ si.get_page(u_p) { @page_called = true }
71
+ server.shutdown
72
+ @page_called.should be_true
73
+ end
74
+
75
+ it 'should maintain the entire graph within some external object (or memory, or memcached)'
76
+
8
77
  it 'should skip URLs when allowable_url? is false' do
9
78
  u = 'http://example.com/'
10
79
  u_p = URI.parse(u)
11
- http_resp = stub(:success? => true, :code => 200, :headers => 1, :body => 1)
12
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
80
+ http_resp = stub(:redirect? => false, :success? => true, :code => 200, :headers => 1, :body => 1)
81
+ Net::HTTP.stubs(:new).returns(stub(:request => http_resp, :finish => nil))
13
82
  si = SpiderInstance.new([u])
14
83
  si.expects(:allowable_url?).with(u, u_p).returns(false)
15
84
  si.expects(:get_page).times(0)
@@ -19,8 +88,8 @@ describe 'SpiderInstance' do
19
88
  it 'should not skip URLs when allowable_url? is true' do
20
89
  u = 'http://example.com/'
21
90
  u_p = URI.parse(u)
22
- http_resp = stub(:success? => true, :code => 200, :headers => 1, :body => 1)
23
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
91
+ http_resp = stub(:redirect? => false, :success? => true, :code => 200, :headers => 1, :body => 1)
92
+ Net::HTTP.stubs(:new).returns(stub(:request => http_resp, :finish => nil))
24
93
  si = SpiderInstance.new([u])
25
94
  si.expects(:allowable_url?).with(u, u_p).returns(true)
26
95
  si.expects(:allowable_url?).with(nil, nil).returns(false)
@@ -68,19 +137,11 @@ describe 'SpiderInstance' do
68
137
  allowable = si.allowable_url?('http://example.com/',
69
138
  URI.parse('http://example.com/'))
70
139
  allowable.should == false
71
- @first_url_check == true
72
- @second_url_check == true
73
- end
74
-
75
- it 'should support memcached'
76
- it 'should avoid cycles using memcached'
77
-
78
- it 'should support memory' do
79
- si = SpiderInstance.new(['http://example.com/'])
80
- si.use_cache :memory # No exn
140
+ @first_url_check.should be_true
141
+ @second_url_check.should be_true
81
142
  end
82
143
 
83
- it 'should avoid cycles using memory' do
144
+ it 'should avoid cycles' do
84
145
  u = 'http://example.com/'
85
146
  u_p = URI.parse(u)
86
147
  si = SpiderInstance.new([u], [u_p])
@@ -92,32 +153,29 @@ describe 'SpiderInstance' do
92
153
 
93
154
  it 'should call the 404 handler for 404s' do
94
155
  @proc_called = false
95
- http_resp = stub(:success? => false, :code => 404)
96
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
156
+ mock_failed_http
97
157
  si = SpiderInstance.new(['http://example.com/'])
98
158
  si.stubs(:allowed?).returns(true)
99
159
  si.stubs(:generate_next_urls).returns([])
100
160
  si.on(404) {|*a| @proc_called = true}
101
161
  si.start!
102
- @proc_called.should == true
162
+ @proc_called.should be_true
103
163
  end
104
164
 
105
165
  it 'should call the :success handler on success' do
106
166
  @proc_called = false
107
- http_resp = stub(:success? => true, :code => 200, :headers => 1, :body => 1)
108
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
167
+ mock_successful_http
109
168
  si = SpiderInstance.new(['http://example.com/'])
110
169
  si.stubs(:allowed?).returns(true)
111
170
  si.stubs(:generate_next_urls).returns([])
112
171
  si.on(:success) {|*a| @proc_called = true}
113
172
  si.start!
114
- @proc_called.should == true
173
+ @proc_called.should be_true
115
174
  end
116
175
 
117
176
  it 'should not call the :success handler on failure' do
118
177
  @proc_called = false
119
- http_resp = stub(:success? => false, :code => 404)
120
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
178
+ mock_failed_http
121
179
  si = SpiderInstance.new(['http://example.com/'])
122
180
  si.stubs(:allowed?).returns(true)
123
181
  si.stubs(:generate_next_urls).returns([])
@@ -129,22 +187,20 @@ describe 'SpiderInstance' do
129
187
  it 'should call the :success handler and the 200 handler on 200' do
130
188
  @proc_200_called = false
131
189
  @proc_success_called = false
132
- http_resp = stub(:success? => true, :code => 200, :headers => 1, :body => 1)
133
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
190
+ mock_successful_http
134
191
  si = SpiderInstance.new(['http://example.com/'])
135
192
  si.stubs(:allowed?).returns(true)
136
193
  si.stubs(:generate_next_urls).returns([])
137
194
  si.on(:success) {|*a| @proc_success_called = true}
138
- si.on(200) {|*a| @proc_200_called = true}
195
+ si.on(200) {|*a| @proc_200_called = true}
139
196
  si.start!
140
- @proc_200_called.should == true
141
- @proc_success_called.should == true
197
+ @proc_200_called.should be_true
198
+ @proc_success_called.should be_true
142
199
  end
143
200
 
144
201
  it 'should not call the :failure handler on success' do
145
202
  @proc_called = false
146
- http_resp = stub(:success? => true, :code => 200, :headers => 1, :body => 1)
147
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
203
+ mock_successful_http
148
204
  si = SpiderInstance.new(['http://example.com/'])
149
205
  si.stubs(:allowed?).returns(true)
150
206
  si.stubs(:generate_next_urls).returns([])
@@ -155,65 +211,90 @@ describe 'SpiderInstance' do
155
211
 
156
212
  it 'should call the :failure handler on failure' do
157
213
  @proc_called = false
158
- http_resp = stub(:success? => false, :code => 404)
159
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
214
+ mock_failed_http
160
215
  si = SpiderInstance.new(['http://example.com/'])
161
216
  si.stubs(:allowed?).returns(true)
162
217
  si.stubs(:generate_next_urls).returns([])
163
218
  si.on(:failure) {|*a| @proc_called = true}
164
219
  si.start!
165
- @proc_called.should == true
220
+ @proc_called.should be_true
166
221
  end
167
222
 
168
223
  it 'should call the :failure handler and the 404 handler on 404' do
169
224
  @proc_404_called = false
170
225
  @proc_failure_called = false
171
- http_resp = stub(:success? => false, :code => 404)
172
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
226
+ mock_failed_http
173
227
  si = SpiderInstance.new(['http://example.com/'])
174
228
  si.stubs(:allowed?).returns(true)
175
229
  si.stubs(:generate_next_urls).returns([])
176
230
  si.on(:failure) {|*a| @proc_failure_called = true}
177
231
  si.on(404) {|*a| @proc_404_called = true}
178
232
  si.start!
179
- @proc_404_called.should == true
180
- @proc_failure_called.should == true
233
+ @proc_404_called.should be_true
234
+ @proc_failure_called.should be_true
181
235
  end
182
236
 
183
237
  it 'should call the :any handler even when a handler for the error code is defined' do
184
238
  @any_called = false
185
- http_resp = stub(:success? => true, :code => 200)
186
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
239
+ mock_successful_http
187
240
  si = SpiderInstance.new(['http://example.com/'])
188
241
  si.stubs(:allowed?).returns(true)
189
242
  si.stubs(:generate_next_urls).returns([])
190
243
  si.on(:any) { |*a| @any_called = true }
191
244
  si.on(202) {|*a|}
192
245
  si.start!
193
- @any_called.should == true
246
+ @any_called.should be_true
194
247
  end
195
248
 
196
249
  it 'should support a block as a response handler' do
197
250
  @proc_called = false
198
- http_resp = stub(:success? => true, :code => 200)
199
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
251
+ mock_successful_http
200
252
  si = SpiderInstance.new(['http://example.com/'])
201
253
  si.stubs(:allowed?).returns(true)
202
254
  si.stubs(:generate_next_urls).returns([])
203
255
  si.on(:any) { |*a| @proc_called = true }
204
256
  si.start!
205
- @proc_called.should == true
257
+ @proc_called.should be_true
206
258
  end
207
259
 
208
260
  it 'should support a proc as a response handler' do
209
261
  @proc_called = false
210
- http_resp = stub(:success? => true, :code => 200)
211
- Net::HTTP.stubs(:start).yields(stub(:request => http_resp))
262
+ mock_successful_http
212
263
  si = SpiderInstance.new(['http://example.com/'])
213
264
  si.stubs(:allowed?).returns(true)
214
265
  si.stubs(:generate_next_urls).returns([])
215
266
  si.on(:any, Proc.new { |*a| @proc_called = true })
216
267
  si.start!
217
- @proc_called.should == true
268
+ @proc_called.should be_true
269
+ end
270
+
271
+ def mock_http(http_req)
272
+ http_obj = mock(:use_ssl= => true)
273
+ http_obj.expects(:start).
274
+ yields(mock(:request => http_req)).returns(http_req)
275
+ Net::HTTP.expects(:new).returns(http_obj)
276
+ end
277
+
278
+ def mock_successful_http
279
+ http_req = stub(:redirect? => false, :success? => true, :code => 200, :body => 'body')
280
+ mock_http(http_req)
281
+ end
282
+
283
+ def mock_failed_http
284
+ http_req = stub(:redirect? => false, :success? => false, :code => 404)
285
+ mock_http(http_req)
286
+ end
287
+
288
+ def mock_redirect_http
289
+ http_req = stub(:redirect? => true, :success? => false, :code => 404)
290
+ http_req.expects(:[]).with('Location').returns('http://example.com/')
291
+ http_req2 = stub(:redirect? => false, :success? => true, :code => 200)
292
+ http_obj = mock(:use_ssl= => true)
293
+ http_obj.expects(:start).
294
+ yields(mock(:request => http_req)).returns(http_req)
295
+ http_obj2 = mock(:use_ssl= => true)
296
+ http_obj2.expects(:start).
297
+ yields(mock(:request => http_req2)).returns(http_req2)
298
+ Net::HTTP.expects(:new).times(2).returns(http_obj).then.returns(http_obj2)
218
299
  end
219
300
  end
@@ -13,5 +13,5 @@ spec = Gem::Specification.new do |s|
13
13
  A Web spidering library: handles robots.txt, scraping, finding more
14
14
  links, and doing it all over again.
15
15
  EOF
16
- s.version = '0.2.0'
16
+ s.version = '0.2.1'
17
17
  end
@@ -20,3 +20,7 @@ Spider.start_at('http://localhost:8880/page1.html') do |s|
20
20
  puts "URL returned anything: #{a_url} with this code #{resp.code}"
21
21
  end
22
22
  end
23
+
24
+ %w(INT TERM).each do |signal|
25
+ trap(signal) { exit }
26
+ end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: spider
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
7
- date: 2007-10-22 00:00:00 -04:00
6
+ version: 0.2.1
7
+ date: 2007-10-23 00:00:00 -04:00
8
8
  summary: A Web spidering library
9
9
  require_paths:
10
10
  - lib