spider 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,7 @@
  <div id="index">
  <h1 class="section-bar">Classes</h1>
  <div id="index-entries">
+ <a href="classes/IncludedInMemcached.html">IncludedInMemcached</a><br />
  <a href="classes/Spider.html">Spider</a><br />
  <a href="classes/SpiderInstance.html">SpiderInstance</a><br />
  </div>
@@ -21,7 +21,9 @@
  <h1 class="section-bar">Files</h1>
  <div id="index-entries">
  <a href="files/README.html">README</a><br />
+ <a href="files/lib/included_in_memcached_rb.html">lib/included_in_memcached.rb</a><br />
  <a href="files/lib/spider_rb.html">lib/spider.rb</a><br />
+ <a href="files/lib/spider_instance_rb.html">lib/spider_instance.rb</a><br />
  </div>
  </div>
  </body>
@@ -20,13 +20,17 @@
  <div id="index">
  <h1 class="section-bar">Methods</h1>
  <div id="index-entries">
- <a href="classes/SpiderInstance.html#M000001">add_url_check (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000006">clear_headers (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000005">headers (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000002">on (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000003">setup (SpiderInstance)</a><br />
- <a href="classes/Spider.html#M000007">start_at (Spider)</a><br />
- <a href="classes/SpiderInstance.html#M000004">teardown (SpiderInstance)</a><br />
+ <a href="classes/IncludedInMemcached.html#M000002"><< (IncludedInMemcached)</a><br />
+ <a href="classes/SpiderInstance.html#M000004">add_url_check (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000005">check_already_seen_with (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000010">clear_headers (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000009">headers (SpiderInstance)</a><br />
+ <a href="classes/IncludedInMemcached.html#M000003">include? (IncludedInMemcached)</a><br />
+ <a href="classes/IncludedInMemcached.html#M000001">new (IncludedInMemcached)</a><br />
+ <a href="classes/SpiderInstance.html#M000006">on (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000007">setup (SpiderInstance)</a><br />
+ <a href="classes/Spider.html#M000011">start_at (Spider)</a><br />
+ <a href="classes/SpiderInstance.html#M000008">teardown (SpiderInstance)</a><br />
  </div>
  </div>
  </body>
@@ -0,0 +1,22 @@
+ require 'memcache'
+
+ # A specialized class using memcached to track items stored. It supports
+ # three operations: new, <<, and include? . Together these can be used to
+ # add items to the memcache, then determine whether the item has been added.
+ class IncludedInMemcached
+   # Construct a new IncludedInMemcached instance. All arguments here are
+   # passed to MemCache (part of the memcache-client gem).
+   def initialize(*a)
+     @c = MemCache.new(*a)
+   end
+
+   # Add an item to the memcache.
+   def <<(v)
+     @c.add(v.to_s, v)
+   end
+
+   # True if the item is in the memcache.
+   def include?(v)
+     @c.get(v.to_s) == v
+   end
+ end
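
A minimal usage sketch of the new class, assuming the memcache-client gem is installed and a memcached daemon is listening on localhost:11211 (the address the gem's own docs use):

    require 'spider/included_in_memcached'

    seen = IncludedInMemcached.new('localhost:11211')
    seen << 'http://example.com/'              # stored under the item's to_s key
    seen.include?('http://example.com/')       # => true (fetches and compares)
    seen.include?('http://example.com/other')  # => false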
@@ -9,7 +9,7 @@
  #       notice, this list of conditions and the following disclaimer in the
  #       documentation and/or other materials provided with the distribution.
  #     * Neither the name Mike Burns nor the
- #       names of its contributors may be used to endorse or promote products
+ #       names of his contributors may be used to endorse or promote products
  #       derived from this software without specific prior written permission.
  #
  # THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
@@ -23,35 +23,15 @@
  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

- require 'robot_rules'
- require 'open-uri'
- require 'uri'
- require 'net/http'
- require 'net/https'
-
- module Net #:nodoc:
-   class HTTPResponse #:nodoc:
-     def success?; false; end
-     def redirect?; false; end
-   end
-   class HTTPSuccess #:nodoc:
-     def success?; true; end
-   end
-   class HTTPRedirection #:nodoc:
-     def redirect?; true; end
-   end
- end
-
- class NilClass #:nodoc:
-   def merge(h); h; end
- end
+ require File.dirname(__FILE__)+'/spider_instance'

  # A spidering library for Ruby. Handles robots.txt, scraping, finding more
  # links, and doing it all over again.
  class Spider
    # Runs the spider starting at the given URL. Also takes a block that is given
    # the SpiderInstance. Use the block to define the rules and handlers for
-   # the discovered Web pages.
+   # the discovered Web pages. See SpiderInstance for the possible rules and
+   # handlers.
    #
    #  Spider.start_at('http://mike-burns.com/') do |s|
    #    s.add_url_check do |a_url|
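
Together with the SpiderInstance rules and handlers the revised comment points to, a typical invocation looks like this sketch (the site URL and rules are illustrative, drawn from the doc comment's own example):

    require 'spider'

    Spider.start_at('http://mike-burns.com/') do |s|
      # Every url_check predicate must pass before a URL is fetched.
      s.add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }

      # Handlers can be keyed by :every, :success, :failure, or a status code.
      s.on(404)      { |a_url, resp, prior_url| puts "URL not found: #{a_url}" }
      s.on(:success) { |a_url, resp, prior_url| puts a_url }
    end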
@@ -78,225 +58,3 @@ class Spider
      a_spider.start!
    end
  end
-
- class SpiderInstance
-   def initialize(next_urls, seen = [], rules = nil, robots_seen = []) #:nodoc:
-     @url_checks = []
-     @cache = :memory
-     @callbacks = {}
-     @next_urls = next_urls
-     @seen = seen
-     @rules = rules || RobotRules.new('Ruby Spider 1.0')
-     @robots_seen = robots_seen
-     @headers = {}
-     @setup = nil
-     @teardown = nil
-   end
-
-   # Add a predicate that determines whether to continue down this URL's path.
-   # All predicates must be true in order for a URL to proceed.
-   #
-   # Takes a block that takes a string and produces a boolean. For example, this
-   # will ensure that the URL starts with 'http://mike-burns.com':
-   #
-   #  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*}
-   def add_url_check(&block)
-     @url_checks << block
-   end
-
-   def use_cache(cache_type) #:nodoc:
-     @cache = cache_type
-   end
-
-   # Add a response handler. A response handler's trigger can be :every,
-   # :success, :failure, or any HTTP status code. The handler itself can be
-   # either a Proc or a block.
-   #
-   # The arguments to the block are: the URL as a string, an instance of
-   # Net::HTTPResponse, and the prior URL as a string.
-   #
-   #
-   # For example:
-   #
-   #  on 404 do |a_url, resp, prior_url|
-   #    puts "URL not found: #{a_url}"
-   #  end
-   #
-   #  on :success do |a_url, resp, prior_url|
-   #    puts a_url
-   #    puts resp.body
-   #  end
-   #
-   #  on :every do |a_url, resp, prior_url|
-   #    puts "Given this code: #{resp.code}"
-   #  end
-   def on(code, p = nil, &block)
-     f = p ? p : block
-     case code
-     when Fixnum
-       @callbacks[code] = f
-     else
-       @callbacks[code.to_sym] = f
-     end
-   end
-
-   # Run before the HTTP request. Given the URL as a string.
-   #  setup do |a_url|
-   #    headers['Cookies'] = 'user_id=1;admin=true'
-   #  end
-   def setup(p = nil, &block)
-     @setup = p ? p : block
-   end
-
-   # Run last, once for each page. Given the URL as a string.
-   def teardown(p = nil, &block)
-     @teardown = p ? p : block
-   end
-
-   # Use like a hash:
-   #  headers['Cookies'] = 'user_id=1;password=btrross3'
-   def headers
-     HeaderSetter.new(self)
-   end
-
-   def raw_headers #:nodoc:
-     @headers
-   end
-   def raw_headers=(v) #:nodoc:
-     @headers = v
-   end
-
-   # Reset the headers hash.
-   def clear_headers
-     @headers = {}
-   end
-
-   def start! #:nodoc:
-     next_urls = @next_urls
-     begin
-       tmp_n_u = {}
-       next_urls.each do |prior_url, urls|
-         urls.map do |a_url|
-           [a_url, (URI.parse(a_url) rescue nil)]
-         end.select do |a_url, parsed_url|
-           allowable_url?(a_url, parsed_url)
-         end.each do |a_url, parsed_url|
-           @setup.call(a_url) unless @setup.nil?
-           get_page(parsed_url) do |response|
-             do_callbacks(a_url, response, prior_url)
-             tmp_n_u[a_url] = generate_next_urls(a_url, response)
-           end
-           @teardown.call(a_url) unless @teardown.nil?
-         end
-       end
-       next_urls = tmp_n_u
-     end while !next_urls.empty?
-   end
-
-   def success_or_failure(code) #:nodoc:
-     if code > 199 && code < 300
-       :success
-     else
-       :failure
-     end
-   end
-
-   def allowable_url?(a_url, parsed_url) #:nodoc:
-     !parsed_url.nil? && !@seen.include?(parsed_url) && allowed?(a_url, parsed_url) &&
-       @url_checks.map{|url_check|url_check.call(a_url)}.all?
-   end
-
-   # True if the robots.txt for that URL allows access to it.
-   def allowed?(a_url, parsed_url) # :nodoc:
-     u = "#{parsed_url.scheme}://#{parsed_url.host}:#{parsed_url.port}/robots.txt"
-     begin
-       unless @robots_seen.include?(u)
-         open(u, 'User-Agent' => 'Ruby Spider',
-              'Accept' => 'text/html,text/xml,application/xml,text/plain') do |url|
-           @rules.parse(u, url.read)
-         end
-         @robots_seen << u
-       end
-       @rules.allowed?(a_url)
-     rescue OpenURI::HTTPError
-       true # No robots.txt
-     rescue Exception, Timeout::Error # to keep it from crashing
-       false
-     end
-   end
-
-   def get_page(parsed_url, &block) #:nodoc:
-     @seen << parsed_url
-     begin
-       http = Net::HTTP.new(parsed_url.host, parsed_url.port)
-       http.use_ssl = parsed_url.scheme == 'https'
-       # Uses start because http.finish cannot be called.
-       r = http.start {|h| h.request(Net::HTTP::Get.new(parsed_url.request_uri,
-                                                        @headers))}
-       if r.redirect?
-         get_page(URI.parse(r['Location']), &block)
-       else
-         block.call(r)
-       end
-     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError => e
-       p e
-       nil
-     end
-   end
-
-   def do_callbacks(a_url, resp, prior_url) #:nodoc:
-     cbs = [@callbacks[:every],
-            resp.success? ? @callbacks[:success] : @callbacks[:failure],
-            @callbacks[resp.code]]
-
-     cbs.each do |cb|
-       cb.call(a_url, resp, prior_url) if cb
-     end
-   end
-
-   def generate_next_urls(a_url, resp) #:nodoc:
-     web_page = resp.body
-     base_url = (web_page.scan(/base\s+href="(.*?)"/i).flatten +
-                 [a_url[0,a_url.rindex('/')]])[0]
-     base_url = remove_trailing_slash(base_url)
-     web_page.scan(/href="(.*?)"/i).flatten.map do |link|
-       begin
-         parsed_link = URI.parse(link)
-         if parsed_link.fragment == '#'
-           nil
-         else
-           case parsed_link.scheme
-           when 'http'
-             link
-           when nil
-             u = URI.parse(base_url)
-             if link[0].chr == '/'
-               "#{u.scheme}://#{u.host}:#{u.port}#{link}"
-             elsif u.path.nil? || u.path == ''
-               "#{u.scheme}://#{u.host}:#{u.port}/#{link}"
-             else
-               "#{u.scheme}://#{u.host}:#{u.port}/#{u.path}/#{link}"
-             end
-           else
-             nil
-           end
-         end
-       rescue
-         nil
-       end
-     end.compact
-   end
-
-   def remove_trailing_slash(s) #:nodoc:
-     s.sub(%r{/*$},'')
-   end
-
-   class HeaderSetter #:nodoc:
-     def initialize(si)
-       @si = si
-     end
-     def []=(k,v)
-       @si.raw_headers = @si.raw_headers.merge({k => v})
-     end
-   end
- end
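
Because lib/spider.rb now requires lib/spider_instance.rb itself (see the hunk above), a single require still loads both classes; a quick sanity check, assuming the gem's lib directory is on the load path:

    require 'spider'

    # spider.rb pulls in spider_instance.rb via the File.dirname(__FILE__) require,
    # so both constants resolve after the one require.
    puts defined?(Spider)          # => "constant"
    puts defined?(SpiderInstance)  # => "constant"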
@@ -0,0 +1,290 @@
+ # Copyright 2007 Mike Burns
+
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #     * Redistributions of source code must retain the above copyright
+ #       notice, this list of conditions and the following disclaimer.
+ #     * Redistributions in binary form must reproduce the above copyright
+ #       notice, this list of conditions and the following disclaimer in the
+ #       documentation and/or other materials provided with the distribution.
+ #     * Neither the name Mike Burns nor the
+ #       names of his contributors may be used to endorse or promote products
+ #       derived from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
+ # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ require 'robot_rules'
+ require 'open-uri'
+ require 'uri'
+ require 'net/http'
+ require 'net/https'
+
+ module Net #:nodoc:
+   class HTTPResponse #:nodoc:
+     def success?; false; end
+     def redirect?; false; end
+   end
+   class HTTPSuccess #:nodoc:
+     def success?; true; end
+   end
+   class HTTPRedirection #:nodoc:
+     def redirect?; true; end
+   end
+ end
+
+ class NilClass #:nodoc:
+   def merge(h); h; end
+ end
+
+ class SpiderInstance
+   def initialize(next_urls, seen = [], rules = nil, robots_seen = []) #:nodoc:
+     @url_checks = []
+     @cache = :memory
+     @callbacks = {}
+     @next_urls = next_urls
+     @seen = seen
+     @rules = rules || RobotRules.new('Ruby Spider 1.0')
+     @robots_seen = robots_seen
+     @headers = {}
+     @setup = nil
+     @teardown = nil
+   end
+
+   # Add a predicate that determines whether to continue down this URL's path.
+   # All predicates must be true in order for a URL to proceed.
+   #
+   # Takes a block that takes a string and produces a boolean. For example, this
+   # will ensure that the URL starts with 'http://mike-burns.com':
+   #
+   #  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*}
+   def add_url_check(&block)
+     @url_checks << block
+   end
+
+   # The Web is a graph; to avoid cycles we store the nodes (URLs) already
+   # visited. The Web is a really, really, really big graph; as such, this list
+   # of visited nodes grows really, really, really big.
+   #
+   # Change the object used to store these seen nodes with this. The default
+   # object is an instance of Array. Available with Spider is a wrapper of
+   # memcached.
+   #
+   # You can implement a custom class for this; any object passed to
+   # check_already_seen_with must understand just << and included? .
+   #
+   #  # default
+   #  check_already_seen_with Array.new
+   #
+   #  # memcached
+   #  require 'spider/included_in_memcached'
+   #  check_already_seen_with IncludedInMemcached.new('localhost:11211')
+   def check_already_seen_with(cacher)
+     if cacher.respond_to?(:<<) && cacher.respond_to?(:include?)
+       @seen = cacher
+     else
+       raise ArgumentError, 'expected something that responds to << and included?'
+     end
+   end
+
+   # Add a response handler. A response handler's trigger can be :every,
+   # :success, :failure, or any HTTP status code. The handler itself can be
+   # either a Proc or a block.
+   #
+   # The arguments to the block are: the URL as a string, an instance of
+   # Net::HTTPResponse, and the prior URL as a string.
+   #
+   #
+   # For example:
+   #
+   #  on 404 do |a_url, resp, prior_url|
+   #    puts "URL not found: #{a_url}"
+   #  end
+   #
+   #  on :success do |a_url, resp, prior_url|
+   #    puts a_url
+   #    puts resp.body
+   #  end
+   #
+   #  on :every do |a_url, resp, prior_url|
+   #    puts "Given this code: #{resp.code}"
+   #  end
+   def on(code, p = nil, &block)
+     f = p ? p : block
+     case code
+     when Fixnum
+       @callbacks[code] = f
+     else
+       @callbacks[code.to_sym] = f
+     end
+   end
+
+   # Run before the HTTP request. Given the URL as a string.
+   #  setup do |a_url|
+   #    headers['Cookies'] = 'user_id=1;admin=true'
+   #  end
+   def setup(p = nil, &block)
+     @setup = p ? p : block
+   end
+
+   # Run last, once for each page. Given the URL as a string.
+   def teardown(p = nil, &block)
+     @teardown = p ? p : block
+   end
+
+   # Use like a hash:
+   #  headers['Cookies'] = 'user_id=1;password=btrross3'
+   def headers
+     HeaderSetter.new(self)
+   end
+
+   def raw_headers #:nodoc:
+     @headers
+   end
+   def raw_headers=(v) #:nodoc:
+     @headers = v
+   end
+
+   # Reset the headers hash.
+   def clear_headers
+     @headers = {}
+   end
+
+   def start! #:nodoc:
+     next_urls = @next_urls
+     begin
+       tmp_n_u = {}
+       next_urls.each do |prior_url, urls|
+         urls.map do |a_url|
+           [a_url, (URI.parse(a_url) rescue nil)]
+         end.select do |a_url, parsed_url|
+           allowable_url?(a_url, parsed_url)
+         end.each do |a_url, parsed_url|
+           @setup.call(a_url) unless @setup.nil?
+           get_page(parsed_url) do |response|
+             do_callbacks(a_url, response, prior_url)
+             tmp_n_u[a_url] = generate_next_urls(a_url, response)
+           end
+           @teardown.call(a_url) unless @teardown.nil?
+         end
+       end
+       next_urls = tmp_n_u
+     end while !next_urls.empty?
+   end
+
+   def success_or_failure(code) #:nodoc:
+     if code > 199 && code < 300
+       :success
+     else
+       :failure
+     end
+   end
+
+   def allowable_url?(a_url, parsed_url) #:nodoc:
+     !parsed_url.nil? && !@seen.include?(parsed_url) && allowed?(a_url, parsed_url) &&
+       @url_checks.map{|url_check|url_check.call(a_url)}.all?
+   end
+
+   # True if the robots.txt for that URL allows access to it.
+   def allowed?(a_url, parsed_url) # :nodoc:
+     u = "#{parsed_url.scheme}://#{parsed_url.host}:#{parsed_url.port}/robots.txt"
+     begin
+       unless @robots_seen.include?(u)
+         open(u, 'User-Agent' => 'Ruby Spider',
+              'Accept' => 'text/html,text/xml,application/xml,text/plain') do |url|
+           @rules.parse(u, url.read)
+         end
+         @robots_seen << u
+       end
+       @rules.allowed?(a_url)
+     rescue OpenURI::HTTPError
+       true # No robots.txt
+     rescue Exception, Timeout::Error # to keep it from crashing
+       false
+     end
+   end
+
+   def get_page(parsed_url, &block) #:nodoc:
+     @seen << parsed_url
+     begin
+       http = Net::HTTP.new(parsed_url.host, parsed_url.port)
+       http.use_ssl = parsed_url.scheme == 'https'
+       # Uses start because http.finish cannot be called.
+       r = http.start {|h| h.request(Net::HTTP::Get.new(parsed_url.request_uri,
+                                                        @headers))}
+       if r.redirect?
+         get_page(URI.parse(r['Location']), &block)
+       else
+         block.call(r)
+       end
+     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError => e
+       p e
+       nil
+     end
+   end
+
+   def do_callbacks(a_url, resp, prior_url) #:nodoc:
+     cbs = [@callbacks[:every],
+            resp.success? ? @callbacks[:success] : @callbacks[:failure],
+            @callbacks[resp.code]]
+
+     cbs.each do |cb|
+       cb.call(a_url, resp, prior_url) if cb
+     end
+   end
+
+   def generate_next_urls(a_url, resp) #:nodoc:
+     web_page = resp.body
+     base_url = (web_page.scan(/base\s+href="(.*?)"/i).flatten +
+                 [a_url[0,a_url.rindex('/')]])[0]
+     base_url = remove_trailing_slash(base_url)
+     web_page.scan(/href="(.*?)"/i).flatten.map do |link|
+       begin
+         parsed_link = URI.parse(link)
+         if parsed_link.fragment == '#'
+           nil
+         else
+           case parsed_link.scheme
+           when 'http'
+             link
+           when nil
+             u = URI.parse(base_url)
+             if link[0].chr == '/'
+               "#{u.scheme}://#{u.host}:#{u.port}#{link}"
+             elsif u.path.nil? || u.path == ''
+               "#{u.scheme}://#{u.host}:#{u.port}/#{link}"
+             else
+               "#{u.scheme}://#{u.host}:#{u.port}/#{u.path}/#{link}"
+             end
+           else
+             nil
+           end
+         end
+       rescue
+         nil
+       end
+     end.compact
+   end
+
+   def remove_trailing_slash(s) #:nodoc:
+     s.sub(%r{/*$},'')
+   end
+
+   class HeaderSetter #:nodoc:
+     def initialize(si)
+       @si = si
+     end
+     def []=(k,v)
+       @si.raw_headers = @si.raw_headers.merge({k => v})
+     end
+   end
+ end
+
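
The check_already_seen_with contract above is deliberately small: any object responding to << and include? can serve as the seen-URL store. A sketch of a custom store (IncludedInSet is a hypothetical name, not part of the gem; it is backed by Ruby's Set):

    require 'set'
    require 'spider'

    # Hypothetical store satisfying the << / include? contract.
    class IncludedInSet
      def initialize
        @seen = Set.new
      end

      # Record an item, keyed by its string form (the spider passes URI objects).
      def <<(v)
        @seen << v.to_s
      end

      # True if the item was recorded earlier.
      def include?(v)
        @seen.include?(v.to_s)
      end
    end

    Spider.start_at('http://mike-burns.com/') do |s|
      s.check_already_seen_with IncludedInSet.new
    end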