spider 0.3.0 → 0.4.0

@@ -20,6 +20,7 @@
  <div id="index">
  <h1 class="section-bar">Classes</h1>
  <div id="index-entries">
+ <a href="classes/IncludedInMemcached.html">IncludedInMemcached</a><br />
  <a href="classes/Spider.html">Spider</a><br />
  <a href="classes/SpiderInstance.html">SpiderInstance</a><br />
  </div>
@@ -21,7 +21,9 @@
  <h1 class="section-bar">Files</h1>
  <div id="index-entries">
  <a href="files/README.html">README</a><br />
+ <a href="files/lib/included_in_memcached_rb.html">lib/included_in_memcached.rb</a><br />
  <a href="files/lib/spider_rb.html">lib/spider.rb</a><br />
+ <a href="files/lib/spider_instance_rb.html">lib/spider_instance.rb</a><br />
  </div>
  </div>
  </body>
@@ -20,13 +20,17 @@
  <div id="index">
  <h1 class="section-bar">Methods</h1>
  <div id="index-entries">
- <a href="classes/SpiderInstance.html#M000001">add_url_check (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000006">clear_headers (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000005">headers (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000002">on (SpiderInstance)</a><br />
- <a href="classes/SpiderInstance.html#M000003">setup (SpiderInstance)</a><br />
- <a href="classes/Spider.html#M000007">start_at (Spider)</a><br />
- <a href="classes/SpiderInstance.html#M000004">teardown (SpiderInstance)</a><br />
+ <a href="classes/IncludedInMemcached.html#M000002"><< (IncludedInMemcached)</a><br />
+ <a href="classes/SpiderInstance.html#M000004">add_url_check (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000005">check_already_seen_with (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000010">clear_headers (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000009">headers (SpiderInstance)</a><br />
+ <a href="classes/IncludedInMemcached.html#M000003">include? (IncludedInMemcached)</a><br />
+ <a href="classes/IncludedInMemcached.html#M000001">new (IncludedInMemcached)</a><br />
+ <a href="classes/SpiderInstance.html#M000006">on (SpiderInstance)</a><br />
+ <a href="classes/SpiderInstance.html#M000007">setup (SpiderInstance)</a><br />
+ <a href="classes/Spider.html#M000011">start_at (Spider)</a><br />
+ <a href="classes/SpiderInstance.html#M000008">teardown (SpiderInstance)</a><br />
  </div>
  </div>
  </body>
@@ -0,0 +1,22 @@
+ require 'memcache'
+
+ # A specialized class using memcached to track items stored. It supports
+ # three operations: new, <<, and include? . Together these can be used to
+ # add items to the memcache, then determine whether the item has been added.
+ class IncludedInMemcached
+   # Construct a new IncludedInMemcached instance. All arguments here are
+   # passed to MemCache (part of the memcache-client gem).
+   def initialize(*a)
+     @c = MemCache.new(*a)
+   end
+
+   # Add an item to the memcache.
+   def <<(v)
+     @c.add(v.to_s, v)
+   end
+
+   # True if the item is in the memcache.
+   def include?(v)
+     @c.get(v.to_s) == v
+   end
+ end
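
A minimal usage sketch of the new cache class (assumptions: the memcache-client gem is installed and a memcached daemon is listening on localhost:11211, the address used in the SpiderInstance documentation further down; the URL is illustrative):

  require 'spider/included_in_memcached'

  seen = IncludedInMemcached.new('localhost:11211')
  seen << 'http://mike-burns.com/'          # stores the item under its own key
  seen.include?('http://mike-burns.com/')   # => true
  seen.include?('http://example.com/')      # => false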
@@ -9,7 +9,7 @@
  #       notice, this list of conditions and the following disclaimer in the
  #       documentation and/or other materials provided with the distribution.
  #     * Neither the name Mike Burns nor the
- #       names of its contributors may be used to endorse or promote products
+ #       names of his contributors may be used to endorse or promote products
  #       derived from this software without specific prior written permission.
  #
  # THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
@@ -23,35 +23,15 @@
  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

- require 'robot_rules'
- require 'open-uri'
- require 'uri'
- require 'net/http'
- require 'net/https'
-
- module Net #:nodoc:
-   class HTTPResponse #:nodoc:
-     def success?; false; end
-     def redirect?; false; end
-   end
-   class HTTPSuccess #:nodoc:
-     def success?; true; end
-   end
-   class HTTPRedirection #:nodoc:
-     def redirect?; true; end
-   end
- end
-
- class NilClass #:nodoc:
-   def merge(h); h; end
- end
+ require File.dirname(__FILE__)+'/spider_instance'

  # A spidering library for Ruby. Handles robots.txt, scraping, finding more
  # links, and doing it all over again.
  class Spider
    # Runs the spider starting at the given URL. Also takes a block that is given
    # the SpiderInstance. Use the block to define the rules and handlers for
-   # the discovered Web pages.
+   # the discovered Web pages. See SpiderInstance for the possible rules and
+   # handlers.
    #
    #  Spider.start_at('http://mike-burns.com/') do |s|
    #    s.add_url_check do |a_url|
@@ -78,225 +58,3 @@ class Spider
      a_spider.start!
    end
  end
-
- class SpiderInstance
-   def initialize(next_urls, seen = [], rules = nil, robots_seen = []) #:nodoc:
-     @url_checks = []
-     @cache = :memory
-     @callbacks = {}
-     @next_urls = next_urls
-     @seen = seen
-     @rules = rules || RobotRules.new('Ruby Spider 1.0')
-     @robots_seen = robots_seen
-     @headers = {}
-     @setup = nil
-     @teardown = nil
-   end
-
-   # Add a predicate that determines whether to continue down this URL's path.
-   # All predicates must be true in order for a URL to proceed.
-   #
-   # Takes a block that takes a string and produces a boolean. For example, this
-   # will ensure that the URL starts with 'http://mike-burns.com':
-   #
-   #  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
-   def add_url_check(&block)
-     @url_checks << block
-   end
-
-   def use_cache(cache_type) #:nodoc:
-     @cache = cache_type
-   end
-
-   # Add a response handler. A response handler's trigger can be :every,
-   # :success, :failure, or any HTTP status code. The handler itself can be
-   # either a Proc or a block.
-   #
-   # The arguments to the block are: the URL as a string, an instance of
-   # Net::HTTPResponse, and the prior URL as a string.
-   #
-   #
-   # For example:
-   #
-   #  on 404 do |a_url, resp, prior_url|
-   #    puts "URL not found: #{a_url}"
-   #  end
-   #
-   #  on :success do |a_url, resp, prior_url|
-   #    puts a_url
-   #    puts resp.body
-   #  end
-   #
-   #  on :every do |a_url, resp, prior_url|
-   #    puts "Given this code: #{resp.code}"
-   #  end
-   def on(code, p = nil, &block)
-     f = p ? p : block
-     case code
-     when Fixnum
-       @callbacks[code] = f
-     else
-       @callbacks[code.to_sym] = f
-     end
-   end
-
-   # Run before the HTTP request. Given the URL as a string.
-   #  setup do |a_url|
-   #    headers['Cookies'] = 'user_id=1;admin=true'
-   #  end
-   def setup(p = nil, &block)
-     @setup = p ? p : block
-   end
-
-   # Run last, once for each page. Given the URL as a string.
-   def teardown(p = nil, &block)
-     @teardown = p ? p : block
-   end
-
-   # Use like a hash:
-   #  headers['Cookies'] = 'user_id=1;password=btrross3'
-   def headers
-     HeaderSetter.new(self)
-   end
-
-   def raw_headers #:nodoc:
-     @headers
-   end
-   def raw_headers=(v) #:nodoc:
-     @headers = v
-   end
-
-   # Reset the headers hash.
-   def clear_headers
-     @headers = {}
-   end
-
-   def start! #:nodoc:
-     next_urls = @next_urls
-     begin
-       tmp_n_u = {}
-       next_urls.each do |prior_url, urls|
-         urls.map do |a_url|
-           [a_url, (URI.parse(a_url) rescue nil)]
-         end.select do |a_url, parsed_url|
-           allowable_url?(a_url, parsed_url)
-         end.each do |a_url, parsed_url|
-           @setup.call(a_url) unless @setup.nil?
-           get_page(parsed_url) do |response|
-             do_callbacks(a_url, response, prior_url)
-             tmp_n_u[a_url] = generate_next_urls(a_url, response)
-           end
-           @teardown.call(a_url) unless @teardown.nil?
-         end
-       end
-       next_urls = tmp_n_u
-     end while !next_urls.empty?
-   end
-
-   def success_or_failure(code) #:nodoc:
-     if code > 199 && code < 300
-       :success
-     else
-       :failure
-     end
-   end
-
-   def allowable_url?(a_url, parsed_url) #:nodoc:
-     !parsed_url.nil? && !@seen.include?(parsed_url) && allowed?(a_url, parsed_url) &&
-       @url_checks.map{|url_check|url_check.call(a_url)}.all?
-   end
-
-   # True if the robots.txt for that URL allows access to it.
-   def allowed?(a_url, parsed_url) # :nodoc:
-     u = "#{parsed_url.scheme}://#{parsed_url.host}:#{parsed_url.port}/robots.txt"
-     begin
-       unless @robots_seen.include?(u)
-         open(u, 'User-Agent' => 'Ruby Spider',
-              'Accept' => 'text/html,text/xml,application/xml,text/plain') do |url|
-           @rules.parse(u, url.read)
-         end
-         @robots_seen << u
-       end
-       @rules.allowed?(a_url)
-     rescue OpenURI::HTTPError
-       true # No robots.txt
-     rescue Exception, Timeout::Error # to keep it from crashing
-       false
-     end
-   end
-
-   def get_page(parsed_url, &block) #:nodoc:
-     @seen << parsed_url
-     begin
-       http = Net::HTTP.new(parsed_url.host, parsed_url.port)
-       http.use_ssl = parsed_url.scheme == 'https'
-       # Uses start because http.finish cannot be called.
-       r = http.start {|h| h.request(Net::HTTP::Get.new(parsed_url.request_uri,
-                                                        @headers))}
-       if r.redirect?
-         get_page(URI.parse(r['Location']), &block)
-       else
-         block.call(r)
-       end
-     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError => e
-       p e
-       nil
-     end
-   end
-
-   def do_callbacks(a_url, resp, prior_url) #:nodoc:
-     cbs = [@callbacks[:every],
-            resp.success? ? @callbacks[:success] : @callbacks[:failure],
-            @callbacks[resp.code]]
-
-     cbs.each do |cb|
-       cb.call(a_url, resp, prior_url) if cb
-     end
-   end
-
-   def generate_next_urls(a_url, resp) #:nodoc:
-     web_page = resp.body
-     base_url = (web_page.scan(/base\s+href="(.*?)"/i).flatten +
-                 [a_url[0,a_url.rindex('/')]])[0]
-     base_url = remove_trailing_slash(base_url)
-     web_page.scan(/href="(.*?)"/i).flatten.map do |link|
-       begin
-         parsed_link = URI.parse(link)
-         if parsed_link.fragment == '#'
-           nil
-         else
-           case parsed_link.scheme
-           when 'http'
-             link
-           when nil
-             u = URI.parse(base_url)
-             if link[0].chr == '/'
-               "#{u.scheme}://#{u.host}:#{u.port}#{link}"
-             elsif u.path.nil? || u.path == ''
-               "#{u.scheme}://#{u.host}:#{u.port}/#{link}"
-             else
-               "#{u.scheme}://#{u.host}:#{u.port}/#{u.path}/#{link}"
-             end
-           else
-             nil
-           end
-         end
-       rescue
-         nil
-       end
-     end.compact
-   end
-
-   def remove_trailing_slash(s) #:nodoc:
-     s.sub(%r{/*$},'')
-   end
-
-   class HeaderSetter #:nodoc:
-     def initialize(si)
-       @si = si
-     end
-     def []=(k,v)
-       @si.raw_headers = @si.raw_headers.merge({k => v})
-     end
-   end
- end
@@ -0,0 +1,290 @@
+ # Copyright 2007 Mike Burns
+
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #     * Redistributions of source code must retain the above copyright
+ #       notice, this list of conditions and the following disclaimer.
+ #     * Redistributions in binary form must reproduce the above copyright
+ #       notice, this list of conditions and the following disclaimer in the
+ #       documentation and/or other materials provided with the distribution.
+ #     * Neither the name Mike Burns nor the
+ #       names of his contributors may be used to endorse or promote products
+ #       derived from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS'' AND ANY
+ # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ require 'robot_rules'
+ require 'open-uri'
+ require 'uri'
+ require 'net/http'
+ require 'net/https'
+
+ module Net #:nodoc:
+   class HTTPResponse #:nodoc:
+     def success?; false; end
+     def redirect?; false; end
+   end
+   class HTTPSuccess #:nodoc:
+     def success?; true; end
+   end
+   class HTTPRedirection #:nodoc:
+     def redirect?; true; end
+   end
+ end
+
+ class NilClass #:nodoc:
+   def merge(h); h; end
+ end
+
+ class SpiderInstance
+   def initialize(next_urls, seen = [], rules = nil, robots_seen = []) #:nodoc:
+     @url_checks = []
+     @cache = :memory
+     @callbacks = {}
+     @next_urls = next_urls
+     @seen = seen
+     @rules = rules || RobotRules.new('Ruby Spider 1.0')
+     @robots_seen = robots_seen
+     @headers = {}
+     @setup = nil
+     @teardown = nil
+   end
+
+   # Add a predicate that determines whether to continue down this URL's path.
+   # All predicates must be true in order for a URL to proceed.
+   #
+   # Takes a block that takes a string and produces a boolean. For example, this
+   # will ensure that the URL starts with 'http://mike-burns.com':
+   #
+   #  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
+   def add_url_check(&block)
+     @url_checks << block
+   end
+
+   # The Web is a graph; to avoid cycles we store the nodes (URLs) already
+   # visited. The Web is a really, really, really big graph; as such, this list
+   # of visited nodes grows really, really, really big.
+   #
+   # Change the object used to store these seen nodes with this. The default
+   # object is an instance of Array. Available with Spider is a wrapper of
+   # memcached.
+   #
+   # You can implement a custom class for this; any object passed to
+   # check_already_seen_with must understand just << and include? .
+   #
+   #  # default
+   #  check_already_seen_with Array.new
+   #
+   #  # memcached
+   #  require 'spider/included_in_memcached'
+   #  check_already_seen_with IncludedInMemcached.new('localhost:11211')
+   def check_already_seen_with(cacher)
+     if cacher.respond_to?(:<<) && cacher.respond_to?(:include?)
+       @seen = cacher
+     else
+       raise ArgumentError, 'expected something that responds to << and include?'
+     end
+   end
+
+   # Add a response handler. A response handler's trigger can be :every,
+   # :success, :failure, or any HTTP status code. The handler itself can be
+   # either a Proc or a block.
+   #
+   # The arguments to the block are: the URL as a string, an instance of
+   # Net::HTTPResponse, and the prior URL as a string.
+   #
+   #
+   # For example:
+   #
+   #  on 404 do |a_url, resp, prior_url|
+   #    puts "URL not found: #{a_url}"
+   #  end
+   #
+   #  on :success do |a_url, resp, prior_url|
+   #    puts a_url
+   #    puts resp.body
+   #  end
+   #
+   #  on :every do |a_url, resp, prior_url|
+   #    puts "Given this code: #{resp.code}"
+   #  end
+   def on(code, p = nil, &block)
+     f = p ? p : block
+     case code
+     when Fixnum
+       @callbacks[code] = f
+     else
+       @callbacks[code.to_sym] = f
+     end
+   end
+
+   # Run before the HTTP request. Given the URL as a string.
+   #  setup do |a_url|
+   #    headers['Cookies'] = 'user_id=1;admin=true'
+   #  end
+   def setup(p = nil, &block)
+     @setup = p ? p : block
+   end
+
+   # Run last, once for each page. Given the URL as a string.
+   def teardown(p = nil, &block)
+     @teardown = p ? p : block
+   end
+
+   # Use like a hash:
+   #  headers['Cookies'] = 'user_id=1;password=btrross3'
+   def headers
+     HeaderSetter.new(self)
+   end
+
+   def raw_headers #:nodoc:
+     @headers
+   end
+   def raw_headers=(v) #:nodoc:
+     @headers = v
+   end
+
+   # Reset the headers hash.
+   def clear_headers
+     @headers = {}
+   end
+
+   def start! #:nodoc:
+     next_urls = @next_urls
+     begin
+       tmp_n_u = {}
+       next_urls.each do |prior_url, urls|
+         urls.map do |a_url|
+           [a_url, (URI.parse(a_url) rescue nil)]
+         end.select do |a_url, parsed_url|
+           allowable_url?(a_url, parsed_url)
+         end.each do |a_url, parsed_url|
+           @setup.call(a_url) unless @setup.nil?
+           get_page(parsed_url) do |response|
+             do_callbacks(a_url, response, prior_url)
+             tmp_n_u[a_url] = generate_next_urls(a_url, response)
+           end
+           @teardown.call(a_url) unless @teardown.nil?
+         end
+       end
+       next_urls = tmp_n_u
+     end while !next_urls.empty?
+   end
+
+   def success_or_failure(code) #:nodoc:
+     if code > 199 && code < 300
+       :success
+     else
+       :failure
+     end
+   end
+
+   def allowable_url?(a_url, parsed_url) #:nodoc:
+     !parsed_url.nil? && !@seen.include?(parsed_url) && allowed?(a_url, parsed_url) &&
+       @url_checks.map{|url_check|url_check.call(a_url)}.all?
+   end
+
+   # True if the robots.txt for that URL allows access to it.
+   def allowed?(a_url, parsed_url) # :nodoc:
+     u = "#{parsed_url.scheme}://#{parsed_url.host}:#{parsed_url.port}/robots.txt"
+     begin
+       unless @robots_seen.include?(u)
+         open(u, 'User-Agent' => 'Ruby Spider',
+              'Accept' => 'text/html,text/xml,application/xml,text/plain') do |url|
+           @rules.parse(u, url.read)
+         end
+         @robots_seen << u
+       end
+       @rules.allowed?(a_url)
+     rescue OpenURI::HTTPError
+       true # No robots.txt
+     rescue Exception, Timeout::Error # to keep it from crashing
+       false
+     end
+   end
+
+   def get_page(parsed_url, &block) #:nodoc:
+     @seen << parsed_url
+     begin
+       http = Net::HTTP.new(parsed_url.host, parsed_url.port)
+       http.use_ssl = parsed_url.scheme == 'https'
+       # Uses start because http.finish cannot be called.
+       r = http.start {|h| h.request(Net::HTTP::Get.new(parsed_url.request_uri,
+                                                        @headers))}
+       if r.redirect?
+         get_page(URI.parse(r['Location']), &block)
+       else
+         block.call(r)
+       end
+     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError => e
+       p e
+       nil
+     end
+   end
+
+   def do_callbacks(a_url, resp, prior_url) #:nodoc:
+     cbs = [@callbacks[:every],
+            resp.success? ? @callbacks[:success] : @callbacks[:failure],
+            @callbacks[resp.code]]
+
+     cbs.each do |cb|
+       cb.call(a_url, resp, prior_url) if cb
+     end
+   end
+
+   def generate_next_urls(a_url, resp) #:nodoc:
+     web_page = resp.body
+     base_url = (web_page.scan(/base\s+href="(.*?)"/i).flatten +
+                 [a_url[0,a_url.rindex('/')]])[0]
+     base_url = remove_trailing_slash(base_url)
+     web_page.scan(/href="(.*?)"/i).flatten.map do |link|
+       begin
+         parsed_link = URI.parse(link)
+         if parsed_link.fragment == '#'
+           nil
+         else
+           case parsed_link.scheme
+           when 'http'
+             link
+           when nil
+             u = URI.parse(base_url)
+             if link[0].chr == '/'
+               "#{u.scheme}://#{u.host}:#{u.port}#{link}"
+             elsif u.path.nil? || u.path == ''
+               "#{u.scheme}://#{u.host}:#{u.port}/#{link}"
+             else
+               "#{u.scheme}://#{u.host}:#{u.port}/#{u.path}/#{link}"
+             end
+           else
+             nil
+           end
+         end
+       rescue
+         nil
+       end
+     end.compact
+   end
+
+   def remove_trailing_slash(s) #:nodoc:
+     s.sub(%r{/*$},'')
+   end
+
+   class HeaderSetter #:nodoc:
+     def initialize(si)
+       @si = si
+     end
+     def []=(k,v)
+       @si.raw_headers = @si.raw_headers.merge({k => v})
+     end
+   end
+ end
+
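
Tying the pieces together, a sketch of how the new check_already_seen_with hook combines with the existing rules and handlers (the crawl seed and the memcached address are illustrative, and a running memcached daemon is assumed):

  require 'spider'
  require 'spider/included_in_memcached'

  Spider.start_at('http://mike-burns.com/') do |s|
    # Stay on the one site.
    s.add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }

    # Track visited URLs in memcached instead of the default in-memory Array.
    s.check_already_seen_with IncludedInMemcached.new('localhost:11211')

    s.on(:success) do |a_url, resp, prior_url|
      puts "#{a_url}: #{resp.code}"
    end
  end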