spider 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES CHANGED
@@ -1,3 +1,6 @@
1
+ 2007-11-02:
2
+ * Memcached support.
3
+
1
4
  2007-10-31:
2
5
  * Add `setup' and `teardown' handlers.
3
6
  * Can set the headers for a HTTP request.
data/README CHANGED
@@ -1,41 +1,114 @@
1
1
  Spider, a Web spidering library for Ruby. It handles the robots.txt,
2
2
  scraping, collecting, and looping so that you can just handle the data.
3
3
 
4
- == Usage
4
+ == Examples
5
5
 
6
+ === Crawl the Web, loading each page in turn, until you run out of memory
7
+
8
+ require 'spider'
9
+ Spider.start_at('http://mike-burns.com/') {}
10
+
11
+ === To handle erroneous responses
12
+
13
+ require 'spider'
14
+ Spider.start_at('http://mike-burns.com/') do |s|
15
+ s.on :failure do |a_url, resp, prior_url|
16
+ puts "URL failed: #{a_url}"
17
+ puts " linked from #{prior_url}"
18
+ end
19
+ end
20
+
21
+ === Or handle successful responses
22
+
23
+ require 'spider'
24
+ Spider.start_at('http://mike-burns.com/') do |s|
25
+ s.on :success do |a_url, resp, prior_url|
26
+ puts "#{a_url}: #{resp.code}"
27
+ puts resp.body
28
+ puts
29
+ end
30
+ end
31
+
32
+ === Limit to just one domain
33
+
34
+ require 'spider'
6
35
  Spider.start_at('http://mike-burns.com/') do |s|
7
- # Limit the pages to just this domain.
8
36
  s.add_url_check do |a_url|
9
37
  a_url =~ %r{^http://mike-burns.com.*}
10
38
  end
39
+ end
11
40
 
12
- # Handle 404s.
13
- s.on 404 do |a_url, resp, prior_url|
14
- puts "URL not found: #{a_url}"
15
- end
41
+ === Pass headers to some requests
16
42
 
17
- # Handle 2xx.
18
- s.on :success do |a_url, resp, prior_url|
19
- puts "body: #{resp.body}"
43
+ require 'spider'
44
+ Spider.start_at('http://mike-burns.com/') do |s|
45
+ s.setup do |a_url|
46
+ if a_url =~ %r{^http://.*wikipedia.*}
47
+ headers['User-Agent'] = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
48
+ end
20
49
  end
50
+ end
21
51
 
22
- # Handle everything.
23
- s.on :every do |a_url, resp, prior_url|
24
- puts "URL returned anything: #{a_url} with this code #{resp.code}"
52
+ === Use memcached to track cycles
53
+
54
+ require 'spider'
55
+ require 'spider/included_in_memcached'
56
+ SERVERS = ['10.0.10.2:11211','10.0.10.3:11211','10.0.10.4:11211']
57
+ Spider.start_at('http://mike-burns.com/') do |s|
58
+ s.check_already_seen_with IncludedInMemcached.new(SERVERS)
59
+ end
60
+
61
+ === Track cycles with a custom object
62
+
63
+ require 'spider'
64
+
65
+ class ExpireLinks < Hash
66
+ def <<(v)
67
+ self[v] = Time.now
68
+ end
69
+ def include?(v)
70
+ self[v] && (self[v] + 86400) >= Time.now
25
71
  end
26
72
  end
27
73
 
74
+ Spider.start_at('http://mike-burns.com/') do |s|
75
+ s.check_already_seen_with ExpireLinks.new
76
+ end
77
+
78
+ === Create a URL graph
79
+
80
+ require 'spider'
81
+ nodes = {}
82
+ Spider.start_at('http://mike-burns.com/') do |s|
83
+ s.add_url_check {|a_url| a_url =~ %r{^http://mike-burns.com.*} }
28
84
 
29
- == Requirements
85
+ s.on(:every) do |a_url, resp, prior_url|
86
+ nodes[prior_url] ||= []
87
+ nodes[prior_url] << a_url
88
+ end
89
+ end
30
90
 
31
- This library uses `robot_rules' (included), `open-uri', and `uri'. Any modern
32
- Ruby should work; if yours doesn't, let me know so I can update this with your
33
- version number.
91
+ === Use a proxy
92
+
93
+ require 'net/http_configuration'
94
+ require 'spider'
95
+ http_conf = Net::HTTP::Configuration.new(:proxy_host => '7proxies.org',
96
+ :proxy_port => 8881)
97
+ http_conf.apply do
98
+ Spider.start_at('http://img.4chan.org/b/') do |s|
99
+ s.on(:success) do |a_url, resp, prior_url|
100
+ File.open(a_url.gsub('/',':'),'w') do |f|
101
+ f.write(resp.body)
102
+ end
103
+ end
104
+ end
105
+ end
34
106
 
35
107
  == Author
36
108
 
37
109
  Mike Burns http://mike-burns.com mike@mike-burns.com
38
110
 
39
- With help from Matt Horan and John Nagro.
111
+ Help from Matt Horan and John Nagro.
112
+
40
113
  With `robot_rules' from James Edward Gray II via
41
114
  http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589
@@ -0,0 +1,217 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: IncludedInMemcached</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">IncludedInMemcached</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../files/lib/included_in_memcached_rb.html">
59
+ lib/included_in_memcached.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+ <div id="description">
82
+ <p>
83
+ A specialized class using memcached to track items stored. It supports
84
+ three operations: <a href="IncludedInMemcached.html#M000001">new</a>,
85
+ &lt;&lt;, and <a href="IncludedInMemcached.html#M000003">include?</a> .
86
+ Together these can be used to add items to the memcache, then determine
87
+ whether the item has been added.
88
+ </p>
89
+
90
+ </div>
91
+
92
+
93
+ </div>
94
+
95
+ <div id="method-list">
96
+ <h3 class="section-bar">Methods</h3>
97
+
98
+ <div class="name-list">
99
+ <a href="#M000002">&lt;&lt;</a>&nbsp;&nbsp;
100
+ <a href="#M000003">include?</a>&nbsp;&nbsp;
101
+ <a href="#M000001">new</a>&nbsp;&nbsp;
102
+ </div>
103
+ </div>
104
+
105
+ </div>
106
+
107
+
108
+ <!-- if includes -->
109
+
110
+ <div id="section">
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+ <!-- if method_list -->
120
+ <div id="methods">
121
+ <h3 class="section-bar">Public Class methods</h3>
122
+
123
+ <div id="method-M000001" class="method-detail">
124
+ <a name="M000001"></a>
125
+
126
+ <div class="method-heading">
127
+ <a href="#M000001" class="method-signature">
128
+ <span class="method-name">new</span><span class="method-args">(*a)</span>
129
+ </a>
130
+ </div>
131
+
132
+ <div class="method-description">
133
+ <p>
134
+ Construct a <a href="IncludedInMemcached.html#M000001">new</a> <a
135
+ href="IncludedInMemcached.html">IncludedInMemcached</a> instance. All
136
+ arguments here are passed to MemCache (part of the memcache-client gem).
137
+ </p>
138
+ <p><a class="source-toggle" href="#"
139
+ onclick="toggleCode('M000001-source');return false;">[Source]</a></p>
140
+ <div class="method-source-code" id="M000001-source">
141
+ <pre>
142
+ <span class="ruby-comment cmt"># File lib/included_in_memcached.rb, line 9</span>
143
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">a</span>)
144
+ <span class="ruby-ivar">@c</span> = <span class="ruby-constant">MemCache</span>.<span class="ruby-identifier">new</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">a</span>)
145
+ <span class="ruby-keyword kw">end</span>
146
+ </pre>
147
+ </div>
148
+ </div>
149
+ </div>
150
+
151
+ <h3 class="section-bar">Public Instance methods</h3>
152
+
153
+ <div id="method-M000002" class="method-detail">
154
+ <a name="M000002"></a>
155
+
156
+ <div class="method-heading">
157
+ <a href="#M000002" class="method-signature">
158
+ <span class="method-name">&lt;&lt;</span><span class="method-args">(v)</span>
159
+ </a>
160
+ </div>
161
+
162
+ <div class="method-description">
163
+ <p>
164
+ Add an item to the memcache.
165
+ </p>
166
+ <p><a class="source-toggle" href="#"
167
+ onclick="toggleCode('M000002-source');return false;">[Source]</a></p>
168
+ <div class="method-source-code" id="M000002-source">
169
+ <pre>
170
+ <span class="ruby-comment cmt"># File lib/included_in_memcached.rb, line 14</span>
171
+ <span class="ruby-keyword kw">def</span> <span class="ruby-operator">&lt;&lt;</span>(<span class="ruby-identifier">v</span>)
172
+ <span class="ruby-ivar">@c</span>.<span class="ruby-identifier">add</span>(<span class="ruby-identifier">v</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-identifier">v</span>)
173
+ <span class="ruby-keyword kw">end</span>
174
+ </pre>
175
+ </div>
176
+ </div>
177
+ </div>
178
+
179
+ <div id="method-M000003" class="method-detail">
180
+ <a name="M000003"></a>
181
+
182
+ <div class="method-heading">
183
+ <a href="#M000003" class="method-signature">
184
+ <span class="method-name">include?</span><span class="method-args">(v)</span>
185
+ </a>
186
+ </div>
187
+
188
+ <div class="method-description">
189
+ <p>
190
+ True if the item is in the memcache.
191
+ </p>
192
+ <p><a class="source-toggle" href="#"
193
+ onclick="toggleCode('M000003-source');return false;">[Source]</a></p>
194
+ <div class="method-source-code" id="M000003-source">
195
+ <pre>
196
+ <span class="ruby-comment cmt"># File lib/included_in_memcached.rb, line 19</span>
197
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">include?</span>(<span class="ruby-identifier">v</span>)
198
+ <span class="ruby-ivar">@c</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">v</span>.<span class="ruby-identifier">to_s</span>) <span class="ruby-operator">==</span> <span class="ruby-identifier">v</span>
199
+ <span class="ruby-keyword kw">end</span>
200
+ </pre>
201
+ </div>
202
+ </div>
203
+ </div>
204
+
205
+
206
+ </div>
207
+
208
+
209
+ </div>
210
+
211
+
212
+ <div id="validator-badges">
213
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
214
+ </div>
215
+
216
+ </body>
217
+ </html>
@@ -93,7 +93,7 @@ links, and doing it all over again.
93
93
  <h3 class="section-bar">Methods</h3>
94
94
 
95
95
  <div class="name-list">
96
- <a href="#M000007">start_at</a>&nbsp;&nbsp;
96
+ <a href="#M000011">start_at</a>&nbsp;&nbsp;
97
97
  </div>
98
98
  </div>
99
99
 
@@ -115,11 +115,11 @@ links, and doing it all over again.
115
115
  <div id="methods">
116
116
  <h3 class="section-bar">Public Class methods</h3>
117
117
 
118
- <div id="method-M000007" class="method-detail">
119
- <a name="M000007"></a>
118
+ <div id="method-M000011" class="method-detail">
119
+ <a name="M000011"></a>
120
120
 
121
121
  <div class="method-heading">
122
- <a href="#M000007" class="method-signature">
122
+ <a href="#M000011" class="method-signature">
123
123
  <span class="method-name">start_at</span><span class="method-args">(a_url, &amp;block)</span>
124
124
  </a>
125
125
  </div>
@@ -128,7 +128,9 @@ links, and doing it all over again.
128
128
  <p>
129
129
  Runs the spider starting at the given URL. Also takes a block that is given
130
130
  the <a href="SpiderInstance.html">SpiderInstance</a>. Use the block to
131
- define the rules and handlers for the discovered Web pages.
131
+ define the rules and handlers for the discovered Web pages. See <a
132
+ href="SpiderInstance.html">SpiderInstance</a> for the possible rules and
133
+ handlers.
132
134
  </p>
133
135
  <pre>
134
136
  Spider.start_at('http://mike-burns.com/') do |s|
@@ -150,10 +152,10 @@ define the rules and handlers for the discovered Web pages.
150
152
  end
151
153
  </pre>
152
154
  <p><a class="source-toggle" href="#"
153
- onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
154
- <div class="method-source-code" id="M000007-source">
155
+ onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
156
+ <div class="method-source-code" id="M000011-source">
155
157
  <pre>
156
- <span class="ruby-comment cmt"># File lib/spider.rb, line 74</span>
158
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
157
159
  <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
158
160
  <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
159
161
  <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
@@ -55,8 +55,8 @@
55
55
  <tr class="top-aligned-row">
56
56
  <td><strong>In:</strong></td>
57
57
  <td>
58
- <a href="../files/lib/spider_rb.html">
59
- lib/spider.rb
58
+ <a href="../files/lib/spider_instance_rb.html">
59
+ lib/spider_instance.rb
60
60
  </a>
61
61
  <br />
62
62
  </td>
@@ -86,12 +86,13 @@
86
86
  <h3 class="section-bar">Methods</h3>
87
87
 
88
88
  <div class="name-list">
89
- <a href="#M000001">add_url_check</a>&nbsp;&nbsp;
90
- <a href="#M000006">clear_headers</a>&nbsp;&nbsp;
91
- <a href="#M000005">headers</a>&nbsp;&nbsp;
92
- <a href="#M000002">on</a>&nbsp;&nbsp;
93
- <a href="#M000003">setup</a>&nbsp;&nbsp;
94
- <a href="#M000004">teardown</a>&nbsp;&nbsp;
89
+ <a href="#M000004">add_url_check</a>&nbsp;&nbsp;
90
+ <a href="#M000005">check_already_seen_with</a>&nbsp;&nbsp;
91
+ <a href="#M000010">clear_headers</a>&nbsp;&nbsp;
92
+ <a href="#M000009">headers</a>&nbsp;&nbsp;
93
+ <a href="#M000006">on</a>&nbsp;&nbsp;
94
+ <a href="#M000007">setup</a>&nbsp;&nbsp;
95
+ <a href="#M000008">teardown</a>&nbsp;&nbsp;
95
96
  </div>
96
97
  </div>
97
98
 
@@ -113,11 +114,11 @@
113
114
  <div id="methods">
114
115
  <h3 class="section-bar">Public Instance methods</h3>
115
116
 
116
- <div id="method-M000001" class="method-detail">
117
- <a name="M000001"></a>
117
+ <div id="method-M000004" class="method-detail">
118
+ <a name="M000004"></a>
118
119
 
119
120
  <div class="method-heading">
120
- <a href="#M000001" class="method-signature">
121
+ <a href="#M000004" class="method-signature">
121
122
  <span class="method-name">add_url_check</span><span class="method-args">(&amp;block)</span>
122
123
  </a>
123
124
  </div>
@@ -136,10 +137,10 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
136
137
  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
137
138
  </pre>
138
139
  <p><a class="source-toggle" href="#"
139
- onclick="toggleCode('M000001-source');return false;">[Source]</a></p>
140
- <div class="method-source-code" id="M000001-source">
140
+ onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
141
+ <div class="method-source-code" id="M000004-source">
141
142
  <pre>
142
- <span class="ruby-comment cmt"># File lib/spider.rb, line 103</span>
143
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 69</span>
143
144
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
144
145
  <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
145
146
  <span class="ruby-keyword kw">end</span>
@@ -148,24 +149,74 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
148
149
  </div>
149
150
  </div>
150
151
 
151
- <div id="method-M000006" class="method-detail">
152
- <a name="M000006"></a>
152
+ <div id="method-M000005" class="method-detail">
153
+ <a name="M000005"></a>
153
154
 
154
155
  <div class="method-heading">
155
- <a href="#M000006" class="method-signature">
156
+ <a href="#M000005" class="method-signature">
157
+ <span class="method-name">check_already_seen_with</span><span class="method-args">(cacher)</span>
158
+ </a>
159
+ </div>
160
+
161
+ <div class="method-description">
162
+ <p>
163
+ The Web is a graph; to avoid cycles we store the nodes (URLs) already
164
+ visited. The Web is a really, really, really big graph; as such, this list
165
+ of visited nodes grows really, really, really big.
166
+ </p>
167
+ <p>
168
+ Change the object used to store these seen nodes with this. The default
169
+ object is an instance of Array. Available with <a
170
+ href="Spider.html">Spider</a> is a wrapper of memcached.
171
+ </p>
172
+ <p>
173
+ You can implement a custom class for this; any object passed to <a
174
+ href="SpiderInstance.html#M000005">check_already_seen_with</a> must
175
+ understand just &lt;&lt; and include? .
176
+ </p>
177
+ <pre>
178
+ # default
179
+ check_already_seen_with Array.new
180
+
181
+ # memcached
182
+ require 'spider/included_in_memcached'
183
+ check_already_seen_with IncludedInMemcached.new('localhost:11211')
184
+ </pre>
185
+ <p><a class="source-toggle" href="#"
186
+ onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
187
+ <div class="method-source-code" id="M000005-source">
188
+ <pre>
189
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 90</span>
190
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
191
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:&lt;&lt;</span>) <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
192
+ <span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
193
+ <span class="ruby-keyword kw">else</span>
194
+ <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to &lt;&lt; and included?'</span>
195
+ <span class="ruby-keyword kw">end</span>
196
+ <span class="ruby-keyword kw">end</span>
197
+ </pre>
198
+ </div>
199
+ </div>
200
+ </div>
201
+
202
+ <div id="method-M000010" class="method-detail">
203
+ <a name="M000010"></a>
204
+
205
+ <div class="method-heading">
206
+ <a href="#M000010" class="method-signature">
156
207
  <span class="method-name">clear_headers</span><span class="method-args">()</span>
157
208
  </a>
158
209
  </div>
159
210
 
160
211
  <div class="method-description">
161
212
  <p>
162
- Reset the <a href="SpiderInstance.html#M000005">headers</a> hash.
213
+ Reset the <a href="SpiderInstance.html#M000009">headers</a> hash.
163
214
  </p>
164
215
  <p><a class="source-toggle" href="#"
165
- onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
166
- <div class="method-source-code" id="M000006-source">
216
+ onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
217
+ <div class="method-source-code" id="M000010-source">
167
218
  <pre>
168
- <span class="ruby-comment cmt"># File lib/spider.rb, line 170</span>
219
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 157</span>
169
220
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
170
221
  <span class="ruby-ivar">@headers</span> = {}
171
222
  <span class="ruby-keyword kw">end</span>
@@ -174,11 +225,11 @@ Reset the <a href="SpiderInstance.html#M000005">headers</a> hash.
174
225
  </div>
175
226
  </div>
176
227
 
177
- <div id="method-M000005" class="method-detail">
178
- <a name="M000005"></a>
228
+ <div id="method-M000009" class="method-detail">
229
+ <a name="M000009"></a>
179
230
 
180
231
  <div class="method-heading">
181
- <a href="#M000005" class="method-signature">
232
+ <a href="#M000009" class="method-signature">
182
233
  <span class="method-name">headers</span><span class="method-args">()</span>
183
234
  </a>
184
235
  </div>
@@ -191,10 +242,10 @@ Use like a hash:
191
242
  headers['Cookies'] = 'user_id=1;password=btrross3'
192
243
  </pre>
193
244
  <p><a class="source-toggle" href="#"
194
- onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
195
- <div class="method-source-code" id="M000005-source">
245
+ onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
246
+ <div class="method-source-code" id="M000009-source">
196
247
  <pre>
197
- <span class="ruby-comment cmt"># File lib/spider.rb, line 158</span>
248
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 145</span>
198
249
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
199
250
  <span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
200
251
  <span class="ruby-keyword kw">end</span>
@@ -203,11 +254,11 @@ Use like a hash:
203
254
  </div>
204
255
  </div>
205
256
 
206
- <div id="method-M000002" class="method-detail">
207
- <a name="M000002"></a>
257
+ <div id="method-M000006" class="method-detail">
258
+ <a name="M000006"></a>
208
259
 
209
260
  <div class="method-heading">
210
- <a href="#M000002" class="method-signature">
261
+ <a href="#M000006" class="method-signature">
211
262
  <span class="method-name">on</span><span class="method-args">(code, p = nil, &amp;block)</span>
212
263
  </a>
213
264
  </div>
@@ -240,10 +291,10 @@ For example:
240
291
  end
241
292
  </pre>
242
293
  <p><a class="source-toggle" href="#"
243
- onclick="toggleCode('M000002-source');return false;">[Source]</a></p>
244
- <div class="method-source-code" id="M000002-source">
294
+ onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
295
+ <div class="method-source-code" id="M000006-source">
245
296
  <pre>
246
- <span class="ruby-comment cmt"># File lib/spider.rb, line 133</span>
297
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 120</span>
247
298
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
248
299
  <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
249
300
  <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
@@ -258,11 +309,11 @@ For example:
258
309
  </div>
259
310
  </div>
260
311
 
261
- <div id="method-M000003" class="method-detail">
262
- <a name="M000003"></a>
312
+ <div id="method-M000007" class="method-detail">
313
+ <a name="M000007"></a>
263
314
 
264
315
  <div class="method-heading">
265
- <a href="#M000003" class="method-signature">
316
+ <a href="#M000007" class="method-signature">
266
317
  <span class="method-name">setup</span><span class="method-args">(p = nil, &amp;block)</span>
267
318
  </a>
268
319
  </div>
@@ -277,10 +328,10 @@ Run before the HTTP request. Given the URL as a string.
277
328
  end
278
329
  </pre>
279
330
  <p><a class="source-toggle" href="#"
280
- onclick="toggleCode('M000003-source');return false;">[Source]</a></p>
281
- <div class="method-source-code" id="M000003-source">
331
+ onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
332
+ <div class="method-source-code" id="M000007-source">
282
333
  <pre>
283
- <span class="ruby-comment cmt"># File lib/spider.rb, line 147</span>
334
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 134</span>
284
335
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
285
336
  <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
286
337
  <span class="ruby-keyword kw">end</span>
@@ -289,11 +340,11 @@ Run before the HTTP request. Given the URL as a string.
289
340
  </div>
290
341
  </div>
291
342
 
292
- <div id="method-M000004" class="method-detail">
293
- <a name="M000004"></a>
343
+ <div id="method-M000008" class="method-detail">
344
+ <a name="M000008"></a>
294
345
 
295
346
  <div class="method-heading">
296
- <a href="#M000004" class="method-signature">
347
+ <a href="#M000008" class="method-signature">
297
348
  <span class="method-name">teardown</span><span class="method-args">(p = nil, &amp;block)</span>
298
349
  </a>
299
350
  </div>
@@ -303,10 +354,10 @@ Run before the HTTP request. Given the URL as a string.
303
354
  Run last, once for each page. Given the URL as a string.
304
355
  </p>
305
356
  <p><a class="source-toggle" href="#"
306
- onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
307
- <div class="method-source-code" id="M000004-source">
357
+ onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
358
+ <div class="method-source-code" id="M000008-source">
308
359
  <pre>
309
- <span class="ruby-comment cmt"># File lib/spider.rb, line 152</span>
360
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 139</span>
310
361
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
311
362
  <span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
312
363
  <span class="ruby-keyword kw">end</span>