spider 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES CHANGED
@@ -1,3 +1,6 @@
1
+ 2007-11-02:
2
+ * Memcached support.
3
+
1
4
  2007-10-31:
2
5
  * Add `setup' and `teardown' handlers.
3
6
  * Can set the headers for a HTTP request.
data/README CHANGED
@@ -1,41 +1,114 @@
1
1
  Spider, a Web spidering library for Ruby. It handles the robots.txt,
2
2
  scraping, collecting, and looping so that you can just handle the data.
3
3
 
4
- == Usage
4
+ == Examples
5
5
 
6
+ === Crawl the Web, loading each page in turn, until you run out of memory
7
+
8
+ require 'spider'
9
+ Spider.start_at('http://mike-burns.com/') {}
10
+
11
+ === To handle erroneous responses
12
+
13
+ require 'spider'
14
+ Spider.start_at('http://mike-burns.com/') do |s|
15
+ s.on :failure do |a_url, resp, prior_url|
16
+ puts "URL failed: #{a_url}"
17
+ puts " linked from #{prior_url}"
18
+ end
19
+ end
20
+
21
+ === Or handle successful responses
22
+
23
+ require 'spider'
24
+ Spider.start_at('http://mike-burns.com/') do |s|
25
+ s.on :success do |a_url, resp, prior_url|
26
+ puts "#{a_url}: #{resp.code}"
27
+ puts resp.body
28
+ puts
29
+ end
30
+ end
31
+
32
+ === Limit to just one domain
33
+
34
+ require 'spider'
6
35
  Spider.start_at('http://mike-burns.com/') do |s|
7
- # Limit the pages to just this domain.
8
36
  s.add_url_check do |a_url|
9
37
  a_url =~ %r{^http://mike-burns.com.*}
10
38
  end
39
+ end
11
40
 
12
- # Handle 404s.
13
- s.on 404 do |a_url, resp, prior_url|
14
- puts "URL not found: #{a_url}"
15
- end
41
+ === Pass headers to some requests
16
42
 
17
- # Handle 2xx.
18
- s.on :success do |a_url, resp, prior_url|
19
- puts "body: #{resp.body}"
43
+ require 'spider'
44
+ Spider.start_at('http://mike-burns.com/') do |s|
45
+ s.setup do |a_url|
46
+ if a_url =~ %r{^http://.*wikipedia.*}
47
+ headers['User-Agent'] = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
48
+ end
20
49
  end
50
+ end
21
51
 
22
- # Handle everything.
23
- s.on :every do |a_url, resp, prior_url|
24
- puts "URL returned anything: #{a_url} with this code #{resp.code}"
52
+ === Use memcached to track cycles
53
+
54
+ require 'spider'
55
+ require 'spider/included_in_memcached'
56
+ SERVERS = ['10.0.10.2:11211','10.0.10.3:11211','10.0.10.4:11211']
57
+ Spider.start_at('http://mike-burns.com/') do |s|
58
+ s.check_already_seen_with IncludedInMemcached.new(SERVERS)
59
+ end
60
+
61
+ === Track cycles with a custom object
62
+
63
+ require 'spider'
64
+
65
+ class ExpireLinks < Hash
66
+ def <<(v)
67
+ self[v] = Time.now
68
+ end
69
+ def include?(v)
70
+ self[v].kind_of?(Time) && (self[v] + 86400) >= Time.now
25
71
  end
26
72
  end
27
73
 
74
+ Spider.start_at('http://mike-burns.com/') do |s|
75
+ s.check_already_seen_with ExpireLinks.new
76
+ end
77
+
78
+ === Create a URL graph
79
+
80
+ require 'spider'
81
+ nodes = {}
82
+ Spider.start_at('http://mike-burns.com/') do |s|
83
+ s.add_url_check {|a_url| a_url =~ %r{^http://mike-burns.com.*} }
28
84
 
29
- == Requirements
85
+ s.on(:every) do |a_url, resp, prior_url|
86
+ nodes[prior_url] ||= []
87
+ nodes[prior_url] << a_url
88
+ end
89
+ end
30
90
 
31
- This library uses `robot_rules' (included), `open-uri', and `uri'. Any modern
32
- Ruby should work; if yours doesn't, let me know so I can update this with your
33
- version number.
91
+ === Use a proxy
92
+
93
+ require 'net/http_configuration'
94
+ require 'spider'
95
+ http_conf = Net::HTTP::Configuration.new(:proxy_host => '7proxies.org',
96
+ :proxy_port => 8881)
97
+ http_conf.apply do
98
+ Spider.start_at('http://img.4chan.org/b/') do |s|
99
+ s.on(:success) do |a_url, resp, prior_url|
100
+ File.open(a_url.gsub('/',':'),'w') do |f|
101
+ f.write(resp.body)
102
+ end
103
+ end
104
+ end
105
+ end
34
106
 
35
107
  == Author
36
108
 
37
109
  Mike Burns http://mike-burns.com mike@mike-burns.com
38
110
 
39
- With help from Matt Horan and John Nagro.
111
+ Help from Matt Horan and John Nagro.
112
+
40
113
  With `robot_rules' from James Edward Gray II via
41
114
  http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589
@@ -0,0 +1,217 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: IncludedInMemcached</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">IncludedInMemcached</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../files/lib/included_in_memcached_rb.html">
59
+ lib/included_in_memcached.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+ <div id="description">
82
+ <p>
83
+ A specialized class using memcached to track items stored. It supports
84
+ three operations: <a href="IncludedInMemcached.html#M000001">new</a>,
85
+ &lt;&lt;, and <a href="IncludedInMemcached.html#M000003">include?</a> .
86
+ Together these can be used to add items to the memcache, then determine
87
+ whether the item has been added.
88
+ </p>
89
+
90
+ </div>
91
+
92
+
93
+ </div>
94
+
95
+ <div id="method-list">
96
+ <h3 class="section-bar">Methods</h3>
97
+
98
+ <div class="name-list">
99
+ <a href="#M000002">&lt;&lt;</a>&nbsp;&nbsp;
100
+ <a href="#M000003">include?</a>&nbsp;&nbsp;
101
+ <a href="#M000001">new</a>&nbsp;&nbsp;
102
+ </div>
103
+ </div>
104
+
105
+ </div>
106
+
107
+
108
+ <!-- if includes -->
109
+
110
+ <div id="section">
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+ <!-- if method_list -->
120
+ <div id="methods">
121
+ <h3 class="section-bar">Public Class methods</h3>
122
+
123
+ <div id="method-M000001" class="method-detail">
124
+ <a name="M000001"></a>
125
+
126
+ <div class="method-heading">
127
+ <a href="#M000001" class="method-signature">
128
+ <span class="method-name">new</span><span class="method-args">(*a)</span>
129
+ </a>
130
+ </div>
131
+
132
+ <div class="method-description">
133
+ <p>
134
+ Construct a <a href="IncludedInMemcached.html#M000001">new</a> <a
135
+ href="IncludedInMemcached.html">IncludedInMemcached</a> instance. All
136
+ arguments here are passed to MemCache (part of the memcache-client gem).
137
+ </p>
138
+ <p><a class="source-toggle" href="#"
139
+ onclick="toggleCode('M000001-source');return false;">[Source]</a></p>
140
+ <div class="method-source-code" id="M000001-source">
141
+ <pre>
142
+ <span class="ruby-comment cmt"># File lib/included_in_memcached.rb, line 9</span>
143
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">a</span>)
144
+ <span class="ruby-ivar">@c</span> = <span class="ruby-constant">MemCache</span>.<span class="ruby-identifier">new</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">a</span>)
145
+ <span class="ruby-keyword kw">end</span>
146
+ </pre>
147
+ </div>
148
+ </div>
149
+ </div>
150
+
151
+ <h3 class="section-bar">Public Instance methods</h3>
152
+
153
+ <div id="method-M000002" class="method-detail">
154
+ <a name="M000002"></a>
155
+
156
+ <div class="method-heading">
157
+ <a href="#M000002" class="method-signature">
158
+ <span class="method-name">&lt;&lt;</span><span class="method-args">(v)</span>
159
+ </a>
160
+ </div>
161
+
162
+ <div class="method-description">
163
+ <p>
164
+ Add an item to the memcache.
165
+ </p>
166
+ <p><a class="source-toggle" href="#"
167
+ onclick="toggleCode('M000002-source');return false;">[Source]</a></p>
168
+ <div class="method-source-code" id="M000002-source">
169
+ <pre>
170
+ <span class="ruby-comment cmt"># File lib/included_in_memcached.rb, line 14</span>
171
+ <span class="ruby-keyword kw">def</span> <span class="ruby-operator">&lt;&lt;</span>(<span class="ruby-identifier">v</span>)
172
+ <span class="ruby-ivar">@c</span>.<span class="ruby-identifier">add</span>(<span class="ruby-identifier">v</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-identifier">v</span>)
173
+ <span class="ruby-keyword kw">end</span>
174
+ </pre>
175
+ </div>
176
+ </div>
177
+ </div>
178
+
179
+ <div id="method-M000003" class="method-detail">
180
+ <a name="M000003"></a>
181
+
182
+ <div class="method-heading">
183
+ <a href="#M000003" class="method-signature">
184
+ <span class="method-name">include?</span><span class="method-args">(v)</span>
185
+ </a>
186
+ </div>
187
+
188
+ <div class="method-description">
189
+ <p>
190
+ True if the item is in the memcache.
191
+ </p>
192
+ <p><a class="source-toggle" href="#"
193
+ onclick="toggleCode('M000003-source');return false;">[Source]</a></p>
194
+ <div class="method-source-code" id="M000003-source">
195
+ <pre>
196
+ <span class="ruby-comment cmt"># File lib/included_in_memcached.rb, line 19</span>
197
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">include?</span>(<span class="ruby-identifier">v</span>)
198
+ <span class="ruby-ivar">@c</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">v</span>.<span class="ruby-identifier">to_s</span>) <span class="ruby-operator">==</span> <span class="ruby-identifier">v</span>
199
+ <span class="ruby-keyword kw">end</span>
200
+ </pre>
201
+ </div>
202
+ </div>
203
+ </div>
204
+
205
+
206
+ </div>
207
+
208
+
209
+ </div>
210
+
211
+
212
+ <div id="validator-badges">
213
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
214
+ </div>
215
+
216
+ </body>
217
+ </html>
@@ -93,7 +93,7 @@ links, and doing it all over again.
93
93
  <h3 class="section-bar">Methods</h3>
94
94
 
95
95
  <div class="name-list">
96
- <a href="#M000007">start_at</a>&nbsp;&nbsp;
96
+ <a href="#M000011">start_at</a>&nbsp;&nbsp;
97
97
  </div>
98
98
  </div>
99
99
 
@@ -115,11 +115,11 @@ links, and doing it all over again.
115
115
  <div id="methods">
116
116
  <h3 class="section-bar">Public Class methods</h3>
117
117
 
118
- <div id="method-M000007" class="method-detail">
119
- <a name="M000007"></a>
118
+ <div id="method-M000011" class="method-detail">
119
+ <a name="M000011"></a>
120
120
 
121
121
  <div class="method-heading">
122
- <a href="#M000007" class="method-signature">
122
+ <a href="#M000011" class="method-signature">
123
123
  <span class="method-name">start_at</span><span class="method-args">(a_url, &amp;block)</span>
124
124
  </a>
125
125
  </div>
@@ -128,7 +128,9 @@ links, and doing it all over again.
128
128
  <p>
129
129
  Runs the spider starting at the given URL. Also takes a block that is given
130
130
  the <a href="SpiderInstance.html">SpiderInstance</a>. Use the block to
131
- define the rules and handlers for the discovered Web pages.
131
+ define the rules and handlers for the discovered Web pages. See <a
132
+ href="SpiderInstance.html">SpiderInstance</a> for the possible rules and
133
+ handlers.
132
134
  </p>
133
135
  <pre>
134
136
  Spider.start_at('http://mike-burns.com/') do |s|
@@ -150,10 +152,10 @@ define the rules and handlers for the discovered Web pages.
150
152
  end
151
153
  </pre>
152
154
  <p><a class="source-toggle" href="#"
153
- onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
154
- <div class="method-source-code" id="M000007-source">
155
+ onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
156
+ <div class="method-source-code" id="M000011-source">
155
157
  <pre>
156
- <span class="ruby-comment cmt"># File lib/spider.rb, line 74</span>
158
+ <span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
157
159
  <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
158
160
  <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
159
161
  <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
@@ -55,8 +55,8 @@
55
55
  <tr class="top-aligned-row">
56
56
  <td><strong>In:</strong></td>
57
57
  <td>
58
- <a href="../files/lib/spider_rb.html">
59
- lib/spider.rb
58
+ <a href="../files/lib/spider_instance_rb.html">
59
+ lib/spider_instance.rb
60
60
  </a>
61
61
  <br />
62
62
  </td>
@@ -86,12 +86,13 @@
86
86
  <h3 class="section-bar">Methods</h3>
87
87
 
88
88
  <div class="name-list">
89
- <a href="#M000001">add_url_check</a>&nbsp;&nbsp;
90
- <a href="#M000006">clear_headers</a>&nbsp;&nbsp;
91
- <a href="#M000005">headers</a>&nbsp;&nbsp;
92
- <a href="#M000002">on</a>&nbsp;&nbsp;
93
- <a href="#M000003">setup</a>&nbsp;&nbsp;
94
- <a href="#M000004">teardown</a>&nbsp;&nbsp;
89
+ <a href="#M000004">add_url_check</a>&nbsp;&nbsp;
90
+ <a href="#M000005">check_already_seen_with</a>&nbsp;&nbsp;
91
+ <a href="#M000010">clear_headers</a>&nbsp;&nbsp;
92
+ <a href="#M000009">headers</a>&nbsp;&nbsp;
93
+ <a href="#M000006">on</a>&nbsp;&nbsp;
94
+ <a href="#M000007">setup</a>&nbsp;&nbsp;
95
+ <a href="#M000008">teardown</a>&nbsp;&nbsp;
95
96
  </div>
96
97
  </div>
97
98
 
@@ -113,11 +114,11 @@
113
114
  <div id="methods">
114
115
  <h3 class="section-bar">Public Instance methods</h3>
115
116
 
116
- <div id="method-M000001" class="method-detail">
117
- <a name="M000001"></a>
117
+ <div id="method-M000004" class="method-detail">
118
+ <a name="M000004"></a>
118
119
 
119
120
  <div class="method-heading">
120
- <a href="#M000001" class="method-signature">
121
+ <a href="#M000004" class="method-signature">
121
122
  <span class="method-name">add_url_check</span><span class="method-args">(&amp;block)</span>
122
123
  </a>
123
124
  </div>
@@ -136,10 +137,10 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
136
137
  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
137
138
  </pre>
138
139
  <p><a class="source-toggle" href="#"
139
- onclick="toggleCode('M000001-source');return false;">[Source]</a></p>
140
- <div class="method-source-code" id="M000001-source">
140
+ onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
141
+ <div class="method-source-code" id="M000004-source">
141
142
  <pre>
142
- <span class="ruby-comment cmt"># File lib/spider.rb, line 103</span>
143
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 69</span>
143
144
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
144
145
  <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
145
146
  <span class="ruby-keyword kw">end</span>
@@ -148,24 +149,74 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
148
149
  </div>
149
150
  </div>
150
151
 
151
- <div id="method-M000006" class="method-detail">
152
- <a name="M000006"></a>
152
+ <div id="method-M000005" class="method-detail">
153
+ <a name="M000005"></a>
153
154
 
154
155
  <div class="method-heading">
155
- <a href="#M000006" class="method-signature">
156
+ <a href="#M000005" class="method-signature">
157
+ <span class="method-name">check_already_seen_with</span><span class="method-args">(cacher)</span>
158
+ </a>
159
+ </div>
160
+
161
+ <div class="method-description">
162
+ <p>
163
+ The Web is a graph; to avoid cycles we store the nodes (URLs) already
164
+ visited. The Web is a really, really, really big graph; as such, this list
165
+ of visited nodes grows really, really, really big.
166
+ </p>
167
+ <p>
168
+ Change the object used to store these seen nodes with this. The default
169
+ object is an instance of Array. Available with <a
170
+ href="Spider.html">Spider</a> is a wrapper of memcached.
171
+ </p>
172
+ <p>
173
+ You can implement a custom class for this; any object passed to <a
174
+ href="SpiderInstance.html#M000005">check_already_seen_with</a> must
175
+ understand just &lt;&lt; and include? .
176
+ </p>
177
+ <pre>
178
+ # default
179
+ check_already_seen_with Array.new
180
+
181
+ # memcached
182
+ require 'spider/included_in_memcached'
183
+ check_already_seen_with IncludedInMemcached.new('localhost:11211')
184
+ </pre>
185
+ <p><a class="source-toggle" href="#"
186
+ onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
187
+ <div class="method-source-code" id="M000005-source">
188
+ <pre>
189
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 90</span>
190
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
191
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:&lt;&lt;</span>) <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
192
+ <span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
193
+ <span class="ruby-keyword kw">else</span>
194
+ <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to &lt;&lt; and included?'</span>
195
+ <span class="ruby-keyword kw">end</span>
196
+ <span class="ruby-keyword kw">end</span>
197
+ </pre>
198
+ </div>
199
+ </div>
200
+ </div>
201
+
202
+ <div id="method-M000010" class="method-detail">
203
+ <a name="M000010"></a>
204
+
205
+ <div class="method-heading">
206
+ <a href="#M000010" class="method-signature">
156
207
  <span class="method-name">clear_headers</span><span class="method-args">()</span>
157
208
  </a>
158
209
  </div>
159
210
 
160
211
  <div class="method-description">
161
212
  <p>
162
- Reset the <a href="SpiderInstance.html#M000005">headers</a> hash.
213
+ Reset the <a href="SpiderInstance.html#M000009">headers</a> hash.
163
214
  </p>
164
215
  <p><a class="source-toggle" href="#"
165
- onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
166
- <div class="method-source-code" id="M000006-source">
216
+ onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
217
+ <div class="method-source-code" id="M000010-source">
167
218
  <pre>
168
- <span class="ruby-comment cmt"># File lib/spider.rb, line 170</span>
219
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 157</span>
169
220
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
170
221
  <span class="ruby-ivar">@headers</span> = {}
171
222
  <span class="ruby-keyword kw">end</span>
@@ -174,11 +225,11 @@ Reset the <a href="SpiderInstance.html#M000005">headers</a> hash.
174
225
  </div>
175
226
  </div>
176
227
 
177
- <div id="method-M000005" class="method-detail">
178
- <a name="M000005"></a>
228
+ <div id="method-M000009" class="method-detail">
229
+ <a name="M000009"></a>
179
230
 
180
231
  <div class="method-heading">
181
- <a href="#M000005" class="method-signature">
232
+ <a href="#M000009" class="method-signature">
182
233
  <span class="method-name">headers</span><span class="method-args">()</span>
183
234
  </a>
184
235
  </div>
@@ -191,10 +242,10 @@ Use like a hash:
191
242
  headers['Cookie'] = 'user_id=1;password=btrross3'
192
243
  </pre>
193
244
  <p><a class="source-toggle" href="#"
194
- onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
195
- <div class="method-source-code" id="M000005-source">
245
+ onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
246
+ <div class="method-source-code" id="M000009-source">
196
247
  <pre>
197
- <span class="ruby-comment cmt"># File lib/spider.rb, line 158</span>
248
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 145</span>
198
249
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
199
250
  <span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
200
251
  <span class="ruby-keyword kw">end</span>
@@ -203,11 +254,11 @@ Use like a hash:
203
254
  </div>
204
255
  </div>
205
256
 
206
- <div id="method-M000002" class="method-detail">
207
- <a name="M000002"></a>
257
+ <div id="method-M000006" class="method-detail">
258
+ <a name="M000006"></a>
208
259
 
209
260
  <div class="method-heading">
210
- <a href="#M000002" class="method-signature">
261
+ <a href="#M000006" class="method-signature">
211
262
  <span class="method-name">on</span><span class="method-args">(code, p = nil, &amp;block)</span>
212
263
  </a>
213
264
  </div>
@@ -240,10 +291,10 @@ For example:
240
291
  end
241
292
  </pre>
242
293
  <p><a class="source-toggle" href="#"
243
- onclick="toggleCode('M000002-source');return false;">[Source]</a></p>
244
- <div class="method-source-code" id="M000002-source">
294
+ onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
295
+ <div class="method-source-code" id="M000006-source">
245
296
  <pre>
246
- <span class="ruby-comment cmt"># File lib/spider.rb, line 133</span>
297
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 120</span>
247
298
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
248
299
  <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
249
300
  <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
@@ -258,11 +309,11 @@ For example:
258
309
  </div>
259
310
  </div>
260
311
 
261
- <div id="method-M000003" class="method-detail">
262
- <a name="M000003"></a>
312
+ <div id="method-M000007" class="method-detail">
313
+ <a name="M000007"></a>
263
314
 
264
315
  <div class="method-heading">
265
- <a href="#M000003" class="method-signature">
316
+ <a href="#M000007" class="method-signature">
266
317
  <span class="method-name">setup</span><span class="method-args">(p = nil, &amp;block)</span>
267
318
  </a>
268
319
  </div>
@@ -277,10 +328,10 @@ Run before the HTTP request. Given the URL as a string.
277
328
  end
278
329
  </pre>
279
330
  <p><a class="source-toggle" href="#"
280
- onclick="toggleCode('M000003-source');return false;">[Source]</a></p>
281
- <div class="method-source-code" id="M000003-source">
331
+ onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
332
+ <div class="method-source-code" id="M000007-source">
282
333
  <pre>
283
- <span class="ruby-comment cmt"># File lib/spider.rb, line 147</span>
334
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 134</span>
284
335
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
285
336
  <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
286
337
  <span class="ruby-keyword kw">end</span>
@@ -289,11 +340,11 @@ Run before the HTTP request. Given the URL as a string.
289
340
  </div>
290
341
  </div>
291
342
 
292
- <div id="method-M000004" class="method-detail">
293
- <a name="M000004"></a>
343
+ <div id="method-M000008" class="method-detail">
344
+ <a name="M000008"></a>
294
345
 
295
346
  <div class="method-heading">
296
- <a href="#M000004" class="method-signature">
347
+ <a href="#M000008" class="method-signature">
297
348
  <span class="method-name">teardown</span><span class="method-args">(p = nil, &amp;block)</span>
298
349
  </a>
299
350
  </div>
@@ -303,10 +354,10 @@ Run before the HTTP request. Given the URL as a string.
303
354
  Run last, once for each page. Given the URL as a string.
304
355
  </p>
305
356
  <p><a class="source-toggle" href="#"
306
- onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
307
- <div class="method-source-code" id="M000004-source">
357
+ onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
358
+ <div class="method-source-code" id="M000008-source">
308
359
  <pre>
309
- <span class="ruby-comment cmt"># File lib/spider.rb, line 152</span>
360
+ <span class="ruby-comment cmt"># File lib/spider_instance.rb, line 139</span>
310
361
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
311
362
  <span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
312
363
  <span class="ruby-keyword kw">end</span>