spider 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +6 -0
- data/README +3 -3
- data/doc/classes/BeStaticServerPages.html +197 -0
- data/doc/classes/BeStaticServerPages.src/M000030.html +19 -0
- data/doc/classes/BeStaticServerPages.src/M000031.html +19 -0
- data/doc/classes/BeStaticServerPages.src/M000032.html +18 -0
- data/doc/classes/BeStaticServerPages.src/M000033.html +18 -0
- data/doc/classes/IncludedInMemcached.html +18 -45
- data/doc/classes/IncludedInMemcached.src/M000015.html +18 -0
- data/doc/classes/IncludedInMemcached.src/M000016.html +18 -0
- data/doc/classes/IncludedInMemcached.src/M000017.html +18 -0
- data/doc/classes/LoopingServlet.html +137 -0
- data/doc/classes/LoopingServlet.src/M000037.html +23 -0
- data/doc/classes/NextUrlsInSQS.html +204 -0
- data/doc/classes/NextUrlsInSQS.src/M000018.html +19 -0
- data/doc/classes/NextUrlsInSQS.src/M000019.html +22 -0
- data/doc/classes/NextUrlsInSQS.src/M000020.html +19 -0
- data/doc/classes/QueryServlet.html +137 -0
- data/doc/classes/QueryServlet.src/M000038.html +19 -0
- data/doc/classes/RobotRules.html +175 -0
- data/doc/classes/RobotRules.src/M000034.html +19 -0
- data/doc/classes/RobotRules.src/M000035.html +67 -0
- data/doc/classes/RobotRules.src/M000036.html +24 -0
- data/doc/classes/Spider.html +5 -17
- data/doc/classes/Spider.src/M000029.html +21 -0
- data/doc/classes/SpiderInstance.html +72 -108
- data/doc/classes/SpiderInstance.src/M000021.html +18 -0
- data/doc/classes/SpiderInstance.src/M000022.html +22 -0
- data/doc/classes/SpiderInstance.src/M000023.html +22 -0
- data/doc/classes/SpiderInstance.src/M000024.html +24 -0
- data/doc/classes/SpiderInstance.src/M000025.html +18 -0
- data/doc/classes/SpiderInstance.src/M000026.html +18 -0
- data/doc/classes/SpiderInstance.src/M000027.html +18 -0
- data/doc/classes/SpiderInstance.src/M000028.html +18 -0
- data/doc/created.rid +1 -1
- data/doc/files/lib/spider/included_in_memcached_rb.html +29 -1
- data/doc/files/lib/spider/next_urls_in_sqs_rb.html +144 -0
- data/doc/files/lib/spider/robot_rules_rb.html +114 -0
- data/doc/files/lib/spider/spider_instance_rb.html +1 -2
- data/doc/files/lib/spider_rb.html +40 -9
- data/doc/files/spec/spec_helper_rb.html +196 -0
- data/doc/files/spec/spec_helper_rb.src/M000001.html +20 -0
- data/doc/files/spec/spec_helper_rb.src/M000002.html +26 -0
- data/doc/files/spec/spec_helper_rb.src/M000003.html +24 -0
- data/doc/files/spec/spec_helper_rb.src/M000004.html +18 -0
- data/doc/files/spec/spec_helper_rb.src/M000005.html +23 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.html +142 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +19 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +18 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.html +210 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +21 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +19 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +19 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +27 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +26 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +27 -0
- data/doc/files/spec/spider_spec_rb.html +127 -0
- data/doc/files/spec/spider_spec_rb.src/M000014.html +23 -0
- data/doc/fr_class_index.html +5 -0
- data/doc/fr_file_index.html +6 -1
- data/doc/fr_method_index.html +38 -11
- data/doc/index.html +1 -1
- data/lib/spider/spider_instance.rb +15 -7
- data/spider.gemspec +1 -1
- metadata +84 -22
- data/lib/test.rb +0 -27
data/doc/classes/Spider.html
CHANGED
@@ -93,7 +93,7 @@ links, and doing it all over again.
|
|
93
93
|
<h3 class="section-bar">Methods</h3>
|
94
94
|
|
95
95
|
<div class="name-list">
|
96
|
-
<a href="#
|
96
|
+
<a href="#M000029">start_at</a>
|
97
97
|
</div>
|
98
98
|
</div>
|
99
99
|
|
@@ -115,11 +115,12 @@ links, and doing it all over again.
|
|
115
115
|
<div id="methods">
|
116
116
|
<h3 class="section-bar">Public Class methods</h3>
|
117
117
|
|
118
|
-
<div id="method-
|
119
|
-
<a name="
|
118
|
+
<div id="method-M000029" class="method-detail">
|
119
|
+
<a name="M000029"></a>
|
120
120
|
|
121
121
|
<div class="method-heading">
|
122
|
-
<a href="
|
122
|
+
<a href="Spider.src/M000029.html" target="Code" class="method-signature"
|
123
|
+
onclick="popupCode('Spider.src/M000029.html');return false;">
|
123
124
|
<span class="method-name">start_at</span><span class="method-args">(a_url, &block)</span>
|
124
125
|
</a>
|
125
126
|
</div>
|
@@ -151,19 +152,6 @@ handlers.
|
|
151
152
|
end
|
152
153
|
end
|
153
154
|
</pre>
|
154
|
-
<p><a class="source-toggle" href="#"
|
155
|
-
onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
|
156
|
-
<div class="method-source-code" id="M000011-source">
|
157
|
-
<pre>
|
158
|
-
<span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
|
159
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
160
|
-
<span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
|
161
|
-
<span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
|
162
|
-
<span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
|
163
|
-
<span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
|
164
|
-
<span class="ruby-keyword kw">end</span>
|
165
|
-
</pre>
|
166
|
-
</div>
|
167
155
|
</div>
|
168
156
|
</div>
|
169
157
|
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>start_at (Spider)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
|
16
|
+
<span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
|
17
|
+
<span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
|
18
|
+
<span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
|
19
|
+
<span class="ruby-keyword kw">end</span></pre>
|
20
|
+
</body>
|
21
|
+
</html>
|
@@ -86,13 +86,14 @@
|
|
86
86
|
<h3 class="section-bar">Methods</h3>
|
87
87
|
|
88
88
|
<div class="name-list">
|
89
|
-
<a href="#
|
90
|
-
<a href="#
|
91
|
-
<a href="#
|
92
|
-
<a href="#
|
93
|
-
<a href="#
|
94
|
-
<a href="#
|
95
|
-
<a href="#
|
89
|
+
<a href="#M000021">add_url_check</a>
|
90
|
+
<a href="#M000022">check_already_seen_with</a>
|
91
|
+
<a href="#M000028">clear_headers</a>
|
92
|
+
<a href="#M000027">headers</a>
|
93
|
+
<a href="#M000024">on</a>
|
94
|
+
<a href="#M000025">setup</a>
|
95
|
+
<a href="#M000023">store_next_urls_with</a>
|
96
|
+
<a href="#M000026">teardown</a>
|
96
97
|
</div>
|
97
98
|
</div>
|
98
99
|
|
@@ -114,11 +115,12 @@
|
|
114
115
|
<div id="methods">
|
115
116
|
<h3 class="section-bar">Public Instance methods</h3>
|
116
117
|
|
117
|
-
<div id="method-
|
118
|
-
<a name="
|
118
|
+
<div id="method-M000021" class="method-detail">
|
119
|
+
<a name="M000021"></a>
|
119
120
|
|
120
121
|
<div class="method-heading">
|
121
|
-
<a href="
|
122
|
+
<a href="SpiderInstance.src/M000021.html" target="Code" class="method-signature"
|
123
|
+
onclick="popupCode('SpiderInstance.src/M000021.html');return false;">
|
122
124
|
<span class="method-name">add_url_check</span><span class="method-args">(&block)</span>
|
123
125
|
</a>
|
124
126
|
</div>
|
@@ -136,24 +138,15 @@ href="http://mike-burns.com">mike-burns.com</a>’:
|
|
136
138
|
<pre>
|
137
139
|
add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
|
138
140
|
</pre>
|
139
|
-
<p><a class="source-toggle" href="#"
|
140
|
-
onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
|
141
|
-
<div class="method-source-code" id="M000004-source">
|
142
|
-
<pre>
|
143
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
|
144
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
145
|
-
<span class="ruby-ivar">@url_checks</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">block</span>
|
146
|
-
<span class="ruby-keyword kw">end</span>
|
147
|
-
</pre>
|
148
|
-
</div>
|
149
141
|
</div>
|
150
142
|
</div>
|
151
143
|
|
152
|
-
<div id="method-
|
153
|
-
<a name="
|
144
|
+
<div id="method-M000022" class="method-detail">
|
145
|
+
<a name="M000022"></a>
|
154
146
|
|
155
147
|
<div class="method-heading">
|
156
|
-
<a href="
|
148
|
+
<a href="SpiderInstance.src/M000022.html" target="Code" class="method-signature"
|
149
|
+
onclick="popupCode('SpiderInstance.src/M000022.html');return false;">
|
157
150
|
<span class="method-name">check_already_seen_with</span><span class="method-args">(cacher)</span>
|
158
151
|
</a>
|
159
152
|
</div>
|
@@ -171,7 +164,7 @@ href="Spider.html">Spider</a> is a wrapper of memcached.
|
|
171
164
|
</p>
|
172
165
|
<p>
|
173
166
|
You can implement a custom class for this; any object passed to <a
|
174
|
-
href="SpiderInstance.html#
|
167
|
+
href="SpiderInstance.html#M000022">check_already_seen_with</a> must
|
175
168
|
understand just << and include? .
|
176
169
|
</p>
|
177
170
|
<pre>
|
@@ -182,54 +175,32 @@ understand just << and included? .
|
|
182
175
|
require 'spider/included_in_memcached'
|
183
176
|
check_already_seen_with IncludedInMemcached.new('localhost:11211')
|
184
177
|
</pre>
|
185
|
-
<p><a class="source-toggle" href="#"
|
186
|
-
onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
|
187
|
-
<div class="method-source-code" id="M000005-source">
|
188
|
-
<pre>
|
189
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
|
190
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
|
191
|
-
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:<<</span>) <span class="ruby-operator">&&</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
|
192
|
-
<span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
|
193
|
-
<span class="ruby-keyword kw">else</span>
|
194
|
-
<span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to << and included?'</span>
|
195
|
-
<span class="ruby-keyword kw">end</span>
|
196
|
-
<span class="ruby-keyword kw">end</span>
|
197
|
-
</pre>
|
198
|
-
</div>
|
199
178
|
</div>
|
200
179
|
</div>
|
201
180
|
|
202
|
-
<div id="method-
|
203
|
-
<a name="
|
181
|
+
<div id="method-M000028" class="method-detail">
|
182
|
+
<a name="M000028"></a>
|
204
183
|
|
205
184
|
<div class="method-heading">
|
206
|
-
<a href="
|
185
|
+
<a href="SpiderInstance.src/M000028.html" target="Code" class="method-signature"
|
186
|
+
onclick="popupCode('SpiderInstance.src/M000028.html');return false;">
|
207
187
|
<span class="method-name">clear_headers</span><span class="method-args">()</span>
|
208
188
|
</a>
|
209
189
|
</div>
|
210
190
|
|
211
191
|
<div class="method-description">
|
212
192
|
<p>
|
213
|
-
Reset the <a href="SpiderInstance.html#
|
193
|
+
Reset the <a href="SpiderInstance.html#M000027">headers</a> hash.
|
214
194
|
</p>
|
215
|
-
<p><a class="source-toggle" href="#"
|
216
|
-
onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
|
217
|
-
<div class="method-source-code" id="M000010-source">
|
218
|
-
<pre>
|
219
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 158</span>
|
220
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
|
221
|
-
<span class="ruby-ivar">@headers</span> = {}
|
222
|
-
<span class="ruby-keyword kw">end</span>
|
223
|
-
</pre>
|
224
|
-
</div>
|
225
195
|
</div>
|
226
196
|
</div>
|
227
197
|
|
228
|
-
<div id="method-
|
229
|
-
<a name="
|
198
|
+
<div id="method-M000027" class="method-detail">
|
199
|
+
<a name="M000027"></a>
|
230
200
|
|
231
201
|
<div class="method-heading">
|
232
|
-
<a href="
|
202
|
+
<a href="SpiderInstance.src/M000027.html" target="Code" class="method-signature"
|
203
|
+
onclick="popupCode('SpiderInstance.src/M000027.html');return false;">
|
233
204
|
<span class="method-name">headers</span><span class="method-args">()</span>
|
234
205
|
</a>
|
235
206
|
</div>
|
@@ -241,24 +212,15 @@ Use like a hash:
|
|
241
212
|
<pre>
|
242
213
|
headers['Cookies'] = 'user_id=1;password=btrross3'
|
243
214
|
</pre>
|
244
|
-
<p><a class="source-toggle" href="#"
|
245
|
-
onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
|
246
|
-
<div class="method-source-code" id="M000009-source">
|
247
|
-
<pre>
|
248
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 146</span>
|
249
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
|
250
|
-
<span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
|
251
|
-
<span class="ruby-keyword kw">end</span>
|
252
|
-
</pre>
|
253
|
-
</div>
|
254
215
|
</div>
|
255
216
|
</div>
|
256
217
|
|
257
|
-
<div id="method-
|
258
|
-
<a name="
|
218
|
+
<div id="method-M000024" class="method-detail">
|
219
|
+
<a name="M000024"></a>
|
259
220
|
|
260
221
|
<div class="method-heading">
|
261
|
-
<a href="
|
222
|
+
<a href="SpiderInstance.src/M000024.html" target="Code" class="method-signature"
|
223
|
+
onclick="popupCode('SpiderInstance.src/M000024.html');return false;">
|
262
224
|
<span class="method-name">on</span><span class="method-args">(code, p = nil, &block)</span>
|
263
225
|
</a>
|
264
226
|
</div>
|
@@ -290,30 +252,15 @@ For example:
|
|
290
252
|
puts "Given this code: #{resp.code}"
|
291
253
|
end
|
292
254
|
</pre>
|
293
|
-
<p><a class="source-toggle" href="#"
|
294
|
-
onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
|
295
|
-
<div class="method-source-code" id="M000006-source">
|
296
|
-
<pre>
|
297
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 121</span>
|
298
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
299
|
-
<span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
300
|
-
<span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
|
301
|
-
<span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
|
302
|
-
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
|
303
|
-
<span class="ruby-keyword kw">else</span>
|
304
|
-
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
|
305
|
-
<span class="ruby-keyword kw">end</span>
|
306
|
-
<span class="ruby-keyword kw">end</span>
|
307
|
-
</pre>
|
308
|
-
</div>
|
309
255
|
</div>
|
310
256
|
</div>
|
311
257
|
|
312
|
-
<div id="method-
|
313
|
-
<a name="
|
258
|
+
<div id="method-M000025" class="method-detail">
|
259
|
+
<a name="M000025"></a>
|
314
260
|
|
315
261
|
<div class="method-heading">
|
316
|
-
<a href="
|
262
|
+
<a href="SpiderInstance.src/M000025.html" target="Code" class="method-signature"
|
263
|
+
onclick="popupCode('SpiderInstance.src/M000025.html');return false;">
|
317
264
|
<span class="method-name">setup</span><span class="method-args">(p = nil, &block)</span>
|
318
265
|
</a>
|
319
266
|
</div>
|
@@ -327,24 +274,51 @@ Run before the HTTP request. Given the URL as a string.
|
|
327
274
|
headers['Cookies'] = 'user_id=1;admin=true'
|
328
275
|
end
|
329
276
|
</pre>
|
330
|
-
|
331
|
-
|
332
|
-
|
277
|
+
</div>
|
278
|
+
</div>
|
279
|
+
|
280
|
+
<div id="method-M000023" class="method-detail">
|
281
|
+
<a name="M000023"></a>
|
282
|
+
|
283
|
+
<div class="method-heading">
|
284
|
+
<a href="SpiderInstance.src/M000023.html" target="Code" class="method-signature"
|
285
|
+
onclick="popupCode('SpiderInstance.src/M000023.html');return false;">
|
286
|
+
<span class="method-name">store_next_urls_with</span><span class="method-args">(a_store)</span>
|
287
|
+
</a>
|
288
|
+
</div>
|
289
|
+
|
290
|
+
<div class="method-description">
|
291
|
+
<p>
|
292
|
+
The Web is a really, really, really big graph; as such, this list of nodes
|
293
|
+
to visit grows really, really, really big.
|
294
|
+
</p>
|
295
|
+
<p>
|
296
|
+
Change the object used to store nodes we have yet to walk. The default
|
297
|
+
object is an instance of Array. Available with <a
|
298
|
+
href="Spider.html">Spider</a> is a wrapper of AmazonSQS.
|
299
|
+
</p>
|
300
|
+
<p>
|
301
|
+
You can implement a custom class for this; any object passed to <a
|
302
|
+
href="SpiderInstance.html#M000023">store_next_urls_with</a> must
|
303
|
+
understand just push and pop .
|
304
|
+
</p>
|
333
305
|
<pre>
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
306
|
+
# default
|
307
|
+
store_next_urls_with Array.new
|
308
|
+
|
309
|
+
# AmazonSQS
|
310
|
+
require 'spider/next_urls_in_sqs'
|
311
|
+
store_next_urls_with NextUrlsInSQS.new(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, queue_name)
|
338
312
|
</pre>
|
339
|
-
</div>
|
340
313
|
</div>
|
341
314
|
</div>
|
342
315
|
|
343
|
-
<div id="method-
|
344
|
-
<a name="
|
316
|
+
<div id="method-M000026" class="method-detail">
|
317
|
+
<a name="M000026"></a>
|
345
318
|
|
346
319
|
<div class="method-heading">
|
347
|
-
<a href="
|
320
|
+
<a href="SpiderInstance.src/M000026.html" target="Code" class="method-signature"
|
321
|
+
onclick="popupCode('SpiderInstance.src/M000026.html');return false;">
|
348
322
|
<span class="method-name">teardown</span><span class="method-args">(p = nil, &block)</span>
|
349
323
|
</a>
|
350
324
|
</div>
|
@@ -353,16 +327,6 @@ Run before the HTTP request. Given the URL as a string.
|
|
353
327
|
<p>
|
354
328
|
Run last, once for each page. Given the URL as a string.
|
355
329
|
</p>
|
356
|
-
<p><a class="source-toggle" href="#"
|
357
|
-
onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
|
358
|
-
<div class="method-source-code" id="M000008-source">
|
359
|
-
<pre>
|
360
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 140</span>
|
361
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
362
|
-
<span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
363
|
-
<span class="ruby-keyword kw">end</span>
|
364
|
-
</pre>
|
365
|
-
</div>
|
366
330
|
</div>
|
367
331
|
</div>
|
368
332
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>add_url_check (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-ivar">@url_checks</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">block</span>
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>check_already_seen_with (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
|
15
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:<<</span>) <span class="ruby-operator">&&</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
|
16
|
+
<span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
|
17
|
+
<span class="ruby-keyword kw">else</span>
|
18
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to << and included?'</span>
|
19
|
+
<span class="ruby-keyword kw">end</span>
|
20
|
+
<span class="ruby-keyword kw">end</span></pre>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>store_next_urls_with (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 115</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">store_next_urls_with</span>(<span class="ruby-identifier">a_store</span>)
|
15
|
+
<span class="ruby-identifier">tmp_next_urls</span> = <span class="ruby-ivar">@next_urls</span>
|
16
|
+
<span class="ruby-ivar">@next_urls</span> = <span class="ruby-identifier">a_store</span>
|
17
|
+
<span class="ruby-identifier">tmp_next_urls</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">a_url_hash</span><span class="ruby-operator">|</span>
|
18
|
+
<span class="ruby-ivar">@next_urls</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">a_url_hash</span>
|
19
|
+
<span class="ruby-keyword kw">end</span>
|
20
|
+
<span class="ruby-keyword kw">end</span></pre>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>on (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 145</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
16
|
+
<span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
|
17
|
+
<span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
|
18
|
+
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
|
19
|
+
<span class="ruby-keyword kw">else</span>
|
20
|
+
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
|
21
|
+
<span class="ruby-keyword kw">end</span>
|
22
|
+
<span class="ruby-keyword kw">end</span></pre>
|
23
|
+
</body>
|
24
|
+
</html>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>setup (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 159</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|