spider 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +6 -0
- data/README +3 -3
- data/doc/classes/BeStaticServerPages.html +197 -0
- data/doc/classes/BeStaticServerPages.src/M000030.html +19 -0
- data/doc/classes/BeStaticServerPages.src/M000031.html +19 -0
- data/doc/classes/BeStaticServerPages.src/M000032.html +18 -0
- data/doc/classes/BeStaticServerPages.src/M000033.html +18 -0
- data/doc/classes/IncludedInMemcached.html +18 -45
- data/doc/classes/IncludedInMemcached.src/M000015.html +18 -0
- data/doc/classes/IncludedInMemcached.src/M000016.html +18 -0
- data/doc/classes/IncludedInMemcached.src/M000017.html +18 -0
- data/doc/classes/LoopingServlet.html +137 -0
- data/doc/classes/LoopingServlet.src/M000037.html +23 -0
- data/doc/classes/NextUrlsInSQS.html +204 -0
- data/doc/classes/NextUrlsInSQS.src/M000018.html +19 -0
- data/doc/classes/NextUrlsInSQS.src/M000019.html +22 -0
- data/doc/classes/NextUrlsInSQS.src/M000020.html +19 -0
- data/doc/classes/QueryServlet.html +137 -0
- data/doc/classes/QueryServlet.src/M000038.html +19 -0
- data/doc/classes/RobotRules.html +175 -0
- data/doc/classes/RobotRules.src/M000034.html +19 -0
- data/doc/classes/RobotRules.src/M000035.html +67 -0
- data/doc/classes/RobotRules.src/M000036.html +24 -0
- data/doc/classes/Spider.html +5 -17
- data/doc/classes/Spider.src/M000029.html +21 -0
- data/doc/classes/SpiderInstance.html +72 -108
- data/doc/classes/SpiderInstance.src/M000021.html +18 -0
- data/doc/classes/SpiderInstance.src/M000022.html +22 -0
- data/doc/classes/SpiderInstance.src/M000023.html +22 -0
- data/doc/classes/SpiderInstance.src/M000024.html +24 -0
- data/doc/classes/SpiderInstance.src/M000025.html +18 -0
- data/doc/classes/SpiderInstance.src/M000026.html +18 -0
- data/doc/classes/SpiderInstance.src/M000027.html +18 -0
- data/doc/classes/SpiderInstance.src/M000028.html +18 -0
- data/doc/created.rid +1 -1
- data/doc/files/lib/spider/included_in_memcached_rb.html +29 -1
- data/doc/files/lib/spider/next_urls_in_sqs_rb.html +144 -0
- data/doc/files/lib/spider/robot_rules_rb.html +114 -0
- data/doc/files/lib/spider/spider_instance_rb.html +1 -2
- data/doc/files/lib/spider_rb.html +40 -9
- data/doc/files/spec/spec_helper_rb.html +196 -0
- data/doc/files/spec/spec_helper_rb.src/M000001.html +20 -0
- data/doc/files/spec/spec_helper_rb.src/M000002.html +26 -0
- data/doc/files/spec/spec_helper_rb.src/M000003.html +24 -0
- data/doc/files/spec/spec_helper_rb.src/M000004.html +18 -0
- data/doc/files/spec/spec_helper_rb.src/M000005.html +23 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.html +142 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +19 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +18 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.html +210 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +21 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +19 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +19 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +27 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +26 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +27 -0
- data/doc/files/spec/spider_spec_rb.html +127 -0
- data/doc/files/spec/spider_spec_rb.src/M000014.html +23 -0
- data/doc/fr_class_index.html +5 -0
- data/doc/fr_file_index.html +6 -1
- data/doc/fr_method_index.html +38 -11
- data/doc/index.html +1 -1
- data/lib/spider/spider_instance.rb +15 -7
- data/spider.gemspec +1 -1
- metadata +84 -22
- data/lib/test.rb +0 -27
data/doc/classes/Spider.html
CHANGED
@@ -93,7 +93,7 @@ links, and doing it all over again.
|
|
93
93
|
<h3 class="section-bar">Methods</h3>
|
94
94
|
|
95
95
|
<div class="name-list">
|
96
|
-
<a href="#
|
96
|
+
<a href="#M000029">start_at</a>
|
97
97
|
</div>
|
98
98
|
</div>
|
99
99
|
|
@@ -115,11 +115,12 @@ links, and doing it all over again.
|
|
115
115
|
<div id="methods">
|
116
116
|
<h3 class="section-bar">Public Class methods</h3>
|
117
117
|
|
118
|
-
<div id="method-
|
119
|
-
<a name="
|
118
|
+
<div id="method-M000029" class="method-detail">
|
119
|
+
<a name="M000029"></a>
|
120
120
|
|
121
121
|
<div class="method-heading">
|
122
|
-
<a href="
|
122
|
+
<a href="Spider.src/M000029.html" target="Code" class="method-signature"
|
123
|
+
onclick="popupCode('Spider.src/M000029.html');return false;">
|
123
124
|
<span class="method-name">start_at</span><span class="method-args">(a_url, &block)</span>
|
124
125
|
</a>
|
125
126
|
</div>
|
@@ -151,19 +152,6 @@ handlers.
|
|
151
152
|
end
|
152
153
|
end
|
153
154
|
</pre>
|
154
|
-
<p><a class="source-toggle" href="#"
|
155
|
-
onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
|
156
|
-
<div class="method-source-code" id="M000011-source">
|
157
|
-
<pre>
|
158
|
-
<span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
|
159
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
160
|
-
<span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
|
161
|
-
<span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
|
162
|
-
<span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
|
163
|
-
<span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
|
164
|
-
<span class="ruby-keyword kw">end</span>
|
165
|
-
</pre>
|
166
|
-
</div>
|
167
155
|
</div>
|
168
156
|
</div>
|
169
157
|
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>start_at (Spider)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
|
16
|
+
<span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
|
17
|
+
<span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
|
18
|
+
<span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
|
19
|
+
<span class="ruby-keyword kw">end</span></pre>
|
20
|
+
</body>
|
21
|
+
</html>
|
@@ -86,13 +86,14 @@
|
|
86
86
|
<h3 class="section-bar">Methods</h3>
|
87
87
|
|
88
88
|
<div class="name-list">
|
89
|
-
<a href="#
|
90
|
-
<a href="#
|
91
|
-
<a href="#
|
92
|
-
<a href="#
|
93
|
-
<a href="#
|
94
|
-
<a href="#
|
95
|
-
<a href="#
|
89
|
+
<a href="#M000021">add_url_check</a>
|
90
|
+
<a href="#M000022">check_already_seen_with</a>
|
91
|
+
<a href="#M000028">clear_headers</a>
|
92
|
+
<a href="#M000027">headers</a>
|
93
|
+
<a href="#M000024">on</a>
|
94
|
+
<a href="#M000025">setup</a>
|
95
|
+
<a href="#M000023">store_next_urls_with</a>
|
96
|
+
<a href="#M000026">teardown</a>
|
96
97
|
</div>
|
97
98
|
</div>
|
98
99
|
|
@@ -114,11 +115,12 @@
|
|
114
115
|
<div id="methods">
|
115
116
|
<h3 class="section-bar">Public Instance methods</h3>
|
116
117
|
|
117
|
-
<div id="method-
|
118
|
-
<a name="
|
118
|
+
<div id="method-M000021" class="method-detail">
|
119
|
+
<a name="M000021"></a>
|
119
120
|
|
120
121
|
<div class="method-heading">
|
121
|
-
<a href="
|
122
|
+
<a href="SpiderInstance.src/M000021.html" target="Code" class="method-signature"
|
123
|
+
onclick="popupCode('SpiderInstance.src/M000021.html');return false;">
|
122
124
|
<span class="method-name">add_url_check</span><span class="method-args">(&block)</span>
|
123
125
|
</a>
|
124
126
|
</div>
|
@@ -136,24 +138,15 @@ href="http://mike-burns.com">mike-burns.com</a>’:
|
|
136
138
|
<pre>
|
137
139
|
add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
|
138
140
|
</pre>
|
139
|
-
<p><a class="source-toggle" href="#"
|
140
|
-
onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
|
141
|
-
<div class="method-source-code" id="M000004-source">
|
142
|
-
<pre>
|
143
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
|
144
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
145
|
-
<span class="ruby-ivar">@url_checks</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">block</span>
|
146
|
-
<span class="ruby-keyword kw">end</span>
|
147
|
-
</pre>
|
148
|
-
</div>
|
149
141
|
</div>
|
150
142
|
</div>
|
151
143
|
|
152
|
-
<div id="method-
|
153
|
-
<a name="
|
144
|
+
<div id="method-M000022" class="method-detail">
|
145
|
+
<a name="M000022"></a>
|
154
146
|
|
155
147
|
<div class="method-heading">
|
156
|
-
<a href="
|
148
|
+
<a href="SpiderInstance.src/M000022.html" target="Code" class="method-signature"
|
149
|
+
onclick="popupCode('SpiderInstance.src/M000022.html');return false;">
|
157
150
|
<span class="method-name">check_already_seen_with</span><span class="method-args">(cacher)</span>
|
158
151
|
</a>
|
159
152
|
</div>
|
@@ -171,7 +164,7 @@ href="Spider.html">Spider</a> is a wrapper of memcached.
|
|
171
164
|
</p>
|
172
165
|
<p>
|
173
166
|
You can implement a custom class for this; any object passed to <a
|
174
|
-
href="SpiderInstance.html#
|
167
|
+
href="SpiderInstance.html#M000022">check_already_seen_with</a> must
|
175
168
|
understand just << and include? .
|
176
169
|
</p>
|
177
170
|
<pre>
|
@@ -182,54 +175,32 @@ understand just << and included? .
|
|
182
175
|
require 'spider/included_in_memcached'
|
183
176
|
check_already_seen_with IncludedInMemcached.new('localhost:11211')
|
184
177
|
</pre>
|
185
|
-
<p><a class="source-toggle" href="#"
|
186
|
-
onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
|
187
|
-
<div class="method-source-code" id="M000005-source">
|
188
|
-
<pre>
|
189
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
|
190
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
|
191
|
-
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:<<</span>) <span class="ruby-operator">&&</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
|
192
|
-
<span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
|
193
|
-
<span class="ruby-keyword kw">else</span>
|
194
|
-
<span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to << and included?'</span>
|
195
|
-
<span class="ruby-keyword kw">end</span>
|
196
|
-
<span class="ruby-keyword kw">end</span>
|
197
|
-
</pre>
|
198
|
-
</div>
|
199
178
|
</div>
|
200
179
|
</div>
|
201
180
|
|
202
|
-
<div id="method-
|
203
|
-
<a name="
|
181
|
+
<div id="method-M000028" class="method-detail">
|
182
|
+
<a name="M000028"></a>
|
204
183
|
|
205
184
|
<div class="method-heading">
|
206
|
-
<a href="
|
185
|
+
<a href="SpiderInstance.src/M000028.html" target="Code" class="method-signature"
|
186
|
+
onclick="popupCode('SpiderInstance.src/M000028.html');return false;">
|
207
187
|
<span class="method-name">clear_headers</span><span class="method-args">()</span>
|
208
188
|
</a>
|
209
189
|
</div>
|
210
190
|
|
211
191
|
<div class="method-description">
|
212
192
|
<p>
|
213
|
-
Reset the <a href="SpiderInstance.html#
|
193
|
+
Reset the <a href="SpiderInstance.html#M000027">headers</a> hash.
|
214
194
|
</p>
|
215
|
-
<p><a class="source-toggle" href="#"
|
216
|
-
onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
|
217
|
-
<div class="method-source-code" id="M000010-source">
|
218
|
-
<pre>
|
219
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 158</span>
|
220
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
|
221
|
-
<span class="ruby-ivar">@headers</span> = {}
|
222
|
-
<span class="ruby-keyword kw">end</span>
|
223
|
-
</pre>
|
224
|
-
</div>
|
225
195
|
</div>
|
226
196
|
</div>
|
227
197
|
|
228
|
-
<div id="method-
|
229
|
-
<a name="
|
198
|
+
<div id="method-M000027" class="method-detail">
|
199
|
+
<a name="M000027"></a>
|
230
200
|
|
231
201
|
<div class="method-heading">
|
232
|
-
<a href="
|
202
|
+
<a href="SpiderInstance.src/M000027.html" target="Code" class="method-signature"
|
203
|
+
onclick="popupCode('SpiderInstance.src/M000027.html');return false;">
|
233
204
|
<span class="method-name">headers</span><span class="method-args">()</span>
|
234
205
|
</a>
|
235
206
|
</div>
|
@@ -241,24 +212,15 @@ Use like a hash:
|
|
241
212
|
<pre>
|
242
213
|
headers['Cookies'] = 'user_id=1;password=btrross3'
|
243
214
|
</pre>
|
244
|
-
<p><a class="source-toggle" href="#"
|
245
|
-
onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
|
246
|
-
<div class="method-source-code" id="M000009-source">
|
247
|
-
<pre>
|
248
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 146</span>
|
249
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
|
250
|
-
<span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
|
251
|
-
<span class="ruby-keyword kw">end</span>
|
252
|
-
</pre>
|
253
|
-
</div>
|
254
215
|
</div>
|
255
216
|
</div>
|
256
217
|
|
257
|
-
<div id="method-
|
258
|
-
<a name="
|
218
|
+
<div id="method-M000024" class="method-detail">
|
219
|
+
<a name="M000024"></a>
|
259
220
|
|
260
221
|
<div class="method-heading">
|
261
|
-
<a href="
|
222
|
+
<a href="SpiderInstance.src/M000024.html" target="Code" class="method-signature"
|
223
|
+
onclick="popupCode('SpiderInstance.src/M000024.html');return false;">
|
262
224
|
<span class="method-name">on</span><span class="method-args">(code, p = nil, &block)</span>
|
263
225
|
</a>
|
264
226
|
</div>
|
@@ -290,30 +252,15 @@ For example:
|
|
290
252
|
puts "Given this code: #{resp.code}"
|
291
253
|
end
|
292
254
|
</pre>
|
293
|
-
<p><a class="source-toggle" href="#"
|
294
|
-
onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
|
295
|
-
<div class="method-source-code" id="M000006-source">
|
296
|
-
<pre>
|
297
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 121</span>
|
298
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
299
|
-
<span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
300
|
-
<span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
|
301
|
-
<span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
|
302
|
-
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
|
303
|
-
<span class="ruby-keyword kw">else</span>
|
304
|
-
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
|
305
|
-
<span class="ruby-keyword kw">end</span>
|
306
|
-
<span class="ruby-keyword kw">end</span>
|
307
|
-
</pre>
|
308
|
-
</div>
|
309
255
|
</div>
|
310
256
|
</div>
|
311
257
|
|
312
|
-
<div id="method-
|
313
|
-
<a name="
|
258
|
+
<div id="method-M000025" class="method-detail">
|
259
|
+
<a name="M000025"></a>
|
314
260
|
|
315
261
|
<div class="method-heading">
|
316
|
-
<a href="
|
262
|
+
<a href="SpiderInstance.src/M000025.html" target="Code" class="method-signature"
|
263
|
+
onclick="popupCode('SpiderInstance.src/M000025.html');return false;">
|
317
264
|
<span class="method-name">setup</span><span class="method-args">(p = nil, &block)</span>
|
318
265
|
</a>
|
319
266
|
</div>
|
@@ -327,24 +274,51 @@ Run before the HTTP request. Given the URL as a string.
|
|
327
274
|
headers['Cookies'] = 'user_id=1;admin=true'
|
328
275
|
end
|
329
276
|
</pre>
|
330
|
-
|
331
|
-
|
332
|
-
|
277
|
+
</div>
|
278
|
+
</div>
|
279
|
+
|
280
|
+
<div id="method-M000023" class="method-detail">
|
281
|
+
<a name="M000023"></a>
|
282
|
+
|
283
|
+
<div class="method-heading">
|
284
|
+
<a href="SpiderInstance.src/M000023.html" target="Code" class="method-signature"
|
285
|
+
onclick="popupCode('SpiderInstance.src/M000023.html');return false;">
|
286
|
+
<span class="method-name">store_next_urls_with</span><span class="method-args">(a_store)</span>
|
287
|
+
</a>
|
288
|
+
</div>
|
289
|
+
|
290
|
+
<div class="method-description">
|
291
|
+
<p>
|
292
|
+
The Web is a really, really, really big graph; as such, this list of nodes
|
293
|
+
to visit grows really, really, really big.
|
294
|
+
</p>
|
295
|
+
<p>
|
296
|
+
Change the object used to store nodes we have yet to walk. The default
|
297
|
+
object is an instance of Array. Available with <a
|
298
|
+
href="Spider.html">Spider</a> is a wrapper of AmazonSQS.
|
299
|
+
</p>
|
300
|
+
<p>
|
301
|
+
You can implement a custom class for this; any object passed to <a
|
302
|
+
href="SpiderInstance.html#M000023">store_next_urls_with</a> must
|
303
|
+
understand just push and pop .
|
304
|
+
</p>
|
333
305
|
<pre>
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
306
|
+
# default
|
307
|
+
store_next_urls_with Array.new
|
308
|
+
|
309
|
+
# AmazonSQS
|
310
|
+
require 'spider/next_urls_in_sqs'
|
311
|
+
store_next_urls_with NextUrlsInSQS.new(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, queue_name)
|
338
312
|
</pre>
|
339
|
-
</div>
|
340
313
|
</div>
|
341
314
|
</div>
|
342
315
|
|
343
|
-
<div id="method-
|
344
|
-
<a name="
|
316
|
+
<div id="method-M000026" class="method-detail">
|
317
|
+
<a name="M000026"></a>
|
345
318
|
|
346
319
|
<div class="method-heading">
|
347
|
-
<a href="
|
320
|
+
<a href="SpiderInstance.src/M000026.html" target="Code" class="method-signature"
|
321
|
+
onclick="popupCode('SpiderInstance.src/M000026.html');return false;">
|
348
322
|
<span class="method-name">teardown</span><span class="method-args">(p = nil, &block)</span>
|
349
323
|
</a>
|
350
324
|
</div>
|
@@ -353,16 +327,6 @@ Run before the HTTP request. Given the URL as a string.
|
|
353
327
|
<p>
|
354
328
|
Run last, once for each page. Given the URL as a string.
|
355
329
|
</p>
|
356
|
-
<p><a class="source-toggle" href="#"
|
357
|
-
onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
|
358
|
-
<div class="method-source-code" id="M000008-source">
|
359
|
-
<pre>
|
360
|
-
<span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 140</span>
|
361
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
362
|
-
<span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
363
|
-
<span class="ruby-keyword kw">end</span>
|
364
|
-
</pre>
|
365
|
-
</div>
|
366
330
|
</div>
|
367
331
|
</div>
|
368
332
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>add_url_check (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-ivar">@url_checks</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">block</span>
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>check_already_seen_with (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
|
15
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:<<</span>) <span class="ruby-operator">&&</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
|
16
|
+
<span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
|
17
|
+
<span class="ruby-keyword kw">else</span>
|
18
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to << and included?'</span>
|
19
|
+
<span class="ruby-keyword kw">end</span>
|
20
|
+
<span class="ruby-keyword kw">end</span></pre>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>store_next_urls_with (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 115</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">store_next_urls_with</span>(<span class="ruby-identifier">a_store</span>)
|
15
|
+
<span class="ruby-identifier">tmp_next_urls</span> = <span class="ruby-ivar">@next_urls</span>
|
16
|
+
<span class="ruby-ivar">@next_urls</span> = <span class="ruby-identifier">a_store</span>
|
17
|
+
<span class="ruby-identifier">tmp_next_urls</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">a_url_hash</span><span class="ruby-operator">|</span>
|
18
|
+
<span class="ruby-ivar">@next_urls</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">a_url_hash</span>
|
19
|
+
<span class="ruby-keyword kw">end</span>
|
20
|
+
<span class="ruby-keyword kw">end</span></pre>
|
21
|
+
</body>
|
22
|
+
</html>
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>on (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 145</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
16
|
+
<span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
|
17
|
+
<span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
|
18
|
+
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
|
19
|
+
<span class="ruby-keyword kw">else</span>
|
20
|
+
<span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
|
21
|
+
<span class="ruby-keyword kw">end</span>
|
22
|
+
<span class="ruby-keyword kw">end</span></pre>
|
23
|
+
</body>
|
24
|
+
</html>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>setup (SpiderInstance)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 159</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&</span><span class="ruby-identifier">block</span>)
|
15
|
+
<span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|