spider 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. data/CHANGES +6 -0
  2. data/README +3 -3
  3. data/doc/classes/BeStaticServerPages.html +197 -0
  4. data/doc/classes/BeStaticServerPages.src/M000030.html +19 -0
  5. data/doc/classes/BeStaticServerPages.src/M000031.html +19 -0
  6. data/doc/classes/BeStaticServerPages.src/M000032.html +18 -0
  7. data/doc/classes/BeStaticServerPages.src/M000033.html +18 -0
  8. data/doc/classes/IncludedInMemcached.html +18 -45
  9. data/doc/classes/IncludedInMemcached.src/M000015.html +18 -0
  10. data/doc/classes/IncludedInMemcached.src/M000016.html +18 -0
  11. data/doc/classes/IncludedInMemcached.src/M000017.html +18 -0
  12. data/doc/classes/LoopingServlet.html +137 -0
  13. data/doc/classes/LoopingServlet.src/M000037.html +23 -0
  14. data/doc/classes/NextUrlsInSQS.html +204 -0
  15. data/doc/classes/NextUrlsInSQS.src/M000018.html +19 -0
  16. data/doc/classes/NextUrlsInSQS.src/M000019.html +22 -0
  17. data/doc/classes/NextUrlsInSQS.src/M000020.html +19 -0
  18. data/doc/classes/QueryServlet.html +137 -0
  19. data/doc/classes/QueryServlet.src/M000038.html +19 -0
  20. data/doc/classes/RobotRules.html +175 -0
  21. data/doc/classes/RobotRules.src/M000034.html +19 -0
  22. data/doc/classes/RobotRules.src/M000035.html +67 -0
  23. data/doc/classes/RobotRules.src/M000036.html +24 -0
  24. data/doc/classes/Spider.html +5 -17
  25. data/doc/classes/Spider.src/M000029.html +21 -0
  26. data/doc/classes/SpiderInstance.html +72 -108
  27. data/doc/classes/SpiderInstance.src/M000021.html +18 -0
  28. data/doc/classes/SpiderInstance.src/M000022.html +22 -0
  29. data/doc/classes/SpiderInstance.src/M000023.html +22 -0
  30. data/doc/classes/SpiderInstance.src/M000024.html +24 -0
  31. data/doc/classes/SpiderInstance.src/M000025.html +18 -0
  32. data/doc/classes/SpiderInstance.src/M000026.html +18 -0
  33. data/doc/classes/SpiderInstance.src/M000027.html +18 -0
  34. data/doc/classes/SpiderInstance.src/M000028.html +18 -0
  35. data/doc/created.rid +1 -1
  36. data/doc/files/lib/spider/included_in_memcached_rb.html +29 -1
  37. data/doc/files/lib/spider/next_urls_in_sqs_rb.html +144 -0
  38. data/doc/files/lib/spider/robot_rules_rb.html +114 -0
  39. data/doc/files/lib/spider/spider_instance_rb.html +1 -2
  40. data/doc/files/lib/spider_rb.html +40 -9
  41. data/doc/files/spec/spec_helper_rb.html +196 -0
  42. data/doc/files/spec/spec_helper_rb.src/M000001.html +20 -0
  43. data/doc/files/spec/spec_helper_rb.src/M000002.html +26 -0
  44. data/doc/files/spec/spec_helper_rb.src/M000003.html +24 -0
  45. data/doc/files/spec/spec_helper_rb.src/M000004.html +18 -0
  46. data/doc/files/spec/spec_helper_rb.src/M000005.html +23 -0
  47. data/doc/files/spec/spider/included_in_memcached_spec_rb.html +142 -0
  48. data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +19 -0
  49. data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +18 -0
  50. data/doc/files/spec/spider/spider_instance_spec_rb.html +210 -0
  51. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +21 -0
  52. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +19 -0
  53. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +19 -0
  54. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +27 -0
  55. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +26 -0
  56. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +27 -0
  57. data/doc/files/spec/spider_spec_rb.html +127 -0
  58. data/doc/files/spec/spider_spec_rb.src/M000014.html +23 -0
  59. data/doc/fr_class_index.html +5 -0
  60. data/doc/fr_file_index.html +6 -1
  61. data/doc/fr_method_index.html +38 -11
  62. data/doc/index.html +1 -1
  63. data/lib/spider/spider_instance.rb +15 -7
  64. data/spider.gemspec +1 -1
  65. metadata +84 -22
  66. data/lib/test.rb +0 -27
@@ -93,7 +93,7 @@ links, and doing it all over again.
93
93
  <h3 class="section-bar">Methods</h3>
94
94
 
95
95
  <div class="name-list">
96
- <a href="#M000011">start_at</a>&nbsp;&nbsp;
96
+ <a href="#M000029">start_at</a>&nbsp;&nbsp;
97
97
  </div>
98
98
  </div>
99
99
 
@@ -115,11 +115,12 @@ links, and doing it all over again.
115
115
  <div id="methods">
116
116
  <h3 class="section-bar">Public Class methods</h3>
117
117
 
118
- <div id="method-M000011" class="method-detail">
119
- <a name="M000011"></a>
118
+ <div id="method-M000029" class="method-detail">
119
+ <a name="M000029"></a>
120
120
 
121
121
  <div class="method-heading">
122
- <a href="#M000011" class="method-signature">
122
+ <a href="Spider.src/M000029.html" target="Code" class="method-signature"
123
+ onclick="popupCode('Spider.src/M000029.html');return false;">
123
124
  <span class="method-name">start_at</span><span class="method-args">(a_url, &amp;block)</span>
124
125
  </a>
125
126
  </div>
@@ -151,19 +152,6 @@ handlers.
151
152
  end
152
153
  end
153
154
  </pre>
154
- <p><a class="source-toggle" href="#"
155
- onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
156
- <div class="method-source-code" id="M000011-source">
157
- <pre>
158
- <span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
159
- <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
160
- <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
161
- <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
162
- <span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
163
- <span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
164
- <span class="ruby-keyword kw">end</span>
165
- </pre>
166
- </div>
167
155
  </div>
168
156
  </div>
169
157
 
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>start_at (Spider)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
16
+ <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
17
+ <span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
18
+ <span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
19
+ <span class="ruby-keyword kw">end</span></pre>
20
+ </body>
21
+ </html>
@@ -86,13 +86,14 @@
86
86
  <h3 class="section-bar">Methods</h3>
87
87
 
88
88
  <div class="name-list">
89
- <a href="#M000004">add_url_check</a>&nbsp;&nbsp;
90
- <a href="#M000005">check_already_seen_with</a>&nbsp;&nbsp;
91
- <a href="#M000010">clear_headers</a>&nbsp;&nbsp;
92
- <a href="#M000009">headers</a>&nbsp;&nbsp;
93
- <a href="#M000006">on</a>&nbsp;&nbsp;
94
- <a href="#M000007">setup</a>&nbsp;&nbsp;
95
- <a href="#M000008">teardown</a>&nbsp;&nbsp;
89
+ <a href="#M000021">add_url_check</a>&nbsp;&nbsp;
90
+ <a href="#M000022">check_already_seen_with</a>&nbsp;&nbsp;
91
+ <a href="#M000028">clear_headers</a>&nbsp;&nbsp;
92
+ <a href="#M000027">headers</a>&nbsp;&nbsp;
93
+ <a href="#M000024">on</a>&nbsp;&nbsp;
94
+ <a href="#M000025">setup</a>&nbsp;&nbsp;
95
+ <a href="#M000023">store_next_urls_with</a>&nbsp;&nbsp;
96
+ <a href="#M000026">teardown</a>&nbsp;&nbsp;
96
97
  </div>
97
98
  </div>
98
99
 
@@ -114,11 +115,12 @@
114
115
  <div id="methods">
115
116
  <h3 class="section-bar">Public Instance methods</h3>
116
117
 
117
- <div id="method-M000004" class="method-detail">
118
- <a name="M000004"></a>
118
+ <div id="method-M000021" class="method-detail">
119
+ <a name="M000021"></a>
119
120
 
120
121
  <div class="method-heading">
121
- <a href="#M000004" class="method-signature">
122
+ <a href="SpiderInstance.src/M000021.html" target="Code" class="method-signature"
123
+ onclick="popupCode('SpiderInstance.src/M000021.html');return false;">
122
124
  <span class="method-name">add_url_check</span><span class="method-args">(&amp;block)</span>
123
125
  </a>
124
126
  </div>
@@ -136,24 +138,15 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
136
138
  <pre>
137
139
  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
138
140
  </pre>
139
- <p><a class="source-toggle" href="#"
140
- onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
141
- <div class="method-source-code" id="M000004-source">
142
- <pre>
143
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
144
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
145
- <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
146
- <span class="ruby-keyword kw">end</span>
147
- </pre>
148
- </div>
149
141
  </div>
150
142
  </div>
151
143
 
152
- <div id="method-M000005" class="method-detail">
153
- <a name="M000005"></a>
144
+ <div id="method-M000022" class="method-detail">
145
+ <a name="M000022"></a>
154
146
 
155
147
  <div class="method-heading">
156
- <a href="#M000005" class="method-signature">
148
+ <a href="SpiderInstance.src/M000022.html" target="Code" class="method-signature"
149
+ onclick="popupCode('SpiderInstance.src/M000022.html');return false;">
157
150
  <span class="method-name">check_already_seen_with</span><span class="method-args">(cacher)</span>
158
151
  </a>
159
152
  </div>
@@ -171,7 +164,7 @@ href="Spider.html">Spider</a> is a wrapper of memcached.
171
164
  </p>
172
165
  <p>
173
166
  You can implement a custom class for this; any object passed to <a
174
- href="SpiderInstance.html#M000005">check_already_seen_with</a> must
167
+ href="SpiderInstance.html#M000022">check_already_seen_with</a> must
175
168
  understand just &lt;&lt; and include? .
176
169
  </p>
177
170
  <pre>
@@ -182,54 +175,32 @@ understand just &lt;&lt; and included? .
182
175
  require 'spider/included_in_memcached'
183
176
  check_already_seen_with IncludedInMemcached.new('localhost:11211')
184
177
  </pre>
185
- <p><a class="source-toggle" href="#"
186
- onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
187
- <div class="method-source-code" id="M000005-source">
188
- <pre>
189
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
190
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
191
- <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:&lt;&lt;</span>) <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
192
- <span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
193
- <span class="ruby-keyword kw">else</span>
194
- <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to &lt;&lt; and included?'</span>
195
- <span class="ruby-keyword kw">end</span>
196
- <span class="ruby-keyword kw">end</span>
197
- </pre>
198
- </div>
199
178
  </div>
200
179
  </div>
201
180
 
202
- <div id="method-M000010" class="method-detail">
203
- <a name="M000010"></a>
181
+ <div id="method-M000028" class="method-detail">
182
+ <a name="M000028"></a>
204
183
 
205
184
  <div class="method-heading">
206
- <a href="#M000010" class="method-signature">
185
+ <a href="SpiderInstance.src/M000028.html" target="Code" class="method-signature"
186
+ onclick="popupCode('SpiderInstance.src/M000028.html');return false;">
207
187
  <span class="method-name">clear_headers</span><span class="method-args">()</span>
208
188
  </a>
209
189
  </div>
210
190
 
211
191
  <div class="method-description">
212
192
  <p>
213
- Reset the <a href="SpiderInstance.html#M000009">headers</a> hash.
193
+ Reset the <a href="SpiderInstance.html#M000027">headers</a> hash.
214
194
  </p>
215
- <p><a class="source-toggle" href="#"
216
- onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
217
- <div class="method-source-code" id="M000010-source">
218
- <pre>
219
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 158</span>
220
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
221
- <span class="ruby-ivar">@headers</span> = {}
222
- <span class="ruby-keyword kw">end</span>
223
- </pre>
224
- </div>
225
195
  </div>
226
196
  </div>
227
197
 
228
- <div id="method-M000009" class="method-detail">
229
- <a name="M000009"></a>
198
+ <div id="method-M000027" class="method-detail">
199
+ <a name="M000027"></a>
230
200
 
231
201
  <div class="method-heading">
232
- <a href="#M000009" class="method-signature">
202
+ <a href="SpiderInstance.src/M000027.html" target="Code" class="method-signature"
203
+ onclick="popupCode('SpiderInstance.src/M000027.html');return false;">
233
204
  <span class="method-name">headers</span><span class="method-args">()</span>
234
205
  </a>
235
206
  </div>
@@ -241,24 +212,15 @@ Use like a hash:
241
212
  <pre>
242
213
  headers['Cookies'] = 'user_id=1;password=btrross3'
243
214
  </pre>
244
- <p><a class="source-toggle" href="#"
245
- onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
246
- <div class="method-source-code" id="M000009-source">
247
- <pre>
248
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 146</span>
249
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
250
- <span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
251
- <span class="ruby-keyword kw">end</span>
252
- </pre>
253
- </div>
254
215
  </div>
255
216
  </div>
256
217
 
257
- <div id="method-M000006" class="method-detail">
258
- <a name="M000006"></a>
218
+ <div id="method-M000024" class="method-detail">
219
+ <a name="M000024"></a>
259
220
 
260
221
  <div class="method-heading">
261
- <a href="#M000006" class="method-signature">
222
+ <a href="SpiderInstance.src/M000024.html" target="Code" class="method-signature"
223
+ onclick="popupCode('SpiderInstance.src/M000024.html');return false;">
262
224
  <span class="method-name">on</span><span class="method-args">(code, p = nil, &amp;block)</span>
263
225
  </a>
264
226
  </div>
@@ -290,30 +252,15 @@ For example:
290
252
  puts &quot;Given this code: #{resp.code}&quot;
291
253
  end
292
254
  </pre>
293
- <p><a class="source-toggle" href="#"
294
- onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
295
- <div class="method-source-code" id="M000006-source">
296
- <pre>
297
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 121</span>
298
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
299
- <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
300
- <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
301
- <span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
302
- <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
303
- <span class="ruby-keyword kw">else</span>
304
- <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
305
- <span class="ruby-keyword kw">end</span>
306
- <span class="ruby-keyword kw">end</span>
307
- </pre>
308
- </div>
309
255
  </div>
310
256
  </div>
311
257
 
312
- <div id="method-M000007" class="method-detail">
313
- <a name="M000007"></a>
258
+ <div id="method-M000025" class="method-detail">
259
+ <a name="M000025"></a>
314
260
 
315
261
  <div class="method-heading">
316
- <a href="#M000007" class="method-signature">
262
+ <a href="SpiderInstance.src/M000025.html" target="Code" class="method-signature"
263
+ onclick="popupCode('SpiderInstance.src/M000025.html');return false;">
317
264
  <span class="method-name">setup</span><span class="method-args">(p = nil, &amp;block)</span>
318
265
  </a>
319
266
  </div>
@@ -327,24 +274,51 @@ Run before the HTTP request. Given the URL as a string.
327
274
  headers['Cookies'] = 'user_id=1;admin=true'
328
275
  end
329
276
  </pre>
330
- <p><a class="source-toggle" href="#"
331
- onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
332
- <div class="method-source-code" id="M000007-source">
277
+ </div>
278
+ </div>
279
+
280
+ <div id="method-M000023" class="method-detail">
281
+ <a name="M000023"></a>
282
+
283
+ <div class="method-heading">
284
+ <a href="SpiderInstance.src/M000023.html" target="Code" class="method-signature"
285
+ onclick="popupCode('SpiderInstance.src/M000023.html');return false;">
286
+ <span class="method-name">store_next_urls_with</span><span class="method-args">(a_store)</span>
287
+ </a>
288
+ </div>
289
+
290
+ <div class="method-description">
291
+ <p>
292
+ The Web is a really, really, really big graph; as such, this list of nodes
293
+ to visit grows really, really, really big.
294
+ </p>
295
+ <p>
296
+ Change the object used to store nodes we have yet to walk. The default
297
+ object is an instance of Array. Available with <a
298
+ href="Spider.html">Spider</a> is a wrapper of AmazonSQS.
299
+ </p>
300
+ <p>
301
+ You can implement a custom class for this; any object passed to <a
302
+ href="SpiderInstance.html#M000023">store_next_urls_with</a> must
303
+ understand just push and pop .
304
+ </p>
333
305
  <pre>
334
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 135</span>
335
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
336
- <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
337
- <span class="ruby-keyword kw">end</span>
306
+ # default
307
+ store_next_urls_with Array.new
308
+
309
+ # AmazonSQS
310
+ require 'spider/next_urls_in_sqs'
311
+ store_next_urls_with NextUrlsInSQS.new(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, queue_name)
338
312
  </pre>
339
- </div>
340
313
  </div>
341
314
  </div>
342
315
 
343
- <div id="method-M000008" class="method-detail">
344
- <a name="M000008"></a>
316
+ <div id="method-M000026" class="method-detail">
317
+ <a name="M000026"></a>
345
318
 
346
319
  <div class="method-heading">
347
- <a href="#M000008" class="method-signature">
320
+ <a href="SpiderInstance.src/M000026.html" target="Code" class="method-signature"
321
+ onclick="popupCode('SpiderInstance.src/M000026.html');return false;">
348
322
  <span class="method-name">teardown</span><span class="method-args">(p = nil, &amp;block)</span>
349
323
  </a>
350
324
  </div>
@@ -353,16 +327,6 @@ Run before the HTTP request. Given the URL as a string.
353
327
  <p>
354
328
  Run last, once for each page. Given the URL as a string.
355
329
  </p>
356
- <p><a class="source-toggle" href="#"
357
- onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
358
- <div class="method-source-code" id="M000008-source">
359
- <pre>
360
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 140</span>
361
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
362
- <span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
363
- <span class="ruby-keyword kw">end</span>
364
- </pre>
365
- </div>
366
330
  </div>
367
331
  </div>
368
332
 
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>add_url_check (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>check_already_seen_with (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
15
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:&lt;&lt;</span>) <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
16
+ <span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
17
+ <span class="ruby-keyword kw">else</span>
18
+ <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to &lt;&lt; and included?'</span>
19
+ <span class="ruby-keyword kw">end</span>
20
+ <span class="ruby-keyword kw">end</span></pre>
21
+ </body>
22
+ </html>
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>store_next_urls_with (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 115</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">store_next_urls_with</span>(<span class="ruby-identifier">a_store</span>)
15
+ <span class="ruby-identifier">tmp_next_urls</span> = <span class="ruby-ivar">@next_urls</span>
16
+ <span class="ruby-ivar">@next_urls</span> = <span class="ruby-identifier">a_store</span>
17
+ <span class="ruby-identifier">tmp_next_urls</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">a_url_hash</span><span class="ruby-operator">|</span>
18
+ <span class="ruby-ivar">@next_urls</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">a_url_hash</span>
19
+ <span class="ruby-keyword kw">end</span>
20
+ <span class="ruby-keyword kw">end</span></pre>
21
+ </body>
22
+ </html>
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>on (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 145</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
16
+ <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
17
+ <span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
18
+ <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
19
+ <span class="ruby-keyword kw">else</span>
20
+ <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
21
+ <span class="ruby-keyword kw">end</span>
22
+ <span class="ruby-keyword kw">end</span></pre>
23
+ </body>
24
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>setup (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 159</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>