spider 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/CHANGES +6 -0
  2. data/README +3 -3
  3. data/doc/classes/BeStaticServerPages.html +197 -0
  4. data/doc/classes/BeStaticServerPages.src/M000030.html +19 -0
  5. data/doc/classes/BeStaticServerPages.src/M000031.html +19 -0
  6. data/doc/classes/BeStaticServerPages.src/M000032.html +18 -0
  7. data/doc/classes/BeStaticServerPages.src/M000033.html +18 -0
  8. data/doc/classes/IncludedInMemcached.html +18 -45
  9. data/doc/classes/IncludedInMemcached.src/M000015.html +18 -0
  10. data/doc/classes/IncludedInMemcached.src/M000016.html +18 -0
  11. data/doc/classes/IncludedInMemcached.src/M000017.html +18 -0
  12. data/doc/classes/LoopingServlet.html +137 -0
  13. data/doc/classes/LoopingServlet.src/M000037.html +23 -0
  14. data/doc/classes/NextUrlsInSQS.html +204 -0
  15. data/doc/classes/NextUrlsInSQS.src/M000018.html +19 -0
  16. data/doc/classes/NextUrlsInSQS.src/M000019.html +22 -0
  17. data/doc/classes/NextUrlsInSQS.src/M000020.html +19 -0
  18. data/doc/classes/QueryServlet.html +137 -0
  19. data/doc/classes/QueryServlet.src/M000038.html +19 -0
  20. data/doc/classes/RobotRules.html +175 -0
  21. data/doc/classes/RobotRules.src/M000034.html +19 -0
  22. data/doc/classes/RobotRules.src/M000035.html +67 -0
  23. data/doc/classes/RobotRules.src/M000036.html +24 -0
  24. data/doc/classes/Spider.html +5 -17
  25. data/doc/classes/Spider.src/M000029.html +21 -0
  26. data/doc/classes/SpiderInstance.html +72 -108
  27. data/doc/classes/SpiderInstance.src/M000021.html +18 -0
  28. data/doc/classes/SpiderInstance.src/M000022.html +22 -0
  29. data/doc/classes/SpiderInstance.src/M000023.html +22 -0
  30. data/doc/classes/SpiderInstance.src/M000024.html +24 -0
  31. data/doc/classes/SpiderInstance.src/M000025.html +18 -0
  32. data/doc/classes/SpiderInstance.src/M000026.html +18 -0
  33. data/doc/classes/SpiderInstance.src/M000027.html +18 -0
  34. data/doc/classes/SpiderInstance.src/M000028.html +18 -0
  35. data/doc/created.rid +1 -1
  36. data/doc/files/lib/spider/included_in_memcached_rb.html +29 -1
  37. data/doc/files/lib/spider/next_urls_in_sqs_rb.html +144 -0
  38. data/doc/files/lib/spider/robot_rules_rb.html +114 -0
  39. data/doc/files/lib/spider/spider_instance_rb.html +1 -2
  40. data/doc/files/lib/spider_rb.html +40 -9
  41. data/doc/files/spec/spec_helper_rb.html +196 -0
  42. data/doc/files/spec/spec_helper_rb.src/M000001.html +20 -0
  43. data/doc/files/spec/spec_helper_rb.src/M000002.html +26 -0
  44. data/doc/files/spec/spec_helper_rb.src/M000003.html +24 -0
  45. data/doc/files/spec/spec_helper_rb.src/M000004.html +18 -0
  46. data/doc/files/spec/spec_helper_rb.src/M000005.html +23 -0
  47. data/doc/files/spec/spider/included_in_memcached_spec_rb.html +142 -0
  48. data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +19 -0
  49. data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +18 -0
  50. data/doc/files/spec/spider/spider_instance_spec_rb.html +210 -0
  51. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +21 -0
  52. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +19 -0
  53. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +19 -0
  54. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +27 -0
  55. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +26 -0
  56. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +27 -0
  57. data/doc/files/spec/spider_spec_rb.html +127 -0
  58. data/doc/files/spec/spider_spec_rb.src/M000014.html +23 -0
  59. data/doc/fr_class_index.html +5 -0
  60. data/doc/fr_file_index.html +6 -1
  61. data/doc/fr_method_index.html +38 -11
  62. data/doc/index.html +1 -1
  63. data/lib/spider/spider_instance.rb +15 -7
  64. data/spider.gemspec +1 -1
  65. metadata +84 -22
  66. data/lib/test.rb +0 -27
@@ -93,7 +93,7 @@ links, and doing it all over again.
93
93
  <h3 class="section-bar">Methods</h3>
94
94
 
95
95
  <div class="name-list">
96
- <a href="#M000011">start_at</a>&nbsp;&nbsp;
96
+ <a href="#M000029">start_at</a>&nbsp;&nbsp;
97
97
  </div>
98
98
  </div>
99
99
 
@@ -115,11 +115,12 @@ links, and doing it all over again.
115
115
  <div id="methods">
116
116
  <h3 class="section-bar">Public Class methods</h3>
117
117
 
118
- <div id="method-M000011" class="method-detail">
119
- <a name="M000011"></a>
118
+ <div id="method-M000029" class="method-detail">
119
+ <a name="M000029"></a>
120
120
 
121
121
  <div class="method-heading">
122
- <a href="#M000011" class="method-signature">
122
+ <a href="Spider.src/M000029.html" target="Code" class="method-signature"
123
+ onclick="popupCode('Spider.src/M000029.html');return false;">
123
124
  <span class="method-name">start_at</span><span class="method-args">(a_url, &amp;block)</span>
124
125
  </a>
125
126
  </div>
@@ -151,19 +152,6 @@ handlers.
151
152
  end
152
153
  end
153
154
  </pre>
154
- <p><a class="source-toggle" href="#"
155
- onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
156
- <div class="method-source-code" id="M000011-source">
157
- <pre>
158
- <span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
159
- <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
160
- <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
161
- <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
162
- <span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
163
- <span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
164
- <span class="ruby-keyword kw">end</span>
165
- </pre>
166
- </div>
167
155
  </div>
168
156
  </div>
169
157
 
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>start_at (Spider)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider.rb, line 54</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">start_at</span>(<span class="ruby-identifier">a_url</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-identifier">rules</span> = <span class="ruby-constant">RobotRules</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'Ruby Spider 1.0'</span>)
16
+ <span class="ruby-identifier">a_spider</span> = <span class="ruby-constant">SpiderInstance</span>.<span class="ruby-identifier">new</span>({<span class="ruby-keyword kw">nil</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">a_url</span>}, [], <span class="ruby-identifier">rules</span>, [])
17
+ <span class="ruby-identifier">block</span>.<span class="ruby-identifier">call</span>(<span class="ruby-identifier">a_spider</span>)
18
+ <span class="ruby-identifier">a_spider</span>.<span class="ruby-identifier">start!</span>
19
+ <span class="ruby-keyword kw">end</span></pre>
20
+ </body>
21
+ </html>
@@ -86,13 +86,14 @@
86
86
  <h3 class="section-bar">Methods</h3>
87
87
 
88
88
  <div class="name-list">
89
- <a href="#M000004">add_url_check</a>&nbsp;&nbsp;
90
- <a href="#M000005">check_already_seen_with</a>&nbsp;&nbsp;
91
- <a href="#M000010">clear_headers</a>&nbsp;&nbsp;
92
- <a href="#M000009">headers</a>&nbsp;&nbsp;
93
- <a href="#M000006">on</a>&nbsp;&nbsp;
94
- <a href="#M000007">setup</a>&nbsp;&nbsp;
95
- <a href="#M000008">teardown</a>&nbsp;&nbsp;
89
+ <a href="#M000021">add_url_check</a>&nbsp;&nbsp;
90
+ <a href="#M000022">check_already_seen_with</a>&nbsp;&nbsp;
91
+ <a href="#M000028">clear_headers</a>&nbsp;&nbsp;
92
+ <a href="#M000027">headers</a>&nbsp;&nbsp;
93
+ <a href="#M000024">on</a>&nbsp;&nbsp;
94
+ <a href="#M000025">setup</a>&nbsp;&nbsp;
95
+ <a href="#M000023">store_next_urls_with</a>&nbsp;&nbsp;
96
+ <a href="#M000026">teardown</a>&nbsp;&nbsp;
96
97
  </div>
97
98
  </div>
98
99
 
@@ -114,11 +115,12 @@
114
115
  <div id="methods">
115
116
  <h3 class="section-bar">Public Instance methods</h3>
116
117
 
117
- <div id="method-M000004" class="method-detail">
118
- <a name="M000004"></a>
118
+ <div id="method-M000021" class="method-detail">
119
+ <a name="M000021"></a>
119
120
 
120
121
  <div class="method-heading">
121
- <a href="#M000004" class="method-signature">
122
+ <a href="SpiderInstance.src/M000021.html" target="Code" class="method-signature"
123
+ onclick="popupCode('SpiderInstance.src/M000021.html');return false;">
122
124
  <span class="method-name">add_url_check</span><span class="method-args">(&amp;block)</span>
123
125
  </a>
124
126
  </div>
@@ -136,24 +138,15 @@ href="http://mike-burns.com">mike-burns.com</a>&#8217;:
136
138
  <pre>
137
139
  add_url_check { |a_url| a_url =~ %r{^http://mike-burns.com.*} }
138
140
  </pre>
139
- <p><a class="source-toggle" href="#"
140
- onclick="toggleCode('M000004-source');return false;">[Source]</a></p>
141
- <div class="method-source-code" id="M000004-source">
142
- <pre>
143
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
144
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
145
- <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
146
- <span class="ruby-keyword kw">end</span>
147
- </pre>
148
- </div>
149
141
  </div>
150
142
  </div>
151
143
 
152
- <div id="method-M000005" class="method-detail">
153
- <a name="M000005"></a>
144
+ <div id="method-M000022" class="method-detail">
145
+ <a name="M000022"></a>
154
146
 
155
147
  <div class="method-heading">
156
- <a href="#M000005" class="method-signature">
148
+ <a href="SpiderInstance.src/M000022.html" target="Code" class="method-signature"
149
+ onclick="popupCode('SpiderInstance.src/M000022.html');return false;">
157
150
  <span class="method-name">check_already_seen_with</span><span class="method-args">(cacher)</span>
158
151
  </a>
159
152
  </div>
@@ -171,7 +164,7 @@ href="Spider.html">Spider</a> is a wrapper of memcached.
171
164
  </p>
172
165
  <p>
173
166
  You can implement a custom class for this; any object passed to <a
174
- href="SpiderInstance.html#M000005">check_already_seen_with</a> must
167
+ href="SpiderInstance.html#M000022">check_already_seen_with</a> must
175
168
  understand just &lt;&lt; and include? .
176
169
  </p>
177
170
  <pre>
@@ -182,54 +175,32 @@ understand just &lt;&lt; and included? .
182
175
  require 'spider/included_in_memcached'
183
176
  check_already_seen_with IncludedInMemcached.new('localhost:11211')
184
177
  </pre>
185
- <p><a class="source-toggle" href="#"
186
- onclick="toggleCode('M000005-source');return false;">[Source]</a></p>
187
- <div class="method-source-code" id="M000005-source">
188
- <pre>
189
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
190
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
191
- <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:&lt;&lt;</span>) <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
192
- <span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
193
- <span class="ruby-keyword kw">else</span>
194
- <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to &lt;&lt; and included?'</span>
195
- <span class="ruby-keyword kw">end</span>
196
- <span class="ruby-keyword kw">end</span>
197
- </pre>
198
- </div>
199
178
  </div>
200
179
  </div>
201
180
 
202
- <div id="method-M000010" class="method-detail">
203
- <a name="M000010"></a>
181
+ <div id="method-M000028" class="method-detail">
182
+ <a name="M000028"></a>
204
183
 
205
184
  <div class="method-heading">
206
- <a href="#M000010" class="method-signature">
185
+ <a href="SpiderInstance.src/M000028.html" target="Code" class="method-signature"
186
+ onclick="popupCode('SpiderInstance.src/M000028.html');return false;">
207
187
  <span class="method-name">clear_headers</span><span class="method-args">()</span>
208
188
  </a>
209
189
  </div>
210
190
 
211
191
  <div class="method-description">
212
192
  <p>
213
- Reset the <a href="SpiderInstance.html#M000009">headers</a> hash.
193
+ Reset the <a href="SpiderInstance.html#M000027">headers</a> hash.
214
194
  </p>
215
- <p><a class="source-toggle" href="#"
216
- onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
217
- <div class="method-source-code" id="M000010-source">
218
- <pre>
219
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 158</span>
220
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
221
- <span class="ruby-ivar">@headers</span> = {}
222
- <span class="ruby-keyword kw">end</span>
223
- </pre>
224
- </div>
225
195
  </div>
226
196
  </div>
227
197
 
228
- <div id="method-M000009" class="method-detail">
229
- <a name="M000009"></a>
198
+ <div id="method-M000027" class="method-detail">
199
+ <a name="M000027"></a>
230
200
 
231
201
  <div class="method-heading">
232
- <a href="#M000009" class="method-signature">
202
+ <a href="SpiderInstance.src/M000027.html" target="Code" class="method-signature"
203
+ onclick="popupCode('SpiderInstance.src/M000027.html');return false;">
233
204
  <span class="method-name">headers</span><span class="method-args">()</span>
234
205
  </a>
235
206
  </div>
@@ -241,24 +212,15 @@ Use like a hash:
241
212
  <pre>
242
213
  headers['Cookies'] = 'user_id=1;password=btrross3'
243
214
  </pre>
244
- <p><a class="source-toggle" href="#"
245
- onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
246
- <div class="method-source-code" id="M000009-source">
247
- <pre>
248
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 146</span>
249
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
250
- <span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
251
- <span class="ruby-keyword kw">end</span>
252
- </pre>
253
- </div>
254
215
  </div>
255
216
  </div>
256
217
 
257
- <div id="method-M000006" class="method-detail">
258
- <a name="M000006"></a>
218
+ <div id="method-M000024" class="method-detail">
219
+ <a name="M000024"></a>
259
220
 
260
221
  <div class="method-heading">
261
- <a href="#M000006" class="method-signature">
222
+ <a href="SpiderInstance.src/M000024.html" target="Code" class="method-signature"
223
+ onclick="popupCode('SpiderInstance.src/M000024.html');return false;">
262
224
  <span class="method-name">on</span><span class="method-args">(code, p = nil, &amp;block)</span>
263
225
  </a>
264
226
  </div>
@@ -290,30 +252,15 @@ For example:
290
252
  puts &quot;Given this code: #{resp.code}&quot;
291
253
  end
292
254
  </pre>
293
- <p><a class="source-toggle" href="#"
294
- onclick="toggleCode('M000006-source');return false;">[Source]</a></p>
295
- <div class="method-source-code" id="M000006-source">
296
- <pre>
297
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 121</span>
298
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
299
- <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
300
- <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
301
- <span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
302
- <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
303
- <span class="ruby-keyword kw">else</span>
304
- <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
305
- <span class="ruby-keyword kw">end</span>
306
- <span class="ruby-keyword kw">end</span>
307
- </pre>
308
- </div>
309
255
  </div>
310
256
  </div>
311
257
 
312
- <div id="method-M000007" class="method-detail">
313
- <a name="M000007"></a>
258
+ <div id="method-M000025" class="method-detail">
259
+ <a name="M000025"></a>
314
260
 
315
261
  <div class="method-heading">
316
- <a href="#M000007" class="method-signature">
262
+ <a href="SpiderInstance.src/M000025.html" target="Code" class="method-signature"
263
+ onclick="popupCode('SpiderInstance.src/M000025.html');return false;">
317
264
  <span class="method-name">setup</span><span class="method-args">(p = nil, &amp;block)</span>
318
265
  </a>
319
266
  </div>
@@ -327,24 +274,51 @@ Run before the HTTP request. Given the URL as a string.
327
274
  headers['Cookies'] = 'user_id=1;admin=true'
328
275
  end
329
276
  </pre>
330
- <p><a class="source-toggle" href="#"
331
- onclick="toggleCode('M000007-source');return false;">[Source]</a></p>
332
- <div class="method-source-code" id="M000007-source">
277
+ </div>
278
+ </div>
279
+
280
+ <div id="method-M000023" class="method-detail">
281
+ <a name="M000023"></a>
282
+
283
+ <div class="method-heading">
284
+ <a href="SpiderInstance.src/M000023.html" target="Code" class="method-signature"
285
+ onclick="popupCode('SpiderInstance.src/M000023.html');return false;">
286
+ <span class="method-name">store_next_urls_with</span><span class="method-args">(a_store)</span>
287
+ </a>
288
+ </div>
289
+
290
+ <div class="method-description">
291
+ <p>
292
+ The Web is a really, really, really big graph; as such, this list of nodes
293
+ to visit grows really, really, really big.
294
+ </p>
295
+ <p>
296
+ Change the object used to store nodes we have yet to walk. The default
297
+ object is an instance of Array. Available with <a
298
+ href="Spider.html">Spider</a> is a wrapper of AmazonSQS.
299
+ </p>
300
+ <p>
301
+ You can implement a custom class for this; any object passed to <a
302
+ href="SpiderInstance.html#M000023">store_next_urls_with</a> must
303
+ understand just push and pop .
304
+ </p>
333
305
  <pre>
334
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 135</span>
335
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
336
- <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
337
- <span class="ruby-keyword kw">end</span>
306
+ # default
307
+ store_next_urls_with Array.new
308
+
309
+ # AmazonSQS
310
+ require 'spider/next_urls_in_sqs'
311
+ store_next_urls_with NextUrlsInSQS.new(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, queue_name)
338
312
  </pre>
339
- </div>
340
313
  </div>
341
314
  </div>
342
315
 
343
- <div id="method-M000008" class="method-detail">
344
- <a name="M000008"></a>
316
+ <div id="method-M000026" class="method-detail">
317
+ <a name="M000026"></a>
345
318
 
346
319
  <div class="method-heading">
347
- <a href="#M000008" class="method-signature">
320
+ <a href="SpiderInstance.src/M000026.html" target="Code" class="method-signature"
321
+ onclick="popupCode('SpiderInstance.src/M000026.html');return false;">
348
322
  <span class="method-name">teardown</span><span class="method-args">(p = nil, &amp;block)</span>
349
323
  </a>
350
324
  </div>
@@ -353,16 +327,6 @@ Run before the HTTP request. Given the URL as a string.
353
327
  <p>
354
328
  Run last, once for each page. Given the URL as a string.
355
329
  </p>
356
- <p><a class="source-toggle" href="#"
357
- onclick="toggleCode('M000008-source');return false;">[Source]</a></p>
358
- <div class="method-source-code" id="M000008-source">
359
- <pre>
360
- <span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 140</span>
361
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
362
- <span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
363
- <span class="ruby-keyword kw">end</span>
364
- </pre>
365
- </div>
366
330
  </div>
367
331
  </div>
368
332
 
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>add_url_check (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 70</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_url_check</span>(<span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-ivar">@url_checks</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">block</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>check_already_seen_with (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 91</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">check_already_seen_with</span>(<span class="ruby-identifier">cacher</span>)
15
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:&lt;&lt;</span>) <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">cacher</span>.<span class="ruby-identifier">respond_to?</span>(<span class="ruby-identifier">:include?</span>)
16
+ <span class="ruby-ivar">@seen</span> = <span class="ruby-identifier">cacher</span>
17
+ <span class="ruby-keyword kw">else</span>
18
+ <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">'expected something that responds to &lt;&lt; and included?'</span>
19
+ <span class="ruby-keyword kw">end</span>
20
+ <span class="ruby-keyword kw">end</span></pre>
21
+ </body>
22
+ </html>
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>store_next_urls_with (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 115</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">store_next_urls_with</span>(<span class="ruby-identifier">a_store</span>)
15
+ <span class="ruby-identifier">tmp_next_urls</span> = <span class="ruby-ivar">@next_urls</span>
16
+ <span class="ruby-ivar">@next_urls</span> = <span class="ruby-identifier">a_store</span>
17
+ <span class="ruby-identifier">tmp_next_urls</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">a_url_hash</span><span class="ruby-operator">|</span>
18
+ <span class="ruby-ivar">@next_urls</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">a_url_hash</span>
19
+ <span class="ruby-keyword kw">end</span>
20
+ <span class="ruby-keyword kw">end</span></pre>
21
+ </body>
22
+ </html>
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>on (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 145</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">on</span>(<span class="ruby-identifier">code</span>, <span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-identifier">f</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
16
+ <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">code</span>
17
+ <span class="ruby-keyword kw">when</span> <span class="ruby-constant">Fixnum</span>
18
+ <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>] = <span class="ruby-identifier">f</span>
19
+ <span class="ruby-keyword kw">else</span>
20
+ <span class="ruby-ivar">@callbacks</span>[<span class="ruby-identifier">code</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">f</span>
21
+ <span class="ruby-keyword kw">end</span>
22
+ <span class="ruby-keyword kw">end</span></pre>
23
+ </body>
24
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>setup (SpiderInstance)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 159</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
+ <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>