spider 0.4.4 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/AUTHORS +12 -0
  3. data/CHANGES +6 -0
  4. data/LICENSE +21 -0
  5. data/{README → README.md} +50 -43
  6. data/lib/spider.rb +12 -29
  7. data/lib/spider/included_in_memcached.rb +1 -24
  8. data/lib/spider/next_urls_in_sqs.rb +6 -29
  9. data/lib/spider/robot_rules.rb +61 -57
  10. data/lib/spider/spider_instance.rb +8 -31
  11. data/spider.gemspec +4 -2
  12. metadata +33 -124
  13. data/doc/classes/BeStaticServerPages.html +0 -197
  14. data/doc/classes/BeStaticServerPages.src/M000030.html +0 -19
  15. data/doc/classes/BeStaticServerPages.src/M000031.html +0 -19
  16. data/doc/classes/BeStaticServerPages.src/M000032.html +0 -18
  17. data/doc/classes/BeStaticServerPages.src/M000033.html +0 -18
  18. data/doc/classes/IncludedInMemcached.html +0 -199
  19. data/doc/classes/IncludedInMemcached.src/M000015.html +0 -18
  20. data/doc/classes/IncludedInMemcached.src/M000016.html +0 -18
  21. data/doc/classes/IncludedInMemcached.src/M000017.html +0 -18
  22. data/doc/classes/LoopingServlet.html +0 -137
  23. data/doc/classes/LoopingServlet.src/M000037.html +0 -23
  24. data/doc/classes/NextUrlsInSQS.html +0 -204
  25. data/doc/classes/NextUrlsInSQS.src/M000018.html +0 -19
  26. data/doc/classes/NextUrlsInSQS.src/M000019.html +0 -22
  27. data/doc/classes/NextUrlsInSQS.src/M000020.html +0 -19
  28. data/doc/classes/QueryServlet.html +0 -137
  29. data/doc/classes/QueryServlet.src/M000038.html +0 -19
  30. data/doc/classes/RobotRules.html +0 -175
  31. data/doc/classes/RobotRules.src/M000034.html +0 -19
  32. data/doc/classes/RobotRules.src/M000035.html +0 -67
  33. data/doc/classes/RobotRules.src/M000036.html +0 -24
  34. data/doc/classes/Spider.html +0 -170
  35. data/doc/classes/Spider.src/M000029.html +0 -21
  36. data/doc/classes/SpiderInstance.html +0 -345
  37. data/doc/classes/SpiderInstance.src/M000021.html +0 -18
  38. data/doc/classes/SpiderInstance.src/M000022.html +0 -22
  39. data/doc/classes/SpiderInstance.src/M000023.html +0 -22
  40. data/doc/classes/SpiderInstance.src/M000024.html +0 -24
  41. data/doc/classes/SpiderInstance.src/M000025.html +0 -18
  42. data/doc/classes/SpiderInstance.src/M000026.html +0 -18
  43. data/doc/classes/SpiderInstance.src/M000027.html +0 -18
  44. data/doc/classes/SpiderInstance.src/M000028.html +0 -18
  45. data/doc/created.rid +0 -1
  46. data/doc/files/README.html +0 -223
  47. data/doc/files/lib/spider/included_in_memcached_rb.html +0 -142
  48. data/doc/files/lib/spider/next_urls_in_sqs_rb.html +0 -144
  49. data/doc/files/lib/spider/robot_rules_rb.html +0 -114
  50. data/doc/files/lib/spider/spider_instance_rb.html +0 -117
  51. data/doc/files/lib/spider_rb.html +0 -254
  52. data/doc/files/spec/spec_helper_rb.html +0 -196
  53. data/doc/files/spec/spec_helper_rb.src/M000001.html +0 -20
  54. data/doc/files/spec/spec_helper_rb.src/M000002.html +0 -26
  55. data/doc/files/spec/spec_helper_rb.src/M000003.html +0 -24
  56. data/doc/files/spec/spec_helper_rb.src/M000004.html +0 -18
  57. data/doc/files/spec/spec_helper_rb.src/M000005.html +0 -23
  58. data/doc/files/spec/spider/included_in_memcached_spec_rb.html +0 -142
  59. data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +0 -19
  60. data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +0 -18
  61. data/doc/files/spec/spider/spider_instance_spec_rb.html +0 -210
  62. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +0 -21
  63. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +0 -19
  64. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +0 -19
  65. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +0 -27
  66. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +0 -26
  67. data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +0 -27
  68. data/doc/files/spec/spider_spec_rb.html +0 -127
  69. data/doc/files/spec/spider_spec_rb.src/M000014.html +0 -23
  70. data/doc/fr_class_index.html +0 -34
  71. data/doc/fr_file_index.html +0 -35
  72. data/doc/fr_method_index.html +0 -64
  73. data/doc/index.html +0 -24
  74. data/doc/rdoc-style.css +0 -208
@@ -1,18 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>setup (SpiderInstance)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 159</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">setup</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
- <span class="ruby-ivar">@setup</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
16
- <span class="ruby-keyword kw">end</span></pre>
17
- </body>
18
- </html>
@@ -1,18 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>teardown (SpiderInstance)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 164</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">teardown</span>(<span class="ruby-identifier">p</span> = <span class="ruby-keyword kw">nil</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span>)
15
- <span class="ruby-ivar">@teardown</span> = <span class="ruby-identifier">p</span> <span class="ruby-value">? </span><span class="ruby-identifier">p</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">block</span>
16
- <span class="ruby-keyword kw">end</span></pre>
17
- </body>
18
- </html>
@@ -1,18 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>headers (SpiderInstance)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 170</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">headers</span>
15
- <span class="ruby-constant">HeaderSetter</span>.<span class="ruby-identifier">new</span>(<span class="ruby-keyword kw">self</span>)
16
- <span class="ruby-keyword kw">end</span></pre>
17
- </body>
18
- </html>
@@ -1,18 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>clear_headers (SpiderInstance)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/spider/spider_instance.rb, line 182</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">clear_headers</span>
15
- <span class="ruby-ivar">@headers</span> = {}
16
- <span class="ruby-keyword kw">end</span></pre>
17
- </body>
18
- </html>
@@ -1 +0,0 @@
1
- Thu, 21 May 2009 15:42:01 +0000
@@ -1,223 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>File: README</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="fileHeader">
50
- <h1>README</h1>
51
- <table class="header-table">
52
- <tr class="top-aligned-row">
53
- <td><strong>Path:</strong></td>
54
- <td>README
55
- </td>
56
- </tr>
57
- <tr class="top-aligned-row">
58
- <td><strong>Last Update:</strong></td>
59
- <td>Thu Nov 08 17:51:17 -0500 2007</td>
60
- </tr>
61
- </table>
62
- </div>
63
- <!-- banner header -->
64
-
65
- <div id="bodyContent">
66
-
67
-
68
-
69
- <div id="contextContent">
70
-
71
- <div id="description">
72
- <p>
73
- <a href="../classes/Spider.html">Spider</a>, a Web spidering library for
74
- Ruby. It handles the robots.txt, scraping, collecting, and looping so that
75
- you can just handle the data.
76
- </p>
77
- <h2>Examples</h2>
78
- <h3>Crawl the Web, loading each page in turn, until you run out of memory</h3>
79
- <pre>
80
- require 'spider'
81
- Spider.start_at('http://mike-burns.com/') {}
82
- </pre>
83
- <h3>To handle erroneous responses</h3>
84
- <pre>
85
- require 'spider'
86
- Spider.start_at('http://mike-burns.com/') do |s|
87
- s.on :failure do |a_url, resp, prior_url|
88
- puts &quot;URL failed: #{a_url}&quot;
89
- puts &quot; linked from #{prior_url}&quot;
90
- end
91
- end
92
- </pre>
93
- <h3>Or handle successful responses</h3>
94
- <pre>
95
- require 'spider'
96
- Spider.start_at('http://mike-burns.com/') do |s|
97
- s.on :success do |a_url, resp, prior_url|
98
- puts &quot;#{a_url}: #{resp.code}&quot;
99
- puts resp.body
100
- puts
101
- end
102
- end
103
- </pre>
104
- <h3>Limit to just one domain</h3>
105
- <pre>
106
- require 'spider'
107
- Spider.start_at('http://mike-burns.com/') do |s|
108
- s.add_url_check do |a_url|
109
- a_url =~ %r{^http://mike-burns.com.*}
110
- end
111
- end
112
- </pre>
113
- <h3>Pass headers to some requests</h3>
114
- <pre>
115
- require 'spider'
116
- Spider.start_at('http://mike-burns.com/') do |s|
117
- s.setup do |a_url|
118
- if a_url =~ %r{^http://.*wikipedia.*}
119
- headers['User-Agent'] = &quot;Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)&quot;
120
- end
121
- end
122
- end
123
- </pre>
124
- <h3>Use memcached to track cycles</h3>
125
- <pre>
126
- require 'spider'
127
- require 'spider/included_in_memcached'
128
- SERVERS = ['10.0.10.2:11211','10.0.10.3:11211','10.0.10.4:11211']
129
- Spider.start_at('http://mike-burns.com/') do |s|
130
- s.check_already_seen_with IncludedInMemcached.new(SERVERS)
131
- end
132
- </pre>
133
- <h3>Track cycles with a custom object</h3>
134
- <pre>
135
- require 'spider'
136
-
137
- class ExpireLinks &lt; Hash
138
- def &lt;&lt;(v)
139
- [v] = Time.now
140
- end
141
- def include?(v)
142
- [v] &amp;&amp; (Time.now + 86400) &lt;= [v]
143
- end
144
- end
145
-
146
- Spider.start_at('http://mike-burns.com/') do |s|
147
- s.check_already_seen_with ExpireLinks.new
148
- end
149
- </pre>
150
- <h3>Create a URL graph</h3>
151
- <pre>
152
- require 'spider'
153
- nodes = {}
154
- Spider.start_at('http://mike-burns.com/') do |s|
155
- s.add_url_check {|a_url| a_url =~ %r{^http://mike-burns.com.*} }
156
-
157
- s.on(:every) do |a_url, resp, prior_url|
158
- nodes[prior_url] ||= []
159
- nodes[prior_url] &lt;&lt; a_url
160
- end
161
- end
162
- </pre>
163
- <h3>Use a proxy</h3>
164
- <pre>
165
- require 'net/http_configuration'
166
- require 'spider'
167
- http_conf = Net::HTTP::Configuration.new(:proxy_host =&gt; '7proxies.org',
168
- :proxy_port =&gt; 8881)
169
- http_conf.apply do
170
- Spider.start_at('http://img.4chan.org/b/') do |s|
171
- s.on(:success) do |a_url, resp, prior_url|
172
- File.open(a_url.gsub('/',':'),'w') do |f|
173
- f.write(resp.body)
174
- end
175
- end
176
- end
177
- end
178
- </pre>
179
- <h2>Author</h2>
180
- <p>
181
- Mike Burns <a href="http://mike-burns.com">mike-burns.com</a>
182
- mike@mike-burns.com
183
- </p>
184
- <p>
185
- Help from Matt Horan, John Nagro, and Henri Cook.
186
- </p>
187
- <p>
188
- With `robot_rules&#8217; from James Edward Gray II via <a
189
- href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589">blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589</a>
190
- </p>
191
-
192
- </div>
193
-
194
-
195
- </div>
196
-
197
-
198
- </div>
199
-
200
-
201
- <!-- if includes -->
202
-
203
- <div id="section">
204
-
205
-
206
-
207
-
208
-
209
-
210
-
211
-
212
- <!-- if method_list -->
213
-
214
-
215
- </div>
216
-
217
-
218
- <div id="validator-badges">
219
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
220
- </div>
221
-
222
- </body>
223
- </html>
@@ -1,142 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>File: included_in_memcached.rb</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="fileHeader">
50
- <h1>included_in_memcached.rb</h1>
51
- <table class="header-table">
52
- <tr class="top-aligned-row">
53
- <td><strong>Path:</strong></td>
54
- <td>lib/spider/included_in_memcached.rb
55
- </td>
56
- </tr>
57
- <tr class="top-aligned-row">
58
- <td><strong>Last Update:</strong></td>
59
- <td>Thu May 21 13:19:06 +0000 2009</td>
60
- </tr>
61
- </table>
62
- </div>
63
- <!-- banner header -->
64
-
65
- <div id="bodyContent">
66
-
67
-
68
-
69
- <div id="contextContent">
70
-
71
- <div id="description">
72
- <p>
73
- Use memcached to track cycles.
74
- </p>
75
- <p>
76
- Copyright 2007 Mike Burns Redistribution and use in source and binary
77
- forms, with or without modification, are permitted provided that the
78
- following conditions are met:
79
- </p>
80
- <pre>
81
- * Redistributions of source code must retain the above copyright
82
- notice, this list of conditions and the following disclaimer.
83
- * Redistributions in binary form must reproduce the above copyright
84
- notice, this list of conditions and the following disclaimer in the
85
- documentation and/or other materials provided with the distribution.
86
- * Neither the name Mike Burns nor the
87
- names of his contributors may be used to endorse or promote products
88
- derived from this software without specific prior written permission.
89
- </pre>
90
- <p>
91
- THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS&#8217;&#8217; AND ANY
92
- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
93
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
94
- DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY DIRECT,
95
- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
96
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
97
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
98
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99
- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100
- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
101
- DAMAGE.
102
- </p>
103
-
104
- </div>
105
-
106
- <div id="requires-list">
107
- <h3 class="section-bar">Required files</h3>
108
-
109
- <div class="name-list">
110
- memcache&nbsp;&nbsp;
111
- </div>
112
- </div>
113
-
114
- </div>
115
-
116
-
117
- </div>
118
-
119
-
120
- <!-- if includes -->
121
-
122
- <div id="section">
123
-
124
-
125
-
126
-
127
-
128
-
129
-
130
-
131
- <!-- if method_list -->
132
-
133
-
134
- </div>
135
-
136
-
137
- <div id="validator-badges">
138
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
139
- </div>
140
-
141
- </body>
142
- </html>
@@ -1,144 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>File: next_urls_in_sqs.rb</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="fileHeader">
50
- <h1>next_urls_in_sqs.rb</h1>
51
- <table class="header-table">
52
- <tr class="top-aligned-row">
53
- <td><strong>Path:</strong></td>
54
- <td>lib/spider/next_urls_in_sqs.rb
55
- </td>
56
- </tr>
57
- <tr class="top-aligned-row">
58
- <td><strong>Last Update:</strong></td>
59
- <td>Thu May 21 13:19:06 +0000 2009</td>
60
- </tr>
61
- </table>
62
- </div>
63
- <!-- banner header -->
64
-
65
- <div id="bodyContent">
66
-
67
-
68
-
69
- <div id="contextContent">
70
-
71
- <div id="description">
72
- <p>
73
- Use AmazonSQS to track nodes to visit.
74
- </p>
75
- <p>
76
- Copyright 2008 John Nagro Redistribution and use in source and binary
77
- forms, with or without modification, are permitted provided that the
78
- following conditions are met:
79
- </p>
80
- <pre>
81
- * Redistributions of source code must retain the above copyright
82
- notice, this list of conditions and the following disclaimer.
83
- * Redistributions in binary form must reproduce the above copyright
84
- notice, this list of conditions and the following disclaimer in the
85
- documentation and/or other materials provided with the distribution.
86
- * Neither the name Mike Burns nor the
87
- names of his contributors may be used to endorse or promote products
88
- derived from this software without specific prior written permission.
89
- </pre>
90
- <p>
91
- THIS SOFTWARE IS PROVIDED BY Mike Burns ``AS IS&#8217;&#8217; AND ANY
92
- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
93
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
94
- DISCLAIMED. IN NO EVENT SHALL Mike Burns BE LIABLE FOR ANY DIRECT,
95
- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
96
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
97
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
98
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99
- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100
- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
101
- DAMAGE.
102
- </p>
103
-
104
- </div>
105
-
106
- <div id="requires-list">
107
- <h3 class="section-bar">Required files</h3>
108
-
109
- <div class="name-list">
110
- rubygems&nbsp;&nbsp;
111
- right_aws&nbsp;&nbsp;
112
- yaml&nbsp;&nbsp;
113
- </div>
114
- </div>
115
-
116
- </div>
117
-
118
-
119
- </div>
120
-
121
-
122
- <!-- if includes -->
123
-
124
- <div id="section">
125
-
126
-
127
-
128
-
129
-
130
-
131
-
132
-
133
- <!-- if method_list -->
134
-
135
-
136
- </div>
137
-
138
-
139
- <div id="validator-badges">
140
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
141
- </div>
142
-
143
- </body>
144
- </html>