spider 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +6 -0
- data/README +3 -3
- data/doc/classes/BeStaticServerPages.html +197 -0
- data/doc/classes/BeStaticServerPages.src/M000030.html +19 -0
- data/doc/classes/BeStaticServerPages.src/M000031.html +19 -0
- data/doc/classes/BeStaticServerPages.src/M000032.html +18 -0
- data/doc/classes/BeStaticServerPages.src/M000033.html +18 -0
- data/doc/classes/IncludedInMemcached.html +18 -45
- data/doc/classes/IncludedInMemcached.src/M000015.html +18 -0
- data/doc/classes/IncludedInMemcached.src/M000016.html +18 -0
- data/doc/classes/IncludedInMemcached.src/M000017.html +18 -0
- data/doc/classes/LoopingServlet.html +137 -0
- data/doc/classes/LoopingServlet.src/M000037.html +23 -0
- data/doc/classes/NextUrlsInSQS.html +204 -0
- data/doc/classes/NextUrlsInSQS.src/M000018.html +19 -0
- data/doc/classes/NextUrlsInSQS.src/M000019.html +22 -0
- data/doc/classes/NextUrlsInSQS.src/M000020.html +19 -0
- data/doc/classes/QueryServlet.html +137 -0
- data/doc/classes/QueryServlet.src/M000038.html +19 -0
- data/doc/classes/RobotRules.html +175 -0
- data/doc/classes/RobotRules.src/M000034.html +19 -0
- data/doc/classes/RobotRules.src/M000035.html +67 -0
- data/doc/classes/RobotRules.src/M000036.html +24 -0
- data/doc/classes/Spider.html +5 -17
- data/doc/classes/Spider.src/M000029.html +21 -0
- data/doc/classes/SpiderInstance.html +72 -108
- data/doc/classes/SpiderInstance.src/M000021.html +18 -0
- data/doc/classes/SpiderInstance.src/M000022.html +22 -0
- data/doc/classes/SpiderInstance.src/M000023.html +22 -0
- data/doc/classes/SpiderInstance.src/M000024.html +24 -0
- data/doc/classes/SpiderInstance.src/M000025.html +18 -0
- data/doc/classes/SpiderInstance.src/M000026.html +18 -0
- data/doc/classes/SpiderInstance.src/M000027.html +18 -0
- data/doc/classes/SpiderInstance.src/M000028.html +18 -0
- data/doc/created.rid +1 -1
- data/doc/files/lib/spider/included_in_memcached_rb.html +29 -1
- data/doc/files/lib/spider/next_urls_in_sqs_rb.html +144 -0
- data/doc/files/lib/spider/robot_rules_rb.html +114 -0
- data/doc/files/lib/spider/spider_instance_rb.html +1 -2
- data/doc/files/lib/spider_rb.html +40 -9
- data/doc/files/spec/spec_helper_rb.html +196 -0
- data/doc/files/spec/spec_helper_rb.src/M000001.html +20 -0
- data/doc/files/spec/spec_helper_rb.src/M000002.html +26 -0
- data/doc/files/spec/spec_helper_rb.src/M000003.html +24 -0
- data/doc/files/spec/spec_helper_rb.src/M000004.html +18 -0
- data/doc/files/spec/spec_helper_rb.src/M000005.html +23 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.html +142 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000006.html +19 -0
- data/doc/files/spec/spider/included_in_memcached_spec_rb.src/M000007.html +18 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.html +210 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000008.html +21 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000009.html +19 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000010.html +19 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000011.html +27 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000012.html +26 -0
- data/doc/files/spec/spider/spider_instance_spec_rb.src/M000013.html +27 -0
- data/doc/files/spec/spider_spec_rb.html +127 -0
- data/doc/files/spec/spider_spec_rb.src/M000014.html +23 -0
- data/doc/fr_class_index.html +5 -0
- data/doc/fr_file_index.html +6 -1
- data/doc/fr_method_index.html +38 -11
- data/doc/index.html +1 -1
- data/lib/spider/spider_instance.rb +15 -7
- data/spider.gemspec +1 -1
- metadata +84 -22
- data/lib/test.rb +0 -27
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>&lt;&lt; (IncludedInMemcached)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/included_in_memcached.rb, line 45</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-operator">&lt;&lt;</span>(<span class="ruby-identifier">v</span>)
|
15
|
+
<span class="ruby-ivar">@c</span>.<span class="ruby-identifier">add</span>(<span class="ruby-identifier">v</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-identifier">v</span>)
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>include? (IncludedInMemcached)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/included_in_memcached.rb, line 50</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">include?</span>(<span class="ruby-identifier">v</span>)
|
15
|
+
<span class="ruby-ivar">@c</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">v</span>.<span class="ruby-identifier">to_s</span>) <span class="ruby-operator">==</span> <span class="ruby-identifier">v</span>
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,137 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: LoopingServlet</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">LoopingServlet</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/spec/spec_helper_rb.html">
|
59
|
+
spec/spec_helper.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
WEBrick::HTTPServlet::AbstractServlet
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000037">do_GET</a>
|
90
|
+
</div>
|
91
|
+
</div>
|
92
|
+
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
<!-- if includes -->
|
97
|
+
|
98
|
+
<div id="section">
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
<!-- if method_list -->
|
108
|
+
<div id="methods">
|
109
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
110
|
+
|
111
|
+
<div id="method-M000037" class="method-detail">
|
112
|
+
<a name="M000037"></a>
|
113
|
+
|
114
|
+
<div class="method-heading">
|
115
|
+
<a href="LoopingServlet.src/M000037.html" target="Code" class="method-signature"
|
116
|
+
onclick="popupCode('LoopingServlet.src/M000037.html');return false;">
|
117
|
+
<span class="method-name">do_GET</span><span class="method-args">(req, res)</span>
|
118
|
+
</a>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div class="method-description">
|
122
|
+
</div>
|
123
|
+
</div>
|
124
|
+
|
125
|
+
|
126
|
+
</div>
|
127
|
+
|
128
|
+
|
129
|
+
</div>
|
130
|
+
|
131
|
+
|
132
|
+
<div id="validator-badges">
|
133
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
134
|
+
</div>
|
135
|
+
|
136
|
+
</body>
|
137
|
+
</html>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>do_GET (LoopingServlet)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File spec/spec_helper.rb, line 69</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">do_GET</span>(<span class="ruby-identifier">req</span>, <span class="ruby-identifier">res</span>)
|
15
|
+
<span class="ruby-identifier">res</span>[<span class="ruby-value str">'Content-type'</span>] = <span class="ruby-value str">'text/html'</span>
|
16
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">req</span>.<span class="ruby-identifier">path</span> <span class="ruby-operator">==</span> <span class="ruby-value str">'/foo'</span>
|
17
|
+
<span class="ruby-identifier">res</span>.<span class="ruby-identifier">body</span> = <span class="ruby-value str">"&lt;a href=\"/\"&gt;a&lt;/a&gt;\n"</span>
|
18
|
+
<span class="ruby-keyword kw">else</span>
|
19
|
+
<span class="ruby-identifier">res</span>.<span class="ruby-identifier">body</span> = <span class="ruby-value str">"&lt;a href=\"/foo\"&gt;b&lt;/a&gt;\n"</span>
|
20
|
+
<span class="ruby-keyword kw">end</span>
|
21
|
+
<span class="ruby-keyword kw">end</span></pre>
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -0,0 +1,204 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: NextUrlsInSQS</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">NextUrlsInSQS</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/lib/spider/next_urls_in_sqs_rb.html">
|
59
|
+
lib/spider/next_urls_in_sqs.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
<div id="description">
|
82
|
+
<p>
|
83
|
+
A specialized class using AmazonSQS to track nodes to walk. It supports two
|
84
|
+
operations: <a href="NextUrlsInSQS.html#M000020">push</a> and <a
|
85
|
+
href="NextUrlsInSQS.html#M000019">pop</a>. Together these can be used to
|
86
|
+
add items to the queue, then pull items off the queue.
|
87
|
+
</p>
|
88
|
+
<p>
|
89
|
+
This is useful if you want multiple <a href="Spider.html">Spider</a>
|
90
|
+
processes crawling the same data set.
|
91
|
+
</p>
|
92
|
+
<p>
|
93
|
+
To use it with <a href="Spider.html">Spider</a> use the
|
94
|
+
store_next_urls_with method:
|
95
|
+
</p>
|
96
|
+
<pre>
|
97
|
+
Spider.start_at('http://example.com/') do |s|
|
98
|
+
s.store_next_urls_with NextUrlsInSQS.new(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, queue_name)
|
99
|
+
end
|
100
|
+
</pre>
|
101
|
+
|
102
|
+
</div>
|
103
|
+
|
104
|
+
|
105
|
+
</div>
|
106
|
+
|
107
|
+
<div id="method-list">
|
108
|
+
<h3 class="section-bar">Methods</h3>
|
109
|
+
|
110
|
+
<div class="name-list">
|
111
|
+
<a href="#M000018">new</a>
|
112
|
+
<a href="#M000019">pop</a>
|
113
|
+
<a href="#M000020">push</a>
|
114
|
+
</div>
|
115
|
+
</div>
|
116
|
+
|
117
|
+
</div>
|
118
|
+
|
119
|
+
|
120
|
+
<!-- if includes -->
|
121
|
+
|
122
|
+
<div id="section">
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
<!-- if method_list -->
|
132
|
+
<div id="methods">
|
133
|
+
<h3 class="section-bar">Public Class methods</h3>
|
134
|
+
|
135
|
+
<div id="method-M000018" class="method-detail">
|
136
|
+
<a name="M000018"></a>
|
137
|
+
|
138
|
+
<div class="method-heading">
|
139
|
+
<a href="NextUrlsInSQS.src/M000018.html" target="Code" class="method-signature"
|
140
|
+
onclick="popupCode('NextUrlsInSQS.src/M000018.html');return false;">
|
141
|
+
<span class="method-name">new</span><span class="method-args">(aws_access_key, aws_secret_access_key, queue_name = 'ruby-spider')</span>
|
142
|
+
</a>
|
143
|
+
</div>
|
144
|
+
|
145
|
+
<div class="method-description">
|
146
|
+
<p>
|
147
|
+
Construct a <a href="NextUrlsInSQS.html#M000018">new</a> <a
|
148
|
+
href="NextUrlsInSQS.html">NextUrlsInSQS</a> instance. All arguments here
|
149
|
+
are passed to RightAws::SqsGen2 (part of the right_aws gem) or used to set
|
150
|
+
the AmazonSQS queue name (optional).
|
151
|
+
</p>
|
152
|
+
</div>
|
153
|
+
</div>
|
154
|
+
|
155
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
156
|
+
|
157
|
+
<div id="method-M000019" class="method-detail">
|
158
|
+
<a name="M000019"></a>
|
159
|
+
|
160
|
+
<div class="method-heading">
|
161
|
+
<a href="NextUrlsInSQS.src/M000019.html" target="Code" class="method-signature"
|
162
|
+
onclick="popupCode('NextUrlsInSQS.src/M000019.html');return false;">
|
163
|
+
<span class="method-name">pop</span><span class="method-args">()</span>
|
164
|
+
</a>
|
165
|
+
</div>
|
166
|
+
|
167
|
+
<div class="method-description">
|
168
|
+
<p>
|
169
|
+
Pull an item off the queue, looping until data is found. Data is encoded with
|
170
|
+
YAML.
|
171
|
+
</p>
|
172
|
+
</div>
|
173
|
+
</div>
|
174
|
+
|
175
|
+
<div id="method-M000020" class="method-detail">
|
176
|
+
<a name="M000020"></a>
|
177
|
+
|
178
|
+
<div class="method-heading">
|
179
|
+
<a href="NextUrlsInSQS.src/M000020.html" target="Code" class="method-signature"
|
180
|
+
onclick="popupCode('NextUrlsInSQS.src/M000020.html');return false;">
|
181
|
+
<span class="method-name">push</span><span class="method-args">(a_msg)</span>
|
182
|
+
</a>
|
183
|
+
</div>
|
184
|
+
|
185
|
+
<div class="method-description">
|
186
|
+
<p>
|
187
|
+
Put data on the queue. Data is encoded with YAML.
|
188
|
+
</p>
|
189
|
+
</div>
|
190
|
+
</div>
|
191
|
+
|
192
|
+
|
193
|
+
</div>
|
194
|
+
|
195
|
+
|
196
|
+
</div>
|
197
|
+
|
198
|
+
|
199
|
+
<div id="validator-badges">
|
200
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
201
|
+
</div>
|
202
|
+
|
203
|
+
</body>
|
204
|
+
</html>
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>new (NextUrlsInSQS)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/next_urls_in_sqs.rb, line 46</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">aws_access_key</span>, <span class="ruby-identifier">aws_secret_access_key</span>, <span class="ruby-identifier">queue_name</span> = <span class="ruby-value str">'ruby-spider'</span>)
|
15
|
+
<span class="ruby-ivar">@sqs</span> = <span class="ruby-constant">RightAws</span><span class="ruby-operator">::</span><span class="ruby-constant">SqsGen2</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">aws_access_key</span>, <span class="ruby-identifier">aws_secret_access_key</span>)
|
16
|
+
<span class="ruby-ivar">@queue</span> = <span class="ruby-ivar">@sqs</span>.<span class="ruby-identifier">queue</span>(<span class="ruby-identifier">queue_name</span>)
|
17
|
+
<span class="ruby-keyword kw">end</span></pre>
|
18
|
+
</body>
|
19
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>pop (NextUrlsInSQS)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/spider/next_urls_in_sqs.rb, line 53</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">pop</span>
|
15
|
+
<span class="ruby-keyword kw">while</span> <span class="ruby-keyword kw">true</span>
|
16
|
+
<span class="ruby-identifier">message</span> = <span class="ruby-ivar">@queue</span>.<span class="ruby-identifier">pop</span>
|
17
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-constant">YAML</span><span class="ruby-operator">::</span><span class="ruby-identifier">load</span>(<span class="ruby-identifier">message</span>.<span class="ruby-identifier">to_s</span>) <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">message</span>.<span class="ruby-identifier">nil?</span>
|
18
|
+
<span class="ruby-identifier">sleep</span> <span class="ruby-value">5</span>
|
19
|
+
<span class="ruby-keyword kw">end</span>
|
20
|
+
<span class="ruby-keyword kw">end</span></pre>
|
21
|
+
</body>
|
22
|
+
</html>
|