spider 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/doc/created.rid ADDED
@@ -0,0 +1 @@
1
+ Mon, 22 Oct 2007 07:35:00 -0400
@@ -0,0 +1,149 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: README</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>README</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>README
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Mon Oct 22 07:34:31 -0400 2007</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <p>
73
+ <a href="../classes/Spider.html">Spider</a>, a Web spidering library for
74
+ Ruby. It handles the robots.txt, scraping, collecting, and looping so that
75
+ you can just handle the data.
76
+ </p>
77
+ <h2>Usage</h2>
78
+ <pre>
79
+ Spider.start_at('http://mike-burns.com/') do |s|
80
+ # Limit the pages to just this domain.
81
+ s.add_url_check do |a_url|
82
+ a_url =~ %r{^http://mike-burns.com.*}
83
+ end
84
+
85
+ # Handle 404s.
86
+ s.on 404 do |a_url, err_code|
87
+ puts &quot;URL not found: #{a_url}&quot;
88
+ end
89
+
90
+ # Handle 2xx.
91
+ s.on :success do |a_url, code, headers, body|
92
+ puts &quot;body: #{body}&quot;
93
+ end
94
+
95
+ # Handle everything.
96
+ s.on :any do |a_url, resp|
97
+ puts &quot;URL returned anything: #{a_url} with this code #{resp.code}&quot;
98
+ end
99
+ end
100
+ </pre>
101
+ <h2>Requirements</h2>
102
+ <p>
103
+ This library uses `robot_rules&#8217; (included), `open-uri&#8217;, and
104
+ `uri&#8217;. Any modern Ruby should work; if yours doesn&#8216;t, let me
105
+ know so I can update this with your version number.
106
+ </p>
107
+ <h2>Author</h2>
108
+ <p>
109
+ Mike Burns <a href="http://mike-burns.com">mike-burns.com</a>
110
+ mike@mike-burns.com
111
+ </p>
112
+ <p>
113
+ With help from Matt Horan and John Nagro. With `robot_rules&#8217; from
114
+ James Edward Gray II via <a
115
+ href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589">blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589</a>
116
+ </p>
117
+
118
+ </div>
119
+
120
+
121
+ </div>
122
+
123
+
124
+ </div>
125
+
126
+
127
+ <!-- if includes -->
128
+
129
+ <div id="section">
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+ <!-- if method_list -->
139
+
140
+
141
+ </div>
142
+
143
+
144
+ <div id="validator-badges">
145
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
146
+ </div>
147
+
148
+ </body>
149
+ </html>
@@ -0,0 +1,159 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: spider.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>spider.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/spider.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Mon Oct 22 07:19:31 -0400 2007</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <p>
73
+ Copyright 2007 Mike Burns <a href="../../classes/Spider.html">Spider</a>, a
74
+ Web spidering library for Ruby. It handles the robots.txt, scraping,
75
+ collecting, and looping so that you can just handle the data.
76
+ </p>
77
+ <h2>Usage</h2>
78
+ <pre>
79
+ Spider.start_at('http://mike-burns.com/') do |s|
80
+ # Limit the pages to just this domain.
81
+ s.add_url_check do |a_url|
82
+ a_url =~ %r{^http://mike-burns.com.*}
83
+ end
84
+
85
+ # Handle 404s.
86
+ s.on 404 do |a_url, err_code|
87
+ puts &quot;URL not found: #{a_url}&quot;
88
+ end
89
+
90
+ # Handle 2xx.
91
+ s.on :success do |a_url, code, headers, body|
92
+ puts &quot;body: #{body}&quot;
93
+ end
94
+
95
+ # Handle everything.
96
+ s.on :any do |a_url, resp|
97
+ puts &quot;URL returned anything: #{a_url} with this code #{resp.code}&quot;
98
+ end
99
+ end
100
+ </pre>
101
+ <h2>Requirements</h2>
102
+ <p>
103
+ This library uses `robot_rules&#8217; (included), `open-uri&#8217;, and
104
+ `uri&#8217;. Any modern Ruby should work; if yours doesn&#8216;t, let me
105
+ know so I can update this with your version number.
106
+ </p>
107
+ <h2>Author</h2>
108
+ <p>
109
+ Mike Burns <a href="http://mike-burns.com">mike-burns.com</a>
110
+ mike@mike-burns.com
111
+ </p>
112
+ <p>
113
+ With help from Matt Horan and John Nagro. With `robot_rules&#8217; from
114
+ James Edward Gray II via <a
115
+ href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589">blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589</a>
116
+ </p>
117
+
118
+ </div>
119
+
120
+ <div id="requires-list">
121
+ <h3 class="section-bar">Required files</h3>
122
+
123
+ <div class="name-list">
124
+ robot_rules&nbsp;&nbsp;
125
+ open-uri&nbsp;&nbsp;
126
+ uri&nbsp;&nbsp;
127
+ net/http&nbsp;&nbsp;
128
+ </div>
129
+ </div>
130
+
131
+ </div>
132
+
133
+
134
+ </div>
135
+
136
+
137
+ <!-- if includes -->
138
+
139
+ <div id="section">
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+ <!-- if method_list -->
149
+
150
+
151
+ </div>
152
+
153
+
154
+ <div id="validator-badges">
155
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
156
+ </div>
157
+
158
+ </body>
159
+ </html>
@@ -0,0 +1,29 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Classes
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Classes</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Classes</h1>
22
+ <div id="index-entries">
23
+ <a href="classes/Net.html">Net</a><br />
24
+ <a href="classes/Spider.html">Spider</a><br />
25
+ <a href="classes/SpiderInstance.html">SpiderInstance</a><br />
26
+ </div>
27
+ </div>
28
+ </body>
29
+ </html>
@@ -0,0 +1,28 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Files
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Files</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Files</h1>
22
+ <div id="index-entries">
23
+ <a href="files/README.html">README</a><br />
24
+ <a href="files/lib/spider_rb.html">lib/spider.rb</a><br />
25
+ </div>
26
+ </div>
27
+ </body>
28
+ </html>
@@ -0,0 +1,29 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Methods
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Methods</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Methods</h1>
22
+ <div id="index-entries">
23
+ <a href="classes/SpiderInstance.html#M000001">add_url_check (SpiderInstance)</a><br />
24
+ <a href="classes/SpiderInstance.html#M000002">on (SpiderInstance)</a><br />
25
+ <a href="classes/Spider.html#M000003">start_at (Spider)</a><br />
26
+ </div>
27
+ </div>
28
+ </body>
29
+ </html>
data/doc/index.html ADDED
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5
+
6
+ <!--
7
+
8
+ RDoc Documentation
9
+
10
+ -->
11
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
12
+ <head>
13
+ <title>RDoc Documentation</title>
14
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
15
+ </head>
16
+ <frameset rows="20%, 80%">
17
+ <frameset cols="25%,35%,45%">
18
+ <frame src="fr_file_index.html" title="Files" name="Files" />
19
+ <frame src="fr_class_index.html" name="Classes" />
20
+ <frame src="fr_method_index.html" name="Methods" />
21
+ </frameset>
22
+ <frame src="files/lib/spider_rb.html" name="docwin" />
23
+ </frameset>
24
+ </html>
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
+ div#description h1,h2,h3,h4,h5,h6 {
100
+ color: #125;;
101
+ background: transparent;
102
+ }
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }