spider 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/doc/created.rid ADDED
@@ -0,0 +1 @@
1
+ Mon, 22 Oct 2007 07:35:00 -0400
@@ -0,0 +1,149 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: README</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>README</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>README
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Mon Oct 22 07:34:31 -0400 2007</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <p>
73
+ <a href="../classes/Spider.html">Spider</a>, a Web spidering library for
74
+ Ruby. It handles the robots.txt, scraping, collecting, and looping so that
75
+ you can just handle the data.
76
+ </p>
77
+ <h2>Usage</h2>
78
+ <pre>
79
+ Spider.start_at('http://mike-burns.com/') do |s|
80
+ # Limit the pages to just this domain.
81
+ s.add_url_check do |a_url|
82
+ a_url =~ %r{^http://mike-burns.com.*}
83
+ end
84
+
85
+ # Handle 404s.
86
+ s.on 404 do |a_url, err_code|
87
+ puts &quot;URL not found: #{a_url}&quot;
88
+ end
89
+
90
+ # Handle 2xx.
91
+ s.on :success do |a_url, code, headers, body|
92
+ puts &quot;body: #{body}&quot;
93
+ end
94
+
95
+ # Handle everything.
96
+ s.on :any do |a_url, resp|
97
+ puts &quot;URL returned anything: #{a_url} with this code #{resp.code}&quot;
98
+ end
99
+ end
100
+ </pre>
101
+ <h2>Requirements</h2>
102
+ <p>
103
+ This library uses `robot_rules&#8217; (included), `open-uri&#8217;, and
104
+ `uri&#8217;. Any modern Ruby should work; if yours doesn&#8217;t, let me
105
+ know so I can update this with your version number.
106
+ </p>
107
+ <h2>Author</h2>
108
+ <p>
109
+ Mike Burns <a href="http://mike-burns.com">mike-burns.com</a>
110
+ mike@mike-burns.com
111
+ </p>
112
+ <p>
113
+ With help from Matt Horan and John Nagro. With `robot_rules&#8217; from
114
+ James Edward Gray II via <a
115
+ href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589">blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589</a>
116
+ </p>
117
+
118
+ </div>
119
+
120
+
121
+ </div>
122
+
123
+
124
+ </div>
125
+
126
+
127
+ <!-- if includes -->
128
+
129
+ <div id="section">
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+ <!-- if method_list -->
139
+
140
+
141
+ </div>
142
+
143
+
144
+ <div id="validator-badges">
145
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
146
+ </div>
147
+
148
+ </body>
149
+ </html>
@@ -0,0 +1,159 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: spider.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>spider.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/spider.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Mon Oct 22 07:19:31 -0400 2007</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <p>
73
+ Copyright 2007 Mike Burns. <a href="../../classes/Spider.html">Spider</a>, a
74
+ Web spidering library for Ruby. It handles the robots.txt, scraping,
75
+ collecting, and looping so that you can just handle the data.
76
+ </p>
77
+ <h2>Usage</h2>
78
+ <pre>
79
+ Spider.start_at('http://mike-burns.com/') do |s|
80
+ # Limit the pages to just this domain.
81
+ s.add_url_check do |a_url|
82
+ a_url =~ %r{^http://mike-burns.com.*}
83
+ end
84
+
85
+ # Handle 404s.
86
+ s.on 404 do |a_url, err_code|
87
+ puts &quot;URL not found: #{a_url}&quot;
88
+ end
89
+
90
+ # Handle 2xx.
91
+ s.on :success do |a_url, code, headers, body|
92
+ puts &quot;body: #{body}&quot;
93
+ end
94
+
95
+ # Handle everything.
96
+ s.on :any do |a_url, resp|
97
+ puts &quot;URL returned anything: #{a_url} with this code #{resp.code}&quot;
98
+ end
99
+ end
100
+ </pre>
101
+ <h2>Requirements</h2>
102
+ <p>
103
+ This library uses `robot_rules&#8217; (included), `open-uri&#8217;, and
104
+ `uri&#8217;. Any modern Ruby should work; if yours doesn&#8217;t, let me
105
+ know so I can update this with your version number.
106
+ </p>
107
+ <h2>Author</h2>
108
+ <p>
109
+ Mike Burns <a href="http://mike-burns.com">mike-burns.com</a>
110
+ mike@mike-burns.com
111
+ </p>
112
+ <p>
113
+ With help from Matt Horan and John Nagro. With `robot_rules&#8217; from
114
+ James Edward Gray II via <a
115
+ href="http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589">blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589</a>
116
+ </p>
117
+
118
+ </div>
119
+
120
+ <div id="requires-list">
121
+ <h3 class="section-bar">Required files</h3>
122
+
123
+ <div class="name-list">
124
+ robot_rules&nbsp;&nbsp;
125
+ open-uri&nbsp;&nbsp;
126
+ uri&nbsp;&nbsp;
127
+ net/http&nbsp;&nbsp;
128
+ </div>
129
+ </div>
130
+
131
+ </div>
132
+
133
+
134
+ </div>
135
+
136
+
137
+ <!-- if includes -->
138
+
139
+ <div id="section">
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+ <!-- if method_list -->
149
+
150
+
151
+ </div>
152
+
153
+
154
+ <div id="validator-badges">
155
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
156
+ </div>
157
+
158
+ </body>
159
+ </html>
@@ -0,0 +1,29 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Classes
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Classes</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Classes</h1>
22
+ <div id="index-entries">
23
+ <a href="classes/Net.html">Net</a><br />
24
+ <a href="classes/Spider.html">Spider</a><br />
25
+ <a href="classes/SpiderInstance.html">SpiderInstance</a><br />
26
+ </div>
27
+ </div>
28
+ </body>
29
+ </html>
@@ -0,0 +1,28 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Files
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Files</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Files</h1>
22
+ <div id="index-entries">
23
+ <a href="files/README.html">README</a><br />
24
+ <a href="files/lib/spider_rb.html">lib/spider.rb</a><br />
25
+ </div>
26
+ </div>
27
+ </body>
28
+ </html>
@@ -0,0 +1,29 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Methods
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Methods</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Methods</h1>
22
+ <div id="index-entries">
23
+ <a href="classes/SpiderInstance.html#M000001">add_url_check (SpiderInstance)</a><br />
24
+ <a href="classes/SpiderInstance.html#M000002">on (SpiderInstance)</a><br />
25
+ <a href="classes/Spider.html#M000003">start_at (Spider)</a><br />
26
+ </div>
27
+ </div>
28
+ </body>
29
+ </html>
data/doc/index.html ADDED
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5
+
6
+ <!--
7
+
8
+ RDoc Documentation
9
+
10
+ -->
11
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
12
+ <head>
13
+ <title>RDoc Documentation</title>
14
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
15
+ </head>
16
+ <frameset rows="20%, 80%">
17
+ <frameset cols="25%,35%,45%">
18
+ <frame src="fr_file_index.html" title="Files" name="Files" />
19
+ <frame src="fr_class_index.html" name="Classes" />
20
+ <frame src="fr_method_index.html" name="Methods" />
21
+ </frameset>
22
+ <frame src="files/lib/spider_rb.html" name="docwin" />
23
+ </frameset>
24
+ </html>
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
+ div#description h1,h2,h3,h4,h5,h6 {
100
+ color: #125;;
101
+ background: transparent;
102
+ }
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }