spidr 0.2.2 → 0.2.3
- data/.gitignore +8 -0
- data/.specopts +1 -0
- data/.yardopts +1 -0
- data/{History.rdoc → ChangeLog.md} +47 -39
- data/LICENSE.txt +21 -0
- data/{README.rdoc → README.md} +57 -49
- data/Rakefile +36 -22
- data/lib/spidr/actions/actions.rb +4 -0
- data/lib/spidr/actions/exceptions/action.rb +3 -0
- data/lib/spidr/actions/exceptions/paused.rb +3 -0
- data/lib/spidr/actions/exceptions/skip_link.rb +4 -0
- data/lib/spidr/actions/exceptions/skip_page.rb +4 -0
- data/lib/spidr/agent.rb +61 -17
- data/lib/spidr/auth_credential.rb +3 -0
- data/lib/spidr/auth_store.rb +12 -8
- data/lib/spidr/cookie_jar.rb +4 -1
- data/lib/spidr/events.rb +25 -0
- data/lib/spidr/filters.rb +5 -1
- data/lib/spidr/page.rb +29 -24
- data/lib/spidr/rules.rb +4 -0
- data/lib/spidr/sanitizers.rb +4 -0
- data/lib/spidr/session_cache.rb +26 -1
- data/lib/spidr/version.rb +1 -1
- data/spec/auth_store_spec.rb +85 -0
- data/spec/cookie_jar_spec.rb +108 -0
- data/spec/page_spec.rb +0 -1
- data/spec/session_cache.rb +58 -0
- data/spidr.gemspec +115 -0
- metadata +99 -90
- data.tar.gz.sig +0 -2
- data/Manifest.txt +0 -41
- data/tasks/spec.rb +0 -10
- data/tasks/yard.rb +0 -12
- metadata.gz.sig +0 -0
data/.gitignore
ADDED
data/.specopts
ADDED
@@ -0,0 +1 @@
+--colour --format specdoc
data/.yardopts
ADDED
@@ -0,0 +1 @@
+--markup markdown --title 'Spidr Documentation' --protected --files ChangeLog.md,LICENSE.txt
data/{History.rdoc → ChangeLog.md}
RENAMED
@@ -1,4 +1,12 @@
-=== 0.2.2 / 2010-01-06
+### 0.2.3 / 2010-02-27
+
+* Migrated to Jeweler, for the packaging and releasing RubyGems.
+* Switched to MarkDown formatted YARD documentation.
+* Added {Spidr::Events#every_link}.
+* Added {Spidr::SessionCache#active?}.
+* Added specs for {Spidr::SessionCache}.
+
+### 0.2.2 / 2010-01-06
 
 * Require Web Spider Obstacle Course (WSOC) >= 0.1.1.
 * Integrated the new WSOC into the specs.
@@ -12,10 +20,10 @@
 * Added {Spidr::CookieJar} (thanks Nick Plante).
 * Added {Spidr::AuthStore} (thanks Nick Plante).
 * Added {Spidr::Agent#post_page} (thanks Nick Plante).
-* Renamed Spidr::Agent#get_session to {Spidr::SessionCache#[]}.
-* Renamed Spidr::Agent#kill_session to {Spidr::SessionCache#kill!}.
+* Renamed `Spidr::Agent#get_session` to {Spidr::SessionCache#[]}.
+* Renamed `Spidr::Agent#kill_session` to {Spidr::SessionCache#kill!}.
 
-=== 0.2.1 / 2009-11-25
+### 0.2.1 / 2009-11-25
 
 * Added {Spidr::Events#every_ok_page}.
 * Added {Spidr::Events#every_redirect_page}.
@@ -44,9 +52,9 @@
 * Added {Spidr::Events#every_zip_page}.
 * Fixed a bug where {Spidr::Agent#delay} was not being used to delay
   requesting pages.
-* Spider link and script tags in HTML pages (thanks Nick Plante).
+* Spider `link` and `script` tags in HTML pages (thanks Nick Plante).
 
-=== 0.2.0 / 2009-10-10
+### 0.2.0 / 2009-10-10
 
 * Added {URI.expand_path}.
 * Added {Spidr::Page#search}.
@@ -54,16 +62,16 @@
 * Added {Spidr::Page#title}.
 * Added {Spidr::Agent#failures=}.
 * Added a HTTP session cache to {Spidr::Agent}, per suggestion of falter.
-* Added Spidr::Agent#get_session
-* Added Spidr::Agent#kill_session
+* Added `Spidr::Agent#get_session`.
+* Added `Spidr::Agent#kill_session`.
 * Added {Spidr.proxy=}.
 * Added {Spidr.disable_proxy!}.
-* Aliased Spidr::Page#txt
-* Aliased Spidr::Page#ok
-* Aliased Spidr::Page#redirect
-* Aliased Spidr::Page#unauthorized
-* Aliased Spidr::Page#forbidden
-* Aliased Spidr::Page#missing
+* Aliased `Spidr::Page#txt?` to {Spidr::Page#plain_text?}.
+* Aliased `Spidr::Page#ok?` to {Spidr::Page#is_ok?}.
+* Aliased `Spidr::Page#redirect?` to {Spidr::Page#is_redirect?}.
+* Aliased `Spidr::Page#unauthorized?` to {Spidr::Page#is_unauthorized?}.
+* Aliased `Spidr::Page#forbidden?` to {Spidr::Page#is_forbidden?}.
+* Aliased `Spidr::Page#missing?` to {Spidr::Page#is_missing?}.
 * Split URL filtering code out of {Spidr::Agent} and into
   {Spidr::Filters}.
 * Split URL / Page event code out of {Spidr::Agent} and into
@@ -71,11 +79,11 @@
 * Split pause! / continue! / skip_link! / skip_page! methods out of
   {Spidr::Agent} and into {Spidr::Actions}.
 * Fixed a bug in {Spidr::Page#code}, where it was not returning an Integer.
-* Make sure {Spidr::Page#doc} returns Nokogiri::XML::Document objects for
+* Make sure {Spidr::Page#doc} returns `Nokogiri::XML::Document` objects for
   RSS/RDF/Atom pages as well.
 * Fixed the handling of the Location header in {Spidr::Page#links}
   (thanks falter).
-* Fixed a bug in {Spidr::Page#to_absolute} where trailing / characters on
+* Fixed a bug in {Spidr::Page#to_absolute} where trailing `/` characters on
   URI paths were not being preserved (thanks falter).
 * Fixed a bug where the URI query was not being sent with the request
   in {Spidr::Agent#get_page} (thanks Damian Steer).
@@ -86,17 +94,17 @@
 * Switched {Spidr::Agent#failures} to a Set.
 * Allow a block to be passed to {Spidr::Agent#run}, which will receive all
   pages visited.
-* Allow Spidr::Agent#start_at and Spidr::Agent#continue! to pass blocks
+* Allow `Spidr::Agent#start_at` and `Spidr::Agent#continue!` to pass blocks
   to {Spidr::Agent#run}.
 * Made {Spidr::Agent#visit_page} public.
 * Moved to YARD based documentation.
 
-=== 0.1.9 / 2009-06-13
+### 0.1.9 / 2009-06-13
 
 * Upgraded to Hoe 2.0.0.
 * Use Hoe.spec instead of Hoe.new.
 * Use the Hoe signing task for signed gems.
-* Added the Spidr::Agent#schemes and Spidr::Agent#schemes= methods.
+* Added the `Spidr::Agent#schemes` and `Spidr::Agent#schemes=` methods.
 * Added a warning message if 'net/https' cannot be loaded.
 * Allow the list of acceptable URL schemes to be passed into
   {Spidr::Agent#initialize}.
@@ -108,10 +116,10 @@
   could not be loaded.
 * Removed Spidr::Agent::SCHEMES.
 
-=== 0.1.8 / 2009-05-27
+### 0.1.8 / 2009-05-27
 
-* Added the Spidr::Agent#pause! and Spidr::Agent#continue! methods.
-* Added the Spidr::Agent#running? and Spidr::Agent#paused? methods.
+* Added the `Spidr::Agent#pause!` and `Spidr::Agent#continue!` methods.
+* Added the `Spidr::Agent#running?` and `Spidr::Agent#paused?` methods.
 * Added an alias for pending_urls to the queue methods.
 * Added {Spidr::Agent#queue} to provide read access to the queue.
 * Added {Spidr::Agent#queue=} and {Spidr::Agent#history=} for setting the
@@ -121,49 +129,49 @@
 * Made {Spidr::Agent#enqueue} and {Spidr::Agent#queued?} public.
 * Added more specs.
 
-=== 0.1.7 / 2009-04-24
+### 0.1.7 / 2009-04-24
 
-* Added Spidr::Agent#all_headers
-* Fixed a bug where Page#headers was always nil.
+* Added `Spidr::Agent#all_headers`.
+* Fixed a bug where {Spidr::Page#headers} was always `nil`.
 * {Spidr::Spidr::Agent} will now follow the Location header in HTTP 300,
   301, 302, 303 and 307 Redirects.
 * {Spidr::Agent} will now follow iframe and frame tags.
 
-=== 0.1.6 / 2009-04-14
+### 0.1.6 / 2009-04-14
 
 * Added {Spidr::Agent#failures}, a list of URLs which could not be visited.
 * Added {Spidr::Agent#failed?}.
-* Added Spidr::Agent#every_failed_url
+* Added `Spidr::Agent#every_failed_url`.
 * Added {Spidr::Agent#clear}, which clears the history and failures URL
   lists.
 * Improved fault tolerance in {Spidr::Agent#get_page}.
   * If a Network or HTTP error is encountered, the URL will be added to
     the failures list and the next URL will be visited.
-* Fixed a typo in Spidr::Agent#ignore_exts_like
+* Fixed a typo in `Spidr::Agent#ignore_exts_like`.
 * Updated the Web Spider Obstacle Course with links that always fail to be
   visited.
 
-=== 0.1.5 / 2009-03-22
+### 0.1.5 / 2009-03-22
 
-* Catch malformed URIs in {Spidr::Page#to_absolute} and return nil.
-* Filter out nil URIs in {Spidr::Page#urls}.
+* Catch malformed URIs in {Spidr::Page#to_absolute} and return `nil`.
+* Filter out `nil` URIs in {Spidr::Page#urls}.
 
-=== 0.1.4 / 2009-01-15
+### 0.1.4 / 2009-01-15
 
 * Use Nokogiri for HTML and XML parsing.
 
-=== 0.1.3 / 2009-01-10
+### 0.1.3 / 2009-01-10
 
-* Added the :host options to {Spidr::Agent#initialize}.
+* Added the `:host` options to {Spidr::Agent#initialize}.
 * Added the Web Spider Obstacle Course files to the Manifest.
 * Aliased {Spidr::Agent#visited_urls} to {Spidr::Agent#history}.
 
-=== 0.1.2 / 2008-11-06
+### 0.1.2 / 2008-11-06
 
 * Fixed a bug in {Spidr::Page#to_absolute} where URLs with no path were not
-  receiving a default path of /.
+  receiving a default path of `/`.
 * Fixed a bug in {Spidr::Page#to_absolute} where URL paths were not being
-  expanded, in order to remove .. and . directories.
+  expanded, in order to remove `..` and `.` directories.
 * Fixed a bug where absolute URLs could have a blank path, thus causing
   {Spidr::Agent#get_page} to crash when it performed the HTTP request.
 * Added RSpec spec tests.
@@ -171,12 +179,12 @@
   (http://spidr.rubyforge.org/course/start.html) which is used in the spec
   tests.
 
-=== 0.1.1 / 2008-10-04
+### 0.1.1 / 2008-10-04
 
 * Added a reader method for the response instance variable in Page.
 * Fixed a bug in {Spidr::Page#method_missing}.
 
-=== 0.1.0 / 2008-05-23
+### 0.1.0 / 2008-05-23
 
 * Initial release.
 * Black-list or white-list URLs based upon:
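The 0.2.3 entries above introduce {Spidr::Events#every_link} and {Spidr::SessionCache#active?}. Below is a minimal sketch of how those two additions might be used together; the crawled URL is a placeholder, and the `active?(url)` argument type is assumed from the changelog entry rather than confirmed against the gem's documentation.

    require 'spidr'
    require 'uri'

    # Record the origin of every link pointing at each destination URL
    # (every_link yields the origin and destination URIs, as in the README
    # URL-map example further below).
    url_map = Hash.new { |hash,key| hash[key] = [] }

    Spidr.site('http://www.example.com/') do |spider|
      spider.every_link do |origin,dest|
        url_map[dest] << origin
      end
    end

    # SessionCache#active? reports whether an HTTP session is already open
    # for a URL's host and port; passing a URI here is an assumption.
    sessions = Spidr::SessionCache.new
    sessions[URI('http://www.example.com/')]  # opens and caches a session
    puts sessions.active?(URI('http://www.example.com/'))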
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+
+Copyright (c) 2008-2010 Hal Brodigan
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/{README.rdoc → README.md}
RENAMED
@@ -1,18 +1,18 @@
-
+# Spidr
 
-* http://spidr.rubyforge.org
-* http://github.com/postmodern/spidr
-* http://github.com/postmodern/spidr/issues
-* http://groups.google.com/group/spidr
+* [spidr.rubyforge.org](http://spidr.rubyforge.org/)
+* [github.com/postmodern/spidr](http://github.com/postmodern/spidr)
+* [github.com/postmodern/spidr/issues](http://github.com/postmodern/spidr/issues)
+* [groups.google.com/group/spidr](http://groups.google.com/group/spidr)
 * irc.freenode.net #spidr
 
-
+## Description
 
 Spidr is a versatile Ruby web spidering library that can spider a site,
 multiple domains, certain links or infinitely. Spidr is designed to be fast
 and easy to use.
 
-
+## Features
 
 * Follows:
   * a tags.
@@ -31,6 +31,7 @@ and easy to use.
   * Every visited Page.
   * Every visited URL.
   * Every visited URL that matches a specified pattern.
+  * Every origin and destination URI of a link.
   * Every URL that failed to be visited.
 * Provides action methods to:
   * Pause spidering.
@@ -39,22 +40,23 @@ and easy to use.
   * Restore the spidering queue and history from a previous session.
 * Custom User-Agent strings.
 * Custom proxy settings.
+* HTTPS support.
 
-
+## Examples
 
-
+Start spidering from a URL:
 
     Spidr.start_at('http://tenderlovemaking.com/')
 
-
+Spider a host:
 
     Spidr.host('coderrr.wordpress.com')
 
-
+Spider a site:
 
     Spidr.site('http://rubyflow.com/')
 
-
+Spider multiple hosts:
 
     Spidr.start_at(
       'http://company.com/',
@@ -64,30 +66,56 @@ and easy to use.
       ]
     )
 
-
+Do not spider certain links:
 
     Spidr.site('http://matasano.com/', :ignore_links => [/log/])
 
-
+Do not spider links on certain ports:
 
     Spidr.site(
       'http://sketchy.content.com/',
       :ignore_ports => [8000, 8010, 8080]
     )
 
-
+Print out visited URLs:
 
     Spidr.site('http://rubyinside.org/') do |spider|
       spider.every_url { |url| puts url }
     end
 
-
+Build a URL map of a site:
+
+    url_map = Hash.new { |hash,key| hash[key] = [] }
+
+    Spidr.site('http://intranet.com/') do |spider|
+      spider.every_link do |origin,dest|
+        url_map[dest] << origin
+      end
+    end
+
+Print out the URLs that could not be requested:
 
     Spidr.site('http://sketchy.content.com/') do |spider|
       spider.every_failed_url { |url| puts url }
     end
 
-
+Finds all pages which have broken links:
+
+    url_map = Hash.new { |hash,key| hash[key] = [] }
+
+    spider = Spidr.site('http://intranet.com/') do |spider|
+      spider.every_link do |origin,dest|
+        url_map[dest] << origin
+      end
+    end
+
+    spider.failures.each do |url|
+      puts "Broken link #{url} found in:"
+
+      url_map[url].each { |page| puts "  #{page}" }
+    end
+
+Search HTML and XML pages:
 
     Spidr.site('http://company.withablog.com/') do |spider|
       spider.every_page do |page|
@@ -98,11 +126,11 @@ and easy to use.
           value = meta.attributes['content']
 
           puts "  #{name} = #{value}"
-
+        end
       end
     end
 
-
+Print out the titles from every page:
 
     Spidr.site('http://www.rubypulse.com/') do |spider|
      spider.every_html_page do |page|
@@ -110,7 +138,7 @@ and easy to use.
      end
    end
 
-
+Find what kinds of web servers a host is using, by accessing the headers:
 
    servers = Set[]
 
@@ -120,7 +148,7 @@ and easy to use.
      end
    end
 
-
+Pause the spider on a forbidden page:
 
    spider = Spidr.host('overnight.startup.com') do |spider|
      spider.every_forbidden_page do |page|
@@ -128,7 +156,7 @@ and easy to use.
      end
    end
 
-
+Skip the processing of a page:
 
    Spidr.host('sketchy.content.com') do |spider|
      spider.every_missing_page do |page|
@@ -136,7 +164,7 @@ and easy to use.
      end
    end
 
-
+Skip the processing of links:
 
    Spidr.host('sketchy.content.com') do |spider|
      spider.every_url do |url|
@@ -146,35 +174,15 @@ and easy to use.
      end
    end
 
-
-
-* {nokogiri}[http://nokogiri.rubyforge.org/] >= 1.2.0
-
-== INSTALL:
-
-  $ sudo gem install spidr
+## Requirements
 
-
+* [nokogiri](http://nokogiri.rubyforge.org/) >= 1.2.0
 
-
+## Install
 
-
+    $ sudo gem install spidr
 
-
-a copy of this software and associated documentation files (the
-'Software'), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
+## License
 
-
-included in all copies or substantial portions of the Software.
+See {file:LICENSE.txt} for license information.
 
-THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
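The README's feature list above mentions custom proxy settings, and the changelog notes {Spidr.proxy=} and {Spidr.disable_proxy!}. A minimal sketch of proxied spidering follows; the proxy host and port are placeholders, and the option keys (:host, :port) are assumed rather than taken from the gem's documentation.

    require 'spidr'

    # Route all requests through a proxy; :host and :port are assumed keys.
    Spidr.proxy = { :host => 'proxy.example.com', :port => 8080 }

    Spidr.site('http://rubyflow.com/') do |spider|
      spider.every_url { |url| puts url }
    end

    # Clear the proxy settings again.
    Spidr.disable_proxy!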
data/Rakefile
CHANGED
@@ -1,29 +1,43 @@
-# -*- ruby -*-
-
 require 'rubygems'
-require '
-require '
-require './tasks/spec.rb'
-require './tasks/yard.rb'
+require 'rake'
+require './lib/spidr/version.rb'
 
-
-
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = 'spidr'
+    gem.version = Spidr::VERSION
+    gem.summary = %Q{A versatile Ruby web spidering library}
+    gem.description = %Q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
+    gem.email = 'postmodern.mod3@gmail.com'
+    gem.homepage = 'http://github.com/postmodern/spidr'
+    gem.authors = ['Postmodern']
+    gem.add_dependency 'nokogiri', '>= 1.2.0'
+    gem.add_development_dependency 'rspec', '>= 1.3.0'
+    gem.add_development_dependency 'yard', '>= 0.5.3'
+    gem.add_development_dependency 'wsoc', '>= 0.1.1'
+    gem.has_rdoc = 'yard'
+  end
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
 
-
-
-
+require 'spec/rake/spectask'
+Spec::Rake::SpecTask.new(:spec) do |spec|
+  spec.libs += ['lib', 'spec']
+  spec.spec_files = FileList['spec/**/*_spec.rb']
+  spec.spec_opts = ['--options', '.specopts']
+end
 
-
-
-]
+task :spec => :check_dependencies
+task :default => :spec
 
-
-
-  ['yard', '>=0.4.0'],
-  ['wsoc', '>=0.1.1']
-]
+begin
+  require 'yard'
 
-
+  YARD::Rake::YardocTask.new
+rescue LoadError
+  task :yard do
+    abort "YARD is not available. In order to run yard, you must: gem install yard"
+  end
 end
-
-# vim: syntax=Ruby