spidr 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/.specopts +1 -0
- data/.yardopts +1 -0
- data/{History.rdoc → ChangeLog.md} +47 -39
- data/LICENSE.txt +21 -0
- data/{README.rdoc → README.md} +57 -49
- data/Rakefile +36 -22
- data/lib/spidr/actions/actions.rb +4 -0
- data/lib/spidr/actions/exceptions/action.rb +3 -0
- data/lib/spidr/actions/exceptions/paused.rb +3 -0
- data/lib/spidr/actions/exceptions/skip_link.rb +4 -0
- data/lib/spidr/actions/exceptions/skip_page.rb +4 -0
- data/lib/spidr/agent.rb +61 -17
- data/lib/spidr/auth_credential.rb +3 -0
- data/lib/spidr/auth_store.rb +12 -8
- data/lib/spidr/cookie_jar.rb +4 -1
- data/lib/spidr/events.rb +25 -0
- data/lib/spidr/filters.rb +5 -1
- data/lib/spidr/page.rb +29 -24
- data/lib/spidr/rules.rb +4 -0
- data/lib/spidr/sanitizers.rb +4 -0
- data/lib/spidr/session_cache.rb +26 -1
- data/lib/spidr/version.rb +1 -1
- data/spec/auth_store_spec.rb +85 -0
- data/spec/cookie_jar_spec.rb +108 -0
- data/spec/page_spec.rb +0 -1
- data/spec/session_cache.rb +58 -0
- data/spidr.gemspec +115 -0
- metadata +99 -90
- data.tar.gz.sig +0 -2
- data/Manifest.txt +0 -41
- data/tasks/spec.rb +0 -10
- data/tasks/yard.rb +0 -12
- metadata.gz.sig +0 -0
data/.gitignore
ADDED
data/.specopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format specdoc
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup markdown --title 'Spidr Documentation' --protected --files ChangeLog.md,LICENSE.txt
|
@@ -1,4 +1,12 @@
|
|
1
|
-
|
1
|
+
### 0.2.3 / 2010-02-27
|
2
|
+
|
3
|
+
* Migrated to Jeweler for packaging and releasing RubyGems.
|
4
|
+
* Switched to Markdown-formatted YARD documentation.
|
5
|
+
* Added {Spidr::Events#every_link}.
|
6
|
+
* Added {Spidr::SessionCache#active?}.
|
7
|
+
* Added specs for {Spidr::SessionCache}.
|
8
|
+
|
9
|
+
### 0.2.2 / 2010-01-06
|
2
10
|
|
3
11
|
* Require Web Spider Obstacle Course (WSOC) >= 0.1.1.
|
4
12
|
* Integrated the new WSOC into the specs.
|
@@ -12,10 +20,10 @@
|
|
12
20
|
* Added {Spidr::CookieJar} (thanks Nick Plante).
|
13
21
|
* Added {Spidr::AuthStore} (thanks Nick Plante).
|
14
22
|
* Added {Spidr::Agent#post_page} (thanks Nick Plante).
|
15
|
-
* Renamed Spidr::Agent#get_session to {Spidr::SessionCache#[]}.
|
16
|
-
* Renamed Spidr::Agent#kill_session to {Spidr::SessionCache#kill!}.
|
23
|
+
* Renamed `Spidr::Agent#get_session` to {Spidr::SessionCache#[]}.
|
24
|
+
* Renamed `Spidr::Agent#kill_session` to {Spidr::SessionCache#kill!}.
|
17
25
|
|
18
|
-
|
26
|
+
### 0.2.1 / 2009-11-25
|
19
27
|
|
20
28
|
* Added {Spidr::Events#every_ok_page}.
|
21
29
|
* Added {Spidr::Events#every_redirect_page}.
|
@@ -44,9 +52,9 @@
|
|
44
52
|
* Added {Spidr::Events#every_zip_page}.
|
45
53
|
* Fixed a bug where {Spidr::Agent#delay} was not being used to delay
|
46
54
|
requesting pages.
|
47
|
-
* Spider
|
55
|
+
* Spider `link` and `script` tags in HTML pages (thanks Nick Plante).
|
48
56
|
|
49
|
-
|
57
|
+
### 0.2.0 / 2009-10-10
|
50
58
|
|
51
59
|
* Added {URI.expand_path}.
|
52
60
|
* Added {Spidr::Page#search}.
|
@@ -54,16 +62,16 @@
|
|
54
62
|
* Added {Spidr::Page#title}.
|
55
63
|
* Added {Spidr::Agent#failures=}.
|
56
64
|
* Added a HTTP session cache to {Spidr::Agent}, per suggestion of falter.
|
57
|
-
* Added Spidr::Agent#get_session
|
58
|
-
* Added Spidr::Agent#kill_session
|
65
|
+
* Added `Spidr::Agent#get_session`.
|
66
|
+
* Added `Spidr::Agent#kill_session`.
|
59
67
|
* Added {Spidr.proxy=}.
|
60
68
|
* Added {Spidr.disable_proxy!}.
|
61
|
-
* Aliased Spidr::Page#txt
|
62
|
-
* Aliased Spidr::Page#ok
|
63
|
-
* Aliased Spidr::Page#redirect
|
64
|
-
* Aliased Spidr::Page#unauthorized
|
65
|
-
* Aliased Spidr::Page#forbidden
|
66
|
-
* Aliased Spidr::Page#missing
|
69
|
+
* Aliased `Spidr::Page#txt?` to {Spidr::Page#plain_text?}.
|
70
|
+
* Aliased `Spidr::Page#ok?` to {Spidr::Page#is_ok?}.
|
71
|
+
* Aliased `Spidr::Page#redirect?` to {Spidr::Page#is_redirect?}.
|
72
|
+
* Aliased `Spidr::Page#unauthorized?` to {Spidr::Page#is_unauthorized?}.
|
73
|
+
* Aliased `Spidr::Page#forbidden?` to {Spidr::Page#is_forbidden?}.
|
74
|
+
* Aliased `Spidr::Page#missing?` to {Spidr::Page#is_missing?}.
|
67
75
|
* Split URL filtering code out of {Spidr::Agent} and into
|
68
76
|
{Spidr::Filters}.
|
69
77
|
* Split URL / Page event code out of {Spidr::Agent} and into
|
@@ -71,11 +79,11 @@
|
|
71
79
|
* Split pause! / continue! / skip_link! / skip_page! methods out of
|
72
80
|
{Spidr::Agent} and into {Spidr::Actions}.
|
73
81
|
* Fixed a bug in {Spidr::Page#code}, where it was not returning an Integer.
|
74
|
-
* Make sure {Spidr::Page#doc} returns Nokogiri::XML::Document objects for
|
82
|
+
* Make sure {Spidr::Page#doc} returns `Nokogiri::XML::Document` objects for
|
75
83
|
RSS/RDF/Atom pages as well.
|
76
84
|
* Fixed the handling of the Location header in {Spidr::Page#links}
|
77
85
|
(thanks falter).
|
78
|
-
* Fixed a bug in {Spidr::Page#to_absolute} where trailing
|
86
|
+
* Fixed a bug in {Spidr::Page#to_absolute} where trailing `/` characters on
|
79
87
|
URI paths were not being preserved (thanks falter).
|
80
88
|
* Fixed a bug where the URI query was not being sent with the request
|
81
89
|
in {Spidr::Agent#get_page} (thanks Damian Steer).
|
@@ -86,17 +94,17 @@
|
|
86
94
|
* Switched {Spidr::Agent#failures} to a Set.
|
87
95
|
* Allow a block to be passed to {Spidr::Agent#run}, which will receive all
|
88
96
|
pages visited.
|
89
|
-
* Allow Spidr::Agent#start_at and Spidr::Agent#continue
|
97
|
+
* Allow `Spidr::Agent#start_at` and `Spidr::Agent#continue!` to pass blocks
|
90
98
|
to {Spidr::Agent#run}.
|
91
99
|
* Made {Spidr::Agent#visit_page} public.
|
92
100
|
* Moved to YARD based documentation.
|
93
101
|
|
94
|
-
|
102
|
+
### 0.1.9 / 2009-06-13
|
95
103
|
|
96
104
|
* Upgraded to Hoe 2.0.0.
|
97
105
|
* Use Hoe.spec instead of Hoe.new.
|
98
106
|
* Use the Hoe signing task for signed gems.
|
99
|
-
* Added the Spidr::Agent#schemes and Spidr::Agent#schemes
|
107
|
+
* Added the `Spidr::Agent#schemes` and `Spidr::Agent#schemes=` methods.
|
100
108
|
* Added a warning message if 'net/https' cannot be loaded.
|
101
109
|
* Allow the list of acceptable URL schemes to be passed into
|
102
110
|
{Spidr::Agent#initialize}.
|
@@ -108,10 +116,10 @@
|
|
108
116
|
could not be loaded.
|
109
117
|
* Removed Spidr::Agent::SCHEMES.
|
110
118
|
|
111
|
-
|
119
|
+
### 0.1.8 / 2009-05-27
|
112
120
|
|
113
|
-
* Added the Spidr::Agent#pause
|
114
|
-
* Added the Spidr::Agent#running
|
121
|
+
* Added the `Spidr::Agent#pause!` and `Spidr::Agent#continue!` methods.
|
122
|
+
* Added the `Spidr::Agent#running?` and `Spidr::Agent#paused?` methods.
|
115
123
|
* Added an alias for pending_urls to the queue methods.
|
116
124
|
* Added {Spidr::Agent#queue} to provide read access to the queue.
|
117
125
|
* Added {Spidr::Agent#queue=} and {Spidr::Agent#history=} for setting the
|
@@ -121,49 +129,49 @@
|
|
121
129
|
* Made {Spidr::Agent#enqueue} and {Spidr::Agent#queued?} public.
|
122
130
|
* Added more specs.
|
123
131
|
|
124
|
-
|
132
|
+
### 0.1.7 / 2009-04-24
|
125
133
|
|
126
|
-
* Added Spidr::Agent#all_headers
|
127
|
-
* Fixed a bug where Page#headers was always
|
134
|
+
* Added `Spidr::Agent#all_headers`.
|
135
|
+
* Fixed a bug where {Spidr::Page#headers} was always `nil`.
|
128
136
|
* {Spidr::Agent} will now follow the Location header in HTTP 300,
|
129
137
|
301, 302, 303 and 307 Redirects.
|
130
138
|
* {Spidr::Agent} will now follow iframe and frame tags.
|
131
139
|
|
132
|
-
|
140
|
+
### 0.1.6 / 2009-04-14
|
133
141
|
|
134
142
|
* Added {Spidr::Agent#failures}, a list of URLs which could not be visited.
|
135
143
|
* Added {Spidr::Agent#failed?}.
|
136
|
-
* Added Spidr::Agent#every_failed_url
|
144
|
+
* Added `Spidr::Agent#every_failed_url`.
|
137
145
|
* Added {Spidr::Agent#clear}, which clears the history and failures URL
|
138
146
|
lists.
|
139
147
|
* Improved fault tolerance in {Spidr::Agent#get_page}.
|
140
148
|
* If a Network or HTTP error is encountered, the URL will be added to
|
141
149
|
the failures list and the next URL will be visited.
|
142
|
-
* Fixed a typo in Spidr::Agent#ignore_exts_like
|
150
|
+
* Fixed a typo in `Spidr::Agent#ignore_exts_like`.
|
143
151
|
* Updated the Web Spider Obstacle Course with links that always fail to be
|
144
152
|
visited.
|
145
153
|
|
146
|
-
|
154
|
+
### 0.1.5 / 2009-03-22
|
147
155
|
|
148
|
-
* Catch malformed URIs in {Spidr::Page#to_absolute} and return
|
149
|
-
* Filter out
|
156
|
+
* Catch malformed URIs in {Spidr::Page#to_absolute} and return `nil`.
|
157
|
+
* Filter out `nil` URIs in {Spidr::Page#urls}.
|
150
158
|
|
151
|
-
|
159
|
+
### 0.1.4 / 2009-01-15
|
152
160
|
|
153
161
|
* Use Nokogiri for HTML and XML parsing.
|
154
162
|
|
155
|
-
|
163
|
+
### 0.1.3 / 2009-01-10
|
156
164
|
|
157
|
-
* Added the
|
165
|
+
* Added the `:host` option to {Spidr::Agent#initialize}.
|
158
166
|
* Added the Web Spider Obstacle Course files to the Manifest.
|
159
167
|
* Aliased {Spidr::Agent#visited_urls} to {Spidr::Agent#history}.
|
160
168
|
|
161
|
-
|
169
|
+
### 0.1.2 / 2008-11-06
|
162
170
|
|
163
171
|
* Fixed a bug in {Spidr::Page#to_absolute} where URLs with no path were not
|
164
|
-
receiving a default path of
|
172
|
+
receiving a default path of `/`.
|
165
173
|
* Fixed a bug in {Spidr::Page#to_absolute} where URL paths were not being
|
166
|
-
expanded, in order to remove
|
174
|
+
expanded, in order to remove `..` and `.` directories.
|
167
175
|
* Fixed a bug where absolute URLs could have a blank path, thus causing
|
168
176
|
{Spidr::Agent#get_page} to crash when it performed the HTTP request.
|
169
177
|
* Added RSpec spec tests.
|
@@ -171,12 +179,12 @@
|
|
171
179
|
(http://spidr.rubyforge.org/course/start.html) which is used in the spec
|
172
180
|
tests.
|
173
181
|
|
174
|
-
|
182
|
+
### 0.1.1 / 2008-10-04
|
175
183
|
|
176
184
|
* Added a reader method for the response instance variable in Page.
|
177
185
|
* Fixed a bug in {Spidr::Page#method_missing}.
|
178
186
|
|
179
|
-
|
187
|
+
### 0.1.0 / 2008-05-23
|
180
188
|
|
181
189
|
* Initial release.
|
182
190
|
* Black-list or white-list URLs based upon:
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
Copyright (c) 2008-2010 Hal Brodigan
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
'Software'), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,18 +1,18 @@
|
|
1
|
-
|
1
|
+
# Spidr
|
2
2
|
|
3
|
-
* http://spidr.rubyforge.org
|
4
|
-
* http://github.com/postmodern/spidr
|
5
|
-
* http://github.com/postmodern/spidr/issues
|
6
|
-
* http://groups.google.com/group/spidr
|
3
|
+
* [spidr.rubyforge.org](http://spidr.rubyforge.org/)
|
4
|
+
* [github.com/postmodern/spidr](http://github.com/postmodern/spidr)
|
5
|
+
* [github.com/postmodern/spidr/issues](http://github.com/postmodern/spidr/issues)
|
6
|
+
* [groups.google.com/group/spidr](http://groups.google.com/group/spidr)
|
7
7
|
* irc.freenode.net #spidr
|
8
8
|
|
9
|
-
|
9
|
+
## Description
|
10
10
|
|
11
11
|
Spidr is a versatile Ruby web spidering library that can spider a site,
|
12
12
|
multiple domains, certain links or infinitely. Spidr is designed to be fast
|
13
13
|
and easy to use.
|
14
14
|
|
15
|
-
|
15
|
+
## Features
|
16
16
|
|
17
17
|
* Follows:
|
18
18
|
* a tags.
|
@@ -31,6 +31,7 @@ and easy to use.
|
|
31
31
|
* Every visited Page.
|
32
32
|
* Every visited URL.
|
33
33
|
* Every visited URL that matches a specified pattern.
|
34
|
+
* Every origin and destination URI of a link.
|
34
35
|
* Every URL that failed to be visited.
|
35
36
|
* Provides action methods to:
|
36
37
|
* Pause spidering.
|
@@ -39,22 +40,23 @@ and easy to use.
|
|
39
40
|
* Restore the spidering queue and history from a previous session.
|
40
41
|
* Custom User-Agent strings.
|
41
42
|
* Custom proxy settings.
|
43
|
+
* HTTPS support.
|
42
44
|
|
43
|
-
|
45
|
+
## Examples
|
44
46
|
|
45
|
-
|
47
|
+
Start spidering from a URL:
|
46
48
|
|
47
49
|
Spidr.start_at('http://tenderlovemaking.com/')
|
48
50
|
|
49
|
-
|
51
|
+
Spider a host:
|
50
52
|
|
51
53
|
Spidr.host('coderrr.wordpress.com')
|
52
54
|
|
53
|
-
|
55
|
+
Spider a site:
|
54
56
|
|
55
57
|
Spidr.site('http://rubyflow.com/')
|
56
58
|
|
57
|
-
|
59
|
+
Spider multiple hosts:
|
58
60
|
|
59
61
|
Spidr.start_at(
|
60
62
|
'http://company.com/',
|
@@ -64,30 +66,56 @@ and easy to use.
|
|
64
66
|
]
|
65
67
|
)
|
66
68
|
|
67
|
-
|
69
|
+
Do not spider certain links:
|
68
70
|
|
69
71
|
Spidr.site('http://matasano.com/', :ignore_links => [/log/])
|
70
72
|
|
71
|
-
|
73
|
+
Do not spider links on certain ports:
|
72
74
|
|
73
75
|
Spidr.site(
|
74
76
|
'http://sketchy.content.com/',
|
75
77
|
:ignore_ports => [8000, 8010, 8080]
|
76
78
|
)
|
77
79
|
|
78
|
-
|
80
|
+
Print out visited URLs:
|
79
81
|
|
80
82
|
Spidr.site('http://rubyinside.org/') do |spider|
|
81
83
|
spider.every_url { |url| puts url }
|
82
84
|
end
|
83
85
|
|
84
|
-
|
86
|
+
Build a URL map of a site:
|
87
|
+
|
88
|
+
url_map = Hash.new { |hash,key| hash[key] = [] }
|
89
|
+
|
90
|
+
Spidr.site('http://intranet.com/') do |spider|
|
91
|
+
spider.every_link do |origin,dest|
|
92
|
+
url_map[dest] << origin
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
Print out the URLs that could not be requested:
|
85
97
|
|
86
98
|
Spidr.site('http://sketchy.content.com/') do |spider|
|
87
99
|
spider.every_failed_url { |url| puts url }
|
88
100
|
end
|
89
101
|
|
90
|
-
|
102
|
+
Find all pages which have broken links:
|
103
|
+
|
104
|
+
url_map = Hash.new { |hash,key| hash[key] = [] }
|
105
|
+
|
106
|
+
spider = Spidr.site('http://intranet.com/') do |spider|
|
107
|
+
spider.every_link do |origin,dest|
|
108
|
+
url_map[dest] << origin
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
spider.failures.each do |url|
|
113
|
+
puts "Broken link #{url} found in:"
|
114
|
+
|
115
|
+
url_map[url].each { |page| puts " #{page}" }
|
116
|
+
end
|
117
|
+
|
118
|
+
Search HTML and XML pages:
|
91
119
|
|
92
120
|
Spidr.site('http://company.withablog.com/') do |spider|
|
93
121
|
spider.every_page do |page|
|
@@ -98,11 +126,11 @@ and easy to use.
|
|
98
126
|
value = meta.attributes['content']
|
99
127
|
|
100
128
|
puts " #{name} = #{value}"
|
101
|
-
|
129
|
+
end
|
102
130
|
end
|
103
131
|
end
|
104
132
|
|
105
|
-
|
133
|
+
Print out the titles from every page:
|
106
134
|
|
107
135
|
Spidr.site('http://www.rubypulse.com/') do |spider|
|
108
136
|
spider.every_html_page do |page|
|
@@ -110,7 +138,7 @@ and easy to use.
|
|
110
138
|
end
|
111
139
|
end
|
112
140
|
|
113
|
-
|
141
|
+
Find what kinds of web servers a host is using, by accessing the headers:
|
114
142
|
|
115
143
|
servers = Set[]
|
116
144
|
|
@@ -120,7 +148,7 @@ and easy to use.
|
|
120
148
|
end
|
121
149
|
end
|
122
150
|
|
123
|
-
|
151
|
+
Pause the spider on a forbidden page:
|
124
152
|
|
125
153
|
spider = Spidr.host('overnight.startup.com') do |spider|
|
126
154
|
spider.every_forbidden_page do |page|
|
@@ -128,7 +156,7 @@ and easy to use.
|
|
128
156
|
end
|
129
157
|
end
|
130
158
|
|
131
|
-
|
159
|
+
Skip the processing of a page:
|
132
160
|
|
133
161
|
Spidr.host('sketchy.content.com') do |spider|
|
134
162
|
spider.every_missing_page do |page|
|
@@ -136,7 +164,7 @@ and easy to use.
|
|
136
164
|
end
|
137
165
|
end
|
138
166
|
|
139
|
-
|
167
|
+
Skip the processing of links:
|
140
168
|
|
141
169
|
Spidr.host('sketchy.content.com') do |spider|
|
142
170
|
spider.every_url do |url|
|
@@ -146,35 +174,15 @@ and easy to use.
|
|
146
174
|
end
|
147
175
|
end
|
148
176
|
|
149
|
-
|
150
|
-
|
151
|
-
* {nokogiri}[http://nokogiri.rubyforge.org/] >= 1.2.0
|
152
|
-
|
153
|
-
== INSTALL:
|
154
|
-
|
155
|
-
$ sudo gem install spidr
|
177
|
+
## Requirements
|
156
178
|
|
157
|
-
|
179
|
+
* [nokogiri](http://nokogiri.rubyforge.org/) >= 1.2.0
|
158
180
|
|
159
|
-
|
181
|
+
## Install
|
160
182
|
|
161
|
-
|
183
|
+
$ sudo gem install spidr
|
162
184
|
|
163
|
-
|
164
|
-
a copy of this software and associated documentation files (the
|
165
|
-
'Software'), to deal in the Software without restriction, including
|
166
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
167
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
168
|
-
permit persons to whom the Software is furnished to do so, subject to
|
169
|
-
the following conditions:
|
185
|
+
## License
|
170
186
|
|
171
|
-
|
172
|
-
included in all copies or substantial portions of the Software.
|
187
|
+
See {file:LICENSE.txt} for license information.
|
173
188
|
|
174
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
175
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
176
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
177
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
178
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
179
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
180
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
CHANGED
@@ -1,29 +1,43 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
|
3
1
|
require 'rubygems'
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
require './tasks/spec.rb'
|
7
|
-
require './tasks/yard.rb'
|
2
|
+
require 'rake'
|
3
|
+
require './lib/spidr/version.rb'
|
8
4
|
|
9
|
-
|
10
|
-
|
5
|
+
begin
|
6
|
+
require 'jeweler'
|
7
|
+
Jeweler::Tasks.new do |gem|
|
8
|
+
gem.name = 'spidr'
|
9
|
+
gem.version = Spidr::VERSION
|
10
|
+
gem.summary = %Q{A versatile Ruby web spidering library}
|
11
|
+
gem.description = %Q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
12
|
+
gem.email = 'postmodern.mod3@gmail.com'
|
13
|
+
gem.homepage = 'http://github.com/postmodern/spidr'
|
14
|
+
gem.authors = ['Postmodern']
|
15
|
+
gem.add_dependency 'nokogiri', '>= 1.2.0'
|
16
|
+
gem.add_development_dependency 'rspec', '>= 1.3.0'
|
17
|
+
gem.add_development_dependency 'yard', '>= 0.5.3'
|
18
|
+
gem.add_development_dependency 'wsoc', '>= 0.1.1'
|
19
|
+
gem.has_rdoc = 'yard'
|
20
|
+
end
|
21
|
+
rescue LoadError
|
22
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
23
|
+
end
|
11
24
|
|
12
|
-
|
13
|
-
|
14
|
-
|
25
|
+
require 'spec/rake/spectask'
|
26
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
27
|
+
spec.libs += ['lib', 'spec']
|
28
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
29
|
+
spec.spec_opts = ['--options', '.specopts']
|
30
|
+
end
|
15
31
|
|
16
|
-
|
17
|
-
|
18
|
-
]
|
32
|
+
task :spec => :check_dependencies
|
33
|
+
task :default => :spec
|
19
34
|
|
20
|
-
|
21
|
-
|
22
|
-
['yard', '>=0.4.0'],
|
23
|
-
['wsoc', '>=0.1.1']
|
24
|
-
]
|
35
|
+
begin
|
36
|
+
require 'yard'
|
25
37
|
|
26
|
-
|
38
|
+
YARD::Rake::YardocTask.new
|
39
|
+
rescue LoadError
|
40
|
+
task :yard do
|
41
|
+
abort "YARD is not available. In order to run yard, you must: gem install yard"
|
42
|
+
end
|
27
43
|
end
|
28
|
-
|
29
|
-
# vim: syntax=Ruby
|