spidr 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.rdoc +191 -0
- data/Manifest.txt +10 -34
- data/{README.txt → README.rdoc} +3 -1
- data/Rakefile +6 -4
- data/lib/spidr/agent.rb +137 -97
- data/lib/spidr/auth_credential.rb +25 -0
- data/lib/spidr/auth_store.rb +157 -0
- data/lib/spidr/cookie_jar.rb +166 -0
- data/lib/spidr/filters.rb +2 -0
- data/lib/spidr/page.rb +75 -11
- data/lib/spidr/sanitizers.rb +59 -0
- data/lib/spidr/session_cache.rb +119 -0
- data/lib/spidr/version.rb +1 -1
- data/spec/agent_spec.rb +2 -2
- data/spec/helpers/history.rb +34 -0
- data/spec/helpers/wsoc.rb +83 -0
- data/spec/page_examples.rb +5 -1
- data/spec/page_spec.rb +30 -0
- data/spec/sanitizers_spec.rb +67 -0
- data/tasks/yard.rb +1 -1
- metadata +24 -40
- metadata.gz.sig +0 -0
- data/History.txt +0 -167
- data/spec/helpers/course.rb +0 -95
- data/static/course/absolute/index.html +0 -10
- data/static/course/absolute/next.html +0 -9
- data/static/course/absolute/start.html +0 -19
- data/static/course/empty/index.html +0 -10
- data/static/course/empty/start.html +0 -23
- data/static/course/fail.html +0 -14
- data/static/course/frames/frame.html +0 -15
- data/static/course/frames/frame_next.html +0 -9
- data/static/course/frames/iframe.html +0 -15
- data/static/course/frames/iframe_next.html +0 -9
- data/static/course/frames/index.html +0 -10
- data/static/course/frames/start.html +0 -15
- data/static/course/index.html +0 -10
- data/static/course/javascript/index.html +0 -10
- data/static/course/javascript/start.html +0 -19
- data/static/course/loop/index.html +0 -10
- data/static/course/loop/next.html +0 -13
- data/static/course/loop/start.html +0 -19
- data/static/course/relative/current_directory.html +0 -9
- data/static/course/relative/index.html +0 -10
- data/static/course/relative/normal.html +0 -9
- data/static/course/relative/same_directory.html +0 -9
- data/static/course/relative/start.html +0 -27
- data/static/course/remote/index.html +0 -10
- data/static/course/remote/next.html +0 -9
- data/static/course/remote/start.html +0 -27
- data/static/course/scripts/course.js +0 -29
- data/static/course/scripts/jquery-1.2.6.min.js +0 -32
- data/static/course/specs.json +0 -1
- data/static/course/start.html +0 -27
- data/tasks/course.rb +0 -63
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: spidr
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Postmodern
|
|
@@ -30,7 +30,7 @@ cert_chain:
|
|
|
30
30
|
pDj+ws7QjtH/Qcrr1l9jfN0ehDs=
|
|
31
31
|
-----END CERTIFICATE-----
|
|
32
32
|
|
|
33
|
-
date:
|
|
33
|
+
date: 2010-01-06 00:00:00 -08:00
|
|
34
34
|
default_executable:
|
|
35
35
|
dependencies:
|
|
36
36
|
- !ruby/object:Gem::Dependency
|
|
@@ -63,6 +63,16 @@ dependencies:
|
|
|
63
63
|
- !ruby/object:Gem::Version
|
|
64
64
|
version: 0.4.0
|
|
65
65
|
version:
|
|
66
|
+
- !ruby/object:Gem::Dependency
|
|
67
|
+
name: wsoc
|
|
68
|
+
type: :development
|
|
69
|
+
version_requirement:
|
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: 0.1.1
|
|
75
|
+
version:
|
|
66
76
|
- !ruby/object:Gem::Dependency
|
|
67
77
|
name: hoe
|
|
68
78
|
type: :development
|
|
@@ -71,7 +81,7 @@ dependencies:
|
|
|
71
81
|
requirements:
|
|
72
82
|
- - ">="
|
|
73
83
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: 2.
|
|
84
|
+
version: 2.4.0
|
|
75
85
|
version:
|
|
76
86
|
description: |-
|
|
77
87
|
Spidr is a versatile Ruby web spidering library that can spider a site,
|
|
@@ -84,18 +94,17 @@ executables: []
|
|
|
84
94
|
extensions: []
|
|
85
95
|
|
|
86
96
|
extra_rdoc_files:
|
|
87
|
-
- History.txt
|
|
88
97
|
- Manifest.txt
|
|
89
|
-
- README.txt
|
|
90
98
|
files:
|
|
91
|
-
- History.txt
|
|
99
|
+
- History.rdoc
|
|
92
100
|
- Manifest.txt
|
|
93
|
-
- README.txt
|
|
101
|
+
- README.rdoc
|
|
94
102
|
- Rakefile
|
|
95
103
|
- lib/spidr.rb
|
|
96
104
|
- lib/spidr/extensions.rb
|
|
97
105
|
- lib/spidr/extensions/uri.rb
|
|
98
106
|
- lib/spidr/page.rb
|
|
107
|
+
- lib/spidr/sanitizers.rb
|
|
99
108
|
- lib/spidr/rules.rb
|
|
100
109
|
- lib/spidr/filters.rb
|
|
101
110
|
- lib/spidr/events.rb
|
|
@@ -106,53 +115,28 @@ files:
|
|
|
106
115
|
- lib/spidr/actions/exceptions/skip_link.rb
|
|
107
116
|
- lib/spidr/actions/exceptions/skip_page.rb
|
|
108
117
|
- lib/spidr/actions/actions.rb
|
|
118
|
+
- lib/spidr/session_cache.rb
|
|
119
|
+
- lib/spidr/cookie_jar.rb
|
|
120
|
+
- lib/spidr/auth_credential.rb
|
|
121
|
+
- lib/spidr/auth_store.rb
|
|
109
122
|
- lib/spidr/agent.rb
|
|
110
123
|
- lib/spidr/spidr.rb
|
|
111
124
|
- lib/spidr/version.rb
|
|
112
125
|
- tasks/spec.rb
|
|
113
126
|
- tasks/yard.rb
|
|
114
|
-
- tasks/course.rb
|
|
115
127
|
- spec/spec_helper.rb
|
|
116
|
-
- spec/helpers/course.rb
|
|
128
|
+
- spec/helpers/history.rb
|
|
129
|
+
- spec/helpers/wsoc.rb
|
|
117
130
|
- spec/helpers/page.rb
|
|
118
131
|
- spec/extensions/uri_spec.rb
|
|
119
132
|
- spec/page_examples.rb
|
|
120
133
|
- spec/page_spec.rb
|
|
121
134
|
- spec/rules_spec.rb
|
|
135
|
+
- spec/sanitizers_spec.rb
|
|
122
136
|
- spec/filters_spec.rb
|
|
123
137
|
- spec/actions_spec.rb
|
|
124
138
|
- spec/agent_spec.rb
|
|
125
139
|
- spec/spidr_spec.rb
|
|
126
|
-
- static/course/index.html
|
|
127
|
-
- static/course/start.html
|
|
128
|
-
- static/course/fail.html
|
|
129
|
-
- static/course/scripts/jquery-1.2.6.min.js
|
|
130
|
-
- static/course/scripts/course.js
|
|
131
|
-
- static/course/empty/index.html
|
|
132
|
-
- static/course/empty/start.html
|
|
133
|
-
- static/course/javascript/index.html
|
|
134
|
-
- static/course/javascript/start.html
|
|
135
|
-
- static/course/loop/index.html
|
|
136
|
-
- static/course/loop/start.html
|
|
137
|
-
- static/course/loop/next.html
|
|
138
|
-
- static/course/relative/index.html
|
|
139
|
-
- static/course/relative/start.html
|
|
140
|
-
- static/course/relative/normal.html
|
|
141
|
-
- static/course/relative/current_directory.html
|
|
142
|
-
- static/course/relative/same_directory.html
|
|
143
|
-
- static/course/absolute/index.html
|
|
144
|
-
- static/course/absolute/start.html
|
|
145
|
-
- static/course/absolute/next.html
|
|
146
|
-
- static/course/remote/index.html
|
|
147
|
-
- static/course/remote/start.html
|
|
148
|
-
- static/course/remote/next.html
|
|
149
|
-
- static/course/frames/index.html
|
|
150
|
-
- static/course/frames/start.html
|
|
151
|
-
- static/course/frames/iframe.html
|
|
152
|
-
- static/course/frames/iframe_next.html
|
|
153
|
-
- static/course/frames/frame.html
|
|
154
|
-
- static/course/frames/frame_next.html
|
|
155
|
-
- static/course/specs.json
|
|
156
140
|
has_rdoc: yard
|
|
157
141
|
homepage: http://spidr.rubyforge.org
|
|
158
142
|
licenses: []
|
|
@@ -160,7 +144,7 @@ licenses: []
|
|
|
160
144
|
post_install_message:
|
|
161
145
|
rdoc_options:
|
|
162
146
|
- --main
|
|
163
|
-
- README.txt
|
|
147
|
+
- README.rdoc
|
|
164
148
|
require_paths:
|
|
165
149
|
- lib
|
|
166
150
|
required_ruby_version: !ruby/object:Gem::Requirement
|
metadata.gz.sig
CHANGED
|
Binary file
|
data/History.txt
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
=== 0.2.1 / 2009-11-25
|
|
2
|
-
|
|
3
|
-
* Added Spidr::Events#every_ok_page.
|
|
4
|
-
* Added Spidr::Events#every_redirect_page.
|
|
5
|
-
* Added Spidr::Events#every_timedout_page.
|
|
6
|
-
* Added Spidr::Events#every_bad_request_page.
|
|
7
|
-
* Added Spidr::Events#every_unauthorized_page.
|
|
8
|
-
* Added Spidr::Events#every_forbidden_page.
|
|
9
|
-
* Added Spidr::Events#every_missing_page.
|
|
10
|
-
* Added Spidr::Events#every_internal_server_error_page.
|
|
11
|
-
* Added Spidr::Events#every_txt_page.
|
|
12
|
-
* Added Spidr::Events#every_html_page.
|
|
13
|
-
* Added Spidr::Events#every_xml_page.
|
|
14
|
-
* Added Spidr::Events#every_xsl_page.
|
|
15
|
-
* Added Spidr::Events#every_doc.
|
|
16
|
-
* Added Spidr::Events#every_html_doc.
|
|
17
|
-
* Added Spidr::Events#every_xml_doc.
|
|
18
|
-
* Added Spidr::Events#every_xsl_doc.
|
|
19
|
-
* Added Spidr::Events#every_rss_doc.
|
|
20
|
-
* Added Spidr::Events#every_atom_doc.
|
|
21
|
-
* Added Spidr::Events#every_javascript_page.
|
|
22
|
-
* Added Spidr::Events#every_css_page.
|
|
23
|
-
* Added Spidr::Events#every_rss_page.
|
|
24
|
-
* Added Spidr::Events#every_atom_page.
|
|
25
|
-
* Added Spidr::Events#every_ms_word_page.
|
|
26
|
-
* Added Spidr::Events#every_pdf_page.
|
|
27
|
-
* Added Spidr::Events#every_zip_page.
|
|
28
|
-
* Fixed a bug where Spidr::Agent#delay was not being used to delay
|
|
29
|
-
requesting pages.
|
|
30
|
-
* Spider +link+ and +script+ tags in HTML pages (thanks Nick Plante).
|
|
31
|
-
|
|
32
|
-
=== 0.2.0 / 2009-10-10
|
|
33
|
-
|
|
34
|
-
* Added URI.expand_path.
|
|
35
|
-
* Added Spidr::Page#search.
|
|
36
|
-
* Added Spidr::Page#at.
|
|
37
|
-
* Added Spidr::Page#title.
|
|
38
|
-
* Added Spidr::Agent#failures=.
|
|
39
|
-
* Added a HTTP session cache to Spidr::Agent, per suggestion of falter.
|
|
40
|
-
* Added Spidr::Agent#get_session.
|
|
41
|
-
* Added Spidr::Agent#kill_session.
|
|
42
|
-
* Added Spidr.proxy=.
|
|
43
|
-
* Added Spidr.disable_proxy!.
|
|
44
|
-
* Aliased Spidr::Page#txt? to Spidr::Page#plain_text?.
|
|
45
|
-
* Aliased Spidr::Page#ok? to Spidr::Page#is_ok?.
|
|
46
|
-
* Aliased Spidr::Page#redirect? to Spidr::Page#is_redirect?.
|
|
47
|
-
* Aliased Spidr::Page#unauthorized? to Spidr::Page#is_unauthorized?.
|
|
48
|
-
* Aliased Spidr::Page#forbidden? to Spidr::Page#is_forbidden?.
|
|
49
|
-
* Aliased Spidr::Page#missing? to Spidr::Page#is_missing?.
|
|
50
|
-
* Split URL filtering code out of Spidr::Agent and into Spidr::Filtering.
|
|
51
|
-
* Split URL / Page event code out of Spidr::Agent and into Spidr::Events.
|
|
52
|
-
* Split pause! / continue! / skip_link! / skip_page! methods out of
|
|
53
|
-
Spidr::Agent and into Spidr::Actions.
|
|
54
|
-
* Fixed a bug in Spidr::Page#code, where it was not returning an Integer.
|
|
55
|
-
* Make sure Spidr::Page#doc returns Nokogiri::XML::Document objects for
|
|
56
|
-
RSS/RDF/Atom pages as well.
|
|
57
|
-
* Fixed the handling of the Location header in Spidr::Page#links
|
|
58
|
-
(thanks falter).
|
|
59
|
-
* Fixed a bug in Spidr::Page#to_absolute where trailing '/' characters on
|
|
60
|
-
URI paths were not being preserved (thanks falter).
|
|
61
|
-
* Fixed a bug where the URI query was not being sent with the request
|
|
62
|
-
in Spidr::Agent#get_page (thanks Damian Steer).
|
|
63
|
-
* Fixed a bug where SSL sessions were not being properly setup
|
|
64
|
-
(thanks falter).
|
|
65
|
-
* Switched Spidr::Agent#history to be a Set, to improve search-time
|
|
66
|
-
of the history (thanks falter).
|
|
67
|
-
* Switched Spidr::Agent#failures to a Set.
|
|
68
|
-
* Allow a block to be passed to Spidr::Agent#run, which will receive all
|
|
69
|
-
pages visited.
|
|
70
|
-
* Allow Spidr::Agent#start_at and Spidr::Agent#continue! to pass blocks to
|
|
71
|
-
Spidr::Agent#run.
|
|
72
|
-
* Made Spidr::Agent#visit_page public.
|
|
73
|
-
* Moved to YARD based documentation.
|
|
74
|
-
|
|
75
|
-
=== 0.1.9 / 2009-06-13
|
|
76
|
-
|
|
77
|
-
* Upgraded to Hoe 2.0.0.
|
|
78
|
-
* Use Hoe.spec instead of Hoe.new.
|
|
79
|
-
* Use the Hoe signing task for signed gems.
|
|
80
|
-
* Added the Agent#schemes and Agent#schemes= methods.
|
|
81
|
-
* Added a warning message if 'net/https' cannot be loaded.
|
|
82
|
-
* Allow the list of acceptable URL schemes to be passed into Agent.new.
|
|
83
|
-
* Allow history and queue information to be passed into Agent.new.
|
|
84
|
-
* Agent#start_at no longer clears the history or the queue.
|
|
85
|
-
* Fixed a bug in the sanitization of semi-escaped URLs.
|
|
86
|
-
* Fixed a bug where https URLs would be followed even if 'net/https'
|
|
87
|
-
could not be loaded.
|
|
88
|
-
* Removed Agent::SCHEMES.
|
|
89
|
-
|
|
90
|
-
=== 0.1.8 / 2009-05-27
|
|
91
|
-
|
|
92
|
-
* Added the Agent#pause! and Agent#continue! methods.
|
|
93
|
-
* Added the Agent#running? and Agent#paused? methods.
|
|
94
|
-
* Added an alias for pending_urls to the queue methods.
|
|
95
|
-
* Added Agent#queue to provide read access to the queue.
|
|
96
|
-
* Added Agent#queue= and Agent#history= for setting the queue and history.
|
|
97
|
-
* Added Agent#to_hash which returns a Hash of the agents queue and history.
|
|
98
|
-
* Made Agent#enqueue and Agent#queued? public.
|
|
99
|
-
* Added more specs.
|
|
100
|
-
|
|
101
|
-
=== 0.1.7 / 2009-04-24
|
|
102
|
-
|
|
103
|
-
* Added Agent#all_headers.
|
|
104
|
-
* Fixed a bug where Page#headers was always +nil+.
|
|
105
|
-
* Spidr::Agent will now follow the Location header in HTTP 300, 301, 302,
|
|
106
|
-
303 and 307 Redirects.
|
|
107
|
-
* Spidr::Agent will now follow iframe and frame tags.
|
|
108
|
-
|
|
109
|
-
=== 0.1.6 / 2009-04-14
|
|
110
|
-
|
|
111
|
-
* Added Agent#failures, a list of URLs which could not be visited.
|
|
112
|
-
* Added Agent#failed?.
|
|
113
|
-
* Added Agent#every_failed_url.
|
|
114
|
-
* Added Agent#clear, which clears the history and failures URL lists.
|
|
115
|
-
* Improved fault tolerance in Agent#get_page.
|
|
116
|
-
* If a Network or HTTP error is encountered, the URL will be added to
|
|
117
|
-
the failures list and the next URL will be visited.
|
|
118
|
-
* Fixed a typo in Agent#ignore_exts_like.
|
|
119
|
-
* Updated the Web Spider Obstacle Course with links that always fail to be
|
|
120
|
-
visited.
|
|
121
|
-
|
|
122
|
-
=== 0.1.5 / 2009-03-22
|
|
123
|
-
|
|
124
|
-
* Catch malformed URIs in Page#to_absolute and return +nil+.
|
|
125
|
-
* Filter out +nil+ URIs in Page#urls.
|
|
126
|
-
|
|
127
|
-
=== 0.1.4 / 2009-01-15
|
|
128
|
-
|
|
129
|
-
* Use Nokogiri for HTML and XML parsing.
|
|
130
|
-
|
|
131
|
-
=== 0.1.3 / 2009-01-10
|
|
132
|
-
|
|
133
|
-
* Added the :host options to Spidr::Agent#initialize.
|
|
134
|
-
* Added the Web Spider Obstacle Course files to the Manifest.
|
|
135
|
-
* Aliased Spidr::Agent#visited_urls to Spidr::Agent#history.
|
|
136
|
-
|
|
137
|
-
=== 0.1.2 / 2008-11-06
|
|
138
|
-
|
|
139
|
-
* Fixed a bug in Page#to_absolute where URLs with no path were not
|
|
140
|
-
receiving a default path of <tt>/</tt>.
|
|
141
|
-
* Fixed a bug in Page#to_absolute where URL paths were not being
|
|
142
|
-
expanded, in order to remove <tt>..</tt> and <tt>.</tt> directories.
|
|
143
|
-
* Fixed a bug where absolute URLs could have a blank path, thus causing
|
|
144
|
-
Agent#get_page to crash when it performed the HTTP request.
|
|
145
|
-
* Added RSpec spec tests.
|
|
146
|
-
* Created a Web-Spider Obstacle Course
|
|
147
|
-
(http://spidr.rubyforge.org/course/start.html) which is used in the spec
|
|
148
|
-
tests.
|
|
149
|
-
|
|
150
|
-
=== 0.1.1 / 2008-10-04
|
|
151
|
-
|
|
152
|
-
* Added a reader method for the response instance variable in Page.
|
|
153
|
-
* Fixed a bug in Page#method_missing.
|
|
154
|
-
|
|
155
|
-
=== 0.1.0 / 2008-05-23
|
|
156
|
-
|
|
157
|
-
* Initial release.
|
|
158
|
-
* Black-list or white-list URLs based upon:
|
|
159
|
-
* Host name
|
|
160
|
-
* Port number
|
|
161
|
-
* Full link
|
|
162
|
-
* URL extension
|
|
163
|
-
* Provides call-backs for:
|
|
164
|
-
* Every visited Page.
|
|
165
|
-
* Every visited URL.
|
|
166
|
-
* Every visited URL that matches a specified pattern.
|
|
167
|
-
|
data/spec/helpers/course.rb
DELETED
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
require 'open-uri'
|
|
2
|
-
require 'json'
|
|
3
|
-
|
|
4
|
-
module Helpers
|
|
5
|
-
module Course
|
|
6
|
-
COURSE_URL = URI('http://spidr.rubyforge.org/course/start.html')
|
|
7
|
-
|
|
8
|
-
SPECS_URL = 'http://spidr.rubyforge.org/course/specs.json'
|
|
9
|
-
|
|
10
|
-
def self.included(base)
|
|
11
|
-
specs = JSON.parse(open(SPECS_URL).read)
|
|
12
|
-
|
|
13
|
-
if specs.kind_of?(Array)
|
|
14
|
-
specs.each do |spec|
|
|
15
|
-
message = spec['message'].to_s.dump
|
|
16
|
-
url = spec['url'].to_s.dump
|
|
17
|
-
|
|
18
|
-
case spec['behavior']
|
|
19
|
-
when 'follow'
|
|
20
|
-
base.module_eval %{
|
|
21
|
-
it #{message} do
|
|
22
|
-
should_visit_link(#{url})
|
|
23
|
-
end
|
|
24
|
-
}
|
|
25
|
-
when 'nofollow'
|
|
26
|
-
base.module_eval %{
|
|
27
|
-
it #{message} do
|
|
28
|
-
should_visit_once(#{url})
|
|
29
|
-
end
|
|
30
|
-
}
|
|
31
|
-
when 'fail'
|
|
32
|
-
base.module_eval %{
|
|
33
|
-
it #{message} do
|
|
34
|
-
should_fail_link(#{url})
|
|
35
|
-
end
|
|
36
|
-
}
|
|
37
|
-
else
|
|
38
|
-
link = spec['link'].to_s.dump
|
|
39
|
-
|
|
40
|
-
base.module_eval %{
|
|
41
|
-
it #{message} do
|
|
42
|
-
should_ignore_link(#{link})
|
|
43
|
-
should_ignore_link(#{url})
|
|
44
|
-
end
|
|
45
|
-
}
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def run_course
|
|
52
|
-
Agent.start_at(COURSE_URL,:hosts => [COURSE_URL.host]) do |agent|
|
|
53
|
-
agent.every_failed_url { |url| puts "[FAILED] #{url}" }
|
|
54
|
-
agent.every_url { |url| puts url }
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def visited_once?(link)
|
|
59
|
-
url = COURSE_URL.merge(URI.encode(link))
|
|
60
|
-
|
|
61
|
-
return @agent.visited_urls.select { |visited_url|
|
|
62
|
-
visited_url == url
|
|
63
|
-
}.length == 1
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
#
|
|
67
|
-
# Returns +true+ if the agent has visited the specified _link_, returns
|
|
68
|
-
# +false+ otherwise.
|
|
69
|
-
#
|
|
70
|
-
def visited_link?(link)
|
|
71
|
-
@agent.visited?(COURSE_URL.merge(URI.encode(link)))
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def visit_failed?(link)
|
|
75
|
-
@agent.failed?(COURSE_URL.merge(URI.encode(link)))
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
def should_visit_link(link)
|
|
79
|
-
visited_link?(link).should == true
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
def should_ignore_link(link)
|
|
83
|
-
visited_link?(link).should == false
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def should_visit_once(link)
|
|
87
|
-
visited_once?(link).should == true
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
def should_fail_link(link)
|
|
91
|
-
visited_link?(link).should == false
|
|
92
|
-
visit_failed?(link).should == true
|
|
93
|
-
end
|
|
94
|
-
end
|
|
95
|
-
end
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
<html>
|
|
2
|
-
<head>
|
|
3
|
-
<title>Spidr :: Web-Spider Obstacle Course :: Empty Links</title>
|
|
4
|
-
<script type="text/javascript" src="../scripts/jquery-1.2.6.min.js"></script>
|
|
5
|
-
<script type="text/javascript" src="../scripts/course.js"></script>
|
|
6
|
-
<script type="text/javascript">
|
|
7
|
-
fail();
|
|
8
|
-
</script>
|
|
9
|
-
</head>
|
|
10
|
-
</html>
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
<html>
|
|
2
|
-
<head>
|
|
3
|
-
<title>Spidr :: Web-Spider Obstacle Course :: Absolute Links</title>
|
|
4
|
-
</head>
|
|
5
|
-
|
|
6
|
-
<body>
|
|
7
|
-
<p>Absolute links</p>
|
|
8
|
-
|
|
9
|
-
<ul>
|
|
10
|
-
<li class="nofollow">
|
|
11
|
-
<a href="/course/absolute/start.html">should not follow absolute links to the current page</a>
|
|
12
|
-
</li>
|
|
13
|
-
|
|
14
|
-
<li class="follow">
|
|
15
|
-
<a href="/course/absolute/next.html">should follow absolute links to unvisited pages</a>
|
|
16
|
-
</li>
|
|
17
|
-
</ul>
|
|
18
|
-
</body>
|
|
19
|
-
</html>
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
<html>
|
|
2
|
-
<head>
|
|
3
|
-
<title>Spidr :: Web-Spider Obstacle Course :: Empty Links</title>
|
|
4
|
-
<script type="text/javascript" src="../scripts/jquery-1.2.6.min.js"></script>
|
|
5
|
-
<script type="text/javascript" src="../scripts/course.js"></script>
|
|
6
|
-
<script type="text/javascript">
|
|
7
|
-
fail();
|
|
8
|
-
</script>
|
|
9
|
-
</head>
|
|
10
|
-
</html>
|