spidr_epg 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +291 -0
- data/ChangeLog.md~ +291 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +49 -0
- data/Gemfile~ +16 -0
- data/LICENSE.txt +20 -0
- data/README.md +193 -0
- data/README.md~ +190 -0
- data/Rakefile +29 -0
- data/gemspec.yml +19 -0
- data/lib/spidr/actions/actions.rb +83 -0
- data/lib/spidr/actions/exceptions/action.rb +9 -0
- data/lib/spidr/actions/exceptions/paused.rb +11 -0
- data/lib/spidr/actions/exceptions/skip_link.rb +12 -0
- data/lib/spidr/actions/exceptions/skip_page.rb +12 -0
- data/lib/spidr/actions/exceptions.rb +4 -0
- data/lib/spidr/actions.rb +2 -0
- data/lib/spidr/agent.rb +866 -0
- data/lib/spidr/auth_credential.rb +28 -0
- data/lib/spidr/auth_store.rb +161 -0
- data/lib/spidr/body.rb +98 -0
- data/lib/spidr/cookie_jar.rb +202 -0
- data/lib/spidr/events.rb +537 -0
- data/lib/spidr/extensions/uri.rb +52 -0
- data/lib/spidr/extensions.rb +1 -0
- data/lib/spidr/filters.rb +539 -0
- data/lib/spidr/headers.rb +370 -0
- data/lib/spidr/links.rb +229 -0
- data/lib/spidr/page.rb +108 -0
- data/lib/spidr/rules.rb +79 -0
- data/lib/spidr/sanitizers.rb +56 -0
- data/lib/spidr/session_cache.rb +145 -0
- data/lib/spidr/spidr.rb +107 -0
- data/lib/spidr/version.rb +4 -0
- data/lib/spidr/version.rb~ +4 -0
- data/lib/spidr.rb +3 -0
- data/pkg/spidr-1.0.0.gem +0 -0
- data/spec/actions_spec.rb +59 -0
- data/spec/agent_spec.rb +81 -0
- data/spec/auth_store_spec.rb +85 -0
- data/spec/cookie_jar_spec.rb +144 -0
- data/spec/extensions/uri_spec.rb +43 -0
- data/spec/filters_spec.rb +61 -0
- data/spec/helpers/history.rb +34 -0
- data/spec/helpers/page.rb +8 -0
- data/spec/helpers/wsoc.rb +83 -0
- data/spec/page_examples.rb +21 -0
- data/spec/page_spec.rb +125 -0
- data/spec/rules_spec.rb +45 -0
- data/spec/sanitizers_spec.rb +61 -0
- data/spec/session_cache.rb +58 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/spidr_spec.rb +39 -0
- data/spidr.gemspec +133 -0
- data/spidr.gemspec~ +131 -0
- metadata +158 -0
data/lib/spidr/rules.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
module Spidr
  #
  # The {Rules} class represents collections of acceptance and rejection
  # rules, which are used to filter data.
  #
  class Rules

    # Accept rules
    attr_reader :accept

    # Reject rules
    attr_reader :reject

    #
    # Creates a new Rules object.
    #
    # @param [Hash] options
    #   Additional options.
    #
    # @option options [Array<String, Regexp, Proc>, String, Regexp, Proc] :accept
    #   The pattern(s) to accept data with. A single pattern is wrapped
    #   into an Array.
    #
    # @option options [Array<String, Regexp, Proc>, String, Regexp, Proc] :reject
    #   The pattern(s) to reject data with. A single pattern is wrapped
    #   into an Array.
    #
    def initialize(options={})
      # Array() tolerates nil and bare patterns; the concatenation builds a
      # fresh Array so the caller's list is never shared with this object.
      @accept = [] + Array(options[:accept])
      @reject = [] + Array(options[:reject])
    end

    #
    # Determines whether the data should be accepted or rejected.
    #
    # When at least one accept rule exists, only the accept rules are
    # consulted; the reject rules are used only when there are no accept
    # rules.
    #
    # @param [Object] data
    #   The data to test against the rules.
    #
    # @return [Boolean]
    #   Specifies whether the given data was accepted, using the rules
    #   acceptance patterns.
    #
    def accept?(data)
      if @accept.empty?
        @reject.none? { |rule| test_data(data,rule) }
      else
        @accept.any? { |rule| test_data(data,rule) }
      end
    end

    #
    # Determines whether the data should be rejected or accepted.
    #
    # @param [Object] data
    #   The data to test against the rules.
    #
    # @return [Boolean]
    #   Specifies whether the given data was rejected; always the inverse
    #   of {#accept?}.
    #
    def reject?(data)
      !accept?(data)
    end

    protected

    #
    # Tests the given data against a given pattern.
    #
    # @param [Object] data
    #   The data to test.
    #
    # @param [String, Regexp, Proc, Object] rule
    #   The pattern to test the data with.
    #
    # @return [Boolean]
    #   Specifies whether the given data matched the pattern.
    #
    def test_data(data,rule)
      case rule
      when Proc
        # NOTE: only a literal `true` counts as a match; other truthy
        # return values (e.g. the Integer from `=~`) do not.
        rule.call(data) == true
      when Regexp
        !(data.to_s =~ rule).nil?
      else
        data == rule
      end
    end

  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Spidr
  #
  # The {Sanitizers} module adds methods to {Agent} which control the
  # sanitation of incoming links.
  #
  module Sanitizers
    # Specifies whether the Agent will strip URI fragments
    attr_accessor :strip_fragments

    # Specifies whether the Agent will strip URI queries
    attr_accessor :strip_query

    #
    # Sanitizes a URL based on filtering options.
    #
    # The given URL is never modified; a copy is sanitized and returned.
    #
    # @param [URI::HTTP, URI::HTTPS, String] url
    #   The URL to be sanitized
    #
    # @return [URI::HTTP, URI::HTTPS]
    #   The new sanitized URL.
    #
    # @since 0.2.2
    #
    def sanitize_url(url)
      # work on a private copy so the caller's URI object keeps its
      # fragment and query
      sanitized = if url.kind_of?(URI)
                    url.dup
                  else
                    URI(url.to_s)
                  end

      sanitized.fragment = nil if @strip_fragments
      sanitized.query    = nil if @strip_query

      return sanitized
    end

    protected

    #
    # Initializes the Sanitizer rules.
    #
    # @param [Hash] options
    #   Additional options.
    #
    # @option options [Boolean] :strip_fragments (true)
    #   Specifies whether or not to strip the fragment component from URLs.
    #
    # @option options [Boolean] :strip_query (false)
    #   Specifies whether or not to strip the query component from URLs.
    #
    # @since 0.2.2
    #
    def initialize_sanitizers(options={})
      @strip_fragments = options.fetch(:strip_fragments,true)
      @strip_query     = options.fetch(:strip_query,false)
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'spidrs/spidrs'
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
|
5
|
+
module Spidr
  #
  # Stores active HTTP Sessions organized by scheme, host-name and port.
  #
  class SessionCache

    # Proxy to use
    attr_accessor :proxy

    #
    # Creates a new session cache.
    #
    # @param [Hash] proxy (Spidr.proxy)
    #   Proxy options.
    #
    # @option proxy [String] :host
    #   The host the proxy is running on.
    #
    # @option proxy [Integer] :port
    #   The port the proxy is running on.
    #
    # @option proxy [String] :user
    #   The user to authenticate as with the proxy.
    #
    # @option proxy [String] :password
    #   The password to authenticate with.
    #
    # @since 0.2.2
    #
    def initialize(proxy=Spidr.proxy)
      @proxy    = proxy
      @sessions = {}
    end

    #
    # Determines if there is an active HTTP session for a given URL.
    #
    # @param [URI::HTTP, String] url
    #   The URL that represents a session.
    #
    # @return [Boolean]
    #   Specifies whether there is an active HTTP session.
    #
    # @since 0.2.3
    #
    def active?(url)
      @sessions.has_key?(session_key(url))
    end

    #
    # Provides an active HTTP session for a given URL.
    #
    # A session is created and cached on first use. HTTPS sessions are
    # started immediately, with certificate verification disabled.
    #
    # @param [URI::HTTP, String] url
    #   The URL which will be requested later.
    #
    # @return [Net::HTTP]
    #   The active HTTP session object.
    #
    def [](url)
      # normalize the url
      url = URI(url.to_s) unless url.kind_of?(URI)
      key = session_key(url)

      unless @sessions[key]
        session = Net::HTTP::Proxy(
          @proxy[:host],
          @proxy[:port],
          @proxy[:user],
          @proxy[:password]
        ).new(url.host,url.port)

        if url.scheme == 'https'
          session.use_ssl     = true
          session.verify_mode = OpenSSL::SSL::VERIFY_NONE
          session.start
        end

        @sessions[key] = session
      end

      return @sessions[key]
    end

    #
    # Destroys an HTTP session for the given scheme, host and port.
    #
    # @param [URI::HTTP, String] url
    #   The URL of the requested session.
    #
    # @return [nil]
    #
    # @since 0.2.2
    #
    def kill!(url)
      key = session_key(url)

      if (session = @sessions[key])
        finish_session(session)
        @sessions.delete(key)
      end

      return nil
    end

    #
    # Clears the session cache.
    #
    # @return [SessionCache]
    #   The cleared session cache.
    #
    # @since 0.2.2
    #
    def clear
      @sessions.each_value { |session| finish_session(session) }
      @sessions.clear

      return self
    end

    private

    #
    # Builds the cache key for a URL.
    #
    # @param [URI::HTTP, String] url
    #   The URL to derive the key from.
    #
    # @return [Array]
    #   The scheme, host and port of the URL.
    #
    def session_key(url)
      url = URI(url.to_s) unless url.kind_of?(URI)

      [url.scheme, url.host, url.port]
    end

    #
    # Finishes a session, ignoring the IOError raised by sessions that
    # cannot be finished (for example ones that were never started).
    #
    # @param [Net::HTTP] session
    #   The session to finish.
    #
    def finish_session(session)
      session.finish
    rescue IOError
      nil
    end

  end
end
|
data/lib/spidr/spidr.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'spidrs/agent'
|
2
|
+
|
3
|
+
module Spidr
  # Common proxy port.
  COMMON_PROXY_PORT = 8080

  # Default proxy information.
  DEFAULT_PROXY = {
    :host => nil,
    :port => COMMON_PROXY_PORT,
    :user => nil,
    :password => nil
  }

  #
  # Proxy information used by all newly created Agent objects by default.
  #
  # @return [Hash]
  #   The Spidr proxy information.
  #
  def Spidr.proxy
    # hand out a copy, so that modifying the returned Hash can never
    # alter the DEFAULT_PROXY constant
    @@spidr_proxy ||= DEFAULT_PROXY.dup
  end

  #
  # Sets the proxy information used by Agent objects.
  #
  # @param [Hash] new_proxy
  #   The new proxy information.
  #
  # @option new_proxy [String] :host
  #   The host-name of the proxy.
  #
  # @option new_proxy [Integer] :port (COMMON_PROXY_PORT)
  #   The port of the proxy.
  #
  # @option new_proxy [String] :user
  #   The user to authenticate with the proxy as.
  #
  # @option new_proxy [String] :password
  #   The password to authenticate with the proxy.
  #
  # @return [Hash]
  #   The new proxy information.
  #
  def Spidr.proxy=(new_proxy)
    @@spidr_proxy = {:port => COMMON_PROXY_PORT}.merge(new_proxy)
  end

  #
  # Disables the proxy settings used by all newly created Agent objects.
  #
  # @return [true]
  #
  def Spidr.disable_proxy!
    # reset to a pristine copy of the defaults
    @@spidr_proxy = DEFAULT_PROXY.dup
    return true
  end

  #
  # The User-Agent string used by all Agent objects by default.
  #
  # @return [String]
  #   The Spidr User-Agent string.
  #
  def Spidr.user_agent
    @@spidr_user_agent ||= nil
  end

  #
  # Sets the Spidr User-Agent string.
  #
  # @param [String] new_agent
  #   The new User-Agent string.
  #
  def Spidr.user_agent=(new_agent)
    @@spidr_user_agent = new_agent
  end

  #
  # Forwards the URL, any further arguments and the block to
  # Agent.start_at.
  #
  # Ruby has no method overloading, so a single definition accepts every
  # call form; the arguments after the URL are passed through unchanged.
  #
  # @param [URI::HTTP, String] url
  #   The URL to start at.
  #
  # @param [Array] arguments
  #   Additional arguments for Agent.start_at, such as a regex used to
  #   find specific URLs and an options Hash. Which combinations are
  #   valid depends on Agent.start_at, which is defined elsewhere.
  #
  # @see Agent.start_at
  #
  def Spidr.start_at(url,*arguments,&block)
    Agent.start_at(url,*arguments,&block)
  end

  #
  # @see Agent.host
  #
  def Spidr.host(name,options={},&block)
    Agent.host(name,options,&block)
  end

  #
  # @see Agent.site
  #
  def Spidr.site(url,options={},&block)
    Agent.site(url,options,&block)
  end
end
|
data/lib/spidr.rb
ADDED
data/pkg/spidr-1.0.0.gem
ADDED
Binary file
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spidr/actions'
|
2
|
+
require 'spidr/agent'
|
3
|
+
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
# Specs for the pause / continue / skip actions of an Agent.
# NOTE(review): `Actions` and `Agent` are referenced without the Spidr
# namespace; presumably spec_helper brings them into scope -- confirm.
describe Actions do
  let(:url) { URI('http://spidr.rubyforge.org/') }

  it "should be able to pause spidering" do
    count = 0
    agent = Agent.host('spidr.rubyforge.org') do |spider|
      spider.every_page do |_page|
        count += 1
        spider.pause! if count >= 2
      end
    end

    agent.should be_paused
    agent.history.length.should == 2
  end

  it "should be able to continue spidering after being paused" do
    agent = Agent.new do |spider|
      spider.every_page do |_page|
        spider.pause!
      end
    end

    agent.enqueue(url)
    agent.continue!

    agent.visited?(url).should == true
  end

  it "should allow skipping of enqueued links" do
    agent = Agent.new do |spider|
      # the yielded URL is unused; the underscore also avoids shadowing
      # the `url` helper defined with let
      spider.every_url do |_url|
        spider.skip_link!
      end
    end

    agent.enqueue(url)

    agent.queue.should be_empty
  end

  it "should allow skipping of visited pages" do
    agent = Agent.new do |spider|
      # every_page yields a page, not a URL
      spider.every_page do |_page|
        spider.skip_page!
      end
    end

    agent.visit_page(url)

    agent.history.should == Set[url]
    agent.queue.should be_empty
  end
end
|
data/spec/agent_spec.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'spidr/agent'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'helpers/wsoc'
|
5
|
+
|
6
|
+
# Specs for the history, failures and queue accessors of an Agent.
describe Agent do
  include Helpers::WSOC

  # run_course comes from Helpers::WSOC; the resulting agent is shared by
  # the examples that only read its state.
  before(:all) do
    @agent = run_course
  end

  it "should provide the history" do
    @agent.history.should_not be_empty
  end

  it "should provide the queue" do
    @agent.queue.should be_empty
  end

  it "should be able to restore the history" do
    agent = Agent.new
    previous_history = Set[URI('http://www.example.com')]

    agent.history = previous_history
    agent.history.should == previous_history
  end

  it "should convert new histories to a Set of URIs" do
    agent = Agent.new
    previous_history = ['http://www.example.com']
    expected_history = Set[URI('http://www.example.com')]

    agent.history = previous_history
    agent.history.should_not == previous_history
    agent.history.should == expected_history
  end

  it "should be able to restore the failures" do
    agent = Agent.new
    previous_failures = Set[URI('http://localhost/')]

    agent.failures = previous_failures
    agent.failures.should == previous_failures
  end

  it "should convert new failures to a Set of URIs" do
    agent = Agent.new
    previous_failures = ['http://localhost/']
    expected_failures = Set[URI('http://localhost/')]

    agent.failures = previous_failures
    agent.failures.should_not == previous_failures
    agent.failures.should == expected_failures
  end

  it "should be able to restore the queue" do
    agent = Agent.new
    previous_queue = [URI('http://www.example.com')]

    agent.queue = previous_queue
    agent.queue.should == previous_queue
  end

  it "should convert new queues to an Array of URIs" do
    agent = Agent.new
    previous_queue = ['http://www.example.com']
    expected_queue = [URI('http://www.example.com')]

    agent.queue = previous_queue
    agent.queue.should_not == previous_queue
    agent.queue.should == expected_queue
  end

  it "should provide a to_hash method that returns the queue and history" do
    hash = @agent.to_hash

    hash[:queue].should be_empty
    hash[:history].should_not be_empty
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'spidr/auth_store'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for storing and looking up auth credentials by URL.
describe AuthStore do
  let(:root_uri) { URI('http://zerosum.org/') }
  let(:uri)      { root_uri.merge('/course/auth') }

  # every example starts with one credential registered for `uri`
  before(:each) do
    @auth_store = AuthStore.new
    @auth_store.add(uri, 'admin', 'password')
  end

  after(:each) do
    @auth_store.clear!
  end

  it 'should retrieve auth credentials for the URL' do
    @auth_store[root_uri] = AuthCredential.new('user1', 'pass1')
    @auth_store[root_uri].username.should == 'user1'
    @auth_store[root_uri].password.should == 'pass1'
  end

  it 'should add auth credentials for the URL' do
    lambda {
      @auth_store.add(root_uri, 'user1', 'pass1')
    }.should change(@auth_store, :size)

    @auth_store[root_uri].username.should == 'user1'
    @auth_store[root_uri].password.should == 'pass1'
  end

  describe 'matching' do
    let(:sub_uri) { uri.merge('/course/auth/protected.html') }

    it 'should match a longer URL to the base' do
      @auth_store[sub_uri].username.should == 'admin'
      @auth_store[sub_uri].password.should == 'password'
    end

    it 'should match the longest of all matching URLs' do
      @auth_store.add(uri.merge('/course'), 'user1', 'pass1')
      @auth_store.add(uri.merge('/course/auth/special'), 'user2', 'pass2')
      @auth_store.add(uri.merge('/course/auth/special/extra'), 'user3', 'pass3')

      auth = @auth_store[uri.merge('/course/auth/special/1.html')]
      auth.username.should == 'user2'
      auth.password.should == 'pass2'
    end

    it 'should not match a URL with a different host' do
      remote_uri = URI('http://spidr.rubyforge.org/course/auth')

      @auth_store[remote_uri].should be_nil
    end

    it 'should not match a URL with an alternate path' do
      relative_uri = uri.merge('/course/admin/protected.html')

      @auth_store[relative_uri].should be_nil
    end
  end

  it 'should override previous auth credentials' do
    @auth_store.add(uri, 'newuser', 'newpass')

    @auth_store[uri].username.should == 'newuser'
    @auth_store[uri].password.should == 'newpass'
  end

  it 'should clear all auth credentials' do
    @auth_store.clear!
    @auth_store.size.should == 0
  end

  describe 'for_url' do
    it 'should return nil if no authorization exists' do
      @auth_store.for_url(URI('http://php.net')).should be_nil
    end

    it 'should create an encoded authorization string' do
      # "YWRtaW46cGFzc3dvcmQ=" is the Base64 encoding of "admin:password"
      @auth_store.for_url(uri).should == "YWRtaW46cGFzc3dvcmQ=\n"
    end
  end
end
|