spidr 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog.md +7 -0
- data/Gemfile.lock +39 -0
- data/Rakefile +1 -0
- data/lib/spidr/cookie_jar.rb +30 -13
- data/lib/spidr/page.rb +21 -2
- data/lib/spidr/version.rb +1 -1
- data/spec/cookie_jar_spec.rb +34 -0
- data/spec/page_spec.rb +7 -0
- data/spidr.gemspec +3 -5
- metadata +11 -25
data/ChangeLog.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
### 0.2.7 / 2010-08-17
|
2
|
+
|
3
|
+
* Added {Spidr::CookieJar#cookies_for_host} (thanks zapnap).
|
4
|
+
* Renamed `Spidr::Page#cookie` to {Spidr::Page#raw_cookie}.
|
5
|
+
* Rescue `URI::InvalidComponentError` exceptions in
|
6
|
+
{Spidr::Page#to_absolute} (thanks zapnap).
|
7
|
+
|
1
8
|
### 0.2.6 / 2010-07-05
|
2
9
|
|
3
10
|
* Fixed a bug in {Spidr::Page#meta_redirect}, by calling
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
GIT
|
2
|
+
remote: git://github.com/technicalpickles/jeweler.git
|
3
|
+
revision: 7856803
|
4
|
+
specs:
|
5
|
+
jeweler (1.4.0)
|
6
|
+
bundler (>= 0.9.5)
|
7
|
+
gemcutter (>= 0.1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
gemcutter (0.6.1)
|
15
|
+
git (1.2.5)
|
16
|
+
json_pure (1.4.6)
|
17
|
+
nokogiri (1.4.3.1)
|
18
|
+
rack (1.2.1)
|
19
|
+
rake (0.8.7)
|
20
|
+
rdiscount (1.6.5)
|
21
|
+
rspec (1.3.0)
|
22
|
+
sinatra (1.0)
|
23
|
+
rack (>= 1.0)
|
24
|
+
wsoc (0.1.3)
|
25
|
+
json_pure (~> 1.4.0)
|
26
|
+
sinatra (~> 1.0)
|
27
|
+
yard (0.5.8)
|
28
|
+
|
29
|
+
PLATFORMS
|
30
|
+
ruby
|
31
|
+
|
32
|
+
DEPENDENCIES
|
33
|
+
jeweler (~> 1.4.0)!
|
34
|
+
nokogiri (>= 1.3.0)
|
35
|
+
rake (~> 0.8.7)
|
36
|
+
rdiscount (~> 1.6.3)
|
37
|
+
rspec (~> 1.3.0)
|
38
|
+
wsoc (~> 0.1.3)
|
39
|
+
yard (~> 0.5.3)
|
data/Rakefile
CHANGED
data/lib/spidr/cookie_jar.rb
CHANGED
@@ -122,7 +122,7 @@ module Spidr
|
|
122
122
|
if @dirty.include?(host)
|
123
123
|
values = []
|
124
124
|
|
125
|
-
|
125
|
+
cookies_for_host(host).each do |name,value|
|
126
126
|
values << "#{name}=#{value}"
|
127
127
|
end
|
128
128
|
|
@@ -130,23 +130,40 @@ module Spidr
|
|
130
130
|
@dirty.delete(host)
|
131
131
|
end
|
132
132
|
|
133
|
-
|
133
|
+
return @cookies[host]
|
134
|
+
end
|
135
|
+
|
136
|
+
#
|
137
|
+
# Returns raw cookie value pairs for a given host. Includes cookies set on
|
138
|
+
# parent domain(s).
|
139
|
+
#
|
140
|
+
# @param [String] host
|
141
|
+
# The name of the host.
|
142
|
+
#
|
143
|
+
# @return [Hash{String => String}]
|
144
|
+
# Cookie params.
|
145
|
+
#
|
146
|
+
# @since 0.2.7
|
147
|
+
#
|
148
|
+
def cookies_for_host(host)
|
149
|
+
host_cookies = (@params[host] || {})
|
150
|
+
sub_domains = host.split('.')
|
134
151
|
|
135
|
-
|
136
|
-
|
152
|
+
while sub_domains.length > 2
|
153
|
+
sub_domains.shift
|
137
154
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
155
|
+
if (parent_cookies = @params[sub_domains.join('.')])
|
156
|
+
parent_cookies.each do |name,value|
|
157
|
+
# copy in the parent cookies, only if they haven't been
|
158
|
+
# overridden yet.
|
159
|
+
unless host_cookies.has_key?(name)
|
160
|
+
host_cookies[name] = value
|
161
|
+
end
|
162
|
+
end
|
146
163
|
end
|
147
164
|
end
|
148
165
|
|
149
|
-
return
|
166
|
+
return host_cookies
|
150
167
|
end
|
151
168
|
|
152
169
|
#
|
data/lib/spidr/page.rb
CHANGED
@@ -289,10 +289,29 @@ module Spidr
|
|
289
289
|
# @return [String]
|
290
290
|
# The raw Cookie from the response.
|
291
291
|
#
|
292
|
+
# @since 0.2.7
|
293
|
+
#
|
294
|
+
def raw_cookie
|
295
|
+
(@response['Set-Cookie'] || '')
|
296
|
+
end
|
297
|
+
|
298
|
+
#
|
299
|
+
# The raw Cookie String sent along with the page.
|
300
|
+
#
|
301
|
+
# @return [String]
|
302
|
+
# The raw Cookie from the response.
|
303
|
+
#
|
304
|
+
# @deprecated
|
305
|
+
# Deprecated in 0.2.7 and will be removed in 0.3.0.
|
306
|
+
# Use {#raw_cookie} instead.
|
307
|
+
#
|
292
308
|
# @since 0.2.2
|
293
309
|
#
|
294
310
|
def cookie
|
295
|
-
|
311
|
+
STDERR.puts 'DEPRECATION: Spidr::Page#cookie will be removed in 0.3.0'
|
312
|
+
STDERR.puts 'DEPRECATION: Use Spidr::Page#raw_cookie instead'
|
313
|
+
|
314
|
+
return raw_cookie
|
296
315
|
end
|
297
316
|
|
298
317
|
#
|
@@ -507,7 +526,7 @@ module Spidr
|
|
507
526
|
def to_absolute(link)
|
508
527
|
begin
|
509
528
|
url = @url.merge(link.to_s)
|
510
|
-
rescue URI::InvalidURIError
|
529
|
+
rescue URI::InvalidURIError, URI::InvalidComponentError
|
511
530
|
return nil
|
512
531
|
end
|
513
532
|
|
data/lib/spidr/version.rb
CHANGED
data/spec/cookie_jar_spec.rb
CHANGED
@@ -77,6 +77,40 @@ describe CookieJar do
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
|
+
describe "cookies_for_host" do
|
81
|
+
before(:each) do
|
82
|
+
@cookie_jar = CookieJar.new
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should return an empty Hash for unknown hosts" do
|
86
|
+
@cookie_jar.cookies_for_host('lol.com').should be_empty
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should return an empty Hash for hosts with no cookie params" do
|
90
|
+
@cookie_jar['lol.com'] = {}
|
91
|
+
|
92
|
+
@cookie_jar.cookies_for_host('lol.com').should be_empty
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should return cookie parameters for the host" do
|
96
|
+
@cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
|
97
|
+
@cookie_jar['zerosum.org'] = {'other' => '1'}
|
98
|
+
cookie = @cookie_jar.cookies_for_host('zerosum.org')
|
99
|
+
|
100
|
+
cookie['admin'].should == 'ofcourseiam'
|
101
|
+
cookie['other'].should == '1'
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should include cookies for the parent domain" do
|
105
|
+
@cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
|
106
|
+
@cookie_jar['sub.zerosum.org'] = {'other' => '1'}
|
107
|
+
cookie = @cookie_jar.cookies_for_host('sub.zerosum.org')
|
108
|
+
|
109
|
+
cookie['admin'].should == 'ofcourseiam'
|
110
|
+
cookie['other'].should == '1'
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
80
114
|
describe "for_host" do
|
81
115
|
before(:each) do
|
82
116
|
@cookie_jar = CookieJar.new
|
data/spec/page_spec.rb
CHANGED
@@ -100,6 +100,13 @@ describe Page do
|
|
100
100
|
end
|
101
101
|
|
102
102
|
it "should provide access to the raw Cookie" do
|
103
|
+
cookie = @page.raw_cookie
|
104
|
+
|
105
|
+
cookie.should_not be_nil
|
106
|
+
cookie.should_not be_empty
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should still support the deprecated #cookie method" do
|
103
110
|
cookie = @page.cookie
|
104
111
|
|
105
112
|
cookie.should_not be_nil
|
data/spidr.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{spidr}
|
8
|
-
s.version = "0.2.6"
|
8
|
+
s.version = "0.2.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Postmodern"]
|
12
|
-
s.date = %q{2010-07-05}
|
12
|
+
s.date = %q{2010-08-17}
|
13
13
|
s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
14
14
|
s.email = %q{postmodern.mod3@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
".yardopts",
|
24
24
|
"ChangeLog.md",
|
25
25
|
"Gemfile",
|
26
|
+
"Gemfile.lock",
|
26
27
|
"LICENSE.txt",
|
27
28
|
"README.md",
|
28
29
|
"Rakefile",
|
@@ -96,18 +97,15 @@ Gem::Specification.new do |s|
|
|
96
97
|
s.specification_version = 3
|
97
98
|
|
98
99
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
99
|
-
s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.0"])
|
100
100
|
s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
|
101
101
|
s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
|
102
102
|
s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
|
103
103
|
else
|
104
|
-
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
105
104
|
s.add_dependency(%q<rake>, ["~> 0.8.7"])
|
106
105
|
s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
|
107
106
|
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
108
107
|
end
|
109
108
|
else
|
110
|
-
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
111
109
|
s.add_dependency(%q<rake>, ["~> 0.8.7"])
|
112
110
|
s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
|
113
111
|
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 6
|
9
|
-
version: 0.2.6
|
8
|
+
- 7
|
9
|
+
version: 0.2.7
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Postmodern
|
@@ -14,27 +14,12 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-07-05 00:00:00 -07:00
|
17
|
+
date: 2010-08-17 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: nokogiri
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
-
none: false
|
24
|
-
requirements:
|
25
|
-
- - ">="
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
segments:
|
28
|
-
- 1
|
29
|
-
- 3
|
30
|
-
- 0
|
31
|
-
version: 1.3.0
|
32
|
-
type: :runtime
|
33
|
-
prerelease: false
|
34
|
-
version_requirements: *id001
|
35
20
|
- !ruby/object:Gem::Dependency
|
36
21
|
name: rake
|
37
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
38
23
|
none: false
|
39
24
|
requirements:
|
40
25
|
- - ~>
|
@@ -46,10 +31,10 @@ dependencies:
|
|
46
31
|
version: 0.8.7
|
47
32
|
type: :development
|
48
33
|
prerelease: false
|
49
|
-
version_requirements: *id002
|
34
|
+
version_requirements: *id001
|
50
35
|
- !ruby/object:Gem::Dependency
|
51
36
|
name: jeweler
|
52
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
53
38
|
none: false
|
54
39
|
requirements:
|
55
40
|
- - ~>
|
@@ -61,10 +46,10 @@ dependencies:
|
|
61
46
|
version: 1.4.0
|
62
47
|
type: :development
|
63
48
|
prerelease: false
|
64
|
-
version_requirements: *id003
|
49
|
+
version_requirements: *id002
|
65
50
|
- !ruby/object:Gem::Dependency
|
66
51
|
name: rspec
|
67
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
68
53
|
none: false
|
69
54
|
requirements:
|
70
55
|
- - ~>
|
@@ -76,7 +61,7 @@ dependencies:
|
|
76
61
|
version: 1.3.0
|
77
62
|
type: :development
|
78
63
|
prerelease: false
|
79
|
-
version_requirements: *id004
|
64
|
+
version_requirements: *id003
|
80
65
|
description: Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.
|
81
66
|
email: postmodern.mod3@gmail.com
|
82
67
|
executables: []
|
@@ -93,6 +78,7 @@ files:
|
|
93
78
|
- .yardopts
|
94
79
|
- ChangeLog.md
|
95
80
|
- Gemfile
|
81
|
+
- Gemfile.lock
|
96
82
|
- LICENSE.txt
|
97
83
|
- README.md
|
98
84
|
- Rakefile
|
@@ -149,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
135
|
requirements:
|
150
136
|
- - ">="
|
151
137
|
- !ruby/object:Gem::Version
|
152
|
-
hash:
|
138
|
+
hash: 4533863298463290280
|
153
139
|
segments:
|
154
140
|
- 0
|
155
141
|
version: "0"
|