spidr 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog.md +7 -0
- data/Gemfile.lock +39 -0
- data/Rakefile +1 -0
- data/lib/spidr/cookie_jar.rb +30 -13
- data/lib/spidr/page.rb +21 -2
- data/lib/spidr/version.rb +1 -1
- data/spec/cookie_jar_spec.rb +34 -0
- data/spec/page_spec.rb +7 -0
- data/spidr.gemspec +3 -5
- metadata +11 -25
data/ChangeLog.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
### 0.2.7 / 2010-08-17
|
2
|
+
|
3
|
+
* Added {Spidr::CookieJar#cookies_for_host} (thanks zapnap).
|
4
|
+
* Renamed `Spidr::Page#cookie` to {Spidr::Page#raw_cookie}.
|
5
|
+
* Rescue `URI::InvalidComponentError` exceptions in
|
6
|
+
{Spidr::Page#to_absolute} (thanks zapnap).
|
7
|
+
|
1
8
|
### 0.2.6 / 2010-07-05
|
2
9
|
|
3
10
|
* Fixed a bug in {Spidr::Page#meta_redirect}, by calling
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
GIT
|
2
|
+
remote: git://github.com/technicalpickles/jeweler.git
|
3
|
+
revision: 7856803
|
4
|
+
specs:
|
5
|
+
jeweler (1.4.0)
|
6
|
+
bundler (>= 0.9.5)
|
7
|
+
gemcutter (>= 0.1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
gemcutter (0.6.1)
|
15
|
+
git (1.2.5)
|
16
|
+
json_pure (1.4.6)
|
17
|
+
nokogiri (1.4.3.1)
|
18
|
+
rack (1.2.1)
|
19
|
+
rake (0.8.7)
|
20
|
+
rdiscount (1.6.5)
|
21
|
+
rspec (1.3.0)
|
22
|
+
sinatra (1.0)
|
23
|
+
rack (>= 1.0)
|
24
|
+
wsoc (0.1.3)
|
25
|
+
json_pure (~> 1.4.0)
|
26
|
+
sinatra (~> 1.0)
|
27
|
+
yard (0.5.8)
|
28
|
+
|
29
|
+
PLATFORMS
|
30
|
+
ruby
|
31
|
+
|
32
|
+
DEPENDENCIES
|
33
|
+
jeweler (~> 1.4.0)!
|
34
|
+
nokogiri (>= 1.3.0)
|
35
|
+
rake (~> 0.8.7)
|
36
|
+
rdiscount (~> 1.6.3)
|
37
|
+
rspec (~> 1.3.0)
|
38
|
+
wsoc (~> 0.1.3)
|
39
|
+
yard (~> 0.5.3)
|
data/Rakefile
CHANGED
data/lib/spidr/cookie_jar.rb
CHANGED
@@ -122,7 +122,7 @@ module Spidr
|
|
122
122
|
if @dirty.include?(host)
|
123
123
|
values = []
|
124
124
|
|
125
|
-
|
125
|
+
cookies_for_host(host).each do |name,value|
|
126
126
|
values << "#{name}=#{value}"
|
127
127
|
end
|
128
128
|
|
@@ -130,23 +130,40 @@ module Spidr
|
|
130
130
|
@dirty.delete(host)
|
131
131
|
end
|
132
132
|
|
133
|
-
|
133
|
+
return @cookies[host]
|
134
|
+
end
|
135
|
+
|
136
|
+
#
|
137
|
+
# Returns raw cookie value pairs for a given host. Includes cookies set on
|
138
|
+
# parent domain(s).
|
139
|
+
#
|
140
|
+
# @param [String] host
|
141
|
+
# The name of the host.
|
142
|
+
#
|
143
|
+
# @return [Hash{String => String}]
|
144
|
+
# Cookie params.
|
145
|
+
#
|
146
|
+
# @since 0.2.7
|
147
|
+
#
|
148
|
+
def cookies_for_host(host)
|
149
|
+
host_cookies = (@params[host] || {})
|
150
|
+
sub_domains = host.split('.')
|
134
151
|
|
135
|
-
|
136
|
-
|
152
|
+
while sub_domains.length > 2
|
153
|
+
sub_domains.shift
|
137
154
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
155
|
+
if (parent_cookies = @params[sub_domains.join('.')])
|
156
|
+
parent_cookies.each do |name,value|
|
157
|
+
# copy in the parent cookies, only if they haven't been
|
158
|
+
# overridden yet.
|
159
|
+
unless host_cookies.has_key?(name)
|
160
|
+
host_cookies[name] = value
|
161
|
+
end
|
162
|
+
end
|
146
163
|
end
|
147
164
|
end
|
148
165
|
|
149
|
-
return
|
166
|
+
return host_cookies
|
150
167
|
end
|
151
168
|
|
152
169
|
#
|
data/lib/spidr/page.rb
CHANGED
@@ -289,10 +289,29 @@ module Spidr
|
|
289
289
|
# @return [String]
|
290
290
|
# The raw Cookie from the response.
|
291
291
|
#
|
292
|
+
# @since 0.2.7
|
293
|
+
#
|
294
|
+
def raw_cookie
|
295
|
+
(@response['Set-Cookie'] || '')
|
296
|
+
end
|
297
|
+
|
298
|
+
#
|
299
|
+
# The raw Cookie String sent along with the page.
|
300
|
+
#
|
301
|
+
# @return [String]
|
302
|
+
# The raw Cookie from the response.
|
303
|
+
#
|
304
|
+
# @deprecated
|
305
|
+
# Deprecated in 0.2.7 and will be removed in 0.3.0.
|
306
|
+
# Use {#raw_cookie} instead.
|
307
|
+
#
|
292
308
|
# @since 0.2.2
|
293
309
|
#
|
294
310
|
def cookie
|
295
|
-
|
311
|
+
STDERR.puts 'DEPRECATION: Spidr::Page#cookie will be removed in 0.3.0'
|
312
|
+
STDERR.puts 'DEPRECATION: Use Spidr::Page#raw_cookie instead'
|
313
|
+
|
314
|
+
return raw_cookie
|
296
315
|
end
|
297
316
|
|
298
317
|
#
|
@@ -507,7 +526,7 @@ module Spidr
|
|
507
526
|
def to_absolute(link)
|
508
527
|
begin
|
509
528
|
url = @url.merge(link.to_s)
|
510
|
-
rescue URI::InvalidURIError
|
529
|
+
rescue URI::InvalidURIError, URI::InvalidComponentError
|
511
530
|
return nil
|
512
531
|
end
|
513
532
|
|
data/lib/spidr/version.rb
CHANGED
data/spec/cookie_jar_spec.rb
CHANGED
@@ -77,6 +77,40 @@ describe CookieJar do
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
|
+
describe "cookies_for_host" do
|
81
|
+
before(:each) do
|
82
|
+
@cookie_jar = CookieJar.new
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should return an empty Hash for unknown hosts" do
|
86
|
+
@cookie_jar.cookies_for_host('lol.com').should be_empty
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should return an empty Hash for hosts with no cookie params" do
|
90
|
+
@cookie_jar['lol.com'] = {}
|
91
|
+
|
92
|
+
@cookie_jar.cookies_for_host('lol.com').should be_empty
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should return cookie parameters for the host" do
|
96
|
+
@cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
|
97
|
+
@cookie_jar['zerosum.org'] = {'other' => '1'}
|
98
|
+
cookie = @cookie_jar.cookies_for_host('zerosum.org')
|
99
|
+
|
100
|
+
cookie['admin'].should == 'ofcourseiam'
|
101
|
+
cookie['other'].should == '1'
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should include cookies for the parent domain" do
|
105
|
+
@cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
|
106
|
+
@cookie_jar['sub.zerosum.org'] = {'other' => '1'}
|
107
|
+
cookie = @cookie_jar.cookies_for_host('sub.zerosum.org')
|
108
|
+
|
109
|
+
cookie['admin'].should == 'ofcourseiam'
|
110
|
+
cookie['other'].should == '1'
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
80
114
|
describe "for_host" do
|
81
115
|
before(:each) do
|
82
116
|
@cookie_jar = CookieJar.new
|
data/spec/page_spec.rb
CHANGED
@@ -100,6 +100,13 @@ describe Page do
|
|
100
100
|
end
|
101
101
|
|
102
102
|
it "should provide access to the raw Cookie" do
|
103
|
+
cookie = @page.raw_cookie
|
104
|
+
|
105
|
+
cookie.should_not be_nil
|
106
|
+
cookie.should_not be_empty
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should still support the deprecated #cookie method" do
|
103
110
|
cookie = @page.cookie
|
104
111
|
|
105
112
|
cookie.should_not be_nil
|
data/spidr.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{spidr}
|
8
|
-
s.version = "0.2.6"
|
8
|
+
s.version = "0.2.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Postmodern"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-08-17}
|
13
13
|
s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
14
14
|
s.email = %q{postmodern.mod3@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
".yardopts",
|
24
24
|
"ChangeLog.md",
|
25
25
|
"Gemfile",
|
26
|
+
"Gemfile.lock",
|
26
27
|
"LICENSE.txt",
|
27
28
|
"README.md",
|
28
29
|
"Rakefile",
|
@@ -96,18 +97,15 @@ Gem::Specification.new do |s|
|
|
96
97
|
s.specification_version = 3
|
97
98
|
|
98
99
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
99
|
-
s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.0"])
|
100
100
|
s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
|
101
101
|
s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
|
102
102
|
s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
|
103
103
|
else
|
104
|
-
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
105
104
|
s.add_dependency(%q<rake>, ["~> 0.8.7"])
|
106
105
|
s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
|
107
106
|
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
108
107
|
end
|
109
108
|
else
|
110
|
-
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
111
109
|
s.add_dependency(%q<rake>, ["~> 0.8.7"])
|
112
110
|
s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
|
113
111
|
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 6
|
9
|
-
version: 0.2.6
|
8
|
+
- 7
|
9
|
+
version: 0.2.7
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Postmodern
|
@@ -14,27 +14,12 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-08-17 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: nokogiri
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
-
none: false
|
24
|
-
requirements:
|
25
|
-
- - ">="
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
segments:
|
28
|
-
- 1
|
29
|
-
- 3
|
30
|
-
- 0
|
31
|
-
version: 1.3.0
|
32
|
-
type: :runtime
|
33
|
-
prerelease: false
|
34
|
-
version_requirements: *id001
|
35
20
|
- !ruby/object:Gem::Dependency
|
36
21
|
name: rake
|
37
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
38
23
|
none: false
|
39
24
|
requirements:
|
40
25
|
- - ~>
|
@@ -46,10 +31,10 @@ dependencies:
|
|
46
31
|
version: 0.8.7
|
47
32
|
type: :development
|
48
33
|
prerelease: false
|
49
|
-
version_requirements: *id002
|
34
|
+
version_requirements: *id001
|
50
35
|
- !ruby/object:Gem::Dependency
|
51
36
|
name: jeweler
|
52
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
53
38
|
none: false
|
54
39
|
requirements:
|
55
40
|
- - ~>
|
@@ -61,10 +46,10 @@ dependencies:
|
|
61
46
|
version: 1.4.0
|
62
47
|
type: :development
|
63
48
|
prerelease: false
|
64
|
-
version_requirements: *id003
|
49
|
+
version_requirements: *id002
|
65
50
|
- !ruby/object:Gem::Dependency
|
66
51
|
name: rspec
|
67
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
68
53
|
none: false
|
69
54
|
requirements:
|
70
55
|
- - ~>
|
@@ -76,7 +61,7 @@ dependencies:
|
|
76
61
|
version: 1.3.0
|
77
62
|
type: :development
|
78
63
|
prerelease: false
|
79
|
-
version_requirements: *id004
|
64
|
+
version_requirements: *id003
|
80
65
|
description: Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.
|
81
66
|
email: postmodern.mod3@gmail.com
|
82
67
|
executables: []
|
@@ -93,6 +78,7 @@ files:
|
|
93
78
|
- .yardopts
|
94
79
|
- ChangeLog.md
|
95
80
|
- Gemfile
|
81
|
+
- Gemfile.lock
|
96
82
|
- LICENSE.txt
|
97
83
|
- README.md
|
98
84
|
- Rakefile
|
@@ -149,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
135
|
requirements:
|
150
136
|
- - ">="
|
151
137
|
- !ruby/object:Gem::Version
|
152
|
-
hash:
|
138
|
+
hash: 4533863298463290280
|
153
139
|
segments:
|
154
140
|
- 0
|
155
141
|
version: "0"
|