spidr 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog.md CHANGED
@@ -1,3 +1,10 @@
1
+ ### 0.2.7 / 2010-08-17
2
+
3
+ * Added {Spidr::CookieJar#cookies_for_host} (thanks zapnap).
4
+ * Renamed `Spidr::Page#cookie` to {Spidr::Page#raw_cookie}.
5
+ * Rescue `URI::InvalidComponentError` exceptions in
6
+ {Spidr::Page#to_absolute} (thanks zapnap).
7
+
1
8
  ### 0.2.6 / 2010-07-05
2
9
 
3
10
  * Fixed a bug in {Spidr::Page#meta_redirect}, by calling
data/Gemfile.lock ADDED
@@ -0,0 +1,39 @@
1
+ GIT
2
+ remote: git://github.com/technicalpickles/jeweler.git
3
+ revision: 7856803
4
+ specs:
5
+ jeweler (1.4.0)
6
+ bundler (>= 0.9.5)
7
+ gemcutter (>= 0.1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ gemcutter (0.6.1)
15
+ git (1.2.5)
16
+ json_pure (1.4.6)
17
+ nokogiri (1.4.3.1)
18
+ rack (1.2.1)
19
+ rake (0.8.7)
20
+ rdiscount (1.6.5)
21
+ rspec (1.3.0)
22
+ sinatra (1.0)
23
+ rack (>= 1.0)
24
+ wsoc (0.1.3)
25
+ json_pure (~> 1.4.0)
26
+ sinatra (~> 1.0)
27
+ yard (0.5.8)
28
+
29
+ PLATFORMS
30
+ ruby
31
+
32
+ DEPENDENCIES
33
+ jeweler (~> 1.4.0)!
34
+ nokogiri (>= 1.3.0)
35
+ rake (~> 0.8.7)
36
+ rdiscount (~> 1.6.3)
37
+ rspec (~> 1.3.0)
38
+ wsoc (~> 0.1.3)
39
+ yard (~> 0.5.3)
data/Rakefile CHANGED
@@ -24,6 +24,7 @@ Jeweler::Tasks.new do |gem|
24
24
  gem.authors = ['Postmodern']
25
25
  gem.has_rdoc = 'yard'
26
26
  end
27
+ Jeweler::GemcutterTasks.new
27
28
 
28
29
  require 'spec/rake/spectask'
29
30
  Spec::Rake::SpecTask.new(:spec) do |spec|
@@ -122,7 +122,7 @@ module Spidr
122
122
  if @dirty.include?(host)
123
123
  values = []
124
124
 
125
- @params[host].each do |name,value|
125
+ cookies_for_host(host).each do |name,value|
126
126
  values << "#{name}=#{value}"
127
127
  end
128
128
 
@@ -130,23 +130,40 @@ module Spidr
130
130
  @dirty.delete(host)
131
131
  end
132
132
 
133
- hdomain = host.split('.')
133
+ return @cookies[host]
134
+ end
135
+
136
+ #
137
+ # Returns raw cookie value pairs for a given host. Includes cookies set on
138
+ # parent domain(s).
139
+ #
140
+ # @param [String] host
141
+ # The name of the host.
142
+ #
143
+ # @return [Hash{String => String}]
144
+ # Cookie params.
145
+ #
146
+ # @since 0.2.7
147
+ #
148
+ def cookies_for_host(host)
149
+ host_cookies = (@params[host] || {})
150
+ sub_domains = host.split('.')
134
151
 
135
- if hdomain.length > 2
136
- parent_cookies = for_host(hdomain[1..-1].join('.'))
152
+ while sub_domains.length > 2
153
+ sub_domains.shift
137
154
 
138
- unless (parent_cookies.nil? || parent_cookies.empty?)
139
- @cookies[host] = if @cookies[host].nil?
140
- # inherit the parent cookies
141
- parent_cookies
142
- else
143
- # merge the parent cookies with any host-specific cookies
144
- "#{parent_cookies}; #{@cookies[host]}"
145
- end
155
+ if (parent_cookies = @params[sub_domains.join('.')])
156
+ parent_cookies.each do |name,value|
157
+ # copy in the parent cookies, only if they haven't been
158
+ # overridden yet.
159
+ unless host_cookies.has_key?(name)
160
+ host_cookies[name] = value
161
+ end
162
+ end
146
163
  end
147
164
  end
148
165
 
149
- return @cookies[host]
166
+ return host_cookies
150
167
  end
151
168
 
152
169
  #
data/lib/spidr/page.rb CHANGED
@@ -289,10 +289,29 @@ module Spidr
289
289
  # @return [String]
290
290
  # The raw Cookie from the response.
291
291
  #
292
+ # @since 0.2.7
293
+ #
294
+ def raw_cookie
295
+ (@response['Set-Cookie'] || '')
296
+ end
297
+
298
+ #
299
+ # The raw Cookie String sent along with the page.
300
+ #
301
+ # @return [String]
302
+ # The raw Cookie from the response.
303
+ #
304
+ # @deprecated
305
+ # Deprecated in 0.2.7 and will be removed in 0.3.0.
306
+ # Use {#raw_cookie} instead.
307
+ #
292
308
  # @since 0.2.2
293
309
  #
294
310
  def cookie
295
- (@response['Set-Cookie'] || '')
311
+ STDERR.puts 'DEPRECATION: Spidr::Page#cookie will be removed in 0.3.0'
312
+ STDERR.puts 'DEPRECATION: Use Spidr::Page#raw_cookie instead'
313
+
314
+ return raw_cookie
296
315
  end
297
316
 
298
317
  #
@@ -507,7 +526,7 @@ module Spidr
507
526
  def to_absolute(link)
508
527
  begin
509
528
  url = @url.merge(link.to_s)
510
- rescue URI::InvalidURIError
529
+ rescue URI::InvalidURIError, URI::InvalidComponentError
511
530
  return nil
512
531
  end
513
532
 
data/lib/spidr/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Spidr
2
2
  # Spidr version
3
- VERSION = '0.2.6'
3
+ VERSION = '0.2.7'
4
4
  end
@@ -77,6 +77,40 @@ describe CookieJar do
77
77
  end
78
78
  end
79
79
 
80
+ describe "cookies_for_host" do
81
+ before(:each) do
82
+ @cookie_jar = CookieJar.new
83
+ end
84
+
85
+ it "should return an empty Hash for unknown hosts" do
86
+ @cookie_jar.cookies_for_host('lol.com').should be_empty
87
+ end
88
+
89
+ it "should return an empty Hash for hosts with no cookie params" do
90
+ @cookie_jar['lol.com'] = {}
91
+
92
+ @cookie_jar.cookies_for_host('lol.com').should be_empty
93
+ end
94
+
95
+ it "should return cookie parameters for the host" do
96
+ @cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
97
+ @cookie_jar['zerosum.org'] = {'other' => '1'}
98
+ cookie = @cookie_jar.cookies_for_host('zerosum.org')
99
+
100
+ cookie['admin'].should == 'ofcourseiam'
101
+ cookie['other'].should == '1'
102
+ end
103
+
104
+ it "should include cookies for the parent domain" do
105
+ @cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
106
+ @cookie_jar['sub.zerosum.org'] = {'other' => '1'}
107
+ cookie = @cookie_jar.cookies_for_host('sub.zerosum.org')
108
+
109
+ cookie['admin'].should == 'ofcourseiam'
110
+ cookie['other'].should == '1'
111
+ end
112
+ end
113
+
80
114
  describe "for_host" do
81
115
  before(:each) do
82
116
  @cookie_jar = CookieJar.new
data/spec/page_spec.rb CHANGED
@@ -100,6 +100,13 @@ describe Page do
100
100
  end
101
101
 
102
102
  it "should provide access to the raw Cookie" do
103
+ cookie = @page.raw_cookie
104
+
105
+ cookie.should_not be_nil
106
+ cookie.should_not be_empty
107
+ end
108
+
109
+ it "should still support the deprecated #cookie method" do
103
110
  cookie = @page.cookie
104
111
 
105
112
  cookie.should_not be_nil
data/spidr.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{spidr}
8
- s.version = "0.2.6"
8
+ s.version = "0.2.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Postmodern"]
12
- s.date = %q{2010-07-05}
12
+ s.date = %q{2010-08-17}
13
13
  s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
14
14
  s.email = %q{postmodern.mod3@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
23
23
  ".yardopts",
24
24
  "ChangeLog.md",
25
25
  "Gemfile",
26
+ "Gemfile.lock",
26
27
  "LICENSE.txt",
27
28
  "README.md",
28
29
  "Rakefile",
@@ -96,18 +97,15 @@ Gem::Specification.new do |s|
96
97
  s.specification_version = 3
97
98
 
98
99
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
99
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.0"])
100
100
  s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
101
101
  s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
102
102
  s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
103
103
  else
104
- s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
105
104
  s.add_dependency(%q<rake>, ["~> 0.8.7"])
106
105
  s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
107
106
  s.add_dependency(%q<rspec>, ["~> 1.3.0"])
108
107
  end
109
108
  else
110
- s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
111
109
  s.add_dependency(%q<rake>, ["~> 0.8.7"])
112
110
  s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
113
111
  s.add_dependency(%q<rspec>, ["~> 1.3.0"])
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 6
9
- version: 0.2.6
8
+ - 7
9
+ version: 0.2.7
10
10
  platform: ruby
11
11
  authors:
12
12
  - Postmodern
@@ -14,27 +14,12 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-07-05 00:00:00 -07:00
17
+ date: 2010-08-17 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: nokogiri
22
- requirement: &id001 !ruby/object:Gem::Requirement
23
- none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 1
29
- - 3
30
- - 0
31
- version: 1.3.0
32
- type: :runtime
33
- prerelease: false
34
- version_requirements: *id001
35
20
  - !ruby/object:Gem::Dependency
36
21
  name: rake
37
- requirement: &id002 !ruby/object:Gem::Requirement
22
+ requirement: &id001 !ruby/object:Gem::Requirement
38
23
  none: false
39
24
  requirements:
40
25
  - - ~>
@@ -46,10 +31,10 @@ dependencies:
46
31
  version: 0.8.7
47
32
  type: :development
48
33
  prerelease: false
49
- version_requirements: *id002
34
+ version_requirements: *id001
50
35
  - !ruby/object:Gem::Dependency
51
36
  name: jeweler
52
- requirement: &id003 !ruby/object:Gem::Requirement
37
+ requirement: &id002 !ruby/object:Gem::Requirement
53
38
  none: false
54
39
  requirements:
55
40
  - - ~>
@@ -61,10 +46,10 @@ dependencies:
61
46
  version: 1.4.0
62
47
  type: :development
63
48
  prerelease: false
64
- version_requirements: *id003
49
+ version_requirements: *id002
65
50
  - !ruby/object:Gem::Dependency
66
51
  name: rspec
67
- requirement: &id004 !ruby/object:Gem::Requirement
52
+ requirement: &id003 !ruby/object:Gem::Requirement
68
53
  none: false
69
54
  requirements:
70
55
  - - ~>
@@ -76,7 +61,7 @@ dependencies:
76
61
  version: 1.3.0
77
62
  type: :development
78
63
  prerelease: false
79
- version_requirements: *id004
64
+ version_requirements: *id003
80
65
  description: Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.
81
66
  email: postmodern.mod3@gmail.com
82
67
  executables: []
@@ -93,6 +78,7 @@ files:
93
78
  - .yardopts
94
79
  - ChangeLog.md
95
80
  - Gemfile
81
+ - Gemfile.lock
96
82
  - LICENSE.txt
97
83
  - README.md
98
84
  - Rakefile
@@ -149,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
149
135
  requirements:
150
136
  - - ">="
151
137
  - !ruby/object:Gem::Version
152
- hash: -707506015
138
+ hash: 4533863298463290280
153
139
  segments:
154
140
  - 0
155
141
  version: "0"