spidr 0.2.6 → 0.2.7

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
data/ChangeLog.md CHANGED
@@ -1,3 +1,10 @@
1
+ ### 0.2.7 / 2010-08-17
2
+
3
+ * Added {Spidr::CookieJar#cookies_for_host} (thanks zapnap).
4
+ * Renamed `Spidr::Page#cookie` to {Spidr::Page#raw_cookie}.
5
+ * Rescue `URI::InvalidComponentError` exceptions in
6
+ {Spidr::Page#to_absolute} (thanks zapnap).
7
+
1
8
  ### 0.2.6 / 2010-07-05
2
9
 
3
10
  * Fixed a bug in {Spidr::Page#meta_redirect}, by calling
data/Gemfile.lock ADDED
@@ -0,0 +1,39 @@
1
+ GIT
2
+ remote: git://github.com/technicalpickles/jeweler.git
3
+ revision: 7856803
4
+ specs:
5
+ jeweler (1.4.0)
6
+ bundler (>= 0.9.5)
7
+ gemcutter (>= 0.1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ gemcutter (0.6.1)
15
+ git (1.2.5)
16
+ json_pure (1.4.6)
17
+ nokogiri (1.4.3.1)
18
+ rack (1.2.1)
19
+ rake (0.8.7)
20
+ rdiscount (1.6.5)
21
+ rspec (1.3.0)
22
+ sinatra (1.0)
23
+ rack (>= 1.0)
24
+ wsoc (0.1.3)
25
+ json_pure (~> 1.4.0)
26
+ sinatra (~> 1.0)
27
+ yard (0.5.8)
28
+
29
+ PLATFORMS
30
+ ruby
31
+
32
+ DEPENDENCIES
33
+ jeweler (~> 1.4.0)!
34
+ nokogiri (>= 1.3.0)
35
+ rake (~> 0.8.7)
36
+ rdiscount (~> 1.6.3)
37
+ rspec (~> 1.3.0)
38
+ wsoc (~> 0.1.3)
39
+ yard (~> 0.5.3)
data/Rakefile CHANGED
@@ -24,6 +24,7 @@ Jeweler::Tasks.new do |gem|
24
24
  gem.authors = ['Postmodern']
25
25
  gem.has_rdoc = 'yard'
26
26
  end
27
+ Jeweler::GemcutterTasks.new
27
28
 
28
29
  require 'spec/rake/spectask'
29
30
  Spec::Rake::SpecTask.new(:spec) do |spec|
@@ -122,7 +122,7 @@ module Spidr
122
122
  if @dirty.include?(host)
123
123
  values = []
124
124
 
125
- @params[host].each do |name,value|
125
+ cookies_for_host(host).each do |name,value|
126
126
  values << "#{name}=#{value}"
127
127
  end
128
128
 
@@ -130,23 +130,40 @@ module Spidr
130
130
  @dirty.delete(host)
131
131
  end
132
132
 
133
- hdomain = host.split('.')
133
+ return @cookies[host]
134
+ end
135
+
136
+ #
137
+ # Returns raw cookie value pairs for a given host. Includes cookies set on
138
+ # parent domain(s).
139
+ #
140
+ # @param [String] host
141
+ # The name of the host.
142
+ #
143
+ # @return [Hash{String => String}]
144
+ # Cookie params.
145
+ #
146
+ # @since 0.2.7
147
+ #
148
+ def cookies_for_host(host)
149
+ host_cookies = (@params[host] || {})
150
+ sub_domains = host.split('.')
134
151
 
135
- if hdomain.length > 2
136
- parent_cookies = for_host(hdomain[1..-1].join('.'))
152
+ while sub_domains.length > 2
153
+ sub_domains.shift
137
154
 
138
- unless (parent_cookies.nil? || parent_cookies.empty?)
139
- @cookies[host] = if @cookies[host].nil?
140
- # inherit the parent cookies
141
- parent_cookies
142
- else
143
- # merge the parent cookies with any host-specific cookies
144
- "#{parent_cookies}; #{@cookies[host]}"
145
- end
155
+ if (parent_cookies = @params[sub_domains.join('.')])
156
+ parent_cookies.each do |name,value|
157
+ # copy in the parent cookies, only if they haven't been
158
+ # overridden yet.
159
+ unless host_cookies.has_key?(name)
160
+ host_cookies[name] = value
161
+ end
162
+ end
146
163
  end
147
164
  end
148
165
 
149
- return @cookies[host]
166
+ return host_cookies
150
167
  end
151
168
 
152
169
  #
data/lib/spidr/page.rb CHANGED
@@ -289,10 +289,29 @@ module Spidr
289
289
  # @return [String]
290
290
  # The raw Cookie from the response.
291
291
  #
292
+ # @since 0.2.7
293
+ #
294
+ def raw_cookie
295
+ (@response['Set-Cookie'] || '')
296
+ end
297
+
298
+ #
299
+ # The raw Cookie String sent along with the page.
300
+ #
301
+ # @return [String]
302
+ # The raw Cookie from the response.
303
+ #
304
+ # @deprecated
305
+ # Deprecated in 0.2.7 and will be removed in 0.3.0.
306
+ # Use {#raw_cookie} instead.
307
+ #
292
308
  # @since 0.2.2
293
309
  #
294
310
  def cookie
295
- (@response['Set-Cookie'] || '')
311
+ STDERR.puts 'DEPRECATION: Spidr::Page#cookie will be removed in 0.3.0'
312
+ STDERR.puts 'DEPRECATION: Use Spidr::Page#raw_cookie instead'
313
+
314
+ return raw_cookie
296
315
  end
297
316
 
298
317
  #
@@ -507,7 +526,7 @@ module Spidr
507
526
  def to_absolute(link)
508
527
  begin
509
528
  url = @url.merge(link.to_s)
510
- rescue URI::InvalidURIError
529
+ rescue URI::InvalidURIError, URI::InvalidComponentError
511
530
  return nil
512
531
  end
513
532
 
data/lib/spidr/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Spidr
2
2
  # Spidr version
3
- VERSION = '0.2.6'
3
+ VERSION = '0.2.7'
4
4
  end
@@ -77,6 +77,40 @@ describe CookieJar do
77
77
  end
78
78
  end
79
79
 
80
+ describe "cookies_for_host" do
81
+ before(:each) do
82
+ @cookie_jar = CookieJar.new
83
+ end
84
+
85
+ it "should return an empty Hash for unknown hosts" do
86
+ @cookie_jar.cookies_for_host('lol.com').should be_empty
87
+ end
88
+
89
+ it "should return an empty Hash for hosts with no cookie params" do
90
+ @cookie_jar['lol.com'] = {}
91
+
92
+ @cookie_jar.cookies_for_host('lol.com').should be_empty
93
+ end
94
+
95
+ it "should return cookie parameters for the host" do
96
+ @cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
97
+ @cookie_jar['zerosum.org'] = {'other' => '1'}
98
+ cookie = @cookie_jar.cookies_for_host('zerosum.org')
99
+
100
+ cookie['admin'].should == 'ofcourseiam'
101
+ cookie['other'].should == '1'
102
+ end
103
+
104
+ it "should include cookies for the parent domain" do
105
+ @cookie_jar['zerosum.org'] = {'admin' => 'ofcourseiam'}
106
+ @cookie_jar['sub.zerosum.org'] = {'other' => '1'}
107
+ cookie = @cookie_jar.cookies_for_host('sub.zerosum.org')
108
+
109
+ cookie['admin'].should == 'ofcourseiam'
110
+ cookie['other'].should == '1'
111
+ end
112
+ end
113
+
80
114
  describe "for_host" do
81
115
  before(:each) do
82
116
  @cookie_jar = CookieJar.new
data/spec/page_spec.rb CHANGED
@@ -100,6 +100,13 @@ describe Page do
100
100
  end
101
101
 
102
102
  it "should provide access to the raw Cookie" do
103
+ cookie = @page.raw_cookie
104
+
105
+ cookie.should_not be_nil
106
+ cookie.should_not be_empty
107
+ end
108
+
109
+ it "should still support the deprecated #cookie method" do
103
110
  cookie = @page.cookie
104
111
 
105
112
  cookie.should_not be_nil
data/spidr.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{spidr}
8
- s.version = "0.2.6"
8
+ s.version = "0.2.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Postmodern"]
12
- s.date = %q{2010-07-05}
12
+ s.date = %q{2010-08-17}
13
13
  s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
14
14
  s.email = %q{postmodern.mod3@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
23
23
  ".yardopts",
24
24
  "ChangeLog.md",
25
25
  "Gemfile",
26
+ "Gemfile.lock",
26
27
  "LICENSE.txt",
27
28
  "README.md",
28
29
  "Rakefile",
@@ -96,18 +97,15 @@ Gem::Specification.new do |s|
96
97
  s.specification_version = 3
97
98
 
98
99
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
99
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.0"])
100
100
  s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
101
101
  s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
102
102
  s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
103
103
  else
104
- s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
105
104
  s.add_dependency(%q<rake>, ["~> 0.8.7"])
106
105
  s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
107
106
  s.add_dependency(%q<rspec>, ["~> 1.3.0"])
108
107
  end
109
108
  else
110
- s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
111
109
  s.add_dependency(%q<rake>, ["~> 0.8.7"])
112
110
  s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
113
111
  s.add_dependency(%q<rspec>, ["~> 1.3.0"])
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 6
9
- version: 0.2.6
8
+ - 7
9
+ version: 0.2.7
10
10
  platform: ruby
11
11
  authors:
12
12
  - Postmodern
@@ -14,27 +14,12 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-07-05 00:00:00 -07:00
17
+ date: 2010-08-17 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: nokogiri
22
- requirement: &id001 !ruby/object:Gem::Requirement
23
- none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 1
29
- - 3
30
- - 0
31
- version: 1.3.0
32
- type: :runtime
33
- prerelease: false
34
- version_requirements: *id001
35
20
  - !ruby/object:Gem::Dependency
36
21
  name: rake
37
- requirement: &id002 !ruby/object:Gem::Requirement
22
+ requirement: &id001 !ruby/object:Gem::Requirement
38
23
  none: false
39
24
  requirements:
40
25
  - - ~>
@@ -46,10 +31,10 @@ dependencies:
46
31
  version: 0.8.7
47
32
  type: :development
48
33
  prerelease: false
49
- version_requirements: *id002
34
+ version_requirements: *id001
50
35
  - !ruby/object:Gem::Dependency
51
36
  name: jeweler
52
- requirement: &id003 !ruby/object:Gem::Requirement
37
+ requirement: &id002 !ruby/object:Gem::Requirement
53
38
  none: false
54
39
  requirements:
55
40
  - - ~>
@@ -61,10 +46,10 @@ dependencies:
61
46
  version: 1.4.0
62
47
  type: :development
63
48
  prerelease: false
64
- version_requirements: *id003
49
+ version_requirements: *id002
65
50
  - !ruby/object:Gem::Dependency
66
51
  name: rspec
67
- requirement: &id004 !ruby/object:Gem::Requirement
52
+ requirement: &id003 !ruby/object:Gem::Requirement
68
53
  none: false
69
54
  requirements:
70
55
  - - ~>
@@ -76,7 +61,7 @@ dependencies:
76
61
  version: 1.3.0
77
62
  type: :development
78
63
  prerelease: false
79
- version_requirements: *id004
64
+ version_requirements: *id003
80
65
  description: Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.
81
66
  email: postmodern.mod3@gmail.com
82
67
  executables: []
@@ -93,6 +78,7 @@ files:
93
78
  - .yardopts
94
79
  - ChangeLog.md
95
80
  - Gemfile
81
+ - Gemfile.lock
96
82
  - LICENSE.txt
97
83
  - README.md
98
84
  - Rakefile
@@ -149,7 +135,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
149
135
  requirements:
150
136
  - - ">="
151
137
  - !ruby/object:Gem::Version
152
- hash: -707506015
138
+ hash: 4533863298463290280
153
139
  segments:
154
140
  - 0
155
141
  version: "0"