spidr 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/spec/page_spec.rb CHANGED
@@ -100,13 +100,6 @@ describe Page do
100
100
  end
101
101
 
102
102
  it "should provide access to the raw Cookie" do
103
- cookie = @page.raw_cookie
104
-
105
- cookie.should_not be_nil
106
- cookie.should_not be_empty
107
- end
108
-
109
- it "should still support the deprecated #cookie method" do
110
103
  cookie = @page.cookie
111
104
 
112
105
  cookie.should_not be_nil
data/spec/rules_spec.rb CHANGED
@@ -3,39 +3,41 @@ require 'spidr/rules'
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Rules do
6
+ subject { Rules }
7
+
6
8
  it "should accept data based on acceptance data" do
7
- rules = Rules.new(:accept => [1])
9
+ rules = subject.new(:accept => [1])
8
10
 
9
11
  rules.accept?(1).should == true
10
12
  end
11
13
 
12
14
  it "should accept data based on acceptance regexps" do
13
- rules = Rules.new(:accept => [/1/])
15
+ rules = subject.new(:accept => [/1/])
14
16
 
15
17
  rules.accept?('1').should == true
16
18
  end
17
19
 
18
20
  it "should match non-Strings using acceptance regexps" do
19
- rules = Rules.new(:accept => [/1/])
21
+ rules = subject.new(:accept => [/1/])
20
22
 
21
23
  rules.accept?(1).should == true
22
24
  end
23
25
 
24
26
  it "should accept data using acceptance lambdas" do
25
- rules = Rules.new(:accept => [lambda { |data| data > 2 }])
27
+ rules = subject.new(:accept => [lambda { |data| data > 2 }])
26
28
 
27
29
  rules.accept?(3).should == true
28
30
  end
29
31
 
30
32
  it "should reject data that does not match any acceptance patterns" do
31
- rules = Rules.new(:accept => [1, 2, 3])
33
+ rules = subject.new(:accept => [1, 2, 3])
32
34
 
33
35
  rules.accept?(2).should == true
34
36
  rules.accept?(4).should == false
35
37
  end
36
38
 
37
39
  it "should accept data that does not match any rejection patterns" do
38
- rules = Rules.new(:reject => [1, 2, 3])
40
+ rules = subject.new(:reject => [1, 2, 3])
39
41
 
40
42
  rules.accept?(2).should == false
41
43
  rules.accept?(4).should == true
data/spec/sanitizers_spec.rb CHANGED
@@ -5,61 +5,55 @@ require 'spec_helper'
5
5
 
6
6
  describe Sanitizers do
7
7
  describe "sanitize_url" do
8
- before(:all) do
9
- @agent = Agent.new
10
- @url = 'http://host.com'
11
- end
8
+ let(:url) { 'http://host.com' }
9
+ before(:all) { @agent = Agent.new }
12
10
 
13
11
  it "should sanitize URLs" do
14
12
  agent = Agent.new
15
- clean_url = agent.sanitize_url(URI(@url))
13
+ clean_url = agent.sanitize_url(URI(url))
16
14
 
17
15
  clean_url.host.should == 'host.com'
18
16
  end
19
17
 
20
18
  it "should sanitize URLs given as Strings" do
21
19
  agent = Agent.new
22
- clean_url = agent.sanitize_url(@url)
20
+ clean_url = agent.sanitize_url(url)
23
21
 
24
22
  clean_url.host.should == 'host.com'
25
23
  end
26
24
  end
27
25
 
28
26
  describe "strip_fragments" do
29
- before(:all) do
30
- @url = URI("http://host.com/page#lol")
31
- end
27
+ let(:url) { URI("http://host.com/page#lol") }
32
28
 
33
29
  it "should strip fragment components by default" do
34
30
  agent = Agent.new
35
- clean_url = agent.sanitize_url(@url)
31
+ clean_url = agent.sanitize_url(url)
36
32
 
37
33
  clean_url.fragment.should be_nil
38
34
  end
39
35
 
40
36
  it "should allow perserving fragment components" do
41
37
  agent = Agent.new(:strip_fragments => false)
42
- clean_url = agent.sanitize_url(@url)
38
+ clean_url = agent.sanitize_url(url)
43
39
 
44
40
  clean_url.fragment.should == 'lol'
45
41
  end
46
42
  end
47
43
 
48
44
  describe "strip_query" do
49
- before(:all) do
50
- @url = URI("http://host.com/page?x=1")
51
- end
45
+ let(:url) { URI("http://host.com/page?x=1") }
52
46
 
53
47
  it "should not strip query components by default" do
54
48
  agent = Agent.new
55
- clean_url = agent.sanitize_url(@url)
49
+ clean_url = agent.sanitize_url(url)
56
50
 
57
51
  clean_url.query.should == 'x=1'
58
52
  end
59
53
 
60
54
  it "should allow stripping of query components" do
61
55
  agent = Agent.new(:strip_query => true)
62
- clean_url = agent.sanitize_url(@url)
56
+ clean_url = agent.sanitize_url(url)
63
57
 
64
58
  clean_url.query.should be_nil
65
59
  end
data/spec/spec_helper.rb CHANGED
@@ -1,15 +1,4 @@
1
- require 'rubygems'
2
- require 'bundler'
3
-
4
- begin
5
- Bundler.setup(:runtime, :test)
6
- rescue Bundler::BundlerError => e
7
- STDERR.puts e.message
8
- STDERR.puts "Run `bundle install` to install missing gems"
9
- exit e.status_code
10
- end
11
-
12
- require 'spec'
1
+ require 'rspec'
13
2
  require 'spidr/version'
14
3
 
15
4
  include Spidr
data/spec/spidr_spec.rb CHANGED
@@ -4,36 +4,36 @@ require 'spec_helper'
4
4
 
5
5
  describe Spidr do
6
6
  it "should have a VERSION constant" do
7
- Spidr.const_defined?('VERSION').should == true
7
+ subject.const_defined?('VERSION').should == true
8
8
  end
9
9
 
10
10
  describe "proxy" do
11
11
  after(:all) do
12
- Spidr.disable_proxy!
12
+ subject.disable_proxy!
13
13
  end
14
14
 
15
15
  it "should not have proxy settings by default" do
16
- Spidr.proxy[:host].should be_nil
16
+ subject.proxy[:host].should be_nil
17
17
  end
18
18
 
19
19
  it "should allow setting new proxy settings" do
20
- Spidr.proxy = {:host => 'example.com', :port => 8010}
20
+ subject.proxy = {:host => 'example.com', :port => 8010}
21
21
 
22
- Spidr.proxy[:host].should == 'example.com'
23
- Spidr.proxy[:port].should == 8010
22
+ subject.proxy[:host].should == 'example.com'
23
+ subject.proxy[:port].should == 8010
24
24
  end
25
25
 
26
26
  it "should default the :port option of new proxy settings" do
27
- Spidr.proxy = {:host => 'example.com'}
27
+ subject.proxy = {:host => 'example.com'}
28
28
 
29
- Spidr.proxy[:host].should == 'example.com'
30
- Spidr.proxy[:port].should == Spidr::COMMON_PROXY_PORT
29
+ subject.proxy[:host].should == 'example.com'
30
+ subject.proxy[:port].should == Spidr::COMMON_PROXY_PORT
31
31
  end
32
32
 
33
33
  it "should allow disabling the proxy" do
34
- Spidr.disable_proxy!
34
+ subject.disable_proxy!
35
35
 
36
- Spidr.proxy[:host].should be_nil
36
+ subject.proxy[:host].should be_nil
37
37
  end
38
38
  end
39
39
  end
data/spidr.gemspec CHANGED
@@ -1,114 +1,15 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
1
  # -*- encoding: utf-8 -*-
5
2
 
6
- Gem::Specification.new do |s|
7
- s.name = %q{spidr}
8
- s.version = "0.2.7"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Postmodern"]
12
- s.date = %q{2010-08-17}
13
- s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
14
- s.email = %q{postmodern.mod3@gmail.com}
15
- s.extra_rdoc_files = [
16
- "ChangeLog.md",
17
- "LICENSE.txt",
18
- "README.md"
19
- ]
20
- s.files = [
21
- ".gitignore",
22
- ".specopts",
23
- ".yardopts",
24
- "ChangeLog.md",
25
- "Gemfile",
26
- "Gemfile.lock",
27
- "LICENSE.txt",
28
- "README.md",
29
- "Rakefile",
30
- "lib/spidr.rb",
31
- "lib/spidr/actions.rb",
32
- "lib/spidr/actions/actions.rb",
33
- "lib/spidr/actions/exceptions.rb",
34
- "lib/spidr/actions/exceptions/action.rb",
35
- "lib/spidr/actions/exceptions/paused.rb",
36
- "lib/spidr/actions/exceptions/skip_link.rb",
37
- "lib/spidr/actions/exceptions/skip_page.rb",
38
- "lib/spidr/agent.rb",
39
- "lib/spidr/auth_credential.rb",
40
- "lib/spidr/auth_store.rb",
41
- "lib/spidr/cookie_jar.rb",
42
- "lib/spidr/events.rb",
43
- "lib/spidr/extensions.rb",
44
- "lib/spidr/extensions/uri.rb",
45
- "lib/spidr/filters.rb",
46
- "lib/spidr/page.rb",
47
- "lib/spidr/rules.rb",
48
- "lib/spidr/sanitizers.rb",
49
- "lib/spidr/session_cache.rb",
50
- "lib/spidr/spidr.rb",
51
- "lib/spidr/version.rb",
52
- "spec/actions_spec.rb",
53
- "spec/agent_spec.rb",
54
- "spec/auth_store_spec.rb",
55
- "spec/cookie_jar_spec.rb",
56
- "spec/extensions/uri_spec.rb",
57
- "spec/filters_spec.rb",
58
- "spec/helpers/history.rb",
59
- "spec/helpers/page.rb",
60
- "spec/helpers/wsoc.rb",
61
- "spec/page_examples.rb",
62
- "spec/page_spec.rb",
63
- "spec/rules_spec.rb",
64
- "spec/sanitizers_spec.rb",
65
- "spec/session_cache.rb",
66
- "spec/spec_helper.rb",
67
- "spec/spidr_spec.rb",
68
- "spidr.gemspec"
69
- ]
70
- s.has_rdoc = %q{yard}
71
- s.homepage = %q{http://github.com/postmodern/spidr}
72
- s.licenses = ["MIT"]
73
- s.require_paths = ["lib"]
74
- s.rubygems_version = %q{1.3.7}
75
- s.summary = %q{A versatile Ruby web spidering library}
76
- s.test_files = [
77
- "spec/actions_spec.rb",
78
- "spec/agent_spec.rb",
79
- "spec/auth_store_spec.rb",
80
- "spec/cookie_jar_spec.rb",
81
- "spec/extensions/uri_spec.rb",
82
- "spec/filters_spec.rb",
83
- "spec/helpers/history.rb",
84
- "spec/helpers/page.rb",
85
- "spec/helpers/wsoc.rb",
86
- "spec/page_examples.rb",
87
- "spec/page_spec.rb",
88
- "spec/rules_spec.rb",
89
- "spec/sanitizers_spec.rb",
90
- "spec/session_cache.rb",
91
- "spec/spec_helper.rb",
92
- "spec/spidr_spec.rb"
93
- ]
94
-
95
- if s.respond_to? :specification_version then
96
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
97
- s.specification_version = 3
98
-
99
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
100
- s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
101
- s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
102
- s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
103
- else
104
- s.add_dependency(%q<rake>, ["~> 0.8.7"])
105
- s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
106
- s.add_dependency(%q<rspec>, ["~> 1.3.0"])
107
- end
108
- else
109
- s.add_dependency(%q<rake>, ["~> 0.8.7"])
110
- s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
111
- s.add_dependency(%q<rspec>, ["~> 1.3.0"])
3
+ begin
4
+ Ore::Specification.new do |gemspec|
5
+ # custom logic here
6
+ end
7
+ rescue NameError
8
+ begin
9
+ require 'ore/specification'
10
+ retry
11
+ rescue LoadError
12
+ STDERR.puts "The '#{__FILE__}' file requires Ore."
13
+ STDERR.puts "Run `gem install ore-core` to install Ore."
112
14
  end
113
15
  end
114
-
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidr
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 7
9
- version: 0.2.7
4
+ prerelease:
5
+ version: 0.3.0
10
6
  platform: ruby
11
7
  authors:
12
8
  - Postmodern
@@ -14,74 +10,59 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2010-08-17 00:00:00 -07:00
18
- default_executable:
13
+ date: 2011-04-14 00:00:00 Z
19
14
  dependencies:
20
15
  - !ruby/object:Gem::Dependency
21
- name: rake
16
+ name: nokogiri
22
17
  requirement: &id001 !ruby/object:Gem::Requirement
23
18
  none: false
24
19
  requirements:
25
20
  - - ~>
26
21
  - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- - 8
30
- - 7
31
- version: 0.8.7
32
- type: :development
22
+ version: "1.3"
23
+ type: :runtime
33
24
  prerelease: false
34
25
  version_requirements: *id001
35
26
  - !ruby/object:Gem::Dependency
36
- name: jeweler
27
+ name: bundler
37
28
  requirement: &id002 !ruby/object:Gem::Requirement
38
29
  none: false
39
30
  requirements:
40
31
  - - ~>
41
32
  - !ruby/object:Gem::Version
42
- segments:
43
- - 1
44
- - 4
45
- - 0
46
- version: 1.4.0
33
+ version: 1.0.0
47
34
  type: :development
48
35
  prerelease: false
49
36
  version_requirements: *id002
50
37
  - !ruby/object:Gem::Dependency
51
- name: rspec
38
+ name: yard
52
39
  requirement: &id003 !ruby/object:Gem::Requirement
53
40
  none: false
54
41
  requirements:
55
42
  - - ~>
56
43
  - !ruby/object:Gem::Version
57
- segments:
58
- - 1
59
- - 3
60
- - 0
61
- version: 1.3.0
44
+ version: 0.6.0
62
45
  type: :development
63
46
  prerelease: false
64
47
  version_requirements: *id003
65
48
  description: Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.
66
- email: postmodern.mod3@gmail.com
49
+ email:
50
+ - postmodern.mod3@gmail.com
67
51
  executables: []
68
52
 
69
53
  extensions: []
70
54
 
71
55
  extra_rdoc_files:
72
- - ChangeLog.md
73
- - LICENSE.txt
74
56
  - README.md
75
57
  files:
76
- - .gitignore
77
- - .specopts
58
+ - .rspec
78
59
  - .yardopts
79
60
  - ChangeLog.md
80
61
  - Gemfile
81
- - Gemfile.lock
82
62
  - LICENSE.txt
83
63
  - README.md
84
64
  - Rakefile
65
+ - gemspec.yml
85
66
  - lib/spidr.rb
86
67
  - lib/spidr/actions.rb
87
68
  - lib/spidr/actions/actions.rb
@@ -93,11 +74,14 @@ files:
93
74
  - lib/spidr/agent.rb
94
75
  - lib/spidr/auth_credential.rb
95
76
  - lib/spidr/auth_store.rb
77
+ - lib/spidr/body.rb
96
78
  - lib/spidr/cookie_jar.rb
97
79
  - lib/spidr/events.rb
98
80
  - lib/spidr/extensions.rb
99
81
  - lib/spidr/extensions/uri.rb
100
82
  - lib/spidr/filters.rb
83
+ - lib/spidr/headers.rb
84
+ - lib/spidr/links.rb
101
85
  - lib/spidr/page.rb
102
86
  - lib/spidr/rules.rb
103
87
  - lib/spidr/sanitizers.rb
@@ -121,7 +105,6 @@ files:
121
105
  - spec/spec_helper.rb
122
106
  - spec/spidr_spec.rb
123
107
  - spidr.gemspec
124
- has_rdoc: yard
125
108
  homepage: http://github.com/postmodern/spidr
126
109
  licenses:
127
110
  - MIT
@@ -135,39 +118,28 @@ required_ruby_version: !ruby/object:Gem::Requirement
135
118
  requirements:
136
119
  - - ">="
137
120
  - !ruby/object:Gem::Version
138
- hash: 4533863298463290280
139
- segments:
140
- - 0
141
121
  version: "0"
142
122
  required_rubygems_version: !ruby/object:Gem::Requirement
143
123
  none: false
144
124
  requirements:
145
125
  - - ">="
146
126
  - !ruby/object:Gem::Version
147
- segments:
148
- - 0
149
- version: "0"
127
+ version: 1.3.6
150
128
  requirements: []
151
129
 
152
- rubyforge_project:
153
- rubygems_version: 1.3.7
130
+ rubyforge_project: spidr
131
+ rubygems_version: 1.7.2
154
132
  signing_key:
155
133
  specification_version: 3
156
134
  summary: A versatile Ruby web spidering library
157
135
  test_files:
158
- - spec/actions_spec.rb
159
136
  - spec/agent_spec.rb
137
+ - spec/actions_spec.rb
138
+ - spec/rules_spec.rb
139
+ - spec/extensions/uri_spec.rb
160
140
  - spec/auth_store_spec.rb
161
141
  - spec/cookie_jar_spec.rb
162
- - spec/extensions/uri_spec.rb
163
142
  - spec/filters_spec.rb
164
- - spec/helpers/history.rb
165
- - spec/helpers/page.rb
166
- - spec/helpers/wsoc.rb
167
- - spec/page_examples.rb
168
- - spec/page_spec.rb
169
- - spec/rules_spec.rb
170
143
  - spec/sanitizers_spec.rb
171
- - spec/session_cache.rb
172
- - spec/spec_helper.rb
173
144
  - spec/spidr_spec.rb
145
+ - spec/page_spec.rb