spidr 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/spec/page_spec.rb CHANGED
@@ -100,13 +100,6 @@ describe Page do
100
100
  end
101
101
 
102
102
  it "should provide access to the raw Cookie" do
103
- cookie = @page.raw_cookie
104
-
105
- cookie.should_not be_nil
106
- cookie.should_not be_empty
107
- end
108
-
109
- it "should still support the deprecated #cookie method" do
110
103
  cookie = @page.cookie
111
104
 
112
105
  cookie.should_not be_nil
data/spec/rules_spec.rb CHANGED
@@ -3,39 +3,41 @@ require 'spidr/rules'
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Rules do
6
+ subject { Rules }
7
+
6
8
  it "should accept data based on acceptance data" do
7
- rules = Rules.new(:accept => [1])
9
+ rules = subject.new(:accept => [1])
8
10
 
9
11
  rules.accept?(1).should == true
10
12
  end
11
13
 
12
14
  it "should accept data based on acceptance regexps" do
13
- rules = Rules.new(:accept => [/1/])
15
+ rules = subject.new(:accept => [/1/])
14
16
 
15
17
  rules.accept?('1').should == true
16
18
  end
17
19
 
18
20
  it "should match non-Strings using acceptance regexps" do
19
- rules = Rules.new(:accept => [/1/])
21
+ rules = subject.new(:accept => [/1/])
20
22
 
21
23
  rules.accept?(1).should == true
22
24
  end
23
25
 
24
26
  it "should accept data using acceptance lambdas" do
25
- rules = Rules.new(:accept => [lambda { |data| data > 2 }])
27
+ rules = subject.new(:accept => [lambda { |data| data > 2 }])
26
28
 
27
29
  rules.accept?(3).should == true
28
30
  end
29
31
 
30
32
  it "should reject data that does not match any acceptance patterns" do
31
- rules = Rules.new(:accept => [1, 2, 3])
33
+ rules = subject.new(:accept => [1, 2, 3])
32
34
 
33
35
  rules.accept?(2).should == true
34
36
  rules.accept?(4).should == false
35
37
  end
36
38
 
37
39
  it "should accept data that does not match any rejection patterns" do
38
- rules = Rules.new(:reject => [1, 2, 3])
40
+ rules = subject.new(:reject => [1, 2, 3])
39
41
 
40
42
  rules.accept?(2).should == false
41
43
  rules.accept?(4).should == true
data/spec/sanitizers_spec.rb CHANGED
@@ -5,61 +5,55 @@ require 'spec_helper'
5
5
 
6
6
  describe Sanitizers do
7
7
  describe "sanitize_url" do
8
- before(:all) do
9
- @agent = Agent.new
10
- @url = 'http://host.com'
11
- end
8
+ let(:url) { 'http://host.com' }
9
+ before(:all) { @agent = Agent.new }
12
10
 
13
11
  it "should sanitize URLs" do
14
12
  agent = Agent.new
15
- clean_url = agent.sanitize_url(URI(@url))
13
+ clean_url = agent.sanitize_url(URI(url))
16
14
 
17
15
  clean_url.host.should == 'host.com'
18
16
  end
19
17
 
20
18
  it "should sanitize URLs given as Strings" do
21
19
  agent = Agent.new
22
- clean_url = agent.sanitize_url(@url)
20
+ clean_url = agent.sanitize_url(url)
23
21
 
24
22
  clean_url.host.should == 'host.com'
25
23
  end
26
24
  end
27
25
 
28
26
  describe "strip_fragments" do
29
- before(:all) do
30
- @url = URI("http://host.com/page#lol")
31
- end
27
+ let(:url) { URI("http://host.com/page#lol") }
32
28
 
33
29
  it "should strip fragment components by default" do
34
30
  agent = Agent.new
35
- clean_url = agent.sanitize_url(@url)
31
+ clean_url = agent.sanitize_url(url)
36
32
 
37
33
  clean_url.fragment.should be_nil
38
34
  end
39
35
 
40
36
  it "should allow perserving fragment components" do
41
37
  agent = Agent.new(:strip_fragments => false)
42
- clean_url = agent.sanitize_url(@url)
38
+ clean_url = agent.sanitize_url(url)
43
39
 
44
40
  clean_url.fragment.should == 'lol'
45
41
  end
46
42
  end
47
43
 
48
44
  describe "strip_query" do
49
- before(:all) do
50
- @url = URI("http://host.com/page?x=1")
51
- end
45
+ let(:url) { URI("http://host.com/page?x=1") }
52
46
 
53
47
  it "should not strip query components by default" do
54
48
  agent = Agent.new
55
- clean_url = agent.sanitize_url(@url)
49
+ clean_url = agent.sanitize_url(url)
56
50
 
57
51
  clean_url.query.should == 'x=1'
58
52
  end
59
53
 
60
54
  it "should allow stripping of query components" do
61
55
  agent = Agent.new(:strip_query => true)
62
- clean_url = agent.sanitize_url(@url)
56
+ clean_url = agent.sanitize_url(url)
63
57
 
64
58
  clean_url.query.should be_nil
65
59
  end
data/spec/spec_helper.rb CHANGED
@@ -1,15 +1,4 @@
1
- require 'rubygems'
2
- require 'bundler'
3
-
4
- begin
5
- Bundler.setup(:runtime, :test)
6
- rescue Bundler::BundlerError => e
7
- STDERR.puts e.message
8
- STDERR.puts "Run `bundle install` to install missing gems"
9
- exit e.status_code
10
- end
11
-
12
- require 'spec'
1
+ require 'rspec'
13
2
  require 'spidr/version'
14
3
 
15
4
  include Spidr
data/spec/spidr_spec.rb CHANGED
@@ -4,36 +4,36 @@ require 'spec_helper'
4
4
 
5
5
  describe Spidr do
6
6
  it "should have a VERSION constant" do
7
- Spidr.const_defined?('VERSION').should == true
7
+ subject.const_defined?('VERSION').should == true
8
8
  end
9
9
 
10
10
  describe "proxy" do
11
11
  after(:all) do
12
- Spidr.disable_proxy!
12
+ subject.disable_proxy!
13
13
  end
14
14
 
15
15
  it "should not have proxy settings by default" do
16
- Spidr.proxy[:host].should be_nil
16
+ subject.proxy[:host].should be_nil
17
17
  end
18
18
 
19
19
  it "should allow setting new proxy settings" do
20
- Spidr.proxy = {:host => 'example.com', :port => 8010}
20
+ subject.proxy = {:host => 'example.com', :port => 8010}
21
21
 
22
- Spidr.proxy[:host].should == 'example.com'
23
- Spidr.proxy[:port].should == 8010
22
+ subject.proxy[:host].should == 'example.com'
23
+ subject.proxy[:port].should == 8010
24
24
  end
25
25
 
26
26
  it "should default the :port option of new proxy settings" do
27
- Spidr.proxy = {:host => 'example.com'}
27
+ subject.proxy = {:host => 'example.com'}
28
28
 
29
- Spidr.proxy[:host].should == 'example.com'
30
- Spidr.proxy[:port].should == Spidr::COMMON_PROXY_PORT
29
+ subject.proxy[:host].should == 'example.com'
30
+ subject.proxy[:port].should == Spidr::COMMON_PROXY_PORT
31
31
  end
32
32
 
33
33
  it "should allow disabling the proxy" do
34
- Spidr.disable_proxy!
34
+ subject.disable_proxy!
35
35
 
36
- Spidr.proxy[:host].should be_nil
36
+ subject.proxy[:host].should be_nil
37
37
  end
38
38
  end
39
39
  end
data/spidr.gemspec CHANGED
@@ -1,114 +1,15 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
1
  # -*- encoding: utf-8 -*-
5
2
 
6
- Gem::Specification.new do |s|
7
- s.name = %q{spidr}
8
- s.version = "0.2.7"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Postmodern"]
12
- s.date = %q{2010-08-17}
13
- s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
14
- s.email = %q{postmodern.mod3@gmail.com}
15
- s.extra_rdoc_files = [
16
- "ChangeLog.md",
17
- "LICENSE.txt",
18
- "README.md"
19
- ]
20
- s.files = [
21
- ".gitignore",
22
- ".specopts",
23
- ".yardopts",
24
- "ChangeLog.md",
25
- "Gemfile",
26
- "Gemfile.lock",
27
- "LICENSE.txt",
28
- "README.md",
29
- "Rakefile",
30
- "lib/spidr.rb",
31
- "lib/spidr/actions.rb",
32
- "lib/spidr/actions/actions.rb",
33
- "lib/spidr/actions/exceptions.rb",
34
- "lib/spidr/actions/exceptions/action.rb",
35
- "lib/spidr/actions/exceptions/paused.rb",
36
- "lib/spidr/actions/exceptions/skip_link.rb",
37
- "lib/spidr/actions/exceptions/skip_page.rb",
38
- "lib/spidr/agent.rb",
39
- "lib/spidr/auth_credential.rb",
40
- "lib/spidr/auth_store.rb",
41
- "lib/spidr/cookie_jar.rb",
42
- "lib/spidr/events.rb",
43
- "lib/spidr/extensions.rb",
44
- "lib/spidr/extensions/uri.rb",
45
- "lib/spidr/filters.rb",
46
- "lib/spidr/page.rb",
47
- "lib/spidr/rules.rb",
48
- "lib/spidr/sanitizers.rb",
49
- "lib/spidr/session_cache.rb",
50
- "lib/spidr/spidr.rb",
51
- "lib/spidr/version.rb",
52
- "spec/actions_spec.rb",
53
- "spec/agent_spec.rb",
54
- "spec/auth_store_spec.rb",
55
- "spec/cookie_jar_spec.rb",
56
- "spec/extensions/uri_spec.rb",
57
- "spec/filters_spec.rb",
58
- "spec/helpers/history.rb",
59
- "spec/helpers/page.rb",
60
- "spec/helpers/wsoc.rb",
61
- "spec/page_examples.rb",
62
- "spec/page_spec.rb",
63
- "spec/rules_spec.rb",
64
- "spec/sanitizers_spec.rb",
65
- "spec/session_cache.rb",
66
- "spec/spec_helper.rb",
67
- "spec/spidr_spec.rb",
68
- "spidr.gemspec"
69
- ]
70
- s.has_rdoc = %q{yard}
71
- s.homepage = %q{http://github.com/postmodern/spidr}
72
- s.licenses = ["MIT"]
73
- s.require_paths = ["lib"]
74
- s.rubygems_version = %q{1.3.7}
75
- s.summary = %q{A versatile Ruby web spidering library}
76
- s.test_files = [
77
- "spec/actions_spec.rb",
78
- "spec/agent_spec.rb",
79
- "spec/auth_store_spec.rb",
80
- "spec/cookie_jar_spec.rb",
81
- "spec/extensions/uri_spec.rb",
82
- "spec/filters_spec.rb",
83
- "spec/helpers/history.rb",
84
- "spec/helpers/page.rb",
85
- "spec/helpers/wsoc.rb",
86
- "spec/page_examples.rb",
87
- "spec/page_spec.rb",
88
- "spec/rules_spec.rb",
89
- "spec/sanitizers_spec.rb",
90
- "spec/session_cache.rb",
91
- "spec/spec_helper.rb",
92
- "spec/spidr_spec.rb"
93
- ]
94
-
95
- if s.respond_to? :specification_version then
96
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
97
- s.specification_version = 3
98
-
99
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
100
- s.add_development_dependency(%q<rake>, ["~> 0.8.7"])
101
- s.add_development_dependency(%q<jeweler>, ["~> 1.4.0"])
102
- s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
103
- else
104
- s.add_dependency(%q<rake>, ["~> 0.8.7"])
105
- s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
106
- s.add_dependency(%q<rspec>, ["~> 1.3.0"])
107
- end
108
- else
109
- s.add_dependency(%q<rake>, ["~> 0.8.7"])
110
- s.add_dependency(%q<jeweler>, ["~> 1.4.0"])
111
- s.add_dependency(%q<rspec>, ["~> 1.3.0"])
3
+ begin
4
+ Ore::Specification.new do |gemspec|
5
+ # custom logic here
6
+ end
7
+ rescue NameError
8
+ begin
9
+ require 'ore/specification'
10
+ retry
11
+ rescue LoadError
12
+ STDERR.puts "The '#{__FILE__}' file requires Ore."
13
+ STDERR.puts "Run `gem install ore-core` to install Ore."
112
14
  end
113
15
  end
114
-
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidr
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 7
9
- version: 0.2.7
4
+ prerelease:
5
+ version: 0.3.0
10
6
  platform: ruby
11
7
  authors:
12
8
  - Postmodern
@@ -14,74 +10,59 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2010-08-17 00:00:00 -07:00
18
- default_executable:
13
+ date: 2011-04-14 00:00:00 Z
19
14
  dependencies:
20
15
  - !ruby/object:Gem::Dependency
21
- name: rake
16
+ name: nokogiri
22
17
  requirement: &id001 !ruby/object:Gem::Requirement
23
18
  none: false
24
19
  requirements:
25
20
  - - ~>
26
21
  - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- - 8
30
- - 7
31
- version: 0.8.7
32
- type: :development
22
+ version: "1.3"
23
+ type: :runtime
33
24
  prerelease: false
34
25
  version_requirements: *id001
35
26
  - !ruby/object:Gem::Dependency
36
- name: jeweler
27
+ name: bundler
37
28
  requirement: &id002 !ruby/object:Gem::Requirement
38
29
  none: false
39
30
  requirements:
40
31
  - - ~>
41
32
  - !ruby/object:Gem::Version
42
- segments:
43
- - 1
44
- - 4
45
- - 0
46
- version: 1.4.0
33
+ version: 1.0.0
47
34
  type: :development
48
35
  prerelease: false
49
36
  version_requirements: *id002
50
37
  - !ruby/object:Gem::Dependency
51
- name: rspec
38
+ name: yard
52
39
  requirement: &id003 !ruby/object:Gem::Requirement
53
40
  none: false
54
41
  requirements:
55
42
  - - ~>
56
43
  - !ruby/object:Gem::Version
57
- segments:
58
- - 1
59
- - 3
60
- - 0
61
- version: 1.3.0
44
+ version: 0.6.0
62
45
  type: :development
63
46
  prerelease: false
64
47
  version_requirements: *id003
65
48
  description: Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.
66
- email: postmodern.mod3@gmail.com
49
+ email:
50
+ - postmodern.mod3@gmail.com
67
51
  executables: []
68
52
 
69
53
  extensions: []
70
54
 
71
55
  extra_rdoc_files:
72
- - ChangeLog.md
73
- - LICENSE.txt
74
56
  - README.md
75
57
  files:
76
- - .gitignore
77
- - .specopts
58
+ - .rspec
78
59
  - .yardopts
79
60
  - ChangeLog.md
80
61
  - Gemfile
81
- - Gemfile.lock
82
62
  - LICENSE.txt
83
63
  - README.md
84
64
  - Rakefile
65
+ - gemspec.yml
85
66
  - lib/spidr.rb
86
67
  - lib/spidr/actions.rb
87
68
  - lib/spidr/actions/actions.rb
@@ -93,11 +74,14 @@ files:
93
74
  - lib/spidr/agent.rb
94
75
  - lib/spidr/auth_credential.rb
95
76
  - lib/spidr/auth_store.rb
77
+ - lib/spidr/body.rb
96
78
  - lib/spidr/cookie_jar.rb
97
79
  - lib/spidr/events.rb
98
80
  - lib/spidr/extensions.rb
99
81
  - lib/spidr/extensions/uri.rb
100
82
  - lib/spidr/filters.rb
83
+ - lib/spidr/headers.rb
84
+ - lib/spidr/links.rb
101
85
  - lib/spidr/page.rb
102
86
  - lib/spidr/rules.rb
103
87
  - lib/spidr/sanitizers.rb
@@ -121,7 +105,6 @@ files:
121
105
  - spec/spec_helper.rb
122
106
  - spec/spidr_spec.rb
123
107
  - spidr.gemspec
124
- has_rdoc: yard
125
108
  homepage: http://github.com/postmodern/spidr
126
109
  licenses:
127
110
  - MIT
@@ -135,39 +118,28 @@ required_ruby_version: !ruby/object:Gem::Requirement
135
118
  requirements:
136
119
  - - ">="
137
120
  - !ruby/object:Gem::Version
138
- hash: 4533863298463290280
139
- segments:
140
- - 0
141
121
  version: "0"
142
122
  required_rubygems_version: !ruby/object:Gem::Requirement
143
123
  none: false
144
124
  requirements:
145
125
  - - ">="
146
126
  - !ruby/object:Gem::Version
147
- segments:
148
- - 0
149
- version: "0"
127
+ version: 1.3.6
150
128
  requirements: []
151
129
 
152
- rubyforge_project:
153
- rubygems_version: 1.3.7
130
+ rubyforge_project: spidr
131
+ rubygems_version: 1.7.2
154
132
  signing_key:
155
133
  specification_version: 3
156
134
  summary: A versatile Ruby web spidering library
157
135
  test_files:
158
- - spec/actions_spec.rb
159
136
  - spec/agent_spec.rb
137
+ - spec/actions_spec.rb
138
+ - spec/rules_spec.rb
139
+ - spec/extensions/uri_spec.rb
160
140
  - spec/auth_store_spec.rb
161
141
  - spec/cookie_jar_spec.rb
162
- - spec/extensions/uri_spec.rb
163
142
  - spec/filters_spec.rb
164
- - spec/helpers/history.rb
165
- - spec/helpers/page.rb
166
- - spec/helpers/wsoc.rb
167
- - spec/page_examples.rb
168
- - spec/page_spec.rb
169
- - spec/rules_spec.rb
170
143
  - spec/sanitizers_spec.rb
171
- - spec/session_cache.rb
172
- - spec/spec_helper.rb
173
144
  - spec/spidr_spec.rb
145
+ - spec/page_spec.rb