tarantula 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/CHANGELOG +36 -2
  2. data/README.rdoc +17 -0
  3. data/Rakefile +20 -5
  4. data/VERSION.yml +1 -1
  5. data/examples/example_helper.rb +13 -15
  6. data/examples/relevance/core_extensions/ellipsize_example.rb +1 -1
  7. data/examples/relevance/core_extensions/file_example.rb +1 -1
  8. data/examples/relevance/core_extensions/response_example.rb +1 -1
  9. data/examples/relevance/core_extensions/test_case_example.rb +5 -1
  10. data/examples/relevance/tarantula/attack_form_submission_example.rb +1 -1
  11. data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
  12. data/examples/relevance/tarantula/crawler_example.rb +313 -223
  13. data/examples/relevance/tarantula/form_example.rb +1 -1
  14. data/examples/relevance/tarantula/form_submission_example.rb +1 -1
  15. data/examples/relevance/tarantula/html_document_handler_example.rb +1 -1
  16. data/examples/relevance/tarantula/html_report_helper_example.rb +1 -1
  17. data/examples/relevance/tarantula/html_reporter_example.rb +1 -1
  18. data/examples/relevance/tarantula/invalid_html_handler_example.rb +1 -1
  19. data/examples/relevance/tarantula/io_reporter_example.rb +1 -1
  20. data/examples/relevance/tarantula/link_example.rb +1 -1
  21. data/examples/relevance/tarantula/log_grabber_example.rb +1 -1
  22. data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
  23. data/examples/relevance/tarantula/result_example.rb +1 -1
  24. data/examples/relevance/tarantula/tidy_handler_example.rb +1 -1
  25. data/examples/relevance/tarantula/transform_example.rb +1 -1
  26. data/examples/relevance/tarantula_example.rb +1 -1
  27. data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
  28. data/lib/relevance/core_extensions/test_case.rb +8 -1
  29. data/lib/relevance/tarantula.rb +1 -1
  30. data/lib/relevance/tarantula/crawler.rb +39 -15
  31. data/lib/relevance/tarantula/index.html.erb +2 -2
  32. data/lib/relevance/tarantula/test_report.html.erb +1 -1
  33. data/lib/relevance/tarantula/tidy_handler.rb +1 -1
  34. metadata +53 -29
  35. data/examples/relevance/tarantula/rails_init_example.rb +0 -14
data/CHANGELOG CHANGED
@@ -1,3 +1,34 @@
1
+ v0.1.8 Add timeouts for crawls to help really long builds [Rob Sanheim]
2
+
3
+ v0.1.7 Minor clean up [Rob Sanheim]
4
+
5
+ v0.1.6
6
+ * add testing for all Rails versions 2.0.2 and up
7
+ * various clean up and housekeeping tasks;
8
+ * start Ruby 1.9 work (but we need Hpricot)
9
+ * show 50 chars of URL, not 30
10
+ * ensure that ActiveRecord gets loaded correctly for the crawler, so that it can rescue RecordNotFound exceptions
11
+ [Rob Sanheim]
12
+
13
+ v0.1.5 Initial implementation of updated look-and-feel [Erik Yowell] [Jason Rudolph]
14
+
15
+ v0.1.4 Bugfix: Include look-and-feel files when building the gem #16 [Jason Rudolph]
16
+
17
+ v0.1.3 Update list of known static file types (e.g., PDFs) to prevent false reports of 404s for links to files that exist in RAILS_ROOT/public [Aaron Bedra]
18
+
19
+ v0.1.2 Remove dependency on Facets gem [Aaron Bedra]
20
+
21
+ v0.1.1 Bugfix: Add ability to handle anchor tags that lack an href attribute #13 [Kevin Gisi]
22
+
23
+ v0.1.0
24
+ * Improve the generated test template to include inline documentation and make the simple case simple [Jason Rudolph]
25
+ * Update README to better serve first-time users [Jason Rudolph]
26
+ * Update development dependencies declarations [Jason Rudolph]
27
+ * Internal refactorings [Aaron Bedra]
28
+ ** Convert test suite to micronaut
29
+ ** Replace Echoe with Jeweler for gem management
30
+ ** Remove unused code
31
+
1
32
  v0.0.8.1
2
33
  * Fix numerous installation and initial setup issues
3
34
  * Enhance rake tasks to support use of Tarantula in a continuous integration environment
@@ -8,6 +39,9 @@ v0.0.8.1
8
39
  ** Include example of adding a custom attack handler
9
40
  * Simplify design to address concerns about hard-to-read fonts
10
41
 
11
- v0.0.5 Make sure we don't include Relevance::Tarantula into Object - will cause issues Rails dependencies and is a bad idea in general; update Rakefile for dev dependencies; another small clean up tasks
42
+ v0.0.5
43
+ * Make sure we don't include Relevance::Tarantula into Object - will cause issues with Rails dependencies and is a bad idea in general
44
+ * Update Rakefile for development dependencies
45
+ * Other small clean up tasks
12
46
 
13
- v0.0.1 Tarantula becomes a gem. (Aaron Bedra)
47
+ v0.0.1 Tarantula becomes a gem. [Aaron Bedra]
data/README.rdoc CHANGED
@@ -134,12 +134,29 @@ This example adds custom attacks for both SQL injection and XSS. It also tells T
134
134
  app 2 times. This is important for XSS attacks because the results won't appear until the second time
135
135
  Tarantula performs the crawl.
136
136
 
137
+ == Timeout
138
+
139
+ You can specify a timeout for each specific crawl that Tarantula runs. For example:
140
+
141
+ def test_tarantula
142
+ t = tarantula_crawler(self)
143
+ t.times_to_crawl = 2
144
+ t.crawl_timeout = 5.minutes
145
+ t.crawl "/"
146
+ end
147
+
148
+ The above will crawl your app twice, and each specific crawl will timeout if it takes longer than 5 minutes. You may need a timeout to keep the tarantula test time reasonable if your app is large or just happens to have a large number of 'never-ending' links, such as with any sort of "auto-admin" interface.
149
+
137
150
  == Bugs/Requests
138
151
 
139
152
  Please submit your bug reports, patches, or feature requests at Lighthouse:
140
153
 
141
154
  http://relevance.lighthouseapp.com/projects/17868-tarantula/overview
142
155
 
156
+ You can view the continuous integration results for Tarantula, including results against all supported versions of Rails, on RunCodeRun here:
157
+
158
+ http://runcoderun.com/relevance/tarantula
159
+
143
160
  == License
144
161
 
145
162
  Tarantula is released under the MIT license.
data/Rakefile CHANGED
@@ -1,12 +1,9 @@
1
1
  require 'rake'
2
2
  require 'rake/testtask'
3
3
  require 'rake/rdoctask'
4
- require 'rcov/rcovtask'
5
- require 'rubygems'
6
- gem "spicycode-micronaut", ">= 0.2.0"
4
+ gem "spicycode-micronaut", ">= 0.2.4"
7
5
  require 'micronaut'
8
6
  require 'micronaut/rake_task'
9
- require 'lib/relevance/tarantula.rb'
10
7
 
11
8
  begin
12
9
  require 'jeweler'
@@ -22,6 +19,9 @@ begin
22
19
  s.authors = ["Relevance, Inc."]
23
20
  s.require_paths = ["lib"]
24
21
  s.files = files.flatten
22
+ s.add_dependency 'htmlentities'
23
+ s.add_dependency 'hpricot'
24
+ s.rubyforge_project = 'thinkrelevance'
25
25
  end
26
26
  rescue LoadError
27
27
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
@@ -48,6 +48,21 @@ namespace :examples do
48
48
  t.rcov = true
49
49
  t.rcov_opts = %[--exclude "gems/*,/Library/Ruby/*,config/*" --text-summary --sort coverage --no-validator-links]
50
50
  end
51
+
52
+ RAILS_VERSIONS = %w[2.0.2 2.1.0 2.1.1 2.2.2 2.3.1 2.3.2]
53
+
54
+ desc "Run exmaples with multiple versions of rails"
55
+ task :multi_rails do
56
+ RAILS_VERSIONS.each do |rails_version|
57
+ puts
58
+ sh "RAILS_VERSION='#{rails_version}' rake examples"
59
+ end
60
+ end
61
+
51
62
  end
52
63
 
53
- task :default => "examples"
64
+ if ENV["RUN_CODE_RUN"]
65
+ task :default => "examples:multi_rails"
66
+ else
67
+ task :default => "examples"
68
+ end
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 5
2
+ :patch: 8
3
3
  :major: 0
4
4
  :minor: 1
@@ -1,27 +1,30 @@
1
1
  lib_path = File.expand_path(File.dirname(__FILE__) + "/../lib")
2
2
  $LOAD_PATH.unshift lib_path unless $LOAD_PATH.include?(lib_path)
3
3
 
4
- require 'rubygems'
5
- gem "spicycode-micronaut", ">= 0.2.0"
4
+ gem "spicycode-micronaut", ">= 0.2.4"
6
5
  gem "log_buddy"
7
6
  gem "mocha"
8
- gem 'ruby-debug'
9
- gem 'test-spec'
7
+ if rails_version = ENV['RAILS_VERSION']
8
+ gem "rails", rails_version
9
+ end
10
+ require "rails/version"
11
+ if Rails::VERSION::STRING < "2.3.1" && RUBY_VERSION >= "1.9.1"
12
+ puts "Tarantula requires Rails 2.3.1 or higher for Ruby 1.9 support"
13
+ exit(1)
14
+ end
15
+ puts "==== Testing with Rails #{Rails::VERSION::STRING} ===="
10
16
  gem 'actionpack'
11
17
  gem 'activerecord'
12
18
  gem 'activesupport'
13
19
 
14
20
  require 'ostruct'
15
- require 'ruby-debug'
16
- require 'activerecord'
21
+ require 'active_support'
22
+ require 'action_controller'
23
+ require 'active_record'
17
24
  require 'relevance/tarantula'
18
25
  require 'micronaut'
19
26
  require 'mocha'
20
27
 
21
- # needed for html-scanner, grr
22
- require 'active_support'
23
- require 'action_controller'
24
-
25
28
  def test_output_dir
26
29
  File.join(File.dirname(__FILE__), "..", "tmp", "test_output")
27
30
  end
@@ -36,12 +39,7 @@ def not_in_editor?
36
39
  ['TM_MODE', 'EMACS', 'VIM'].all? { |k| !ENV.has_key?(k) }
37
40
  end
38
41
 
39
- def in_runcoderun?
40
- ENV["RUN_CODE_RUN"]
41
- end
42
-
43
42
  Micronaut.configure do |c|
44
- c.formatter = :documentation if in_runcoderun?
45
43
  c.alias_example_to :fit, :focused => true
46
44
  c.alias_example_to :xit, :disabled => true
47
45
  c.mock_with :mocha
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "../..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::CoreExtensions::Object#ellipsize" do
4
4
  it "converts nil to empty string" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "../..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../..", "example_helper.rb"))
2
2
  require 'relevance/core_extensions/file'
3
3
 
4
4
  describe "Relevance::CoreExtensions::File#extension" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "../..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../..", "example_helper.rb"))
2
2
  require 'relevance/core_extensions/file'
3
3
 
4
4
  describe "Relevance::CoreExtensions::Response#html?" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "../..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../..", "example_helper.rb"))
2
2
  require 'relevance/core_extensions/test_case'
3
3
 
4
4
  describe "TestCase extensions" do
@@ -13,4 +13,8 @@ describe "TestCase extensions" do
13
13
  expects(:tarantula_crawler).returns(crawler)
14
14
  tarantula_crawl(:integration_test_stub, :url => "/foo")
15
15
  end
16
+
17
+ it "should get mixed into ActionController::IntegrationTest" do
18
+ ActionController::IntegrationTest.ancestors.should include(Relevance::CoreExtensions::TestCaseExtensions)
19
+ end
16
20
  end
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::AttackFormSubmission" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::AttackHandler" do
4
4
  before do
@@ -1,204 +1,246 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
- describe 'Relevance::Tarantula::Crawler#transform_url' do
4
- before {@crawler = Relevance::Tarantula::Crawler.new}
5
- it "de-obfuscates unicode obfuscated urls" do
6
- obfuscated_mailto = "&#109;&#97;&#105;&#108;&#116;&#111;&#58;"
7
- @crawler.transform_url(obfuscated_mailto).should == "mailto:"
8
- end
3
+ describe Relevance::Tarantula::Crawler do
9
4
 
10
- it "strips the trailing name portion of a link" do
11
- @crawler.transform_url('http://host/path#name').should == 'http://host/path'
12
- end
13
- end
5
+ describe "transform_url" do
14
6
 
15
- describe 'Relevance::Tarantula::Crawler log grabbing' do
16
- it "returns nil if no grabber is specified" do
17
- crawler = Relevance::Tarantula::Crawler.new
18
- crawler.grab_log!.should == nil
19
- end
7
+ before { @crawler = Relevance::Tarantula::Crawler.new }
8
+
9
+ it "de-obfuscates unicode obfuscated urls" do
10
+ obfuscated_mailto = "&#109;&#97;&#105;&#108;&#116;&#111;&#58;"
11
+ @crawler.transform_url(obfuscated_mailto).should == "mailto:"
12
+ end
20
13
 
21
- it "returns grabber.grab if grabber is specified" do
22
- crawler = Relevance::Tarantula::Crawler.new
23
- crawler.log_grabber = stub(:grab! => "fake log entry")
24
- crawler.grab_log!.should == "fake log entry"
14
+ it "strips the trailing name portion of a link" do
15
+ @crawler.transform_url('http://host/path#name').should == 'http://host/path'
16
+ end
25
17
  end
26
- end
18
+
19
+
20
+ describe "log grabbing" do
27
21
 
28
- describe 'Relevance::Tarantula::Crawler interruption' do
29
- it 'catches interruption and writes the partial report' do
30
- crawler = Relevance::Tarantula::Crawler.new
31
- crawler.stubs(:queue_link)
32
- crawler.stubs(:do_crawl).raises(Interrupt)
33
- crawler.expects(:report_results)
34
- $stderr.expects(:puts).with("CTRL-C")
35
- crawler.crawl
36
- end
37
- end
22
+ it "returns nil if no grabber is specified" do
23
+ crawler = Relevance::Tarantula::Crawler.new
24
+ crawler.grab_log!.should == nil
25
+ end
38
26
 
39
- describe 'Relevance::Tarantula::Crawler handle_form_results' do
40
- it 'captures the result values (bugfix)' do
41
- response = stub_everything
42
- result_args = {:url => :action_stub,
43
- :data => 'nil',
44
- :response => response,
45
- :referrer => :action_stub,
46
- :log => nil,
47
- :method => :stub_method,
48
- :test_name => nil}
49
- result = Relevance::Tarantula::Result.new(result_args)
50
- Relevance::Tarantula::Result.expects(:new).with(result_args).returns(result)
51
- crawler = Relevance::Tarantula::Crawler.new
52
- crawler.handle_form_results(stub_everything(:method => :stub_method, :action => :action_stub),
53
- response)
54
- end
55
- end
56
-
57
- describe 'Relevance::Tarantula::Crawler#crawl' do
58
- it 'queues the first url, does crawl, and then reports results' do
59
- crawler = Relevance::Tarantula::Crawler.new
60
- crawler.expects(:queue_link).with("/foobar")
61
- crawler.expects(:do_crawl)
62
- crawler.expects(:report_results)
63
- crawler.crawl("/foobar")
27
+ it "returns grabber.grab if grabber is specified" do
28
+ crawler = Relevance::Tarantula::Crawler.new
29
+ crawler.log_grabber = stub(:grab! => "fake log entry")
30
+ crawler.grab_log!.should == "fake log entry"
31
+ end
32
+
64
33
  end
65
34
 
66
- it 'reports results even if the crawl fails' do
67
- crawler = Relevance::Tarantula::Crawler.new
68
- crawler.expects(:do_crawl).raises(RuntimeError)
69
- crawler.expects(:report_results)
70
- lambda {crawler.crawl('/')}.should raise_error(RuntimeError)
71
- end
72
- end
73
-
74
- describe 'Relevance::Tarantula::Crawler queuing' do
75
- it 'queues and remembers links' do
76
- crawler = Relevance::Tarantula::Crawler.new
77
- crawler.expects(:transform_url).with("/url").returns("/transformed")
78
- crawler.queue_link("/url")
79
- crawler.links_to_crawl.should == [Relevance::Tarantula::Link.new("/transformed")]
80
- crawler.links_queued.should == Set.new([Relevance::Tarantula::Link.new("/transformed")])
35
+ describe "interrupt" do
36
+
37
+ it 'catches interruption and writes the partial report' do
38
+ crawler = Relevance::Tarantula::Crawler.new
39
+ crawler.stubs(:queue_link)
40
+ crawler.stubs(:do_crawl).raises(Interrupt)
41
+ crawler.expects(:report_results)
42
+ $stderr.expects(:puts).with("CTRL-C")
43
+ crawler.crawl
44
+ end
45
+
81
46
  end
82
47
 
83
- it 'queues and remembers forms' do
84
- crawler = Relevance::Tarantula::Crawler.new
85
- form = Hpricot('<form action="/action" method="post"/>').at('form')
86
- signature = Relevance::Tarantula::FormSubmission.new(Relevance::Tarantula::Form.new(form)).signature
87
- crawler.queue_form(form)
88
- crawler.forms_to_crawl.size.should == 1
89
- crawler.form_signatures_queued.should == Set.new([signature])
48
+ describe 'handle_form_results' do
49
+
50
+ it 'captures the result values (bugfix)' do
51
+ response = stub_everything
52
+ result_args = {:url => :action_stub,
53
+ :data => 'nil',
54
+ :response => response,
55
+ :referrer => :action_stub,
56
+ :log => nil,
57
+ :method => :stub_method,
58
+ :test_name => nil}
59
+ result = Relevance::Tarantula::Result.new(result_args)
60
+ Relevance::Tarantula::Result.expects(:new).with(result_args).returns(result)
61
+ crawler = Relevance::Tarantula::Crawler.new
62
+ crawler.handle_form_results(stub_everything(:method => :stub_method, :action => :action_stub),
63
+ response)
64
+ end
65
+
90
66
  end
91
67
 
92
- it 'remembers link referrer if there is one' do
93
- crawler = Relevance::Tarantula::Crawler.new
94
- crawler.queue_link("/url", "/some-referrer")
95
- crawler.referrers.should == {Relevance::Tarantula::Link.new("/url") => "/some-referrer"}
68
+ describe "crawl" do
69
+
70
+ it 'queues the first url, does crawl, and then reports results' do
71
+ crawler = Relevance::Tarantula::Crawler.new
72
+ crawler.expects(:queue_link).with("/foobar")
73
+ crawler.expects(:do_crawl)
74
+ crawler.expects(:report_results)
75
+ crawler.crawl("/foobar")
76
+ end
77
+
78
+ it 'reports results even if the crawl fails' do
79
+ crawler = Relevance::Tarantula::Crawler.new
80
+ crawler.expects(:do_crawl).raises(RuntimeError)
81
+ crawler.expects(:report_results)
82
+ lambda {crawler.crawl('/')}.should raise_error(RuntimeError)
83
+ end
84
+
96
85
  end
97
86
 
98
- end
87
+ describe "queueing" do
99
88
 
100
- describe 'Relevance::Tarantula::Crawler#report_results' do
101
- it "delegates to generate_reports" do
102
- crawler = Relevance::Tarantula::Crawler.new
103
- crawler.expects(:generate_reports)
104
- crawler.report_results
105
- end
106
- end
89
+ it 'queues and remembers links' do
90
+ crawler = Relevance::Tarantula::Crawler.new
91
+ crawler.expects(:transform_url).with("/url").returns("/transformed")
92
+ crawler.queue_link("/url")
93
+ crawler.links_to_crawl.should == [Relevance::Tarantula::Link.new("/transformed")]
94
+ crawler.links_queued.should == Set.new([Relevance::Tarantula::Link.new("/transformed")])
95
+ end
107
96
 
108
- describe 'Relevance::Tarantula::Crawler#crawling' do
97
+ it 'queues and remembers forms' do
98
+ crawler = Relevance::Tarantula::Crawler.new
99
+ form = Hpricot('<form action="/action" method="post"/>').at('form')
100
+ signature = Relevance::Tarantula::FormSubmission.new(Relevance::Tarantula::Form.new(form)).signature
101
+ crawler.queue_form(form)
102
+ crawler.forms_to_crawl.size.should == 1
103
+ crawler.form_signatures_queued.should == Set.new([signature])
104
+ end
109
105
 
110
- it "converts ActiveRecord::RecordNotFound into a 404" do
111
- (proxy = stub_everything).expects(:send).raises(ActiveRecord::RecordNotFound)
112
- crawler = Relevance::Tarantula::Crawler.new
113
- crawler.proxy = proxy
114
- response = crawler.crawl_form stub_everything(:method => nil)
115
- response.code.should == "404"
116
- response.content_type.should == "text/plain"
117
- response.body.should == "ActiveRecord::RecordNotFound"
106
+ it 'remembers link referrer if there is one' do
107
+ crawler = Relevance::Tarantula::Crawler.new
108
+ crawler.queue_link("/url", "/some-referrer")
109
+ crawler.referrers.should == {Relevance::Tarantula::Link.new("/url") => "/some-referrer"}
110
+ end
111
+
118
112
  end
113
+
114
+ describe "crawling" do
115
+
116
+ it "converts ActiveRecord::RecordNotFound into a 404" do
117
+ (proxy = stub_everything).expects(:send).raises(ActiveRecord::RecordNotFound)
118
+ crawler = Relevance::Tarantula::Crawler.new
119
+ crawler.proxy = proxy
120
+ response = crawler.crawl_form stub_everything(:method => nil)
121
+ response.code.should == "404"
122
+ response.content_type.should == "text/plain"
123
+ response.body.should == "ActiveRecord::RecordNotFound"
124
+ end
119
125
 
120
- it "does four things with each link: get, log, handle, and blip" do
121
- crawler = Relevance::Tarantula::Crawler.new
122
- crawler.proxy = stub
123
- response = stub(:code => "200")
124
- crawler.links_to_crawl = [stub(:href => "/foo1", :method => :get), stub(:href => "/foo2", :method => :get)]
125
- crawler.proxy.expects(:get).returns(response).times(2)
126
- crawler.expects(:log).times(2)
127
- crawler.expects(:handle_link_results).times(2)
128
- crawler.expects(:blip).times(2)
129
- crawler.crawl_queued_links
130
- crawler.links_to_crawl.should == []
131
- end
126
+ it "does four things with each link: get, log, handle, and blip" do
127
+ crawler = Relevance::Tarantula::Crawler.new
128
+ crawler.proxy = stub
129
+ response = stub(:code => "200")
130
+ crawler.links_to_crawl = [stub(:href => "/foo1", :method => :get), stub(:href => "/foo2", :method => :get)]
131
+ crawler.proxy.expects(:get).returns(response).times(2)
132
+ crawler.expects(:log).times(2)
133
+ crawler.expects(:handle_link_results).times(2)
134
+ crawler.expects(:blip).times(2)
135
+ crawler.crawl_queued_links
136
+ crawler.links_to_crawl.should == []
137
+ end
138
+
139
+ it "invokes queued forms, logs responses, and calls handlers" do
140
+ crawler = Relevance::Tarantula::Crawler.new
141
+ crawler.forms_to_crawl << stub_everything(:method => "get",
142
+ :action => "/foo",
143
+ :data => "some data",
144
+ :to_s => "stub")
145
+ crawler.proxy = stub_everything(:send => stub(:code => "200" ))
146
+ crawler.expects(:log).with("Response 200 for stub")
147
+ crawler.expects(:blip)
148
+ crawler.crawl_queued_forms
149
+ end
150
+
151
+ it "breaks out early if a timeout is set" do
152
+ crawler = Relevance::Tarantula::Crawler.new
153
+ stub_puts_and_print(crawler)
154
+ crawler.proxy = stub
155
+ response = stub(:code => "200")
156
+ crawler.links_to_crawl = [stub(:href => "/foo", :method => :get)]
157
+ crawler.proxy.expects(:get).returns(response).times(4)
158
+ crawler.forms_to_crawl << stub_everything(:method => "post",
159
+ :action => "/foo",
160
+ :data => "some data",
161
+ :to_s => "stub")
162
+ crawler.proxy.expects(:post).returns(response).times(2)
163
+ crawler.expects(:links_completed_count).returns(0,1,2,3,4,5).times(6)
164
+ crawler.times_to_crawl = 2
165
+ crawler.crawl
166
+
167
+ end
168
+
169
+ it "resets to the initial links/forms on subsequent crawls when times_to_crawl > 1" do
170
+ crawler = Relevance::Tarantula::Crawler.new
171
+ stub_puts_and_print(crawler)
172
+ crawler.proxy = stub
173
+ response = stub(:code => "200")
174
+ crawler.links_to_crawl = [stub(:href => "/foo", :method => :get)]
175
+ crawler.proxy.expects(:get).returns(response).times(4) # (stub and "/") * 2
176
+ crawler.forms_to_crawl << stub_everything(:method => "post",
177
+ :action => "/foo",
178
+ :data => "some data",
179
+ :to_s => "stub")
180
+ crawler.proxy.expects(:post).returns(response).times(2)
181
+ crawler.expects(:links_completed_count).returns(0,1,2,3,4,5).times(6)
182
+ crawler.times_to_crawl = 2
183
+ crawler.crawl
184
+ end
132
185
 
133
- it "invokes queued forms, logs responses, and calls handlers" do
134
- crawler = Relevance::Tarantula::Crawler.new
135
- crawler.forms_to_crawl << stub_everything(:method => "get",
136
- :action => "/foo",
137
- :data => "some data",
138
- :to_s => "stub")
139
- crawler.proxy = stub_everything(:send => stub(:code => "200" ))
140
- crawler.expects(:log).with("Response 200 for stub")
141
- crawler.expects(:blip)
142
- crawler.crawl_queued_forms
143
186
  end
144
187
 
145
- it "resets to the initial links/forms on subsequent crawls when times_to_crawl > 1" do
146
- crawler = Relevance::Tarantula::Crawler.new
147
- stub_puts_and_print(crawler)
148
- crawler.proxy = stub
149
- response = stub(:code => "200")
150
- crawler.links_to_crawl = [stub(:href => "/foo", :method => :get)]
151
- crawler.proxy.expects(:get).returns(response).times(4) # (stub and "/") * 2
152
- crawler.forms_to_crawl << stub_everything(:method => "post",
153
- :action => "/foo",
154
- :data => "some data",
155
- :to_s => "stub")
156
- crawler.proxy.expects(:post).returns(response).times(2)
157
- crawler.expects(:links_completed_count).returns(*(0..6).to_a).times(6)
158
- crawler.times_to_crawl = 2
159
- crawler.crawl
160
- end
161
- end
188
+ describe "report_results" do
162
189
 
163
- describe 'Crawler blip' do
164
- it "blips the current progress if !verbose" do
165
- crawler = Relevance::Tarantula::Crawler.new
166
- crawler.stubs(:verbose).returns false
167
- crawler.expects(:print).with("\r 0 of 0 links completed ")
168
- crawler.blip
169
- end
170
- it "blips nothing if verbose" do
171
- crawler = Relevance::Tarantula::Crawler.new
172
- crawler.stubs(:verbose).returns true
173
- crawler.expects(:print).never
174
- crawler.blip
190
+ it "delegates to generate_reports" do
191
+ crawler = Relevance::Tarantula::Crawler.new
192
+ crawler.expects(:generate_reports)
193
+ crawler.report_results
194
+ end
195
+
175
196
  end
176
- end
197
+
198
+ describe "blip" do
177
199
 
178
- describe 'Relevance::Tarantula::Crawler' do
179
- it "is finished when the links and forms are crawled" do
180
- crawler = Relevance::Tarantula::Crawler.new
181
- crawler.finished?.should == true
200
+ it "blips the current progress if !verbose" do
201
+ crawler = Relevance::Tarantula::Crawler.new
202
+ crawler.stubs(:verbose).returns false
203
+ crawler.stubs(:timeout_if_too_long)
204
+ crawler.expects(:print).with("\r 0 of 0 links completed ")
205
+ crawler.blip
206
+ end
207
+
208
+ it "blips nothing if verbose" do
209
+ crawler = Relevance::Tarantula::Crawler.new
210
+ crawler.stubs(:verbose).returns true
211
+ crawler.expects(:print).never
212
+ crawler.blip
213
+ end
214
+
182
215
  end
216
+
217
+ describe "finished?" do
183
218
 
184
- it "isn't finished when links remain" do
185
- crawler = Relevance::Tarantula::Crawler.new
186
- crawler.links_to_crawl = [:stub_link]
187
- crawler.finished?.should == false
188
- end
219
+ it "is finished when the links and forms are crawled" do
220
+ crawler = Relevance::Tarantula::Crawler.new
221
+ crawler.finished?.should == true
222
+ end
189
223
 
190
- it "isn't finished when links remain" do
191
- crawler = Relevance::Tarantula::Crawler.new
192
- crawler.forms_to_crawl = [:stub_form]
193
- crawler.finished?.should == false
224
+ it "isn't finished when links remain" do
225
+ crawler = Relevance::Tarantula::Crawler.new
226
+ crawler.links_to_crawl = [:stub_link]
227
+ crawler.finished?.should == false
228
+ end
229
+
230
+ it "isn't finished when links remain" do
231
+ crawler = Relevance::Tarantula::Crawler.new
232
+ crawler.forms_to_crawl = [:stub_form]
233
+ crawler.finished?.should == false
234
+ end
235
+
194
236
  end
195
-
237
+
196
238
  it "crawls links and forms again and again until finished?==true" do
197
239
  crawler = Relevance::Tarantula::Crawler.new
198
240
  crawler.expects(:finished?).times(3).returns(false, false, true)
199
241
  crawler.expects(:crawl_queued_links).times(2)
200
242
  crawler.expects(:crawl_queued_forms).times(2)
201
- crawler.do_crawl
243
+ crawler.do_crawl(1)
202
244
  end
203
245
 
204
246
  it "asks each reporter to write its report in report_dir" do
@@ -225,72 +267,120 @@ describe 'Relevance::Tarantula::Crawler' do
225
267
  crawler.should_skip_link?(Relevance::Tarantula::Link.new("/foo")).should == true
226
268
  end
227
269
 
228
- end
229
-
230
- describe "Crawler link skipping" do
231
- before do
232
- @crawler = Relevance::Tarantula::Crawler.new
233
- end
234
-
235
- it "skips links that are too long" do
236
- @crawler.should_skip_link?(Relevance::Tarantula::Link.new("/foo")).should == false
237
- @crawler.max_url_length = 2
238
- @crawler.expects(:log).with("Skipping long url /foo")
239
- @crawler.should_skip_link?(Relevance::Tarantula::Link.new("/foo")).should == true
240
- end
270
+ describe "link skipping" do
271
+
272
+ before { @crawler = Relevance::Tarantula::Crawler.new }
273
+
274
+ it "skips links that are too long" do
275
+ @crawler.should_skip_link?(Relevance::Tarantula::Link.new("/foo")).should == false
276
+ @crawler.max_url_length = 2
277
+ @crawler.expects(:log).with("Skipping long url /foo")
278
+ @crawler.should_skip_link?(Relevance::Tarantula::Link.new("/foo")).should == true
279
+ end
241
280
 
242
- it "skips outbound links (those that begin with http)" do
243
- @crawler.expects(:log).with("Skipping http-anything")
244
- @crawler.should_skip_link?(Relevance::Tarantula::Link.new("http-anything")).should == true
245
- end
281
+ it "skips outbound links (those that begin with http)" do
282
+ @crawler.expects(:log).with("Skipping http-anything")
283
+ @crawler.should_skip_link?(Relevance::Tarantula::Link.new("http-anything")).should == true
284
+ end
246
285
 
247
- it "skips javascript links (those that begin with javascript)" do
248
- @crawler.expects(:log).with("Skipping javascript-anything")
249
- @crawler.should_skip_link?(Relevance::Tarantula::Link.new("javascript-anything")).should == true
250
- end
286
+ it "skips javascript links (those that begin with javascript)" do
287
+ @crawler.expects(:log).with("Skipping javascript-anything")
288
+ @crawler.should_skip_link?(Relevance::Tarantula::Link.new("javascript-anything")).should == true
289
+ end
251
290
 
252
- it "skips mailto links (those that begin with http)" do
253
- @crawler.expects(:log).with("Skipping mailto-anything")
254
- @crawler.should_skip_link?(Relevance::Tarantula::Link.new("mailto-anything")).should == true
255
- end
291
+ it "skips mailto links (those that begin with http)" do
292
+ @crawler.expects(:log).with("Skipping mailto-anything")
293
+ @crawler.should_skip_link?(Relevance::Tarantula::Link.new("mailto-anything")).should == true
294
+ end
256
295
 
257
- it 'skips blank links' do
258
- @crawler.queue_link(nil)
259
- @crawler.links_to_crawl.should == []
260
- @crawler.queue_link("")
261
- @crawler.links_to_crawl.should == []
262
- end
296
+ it 'skips blank links' do
297
+ @crawler.queue_link(nil)
298
+ @crawler.links_to_crawl.should == []
299
+ @crawler.queue_link("")
300
+ @crawler.links_to_crawl.should == []
301
+ end
263
302
 
264
- it "logs and skips links that match a pattern" do
265
- @crawler.expects(:log).with("Skipping /the-red-button")
266
- @crawler.skip_uri_patterns << /red-button/
267
- @crawler.queue_link("/blue-button").should == Relevance::Tarantula::Link.new("/blue-button")
268
- @crawler.queue_link("/the-red-button").should == nil
269
- end
303
+ it "logs and skips links that match a pattern" do
304
+ @crawler.expects(:log).with("Skipping /the-red-button")
305
+ @crawler.skip_uri_patterns << /red-button/
306
+ @crawler.queue_link("/blue-button").should == Relevance::Tarantula::Link.new("/blue-button")
307
+ @crawler.queue_link("/the-red-button").should == nil
308
+ end
270
309
 
271
- it "logs and skips form submissions that match a pattern" do
272
- @crawler.expects(:log).with("Skipping /reset-password-form")
273
- @crawler.skip_uri_patterns << /reset-password/
274
- fs = stub_everything(:action => "/reset-password-form")
275
- @crawler.should_skip_form_submission?(fs).should == true
310
+ it "logs and skips form submissions that match a pattern" do
311
+ @crawler.expects(:log).with("Skipping /reset-password-form")
312
+ @crawler.skip_uri_patterns << /reset-password/
313
+ fs = stub_everything(:action => "/reset-password-form")
314
+ @crawler.should_skip_form_submission?(fs).should == true
315
+ end
276
316
  end
277
- end
317
+
318
+ describe "allow_nnn_for" do
278
319
 
279
- describe "allow_nnn_for" do
280
- it "installs result as a response_code_handler" do
281
- crawler = Relevance::Tarantula::Crawler.new
282
- crawler.response_code_handler.should == Relevance::Tarantula::Result
320
+ it "installs result as a response_code_handler" do
321
+ crawler = Relevance::Tarantula::Crawler.new
322
+ crawler.response_code_handler.should == Relevance::Tarantula::Result
323
+ end
324
+
325
+ it "delegates to the response_code_handler" do
326
+ crawler = Relevance::Tarantula::Crawler.new
327
+ (response_code_handler = mock).expects(:allow_404_for).with(:stub)
328
+ crawler.response_code_handler = response_code_handler
329
+ crawler.allow_404_for(:stub)
330
+ end
331
+
332
+ it "chains up to super for method_missing" do
333
+ crawler = Relevance::Tarantula::Crawler.new
334
+ lambda{crawler.foo}.should raise_error(NoMethodError)
335
+ end
336
+
283
337
  end
284
338
 
285
- it "delegates to the response_code_handler" do
286
- crawler = Relevance::Tarantula::Crawler.new
287
- (response_code_handler = mock).expects(:allow_404_for).with(:stub)
288
- crawler.response_code_handler = response_code_handler
289
- crawler.allow_404_for(:stub)
339
+ describe "timeouts" do
340
+
341
+ it "sets start and end times for a single crawl" do
342
+ start_time = Time.parse("March 1st, 2008 10:00am")
343
+ end_time = Time.parse("March 1st, 2008 10:10am")
344
+ Time.stubs(:now).returns(start_time, end_time)
345
+
346
+ crawler = Relevance::Tarantula::Crawler.new
347
+ stub_puts_and_print(crawler)
348
+ crawler.proxy = stub_everything(:get => response = stub(:code => "200"))
349
+ crawler.crawl
350
+ crawler.crawl_start_times.first.should == start_time
351
+ crawler.crawl_end_times.first.should == end_time
352
+ end
353
+
354
+ it "has elasped time for a crawl" do
355
+ start_time = Time.parse("March 1st, 2008 10:00am")
356
+ elasped_time_check = Time.parse("March 1st, 2008, 10:10:00am")
357
+ Time.stubs(:now).returns(start_time, elasped_time_check)
358
+
359
+ crawler = Relevance::Tarantula::Crawler.new
360
+ stub_puts_and_print(crawler)
361
+ crawler.proxy = stub_everything(:get => response = stub(:code => "200"))
362
+ crawler.crawl
363
+ crawler.elasped_time_for_pass(0).should == 600.seconds
364
+ end
365
+
366
+ it "raises out of the crawl if elasped time is greater then the crawl timeout" do
367
+ start_time = Time.parse("March 1st, 2008 10:00am")
368
+ elasped_time_check = Time.parse("March 1st, 2008, 10:35:00am")
369
+ Time.stubs(:now).returns(start_time, elasped_time_check)
370
+
371
+ crawler = Relevance::Tarantula::Crawler.new
372
+ crawler.crawl_timeout = 5.minutes
373
+
374
+ crawler.links_to_crawl = [stub(:href => "/foo1", :method => :get), stub(:href => "/foo2", :method => :get)]
375
+ crawler.proxy = stub
376
+ crawler.proxy.stubs(:get).returns(response = stub(:code => "200"))
377
+
378
+ stub_puts_and_print(crawler)
379
+ lambda {
380
+ crawler.do_crawl(0)
381
+ }.should raise_error
382
+ end
383
+
290
384
  end
291
385
 
292
- it "chains up to super for method_missing" do
293
- crawler = Relevance::Tarantula::Crawler.new
294
- lambda{crawler.foo}.should raise_error(NoMethodError)
295
- end
296
- end
386
+ end