tarantula 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. data/CHANGELOG +36 -2
  2. data/README.rdoc +17 -0
  3. data/Rakefile +20 -5
  4. data/VERSION.yml +1 -1
  5. data/examples/example_helper.rb +13 -15
  6. data/examples/relevance/core_extensions/ellipsize_example.rb +1 -1
  7. data/examples/relevance/core_extensions/file_example.rb +1 -1
  8. data/examples/relevance/core_extensions/response_example.rb +1 -1
  9. data/examples/relevance/core_extensions/test_case_example.rb +5 -1
  10. data/examples/relevance/tarantula/attack_form_submission_example.rb +1 -1
  11. data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
  12. data/examples/relevance/tarantula/crawler_example.rb +313 -223
  13. data/examples/relevance/tarantula/form_example.rb +1 -1
  14. data/examples/relevance/tarantula/form_submission_example.rb +1 -1
  15. data/examples/relevance/tarantula/html_document_handler_example.rb +1 -1
  16. data/examples/relevance/tarantula/html_report_helper_example.rb +1 -1
  17. data/examples/relevance/tarantula/html_reporter_example.rb +1 -1
  18. data/examples/relevance/tarantula/invalid_html_handler_example.rb +1 -1
  19. data/examples/relevance/tarantula/io_reporter_example.rb +1 -1
  20. data/examples/relevance/tarantula/link_example.rb +1 -1
  21. data/examples/relevance/tarantula/log_grabber_example.rb +1 -1
  22. data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
  23. data/examples/relevance/tarantula/result_example.rb +1 -1
  24. data/examples/relevance/tarantula/tidy_handler_example.rb +1 -1
  25. data/examples/relevance/tarantula/transform_example.rb +1 -1
  26. data/examples/relevance/tarantula_example.rb +1 -1
  27. data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
  28. data/lib/relevance/core_extensions/test_case.rb +8 -1
  29. data/lib/relevance/tarantula.rb +1 -1
  30. data/lib/relevance/tarantula/crawler.rb +39 -15
  31. data/lib/relevance/tarantula/index.html.erb +2 -2
  32. data/lib/relevance/tarantula/test_report.html.erb +1 -1
  33. data/lib/relevance/tarantula/tidy_handler.rb +1 -1
  34. metadata +53 -29
  35. data/examples/relevance/tarantula/rails_init_example.rb +0 -14
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Form large example" do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::FormSubmission" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::HtmlDocumentHandler" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  module HtmlReportHelperSpec
4
4
  # Is there an idiom for this?
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::HtmlReporter file output" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::InvalidHtmlHandler" do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe 'Relevance::Tarantula::IOReporter' do
4
4
  it "reports errors to stderr and then raises" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Link" do
4
4
  include ActionView::Helpers::UrlHelper
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe 'Relevance::Tarantula::LogGrabber' do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::RailsIntegrationProxy rails_integration_test" do
4
4
  before {
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Result" do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  if defined?(Tidy) && ENV['TIDY_PATH']
4
4
  describe "Relevance::Tarantula::TidyHandler default" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "../..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Transform" do
4
4
  it "can do a simple replace" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "example_helper.rb"))
2
2
 
3
3
  describe Relevance::Tarantula do
4
4
  include Relevance::Tarantula
@@ -0,0 +1,11 @@
1
+ if RUBY_VERSION == "1.8.7" # fix interaction between Ruby 187 and Rails 202, so we can at least run the test suite on that combination
2
+ unless '1.9'.respond_to?(:force_encoding)
3
+ String.class_eval do
4
+ begin
5
+ remove_method :chars
6
+ rescue NameError
7
+ # OK
8
+ end
9
+ end
10
+ end
11
+ end
@@ -1,4 +1,7 @@
1
- class Test::Unit::TestCase
1
+ require 'action_controller/integration'
2
+
3
+ module Relevance::CoreExtensions::TestCaseExtensions
4
+
2
5
  def tarantula_crawl(integration_test, options = {})
3
6
  url = options[:url] || "/"
4
7
  t = tarantula_crawler(integration_test, options)
@@ -8,5 +11,9 @@ class Test::Unit::TestCase
8
11
  def tarantula_crawler(integration_test, options = {})
9
12
  Relevance::Tarantula::RailsIntegrationProxy.rails_integration_test(integration_test, options)
10
13
  end
14
+
11
15
  end
12
16
 
17
+ if defined? ActionController::IntegrationTest
18
+ ActionController::IntegrationTest.class_eval { include Relevance::CoreExtensions::TestCaseExtensions }
19
+ end
@@ -2,7 +2,6 @@ TARANTULA_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "../.."))
2
2
 
3
3
  require 'forwardable'
4
4
  require 'erb'
5
- require 'rubygems'
6
5
  require 'active_support'
7
6
  require 'action_controller'
8
7
 
@@ -38,6 +37,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "e
38
37
  require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "file"))
39
38
  require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "response"))
40
39
  require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "metaclass"))
40
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "string_chars_fix"))
41
41
 
42
42
  require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_reporter"))
43
43
  require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_report_helper"))
@@ -1,3 +1,5 @@
1
+ require 'active_record'
2
+ require 'active_record/base'
1
3
  require File.expand_path(File.join(File.dirname(__FILE__), "rails_integration_proxy"))
2
4
  require File.expand_path(File.join(File.dirname(__FILE__), "html_document_handler.rb"))
3
5
 
@@ -5,11 +7,13 @@ class Relevance::Tarantula::Crawler
5
7
  extend Forwardable
6
8
  include Relevance::Tarantula
7
9
 
10
+ class CrawlTimeout < RuntimeError; end
11
+
8
12
  attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
9
13
  :reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
10
14
  :form_signatures_queued, :max_url_length, :response_code_handler,
11
- :times_to_crawl, :fuzzers, :test_name
12
- attr_reader :transform_url_patterns, :referrers, :failures, :successes
15
+ :times_to_crawl, :fuzzers, :test_name, :crawl_timeout
16
+ attr_reader :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
13
17
 
14
18
  def initialize
15
19
  @max_url_length = 1024
@@ -20,6 +24,8 @@ class Relevance::Tarantula::Crawler
20
24
  @form_signatures_queued = Set.new
21
25
  @links_to_crawl = []
22
26
  @forms_to_crawl = []
27
+ @crawl_start_times, @crawl_end_times = [], []
28
+ @crawl_timeout = 20.minutes
23
29
  @referrers = {}
24
30
  @skip_uri_patterns = [
25
31
  /^javascript/,
@@ -51,13 +57,18 @@ class Relevance::Tarantula::Crawler
51
57
  orig_form_signatures_queued = @form_signatures_queued.dup
52
58
  orig_links_to_crawl = @links_to_crawl.dup
53
59
  orig_forms_to_crawl = @forms_to_crawl.dup
54
- @times_to_crawl.times do |i|
60
+ @times_to_crawl.times do |num|
55
61
  queue_link url
56
- do_crawl
57
-
58
- puts "#{(i+1).ordinalize} crawl" if @times_to_crawl > 1
62
+
63
+ begin
64
+ do_crawl num
65
+ rescue CrawlTimeout => e
66
+ puts e.message
67
+ end
68
+
69
+ puts "#{(num+1).ordinalize} crawl" if @times_to_crawl > 1
59
70
 
60
- if i + 1 < @times_to_crawl
71
+ if num + 1 < @times_to_crawl
61
72
  @links_queued = orig_links_queued
62
73
  @form_signatures_queued = orig_form_signatures_queued
63
74
  @links_to_crawl = orig_links_to_crawl
@@ -75,19 +86,21 @@ class Relevance::Tarantula::Crawler
75
86
  @links_to_crawl.empty? && @forms_to_crawl.empty?
76
87
  end
77
88
 
78
- def do_crawl
89
+ def do_crawl(number)
79
90
  while (!finished?)
80
- crawl_queued_links
81
- crawl_queued_forms
91
+ @crawl_start_times << Time.now
92
+ crawl_queued_links(number)
93
+ crawl_queued_forms(number)
94
+ @crawl_end_times << Time.now
82
95
  end
83
96
  end
84
97
 
85
- def crawl_queued_links
98
+ def crawl_queued_links(number = 0)
86
99
  while (link = @links_to_crawl.pop)
87
100
  response = proxy.send(link.method, link.href)
88
101
  log "Response #{response.code} for #{link}"
89
102
  handle_link_results(link, response)
90
- blip
103
+ blip(number)
91
104
  end
92
105
  end
93
106
 
@@ -122,13 +135,17 @@ class Relevance::Tarantula::Crawler
122
135
  Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
123
136
  end
124
137
 
125
- def crawl_queued_forms
138
+ def crawl_queued_forms(number = 0)
126
139
  while (form = @forms_to_crawl.pop)
127
140
  response = crawl_form(form)
128
141
  handle_form_results(form, response)
129
- blip
142
+ blip(number)
130
143
  end
131
144
  end
145
+
146
+ def elasped_time_for_pass(num)
147
+ Time.now - crawl_start_times[num]
148
+ end
132
149
 
133
150
  def grab_log!
134
151
  @log_grabber && @log_grabber.grab!
@@ -232,9 +249,16 @@ class Relevance::Tarantula::Crawler
232
249
  total_links_count - links_remaining_count
233
250
  end
234
251
 
235
- def blip
252
+ def blip(number = 0)
236
253
  unless verbose
237
254
  print "\r #{links_completed_count} of #{total_links_count} links completed "
255
+ timeout_if_too_long(number)
256
+ end
257
+ end
258
+
259
+ def timeout_if_too_long(number = 0)
260
+ if elasped_time_for_pass(number) > crawl_timeout
261
+ raise CrawlTimeout, "Exceeded crawl timeout of #{crawl_timeout} seconds - skipping to the next crawl..."
238
262
  end
239
263
  end
240
264
  end
@@ -23,9 +23,9 @@
23
23
  and lives at <a href="http://github.com/relevance/tarantula">http://github.com/relevance/tarantula</a>.</p>
24
24
  <hr/>
25
25
  </div>
26
- <div id="page">
26
+ <div id="page">
27
27
  <div id="tabs-container">
28
- <ul class="tabs"></ul>
28
+ <ul class="tabs"> </ul>
29
29
  </div>
30
30
 
31
31
  <div id="results-container">
@@ -18,7 +18,7 @@
18
18
  <tbody>
19
19
  <% send(result_type).sort{|x,y| y.code.to_s <=> x.code.to_s}.each_with_index do |result,i| %>
20
20
  <tr class="<%= (i%2 == 0) ? 'even' : 'odd' %>">
21
- <td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(30) %></a></td>
21
+ <td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(50) %></a></td>
22
22
  <td class="method"><%= result.method.to_s.upcase %></td> <!-- TODO Clean up demeter violation -->
23
23
  <td><span class="<%= class_for_code(result.code) %>"><%= result.code %></span></td>
24
24
  <td class="left"><%= result.description %></td>
@@ -3,7 +3,7 @@ begin
3
3
  gem 'tidy'
4
4
  require 'tidy'
5
5
  rescue Gem::LoadError
6
- # tidy not available
6
+ puts "Tidy gem not available -- 'gem install tidy' to get it."
7
7
  end
8
8
 
9
9
  if defined? Tidy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tarantula
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Relevance, Inc.
@@ -9,32 +9,48 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-02-20 00:00:00 -05:00
12
+ date: 2009-04-07 00:00:00 -04:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: htmlentities
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
16
35
  description: A big hairy fuzzy spider that crawls your site, wreaking havoc
17
36
  email: opensource@thinkrelevance.com
18
37
  executables: []
19
38
 
20
39
  extensions: []
21
40
 
22
- extra_rdoc_files: []
23
-
41
+ extra_rdoc_files:
42
+ - README.rdoc
24
43
  files:
25
44
  - CHANGELOG
26
45
  - MIT-LICENSE
27
- - Rakefile
28
46
  - README.rdoc
47
+ - Rakefile
29
48
  - VERSION.yml
30
49
  - examples/example_helper.rb
31
- - examples/relevance
32
- - examples/relevance/core_extensions
33
50
  - examples/relevance/core_extensions/ellipsize_example.rb
34
51
  - examples/relevance/core_extensions/file_example.rb
35
52
  - examples/relevance/core_extensions/response_example.rb
36
53
  - examples/relevance/core_extensions/test_case_example.rb
37
- - examples/relevance/tarantula
38
54
  - examples/relevance/tarantula/attack_form_submission_example.rb
39
55
  - examples/relevance/tarantula/attack_handler_example.rb
40
56
  - examples/relevance/tarantula/crawler_example.rb
@@ -47,35 +63,26 @@ files:
47
63
  - examples/relevance/tarantula/io_reporter_example.rb
48
64
  - examples/relevance/tarantula/link_example.rb
49
65
  - examples/relevance/tarantula/log_grabber_example.rb
50
- - examples/relevance/tarantula/rails_init_example.rb
51
66
  - examples/relevance/tarantula/rails_integration_proxy_example.rb
52
67
  - examples/relevance/tarantula/result_example.rb
53
- - examples/relevance/tarantula/STUB_RAILS_ROOT
54
- - examples/relevance/tarantula/STUB_RAILS_ROOT/tmp
55
- - examples/relevance/tarantula/STUB_RAILS_ROOT/tmp/tarantula
56
68
  - examples/relevance/tarantula/tidy_handler_example.rb
57
69
  - examples/relevance/tarantula/transform_example.rb
58
70
  - examples/relevance/tarantula_example.rb
59
- - laf/images
60
71
  - laf/images/button_active.png
61
72
  - laf/images/button_hover.png
62
73
  - laf/images/button_inactive.png
63
74
  - laf/images/header_bg.jpg
64
75
  - laf/images/logo.png
65
76
  - laf/images/tagline.png
66
- - laf/javascripts
67
77
  - laf/javascripts/jquery-1.2.3.js
68
78
  - laf/javascripts/jquery-ui-tabs.js
69
79
  - laf/javascripts/jquery.tablesorter.js
80
+ - laf/javascripts/niftyLayout.js
70
81
  - laf/javascripts/niftycube-details.js
71
82
  - laf/javascripts/niftycube.js
72
- - laf/javascripts/niftyLayout.js
73
83
  - laf/javascripts/tarantula.js
74
- - laf/stylesheets
75
84
  - laf/stylesheets/tarantula.css
76
- - laf/v2
77
85
  - laf/v2/detail.html
78
- - laf/v2/images
79
86
  - laf/v2/images/button_active.png
80
87
  - laf/v2/images/button_hover.png
81
88
  - laf/v2/images/button_inactive.png
@@ -83,16 +90,14 @@ files:
83
90
  - laf/v2/images/logo.png
84
91
  - laf/v2/images/tagline.png
85
92
  - laf/v2/index.html
86
- - laf/v2/stylesheets
87
93
  - laf/v2/stylesheets/tarantula.v2.css
88
- - lib/relevance
89
- - lib/relevance/core_extensions
90
94
  - lib/relevance/core_extensions/ellipsize.rb
91
95
  - lib/relevance/core_extensions/file.rb
92
96
  - lib/relevance/core_extensions/metaclass.rb
93
97
  - lib/relevance/core_extensions/response.rb
98
+ - lib/relevance/core_extensions/string_chars_fix.rb
94
99
  - lib/relevance/core_extensions/test_case.rb
95
- - lib/relevance/tarantula
100
+ - lib/relevance/tarantula.rb
96
101
  - lib/relevance/tarantula/attack.rb
97
102
  - lib/relevance/tarantula/attack_form_submission.rb
98
103
  - lib/relevance/tarantula/attack_handler.rb
@@ -115,14 +120,12 @@ files:
115
120
  - lib/relevance/tarantula/test_report.html.erb
116
121
  - lib/relevance/tarantula/tidy_handler.rb
117
122
  - lib/relevance/tarantula/transform.rb
118
- - lib/relevance/tarantula.rb
119
123
  - tasks/tarantula_tasks.rake
120
124
  - template/tarantula_test.rb
121
125
  has_rdoc: true
122
126
  homepage: http://github.com/relevance/tarantula
123
127
  post_install_message:
124
128
  rdoc_options:
125
- - --inline-source
126
129
  - --charset=UTF-8
127
130
  require_paths:
128
131
  - lib
@@ -140,10 +143,31 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
143
  version:
141
144
  requirements: []
142
145
 
143
- rubyforge_project:
146
+ rubyforge_project: thinkrelevance
144
147
  rubygems_version: 1.3.1
145
148
  signing_key:
146
149
  specification_version: 2
147
150
  summary: A big hairy fuzzy spider that crawls your site, wreaking havoc
148
- test_files: []
149
-
151
+ test_files:
152
+ - examples/example_helper.rb
153
+ - examples/relevance/core_extensions/ellipsize_example.rb
154
+ - examples/relevance/core_extensions/file_example.rb
155
+ - examples/relevance/core_extensions/response_example.rb
156
+ - examples/relevance/core_extensions/test_case_example.rb
157
+ - examples/relevance/tarantula/attack_form_submission_example.rb
158
+ - examples/relevance/tarantula/attack_handler_example.rb
159
+ - examples/relevance/tarantula/crawler_example.rb
160
+ - examples/relevance/tarantula/form_example.rb
161
+ - examples/relevance/tarantula/form_submission_example.rb
162
+ - examples/relevance/tarantula/html_document_handler_example.rb
163
+ - examples/relevance/tarantula/html_report_helper_example.rb
164
+ - examples/relevance/tarantula/html_reporter_example.rb
165
+ - examples/relevance/tarantula/invalid_html_handler_example.rb
166
+ - examples/relevance/tarantula/io_reporter_example.rb
167
+ - examples/relevance/tarantula/link_example.rb
168
+ - examples/relevance/tarantula/log_grabber_example.rb
169
+ - examples/relevance/tarantula/rails_integration_proxy_example.rb
170
+ - examples/relevance/tarantula/result_example.rb
171
+ - examples/relevance/tarantula/tidy_handler_example.rb
172
+ - examples/relevance/tarantula/transform_example.rb
173
+ - examples/relevance/tarantula_example.rb