tarantula 0.1.5 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/CHANGELOG +36 -2
  2. data/README.rdoc +17 -0
  3. data/Rakefile +20 -5
  4. data/VERSION.yml +1 -1
  5. data/examples/example_helper.rb +13 -15
  6. data/examples/relevance/core_extensions/ellipsize_example.rb +1 -1
  7. data/examples/relevance/core_extensions/file_example.rb +1 -1
  8. data/examples/relevance/core_extensions/response_example.rb +1 -1
  9. data/examples/relevance/core_extensions/test_case_example.rb +5 -1
  10. data/examples/relevance/tarantula/attack_form_submission_example.rb +1 -1
  11. data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
  12. data/examples/relevance/tarantula/crawler_example.rb +313 -223
  13. data/examples/relevance/tarantula/form_example.rb +1 -1
  14. data/examples/relevance/tarantula/form_submission_example.rb +1 -1
  15. data/examples/relevance/tarantula/html_document_handler_example.rb +1 -1
  16. data/examples/relevance/tarantula/html_report_helper_example.rb +1 -1
  17. data/examples/relevance/tarantula/html_reporter_example.rb +1 -1
  18. data/examples/relevance/tarantula/invalid_html_handler_example.rb +1 -1
  19. data/examples/relevance/tarantula/io_reporter_example.rb +1 -1
  20. data/examples/relevance/tarantula/link_example.rb +1 -1
  21. data/examples/relevance/tarantula/log_grabber_example.rb +1 -1
  22. data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
  23. data/examples/relevance/tarantula/result_example.rb +1 -1
  24. data/examples/relevance/tarantula/tidy_handler_example.rb +1 -1
  25. data/examples/relevance/tarantula/transform_example.rb +1 -1
  26. data/examples/relevance/tarantula_example.rb +1 -1
  27. data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
  28. data/lib/relevance/core_extensions/test_case.rb +8 -1
  29. data/lib/relevance/tarantula.rb +1 -1
  30. data/lib/relevance/tarantula/crawler.rb +39 -15
  31. data/lib/relevance/tarantula/index.html.erb +2 -2
  32. data/lib/relevance/tarantula/test_report.html.erb +1 -1
  33. data/lib/relevance/tarantula/tidy_handler.rb +1 -1
  34. metadata +53 -29
  35. data/examples/relevance/tarantula/rails_init_example.rb +0 -14
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Form large example" do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::FormSubmission" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::HtmlDocumentHandler" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  module HtmlReportHelperSpec
4
4
  # Is there an idiom for this?
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::HtmlReporter file output" do
4
4
 
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::InvalidHtmlHandler" do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe 'Relevance::Tarantula::IOReporter' do
4
4
  it "reports errors to stderr and then raises" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Link" do
4
4
  include ActionView::Helpers::UrlHelper
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe 'Relevance::Tarantula::LogGrabber' do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::RailsIntegrationProxy rails_integration_test" do
4
4
  before {
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Result" do
4
4
  before do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
2
 
3
3
  if defined?(Tidy) && ENV['TIDY_PATH']
4
4
  describe "Relevance::Tarantula::TidyHandler default" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "../..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../..", "example_helper.rb"))
2
2
 
3
3
  describe "Relevance::Tarantula::Transform" do
4
4
  it "can do a simple replace" do
@@ -1,4 +1,4 @@
1
- require File.join(File.dirname(__FILE__), "..", "example_helper.rb")
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "example_helper.rb"))
2
2
 
3
3
  describe Relevance::Tarantula do
4
4
  include Relevance::Tarantula
@@ -0,0 +1,11 @@
1
+ if RUBY_VERSION == "1.8.7" # fix interaction between Ruby 187 and Rails 202, so we can at least run the test suite on that combination
2
+ unless '1.9'.respond_to?(:force_encoding)
3
+ String.class_eval do
4
+ begin
5
+ remove_method :chars
6
+ rescue NameError
7
+ # OK
8
+ end
9
+ end
10
+ end
11
+ end
@@ -1,4 +1,7 @@
1
- class Test::Unit::TestCase
1
+ require 'action_controller/integration'
2
+
3
+ module Relevance::CoreExtensions::TestCaseExtensions
4
+
2
5
  def tarantula_crawl(integration_test, options = {})
3
6
  url = options[:url] || "/"
4
7
  t = tarantula_crawler(integration_test, options)
@@ -8,5 +11,9 @@ class Test::Unit::TestCase
8
11
  def tarantula_crawler(integration_test, options = {})
9
12
  Relevance::Tarantula::RailsIntegrationProxy.rails_integration_test(integration_test, options)
10
13
  end
14
+
11
15
  end
12
16
 
17
+ if defined? ActionController::IntegrationTest
18
+ ActionController::IntegrationTest.class_eval { include Relevance::CoreExtensions::TestCaseExtensions }
19
+ end
@@ -2,7 +2,6 @@ TARANTULA_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "../.."))
2
2
 
3
3
  require 'forwardable'
4
4
  require 'erb'
5
- require 'rubygems'
6
5
  require 'active_support'
7
6
  require 'action_controller'
8
7
 
@@ -38,6 +37,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "e
38
37
  require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "file"))
39
38
  require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "response"))
40
39
  require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "metaclass"))
40
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "string_chars_fix"))
41
41
 
42
42
  require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_reporter"))
43
43
  require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_report_helper"))
@@ -1,3 +1,5 @@
1
+ require 'active_record'
2
+ require 'active_record/base'
1
3
  require File.expand_path(File.join(File.dirname(__FILE__), "rails_integration_proxy"))
2
4
  require File.expand_path(File.join(File.dirname(__FILE__), "html_document_handler.rb"))
3
5
 
@@ -5,11 +7,13 @@ class Relevance::Tarantula::Crawler
5
7
  extend Forwardable
6
8
  include Relevance::Tarantula
7
9
 
10
+ class CrawlTimeout < RuntimeError; end
11
+
8
12
  attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
9
13
  :reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
10
14
  :form_signatures_queued, :max_url_length, :response_code_handler,
11
- :times_to_crawl, :fuzzers, :test_name
12
- attr_reader :transform_url_patterns, :referrers, :failures, :successes
15
+ :times_to_crawl, :fuzzers, :test_name, :crawl_timeout
16
+ attr_reader :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
13
17
 
14
18
  def initialize
15
19
  @max_url_length = 1024
@@ -20,6 +24,8 @@ class Relevance::Tarantula::Crawler
20
24
  @form_signatures_queued = Set.new
21
25
  @links_to_crawl = []
22
26
  @forms_to_crawl = []
27
+ @crawl_start_times, @crawl_end_times = [], []
28
+ @crawl_timeout = 20.minutes
23
29
  @referrers = {}
24
30
  @skip_uri_patterns = [
25
31
  /^javascript/,
@@ -51,13 +57,18 @@ class Relevance::Tarantula::Crawler
51
57
  orig_form_signatures_queued = @form_signatures_queued.dup
52
58
  orig_links_to_crawl = @links_to_crawl.dup
53
59
  orig_forms_to_crawl = @forms_to_crawl.dup
54
- @times_to_crawl.times do |i|
60
+ @times_to_crawl.times do |num|
55
61
  queue_link url
56
- do_crawl
57
-
58
- puts "#{(i+1).ordinalize} crawl" if @times_to_crawl > 1
62
+
63
+ begin
64
+ do_crawl num
65
+ rescue CrawlTimeout => e
66
+ puts e.message
67
+ end
68
+
69
+ puts "#{(num+1).ordinalize} crawl" if @times_to_crawl > 1
59
70
 
60
- if i + 1 < @times_to_crawl
71
+ if num + 1 < @times_to_crawl
61
72
  @links_queued = orig_links_queued
62
73
  @form_signatures_queued = orig_form_signatures_queued
63
74
  @links_to_crawl = orig_links_to_crawl
@@ -75,19 +86,21 @@ class Relevance::Tarantula::Crawler
75
86
  @links_to_crawl.empty? && @forms_to_crawl.empty?
76
87
  end
77
88
 
78
- def do_crawl
89
+ def do_crawl(number)
79
90
  while (!finished?)
80
- crawl_queued_links
81
- crawl_queued_forms
91
+ @crawl_start_times << Time.now
92
+ crawl_queued_links(number)
93
+ crawl_queued_forms(number)
94
+ @crawl_end_times << Time.now
82
95
  end
83
96
  end
84
97
 
85
- def crawl_queued_links
98
+ def crawl_queued_links(number = 0)
86
99
  while (link = @links_to_crawl.pop)
87
100
  response = proxy.send(link.method, link.href)
88
101
  log "Response #{response.code} for #{link}"
89
102
  handle_link_results(link, response)
90
- blip
103
+ blip(number)
91
104
  end
92
105
  end
93
106
 
@@ -122,13 +135,17 @@ class Relevance::Tarantula::Crawler
122
135
  Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
123
136
  end
124
137
 
125
- def crawl_queued_forms
138
+ def crawl_queued_forms(number = 0)
126
139
  while (form = @forms_to_crawl.pop)
127
140
  response = crawl_form(form)
128
141
  handle_form_results(form, response)
129
- blip
142
+ blip(number)
130
143
  end
131
144
  end
145
+
146
+ def elasped_time_for_pass(num)
147
+ Time.now - crawl_start_times[num]
148
+ end
132
149
 
133
150
  def grab_log!
134
151
  @log_grabber && @log_grabber.grab!
@@ -232,9 +249,16 @@ class Relevance::Tarantula::Crawler
232
249
  total_links_count - links_remaining_count
233
250
  end
234
251
 
235
- def blip
252
+ def blip(number = 0)
236
253
  unless verbose
237
254
  print "\r #{links_completed_count} of #{total_links_count} links completed "
255
+ timeout_if_too_long(number)
256
+ end
257
+ end
258
+
259
+ def timeout_if_too_long(number = 0)
260
+ if elasped_time_for_pass(number) > crawl_timeout
261
+ raise CrawlTimeout, "Exceeded crawl timeout of #{crawl_timeout} seconds - skipping to the next crawl..."
238
262
  end
239
263
  end
240
264
  end
@@ -23,9 +23,9 @@
23
23
  and lives at <a href="http://github.com/relevance/tarantula">http://github.com/relevance/tarantula</a>.</p>
24
24
  <hr/>
25
25
  </div>
26
- <div id="page">
26
+ <div id="page">
27
27
  <div id="tabs-container">
28
- <ul class="tabs"></ul>
28
+ <ul class="tabs"> </ul>
29
29
  </div>
30
30
 
31
31
  <div id="results-container">
@@ -18,7 +18,7 @@
18
18
  <tbody>
19
19
  <% send(result_type).sort{|x,y| y.code.to_s <=> x.code.to_s}.each_with_index do |result,i| %>
20
20
  <tr class="<%= (i%2 == 0) ? 'even' : 'odd' %>">
21
- <td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(30) %></a></td>
21
+ <td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(50) %></a></td>
22
22
  <td class="method"><%= result.method.to_s.upcase %></td> <!-- TODO Clean up demeter violation -->
23
23
  <td><span class="<%= class_for_code(result.code) %>"><%= result.code %></span></td>
24
24
  <td class="left"><%= result.description %></td>
@@ -3,7 +3,7 @@ begin
3
3
  gem 'tidy'
4
4
  require 'tidy'
5
5
  rescue Gem::LoadError
6
- # tidy not available
6
+ puts "Tidy gem not available -- 'gem install tidy' to get it."
7
7
  end
8
8
 
9
9
  if defined? Tidy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tarantula
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Relevance, Inc.
@@ -9,32 +9,48 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-02-20 00:00:00 -05:00
12
+ date: 2009-04-07 00:00:00 -04:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: htmlentities
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
16
35
  description: A big hairy fuzzy spider that crawls your site, wreaking havoc
17
36
  email: opensource@thinkrelevance.com
18
37
  executables: []
19
38
 
20
39
  extensions: []
21
40
 
22
- extra_rdoc_files: []
23
-
41
+ extra_rdoc_files:
42
+ - README.rdoc
24
43
  files:
25
44
  - CHANGELOG
26
45
  - MIT-LICENSE
27
- - Rakefile
28
46
  - README.rdoc
47
+ - Rakefile
29
48
  - VERSION.yml
30
49
  - examples/example_helper.rb
31
- - examples/relevance
32
- - examples/relevance/core_extensions
33
50
  - examples/relevance/core_extensions/ellipsize_example.rb
34
51
  - examples/relevance/core_extensions/file_example.rb
35
52
  - examples/relevance/core_extensions/response_example.rb
36
53
  - examples/relevance/core_extensions/test_case_example.rb
37
- - examples/relevance/tarantula
38
54
  - examples/relevance/tarantula/attack_form_submission_example.rb
39
55
  - examples/relevance/tarantula/attack_handler_example.rb
40
56
  - examples/relevance/tarantula/crawler_example.rb
@@ -47,35 +63,26 @@ files:
47
63
  - examples/relevance/tarantula/io_reporter_example.rb
48
64
  - examples/relevance/tarantula/link_example.rb
49
65
  - examples/relevance/tarantula/log_grabber_example.rb
50
- - examples/relevance/tarantula/rails_init_example.rb
51
66
  - examples/relevance/tarantula/rails_integration_proxy_example.rb
52
67
  - examples/relevance/tarantula/result_example.rb
53
- - examples/relevance/tarantula/STUB_RAILS_ROOT
54
- - examples/relevance/tarantula/STUB_RAILS_ROOT/tmp
55
- - examples/relevance/tarantula/STUB_RAILS_ROOT/tmp/tarantula
56
68
  - examples/relevance/tarantula/tidy_handler_example.rb
57
69
  - examples/relevance/tarantula/transform_example.rb
58
70
  - examples/relevance/tarantula_example.rb
59
- - laf/images
60
71
  - laf/images/button_active.png
61
72
  - laf/images/button_hover.png
62
73
  - laf/images/button_inactive.png
63
74
  - laf/images/header_bg.jpg
64
75
  - laf/images/logo.png
65
76
  - laf/images/tagline.png
66
- - laf/javascripts
67
77
  - laf/javascripts/jquery-1.2.3.js
68
78
  - laf/javascripts/jquery-ui-tabs.js
69
79
  - laf/javascripts/jquery.tablesorter.js
80
+ - laf/javascripts/niftyLayout.js
70
81
  - laf/javascripts/niftycube-details.js
71
82
  - laf/javascripts/niftycube.js
72
- - laf/javascripts/niftyLayout.js
73
83
  - laf/javascripts/tarantula.js
74
- - laf/stylesheets
75
84
  - laf/stylesheets/tarantula.css
76
- - laf/v2
77
85
  - laf/v2/detail.html
78
- - laf/v2/images
79
86
  - laf/v2/images/button_active.png
80
87
  - laf/v2/images/button_hover.png
81
88
  - laf/v2/images/button_inactive.png
@@ -83,16 +90,14 @@ files:
83
90
  - laf/v2/images/logo.png
84
91
  - laf/v2/images/tagline.png
85
92
  - laf/v2/index.html
86
- - laf/v2/stylesheets
87
93
  - laf/v2/stylesheets/tarantula.v2.css
88
- - lib/relevance
89
- - lib/relevance/core_extensions
90
94
  - lib/relevance/core_extensions/ellipsize.rb
91
95
  - lib/relevance/core_extensions/file.rb
92
96
  - lib/relevance/core_extensions/metaclass.rb
93
97
  - lib/relevance/core_extensions/response.rb
98
+ - lib/relevance/core_extensions/string_chars_fix.rb
94
99
  - lib/relevance/core_extensions/test_case.rb
95
- - lib/relevance/tarantula
100
+ - lib/relevance/tarantula.rb
96
101
  - lib/relevance/tarantula/attack.rb
97
102
  - lib/relevance/tarantula/attack_form_submission.rb
98
103
  - lib/relevance/tarantula/attack_handler.rb
@@ -115,14 +120,12 @@ files:
115
120
  - lib/relevance/tarantula/test_report.html.erb
116
121
  - lib/relevance/tarantula/tidy_handler.rb
117
122
  - lib/relevance/tarantula/transform.rb
118
- - lib/relevance/tarantula.rb
119
123
  - tasks/tarantula_tasks.rake
120
124
  - template/tarantula_test.rb
121
125
  has_rdoc: true
122
126
  homepage: http://github.com/relevance/tarantula
123
127
  post_install_message:
124
128
  rdoc_options:
125
- - --inline-source
126
129
  - --charset=UTF-8
127
130
  require_paths:
128
131
  - lib
@@ -140,10 +143,31 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
143
  version:
141
144
  requirements: []
142
145
 
143
- rubyforge_project:
146
+ rubyforge_project: thinkrelevance
144
147
  rubygems_version: 1.3.1
145
148
  signing_key:
146
149
  specification_version: 2
147
150
  summary: A big hairy fuzzy spider that crawls your site, wreaking havoc
148
- test_files: []
149
-
151
+ test_files:
152
+ - examples/example_helper.rb
153
+ - examples/relevance/core_extensions/ellipsize_example.rb
154
+ - examples/relevance/core_extensions/file_example.rb
155
+ - examples/relevance/core_extensions/response_example.rb
156
+ - examples/relevance/core_extensions/test_case_example.rb
157
+ - examples/relevance/tarantula/attack_form_submission_example.rb
158
+ - examples/relevance/tarantula/attack_handler_example.rb
159
+ - examples/relevance/tarantula/crawler_example.rb
160
+ - examples/relevance/tarantula/form_example.rb
161
+ - examples/relevance/tarantula/form_submission_example.rb
162
+ - examples/relevance/tarantula/html_document_handler_example.rb
163
+ - examples/relevance/tarantula/html_report_helper_example.rb
164
+ - examples/relevance/tarantula/html_reporter_example.rb
165
+ - examples/relevance/tarantula/invalid_html_handler_example.rb
166
+ - examples/relevance/tarantula/io_reporter_example.rb
167
+ - examples/relevance/tarantula/link_example.rb
168
+ - examples/relevance/tarantula/log_grabber_example.rb
169
+ - examples/relevance/tarantula/rails_integration_proxy_example.rb
170
+ - examples/relevance/tarantula/result_example.rb
171
+ - examples/relevance/tarantula/tidy_handler_example.rb
172
+ - examples/relevance/tarantula/transform_example.rb
173
+ - examples/relevance/tarantula_example.rb