tarantula 0.1.5 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +36 -2
- data/README.rdoc +17 -0
- data/Rakefile +20 -5
- data/VERSION.yml +1 -1
- data/examples/example_helper.rb +13 -15
- data/examples/relevance/core_extensions/ellipsize_example.rb +1 -1
- data/examples/relevance/core_extensions/file_example.rb +1 -1
- data/examples/relevance/core_extensions/response_example.rb +1 -1
- data/examples/relevance/core_extensions/test_case_example.rb +5 -1
- data/examples/relevance/tarantula/attack_form_submission_example.rb +1 -1
- data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
- data/examples/relevance/tarantula/crawler_example.rb +313 -223
- data/examples/relevance/tarantula/form_example.rb +1 -1
- data/examples/relevance/tarantula/form_submission_example.rb +1 -1
- data/examples/relevance/tarantula/html_document_handler_example.rb +1 -1
- data/examples/relevance/tarantula/html_report_helper_example.rb +1 -1
- data/examples/relevance/tarantula/html_reporter_example.rb +1 -1
- data/examples/relevance/tarantula/invalid_html_handler_example.rb +1 -1
- data/examples/relevance/tarantula/io_reporter_example.rb +1 -1
- data/examples/relevance/tarantula/link_example.rb +1 -1
- data/examples/relevance/tarantula/log_grabber_example.rb +1 -1
- data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
- data/examples/relevance/tarantula/result_example.rb +1 -1
- data/examples/relevance/tarantula/tidy_handler_example.rb +1 -1
- data/examples/relevance/tarantula/transform_example.rb +1 -1
- data/examples/relevance/tarantula_example.rb +1 -1
- data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
- data/lib/relevance/core_extensions/test_case.rb +8 -1
- data/lib/relevance/tarantula.rb +1 -1
- data/lib/relevance/tarantula/crawler.rb +39 -15
- data/lib/relevance/tarantula/index.html.erb +2 -2
- data/lib/relevance/tarantula/test_report.html.erb +1 -1
- data/lib/relevance/tarantula/tidy_handler.rb +1 -1
- metadata +53 -29
- data/examples/relevance/tarantula/rails_init_example.rb +0 -14
@@ -0,0 +1,11 @@
|
|
1
|
+
if RUBY_VERSION == "1.8.7" # fix interaction between Ruby 187 and Rails 202, so we can at least run the test suite on that combination
|
2
|
+
unless '1.9'.respond_to?(:force_encoding)
|
3
|
+
String.class_eval do
|
4
|
+
begin
|
5
|
+
remove_method :chars
|
6
|
+
rescue NameError
|
7
|
+
# OK
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
require 'action_controller/integration'
|
2
|
+
|
3
|
+
module Relevance::CoreExtensions::TestCaseExtensions
|
4
|
+
|
2
5
|
def tarantula_crawl(integration_test, options = {})
|
3
6
|
url = options[:url] || "/"
|
4
7
|
t = tarantula_crawler(integration_test, options)
|
@@ -8,5 +11,9 @@ class Test::Unit::TestCase
|
|
8
11
|
def tarantula_crawler(integration_test, options = {})
|
9
12
|
Relevance::Tarantula::RailsIntegrationProxy.rails_integration_test(integration_test, options)
|
10
13
|
end
|
14
|
+
|
11
15
|
end
|
12
16
|
|
17
|
+
if defined? ActionController::IntegrationTest
|
18
|
+
ActionController::IntegrationTest.class_eval { include Relevance::CoreExtensions::TestCaseExtensions }
|
19
|
+
end
|
data/lib/relevance/tarantula.rb
CHANGED
@@ -2,7 +2,6 @@ TARANTULA_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "../.."))
|
|
2
2
|
|
3
3
|
require 'forwardable'
|
4
4
|
require 'erb'
|
5
|
-
require 'rubygems'
|
6
5
|
require 'active_support'
|
7
6
|
require 'action_controller'
|
8
7
|
|
@@ -38,6 +37,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "e
|
|
38
37
|
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "file"))
|
39
38
|
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "response"))
|
40
39
|
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "metaclass"))
|
40
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "string_chars_fix"))
|
41
41
|
|
42
42
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_reporter"))
|
43
43
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_report_helper"))
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'active_record/base'
|
1
3
|
require File.expand_path(File.join(File.dirname(__FILE__), "rails_integration_proxy"))
|
2
4
|
require File.expand_path(File.join(File.dirname(__FILE__), "html_document_handler.rb"))
|
3
5
|
|
@@ -5,11 +7,13 @@ class Relevance::Tarantula::Crawler
|
|
5
7
|
extend Forwardable
|
6
8
|
include Relevance::Tarantula
|
7
9
|
|
10
|
+
class CrawlTimeout < RuntimeError; end
|
11
|
+
|
8
12
|
attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
|
9
13
|
:reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
|
10
14
|
:form_signatures_queued, :max_url_length, :response_code_handler,
|
11
|
-
:times_to_crawl, :fuzzers, :test_name
|
12
|
-
attr_reader :transform_url_patterns, :referrers, :failures, :successes
|
15
|
+
:times_to_crawl, :fuzzers, :test_name, :crawl_timeout
|
16
|
+
attr_reader :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
|
13
17
|
|
14
18
|
def initialize
|
15
19
|
@max_url_length = 1024
|
@@ -20,6 +24,8 @@ class Relevance::Tarantula::Crawler
|
|
20
24
|
@form_signatures_queued = Set.new
|
21
25
|
@links_to_crawl = []
|
22
26
|
@forms_to_crawl = []
|
27
|
+
@crawl_start_times, @crawl_end_times = [], []
|
28
|
+
@crawl_timeout = 20.minutes
|
23
29
|
@referrers = {}
|
24
30
|
@skip_uri_patterns = [
|
25
31
|
/^javascript/,
|
@@ -51,13 +57,18 @@ class Relevance::Tarantula::Crawler
|
|
51
57
|
orig_form_signatures_queued = @form_signatures_queued.dup
|
52
58
|
orig_links_to_crawl = @links_to_crawl.dup
|
53
59
|
orig_forms_to_crawl = @forms_to_crawl.dup
|
54
|
-
@times_to_crawl.times do |
|
60
|
+
@times_to_crawl.times do |num|
|
55
61
|
queue_link url
|
56
|
-
|
57
|
-
|
58
|
-
|
62
|
+
|
63
|
+
begin
|
64
|
+
do_crawl num
|
65
|
+
rescue CrawlTimeout => e
|
66
|
+
puts e.message
|
67
|
+
end
|
68
|
+
|
69
|
+
puts "#{(num+1).ordinalize} crawl" if @times_to_crawl > 1
|
59
70
|
|
60
|
-
if
|
71
|
+
if num + 1 < @times_to_crawl
|
61
72
|
@links_queued = orig_links_queued
|
62
73
|
@form_signatures_queued = orig_form_signatures_queued
|
63
74
|
@links_to_crawl = orig_links_to_crawl
|
@@ -75,19 +86,21 @@ class Relevance::Tarantula::Crawler
|
|
75
86
|
@links_to_crawl.empty? && @forms_to_crawl.empty?
|
76
87
|
end
|
77
88
|
|
78
|
-
def do_crawl
|
89
|
+
def do_crawl(number)
|
79
90
|
while (!finished?)
|
80
|
-
|
81
|
-
|
91
|
+
@crawl_start_times << Time.now
|
92
|
+
crawl_queued_links(number)
|
93
|
+
crawl_queued_forms(number)
|
94
|
+
@crawl_end_times << Time.now
|
82
95
|
end
|
83
96
|
end
|
84
97
|
|
85
|
-
def crawl_queued_links
|
98
|
+
def crawl_queued_links(number = 0)
|
86
99
|
while (link = @links_to_crawl.pop)
|
87
100
|
response = proxy.send(link.method, link.href)
|
88
101
|
log "Response #{response.code} for #{link}"
|
89
102
|
handle_link_results(link, response)
|
90
|
-
blip
|
103
|
+
blip(number)
|
91
104
|
end
|
92
105
|
end
|
93
106
|
|
@@ -122,13 +135,17 @@ class Relevance::Tarantula::Crawler
|
|
122
135
|
Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
|
123
136
|
end
|
124
137
|
|
125
|
-
def crawl_queued_forms
|
138
|
+
def crawl_queued_forms(number = 0)
|
126
139
|
while (form = @forms_to_crawl.pop)
|
127
140
|
response = crawl_form(form)
|
128
141
|
handle_form_results(form, response)
|
129
|
-
blip
|
142
|
+
blip(number)
|
130
143
|
end
|
131
144
|
end
|
145
|
+
|
146
|
+
def elasped_time_for_pass(num)
|
147
|
+
Time.now - crawl_start_times[num]
|
148
|
+
end
|
132
149
|
|
133
150
|
def grab_log!
|
134
151
|
@log_grabber && @log_grabber.grab!
|
@@ -232,9 +249,16 @@ class Relevance::Tarantula::Crawler
|
|
232
249
|
total_links_count - links_remaining_count
|
233
250
|
end
|
234
251
|
|
235
|
-
def blip
|
252
|
+
def blip(number = 0)
|
236
253
|
unless verbose
|
237
254
|
print "\r #{links_completed_count} of #{total_links_count} links completed "
|
255
|
+
timeout_if_too_long(number)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
def timeout_if_too_long(number = 0)
|
260
|
+
if elasped_time_for_pass(number) > crawl_timeout
|
261
|
+
raise CrawlTimeout, "Exceeded crawl timeout of #{crawl_timeout} seconds - skipping to the next crawl..."
|
238
262
|
end
|
239
263
|
end
|
240
264
|
end
|
@@ -23,9 +23,9 @@
|
|
23
23
|
and lives at <a href="http://github.com/relevance/tarantula">http://github.com/relevance/tarantula</a>.</p>
|
24
24
|
<hr/>
|
25
25
|
</div>
|
26
|
-
<div id="page">
|
26
|
+
<div id="page">
|
27
27
|
<div id="tabs-container">
|
28
|
-
<ul class="tabs"
|
28
|
+
<ul class="tabs"> </ul>
|
29
29
|
</div>
|
30
30
|
|
31
31
|
<div id="results-container">
|
@@ -18,7 +18,7 @@
|
|
18
18
|
<tbody>
|
19
19
|
<% send(result_type).sort{|x,y| y.code.to_s <=> x.code.to_s}.each_with_index do |result,i| %>
|
20
20
|
<tr class="<%= (i%2 == 0) ? 'even' : 'odd' %>">
|
21
|
-
<td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(
|
21
|
+
<td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(50) %></a></td>
|
22
22
|
<td class="method"><%= result.method.to_s.upcase %></td> <!-- TODO Clean up demeter violation -->
|
23
23
|
<td><span class="<%= class_for_code(result.code) %>"><%= result.code %></span></td>
|
24
24
|
<td class="left"><%= result.description %></td>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tarantula
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Relevance, Inc.
|
@@ -9,32 +9,48 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-04-07 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: htmlentities
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hpricot
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
16
35
|
description: A big hairy fuzzy spider that crawls your site, wreaking havoc
|
17
36
|
email: opensource@thinkrelevance.com
|
18
37
|
executables: []
|
19
38
|
|
20
39
|
extensions: []
|
21
40
|
|
22
|
-
extra_rdoc_files:
|
23
|
-
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.rdoc
|
24
43
|
files:
|
25
44
|
- CHANGELOG
|
26
45
|
- MIT-LICENSE
|
27
|
-
- Rakefile
|
28
46
|
- README.rdoc
|
47
|
+
- Rakefile
|
29
48
|
- VERSION.yml
|
30
49
|
- examples/example_helper.rb
|
31
|
-
- examples/relevance
|
32
|
-
- examples/relevance/core_extensions
|
33
50
|
- examples/relevance/core_extensions/ellipsize_example.rb
|
34
51
|
- examples/relevance/core_extensions/file_example.rb
|
35
52
|
- examples/relevance/core_extensions/response_example.rb
|
36
53
|
- examples/relevance/core_extensions/test_case_example.rb
|
37
|
-
- examples/relevance/tarantula
|
38
54
|
- examples/relevance/tarantula/attack_form_submission_example.rb
|
39
55
|
- examples/relevance/tarantula/attack_handler_example.rb
|
40
56
|
- examples/relevance/tarantula/crawler_example.rb
|
@@ -47,35 +63,26 @@ files:
|
|
47
63
|
- examples/relevance/tarantula/io_reporter_example.rb
|
48
64
|
- examples/relevance/tarantula/link_example.rb
|
49
65
|
- examples/relevance/tarantula/log_grabber_example.rb
|
50
|
-
- examples/relevance/tarantula/rails_init_example.rb
|
51
66
|
- examples/relevance/tarantula/rails_integration_proxy_example.rb
|
52
67
|
- examples/relevance/tarantula/result_example.rb
|
53
|
-
- examples/relevance/tarantula/STUB_RAILS_ROOT
|
54
|
-
- examples/relevance/tarantula/STUB_RAILS_ROOT/tmp
|
55
|
-
- examples/relevance/tarantula/STUB_RAILS_ROOT/tmp/tarantula
|
56
68
|
- examples/relevance/tarantula/tidy_handler_example.rb
|
57
69
|
- examples/relevance/tarantula/transform_example.rb
|
58
70
|
- examples/relevance/tarantula_example.rb
|
59
|
-
- laf/images
|
60
71
|
- laf/images/button_active.png
|
61
72
|
- laf/images/button_hover.png
|
62
73
|
- laf/images/button_inactive.png
|
63
74
|
- laf/images/header_bg.jpg
|
64
75
|
- laf/images/logo.png
|
65
76
|
- laf/images/tagline.png
|
66
|
-
- laf/javascripts
|
67
77
|
- laf/javascripts/jquery-1.2.3.js
|
68
78
|
- laf/javascripts/jquery-ui-tabs.js
|
69
79
|
- laf/javascripts/jquery.tablesorter.js
|
80
|
+
- laf/javascripts/niftyLayout.js
|
70
81
|
- laf/javascripts/niftycube-details.js
|
71
82
|
- laf/javascripts/niftycube.js
|
72
|
-
- laf/javascripts/niftyLayout.js
|
73
83
|
- laf/javascripts/tarantula.js
|
74
|
-
- laf/stylesheets
|
75
84
|
- laf/stylesheets/tarantula.css
|
76
|
-
- laf/v2
|
77
85
|
- laf/v2/detail.html
|
78
|
-
- laf/v2/images
|
79
86
|
- laf/v2/images/button_active.png
|
80
87
|
- laf/v2/images/button_hover.png
|
81
88
|
- laf/v2/images/button_inactive.png
|
@@ -83,16 +90,14 @@ files:
|
|
83
90
|
- laf/v2/images/logo.png
|
84
91
|
- laf/v2/images/tagline.png
|
85
92
|
- laf/v2/index.html
|
86
|
-
- laf/v2/stylesheets
|
87
93
|
- laf/v2/stylesheets/tarantula.v2.css
|
88
|
-
- lib/relevance
|
89
|
-
- lib/relevance/core_extensions
|
90
94
|
- lib/relevance/core_extensions/ellipsize.rb
|
91
95
|
- lib/relevance/core_extensions/file.rb
|
92
96
|
- lib/relevance/core_extensions/metaclass.rb
|
93
97
|
- lib/relevance/core_extensions/response.rb
|
98
|
+
- lib/relevance/core_extensions/string_chars_fix.rb
|
94
99
|
- lib/relevance/core_extensions/test_case.rb
|
95
|
-
- lib/relevance/tarantula
|
100
|
+
- lib/relevance/tarantula.rb
|
96
101
|
- lib/relevance/tarantula/attack.rb
|
97
102
|
- lib/relevance/tarantula/attack_form_submission.rb
|
98
103
|
- lib/relevance/tarantula/attack_handler.rb
|
@@ -115,14 +120,12 @@ files:
|
|
115
120
|
- lib/relevance/tarantula/test_report.html.erb
|
116
121
|
- lib/relevance/tarantula/tidy_handler.rb
|
117
122
|
- lib/relevance/tarantula/transform.rb
|
118
|
-
- lib/relevance/tarantula.rb
|
119
123
|
- tasks/tarantula_tasks.rake
|
120
124
|
- template/tarantula_test.rb
|
121
125
|
has_rdoc: true
|
122
126
|
homepage: http://github.com/relevance/tarantula
|
123
127
|
post_install_message:
|
124
128
|
rdoc_options:
|
125
|
-
- --inline-source
|
126
129
|
- --charset=UTF-8
|
127
130
|
require_paths:
|
128
131
|
- lib
|
@@ -140,10 +143,31 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
143
|
version:
|
141
144
|
requirements: []
|
142
145
|
|
143
|
-
rubyforge_project:
|
146
|
+
rubyforge_project: thinkrelevance
|
144
147
|
rubygems_version: 1.3.1
|
145
148
|
signing_key:
|
146
149
|
specification_version: 2
|
147
150
|
summary: A big hairy fuzzy spider that crawls your site, wreaking havoc
|
148
|
-
test_files:
|
149
|
-
|
151
|
+
test_files:
|
152
|
+
- examples/example_helper.rb
|
153
|
+
- examples/relevance/core_extensions/ellipsize_example.rb
|
154
|
+
- examples/relevance/core_extensions/file_example.rb
|
155
|
+
- examples/relevance/core_extensions/response_example.rb
|
156
|
+
- examples/relevance/core_extensions/test_case_example.rb
|
157
|
+
- examples/relevance/tarantula/attack_form_submission_example.rb
|
158
|
+
- examples/relevance/tarantula/attack_handler_example.rb
|
159
|
+
- examples/relevance/tarantula/crawler_example.rb
|
160
|
+
- examples/relevance/tarantula/form_example.rb
|
161
|
+
- examples/relevance/tarantula/form_submission_example.rb
|
162
|
+
- examples/relevance/tarantula/html_document_handler_example.rb
|
163
|
+
- examples/relevance/tarantula/html_report_helper_example.rb
|
164
|
+
- examples/relevance/tarantula/html_reporter_example.rb
|
165
|
+
- examples/relevance/tarantula/invalid_html_handler_example.rb
|
166
|
+
- examples/relevance/tarantula/io_reporter_example.rb
|
167
|
+
- examples/relevance/tarantula/link_example.rb
|
168
|
+
- examples/relevance/tarantula/log_grabber_example.rb
|
169
|
+
- examples/relevance/tarantula/rails_integration_proxy_example.rb
|
170
|
+
- examples/relevance/tarantula/result_example.rb
|
171
|
+
- examples/relevance/tarantula/tidy_handler_example.rb
|
172
|
+
- examples/relevance/tarantula/transform_example.rb
|
173
|
+
- examples/relevance/tarantula_example.rb
|