tarantula 0.1.5 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +36 -2
- data/README.rdoc +17 -0
- data/Rakefile +20 -5
- data/VERSION.yml +1 -1
- data/examples/example_helper.rb +13 -15
- data/examples/relevance/core_extensions/ellipsize_example.rb +1 -1
- data/examples/relevance/core_extensions/file_example.rb +1 -1
- data/examples/relevance/core_extensions/response_example.rb +1 -1
- data/examples/relevance/core_extensions/test_case_example.rb +5 -1
- data/examples/relevance/tarantula/attack_form_submission_example.rb +1 -1
- data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
- data/examples/relevance/tarantula/crawler_example.rb +313 -223
- data/examples/relevance/tarantula/form_example.rb +1 -1
- data/examples/relevance/tarantula/form_submission_example.rb +1 -1
- data/examples/relevance/tarantula/html_document_handler_example.rb +1 -1
- data/examples/relevance/tarantula/html_report_helper_example.rb +1 -1
- data/examples/relevance/tarantula/html_reporter_example.rb +1 -1
- data/examples/relevance/tarantula/invalid_html_handler_example.rb +1 -1
- data/examples/relevance/tarantula/io_reporter_example.rb +1 -1
- data/examples/relevance/tarantula/link_example.rb +1 -1
- data/examples/relevance/tarantula/log_grabber_example.rb +1 -1
- data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
- data/examples/relevance/tarantula/result_example.rb +1 -1
- data/examples/relevance/tarantula/tidy_handler_example.rb +1 -1
- data/examples/relevance/tarantula/transform_example.rb +1 -1
- data/examples/relevance/tarantula_example.rb +1 -1
- data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
- data/lib/relevance/core_extensions/test_case.rb +8 -1
- data/lib/relevance/tarantula.rb +1 -1
- data/lib/relevance/tarantula/crawler.rb +39 -15
- data/lib/relevance/tarantula/index.html.erb +2 -2
- data/lib/relevance/tarantula/test_report.html.erb +1 -1
- data/lib/relevance/tarantula/tidy_handler.rb +1 -1
- metadata +53 -29
- data/examples/relevance/tarantula/rails_init_example.rb +0 -14
@@ -0,0 +1,11 @@
|
|
1
|
+
if RUBY_VERSION == "1.8.7" # fix interaction between Ruby 187 and Rails 202, so we can at least run the test suite on that combination
|
2
|
+
unless '1.9'.respond_to?(:force_encoding)
|
3
|
+
String.class_eval do
|
4
|
+
begin
|
5
|
+
remove_method :chars
|
6
|
+
rescue NameError
|
7
|
+
# OK
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
require 'action_controller/integration'
|
2
|
+
|
3
|
+
module Relevance::CoreExtensions::TestCaseExtensions
|
4
|
+
|
2
5
|
def tarantula_crawl(integration_test, options = {})
|
3
6
|
url = options[:url] || "/"
|
4
7
|
t = tarantula_crawler(integration_test, options)
|
@@ -8,5 +11,9 @@ class Test::Unit::TestCase
|
|
8
11
|
def tarantula_crawler(integration_test, options = {})
|
9
12
|
Relevance::Tarantula::RailsIntegrationProxy.rails_integration_test(integration_test, options)
|
10
13
|
end
|
14
|
+
|
11
15
|
end
|
12
16
|
|
17
|
+
if defined? ActionController::IntegrationTest
|
18
|
+
ActionController::IntegrationTest.class_eval { include Relevance::CoreExtensions::TestCaseExtensions }
|
19
|
+
end
|
data/lib/relevance/tarantula.rb
CHANGED
@@ -2,7 +2,6 @@ TARANTULA_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "../.."))
|
|
2
2
|
|
3
3
|
require 'forwardable'
|
4
4
|
require 'erb'
|
5
|
-
require 'rubygems'
|
6
5
|
require 'active_support'
|
7
6
|
require 'action_controller'
|
8
7
|
|
@@ -38,6 +37,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "e
|
|
38
37
|
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "file"))
|
39
38
|
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "response"))
|
40
39
|
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "metaclass"))
|
40
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "string_chars_fix"))
|
41
41
|
|
42
42
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_reporter"))
|
43
43
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_report_helper"))
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'active_record/base'
|
1
3
|
require File.expand_path(File.join(File.dirname(__FILE__), "rails_integration_proxy"))
|
2
4
|
require File.expand_path(File.join(File.dirname(__FILE__), "html_document_handler.rb"))
|
3
5
|
|
@@ -5,11 +7,13 @@ class Relevance::Tarantula::Crawler
|
|
5
7
|
extend Forwardable
|
6
8
|
include Relevance::Tarantula
|
7
9
|
|
10
|
+
class CrawlTimeout < RuntimeError; end
|
11
|
+
|
8
12
|
attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
|
9
13
|
:reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
|
10
14
|
:form_signatures_queued, :max_url_length, :response_code_handler,
|
11
|
-
:times_to_crawl, :fuzzers, :test_name
|
12
|
-
attr_reader :transform_url_patterns, :referrers, :failures, :successes
|
15
|
+
:times_to_crawl, :fuzzers, :test_name, :crawl_timeout
|
16
|
+
attr_reader :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
|
13
17
|
|
14
18
|
def initialize
|
15
19
|
@max_url_length = 1024
|
@@ -20,6 +24,8 @@ class Relevance::Tarantula::Crawler
|
|
20
24
|
@form_signatures_queued = Set.new
|
21
25
|
@links_to_crawl = []
|
22
26
|
@forms_to_crawl = []
|
27
|
+
@crawl_start_times, @crawl_end_times = [], []
|
28
|
+
@crawl_timeout = 20.minutes
|
23
29
|
@referrers = {}
|
24
30
|
@skip_uri_patterns = [
|
25
31
|
/^javascript/,
|
@@ -51,13 +57,18 @@ class Relevance::Tarantula::Crawler
|
|
51
57
|
orig_form_signatures_queued = @form_signatures_queued.dup
|
52
58
|
orig_links_to_crawl = @links_to_crawl.dup
|
53
59
|
orig_forms_to_crawl = @forms_to_crawl.dup
|
54
|
-
@times_to_crawl.times do |
|
60
|
+
@times_to_crawl.times do |num|
|
55
61
|
queue_link url
|
56
|
-
|
57
|
-
|
58
|
-
|
62
|
+
|
63
|
+
begin
|
64
|
+
do_crawl num
|
65
|
+
rescue CrawlTimeout => e
|
66
|
+
puts e.message
|
67
|
+
end
|
68
|
+
|
69
|
+
puts "#{(num+1).ordinalize} crawl" if @times_to_crawl > 1
|
59
70
|
|
60
|
-
if
|
71
|
+
if num + 1 < @times_to_crawl
|
61
72
|
@links_queued = orig_links_queued
|
62
73
|
@form_signatures_queued = orig_form_signatures_queued
|
63
74
|
@links_to_crawl = orig_links_to_crawl
|
@@ -75,19 +86,21 @@ class Relevance::Tarantula::Crawler
|
|
75
86
|
@links_to_crawl.empty? && @forms_to_crawl.empty?
|
76
87
|
end
|
77
88
|
|
78
|
-
def do_crawl
|
89
|
+
def do_crawl(number)
|
79
90
|
while (!finished?)
|
80
|
-
|
81
|
-
|
91
|
+
@crawl_start_times << Time.now
|
92
|
+
crawl_queued_links(number)
|
93
|
+
crawl_queued_forms(number)
|
94
|
+
@crawl_end_times << Time.now
|
82
95
|
end
|
83
96
|
end
|
84
97
|
|
85
|
-
def crawl_queued_links
|
98
|
+
def crawl_queued_links(number = 0)
|
86
99
|
while (link = @links_to_crawl.pop)
|
87
100
|
response = proxy.send(link.method, link.href)
|
88
101
|
log "Response #{response.code} for #{link}"
|
89
102
|
handle_link_results(link, response)
|
90
|
-
blip
|
103
|
+
blip(number)
|
91
104
|
end
|
92
105
|
end
|
93
106
|
|
@@ -122,13 +135,17 @@ class Relevance::Tarantula::Crawler
|
|
122
135
|
Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
|
123
136
|
end
|
124
137
|
|
125
|
-
def crawl_queued_forms
|
138
|
+
def crawl_queued_forms(number = 0)
|
126
139
|
while (form = @forms_to_crawl.pop)
|
127
140
|
response = crawl_form(form)
|
128
141
|
handle_form_results(form, response)
|
129
|
-
blip
|
142
|
+
blip(number)
|
130
143
|
end
|
131
144
|
end
|
145
|
+
|
146
|
+
def elasped_time_for_pass(num)
|
147
|
+
Time.now - crawl_start_times[num]
|
148
|
+
end
|
132
149
|
|
133
150
|
def grab_log!
|
134
151
|
@log_grabber && @log_grabber.grab!
|
@@ -232,9 +249,16 @@ class Relevance::Tarantula::Crawler
|
|
232
249
|
total_links_count - links_remaining_count
|
233
250
|
end
|
234
251
|
|
235
|
-
def blip
|
252
|
+
def blip(number = 0)
|
236
253
|
unless verbose
|
237
254
|
print "\r #{links_completed_count} of #{total_links_count} links completed "
|
255
|
+
timeout_if_too_long(number)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
def timeout_if_too_long(number = 0)
|
260
|
+
if elasped_time_for_pass(number) > crawl_timeout
|
261
|
+
raise CrawlTimeout, "Exceeded crawl timeout of #{crawl_timeout} seconds - skipping to the next crawl..."
|
238
262
|
end
|
239
263
|
end
|
240
264
|
end
|
@@ -23,9 +23,9 @@
|
|
23
23
|
and lives at <a href="http://github.com/relevance/tarantula">http://github.com/relevance/tarantula</a>.</p>
|
24
24
|
<hr/>
|
25
25
|
</div>
|
26
|
-
<div id="page">
|
26
|
+
<div id="page">
|
27
27
|
<div id="tabs-container">
|
28
|
-
<ul class="tabs"
|
28
|
+
<ul class="tabs"> </ul>
|
29
29
|
</div>
|
30
30
|
|
31
31
|
<div id="results-container">
|
@@ -18,7 +18,7 @@
|
|
18
18
|
<tbody>
|
19
19
|
<% send(result_type).sort{|x,y| y.code.to_s <=> x.code.to_s}.each_with_index do |result,i| %>
|
20
20
|
<tr class="<%= (i%2 == 0) ? 'even' : 'odd' %>">
|
21
|
-
<td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(
|
21
|
+
<td class="left"><a href="<%= "#{test_name}/#{result.file_name}" %>"><%= result.url.ellipsize(50) %></a></td>
|
22
22
|
<td class="method"><%= result.method.to_s.upcase %></td> <!-- TODO Clean up demeter violation -->
|
23
23
|
<td><span class="<%= class_for_code(result.code) %>"><%= result.code %></span></td>
|
24
24
|
<td class="left"><%= result.description %></td>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tarantula
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Relevance, Inc.
|
@@ -9,32 +9,48 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-04-07 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: htmlentities
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hpricot
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
16
35
|
description: A big hairy fuzzy spider that crawls your site, wreaking havoc
|
17
36
|
email: opensource@thinkrelevance.com
|
18
37
|
executables: []
|
19
38
|
|
20
39
|
extensions: []
|
21
40
|
|
22
|
-
extra_rdoc_files:
|
23
|
-
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.rdoc
|
24
43
|
files:
|
25
44
|
- CHANGELOG
|
26
45
|
- MIT-LICENSE
|
27
|
-
- Rakefile
|
28
46
|
- README.rdoc
|
47
|
+
- Rakefile
|
29
48
|
- VERSION.yml
|
30
49
|
- examples/example_helper.rb
|
31
|
-
- examples/relevance
|
32
|
-
- examples/relevance/core_extensions
|
33
50
|
- examples/relevance/core_extensions/ellipsize_example.rb
|
34
51
|
- examples/relevance/core_extensions/file_example.rb
|
35
52
|
- examples/relevance/core_extensions/response_example.rb
|
36
53
|
- examples/relevance/core_extensions/test_case_example.rb
|
37
|
-
- examples/relevance/tarantula
|
38
54
|
- examples/relevance/tarantula/attack_form_submission_example.rb
|
39
55
|
- examples/relevance/tarantula/attack_handler_example.rb
|
40
56
|
- examples/relevance/tarantula/crawler_example.rb
|
@@ -47,35 +63,26 @@ files:
|
|
47
63
|
- examples/relevance/tarantula/io_reporter_example.rb
|
48
64
|
- examples/relevance/tarantula/link_example.rb
|
49
65
|
- examples/relevance/tarantula/log_grabber_example.rb
|
50
|
-
- examples/relevance/tarantula/rails_init_example.rb
|
51
66
|
- examples/relevance/tarantula/rails_integration_proxy_example.rb
|
52
67
|
- examples/relevance/tarantula/result_example.rb
|
53
|
-
- examples/relevance/tarantula/STUB_RAILS_ROOT
|
54
|
-
- examples/relevance/tarantula/STUB_RAILS_ROOT/tmp
|
55
|
-
- examples/relevance/tarantula/STUB_RAILS_ROOT/tmp/tarantula
|
56
68
|
- examples/relevance/tarantula/tidy_handler_example.rb
|
57
69
|
- examples/relevance/tarantula/transform_example.rb
|
58
70
|
- examples/relevance/tarantula_example.rb
|
59
|
-
- laf/images
|
60
71
|
- laf/images/button_active.png
|
61
72
|
- laf/images/button_hover.png
|
62
73
|
- laf/images/button_inactive.png
|
63
74
|
- laf/images/header_bg.jpg
|
64
75
|
- laf/images/logo.png
|
65
76
|
- laf/images/tagline.png
|
66
|
-
- laf/javascripts
|
67
77
|
- laf/javascripts/jquery-1.2.3.js
|
68
78
|
- laf/javascripts/jquery-ui-tabs.js
|
69
79
|
- laf/javascripts/jquery.tablesorter.js
|
80
|
+
- laf/javascripts/niftyLayout.js
|
70
81
|
- laf/javascripts/niftycube-details.js
|
71
82
|
- laf/javascripts/niftycube.js
|
72
|
-
- laf/javascripts/niftyLayout.js
|
73
83
|
- laf/javascripts/tarantula.js
|
74
|
-
- laf/stylesheets
|
75
84
|
- laf/stylesheets/tarantula.css
|
76
|
-
- laf/v2
|
77
85
|
- laf/v2/detail.html
|
78
|
-
- laf/v2/images
|
79
86
|
- laf/v2/images/button_active.png
|
80
87
|
- laf/v2/images/button_hover.png
|
81
88
|
- laf/v2/images/button_inactive.png
|
@@ -83,16 +90,14 @@ files:
|
|
83
90
|
- laf/v2/images/logo.png
|
84
91
|
- laf/v2/images/tagline.png
|
85
92
|
- laf/v2/index.html
|
86
|
-
- laf/v2/stylesheets
|
87
93
|
- laf/v2/stylesheets/tarantula.v2.css
|
88
|
-
- lib/relevance
|
89
|
-
- lib/relevance/core_extensions
|
90
94
|
- lib/relevance/core_extensions/ellipsize.rb
|
91
95
|
- lib/relevance/core_extensions/file.rb
|
92
96
|
- lib/relevance/core_extensions/metaclass.rb
|
93
97
|
- lib/relevance/core_extensions/response.rb
|
98
|
+
- lib/relevance/core_extensions/string_chars_fix.rb
|
94
99
|
- lib/relevance/core_extensions/test_case.rb
|
95
|
-
- lib/relevance/tarantula
|
100
|
+
- lib/relevance/tarantula.rb
|
96
101
|
- lib/relevance/tarantula/attack.rb
|
97
102
|
- lib/relevance/tarantula/attack_form_submission.rb
|
98
103
|
- lib/relevance/tarantula/attack_handler.rb
|
@@ -115,14 +120,12 @@ files:
|
|
115
120
|
- lib/relevance/tarantula/test_report.html.erb
|
116
121
|
- lib/relevance/tarantula/tidy_handler.rb
|
117
122
|
- lib/relevance/tarantula/transform.rb
|
118
|
-
- lib/relevance/tarantula.rb
|
119
123
|
- tasks/tarantula_tasks.rake
|
120
124
|
- template/tarantula_test.rb
|
121
125
|
has_rdoc: true
|
122
126
|
homepage: http://github.com/relevance/tarantula
|
123
127
|
post_install_message:
|
124
128
|
rdoc_options:
|
125
|
-
- --inline-source
|
126
129
|
- --charset=UTF-8
|
127
130
|
require_paths:
|
128
131
|
- lib
|
@@ -140,10 +143,31 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
143
|
version:
|
141
144
|
requirements: []
|
142
145
|
|
143
|
-
rubyforge_project:
|
146
|
+
rubyforge_project: thinkrelevance
|
144
147
|
rubygems_version: 1.3.1
|
145
148
|
signing_key:
|
146
149
|
specification_version: 2
|
147
150
|
summary: A big hairy fuzzy spider that crawls your site, wreaking havoc
|
148
|
-
test_files:
|
149
|
-
|
151
|
+
test_files:
|
152
|
+
- examples/example_helper.rb
|
153
|
+
- examples/relevance/core_extensions/ellipsize_example.rb
|
154
|
+
- examples/relevance/core_extensions/file_example.rb
|
155
|
+
- examples/relevance/core_extensions/response_example.rb
|
156
|
+
- examples/relevance/core_extensions/test_case_example.rb
|
157
|
+
- examples/relevance/tarantula/attack_form_submission_example.rb
|
158
|
+
- examples/relevance/tarantula/attack_handler_example.rb
|
159
|
+
- examples/relevance/tarantula/crawler_example.rb
|
160
|
+
- examples/relevance/tarantula/form_example.rb
|
161
|
+
- examples/relevance/tarantula/form_submission_example.rb
|
162
|
+
- examples/relevance/tarantula/html_document_handler_example.rb
|
163
|
+
- examples/relevance/tarantula/html_report_helper_example.rb
|
164
|
+
- examples/relevance/tarantula/html_reporter_example.rb
|
165
|
+
- examples/relevance/tarantula/invalid_html_handler_example.rb
|
166
|
+
- examples/relevance/tarantula/io_reporter_example.rb
|
167
|
+
- examples/relevance/tarantula/link_example.rb
|
168
|
+
- examples/relevance/tarantula/log_grabber_example.rb
|
169
|
+
- examples/relevance/tarantula/rails_integration_proxy_example.rb
|
170
|
+
- examples/relevance/tarantula/result_example.rb
|
171
|
+
- examples/relevance/tarantula/tidy_handler_example.rb
|
172
|
+
- examples/relevance/tarantula/transform_example.rb
|
173
|
+
- examples/relevance/tarantula_example.rb
|