tarantula 0.2.0 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +3 -4
- data/Rakefile +9 -5
- data/VERSION.yml +2 -2
- data/examples/example_helper.rb +10 -1
- data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
- data/examples/relevance/tarantula/basic_attack_example.rb +12 -0
- data/examples/relevance/tarantula/crawler_example.rb +66 -77
- data/examples/relevance/tarantula/form_example.rb +3 -3
- data/examples/relevance/tarantula/form_submission_example.rb +157 -57
- data/examples/relevance/tarantula/link_example.rb +24 -7
- data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
- data/lib/relevance/tarantula/attack.rb +3 -0
- data/lib/relevance/tarantula/attack_handler.rb +1 -1
- data/lib/relevance/tarantula/basic_attack.rb +40 -0
- data/lib/relevance/tarantula/crawler.rb +36 -46
- data/lib/relevance/tarantula/detail.html.erb +11 -11
- data/lib/relevance/tarantula/form.rb +4 -2
- data/lib/relevance/tarantula/form_submission.rb +47 -29
- data/lib/relevance/tarantula/link.rb +24 -4
- data/lib/relevance/tarantula/rails_integration_proxy.rb +1 -1
- data/lib/relevance/tarantula/result.rb +14 -3
- data/lib/relevance/tarantula.rb +1 -1
- metadata +6 -6
- data/examples/relevance/tarantula/attack_form_submission_example.rb +0 -79
- data/lib/relevance/tarantula/attack_form_submission.rb +0 -75
@@ -4,47 +4,64 @@ describe "Relevance::Tarantula::Link" do
|
|
4
4
|
include ActionView::Helpers::UrlHelper
|
5
5
|
|
6
6
|
it "does not raise an error when initializing without href attribtue" do
|
7
|
-
link =
|
7
|
+
link = make_link(Hpricot('<a="/foo">foo</a>').at('a'))
|
8
8
|
link.href.should == nil
|
9
9
|
link.method.should == :get
|
10
10
|
end
|
11
11
|
|
12
12
|
it "parses anchor tags" do
|
13
|
-
link =
|
13
|
+
link = make_link(Hpricot('<a href="/foo">foo</a>').at('a'))
|
14
14
|
link.href.should == '/foo'
|
15
15
|
link.method.should == :get
|
16
16
|
end
|
17
17
|
|
18
18
|
it "parses anchor tags with POST 'method'" do
|
19
|
-
link =
|
19
|
+
link = make_link(Hpricot(%Q{<a href="/foo" onclick="#{method_javascript_function(:post)}">foo</a>}).at('a'))
|
20
20
|
link.href.should == '/foo'
|
21
21
|
link.method.should == :post
|
22
22
|
end
|
23
23
|
|
24
24
|
it "parses anchor tags with PUT 'method'" do
|
25
|
-
link =
|
25
|
+
link = make_link(Hpricot(%Q{<a href="/foo" onclick="#{method_javascript_function(:put)}">foo</a>}).at('a'))
|
26
26
|
link.href.should == '/foo'
|
27
27
|
link.method.should == :put
|
28
28
|
end
|
29
29
|
|
30
30
|
it "parses anchor tags with DELETE 'method'" do
|
31
|
-
link =
|
31
|
+
link = make_link(Hpricot(%Q{<a href="/foo" onclick="#{method_javascript_function(:delete)}">foo</a>}).at('a'))
|
32
32
|
link.href.should == '/foo'
|
33
33
|
link.method.should == :delete
|
34
34
|
end
|
35
35
|
|
36
36
|
it "parses link tags with text" do
|
37
|
-
link =
|
37
|
+
link = make_link(Hpricot('<link href="/bar">bar</a>').at('link'))
|
38
38
|
link.href.should == '/bar'
|
39
39
|
link.method.should == :get
|
40
40
|
end
|
41
41
|
|
42
42
|
it "parses link tags without text" do
|
43
|
-
link =
|
43
|
+
link = make_link(Hpricot('<link href="/bar" />').at('link'))
|
44
44
|
link.href.should == '/bar'
|
45
45
|
link.method.should == :get
|
46
46
|
end
|
47
47
|
|
48
|
+
it 'remembers link referrer if there is one' do
|
49
|
+
link = make_link('/url', stub_everything, '/some-referrer')
|
50
|
+
link.referrer.should == '/some-referrer'
|
51
|
+
end
|
52
|
+
|
53
|
+
it "does two things when crawled: follow, log, and handle" do
|
54
|
+
crawler = Relevance::Tarantula::Crawler.new
|
55
|
+
link = make_link('/foo', crawler)
|
56
|
+
|
57
|
+
response = stub(:code => "200")
|
58
|
+
crawler.expects(:follow).returns(response)
|
59
|
+
link.expects(:log)
|
60
|
+
crawler.expects(:handle_link_results)
|
61
|
+
|
62
|
+
link.crawl
|
63
|
+
end
|
64
|
+
|
48
65
|
# method_javascript_function needs this method
|
49
66
|
def protect_against_forgery?
|
50
67
|
false
|
@@ -41,7 +41,7 @@ describe "Relevance::Tarantula::RailsIntegrationProxy" do
|
|
41
41
|
it "adds a response accessor to its delegate rails integration test" do
|
42
42
|
o = Object.new
|
43
43
|
Relevance::Tarantula::RailsIntegrationProxy.new(o)
|
44
|
-
o.methods(false).sort.should == %w{response response=}
|
44
|
+
o.methods(false).map(&:to_s).sort.should == %w{response response=}
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Namespaces are reopened explicitly so this file loads regardless of
# require order.
module Relevance
  module Tarantula
    # The default form fuzzer. It supplies a value for a single form field,
    # chosen by simple pattern matching on the field's "name" attribute.
    #
    # It exposes the same reader attributes (name, output, description) that
    # are listed in ATTRS, so it can be compared with other attack objects.
    class BasicAttack
      # Attributes that identify an attack and take part in equality.
      ATTRS = [:name, :output, :description].freeze

      attr_reader(*ATTRS)

      def initialize
        @name = "Tarantula Basic Fuzzer"
        @output = nil
        @description = "Supplies purely random but simplistically generated form input."
      end

      # Two basic attacks are equal when every attribute in ATTRS matches.
      # Anything that is not a BasicAttack is never equal.
      def ==(other)
        BasicAttack === other && ATTRS.all? { |attr| send(attr) == other.send(attr) }
      end

      # Picks the value to submit for +input_field+.
      #
      # input_field - any object answering ['name'] and ['value'] with the
      #               field's attributes (an HTML element or a Hash).
      #
      # Returns an Integer for named fields, nil for "uploaded_data" fields,
      # and the field's own value when the field has no name.
      def input(input_field)
        case input_field['name']
        when /amount/        then random_int
        when /_id$/          then random_whole_number
        when /uploaded_data/ then nil
        # Unnamed field: keep its existing value. This must read from
        # input_field; calling `input` here would re-enter this method
        # without arguments and raise ArgumentError.
        when nil             then input_field['value']
        else
          random_int
        end
      end

      # Exclusive upper bound used by the random generators below.
      def big_number
        10000 # arbitrary
      end

      # Random integer centred on zero: -big_number/2 ... big_number/2.
      def random_int
        rand(big_number) - (big_number / 2)
      end

      # Random non-negative integer: 0 ... big_number.
      def random_whole_number
        rand(big_number)
      end
    end
  end
end
|
39
|
+
|
40
|
+
|
@@ -10,7 +10,7 @@ class Relevance::Tarantula::Crawler
|
|
10
10
|
class CrawlTimeout < RuntimeError; end
|
11
11
|
|
12
12
|
attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
|
13
|
-
:reporters, :
|
13
|
+
:reporters, :crawl_queue, :links_queued,
|
14
14
|
:form_signatures_queued, :max_url_length, :response_code_handler,
|
15
15
|
:times_to_crawl, :fuzzers, :test_name, :crawl_timeout
|
16
16
|
attr_reader :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
|
@@ -22,8 +22,7 @@ class Relevance::Tarantula::Crawler
|
|
22
22
|
@handlers = [@response_code_handler = Result]
|
23
23
|
@links_queued = Set.new
|
24
24
|
@form_signatures_queued = Set.new
|
25
|
-
@
|
26
|
-
@forms_to_crawl = []
|
25
|
+
@crawl_queue = []
|
27
26
|
@crawl_start_times, @crawl_end_times = [], []
|
28
27
|
@crawl_timeout = 20.minutes
|
29
28
|
@referrers = {}
|
@@ -39,6 +38,8 @@ class Relevance::Tarantula::Crawler
|
|
39
38
|
@decoder = HTMLEntities.new
|
40
39
|
@times_to_crawl = 1
|
41
40
|
@fuzzers = [Relevance::Tarantula::FormSubmission]
|
41
|
+
|
42
|
+
@stdout_tty = $stdout.tty?
|
42
43
|
end
|
43
44
|
|
44
45
|
def method_missing(meth, *args)
|
@@ -55,14 +56,14 @@ class Relevance::Tarantula::Crawler
|
|
55
56
|
def crawl(url = "/")
|
56
57
|
orig_links_queued = @links_queued.dup
|
57
58
|
orig_form_signatures_queued = @form_signatures_queued.dup
|
58
|
-
|
59
|
-
orig_forms_to_crawl = @forms_to_crawl.dup
|
59
|
+
orig_crawl_queue = @crawl_queue.dup
|
60
60
|
@times_to_crawl.times do |num|
|
61
61
|
queue_link url
|
62
62
|
|
63
63
|
begin
|
64
64
|
do_crawl num
|
65
65
|
rescue CrawlTimeout => e
|
66
|
+
puts
|
66
67
|
puts e.message
|
67
68
|
end
|
68
69
|
|
@@ -71,8 +72,7 @@ class Relevance::Tarantula::Crawler
|
|
71
72
|
if num + 1 < @times_to_crawl
|
72
73
|
@links_queued = orig_links_queued
|
73
74
|
@form_signatures_queued = orig_form_signatures_queued
|
74
|
-
@
|
75
|
-
@forms_to_crawl = orig_forms_to_crawl
|
75
|
+
@crawl_queue = orig_crawl_queue
|
76
76
|
@referrers = {}
|
77
77
|
end
|
78
78
|
end
|
@@ -83,23 +83,20 @@ class Relevance::Tarantula::Crawler
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def finished?
|
86
|
-
@
|
86
|
+
@crawl_queue.empty?
|
87
87
|
end
|
88
88
|
|
89
89
|
def do_crawl(number)
|
90
90
|
while (!finished?)
|
91
91
|
@crawl_start_times << Time.now
|
92
|
-
|
93
|
-
crawl_queued_forms(number)
|
92
|
+
crawl_the_queue(number)
|
94
93
|
@crawl_end_times << Time.now
|
95
94
|
end
|
96
95
|
end
|
97
96
|
|
98
|
-
def
|
99
|
-
while (
|
100
|
-
|
101
|
-
log "Response #{response.code} for #{link}"
|
102
|
-
handle_link_results(link, response)
|
97
|
+
def crawl_the_queue(number = 0)
|
98
|
+
while (request = @crawl_queue.pop)
|
99
|
+
request.crawl
|
103
100
|
blip(number)
|
104
101
|
end
|
105
102
|
end
|
@@ -110,15 +107,10 @@ class Relevance::Tarantula::Crawler
|
|
110
107
|
end
|
111
108
|
end
|
112
109
|
|
113
|
-
def handle_link_results(link,
|
110
|
+
def handle_link_results(link, result)
|
114
111
|
handlers.each do |h|
|
115
112
|
begin
|
116
|
-
save_result h.handle(
|
117
|
-
:url => link.href,
|
118
|
-
:response => response,
|
119
|
-
:log => grab_log!,
|
120
|
-
:referrer => referrers[link],
|
121
|
-
:test_name => test_name).freeze)
|
113
|
+
save_result h.handle(result)
|
122
114
|
rescue Exception => e
|
123
115
|
log "error handling #{link} #{e.message}"
|
124
116
|
# TODO: pass to results
|
@@ -126,23 +118,14 @@ class Relevance::Tarantula::Crawler
|
|
126
118
|
end
|
127
119
|
end
|
128
120
|
|
129
|
-
def
|
130
|
-
|
131
|
-
log "Response #{response.code} for #{form}"
|
132
|
-
response
|
133
|
-
rescue ActiveRecord::RecordNotFound => e
|
134
|
-
log "Skipping #{form.action}, presumed ok that record is missing"
|
135
|
-
Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
|
136
|
-
end
|
137
|
-
|
138
|
-
def crawl_queued_forms(number = 0)
|
139
|
-
while (form = @forms_to_crawl.pop)
|
140
|
-
response = crawl_form(form)
|
141
|
-
handle_form_results(form, response)
|
142
|
-
blip(number)
|
143
|
-
end
|
121
|
+
def follow(method, url, data=nil)
|
122
|
+
proxy.send(method, url, data)
|
144
123
|
end
|
145
124
|
|
125
|
+
def submit(method, action, data)
|
126
|
+
proxy.send(method, action, data)
|
127
|
+
end
|
128
|
+
|
146
129
|
def elasped_time_for_pass(num)
|
147
130
|
Time.now - crawl_start_times[num]
|
148
131
|
end
|
@@ -150,6 +133,14 @@ class Relevance::Tarantula::Crawler
|
|
150
133
|
def grab_log!
|
151
134
|
@log_grabber && @log_grabber.grab!
|
152
135
|
end
|
136
|
+
|
137
|
+
def make_result(options)
|
138
|
+
defaults = {
|
139
|
+
:log => grab_log!,
|
140
|
+
:test_name => test_name
|
141
|
+
}
|
142
|
+
Result.new(defaults.merge(options)).freeze
|
143
|
+
end
|
153
144
|
|
154
145
|
def handle_form_results(form, response)
|
155
146
|
handlers.each do |h|
|
@@ -193,23 +184,21 @@ class Relevance::Tarantula::Crawler
|
|
193
184
|
end
|
194
185
|
|
195
186
|
def queue_link(dest, referrer = nil)
|
196
|
-
dest = Link.new(dest)
|
197
|
-
dest.href = transform_url(dest.href)
|
187
|
+
dest = Link.new(dest, self, referrer)
|
198
188
|
return if should_skip_link?(dest)
|
199
|
-
@
|
200
|
-
@links_to_crawl << dest
|
189
|
+
@crawl_queue << dest
|
201
190
|
@links_queued << dest
|
202
191
|
dest
|
203
192
|
end
|
204
193
|
|
205
194
|
def queue_form(form, referrer = nil)
|
206
195
|
fuzzers.each do |fuzzer|
|
207
|
-
fuzzer.mutate(Form.new(form)).each do |fs|
|
208
|
-
# fs = fuzzer.new(Form.new(form))
|
196
|
+
fuzzer.mutate(Form.new(form, self, referrer)).each do |fs|
|
197
|
+
# fs = fuzzer.new(Form.new(form, self, referrer))
|
209
198
|
fs.action = transform_url(fs.action)
|
210
199
|
return if should_skip_form_submission?(fs)
|
211
200
|
@referrers[fs.action] = referrer if referrer
|
212
|
-
@
|
201
|
+
@crawl_queue << fs
|
213
202
|
@form_signatures_queued << fs.signature
|
214
203
|
end
|
215
204
|
end
|
@@ -234,6 +223,7 @@ class Relevance::Tarantula::Crawler
|
|
234
223
|
end
|
235
224
|
|
236
225
|
def report_results
|
226
|
+
puts "Crawled #{total_links_count} links and forms."
|
237
227
|
generate_reports
|
238
228
|
end
|
239
229
|
|
@@ -242,7 +232,7 @@ class Relevance::Tarantula::Crawler
|
|
242
232
|
end
|
243
233
|
|
244
234
|
def links_remaining_count
|
245
|
-
@
|
235
|
+
@crawl_queue.size
|
246
236
|
end
|
247
237
|
|
248
238
|
def links_completed_count
|
@@ -251,7 +241,7 @@ class Relevance::Tarantula::Crawler
|
|
251
241
|
|
252
242
|
def blip(number = 0)
|
253
243
|
unless verbose
|
254
|
-
print "\r #{links_completed_count} of #{total_links_count} links completed "
|
244
|
+
print "\r #{links_completed_count} of #{total_links_count} links completed " if @stdout_tty
|
255
245
|
timeout_if_too_long(number)
|
256
246
|
end
|
257
247
|
end
|
@@ -25,18 +25,18 @@
|
|
25
25
|
</ul>
|
26
26
|
|
27
27
|
<div id="report">
|
28
|
-
<h3>Detail of <%= short_description %> <em>Generated on <%= Time.now %></em></h3>
|
29
|
-
<p><b>Resource</b> <a href="<%= full_url %>"><%= full_url %></a></p>
|
30
|
-
<p><b>Response</b> <span class="r<%= code.first %>"><%= code %></span></p>
|
31
|
-
<p><b>Referrer</b> <%= referrer || "" %></p>
|
28
|
+
<h3>Detail of <%= result.short_description %> <em>Generated on <%= Time.now %></em></h3>
|
29
|
+
<p><b>Resource</b> <a href="<%= result.full_url %>"><%= result.full_url %></a></p>
|
30
|
+
<p><b>Response</b> <span class="r<%= result.code.first %>"><%= result.code %></span></p>
|
31
|
+
<p><b>Referrer</b> <%= result.referrer || "" %></p>
|
32
32
|
|
33
33
|
<table class="output">
|
34
34
|
<tbody>
|
35
35
|
<tr>
|
36
36
|
<th colspan="2"># Data</th>
|
37
37
|
</tr>
|
38
|
-
<% if data %>
|
39
|
-
<%= wrap_in_line_number_table_row(data) %>
|
38
|
+
<% if result.data %>
|
39
|
+
<%= result.wrap_in_line_number_table_row(result.data) %>
|
40
40
|
<% else %>
|
41
41
|
<tr>
|
42
42
|
<td colspan="2">No Data</td>
|
@@ -50,8 +50,8 @@
|
|
50
50
|
<tr>
|
51
51
|
<th colspan="2"># Body</th>
|
52
52
|
</tr>
|
53
|
-
<% if body %>
|
54
|
-
<%= wrap_in_line_number_table_row(body) %>
|
53
|
+
<% if result.body %>
|
54
|
+
<%= result.wrap_in_line_number_table_row(result.body) %>
|
55
55
|
<% else %>
|
56
56
|
<tr>
|
57
57
|
<td colspan="2">No Body</td>
|
@@ -65,8 +65,8 @@
|
|
65
65
|
<tr>
|
66
66
|
<th colspan="2"># Log</th>
|
67
67
|
</tr>
|
68
|
-
<% if log %>
|
69
|
-
<%= wrap_in_line_number_table_row(log) {|line| wrap_stack_trace_line(line)} %>
|
68
|
+
<% if result.log %>
|
69
|
+
<%= result.wrap_in_line_number_table_row(result.log) {|line| wrap_stack_trace_line(line)} %>
|
70
70
|
<% else %>
|
71
71
|
<tr>
|
72
72
|
<td colspan="2">No Log</td>
|
@@ -78,4 +78,4 @@
|
|
78
78
|
</div>
|
79
79
|
</div>
|
80
80
|
</body>
|
81
|
-
</html>
|
81
|
+
</html>
|
@@ -2,8 +2,10 @@ class Relevance::Tarantula::Form
|
|
2
2
|
extend Forwardable
|
3
3
|
def_delegators("@tag", :search)
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
attr_accessor :crawler, :referrer
|
6
|
+
|
7
|
+
def initialize(tag, crawler, referrer)
|
8
|
+
@tag, @crawler, @referrer = tag, crawler, referrer
|
7
9
|
end
|
8
10
|
|
9
11
|
def action
|
@@ -1,25 +1,58 @@
|
|
1
1
|
class Relevance::Tarantula::FormSubmission
|
2
|
-
|
3
|
-
|
2
|
+
include Relevance::Tarantula
|
3
|
+
attr_accessor :method, :action, :data, :attack, :form
|
4
|
+
|
5
|
+
class << self
|
6
|
+
def attacks
|
7
|
+
# normalize from hash input to Attack
|
8
|
+
@attacks = @attacks.map do |val|
|
9
|
+
Hash === val ? Relevance::Tarantula::Attack.new(val) : val
|
10
|
+
end
|
11
|
+
@attacks
|
12
|
+
end
|
13
|
+
def attacks=(atts)
|
14
|
+
# normalize from hash input to Attack
|
15
|
+
@attacks = atts.map do |val|
|
16
|
+
Hash === val ? Relevance::Tarantula::Attack.new(val) : val
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
@attacks = [Relevance::Tarantula::BasicAttack.new]
|
21
|
+
|
22
|
+
def initialize(form, attack = Relevance::Tarantula::BasicAttack.new)
|
23
|
+
@form = form
|
4
24
|
@method = form.method
|
5
25
|
@action = form.action
|
26
|
+
@attack = attack
|
6
27
|
@data = mutate_selects(form).merge(mutate_text_areas(form)).merge(mutate_inputs(form))
|
7
28
|
end
|
8
29
|
|
30
|
+
def crawl
|
31
|
+
begin
|
32
|
+
response = form.crawler.submit(method, action, data)
|
33
|
+
log "Response #{response.code} for #{self}"
|
34
|
+
rescue ActiveRecord::RecordNotFound => e
|
35
|
+
log "Skipping #{action}, presumed ok that record is missing"
|
36
|
+
response = Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
|
37
|
+
end
|
38
|
+
form.crawler.handle_form_results(self, response)
|
39
|
+
response
|
40
|
+
end
|
41
|
+
|
9
42
|
def self.mutate(form)
|
10
|
-
|
43
|
+
attacks.map{|attack| new(form, attack)} if attacks
|
11
44
|
end
|
12
|
-
|
45
|
+
|
13
46
|
def to_s
|
14
|
-
"#{action} #{method} #{data.inspect}"
|
47
|
+
"#{action} #{method} #{data.inspect} #{attack.inspect}"
|
15
48
|
end
|
16
|
-
|
49
|
+
|
17
50
|
# a form's signature is what makes it unique (e.g. action + fields)
|
18
51
|
# used to keep track of which forms we have submitted already
|
19
52
|
def signature
|
20
|
-
[action, data.keys.sort]
|
53
|
+
[action, data.keys.sort, attack.name]
|
21
54
|
end
|
22
|
-
|
55
|
+
|
23
56
|
def create_random_data_for(form, tag_selector)
|
24
57
|
form.search(tag_selector).inject({}) do |form_args, input|
|
25
58
|
# TODO: test
|
@@ -35,36 +68,21 @@ class Relevance::Tarantula::FormSubmission
|
|
35
68
|
def mutate_text_areas(form)
|
36
69
|
create_random_data_for(form, 'textarea')
|
37
70
|
end
|
38
|
-
|
71
|
+
|
39
72
|
def mutate_selects(form)
|
40
73
|
form.search('select').inject({}) do |form_args, select|
|
41
74
|
options = select.search('option')
|
42
75
|
option = options.rand
|
43
|
-
form_args[select['name']] = option['value']
|
76
|
+
form_args[select['name']] = option['value']
|
44
77
|
form_args
|
45
78
|
end
|
46
79
|
end
|
47
|
-
|
80
|
+
|
48
81
|
def random_data(input)
|
49
82
|
case input['name']
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
when /^_method$/ : input['value']
|
54
|
-
when nil : input['value']
|
55
|
-
else random_int
|
83
|
+
when /^_method$/ then input['value']
|
84
|
+
else
|
85
|
+
attack.input(input)
|
56
86
|
end
|
57
87
|
end
|
58
|
-
|
59
|
-
def big_number
|
60
|
-
10000 # arbitrary
|
61
|
-
end
|
62
|
-
|
63
|
-
def random_int
|
64
|
-
rand(big_number) - (big_number/2)
|
65
|
-
end
|
66
|
-
|
67
|
-
def random_whole_number
|
68
|
-
rand(big_number)
|
69
|
-
end
|
70
88
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
class Relevance::Tarantula::Link
|
2
|
+
include Relevance::Tarantula
|
2
3
|
|
3
4
|
class << self
|
4
5
|
include ActionView::Helpers::UrlHelper
|
@@ -17,18 +18,33 @@ class Relevance::Tarantula::Link
|
|
17
18
|
METHOD_REGEXPS[m] = /#{s}/
|
18
19
|
end
|
19
20
|
|
20
|
-
attr_accessor :href
|
21
|
+
attr_accessor :href, :crawler, :referrer
|
21
22
|
|
22
|
-
def initialize(link)
|
23
|
+
def initialize(link, crawler, referrer)
|
24
|
+
@crawler, @referrer = crawler, referrer
|
25
|
+
|
23
26
|
if String === link || link.nil?
|
24
|
-
@href = link
|
27
|
+
@href = transform_url(link)
|
25
28
|
@method = :get
|
26
29
|
else # should be a tag
|
27
|
-
@href = link['href'] ? link['href'].downcase : nil
|
30
|
+
@href = link['href'] ? transform_url(link['href'].downcase) : nil
|
28
31
|
@tag = link
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
35
|
+
def crawl
|
36
|
+
response = crawler.follow(method, href)
|
37
|
+
log "Response #{response.code} for #{self}"
|
38
|
+
crawler.handle_link_results(self, make_result(response))
|
39
|
+
end
|
40
|
+
|
41
|
+
def make_result(response)
|
42
|
+
crawler.make_result(:method => method,
|
43
|
+
:url => href,
|
44
|
+
:response => response,
|
45
|
+
:referrer => referrer)
|
46
|
+
end
|
47
|
+
|
32
48
|
def method
|
33
49
|
@method ||= begin
|
34
50
|
(@tag &&
|
@@ -39,6 +55,10 @@ class Relevance::Tarantula::Link
|
|
39
55
|
end
|
40
56
|
end
|
41
57
|
|
58
|
+
def transform_url(link)
|
59
|
+
crawler.transform_url(link)
|
60
|
+
end
|
61
|
+
|
42
62
|
def ==(obj)
|
43
63
|
obj.respond_to?(:href) && obj.respond_to?(:method) &&
|
44
64
|
self.href.to_s == obj.href.to_s && self.method.to_s == obj.method.to_s
|
@@ -41,7 +41,7 @@ class Relevance::Tarantula::RailsIntegrationProxy
|
|
41
41
|
if response.code == '404'
|
42
42
|
if File.exist?(static_content_path(url))
|
43
43
|
case ext = File.extension(url)
|
44
|
-
when /html|te?xt|css|js|jpe?g|gif|psd|png|eps|pdf/
|
44
|
+
when /html|te?xt|css|js|jpe?g|gif|psd|png|eps|pdf|ico/
|
45
45
|
response.body = static_content_file(url)
|
46
46
|
response.headers["type"] = "text/#{ext}" # readable as response.content_type
|
47
47
|
response.meta.attr_accessor :code
|
@@ -11,33 +11,43 @@ class Relevance::Tarantula::Result
|
|
11
11
|
self.instance_variable_set("@#{k}", v)
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
14
15
|
def short_description
|
15
16
|
[method,url].join(" ")
|
16
17
|
end
|
18
|
+
|
17
19
|
def sequence_number
|
18
20
|
@sequence_number ||= (self.class.next_number += 1)
|
19
21
|
end
|
22
|
+
|
20
23
|
def file_name
|
21
24
|
"#{sequence_number}.html"
|
22
25
|
end
|
26
|
+
|
23
27
|
def code
|
24
28
|
response && response.code
|
25
29
|
end
|
30
|
+
|
26
31
|
def body
|
27
32
|
response && response.body
|
28
33
|
end
|
34
|
+
|
29
35
|
def full_url
|
30
36
|
"#{DEFAULT_LOCALHOST}#{url}"
|
31
37
|
end
|
38
|
+
|
32
39
|
ALLOW_NNN_FOR = /^allow_(\d\d\d)_for$/
|
40
|
+
|
33
41
|
class << self
|
34
42
|
attr_accessor :next_number
|
43
|
+
|
35
44
|
def handle(result)
|
36
45
|
retval = result.dup
|
37
46
|
retval.success = successful?(result.response) || can_skip_error?(result)
|
38
47
|
retval.description = "Bad HTTP Response" unless retval.success
|
39
48
|
retval
|
40
49
|
end
|
50
|
+
|
41
51
|
def success_codes
|
42
52
|
%w{200 201 302 401}
|
43
53
|
end
|
@@ -51,16 +61,17 @@ class Relevance::Tarantula::Result
|
|
51
61
|
return false unless coll
|
52
62
|
coll.any? {|item| item === result.url}
|
53
63
|
end
|
64
|
+
|
54
65
|
def successful?(response)
|
55
66
|
success_codes.member?(response.code)
|
56
67
|
end
|
68
|
+
|
57
69
|
def method_missing(meth, *args)
|
58
70
|
super unless ALLOW_NNN_FOR =~ meth.to_s
|
59
71
|
(allow_errors_for[$1] ||= []).push(*args)
|
60
72
|
end
|
61
73
|
end
|
74
|
+
|
62
75
|
self.allow_errors_for = {}
|
63
76
|
self.next_number = 0
|
64
|
-
|
65
|
-
|
66
|
-
end
|
77
|
+
end
|
data/lib/relevance/tarantula.rb
CHANGED
@@ -49,10 +49,10 @@ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "log_gra
|
|
49
49
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "invalid_html_handler"))
|
50
50
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "transform"))
|
51
51
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "crawler"))
|
52
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "basic_attack"))
|
52
53
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form"))
|
53
54
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form_submission"))
|
54
55
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack"))
|
55
|
-
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_form_submission"))
|
56
56
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_handler"))
|
57
57
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "link"))
|
58
58
|
|