tarantula 0.2.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +3 -4
- data/Rakefile +9 -5
- data/VERSION.yml +2 -2
- data/examples/example_helper.rb +10 -1
- data/examples/relevance/tarantula/attack_handler_example.rb +1 -1
- data/examples/relevance/tarantula/basic_attack_example.rb +12 -0
- data/examples/relevance/tarantula/crawler_example.rb +66 -77
- data/examples/relevance/tarantula/form_example.rb +3 -3
- data/examples/relevance/tarantula/form_submission_example.rb +157 -57
- data/examples/relevance/tarantula/link_example.rb +24 -7
- data/examples/relevance/tarantula/rails_integration_proxy_example.rb +1 -1
- data/lib/relevance/tarantula/attack.rb +3 -0
- data/lib/relevance/tarantula/attack_handler.rb +1 -1
- data/lib/relevance/tarantula/basic_attack.rb +40 -0
- data/lib/relevance/tarantula/crawler.rb +36 -46
- data/lib/relevance/tarantula/detail.html.erb +11 -11
- data/lib/relevance/tarantula/form.rb +4 -2
- data/lib/relevance/tarantula/form_submission.rb +47 -29
- data/lib/relevance/tarantula/link.rb +24 -4
- data/lib/relevance/tarantula/rails_integration_proxy.rb +1 -1
- data/lib/relevance/tarantula/result.rb +14 -3
- data/lib/relevance/tarantula.rb +1 -1
- metadata +6 -6
- data/examples/relevance/tarantula/attack_form_submission_example.rb +0 -79
- data/lib/relevance/tarantula/attack_form_submission.rb +0 -75
@@ -4,47 +4,64 @@ describe "Relevance::Tarantula::Link" do
|
|
4
4
|
include ActionView::Helpers::UrlHelper
|
5
5
|
|
6
6
|
it "does not raise an error when initializing without href attribtue" do
|
7
|
-
link =
|
7
|
+
link = make_link(Hpricot('<a="/foo">foo</a>').at('a'))
|
8
8
|
link.href.should == nil
|
9
9
|
link.method.should == :get
|
10
10
|
end
|
11
11
|
|
12
12
|
it "parses anchor tags" do
|
13
|
-
link =
|
13
|
+
link = make_link(Hpricot('<a href="/foo">foo</a>').at('a'))
|
14
14
|
link.href.should == '/foo'
|
15
15
|
link.method.should == :get
|
16
16
|
end
|
17
17
|
|
18
18
|
it "parses anchor tags with POST 'method'" do
|
19
|
-
link =
|
19
|
+
link = make_link(Hpricot(%Q{<a href="/foo" onclick="#{method_javascript_function(:post)}">foo</a>}).at('a'))
|
20
20
|
link.href.should == '/foo'
|
21
21
|
link.method.should == :post
|
22
22
|
end
|
23
23
|
|
24
24
|
it "parses anchor tags with PUT 'method'" do
|
25
|
-
link =
|
25
|
+
link = make_link(Hpricot(%Q{<a href="/foo" onclick="#{method_javascript_function(:put)}">foo</a>}).at('a'))
|
26
26
|
link.href.should == '/foo'
|
27
27
|
link.method.should == :put
|
28
28
|
end
|
29
29
|
|
30
30
|
it "parses anchor tags with DELETE 'method'" do
|
31
|
-
link =
|
31
|
+
link = make_link(Hpricot(%Q{<a href="/foo" onclick="#{method_javascript_function(:delete)}">foo</a>}).at('a'))
|
32
32
|
link.href.should == '/foo'
|
33
33
|
link.method.should == :delete
|
34
34
|
end
|
35
35
|
|
36
36
|
it "parses link tags with text" do
|
37
|
-
link =
|
37
|
+
link = make_link(Hpricot('<link href="/bar">bar</a>').at('link'))
|
38
38
|
link.href.should == '/bar'
|
39
39
|
link.method.should == :get
|
40
40
|
end
|
41
41
|
|
42
42
|
it "parses link tags without text" do
|
43
|
-
link =
|
43
|
+
link = make_link(Hpricot('<link href="/bar" />').at('link'))
|
44
44
|
link.href.should == '/bar'
|
45
45
|
link.method.should == :get
|
46
46
|
end
|
47
47
|
|
48
|
+
it 'remembers link referrer if there is one' do
|
49
|
+
link = make_link('/url', stub_everything, '/some-referrer')
|
50
|
+
link.referrer.should == '/some-referrer'
|
51
|
+
end
|
52
|
+
|
53
|
+
# Crawling a link is expected to do three things: follow the URL through the
# crawler, log the response, and hand the result to the crawler's handlers.
# (The description previously said "two things" while listing three.)
it "does three things when crawled: follow, log, and handle" do
  crawler = Relevance::Tarantula::Crawler.new
  link = make_link('/foo', crawler)

  response = stub(:code => "200")
  crawler.expects(:follow).returns(response)
  link.expects(:log)
  crawler.expects(:handle_link_results)

  link.crawl
end
|
64
|
+
|
48
65
|
# method_javascript_function needs this method
|
49
66
|
def protect_against_forgery?
|
50
67
|
false
|
@@ -41,7 +41,7 @@ describe "Relevance::Tarantula::RailsIntegrationProxy" do
|
|
41
41
|
it "adds a response accessor to its delegate rails integration test" do
|
42
42
|
o = Object.new
|
43
43
|
Relevance::Tarantula::RailsIntegrationProxy.new(o)
|
44
|
-
o.methods(false).sort.should == %w{response response=}
|
44
|
+
o.methods(false).map(&:to_s).sort.should == %w{response response=}
|
45
45
|
end
|
46
46
|
|
47
47
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Relevance
  module Tarantula
    # The default form-fuzzing attack: it fills form inputs with simple random
    # integers, choosing the generator from each input's name.
    class BasicAttack
      # Attributes exposed as readers and compared by #==.
      ATTRS = [:name, :output, :description].freeze

      attr_reader(*ATTRS)

      def initialize
        @name = "Tarantula Basic Fuzzer"
        @output = nil
        @description = "Supplies purely random but simplistically generated form input."
      end

      # Two basic attacks are equal when every attribute in ATTRS matches.
      def ==(other)
        BasicAttack === other && ATTRS.all? { |attr| send(attr) == other.send(attr) }
      end

      # Picks the value to submit for one form input.
      #
      # input_field - an object indexable by 'name' and 'value' (e.g. a parsed
      #               form tag or a Hash).
      #
      # Returns a signed random integer for names containing "amount", a
      # non-negative random integer for names ending in "_id", nil for
      # "uploaded_data" fields, the field's own value when it has no name, and
      # a signed random integer for anything else.
      def input(input_field)
        case input_field['name']
        when /amount/        then random_int
        when /_id$/          then random_whole_number
        when /uploaded_data/ then nil
        # Must read from input_field: a bare `input` here would re-enter this
        # one-argument method with no arguments and raise ArgumentError.
        when nil             then input_field['value']
        else
          random_int
        end
      end

      # Exclusive upper bound used by the random generators.
      def big_number
        10000 # arbitrary
      end

      # Random integer in -(big_number/2)...(big_number/2).
      def random_int
        rand(big_number) - (big_number / 2)
      end

      # Random integer in 0...big_number.
      def random_whole_number
        rand(big_number)
      end
    end
  end
end
|
39
|
+
|
40
|
+
|
@@ -10,7 +10,7 @@ class Relevance::Tarantula::Crawler
|
|
10
10
|
class CrawlTimeout < RuntimeError; end
|
11
11
|
|
12
12
|
attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
|
13
|
-
:reporters, :
|
13
|
+
:reporters, :crawl_queue, :links_queued,
|
14
14
|
:form_signatures_queued, :max_url_length, :response_code_handler,
|
15
15
|
:times_to_crawl, :fuzzers, :test_name, :crawl_timeout
|
16
16
|
attr_reader :transform_url_patterns, :referrers, :failures, :successes, :crawl_start_times, :crawl_end_times
|
@@ -22,8 +22,7 @@ class Relevance::Tarantula::Crawler
|
|
22
22
|
@handlers = [@response_code_handler = Result]
|
23
23
|
@links_queued = Set.new
|
24
24
|
@form_signatures_queued = Set.new
|
25
|
-
@
|
26
|
-
@forms_to_crawl = []
|
25
|
+
@crawl_queue = []
|
27
26
|
@crawl_start_times, @crawl_end_times = [], []
|
28
27
|
@crawl_timeout = 20.minutes
|
29
28
|
@referrers = {}
|
@@ -39,6 +38,8 @@ class Relevance::Tarantula::Crawler
|
|
39
38
|
@decoder = HTMLEntities.new
|
40
39
|
@times_to_crawl = 1
|
41
40
|
@fuzzers = [Relevance::Tarantula::FormSubmission]
|
41
|
+
|
42
|
+
@stdout_tty = $stdout.tty?
|
42
43
|
end
|
43
44
|
|
44
45
|
def method_missing(meth, *args)
|
@@ -55,14 +56,14 @@ class Relevance::Tarantula::Crawler
|
|
55
56
|
def crawl(url = "/")
|
56
57
|
orig_links_queued = @links_queued.dup
|
57
58
|
orig_form_signatures_queued = @form_signatures_queued.dup
|
58
|
-
|
59
|
-
orig_forms_to_crawl = @forms_to_crawl.dup
|
59
|
+
orig_crawl_queue = @crawl_queue.dup
|
60
60
|
@times_to_crawl.times do |num|
|
61
61
|
queue_link url
|
62
62
|
|
63
63
|
begin
|
64
64
|
do_crawl num
|
65
65
|
rescue CrawlTimeout => e
|
66
|
+
puts
|
66
67
|
puts e.message
|
67
68
|
end
|
68
69
|
|
@@ -71,8 +72,7 @@ class Relevance::Tarantula::Crawler
|
|
71
72
|
if num + 1 < @times_to_crawl
|
72
73
|
@links_queued = orig_links_queued
|
73
74
|
@form_signatures_queued = orig_form_signatures_queued
|
74
|
-
@
|
75
|
-
@forms_to_crawl = orig_forms_to_crawl
|
75
|
+
@crawl_queue = orig_crawl_queue
|
76
76
|
@referrers = {}
|
77
77
|
end
|
78
78
|
end
|
@@ -83,23 +83,20 @@ class Relevance::Tarantula::Crawler
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def finished?
|
86
|
-
@
|
86
|
+
@crawl_queue.empty?
|
87
87
|
end
|
88
88
|
|
89
89
|
def do_crawl(number)
|
90
90
|
while (!finished?)
|
91
91
|
@crawl_start_times << Time.now
|
92
|
-
|
93
|
-
crawl_queued_forms(number)
|
92
|
+
crawl_the_queue(number)
|
94
93
|
@crawl_end_times << Time.now
|
95
94
|
end
|
96
95
|
end
|
97
96
|
|
98
|
-
def
|
99
|
-
while (
|
100
|
-
|
101
|
-
log "Response #{response.code} for #{link}"
|
102
|
-
handle_link_results(link, response)
|
97
|
+
def crawl_the_queue(number = 0)
|
98
|
+
while (request = @crawl_queue.pop)
|
99
|
+
request.crawl
|
103
100
|
blip(number)
|
104
101
|
end
|
105
102
|
end
|
@@ -110,15 +107,10 @@ class Relevance::Tarantula::Crawler
|
|
110
107
|
end
|
111
108
|
end
|
112
109
|
|
113
|
-
def handle_link_results(link,
|
110
|
+
def handle_link_results(link, result)
|
114
111
|
handlers.each do |h|
|
115
112
|
begin
|
116
|
-
save_result h.handle(
|
117
|
-
:url => link.href,
|
118
|
-
:response => response,
|
119
|
-
:log => grab_log!,
|
120
|
-
:referrer => referrers[link],
|
121
|
-
:test_name => test_name).freeze)
|
113
|
+
save_result h.handle(result)
|
122
114
|
rescue Exception => e
|
123
115
|
log "error handling #{link} #{e.message}"
|
124
116
|
# TODO: pass to results
|
@@ -126,23 +118,14 @@ class Relevance::Tarantula::Crawler
|
|
126
118
|
end
|
127
119
|
end
|
128
120
|
|
129
|
-
def
|
130
|
-
|
131
|
-
log "Response #{response.code} for #{form}"
|
132
|
-
response
|
133
|
-
rescue ActiveRecord::RecordNotFound => e
|
134
|
-
log "Skipping #{form.action}, presumed ok that record is missing"
|
135
|
-
Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
|
136
|
-
end
|
137
|
-
|
138
|
-
def crawl_queued_forms(number = 0)
|
139
|
-
while (form = @forms_to_crawl.pop)
|
140
|
-
response = crawl_form(form)
|
141
|
-
handle_form_results(form, response)
|
142
|
-
blip(number)
|
143
|
-
end
|
121
|
+
def follow(method, url, data=nil)
|
122
|
+
proxy.send(method, url, data)
|
144
123
|
end
|
145
124
|
|
125
|
+
def submit(method, action, data)
|
126
|
+
proxy.send(method, action, data)
|
127
|
+
end
|
128
|
+
|
146
129
|
def elasped_time_for_pass(num)
|
147
130
|
Time.now - crawl_start_times[num]
|
148
131
|
end
|
@@ -150,6 +133,14 @@ class Relevance::Tarantula::Crawler
|
|
150
133
|
def grab_log!
|
151
134
|
@log_grabber && @log_grabber.grab!
|
152
135
|
end
|
136
|
+
|
137
|
+
def make_result(options)
|
138
|
+
defaults = {
|
139
|
+
:log => grab_log!,
|
140
|
+
:test_name => test_name
|
141
|
+
}
|
142
|
+
Result.new(defaults.merge(options)).freeze
|
143
|
+
end
|
153
144
|
|
154
145
|
def handle_form_results(form, response)
|
155
146
|
handlers.each do |h|
|
@@ -193,23 +184,21 @@ class Relevance::Tarantula::Crawler
|
|
193
184
|
end
|
194
185
|
|
195
186
|
def queue_link(dest, referrer = nil)
|
196
|
-
dest = Link.new(dest)
|
197
|
-
dest.href = transform_url(dest.href)
|
187
|
+
dest = Link.new(dest, self, referrer)
|
198
188
|
return if should_skip_link?(dest)
|
199
|
-
@
|
200
|
-
@links_to_crawl << dest
|
189
|
+
@crawl_queue << dest
|
201
190
|
@links_queued << dest
|
202
191
|
dest
|
203
192
|
end
|
204
193
|
|
205
194
|
def queue_form(form, referrer = nil)
|
206
195
|
fuzzers.each do |fuzzer|
|
207
|
-
fuzzer.mutate(Form.new(form)).each do |fs|
|
208
|
-
# fs = fuzzer.new(Form.new(form))
|
196
|
+
fuzzer.mutate(Form.new(form, self, referrer)).each do |fs|
|
197
|
+
# fs = fuzzer.new(Form.new(form, self, referrer))
|
209
198
|
fs.action = transform_url(fs.action)
|
210
199
|
return if should_skip_form_submission?(fs)
|
211
200
|
@referrers[fs.action] = referrer if referrer
|
212
|
-
@
|
201
|
+
@crawl_queue << fs
|
213
202
|
@form_signatures_queued << fs.signature
|
214
203
|
end
|
215
204
|
end
|
@@ -234,6 +223,7 @@ class Relevance::Tarantula::Crawler
|
|
234
223
|
end
|
235
224
|
|
236
225
|
def report_results
|
226
|
+
puts "Crawled #{total_links_count} links and forms."
|
237
227
|
generate_reports
|
238
228
|
end
|
239
229
|
|
@@ -242,7 +232,7 @@ class Relevance::Tarantula::Crawler
|
|
242
232
|
end
|
243
233
|
|
244
234
|
def links_remaining_count
|
245
|
-
@
|
235
|
+
@crawl_queue.size
|
246
236
|
end
|
247
237
|
|
248
238
|
def links_completed_count
|
@@ -251,7 +241,7 @@ class Relevance::Tarantula::Crawler
|
|
251
241
|
|
252
242
|
def blip(number = 0)
|
253
243
|
unless verbose
|
254
|
-
print "\r #{links_completed_count} of #{total_links_count} links completed "
|
244
|
+
print "\r #{links_completed_count} of #{total_links_count} links completed " if @stdout_tty
|
255
245
|
timeout_if_too_long(number)
|
256
246
|
end
|
257
247
|
end
|
@@ -25,18 +25,18 @@
|
|
25
25
|
</ul>
|
26
26
|
|
27
27
|
<div id="report">
|
28
|
-
<h3>Detail of <%= short_description %> <em>Generated on <%= Time.now %></em></h3>
|
29
|
-
<p><b>Resource</b> <a href="<%= full_url %>"><%= full_url %></a></p>
|
30
|
-
<p><b>Response</b> <span class="r<%= code.first %>"><%= code %></span></p>
|
31
|
-
<p><b>Referrer</b> <%= referrer || "" %></p>
|
28
|
+
<h3>Detail of <%= result.short_description %> <em>Generated on <%= Time.now %></em></h3>
|
29
|
+
<p><b>Resource</b> <a href="<%= result.full_url %>"><%= result.full_url %></a></p>
|
30
|
+
<p><b>Response</b> <span class="r<%= result.code.first %>"><%= result.code %></span></p>
|
31
|
+
<p><b>Referrer</b> <%= result.referrer || "" %></p>
|
32
32
|
|
33
33
|
<table class="output">
|
34
34
|
<tbody>
|
35
35
|
<tr>
|
36
36
|
<th colspan="2"># Data</th>
|
37
37
|
</tr>
|
38
|
-
<% if data %>
|
39
|
-
<%= wrap_in_line_number_table_row(data) %>
|
38
|
+
<% if result.data %>
|
39
|
+
<%= result.wrap_in_line_number_table_row(result.data) %>
|
40
40
|
<% else %>
|
41
41
|
<tr>
|
42
42
|
<td colspan="2">No Data</td>
|
@@ -50,8 +50,8 @@
|
|
50
50
|
<tr>
|
51
51
|
<th colspan="2"># Body</th>
|
52
52
|
</tr>
|
53
|
-
<% if body %>
|
54
|
-
<%= wrap_in_line_number_table_row(body) %>
|
53
|
+
<% if result.body %>
|
54
|
+
<%= result.wrap_in_line_number_table_row(result.body) %>
|
55
55
|
<% else %>
|
56
56
|
<tr>
|
57
57
|
<td colspan="2">No Body</td>
|
@@ -65,8 +65,8 @@
|
|
65
65
|
<tr>
|
66
66
|
<th colspan="2"># Log</th>
|
67
67
|
</tr>
|
68
|
-
<% if log %>
|
69
|
-
<%= wrap_in_line_number_table_row(log) {|line| wrap_stack_trace_line(line)} %>
|
68
|
+
<% if result.log %>
|
69
|
+
<%= result.wrap_in_line_number_table_row(result.log) {|line| wrap_stack_trace_line(line)} %>
|
70
70
|
<% else %>
|
71
71
|
<tr>
|
72
72
|
<td colspan="2">No Log</td>
|
@@ -78,4 +78,4 @@
|
|
78
78
|
</div>
|
79
79
|
</div>
|
80
80
|
</body>
|
81
|
-
</html>
|
81
|
+
</html>
|
@@ -2,8 +2,10 @@ class Relevance::Tarantula::Form
|
|
2
2
|
extend Forwardable
|
3
3
|
def_delegators("@tag", :search)
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
attr_accessor :crawler, :referrer
|
6
|
+
|
7
|
+
def initialize(tag, crawler, referrer)
|
8
|
+
@tag, @crawler, @referrer = tag, crawler, referrer
|
7
9
|
end
|
8
10
|
|
9
11
|
def action
|
@@ -1,25 +1,58 @@
|
|
1
1
|
class Relevance::Tarantula::FormSubmission
|
2
|
-
|
3
|
-
|
2
|
+
include Relevance::Tarantula
|
3
|
+
attr_accessor :method, :action, :data, :attack, :form
|
4
|
+
|
5
|
+
class << self
  # Returns the configured attacks with any Hash entries converted to
  # Relevance::Tarantula::Attack objects (the converted list is stored back).
  # Returns nil when no attacks have been assigned, instead of raising
  # NoMethodError on nil.
  def attacks
    return @attacks if @attacks.nil?
    @attacks = normalize_attacks(@attacks)
  end

  # Assigns the attack list, converting Hash entries to Attack objects.
  # Passing nil clears the list.
  def attacks=(atts)
    @attacks = atts.nil? ? nil : normalize_attacks(atts)
  end

  private

  # Converts each Hash entry into an Attack; other entries pass through.
  def normalize_attacks(list)
    list.map do |val|
      Hash === val ? Relevance::Tarantula::Attack.new(val) : val
    end
  end
end
|
20
|
+
@attacks = [Relevance::Tarantula::BasicAttack.new]
|
21
|
+
|
22
|
+
def initialize(form, attack = Relevance::Tarantula::BasicAttack.new)
|
23
|
+
@form = form
|
4
24
|
@method = form.method
|
5
25
|
@action = form.action
|
26
|
+
@attack = attack
|
6
27
|
@data = mutate_selects(form).merge(mutate_text_areas(form)).merge(mutate_inputs(form))
|
7
28
|
end
|
8
29
|
|
30
|
+
def crawl
|
31
|
+
begin
|
32
|
+
response = form.crawler.submit(method, action, data)
|
33
|
+
log "Response #{response.code} for #{self}"
|
34
|
+
rescue ActiveRecord::RecordNotFound => e
|
35
|
+
log "Skipping #{action}, presumed ok that record is missing"
|
36
|
+
response = Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
|
37
|
+
end
|
38
|
+
form.crawler.handle_form_results(self, response)
|
39
|
+
response
|
40
|
+
end
|
41
|
+
|
9
42
|
def self.mutate(form)
|
10
|
-
|
43
|
+
attacks.map{|attack| new(form, attack)} if attacks
|
11
44
|
end
|
12
|
-
|
45
|
+
|
13
46
|
def to_s
|
14
|
-
"#{action} #{method} #{data.inspect}"
|
47
|
+
"#{action} #{method} #{data.inspect} #{attack.inspect}"
|
15
48
|
end
|
16
|
-
|
49
|
+
|
17
50
|
# a form's signature is what makes it unique (e.g. action + fields)
|
18
51
|
# used to keep track of which forms we have submitted already
|
19
52
|
def signature
|
20
|
-
[action, data.keys.sort]
|
53
|
+
[action, data.keys.sort, attack.name]
|
21
54
|
end
|
22
|
-
|
55
|
+
|
23
56
|
def create_random_data_for(form, tag_selector)
|
24
57
|
form.search(tag_selector).inject({}) do |form_args, input|
|
25
58
|
# TODO: test
|
@@ -35,36 +68,21 @@ class Relevance::Tarantula::FormSubmission
|
|
35
68
|
def mutate_text_areas(form)
|
36
69
|
create_random_data_for(form, 'textarea')
|
37
70
|
end
|
38
|
-
|
71
|
+
|
39
72
|
def mutate_selects(form)
|
40
73
|
form.search('select').inject({}) do |form_args, select|
|
41
74
|
options = select.search('option')
|
42
75
|
option = options.rand
|
43
|
-
form_args[select['name']] = option['value']
|
76
|
+
form_args[select['name']] = option['value']
|
44
77
|
form_args
|
45
78
|
end
|
46
79
|
end
|
47
|
-
|
80
|
+
|
48
81
|
def random_data(input)
|
49
82
|
case input['name']
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
when /^_method$/ : input['value']
|
54
|
-
when nil : input['value']
|
55
|
-
else random_int
|
83
|
+
when /^_method$/ then input['value']
|
84
|
+
else
|
85
|
+
attack.input(input)
|
56
86
|
end
|
57
87
|
end
|
58
|
-
|
59
|
-
def big_number
|
60
|
-
10000 # arbitrary
|
61
|
-
end
|
62
|
-
|
63
|
-
def random_int
|
64
|
-
rand(big_number) - (big_number/2)
|
65
|
-
end
|
66
|
-
|
67
|
-
def random_whole_number
|
68
|
-
rand(big_number)
|
69
|
-
end
|
70
88
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
class Relevance::Tarantula::Link
|
2
|
+
include Relevance::Tarantula
|
2
3
|
|
3
4
|
class << self
|
4
5
|
include ActionView::Helpers::UrlHelper
|
@@ -17,18 +18,33 @@ class Relevance::Tarantula::Link
|
|
17
18
|
METHOD_REGEXPS[m] = /#{s}/
|
18
19
|
end
|
19
20
|
|
20
|
-
attr_accessor :href
|
21
|
+
attr_accessor :href, :crawler, :referrer
|
21
22
|
|
22
|
-
def initialize(link)
|
23
|
+
def initialize(link, crawler, referrer)
|
24
|
+
@crawler, @referrer = crawler, referrer
|
25
|
+
|
23
26
|
if String === link || link.nil?
|
24
|
-
@href = link
|
27
|
+
@href = transform_url(link)
|
25
28
|
@method = :get
|
26
29
|
else # should be a tag
|
27
|
-
@href = link['href'] ? link['href'].downcase : nil
|
30
|
+
@href = link['href'] ? transform_url(link['href'].downcase) : nil
|
28
31
|
@tag = link
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
35
|
+
def crawl
|
36
|
+
response = crawler.follow(method, href)
|
37
|
+
log "Response #{response.code} for #{self}"
|
38
|
+
crawler.handle_link_results(self, make_result(response))
|
39
|
+
end
|
40
|
+
|
41
|
+
def make_result(response)
|
42
|
+
crawler.make_result(:method => method,
|
43
|
+
:url => href,
|
44
|
+
:response => response,
|
45
|
+
:referrer => referrer)
|
46
|
+
end
|
47
|
+
|
32
48
|
def method
|
33
49
|
@method ||= begin
|
34
50
|
(@tag &&
|
@@ -39,6 +55,10 @@ class Relevance::Tarantula::Link
|
|
39
55
|
end
|
40
56
|
end
|
41
57
|
|
58
|
+
def transform_url(link)
|
59
|
+
crawler.transform_url(link)
|
60
|
+
end
|
61
|
+
|
42
62
|
def ==(obj)
|
43
63
|
obj.respond_to?(:href) && obj.respond_to?(:method) &&
|
44
64
|
self.href.to_s == obj.href.to_s && self.method.to_s == obj.method.to_s
|
@@ -41,7 +41,7 @@ class Relevance::Tarantula::RailsIntegrationProxy
|
|
41
41
|
if response.code == '404'
|
42
42
|
if File.exist?(static_content_path(url))
|
43
43
|
case ext = File.extension(url)
|
44
|
-
when /html|te?xt|css|js|jpe?g|gif|psd|png|eps|pdf/
|
44
|
+
when /html|te?xt|css|js|jpe?g|gif|psd|png|eps|pdf|ico/
|
45
45
|
response.body = static_content_file(url)
|
46
46
|
response.headers["type"] = "text/#{ext}" # readable as response.content_type
|
47
47
|
response.meta.attr_accessor :code
|
@@ -11,33 +11,43 @@ class Relevance::Tarantula::Result
|
|
11
11
|
self.instance_variable_set("@#{k}", v)
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
14
15
|
def short_description
|
15
16
|
[method,url].join(" ")
|
16
17
|
end
|
18
|
+
|
17
19
|
def sequence_number
|
18
20
|
@sequence_number ||= (self.class.next_number += 1)
|
19
21
|
end
|
22
|
+
|
20
23
|
def file_name
|
21
24
|
"#{sequence_number}.html"
|
22
25
|
end
|
26
|
+
|
23
27
|
def code
|
24
28
|
response && response.code
|
25
29
|
end
|
30
|
+
|
26
31
|
def body
|
27
32
|
response && response.body
|
28
33
|
end
|
34
|
+
|
29
35
|
def full_url
|
30
36
|
"#{DEFAULT_LOCALHOST}#{url}"
|
31
37
|
end
|
38
|
+
|
32
39
|
ALLOW_NNN_FOR = /^allow_(\d\d\d)_for$/
|
40
|
+
|
33
41
|
class << self
|
34
42
|
attr_accessor :next_number
|
43
|
+
|
35
44
|
def handle(result)
|
36
45
|
retval = result.dup
|
37
46
|
retval.success = successful?(result.response) || can_skip_error?(result)
|
38
47
|
retval.description = "Bad HTTP Response" unless retval.success
|
39
48
|
retval
|
40
49
|
end
|
50
|
+
|
41
51
|
def success_codes
|
42
52
|
%w{200 201 302 401}
|
43
53
|
end
|
@@ -51,16 +61,17 @@ class Relevance::Tarantula::Result
|
|
51
61
|
return false unless coll
|
52
62
|
coll.any? {|item| item === result.url}
|
53
63
|
end
|
64
|
+
|
54
65
|
def successful?(response)
|
55
66
|
success_codes.member?(response.code)
|
56
67
|
end
|
68
|
+
|
57
69
|
def method_missing(meth, *args)
|
58
70
|
super unless ALLOW_NNN_FOR =~ meth.to_s
|
59
71
|
(allow_errors_for[$1] ||= []).push(*args)
|
60
72
|
end
|
61
73
|
end
|
74
|
+
|
62
75
|
self.allow_errors_for = {}
|
63
76
|
self.next_number = 0
|
64
|
-
|
65
|
-
|
66
|
-
end
|
77
|
+
end
|
data/lib/relevance/tarantula.rb
CHANGED
@@ -49,10 +49,10 @@ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "log_gra
|
|
49
49
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "invalid_html_handler"))
|
50
50
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "transform"))
|
51
51
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "crawler"))
|
52
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "basic_attack"))
|
52
53
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form"))
|
53
54
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form_submission"))
|
54
55
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack"))
|
55
|
-
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_form_submission"))
|
56
56
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_handler"))
|
57
57
|
require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "link"))
|
58
58
|
|