tarantula-rails3 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. data/CHANGELOG +49 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +161 -0
  4. data/Rakefile +83 -0
  5. data/VERSION.yml +4 -0
  6. data/examples/example_helper.rb +57 -0
  7. data/examples/relevance/core_extensions/ellipsize_example.rb +19 -0
  8. data/examples/relevance/core_extensions/file_example.rb +8 -0
  9. data/examples/relevance/core_extensions/response_example.rb +29 -0
  10. data/examples/relevance/core_extensions/test_case_example.rb +20 -0
  11. data/examples/relevance/tarantula/attack_handler_example.rb +29 -0
  12. data/examples/relevance/tarantula/basic_attack_example.rb +12 -0
  13. data/examples/relevance/tarantula/crawler_example.rb +375 -0
  14. data/examples/relevance/tarantula/form_example.rb +50 -0
  15. data/examples/relevance/tarantula/form_submission_example.rb +171 -0
  16. data/examples/relevance/tarantula/html_document_handler_example.rb +43 -0
  17. data/examples/relevance/tarantula/html_report_helper_example.rb +46 -0
  18. data/examples/relevance/tarantula/html_reporter_example.rb +82 -0
  19. data/examples/relevance/tarantula/invalid_html_handler_example.rb +33 -0
  20. data/examples/relevance/tarantula/io_reporter_example.rb +11 -0
  21. data/examples/relevance/tarantula/link_example.rb +84 -0
  22. data/examples/relevance/tarantula/log_grabber_example.rb +26 -0
  23. data/examples/relevance/tarantula/rails_integration_proxy_example.rb +88 -0
  24. data/examples/relevance/tarantula/result_example.rb +85 -0
  25. data/examples/relevance/tarantula/tidy_handler_example.rb +58 -0
  26. data/examples/relevance/tarantula/transform_example.rb +20 -0
  27. data/examples/relevance/tarantula_example.rb +23 -0
  28. data/laf/images/header_bg.jpg +0 -0
  29. data/laf/images/logo.png +0 -0
  30. data/laf/images/tagline.png +0 -0
  31. data/laf/javascripts/jquery-1.2.3.js +3408 -0
  32. data/laf/javascripts/jquery-ui-tabs.js +890 -0
  33. data/laf/javascripts/jquery.tablesorter.js +861 -0
  34. data/laf/javascripts/tarantula.js +10 -0
  35. data/laf/stylesheets/tarantula.css +346 -0
  36. data/lib/relevance/core_extensions/ellipsize.rb +34 -0
  37. data/lib/relevance/core_extensions/file.rb +9 -0
  38. data/lib/relevance/core_extensions/metaclass.rb +78 -0
  39. data/lib/relevance/core_extensions/response.rb +9 -0
  40. data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
  41. data/lib/relevance/core_extensions/test_case.rb +19 -0
  42. data/lib/relevance/tarantula.rb +58 -0
  43. data/lib/relevance/tarantula/attack.rb +18 -0
  44. data/lib/relevance/tarantula/attack_handler.rb +37 -0
  45. data/lib/relevance/tarantula/basic_attack.rb +40 -0
  46. data/lib/relevance/tarantula/crawler.rb +254 -0
  47. data/lib/relevance/tarantula/detail.html.erb +81 -0
  48. data/lib/relevance/tarantula/form.rb +23 -0
  49. data/lib/relevance/tarantula/form_submission.rb +88 -0
  50. data/lib/relevance/tarantula/html_document_handler.rb +36 -0
  51. data/lib/relevance/tarantula/html_report_helper.rb +39 -0
  52. data/lib/relevance/tarantula/html_reporter.rb +105 -0
  53. data/lib/relevance/tarantula/index.html.erb +37 -0
  54. data/lib/relevance/tarantula/invalid_html_handler.rb +18 -0
  55. data/lib/relevance/tarantula/io_reporter.rb +34 -0
  56. data/lib/relevance/tarantula/link.rb +94 -0
  57. data/lib/relevance/tarantula/log_grabber.rb +16 -0
  58. data/lib/relevance/tarantula/rails_integration_proxy.rb +68 -0
  59. data/lib/relevance/tarantula/recording.rb +12 -0
  60. data/lib/relevance/tarantula/response.rb +13 -0
  61. data/lib/relevance/tarantula/result.rb +77 -0
  62. data/lib/relevance/tarantula/test_report.html.erb +32 -0
  63. data/lib/relevance/tarantula/tidy_handler.rb +32 -0
  64. data/lib/relevance/tarantula/transform.rb +17 -0
  65. data/lib/relevance/tasks/tarantula_tasks.rake +42 -0
  66. data/lib/tarantula-rails3.rb +9 -0
  67. data/template/tarantula_test.rb +22 -0
  68. metadata +164 -0
@@ -0,0 +1,29 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
+
3
# Exercises AttackHandler#handle with a single stubbed attack whose
# :output marker is '<bad>'.
describe "Relevance::Tarantula::AttackHandler" do
  before do
    known_attack = Relevance::Tarantula::Attack.new({:name => 'foo_name', :input => 'foo_code', :output => '<bad>'})
    @handler = Relevance::Tarantula::AttackHandler.new
    @handler.stubs(:attacks).returns([known_attack])
  end

  it "lets safe documents through" do
    # The body does not contain the attack's output text.
    page = stub(:html? => true, :body => '<a href="/foo">good</a>')
    crawl_result = Relevance::Tarantula::Result.new(:response => page)
    outcome = @handler.handle(crawl_result)
    outcome.should == nil
  end

  it "detects the supplied code" do
    # The body contains the attack's output text ('<bad>').
    page = stub(:html? => true, :body => '<a href="/foo"><bad></a>')
    crawl_result = Relevance::Tarantula::Result.new(:response => page)
    outcome = @handler.handle(crawl_result)
    outcome.success.should == false
  end
end
20
+
21
# An attack defined without :output must not cause the handler to report
# anything for an ordinary page.
describe "Attacks without an output specified" do
  it "never matches anything" do
    handler = Relevance::Tarantula::AttackHandler.new
    outputless_attack = Relevance::Tarantula::Attack.new({:name => 'foo_name', :input => 'foo_code'})
    # Here the attack list is stubbed on FormSubmission rather than on the handler.
    Relevance::Tarantula::FormSubmission.stubs(:attacks).returns([outputless_attack])

    page = stub(:html? => true, :body => '<a href="/foo">good</a>')
    crawl_result = Relevance::Tarantula::Result.new(:response => page)
    outcome = handler.handle(crawl_result)

    outcome.should == nil
  end
end
@@ -0,0 +1,12 @@
1
+ require File.dirname(__FILE__) + "/../../example_helper.rb"
2
+
3
# Spec for BasicAttack#random_whole_number.
describe Relevance::Tarantula::BasicAttack do
  before do
    @attack = Relevance::Tarantula::BasicAttack.new
  end

  it "can generate a random whole number" do
    @attack.random_whole_number.should >= 0
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # removed in 3.2, while Integer matches whole numbers on every version.
    Integer.should === @attack.random_whole_number
  end
end
@@ -0,0 +1,375 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
+
3
+ describe Relevance::Tarantula::Crawler do
4
+
5
# Crawler#transform_url: decodes numeric character references and drops
# the "#name" portion of a URL.
describe "transform_url" do
  before do
    @crawler = Relevance::Tarantula::Crawler.new
  end

  it "de-obfuscates unicode obfuscated urls" do
    # "mailto:" written as decimal character references.
    encoded = "&#109;&#97;&#105;&#108;&#116;&#111;&#58;"
    decoded = @crawler.transform_url(encoded)
    decoded.should == "mailto:"
  end

  it "strips the trailing name portion of a link" do
    stripped = @crawler.transform_url('http://host/path#name')
    stripped.should == 'http://host/path'
  end
end
18
+
19
+
20
# Crawler#grab_log!: nil without a grabber, otherwise whatever the
# grabber's grab! returns.
describe "log grabbing" do
  it "returns nil if no grabber is specified" do
    Relevance::Tarantula::Crawler.new.grab_log!.should == nil
  end

  it "returns grabber.grab if grabber is specified" do
    fake_grabber = stub(:grab! => "fake log entry")
    crawler = Relevance::Tarantula::Crawler.new
    crawler.log_grabber = fake_grabber
    crawler.grab_log!.should == "fake log entry"
  end
end
34
+
35
# An Interrupt raised during do_crawl must still lead to report_results
# and a "CTRL-C" line on $stderr.
describe "interrupt" do
  it 'catches interruption and writes the partial report' do
    crawler = Relevance::Tarantula::Crawler.new

    # Neutralise queueing and make the crawl itself blow up with Interrupt.
    crawler.stubs(:queue_link)
    crawler.stubs(:do_crawl).raises(Interrupt)

    crawler.expects(:report_results)
    $stderr.expects(:puts).with("CTRL-C")

    crawler.crawl
  end
end
47
+
48
# Regression spec: handle_form_results must build its Result from the
# form's action/method and the response.
describe 'handle_form_results' do
  it 'captures the result values (bugfix)' do
    response = stub_everything
    expected_args = {
      :url => :action_stub,
      :data => 'nil',
      :response => response,
      :referrer => :action_stub,
      :log => nil,
      :method => :stub_method,
      :test_name => nil
    }
    # Build the real Result first; afterwards Result.new is replaced by an expectation.
    prebuilt_result = Relevance::Tarantula::Result.new(expected_args)
    Relevance::Tarantula::Result.expects(:new).with(expected_args).returns(prebuilt_result)

    crawler = Relevance::Tarantula::Crawler.new
    form_stub = stub_everything(:method => :stub_method, :action => :action_stub)
    crawler.handle_form_results(form_stub, response)
  end
end
67
+
68
# Crawler#crawl orchestration: queue the root, crawl, then report.
describe "crawl" do
  it 'queues the first url, does crawl, and then reports results' do
    crawler = Relevance::Tarantula::Crawler.new
    crawler.expects(:queue_link).with("/foobar")
    crawler.expects(:do_crawl)
    crawler.expects(:report_results)

    crawler.crawl("/foobar")
  end

  it 'reports results even if the crawl fails' do
    crawler = Relevance::Tarantula::Crawler.new
    crawler.expects(:do_crawl).raises(RuntimeError)
    crawler.expects(:report_results)

    failing_crawl = lambda { crawler.crawl('/') }
    failing_crawl.should raise_error(RuntimeError)
  end
end
86
+
87
# Crawler#queue_link / #queue_form bookkeeping.
describe "queueing" do
  it 'queues and remembers links' do
    crawler = Relevance::Tarantula::Crawler.new
    crawler.expects(:transform_url).with("/url").returns("/transformed").at_least_once

    crawler.queue_link("/url")

    # TODO: not sure this is the best way to test this anymore; it relies on
    # the result of transform in both the actual and the expected value.
    crawler.crawl_queue.should == [make_link("/url", crawler)]
    crawler.links_queued.should == Set.new([make_link("/url", crawler)])
  end

  it 'queues and remembers forms' do
    crawler = Relevance::Tarantula::Crawler.new
    form_tag = Hpricot('<form action="/action" method="post"/>').at('form')
    expected_signature = Relevance::Tarantula::FormSubmission.new(make_form(form_tag)).signature

    crawler.queue_form(form_tag)

    crawler.crawl_queue.size.should == 1
    crawler.form_signatures_queued.should == Set.new([expected_signature])
  end

  it "passes link, self, and referrer when creating Link objects" do
    crawler = Relevance::Tarantula::Crawler.new
    Relevance::Tarantula::Link.expects(:new).with('/url', crawler, '/some-referrer')
    crawler.stubs(:should_skip_link?)

    crawler.queue_link('/url', '/some-referrer')
  end
end
115
+
116
# Crawler#crawl_the_queue and multi-pass crawling.
describe "crawling" do
  before do
    @form = Hpricot('<form action="/action" method="post"/>').at('form')
  end

  it "does two things with each link: crawl and blip" do
    crawler = Relevance::Tarantula::Crawler.new
    crawler.proxy = stub
    queued = [make_link("/foo1", crawler), make_link("/foo2", crawler)]
    crawler.crawl_queue = queued

    queued.each { |queued_link| queued_link.expects(:crawl) }
    crawler.expects(:blip).times(2)

    crawler.crawl_the_queue
    crawler.crawl_queue.should == []
  end

  it "invokes queued forms, logs responses, and calls handlers" do
    crawler = Relevance::Tarantula::Crawler.new
    submission = Relevance::Tarantula::FormSubmission.new(make_form(@form, crawler))
    crawler.crawl_queue << submission
    crawler.expects(:submit).returns(stub(:code => "200"))
    crawler.expects(:blip)

    crawler.crawl_the_queue
  end

  # TODO: this is the same as "resets to the initial links/forms ..." and
  # doesn't appear to test anything related to a timeout.
  it "breaks out early if a timeout is set"

  it "resets to the initial links/forms on subsequent crawls when times_to_crawl > 1" do
    crawler = Relevance::Tarantula::Crawler.new
    stub_puts_and_print(crawler)
    ok_response = stub(:code => "200")

    crawler.queue_link('/foo')
    crawler.expects(:follow).returns(ok_response).times(4) # (stub and "/") * 2
    crawler.queue_form(@form)
    crawler.expects(:submit).returns(ok_response).times(2)
    crawler.expects(:blip).times(6)

    crawler.times_to_crawl = 2
    crawler.crawl
  end
end
158
+
159
# Crawler#report_results: prints a summary and hands off to generate_reports.
describe "report_results" do
  it "prints a final summary line" do
    crawler = Relevance::Tarantula::Crawler.new
    crawler.stubs(:generate_reports)
    crawler.expects(:total_links_count).returns(42)
    crawler.expects(:puts).with("Crawled 42 links and forms.")

    crawler.report_results
  end

  it "delegates to generate_reports" do
    crawler = Relevance::Tarantula::Crawler.new
    crawler.stubs(:puts)
    crawler.expects(:generate_reports)

    crawler.report_results
  end
end
176
+
177
# Crawler#blip: progress output appears only when not verbose and $stdout
# is a tty.
describe "blip" do
  it "blips the current progress if !verbose" do
    $stdout.stubs(:tty?).returns(true)
    crawler = Relevance::Tarantula::Crawler.new
    crawler.stubs(:verbose).returns(false)
    crawler.stubs(:timeout_if_too_long)
    crawler.expects(:print).with("\r 0 of 0 links completed ")

    crawler.blip
  end

  it "suppresses the blip message if not writing to a tty" do
    $stdout.stubs(:tty?).returns(false)
    crawler = Relevance::Tarantula::Crawler.new
    crawler.stubs(:verbose).returns(false)
    crawler.stubs(:timeout_if_too_long)
    crawler.expects(:print).never

    crawler.blip
  end

  it "blips nothing if verbose" do
    $stdout.stubs(:tty?).returns(true)
    crawler = Relevance::Tarantula::Crawler.new
    crawler.stubs(:verbose).returns(true)
    crawler.expects(:print).never

    crawler.blip
  end
end
206
+
207
# Crawler#finished? is driven by whether crawl_queue still holds entries.
describe "finished?" do
  it "is finished when the links and forms are crawled" do
    fresh_crawler = Relevance::Tarantula::Crawler.new
    fresh_crawler.finished?.should == true
  end

  it "isn't finished when links remain" do
    busy_crawler = Relevance::Tarantula::Crawler.new
    busy_crawler.crawl_queue = [:stub_link]
    busy_crawler.finished?.should == false
  end

  it "isn't finished when forms remain" do
    busy_crawler = Relevance::Tarantula::Crawler.new
    busy_crawler.crawl_queue = [:stub_form]
    busy_crawler.finished?.should == false
  end
end
227
+
228
# do_crawl keeps calling crawl_the_queue until finished? answers true.
it "crawls links and forms again and again until finished?==true" do
  crawler = Relevance::Tarantula::Crawler.new
  # Two "not finished" answers, then "finished": exactly two queue passes.
  crawler.expects(:finished?).times(3).returns(false, false, true)
  crawler.expects(:crawl_the_queue).times(2)

  crawler.do_crawl(1)
end
234
+
235
# generate_reports must drive each configured reporter through report
# and finish_report.
it "asks each reporter to write its report in report_dir" do
  crawler = Relevance::Tarantula::Crawler.new
  crawler.stubs(:report_dir).returns(test_output_dir)

  fake_reporter = stub_everything
  fake_reporter.expects(:report)
  fake_reporter.expects(:finish_report)
  crawler.reporters = [fake_reporter]

  failed_result = stub(:code => "404", :url => "/uh-oh")
  crawler.save_result(failed_result)
  crawler.generate_reports
end
245
+
246
# report_dir is rails_root with "/tmp/tarantula" appended.
it "builds a report dir relative to rails root" do
  crawler = Relevance::Tarantula::Crawler.new
  crawler.expects(:rails_root).returns("faux_rails_root")

  report_path = crawler.report_dir
  report_path.should == "faux_rails_root/tmp/tarantula"
end
251
+
252
# A link is only skippable once it has been queued.
it "skips links that are already queued" do
  crawler = Relevance::Tarantula::Crawler.new

  skip_before = crawler.should_skip_link?(make_link("/foo"))
  skip_before.should == false

  queued = crawler.queue_link("/foo")
  queued.should == make_link("/foo")

  skip_after = crawler.should_skip_link?(make_link("/foo"))
  skip_after.should == true
end
258
+
259
# Rules under which the crawler refuses to queue or follow a link or
# form submission: over-long URLs, http/javascript/mailto prefixes,
# blank hrefs, and entries matching skip_uri_patterns.
describe "link skipping" do

  before { @crawler = Relevance::Tarantula::Crawler.new }

  it "skips links that are too long" do
    @crawler.should_skip_link?(make_link("/foo")).should == false
    @crawler.max_url_length = 2
    @crawler.expects(:log).with("Skipping long url /foo")
    @crawler.should_skip_link?(make_link("/foo")).should == true
  end

  it "skips outbound links (those that begin with http)" do
    @crawler.expects(:log).with("Skipping http-anything")
    @crawler.should_skip_link?(make_link("http-anything")).should == true
  end

  it "skips javascript links (those that begin with javascript)" do
    @crawler.expects(:log).with("Skipping javascript-anything")
    @crawler.should_skip_link?(make_link("javascript-anything")).should == true
  end

  # Description previously said "begin with http" (copy-paste slip); the
  # example exercises a mailto-prefixed href.
  it "skips mailto links (those that begin with mailto)" do
    @crawler.expects(:log).with("Skipping mailto-anything")
    @crawler.should_skip_link?(make_link("mailto-anything")).should == true
  end

  it 'skips blank links' do
    @crawler.queue_link(nil)
    @crawler.crawl_queue.should == []
    @crawler.queue_link("")
    @crawler.crawl_queue.should == []
  end

  it "logs and skips links that match a pattern" do
    @crawler.expects(:log).with("Skipping /the-red-button")
    @crawler.skip_uri_patterns << /red-button/
    @crawler.queue_link("/blue-button").should == make_link("/blue-button")
    @crawler.queue_link("/the-red-button").should == nil
  end

  it "logs and skips form submissions that match a pattern" do
    @crawler.expects(:log).with("Skipping /reset-password-form")
    @crawler.skip_uri_patterns << /reset-password/
    fs = stub_everything(:action => "/reset-password-form")
    @crawler.should_skip_form_submission?(fs).should == true
  end
end
306
+
307
# allow_NNN_for calls are forwarded to response_code_handler; unknown
# methods still raise NoMethodError.
describe "allow_nnn_for" do
  it "installs result as a response_code_handler" do
    default_handler = Relevance::Tarantula::Crawler.new.response_code_handler
    default_handler.should == Relevance::Tarantula::Result
  end

  it "delegates to the response_code_handler" do
    crawler = Relevance::Tarantula::Crawler.new
    handler_mock = mock
    handler_mock.expects(:allow_404_for).with(:stub)
    crawler.response_code_handler = handler_mock

    crawler.allow_404_for(:stub)
  end

  it "chains up to super for method_missing" do
    crawler = Relevance::Tarantula::Crawler.new
    unknown_call = lambda { crawler.foo }
    unknown_call.should raise_error(NoMethodError)
  end
end
327
+
328
# Crawl timing: recorded start/end times, elapsed time per pass, and
# aborting a crawl that exceeds crawl_timeout.
#
# In every example Time.parse is called BEFORE Time.now is stubbed, so
# the parsed fixtures are built while the real clock is still in place.
describe "timeouts" do

  it "sets start and end times for a single crawl" do
    start_time = Time.parse("March 1st, 2008 10:00am")
    end_time = Time.parse("March 1st, 2008 10:10am")
    Time.stubs(:now).returns(start_time, end_time)

    crawler = Relevance::Tarantula::Crawler.new
    stub_puts_and_print(crawler)
    crawler.proxy = stub_everything(:get => stub(:code => "200"))
    crawler.crawl
    crawler.crawl_start_times.first.should == start_time
    crawler.crawl_end_times.first.should == end_time
  end

  it "has elapsed time for a crawl" do
    start_time = Time.parse("March 1st, 2008 10:00am")
    elapsed_time_check = Time.parse("March 1st, 2008, 10:10:00am")
    Time.stubs(:now).returns(start_time, elapsed_time_check)

    crawler = Relevance::Tarantula::Crawler.new
    stub_puts_and_print(crawler)
    crawler.proxy = stub_everything(:get => stub(:code => "200"))
    crawler.crawl
    # "elasped" is the spelling of the crawler's method name; it is kept
    # here so the call still resolves.
    crawler.elasped_time_for_pass(0).should == 600.seconds
  end

  it "raises out of the crawl if elapsed time is greater than the crawl timeout" do
    start_time = Time.parse("March 1st, 2008 10:00am")
    elapsed_time_check = Time.parse("March 1st, 2008, 10:35:00am")
    Time.stubs(:now).returns(start_time, elapsed_time_check)

    crawler = Relevance::Tarantula::Crawler.new
    crawler.crawl_timeout = 5.minutes

    crawler.crawl_queue = [stub(:href => "/foo1", :method => :get), stub(:href => "/foo2", :method => :get)]
    crawler.proxy = stub
    crawler.proxy.stubs(:get).returns(stub(:code => "200"))

    stub_puts_and_print(crawler)
    # NOTE(review): bare raise_error passes on ANY exception (including a
    # NoMethodError from the stubs above); tighten to the crawler's
    # timeout error class once it is confirmed.
    lambda {
      crawler.do_crawl(0)
    }.should raise_error
  end

end
374
+
375
+ end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
+
3
# Parses a realistic login form and checks the action/method that Form
# exposes for it.
describe "Relevance::Tarantula::Form large example" do
  before do
    login_markup = <<-HTML
<form action="/session" method="post">
<input name="authenticity_token" type="hidden" value="1be0d07c6e13669a87b8f52a3c7e1d1ffa77708d" />
<input id="email" name="email" size="30" type="text" />
<input id="password" name="password" size="30" type="password" />
<input id="remember_me" name="remember_me" type="checkbox" value="1" />
<input name="commit" type="submit" value="Log in" />
</form>
    HTML
    @tag = Hpricot(login_markup)
    @form = make_form(@tag.at('form'))
  end

  it "has an action" do
    @form.action.should == "/session"
  end

  it "has a method" do
    @form.method.should == "post"
  end
end
26
+
27
# A form tag with no method attribute is treated as a GET form.
describe "A Relevance::Tarantula::Form" do
  it "defaults method to 'get'" do
    @tag = Hpricot("<form/>")
    form_element = @tag.at('form')
    @form = make_form(form_element)
    @form.method.should == 'get'
  end
end
34
+
35
# A form carrying a "_method" input with value "PUT" reports "put" as
# its method.
describe "A Relevance::Tarantula::Form with a hacked _method" do
  before do
    override_markup = <<-HTML
<form action="/foo">
<input name="authenticity_token" type="hidden" value="1be0d07c6e13669a87b8f52a3c7e1d1ffa77708d" />
<input id="_method" name="_method" size="30" type="text" value="PUT"/>
</form>
    HTML
    @tag = Hpricot(override_markup)
    @form = make_form(@tag.at('form'))
  end

  it "has a method" do
    @form.method.should == "put"
  end
end