tarantula-rails3 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. data/CHANGELOG +49 -0
  2. data/LICENSE +20 -0
  3. data/README.rdoc +161 -0
  4. data/Rakefile +83 -0
  5. data/VERSION.yml +4 -0
  6. data/examples/example_helper.rb +57 -0
  7. data/examples/relevance/core_extensions/ellipsize_example.rb +19 -0
  8. data/examples/relevance/core_extensions/file_example.rb +8 -0
  9. data/examples/relevance/core_extensions/response_example.rb +29 -0
  10. data/examples/relevance/core_extensions/test_case_example.rb +20 -0
  11. data/examples/relevance/tarantula/attack_handler_example.rb +29 -0
  12. data/examples/relevance/tarantula/basic_attack_example.rb +12 -0
  13. data/examples/relevance/tarantula/crawler_example.rb +375 -0
  14. data/examples/relevance/tarantula/form_example.rb +50 -0
  15. data/examples/relevance/tarantula/form_submission_example.rb +171 -0
  16. data/examples/relevance/tarantula/html_document_handler_example.rb +43 -0
  17. data/examples/relevance/tarantula/html_report_helper_example.rb +46 -0
  18. data/examples/relevance/tarantula/html_reporter_example.rb +82 -0
  19. data/examples/relevance/tarantula/invalid_html_handler_example.rb +33 -0
  20. data/examples/relevance/tarantula/io_reporter_example.rb +11 -0
  21. data/examples/relevance/tarantula/link_example.rb +84 -0
  22. data/examples/relevance/tarantula/log_grabber_example.rb +26 -0
  23. data/examples/relevance/tarantula/rails_integration_proxy_example.rb +88 -0
  24. data/examples/relevance/tarantula/result_example.rb +85 -0
  25. data/examples/relevance/tarantula/tidy_handler_example.rb +58 -0
  26. data/examples/relevance/tarantula/transform_example.rb +20 -0
  27. data/examples/relevance/tarantula_example.rb +23 -0
  28. data/laf/images/header_bg.jpg +0 -0
  29. data/laf/images/logo.png +0 -0
  30. data/laf/images/tagline.png +0 -0
  31. data/laf/javascripts/jquery-1.2.3.js +3408 -0
  32. data/laf/javascripts/jquery-ui-tabs.js +890 -0
  33. data/laf/javascripts/jquery.tablesorter.js +861 -0
  34. data/laf/javascripts/tarantula.js +10 -0
  35. data/laf/stylesheets/tarantula.css +346 -0
  36. data/lib/relevance/core_extensions/ellipsize.rb +34 -0
  37. data/lib/relevance/core_extensions/file.rb +9 -0
  38. data/lib/relevance/core_extensions/metaclass.rb +78 -0
  39. data/lib/relevance/core_extensions/response.rb +9 -0
  40. data/lib/relevance/core_extensions/string_chars_fix.rb +11 -0
  41. data/lib/relevance/core_extensions/test_case.rb +19 -0
  42. data/lib/relevance/tarantula.rb +58 -0
  43. data/lib/relevance/tarantula/attack.rb +18 -0
  44. data/lib/relevance/tarantula/attack_handler.rb +37 -0
  45. data/lib/relevance/tarantula/basic_attack.rb +40 -0
  46. data/lib/relevance/tarantula/crawler.rb +254 -0
  47. data/lib/relevance/tarantula/detail.html.erb +81 -0
  48. data/lib/relevance/tarantula/form.rb +23 -0
  49. data/lib/relevance/tarantula/form_submission.rb +88 -0
  50. data/lib/relevance/tarantula/html_document_handler.rb +36 -0
  51. data/lib/relevance/tarantula/html_report_helper.rb +39 -0
  52. data/lib/relevance/tarantula/html_reporter.rb +105 -0
  53. data/lib/relevance/tarantula/index.html.erb +37 -0
  54. data/lib/relevance/tarantula/invalid_html_handler.rb +18 -0
  55. data/lib/relevance/tarantula/io_reporter.rb +34 -0
  56. data/lib/relevance/tarantula/link.rb +94 -0
  57. data/lib/relevance/tarantula/log_grabber.rb +16 -0
  58. data/lib/relevance/tarantula/rails_integration_proxy.rb +68 -0
  59. data/lib/relevance/tarantula/recording.rb +12 -0
  60. data/lib/relevance/tarantula/response.rb +13 -0
  61. data/lib/relevance/tarantula/result.rb +77 -0
  62. data/lib/relevance/tarantula/test_report.html.erb +32 -0
  63. data/lib/relevance/tarantula/tidy_handler.rb +32 -0
  64. data/lib/relevance/tarantula/transform.rb +17 -0
  65. data/lib/relevance/tasks/tarantula_tasks.rake +42 -0
  66. data/lib/tarantula-rails3.rb +9 -0
  67. data/template/tarantula_test.rb +22 -0
  68. metadata +164 -0
@@ -0,0 +1,29 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
+
3
+ describe "Relevance::Tarantula::AttackHandler" do
4
+ before do
5
+ @handler = Relevance::Tarantula::AttackHandler.new
6
+ attack = Relevance::Tarantula::Attack.new({:name => 'foo_name', :input => 'foo_code', :output => '<bad>'})
7
+ @handler.stubs(:attacks).returns([attack])
8
+ end
9
+
10
+ it "lets safe documents through" do
11
+ result = @handler.handle(Relevance::Tarantula::Result.new(:response => stub(:html? => true, :body => '<a href="/foo">good</a>')))
12
+ result.should == nil
13
+ end
14
+
15
+ it "detects the supplied code" do
16
+ result = @handler.handle(Relevance::Tarantula::Result.new(:response => stub(:html? => true, :body => '<a href="/foo"><bad></a>')))
17
+ result.success.should == false
18
+ end
19
+ end
20
+
21
+ describe "Attacks without an output specified" do
22
+ it "never matches anything" do
23
+ handler = Relevance::Tarantula::AttackHandler.new
24
+ attack = Relevance::Tarantula::Attack.new({:name => 'foo_name', :input => 'foo_code'})
25
+ Relevance::Tarantula::FormSubmission.stubs(:attacks).returns([attack])
26
+ result = handler.handle(Relevance::Tarantula::Result.new(:response => stub(:html? => true, :body => '<a href="/foo">good</a>')))
27
+ result.should == nil
28
+ end
29
+ end
@@ -0,0 +1,12 @@
1
+ require File.dirname(__FILE__) + "/../../example_helper.rb"
2
+
3
+ describe Relevance::Tarantula::BasicAttack do
4
+ before do
5
+ @attack = Relevance::Tarantula::BasicAttack.new
6
+ end
7
+
8
+ it "can generate a random whole number" do
9
+ @attack.random_whole_number.should >= 0
10
+ Fixnum.should === @attack.random_whole_number
11
+ end
12
+ end
@@ -0,0 +1,375 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
+
3
+ describe Relevance::Tarantula::Crawler do
4
+
5
+ describe "transform_url" do
6
+
7
+ before { @crawler = Relevance::Tarantula::Crawler.new }
8
+
9
+ it "de-obfuscates unicode obfuscated urls" do
10
+ obfuscated_mailto = "&#109;&#97;&#105;&#108;&#116;&#111;&#58;"
11
+ @crawler.transform_url(obfuscated_mailto).should == "mailto:"
12
+ end
13
+
14
+ it "strips the trailing name portion of a link" do
15
+ @crawler.transform_url('http://host/path#name').should == 'http://host/path'
16
+ end
17
+ end
18
+
19
+
20
+ describe "log grabbing" do
21
+
22
+ it "returns nil if no grabber is specified" do
23
+ crawler = Relevance::Tarantula::Crawler.new
24
+ crawler.grab_log!.should == nil
25
+ end
26
+
27
+ it "returns grabber.grab if grabber is specified" do
28
+ crawler = Relevance::Tarantula::Crawler.new
29
+ crawler.log_grabber = stub(:grab! => "fake log entry")
30
+ crawler.grab_log!.should == "fake log entry"
31
+ end
32
+
33
+ end
34
+
35
+ describe "interrupt" do
36
+
37
+ it 'catches interruption and writes the partial report' do
38
+ crawler = Relevance::Tarantula::Crawler.new
39
+ crawler.stubs(:queue_link)
40
+ crawler.stubs(:do_crawl).raises(Interrupt)
41
+ crawler.expects(:report_results)
42
+ $stderr.expects(:puts).with("CTRL-C")
43
+ crawler.crawl
44
+ end
45
+
46
+ end
47
+
48
+ describe 'handle_form_results' do
49
+
50
+ it 'captures the result values (bugfix)' do
51
+ response = stub_everything
52
+ result_args = {:url => :action_stub,
53
+ :data => 'nil',
54
+ :response => response,
55
+ :referrer => :action_stub,
56
+ :log => nil,
57
+ :method => :stub_method,
58
+ :test_name => nil}
59
+ result = Relevance::Tarantula::Result.new(result_args)
60
+ Relevance::Tarantula::Result.expects(:new).with(result_args).returns(result)
61
+ crawler = Relevance::Tarantula::Crawler.new
62
+ crawler.handle_form_results(stub_everything(:method => :stub_method, :action => :action_stub),
63
+ response)
64
+ end
65
+
66
+ end
67
+
68
+ describe "crawl" do
69
+
70
+ it 'queues the first url, does crawl, and then reports results' do
71
+ crawler = Relevance::Tarantula::Crawler.new
72
+ crawler.expects(:queue_link).with("/foobar")
73
+ crawler.expects(:do_crawl)
74
+ crawler.expects(:report_results)
75
+ crawler.crawl("/foobar")
76
+ end
77
+
78
+ it 'reports results even if the crawl fails' do
79
+ crawler = Relevance::Tarantula::Crawler.new
80
+ crawler.expects(:do_crawl).raises(RuntimeError)
81
+ crawler.expects(:report_results)
82
+ lambda {crawler.crawl('/')}.should raise_error(RuntimeError)
83
+ end
84
+
85
+ end
86
+
87
+ describe "queueing" do
88
+
89
+ it 'queues and remembers links' do
90
+ crawler = Relevance::Tarantula::Crawler.new
91
+ crawler.expects(:transform_url).with("/url").returns("/transformed").at_least_once
92
+ crawler.queue_link("/url")
93
+ # TODO not sure this is the best way to test this anymore; relying on result of transform in both actual and expected
94
+ crawler.crawl_queue.should == [make_link("/url", crawler)]
95
+ crawler.links_queued.should == Set.new([make_link("/url", crawler)])
96
+ end
97
+
98
+ it 'queues and remembers forms' do
99
+ crawler = Relevance::Tarantula::Crawler.new
100
+ form = Hpricot('<form action="/action" method="post"/>').at('form')
101
+ signature = Relevance::Tarantula::FormSubmission.new(make_form(form)).signature
102
+ crawler.queue_form(form)
103
+ crawler.crawl_queue.size.should == 1
104
+ crawler.form_signatures_queued.should == Set.new([signature])
105
+ end
106
+
107
+ it "passes link, self, and referrer when creating Link objects" do
108
+ crawler = Relevance::Tarantula::Crawler.new
109
+ Relevance::Tarantula::Link.expects(:new).with('/url', crawler, '/some-referrer')
110
+ crawler.stubs(:should_skip_link?)
111
+ crawler.queue_link('/url', '/some-referrer')
112
+ end
113
+
114
+ end
115
+
116
+ describe "crawling" do
117
+ before do
118
+ @form = Hpricot('<form action="/action" method="post"/>').at('form')
119
+ end
120
+
121
+ it "does two things with each link: crawl and blip" do
122
+ crawler = Relevance::Tarantula::Crawler.new
123
+ crawler.proxy = stub
124
+ crawler.crawl_queue = links = [make_link("/foo1", crawler), make_link("/foo2", crawler)]
125
+
126
+ links.each{|link| link.expects(:crawl)}
127
+ crawler.expects(:blip).times(2)
128
+
129
+ crawler.crawl_the_queue
130
+ crawler.crawl_queue.should == []
131
+ end
132
+
133
+ it "invokes queued forms, logs responses, and calls handlers" do
134
+ crawler = Relevance::Tarantula::Crawler.new
135
+ crawler.crawl_queue << Relevance::Tarantula::FormSubmission.new(make_form(@form, crawler))
136
+ crawler.expects(:submit).returns(stub(:code => "200"))
137
+ crawler.expects(:blip)
138
+ crawler.crawl_the_queue
139
+ end
140
+
141
+ # TODO this is the same as "resets to the initial links/forms ..." and doesn't appear to test anything related to a timeout.
142
+ it "breaks out early if a timeout is set"
143
+
144
+ it "resets to the initial links/forms on subsequent crawls when times_to_crawl > 1" do
145
+ crawler = Relevance::Tarantula::Crawler.new
146
+ stub_puts_and_print(crawler)
147
+ response = stub(:code => "200")
148
+ crawler.queue_link('/foo')
149
+ crawler.expects(:follow).returns(response).times(4) # (stub and "/") * 2
150
+ crawler.queue_form(@form)
151
+ crawler.expects(:submit).returns(response).times(2)
152
+ crawler.expects(:blip).times(6)
153
+ crawler.times_to_crawl = 2
154
+ crawler.crawl
155
+ end
156
+
157
+ end
158
+
159
+ describe "report_results" do
160
+ it "prints a final summary line" do
161
+ crawler = Relevance::Tarantula::Crawler.new
162
+ crawler.stubs(:generate_reports)
163
+ crawler.expects(:total_links_count).returns(42)
164
+ crawler.expects(:puts).with("Crawled 42 links and forms.")
165
+ crawler.report_results
166
+ end
167
+
168
+ it "delegates to generate_reports" do
169
+ crawler = Relevance::Tarantula::Crawler.new
170
+ crawler.stubs(:puts)
171
+ crawler.expects(:generate_reports)
172
+ crawler.report_results
173
+ end
174
+
175
+ end
176
+
177
+ describe "blip" do
178
+
179
+ it "blips the current progress if !verbose" do
180
+ $stdout.stubs(:tty?).returns(true)
181
+ crawler = Relevance::Tarantula::Crawler.new
182
+ crawler.stubs(:verbose).returns false
183
+ crawler.stubs(:timeout_if_too_long)
184
+ crawler.expects(:print).with("\r 0 of 0 links completed ")
185
+ crawler.blip
186
+ end
187
+
188
+ it "suppresses the blip message if not writing to a tty" do
189
+ $stdout.stubs(:tty?).returns(false)
190
+ crawler = Relevance::Tarantula::Crawler.new
191
+ crawler.stubs(:verbose).returns false
192
+ crawler.stubs(:timeout_if_too_long)
193
+ crawler.expects(:print).never
194
+ crawler.blip
195
+ end
196
+
197
+ it "blips nothing if verbose" do
198
+ $stdout.stubs(:tty?).returns(true)
199
+ crawler = Relevance::Tarantula::Crawler.new
200
+ crawler.stubs(:verbose).returns true
201
+ crawler.expects(:print).never
202
+ crawler.blip
203
+ end
204
+
205
+ end
206
+
207
+ describe "finished?" do
208
+
209
+ it "is finished when the links and forms are crawled" do
210
+ crawler = Relevance::Tarantula::Crawler.new
211
+ crawler.finished?.should == true
212
+ end
213
+
214
+ it "isn't finished when links remain" do
215
+ crawler = Relevance::Tarantula::Crawler.new
216
+ crawler.crawl_queue = [:stub_link]
217
+ crawler.finished?.should == false
218
+ end
219
+
220
+ it "isn't finished when forms remain" do
221
+ crawler = Relevance::Tarantula::Crawler.new
222
+ crawler.crawl_queue = [:stub_form]
223
+ crawler.finished?.should == false
224
+ end
225
+
226
+ end
227
+
228
+ it "crawls links and forms again and again until finished?==true" do
229
+ crawler = Relevance::Tarantula::Crawler.new
230
+ crawler.expects(:finished?).times(3).returns(false, false, true)
231
+ crawler.expects(:crawl_the_queue).times(2)
232
+ crawler.do_crawl(1)
233
+ end
234
+
235
+ it "asks each reporter to write its report in report_dir" do
236
+ crawler = Relevance::Tarantula::Crawler.new
237
+ crawler.stubs(:report_dir).returns(test_output_dir)
238
+ reporter = stub_everything
239
+ reporter.expects(:report)
240
+ reporter.expects(:finish_report)
241
+ crawler.reporters = [reporter]
242
+ crawler.save_result stub(:code => "404", :url => "/uh-oh")
243
+ crawler.generate_reports
244
+ end
245
+
246
+ it "builds a report dir relative to rails root" do
247
+ crawler = Relevance::Tarantula::Crawler.new
248
+ crawler.expects(:rails_root).returns("faux_rails_root")
249
+ crawler.report_dir.should == "faux_rails_root/tmp/tarantula"
250
+ end
251
+
252
+ it "skips links that are already queued" do
253
+ crawler = Relevance::Tarantula::Crawler.new
254
+ crawler.should_skip_link?(make_link("/foo")).should == false
255
+ crawler.queue_link("/foo").should == make_link("/foo")
256
+ crawler.should_skip_link?(make_link("/foo")).should == true
257
+ end
258
+
259
+ describe "link skipping" do
260
+
261
+ before { @crawler = Relevance::Tarantula::Crawler.new }
262
+
263
+ it "skips links that are too long" do
264
+ @crawler.should_skip_link?(make_link("/foo")).should == false
265
+ @crawler.max_url_length = 2
266
+ @crawler.expects(:log).with("Skipping long url /foo")
267
+ @crawler.should_skip_link?(make_link("/foo")).should == true
268
+ end
269
+
270
+ it "skips outbound links (those that begin with http)" do
271
+ @crawler.expects(:log).with("Skipping http-anything")
272
+ @crawler.should_skip_link?(make_link("http-anything")).should == true
273
+ end
274
+
275
+ it "skips javascript links (those that begin with javascript)" do
276
+ @crawler.expects(:log).with("Skipping javascript-anything")
277
+ @crawler.should_skip_link?(make_link("javascript-anything")).should == true
278
+ end
279
+
280
+ it "skips mailto links (those that begin with http)" do
281
+ @crawler.expects(:log).with("Skipping mailto-anything")
282
+ @crawler.should_skip_link?(make_link("mailto-anything")).should == true
283
+ end
284
+
285
+ it 'skips blank links' do
286
+ @crawler.queue_link(nil)
287
+ @crawler.crawl_queue.should == []
288
+ @crawler.queue_link("")
289
+ @crawler.crawl_queue.should == []
290
+ end
291
+
292
+ it "logs and skips links that match a pattern" do
293
+ @crawler.expects(:log).with("Skipping /the-red-button")
294
+ @crawler.skip_uri_patterns << /red-button/
295
+ @crawler.queue_link("/blue-button").should == make_link("/blue-button")
296
+ @crawler.queue_link("/the-red-button").should == nil
297
+ end
298
+
299
+ it "logs and skips form submissions that match a pattern" do
300
+ @crawler.expects(:log).with("Skipping /reset-password-form")
301
+ @crawler.skip_uri_patterns << /reset-password/
302
+ fs = stub_everything(:action => "/reset-password-form")
303
+ @crawler.should_skip_form_submission?(fs).should == true
304
+ end
305
+ end
306
+
307
+ describe "allow_nnn_for" do
308
+
309
+ it "installs result as a response_code_handler" do
310
+ crawler = Relevance::Tarantula::Crawler.new
311
+ crawler.response_code_handler.should == Relevance::Tarantula::Result
312
+ end
313
+
314
+ it "delegates to the response_code_handler" do
315
+ crawler = Relevance::Tarantula::Crawler.new
316
+ (response_code_handler = mock).expects(:allow_404_for).with(:stub)
317
+ crawler.response_code_handler = response_code_handler
318
+ crawler.allow_404_for(:stub)
319
+ end
320
+
321
+ it "chains up to super for method_missing" do
322
+ crawler = Relevance::Tarantula::Crawler.new
323
+ lambda{crawler.foo}.should raise_error(NoMethodError)
324
+ end
325
+
326
+ end
327
+
328
+ describe "timeouts" do
329
+
330
+ it "sets start and end times for a single crawl" do
331
+ start_time = Time.parse("March 1st, 2008 10:00am")
332
+ end_time = Time.parse("March 1st, 2008 10:10am")
333
+ Time.stubs(:now).returns(start_time, end_time)
334
+
335
+ crawler = Relevance::Tarantula::Crawler.new
336
+ stub_puts_and_print(crawler)
337
+ crawler.proxy = stub_everything(:get => response = stub(:code => "200"))
338
+ crawler.crawl
339
+ crawler.crawl_start_times.first.should == start_time
340
+ crawler.crawl_end_times.first.should == end_time
341
+ end
342
+
343
+ it "has elasped time for a crawl" do
344
+ start_time = Time.parse("March 1st, 2008 10:00am")
345
+ elasped_time_check = Time.parse("March 1st, 2008, 10:10:00am")
346
+ Time.stubs(:now).returns(start_time, elasped_time_check)
347
+
348
+ crawler = Relevance::Tarantula::Crawler.new
349
+ stub_puts_and_print(crawler)
350
+ crawler.proxy = stub_everything(:get => response = stub(:code => "200"))
351
+ crawler.crawl
352
+ crawler.elasped_time_for_pass(0).should == 600.seconds
353
+ end
354
+
355
+ it "raises out of the crawl if elasped time is greater then the crawl timeout" do
356
+ start_time = Time.parse("March 1st, 2008 10:00am")
357
+ elasped_time_check = Time.parse("March 1st, 2008, 10:35:00am")
358
+ Time.stubs(:now).returns(start_time, elasped_time_check)
359
+
360
+ crawler = Relevance::Tarantula::Crawler.new
361
+ crawler.crawl_timeout = 5.minutes
362
+
363
+ crawler.crawl_queue = [stub(:href => "/foo1", :method => :get), stub(:href => "/foo2", :method => :get)]
364
+ crawler.proxy = stub
365
+ crawler.proxy.stubs(:get).returns(response = stub(:code => "200"))
366
+
367
+ stub_puts_and_print(crawler)
368
+ lambda {
369
+ crawler.do_crawl(0)
370
+ }.should raise_error
371
+ end
372
+
373
+ end
374
+
375
+ end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "example_helper.rb"))
2
+
3
+ describe "Relevance::Tarantula::Form large example" do
4
+ before do
5
+ @tag = Hpricot(<<END)
6
+ <form action="/session" method="post">
7
+ <input name="authenticity_token" type="hidden" value="1be0d07c6e13669a87b8f52a3c7e1d1ffa77708d" />
8
+ <input id="email" name="email" size="30" type="text" />
9
+ <input id="password" name="password" size="30" type="password" />
10
+ <input id="remember_me" name="remember_me" type="checkbox" value="1" />
11
+ <input name="commit" type="submit" value="Log in" />
12
+ </form>
13
+ END
14
+ @form = make_form(@tag.at('form'))
15
+ end
16
+
17
+ it "has an action" do
18
+ @form.action.should == "/session"
19
+ end
20
+
21
+ it "has a method" do
22
+ @form.method.should == "post"
23
+ end
24
+
25
+ end
26
+
27
+ describe "A Relevance::Tarantula::Form" do
28
+ it "defaults method to 'get'" do
29
+ @tag = Hpricot("<form/>")
30
+ @form = make_form(@tag.at('form'))
31
+ @form.method.should == 'get'
32
+ end
33
+ end
34
+
35
+ describe "A Relevance::Tarantula::Form with a hacked _method" do
36
+ before do
37
+ @tag = Hpricot(<<END)
38
+ <form action="/foo">
39
+ <input name="authenticity_token" type="hidden" value="1be0d07c6e13669a87b8f52a3c7e1d1ffa77708d" />
40
+ <input id="_method" name="_method" size="30" type="text" value="PUT"/>
41
+ </form>
42
+ END
43
+ @form = make_form(@tag.at('form'))
44
+ end
45
+
46
+ it "has a method" do
47
+ @form.method.should == "put"
48
+ end
49
+
50
+ end