tarantula 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. data/CHANGELOG +2 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +80 -0
  5. data/init.rb +1 -0
  6. data/install.rb +1 -0
  7. data/laf/images/background.jpg +0 -0
  8. data/laf/images/relevance-os-logo.gif +0 -0
  9. data/laf/images/tab.png +0 -0
  10. data/laf/images/table-sort.gif +0 -0
  11. data/laf/images/tarantula-sprites.png +0 -0
  12. data/laf/javascripts/jquery-1.2.3.js +3408 -0
  13. data/laf/javascripts/jquery-ui-tabs.js +890 -0
  14. data/laf/javascripts/jquery.tablesorter.js +861 -0
  15. data/laf/javascripts/tarantula.js +10 -0
  16. data/laf/stylesheets/tarantula.css +638 -0
  17. data/laf/stylesheets/ui.tabs.css +113 -0
  18. data/lib/relevance/core_extensions/ellipsize.rb +34 -0
  19. data/lib/relevance/core_extensions/file.rb +9 -0
  20. data/lib/relevance/core_extensions/response.rb +9 -0
  21. data/lib/relevance/core_extensions/test_case.rb +12 -0
  22. data/lib/relevance/tarantula.rb +63 -0
  23. data/lib/relevance/tarantula/attack.rb +15 -0
  24. data/lib/relevance/tarantula/attack_form_submission.rb +75 -0
  25. data/lib/relevance/tarantula/attack_handler.rb +37 -0
  26. data/lib/relevance/tarantula/crawler.rb +240 -0
  27. data/lib/relevance/tarantula/detail.html.erb +77 -0
  28. data/lib/relevance/tarantula/form.rb +21 -0
  29. data/lib/relevance/tarantula/form_submission.rb +70 -0
  30. data/lib/relevance/tarantula/html_document_handler.rb +36 -0
  31. data/lib/relevance/tarantula/html_report_helper.rb +56 -0
  32. data/lib/relevance/tarantula/html_reporter.rb +105 -0
  33. data/lib/relevance/tarantula/index.html.erb +48 -0
  34. data/lib/relevance/tarantula/invalid_html_handler.rb +18 -0
  35. data/lib/relevance/tarantula/io_reporter.rb +34 -0
  36. data/lib/relevance/tarantula/link.rb +56 -0
  37. data/lib/relevance/tarantula/log_grabber.rb +16 -0
  38. data/lib/relevance/tarantula/rails_integration_proxy.rb +70 -0
  39. data/lib/relevance/tarantula/recording.rb +12 -0
  40. data/lib/relevance/tarantula/response.rb +13 -0
  41. data/lib/relevance/tarantula/result.rb +66 -0
  42. data/lib/relevance/tarantula/test_report.html.erb +34 -0
  43. data/lib/relevance/tarantula/tidy_handler.rb +32 -0
  44. data/lib/relevance/tarantula/transform.rb +17 -0
  45. data/manifest.txt +117 -0
  46. data/rails/init.rb +1 -0
  47. data/tarantula.gemspec +48 -0
  48. data/tasks/tarantula_tasks.rake +34 -0
  49. data/template/tarantula_test.rb +12 -0
  50. data/test/relevance/core_extensions/ellipsize_test.rb +19 -0
  51. data/test/relevance/core_extensions/file_test.rb +8 -0
  52. data/test/relevance/core_extensions/response_test.rb +29 -0
  53. data/test/relevance/core_extensions/test_case_test.rb +16 -0
  54. data/test/relevance/tarantula/attack_form_submission_test.rb +79 -0
  55. data/test/relevance/tarantula/attack_handler_test.rb +29 -0
  56. data/test/relevance/tarantula/crawler_test.rb +296 -0
  57. data/test/relevance/tarantula/form_submission_test.rb +71 -0
  58. data/test/relevance/tarantula/form_test.rb +50 -0
  59. data/test/relevance/tarantula/html_document_handler_test.rb +43 -0
  60. data/test/relevance/tarantula/html_report_helper_test.rb +47 -0
  61. data/test/relevance/tarantula/html_reporter_test.rb +82 -0
  62. data/test/relevance/tarantula/invalid_html_handler_test.rb +33 -0
  63. data/test/relevance/tarantula/io_reporter_test.rb +11 -0
  64. data/test/relevance/tarantula/link_test.rb +61 -0
  65. data/test/relevance/tarantula/log_grabber_test.rb +26 -0
  66. data/test/relevance/tarantula/rails_integration_proxy_test.rb +94 -0
  67. data/test/relevance/tarantula/result_test.rb +85 -0
  68. data/test/relevance/tarantula/tidy_handler_test.rb +58 -0
  69. data/test/relevance/tarantula/transform_test.rb +21 -0
  70. data/test/relevance/tarantula_test.rb +23 -0
  71. data/test/test_helper.rb +34 -0
  72. data/tmp/test_output/images/background.jpg +0 -0
  73. data/tmp/test_output/images/relevance-os-logo.gif +0 -0
  74. data/tmp/test_output/images/tab.png +0 -0
  75. data/tmp/test_output/images/table-sort.gif +0 -0
  76. data/tmp/test_output/images/tarantula-sprites.png +0 -0
  77. data/tmp/test_output/index.html +255 -0
  78. data/tmp/test_output/javascripts/jquery-1.2.3.js +3408 -0
  79. data/tmp/test_output/javascripts/jquery-ui-tabs.js +890 -0
  80. data/tmp/test_output/javascripts/jquery.tablesorter.js +861 -0
  81. data/tmp/test_output/javascripts/tarantula.js +10 -0
  82. data/tmp/test_output/stylesheets/tarantula.css +638 -0
  83. data/tmp/test_output/stylesheets/ui.tabs.css +113 -0
  84. data/tmp/test_output/test_user_pages/1.html +71 -0
  85. data/tmp/test_output/test_user_pages/10.html +71 -0
  86. data/tmp/test_output/test_user_pages/11.html +71 -0
  87. data/tmp/test_output/test_user_pages/12.html +71 -0
  88. data/tmp/test_output/test_user_pages/13.html +71 -0
  89. data/tmp/test_output/test_user_pages/14.html +71 -0
  90. data/tmp/test_output/test_user_pages/15.html +71 -0
  91. data/tmp/test_output/test_user_pages/16.html +71 -0
  92. data/tmp/test_output/test_user_pages/17.html +71 -0
  93. data/tmp/test_output/test_user_pages/18.html +71 -0
  94. data/tmp/test_output/test_user_pages/19.html +71 -0
  95. data/tmp/test_output/test_user_pages/2.html +71 -0
  96. data/tmp/test_output/test_user_pages/20.html +71 -0
  97. data/tmp/test_output/test_user_pages/3.html +71 -0
  98. data/tmp/test_output/test_user_pages/4.html +71 -0
  99. data/tmp/test_output/test_user_pages/5.html +71 -0
  100. data/tmp/test_output/test_user_pages/6.html +71 -0
  101. data/tmp/test_output/test_user_pages/7.html +71 -0
  102. data/tmp/test_output/test_user_pages/8.html +71 -0
  103. data/tmp/test_output/test_user_pages/9.html +71 -0
  104. data/uninstall.rb +1 -0
  105. data/vendor/xss-shield/MIT-LICENSE +20 -0
  106. data/vendor/xss-shield/README +76 -0
  107. data/vendor/xss-shield/init.rb +16 -0
  108. data/vendor/xss-shield/lib/xss_shield.rb +6 -0
  109. data/vendor/xss-shield/lib/xss_shield/erb_hacks.rb +111 -0
  110. data/vendor/xss-shield/lib/xss_shield/haml_hacks.rb +42 -0
  111. data/vendor/xss-shield/lib/xss_shield/safe_string.rb +47 -0
  112. data/vendor/xss-shield/lib/xss_shield/secure_helpers.rb +40 -0
  113. data/vendor/xss-shield/test/test_actionview_integration.rb +40 -0
  114. data/vendor/xss-shield/test/test_erb.rb +44 -0
  115. data/vendor/xss-shield/test/test_haml.rb +43 -0
  116. data/vendor/xss-shield/test/test_helpers.rb +25 -0
  117. data/vendor/xss-shield/test/test_safe_string.rb +55 -0
  118. metadata +283 -0
@@ -0,0 +1,113 @@
1
+ /* Caution! Ensure accessibility in print and other media types... */
2
+ @media projection, screen { /* Use class for showing/hiding tab content, so that visibility can be better controlled in different media types... */
3
+ .ui-tabs-hide {
4
+ display: none;
5
+ }
6
+ }
7
+
8
+ /* Hide useless elements in print layouts... */
9
+ @media print {
10
+ .ui-tabs-nav {
11
+ display: none;
12
+ }
13
+ }
14
+
15
+ /* Skin */
16
+ .ui-tabs-nav, .ui-tabs-panel {
17
+ font-family: "Trebuchet MS", Trebuchet, Verdana, Helvetica, Arial, sans-serif;
18
+ font-size: 12px;
19
+ }
20
+ .ui-tabs-nav {
21
+ list-style: none;
22
+ margin: 0;
23
+ padding: 0 0 0 4px;
24
+ }
25
+ .ui-tabs-nav:after { /* clearing without presentational markup, IE gets extra treatment */
26
+ display: block;
27
+ clear: both;
28
+ content: " ";
29
+ }
30
+ .ui-tabs-nav li {
31
+ list-style: none;
32
+ float: left;
33
+ margin: 0 0 0 1px;
34
+ min-width: 84px; /* be nice to Opera */
35
+ }
36
+ .ui-tabs-nav a, .ui-tabs-nav a span {
37
+ display: block;
38
+ padding: 0 10px;
39
+ background: url(../images/tab.png) no-repeat;
40
+ }
41
+ .ui-tabs-nav a {
42
+ margin: 1px 0 0; /* position: relative makes opacity fail for disabled tab in IE */
43
+ padding-left: 0;
44
+ color: #27537a;
45
+ font-weight: bold;
46
+ line-height: 1.2;
47
+ text-align: center;
48
+ text-decoration: none;
49
+ white-space: nowrap; /* required in IE 6 */
50
+ outline: 0; /* prevent dotted border in Firefox */
51
+ }
52
+ .ui-tabs-nav .ui-tabs-selected a {
53
+ position: relative;
54
+ top: 1px;
55
+ z-index: 2;
56
+ margin-top: 0;
57
+ color: #000;
58
+ }
59
+ .ui-tabs-nav a span {
60
+ width: 64px; /* IE 6 treats width as min-width */
61
+ min-width: 64px;
62
+ height: 18px; /* IE 6 treats height as min-height */
63
+ min-height: 18px;
64
+ padding-top: 6px;
65
+ padding-right: 0;
66
+ }
67
+ *>.ui-tabs-nav a span { /* hide from IE 6 */
68
+ width: auto;
69
+ height: auto;
70
+ }
71
+ .ui-tabs-nav .ui-tabs-selected a span {
72
+ padding-bottom: 1px;
73
+ }
74
+ .ui-tabs-nav .ui-tabs-selected a, .ui-tabs-nav a:hover, .ui-tabs-nav a:focus, .ui-tabs-nav a:active {
75
+ background-position: 100% -150px;
76
+ }
77
+ .ui-tabs-nav a, .ui-tabs-nav .ui-tabs-disabled a:hover, .ui-tabs-nav .ui-tabs-disabled a:focus, .ui-tabs-nav .ui-tabs-disabled a:active {
78
+ background-position: 100% -100px;
79
+ }
80
+ .ui-tabs-nav .ui-tabs-selected a span, .ui-tabs-nav a:hover span, .ui-tabs-nav a:focus span, .ui-tabs-nav a:active span {
81
+ background-position: 0 -50px;
82
+ }
83
+ .ui-tabs-nav a span, .ui-tabs-nav .ui-tabs-disabled a:hover span, .ui-tabs-nav .ui-tabs-disabled a:focus span, .ui-tabs-nav .ui-tabs-disabled a:active span {
84
+ background-position: 0 0;
85
+ }
86
+ .ui-tabs-nav .ui-tabs-selected a:link, .ui-tabs-nav .ui-tabs-selected a:visited, .ui-tabs-nav .ui-tabs-disabled a:link, .ui-tabs-nav .ui-tabs-disabled a:visited { /* @ Opera, use pseudo classes otherwise it confuses cursor... */
87
+ cursor: text;
88
+ }
89
+ .ui-tabs-nav a:hover, .ui-tabs-nav a:focus, .ui-tabs-nav a:active,
90
+ .ui-tabs-nav .ui-tabs-unselect a:hover, .ui-tabs-nav .ui-tabs-unselect a:focus, .ui-tabs-nav .ui-tabs-unselect a:active { /* @ Opera, we need to be explicit again here now... */
91
+ cursor: pointer;
92
+ }
93
+ .ui-tabs-disabled {
94
+ opacity: .4;
95
+ filter: alpha(opacity=40);
96
+ }
97
+ .ui-tabs-panel {
98
+ border-top: 1px solid #97a5b0;
99
+ padding: 1em 8px;
100
+ }
101
+ .ui-tabs-loading em {
102
+ padding: 0 0 0 20px;
103
+ background: url(loading.gif) no-repeat 0 50%;
104
+ }
105
+
106
+ /* Additional IE specific bug fixes... */
107
+ * html .ui-tabs-nav { /* auto clear, @ IE 6 & IE 7 Quirks Mode */
108
+ display: inline-block;
109
+ }
110
+ *:first-child+html .ui-tabs-nav { /* @ IE 7 Standards Mode - do not group selectors, otherwise IE 6 will ignore complete rule (because of the unknown + combinator)... */
111
+ display: inline-block;
112
+ }
113
+
@@ -0,0 +1,34 @@
1
# nil carries no text, so its ellipsized form is always the empty string;
# the cutoff argument is accepted only for signature parity.
module Relevance::CoreExtensions::Nil
  def ellipsize(cutoff = 20)
    ""
  end
end

# Shortens strings for display.
module Relevance::CoreExtensions::String
  # Returns the receiver unchanged when it is at most +cutoff+ characters
  # long; otherwise returns its first +cutoff+ characters followed by "...".
  def ellipsize(cutoff = 20)
    return self unless length > cutoff
    self[0...cutoff] + "..."
  end
end

# Fallback for every other object: ellipsize its +inspect+ text.
module Relevance::CoreExtensions::Object
  def ellipsize(cutoff = 20)
    text = inspect
    text.ellipsize(cutoff)
  end
end

# Mix the extensions into the core classes (include is private, hence send).
Object.send(:include, Relevance::CoreExtensions::Object)
String.send(:include, Relevance::CoreExtensions::String)
NilClass.send(:include, Relevance::CoreExtensions::Nil)
32
+
33
+
34
+
@@ -0,0 +1,9 @@
1
# Adds File.extension, a variant of File.extname without the leading dot.
module Relevance::CoreExtensions::File
  # Returns everything after the first character of +extname(path)+.
  # When extname yields an empty string the slice (and the result) is nil.
  def extension(path)
    dotted = extname(path)
    dotted[1..-1]
  end
end

# Extend (not include) so the helper is callable as File.extension(path).
File.extend(Relevance::CoreExtensions::File)
@@ -0,0 +1,9 @@
1
# Mixed into response objects at runtime.
module Relevance::CoreExtensions::Response
  # True when the response should be handled as HTML: either its
  # content type starts with "text/html" or it has no content type at all.
  #
  # Some versions of Rails integration tests don't set a content type,
  # so nil is treated as HTML. A better fix would be welcome here.
  def html?
    return true if content_type == nil
    (content_type =~ %r{^text/html}) != nil
  end
end
9
+
@@ -0,0 +1,12 @@
1
# Convenience entry points for running Tarantula from a test case.
class Test::Unit::TestCase
  # Builds a crawler for +integration_test+ and crawls starting at
  # options[:url], or "/" when no :url option is given.
  # Returns whatever the crawler's #crawl returns.
  def tarantula_crawl(integration_test, options = {})
    start_url = options[:url] || "/"
    tarantula_crawler(integration_test, options).crawl(start_url)
  end

  # Delegates to RailsIntegrationProxy.rails_integration_test, passing both
  # arguments through unchanged.
  def tarantula_crawler(integration_test, options = {})
    Relevance::Tarantula::RailsIntegrationProxy.rails_integration_test(
      integration_test, options
    )
  end
end
12
+
@@ -0,0 +1,63 @@
1
+ require 'forwardable'
2
+
3
+ TARANTULA_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "../.."))
4
+
5
+ require 'erb'
6
+ gem 'actionpack'
7
+ gem 'activesupport'
8
+ require 'active_support'
9
+ require 'action_controller'
10
+ # bringing in xss-shield requires a bunch of other dependencies
11
+ # still not certain about this, if it ruins your world please let me know
12
+ #xss_shield_path = File.join(TARANTULA_ROOT, %w{vendor xss-shield})
13
+ #$: << File.join(xss_shield_path, "lib")
14
+ #require File.join(xss_shield_path, "init")
15
+
16
+ require 'htmlentities'
17
+ require 'facets/kernel/meta'
18
+ require 'facets/metaid'
19
+
20
+ module Relevance; end
21
+ module Relevance; module CoreExtensions; end; end
22
module Relevance
  # Helpers shared by Tarantula classes: install location, Rails root
  # lookup and logging gated on the VERBOSE environment variable.
  module Tarantula
    VERSION = "0.0.5"

    # Absolute path of the directory two levels above this file.
    def tarantula_home
      here = File.dirname(__FILE__)
      File.expand_path(File.join(here, "../.."))
    end

    # Raw value of ENV["VERBOSE"]; nil when the variable is unset.
    def verbose
      ENV["VERBOSE"]
    end

    # Writes +msg+ to stdout when #verbose is truthy; otherwise does nothing.
    def log(msg)
      return unless verbose
      puts msg
    end

    # The top-level RAILS_ROOT constant of the host application.
    def rails_root
      ::RAILS_ROOT
    end
  end
end
40
+
41
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "test_case"))
42
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "ellipsize"))
43
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "file"))
44
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "response"))
45
+
46
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_reporter"))
47
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_report_helper"))
48
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "io_reporter"))
49
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "recording"))
50
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "response"))
51
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "result"))
52
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "log_grabber"))
53
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "invalid_html_handler"))
54
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "transform"))
55
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "crawler"))
56
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form"))
57
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form_submission"))
58
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack"))
59
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_form_submission"))
60
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_handler"))
61
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "link"))
62
+
63
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "tidy_handler")) if ENV['TIDY_PATH']
@@ -0,0 +1,15 @@
1
# Value object describing one attack: its name, the input to submit, the
# output to look for, and a description.
class Relevance::Tarantula::Attack
  HASHABLE_ATTRS = [:name, :input, :output, :description]
  attr_accessor(*HASHABLE_ATTRS)

  # Builds an attack from a hash keyed by the symbols in HASHABLE_ATTRS.
  # Raises ArgumentError (with the offending key) for any other key.
  def initialize(hash)
    hash.each do |key, value|
      raise ArgumentError, key unless HASHABLE_ATTRS.include?(key)
      instance_variable_set("@#{key}", value)
    end
  end

  # Two attacks are equal when +other+ is an Attack and every attribute
  # listed in HASHABLE_ATTRS matches.
  def ==(other)
    return false unless Relevance::Tarantula::Attack === other
    HASHABLE_ATTRS.all? { |field| send(field) == other.send(field) }
  end
end
14
+
15
+
@@ -0,0 +1,75 @@
1
# A form submission whose named inputs and textareas are filled with the
# input string of a single attack rather than benign data.
class Relevance::Tarantula::AttackFormSubmission
  attr_accessor :method, :action, :data, :attack

  class << self
    # Returns the configured attacks. Entries are re-normalized on every
    # read, so any Hash in the list is replaced by an Attack built from it.
    def attacks
      @attacks = normalize_attacks(@attacks)
    end

    # Replaces the configured attacks; Hash entries are converted to Attack.
    def attacks=(atts)
      @attacks = normalize_attacks(atts)
    end

    private

    # Converts Hash entries to Attack objects and leaves other entries as-is.
    def normalize_attacks(list)
      list.map do |val|
        Hash === val ? Relevance::Tarantula::Attack.new(val) : val
      end
    end
  end
  @attacks = []

  # Captures the form's method and action and builds the submission data
  # from its selects, textareas and inputs (later sources win on key clashes).
  def initialize(form, attack = nil)
    @method = form.method
    @action = form.action
    @attack = attack
    @data = mutate_selects(form).merge(mutate_text_areas(form)).merge(mutate_inputs(form))
  end

  # Returns one submission of +form+ per configured attack.
  def self.mutate(form)
    attacks.map { |attack| new(form, attack) }
  end

  def to_s
    "#{action} #{method} #{data.inspect} #{attack.inspect}"
  end

  # a form's signature is what makes it unique (e.g. action + fields)
  # used to keep track of which forms we have submitted already
  def signature
    [action, data.keys.sort, attack.name]
  end

  # Builds a name => value hash for every element matching +tag_selector+
  # that has a name attribute; unnamed elements are ignored.
  def create_random_data_for(form, tag_selector)
    form.search(tag_selector).inject({}) do |form_args, input|
      # TODO: test
      form_args[input['name']] = random_data(input) if input['name']
      form_args
    end
  end

  def mutate_inputs(form)
    create_random_data_for(form, 'input')
  end

  def mutate_text_areas(form)
    create_random_data_for(form, 'textarea')
  end

  # Picks one option per select via +rand+ and submits its value.
  def mutate_selects(form)
    form.search('select').inject({}) do |form_args, select|
      option = select.search('option').rand
      # Guard against a nil pick (e.g. a <select> with no <option> children)
      # instead of failing with NoMethodError on nil.
      form_args[select['name']] = option && option['value']
      form_args
    end
  end

  # Value to submit for +input+: the _method field keeps its own value,
  # every other field receives the attack's input string.
  def random_data(input)
    case input['name']
    # `then` replaces the `when ... :` colon form, which is a syntax error
    # on Ruby 1.9 and later.
    when /^_method$/ then input['value']
    else attack.input
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'hpricot'
2
+
3
# Result handler that scans HTML response bodies for the output strings of
# the configured attacks and reports a failed result when one is found.
class Relevance::Tarantula::AttackHandler
  include ERB::Util

  # Configured attacks that define an output to look for.
  def attacks
    Relevance::Tarantula::AttackFormSubmission.attacks.select { |attack| attack.output }
  end

  # Returns nil when there are no attacks, the response is not HTML, or no
  # attack output occurs in the body. Otherwise returns a copy of +result+
  # marked unsuccessful, with a description and a data block containing up
  # to 400 characters of body text starting at most 200 characters before
  # the match, plus the YAML of the first attack with that output.
  def handle(result)
    candidates = attacks
    return if candidates.empty?

    alternatives = candidates.map { |a| Regexp.escape(a.output) }.join('|')
    response = result.response
    return unless response.html?

    offset = response.body =~ Regexp.new('(' + alternatives + ')')
    return unless offset

    # Copy the capture out of $1 right away so later calls cannot disturb it.
    matched = $1

    error_result = result.dup
    error_result.success = false
    error_result.description = "XSS error found, match was: #{h(matched)}"
    excerpt = response.body[[0, offset - 200].max, 400]
    attack_yaml = candidates.select { |a| a.output == matched }[0].to_yaml
    error_result.data = <<-STR
########################################################################
# Text around unescaped string: #{matched}
########################################################################
#{excerpt}





########################################################################
# Attack information:
########################################################################
#{attack_yaml}
    STR
    error_result
  end
end
@@ -0,0 +1,240 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "rails_integration_proxy"))
2
+ require File.expand_path(File.join(File.dirname(__FILE__), "html_document_handler.rb"))
3
+
4
# Drives a crawl through a proxy object: queued links are requested, queued
# form submissions are sent, and every response is passed to each handler,
# whose result is then given to each reporter.
class Relevance::Tarantula::Crawler
  extend Forwardable
  include Relevance::Tarantula

  attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
                :reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
                :form_signatures_queued, :max_url_length, :response_code_handler,
                :times_to_crawl, :fuzzers, :test_name
  attr_reader :transform_url_patterns, :referrers, :failures, :successes

  # Sets up an idle crawler: empty queues, a 1024-character URL limit,
  # Result as the response-code handler, a $stderr IOReporter, a single
  # crawl pass and FormSubmission as the only fuzzer.
  def initialize
    @max_url_length = 1024
    @successes = []
    @failures = []
    @handlers = [@response_code_handler = Result]
    @links_queued = Set.new
    @form_signatures_queued = Set.new
    @links_to_crawl = []
    @forms_to_crawl = []
    @referrers = {}
    # URLs starting with these prefixes are never crawled.
    @skip_uri_patterns = [
      /^javascript/,
      /^mailto/,
      /^http/,
    ]
    # Strip "#fragment" suffixes before queueing.
    self.transform_url_patterns = [
      [/#.*$/, '']
    ]
    @reporters = [Relevance::Tarantula::IOReporter.new($stderr)]
    @decoder = HTMLEntities.new
    @times_to_crawl = 1
    @fuzzers = [Relevance::Tarantula::FormSubmission]
  end

  # Forwards calls whose name matches Result::ALLOW_NNN_FOR to the
  # response-code handler; any other unknown method goes to super.
  def method_missing(meth, *args)
    super unless Result::ALLOW_NNN_FOR =~ meth.to_s
    @response_code_handler.send(meth, *args)
  end

  # Stores the URL transforms; [pattern, replacement] arrays are wrapped in
  # Transform objects, anything else is kept as given.
  def transform_url_patterns=(patterns)
    @transform_url_patterns = patterns.map do |pattern|
      Array === pattern ? Relevance::Tarantula::Transform.new(*pattern) : pattern
    end
  end

  # Crawls from +url+ @times_to_crawl times, resetting the queues to their
  # pre-crawl state between passes. Reports are always generated, including
  # after CTRL-C.
  def crawl(url = "/")
    orig_links_queued = @links_queued.dup
    orig_form_signatures_queued = @form_signatures_queued.dup
    orig_links_to_crawl = @links_to_crawl.dup
    orig_forms_to_crawl = @forms_to_crawl.dup
    @times_to_crawl.times do |i|
      queue_link url
      do_crawl

      puts "#{(i+1).ordinalize} crawl" if @times_to_crawl > 1

      if i + 1 < @times_to_crawl
        # Restore copies, never the snapshots themselves: the next pass
        # mutates whatever is installed here, and a mutated snapshot would
        # make every later pass start with that pass's links already queued.
        @links_queued = orig_links_queued.dup
        @form_signatures_queued = orig_form_signatures_queued.dup
        @links_to_crawl = orig_links_to_crawl.dup
        @forms_to_crawl = orig_forms_to_crawl.dup
        @referrers = {}
      end
    end
  rescue Interrupt
    $stderr.puts "CTRL-C"
  ensure
    report_results
  end

  # True when neither links nor forms remain to be crawled.
  def finished?
    @links_to_crawl.empty? && @forms_to_crawl.empty?
  end

  # Alternates between draining the link queue and the form queue until
  # both are empty (handling a response may queue more work).
  def do_crawl
    while (!finished?)
      crawl_queued_links
      crawl_queued_forms
    end
  end

  # Requests every queued link through the proxy and handles each response.
  def crawl_queued_links
    while (link = @links_to_crawl.pop)
      response = proxy.send(link.method, link.href)
      log "Response #{response.code} for #{link}"
      handle_link_results(link, response)
      blip
    end
  end

  # Passes +result+ to every reporter.
  def save_result(result)
    reporters.each do |reporter|
      reporter.report(result)
    end
  end

  # Runs each handler on a frozen Result for +link+ and saves what it
  # returns. A handler failure is logged and does not stop the crawl.
  def handle_link_results(link, response)
    handlers.each do |h|
      begin
        save_result h.handle(Result.new(:method => link.method,
                                        :url => link.href,
                                        :response => response,
                                        :log => grab_log!,
                                        :referrer => referrers[link],
                                        :test_name => test_name).freeze)
      rescue StandardError => e
        # StandardError, not Exception: Interrupt and SystemExit must still
        # propagate so CTRL-C reaches #crawl's rescue.
        log "error handling #{link} #{e.message}"
        # TODO: pass to results
      end
    end
  end

  # Submits +form+ through the proxy and returns the response. A missing
  # record is turned into a synthetic 404 plain-text response.
  def crawl_form(form)
    response = proxy.send(form.method, form.action, form.data)
    log "Response #{response.code} for #{form}"
    response
  rescue ActiveRecord::RecordNotFound => e
    log "Skipping #{form.action}, presumed ok that record is missing"
    Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
  end

  # Submits every queued form and handles each response.
  def crawl_queued_forms
    while (form = @forms_to_crawl.pop)
      response = crawl_form(form)
      handle_form_results(form, response)
      blip
    end
  end

  # Returns the log grabber's output, or nil/false when none is configured.
  def grab_log!
    @log_grabber && @log_grabber.grab!
  end

  # Runs each handler on a frozen Result for +form+ and saves what it returns.
  # NOTE(review): the referrer recorded here is the form's own action, while
  # queue_form stores the originating page in @referrers -- confirm intent.
  def handle_form_results(form, response)
    handlers.each do |h|
      save_result h.handle(Result.new(:method => form.method,
                                      :url => form.action,
                                      :response => response,
                                      :log => grab_log!,
                                      :referrer => form.action,
                                      :data => form.data.inspect,
                                      :test_name => test_name).freeze)
    end
  end

  # True for blank URLs, URLs matching a skip pattern, and URLs longer than
  # max_url_length; nil otherwise.
  def should_skip_url?(url)
    return true if url.blank?
    if @skip_uri_patterns.any? {|pattern| pattern =~ url}
      log "Skipping #{url}"
      return true
    end
    if url.length > max_url_length
      log "Skipping long url #{url}"
      return true
    end
  end

  # A link is skipped when its URL is skippable or it was queued before.
  def should_skip_link?(link)
    should_skip_url?(link.href) || @links_queued.member?(link)
  end

  # A form submission is skipped when its action is skippable or its
  # signature was queued before.
  def should_skip_form_submission?(fs)
    should_skip_url?(fs.action) || @form_signatures_queued.member?(fs.signature)
  end

  # Decodes HTML entities in +url+ and applies every transform in order.
  # Returns nil for a nil/false url.
  def transform_url(url)
    return unless url
    url = @decoder.decode(url)
    @transform_url_patterns.each do |pattern|
      url = pattern[url]
    end
    url
  end

  # Wraps +dest+ in a Link, normalizes its href and queues it unless it
  # should be skipped. Returns the queued Link, or nil when skipped.
  def queue_link(dest, referrer = nil)
    dest = Link.new(dest)
    dest.href = transform_url(dest.href)
    return if should_skip_link?(dest)
    @referrers[dest] = referrer if referrer
    @links_to_crawl << dest
    @links_queued << dest
    dest
  end

  # Queues every submission each fuzzer derives from +form+, skipping those
  # whose URL is skippable or whose signature is already queued.
  def queue_form(form, referrer = nil)
    fuzzers.each do |fuzzer|
      fuzzer.mutate(Form.new(form)).each do |fs|
        # fs = fuzzer.new(Form.new(form))
        fs.action = transform_url(fs.action)
        # `next`, not `return`: one skipped submission must not discard the
        # remaining mutations and fuzzers for this form.
        next if should_skip_form_submission?(fs)
        @referrers[fs.action] = referrer if referrer
        @forms_to_crawl << fs
        @form_signatures_queued << fs.signature
      end
    end
  end

  # Directory under the Rails root where reports are written.
  def report_dir
    File.join(rails_root, "tmp", "tarantula")
  end

  # Asks every reporter to finish its report. RuntimeErrors are collected
  # so all reporters get to run, then re-raised as one combined error.
  def generate_reports
    errors = []
    reporters.each do |reporter|
      begin
        reporter.finish_report(test_name)
      rescue RuntimeError => e
        errors << e
      end
    end
    unless errors.empty?
      raise errors.map(&:message).join("\n")
    end
  end

  def report_results
    generate_reports
  end

  # Number of links plus form signatures queued so far.
  def total_links_count
    @links_queued.size + @form_signatures_queued.size
  end

  # Number of links plus forms still waiting to be crawled.
  def links_remaining_count
    @links_to_crawl.size + @forms_to_crawl.size
  end

  def links_completed_count
    total_links_count - links_remaining_count
  end

  # Prints a one-line progress counter (overwritten in place via "\r")
  # unless verbose logging is on.
  def blip
    unless verbose
      print "\r #{links_completed_count} of #{total_links_count} links completed "
    end
  end
end