tarantula 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. data/CHANGELOG +2 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +106 -0
  4. data/Rakefile +80 -0
  5. data/init.rb +1 -0
  6. data/install.rb +1 -0
  7. data/laf/images/background.jpg +0 -0
  8. data/laf/images/relevance-os-logo.gif +0 -0
  9. data/laf/images/tab.png +0 -0
  10. data/laf/images/table-sort.gif +0 -0
  11. data/laf/images/tarantula-sprites.png +0 -0
  12. data/laf/javascripts/jquery-1.2.3.js +3408 -0
  13. data/laf/javascripts/jquery-ui-tabs.js +890 -0
  14. data/laf/javascripts/jquery.tablesorter.js +861 -0
  15. data/laf/javascripts/tarantula.js +10 -0
  16. data/laf/stylesheets/tarantula.css +638 -0
  17. data/laf/stylesheets/ui.tabs.css +113 -0
  18. data/lib/relevance/core_extensions/ellipsize.rb +34 -0
  19. data/lib/relevance/core_extensions/file.rb +9 -0
  20. data/lib/relevance/core_extensions/response.rb +9 -0
  21. data/lib/relevance/core_extensions/test_case.rb +12 -0
  22. data/lib/relevance/tarantula.rb +63 -0
  23. data/lib/relevance/tarantula/attack.rb +15 -0
  24. data/lib/relevance/tarantula/attack_form_submission.rb +75 -0
  25. data/lib/relevance/tarantula/attack_handler.rb +37 -0
  26. data/lib/relevance/tarantula/crawler.rb +240 -0
  27. data/lib/relevance/tarantula/detail.html.erb +77 -0
  28. data/lib/relevance/tarantula/form.rb +21 -0
  29. data/lib/relevance/tarantula/form_submission.rb +70 -0
  30. data/lib/relevance/tarantula/html_document_handler.rb +36 -0
  31. data/lib/relevance/tarantula/html_report_helper.rb +56 -0
  32. data/lib/relevance/tarantula/html_reporter.rb +105 -0
  33. data/lib/relevance/tarantula/index.html.erb +48 -0
  34. data/lib/relevance/tarantula/invalid_html_handler.rb +18 -0
  35. data/lib/relevance/tarantula/io_reporter.rb +34 -0
  36. data/lib/relevance/tarantula/link.rb +56 -0
  37. data/lib/relevance/tarantula/log_grabber.rb +16 -0
  38. data/lib/relevance/tarantula/rails_integration_proxy.rb +70 -0
  39. data/lib/relevance/tarantula/recording.rb +12 -0
  40. data/lib/relevance/tarantula/response.rb +13 -0
  41. data/lib/relevance/tarantula/result.rb +66 -0
  42. data/lib/relevance/tarantula/test_report.html.erb +34 -0
  43. data/lib/relevance/tarantula/tidy_handler.rb +32 -0
  44. data/lib/relevance/tarantula/transform.rb +17 -0
  45. data/manifest.txt +117 -0
  46. data/rails/init.rb +1 -0
  47. data/tarantula.gemspec +48 -0
  48. data/tasks/tarantula_tasks.rake +34 -0
  49. data/template/tarantula_test.rb +12 -0
  50. data/test/relevance/core_extensions/ellipsize_test.rb +19 -0
  51. data/test/relevance/core_extensions/file_test.rb +8 -0
  52. data/test/relevance/core_extensions/response_test.rb +29 -0
  53. data/test/relevance/core_extensions/test_case_test.rb +16 -0
  54. data/test/relevance/tarantula/attack_form_submission_test.rb +79 -0
  55. data/test/relevance/tarantula/attack_handler_test.rb +29 -0
  56. data/test/relevance/tarantula/crawler_test.rb +296 -0
  57. data/test/relevance/tarantula/form_submission_test.rb +71 -0
  58. data/test/relevance/tarantula/form_test.rb +50 -0
  59. data/test/relevance/tarantula/html_document_handler_test.rb +43 -0
  60. data/test/relevance/tarantula/html_report_helper_test.rb +47 -0
  61. data/test/relevance/tarantula/html_reporter_test.rb +82 -0
  62. data/test/relevance/tarantula/invalid_html_handler_test.rb +33 -0
  63. data/test/relevance/tarantula/io_reporter_test.rb +11 -0
  64. data/test/relevance/tarantula/link_test.rb +61 -0
  65. data/test/relevance/tarantula/log_grabber_test.rb +26 -0
  66. data/test/relevance/tarantula/rails_integration_proxy_test.rb +94 -0
  67. data/test/relevance/tarantula/result_test.rb +85 -0
  68. data/test/relevance/tarantula/tidy_handler_test.rb +58 -0
  69. data/test/relevance/tarantula/transform_test.rb +21 -0
  70. data/test/relevance/tarantula_test.rb +23 -0
  71. data/test/test_helper.rb +34 -0
  72. data/tmp/test_output/images/background.jpg +0 -0
  73. data/tmp/test_output/images/relevance-os-logo.gif +0 -0
  74. data/tmp/test_output/images/tab.png +0 -0
  75. data/tmp/test_output/images/table-sort.gif +0 -0
  76. data/tmp/test_output/images/tarantula-sprites.png +0 -0
  77. data/tmp/test_output/index.html +255 -0
  78. data/tmp/test_output/javascripts/jquery-1.2.3.js +3408 -0
  79. data/tmp/test_output/javascripts/jquery-ui-tabs.js +890 -0
  80. data/tmp/test_output/javascripts/jquery.tablesorter.js +861 -0
  81. data/tmp/test_output/javascripts/tarantula.js +10 -0
  82. data/tmp/test_output/stylesheets/tarantula.css +638 -0
  83. data/tmp/test_output/stylesheets/ui.tabs.css +113 -0
  84. data/tmp/test_output/test_user_pages/1.html +71 -0
  85. data/tmp/test_output/test_user_pages/10.html +71 -0
  86. data/tmp/test_output/test_user_pages/11.html +71 -0
  87. data/tmp/test_output/test_user_pages/12.html +71 -0
  88. data/tmp/test_output/test_user_pages/13.html +71 -0
  89. data/tmp/test_output/test_user_pages/14.html +71 -0
  90. data/tmp/test_output/test_user_pages/15.html +71 -0
  91. data/tmp/test_output/test_user_pages/16.html +71 -0
  92. data/tmp/test_output/test_user_pages/17.html +71 -0
  93. data/tmp/test_output/test_user_pages/18.html +71 -0
  94. data/tmp/test_output/test_user_pages/19.html +71 -0
  95. data/tmp/test_output/test_user_pages/2.html +71 -0
  96. data/tmp/test_output/test_user_pages/20.html +71 -0
  97. data/tmp/test_output/test_user_pages/3.html +71 -0
  98. data/tmp/test_output/test_user_pages/4.html +71 -0
  99. data/tmp/test_output/test_user_pages/5.html +71 -0
  100. data/tmp/test_output/test_user_pages/6.html +71 -0
  101. data/tmp/test_output/test_user_pages/7.html +71 -0
  102. data/tmp/test_output/test_user_pages/8.html +71 -0
  103. data/tmp/test_output/test_user_pages/9.html +71 -0
  104. data/uninstall.rb +1 -0
  105. data/vendor/xss-shield/MIT-LICENSE +20 -0
  106. data/vendor/xss-shield/README +76 -0
  107. data/vendor/xss-shield/init.rb +16 -0
  108. data/vendor/xss-shield/lib/xss_shield.rb +6 -0
  109. data/vendor/xss-shield/lib/xss_shield/erb_hacks.rb +111 -0
  110. data/vendor/xss-shield/lib/xss_shield/haml_hacks.rb +42 -0
  111. data/vendor/xss-shield/lib/xss_shield/safe_string.rb +47 -0
  112. data/vendor/xss-shield/lib/xss_shield/secure_helpers.rb +40 -0
  113. data/vendor/xss-shield/test/test_actionview_integration.rb +40 -0
  114. data/vendor/xss-shield/test/test_erb.rb +44 -0
  115. data/vendor/xss-shield/test/test_haml.rb +43 -0
  116. data/vendor/xss-shield/test/test_helpers.rb +25 -0
  117. data/vendor/xss-shield/test/test_safe_string.rb +55 -0
  118. metadata +283 -0
@@ -0,0 +1,113 @@
1
/* Caution! Ensure accessibility in print and other media types... */
/* Use class for showing/hiding tab content, so that visibility can be better
   controlled in different media types... */
@media projection, screen {
    .ui-tabs-hide {
        display: none;
    }
}

/* Hide useless elements in print layouts... */
@media print {
    .ui-tabs-nav {
        display: none;
    }
}

/* Skin */
.ui-tabs-nav,
.ui-tabs-panel {
    font-family: "Trebuchet MS", Trebuchet, Verdana, Helvetica, Arial, sans-serif;
    font-size: 12px;
}
.ui-tabs-nav {
    list-style: none;
    margin: 0;
    padding: 0 0 0 4px;
}
/* clearing without presentational markup, IE gets extra treatment */
.ui-tabs-nav:after {
    display: block;
    clear: both;
    content: " ";
}
.ui-tabs-nav li {
    list-style: none;
    float: left;
    margin: 0 0 0 1px;
    min-width: 84px; /* be nice to Opera */
}
.ui-tabs-nav a,
.ui-tabs-nav a span {
    display: block;
    padding: 0 10px;
    background: url(../images/tab.png) no-repeat;
}
.ui-tabs-nav a {
    margin: 1px 0 0; /* position: relative makes opacity fail for disabled tab in IE */
    padding-left: 0;
    color: #27537a;
    font-weight: bold;
    line-height: 1.2;
    text-align: center;
    text-decoration: none;
    white-space: nowrap; /* required in IE 6 */
    outline: 0; /* prevent dotted border in Firefox */
}
.ui-tabs-nav .ui-tabs-selected a {
    position: relative;
    top: 1px;
    z-index: 2;
    margin-top: 0;
    color: #000;
}
.ui-tabs-nav a span {
    width: 64px; /* IE 6 treats width as min-width */
    min-width: 64px;
    height: 18px; /* IE 6 treats height as min-height */
    min-height: 18px;
    padding-top: 6px;
    padding-right: 0;
}
/* hide from IE 6 */
*>.ui-tabs-nav a span {
    width: auto;
    height: auto;
}
.ui-tabs-nav .ui-tabs-selected a span {
    padding-bottom: 1px;
}
.ui-tabs-nav .ui-tabs-selected a,
.ui-tabs-nav a:hover,
.ui-tabs-nav a:focus,
.ui-tabs-nav a:active {
    background-position: 100% -150px;
}
.ui-tabs-nav a,
.ui-tabs-nav .ui-tabs-disabled a:hover,
.ui-tabs-nav .ui-tabs-disabled a:focus,
.ui-tabs-nav .ui-tabs-disabled a:active {
    background-position: 100% -100px;
}
.ui-tabs-nav .ui-tabs-selected a span,
.ui-tabs-nav a:hover span,
.ui-tabs-nav a:focus span,
.ui-tabs-nav a:active span {
    background-position: 0 -50px;
}
.ui-tabs-nav a span,
.ui-tabs-nav .ui-tabs-disabled a:hover span,
.ui-tabs-nav .ui-tabs-disabled a:focus span,
.ui-tabs-nav .ui-tabs-disabled a:active span {
    background-position: 0 0;
}
/* @ Opera, use pseudo classes otherwise it confuses cursor... */
.ui-tabs-nav .ui-tabs-selected a:link,
.ui-tabs-nav .ui-tabs-selected a:visited,
.ui-tabs-nav .ui-tabs-disabled a:link,
.ui-tabs-nav .ui-tabs-disabled a:visited {
    cursor: text;
}
/* @ Opera, we need to be explicit again here now... */
.ui-tabs-nav a:hover,
.ui-tabs-nav a:focus,
.ui-tabs-nav a:active,
.ui-tabs-nav .ui-tabs-unselect a:hover,
.ui-tabs-nav .ui-tabs-unselect a:focus,
.ui-tabs-nav .ui-tabs-unselect a:active {
    cursor: pointer;
}
.ui-tabs-disabled {
    opacity: .4;
    filter: alpha(opacity=40);
}
.ui-tabs-panel {
    border-top: 1px solid #97a5b0;
    padding: 1em 8px;
}
.ui-tabs-loading em {
    padding: 0 0 0 20px;
    background: url(loading.gif) no-repeat 0 50%;
}

/* Additional IE specific bug fixes... */
/* auto clear, @ IE 6 & IE 7 Quirks Mode */
* html .ui-tabs-nav {
    display: inline-block;
}
/* @ IE 7 Standards Mode - do not group selectors, otherwise IE 6 will ignore
   complete rule (because of the unknown + combinator)... */
*:first-child+html .ui-tabs-nav {
    display: inline-block;
}
113
+
@@ -0,0 +1,34 @@
1
# ellipsize variant for nil (this file includes it into NilClass).
module Relevance::CoreExtensions::Nil
  # Always returns an empty string. +cutoff+ is accepted only so the
  # signature matches the other ellipsize variants; it is not used.
  def ellipsize(cutoff = 20)
    ''
  end
end
6
+
7
# ellipsize variant for strings (this file includes it into String).
module Relevance::CoreExtensions::String
  # Returns the receiver itself when it is at most +cutoff+ characters long;
  # otherwise returns a new string made of the first +cutoff+ characters
  # followed by "...".
  def ellipsize(cutoff = 20)
    return self unless length > cutoff
    "#{self[0...cutoff]}..."
  end
end
16
+
17
# Generic ellipsize variant (this file includes it into Object).
module Relevance::CoreExtensions::Object
  # Ellipsizes the +inspect+ representation of the receiver by delegating to
  # the ellipsize method of the resulting string.
  def ellipsize(cutoff = 20)
    description = inspect
    description.ellipsize(cutoff)
  end
end
22
+
23
# Install the ellipsize variants on the core classes. +send+ is used because
# Module#include is private on older Rubies.
Object.send(:include, Relevance::CoreExtensions::Object)
String.send(:include, Relevance::CoreExtensions::String)
NilClass.send(:include, Relevance::CoreExtensions::Nil)
32
+
33
+
34
+
@@ -0,0 +1,9 @@
1
# Class-level helper added to File (see the extend call below).
module Relevance::CoreExtensions::File
  # Returns the extension of +path+ without its leading dot, i.e. everything
  # after the first character of +extname(path)+. When +extname+ returns an
  # empty string the slice yields nil.
  def extension(path)
    suffix = extname(path)
    suffix[1..-1]
  end
end

File.extend(Relevance::CoreExtensions::File)
@@ -0,0 +1,9 @@
1
# dynamically mixed in to response objects
module Relevance::CoreExtensions::Response
  # True when +content_type+ starts with "text/html", or when it is nil.
  #
  # Some versions of Rails integration tests don't set a content type, so a
  # nil content type is treated as html. A better fix would be welcome here.
  def html?
    declared_html = (content_type =~ %r{^text/html}) != nil
    declared_html || content_type == nil
  end
end
9
+
@@ -0,0 +1,12 @@
1
# Convenience entry points for running a tarantula crawl from a test case.
class Test::Unit::TestCase
  # Builds a crawler for +integration_test+ and crawls starting at
  # options[:url], or "/" when no :url option is given.
  def tarantula_crawl(integration_test, options = {})
    # read the start url before the options are handed to the proxy factory
    start_url = options[:url] || "/"
    tarantula_crawler(integration_test, options).crawl(start_url)
  end

  # Returns whatever RailsIntegrationProxy.rails_integration_test builds for
  # the given integration test and options.
  def tarantula_crawler(integration_test, options = {})
    proxy_class = Relevance::Tarantula::RailsIntegrationProxy
    proxy_class.rails_integration_test(integration_test, options)
  end
end
12
+
@@ -0,0 +1,63 @@
1
+ require 'forwardable'
2
+
3
+ TARANTULA_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "../.."))
4
+
5
+ require 'erb'
6
+ gem 'actionpack'
7
+ gem 'activesupport'
8
+ require 'active_support'
9
+ require 'action_controller'
10
+ # bringing in xss-shield requires a bunch of other dependencies
11
+ # still not certain about this, if it ruins your world please let me know
12
+ #xss_shield_path = File.join(TARANTULA_ROOT, %w{vendor xss-shield})
13
+ #$: << File.join(xss_shield_path, "lib")
14
+ #require File.join(xss_shield_path, "init")
15
+
16
+ require 'htmlentities'
17
+ require 'facets/kernel/meta'
18
+ require 'facets/metaid'
19
+
20
module Relevance
  # Namespace for the core-class extension modules.
  module CoreExtensions; end

  # Shared helpers for tarantula classes; intended to be mixed in.
  module Tarantula
    VERSION = "0.0.5"

    # Absolute path of the directory two levels above this file.
    def tarantula_home
      File.expand_path("../..", File.dirname(__FILE__))
    end

    # Prints +msg+ to standard output, but only when verbose is set.
    def log(msg)
      return unless verbose
      puts msg
    end

    # The top-level RAILS_ROOT constant.
    def rails_root
      ::RAILS_ROOT
    end

    # Value of the VERBOSE environment variable (nil when unset).
    def verbose
      ENV["VERBOSE"]
    end
  end
end
40
+
41
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "test_case"))
42
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "ellipsize"))
43
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "file"))
44
+ require File.expand_path(File.join(File.dirname(__FILE__), "core_extensions", "response"))
45
+
46
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_reporter"))
47
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "html_report_helper"))
48
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "io_reporter"))
49
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "recording"))
50
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "response"))
51
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "result"))
52
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "log_grabber"))
53
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "invalid_html_handler"))
54
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "transform"))
55
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "crawler"))
56
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form"))
57
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "form_submission"))
58
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack"))
59
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_form_submission"))
60
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "attack_handler"))
61
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "link"))
62
+
63
+ require File.expand_path(File.join(File.dirname(__FILE__), "tarantula", "tidy_handler")) if ENV['TIDY_PATH']
@@ -0,0 +1,15 @@
1
# Value object describing one attack: a name, the input to submit, the output
# to look for, and a description.
class Relevance::Tarantula::Attack
  HASHABLE_ATTRS = [:name, :input, :output, :description]
  attr_accessor(*HASHABLE_ATTRS)

  # Builds an attack from a hash keyed by the symbols in HASHABLE_ATTRS.
  # Raises ArgumentError (carrying the offending key) for any other key.
  def initialize(hash)
    hash.each do |key, value|
      raise ArgumentError, key unless HASHABLE_ATTRS.include?(key)
      instance_variable_set("@#{key}", value)
    end
  end

  # Two attacks are equal when +other+ is an Attack and every attribute in
  # HASHABLE_ATTRS compares equal.
  def ==(other)
    return false unless Relevance::Tarantula::Attack === other
    HASHABLE_ATTRS.all? { |attr| send(attr) == other.send(attr) }
  end
end
14
+
15
+
@@ -0,0 +1,75 @@
1
# A form submission whose input and textarea fields are filled with the input
# string of a single attack. The class-level +attacks+ list holds the attacks
# to try; +mutate+ produces one submission per attack for a given form.
#
# Note that the +method+ accessor (the form's HTTP method) shadows
# Object#method on instances of this class.
class Relevance::Tarantula::AttackFormSubmission
  attr_accessor :method, :action, :data, :attack

  class << self
    # Returns the configured attacks, converting any Hash entries into
    # Relevance::Tarantula::Attack instances (and remembering the result).
    # An unset list is treated as empty instead of raising on nil.
    def attacks
      @attacks = normalize_attacks(@attacks || [])
    end

    # Replaces the configured attacks. Entries may be Attack instances or
    # hashes accepted by Relevance::Tarantula::Attack.new.
    def attacks=(atts)
      @attacks = normalize_attacks(atts)
    end

    private

    # normalize from hash input to Attack
    def normalize_attacks(list)
      list.map do |val|
        Hash === val ? Relevance::Tarantula::Attack.new(val) : val
      end
    end
  end
  @attacks = []

  # Captures the method and action of +form+ and builds the submission data:
  # a random option for every select, and the attack input for every named
  # textarea and input (inputs win over textareas, which win over selects,
  # when names collide).
  def initialize(form, attack = nil)
    @method = form.method
    @action = form.action
    @attack = attack
    @data = mutate_selects(form).merge(mutate_text_areas(form)).merge(mutate_inputs(form))
  end

  # Returns one submission of +form+ per configured attack.
  def self.mutate(form)
    attacks.map do |attack|
      new(form, attack)
    end
  end

  def to_s
    "#{action} #{method} #{data.inspect} #{attack.inspect}"
  end

  # a form's signature is what makes it unique (e.g. action + fields)
  # used to keep track of which forms we have submitted already
  def signature
    [action, data.keys.sort, attack.name]
  end

  # Maps the name of every element matching +tag_selector+ that has a name
  # attribute to the value chosen by random_data.
  def create_random_data_for(form, tag_selector)
    form.search(tag_selector).inject({}) do |form_args, input|
      # TODO: test
      form_args[input['name']] = random_data(input) if input['name']
      form_args
    end
  end

  def mutate_inputs(form)
    create_random_data_for(form, 'input')
  end

  def mutate_text_areas(form)
    create_random_data_for(form, 'textarea')
  end

  # Maps the name of every select to the value of one of its options, picked
  # at random. Selects without a name or without options are skipped: the
  # former would add a nil key (which breaks the key sort in +signature+),
  # the latter have nothing to pick from.
  def mutate_selects(form)
    form.search('select').inject({}) do |form_args, select_tag|
      options = select_tag.search('option')
      name = select_tag['name']
      if name && !options.empty?
        # index with Kernel.rand rather than relying on a non-core Array#rand
        option = options[Kernel.rand(options.size)]
        form_args[name] = option['value']
      end
      form_args
    end
  end

  # Value to submit for +input+: the element's own value for the _method
  # field, the attack input for everything else.
  def random_data(input)
    case input['name']
    when /^_method$/ then input['value']
    else attack.input
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'hpricot'
2
+
3
# Result handler that looks for the expected output of any configured attack
# in the body of an HTML response and, when found, produces a failed copy of
# the result describing the match.
class Relevance::Tarantula::AttackHandler
  include ERB::Util

  # The configured attacks that define an output to look for.
  def attacks
    Relevance::Tarantula::AttackFormSubmission.attacks.select(&:output)
  end

  # Returns nil when there are no attacks with an output, when the response
  # is not html, or when no attack output occurs in the body. Otherwise
  # returns a dup of +result+ marked unsuccessful, with a description of the
  # match and a data section showing up to 400 characters of the body
  # starting at most 200 characters before the match, followed by the YAML
  # of the first attack whose output equals the matched text.
  def handle(result)
    return if attacks.empty?
    pattern = '(' + attacks.map {|a| Regexp.escape a.output}.join('|') + ')'
    response = result.response
    return unless response.html?
    offset = response.body =~ Regexp.new(pattern)
    return unless offset
    # capture the match right away; the globals are only read once
    matched = Regexp.last_match(1)
    error_result = result.dup
    error_result.success = false
    error_result.description = "XSS error found, match was: #{h(matched)}"
    error_result.data = <<-STR
########################################################################
# Text around unescaped string: #{matched}
########################################################################
#{response.body[[0, offset - 200].max , 400]}





########################################################################
# Attack information:
########################################################################
#{attacks.detect {|a| a.output == matched}.to_yaml}
    STR
    error_result
  end
end
@@ -0,0 +1,240 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "rails_integration_proxy"))
2
+ require File.expand_path(File.join(File.dirname(__FILE__), "html_document_handler.rb"))
3
+
4
# Drives a crawl through +proxy+: queued links are requested and queued form
# submissions are sent, every response is wrapped in a Result and offered to
# each handler, and whatever a handler returns is passed to each reporter.
class Relevance::Tarantula::Crawler
  extend Forwardable
  include Relevance::Tarantula

  attr_accessor :proxy, :handlers, :skip_uri_patterns, :log_grabber,
                :reporters, :links_to_crawl, :links_queued, :forms_to_crawl,
                :form_signatures_queued, :max_url_length, :response_code_handler,
                :times_to_crawl, :fuzzers, :test_name
  attr_reader :transform_url_patterns, :referrers, :failures, :successes

  # Sets up an empty crawl with the defaults visible below: Result as the
  # only handler, javascript/mailto/http urls skipped, url fragments
  # stripped, a single IOReporter on $stderr, one crawl pass, and
  # FormSubmission as the only fuzzer.
  def initialize
    @max_url_length = 1024
    @successes = []
    @failures = []
    @handlers = [@response_code_handler = Result]
    @links_queued = Set.new
    @form_signatures_queued = Set.new
    @links_to_crawl = []
    @forms_to_crawl = []
    @referrers = {}
    @skip_uri_patterns = [
      /^javascript/,
      /^mailto/,
      /^http/,
    ]
    self.transform_url_patterns = [
      [/#.*$/, '']
    ]
    @reporters = [Relevance::Tarantula::IOReporter.new($stderr)]
    @decoder = HTMLEntities.new
    @times_to_crawl = 1
    @fuzzers = [Relevance::Tarantula::FormSubmission]
  end

  # Forwards methods whose name matches Result::ALLOW_NNN_FOR to the response
  # code handler; everything else goes to super.
  def method_missing(meth, *args)
    super unless Result::ALLOW_NNN_FOR =~ meth.to_s
    @response_code_handler.send(meth, *args)
  end

  # Stores the url transforms. Array entries are expanded into
  # Relevance::Tarantula::Transform.new(*entry); other entries are kept as is.
  def transform_url_patterns=(patterns)
    @transform_url_patterns = patterns.map do |pattern|
      Array === pattern ? Relevance::Tarantula::Transform.new(*pattern) : pattern
    end
  end

  # Crawls starting at +url+, times_to_crawl times. Between passes the link
  # and form queues are reset to the state they had when crawl was called and
  # the referrers are cleared. An Interrupt stops the crawl and prints
  # "CTRL-C" to $stderr; reports are generated in every case.
  def crawl(url = "/")
    orig_links_queued = @links_queued.dup
    orig_form_signatures_queued = @form_signatures_queued.dup
    orig_links_to_crawl = @links_to_crawl.dup
    orig_forms_to_crawl = @forms_to_crawl.dup
    @times_to_crawl.times do |i|
      queue_link url
      do_crawl

      puts "#{(i+1).ordinalize} crawl" if @times_to_crawl > 1

      if i + 1 < @times_to_crawl
        # Restore from copies: handing out the snapshots themselves would let
        # the next pass mutate them, so a third pass would start from the
        # second pass's queues instead of the original state.
        @links_queued = orig_links_queued.dup
        @form_signatures_queued = orig_form_signatures_queued.dup
        @links_to_crawl = orig_links_to_crawl.dup
        @forms_to_crawl = orig_forms_to_crawl.dup
        @referrers = {}
      end
    end
  rescue Interrupt
    $stderr.puts "CTRL-C"
  ensure
    report_results
  end

  # True when neither links nor forms are waiting to be crawled.
  def finished?
    @links_to_crawl.empty? && @forms_to_crawl.empty?
  end

  # Alternates between draining the link queue and the form queue until both
  # are empty.
  def do_crawl
    while (!finished?)
      crawl_queued_links
      crawl_queued_forms
    end
  end

  # Requests every queued link (last queued first) and hands each response to
  # the handlers.
  def crawl_queued_links
    while (link = @links_to_crawl.pop)
      response = proxy.send(link.method, link.href)
      log "Response #{response.code} for #{link}"
      handle_link_results(link, response)
      blip
    end
  end

  # Passes +result+ to every reporter.
  def save_result(result)
    reporters.each do |reporter|
      reporter.report(result)
    end
  end

  # Builds a frozen Result for +link+ / +response+ per handler and saves what
  # the handler returns. An error raised while handling is logged and the
  # remaining handlers still run.
  def handle_link_results(link, response)
    handlers.each do |h|
      begin
        save_result h.handle(Result.new(:method => link.method,
                                        :url => link.href,
                                        :response => response,
                                        :log => grab_log!,
                                        :referrer => referrers[link],
                                        :test_name => test_name).freeze)
      rescue StandardError => e
        # StandardError only: rescuing Exception here would also swallow
        # Interrupt and defeat the CTRL-C handling in crawl.
        log "error handling #{link} #{e.message}"
        # TODO: pass to results
      end
    end
  end

  # Submits +form+ through the proxy and returns the response. A missing
  # record is presumed acceptable and turned into a synthetic 404 response.
  def crawl_form(form)
    response = proxy.send(form.method, form.action, form.data)
    log "Response #{response.code} for #{form}"
    response
  rescue ActiveRecord::RecordNotFound => e
    log "Skipping #{form.action}, presumed ok that record is missing"
    Relevance::Tarantula::Response.new(:code => "404", :body => e.message, :content_type => "text/plain")
  end

  # Submits every queued form (last queued first) and hands each response to
  # the handlers.
  def crawl_queued_forms
    while (form = @forms_to_crawl.pop)
      response = crawl_form(form)
      handle_form_results(form, response)
      blip
    end
  end

  # Returns log_grabber.grab! when a log grabber is configured, nil otherwise.
  def grab_log!
    @log_grabber && @log_grabber.grab!
  end

  # Builds a frozen Result for +form+ / +response+ per handler and saves what
  # the handler returns.
  # NOTE(review): the referrer reported here is the form's own action, not the
  # referrer recorded by queue_form - confirm this is intended.
  def handle_form_results(form, response)
    handlers.each do |h|
      save_result h.handle(Result.new(:method => form.method,
                                      :url => form.action,
                                      :response => response,
                                      :log => grab_log!,
                                      :referrer => form.action,
                                      :data => form.data.inspect,
                                      :test_name => test_name).freeze)
    end
  end

  # True for blank urls, urls matching one of skip_uri_patterns, and urls
  # longer than max_url_length; nil otherwise.
  def should_skip_url?(url)
    return true if url.blank?
    if @skip_uri_patterns.any? {|pattern| pattern =~ url}
      log "Skipping #{url}"
      return true
    end
    if url.length > max_url_length
      log "Skipping long url #{url}"
      return true
    end
  end

  # A link is skipped when its url should be skipped or it was queued before.
  def should_skip_link?(link)
    should_skip_url?(link.href) || @links_queued.member?(link)
  end

  # A form submission is skipped when its action should be skipped or a
  # submission with the same signature was queued before.
  def should_skip_form_submission?(fs)
    should_skip_url?(fs.action) || @form_signatures_queued.member?(fs.signature)
  end

  # Decodes HTML entities in +url+ and applies every transform in order.
  # Returns nil for a nil url.
  def transform_url(url)
    return unless url
    url = @decoder.decode(url)
    @transform_url_patterns.each do |pattern|
      url = pattern[url]
    end
    url
  end

  # Wraps +dest+ in a Link, transforms its href and queues it unless it
  # should be skipped. Returns the queued link, or nil when skipped.
  def queue_link(dest, referrer = nil)
    dest = Link.new(dest)
    dest.href = transform_url(dest.href)
    return if should_skip_link?(dest)
    @referrers[dest] = referrer if referrer
    @links_to_crawl << dest
    @links_queued << dest
    dest
  end

  # Queues every submission that each fuzzer derives from +form+.
  # NOTE(review): the +return+ below leaves the whole method at the first
  # skipped submission, so later submissions and later fuzzers are not
  # queued either - confirm whether +next+ was intended.
  def queue_form(form, referrer = nil)
    fuzzers.each do |fuzzer|
      fuzzer.mutate(Form.new(form)).each do |fs|
        # fs = fuzzer.new(Form.new(form))
        fs.action = transform_url(fs.action)
        return if should_skip_form_submission?(fs)
        @referrers[fs.action] = referrer if referrer
        @forms_to_crawl << fs
        @form_signatures_queued << fs.signature
      end
    end
  end

  # Directory "tmp/tarantula" under rails_root.
  def report_dir
    File.join(rails_root, "tmp", "tarantula")
  end

  # Asks every reporter to finish its report. RuntimeErrors are collected so
  # that all reporters get to run, then re-raised as a single error whose
  # message joins the individual messages with newlines.
  def generate_reports
    errors = []
    reporters.each do |reporter|
      begin
        reporter.finish_report(test_name)
      rescue RuntimeError => e
        errors << e
      end
    end
    unless errors.empty?
      raise errors.map(&:message).join("\n")
    end
  end

  def report_results
    generate_reports
  end

  # Number of distinct links and form signatures queued so far.
  def total_links_count
    @links_queued.size + @form_signatures_queued.size
  end

  # Number of links and forms still waiting to be crawled.
  def links_remaining_count
    @links_to_crawl.size + @forms_to_crawl.size
  end

  def links_completed_count
    total_links_count - links_remaining_count
  end

  # Prints a one-line progress indicator, unless verbose logging is on.
  def blip
    unless verbose
      print "\r #{links_completed_count} of #{total_links_count} links completed "
    end
  end
end