arachni 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (287)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +159 -0
  3. data/LICENSE.md +126 -196
  4. data/README.md +32 -24
  5. data/arachni.gemspec +7 -7
  6. data/components/checks/active/code_injection_timing.rb +3 -3
  7. data/components/checks/active/csrf.rb +2 -2
  8. data/components/checks/active/file_inclusion.rb +6 -7
  9. data/components/checks/active/os_cmd_injection.rb +3 -3
  10. data/components/checks/active/path_traversal.rb +7 -7
  11. data/components/checks/active/response_splitting.rb +9 -4
  12. data/components/checks/active/session_fixation.rb +7 -3
  13. data/components/checks/active/source_code_disclosure.rb +5 -5
  14. data/components/checks/active/unvalidated_redirect.rb +12 -3
  15. data/components/checks/active/unvalidated_redirect_dom.rb +3 -3
  16. data/components/checks/active/xss.rb +23 -10
  17. data/components/checks/active/xss_dom_inputs.rb +113 -11
  18. data/components/checks/active/xxe.rb +3 -3
  19. data/components/checks/passive/backdoors.rb +6 -5
  20. data/components/checks/passive/backup_directories.rb +6 -6
  21. data/components/checks/passive/backup_files.rb +6 -6
  22. data/components/checks/passive/common_admin_interfaces.rb +58 -0
  23. data/components/checks/passive/common_admin_interfaces/admin-panels.txt +49 -0
  24. data/components/checks/passive/common_directories/directories.txt +0 -16
  25. data/components/checks/passive/common_files.rb +6 -5
  26. data/components/checks/passive/common_files/filenames.txt +0 -2
  27. data/components/checks/passive/directory_listing.rb +6 -6
  28. data/components/checks/passive/grep/cookie_set_for_parent_domain.rb +3 -3
  29. data/components/checks/passive/grep/hsts.rb +6 -3
  30. data/components/checks/passive/grep/http_only_cookies.rb +3 -3
  31. data/components/checks/passive/grep/insecure_cookies.rb +2 -2
  32. data/components/checks/passive/grep/insecure_cors_policy.rb +6 -4
  33. data/components/checks/passive/grep/x_frame_options.rb +6 -4
  34. data/components/checks/passive/htaccess_limit.rb +6 -2
  35. data/components/checks/passive/http_put.rb +8 -4
  36. data/components/checks/passive/interesting_responses.rb +3 -2
  37. data/components/checks/passive/localstart_asp.rb +6 -2
  38. data/components/checks/passive/origin_spoof_access_restriction_bypass.rb +5 -1
  39. data/components/checks/passive/xst.rb +6 -2
  40. data/components/fingerprinters/frameworks/aspx_mvc.rb +43 -0
  41. data/components/fingerprinters/frameworks/cakephp.rb +28 -0
  42. data/components/fingerprinters/frameworks/cherrypy.rb +31 -0
  43. data/components/fingerprinters/frameworks/django.rb +33 -0
  44. data/components/fingerprinters/frameworks/jsf.rb +30 -0
  45. data/components/fingerprinters/frameworks/rack.rb +5 -7
  46. data/components/fingerprinters/frameworks/rails.rb +43 -0
  47. data/components/fingerprinters/languages/aspx.rb +11 -11
  48. data/components/fingerprinters/languages/{jsp.rb → java.rb} +11 -7
  49. data/components/fingerprinters/languages/php.rb +6 -6
  50. data/components/fingerprinters/languages/python.rb +14 -6
  51. data/components/fingerprinters/languages/ruby.rb +3 -5
  52. data/components/fingerprinters/servers/apache.rb +5 -4
  53. data/components/fingerprinters/servers/gunicorn.rb +33 -0
  54. data/components/fingerprinters/servers/jetty.rb +1 -1
  55. data/components/fingerprinters/servers/tomcat.rb +11 -4
  56. data/components/path_extractors/anchors.rb +5 -12
  57. data/components/path_extractors/areas.rb +5 -13
  58. data/components/path_extractors/comments.rb +5 -3
  59. data/components/path_extractors/data_url.rb +21 -0
  60. data/components/path_extractors/forms.rb +5 -13
  61. data/components/path_extractors/frames.rb +6 -13
  62. data/components/path_extractors/generic.rb +3 -12
  63. data/components/path_extractors/links.rb +5 -13
  64. data/components/path_extractors/meta_refresh.rb +5 -13
  65. data/components/path_extractors/scripts.rb +8 -14
  66. data/components/plugins/autologin.rb +17 -5
  67. data/components/plugins/defaults/meta/remedies/discovery.rb +11 -29
  68. data/components/plugins/login_script.rb +40 -10
  69. data/components/plugins/metrics.rb +235 -0
  70. data/components/plugins/proxy.rb +21 -4
  71. data/components/plugins/proxy/panel/page_accordion.html.erb +34 -2
  72. data/components/plugins/restrict_to_dom_state.rb +70 -0
  73. data/components/plugins/vector_feed.rb +38 -9
  74. data/components/reporters/plugin_formatters/html/metrics.rb +290 -0
  75. data/components/reporters/plugin_formatters/stdout/metrics.rb +80 -0
  76. data/components/reporters/plugin_formatters/xml/metrics.rb +29 -0
  77. data/components/reporters/stdout.rb +4 -2
  78. data/components/reporters/xml.rb +4 -4
  79. data/components/reporters/xml/schema.xsd +95 -0
  80. data/lib/arachni.rb +2 -0
  81. data/lib/arachni/browser.rb +132 -77
  82. data/lib/arachni/browser/javascript.rb +173 -45
  83. data/lib/arachni/browser/javascript/scripts/dom_monitor.js +81 -6
  84. data/lib/arachni/browser/javascript/scripts/taint_tracer.js +31 -3
  85. data/lib/arachni/browser_cluster.rb +41 -15
  86. data/lib/arachni/browser_cluster/job.rb +4 -0
  87. data/lib/arachni/browser_cluster/jobs/resource_exploration.rb +0 -9
  88. data/lib/arachni/browser_cluster/worker.rb +8 -5
  89. data/lib/arachni/check/auditor.rb +20 -8
  90. data/lib/arachni/check/base.rb +38 -6
  91. data/lib/arachni/element/base.rb +18 -1
  92. data/lib/arachni/element/capabilities/analyzable/differential.rb +0 -1
  93. data/lib/arachni/element/capabilities/analyzable/taint.rb +40 -10
  94. data/lib/arachni/element/capabilities/analyzable/timeout.rb +27 -23
  95. data/lib/arachni/element/capabilities/auditable/dom.rb +22 -0
  96. data/lib/arachni/element/capabilities/inputtable.rb +6 -2
  97. data/lib/arachni/element/capabilities/submittable.rb +1 -1
  98. data/lib/arachni/element/cookie.rb +37 -23
  99. data/lib/arachni/element/cookie/capabilities/mutable.rb +6 -6
  100. data/lib/arachni/element/cookie/dom.rb +0 -8
  101. data/lib/arachni/element/form.rb +28 -14
  102. data/lib/arachni/element/form/capabilities/auditable.rb +2 -2
  103. data/lib/arachni/element/form/capabilities/mutable.rb +5 -5
  104. data/lib/arachni/element/form/dom.rb +0 -8
  105. data/lib/arachni/element/generic_dom.rb +1 -1
  106. data/lib/arachni/element/json.rb +2 -1
  107. data/lib/arachni/element/json/capabilities/inputtable.rb +6 -6
  108. data/lib/arachni/element/json/capabilities/mutable.rb +1 -1
  109. data/lib/arachni/element/link.rb +13 -16
  110. data/lib/arachni/element/link/dom.rb +1 -14
  111. data/lib/arachni/element/link_template.rb +3 -2
  112. data/lib/arachni/element/link_template/dom.rb +0 -16
  113. data/lib/arachni/element/server.rb +51 -9
  114. data/lib/arachni/element/xml.rb +1 -0
  115. data/lib/arachni/ethon/easy.rb +4 -1
  116. data/lib/arachni/framework/parts/audit.rb +26 -77
  117. data/lib/arachni/framework/parts/browser.rb +50 -55
  118. data/lib/arachni/framework/parts/check.rb +4 -3
  119. data/lib/arachni/framework/parts/data.rb +41 -6
  120. data/lib/arachni/framework/parts/state.rb +16 -7
  121. data/lib/arachni/http/client.rb +66 -38
  122. data/lib/arachni/http/client/dynamic_404_handler.rb +46 -14
  123. data/lib/arachni/http/headers.rb +22 -10
  124. data/lib/arachni/http/proxy_server.rb +67 -22
  125. data/lib/arachni/http/proxy_server/ssl-interceptor-cacert.pem +34 -0
  126. data/lib/arachni/http/proxy_server/ssl-interceptor-cakey.pem +51 -0
  127. data/lib/arachni/http/request.rb +71 -18
  128. data/lib/arachni/issue.rb +17 -3
  129. data/lib/arachni/option_groups/browser_cluster.rb +34 -1
  130. data/lib/arachni/option_groups/http.rb +1 -1
  131. data/lib/arachni/page.rb +26 -13
  132. data/lib/arachni/page/dom/transition.rb +2 -2
  133. data/lib/arachni/parser.rb +28 -11
  134. data/lib/arachni/platform/fingerprinter.rb +5 -0
  135. data/lib/arachni/platform/manager.rb +65 -32
  136. data/lib/arachni/plugin/base.rb +8 -0
  137. data/lib/arachni/processes/instances.rb +25 -11
  138. data/lib/arachni/reporter/manager.rb +2 -2
  139. data/lib/arachni/rpc/client/instance.rb +4 -0
  140. data/lib/arachni/rpc/server/framework/master.rb +3 -3
  141. data/lib/arachni/rpc/server/framework/multi_instance.rb +0 -8
  142. data/lib/arachni/rpc/server/instance.rb +2 -1
  143. data/lib/arachni/ruby/array.rb +5 -0
  144. data/lib/arachni/ruby/hash.rb +5 -0
  145. data/lib/arachni/ruby/string.rb +2 -3
  146. data/lib/arachni/session.rb +32 -6
  147. data/lib/arachni/state/framework.rb +6 -2
  148. data/lib/arachni/support/cache.rb +1 -0
  149. data/lib/arachni/support/cache/base.rb +12 -8
  150. data/lib/arachni/support/cache/least_recently_pushed.rb +29 -0
  151. data/lib/arachni/support/cache/least_recently_used.rb +5 -8
  152. data/lib/arachni/support/cache/preference.rb +1 -1
  153. data/lib/arachni/support/cache/random_replacement.rb +1 -25
  154. data/lib/arachni/support/database/queue.rb +21 -8
  155. data/lib/arachni/support/lookup/base.rb +7 -1
  156. data/lib/arachni/support/mixins/observable.rb +3 -1
  157. data/lib/arachni/support/profiler.rb +51 -10
  158. data/lib/arachni/support/signature.rb +11 -2
  159. data/lib/arachni/trainer.rb +8 -2
  160. data/lib/arachni/uri.rb +28 -25
  161. data/lib/arachni/uri/scope.rb +1 -1
  162. data/lib/arachni/utilities.rb +8 -0
  163. data/lib/arachni/watir/element.rb +1 -1
  164. data/lib/version +1 -1
  165. data/spec/arachni/browser/javascript/dom_monitor_spec.rb +388 -53
  166. data/spec/arachni/browser/javascript/taint_tracer_spec.rb +41 -0
  167. data/spec/arachni/browser/javascript_spec.rb +235 -61
  168. data/spec/arachni/browser_cluster/jobs/resource_exploration_spec.rb +0 -9
  169. data/spec/arachni/browser_cluster_spec.rb +58 -10
  170. data/spec/arachni/browser_spec.rb +170 -26
  171. data/spec/arachni/check/auditor_spec.rb +22 -3
  172. data/spec/arachni/check/base_spec.rb +84 -0
  173. data/spec/arachni/element/body_spec.rb +1 -1
  174. data/spec/arachni/element/capabilities/analyzable/taint_spec.rb +3 -3
  175. data/spec/arachni/element/capabilities/analyzable/timeout_spec.rb +1 -1
  176. data/spec/arachni/element/cookie/dom_spec.rb +0 -9
  177. data/spec/arachni/element/cookie_spec.rb +85 -0
  178. data/spec/arachni/element/form/dom_spec.rb +0 -9
  179. data/spec/arachni/element/form_spec.rb +46 -3
  180. data/spec/arachni/element/json_spec.rb +20 -0
  181. data/spec/arachni/element/link/dom_spec.rb +0 -9
  182. data/spec/arachni/element/link_spec.rb +40 -15
  183. data/spec/arachni/element/link_template/dom_spec.rb +0 -8
  184. data/spec/arachni/element/link_template_spec.rb +2 -6
  185. data/spec/arachni/element/server_spec.rb +94 -8
  186. data/spec/arachni/element/xml_spec.rb +20 -0
  187. data/spec/arachni/framework/parts/audit_spec.rb +12 -14
  188. data/spec/arachni/framework/parts/browser_spec.rb +0 -171
  189. data/spec/arachni/framework/parts/platform_spec.rb +14 -8
  190. data/spec/arachni/framework/parts/report_spec.rb +1 -1
  191. data/spec/arachni/framework/parts/state_spec.rb +0 -9
  192. data/spec/arachni/http/client/dynamic_404_handlers_spec.rb +19 -0
  193. data/spec/arachni/http/client_spec.rb +169 -42
  194. data/spec/arachni/http/headers_spec.rb +18 -0
  195. data/spec/arachni/http/request_spec.rb +23 -0
  196. data/spec/arachni/issue_spec.rb +17 -6
  197. data/spec/arachni/page_spec.rb +22 -2
  198. data/spec/arachni/parser_spec.rb +5 -0
  199. data/spec/arachni/platform/manager_spec.rb +57 -25
  200. data/spec/arachni/reporter/manager_spec.rb +26 -0
  201. data/spec/arachni/rpc/server/active_options_spec.rb +9 -4
  202. data/spec/arachni/state/framework_spec.rb +2 -8
  203. data/spec/arachni/support/cache/least_recently_pushed_spec.rb +90 -0
  204. data/spec/arachni/support/cache/least_recently_used_spec.rb +5 -13
  205. data/spec/arachni/support/database/queue_spec.rb +7 -0
  206. data/spec/arachni/support/mixins/observable_spec.rb +15 -1
  207. data/spec/arachni/trainer_spec.rb +2 -2
  208. data/spec/components/checks/active/code_injection_timing_spec.rb +1 -1
  209. data/spec/components/checks/active/file_inclusion_spec.rb +6 -6
  210. data/spec/components/checks/active/path_traversal_spec.rb +2 -2
  211. data/spec/components/checks/active/source_code_disclosure_spec.rb +2 -2
  212. data/spec/components/checks/active/unvalidated_redirect_spec.rb +6 -6
  213. data/spec/components/checks/active/xss_dom_inputs_spec.rb +3 -5
  214. data/spec/components/checks/active/xss_dom_script_context_spec.rb +1 -1
  215. data/spec/components/checks/active/xss_spec.rb +5 -5
  216. data/spec/components/checks/passive/common_admin_interfaces_spec.rb +15 -0
  217. data/spec/components/checks/passive/interesting_responses_spec.rb +14 -1
  218. data/spec/components/fingerprinters/frameworks/aspx_mvc_spec.rb +31 -0
  219. data/spec/components/fingerprinters/frameworks/cakephp_spec.rb +22 -0
  220. data/spec/components/fingerprinters/frameworks/cherrypy_spec.rb +28 -0
  221. data/spec/components/fingerprinters/frameworks/django_spec.rb +37 -0
  222. data/spec/components/fingerprinters/frameworks/jsf_spec.rb +27 -0
  223. data/spec/components/fingerprinters/frameworks/rack_spec.rb +11 -14
  224. data/spec/components/fingerprinters/frameworks/rails_spec.rb +53 -0
  225. data/spec/components/fingerprinters/languages/asp_spec.rb +7 -9
  226. data/spec/components/fingerprinters/languages/aspx_spec.rb +10 -24
  227. data/spec/components/fingerprinters/languages/java_spec.rb +88 -0
  228. data/spec/components/fingerprinters/languages/php_spec.rb +19 -12
  229. data/spec/components/fingerprinters/languages/python_spec.rb +22 -9
  230. data/spec/components/fingerprinters/languages/ruby.rb +6 -4
  231. data/spec/components/fingerprinters/os/bsd_spec.rb +6 -4
  232. data/spec/components/fingerprinters/os/linux_spec.rb +6 -4
  233. data/spec/components/fingerprinters/os/solaris_spec.rb +6 -4
  234. data/spec/components/fingerprinters/os/unix_spec.rb +6 -4
  235. data/spec/components/fingerprinters/os/windows_spec.rb +6 -4
  236. data/spec/components/fingerprinters/servers/apache_spec.rb +15 -4
  237. data/spec/components/fingerprinters/servers/gunicorn_spec.rb +28 -0
  238. data/spec/components/fingerprinters/servers/iis_spec.rb +6 -6
  239. data/spec/components/fingerprinters/servers/jetty_spec.rb +6 -6
  240. data/spec/components/fingerprinters/servers/nginx_spec.rb +6 -4
  241. data/spec/components/fingerprinters/servers/tomcat_spec.rb +15 -6
  242. data/spec/components/path_extractors/data_url_spec.rb +19 -0
  243. data/spec/components/plugins/autologin_spec.rb +23 -0
  244. data/spec/components/plugins/login_script_spec.rb +112 -24
  245. data/spec/components/plugins/restrict_to_dom_state_spec.rb +16 -0
  246. data/spec/components/plugins/vector_feed_spec.rb +39 -1
  247. data/spec/support/factories/page/dom.rb +9 -4
  248. data/spec/support/factories/page/dom/transition.rb +31 -9
  249. data/spec/support/factories/scan_report.rb +8 -6
  250. data/spec/support/fixtures/empty/placeholder +0 -0
  251. data/spec/support/fixtures/report.afr +0 -0
  252. data/spec/support/fixtures/reporters/manager_spec/error.rb +18 -0
  253. data/spec/support/servers/arachni/browser.rb +117 -11
  254. data/spec/support/servers/arachni/browser/javascript/dom_monitor.rb +148 -4
  255. data/spec/support/servers/arachni/check/auditor.rb +4 -0
  256. data/spec/support/servers/arachni/element/cookie/cookie_dom.rb +1 -1
  257. data/spec/support/servers/arachni/http/client.rb +5 -0
  258. data/spec/support/servers/arachni/http/client/dynamic_404_handler.rb +13 -0
  259. data/spec/support/servers/checks/active/code_injection_timing.rb +1 -1
  260. data/spec/support/servers/checks/active/file_inclusion.rb +2 -2
  261. data/spec/support/servers/checks/active/path_traversal.rb +2 -2
  262. data/spec/support/servers/checks/active/source_code_disclosure.rb +40 -33
  263. data/spec/support/servers/checks/active/trainer_check.rb +9 -10
  264. data/spec/support/servers/checks/active/unvalidated_redirect_dom.rb +7 -4
  265. data/spec/support/servers/checks/active/xss.rb +35 -0
  266. data/spec/support/servers/checks/active/xss_dom.rb +1 -1
  267. data/spec/support/servers/checks/active/xss_dom_inputs.rb +24 -0
  268. data/spec/support/servers/checks/active/xss_dom_script_context.rb +1 -1
  269. data/spec/support/servers/checks/passive/common_admin_interfaces.rb +6 -0
  270. data/spec/support/servers/plugins/autologin.rb +9 -0
  271. data/spec/support/servers/plugins/restrict_to_dom_state.rb +4 -0
  272. data/spec/support/shared/element/base.rb +42 -0
  273. data/spec/support/shared/element/capabilities/auditable.rb +4 -4
  274. data/spec/support/shared/element/capabilities/auditable/dom.rb +26 -0
  275. data/spec/support/shared/element/capabilities/inputtable.rb +16 -11
  276. data/spec/support/shared/element/capabilities/submitable.rb +7 -2
  277. data/spec/support/shared/fingerprinter.rb +8 -0
  278. data/spec/support/shared/path_extractor.rb +1 -1
  279. data/ui/cli/framework.rb +3 -3
  280. data/ui/cli/framework/option_parser.rb +9 -0
  281. data/ui/cli/output.rb +9 -0
  282. data/ui/cli/reporter.rb +5 -2
  283. data/ui/cli/utilities.rb +4 -2
  284. metadata +76 -17
  285. data/lib/arachni/http/proxy_server/ssl-interceptor-cert.pem +0 -34
  286. data/lib/arachni/http/proxy_server/ssl-interceptor-pkey.pem +0 -51
  287. data/spec/components/fingerprinters/languages/jsp_spec.rb +0 -56
@@ -30,18 +30,20 @@ module Browser
30
30
  state.set_status_message :browser_cluster_startup
31
31
  end
32
32
 
33
- @browser_cluster ||= BrowserCluster.new
33
+ @browser_cluster ||= BrowserCluster.new(
34
+ on_pop: proc do
35
+ next if !pause?
36
+
37
+ print_debug 'Blocking browser cluster on pop.'
38
+ wait_if_paused
39
+ end
40
+ )
34
41
  state.clear_status_messages
42
+
35
43
  @browser_cluster
36
44
  end
37
45
  end
38
46
 
39
- def browser
40
- return if !use_browsers?
41
-
42
- @browser ||= Arachni::Browser.new( store_pages: false )
43
- end
44
-
45
47
  # @return [Bool]
46
48
  # `true` if the environment has a browser, `false` otherwise.
47
49
  def host_has_browser?
@@ -58,36 +60,6 @@ module Browser
58
60
  browser_cluster.skip_states( browser_job.id )
59
61
  end
60
62
 
61
- # @private
62
- def apply_dom_metadata( page )
63
- return false if page.dom.depth > 0 || !page.has_script? ||
64
- !browser
65
-
66
- # This optimization only affects Form::DOM elements, so don't bother
67
- # if none of the checks are interested in any of them.
68
- return false if !checks.values.find do |c|
69
- c.check? page, [Element::Form::DOM, Element::Cookie::DOM]
70
- end
71
-
72
- begin
73
- bp = browser.load( page ).to_page
74
- rescue Selenium::WebDriver::Error::WebDriverError,
75
- Watir::Exception::Error => e
76
- print_debug "Could not apply metadata to '#{page.dom.url}'" <<
77
- " because: #{e} [#{e.class}"
78
- return
79
- end
80
-
81
- # Request timeout or some other failure...
82
- return if bp.code == 0
83
-
84
- page.import_metadata( bp, :skip_dom )
85
-
86
- true
87
- ensure
88
- browser.clear_buffers if browser
89
- end
90
-
91
63
  def use_browsers?
92
64
  options.browser_cluster.pool_size > 0 &&
93
65
  options.scope.dom_depth_limit > 0 && host_has_browser?
@@ -95,13 +67,6 @@ module Browser
95
67
 
96
68
  private
97
69
 
98
- def shutdown_browser
99
- return if !@browser
100
-
101
- @browser.shutdown
102
- @browser = nil
103
- end
104
-
105
70
  def shutdown_browser_cluster
106
71
  return if !@browser_cluster
107
72
 
@@ -125,11 +90,6 @@ module Browser
125
90
  synchronize do
126
91
  return if !push_to_page_queue page
127
92
 
128
- pushed_paths = nil
129
- if crawl?
130
- pushed_paths = push_paths_from_page( page ).size
131
- end
132
-
133
93
  print_status "Got new page from the browser-cluster: #{page.dom.url}"
134
94
  print_info "DOM depth: #{page.dom.depth} (Limit: #{options.scope.dom_depth_limit})"
135
95
 
@@ -137,10 +97,6 @@ module Browser
137
97
  print_info ' Transitions:'
138
98
  page.dom.print_transitions( method(:print_info), ' ' )
139
99
  end
140
-
141
- if pushed_paths
142
- print_info " -- Analysis resulted in #{pushed_paths} usable paths."
143
- end
144
100
  end
145
101
  end
146
102
 
@@ -154,13 +110,52 @@ module Browser
154
110
  Options.scope.dom_depth_limit.to_i < page.dom.depth + 1 ||
155
111
  !page.has_script?
156
112
 
157
- browser_cluster.queue( browser_job.forward( resource: page ) ) do |response|
158
- handle_browser_page response.page
113
+ # We need to schedule a separate job for applying metadata because it
114
+ # needs to have a clean state.
115
+ schedule_dom_metadata_application( page )
116
+
117
+ browser_cluster.queue( browser_job.forward( resource: page ) ) do |result|
118
+ handle_browser_page result.page
159
119
  end
160
120
 
161
121
  true
162
122
  end
163
123
 
124
+ def schedule_dom_metadata_application( page )
125
+ return if page.dom.depth > 0
126
+ return if page.metadata.map { |_, data| data['skip_dom'].values }.
127
+ flatten.compact.any?
128
+
129
+ # This optimization only affects Form & Cookie DOM elements,
130
+ # so don't bother if none of the checks are interested in them.
131
+ return if !checks.values.
132
+ find { |c| c.check? page, [Element::Form::DOM, Element::Cookie::DOM], true }
133
+
134
+ page.clear_cache
135
+
136
+ browser_cluster.with_browser do |browser|
137
+ apply_dom_metadata( browser, page )
138
+ end
139
+ end
140
+
141
+ def apply_dom_metadata( browser, page )
142
+ bp = nil
143
+
144
+ begin
145
+ bp = browser.load( page ).to_page
146
+ rescue Selenium::WebDriver::Error::WebDriverError,
147
+ Watir::Exception::Error => e
148
+ print_debug "Could not apply metadata to '#{page.dom.url}'" <<
149
+ " because: #{e} [#{e.class}"
150
+ return
151
+ end
152
+
153
+ # Request timeout or some other failure...
154
+ return if bp.code == 0
155
+
156
+ handle_browser_page bp
157
+ end
158
+
164
159
  def browser_job
165
160
  # We'll recycle the same job since all of them will have the same
166
161
  # callback. This will force the BrowserCluster to use the same block
@@ -67,11 +67,12 @@ module Check
67
67
  # Check to run.
68
68
  # @param [Page] page
69
69
  def check_page( check, page )
70
+ ps = page.platforms.to_a
71
+
70
72
  # If we've been given platforms which the check doesn't support don't
71
73
  # even bother running it.
72
- if !check.supports_platforms?( Options.platforms )
73
- print_info "Check #{check.shortname} does not support: " <<
74
- Options.platforms.join( ' + ' )
74
+ if !check.supports_platforms?( ps )
75
+ print_info "Check #{check.shortname} does not support: #{ps.join( ' + ' )}"
75
76
  return false
76
77
  end
77
78
 
@@ -27,8 +27,9 @@ module Data
27
27
  # `true` if push was successful, `false` if the `page` matched any
28
28
  # exclusion criteria or has already been seen.
29
29
  def push_to_page_queue( page, force = false )
30
- return false if !force && (!accepts_more_pages? || state.page_seen?( page ) ||
31
- page.scope.out? || page.scope.redundant?( true ))
30
+ return false if !force && (!accepts_more_pages? ||
31
+ state.page_seen?( page ) || page.scope.out? ||
32
+ page.scope.redundant?( true ))
32
33
 
33
34
  # We want to update from the already loaded page cache (if there is one)
34
35
  # as we have to store the page anyways (needs to go through Browser analysis)
@@ -40,6 +41,7 @@ module Data
40
41
  # some other component; however, it wouldn't be the end of the world if
41
42
  # that were to happen.
42
43
  ElementFilter.update_from_page_cache page
44
+ page.clear_cache
43
45
 
44
46
  data.push_to_page_queue page
45
47
  state.page_seen page
@@ -99,12 +101,24 @@ module Data
99
101
  !url_queue.empty? || !page_queue.empty?
100
102
  end
101
103
 
104
+ # @return [Page, nil]
105
+ # A page if the queues aren't empty, `nil` otherwise.
106
+ def pop_page
107
+ pop_page_from_queue || pop_page_from_url_queue
108
+ end
109
+
110
+ # @return [Page, nil]
111
+ # A page if the queue wasn't empty, `nil` otherwise.
102
112
  def pop_page_from_url_queue( &block )
103
113
  return if url_queue.empty?
104
114
 
105
115
  grabbed_page = nil
106
- Page.from_url( url_queue.pop,
107
- http: { update_cookies: true, performer: self }
116
+ Page.from_url(
117
+ url_queue.pop,
118
+ http: {
119
+ update_cookies: true,
120
+ performer: self
121
+ }
108
122
  ) do |page|
109
123
  @retries[page.url.hash] ||= 0
110
124
 
@@ -125,7 +139,8 @@ module Data
125
139
  @failures << page.url
126
140
 
127
141
  print_error "Giving up trying to audit: #{page.url}"
128
- print_error "Couldn't get a response after #{AUDIT_PAGE_MAX_TRIES} tries: #{page.response.return_message}."
142
+ print_error "Couldn't get a response after #{AUDIT_PAGE_MAX_TRIES}" +
143
+ " tries: #{page.response.return_message}."
129
144
  else
130
145
  print_bad "Retrying for: #{page.url} [#{page.response.return_message}]"
131
146
  @retries[page.url.hash] += 1
@@ -135,16 +150,36 @@ module Data
135
150
  grabbed_page = nil
136
151
  block.call grabbed_page if block_given?
137
152
  end
153
+
138
154
  http.run if !block_given?
139
155
  grabbed_page
140
156
  end
141
157
 
142
- # @return [Page]
158
+ # @return [Page, nil]
159
+ # A page if the queue wasn't empty, `nil` otherwise.
143
160
  def pop_page_from_queue
144
161
  return if page_queue.empty?
145
162
  page_queue.pop
146
163
  end
147
164
 
165
+ def replenish_page_queue_from_url_queue
166
+ return if !page_queue.empty?
167
+
168
+ # Number pulled out of my ass, low enough to not add any noticeable
169
+ # stress, hopefully high enough to grab us at least one page that has
170
+ # some workload which will result in HTTP requests which will mask the
171
+ # next replenishing operation.
172
+ [10, page_queue.free_buffer_size].min.times do
173
+ return if url_queue.empty?
174
+
175
+ # We push directly to the queue instead of using #push_to_page_queue
176
+ # because it's too early to deduplicate.
177
+ pop_page_from_url_queue { |p| page_queue << p }
178
+ end
179
+
180
+ !url_queue.empty?
181
+ end
182
+
148
183
  def add_to_sitemap( page )
149
184
  data.add_page_to_sitemap( page )
150
185
  end
@@ -65,6 +65,14 @@ module State
65
65
  def initialize
66
66
  super
67
67
 
68
+ Element::Capabilities::Auditable.skip_like do |element|
69
+ if pause?
70
+ print_debug "Blocking on element audit: #{element.audit_id}"
71
+ end
72
+
73
+ wait_if_paused
74
+ end
75
+
68
76
  state.status = :ready
69
77
  end
70
78
 
@@ -96,13 +104,14 @@ module State
96
104
  return if @cleaned_up
97
105
  @cleaned_up = true
98
106
 
107
+ state.force_resume
108
+
99
109
  state.status = :cleanup
100
110
 
101
111
  sitemap.merge!( browser_sitemap )
102
112
 
103
113
  if shutdown_browsers
104
114
  state.set_status_message :browser_cluster_shutdown
105
- shutdown_browser
106
115
  shutdown_browser_cluster
107
116
  end
108
117
 
@@ -128,6 +137,11 @@ module State
128
137
  true
129
138
  end
130
139
 
140
+ # @private
141
+ def reset_trainer
142
+ @trainer = Trainer.new( self )
143
+ end
144
+
131
145
  # @note Prefer this from {.reset} if you already have an instance.
132
146
  # @note You should first reset {Arachni::Options}.
133
147
  #
@@ -319,11 +333,6 @@ module State
319
333
  state.suspended?
320
334
  end
321
335
 
322
- # @private
323
- def reset_trainer
324
- @trainer = Trainer.new( self )
325
- end
326
-
327
336
  private
328
337
 
329
338
  # @note Must be called before calling any audit methods.
@@ -370,7 +379,7 @@ module State
370
379
  new_element = true
371
380
  end
372
381
 
373
- if e.respond_to?( :dom ) && e.dom
382
+ if page.dom.depth > 0 && e.respond_to?( :dom ) && e.dom
374
383
  if !state.element_checked?( e.dom )
375
384
  state.element_checked e.dom
376
385
  new_element = true
@@ -140,7 +140,11 @@ class Client
140
140
  headers.merge!( Options.http.request_headers )
141
141
 
142
142
  cookie_jar.load( Options.http.cookie_jar_filepath ) if Options.http.cookie_jar_filepath
143
- update_cookies( Options.http.cookies )
143
+
144
+ Options.http.cookies.each do |name, value|
145
+ update_cookies( name => value )
146
+ end
147
+
144
148
  update_cookies( Options.http.cookie_string ) if Options.http.cookie_string
145
149
 
146
150
  reset_burst_info
@@ -465,14 +469,30 @@ class Client
465
469
  @running = true
466
470
 
467
471
  reset_burst_info
472
+
473
+ # Lots of new objects are about to be generated, make sure that old ones
474
+ # have been collected to prevent RAM spikes.
475
+ gc
476
+
468
477
  client_run
469
478
 
479
+ # Collect the new objects as well.
480
+ gc
481
+
470
482
  @queue_size = 0
471
483
  @running = false
472
484
 
473
485
  @burst_runtime += Time.now - @burst_runtime_start
474
486
  @total_runtime += @burst_runtime
475
487
  end
488
+
489
+ def gc
490
+ # Don't GC after every little run, only do it when we're past the
491
+ # maximum queue size.
492
+ return if @queue_size < Options.http.request_queue_size
493
+
494
+ GC.start
495
+ end
476
496
 
477
497
  def reset_burst_info
478
498
  @burst_response_time_sum = 0
@@ -501,46 +521,12 @@ class Client
501
521
  print_debug_level_3 "Headers: #{request.headers}"
502
522
  print_debug_level_3 "Cookies: #{request.cookies}"
503
523
  print_debug_level_3 "Train?: #{request.train?}"
524
+ print_debug_level_3 "Fingerprint?: #{request.fingerprint?}"
504
525
  print_debug_level_3 '------------'
505
526
  end
506
527
 
507
528
  if add_callbacks
508
- request.on_complete do |response|
509
- synchronize do
510
- @response_count += 1
511
- @burst_response_count += 1
512
- @burst_response_time_sum += response.time
513
- @total_response_time_sum += response.time
514
-
515
- if Platform::Manager.fingerprint?( response )
516
- # Force a fingerprint by converting the Response to a Page object.
517
- response.to_page
518
- end
519
-
520
- notify_on_complete( response )
521
-
522
- parse_and_set_cookies( response ) if request.update_cookies?
523
-
524
- if debug_level_3?
525
- print_debug_level_3 '------------'
526
- print_debug_level_3 "Got response for request ID#: #{response.request.id}"
527
- print_debug_level_3 "Performer: #{response.request.performer.inspect}"
528
- print_debug_level_3 "Status: #{response.code}"
529
- print_debug_level_3 "Code: #{response.return_code}"
530
- print_debug_level_3 "Message: #{response.return_message}"
531
- print_debug_level_3 "URL: #{response.url}"
532
- print_debug_level_3 "Headers:\n#{response.headers_string}"
533
- print_debug_level_3 "Parsed headers: #{response.headers}"
534
- end
535
-
536
- if response.timed_out?
537
- print_debug_level_3 "Request timed-out! -- ID# #{response.request.id}"
538
- @time_out_count += 1
539
- end
540
-
541
- print_debug_level_3 '------------'
542
- end
543
- end
529
+ request.on_complete( &method(:global_on_complete) )
544
530
  end
545
531
 
546
532
  synchronize { @request_count += 1 }
@@ -559,6 +545,47 @@ class Client
559
545
  request
560
546
  end
561
547
 
548
+ def global_on_complete( response )
549
+ request = response.request
550
+
551
+ synchronize do
552
+ @response_count += 1
553
+ @burst_response_count += 1
554
+ @burst_response_time_sum += response.time
555
+ @total_response_time_sum += response.time
556
+
557
+ if response.request.fingerprint? &&
558
+ Platform::Manager.fingerprint?( response )
559
+
560
+ # Force a fingerprint by converting the Response to a Page object.
561
+ response.to_page
562
+ end
563
+
564
+ notify_on_complete( response )
565
+
566
+ parse_and_set_cookies( response ) if request.update_cookies?
567
+
568
+ if debug_level_3?
569
+ print_debug_level_3 '------------'
570
+ print_debug_level_3 "Got response for request ID#: #{response.request.id}\n#{response.request}"
571
+ print_debug_level_3 "Performer: #{response.request.performer.inspect}"
572
+ print_debug_level_3 "Status: #{response.code}"
573
+ print_debug_level_3 "Code: #{response.return_code}"
574
+ print_debug_level_3 "Message: #{response.return_message}"
575
+ print_debug_level_3 "URL: #{response.url}"
576
+ print_debug_level_3 "Headers:\n#{response.headers_string}"
577
+ print_debug_level_3 "Parsed headers: #{response.headers}"
578
+ end
579
+
580
+ if response.timed_out?
581
+ print_debug_level_3 "Request timed-out! -- ID# #{response.request.id}"
582
+ @time_out_count += 1
583
+ end
584
+
585
+ print_debug_level_3 '------------'
586
+ end
587
+ end
588
+
562
589
  def client_initialize
563
590
  @hydra = Typhoeus::Hydra.new(
564
591
  max_concurrency: Options.http.request_concurrency || MAX_CONCURRENCY
@@ -566,7 +593,8 @@ class Client
566
593
  end
567
594
 
568
595
  def client_run
569
- @hydra.run
596
+ # Can get Ethon select errors.
597
+ exception_jail( false ) { @hydra.run }
570
598
  end
571
599
 
572
600
  def client_abort