arachni 1.1 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +159 -0
  3. data/LICENSE.md +126 -196
  4. data/README.md +32 -24
  5. data/arachni.gemspec +7 -7
  6. data/components/checks/active/code_injection_timing.rb +3 -3
  7. data/components/checks/active/csrf.rb +2 -2
  8. data/components/checks/active/file_inclusion.rb +6 -7
  9. data/components/checks/active/os_cmd_injection.rb +3 -3
  10. data/components/checks/active/path_traversal.rb +7 -7
  11. data/components/checks/active/response_splitting.rb +9 -4
  12. data/components/checks/active/session_fixation.rb +7 -3
  13. data/components/checks/active/source_code_disclosure.rb +5 -5
  14. data/components/checks/active/unvalidated_redirect.rb +12 -3
  15. data/components/checks/active/unvalidated_redirect_dom.rb +3 -3
  16. data/components/checks/active/xss.rb +23 -10
  17. data/components/checks/active/xss_dom_inputs.rb +113 -11
  18. data/components/checks/active/xxe.rb +3 -3
  19. data/components/checks/passive/backdoors.rb +6 -5
  20. data/components/checks/passive/backup_directories.rb +6 -6
  21. data/components/checks/passive/backup_files.rb +6 -6
  22. data/components/checks/passive/common_admin_interfaces.rb +58 -0
  23. data/components/checks/passive/common_admin_interfaces/admin-panels.txt +49 -0
  24. data/components/checks/passive/common_directories/directories.txt +0 -16
  25. data/components/checks/passive/common_files.rb +6 -5
  26. data/components/checks/passive/common_files/filenames.txt +0 -2
  27. data/components/checks/passive/directory_listing.rb +6 -6
  28. data/components/checks/passive/grep/cookie_set_for_parent_domain.rb +3 -3
  29. data/components/checks/passive/grep/hsts.rb +6 -3
  30. data/components/checks/passive/grep/http_only_cookies.rb +3 -3
  31. data/components/checks/passive/grep/insecure_cookies.rb +2 -2
  32. data/components/checks/passive/grep/insecure_cors_policy.rb +6 -4
  33. data/components/checks/passive/grep/x_frame_options.rb +6 -4
  34. data/components/checks/passive/htaccess_limit.rb +6 -2
  35. data/components/checks/passive/http_put.rb +8 -4
  36. data/components/checks/passive/interesting_responses.rb +3 -2
  37. data/components/checks/passive/localstart_asp.rb +6 -2
  38. data/components/checks/passive/origin_spoof_access_restriction_bypass.rb +5 -1
  39. data/components/checks/passive/xst.rb +6 -2
  40. data/components/fingerprinters/frameworks/aspx_mvc.rb +43 -0
  41. data/components/fingerprinters/frameworks/cakephp.rb +28 -0
  42. data/components/fingerprinters/frameworks/cherrypy.rb +31 -0
  43. data/components/fingerprinters/frameworks/django.rb +33 -0
  44. data/components/fingerprinters/frameworks/jsf.rb +30 -0
  45. data/components/fingerprinters/frameworks/rack.rb +5 -7
  46. data/components/fingerprinters/frameworks/rails.rb +43 -0
  47. data/components/fingerprinters/languages/aspx.rb +11 -11
  48. data/components/fingerprinters/languages/{jsp.rb → java.rb} +11 -7
  49. data/components/fingerprinters/languages/php.rb +6 -6
  50. data/components/fingerprinters/languages/python.rb +14 -6
  51. data/components/fingerprinters/languages/ruby.rb +3 -5
  52. data/components/fingerprinters/servers/apache.rb +5 -4
  53. data/components/fingerprinters/servers/gunicorn.rb +33 -0
  54. data/components/fingerprinters/servers/jetty.rb +1 -1
  55. data/components/fingerprinters/servers/tomcat.rb +11 -4
  56. data/components/path_extractors/anchors.rb +5 -12
  57. data/components/path_extractors/areas.rb +5 -13
  58. data/components/path_extractors/comments.rb +5 -3
  59. data/components/path_extractors/data_url.rb +21 -0
  60. data/components/path_extractors/forms.rb +5 -13
  61. data/components/path_extractors/frames.rb +6 -13
  62. data/components/path_extractors/generic.rb +3 -12
  63. data/components/path_extractors/links.rb +5 -13
  64. data/components/path_extractors/meta_refresh.rb +5 -13
  65. data/components/path_extractors/scripts.rb +8 -14
  66. data/components/plugins/autologin.rb +17 -5
  67. data/components/plugins/defaults/meta/remedies/discovery.rb +11 -29
  68. data/components/plugins/login_script.rb +40 -10
  69. data/components/plugins/metrics.rb +235 -0
  70. data/components/plugins/proxy.rb +21 -4
  71. data/components/plugins/proxy/panel/page_accordion.html.erb +34 -2
  72. data/components/plugins/restrict_to_dom_state.rb +70 -0
  73. data/components/plugins/vector_feed.rb +38 -9
  74. data/components/reporters/plugin_formatters/html/metrics.rb +290 -0
  75. data/components/reporters/plugin_formatters/stdout/metrics.rb +80 -0
  76. data/components/reporters/plugin_formatters/xml/metrics.rb +29 -0
  77. data/components/reporters/stdout.rb +4 -2
  78. data/components/reporters/xml.rb +4 -4
  79. data/components/reporters/xml/schema.xsd +95 -0
  80. data/lib/arachni.rb +2 -0
  81. data/lib/arachni/browser.rb +132 -77
  82. data/lib/arachni/browser/javascript.rb +173 -45
  83. data/lib/arachni/browser/javascript/scripts/dom_monitor.js +81 -6
  84. data/lib/arachni/browser/javascript/scripts/taint_tracer.js +31 -3
  85. data/lib/arachni/browser_cluster.rb +41 -15
  86. data/lib/arachni/browser_cluster/job.rb +4 -0
  87. data/lib/arachni/browser_cluster/jobs/resource_exploration.rb +0 -9
  88. data/lib/arachni/browser_cluster/worker.rb +8 -5
  89. data/lib/arachni/check/auditor.rb +20 -8
  90. data/lib/arachni/check/base.rb +38 -6
  91. data/lib/arachni/element/base.rb +18 -1
  92. data/lib/arachni/element/capabilities/analyzable/differential.rb +0 -1
  93. data/lib/arachni/element/capabilities/analyzable/taint.rb +40 -10
  94. data/lib/arachni/element/capabilities/analyzable/timeout.rb +27 -23
  95. data/lib/arachni/element/capabilities/auditable/dom.rb +22 -0
  96. data/lib/arachni/element/capabilities/inputtable.rb +6 -2
  97. data/lib/arachni/element/capabilities/submittable.rb +1 -1
  98. data/lib/arachni/element/cookie.rb +37 -23
  99. data/lib/arachni/element/cookie/capabilities/mutable.rb +6 -6
  100. data/lib/arachni/element/cookie/dom.rb +0 -8
  101. data/lib/arachni/element/form.rb +28 -14
  102. data/lib/arachni/element/form/capabilities/auditable.rb +2 -2
  103. data/lib/arachni/element/form/capabilities/mutable.rb +5 -5
  104. data/lib/arachni/element/form/dom.rb +0 -8
  105. data/lib/arachni/element/generic_dom.rb +1 -1
  106. data/lib/arachni/element/json.rb +2 -1
  107. data/lib/arachni/element/json/capabilities/inputtable.rb +6 -6
  108. data/lib/arachni/element/json/capabilities/mutable.rb +1 -1
  109. data/lib/arachni/element/link.rb +13 -16
  110. data/lib/arachni/element/link/dom.rb +1 -14
  111. data/lib/arachni/element/link_template.rb +3 -2
  112. data/lib/arachni/element/link_template/dom.rb +0 -16
  113. data/lib/arachni/element/server.rb +51 -9
  114. data/lib/arachni/element/xml.rb +1 -0
  115. data/lib/arachni/ethon/easy.rb +4 -1
  116. data/lib/arachni/framework/parts/audit.rb +26 -77
  117. data/lib/arachni/framework/parts/browser.rb +50 -55
  118. data/lib/arachni/framework/parts/check.rb +4 -3
  119. data/lib/arachni/framework/parts/data.rb +41 -6
  120. data/lib/arachni/framework/parts/state.rb +16 -7
  121. data/lib/arachni/http/client.rb +66 -38
  122. data/lib/arachni/http/client/dynamic_404_handler.rb +46 -14
  123. data/lib/arachni/http/headers.rb +22 -10
  124. data/lib/arachni/http/proxy_server.rb +67 -22
  125. data/lib/arachni/http/proxy_server/ssl-interceptor-cacert.pem +34 -0
  126. data/lib/arachni/http/proxy_server/ssl-interceptor-cakey.pem +51 -0
  127. data/lib/arachni/http/request.rb +71 -18
  128. data/lib/arachni/issue.rb +17 -3
  129. data/lib/arachni/option_groups/browser_cluster.rb +34 -1
  130. data/lib/arachni/option_groups/http.rb +1 -1
  131. data/lib/arachni/page.rb +26 -13
  132. data/lib/arachni/page/dom/transition.rb +2 -2
  133. data/lib/arachni/parser.rb +28 -11
  134. data/lib/arachni/platform/fingerprinter.rb +5 -0
  135. data/lib/arachni/platform/manager.rb +65 -32
  136. data/lib/arachni/plugin/base.rb +8 -0
  137. data/lib/arachni/processes/instances.rb +25 -11
  138. data/lib/arachni/reporter/manager.rb +2 -2
  139. data/lib/arachni/rpc/client/instance.rb +4 -0
  140. data/lib/arachni/rpc/server/framework/master.rb +3 -3
  141. data/lib/arachni/rpc/server/framework/multi_instance.rb +0 -8
  142. data/lib/arachni/rpc/server/instance.rb +2 -1
  143. data/lib/arachni/ruby/array.rb +5 -0
  144. data/lib/arachni/ruby/hash.rb +5 -0
  145. data/lib/arachni/ruby/string.rb +2 -3
  146. data/lib/arachni/session.rb +32 -6
  147. data/lib/arachni/state/framework.rb +6 -2
  148. data/lib/arachni/support/cache.rb +1 -0
  149. data/lib/arachni/support/cache/base.rb +12 -8
  150. data/lib/arachni/support/cache/least_recently_pushed.rb +29 -0
  151. data/lib/arachni/support/cache/least_recently_used.rb +5 -8
  152. data/lib/arachni/support/cache/preference.rb +1 -1
  153. data/lib/arachni/support/cache/random_replacement.rb +1 -25
  154. data/lib/arachni/support/database/queue.rb +21 -8
  155. data/lib/arachni/support/lookup/base.rb +7 -1
  156. data/lib/arachni/support/mixins/observable.rb +3 -1
  157. data/lib/arachni/support/profiler.rb +51 -10
  158. data/lib/arachni/support/signature.rb +11 -2
  159. data/lib/arachni/trainer.rb +8 -2
  160. data/lib/arachni/uri.rb +28 -25
  161. data/lib/arachni/uri/scope.rb +1 -1
  162. data/lib/arachni/utilities.rb +8 -0
  163. data/lib/arachni/watir/element.rb +1 -1
  164. data/lib/version +1 -1
  165. data/spec/arachni/browser/javascript/dom_monitor_spec.rb +388 -53
  166. data/spec/arachni/browser/javascript/taint_tracer_spec.rb +41 -0
  167. data/spec/arachni/browser/javascript_spec.rb +235 -61
  168. data/spec/arachni/browser_cluster/jobs/resource_exploration_spec.rb +0 -9
  169. data/spec/arachni/browser_cluster_spec.rb +58 -10
  170. data/spec/arachni/browser_spec.rb +170 -26
  171. data/spec/arachni/check/auditor_spec.rb +22 -3
  172. data/spec/arachni/check/base_spec.rb +84 -0
  173. data/spec/arachni/element/body_spec.rb +1 -1
  174. data/spec/arachni/element/capabilities/analyzable/taint_spec.rb +3 -3
  175. data/spec/arachni/element/capabilities/analyzable/timeout_spec.rb +1 -1
  176. data/spec/arachni/element/cookie/dom_spec.rb +0 -9
  177. data/spec/arachni/element/cookie_spec.rb +85 -0
  178. data/spec/arachni/element/form/dom_spec.rb +0 -9
  179. data/spec/arachni/element/form_spec.rb +46 -3
  180. data/spec/arachni/element/json_spec.rb +20 -0
  181. data/spec/arachni/element/link/dom_spec.rb +0 -9
  182. data/spec/arachni/element/link_spec.rb +40 -15
  183. data/spec/arachni/element/link_template/dom_spec.rb +0 -8
  184. data/spec/arachni/element/link_template_spec.rb +2 -6
  185. data/spec/arachni/element/server_spec.rb +94 -8
  186. data/spec/arachni/element/xml_spec.rb +20 -0
  187. data/spec/arachni/framework/parts/audit_spec.rb +12 -14
  188. data/spec/arachni/framework/parts/browser_spec.rb +0 -171
  189. data/spec/arachni/framework/parts/platform_spec.rb +14 -8
  190. data/spec/arachni/framework/parts/report_spec.rb +1 -1
  191. data/spec/arachni/framework/parts/state_spec.rb +0 -9
  192. data/spec/arachni/http/client/dynamic_404_handlers_spec.rb +19 -0
  193. data/spec/arachni/http/client_spec.rb +169 -42
  194. data/spec/arachni/http/headers_spec.rb +18 -0
  195. data/spec/arachni/http/request_spec.rb +23 -0
  196. data/spec/arachni/issue_spec.rb +17 -6
  197. data/spec/arachni/page_spec.rb +22 -2
  198. data/spec/arachni/parser_spec.rb +5 -0
  199. data/spec/arachni/platform/manager_spec.rb +57 -25
  200. data/spec/arachni/reporter/manager_spec.rb +26 -0
  201. data/spec/arachni/rpc/server/active_options_spec.rb +9 -4
  202. data/spec/arachni/state/framework_spec.rb +2 -8
  203. data/spec/arachni/support/cache/least_recently_pushed_spec.rb +90 -0
  204. data/spec/arachni/support/cache/least_recently_used_spec.rb +5 -13
  205. data/spec/arachni/support/database/queue_spec.rb +7 -0
  206. data/spec/arachni/support/mixins/observable_spec.rb +15 -1
  207. data/spec/arachni/trainer_spec.rb +2 -2
  208. data/spec/components/checks/active/code_injection_timing_spec.rb +1 -1
  209. data/spec/components/checks/active/file_inclusion_spec.rb +6 -6
  210. data/spec/components/checks/active/path_traversal_spec.rb +2 -2
  211. data/spec/components/checks/active/source_code_disclosure_spec.rb +2 -2
  212. data/spec/components/checks/active/unvalidated_redirect_spec.rb +6 -6
  213. data/spec/components/checks/active/xss_dom_inputs_spec.rb +3 -5
  214. data/spec/components/checks/active/xss_dom_script_context_spec.rb +1 -1
  215. data/spec/components/checks/active/xss_spec.rb +5 -5
  216. data/spec/components/checks/passive/common_admin_interfaces_spec.rb +15 -0
  217. data/spec/components/checks/passive/interesting_responses_spec.rb +14 -1
  218. data/spec/components/fingerprinters/frameworks/aspx_mvc_spec.rb +31 -0
  219. data/spec/components/fingerprinters/frameworks/cakephp_spec.rb +22 -0
  220. data/spec/components/fingerprinters/frameworks/cherrypy_spec.rb +28 -0
  221. data/spec/components/fingerprinters/frameworks/django_spec.rb +37 -0
  222. data/spec/components/fingerprinters/frameworks/jsf_spec.rb +27 -0
  223. data/spec/components/fingerprinters/frameworks/rack_spec.rb +11 -14
  224. data/spec/components/fingerprinters/frameworks/rails_spec.rb +53 -0
  225. data/spec/components/fingerprinters/languages/asp_spec.rb +7 -9
  226. data/spec/components/fingerprinters/languages/aspx_spec.rb +10 -24
  227. data/spec/components/fingerprinters/languages/java_spec.rb +88 -0
  228. data/spec/components/fingerprinters/languages/php_spec.rb +19 -12
  229. data/spec/components/fingerprinters/languages/python_spec.rb +22 -9
  230. data/spec/components/fingerprinters/languages/ruby.rb +6 -4
  231. data/spec/components/fingerprinters/os/bsd_spec.rb +6 -4
  232. data/spec/components/fingerprinters/os/linux_spec.rb +6 -4
  233. data/spec/components/fingerprinters/os/solaris_spec.rb +6 -4
  234. data/spec/components/fingerprinters/os/unix_spec.rb +6 -4
  235. data/spec/components/fingerprinters/os/windows_spec.rb +6 -4
  236. data/spec/components/fingerprinters/servers/apache_spec.rb +15 -4
  237. data/spec/components/fingerprinters/servers/gunicorn_spec.rb +28 -0
  238. data/spec/components/fingerprinters/servers/iis_spec.rb +6 -6
  239. data/spec/components/fingerprinters/servers/jetty_spec.rb +6 -6
  240. data/spec/components/fingerprinters/servers/nginx_spec.rb +6 -4
  241. data/spec/components/fingerprinters/servers/tomcat_spec.rb +15 -6
  242. data/spec/components/path_extractors/data_url_spec.rb +19 -0
  243. data/spec/components/plugins/autologin_spec.rb +23 -0
  244. data/spec/components/plugins/login_script_spec.rb +112 -24
  245. data/spec/components/plugins/restrict_to_dom_state_spec.rb +16 -0
  246. data/spec/components/plugins/vector_feed_spec.rb +39 -1
  247. data/spec/support/factories/page/dom.rb +9 -4
  248. data/spec/support/factories/page/dom/transition.rb +31 -9
  249. data/spec/support/factories/scan_report.rb +8 -6
  250. data/spec/support/fixtures/empty/placeholder +0 -0
  251. data/spec/support/fixtures/report.afr +0 -0
  252. data/spec/support/fixtures/reporters/manager_spec/error.rb +18 -0
  253. data/spec/support/servers/arachni/browser.rb +117 -11
  254. data/spec/support/servers/arachni/browser/javascript/dom_monitor.rb +148 -4
  255. data/spec/support/servers/arachni/check/auditor.rb +4 -0
  256. data/spec/support/servers/arachni/element/cookie/cookie_dom.rb +1 -1
  257. data/spec/support/servers/arachni/http/client.rb +5 -0
  258. data/spec/support/servers/arachni/http/client/dynamic_404_handler.rb +13 -0
  259. data/spec/support/servers/checks/active/code_injection_timing.rb +1 -1
  260. data/spec/support/servers/checks/active/file_inclusion.rb +2 -2
  261. data/spec/support/servers/checks/active/path_traversal.rb +2 -2
  262. data/spec/support/servers/checks/active/source_code_disclosure.rb +40 -33
  263. data/spec/support/servers/checks/active/trainer_check.rb +9 -10
  264. data/spec/support/servers/checks/active/unvalidated_redirect_dom.rb +7 -4
  265. data/spec/support/servers/checks/active/xss.rb +35 -0
  266. data/spec/support/servers/checks/active/xss_dom.rb +1 -1
  267. data/spec/support/servers/checks/active/xss_dom_inputs.rb +24 -0
  268. data/spec/support/servers/checks/active/xss_dom_script_context.rb +1 -1
  269. data/spec/support/servers/checks/passive/common_admin_interfaces.rb +6 -0
  270. data/spec/support/servers/plugins/autologin.rb +9 -0
  271. data/spec/support/servers/plugins/restrict_to_dom_state.rb +4 -0
  272. data/spec/support/shared/element/base.rb +42 -0
  273. data/spec/support/shared/element/capabilities/auditable.rb +4 -4
  274. data/spec/support/shared/element/capabilities/auditable/dom.rb +26 -0
  275. data/spec/support/shared/element/capabilities/inputtable.rb +16 -11
  276. data/spec/support/shared/element/capabilities/submitable.rb +7 -2
  277. data/spec/support/shared/fingerprinter.rb +8 -0
  278. data/spec/support/shared/path_extractor.rb +1 -1
  279. data/ui/cli/framework.rb +3 -3
  280. data/ui/cli/framework/option_parser.rb +9 -0
  281. data/ui/cli/output.rb +9 -0
  282. data/ui/cli/reporter.rb +5 -2
  283. data/ui/cli/utilities.rb +4 -2
  284. metadata +76 -17
  285. data/lib/arachni/http/proxy_server/ssl-interceptor-cert.pem +0 -34
  286. data/lib/arachni/http/proxy_server/ssl-interceptor-pkey.pem +0 -51
  287. data/spec/components/fingerprinters/languages/jsp_spec.rb +0 -56
@@ -30,18 +30,20 @@ module Browser
30
30
  state.set_status_message :browser_cluster_startup
31
31
  end
32
32
 
33
- @browser_cluster ||= BrowserCluster.new
33
+ @browser_cluster ||= BrowserCluster.new(
34
+ on_pop: proc do
35
+ next if !pause?
36
+
37
+ print_debug 'Blocking browser cluster on pop.'
38
+ wait_if_paused
39
+ end
40
+ )
34
41
  state.clear_status_messages
42
+
35
43
  @browser_cluster
36
44
  end
37
45
  end
38
46
 
39
- def browser
40
- return if !use_browsers?
41
-
42
- @browser ||= Arachni::Browser.new( store_pages: false )
43
- end
44
-
45
47
  # @return [Bool]
46
48
  # `true` if the environment has a browser, `false` otherwise.
47
49
  def host_has_browser?
@@ -58,36 +60,6 @@ module Browser
58
60
  browser_cluster.skip_states( browser_job.id )
59
61
  end
60
62
 
61
- # @private
62
- def apply_dom_metadata( page )
63
- return false if page.dom.depth > 0 || !page.has_script? ||
64
- !browser
65
-
66
- # This optimization only affects Form::DOM elements, so don't bother
67
- # if none of the checks are interested in any of them.
68
- return false if !checks.values.find do |c|
69
- c.check? page, [Element::Form::DOM, Element::Cookie::DOM]
70
- end
71
-
72
- begin
73
- bp = browser.load( page ).to_page
74
- rescue Selenium::WebDriver::Error::WebDriverError,
75
- Watir::Exception::Error => e
76
- print_debug "Could not apply metadata to '#{page.dom.url}'" <<
77
- " because: #{e} [#{e.class}"
78
- return
79
- end
80
-
81
- # Request timeout or some other failure...
82
- return if bp.code == 0
83
-
84
- page.import_metadata( bp, :skip_dom )
85
-
86
- true
87
- ensure
88
- browser.clear_buffers if browser
89
- end
90
-
91
63
  def use_browsers?
92
64
  options.browser_cluster.pool_size > 0 &&
93
65
  options.scope.dom_depth_limit > 0 && host_has_browser?
@@ -95,13 +67,6 @@ module Browser
95
67
 
96
68
  private
97
69
 
98
- def shutdown_browser
99
- return if !@browser
100
-
101
- @browser.shutdown
102
- @browser = nil
103
- end
104
-
105
70
  def shutdown_browser_cluster
106
71
  return if !@browser_cluster
107
72
 
@@ -125,11 +90,6 @@ module Browser
125
90
  synchronize do
126
91
  return if !push_to_page_queue page
127
92
 
128
- pushed_paths = nil
129
- if crawl?
130
- pushed_paths = push_paths_from_page( page ).size
131
- end
132
-
133
93
  print_status "Got new page from the browser-cluster: #{page.dom.url}"
134
94
  print_info "DOM depth: #{page.dom.depth} (Limit: #{options.scope.dom_depth_limit})"
135
95
 
@@ -137,10 +97,6 @@ module Browser
137
97
  print_info ' Transitions:'
138
98
  page.dom.print_transitions( method(:print_info), ' ' )
139
99
  end
140
-
141
- if pushed_paths
142
- print_info " -- Analysis resulted in #{pushed_paths} usable paths."
143
- end
144
100
  end
145
101
  end
146
102
 
@@ -154,13 +110,52 @@ module Browser
154
110
  Options.scope.dom_depth_limit.to_i < page.dom.depth + 1 ||
155
111
  !page.has_script?
156
112
 
157
- browser_cluster.queue( browser_job.forward( resource: page ) ) do |response|
158
- handle_browser_page response.page
113
+ # We need to schedule a separate job for applying metadata because it
114
+ # needs to have a clean state.
115
+ schedule_dom_metadata_application( page )
116
+
117
+ browser_cluster.queue( browser_job.forward( resource: page ) ) do |result|
118
+ handle_browser_page result.page
159
119
  end
160
120
 
161
121
  true
162
122
  end
163
123
 
124
+ def schedule_dom_metadata_application( page )
125
+ return if page.dom.depth > 0
126
+ return if page.metadata.map { |_, data| data['skip_dom'].values }.
127
+ flatten.compact.any?
128
+
129
+ # This optimization only affects Form & Cookie DOM elements,
130
+ # so don't bother if none of the checks are interested in them.
131
+ return if !checks.values.
132
+ find { |c| c.check? page, [Element::Form::DOM, Element::Cookie::DOM], true }
133
+
134
+ page.clear_cache
135
+
136
+ browser_cluster.with_browser do |browser|
137
+ apply_dom_metadata( browser, page )
138
+ end
139
+ end
140
+
141
+ def apply_dom_metadata( browser, page )
142
+ bp = nil
143
+
144
+ begin
145
+ bp = browser.load( page ).to_page
146
+ rescue Selenium::WebDriver::Error::WebDriverError,
147
+ Watir::Exception::Error => e
148
+ print_debug "Could not apply metadata to '#{page.dom.url}'" <<
149
+ " because: #{e} [#{e.class}"
150
+ return
151
+ end
152
+
153
+ # Request timeout or some other failure...
154
+ return if bp.code == 0
155
+
156
+ handle_browser_page bp
157
+ end
158
+
164
159
  def browser_job
165
160
  # We'll recycle the same job since all of them will have the same
166
161
  # callback. This will force the BrowserCluster to use the same block
@@ -67,11 +67,12 @@ module Check
67
67
  # Check to run.
68
68
  # @param [Page] page
69
69
  def check_page( check, page )
70
+ ps = page.platforms.to_a
71
+
70
72
  # If we've been given platforms which the check doesn't support don't
71
73
  # even bother running it.
72
- if !check.supports_platforms?( Options.platforms )
73
- print_info "Check #{check.shortname} does not support: " <<
74
- Options.platforms.join( ' + ' )
74
+ if !check.supports_platforms?( ps )
75
+ print_info "Check #{check.shortname} does not support: #{ps.join( ' + ' )}"
75
76
  return false
76
77
  end
77
78
 
@@ -27,8 +27,9 @@ module Data
27
27
  # `true` if push was successful, `false` if the `page` matched any
28
28
  # exclusion criteria or has already been seen.
29
29
  def push_to_page_queue( page, force = false )
30
- return false if !force && (!accepts_more_pages? || state.page_seen?( page ) ||
31
- page.scope.out? || page.scope.redundant?( true ))
30
+ return false if !force && (!accepts_more_pages? ||
31
+ state.page_seen?( page ) || page.scope.out? ||
32
+ page.scope.redundant?( true ))
32
33
 
33
34
  # We want to update from the already loaded page cache (if there is one)
34
35
  # as we have to store the page anyways (needs to go through Browser analysis)
@@ -40,6 +41,7 @@ module Data
40
41
  # some other component; however, it wouldn't be the end of the world if
41
42
  # that were to happen.
42
43
  ElementFilter.update_from_page_cache page
44
+ page.clear_cache
43
45
 
44
46
  data.push_to_page_queue page
45
47
  state.page_seen page
@@ -99,12 +101,24 @@ module Data
99
101
  !url_queue.empty? || !page_queue.empty?
100
102
  end
101
103
 
104
+ # @return [Page, nil]
105
+ # A page if the queues aren't empty, `nil` otherwise.
106
+ def pop_page
107
+ pop_page_from_queue || pop_page_from_url_queue
108
+ end
109
+
110
+ # @return [Page, nil]
111
+ # A page if the queue wasn't empty, `nil` otherwise.
102
112
  def pop_page_from_url_queue( &block )
103
113
  return if url_queue.empty?
104
114
 
105
115
  grabbed_page = nil
106
- Page.from_url( url_queue.pop,
107
- http: { update_cookies: true, performer: self }
116
+ Page.from_url(
117
+ url_queue.pop,
118
+ http: {
119
+ update_cookies: true,
120
+ performer: self
121
+ }
108
122
  ) do |page|
109
123
  @retries[page.url.hash] ||= 0
110
124
 
@@ -125,7 +139,8 @@ module Data
125
139
  @failures << page.url
126
140
 
127
141
  print_error "Giving up trying to audit: #{page.url}"
128
- print_error "Couldn't get a response after #{AUDIT_PAGE_MAX_TRIES} tries: #{page.response.return_message}."
142
+ print_error "Couldn't get a response after #{AUDIT_PAGE_MAX_TRIES}" +
143
+ " tries: #{page.response.return_message}."
129
144
  else
130
145
  print_bad "Retrying for: #{page.url} [#{page.response.return_message}]"
131
146
  @retries[page.url.hash] += 1
@@ -135,16 +150,36 @@ module Data
135
150
  grabbed_page = nil
136
151
  block.call grabbed_page if block_given?
137
152
  end
153
+
138
154
  http.run if !block_given?
139
155
  grabbed_page
140
156
  end
141
157
 
142
- # @return [Page]
158
+ # @return [Page, nil]
159
+ # A page if the queue wasn't empty, `nil` otherwise.
143
160
  def pop_page_from_queue
144
161
  return if page_queue.empty?
145
162
  page_queue.pop
146
163
  end
147
164
 
165
+ def replenish_page_queue_from_url_queue
166
+ return if !page_queue.empty?
167
+
168
+ # Number pulled out of my ass, low enough to not add any noticeable
169
+ # stress, hopefully high enough to grab us at least one page that has
170
+ # some workload which will result in HTTP requests which will mask the
171
+ # next replenishing operation.
172
+ [10, page_queue.free_buffer_size].min.times do
173
+ return if url_queue.empty?
174
+
175
+ # We push directly to the queue instead of using #push_to_page_queue
176
+ # because it's too early to deduplicate.
177
+ pop_page_from_url_queue { |p| page_queue << p }
178
+ end
179
+
180
+ !url_queue.empty?
181
+ end
182
+
148
183
  def add_to_sitemap( page )
149
184
  data.add_page_to_sitemap( page )
150
185
  end
@@ -65,6 +65,14 @@ module State
65
65
  def initialize
66
66
  super
67
67
 
68
+ Element::Capabilities::Auditable.skip_like do |element|
69
+ if pause?
70
+ print_debug "Blocking on element audit: #{element.audit_id}"
71
+ end
72
+
73
+ wait_if_paused
74
+ end
75
+
68
76
  state.status = :ready
69
77
  end
70
78
 
@@ -96,13 +104,14 @@ module State
96
104
  return if @cleaned_up
97
105
  @cleaned_up = true
98
106
 
107
+ state.force_resume
108
+
99
109
  state.status = :cleanup
100
110
 
101
111
  sitemap.merge!( browser_sitemap )
102
112
 
103
113
  if shutdown_browsers
104
114
  state.set_status_message :browser_cluster_shutdown
105
- shutdown_browser
106
115
  shutdown_browser_cluster
107
116
  end
108
117
 
@@ -128,6 +137,11 @@ module State
128
137
  true
129
138
  end
130
139
 
140
+ # @private
141
+ def reset_trainer
142
+ @trainer = Trainer.new( self )
143
+ end
144
+
131
145
  # @note Prefer this from {.reset} if you already have an instance.
132
146
  # @note You should first reset {Arachni::Options}.
133
147
  #
@@ -319,11 +333,6 @@ module State
319
333
  state.suspended?
320
334
  end
321
335
 
322
- # @private
323
- def reset_trainer
324
- @trainer = Trainer.new( self )
325
- end
326
-
327
336
  private
328
337
 
329
338
  # @note Must be called before calling any audit methods.
@@ -370,7 +379,7 @@ module State
370
379
  new_element = true
371
380
  end
372
381
 
373
- if e.respond_to?( :dom ) && e.dom
382
+ if page.dom.depth > 0 && e.respond_to?( :dom ) && e.dom
374
383
  if !state.element_checked?( e.dom )
375
384
  state.element_checked e.dom
376
385
  new_element = true
@@ -140,7 +140,11 @@ class Client
140
140
  headers.merge!( Options.http.request_headers )
141
141
 
142
142
  cookie_jar.load( Options.http.cookie_jar_filepath ) if Options.http.cookie_jar_filepath
143
- update_cookies( Options.http.cookies )
143
+
144
+ Options.http.cookies.each do |name, value|
145
+ update_cookies( name => value )
146
+ end
147
+
144
148
  update_cookies( Options.http.cookie_string ) if Options.http.cookie_string
145
149
 
146
150
  reset_burst_info
@@ -465,14 +469,30 @@ class Client
465
469
  @running = true
466
470
 
467
471
  reset_burst_info
472
+
473
+ # Lots of new objects are about to be generated, make sure that old ones
474
+ # have been collected to prevent RAM spikes.
475
+ gc
476
+
468
477
  client_run
469
478
 
479
+ # Collect the new objects as well.
480
+ gc
481
+
470
482
  @queue_size = 0
471
483
  @running = false
472
484
 
473
485
  @burst_runtime += Time.now - @burst_runtime_start
474
486
  @total_runtime += @burst_runtime
475
487
  end
488
+
489
+ def gc
490
+ # Don't GC after every little run, only do it when we're past the
491
+ # maximum queue size.
492
+ return if @queue_size < Options.http.request_queue_size
493
+
494
+ GC.start
495
+ end
476
496
 
477
497
  def reset_burst_info
478
498
  @burst_response_time_sum = 0
@@ -501,46 +521,12 @@ class Client
501
521
  print_debug_level_3 "Headers: #{request.headers}"
502
522
  print_debug_level_3 "Cookies: #{request.cookies}"
503
523
  print_debug_level_3 "Train?: #{request.train?}"
524
+ print_debug_level_3 "Fingerprint?: #{request.fingerprint?}"
504
525
  print_debug_level_3 '------------'
505
526
  end
506
527
 
507
528
  if add_callbacks
508
- request.on_complete do |response|
509
- synchronize do
510
- @response_count += 1
511
- @burst_response_count += 1
512
- @burst_response_time_sum += response.time
513
- @total_response_time_sum += response.time
514
-
515
- if Platform::Manager.fingerprint?( response )
516
- # Force a fingerprint by converting the Response to a Page object.
517
- response.to_page
518
- end
519
-
520
- notify_on_complete( response )
521
-
522
- parse_and_set_cookies( response ) if request.update_cookies?
523
-
524
- if debug_level_3?
525
- print_debug_level_3 '------------'
526
- print_debug_level_3 "Got response for request ID#: #{response.request.id}"
527
- print_debug_level_3 "Performer: #{response.request.performer.inspect}"
528
- print_debug_level_3 "Status: #{response.code}"
529
- print_debug_level_3 "Code: #{response.return_code}"
530
- print_debug_level_3 "Message: #{response.return_message}"
531
- print_debug_level_3 "URL: #{response.url}"
532
- print_debug_level_3 "Headers:\n#{response.headers_string}"
533
- print_debug_level_3 "Parsed headers: #{response.headers}"
534
- end
535
-
536
- if response.timed_out?
537
- print_debug_level_3 "Request timed-out! -- ID# #{response.request.id}"
538
- @time_out_count += 1
539
- end
540
-
541
- print_debug_level_3 '------------'
542
- end
543
- end
529
+ request.on_complete( &method(:global_on_complete) )
544
530
  end
545
531
 
546
532
  synchronize { @request_count += 1 }
@@ -559,6 +545,47 @@ class Client
559
545
  request
560
546
  end
561
547
 
548
+ def global_on_complete( response )
549
+ request = response.request
550
+
551
+ synchronize do
552
+ @response_count += 1
553
+ @burst_response_count += 1
554
+ @burst_response_time_sum += response.time
555
+ @total_response_time_sum += response.time
556
+
557
+ if response.request.fingerprint? &&
558
+ Platform::Manager.fingerprint?( response )
559
+
560
+ # Force a fingerprint by converting the Response to a Page object.
561
+ response.to_page
562
+ end
563
+
564
+ notify_on_complete( response )
565
+
566
+ parse_and_set_cookies( response ) if request.update_cookies?
567
+
568
+ if debug_level_3?
569
+ print_debug_level_3 '------------'
570
+ print_debug_level_3 "Got response for request ID#: #{response.request.id}\n#{response.request}"
571
+ print_debug_level_3 "Performer: #{response.request.performer.inspect}"
572
+ print_debug_level_3 "Status: #{response.code}"
573
+ print_debug_level_3 "Code: #{response.return_code}"
574
+ print_debug_level_3 "Message: #{response.return_message}"
575
+ print_debug_level_3 "URL: #{response.url}"
576
+ print_debug_level_3 "Headers:\n#{response.headers_string}"
577
+ print_debug_level_3 "Parsed headers: #{response.headers}"
578
+ end
579
+
580
+ if response.timed_out?
581
+ print_debug_level_3 "Request timed-out! -- ID# #{response.request.id}"
582
+ @time_out_count += 1
583
+ end
584
+
585
+ print_debug_level_3 '------------'
586
+ end
587
+ end
588
+
562
589
  def client_initialize
563
590
  @hydra = Typhoeus::Hydra.new(
564
591
  max_concurrency: Options.http.request_concurrency || MAX_CONCURRENCY
@@ -566,7 +593,8 @@ class Client
566
593
  end
567
594
 
568
595
  def client_run
569
- @hydra.run
596
+ # Can get Ethon select errors.
597
+ exception_jail( false ) { @hydra.run }
570
598
  end
571
599
 
572
600
  def client_abort