arachni 1.1 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +159 -0
  3. data/LICENSE.md +126 -196
  4. data/README.md +32 -24
  5. data/arachni.gemspec +7 -7
  6. data/components/checks/active/code_injection_timing.rb +3 -3
  7. data/components/checks/active/csrf.rb +2 -2
  8. data/components/checks/active/file_inclusion.rb +6 -7
  9. data/components/checks/active/os_cmd_injection.rb +3 -3
  10. data/components/checks/active/path_traversal.rb +7 -7
  11. data/components/checks/active/response_splitting.rb +9 -4
  12. data/components/checks/active/session_fixation.rb +7 -3
  13. data/components/checks/active/source_code_disclosure.rb +5 -5
  14. data/components/checks/active/unvalidated_redirect.rb +12 -3
  15. data/components/checks/active/unvalidated_redirect_dom.rb +3 -3
  16. data/components/checks/active/xss.rb +23 -10
  17. data/components/checks/active/xss_dom_inputs.rb +113 -11
  18. data/components/checks/active/xxe.rb +3 -3
  19. data/components/checks/passive/backdoors.rb +6 -5
  20. data/components/checks/passive/backup_directories.rb +6 -6
  21. data/components/checks/passive/backup_files.rb +6 -6
  22. data/components/checks/passive/common_admin_interfaces.rb +58 -0
  23. data/components/checks/passive/common_admin_interfaces/admin-panels.txt +49 -0
  24. data/components/checks/passive/common_directories/directories.txt +0 -16
  25. data/components/checks/passive/common_files.rb +6 -5
  26. data/components/checks/passive/common_files/filenames.txt +0 -2
  27. data/components/checks/passive/directory_listing.rb +6 -6
  28. data/components/checks/passive/grep/cookie_set_for_parent_domain.rb +3 -3
  29. data/components/checks/passive/grep/hsts.rb +6 -3
  30. data/components/checks/passive/grep/http_only_cookies.rb +3 -3
  31. data/components/checks/passive/grep/insecure_cookies.rb +2 -2
  32. data/components/checks/passive/grep/insecure_cors_policy.rb +6 -4
  33. data/components/checks/passive/grep/x_frame_options.rb +6 -4
  34. data/components/checks/passive/htaccess_limit.rb +6 -2
  35. data/components/checks/passive/http_put.rb +8 -4
  36. data/components/checks/passive/interesting_responses.rb +3 -2
  37. data/components/checks/passive/localstart_asp.rb +6 -2
  38. data/components/checks/passive/origin_spoof_access_restriction_bypass.rb +5 -1
  39. data/components/checks/passive/xst.rb +6 -2
  40. data/components/fingerprinters/frameworks/aspx_mvc.rb +43 -0
  41. data/components/fingerprinters/frameworks/cakephp.rb +28 -0
  42. data/components/fingerprinters/frameworks/cherrypy.rb +31 -0
  43. data/components/fingerprinters/frameworks/django.rb +33 -0
  44. data/components/fingerprinters/frameworks/jsf.rb +30 -0
  45. data/components/fingerprinters/frameworks/rack.rb +5 -7
  46. data/components/fingerprinters/frameworks/rails.rb +43 -0
  47. data/components/fingerprinters/languages/aspx.rb +11 -11
  48. data/components/fingerprinters/languages/{jsp.rb → java.rb} +11 -7
  49. data/components/fingerprinters/languages/php.rb +6 -6
  50. data/components/fingerprinters/languages/python.rb +14 -6
  51. data/components/fingerprinters/languages/ruby.rb +3 -5
  52. data/components/fingerprinters/servers/apache.rb +5 -4
  53. data/components/fingerprinters/servers/gunicorn.rb +33 -0
  54. data/components/fingerprinters/servers/jetty.rb +1 -1
  55. data/components/fingerprinters/servers/tomcat.rb +11 -4
  56. data/components/path_extractors/anchors.rb +5 -12
  57. data/components/path_extractors/areas.rb +5 -13
  58. data/components/path_extractors/comments.rb +5 -3
  59. data/components/path_extractors/data_url.rb +21 -0
  60. data/components/path_extractors/forms.rb +5 -13
  61. data/components/path_extractors/frames.rb +6 -13
  62. data/components/path_extractors/generic.rb +3 -12
  63. data/components/path_extractors/links.rb +5 -13
  64. data/components/path_extractors/meta_refresh.rb +5 -13
  65. data/components/path_extractors/scripts.rb +8 -14
  66. data/components/plugins/autologin.rb +17 -5
  67. data/components/plugins/defaults/meta/remedies/discovery.rb +11 -29
  68. data/components/plugins/login_script.rb +40 -10
  69. data/components/plugins/metrics.rb +235 -0
  70. data/components/plugins/proxy.rb +21 -4
  71. data/components/plugins/proxy/panel/page_accordion.html.erb +34 -2
  72. data/components/plugins/restrict_to_dom_state.rb +70 -0
  73. data/components/plugins/vector_feed.rb +38 -9
  74. data/components/reporters/plugin_formatters/html/metrics.rb +290 -0
  75. data/components/reporters/plugin_formatters/stdout/metrics.rb +80 -0
  76. data/components/reporters/plugin_formatters/xml/metrics.rb +29 -0
  77. data/components/reporters/stdout.rb +4 -2
  78. data/components/reporters/xml.rb +4 -4
  79. data/components/reporters/xml/schema.xsd +95 -0
  80. data/lib/arachni.rb +2 -0
  81. data/lib/arachni/browser.rb +132 -77
  82. data/lib/arachni/browser/javascript.rb +173 -45
  83. data/lib/arachni/browser/javascript/scripts/dom_monitor.js +81 -6
  84. data/lib/arachni/browser/javascript/scripts/taint_tracer.js +31 -3
  85. data/lib/arachni/browser_cluster.rb +41 -15
  86. data/lib/arachni/browser_cluster/job.rb +4 -0
  87. data/lib/arachni/browser_cluster/jobs/resource_exploration.rb +0 -9
  88. data/lib/arachni/browser_cluster/worker.rb +8 -5
  89. data/lib/arachni/check/auditor.rb +20 -8
  90. data/lib/arachni/check/base.rb +38 -6
  91. data/lib/arachni/element/base.rb +18 -1
  92. data/lib/arachni/element/capabilities/analyzable/differential.rb +0 -1
  93. data/lib/arachni/element/capabilities/analyzable/taint.rb +40 -10
  94. data/lib/arachni/element/capabilities/analyzable/timeout.rb +27 -23
  95. data/lib/arachni/element/capabilities/auditable/dom.rb +22 -0
  96. data/lib/arachni/element/capabilities/inputtable.rb +6 -2
  97. data/lib/arachni/element/capabilities/submittable.rb +1 -1
  98. data/lib/arachni/element/cookie.rb +37 -23
  99. data/lib/arachni/element/cookie/capabilities/mutable.rb +6 -6
  100. data/lib/arachni/element/cookie/dom.rb +0 -8
  101. data/lib/arachni/element/form.rb +28 -14
  102. data/lib/arachni/element/form/capabilities/auditable.rb +2 -2
  103. data/lib/arachni/element/form/capabilities/mutable.rb +5 -5
  104. data/lib/arachni/element/form/dom.rb +0 -8
  105. data/lib/arachni/element/generic_dom.rb +1 -1
  106. data/lib/arachni/element/json.rb +2 -1
  107. data/lib/arachni/element/json/capabilities/inputtable.rb +6 -6
  108. data/lib/arachni/element/json/capabilities/mutable.rb +1 -1
  109. data/lib/arachni/element/link.rb +13 -16
  110. data/lib/arachni/element/link/dom.rb +1 -14
  111. data/lib/arachni/element/link_template.rb +3 -2
  112. data/lib/arachni/element/link_template/dom.rb +0 -16
  113. data/lib/arachni/element/server.rb +51 -9
  114. data/lib/arachni/element/xml.rb +1 -0
  115. data/lib/arachni/ethon/easy.rb +4 -1
  116. data/lib/arachni/framework/parts/audit.rb +26 -77
  117. data/lib/arachni/framework/parts/browser.rb +50 -55
  118. data/lib/arachni/framework/parts/check.rb +4 -3
  119. data/lib/arachni/framework/parts/data.rb +41 -6
  120. data/lib/arachni/framework/parts/state.rb +16 -7
  121. data/lib/arachni/http/client.rb +66 -38
  122. data/lib/arachni/http/client/dynamic_404_handler.rb +46 -14
  123. data/lib/arachni/http/headers.rb +22 -10
  124. data/lib/arachni/http/proxy_server.rb +67 -22
  125. data/lib/arachni/http/proxy_server/ssl-interceptor-cacert.pem +34 -0
  126. data/lib/arachni/http/proxy_server/ssl-interceptor-cakey.pem +51 -0
  127. data/lib/arachni/http/request.rb +71 -18
  128. data/lib/arachni/issue.rb +17 -3
  129. data/lib/arachni/option_groups/browser_cluster.rb +34 -1
  130. data/lib/arachni/option_groups/http.rb +1 -1
  131. data/lib/arachni/page.rb +26 -13
  132. data/lib/arachni/page/dom/transition.rb +2 -2
  133. data/lib/arachni/parser.rb +28 -11
  134. data/lib/arachni/platform/fingerprinter.rb +5 -0
  135. data/lib/arachni/platform/manager.rb +65 -32
  136. data/lib/arachni/plugin/base.rb +8 -0
  137. data/lib/arachni/processes/instances.rb +25 -11
  138. data/lib/arachni/reporter/manager.rb +2 -2
  139. data/lib/arachni/rpc/client/instance.rb +4 -0
  140. data/lib/arachni/rpc/server/framework/master.rb +3 -3
  141. data/lib/arachni/rpc/server/framework/multi_instance.rb +0 -8
  142. data/lib/arachni/rpc/server/instance.rb +2 -1
  143. data/lib/arachni/ruby/array.rb +5 -0
  144. data/lib/arachni/ruby/hash.rb +5 -0
  145. data/lib/arachni/ruby/string.rb +2 -3
  146. data/lib/arachni/session.rb +32 -6
  147. data/lib/arachni/state/framework.rb +6 -2
  148. data/lib/arachni/support/cache.rb +1 -0
  149. data/lib/arachni/support/cache/base.rb +12 -8
  150. data/lib/arachni/support/cache/least_recently_pushed.rb +29 -0
  151. data/lib/arachni/support/cache/least_recently_used.rb +5 -8
  152. data/lib/arachni/support/cache/preference.rb +1 -1
  153. data/lib/arachni/support/cache/random_replacement.rb +1 -25
  154. data/lib/arachni/support/database/queue.rb +21 -8
  155. data/lib/arachni/support/lookup/base.rb +7 -1
  156. data/lib/arachni/support/mixins/observable.rb +3 -1
  157. data/lib/arachni/support/profiler.rb +51 -10
  158. data/lib/arachni/support/signature.rb +11 -2
  159. data/lib/arachni/trainer.rb +8 -2
  160. data/lib/arachni/uri.rb +28 -25
  161. data/lib/arachni/uri/scope.rb +1 -1
  162. data/lib/arachni/utilities.rb +8 -0
  163. data/lib/arachni/watir/element.rb +1 -1
  164. data/lib/version +1 -1
  165. data/spec/arachni/browser/javascript/dom_monitor_spec.rb +388 -53
  166. data/spec/arachni/browser/javascript/taint_tracer_spec.rb +41 -0
  167. data/spec/arachni/browser/javascript_spec.rb +235 -61
  168. data/spec/arachni/browser_cluster/jobs/resource_exploration_spec.rb +0 -9
  169. data/spec/arachni/browser_cluster_spec.rb +58 -10
  170. data/spec/arachni/browser_spec.rb +170 -26
  171. data/spec/arachni/check/auditor_spec.rb +22 -3
  172. data/spec/arachni/check/base_spec.rb +84 -0
  173. data/spec/arachni/element/body_spec.rb +1 -1
  174. data/spec/arachni/element/capabilities/analyzable/taint_spec.rb +3 -3
  175. data/spec/arachni/element/capabilities/analyzable/timeout_spec.rb +1 -1
  176. data/spec/arachni/element/cookie/dom_spec.rb +0 -9
  177. data/spec/arachni/element/cookie_spec.rb +85 -0
  178. data/spec/arachni/element/form/dom_spec.rb +0 -9
  179. data/spec/arachni/element/form_spec.rb +46 -3
  180. data/spec/arachni/element/json_spec.rb +20 -0
  181. data/spec/arachni/element/link/dom_spec.rb +0 -9
  182. data/spec/arachni/element/link_spec.rb +40 -15
  183. data/spec/arachni/element/link_template/dom_spec.rb +0 -8
  184. data/spec/arachni/element/link_template_spec.rb +2 -6
  185. data/spec/arachni/element/server_spec.rb +94 -8
  186. data/spec/arachni/element/xml_spec.rb +20 -0
  187. data/spec/arachni/framework/parts/audit_spec.rb +12 -14
  188. data/spec/arachni/framework/parts/browser_spec.rb +0 -171
  189. data/spec/arachni/framework/parts/platform_spec.rb +14 -8
  190. data/spec/arachni/framework/parts/report_spec.rb +1 -1
  191. data/spec/arachni/framework/parts/state_spec.rb +0 -9
  192. data/spec/arachni/http/client/dynamic_404_handlers_spec.rb +19 -0
  193. data/spec/arachni/http/client_spec.rb +169 -42
  194. data/spec/arachni/http/headers_spec.rb +18 -0
  195. data/spec/arachni/http/request_spec.rb +23 -0
  196. data/spec/arachni/issue_spec.rb +17 -6
  197. data/spec/arachni/page_spec.rb +22 -2
  198. data/spec/arachni/parser_spec.rb +5 -0
  199. data/spec/arachni/platform/manager_spec.rb +57 -25
  200. data/spec/arachni/reporter/manager_spec.rb +26 -0
  201. data/spec/arachni/rpc/server/active_options_spec.rb +9 -4
  202. data/spec/arachni/state/framework_spec.rb +2 -8
  203. data/spec/arachni/support/cache/least_recently_pushed_spec.rb +90 -0
  204. data/spec/arachni/support/cache/least_recently_used_spec.rb +5 -13
  205. data/spec/arachni/support/database/queue_spec.rb +7 -0
  206. data/spec/arachni/support/mixins/observable_spec.rb +15 -1
  207. data/spec/arachni/trainer_spec.rb +2 -2
  208. data/spec/components/checks/active/code_injection_timing_spec.rb +1 -1
  209. data/spec/components/checks/active/file_inclusion_spec.rb +6 -6
  210. data/spec/components/checks/active/path_traversal_spec.rb +2 -2
  211. data/spec/components/checks/active/source_code_disclosure_spec.rb +2 -2
  212. data/spec/components/checks/active/unvalidated_redirect_spec.rb +6 -6
  213. data/spec/components/checks/active/xss_dom_inputs_spec.rb +3 -5
  214. data/spec/components/checks/active/xss_dom_script_context_spec.rb +1 -1
  215. data/spec/components/checks/active/xss_spec.rb +5 -5
  216. data/spec/components/checks/passive/common_admin_interfaces_spec.rb +15 -0
  217. data/spec/components/checks/passive/interesting_responses_spec.rb +14 -1
  218. data/spec/components/fingerprinters/frameworks/aspx_mvc_spec.rb +31 -0
  219. data/spec/components/fingerprinters/frameworks/cakephp_spec.rb +22 -0
  220. data/spec/components/fingerprinters/frameworks/cherrypy_spec.rb +28 -0
  221. data/spec/components/fingerprinters/frameworks/django_spec.rb +37 -0
  222. data/spec/components/fingerprinters/frameworks/jsf_spec.rb +27 -0
  223. data/spec/components/fingerprinters/frameworks/rack_spec.rb +11 -14
  224. data/spec/components/fingerprinters/frameworks/rails_spec.rb +53 -0
  225. data/spec/components/fingerprinters/languages/asp_spec.rb +7 -9
  226. data/spec/components/fingerprinters/languages/aspx_spec.rb +10 -24
  227. data/spec/components/fingerprinters/languages/java_spec.rb +88 -0
  228. data/spec/components/fingerprinters/languages/php_spec.rb +19 -12
  229. data/spec/components/fingerprinters/languages/python_spec.rb +22 -9
  230. data/spec/components/fingerprinters/languages/ruby.rb +6 -4
  231. data/spec/components/fingerprinters/os/bsd_spec.rb +6 -4
  232. data/spec/components/fingerprinters/os/linux_spec.rb +6 -4
  233. data/spec/components/fingerprinters/os/solaris_spec.rb +6 -4
  234. data/spec/components/fingerprinters/os/unix_spec.rb +6 -4
  235. data/spec/components/fingerprinters/os/windows_spec.rb +6 -4
  236. data/spec/components/fingerprinters/servers/apache_spec.rb +15 -4
  237. data/spec/components/fingerprinters/servers/gunicorn_spec.rb +28 -0
  238. data/spec/components/fingerprinters/servers/iis_spec.rb +6 -6
  239. data/spec/components/fingerprinters/servers/jetty_spec.rb +6 -6
  240. data/spec/components/fingerprinters/servers/nginx_spec.rb +6 -4
  241. data/spec/components/fingerprinters/servers/tomcat_spec.rb +15 -6
  242. data/spec/components/path_extractors/data_url_spec.rb +19 -0
  243. data/spec/components/plugins/autologin_spec.rb +23 -0
  244. data/spec/components/plugins/login_script_spec.rb +112 -24
  245. data/spec/components/plugins/restrict_to_dom_state_spec.rb +16 -0
  246. data/spec/components/plugins/vector_feed_spec.rb +39 -1
  247. data/spec/support/factories/page/dom.rb +9 -4
  248. data/spec/support/factories/page/dom/transition.rb +31 -9
  249. data/spec/support/factories/scan_report.rb +8 -6
  250. data/spec/support/fixtures/empty/placeholder +0 -0
  251. data/spec/support/fixtures/report.afr +0 -0
  252. data/spec/support/fixtures/reporters/manager_spec/error.rb +18 -0
  253. data/spec/support/servers/arachni/browser.rb +117 -11
  254. data/spec/support/servers/arachni/browser/javascript/dom_monitor.rb +148 -4
  255. data/spec/support/servers/arachni/check/auditor.rb +4 -0
  256. data/spec/support/servers/arachni/element/cookie/cookie_dom.rb +1 -1
  257. data/spec/support/servers/arachni/http/client.rb +5 -0
  258. data/spec/support/servers/arachni/http/client/dynamic_404_handler.rb +13 -0
  259. data/spec/support/servers/checks/active/code_injection_timing.rb +1 -1
  260. data/spec/support/servers/checks/active/file_inclusion.rb +2 -2
  261. data/spec/support/servers/checks/active/path_traversal.rb +2 -2
  262. data/spec/support/servers/checks/active/source_code_disclosure.rb +40 -33
  263. data/spec/support/servers/checks/active/trainer_check.rb +9 -10
  264. data/spec/support/servers/checks/active/unvalidated_redirect_dom.rb +7 -4
  265. data/spec/support/servers/checks/active/xss.rb +35 -0
  266. data/spec/support/servers/checks/active/xss_dom.rb +1 -1
  267. data/spec/support/servers/checks/active/xss_dom_inputs.rb +24 -0
  268. data/spec/support/servers/checks/active/xss_dom_script_context.rb +1 -1
  269. data/spec/support/servers/checks/passive/common_admin_interfaces.rb +6 -0
  270. data/spec/support/servers/plugins/autologin.rb +9 -0
  271. data/spec/support/servers/plugins/restrict_to_dom_state.rb +4 -0
  272. data/spec/support/shared/element/base.rb +42 -0
  273. data/spec/support/shared/element/capabilities/auditable.rb +4 -4
  274. data/spec/support/shared/element/capabilities/auditable/dom.rb +26 -0
  275. data/spec/support/shared/element/capabilities/inputtable.rb +16 -11
  276. data/spec/support/shared/element/capabilities/submitable.rb +7 -2
  277. data/spec/support/shared/fingerprinter.rb +8 -0
  278. data/spec/support/shared/path_extractor.rb +1 -1
  279. data/ui/cli/framework.rb +3 -3
  280. data/ui/cli/framework/option_parser.rb +9 -0
  281. data/ui/cli/output.rb +9 -0
  282. data/ui/cli/reporter.rb +5 -2
  283. data/ui/cli/utilities.rb +4 -2
  284. metadata +76 -17
  285. data/lib/arachni/http/proxy_server/ssl-interceptor-cert.pem +0 -34
  286. data/lib/arachni/http/proxy_server/ssl-interceptor-pkey.pem +0 -51
  287. data/spec/components/fingerprinters/languages/jsp_spec.rb +0 -56
@@ -74,7 +74,7 @@ class GenericDOM < Base
74
74
  def value
75
75
  transition.options[:value]
76
76
  end
77
- alias :affected_input_value value
77
+ alias :affected_input_value :value
78
78
 
79
79
  # @return [Hash]
80
80
  def to_h
@@ -44,7 +44,7 @@ class JSON < Base
44
44
  self.inputs = (self.inputs || {}).merge( options[:inputs] || {} )
45
45
 
46
46
  if @source && self.inputs.empty?
47
- self.inputs = JSON.load( self.source )
47
+ self.inputs = ::JSON.load( self.source )
48
48
  end
49
49
 
50
50
  @default_inputs = self.inputs.dup.freeze
@@ -104,6 +104,7 @@ class JSON < Base
104
104
  # @return [JSON, nil]
105
105
  def from_request( url, request )
106
106
  return if !request.body.is_a?( String ) || request.body.empty?
107
+ return if too_big?( request.body )
107
108
 
108
109
  data = begin
109
110
  ::JSON.load( request.body )
@@ -45,7 +45,7 @@ module Inputtable
45
45
  #
46
46
  # @return [Object]
47
47
  #
48
- # @see Capabilities::Inputtable#[]
48
+ # @see Arachni::Element::Capabilities::Inputtable#[]
49
49
  def []( name )
50
50
  key, data = find( name )
51
51
  data[key]
@@ -65,7 +65,7 @@ module Inputtable
65
65
  # @return [Object]
66
66
  # `value`
67
67
  #
68
- # @see Capabilities::Inputtable#[]=
68
+ # @see Arachni::Element::Capabilities::Inputtable#[]=
69
69
  def []=( name, value )
70
70
  @inputs = @inputs.dup
71
71
  key, data = find( name )
@@ -80,11 +80,11 @@ module Inputtable
80
80
  # Overrides {Capabilities::Inputtable#update} to allow for non-string data
81
81
  # of variable depth.
82
82
  #
83
- # @param (see Capabilities::Inputtable#update)
84
- # @return (see Capabilities::Inputtable#update)
85
- # @raise (see Capabilities::Inputtable#update)
83
+ # @param (see Arachni::Element::Capabilities::Inputtable#update)
84
+ # @return (see Arachni::Element::Capabilities::Inputtable#update)
85
+ # @raise (see Arachni::Element::Capabilities::Inputtable#update)
86
86
  #
87
- # @see Capabilities::Inputtable#update
87
+ # @see Arachni::Element::Capabilities::Inputtable#update
88
88
  def update( hash )
89
89
  traverse_data hash do |path, value|
90
90
  self[path] = value
@@ -26,7 +26,7 @@ module Mutable
26
26
  # If the `name` is an `Array`, it will be treated as a path to the location
27
27
  # of the input.
28
28
  #
29
- # @see Capabilities::Mutable#affected_input_name=
29
+ # @see Arachni::Element::Capabilities::Mutable#affected_input_name=
30
30
  def affected_input_name=( name )
31
31
  if name.is_a?( Array ) && name.size == 1
32
32
  name = name.first
@@ -55,19 +55,11 @@ class Link < Base
55
55
  def to_s
56
56
  uri = uri_parse( self.action ).dup
57
57
  uri.query = self.inputs.
58
- map { |k, v| "#{encode_query_params(k)}=#{encode_query_params(v)}" }.
58
+ map { |k, v| "#{encode(k)}=#{encode(v)}" }.
59
59
  join( '&' )
60
60
  uri.to_s
61
61
  end
62
62
 
63
- # @param (see .encode_query_params)
64
- # @return (see .encode_query_params)
65
- #
66
- # @see .encode_query_params
67
- def encode_query_params( *args )
68
- self.class.encode_query_params( *args )
69
- end
70
-
71
63
  # @param (see .encode)
72
64
  # @return (see .encode)
73
65
  #
@@ -114,7 +106,14 @@ class Link < Base
114
106
  #
115
107
  # @return [Array<Link>]
116
108
  def from_document( url, document )
117
- document = Nokogiri::HTML( document.to_s ) if !document.is_a?( Nokogiri::HTML::Document )
109
+ if !document.is_a?( Nokogiri::HTML::Document )
110
+ document = document.to_s
111
+
112
+ return [] if !(document =~ /\?.*=/)
113
+
114
+ document = Nokogiri::HTML( document )
115
+ end
116
+
118
117
  base_url = begin
119
118
  document.search( '//base[@href]' )[0]['href']
120
119
  rescue
@@ -122,6 +121,8 @@ class Link < Base
122
121
  end
123
122
 
124
123
  document.search( '//a' ).map do |link|
124
+ next if too_big?( link['href'] )
125
+
125
126
  href = to_absolute( link['href'], base_url )
126
127
  next if !href
127
128
 
@@ -137,12 +138,8 @@ class Link < Base
137
138
  end.compact
138
139
  end
139
140
 
140
- def encode_query_params( param )
141
- encode( encode( param.recode ), '=' )
142
- end
143
-
144
- def encode( *args )
145
- ::URI.encode( *args )
141
+ def encode( string )
142
+ Arachni::HTTP::Request.encode string
146
143
  end
147
144
 
148
145
  def decode( *args )
@@ -57,7 +57,7 @@ class DOM < Base
57
57
  # URL including the DOM {#inputs}.
58
58
  def to_s
59
59
  "#{@action}##{fragment_path}?" << inputs.
60
- map { |k, v| "#{encode_query_params(k)}=#{encode_query_params(v)}" }.
60
+ map { |k, v| "#{encode(k)}=#{encode(v)}" }.
61
61
  join( '&' )
62
62
  end
63
63
 
@@ -65,22 +65,9 @@ class DOM < Base
65
65
  "#{@action}##{fragment}"
66
66
  end
67
67
 
68
- def encode_query_params( *args )
69
- Link.encode_query_params( *args )
70
- end
71
-
72
- def encode( *args )
73
- Link.encode( *args )
74
- end
75
-
76
- def decode( *args )
77
- Link.decode( *args )
78
- end
79
-
80
68
  def type
81
69
  self.class.type
82
70
  end
83
-
84
71
  def self.type
85
72
  :link_dom
86
73
  end
@@ -170,6 +170,7 @@ class LinkTemplate < Base
170
170
  end
171
171
 
172
172
  document.search( '//a' ).map do |link|
173
+ next if too_big?( link['href'] )
173
174
  next if !(href = to_absolute( link['href'], base_url ))
174
175
 
175
176
  template, inputs = extract_inputs( href, templates )
@@ -212,11 +213,11 @@ class LinkTemplate < Base
212
213
  end
213
214
 
214
215
  def encode( string )
215
- URI.encode( URI.encode( URI.encode( string.to_s, ';' ) ), '/' )
216
+ Link.encode string
216
217
  end
217
218
 
218
219
  def decode( *args )
219
- URI.decode( *args )
220
+ Link.decode( *args )
220
221
  end
221
222
 
222
223
  def type
@@ -68,22 +68,6 @@ class DOM < Base
68
68
  LinkTemplate.extract_inputs( url, templates )
69
69
  end
70
70
 
71
- def encode( string )
72
- self.class.encode( string )
73
- end
74
-
75
- def self.encode( string )
76
- string
77
- end
78
-
79
- def decode( *args )
80
- self.class.decode( *args )
81
- end
82
-
83
- def self.decode( *args )
84
- Link.decode( *args )
85
- end
86
-
87
71
  def type
88
72
  self.class.type
89
73
  end
@@ -16,14 +16,27 @@ module Arachni::Element
16
16
  class Server < Base
17
17
  include Capabilities::WithAuditor
18
18
 
19
- # Used to determine how different a resource should be in order to be sent
20
- # to the {Trainer#push}.
19
+ # Valid responses to discovery checks should vary *wildly*, especially when
20
+ # considering the types of directories and files that these checks look for.
21
21
  #
22
- # Ideally, all identified resources should be analyzed by the {Trainer} but
23
- # there can be cases where unreliable custom-4o4 signatures lead to FPs and
24
- # feeding FPs to the system can create an infinite loop.
22
+ # On the other hand, custom-404 or such responses will have many things in
23
+ # common which makes it possible to spot them without much bother.
24
+ #
25
+ # Ideally, custom-404s will be identified properly by the
26
+ # {HTTP::Client::Dynamic404Handler} but this is here to save our ass in case
27
+ # there's a bug or an unforeseen edge-case or something.
28
+ #
29
+ # Also, identified resources should be analyzed by the {Trainer} but there
30
+ # can be cases where unreliable custom-404 signatures lead to FPs and feeding
31
+ # FPs to the system can create an infinite loop.
25
32
  SIMILARITY_TOLERANCE = 0.25
26
33
 
34
+ # Remark in case of an untrusted issue.
35
+ REMARK = 'This issue was logged by a discovery check but ' +
36
+ 'the response for the resource it identified is very similar to responses ' +
37
+ 'for other resources of similar type. This is a strong indication that ' +
38
+ 'the logged issue is a false positive.'
39
+
27
40
  def initialize( url )
28
41
  super url: url
29
42
  @initialization_options = url
@@ -86,7 +99,13 @@ class Server < Base
86
99
  return false if !full_and_absolute_url?( url )
87
100
 
88
101
  if http.dynamic_404_handler.needs_check?( url )
89
- http.get( url, performer: self ) do |r|
102
+
103
+ # Don't enable fingerprinting if there's a dynamic handler, we don't
104
+ # want to keep analyzing non existent resources.
105
+ #
106
+ # If a resource does exist though it will be fingerprinted down the
107
+ # line.
108
+ http.get( url, performer: self, fingerprint: false ) do |r|
90
109
  if r.code == 200
91
110
  http.dynamic_404_handler._404?( r ) { |bool| block.call( !bool, r ) }
92
111
  else
@@ -118,6 +137,23 @@ class Server < Base
118
137
  s << '>'
119
138
  end
120
139
 
140
+ def self.flag_issues_as_untrusted( issue_digests )
141
+ issue_digests.uniq.each do |digest|
142
+ next if !Arachni::Data.issues[digest]
143
+
144
+ Arachni::Data.issues[digest].variations.each do |issue|
145
+ issue.add_remark :meta_analysis, REMARK
146
+ issue.trusted = false
147
+ end
148
+ end
149
+ end
150
+
151
+ def self.flag_issues_if_untrusted( similarity, issue_digests )
152
+ return if similarity < SIMILARITY_TOLERANCE
153
+
154
+ flag_issues_as_untrusted( issue_digests )
155
+ end
156
+
121
157
  private
122
158
 
123
159
  def analyze
@@ -153,10 +189,14 @@ class Server < Base
153
189
  # framework custom-404s and get into an infinite loop.
154
190
  train = similarity < SIMILARITY_TOLERANCE
155
191
 
192
+ issue_digests = []
156
193
  @candidates.each do |response, block|
157
- log response, train, &block
194
+ issue_digests << log( response, train, &block ).digest
158
195
  end
159
196
 
197
+ return if train
198
+
199
+ self.class.flag_issues_as_untrusted( issue_digests )
160
200
  ensure
161
201
  @candidates.clear
162
202
  end
@@ -164,12 +204,14 @@ class Server < Base
164
204
  def log( response, train = true, &block )
165
205
  block.call( response ) if block_given?
166
206
 
167
- auditor.log_remote_file( response )
207
+ issue = auditor.log_remote_file( response )
168
208
 
169
- return if !train
209
+ return issue if !train
170
210
 
171
211
  # Use the newly identified resource to increase the scan scope.
172
212
  auditor.framework.trainer.push( response )
213
+
214
+ issue
173
215
  end
174
216
 
175
217
  end
@@ -137,6 +137,7 @@ class XML < Base
137
137
  # @return [XML, nil]
138
138
  def from_request( url, request )
139
139
  return if !request.body.is_a?( String ) || request.body.empty?
140
+ return if too_big?( request.body )
140
141
 
141
142
  data = parse_inputs( request.body )
142
143
  return if data.empty?
@@ -11,9 +11,12 @@ class Easy
11
11
  module Callbacks
12
12
  def debug_callback
13
13
  @debug_callback ||= proc do |handle, type, data, size, udata|
14
+ # We only care about these so that we can have access to raw
15
+ # HTTP request traffic for reporting/debugging purposes.
16
+ next if type != :header_out && type != :data_out
17
+
14
18
  message = data.read_string( size )
15
19
  @debug_info.add type, message
16
- # print message unless [:data_in, :data_out].include?(type)
17
20
  0
18
21
  end
19
22
  end
@@ -117,25 +117,28 @@ module Audit
117
117
  # resulting pages back to the framework.
118
118
  perform_browser_analysis( page )
119
119
 
120
- # Remove elements which have already passed through here.
121
- pre_audit_element_filter( page )
122
-
123
- # Filter the page through the browser and apply DOM metadata to itself
124
- # and its elements in order to allow for audit optimizations down the
125
- # line.
126
- #
127
- # For example, if a DOM element has no associated events, there's no
128
- # point in it getting audited.
129
- apply_dom_metadata( page )
130
-
131
- notify_on_effective_page_audit( page )
132
-
133
- # Run checks which **don't** benefit from fingerprinting first, so that
134
- # we can use the responses of their HTTP requests to fingerprint the
135
- # webapp platforms, so that the checks which **do** benefit from knowing
136
- # the remote platforms can run more efficiently.
137
- run_http = run_checks( @checks.without_platforms, page )
138
- run_http = true if run_checks( @checks.with_platforms, page )
120
+ run_http = false
121
+
122
+ if checks.any?
123
+ # Remove elements which have already passed through here.
124
+ pre_audit_element_filter( page )
125
+
126
+ notify_on_effective_page_audit( page )
127
+
128
+ # Run checks which **don't** benefit from fingerprinting first, so
129
+ # that we can use the responses of their HTTP requests to fingerprint
130
+ # the webapp platforms, so that the checks which **do** benefit from
131
+ # knowing the remote platforms can run more efficiently.
132
+ run_http = run_checks( @checks.without_platforms, page )
133
+ run_http = true if run_checks( @checks.with_platforms, page )
134
+ end
135
+
136
+ notify_after_page_audit( page )
137
+
138
+ # Makes it easier on the GC but it is important that it be called
139
+ # **after** all the callbacks have been executed because they may need
140
+ # access to the cached data and there's no sense in re-parsing.
141
+ page.clear_cache
139
142
 
140
143
  if Arachni::Check::Auditor.has_timeout_candidates?
141
144
  print_line
@@ -145,10 +148,6 @@ module Audit
145
148
  run_http = true
146
149
  end
147
150
 
148
- # Makes it easier on the GC.
149
- page.clear_cache
150
-
151
- notify_after_page_audit( page )
152
151
  run_http
153
152
  end
154
153
 
@@ -207,66 +206,21 @@ module Audit
207
206
 
208
207
  @audit_queues_done = false
209
208
 
210
- # If for some reason we've got pages in the page queue this early,
211
- # consume them and get it over with.
212
- audit_page_queue
213
-
214
- next_page = nil
215
- while !suspended? && !page_limit_reached? &&
216
- (page = next_page || pop_page_from_url_queue)
217
-
218
- # Schedule the next page to be grabbed along with the audit requests
219
- # for the current page in order to avoid blocking.
220
- next_page = nil
221
- next_page_call = proc do
222
- pop_page_from_url_queue { |p| next_page = p }
223
- end
209
+ while !suspended? && !page_limit_reached? && (page = pop_page)
224
210
 
225
- # If we have login capabilities make sure that our session is valid
226
- # before grabbing and auditing the next page.
227
- if session.can_login?
228
- # Schedule the login check to happen along with the audit requests
229
- # to prevent blocking and grab the next page as well.
230
- session.logged_in? do |bool|
231
- next next_page_call.call if bool
211
+ session.ensure_logged_in
232
212
 
233
- session.login
234
- next_page_call
235
- end
236
- else
237
- next_page_call.call
238
- end
213
+ replenish_page_queue_from_url_queue
239
214
 
240
- # We're counting on piggybacking the next page retrieval with the
215
+ # We're counting on piggybacking the page queue replenishing on the
241
216
  # page audit, however if there wasn't an audit we need to force an
242
217
  # HTTP run.
243
218
  audit_page( page ) or http.run
244
219
 
245
- if next_page && suspend?
246
- data.page_queue << next_page
247
- end
248
-
249
220
  handle_signals
250
-
251
- # Consume pages somehow triggered by the audit and pushed by the
252
- # trainer or plugins or whatever.
253
- audit_page_queue
254
221
  end
255
222
 
256
- audit_page_queue
257
-
258
223
  @audit_queues_done = true
259
- true
260
- end
261
-
262
- # Audits the page queue.
263
- #
264
- # @see #pop_page_from_queue
265
- def audit_page_queue
266
- while !suspended? && !page_limit_reached? && (page = pop_page_from_queue)
267
- audit_page( page )
268
- handle_signals
269
- end
270
224
  end
271
225
 
272
226
  def harvest_http_responses
@@ -274,10 +228,6 @@ module Audit
274
228
  print_info 'Depending on server responsiveness and network' <<
275
229
  ' conditions this may take a while.'
276
230
 
277
- # Run all the queued HTTP requests and harvest the responses.
278
- http.run
279
-
280
- # Needed for some HTTP callbacks.
281
231
  http.run
282
232
  end
283
233
 
@@ -286,4 +236,3 @@ end
286
236
  end
287
237
  end
288
238
  end
289
-