arachni 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -0
- data/README.md +9 -2
- data/components/checks/active/code_injection.rb +5 -5
- data/components/checks/active/code_injection_timing.rb +3 -3
- data/components/checks/active/no_sql_injection_differential.rb +3 -2
- data/components/checks/active/os_cmd_injection.rb +11 -5
- data/components/checks/active/os_cmd_injection_timing.rb +11 -4
- data/components/checks/active/path_traversal.rb +2 -2
- data/components/checks/active/sql_injection.rb +1 -1
- data/components/checks/active/sql_injection/patterns/mssql +1 -0
- data/components/checks/active/sql_injection_differential.rb +3 -2
- data/components/checks/active/unvalidated_redirect.rb +3 -3
- data/components/checks/passive/common_directories/directories.txt +2 -0
- data/components/checks/passive/common_files/filenames.txt +1 -0
- data/lib/arachni/browser.rb +17 -1
- data/lib/arachni/check/auditor.rb +5 -2
- data/lib/arachni/check/base.rb +30 -5
- data/lib/arachni/element/capabilities/analyzable/differential.rb +2 -5
- data/lib/arachni/element/capabilities/auditable.rb +3 -1
- data/lib/arachni/element/capabilities/with_dom.rb +1 -0
- data/lib/arachni/element/capabilities/with_node.rb +1 -1
- data/lib/arachni/element/cookie.rb +2 -2
- data/lib/arachni/element/form.rb +1 -1
- data/lib/arachni/element/header.rb +2 -2
- data/lib/arachni/element/link_template.rb +1 -1
- data/lib/arachni/framework.rb +21 -1144
- data/lib/arachni/framework/parts/audit.rb +282 -0
- data/lib/arachni/framework/parts/browser.rb +132 -0
- data/lib/arachni/framework/parts/check.rb +86 -0
- data/lib/arachni/framework/parts/data.rb +158 -0
- data/lib/arachni/framework/parts/platform.rb +34 -0
- data/lib/arachni/framework/parts/plugin.rb +61 -0
- data/lib/arachni/framework/parts/report.rb +128 -0
- data/lib/arachni/framework/parts/scope.rb +40 -0
- data/lib/arachni/framework/parts/state.rb +457 -0
- data/lib/arachni/http/client.rb +33 -30
- data/lib/arachni/http/request.rb +6 -2
- data/lib/arachni/issue.rb +55 -1
- data/lib/arachni/platform/manager.rb +25 -21
- data/lib/arachni/state/framework.rb +7 -1
- data/lib/arachni/utilities.rb +10 -0
- data/lib/version +1 -1
- data/spec/arachni/browser_spec.rb +13 -0
- data/spec/arachni/check/auditor_spec.rb +1 -0
- data/spec/arachni/check/base_spec.rb +80 -0
- data/spec/arachni/element/cookie_spec.rb +2 -2
- data/spec/arachni/framework/parts/audit_spec.rb +391 -0
- data/spec/arachni/framework/parts/browser_spec.rb +26 -0
- data/spec/arachni/framework/parts/check_spec.rb +24 -0
- data/spec/arachni/framework/parts/data_spec.rb +187 -0
- data/spec/arachni/framework/parts/platform_spec.rb +62 -0
- data/spec/arachni/framework/parts/plugin_spec.rb +41 -0
- data/spec/arachni/framework/parts/report_spec.rb +66 -0
- data/spec/arachni/framework/parts/scope_spec.rb +86 -0
- data/spec/arachni/framework/parts/state_spec.rb +528 -0
- data/spec/arachni/framework_spec.rb +17 -1344
- data/spec/arachni/http/client_spec.rb +12 -7
- data/spec/arachni/issue_spec.rb +35 -0
- data/spec/arachni/platform/manager_spec.rb +2 -3
- data/spec/arachni/state/framework_spec.rb +15 -0
- data/spec/components/checks/active/code_injection_timing_spec.rb +5 -5
- data/spec/components/checks/active/no_sql_injection_differential_spec.rb +4 -0
- data/spec/components/checks/active/os_cmd_injection_spec.rb +20 -7
- data/spec/components/checks/active/os_cmd_injection_timing_spec.rb +5 -5
- data/spec/components/checks/active/sql_injection_differential_spec.rb +4 -0
- data/spec/components/checks/active/sql_injection_spec.rb +2 -3
- data/spec/support/servers/arachni/browser.rb +31 -0
- data/spec/support/servers/checks/active/code_injection.rb +1 -1
- data/spec/support/servers/checks/active/no_sql_injection_differential.rb +36 -34
- data/spec/support/servers/checks/active/os_cmd_injection.rb +6 -12
- data/spec/support/servers/checks/active/os_cmd_injection_timing.rb +9 -4
- data/spec/support/servers/checks/active/sql_injection.rb +1 -1
- data/spec/support/servers/checks/active/sql_injection_differential.rb +37 -34
- data/spec/support/shared/element/capabilities/with_node.rb +25 -0
- data/spec/support/shared/framework.rb +26 -0
- data/ui/cli/output.rb +2 -0
- data/ui/cli/rpc/server/dispatcher/option_parser.rb +1 -1
- metadata +32 -4
- data/components/checks/active/sql_injection/patterns/coldfusion +0 -1
@@ -188,9 +188,7 @@ module Differential
|
|
188
188
|
signatures[:controls][altered_hash].refine!(res.body) :
|
189
189
|
Support::Signature.new(res.body)
|
190
190
|
|
191
|
-
|
192
|
-
|
193
|
-
finalize_if_done( opts, signatures )
|
191
|
+
increase_received_responses( opts, signatures )
|
194
192
|
end
|
195
193
|
end
|
196
194
|
end
|
@@ -260,8 +258,7 @@ module Differential
|
|
260
258
|
|
261
259
|
signature_sieve( altered_hash, signatures, pair_hash )
|
262
260
|
|
263
|
-
|
264
|
-
finalize_if_done( opts, signatures )
|
261
|
+
increase_received_responses( opts, signatures )
|
265
262
|
end
|
266
263
|
end
|
267
264
|
end
|
@@ -245,7 +245,9 @@ module Auditable
|
|
245
245
|
submit( options ) do |response|
|
246
246
|
element = response.request.performer
|
247
247
|
if !element.audit_options[:silent]
|
248
|
-
print_status
|
248
|
+
print_status "Analyzing response ##{response.request.id} for " <<
|
249
|
+
"#{self.type} input '#{affected_input_name}'" <<
|
250
|
+
" pointing to: '#{audit_status_message_action}'"
|
249
251
|
end
|
250
252
|
|
251
253
|
exception_jail( false ){ block.call( response, element ) }
|
@@ -466,10 +466,10 @@ class Cookie < Base
|
|
466
466
|
#
|
467
467
|
# @return [String]
|
468
468
|
def encode( str, type = :value )
|
469
|
-
reserved = "+;%\0\'\""
|
469
|
+
reserved = "+;%\0\'\"&"
|
470
470
|
reserved << '=' if type == :name
|
471
471
|
|
472
|
-
URI.encode( str, reserved ).recode.gsub( ' ', '+' )
|
472
|
+
URI.encode( str.to_s, reserved ).recode.gsub( ' ', '+' )
|
473
473
|
end
|
474
474
|
|
475
475
|
# Decodes a {String} encoded for the `Cookie` header field.
|
data/lib/arachni/element/form.rb
CHANGED
data/lib/arachni/framework.rb
CHANGED
@@ -41,121 +41,57 @@ require lib + 'session'
|
|
41
41
|
require lib + 'trainer'
|
42
42
|
require lib + 'browser_cluster'
|
43
43
|
|
44
|
-
#
|
44
|
+
Dir.glob( "#{lib}framework/parts/**/*.rb" ).each { |h| require h }
|
45
|
+
|
46
|
+
# The Framework class ties together all the subsystems.
|
45
47
|
#
|
46
48
|
# It's the brains of the operation, it bosses the rest of the subsystems around.
|
47
|
-
# It
|
48
|
-
# user options.
|
49
|
+
# It loads checks, reports and plugins and runs them according to user options.
|
49
50
|
#
|
50
51
|
# @author Tasos "Zapotek" Laskos <tasos.laskos@arachni-scanner.com>
|
51
52
|
class Framework
|
52
53
|
include UI::Output
|
53
|
-
|
54
54
|
include Utilities
|
55
|
-
include Support::Mixins::Observable
|
56
55
|
|
57
|
-
#
|
58
|
-
|
56
|
+
# How many times to request a page upon failure.
|
57
|
+
AUDIT_PAGE_MAX_TRIES = 5
|
59
58
|
|
60
|
-
|
61
|
-
|
59
|
+
include Parts::Scope
|
60
|
+
include Parts::Browser
|
61
|
+
include Parts::Report
|
62
|
+
include Parts::Plugin
|
63
|
+
include Parts::Check
|
64
|
+
include Parts::Platform
|
65
|
+
include Parts::Audit
|
66
|
+
include Parts::Data
|
67
|
+
include Parts::State
|
62
68
|
|
63
69
|
# {Framework} error namespace.
|
64
70
|
#
|
65
71
|
# All {Framework} errors inherit from and live under it.
|
66
72
|
#
|
67
|
-
# When I say Framework I mean the {Framework} class, not the
|
68
|
-
#
|
73
|
+
# When I say Framework I mean the {Framework} class, not the entire Arachni
|
74
|
+
# Framework.
|
69
75
|
#
|
70
76
|
# @author Tasos "Zapotek" Laskos <tasos.laskos@arachni-scanner.com>
|
71
77
|
class Error < Arachni::Error
|
72
78
|
end
|
73
79
|
|
74
|
-
# How many times to request a page upon failure.
|
75
|
-
AUDIT_PAGE_MAX_TRIES = 5
|
76
|
-
|
77
80
|
# @return [Options]
|
78
81
|
# System options
|
79
82
|
attr_reader :options
|
80
83
|
|
81
|
-
# @return [Arachni::Reporter::Manager]
|
82
|
-
attr_reader :reporters
|
83
|
-
|
84
|
-
# @return [Arachni::Check::Manager]
|
85
|
-
attr_reader :checks
|
86
|
-
|
87
|
-
# @return [Arachni::Plugin::Manager]
|
88
|
-
attr_reader :plugins
|
89
|
-
|
90
|
-
# @return [Session]
|
91
|
-
# Web application session manager.
|
92
|
-
attr_reader :session
|
93
|
-
|
94
|
-
# @return [Arachni::HTTP]
|
95
|
-
attr_reader :http
|
96
|
-
|
97
|
-
# @return [Trainer]
|
98
|
-
attr_reader :trainer
|
99
|
-
|
100
|
-
# @return [Array<String>]
|
101
|
-
# Page URLs which elicited no response from the server and were not audited.
|
102
|
-
# Not determined by HTTP status codes, we're talking network failures here.
|
103
|
-
attr_reader :failures
|
104
|
-
|
105
|
-
# @param [String] afs
|
106
|
-
# Path to an `.afs.` (Arachni Framework Snapshot) file created by {#suspend}.
|
107
|
-
#
|
108
|
-
# @return [Framework]
|
109
|
-
# Restored instance.
|
110
|
-
def self.restore( afs, &block )
|
111
|
-
framework = new
|
112
|
-
framework.restore( afs )
|
113
|
-
|
114
|
-
if block_given?
|
115
|
-
begin
|
116
|
-
block.call framework
|
117
|
-
ensure
|
118
|
-
framework.clean_up
|
119
|
-
framework.reset
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
framework
|
124
|
-
end
|
125
|
-
|
126
84
|
# @param [Options] options
|
127
85
|
# @param [Block] block
|
128
86
|
# Block to be passed a {Framework} instance which will then be {#reset}.
|
129
|
-
def initialize( options =
|
130
|
-
super()
|
131
|
-
|
87
|
+
def initialize( options = Options.instance, &block )
|
132
88
|
Encoding.default_external = 'BINARY'
|
133
89
|
Encoding.default_internal = 'BINARY'
|
134
90
|
|
135
91
|
@options = options
|
136
92
|
|
137
|
-
|
138
|
-
|
139
|
-
@plugins = Plugin::Manager.new( self )
|
140
|
-
|
141
|
-
reset_session
|
142
|
-
@http = HTTP::Client.instance
|
143
|
-
|
144
|
-
reset_trainer
|
145
|
-
|
146
|
-
# Deep clone the redundancy rules to preserve their original counters
|
147
|
-
# for the reports.
|
148
|
-
@orig_redundant = options.scope.redundant_path_patterns.deep_clone
|
149
|
-
|
150
|
-
state.status = :ready
|
151
|
-
|
152
|
-
@current_url = ''
|
153
|
-
|
154
|
-
# Holds page URLs which returned no response.
|
155
|
-
@failures = []
|
156
|
-
@retries = {}
|
157
|
-
|
158
|
-
@after_page_audit_blocks = []
|
93
|
+
# Initialize the Parts.
|
94
|
+
super()
|
159
95
|
|
160
96
|
# Little helper to run a piece of code and reset the framework to be
|
161
97
|
# ready to be reused.
|
@@ -169,46 +105,6 @@ class Framework
|
|
169
105
|
end
|
170
106
|
end
|
171
107
|
|
172
|
-
# @return [Integer]
|
173
|
-
# Total number of pages added to the {#push_to_page_queue page audit queue}.
|
174
|
-
def page_queue_total_size
|
175
|
-
data.page_queue_total_size
|
176
|
-
end
|
177
|
-
|
178
|
-
# @return [Integer]
|
179
|
-
# Total number of URLs added to the {#push_to_url_queue URL audit queue}.
|
180
|
-
def url_queue_total_size
|
181
|
-
data.url_queue_total_size
|
182
|
-
end
|
183
|
-
|
184
|
-
# @return [Hash<String, Integer>]
|
185
|
-
# List of crawled URLs with their HTTP codes.
|
186
|
-
def sitemap
|
187
|
-
data.sitemap
|
188
|
-
end
|
189
|
-
|
190
|
-
# @return [BrowserCluster, nil]
|
191
|
-
# A lazy-loaded browser cluster or `nil` if
|
192
|
-
# {OptionGroups::BrowserCluster#pool_size} or
|
193
|
-
# {OptionGroups::Scope#dom_depth_limit} are 0 or not
|
194
|
-
# {#host_has_browser?}.
|
195
|
-
def browser_cluster
|
196
|
-
return if options.browser_cluster.pool_size == 0 ||
|
197
|
-
Options.scope.dom_depth_limit == 0 || !host_has_browser?
|
198
|
-
|
199
|
-
# Initialization may take a while so since we lazy load this make sure
|
200
|
-
# that only one thread gets to this code at a time.
|
201
|
-
synchronize do
|
202
|
-
if !@browser_cluster
|
203
|
-
state.set_status_message :browser_cluster_startup
|
204
|
-
end
|
205
|
-
|
206
|
-
@browser_cluster ||= BrowserCluster.new
|
207
|
-
state.clear_status_messages
|
208
|
-
@browser_cluster
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
108
|
# Starts the scan.
|
213
109
|
#
|
214
110
|
# @param [Block] block
|
@@ -222,10 +118,8 @@ class Framework
|
|
222
118
|
# exit the reporters will still run with whatever results Arachni managed
|
223
119
|
# to gather.
|
224
120
|
exception_jail( false ){ audit }
|
225
|
-
# print_with_statistics
|
226
121
|
|
227
|
-
return if aborted?
|
228
|
-
return if suspended?
|
122
|
+
return if aborted? || suspended?
|
229
123
|
|
230
124
|
clean_up
|
231
125
|
exception_jail( false ){ block.call } if block_given?
|
@@ -234,133 +128,6 @@ class Framework
|
|
234
128
|
true
|
235
129
|
end
|
236
130
|
|
237
|
-
# @return [State::Framework]
|
238
|
-
def state
|
239
|
-
State.framework
|
240
|
-
end
|
241
|
-
|
242
|
-
# @return [Data::Framework]
|
243
|
-
def data
|
244
|
-
Data.framework
|
245
|
-
end
|
246
|
-
|
247
|
-
# @note Will update the {HTTP::Client#cookie_jar} with {Page#cookie_jar}.
|
248
|
-
# @note It will audit just the given `page` and not any subsequent pages
|
249
|
-
# discovered by the {Trainer} -- i.e. ignore any new elements that might
|
250
|
-
# appear as a result.
|
251
|
-
# @note It will pass the `page` to the {BrowserCluster} for analysis if the
|
252
|
-
# {Page::Scope#dom_depth_limit_reached? DOM depth limit} has
|
253
|
-
# not been reached and push resulting pages to {#push_to_page_queue} but
|
254
|
-
# will not audit those pages either.
|
255
|
-
#
|
256
|
-
# @param [Page] page
|
257
|
-
# Runs loaded checks against `page`
|
258
|
-
def audit_page( page )
|
259
|
-
return if !page
|
260
|
-
|
261
|
-
if page.scope.out?
|
262
|
-
print_info "Ignoring page due to exclusion criteria: #{page.dom.url}"
|
263
|
-
return false
|
264
|
-
end
|
265
|
-
|
266
|
-
# Initialize the BrowserCluster.
|
267
|
-
browser_cluster
|
268
|
-
|
269
|
-
state.audited_page_count += 1
|
270
|
-
add_to_sitemap( page )
|
271
|
-
sitemap.merge!( browser_sitemap )
|
272
|
-
|
273
|
-
print_line
|
274
|
-
print_status "[HTTP: #{page.code}] #{page.dom.url}"
|
275
|
-
|
276
|
-
if page.platforms.any?
|
277
|
-
print_info "Identified as: #{page.platforms.to_a.join( ', ' )}"
|
278
|
-
end
|
279
|
-
|
280
|
-
if crawl?
|
281
|
-
pushed = push_paths_from_page( page )
|
282
|
-
print_info "Analysis resulted in #{pushed.size} usable paths."
|
283
|
-
end
|
284
|
-
|
285
|
-
if host_has_browser?
|
286
|
-
print_info "DOM depth: #{page.dom.depth} (Limit: #{options.scope.dom_depth_limit})"
|
287
|
-
|
288
|
-
if page.dom.transitions.any?
|
289
|
-
print_info ' Transitions:'
|
290
|
-
page.dom.print_transitions( method(:print_info), ' ' )
|
291
|
-
end
|
292
|
-
end
|
293
|
-
|
294
|
-
# Aside from plugins and whatnot, the Trainer hooks here to update the
|
295
|
-
# ElementFilter so that it'll know if new elements appear during the
|
296
|
-
# audit, so it's a big deal.
|
297
|
-
notify_on_page_audit( page )
|
298
|
-
|
299
|
-
@current_url = page.dom.url.to_s
|
300
|
-
|
301
|
-
http.update_cookies( page.cookie_jar )
|
302
|
-
perform_browser_analysis( page )
|
303
|
-
|
304
|
-
# Remove elements which have already passed through here.
|
305
|
-
pre_audit_element_filter( page )
|
306
|
-
|
307
|
-
# Run checks which **don't** benefit from fingerprinting first, so that
|
308
|
-
# we can use the responses of their HTTP requests to fingerprint the
|
309
|
-
# webapp platforms, so that the checks which **do** benefit from knowing
|
310
|
-
# the remote platforms can run more efficiently.
|
311
|
-
ran = false
|
312
|
-
@checks.without_platforms.values.each do |check|
|
313
|
-
ran = true if check_page( check, page )
|
314
|
-
end
|
315
|
-
harvest_http_responses if ran
|
316
|
-
run_http = ran
|
317
|
-
|
318
|
-
ran = false
|
319
|
-
@checks.with_platforms.values.each do |check|
|
320
|
-
ran = true if check_page( check, page )
|
321
|
-
end
|
322
|
-
harvest_http_responses if ran
|
323
|
-
run_http ||= ran
|
324
|
-
|
325
|
-
if Check::Auditor.has_timeout_candidates?
|
326
|
-
print_line
|
327
|
-
print_status "Verifying timeout-analysis candidates for: #{page.dom.url}"
|
328
|
-
print_info '---------------------------------------'
|
329
|
-
Check::Auditor.timeout_audit_run
|
330
|
-
run_http = true
|
331
|
-
end
|
332
|
-
|
333
|
-
# Makes it easier on the GC.
|
334
|
-
page.clear_cache
|
335
|
-
|
336
|
-
notify_after_page_audit( page )
|
337
|
-
run_http
|
338
|
-
end
|
339
|
-
|
340
|
-
# @return [Bool]
|
341
|
-
# `true` if the environment has a browser, `false` otherwise.
|
342
|
-
def host_has_browser?
|
343
|
-
Browser.has_executable?
|
344
|
-
end
|
345
|
-
|
346
|
-
# @return [Bool]
|
347
|
-
# `true` if the {OptionGroups::Scope#page_limit} has been reached,
|
348
|
-
# `false` otherwise.
|
349
|
-
def page_limit_reached?
|
350
|
-
options.scope.page_limit_reached?( sitemap.size )
|
351
|
-
end
|
352
|
-
|
353
|
-
def crawl?
|
354
|
-
options.scope.crawl? && options.scope.restrict_paths.empty?
|
355
|
-
end
|
356
|
-
|
357
|
-
# @return [Bool]
|
358
|
-
# `true` if the framework can process more pages, `false` is scope limits
|
359
|
-
# have been reached.
|
360
|
-
def accepts_more_pages?
|
361
|
-
crawl? && !page_limit_reached?
|
362
|
-
end
|
363
|
-
|
364
131
|
# @return [Hash]
|
365
132
|
#
|
366
133
|
# Framework statistics:
|
@@ -382,901 +149,11 @@ class Framework
|
|
382
149
|
}
|
383
150
|
end
|
384
151
|
|
385
|
-
# @return [Array<String>]
|
386
|
-
# Messages providing more information about the current {#status} of
|
387
|
-
# the framework.
|
388
|
-
def status_messages
|
389
|
-
state.status_messages
|
390
|
-
end
|
391
|
-
|
392
|
-
# @param [Page] page
|
393
|
-
# Page to push to the page audit queue -- increases {#page_queue_total_size}
|
394
|
-
#
|
395
|
-
# @return [Bool]
|
396
|
-
# `true` if push was successful, `false` if the `page` matched any
|
397
|
-
# exclusion criteria or has already been seen.
|
398
|
-
def push_to_page_queue( page, force = false )
|
399
|
-
return false if !force && (!accepts_more_pages? || state.page_seen?( page ) ||
|
400
|
-
page.scope.out? || page.scope.redundant?)
|
401
|
-
|
402
|
-
# We want to update from the already loaded page cache (if there is one)
|
403
|
-
# as we have to store the page anyways (needs to go through Browser analysis)
|
404
|
-
# and it's not worth the resources to parse its elements.
|
405
|
-
#
|
406
|
-
# We're basically doing this to give the Browser and Trainer a better
|
407
|
-
# view of what elements have been seen, so that they won't feed us pages
|
408
|
-
# with elements that they think are new, but have been provided to us by
|
409
|
-
# some other component; however, it wouldn't be the end of the world if
|
410
|
-
# that were to happen.
|
411
|
-
ElementFilter.update_from_page_cache page
|
412
|
-
|
413
|
-
data.push_to_page_queue page
|
414
|
-
state.page_seen page
|
415
|
-
|
416
|
-
true
|
417
|
-
end
|
418
|
-
|
419
|
-
# @param [String] url
|
420
|
-
# URL to push to the audit queue -- increases {#url_queue_total_size}
|
421
|
-
#
|
422
|
-
# @return [Bool]
|
423
|
-
# `true` if push was successful, `false` if the `url` matched any
|
424
|
-
# exclusion criteria or has already been seen.
|
425
|
-
def push_to_url_queue( url, force = false )
|
426
|
-
return if !force && !accepts_more_pages?
|
427
|
-
|
428
|
-
url = to_absolute( url ) || url
|
429
|
-
if state.url_seen?( url ) || skip_path?( url ) || redundant_path?( url )
|
430
|
-
return false
|
431
|
-
end
|
432
|
-
|
433
|
-
data.push_to_url_queue url
|
434
|
-
state.url_seen url
|
435
|
-
|
436
|
-
true
|
437
|
-
end
|
438
|
-
|
439
|
-
# @return [Report]
|
440
|
-
# Scan results.
|
441
|
-
def report
|
442
|
-
opts = options.to_hash.deep_clone
|
443
|
-
|
444
|
-
# restore the original redundancy rules and their counters
|
445
|
-
opts[:scope][:redundant_path_patterns] = @orig_redundant
|
446
|
-
|
447
|
-
Report.new(
|
448
|
-
options: options,
|
449
|
-
sitemap: sitemap,
|
450
|
-
issues: Data.issues.sort,
|
451
|
-
plugins: @plugins.results,
|
452
|
-
start_datetime: @start_datetime,
|
453
|
-
finish_datetime: @finish_datetime
|
454
|
-
)
|
455
|
-
end
|
456
|
-
|
457
|
-
# Runs a reporter component and returns the contents of the generated report.
|
458
|
-
#
|
459
|
-
# Only accepts reporters which support an `outfile` option.
|
460
|
-
#
|
461
|
-
# @param [String] name
|
462
|
-
# Name of the reporter component to run, as presented by {#list_reporters}'s
|
463
|
-
# `:shortname` key.
|
464
|
-
# @param [Report] external_report
|
465
|
-
# Report to use -- defaults to the local one.
|
466
|
-
#
|
467
|
-
# @return [String]
|
468
|
-
# Scan report.
|
469
|
-
#
|
470
|
-
# @raise [Component::Error::NotFound]
|
471
|
-
# If the given reporter name doesn't correspond to a valid reporter component.
|
472
|
-
#
|
473
|
-
# @raise [Component::Options::Error::Invalid]
|
474
|
-
# If the requested reporter doesn't format the scan results as a String.
|
475
|
-
def report_as( name, external_report = report )
|
476
|
-
if !@reporters.available.include?( name.to_s )
|
477
|
-
fail Component::Error::NotFound, "Reporter '#{name}' could not be found."
|
478
|
-
end
|
479
|
-
|
480
|
-
loaded = @reporters.loaded
|
481
|
-
begin
|
482
|
-
@reporters.clear
|
483
|
-
|
484
|
-
if !@reporters[name].has_outfile?
|
485
|
-
fail Component::Options::Error::Invalid,
|
486
|
-
"Reporter '#{name}' cannot format the audit results as a String."
|
487
|
-
end
|
488
|
-
|
489
|
-
outfile = "#{Dir.tmpdir}/#{generate_token}"
|
490
|
-
@reporters.run( name, external_report, outfile: outfile )
|
491
|
-
|
492
|
-
IO.binread( outfile )
|
493
|
-
ensure
|
494
|
-
File.delete( outfile ) if outfile
|
495
|
-
@reporters.clear
|
496
|
-
@reporters.load loaded
|
497
|
-
end
|
498
|
-
end
|
499
|
-
|
500
|
-
# @return [Array<Hash>]
|
501
|
-
# Information about all available {Checks}.
|
502
|
-
def list_checks( patterns = nil )
|
503
|
-
loaded = @checks.loaded
|
504
|
-
|
505
|
-
begin
|
506
|
-
@checks.clear
|
507
|
-
@checks.available.map do |name|
|
508
|
-
path = @checks.name_to_path( name )
|
509
|
-
next if !list_check?( path, patterns )
|
510
|
-
|
511
|
-
@checks[name].info.merge(
|
512
|
-
shortname: name,
|
513
|
-
author: [@checks[name].info[:author]].
|
514
|
-
flatten.map { |a| a.strip },
|
515
|
-
path: path.strip,
|
516
|
-
platforms: @checks[name].platforms,
|
517
|
-
elements: @checks[name].elements
|
518
|
-
)
|
519
|
-
end.compact
|
520
|
-
ensure
|
521
|
-
@checks.clear
|
522
|
-
@checks.load loaded
|
523
|
-
end
|
524
|
-
end
|
525
|
-
|
526
|
-
# @return [Array<Hash>]
|
527
|
-
# Information about all available {Reporters}.
|
528
|
-
def list_reporters( patterns = nil )
|
529
|
-
loaded = @reporters.loaded
|
530
|
-
|
531
|
-
begin
|
532
|
-
@reporters.clear
|
533
|
-
@reporters.available.map do |report|
|
534
|
-
path = @reporters.name_to_path( report )
|
535
|
-
next if !list_reporter?( path, patterns )
|
536
|
-
|
537
|
-
@reporters[report].info.merge(
|
538
|
-
options: @reporters[report].info[:options] || [],
|
539
|
-
shortname: report,
|
540
|
-
path: path,
|
541
|
-
author: [@reporters[report].info[:author]].
|
542
|
-
flatten.map { |a| a.strip }
|
543
|
-
)
|
544
|
-
end.compact
|
545
|
-
ensure
|
546
|
-
@reporters.clear
|
547
|
-
@reporters.load loaded
|
548
|
-
end
|
549
|
-
end
|
550
|
-
|
551
|
-
# @return [Array<Hash>]
|
552
|
-
# Information about all available {Plugins}.
|
553
|
-
def list_plugins( patterns = nil )
|
554
|
-
loaded = @plugins.loaded
|
555
|
-
|
556
|
-
begin
|
557
|
-
@plugins.clear
|
558
|
-
@plugins.available.map do |plugin|
|
559
|
-
path = @plugins.name_to_path( plugin )
|
560
|
-
next if !list_plugin?( path, patterns )
|
561
|
-
|
562
|
-
@plugins[plugin].info.merge(
|
563
|
-
options: @plugins[plugin].info[:options] || [],
|
564
|
-
shortname: plugin,
|
565
|
-
path: path,
|
566
|
-
author: [@plugins[plugin].info[:author]].
|
567
|
-
flatten.map { |a| a.strip }
|
568
|
-
)
|
569
|
-
end.compact
|
570
|
-
ensure
|
571
|
-
@plugins.clear
|
572
|
-
@plugins.load loaded
|
573
|
-
end
|
574
|
-
end
|
575
|
-
|
576
|
-
# @return [Array<Hash>]
|
577
|
-
# Information about all available platforms.
|
578
|
-
def list_platforms
|
579
|
-
platforms = Platform::Manager.new
|
580
|
-
platforms.valid.inject({}) do |h, platform|
|
581
|
-
type = Platform::Manager::TYPES[platforms.find_type( platform )]
|
582
|
-
h[type] ||= {}
|
583
|
-
h[type][platform] = platforms.fullname( platform )
|
584
|
-
h
|
585
|
-
end
|
586
|
-
end
|
587
|
-
|
588
|
-
# @return [Symbol]
|
589
|
-
# Status of the instance, possible values are (in order):
|
590
|
-
#
|
591
|
-
# * `:ready` -- {#initialize Initialised} and waiting for instructions.
|
592
|
-
# * `:preparing` -- Getting ready to start (i.e. initializing plugins etc.).
|
593
|
-
# * `:scanning` -- The instance is currently {#run auditing} the webapp.
|
594
|
-
# * `:pausing` -- The instance is being {#pause paused} (if applicable).
|
595
|
-
# * `:paused` -- The instance has been {#pause paused} (if applicable).
|
596
|
-
# * `:suspending` -- The instance is being {#suspend suspended} (if applicable).
|
597
|
-
# * `:suspended` -- The instance has being {#suspend suspended} (if applicable).
|
598
|
-
# * `:cleanup` -- The scan has completed and the instance is
|
599
|
-
# {Framework#clean_up cleaning up} after itself (i.e. waiting for
|
600
|
-
# plugins to finish etc.).
|
601
|
-
# * `:aborted` -- The scan has been {Framework#abort}, you can grab the
|
602
|
-
# report and shutdown.
|
603
|
-
# * `:done` -- The scan has completed, you can grab the report and shutdown.
|
604
|
-
def status
|
605
|
-
state.status
|
606
|
-
end
|
607
|
-
|
608
|
-
# @return [Bool]
|
609
|
-
# `true` if the framework is running, `false` otherwise. This is `true`
|
610
|
-
# even if the scan is {#paused?}.
|
611
|
-
def running?
|
612
|
-
state.running?
|
613
|
-
end
|
614
|
-
|
615
|
-
# @return [Bool]
|
616
|
-
# `true` if the system is scanning, `false` otherwise.
|
617
|
-
def scanning?
|
618
|
-
state.scanning?
|
619
|
-
end
|
620
|
-
|
621
|
-
# @return [Bool]
|
622
|
-
# `true` if the framework is paused, `false` otherwise.
|
623
|
-
def paused?
|
624
|
-
state.paused?
|
625
|
-
end
|
626
|
-
|
627
|
-
# @return [Bool]
|
628
|
-
# `true` if the framework has been instructed to pause (i.e. is in the
|
629
|
-
# process of being paused or has been paused), `false` otherwise.
|
630
|
-
def pause?
|
631
|
-
state.pause?
|
632
|
-
end
|
633
|
-
|
634
|
-
# @return [Bool]
|
635
|
-
# `true` if the framework is in the process of pausing, `false` otherwise.
|
636
|
-
def pausing?
|
637
|
-
state.pausing?
|
638
|
-
end
|
639
|
-
|
640
|
-
# @note Each call from a unique caller is counted as a pause request
|
641
|
-
# and in order for the system to resume **all** pause callers need to
|
642
|
-
# {#resume} it.
|
643
|
-
#
|
644
|
-
# Pauses the framework on a best effort basis.
|
645
|
-
#
|
646
|
-
# @param [Bool] wait
|
647
|
-
# Wait until the system has been paused.
|
648
|
-
#
|
649
|
-
# @return [Integer]
|
650
|
-
# ID identifying this pause request.
|
651
|
-
def pause( wait = true )
|
652
|
-
id = generate_token.hash
|
653
|
-
state.pause id, wait
|
654
|
-
id
|
655
|
-
end
|
656
|
-
|
657
|
-
# @return [Bool]
|
658
|
-
# `true` if the framework {#run} has been aborted, `false` otherwise.
|
659
|
-
def aborted?
|
660
|
-
state.aborted?
|
661
|
-
end
|
662
|
-
|
663
|
-
# @return [Bool]
|
664
|
-
# `true` if the framework has been instructed to abort (i.e. is in the
|
665
|
-
# process of being aborted or has been aborted), `false` otherwise.
|
666
|
-
def abort?
|
667
|
-
state.abort?
|
668
|
-
end
|
669
|
-
|
670
|
-
# @return [Bool]
|
671
|
-
# `true` if the framework is in the process of aborting, `false` otherwise.
|
672
|
-
def aborting?
|
673
|
-
state.aborting?
|
674
|
-
end
|
675
|
-
|
676
|
-
# Aborts the framework {#run} on a best effort basis.
|
677
|
-
#
|
678
|
-
# @param [Bool] wait
|
679
|
-
# Wait until the system has been aborted.
|
680
|
-
def abort( wait = true )
|
681
|
-
state.abort wait
|
682
|
-
end
|
683
|
-
|
684
|
-
# @note Each call from a unique caller is counted as a pause request
|
685
|
-
# and in order for the system to resume **all** pause callers need to
|
686
|
-
# {#resume} it.
|
687
|
-
#
|
688
|
-
# Removes a {#pause} request for the current caller.
|
689
|
-
#
|
690
|
-
# @param [Integer] id
|
691
|
-
# ID of the {#pause} request.
|
692
|
-
def resume( id )
|
693
|
-
state.resume id
|
694
|
-
end
|
695
|
-
|
696
|
-
# Writes a {Snapshot.dump} to disk and aborts the scan.
|
697
|
-
#
|
698
|
-
# @param [Bool] wait
|
699
|
-
# Wait for the system to write it state to disk.
|
700
|
-
#
|
701
|
-
# @return [String,nil]
|
702
|
-
# Path to the state file `wait` is `true`, `nil` otherwise.
|
703
|
-
def suspend( wait = true )
|
704
|
-
state.suspend( wait )
|
705
|
-
return snapshot_path if wait
|
706
|
-
nil
|
707
|
-
end
|
708
|
-
|
709
|
-
# @return [Bool]
|
710
|
-
# `true` if the system is in the process of being suspended, `false`
|
711
|
-
# otherwise.
|
712
|
-
def suspend?
|
713
|
-
state.suspend?
|
714
|
-
end
|
715
|
-
|
716
|
-
# @return [Bool]
|
717
|
-
# `true` if the system has been suspended, `false` otherwise.
|
718
|
-
def suspended?
|
719
|
-
state.suspended?
|
720
|
-
end
|
721
|
-
|
722
|
-
# @return [String]
|
723
|
-
# Provisioned {#suspend} dump file for this instance.
|
724
|
-
def snapshot_path
|
725
|
-
return @state_archive if @state_archive
|
726
|
-
|
727
|
-
default_filename =
|
728
|
-
"#{URI(options.url).host} #{Time.now.to_s.gsub( ':', '_' )} #{generate_token}.afs"
|
729
|
-
|
730
|
-
location = options.snapshot.save_path
|
731
|
-
|
732
|
-
if !location
|
733
|
-
location = default_filename
|
734
|
-
elsif File.directory? location
|
735
|
-
location += "/#{default_filename}"
|
736
|
-
end
|
737
|
-
|
738
|
-
@state_archive ||= File.expand_path( location )
|
739
|
-
end
|
740
|
-
|
741
|
-
# @param [String] afs
|
742
|
-
# Path to an `.afs.` (Arachni Framework Snapshot) file created by {#suspend}.
|
743
|
-
#
|
744
|
-
# @return [Framework]
|
745
|
-
# Restored instance.
|
746
|
-
def restore( afs )
|
747
|
-
Snapshot.load afs
|
748
|
-
|
749
|
-
browser_job_update_skip_states state.browser_skip_states
|
750
|
-
|
751
|
-
checks.load Options.checks
|
752
|
-
plugins.load Options.plugins.keys
|
753
|
-
|
754
|
-
nil
|
755
|
-
end
|
756
|
-
|
757
|
-
def wait_for_browser?
|
758
|
-
@browser_cluster && !browser_cluster.done?
|
759
|
-
end
|
760
|
-
|
761
|
-
# Cleans up the framework; should be called after running the audit or
|
762
|
-
# after canceling a running scan.
|
763
|
-
#
|
764
|
-
# It stops the clock and waits for the plugins to finish up.
|
765
|
-
def clean_up( shutdown_browsers = true )
|
766
|
-
return if @cleaned_up
|
767
|
-
@cleaned_up = true
|
768
|
-
|
769
|
-
state.status = :cleanup
|
770
|
-
|
771
|
-
sitemap.merge!( browser_sitemap )
|
772
|
-
|
773
|
-
if shutdown_browsers
|
774
|
-
state.set_status_message :browser_cluster_shutdown
|
775
|
-
shutdown_browser_cluster
|
776
|
-
end
|
777
|
-
|
778
|
-
state.set_status_message :clearing_queues
|
779
|
-
page_queue.clear
|
780
|
-
url_queue.clear
|
781
|
-
|
782
|
-
@finish_datetime = Time.now
|
783
|
-
@start_datetime ||= Time.now
|
784
|
-
|
785
|
-
# Make sure this is disabled or it'll break reporter output.
|
786
|
-
disable_only_positives
|
787
|
-
|
788
|
-
state.running = false
|
789
|
-
|
790
|
-
state.set_status_message :waiting_for_plugins
|
791
|
-
@plugins.block
|
792
|
-
|
793
|
-
# Plugins may need the session right till the very end so save it for last.
|
794
|
-
@session.clean_up
|
795
|
-
@session = nil
|
796
|
-
|
797
|
-
true
|
798
|
-
end
|
799
|
-
|
800
|
-
# @return   [Object, nil]
#   Whatever the browser cluster reports as skip-states for the shared
#   browser job, or `nil` when no cluster has been created.
def browser_job_skip_states
    browser_cluster.skip_states( browser_job.id ) if @browser_cluster
end
|
804
|
-
|
805
152
|
# @return   [String]
#   Version of the framework, as held by `Arachni::VERSION`.
def version
    Arachni::VERSION
end
|
810
157
|
|
811
|
-
# @note Prefer this from {.reset} if you already have an instance.
# @note You should first reset {Arachni::Options}.
#
# Resets the instance state and all components so that the framework can be
# re-used.
#
# @return   [Object]
#   Result of clearing the plugin manager (the last step).
def reset
    @cleaned_up  = false
    @browser_job = nil

    [@failures, @retries].each( &:clear )

    # The class-level reset has to come before the component resets below,
    # so that those components are able to put in their hooks.
    self.class.reset

    clear_observers
    reset_trainer
    reset_session

    [@checks, @reporters].each( &:clear )
    @plugins.clear
end
|
833
|
-
|
834
|
-
# @note You should first reset {Arachni::Options}.
#
# Resets the shared framework environment (state, data and the class-level
# components) so that it can be re-used.
#
# @return   [Object]
#   Result of `HTTP::Client.reset` (the last step).
def self.reset
    [State, Data].each( &:clear )

    [
        Platform::Manager,
        Check::Auditor,
        ElementFilter,
        Element::Capabilities::Auditable,
        Element::Capabilities::Analyzable,
        Check::Manager,
        Plugin::Manager,
        Reporter::Manager
    ].each( &:reset )

    HTTP::Client.reset
end
|
851
|
-
|
852
|
-
# Replaces the current trainer with a fresh one bound to this framework.
#
# @return   [Trainer]   The newly created trainer.
#
# @private
def reset_trainer
    @trainer = Trainer.new( self )
end
|
856
|
-
|
857
|
-
private
|
858
|
-
|
859
|
-
# Shuts down the browser cluster, if one has been created, and forgets both
# the cluster and the shared browser job.
#
# @return   [nil]
def shutdown_browser_cluster
    if @browser_cluster
        browser_cluster.shutdown

        @browser_cluster = nil
        @browser_job = nil
    end
end
|
867
|
-
|
868
|
-
# Offers every path of the page to the URL queue.
#
# @param    [Page]  page
#
# @return   [Array]
#   The paths for which {#push_to_url_queue} returned a truthy value.
def push_paths_from_page( page )
    page.paths.select( &method( :push_to_url_queue ) )
end
|
871
|
-
|
872
|
-
# @return   [Hash]
#   Sitemap of the browser cluster, or an empty Hash when no cluster has been
#   created.
def browser_sitemap
    @browser_cluster ? browser_cluster.sitemap : {}
end
|
876
|
-
|
877
|
-
# Hands the given skip-states over to the browser cluster for the shared
# browser job; does nothing when `states` is empty.
#
# @param    [#empty?]   states
#
# @return   [Object, nil]
#   Result of the cluster update, `nil` when there was nothing to update.
def browser_job_update_skip_states( states )
    unless states.empty?
        browser_cluster.update_skip_states( browser_job.id, states )
    end
end
|
881
|
-
|
882
|
-
# Cleans up the current session (if there is one) and replaces it with a
# brand new one.
#
# @return   [Session]   The new session.
def reset_session
    stale = @session
    stale.clean_up if stale

    @session = Session.new
end
|
886
|
-
|
887
|
-
# Cleans up and marks the state as aborted when an abort has been requested.
#
# @return   [Object, nil]
#   Result of `state.aborted`, or `nil` when no abort is pending.
def abort_if_signaled
    if abort?
        clean_up
        state.aborted
    end
end
|
892
|
-
|
893
|
-
# Suspends the scan to disk when a suspension has been requested.
#
# @return   [Object, nil]
#   Result of {#suspend_to_disk}, or `nil` when no suspension is pending.
def suspend_if_signaled
    suspend_to_disk if suspend?
end
|
897
|
-
|
898
|
-
# Waits for the browser cluster to settle, dumps a snapshot of the scan to
# `snapshot_path`, cleans up and finally marks the state as suspended.
def suspend_to_disk
    # Let pending browser jobs finish, reporting the count whenever it changes.
    while wait_for_browser?
        previous_count ||= 0
        current_count = browser_cluster.pending_job_counter

        unless current_count == previous_count
            state.set_status_message :waiting_for_browser_cluster_jobs, current_count
            print_info "Suspending: #{status_messages.first}"
        end

        previous_count = current_count
        sleep 0.1
    end

    # Make sure the component options are up to date with what's actually
    # happening.
    options.checks = checks.loaded
    options.plugins = plugins.loaded.each_with_object( {} ) do |name, loaded|
        key = name.to_s
        loaded[key] = Options.plugins[key] || {}
    end

    state.browser_skip_states.merge( browser_job_skip_states ) if browser_job_skip_states

    state.set_status_message :suspending_plugins
    @plugins.suspend

    state.set_status_message :saving_snapshot, snapshot_path
    Snapshot.dump( snapshot_path )
    state.clear_status_messages

    clean_up

    state.set_status_message :snapshot_location, snapshot_path
    print_info status_messages.first
    state.suspended
end
|
935
|
-
|
936
|
-
# Reacts to pending control signals, in this order: pause, abort, suspend.
#
# @return   [Object, nil]   Result of {#suspend_if_signaled}.
def handle_signals
    wait_if_paused
    abort_if_signaled
    suspend_if_signaled
end
|
941
|
-
|
942
|
-
# Marks the state as paused when a pause is pending and then blocks, polling
# every 0.2 seconds, until the pause is lifted or an abort is requested.
#
# @return   [nil]
def wait_if_paused
    state.paused if pause?

    until !pause? || abort?
        sleep 0.2
    end
end
|
946
|
-
|
947
|
-
# @note Must be called before calling any audit methods.
#
# Prepares the framework for the audit.
#
# * Sets the status to `:preparing`.
# * Starts the clock.
# * Runs the plugins -- or restores them, if the scan was loaded from a
#   snapshot.
def prepare
    state.status = :preparing
    state.running = true
    @start_datetime = Time.now

    if Snapshot.restored?
        @plugins.restore
    else
        @plugins.run
    end
end
|
961
|
-
|
962
|
-
# Handles a page produced by the browser cluster: pushes it to the page
# queue and, if it was accepted, pushes its paths (when crawling) and prints
# a summary. Runs inside `synchronize`.
#
# @param    [Page]  page
def handle_browser_page( page )
    synchronize do
        return unless push_to_page_queue( page )

        usable_paths = crawl? ? push_paths_from_page( page ).size : nil

        print_status "Got new page from the browser-cluster: #{page.dom.url}"
        print_info "DOM depth: #{page.dom.depth} (Limit: #{options.scope.dom_depth_limit})"

        if page.dom.transitions.any?
            print_info ' Transitions:'
            page.dom.print_transitions( method(:print_info), ' ' )
        end

        print_info " -- Analysis resulted in #{usable_paths} usable paths." if usable_paths
    end
end
|
984
|
-
|
985
|
-
# Passes the `page` to {BrowserCluster#queue} and then pushes
# the resulting pages to {#push_to_page_queue}.
#
# Nothing is queued when there is no browser cluster, when no more pages are
# accepted, when the page's DOM is already at the depth limit or when the
# page has no script.
#
# @param    [Page]  page
#   Page to analyze.
#
# @return   [true, nil]
#   `true` if the page was queued for analysis, `nil` otherwise.
def perform_browser_analysis( page )
    return if !browser_cluster
    return if !accepts_more_pages?
    return if Options.scope.dom_depth_limit.to_i < page.dom.depth + 1
    return if !page.has_script?

    on_response = proc { |response| handle_browser_page response.page }
    browser_cluster.queue( browser_job.forward( resource: page ), &on_response )

    true
end
|
1001
|
-
|
1002
|
-
# Lazily creates and then re-uses a single resource-exploration job.
#
# The same job is recycled because all of them would have the same callback;
# this forces the BrowserCluster to use the same block for all queued jobs.
#
# The job is created as never-ending so that all analysis operations share
# the same state.
#
# @return   [BrowserCluster::Jobs::ResourceExploration]
def browser_job
    return @browser_job if @browser_job

    @browser_job = BrowserCluster::Jobs::ResourceExploration.new( never_ending: true )
end
|
1013
|
-
|
1014
|
-
# Performs the audit.
#
# Seeds the URL queue with the target URL and the extend/restrict paths from
# the scope options, initializes the browser cluster and then keeps auditing
# the queues until the page limit is reached or there is neither queued
# workload nor pending browser work.
#
# Returns early, without auditing, if the scan has been aborted.
def audit
    handle_signals
    return if aborted?

    state.status = :scanning if !pausing?

    push_to_url_queue( options.url )
    options.scope.extend_paths.each { |url| push_to_url_queue( url ) }
    # Restrict-paths are pushed with a second argument of `true`.
    options.scope.restrict_paths.each { |url| push_to_url_queue( url, true ) }

    # Initialize the BrowserCluster.
    browser_cluster

    # Keep auditing until there are no more resources in the queues and the
    # browsers have stopped spinning.
    loop do
        # Only announce the wait once per outer iteration.
        show_workload_msg = true
        while !has_audit_workload? && wait_for_browser?
            if show_workload_msg
                print_line
                print_status 'Workload exhausted, waiting for new pages' <<
                    ' from the browser-cluster...'
            end
            show_workload_msg = false

            # `last_pending_jobs` is local to the `loop` block, so it starts
            # over on every outer iteration.
            last_pending_jobs ||= 0
            pending_jobs = browser_cluster.pending_job_counter
            if pending_jobs != last_pending_jobs
                browser_cluster.print_info "Pending jobs: #{pending_jobs}"
            end
            last_pending_jobs = pending_jobs

            sleep 0.1
        end

        audit_queues

        break if page_limit_reached?
        break if !has_audit_workload? && !wait_for_browser?
    end
end
|
1056
|
-
|
1057
|
-
# @return   [Boolean]
#   `true` if either the URL queue or the page queue has entries.
def has_audit_workload?
    !(url_queue.empty? && page_queue.empty?)
end
|
1060
|
-
|
1061
|
-
# Shortcut to the page queue held by `data`.
def page_queue
    data.page_queue
end
|
1064
|
-
|
1065
|
-
# Shortcut to the URL queue held by `data`.
def url_queue
    data.url_queue
end
|
1068
|
-
|
1069
|
-
# Audits the {Data::Framework.url_queue URL} and {Data::Framework.page_queue Page}
# queues while maintaining a valid session with the webapp if we've got
# login capabilities.
#
# @return   [true, nil]
#   `true` once the queues have been processed; `nil` if a previous call is
#   still in progress, there is no workload or the page limit has been
#   reached.
def audit_queues
    # `@audit_queues_done` is only `false` between the assignment below and
    # the one at the end of the method, i.e. while a call is in progress.
    return if @audit_queues_done == false || !has_audit_workload? ||
        page_limit_reached?

    @audit_queues_done = false

    # If for some reason we've got pages in the page queue this early,
    # consume them and get it over with.
    audit_page_queue

    next_page = nil
    # Use the pre-fetched page when there is one, otherwise pop one directly.
    while !suspended? && !page_limit_reached? &&
        (page = next_page || pop_page_from_url_queue)

        # Helps us schedule the next page to be grabbed along with the audit
        # requests for the current page to avoid blocking.
        next_page = nil
        next_page_call = proc do
            pop_page_from_url_queue { |p| next_page = p }
        end

        # If we have login capabilities make sure that our session is valid
        # before grabbing and auditing the next page.
        if session.can_login?
            # Schedule the login check to happen along with the audit requests
            # to prevent blocking and grab the next page as well.
            session.logged_in? do |bool|
                next next_page_call.call if bool

                session.login
                # NOTE(review): the proc is only referenced here, not called,
                # so after a re-login `next_page` stays nil and the loop
                # condition falls back to popping the page itself. Confirm
                # whether `.call` was intended.
                next_page_call
            end
        else
            next_page_call.call
        end

        # We're counting on piggybacking the next page retrieval with the
        # page audit, however if there wasn't an audit we need to force an
        # HTTP run.
        audit_page( page ) or http.run

        # If a suspension is pending, put the pre-fetched page on the page
        # queue.
        if next_page && suspend?
            data.page_queue << next_page
        end

        handle_signals

        # Consume pages somehow triggered by the audit and pushed by the
        # trainer or plugins or whatever.
        audit_page_queue
    end

    audit_page_queue

    @audit_queues_done = true
    true
end
|
1129
|
-
|
1130
|
-
# Pops a URL from the URL queue and requests its page.
#
# Without a block, `http.run` is called right away and the grabbed page is
# returned. With a block, the HTTP run is left to the caller and the block is
# called with the page, or with `nil` when no response could be obtained.
#
# A `Location` response header gets the redirection target pushed to the URL
# queue. A response code of `0` is retried by re-queueing the URL, until
# `AUDIT_PAGE_MAX_TRIES` is reached, at which point the URL is recorded in
# `@failures`.
#
# @param    [Block] block
#   Optional callback receiving the page (or `nil`).
#
# @return   [Page, nil]
#   `nil` if the URL queue is empty; otherwise the value of `grabbed_page` at
#   return time (presumably still `nil` when a block is given and the request
#   has not been run yet -- TODO confirm).
def pop_page_from_url_queue( &block )
    return if url_queue.empty?

    grabbed_page = nil
    Page.from_url( url_queue.pop, http: { update_cookies: true } ) do |page|
        # Retry counters are keyed by the hash of the page URL.
        @retries[page.url.hash] ||= 0

        if (location = page.response.headers['Location'])
            print_info "Scheduled #{page.code} redirection: #{page.url} => #{location}"
            push_to_url_queue to_absolute( location, page.url )
        end

        # A non-zero code means we got a response, hand the page over.
        if page.code != 0
            grabbed_page = page
            block.call grabbed_page if block_given?
            next
        end

        if @retries[page.url.hash] >= AUDIT_PAGE_MAX_TRIES
            @failures << page.url

            print_error "Giving up trying to audit: #{page.url}"
            print_error "Couldn't get a response after #{AUDIT_PAGE_MAX_TRIES} tries."
        else
            print_bad "Retrying for: #{page.url}"
            @retries[page.url.hash] += 1
            url_queue << page.url
        end

        # No usable response this time around, let the caller know.
        grabbed_page = nil
        block.call grabbed_page if block_given?
    end
    http.run if !block_given?
    grabbed_page
end
|
1165
|
-
|
1166
|
-
# Audits the page queue.
#
# Pages are popped and audited one by one, handling signals after each of
# them, until the queue runs dry, the page limit is reached or the scan gets
# suspended.
#
# @see #pop_page_from_queue
def audit_page_queue
    until suspended? || page_limit_reached?
        page = pop_page_from_queue
        break if !page

        audit_page( page )
        handle_signals
    end
end
|
1175
|
-
|
1176
|
-
# @return   [Page, nil]
#   Next page from the page queue, `nil` if the queue is empty.
def pop_page_from_queue
    page_queue.pop unless page_queue.empty?
end
|
1181
|
-
|
1182
|
-
# Announces the harvesting and then runs the HTTP client twice.
#
# @return   [Object]    Result of the second `http.run`.
def harvest_http_responses
    notice = 'Depending on server responsiveness and network conditions ' \
        'this may take a while.'

    print_status 'Harvesting HTTP responses...'
    print_info notice

    # First pass: run all the queued HTTP requests and harvest the responses.
    http.run

    # Second pass: needed for some HTTP callbacks.
    http.run
end
|
1193
|
-
|
1194
|
-
# Passes a page to the check and runs it.
# Errors raised by the check at runtime are printed rather than propagated.
#
# @param    [Check::Base]   check
#   Check to run.
# @param    [Page]  page
#
# @return   [Object, false]
#   Result of running the check, `false` if it raised an error.
def check_page( check, page )
    @checks.run_one( check, page )
rescue => e
    print_error "Error in #{check}: #{e}"
    print_error_backtrace e
    false
end
|
1209
|
-
|
1210
|
-
# Small but (sometimes) important optimization:
#
# Keep track of page elements which have already been passed to checks,
# in order to filter them out and hopefully even avoid running checks
# against pages with no new elements.
#
# It's not like there were going to be redundant audits anyways, because
# each layer of the audit performs its own redundancy checks, but those
# redundancy checks can introduce significant latencies when dealing
# with pages with lots of elements.
#
# Cookies, headers and element types that are not enabled for auditing are
# left untouched.
#
# @param    [Page]  page
#
# @return   [Page]
#   The same page, with already-seen elements removed from its per-type
#   element lists.
def pre_audit_element_filter( page )
    seen_by_type = {}

    page.elements.each do |element|
        next if !Options.audit.element?( element.type )
        next if element.is_a?( Cookie ) || element.is_a?( Header )

        bucket = (seen_by_type[element.type] ||= [])

        # An element only counts as redundant if neither it nor its DOM
        # counterpart (when it has one) is new.
        already_seen = true

        if !state.element_checked?( element )
            state.element_checked element
            already_seen = false
        end

        if element.respond_to?( :dom ) && element.dom &&
            !state.element_checked?( element.dom )

            state.element_checked element.dom
            already_seen = false
        end

        bucket << element if already_seen
    end

    # Remove redundant elements from the page cache, if there are thousands
    # of them then just skipping them during the audit will introduce latency.
    seen_by_type.each do |type, elements|
        remaining = page.send( "#{type}s" ) - elements
        page.send( "#{type}s=", remaining )
    end

    page
end
|
1254
|
-
|
1255
|
-
# Delegates to `data.add_page_to_sitemap`.
#
# @param    [Page]  page
def add_to_sitemap( page )
    data.add_page_to_sitemap( page )
end
|
1258
|
-
|
1259
|
-
# Decides whether a reporter path passes the given patterns, by delegating
# to {#regexp_array_match}.
#
# @param    [String]    path
# @param    [String, Regexp, Array, nil]    patterns
def list_reporter?( path, patterns = nil )
    regexp_array_match( patterns, path )
end
|
1262
|
-
|
1263
|
-
# Decides whether a check path passes the given patterns, by delegating to
# {#regexp_array_match}.
#
# @param    [String]    path
# @param    [String, Regexp, Array, nil]    patterns
def list_check?( path, patterns = nil )
    regexp_array_match( patterns, path )
end
|
1266
|
-
|
1267
|
-
# Decides whether a plugin path passes the given patterns, by delegating to
# {#regexp_array_match}.
#
# @param    [String]    path
# @param    [String, Regexp, Array, nil]    patterns
def list_plugin?( path, patterns = nil )
    regexp_array_match( patterns, path )
end
|
1270
|
-
|
1271
|
-
# Checks whether `str` matches every one of the given patterns.
#
# @param    [String, Regexp, Array<String, Regexp>, nil]    regexps
#   A single pattern or a (possibly nested) list of patterns; `nil` entries
#   are ignored and anything that is not a Regexp is converted to one via
#   its String form.
# @param    [String]    str
#   String to match.
#
# @return   [Boolean]
#   `true` if all patterns match or if there are no patterns at all,
#   `false` otherwise.
def regexp_array_match( regexps, str )
    # All patterns are compiled up front, so an invalid one still raises
    # even if an earlier pattern does not match.
    filters = [regexps].flatten.compact.
        map { |s| s.is_a?( Regexp ) ? s : Regexp.new( s.to_s ) }

    # `all?` is true for an empty list and stops at the first mismatch.
    filters.all? { |filter| str =~ filter }
end
|
1280
|
-
|
1281
158
|
end
|
1282
159
|
end
|