arachni 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -0
  3. data/README.md +8 -4
  4. data/bin/arachni_console +1 -1
  5. data/components/checks/active/no_sql_injection.rb +4 -4
  6. data/components/checks/passive/common_directories/directories.txt +1 -0
  7. data/components/checks/passive/common_files/filenames.txt +1 -0
  8. data/components/plugins/login_script.rb +156 -0
  9. data/components/reporters/plugin_formatters/html/login_script.rb +48 -0
  10. data/components/reporters/plugin_formatters/stdout/login_script.rb +23 -0
  11. data/components/reporters/plugin_formatters/xml/login_script.rb +26 -0
  12. data/components/reporters/xml/schema.xsd +17 -0
  13. data/lib/arachni/browser.rb +7 -4
  14. data/lib/arachni/browser/javascript.rb +40 -4
  15. data/lib/arachni/browser/javascript/proxy.rb +1 -1
  16. data/lib/arachni/browser_cluster/worker.rb +14 -4
  17. data/lib/arachni/check/auditor.rb +24 -7
  18. data/lib/arachni/check/manager.rb +6 -0
  19. data/lib/arachni/framework.rb +54 -6
  20. data/lib/arachni/http/client.rb +41 -23
  21. data/lib/arachni/http/headers.rb +5 -1
  22. data/lib/arachni/http/message.rb +0 -7
  23. data/lib/arachni/http/request.rb +40 -32
  24. data/lib/arachni/http/response.rb +8 -1
  25. data/lib/arachni/platform/manager.rb +7 -0
  26. data/lib/arachni/rpc/server/framework/multi_instance.rb +1 -1
  27. data/lib/arachni/session.rb +88 -58
  28. data/lib/arachni/state/framework.rb +34 -5
  29. data/lib/arachni/support/profiler.rb +2 -0
  30. data/lib/arachni/uri.rb +2 -1
  31. data/lib/version +1 -1
  32. data/spec/arachni/browser/javascript_spec.rb +15 -0
  33. data/spec/arachni/check/manager_spec.rb +17 -0
  34. data/spec/arachni/framework_spec.rb +4 -2
  35. data/spec/arachni/http/client_spec.rb +1 -1
  36. data/spec/arachni/session_spec.rb +80 -37
  37. data/spec/arachni/state/framework_spec.rb +34 -1
  38. data/spec/arachni/uri_spec.rb +7 -0
  39. data/spec/components/plugins/login_script_spec.rb +157 -0
  40. data/spec/support/servers/plugins/login_script.rb +13 -0
  41. data/ui/cli/output.rb +26 -9
  42. metadata +11 -3
@@ -29,6 +29,10 @@ class Javascript
29
29
  # Filesystem directory containing the JS scripts.
30
30
  SCRIPT_LIBRARY = "#{File.dirname( __FILE__ )}/javascript/scripts/"
31
31
 
32
+ SCRIPT_SOURCES = Dir.glob("#{SCRIPT_LIBRARY}*.js").inject({}) do |h, path|
33
+ h.merge!( path => IO.read(path) )
34
+ end
35
+
32
36
  NO_EVENTS_FOR_ELEMENTS = Set.new([
33
37
  :base, :bdo, :br, :head, :html, :iframe, :meta, :param, :script, :style,
34
38
  :title, :link
@@ -198,6 +202,17 @@ class Javascript
198
202
  @browser.watir.execute_script script
199
203
  end
200
204
 
205
+ # Executes the given code but unwraps Watir elements.
206
+ #
207
+ # @param [String] script
208
+ # JS code to execute.
209
+ #
210
+ # @return [Object]
211
+ # Result of `script`.
212
+ def run_without_elements( script )
213
+ unwrap_elements run( script )
214
+ end
215
+
201
216
  # @return (see TaintTracer#debug)
202
217
  def debugging_data
203
218
  return [] if !supported?
@@ -361,18 +376,18 @@ class Javascript
361
376
  def read_script( filename )
362
377
  @scripts ||= {}
363
378
  @scripts[filename] ||=
364
- IO.read( filesystem_path_for_script( filename ) ).
365
- gsub( '_token', "_#{token}" ).freeze
379
+ SCRIPT_SOURCES[filesystem_path_for_script(filename)].
380
+ gsub( '_token', "_#{token}" )
366
381
  end
367
382
 
368
383
  def script_exists?( filename )
369
- (!!read_script( filename )) rescue false
384
+ SCRIPT_SOURCES.include? filesystem_path_for_script( filename )
370
385
  end
371
386
 
372
387
  def filesystem_path_for_script( filename )
373
388
  name = "#{SCRIPT_LIBRARY}#{filename}"
374
389
  name << '.js' if !name.end_with?( '.js')
375
- name
390
+ File.expand_path( name )
376
391
  end
377
392
 
378
393
  def script_url_for( filename )
@@ -384,6 +399,27 @@ class Javascript
384
399
  "#{SCRIPT_BASE_URL}#{filename}.js"
385
400
  end
386
401
 
402
+ def unwrap_elements( obj )
403
+ case obj
404
+ when Watir::Element
405
+ unwrap_element( obj )
406
+
407
+ when Array
408
+ obj.map { |e| unwrap_elements( e ) }
409
+
410
+ when Hash
411
+ obj.each { |k, v| obj[k] = unwrap_elements( v ) }
412
+ obj
413
+
414
+ else
415
+ obj
416
+ end
417
+ end
418
+
419
+ def unwrap_element( element )
420
+ element.html
421
+ end
422
+
387
423
  end
388
424
  end
389
425
  end
@@ -70,7 +70,7 @@ class Proxy < BasicObject
70
70
  # Javascript property/function.
71
71
  # @param [Array] arguments
72
72
  def call( function, *arguments )
73
- @javascript.run "return #{stub.write( function, *arguments )}"
73
+ @javascript.run_without_elements "return #{stub.write( function, *arguments )}"
74
74
  end
75
75
  alias :method_missing :call
76
76
 
@@ -82,7 +82,8 @@ class Worker < Arachni::Browser
82
82
 
83
83
  # PhantomJS may have crashed (it happens sometimes) so make sure that
84
84
  # we've got a live one before running the job.
85
- browser_respawn_if_necessary
85
+ # If we can't respawn, then bail out.
86
+ return if browser_respawn_if_necessary.nil?
86
87
 
87
88
  begin
88
89
  with_timeout @job_timeout do
@@ -234,7 +235,7 @@ class Worker < Arachni::Browser
234
235
  end
235
236
 
236
237
  def browser_respawn_if_necessary
237
- return if !time_to_die? && browser_alive? &&
238
+ return false if !time_to_die? && browser_alive? &&
238
239
  watir.windows.size < RESPAWN_WHEN_WINDOW_COUNT_REACHES
239
240
 
240
241
  browser_respawn
@@ -257,9 +258,18 @@ class Worker < Arachni::Browser
257
258
  @watir = nil
258
259
  @selenium = nil
259
260
 
260
- @watir = ::Watir::Browser.new( selenium )
261
+ # Browser may fail to respawn but there's nothing we can do about
262
+ # that, just leave it dead and try again at the next job.
263
+ begin
264
+ @watir = ::Watir::Browser.new( selenium )
265
+
266
+ ensure_open_window
261
267
 
262
- ensure_open_window
268
+ true
269
+ rescue Browser::Error::Spawn => e
270
+ print_error 'Could not respawn the browser, will try again at the next job.'
271
+ nil
272
+ end
263
273
  end
264
274
 
265
275
  def time_to_die?
@@ -380,7 +380,8 @@ module Auditor
380
380
  #
381
381
  # @see Page#audit?
382
382
  def skip?( element )
383
- return true if !page.audit_element?( element )
383
+ return true if audited?( element.coverage_id ) ||
384
+ !page.audit_element?( element )
384
385
 
385
386
  # Don't audit elements which have been already logged as vulnerable
386
387
  # either by us or preferred checks.
@@ -390,6 +391,10 @@ module Auditor
390
391
  klass = framework.checks[check]
391
392
  next if !klass.info.include?(:issue)
392
393
 
394
+ # No point in doing the following heavy deduplication check if there
395
+ # are no issues logged to begin with.
396
+ next if klass.issue_counter == 0
397
+
393
398
  if Data.issues.include?( klass.create_issue( vector: element ) )
394
399
  return true
395
400
  end
@@ -499,7 +504,10 @@ module Auditor
499
504
  if !block_given?
500
505
  audit_taint( payloads, opts )
501
506
  else
502
- each_candidate_element( opts[:elements] ) { |e| e.audit( payloads, opts, &block ) }
507
+ each_candidate_element( opts[:elements] ) do |e|
508
+ e.audit( payloads, opts, &block )
509
+ audited( e.coverage_id )
510
+ end
503
511
  end
504
512
  end
505
513
 
@@ -512,7 +520,10 @@ module Auditor
512
520
  # @see Arachni::Element::Capabilities::Analyzable::Taint
513
521
  def audit_taint( payloads, opts = {} )
514
522
  opts = OPTIONS.merge( opts )
515
- each_candidate_element( opts[:elements] ) { |e| e.taint_analysis( payloads, opts ) }
523
+ each_candidate_element( opts[:elements] )do |e|
524
+ e.taint_analysis( payloads, opts )
525
+ audited( e.coverage_id )
526
+ end
516
527
  end
517
528
 
518
529
  # Audits elements using differential analysis and automatically logs results.
@@ -523,7 +534,10 @@ module Auditor
523
534
  # @see Arachni::Element::Capabilities::Analyzable::Differential
524
535
  def audit_differential( opts = {}, &block )
525
536
  opts = OPTIONS.merge( opts )
526
- each_candidate_element( opts[:elements] ) { |e| e.differential_analysis( opts, &block ) }
537
+ each_candidate_element( opts[:elements] ) do |e|
538
+ e.differential_analysis( opts, &block )
539
+ audited( e.coverage_id )
540
+ end
527
541
  end
528
542
 
529
543
  # Audits elements using timing attacks and automatically logs results.
@@ -534,7 +548,10 @@ module Auditor
534
548
  # @see Arachni::Element::Capabilities::Analyzable::Timeout
535
549
  def audit_timeout( payloads, opts = {} )
536
550
  opts = OPTIONS.merge( opts )
537
- each_candidate_element( opts[:elements] ) { |e| e.timeout_analysis( payloads, opts ) }
551
+ each_candidate_element( opts[:elements] ) do |e|
552
+ e.timeout_analysis( payloads, opts )
553
+ audited( e.coverage_id )
554
+ end
538
555
  end
539
556
 
540
557
  # Traces the taint in the given `resource` and passes each page to the
@@ -581,7 +598,7 @@ module Auditor
581
598
 
582
599
  def prepare_each_element( elements, &block )
583
600
  elements.each do |e|
584
- next if e.inputs.empty?
601
+ next if skip?( e ) || e.inputs.empty?
585
602
 
586
603
  d = e.dup
587
604
  d.auditor = self
@@ -591,7 +608,7 @@ module Auditor
591
608
 
592
609
  def prepare_each_dom_element( elements, &block )
593
610
  elements.each do |e|
594
- next if !e.dom || e.dom.inputs.empty?
611
+ next if skip?( e ) || !e.dom || e.dom.inputs.empty?
595
612
 
596
613
  d = e.dup
597
614
  d.dom.auditor = self
@@ -116,6 +116,10 @@ class Manager < Arachni::Component::Manager
116
116
  # Check to run as a class.
117
117
  # @param [Page] page
118
118
  # Page to audit.
119
+ #
120
+ # @return [Bool]
121
+ # `true` if the check was ran (based on {Check::Auditor.check?}),
122
+ # `false` otherwise.
119
123
  def run_one( check, page )
120
124
  return false if !check.check?( page )
121
125
 
@@ -123,6 +127,8 @@ class Manager < Arachni::Component::Manager
123
127
  check_new.prepare
124
128
  check_new.run
125
129
  check_new.clean_up
130
+
131
+ true
126
132
  end
127
133
 
128
134
  def self.reset
@@ -272,8 +272,6 @@ class Framework
272
272
 
273
273
  print_line
274
274
  print_status "[HTTP: #{page.code}] #{page.dom.url}"
275
- # print_object_space
276
- # print_with_statistics
277
275
 
278
276
  if page.platforms.any?
279
277
  print_info "Identified as: #{page.platforms.to_a.join( ', ' )}"
@@ -293,6 +291,9 @@ class Framework
293
291
  end
294
292
  end
295
293
 
294
+ # Aside from plugins and whatnot, the Trainer hooks here to update the
295
+ # ElementFilter so that it'll know if new elements appear during the
296
+ # audit, so it's a big deal.
296
297
  notify_on_page_audit( page )
297
298
 
298
299
  @current_url = page.dom.url.to_s
@@ -300,22 +301,23 @@ class Framework
300
301
  http.update_cookies( page.cookie_jar )
301
302
  perform_browser_analysis( page )
302
303
 
304
+ # Remove elements which have already passed through here.
305
+ pre_audit_element_filter( page )
306
+
303
307
  # Run checks which **don't** benefit from fingerprinting first, so that
304
308
  # we can use the responses of their HTTP requests to fingerprint the
305
309
  # webapp platforms, so that the checks which **do** benefit from knowing
306
310
  # the remote platforms can run more efficiently.
307
311
  ran = false
308
312
  @checks.without_platforms.values.each do |check|
309
- ran = true
310
- check_page( check, page )
313
+ ran = true if check_page( check, page )
311
314
  end
312
315
  harvest_http_responses if ran
313
316
  run_http = ran
314
317
 
315
318
  ran = false
316
319
  @checks.with_platforms.values.each do |check|
317
- ran = true
318
- check_page( check, page )
320
+ ran = true if check_page( check, page )
319
321
  end
320
322
  harvest_http_responses if ran
321
323
  run_http ||= ran
@@ -1201,7 +1203,53 @@ class Framework
1201
1203
  rescue => e
1202
1204
  print_error "Error in #{check.to_s}: #{e.to_s}"
1203
1205
  print_error_backtrace e
1206
+ false
1207
+ end
1208
+ end
1209
+
1210
+ # Small but (sometimes) important optimization:
1211
+ #
1212
+ # Keep track of page elements which have already been passed to checks,
1213
+ # in order to filter them out and hopefully even avoid running checks
1214
+ # against pages with no new elements.
1215
+ #
1216
+ # It's not like there were going to be redundant audits anyways, because
1217
+ # each layer of the audit performs its own redundancy checks, but those
1218
+ # redundancy checks can introduce significant latencies when dealing
1219
+ # with pages with lots of elements.
1220
+ def pre_audit_element_filter( page )
1221
+ redundant_elements = {}
1222
+ page.elements.each do |e|
1223
+ next if !Options.audit.element?( e.type )
1224
+ next if e.is_a?( Cookie ) || e.is_a?( Header )
1225
+
1226
+ new_element = false
1227
+ redundant_elements[e.type] ||= []
1228
+
1229
+ if !state.element_checked?( e )
1230
+ state.element_checked e
1231
+ new_element = true
1232
+ end
1233
+
1234
+ if e.respond_to?( :dom ) && e.dom
1235
+ if !state.element_checked?( e.dom )
1236
+ state.element_checked e.dom
1237
+ new_element = true
1238
+ end
1239
+ end
1240
+
1241
+ next if new_element
1242
+
1243
+ redundant_elements[e.type] << e
1244
+ end
1245
+
1246
+ # Remove redundant elements from the page cache, if there are thousands
1247
+ # of them then just skipping them during the audit will introduce latency.
1248
+ redundant_elements.each do |type, elements|
1249
+ page.send( "#{type}s=", page.send( "#{type}s" ) - elements )
1204
1250
  end
1251
+
1252
+ page
1205
1253
  end
1206
1254
 
1207
1255
  def add_to_sitemap( page )
@@ -131,23 +131,21 @@ class Client
131
131
  clear_observers if hooks_too
132
132
  State.http.clear
133
133
 
134
- opts = Options
135
-
136
- @url = opts.url.to_s
134
+ @url = Options.url.to_s
137
135
  @url = nil if @url.empty?
138
136
 
139
- @hydra = Typhoeus::Hydra.new( max_concurrency: opts.http.request_concurrency || MAX_CONCURRENCY )
137
+ client_initialize
140
138
 
141
139
  headers.merge!(
142
140
  'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
143
- 'User-Agent' => opts.http.user_agent
141
+ 'User-Agent' => Options.http.user_agent
144
142
  )
145
- headers['From'] = opts.authorized_by if opts.authorized_by
146
- headers.merge!( opts.http.request_headers )
143
+ headers['From'] = Options.authorized_by if Options.authorized_by
144
+ headers.merge!( Options.http.request_headers )
147
145
 
148
- cookie_jar.load( opts.http.cookie_jar_filepath ) if opts.http.cookie_jar_filepath
149
- update_cookies( opts.http.cookies )
150
- update_cookies( opts.http.cookie_string ) if opts.http.cookie_string
146
+ cookie_jar.load( Options.http.cookie_jar_filepath ) if Options.http.cookie_jar_filepath
147
+ update_cookies( Options.http.cookies )
148
+ update_cookies( Options.http.cookie_string ) if Options.http.cookie_string
151
149
 
152
150
  reset_burst_info
153
151
 
@@ -203,7 +201,7 @@ class Client
203
201
  @burst_runtime = nil
204
202
 
205
203
  begin
206
- hydra_run
204
+ run_and_update_statistics
207
205
 
208
206
  duped_after_run = observers_for( :after_run ).dup
209
207
  observers_for( :after_run ).clear
@@ -258,7 +256,7 @@ class Client
258
256
 
259
257
  # Aborts the running requests on a best effort basis.
260
258
  def abort
261
- exception_jail { @hydra.abort }
259
+ exception_jail { client_abort }
262
260
  end
263
261
 
264
262
  # @return [Integer]
@@ -688,11 +686,11 @@ class Client
688
686
  false
689
687
  end
690
688
 
691
- def hydra_run
689
+ def run_and_update_statistics
692
690
  @running = true
693
691
 
694
692
  reset_burst_info
695
- @hydra.run
693
+ client_run
696
694
 
697
695
  @queue_size = 0
698
696
  @running = false
@@ -700,7 +698,7 @@ class Client
700
698
  @burst_runtime += Time.now - @burst_runtime_start
701
699
  @total_runtime += @burst_runtime
702
700
  end
703
-
701
+
704
702
  def reset_burst_info
705
703
  @burst_response_time_sum = 0
706
704
  @burst_response_count = 0
@@ -771,20 +769,40 @@ class Client
771
769
 
772
770
  return if request.blocking?
773
771
 
772
+ if client_queue( request )
773
+ @queue_size += 1
774
+
775
+ if emergency_run?
776
+ print_info 'Request queue reached its maximum size, performing an emergency run.'
777
+ run_and_update_statistics
778
+ end
779
+ end
780
+
781
+ request
782
+ end
783
+
784
+ def client_initialize
785
+ @hydra = Typhoeus::Hydra.new(
786
+ max_concurrency: Options.http.request_concurrency || MAX_CONCURRENCY
787
+ )
788
+ end
789
+
790
+ def client_run
791
+ @hydra.run
792
+ end
793
+
794
+ def client_abort
795
+ @hydra.abort
796
+ end
797
+
798
+ def client_queue( request )
774
799
  if request.high_priority?
775
800
  @hydra.queue_front( request.to_typhoeus )
776
801
  else
777
802
  @hydra.queue( request.to_typhoeus )
778
803
  end
779
804
 
780
- @queue_size += 1
781
-
782
- if emergency_run?
783
- print_info 'Request queue reached its maximum size, performing an emergency run.'
784
- hydra_run
785
- end
786
-
787
- request
805
+ true
788
806
  end
789
807
 
790
808
  def emergency_run?