sensu 0.12.6 → 0.13.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
data/lib/sensu/server.rb CHANGED
@@ -1,11 +1,10 @@
1
- require File.join(File.dirname(__FILE__), 'base')
2
- require File.join(File.dirname(__FILE__), 'redis')
3
- require File.join(File.dirname(__FILE__), 'socket')
4
- require File.join(File.dirname(__FILE__), 'sandbox')
1
+ require 'sensu/daemon'
2
+ require 'sensu/socket'
3
+ require 'sensu/sandbox'
5
4
 
6
5
  module Sensu
7
6
  class Server
8
- include Utilities
7
+ include Daemon
9
8
 
10
9
  attr_reader :is_master
11
10
 
@@ -13,84 +12,27 @@ module Sensu
13
12
  server = self.new(options)
14
13
  EM::run do
15
14
  server.start
16
- server.trap_signals
15
+ server.setup_signal_traps
17
16
  end
18
17
  end
19
18
 
20
19
  def initialize(options={})
21
- base = Base.new(options)
22
- @logger = base.logger
23
- @settings = base.settings
24
- @extensions = base.extensions
25
- base.setup_process
26
- @extensions.load_settings(@settings.to_hash)
27
- @timers = Array.new
28
- @master_timers = Array.new
29
- @handlers_in_progress_count = 0
20
+ super
30
21
  @is_master = false
22
+ @timers[:master] = Array.new
23
+ @handlers_in_progress_count = 0
31
24
  end
32
25
 
33
- def setup_redis
34
- @logger.debug('connecting to redis', {
35
- :settings => @settings[:redis]
36
- })
37
- @redis = Redis.connect(@settings[:redis])
38
- @redis.on_error do |error|
39
- @logger.fatal('redis connection error', {
40
- :error => error.to_s
41
- })
42
- stop
43
- end
44
- @redis.before_reconnect do
45
- unless testing?
46
- @logger.warn('reconnecting to redis')
47
- pause
48
- end
49
- end
50
- @redis.after_reconnect do
51
- @logger.info('reconnected to redis')
52
- resume
53
- end
54
- end
55
-
56
- def setup_rabbitmq
57
- @logger.debug('connecting to rabbitmq', {
58
- :settings => @settings[:rabbitmq]
59
- })
60
- @rabbitmq = RabbitMQ.connect(@settings[:rabbitmq])
61
- @rabbitmq.on_error do |error|
62
- @logger.fatal('rabbitmq connection error', {
63
- :error => error.to_s
64
- })
65
- stop
66
- end
67
- @rabbitmq.before_reconnect do
68
- unless testing?
69
- @logger.warn('reconnecting to rabbitmq')
70
- pause
71
- end
72
- end
73
- @rabbitmq.after_reconnect do
74
- @logger.info('reconnected to rabbitmq')
75
- resume
76
- end
77
- @amq = @rabbitmq.channel
78
- end
79
-
80
- def setup_keepalives(&block)
26
+ def setup_keepalives
81
27
  @logger.debug('subscribing to keepalives')
82
- @keepalive_queue = @amq.queue!('keepalives', :auto_delete => true)
83
- @keepalive_queue.bind(@amq.direct('keepalives')) do
84
- block.call if block
85
- end
86
- @keepalive_queue.subscribe(:ack => true) do |header, payload|
87
- client = Oj.load(payload)
28
+ @transport.subscribe(:direct, 'keepalives', 'keepalives', :ack => true) do |message_info, message|
29
+ client = MultiJson.load(message)
88
30
  @logger.debug('received keepalive', {
89
31
  :client => client
90
32
  })
91
- @redis.set('client:' + client[:name], Oj.dump(client)) do
33
+ @redis.set('client:' + client[:name], MultiJson.dump(client)) do
92
34
  @redis.sadd('clients', client[:name]) do
93
- header.ack
35
+ @transport.ack(message_info)
94
36
  end
95
37
  end
96
38
  end
@@ -252,39 +194,31 @@ module Sensu
252
194
  end
253
195
 
254
196
  def mutate_event_data(mutator_name, event, &block)
197
+ mutator_name ||= 'json'
198
+ return_output = Proc.new do |output, status|
199
+ if status == 0
200
+ block.dup.call(output)
201
+ else
202
+ @logger.error('mutator error', {
203
+ :event => event,
204
+ :output => output,
205
+ :status => status
206
+ })
207
+ @handlers_in_progress_count -= 1
208
+ end
209
+ end
210
+ @logger.debug('mutating event data', {
211
+ :event => event,
212
+ :mutator_name => mutator_name
213
+ })
255
214
  case
256
- when mutator_name.nil?
257
- block.call(Oj.dump(event))
258
215
  when @settings.mutator_exists?(mutator_name)
259
216
  mutator = @settings[:mutators][mutator_name]
260
- IO.async_popen(mutator[:command], Oj.dump(event), mutator[:timeout]) do |output, status|
261
- if status == 0
262
- block.call(output)
263
- else
264
- @logger.error('mutator error', {
265
- :event => event,
266
- :mutator => mutator,
267
- :output => output,
268
- :status => status
269
- })
270
- @handlers_in_progress_count -= 1
271
- end
272
- end
217
+ options = {:data => MultiJson.dump(event), :timeout => mutator[:timeout]}
218
+ Spawn.process(mutator[:command], options, &return_output)
273
219
  when @extensions.mutator_exists?(mutator_name)
274
220
  extension = @extensions[:mutators][mutator_name]
275
- extension.safe_run(event) do |output, status|
276
- if status == 0
277
- block.call(output)
278
- else
279
- @logger.error('mutator extension error', {
280
- :event => event,
281
- :extension => extension.definition,
282
- :output => output,
283
- :status => status
284
- })
285
- @handlers_in_progress_count -= 1
286
- end
287
- end
221
+ extension.safe_run(event, &return_output)
288
222
  else
289
223
  @logger.error('unknown mutator', {
290
224
  :mutator_name => mutator_name
@@ -313,7 +247,8 @@ module Sensu
313
247
  mutate_event_data(handler[:mutator], event) do |event_data|
314
248
  case handler[:type]
315
249
  when 'pipe'
316
- IO.async_popen(handler[:command], event_data, handler[:timeout]) do |output, status|
250
+ options = {:data => event_data, :timeout => handler[:timeout]}
251
+ Spawn.process(handler[:command], options) do |output, status|
317
252
  output.each_line do |line|
318
253
  @logger.info('handler output', {
319
254
  :handler => handler,
@@ -348,21 +283,17 @@ module Sensu
348
283
  rescue => error
349
284
  on_error.call(error)
350
285
  end
351
- when 'amqp'
352
- exchange_name = handler[:exchange][:name]
353
- exchange_type = handler[:exchange].has_key?(:type) ? handler[:exchange][:type].to_sym : :direct
354
- exchange_options = handler[:exchange].reject do |key, value|
355
- [:name, :type].include?(key)
356
- end
286
+ when 'transport'
357
287
  unless event_data.empty?
358
- begin
359
- @amq.method(exchange_type).call(exchange_name, exchange_options).publish(event_data)
360
- rescue AMQ::Client::ConnectionClosedError => error
361
- @logger.error('failed to publish event data to an exchange', {
362
- :exchange => handler[:exchange],
363
- :payload => event_data,
364
- :error => error.to_s
365
- })
288
+ pipe = handler[:pipe]
289
+ @transport.publish(pipe[:type].to_sym, pipe[:name], event_data, pipe[:options] || Hash.new) do |info|
290
+ if info[:error]
291
+ @logger.fatal('failed to publish event data to the transport', {
292
+ :pipe => pipe,
293
+ :payload => event_data,
294
+ :error => info[:error].to_s
295
+ })
296
+ end
366
297
  end
367
298
  end
368
299
  @handlers_in_progress_count -= 1
@@ -387,7 +318,7 @@ module Sensu
387
318
  })
388
319
  check = result[:check]
389
320
  result_set = check[:name] + ':' + check[:issued].to_s
390
- @redis.hset('aggregation:' + result_set, result[:client], Oj.dump(
321
+ @redis.hset('aggregation:' + result_set, result[:client], MultiJson.dump(
391
322
  :output => check[:output],
392
323
  :status => check[:status]
393
324
  )) do
@@ -405,15 +336,28 @@ module Sensu
405
336
  end
406
337
  end
407
338
 
339
+ def event_bridges(event)
340
+ @extensions[:bridges].each do |name, bridge|
341
+ bridge.safe_run(event) do |output, status|
342
+ output.each_line do |line|
343
+ @logger.info('bridge extension output', {
344
+ :extension => bridge.definition,
345
+ :output => line
346
+ })
347
+ end
348
+ end
349
+ end
350
+ end
351
+
408
352
  def process_result(result)
409
353
  @logger.debug('processing result', {
410
354
  :result => result
411
355
  })
412
356
  @redis.get('client:' + result[:client]) do |client_json|
413
357
  unless client_json.nil?
414
- client = Oj.load(client_json)
358
+ client = MultiJson.load(client_json)
415
359
  check = case
416
- when @settings.check_exists?(result[:check][:name])
360
+ when @settings.check_exists?(result[:check][:name]) && !result[:check][:standalone]
417
361
  @settings[:checks][result[:check][:name]].merge(result[:check])
418
362
  else
419
363
  result[:check]
@@ -444,10 +388,10 @@ module Sensu
444
388
  @redis.ltrim(history_key, -21, -1)
445
389
  end
446
390
  @redis.hget('events:' + client[:name], check[:name]) do |event_json|
447
- previous_occurrence = event_json ? Oj.load(event_json) : false
391
+ previous_occurrence = event_json ? MultiJson.load(event_json) : false
448
392
  is_flapping = false
449
393
  if check.has_key?(:low_flap_threshold) && check.has_key?(:high_flap_threshold)
450
- was_flapping = previous_occurrence ? previous_occurrence[:flapping] : false
394
+ was_flapping = previous_occurrence && previous_occurrence[:action] == 'flapping'
451
395
  is_flapping = case
452
396
  when total_state_change >= check[:high_flap_threshold]
453
397
  true
@@ -458,6 +402,7 @@ module Sensu
458
402
  end
459
403
  end
460
404
  event = {
405
+ :id => random_uuid,
461
406
  :client => client,
462
407
  :check => check,
463
408
  :occurrences => 1
@@ -466,25 +411,18 @@ module Sensu
466
411
  if previous_occurrence && check[:status] == previous_occurrence[:status]
467
412
  event[:occurrences] = previous_occurrence[:occurrences] + 1
468
413
  end
469
- @redis.hset('events:' + client[:name], check[:name], Oj.dump(
470
- :output => check[:output],
471
- :status => check[:status],
472
- :issued => check[:issued],
473
- :handlers => Array((check[:handlers] || check[:handler]) || 'default'),
474
- :flapping => is_flapping,
475
- :occurrences => event[:occurrences]
476
- )) do
414
+ event[:action] = is_flapping ? :flapping : :create
415
+ @redis.hset('events:' + client[:name], check[:name], MultiJson.dump(event)) do
477
416
  unless check[:handle] == false
478
- event[:action] = is_flapping ? :flapping : :create
479
417
  handle_event(event)
480
418
  end
481
419
  end
482
420
  elsif previous_occurrence
421
+ event[:occurrences] = previous_occurrence[:occurrences]
422
+ event[:action] = :resolve
483
423
  unless check[:auto_resolve] == false && !check[:force_resolve]
484
424
  @redis.hdel('events:' + client[:name], check[:name]) do
485
425
  unless check[:handle] == false
486
- event[:occurrences] = previous_occurrence[:occurrences]
487
- event[:action] = :resolve
488
426
  handle_event(event)
489
427
  end
490
428
  end
@@ -492,6 +430,7 @@ module Sensu
492
430
  elsif check[:type] == 'metric'
493
431
  handle_event(event)
494
432
  end
433
+ event_bridges(event)
495
434
  end
496
435
  end
497
436
  end
@@ -499,20 +438,16 @@ module Sensu
499
438
  end
500
439
  end
501
440
 
502
- def setup_results(&block)
441
+ def setup_results
503
442
  @logger.debug('subscribing to results')
504
- @result_queue = @amq.queue!('results', :auto_delete => true)
505
- @result_queue.bind(@amq.direct('results')) do
506
- block.call if block
507
- end
508
- @result_queue.subscribe(:ack => true) do |header, payload|
509
- result = Oj.load(payload)
443
+ @transport.subscribe(:direct, 'results', 'results', :ack => true) do |message_info, message|
444
+ result = MultiJson.load(message)
510
445
  @logger.debug('received result', {
511
446
  :result => result
512
447
  })
513
448
  process_result(result)
514
449
  EM::next_tick do
515
- header.ack
450
+ @transport.ack(message_info)
516
451
  end
517
452
  end
518
453
  end
@@ -537,15 +472,15 @@ module Sensu
537
472
  :payload => payload,
538
473
  :subscribers => check[:subscribers]
539
474
  })
540
- check[:subscribers].each do |exchange_name|
541
- begin
542
- @amq.fanout(exchange_name).publish(Oj.dump(payload))
543
- rescue AMQ::Client::ConnectionClosedError => error
544
- @logger.error('failed to publish check request', {
545
- :exchange_name => exchange_name,
546
- :payload => payload,
547
- :error => error.to_s
548
- })
475
+ check[:subscribers].each do |subscription|
476
+ @transport.publish(:fanout, subscription, MultiJson.dump(payload)) do |info|
477
+ if info[:error]
478
+ @logger.error('failed to publish check request', {
479
+ :subscription => subscription,
480
+ :payload => payload,
481
+ :error => info[:error].to_s
482
+ })
483
+ end
549
484
  end
550
485
  end
551
486
  end
@@ -556,9 +491,9 @@ module Sensu
556
491
  checks.each do |check|
557
492
  check_count += 1
558
493
  scheduling_delay = stagger * check_count % 30
559
- @master_timers << EM::Timer.new(scheduling_delay) do
494
+ @timers[:master] << EM::Timer.new(scheduling_delay) do
560
495
  interval = testing? ? 0.5 : check[:interval]
561
- @master_timers << EM::PeriodicTimer.new(interval) do
496
+ @timers[:master] << EM::PeriodicTimer.new(interval) do
562
497
  unless check_request_subdued?(check)
563
498
  publish_check_request(check)
564
499
  else
@@ -590,13 +525,13 @@ module Sensu
590
525
  @logger.debug('publishing check result', {
591
526
  :payload => payload
592
527
  })
593
- begin
594
- @amq.direct('results').publish(Oj.dump(payload))
595
- rescue AMQ::Client::ConnectionClosedError => error
596
- @logger.error('failed to publish check result', {
597
- :payload => payload,
598
- :error => error.to_s
599
- })
528
+ @transport.publish(:direct, 'results', MultiJson.dump(payload)) do |info|
529
+ if info[:error]
530
+ @logger.error('failed to publish check result', {
531
+ :payload => payload,
532
+ :error => info[:error].to_s
533
+ })
534
+ end
600
535
  end
601
536
  end
602
537
 
@@ -606,7 +541,7 @@ module Sensu
606
541
  clients.each do |client_name|
607
542
  @redis.get('client:' + client_name) do |client_json|
608
543
  unless client_json.nil?
609
- client = Oj.load(client_json)
544
+ client = MultiJson.load(client_json)
610
545
  check = {
611
546
  :thresholds => {
612
547
  :warning => 120,
@@ -643,7 +578,7 @@ module Sensu
643
578
 
644
579
  def setup_client_monitor
645
580
  @logger.debug('monitoring clients')
646
- @master_timers << EM::PeriodicTimer.new(30) do
581
+ @timers[:master] << EM::PeriodicTimer.new(30) do
647
582
  determine_stale_clients
648
583
  end
649
584
  end
@@ -678,7 +613,7 @@ module Sensu
678
613
 
679
614
  def setup_aggregation_pruner
680
615
  @logger.debug('pruning aggregations')
681
- @master_timers << EM::PeriodicTimer.new(20) do
616
+ @timers[:master] << EM::PeriodicTimer.new(20) do
682
617
  prune_aggregations
683
618
  end
684
619
  end
@@ -713,7 +648,7 @@ module Sensu
713
648
 
714
649
  def setup_master_monitor
715
650
  request_master_election
716
- @timers << EM::PeriodicTimer.new(20) do
651
+ @timers[:run] << EM::PeriodicTimer.new(20) do
717
652
  if @is_master
718
653
  @redis.set('lock:master', Time.now.to_i) do
719
654
  @logger.debug('updated master lock timestamp')
@@ -728,10 +663,10 @@ module Sensu
728
663
  block ||= Proc.new {}
729
664
  if @is_master
730
665
  @logger.warn('resigning as master')
731
- @master_timers.each do |timer|
666
+ @timers[:master].each do |timer|
732
667
  timer.cancel
733
668
  end
734
- @master_timers.clear
669
+ @timers[:master].clear
735
670
  if @redis.connected?
736
671
  @redis.del('lock:master') do
737
672
  @logger.info('removed master lock')
@@ -758,18 +693,7 @@ module Sensu
758
693
 
759
694
  def unsubscribe
760
695
  @logger.warn('unsubscribing from keepalive and result queues')
761
- if @rabbitmq.connected?
762
- @keepalive_queue.unsubscribe
763
- @result_queue.unsubscribe
764
- @amq.recover
765
- else
766
- @keepalive_queue.before_recovery do
767
- @keepalive_queue.unsubscribe
768
- end
769
- @result_queue.before_recovery do
770
- @result_queue.unsubscribe
771
- end
772
- end
696
+ @transport.unsubscribe
773
697
  end
774
698
 
775
699
  def complete_handlers_in_progress(&block)
@@ -793,17 +717,17 @@ module Sensu
793
717
 
794
718
  def start
795
719
  setup_redis
796
- setup_rabbitmq
720
+ setup_transport
797
721
  bootstrap
798
722
  end
799
723
 
800
724
  def pause(&block)
801
725
  unless @state == :pausing || @state == :paused
802
726
  @state = :pausing
803
- @timers.each do |timer|
727
+ @timers[:run].each do |timer|
804
728
  timer.cancel
805
729
  end
806
- @timers.clear
730
+ @timers[:run].clear
807
731
  unsubscribe
808
732
  resign_as_master do
809
733
  @state = :paused
@@ -817,7 +741,7 @@ module Sensu
817
741
  def resume
818
742
  retry_until_true(1) do
819
743
  if @state == :paused
820
- if @redis.connected? && @rabbitmq.connected?
744
+ if @redis.connected? && @transport.connected?
821
745
  bootstrap
822
746
  true
823
747
  end
@@ -830,30 +754,9 @@ module Sensu
830
754
  @state = :stopping
831
755
  pause do
832
756
  complete_handlers_in_progress do
833
- @extensions.stop_all do
834
- @redis.close
835
- @rabbitmq.close
836
- @logger.warn('stopping reactor')
837
- EM::stop_event_loop
838
- end
839
- end
840
- end
841
- end
842
-
843
- def trap_signals
844
- @signals = Array.new
845
- STOP_SIGNALS.each do |signal|
846
- Signal.trap(signal) do
847
- @signals << signal
848
- end
849
- end
850
- EM::PeriodicTimer.new(1) do
851
- signal = @signals.shift
852
- if STOP_SIGNALS.include?(signal)
853
- @logger.warn('received signal', {
854
- :signal => signal
855
- })
856
- stop
757
+ @redis.close
758
+ @transport.close
759
+ super
857
760
  end
858
761
  end
859
762
  end