sensu 0.12.6 → 0.13.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +41 -0
- data/lib/sensu/api.rb +145 -205
- data/lib/sensu/cli.rb +2 -1
- data/lib/sensu/client.rb +51 -119
- data/lib/sensu/constants.rb +1 -7
- data/lib/sensu/daemon.rb +221 -0
- data/lib/sensu/server.rb +105 -202
- data/lib/sensu/socket.rb +4 -4
- data/lib/sensu/utilities.rb +6 -29
- data/sensu.gemspec +10 -6
- metadata +223 -228
- data/lib/sensu/base.rb +0 -75
- data/lib/sensu/extensions.rb +0 -162
- data/lib/sensu/extensions/handlers/debug.rb +0 -17
- data/lib/sensu/extensions/mutators/only_check_output.rb +0 -17
- data/lib/sensu/io.rb +0 -98
- data/lib/sensu/logstream.rb +0 -93
- data/lib/sensu/process.rb +0 -48
- data/lib/sensu/rabbitmq.rb +0 -106
- data/lib/sensu/settings.rb +0 -483
data/lib/sensu/server.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require File.join(File.dirname(__FILE__), 'sandbox')
|
1
|
+
require 'sensu/daemon'
|
2
|
+
require 'sensu/socket'
|
3
|
+
require 'sensu/sandbox'
|
5
4
|
|
6
5
|
module Sensu
|
7
6
|
class Server
|
8
|
-
include
|
7
|
+
include Daemon
|
9
8
|
|
10
9
|
attr_reader :is_master
|
11
10
|
|
@@ -13,84 +12,27 @@ module Sensu
|
|
13
12
|
server = self.new(options)
|
14
13
|
EM::run do
|
15
14
|
server.start
|
16
|
-
server.
|
15
|
+
server.setup_signal_traps
|
17
16
|
end
|
18
17
|
end
|
19
18
|
|
20
19
|
def initialize(options={})
|
21
|
-
|
22
|
-
@logger = base.logger
|
23
|
-
@settings = base.settings
|
24
|
-
@extensions = base.extensions
|
25
|
-
base.setup_process
|
26
|
-
@extensions.load_settings(@settings.to_hash)
|
27
|
-
@timers = Array.new
|
28
|
-
@master_timers = Array.new
|
29
|
-
@handlers_in_progress_count = 0
|
20
|
+
super
|
30
21
|
@is_master = false
|
22
|
+
@timers[:master] = Array.new
|
23
|
+
@handlers_in_progress_count = 0
|
31
24
|
end
|
32
25
|
|
33
|
-
def
|
34
|
-
@logger.debug('connecting to redis', {
|
35
|
-
:settings => @settings[:redis]
|
36
|
-
})
|
37
|
-
@redis = Redis.connect(@settings[:redis])
|
38
|
-
@redis.on_error do |error|
|
39
|
-
@logger.fatal('redis connection error', {
|
40
|
-
:error => error.to_s
|
41
|
-
})
|
42
|
-
stop
|
43
|
-
end
|
44
|
-
@redis.before_reconnect do
|
45
|
-
unless testing?
|
46
|
-
@logger.warn('reconnecting to redis')
|
47
|
-
pause
|
48
|
-
end
|
49
|
-
end
|
50
|
-
@redis.after_reconnect do
|
51
|
-
@logger.info('reconnected to redis')
|
52
|
-
resume
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def setup_rabbitmq
|
57
|
-
@logger.debug('connecting to rabbitmq', {
|
58
|
-
:settings => @settings[:rabbitmq]
|
59
|
-
})
|
60
|
-
@rabbitmq = RabbitMQ.connect(@settings[:rabbitmq])
|
61
|
-
@rabbitmq.on_error do |error|
|
62
|
-
@logger.fatal('rabbitmq connection error', {
|
63
|
-
:error => error.to_s
|
64
|
-
})
|
65
|
-
stop
|
66
|
-
end
|
67
|
-
@rabbitmq.before_reconnect do
|
68
|
-
unless testing?
|
69
|
-
@logger.warn('reconnecting to rabbitmq')
|
70
|
-
pause
|
71
|
-
end
|
72
|
-
end
|
73
|
-
@rabbitmq.after_reconnect do
|
74
|
-
@logger.info('reconnected to rabbitmq')
|
75
|
-
resume
|
76
|
-
end
|
77
|
-
@amq = @rabbitmq.channel
|
78
|
-
end
|
79
|
-
|
80
|
-
def setup_keepalives(&block)
|
26
|
+
def setup_keepalives
|
81
27
|
@logger.debug('subscribing to keepalives')
|
82
|
-
@
|
83
|
-
|
84
|
-
block.call if block
|
85
|
-
end
|
86
|
-
@keepalive_queue.subscribe(:ack => true) do |header, payload|
|
87
|
-
client = Oj.load(payload)
|
28
|
+
@transport.subscribe(:direct, 'keepalives', 'keepalives', :ack => true) do |message_info, message|
|
29
|
+
client = MultiJson.load(message)
|
88
30
|
@logger.debug('received keepalive', {
|
89
31
|
:client => client
|
90
32
|
})
|
91
|
-
@redis.set('client:' + client[:name],
|
33
|
+
@redis.set('client:' + client[:name], MultiJson.dump(client)) do
|
92
34
|
@redis.sadd('clients', client[:name]) do
|
93
|
-
|
35
|
+
@transport.ack(message_info)
|
94
36
|
end
|
95
37
|
end
|
96
38
|
end
|
@@ -252,39 +194,31 @@ module Sensu
|
|
252
194
|
end
|
253
195
|
|
254
196
|
def mutate_event_data(mutator_name, event, &block)
|
197
|
+
mutator_name ||= 'json'
|
198
|
+
return_output = Proc.new do |output, status|
|
199
|
+
if status == 0
|
200
|
+
block.dup.call(output)
|
201
|
+
else
|
202
|
+
@logger.error('mutator error', {
|
203
|
+
:event => event,
|
204
|
+
:output => output,
|
205
|
+
:status => status
|
206
|
+
})
|
207
|
+
@handlers_in_progress_count -= 1
|
208
|
+
end
|
209
|
+
end
|
210
|
+
@logger.debug('mutating event data', {
|
211
|
+
:event => event,
|
212
|
+
:mutator_name => mutator_name
|
213
|
+
})
|
255
214
|
case
|
256
|
-
when mutator_name.nil?
|
257
|
-
block.call(Oj.dump(event))
|
258
215
|
when @settings.mutator_exists?(mutator_name)
|
259
216
|
mutator = @settings[:mutators][mutator_name]
|
260
|
-
|
261
|
-
|
262
|
-
block.call(output)
|
263
|
-
else
|
264
|
-
@logger.error('mutator error', {
|
265
|
-
:event => event,
|
266
|
-
:mutator => mutator,
|
267
|
-
:output => output,
|
268
|
-
:status => status
|
269
|
-
})
|
270
|
-
@handlers_in_progress_count -= 1
|
271
|
-
end
|
272
|
-
end
|
217
|
+
options = {:data => MultiJson.dump(event), :timeout => mutator[:timeout]}
|
218
|
+
Spawn.process(mutator[:command], options, &return_output)
|
273
219
|
when @extensions.mutator_exists?(mutator_name)
|
274
220
|
extension = @extensions[:mutators][mutator_name]
|
275
|
-
extension.safe_run(event
|
276
|
-
if status == 0
|
277
|
-
block.call(output)
|
278
|
-
else
|
279
|
-
@logger.error('mutator extension error', {
|
280
|
-
:event => event,
|
281
|
-
:extension => extension.definition,
|
282
|
-
:output => output,
|
283
|
-
:status => status
|
284
|
-
})
|
285
|
-
@handlers_in_progress_count -= 1
|
286
|
-
end
|
287
|
-
end
|
221
|
+
extension.safe_run(event, &return_output)
|
288
222
|
else
|
289
223
|
@logger.error('unknown mutator', {
|
290
224
|
:mutator_name => mutator_name
|
@@ -313,7 +247,8 @@ module Sensu
|
|
313
247
|
mutate_event_data(handler[:mutator], event) do |event_data|
|
314
248
|
case handler[:type]
|
315
249
|
when 'pipe'
|
316
|
-
|
250
|
+
options = {:data => event_data, :timeout => handler[:timeout]}
|
251
|
+
Spawn.process(handler[:command], options) do |output, status|
|
317
252
|
output.each_line do |line|
|
318
253
|
@logger.info('handler output', {
|
319
254
|
:handler => handler,
|
@@ -348,21 +283,17 @@ module Sensu
|
|
348
283
|
rescue => error
|
349
284
|
on_error.call(error)
|
350
285
|
end
|
351
|
-
when '
|
352
|
-
exchange_name = handler[:exchange][:name]
|
353
|
-
exchange_type = handler[:exchange].has_key?(:type) ? handler[:exchange][:type].to_sym : :direct
|
354
|
-
exchange_options = handler[:exchange].reject do |key, value|
|
355
|
-
[:name, :type].include?(key)
|
356
|
-
end
|
286
|
+
when 'transport'
|
357
287
|
unless event_data.empty?
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
288
|
+
pipe = handler[:pipe]
|
289
|
+
@transport.publish(pipe[:type].to_sym, pipe[:name], event_data, pipe[:options] || Hash.new) do |info|
|
290
|
+
if info[:error]
|
291
|
+
@logger.fatal('failed to publish event data to the transport', {
|
292
|
+
:pipe => pipe,
|
293
|
+
:payload => event_data,
|
294
|
+
:error => info[:error].to_s
|
295
|
+
})
|
296
|
+
end
|
366
297
|
end
|
367
298
|
end
|
368
299
|
@handlers_in_progress_count -= 1
|
@@ -387,7 +318,7 @@ module Sensu
|
|
387
318
|
})
|
388
319
|
check = result[:check]
|
389
320
|
result_set = check[:name] + ':' + check[:issued].to_s
|
390
|
-
@redis.hset('aggregation:' + result_set, result[:client],
|
321
|
+
@redis.hset('aggregation:' + result_set, result[:client], MultiJson.dump(
|
391
322
|
:output => check[:output],
|
392
323
|
:status => check[:status]
|
393
324
|
)) do
|
@@ -405,15 +336,28 @@ module Sensu
|
|
405
336
|
end
|
406
337
|
end
|
407
338
|
|
339
|
+
def event_bridges(event)
|
340
|
+
@extensions[:bridges].each do |name, bridge|
|
341
|
+
bridge.safe_run(event) do |output, status|
|
342
|
+
output.each_line do |line|
|
343
|
+
@logger.info('bridge extension output', {
|
344
|
+
:extension => bridge.definition,
|
345
|
+
:output => line
|
346
|
+
})
|
347
|
+
end
|
348
|
+
end
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
408
352
|
def process_result(result)
|
409
353
|
@logger.debug('processing result', {
|
410
354
|
:result => result
|
411
355
|
})
|
412
356
|
@redis.get('client:' + result[:client]) do |client_json|
|
413
357
|
unless client_json.nil?
|
414
|
-
client =
|
358
|
+
client = MultiJson.load(client_json)
|
415
359
|
check = case
|
416
|
-
when @settings.check_exists?(result[:check][:name])
|
360
|
+
when @settings.check_exists?(result[:check][:name]) && !result[:check][:standalone]
|
417
361
|
@settings[:checks][result[:check][:name]].merge(result[:check])
|
418
362
|
else
|
419
363
|
result[:check]
|
@@ -444,10 +388,10 @@ module Sensu
|
|
444
388
|
@redis.ltrim(history_key, -21, -1)
|
445
389
|
end
|
446
390
|
@redis.hget('events:' + client[:name], check[:name]) do |event_json|
|
447
|
-
previous_occurrence = event_json ?
|
391
|
+
previous_occurrence = event_json ? MultiJson.load(event_json) : false
|
448
392
|
is_flapping = false
|
449
393
|
if check.has_key?(:low_flap_threshold) && check.has_key?(:high_flap_threshold)
|
450
|
-
was_flapping = previous_occurrence
|
394
|
+
was_flapping = previous_occurrence && previous_occurrence[:action] == 'flapping'
|
451
395
|
is_flapping = case
|
452
396
|
when total_state_change >= check[:high_flap_threshold]
|
453
397
|
true
|
@@ -458,6 +402,7 @@ module Sensu
|
|
458
402
|
end
|
459
403
|
end
|
460
404
|
event = {
|
405
|
+
:id => random_uuid,
|
461
406
|
:client => client,
|
462
407
|
:check => check,
|
463
408
|
:occurrences => 1
|
@@ -466,25 +411,18 @@ module Sensu
|
|
466
411
|
if previous_occurrence && check[:status] == previous_occurrence[:status]
|
467
412
|
event[:occurrences] = previous_occurrence[:occurrences] + 1
|
468
413
|
end
|
469
|
-
|
470
|
-
|
471
|
-
:status => check[:status],
|
472
|
-
:issued => check[:issued],
|
473
|
-
:handlers => Array((check[:handlers] || check[:handler]) || 'default'),
|
474
|
-
:flapping => is_flapping,
|
475
|
-
:occurrences => event[:occurrences]
|
476
|
-
)) do
|
414
|
+
event[:action] = is_flapping ? :flapping : :create
|
415
|
+
@redis.hset('events:' + client[:name], check[:name], MultiJson.dump(event)) do
|
477
416
|
unless check[:handle] == false
|
478
|
-
event[:action] = is_flapping ? :flapping : :create
|
479
417
|
handle_event(event)
|
480
418
|
end
|
481
419
|
end
|
482
420
|
elsif previous_occurrence
|
421
|
+
event[:occurrences] = previous_occurrence[:occurrences]
|
422
|
+
event[:action] = :resolve
|
483
423
|
unless check[:auto_resolve] == false && !check[:force_resolve]
|
484
424
|
@redis.hdel('events:' + client[:name], check[:name]) do
|
485
425
|
unless check[:handle] == false
|
486
|
-
event[:occurrences] = previous_occurrence[:occurrences]
|
487
|
-
event[:action] = :resolve
|
488
426
|
handle_event(event)
|
489
427
|
end
|
490
428
|
end
|
@@ -492,6 +430,7 @@ module Sensu
|
|
492
430
|
elsif check[:type] == 'metric'
|
493
431
|
handle_event(event)
|
494
432
|
end
|
433
|
+
event_bridges(event)
|
495
434
|
end
|
496
435
|
end
|
497
436
|
end
|
@@ -499,20 +438,16 @@ module Sensu
|
|
499
438
|
end
|
500
439
|
end
|
501
440
|
|
502
|
-
def setup_results
|
441
|
+
def setup_results
|
503
442
|
@logger.debug('subscribing to results')
|
504
|
-
@
|
505
|
-
|
506
|
-
block.call if block
|
507
|
-
end
|
508
|
-
@result_queue.subscribe(:ack => true) do |header, payload|
|
509
|
-
result = Oj.load(payload)
|
443
|
+
@transport.subscribe(:direct, 'results', 'results', :ack => true) do |message_info, message|
|
444
|
+
result = MultiJson.load(message)
|
510
445
|
@logger.debug('received result', {
|
511
446
|
:result => result
|
512
447
|
})
|
513
448
|
process_result(result)
|
514
449
|
EM::next_tick do
|
515
|
-
|
450
|
+
@transport.ack(message_info)
|
516
451
|
end
|
517
452
|
end
|
518
453
|
end
|
@@ -537,15 +472,15 @@ module Sensu
|
|
537
472
|
:payload => payload,
|
538
473
|
:subscribers => check[:subscribers]
|
539
474
|
})
|
540
|
-
check[:subscribers].each do |
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
475
|
+
check[:subscribers].each do |subscription|
|
476
|
+
@transport.publish(:fanout, subscription, MultiJson.dump(payload)) do |info|
|
477
|
+
if info[:error]
|
478
|
+
@logger.error('failed to publish check request', {
|
479
|
+
:subscription => subscription,
|
480
|
+
:payload => payload,
|
481
|
+
:error => info[:error].to_s
|
482
|
+
})
|
483
|
+
end
|
549
484
|
end
|
550
485
|
end
|
551
486
|
end
|
@@ -556,9 +491,9 @@ module Sensu
|
|
556
491
|
checks.each do |check|
|
557
492
|
check_count += 1
|
558
493
|
scheduling_delay = stagger * check_count % 30
|
559
|
-
@
|
494
|
+
@timers[:master] << EM::Timer.new(scheduling_delay) do
|
560
495
|
interval = testing? ? 0.5 : check[:interval]
|
561
|
-
@
|
496
|
+
@timers[:master] << EM::PeriodicTimer.new(interval) do
|
562
497
|
unless check_request_subdued?(check)
|
563
498
|
publish_check_request(check)
|
564
499
|
else
|
@@ -590,13 +525,13 @@ module Sensu
|
|
590
525
|
@logger.debug('publishing check result', {
|
591
526
|
:payload => payload
|
592
527
|
})
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
528
|
+
@transport.publish(:direct, 'results', MultiJson.dump(payload)) do |info|
|
529
|
+
if info[:error]
|
530
|
+
@logger.error('failed to publish check result', {
|
531
|
+
:payload => payload,
|
532
|
+
:error => info[:error].to_s
|
533
|
+
})
|
534
|
+
end
|
600
535
|
end
|
601
536
|
end
|
602
537
|
|
@@ -606,7 +541,7 @@ module Sensu
|
|
606
541
|
clients.each do |client_name|
|
607
542
|
@redis.get('client:' + client_name) do |client_json|
|
608
543
|
unless client_json.nil?
|
609
|
-
client =
|
544
|
+
client = MultiJson.load(client_json)
|
610
545
|
check = {
|
611
546
|
:thresholds => {
|
612
547
|
:warning => 120,
|
@@ -643,7 +578,7 @@ module Sensu
|
|
643
578
|
|
644
579
|
def setup_client_monitor
|
645
580
|
@logger.debug('monitoring clients')
|
646
|
-
@
|
581
|
+
@timers[:master] << EM::PeriodicTimer.new(30) do
|
647
582
|
determine_stale_clients
|
648
583
|
end
|
649
584
|
end
|
@@ -678,7 +613,7 @@ module Sensu
|
|
678
613
|
|
679
614
|
def setup_aggregation_pruner
|
680
615
|
@logger.debug('pruning aggregations')
|
681
|
-
@
|
616
|
+
@timers[:master] << EM::PeriodicTimer.new(20) do
|
682
617
|
prune_aggregations
|
683
618
|
end
|
684
619
|
end
|
@@ -713,7 +648,7 @@ module Sensu
|
|
713
648
|
|
714
649
|
def setup_master_monitor
|
715
650
|
request_master_election
|
716
|
-
@timers << EM::PeriodicTimer.new(20) do
|
651
|
+
@timers[:run] << EM::PeriodicTimer.new(20) do
|
717
652
|
if @is_master
|
718
653
|
@redis.set('lock:master', Time.now.to_i) do
|
719
654
|
@logger.debug('updated master lock timestamp')
|
@@ -728,10 +663,10 @@ module Sensu
|
|
728
663
|
block ||= Proc.new {}
|
729
664
|
if @is_master
|
730
665
|
@logger.warn('resigning as master')
|
731
|
-
@
|
666
|
+
@timers[:master].each do |timer|
|
732
667
|
timer.cancel
|
733
668
|
end
|
734
|
-
@
|
669
|
+
@timers[:master].clear
|
735
670
|
if @redis.connected?
|
736
671
|
@redis.del('lock:master') do
|
737
672
|
@logger.info('removed master lock')
|
@@ -758,18 +693,7 @@ module Sensu
|
|
758
693
|
|
759
694
|
def unsubscribe
|
760
695
|
@logger.warn('unsubscribing from keepalive and result queues')
|
761
|
-
|
762
|
-
@keepalive_queue.unsubscribe
|
763
|
-
@result_queue.unsubscribe
|
764
|
-
@amq.recover
|
765
|
-
else
|
766
|
-
@keepalive_queue.before_recovery do
|
767
|
-
@keepalive_queue.unsubscribe
|
768
|
-
end
|
769
|
-
@result_queue.before_recovery do
|
770
|
-
@result_queue.unsubscribe
|
771
|
-
end
|
772
|
-
end
|
696
|
+
@transport.unsubscribe
|
773
697
|
end
|
774
698
|
|
775
699
|
def complete_handlers_in_progress(&block)
|
@@ -793,17 +717,17 @@ module Sensu
|
|
793
717
|
|
794
718
|
def start
|
795
719
|
setup_redis
|
796
|
-
|
720
|
+
setup_transport
|
797
721
|
bootstrap
|
798
722
|
end
|
799
723
|
|
800
724
|
def pause(&block)
|
801
725
|
unless @state == :pausing || @state == :paused
|
802
726
|
@state = :pausing
|
803
|
-
@timers.each do |timer|
|
727
|
+
@timers[:run].each do |timer|
|
804
728
|
timer.cancel
|
805
729
|
end
|
806
|
-
@timers.clear
|
730
|
+
@timers[:run].clear
|
807
731
|
unsubscribe
|
808
732
|
resign_as_master do
|
809
733
|
@state = :paused
|
@@ -817,7 +741,7 @@ module Sensu
|
|
817
741
|
def resume
|
818
742
|
retry_until_true(1) do
|
819
743
|
if @state == :paused
|
820
|
-
if @redis.connected? && @
|
744
|
+
if @redis.connected? && @transport.connected?
|
821
745
|
bootstrap
|
822
746
|
true
|
823
747
|
end
|
@@ -830,30 +754,9 @@ module Sensu
|
|
830
754
|
@state = :stopping
|
831
755
|
pause do
|
832
756
|
complete_handlers_in_progress do
|
833
|
-
@
|
834
|
-
|
835
|
-
|
836
|
-
@logger.warn('stopping reactor')
|
837
|
-
EM::stop_event_loop
|
838
|
-
end
|
839
|
-
end
|
840
|
-
end
|
841
|
-
end
|
842
|
-
|
843
|
-
def trap_signals
|
844
|
-
@signals = Array.new
|
845
|
-
STOP_SIGNALS.each do |signal|
|
846
|
-
Signal.trap(signal) do
|
847
|
-
@signals << signal
|
848
|
-
end
|
849
|
-
end
|
850
|
-
EM::PeriodicTimer.new(1) do
|
851
|
-
signal = @signals.shift
|
852
|
-
if STOP_SIGNALS.include?(signal)
|
853
|
-
@logger.warn('received signal', {
|
854
|
-
:signal => signal
|
855
|
-
})
|
856
|
-
stop
|
757
|
+
@redis.close
|
758
|
+
@transport.close
|
759
|
+
super
|
857
760
|
end
|
858
761
|
end
|
859
762
|
end
|