sensu 0.16.0-java → 0.17.0.beta.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sensu/server.rb DELETED
@@ -1,767 +0,0 @@
1
- require 'sensu/daemon'
2
- require 'sensu/socket'
3
- require 'sensu/sandbox'
4
-
5
- module Sensu
6
- class Server
7
- include Daemon
8
-
9
- attr_reader :is_master
10
-
11
# Builds a server from the given options and starts it inside an
# EventMachine reactor, installing the signal traps once started.
#
# options - Hash passed unchanged to the constructor.
def self.run(options={})
  instance = new(options)
  EM.run do
    instance.start
    instance.setup_signal_traps
  end
end
18
-
19
# Runs the inherited initializer with the same arguments, then sets up
# the server-only state: not master, an empty master timer list, and
# no handlers in progress.
def initialize(options={})
  super
  @handlers_in_progress_count = 0
  @timers[:master] = []
  @is_master = false
end
25
-
26
# Subscribes to the 'keepalives' pipe. Each message is parsed as JSON,
# stored under 'client:<name>', the name is added to the 'clients' set,
# and only then is the message acknowledged. A payload that fails to
# parse is logged and acknowledged straight away.
def setup_keepalives
  @logger.debug('subscribing to keepalives')
  @transport.subscribe(:direct, 'keepalives', 'keepalives', :ack => true) do |info, payload|
    begin
      keepalive = MultiJson.load(payload)
      @logger.debug('received keepalive', {
        :client => keepalive
      })
      client_name = keepalive[:name]
      @redis.set('client:' + client_name, MultiJson.dump(keepalive)) do
        @redis.sadd('clients', client_name) do
          @transport.ack(info)
        end
      end
    rescue MultiJson::ParseError => parse_error
      @logger.error('failed to parse keepalive payload', {
        :message => payload,
        :error => parse_error.to_s
      })
      @transport.ack(info)
    end
  end
end
48
-
49
# Decides whether a subdue condition is currently in effect.
#
# The condition may contain:
#   :begin / :end - time strings bounding a window; when :end parses to
#                   an earlier time than :begin the window is treated as
#                   wrapping past midnight and only the part that falls
#                   on the reference day is tested.
#   :days         - day names (any case); a match subdues regardless of
#                   the time window.
#   :exceptions   - list of {:begin, :end} windows that cancel an
#                   otherwise subdued result.
#
# condition - Hash describing the subdue rules.
# now       - Time used for every comparison and as the base date when
#             parsing time strings. Defaults to the current time. Using
#             one instant keeps a single evaluation consistent even if
#             it runs across a window or day boundary.
#
# Returns true when subdued, false otherwise.
def action_subdued?(condition, now=Time.now)
  subdued = false
  if condition.has_key?(:begin) && condition.has_key?(:end)
    begin_time = Time.parse(condition[:begin], now)
    end_time = Time.parse(condition[:end], now)
    if end_time < begin_time
      # Wrapping window: clamp to whichever half contains today.
      if now < end_time
        begin_time = Time.parse('12:00:00 AM', now)
      else
        end_time = Time.parse('11:59:59 PM', now)
      end
    end
    subdued = true if now >= begin_time && now <= end_time
  end
  if condition.has_key?(:days)
    days = condition[:days].map(&:downcase)
    subdued = true if days.include?(now.strftime('%A').downcase)
  end
  if subdued && condition.has_key?(:exceptions)
    subdued = condition[:exceptions].none? do |exception|
      now >= Time.parse(exception[:begin], now) && now <= Time.parse(exception[:end], now)
    end
  end
  subdued
end
78
-
79
# Reports whether a handler should be skipped because of subdue rules.
# The handler's own :subdue is consulted, and the check's :subdue too
# unless it is marked as applying at the 'publisher'.
#
# Returns true when either applicable rule is in effect.
def handler_subdued?(handler, check)
  verdicts = []
  verdicts << action_subdued?(handler[:subdue]) if handler[:subdue]
  check_subdue = check[:subdue]
  if check_subdue && check_subdue[:at] != 'publisher'
    verdicts << action_subdued?(check_subdue)
  end
  verdicts.any?
end
89
-
90
# Checks that every attribute in hash_one is satisfied by hash_two.
#
# An attribute is satisfied when the values are equal, when both are
# hashes that match recursively, or when the expected value is a string
# starting with 'eval:' and the remaining expression, evaluated by
# Sandbox.eval against the actual value, is truthy. An error raised
# during evaluation is logged and counts as no match.
#
# hash_one - Hash of expected attributes.
# hash_two - Hash to test against.
#
# Returns true or false.
def filter_attributes_match?(hash_one, hash_two)
  hash_one.all? do |key, expected|
    actual = hash_two[key]
    if expected == actual
      true
    elsif expected.is_a?(Hash) && actual.is_a?(Hash)
      filter_attributes_match?(expected, actual)
    elsif expected.is_a?(String) && expected.start_with?('eval:')
      begin
        # Strip only the leading marker: \A anchors to the start of the
        # string, whereas ^ would also match at the start of later lines
        # in a multi-line expression.
        expression = expected.sub(/\Aeval:\s*/, '')
        !!Sandbox.eval(expression, actual)
      rescue => error
        @logger.error('filter eval error', {
          :attributes => [hash_one, hash_two],
          :error => error.to_s
        })
        false
      end
    else
      false
    end
  end
end
113
-
114
# Tells whether the named filter removes the event.
#
# The event is filtered when the filter's attributes do not match it;
# with :negate set the sense is inverted. An unknown filter name is
# logged and never filters.
def event_filtered?(filter_name, event)
  unless @settings.filter_exists?(filter_name)
    @logger.error('unknown filter', {
      :filter_name => filter_name
    })
    return false
  end
  definition = @settings[:filters][filter_name]
  matched = filter_attributes_match?(definition[:attributes], event)
  definition[:negate] ? matched : !matched
end
126
-
127
# Expands a list of handler names into handler definitions.
#
# Names found in settings yield the definition merged with :name;
# handlers of type 'set' are expanded recursively while depth is below
# 2, and logged as an error beyond that. Names found among the
# extensions yield the extension itself. Unknown names are logged.
# nil entries are skipped and duplicates removed.
#
# handler_list - Array of handler names.
# depth        - current set nesting level.
#
# Returns an Array of handlers.
def derive_handlers(handler_list, depth=0)
  derived = []
  handler_list.compact.each do |handler_name|
    if @settings.handler_exists?(handler_name)
      definition = @settings[:handlers][handler_name].merge(:name => handler_name)
      if definition[:type] != 'set'
        derived << definition
      elsif depth < 2
        derived.concat(derive_handlers(definition[:handlers], depth + 1))
      else
        @logger.error('handler sets cannot be deeply nested', {
          :handler => definition
        })
      end
    elsif @extensions.handler_exists?(handler_name)
      derived << @extensions[:handlers][handler_name]
    else
      @logger.error('unknown handler', {
        :handler_name => handler_name
      })
    end
  end
  derived.uniq
end
152
-
153
# Selects the handlers that should run for an event.
#
# Handler names come from the check's :handlers, then :handler, then
# 'default'. A derived handler is dropped when:
#   * the event is flapping and the handler lacks :handle_flapping,
#   * the handler is subdued,
#   * it lists :severities and the event severity is not among them
#     (for a resolve, earlier history entries are examined newest
#     first, stopping at the first 0 status),
#   * any of its filters removes the event.
#
# Returns an Array of handlers.
def event_handlers(event)
  check = event[:check]
  names = Array((check[:handlers] || check[:handler]) || 'default')
  derive_handlers(names).select do |handler|
    if event[:action] == :flapping && !handler[:handle_flapping]
      @logger.info('handler does not handle flapping events', {
        :event => event,
        :handler => handler
      })
      next false
    end
    if handler_subdued?(handler, check)
      @logger.info('handler is subdued', {
        :event => event,
        :handler => handler
      })
      next false
    end
    if handler.has_key?(:severities)
      handled = if event[:action] == :resolve
        check[:history].reverse[1..-1].any? do |status|
          # break leaves any? with nil, i.e. "not handled"
          break if status.to_i == 0
          handler[:severities].include?(SEVERITIES[status.to_i] || 'unknown')
        end
      else
        handler[:severities].include?(SEVERITIES[check[:status]] || 'unknown')
      end
      unless handled
        @logger.debug('handler does not handle event severity', {
          :event => event,
          :handler => handler
        })
        next false
      end
    end
    if handler.has_key?(:filters) || handler.has_key?(:filter)
      filter_names = Array(handler[:filters] || handler[:filter])
      if filter_names.any? { |filter_name| event_filtered?(filter_name, event) }
        @logger.info('event filtered for handler', {
          :event => event,
          :handler => handler
        })
        next false
      end
    end
    true
  end
end
209
-
210
# Runs an event through a mutator and yields the mutated output.
#
# mutator_name defaults to 'json'. A mutator defined in settings is run
# through Spawn.process with the JSON-encoded event as input; an
# extension mutator is run through safe_run. The given block is called
# with the output only when the mutator status is 0. A non-zero status
# or an unknown mutator is logged and the in-progress handler count is
# decremented instead.
def mutate_event_data(mutator_name, event, &block)
  mutator_name ||= 'json'
  on_mutated = proc do |output, status|
    if status != 0
      @logger.error('mutator error', {
        :event => event,
        :output => output,
        :status => status
      })
      @handlers_in_progress_count -= 1
    else
      block.dup.call(output)
    end
  end
  @logger.debug('mutating event data', {
    :event => event,
    :mutator_name => mutator_name
  })
  if @settings.mutator_exists?(mutator_name)
    definition = @settings[:mutators][mutator_name]
    spawn_options = {:data => MultiJson.dump(event), :timeout => definition[:timeout]}
    Spawn.process(definition[:command], spawn_options, &on_mutated)
  elsif @extensions.mutator_exists?(mutator_name)
    @extensions[:mutators][mutator_name].safe_run(event, &on_mutated)
  else
    @logger.error('unknown mutator', {
      :mutator_name => mutator_name
    })
    @handlers_in_progress_count -= 1
  end
end
243
-
244
# Runs every applicable handler for an event.
#
# For each handler from event_handlers the in-progress counter is
# incremented, the event is mutated with the handler's :mutator, and
# the mutated data is dispatched by handler[:type]: 'pipe', 'tcp',
# 'udp', 'transport' or 'extension'. Each of those branches decrements
# the counter when it finishes or fails.
#
# Events whose check type is 'metric' are logged at debug level, all
# others at info.
def handle_event(event)
  event_handlers(event).each do |handler|
    level = event[:check][:type] == 'metric' ? :debug : :info
    described = handler.respond_to?(:definition) ? handler.definition : handler
    @logger.send(level, 'handling event', {
      :event => event,
      :handler => described
    })
    @handlers_in_progress_count += 1
    failed = Proc.new do |error|
      @logger.error('handler error', {
        :event => event,
        :handler => handler,
        :error => error.to_s
      })
      @handlers_in_progress_count -= 1
    end
    mutate_event_data(handler[:mutator], event) do |event_data|
      case handler[:type]
      when 'pipe'
        spawn_options = {:data => event_data, :timeout => handler[:timeout]}
        Spawn.process(handler[:command], spawn_options) do |output, status|
          output.each_line do |line|
            @logger.info('handler output', {
              :handler => handler,
              :output => line
            })
          end
          @handlers_in_progress_count -= 1
        end
      when 'tcp'
        begin
          address = handler[:socket]
          EM::connect(address[:host], address[:port], SocketHandler) do |socket|
            socket.on_success = Proc.new do
              @handlers_in_progress_count -= 1
            end
            socket.on_error = failed
            timeout = handler[:timeout] || 10
            socket.pending_connect_timeout = timeout
            socket.comm_inactivity_timeout = timeout
            socket.send_data(event_data.to_s)
            socket.close_connection_after_writing
          end
        rescue => error
          failed.call(error)
        end
      when 'udp'
        begin
          EM::open_datagram_socket('0.0.0.0', 0, nil) do |socket|
            socket.send_datagram(event_data.to_s, handler[:socket][:host], handler[:socket][:port])
            socket.close_connection_after_writing
            @handlers_in_progress_count -= 1
          end
        rescue => error
          failed.call(error)
        end
      when 'transport'
        # Empty data is not published, but the handler still counts as done.
        unless event_data.empty?
          pipe = handler[:pipe]
          @transport.publish(pipe[:type].to_sym, pipe[:name], event_data, pipe[:options] || {}) do |info|
            if info[:error]
              @logger.fatal('failed to publish event data to the transport', {
                :pipe => pipe,
                :payload => event_data,
                :error => info[:error].to_s
              })
            end
          end
        end
        @handlers_in_progress_count -= 1
      when 'extension'
        handler.safe_run(event_data) do |output, status|
          output.each_line do |line|
            @logger.info('handler extension output', {
              :extension => handler.definition,
              :output => line
            })
          end
          @handlers_in_progress_count -= 1
        end
      end
    end
  end
end
328
-
329
# Records a check result in the aggregate for its check name and
# issued timestamp.
#
# The client's output and status go into the 'aggregation:' hash; the
# 'aggregate:' hash gets a zero-initialised counter per severity, then
# the result's severity and 'total' are incremented. Finally the issued
# timestamp and check name are registered in the 'aggregates' sets.
def aggregate_result(result)
  @logger.debug('adding result to aggregate', {
    :result => result
  })
  check = result[:check]
  result_set = check[:name] + ':' + check[:issued].to_s
  summary = {
    :output => check[:output],
    :status => check[:status]
  }
  @redis.hset('aggregation:' + result_set, result[:client], MultiJson.dump(summary)) do
    SEVERITIES.each do |name|
      @redis.hsetnx('aggregate:' + result_set, name, 0)
    end
    severity = SEVERITIES[check[:status]] || 'unknown'
    @redis.hincrby('aggregate:' + result_set, severity, 1) do
      @redis.hincrby('aggregate:' + result_set, 'total', 1) do
        @redis.sadd('aggregates:' + check[:name], check[:issued]) do
          @redis.sadd('aggregates', check[:name])
        end
      end
    end
  end
end
352
-
353
# Passes the event to every bridge extension and logs each line of a
# bridge's output at debug level.
def event_bridges(event)
  @extensions[:bridges].each do |_name, bridge|
    bridge.safe_run(event) do |output, _status|
      output.each_line do |line|
        @logger.debug('bridge extension output', {
          :extension => bridge.definition,
          :output => line
        })
      end
    end
  end
end
365
-
366
# Processes one check result.
#
# Nothing happens unless the client is known under 'client:<name>'.
# Otherwise the steps are:
#   1. Merge the result's check over the configured check definition
#      (skipped for standalone or unconfigured checks).
#   2. Aggregate the result when the check asks for it.
#   3. Append the status to the client/check history and store the
#      execution time.
#   4. Once 21 history entries exist, compute a weighted state-change
#      percentage and trim the history to its last 21 entries.
#   5. Decide flapping from the check's low/high flap thresholds and
#      the previous event.
#   6. Create/update, resolve, or (for metrics) just handle the event,
#      then pass it to the bridges.
def process_result(result)
  @logger.debug('processing result', {
    :result => result
  })
  @redis.get('client:' + result[:client]) do |stored_client|
    next if stored_client.nil?
    client = MultiJson.load(stored_client)
    reported = result[:check]
    check = if @settings.check_exists?(reported[:name]) && !reported[:standalone]
      @settings[:checks][reported[:name]].merge(reported)
    else
      reported
    end
    aggregate_result(result) if check[:aggregate]
    @redis.sadd('history:' + client[:name], check[:name])
    history_key = 'history:' + client[:name] + ':' + check[:name]
    @redis.rpush(history_key, check[:status]) do
      execution_key = 'execution:' + client[:name] + ':' + check[:name]
      @redis.set(execution_key, check[:executed])
      @redis.lrange(history_key, -21, -1) do |history|
        check[:history] = history
        total_state_change = 0
        if history.size >= 21
          weighted_changes = 0
          weight = 0.8
          last_status = history.first
          history.each do |status|
            weighted_changes += weight unless status == last_status
            # later entries weigh more
            weight += 0.02
            last_status = status
          end
          total_state_change = (weighted_changes.fdiv(20) * 100).to_i
          @redis.ltrim(history_key, -21, -1)
        end
        @redis.hget('events:' + client[:name], check[:name]) do |event_json|
          previous = event_json ? MultiJson.load(event_json) : false
          flapping = false
          if check.has_key?(:low_flap_threshold) && check.has_key?(:high_flap_threshold)
            was_flapping = previous && previous[:action] == 'flapping'
            flapping = if total_state_change >= check[:high_flap_threshold]
              true
            elsif was_flapping && total_state_change <= check[:low_flap_threshold]
              false
            else
              was_flapping
            end
          end
          event = {
            :id => random_uuid,
            :client => client,
            :check => check,
            :occurrences => 1
          }
          if check[:status] != 0 || flapping
            if previous && check[:status] == previous[:check][:status]
              event[:occurrences] = previous[:occurrences] + 1
            end
            event[:action] = flapping ? :flapping : :create
            @redis.hset('events:' + client[:name], check[:name], MultiJson.dump(event)) do
              handle_event(event) unless check[:handle] == false
            end
          elsif previous
            event[:occurrences] = previous[:occurrences]
            event[:action] = :resolve
            unless check[:auto_resolve] == false && !check[:force_resolve]
              @redis.hdel('events:' + client[:name], check[:name]) do
                handle_event(event) unless check[:handle] == false
              end
            end
          elsif check[:type] == 'metric'
            handle_event(event)
          end
          event_bridges(event)
        end
      end
    end
  end
end
454
-
455
# Subscribes to the 'results' pipe. Each message is parsed as JSON and
# passed to process_result; a payload that fails to parse is logged.
# Either way the message is acknowledged on the next reactor tick.
def setup_results
  @logger.debug('subscribing to results')
  @transport.subscribe(:direct, 'results', 'results', :ack => true) do |info, payload|
    begin
      parsed = MultiJson.load(payload)
      @logger.debug('received result', {
        :result => parsed
      })
      process_result(parsed)
    rescue MultiJson::ParseError => parse_error
      @logger.error('failed to parse result payload', {
        :message => payload,
        :error => parse_error.to_s
      })
    end
    EM::next_tick { @transport.ack(info) }
  end
end
475
-
476
# Reports whether publishing a check request is subdued. Only a
# :subdue rule marked as applying at the 'publisher' is considered.
def check_request_subdued?(check)
  rule = check[:subdue]
  return false unless rule && rule[:at] == 'publisher'
  action_subdued?(rule)
end
483
-
484
# Publishes a check request to each of the check's subscriptions.
#
# The payload carries the check name, the current time as :issued and,
# when the check has one, its :command. A publish failure is logged
# per subscription.
def publish_check_request(check)
  payload = {:name => check[:name], :issued => Time.now.to_i}
  payload[:command] = check[:command] if check.has_key?(:command)
  @logger.info('publishing check request', {
    :payload => payload,
    :subscribers => check[:subscribers]
  })
  check[:subscribers].each do |subscription|
    @transport.publish(:fanout, subscription, MultiJson.dump(payload)) do |info|
      next unless info[:error]
      @logger.error('failed to publish check request', {
        :subscription => subscription,
        :payload => payload,
        :error => info[:error].to_s
      })
    end
  end
end
508
-
509
# Schedules periodic check requests.
#
# Each check first waits a staggered delay (2 seconds per position,
# modulo 30; no stagger when testing) and then fires at its :interval
# (0.5 seconds when testing). Both timers are kept in the master timer
# list. A subdued check is logged rather than published.
def schedule_checks(checks)
  stagger = testing? ? 0 : 2
  checks.each_with_index do |check, index|
    scheduling_delay = stagger * (index + 1) % 30
    @timers[:master] << EM::Timer.new(scheduling_delay) do
      interval = testing? ? 0.5 : check[:interval]
      @timers[:master] << EM::PeriodicTimer.new(interval) do
        if check_request_subdued?(check)
          @logger.info('check request was subdued', {
            :check => check
          })
        else
          publish_check_request(check)
        end
      end
    end
  end
end
529
-
530
# Gathers the checks to publish and schedules them.
#
# Standalone checks and checks with :publish set to false are left
# out; extension checks additionally need an Integer :interval.
def setup_publisher
  @logger.debug('scheduling check requests')
  standard = @settings.checks.reject do |candidate|
    candidate[:standalone] || candidate[:publish] == false
  end
  from_extensions = @extensions.checks.reject do |candidate|
    candidate[:standalone] || candidate[:publish] == false || !candidate[:interval].is_a?(Integer)
  end
  schedule_checks(standard + from_extensions)
end
540
-
541
# Publishes a check result for a client on the 'results' pipe. The
# payload holds the client's name and the check. A publish failure is
# logged.
def publish_result(client, check)
  payload = {:client => client[:name], :check => check}
  @logger.debug('publishing check result', {
    :payload => payload
  })
  @transport.publish(:direct, 'results', MultiJson.dump(payload)) do |info|
    next unless info[:error]
    @logger.error('failed to publish check result', {
      :payload => payload,
      :error => info[:error].to_s
    })
  end
end
558
-
559
# Publishes a 'keepalive' check result for every stored client.
#
# The default thresholds are 120 seconds (warning) and 180 seconds
# (critical); a client's own :keepalive data is deep-merged over them.
# The status is 2, 1 or 0 depending on how long ago the client's
# :timestamp was. The 'keepalive' handler is set on the check when a
# handler of that name exists in settings.
def determine_stale_clients
  @logger.info('determining stale clients')
  keepalive_check = {
    :thresholds => {
      :warning => 120,
      :critical => 180
    }
  }
  keepalive_check[:handler] = "keepalive" if @settings.handler_exists?(:keepalive)
  @redis.smembers('clients') do |clients|
    clients.each do |client_name|
      @redis.get('client:' + client_name) do |client_json|
        next if client_json.nil?
        client = MultiJson.load(client_json)
        check = keepalive_check.dup
        check = deep_merge(check, client[:keepalive]) if client.has_key?(:keepalive)
        check[:name] = 'keepalive'
        check[:issued] = Time.now.to_i
        check[:executed] = Time.now.to_i
        elapsed = Time.now.to_i - client[:timestamp]
        critical = check[:thresholds][:critical]
        warning = check[:thresholds][:warning]
        if elapsed >= critical
          check[:output] = 'No keep-alive sent from client in over ' + critical.to_s + ' seconds'
          check[:status] = 2
        elsif elapsed >= warning
          check[:output] = 'No keep-alive sent from client in over ' + warning.to_s + ' seconds'
          check[:status] = 1
        else
          check[:output] = 'Keep-alive sent from client less than ' + warning.to_s + ' seconds ago'
          check[:status] = 0
        end
        publish_result(client, check)
      end
    end
  end
end
603
-
604
# Starts a 30-second periodic timer, kept in the master timer list,
# that runs determine_stale_clients.
def setup_client_monitor
  @logger.debug('monitoring clients')
  monitor = EM::PeriodicTimer.new(30) { determine_stale_clients }
  @timers[:master] << monitor
end
610
-
611
# Trims stored aggregates so that each check keeps only 20.
#
# For every check in 'aggregates', the issued values are sorted and
# all but the last 20 are removed from the set, together with their
# 'aggregate:' and 'aggregation:' keys.
def prune_aggregations
  @logger.info('pruning aggregations')
  @redis.smembers('aggregates') do |checks|
    checks.each do |check_name|
      @redis.smembers('aggregates:' + check_name) do |aggregates|
        next unless aggregates.size > 20
        aggregates.sort!
        aggregates.first(aggregates.size - 20).each do |check_issued|
          @redis.srem('aggregates:' + check_name, check_issued) do
            result_set = check_name + ':' + check_issued.to_s
            @redis.del('aggregate:' + result_set) do
              @redis.del('aggregation:' + result_set) do
                @logger.debug('pruned aggregation', {
                  :check => {
                    :name => check_name,
                    :issued => check_issued
                  }
                })
              end
            end
          end
        end
      end
    end
  end
end
638
-
639
# Starts a 20-second periodic timer, kept in the master timer list,
# that runs prune_aggregations.
def setup_aggregation_pruner
  @logger.debug('pruning aggregations')
  pruner = EM::PeriodicTimer.new(20) { prune_aggregations }
  @timers[:master] << pruner
end
645
-
646
# Starts the master-only work, in order: check request publishing,
# client monitoring, aggregation pruning.
def master_duties
  setup_publisher
  setup_client_monitor
  setup_aggregation_pruner
end
651
-
652
# Tries to become master using the 'lock:master' key.
#
# If the key can be created this server becomes master at once.
# Otherwise, when the stored timestamp is at least 30 seconds old, the
# lock is swapped with GETSET and this server becomes master only if
# the value it replaced is the timestamp it had just read.
def request_master_election
  @redis.setnx('lock:master', Time.now.to_i) do |created|
    if created
      @is_master = true
      @logger.info('i am the master')
      master_duties
    else
      @redis.get('lock:master') do |timestamp|
        next unless Time.now.to_i - timestamp.to_i >= 30
        @redis.getset('lock:master', Time.now.to_i) do |previous|
          next unless previous == timestamp
          @is_master = true
          @logger.info('i am now the master')
          master_duties
        end
      end
    end
  end
end
673
-
674
# Requests a master election after 2 seconds and then, every 10
# seconds, either refreshes the master lock timestamp (when master) or
# requests an election again. Both timers go into the run timer list.
def setup_master_monitor
  @timers[:run] << EM::Timer.new(2) { request_master_election }
  @timers[:run] << EM::PeriodicTimer.new(10) do
    unless @is_master
      request_master_election
    else
      @redis.set('lock:master', Time.now.to_i) do
        @logger.debug('updated master lock timestamp')
      end
    end
  end
end
688
-
689
# Gives up the master role: cancels and clears the master timers and
# resets the master flag. Only logs when this server is not master.
def resign_as_master
  if @is_master
    @logger.warn('resigning as master')
    @timers[:master].each(&:cancel)
    @timers[:master].clear
    @is_master = false
  else
    @logger.debug('not currently master')
  end
end
701
-
702
# Logs a warning and unsubscribes from the transport.
def unsubscribe
  @logger.warn('unsubscribing from keepalive and result queues')
  @transport.unsubscribe
end
706
-
707
# Calls the given block once no handlers are in progress, retrying via
# retry_until_true until the in-progress count reaches zero.
def complete_handlers_in_progress(&block)
  @logger.info('completing handlers in progress', {
    :handlers_in_progress_count => @handlers_in_progress_count
  })
  retry_until_true do
    next unless @handlers_in_progress_count == 0
    block.call
    true
  end
end
718
-
719
# Sets up the keepalive and result subscriptions and the master
# monitor, then marks the server as running.
def bootstrap
  setup_keepalives
  setup_results
  setup_master_monitor
  @state = :running
end
725
-
726
# Sets up the Redis and transport connections, then bootstraps.
def start
  setup_redis
  setup_transport
  bootstrap
end
731
-
732
# Pauses the server unless it is already pausing or paused: cancels
# and clears the run timers, unsubscribes, resigns as master, and
# marks the server paused.
def pause
  return if @state == :pausing || @state == :paused
  @state = :pausing
  @timers[:run].each(&:cancel)
  @timers[:run].clear
  unsubscribe
  resign_as_master
  @state = :paused
end
744
-
745
# Bootstraps again once the server is paused and both Redis and the
# transport report being connected, retrying through
# retry_until_true(1) until that holds.
def resume
  retry_until_true(1) do
    next unless @state == :paused
    next unless @redis.connected? && @transport.connected?
    bootstrap
    true
  end
end
755
-
756
# Stops the server: pauses it, marks it stopping and, once no handlers
# remain in progress, closes the Redis and transport connections and
# runs the inherited stop.
def stop
  @logger.warn('stopping')
  pause
  @state = :stopping
  complete_handlers_in_progress do
    @redis.close
    @transport.close
    # bare super forwards to the parent's stop from inside the block
    super
  end
end
766
- end
767
- end