sensu 0.16.0-java → 0.17.0.beta.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/bin/sensu-api +4 -4
- data/bin/sensu-client +4 -4
- data/bin/sensu-server +4 -4
- data/lib/sensu/api/process.rb +704 -0
- data/lib/sensu/cli.rb +21 -15
- data/lib/sensu/client/process.rb +414 -0
- data/lib/sensu/client/socket.rb +226 -0
- data/lib/sensu/constants.rb +4 -1
- data/lib/sensu/daemon.rb +125 -73
- data/lib/sensu/redis.rb +10 -5
- data/lib/sensu/server/filter.rb +309 -0
- data/lib/sensu/server/handle.rb +168 -0
- data/lib/sensu/server/mutate.rb +92 -0
- data/lib/sensu/server/process.rb +811 -0
- data/lib/sensu/server/sandbox.rb +21 -0
- data/lib/sensu/server/socket.rb +42 -0
- data/lib/sensu/utilities.rb +29 -3
- data/sensu.gemspec +29 -28
- metadata +34 -16
- data/lib/sensu/api.rb +0 -704
- data/lib/sensu/client.rb +0 -292
- data/lib/sensu/sandbox.rb +0 -11
- data/lib/sensu/server.rb +0 -767
- data/lib/sensu/socket.rb +0 -246
data/lib/sensu/server.rb
DELETED
@@ -1,767 +0,0 @@
|
|
1
|
-
require 'sensu/daemon'
|
2
|
-
require 'sensu/socket'
|
3
|
-
require 'sensu/sandbox'
|
4
|
-
|
5
|
-
module Sensu
|
6
|
-
class Server
|
7
|
-
include Daemon
|
8
|
-
|
9
|
-
attr_reader :is_master
|
10
|
-
|
11
|
-
# Build a server from +options+ and drive it inside the EventMachine
# reactor: the server is started first, then its signal traps are set up.
#
# @param options [Hash] passed unchanged to the constructor.
def self.run(options={})
  instance = new(options)
  EM.run do
    instance.start
    instance.setup_signal_traps
  end
end
|
18
|
-
|
19
|
-
# Initialise the server on top of the superclass/mixin constructor.
#
# After +super+ runs, the server starts out as a non-master, with an empty
# list of master-only timers and no handlers in flight.
#
# @param options [Hash] forwarded implicitly to +super+.
def initialize(options={})
  super
  @handlers_in_progress_count = 0
  @timers[:master] = []
  @is_master = false
end
|
25
|
-
|
26
|
-
# Subscribe to the 'keepalives' pipe and persist every keepalive received.
#
# Each message is parsed as JSON. The client is stored in Redis under
# 'client:<name>', its name is added to the 'clients' set, and only after
# both writes have called back is the message acknowledged. A payload that
# fails to parse is logged and acknowledged straight away.
def setup_keepalives
  @logger.debug('subscribing to keepalives')
  @transport.subscribe(:direct, 'keepalives', 'keepalives', :ack => true) do |message_info, message|
    begin
      client = MultiJson.load(message)
      @logger.debug('received keepalive', {:client => client})
      client_key = 'client:' + client[:name]
      @redis.set(client_key, MultiJson.dump(client)) do
        @redis.sadd('clients', client[:name]) { @transport.ack(message_info) }
      end
    rescue MultiJson::ParseError => error
      failure = {:message => message, :error => error.to_s}
      @logger.error('failed to parse keepalive payload', failure)
      @transport.ack(message_info)
    end
  end
end
|
48
|
-
|
49
|
-
# Decide whether an action is currently subdued by a subdue condition.
#
# The condition may carry:
# * :begin / :end  - time strings delimiting a window; when :end parses
#                    earlier than :begin the window is treated as wrapping
#                    past midnight,
# * :days          - day names (case-insensitive) on which to subdue,
# * :exceptions    - hashes with :begin / :end; if the current time falls
#                    inside any of them the action is not subdued.
#
# The clock is read exactly once and that instant is also handed to
# Time.parse as the reference for missing date parts, so every comparison
# in a single call sees the same moment (previously each comparison read
# the clock again and could disagree across a second or midnight boundary).
#
# @param condition [Hash] the subdue definition.
# @return [Boolean] true when the action is subdued right now.
def action_subdued?(condition)
  now = Time.now
  subdued = false
  if condition.has_key?(:begin) && condition.has_key?(:end)
    begin_time = Time.parse(condition[:begin], now)
    end_time = Time.parse(condition[:end], now)
    if end_time < begin_time
      # Overnight window: only the part that lies within today is checked.
      if now < end_time
        begin_time = Time.parse('12:00:00 AM', now)
      else
        end_time = Time.parse('11:59:59 PM', now)
      end
    end
    subdued = true if now >= begin_time && now <= end_time
  end
  if condition.has_key?(:days)
    days = condition[:days].map(&:downcase)
    subdued = true if days.include?(now.strftime('%A').downcase)
  end
  if subdued && condition.has_key?(:exceptions)
    subdued = condition[:exceptions].none? do |exception|
      now >= Time.parse(exception[:begin], now) && now <= Time.parse(exception[:end], now)
    end
  end
  subdued
end
|
78
|
-
|
79
|
-
# Decide whether a handler is subdued for a check.
#
# Both the handler's own :subdue condition and the check's :subdue
# condition are evaluated; the check's condition is skipped when its :at
# value is 'publisher'. The handler is subdued if either evaluation is true.
#
# @param handler [Hash] handler definition.
# @param check [Hash] check definition.
# @return [Boolean]
def handler_subdued?(handler, check)
  verdicts = []
  verdicts << action_subdued?(handler[:subdue]) if handler[:subdue]
  check_condition = check[:subdue]
  if check_condition && check_condition[:at] != 'publisher'
    verdicts << action_subdued?(check_condition)
  end
  verdicts.any?
end
|
89
|
-
|
90
|
-
# Check that every attribute in +hash_one+ is matched by +hash_two+.
#
# For each key of +hash_one+ the values match when:
# * they are equal, or
# * both are hashes that match recursively, or
# * the +hash_one+ value is a String starting with 'eval:' and the rest of
#   the string, evaluated by Sandbox.eval with the +hash_two+ value,
#   yields a truthy result.
# Anything else is a mismatch. An error raised during evaluation is logged
# and counted as a mismatch.
#
# Only the leading 'eval:' marker (and whitespace after it) is removed from
# the expression. The earlier line-anchored global substitution also removed
# 'eval:' from the start of any later line of a multi-line expression.
#
# NOTE(review): expressions are evaluated as Ruby code; the guarantees of
# Sandbox.eval are not visible here, so filter definitions must be trusted.
#
# @param hash_one [Hash] attributes to look for (e.g. a filter definition).
# @param hash_two [Hash] data to match against (e.g. an event).
# @return [Boolean]
def filter_attributes_match?(hash_one, hash_two)
  hash_one.keys.all? do |key|
    expected = hash_one[key]
    actual = hash_two[key]
    if expected == actual
      true
    elsif expected.is_a?(Hash) && actual.is_a?(Hash)
      filter_attributes_match?(expected, actual)
    elsif expected.is_a?(String) && expected.start_with?('eval:')
      begin
        expression = expected.sub(/\Aeval:\s*/, '')
        !!Sandbox.eval(expression, actual)
      rescue => error
        @logger.error('filter eval error', {
          :attributes => [hash_one, hash_two],
          :error => error.to_s
        })
        false
      end
    else
      false
    end
  end
end
|
113
|
-
|
114
|
-
# Decide whether a named filter removes an event.
#
# The filter's :attributes are matched against the event. Without :negate
# the event is filtered when the attributes do NOT match; with :negate it
# is filtered when they do. An unknown filter is logged and never filters.
#
# @param filter_name [String] name of a filter in the settings.
# @param event [Hash] event data.
# @return [Boolean] true when the event is filtered out.
def event_filtered?(filter_name, event)
  unless @settings.filter_exists?(filter_name)
    @logger.error('unknown filter', {
      :filter_name => filter_name
    })
    return false
  end
  definition = @settings[:filters][filter_name]
  matched = filter_attributes_match?(definition[:attributes], event)
  definition[:negate] ? matched : !matched
end
|
126
|
-
|
127
|
-
# Expand a list of handler names into handler definitions.
#
# Names found in the settings become their definition merged with :name.
# A definition of type 'set' is expanded recursively through its :handlers
# list; a set met at depth 2 or deeper is logged and skipped. Names known
# only as extensions contribute the extension object. Unknown names are
# logged. nil entries are ignored and duplicates are removed.
#
# @param handler_list [Array<String, nil>] handler names.
# @param depth [Integer] current handler-set nesting level.
# @return [Array] unique handler definitions / extensions.
def derive_handlers(handler_list, depth=0)
  derived = []
  handler_list.compact.each do |handler_name|
    if @settings.handler_exists?(handler_name)
      definition = @settings[:handlers][handler_name].merge(:name => handler_name)
      if definition[:type] != 'set'
        derived << definition
      elsif depth < 2
        derived.concat(derive_handlers(definition[:handlers], depth + 1))
      else
        @logger.error('handler sets cannot be deeply nested', {
          :handler => definition
        })
      end
    elsif @extensions.handler_exists?(handler_name)
      derived << @extensions[:handlers][handler_name]
    else
      @logger.error('unknown handler', {
        :handler_name => handler_name
      })
    end
  end
  derived.uniq
end
|
152
|
-
|
153
|
-
# Select the handlers that should run for an event.
#
# Handler names come from the check's :handlers, else :handler, else
# 'default'. A derived handler is dropped (with a log entry) when:
# * the event is flapping and the handler lacks :handle_flapping,
# * the handler is subdued for the check,
# * it declares :severities and the event severity is not among them (for
#   a resolve event the check history is walked backwards, skipping the
#   latest entry, until a status of 0 is met),
# * any of its :filters / :filter filters the event.
#
# @param event [Hash] event data with :check and :action.
# @return [Array] handlers to run.
def event_handlers(event)
  check = event[:check]
  names = Array((check[:handlers] || check[:handler]) || 'default')
  derive_handlers(names).select do |handler|
    log_data = {
      :event => event,
      :handler => handler
    }
    if event[:action] == :flapping && !handler[:handle_flapping]
      @logger.info('handler does not handle flapping events', log_data)
      next
    end
    if handler_subdued?(handler, event[:check])
      @logger.info('handler is subdued', log_data)
      next
    end
    if handler.has_key?(:severities)
      handle = if event[:action] == :resolve
        event[:check][:history].reverse[1..-1].any? do |status|
          # A status of 0 ends the walk; +break+ makes +any?+ return nil.
          break if status.to_i == 0
          handler[:severities].include?(SEVERITIES[status.to_i] || 'unknown')
        end
      else
        handler[:severities].include?(SEVERITIES[event[:check][:status]] || 'unknown')
      end
      unless handle
        @logger.debug('handler does not handle event severity', log_data)
        next
      end
    end
    if handler.has_key?(:filters) || handler.has_key?(:filter)
      filter_names = Array(handler[:filters] || handler[:filter])
      if filter_names.any? { |filter_name| event_filtered?(filter_name, event) }
        @logger.info('event filtered for handler', log_data)
        next
      end
    end
    true
  end
end
|
209
|
-
|
210
|
-
# Mutate event data with a named mutator, then hand the output to +block+.
#
# The mutator name defaults to 'json'. A mutator from the settings is run
# as a command through Spawn.process, receiving the event as JSON and the
# mutator's :timeout; a mutator known as an extension is run via safe_run.
# The block is called with the output only when the status is 0. On any
# other status, or when the mutator is unknown, the problem is logged and
# the in-progress handler counter is decremented.
#
# @param mutator_name [String, nil] mutator to use.
# @param event [Hash] event data.
# @yield [output] mutated event data.
def mutate_event_data(mutator_name, event, &block)
  mutator_name ||= 'json'
  on_mutated = Proc.new do |output, status|
    if status == 0
      block.dup.call(output)
    else
      @logger.error('mutator error', {
        :event => event,
        :output => output,
        :status => status
      })
      @handlers_in_progress_count -= 1
    end
  end
  @logger.debug('mutating event data', {
    :event => event,
    :mutator_name => mutator_name
  })
  if @settings.mutator_exists?(mutator_name)
    definition = @settings[:mutators][mutator_name]
    spawn_options = {:data => MultiJson.dump(event), :timeout => definition[:timeout]}
    Spawn.process(definition[:command], spawn_options, &on_mutated)
  elsif @extensions.mutator_exists?(mutator_name)
    @extensions[:mutators][mutator_name].safe_run(event, &on_mutated)
  else
    @logger.error('unknown mutator', {
      :mutator_name => mutator_name
    })
    @handlers_in_progress_count -= 1
  end
end
|
243
|
-
|
244
|
-
# Run every applicable handler for an event.
#
# For each handler the in-progress counter is incremented, the event data
# is mutated with the handler's :mutator, and the result is dispatched on
# the handler's :type:
# * 'pipe'      - spawn :command with the data; log each output line,
# * 'tcp'       - connect to :socket, send the data, close after writing,
# * 'udp'       - send the data as one datagram to :socket,
# * 'transport' - publish non-empty data to the handler's :pipe,
# * 'extension' - run the extension; log each output line.
# Each path decrements the counter when it finishes or fails. Events of
# 'metric' checks are logged at debug level, all others at info level.
#
# @param event [Hash] event data.
def handle_event(event)
  event_handlers(event).each do |handler|
    log_level = event[:check][:type] == 'metric' ? :debug : :info
    logged_handler = handler.respond_to?(:definition) ? handler.definition : handler
    @logger.send(log_level, 'handling event', {
      :event => event,
      :handler => logged_handler
    })
    @handlers_in_progress_count += 1
    on_error = Proc.new do |error|
      @logger.error('handler error', {
        :event => event,
        :handler => handler,
        :error => error.to_s
      })
      @handlers_in_progress_count -= 1
    end
    mutate_event_data(handler[:mutator], event) do |event_data|
      case handler[:type]
      when 'pipe'
        spawn_options = {:data => event_data, :timeout => handler[:timeout]}
        Spawn.process(handler[:command], spawn_options) do |output, status|
          output.each_line do |line|
            @logger.info('handler output', {:handler => handler, :output => line})
          end
          @handlers_in_progress_count -= 1
        end
      when 'tcp'
        begin
          EM.connect(handler[:socket][:host], handler[:socket][:port], SocketHandler) do |socket|
            socket.on_success = Proc.new do
              @handlers_in_progress_count -= 1
            end
            socket.on_error = on_error
            # One timeout (default 10) bounds both connecting and inactivity.
            timeout = handler[:timeout] || 10
            socket.pending_connect_timeout = timeout
            socket.comm_inactivity_timeout = timeout
            socket.send_data(event_data.to_s)
            socket.close_connection_after_writing
          end
        rescue => error
          on_error.call(error)
        end
      when 'udp'
        begin
          EM.open_datagram_socket('0.0.0.0', 0, nil) do |socket|
            socket.send_datagram(event_data.to_s, handler[:socket][:host], handler[:socket][:port])
            socket.close_connection_after_writing
            @handlers_in_progress_count -= 1
          end
        rescue => error
          on_error.call(error)
        end
      when 'transport'
        unless event_data.empty?
          pipe = handler[:pipe]
          pipe_options = pipe[:options] || Hash.new
          @transport.publish(pipe[:type].to_sym, pipe[:name], event_data, pipe_options) do |info|
            if info[:error]
              @logger.fatal('failed to publish event data to the transport', {
                :pipe => pipe,
                :payload => event_data,
                :error => info[:error].to_s
              })
            end
          end
        end
        # Decremented right after publishing; the publish callback is not awaited.
        @handlers_in_progress_count -= 1
      when 'extension'
        handler.safe_run(event_data) do |output, status|
          output.each_line do |line|
            @logger.info('handler extension output', {
              :extension => handler.definition,
              :output => line
            })
          end
          @handlers_in_progress_count -= 1
        end
      end
    end
  end
end
|
328
|
-
|
329
|
-
# Add a check result to its aggregate in Redis.
#
# The result set is identified by '<check name>:<issued>'. The client's
# output and status are stored in the 'aggregation:<set>' hash. Then, in
# 'aggregate:<set>', every severity counter is created at 0 if absent, the
# counter for this result's severity ('unknown' when the status has no
# SEVERITIES entry) and 'total' are incremented, and finally the issued
# time and check name are registered in the 'aggregates:<name>' and
# 'aggregates' sets.
#
# @param result [Hash] result with :client and :check.
def aggregate_result(result)
  @logger.debug('adding result to aggregate', {
    :result => result
  })
  check = result[:check]
  result_set = check[:name] + ':' + check[:issued].to_s
  summary = MultiJson.dump(:output => check[:output], :status => check[:status])
  @redis.hset('aggregation:' + result_set, result[:client], summary) do
    SEVERITIES.each do |name|
      @redis.hsetnx('aggregate:' + result_set, name, 0)
    end
    severity = SEVERITIES[check[:status]] || 'unknown'
    @redis.hincrby('aggregate:' + result_set, severity, 1) do
      @redis.hincrby('aggregate:' + result_set, 'total', 1) do
        @redis.sadd('aggregates:' + check[:name], check[:issued]) do
          @redis.sadd('aggregates', check[:name])
        end
      end
    end
  end
end
|
352
|
-
|
353
|
-
# Pass an event to every bridge extension and log each line of output a
# bridge produces at debug level.
#
# @param event [Hash] event data.
def event_bridges(event)
  @extensions[:bridges].each do |_name, bridge|
    bridge.safe_run(event) do |output, _status|
      output.each_line do |line|
        details = {
          :extension => bridge.definition,
          :output => line
        }
        @logger.debug('bridge extension output', details)
      end
    end
  end
end
|
365
|
-
|
366
|
-
# Process a check result: record history, detect flapping, and create,
# update or resolve the corresponding event.
#
# Nothing happens unless 'client:<name>' exists in Redis. The check used is
# the local definition merged with the result's check data when the check
# is known in the settings and not standalone, otherwise the result's check
# data alone. The status is appended to the history list, of which the last
# 21 entries are read back; with a full 21 entries a weighted state-change
# percentage is computed and the list is trimmed. Depending on the status,
# the flapping state and the previously stored event, the event is stored
# and handled (:create / :flapping), removed and handled (:resolve), or
# just handled (OK 'metric' checks). Bridges always receive the event.
#
# @param result [Hash] result with :client (name) and :check.
def process_result(result)
  @logger.debug('processing result', {
    :result => result
  })
  @redis.get('client:' + result[:client]) do |client_json|
    next if client_json.nil?
    client = MultiJson.load(client_json)
    reported = result[:check]
    check = if @settings.check_exists?(reported[:name]) && !reported[:standalone]
      @settings[:checks][reported[:name]].merge(reported)
    else
      reported
    end
    aggregate_result(result) if check[:aggregate]
    @redis.sadd('history:' + client[:name], check[:name])
    history_key = 'history:' + client[:name] + ':' + check[:name]
    @redis.rpush(history_key, check[:status]) do
      execution_key = 'execution:' + client[:name] + ':' + check[:name]
      @redis.set(execution_key, check[:executed])
      @redis.lrange(history_key, -21, -1) do |history|
        check[:history] = history
        total_state_change = 0
        if history.size >= 21
          # Later changes weigh more: the weight grows from 0.8 by 0.02 per entry.
          state_changes = 0
          change_weight = 0.8
          previous_status = history.first
          history.each do |status|
            state_changes += change_weight unless status == previous_status
            change_weight += 0.02
            previous_status = status
          end
          total_state_change = (state_changes.fdiv(20) * 100).to_i
          @redis.ltrim(history_key, -21, -1)
        end
        @redis.hget('events:' + client[:name], check[:name]) do |event_json|
          previous_occurrence = event_json ? MultiJson.load(event_json) : false
          is_flapping = false
          if check.has_key?(:low_flap_threshold) && check.has_key?(:high_flap_threshold)
            was_flapping = previous_occurrence && previous_occurrence[:action] == 'flapping'
            is_flapping = if total_state_change >= check[:high_flap_threshold]
              true
            elsif was_flapping && total_state_change <= check[:low_flap_threshold]
              false
            else
              was_flapping
            end
          end
          event = {
            :id => random_uuid,
            :client => client,
            :check => check,
            :occurrences => 1
          }
          if check[:status] != 0 || is_flapping
            if previous_occurrence && check[:status] == previous_occurrence[:check][:status]
              event[:occurrences] = previous_occurrence[:occurrences] + 1
            end
            event[:action] = is_flapping ? :flapping : :create
            @redis.hset('events:' + client[:name], check[:name], MultiJson.dump(event)) do
              handle_event(event) unless check[:handle] == false
            end
          elsif previous_occurrence
            event[:occurrences] = previous_occurrence[:occurrences]
            event[:action] = :resolve
            if check[:auto_resolve] != false || check[:force_resolve]
              @redis.hdel('events:' + client[:name], check[:name]) do
                handle_event(event) unless check[:handle] == false
              end
            end
          elsif check[:type] == 'metric'
            handle_event(event)
          end
          event_bridges(event)
        end
      end
    end
  end
end
|
454
|
-
|
455
|
-
# Subscribe to the 'results' pipe and process every check result received.
#
# Each message is parsed as JSON and passed to process_result; a payload
# that fails to parse is logged instead. In both cases the message is
# acknowledged on the next reactor tick.
def setup_results
  @logger.debug('subscribing to results')
  @transport.subscribe(:direct, 'results', 'results', :ack => true) do |message_info, message|
    begin
      result = MultiJson.load(message)
      @logger.debug('received result', {:result => result})
      process_result(result)
    rescue MultiJson::ParseError => error
      failure = {:message => message, :error => error.to_s}
      @logger.error('failed to parse result payload', failure)
    end
    EM.next_tick { @transport.ack(message_info) }
  end
end
|
475
|
-
|
476
|
-
# Decide whether publishing a check request is currently subdued.
#
# Only a :subdue condition whose :at value is 'publisher' is considered;
# any other check is never subdued here.
#
# @param check [Hash] check definition.
# @return [Boolean]
def check_request_subdued?(check)
  condition = check[:subdue]
  return false unless condition && condition[:at] == 'publisher'
  action_subdued?(condition)
end
|
483
|
-
|
484
|
-
# Publish a check request to every subscription listed in the check.
#
# The payload holds the check :name, the current time as :issued and, when
# the check defines one, its :command. It is published as JSON on a fanout
# pipe per subscription; publish failures are logged.
#
# @param check [Hash] check definition with :name and :subscribers.
def publish_check_request(check)
  payload = {:name => check[:name], :issued => Time.now.to_i}
  payload[:command] = check[:command] if check.has_key?(:command)
  @logger.info('publishing check request', {
    :payload => payload,
    :subscribers => check[:subscribers]
  })
  check[:subscribers].each do |subscription|
    @transport.publish(:fanout, subscription, MultiJson.dump(payload)) do |info|
      next unless info[:error]
      @logger.error('failed to publish check request', {
        :subscription => subscription,
        :payload => payload,
        :error => info[:error].to_s
      })
    end
  end
end
|
508
|
-
|
509
|
-
# Schedule periodic check requests for the given checks.
#
# The n-th check (counting from 1) starts after (stagger * n) % 30 seconds,
# with a stagger of 2 (0 when testing?). Once started, a request is
# published every check[:interval] seconds (0.5 when testing?) unless the
# request is subdued, in which case that is logged. All timers are kept in
# @timers[:master].
#
# @param checks [Array<Hash>] check definitions.
def schedule_checks(checks)
  stagger = testing? ? 0 : 2
  checks.each_with_index do |check, index|
    scheduling_delay = stagger * (index + 1) % 30
    @timers[:master] << EM::Timer.new(scheduling_delay) do
      interval = testing? ? 0.5 : check[:interval]
      @timers[:master] << EM::PeriodicTimer.new(interval) do
        if check_request_subdued?(check)
          @logger.info('check request was subdued', {
            :check => check
          })
        else
          publish_check_request(check)
        end
      end
    end
  end
end
|
529
|
-
|
530
|
-
# Schedule check requests for all publishable checks.
#
# Checks that are :standalone or have :publish set to false are left out;
# extension checks must additionally have an Integer :interval.
def setup_publisher
  @logger.debug('scheduling check requests')
  unpublished = lambda do |check|
    check[:standalone] || check[:publish] == false
  end
  standard_checks = @settings.checks.reject(&unpublished)
  extension_checks = @extensions.checks.reject do |check|
    unpublished.call(check) || !check[:interval].is_a?(Integer)
  end
  schedule_checks(standard_checks + extension_checks)
end
|
540
|
-
|
541
|
-
# Publish a check result for a client on the 'results' pipe.
#
# The payload carries the client's name and the check; a publish failure
# is logged.
#
# @param client [Hash] client data with :name.
# @param check [Hash] check result data.
def publish_result(client, check)
  payload = {:client => client[:name], :check => check}
  @logger.debug('publishing check result', {
    :payload => payload
  })
  @transport.publish(:direct, 'results', MultiJson.dump(payload)) do |info|
    next unless info[:error]
    @logger.error('failed to publish check result', {
      :payload => payload,
      :error => info[:error].to_s
    })
  end
end
|
558
|
-
|
559
|
-
# Publish a 'keepalive' check result for every registered client.
#
# The base check has thresholds of 120 (warning) and 180 (critical)
# seconds, uses the 'keepalive' handler when the settings define one, and
# is deep-merged with the client's own :keepalive data when present. The
# time since the client's :timestamp is compared against the thresholds to
# produce status 2, 1 or 0 with a matching output message.
def determine_stale_clients
  @logger.info('determining stale clients')
  keepalive_check = {
    :thresholds => {:warning => 120, :critical => 180}
  }
  keepalive_check[:handler] = "keepalive" if @settings.handler_exists?(:keepalive)
  @redis.smembers('clients') do |clients|
    clients.each do |client_name|
      @redis.get('client:' + client_name) do |client_json|
        next if client_json.nil?
        client = MultiJson.load(client_json)
        check = keepalive_check.dup
        check = deep_merge(check, client[:keepalive]) if client.has_key?(:keepalive)
        check[:name] = 'keepalive'
        check[:issued] = Time.now.to_i
        check[:executed] = Time.now.to_i
        elapsed = Time.now.to_i - client[:timestamp]
        thresholds = check[:thresholds]
        if elapsed >= thresholds[:critical]
          check[:output] = 'No keep-alive sent from client in over ' + thresholds[:critical].to_s + ' seconds'
          check[:status] = 2
        elsif elapsed >= thresholds[:warning]
          check[:output] = 'No keep-alive sent from client in over ' + thresholds[:warning].to_s + ' seconds'
          check[:status] = 1
        else
          check[:output] = 'Keep-alive sent from client less than ' + thresholds[:warning].to_s + ' seconds ago'
          check[:status] = 0
        end
        publish_result(client, check)
      end
    end
  end
end
|
603
|
-
|
604
|
-
# Start a master timer that runs determine_stale_clients every 30 seconds.
def setup_client_monitor
  @logger.debug('monitoring clients')
  @timers[:master] << EM::PeriodicTimer.new(30) { determine_stale_clients }
end
|
610
|
-
|
611
|
-
# Keep at most 20 aggregates per check.
#
# For every check in the 'aggregates' set, the members of
# 'aggregates:<check>' are sorted and all but the last 20 are removed from
# the set, followed by their 'aggregate:<set>' and 'aggregation:<set>' keys.
def prune_aggregations
  @logger.info('pruning aggregations')
  @redis.smembers('aggregates') do |checks|
    checks.each do |check_name|
      @redis.smembers('aggregates:' + check_name) do |aggregates|
        surplus = aggregates.size - 20
        if surplus > 0
          aggregates.sort!
          aggregates.take(surplus).each do |check_issued|
            @redis.srem('aggregates:' + check_name, check_issued) do
              result_set = check_name + ':' + check_issued.to_s
              @redis.del('aggregate:' + result_set) do
                @redis.del('aggregation:' + result_set) do
                  pruned = {:name => check_name, :issued => check_issued}
                  @logger.debug('pruned aggregation', {:check => pruned})
                end
              end
            end
          end
        end
      end
    end
  end
end
|
638
|
-
|
639
|
-
# Start a master timer that runs prune_aggregations every 20 seconds.
def setup_aggregation_pruner
  @logger.debug('pruning aggregations')
  @timers[:master] << EM::PeriodicTimer.new(20) { prune_aggregations }
end
|
645
|
-
|
646
|
-
# Start the duties performed only by the master: publishing check
# requests, monitoring clients and pruning aggregations, in that order.
def master_duties
  setup_publisher
  setup_client_monitor
  setup_aggregation_pruner
end
|
651
|
-
|
652
|
-
# Try to become the master through the 'lock:master' key in Redis.
#
# If SETNX creates the lock, this server becomes master and starts the
# master duties. Otherwise the stored timestamp is read; when it is at
# least 30 seconds old the lock is replaced with GETSET, and this server
# takes over only if the value returned is the timestamp it just read.
def request_master_election
  @redis.setnx('lock:master', Time.now.to_i) do |created|
    if created
      @is_master = true
      @logger.info('i am the master')
      master_duties
    else
      @redis.get('lock:master') do |timestamp|
        lock_age = Time.now.to_i - timestamp.to_i
        if lock_age >= 30
          @redis.getset('lock:master', Time.now.to_i) do |previous|
            if previous == timestamp
              @is_master = true
              @logger.info('i am now the master')
              master_duties
            end
          end
        end
      end
    end
  end
end
|
673
|
-
|
674
|
-
# Set up master election and lock maintenance timers.
#
# An election is requested once after 2 seconds. Every 10 seconds after
# that, the master rewrites the 'lock:master' timestamp while any other
# server requests an election again. Both timers go in @timers[:run].
def setup_master_monitor
  @timers[:run] << EM::Timer.new(2) { request_master_election }
  @timers[:run] << EM::PeriodicTimer.new(10) do
    if @is_master
      @redis.set('lock:master', Time.now.to_i) do
        @logger.debug('updated master lock timestamp')
      end
    else
      request_master_election
    end
  end
end
|
688
|
-
|
689
|
-
# Give up the master role: cancel and forget every master timer and clear
# the master flag. When this server is not the master, only a debug
# message is logged.
def resign_as_master
  if @is_master
    @logger.warn('resigning as master')
    @timers[:master].each(&:cancel)
    @timers[:master].clear
    @is_master = false
  else
    @logger.debug('not currently master')
  end
end
|
701
|
-
|
702
|
-
# Log a warning and unsubscribe from the transport.
def unsubscribe
  @logger.warn('unsubscribing from keepalive and result queues')
  @transport.unsubscribe
end
|
706
|
-
|
707
|
-
# Call +block+ once no handlers are in progress.
#
# The in-progress counter is re-checked through retry_until_true; the
# block runs the first time the counter is found to be 0.
#
# @yield called once when the counter has reached 0.
def complete_handlers_in_progress(&block)
  @logger.info('completing handlers in progress', {
    :handlers_in_progress_count => @handlers_in_progress_count
  })
  retry_until_true do
    next unless @handlers_in_progress_count == 0
    block.call
    true
  end
end
|
718
|
-
|
719
|
-
# Subscribe to keepalives and results, set up the master monitor, then
# mark the server as running.
def bootstrap
  setup_keepalives
  setup_results
  setup_master_monitor
  @state = :running
end
|
725
|
-
|
726
|
-
# Start the server: set up Redis and the transport, then bootstrap.
def start
  setup_redis
  setup_transport
  bootstrap
end
|
731
|
-
|
732
|
-
# Pause the server unless it is already pausing or paused.
#
# The run timers are cancelled and cleared, the transport subscriptions
# are dropped and the master role is resigned; the state passes through
# :pausing and ends as :paused.
def pause
  return if @state == :pausing || @state == :paused
  @state = :pausing
  @timers[:run].each(&:cancel)
  @timers[:run].clear
  unsubscribe
  resign_as_master
  @state = :paused
end
|
744
|
-
|
745
|
-
# Resume a paused server.
#
# Through retry_until_true(1), the server is bootstrapped again as soon as
# it is in the :paused state and both Redis and the transport report being
# connected.
def resume
  retry_until_true(1) do
    ready = @state == :paused && @redis.connected? && @transport.connected?
    if ready
      bootstrap
      true
    end
  end
end
|
755
|
-
|
756
|
-
# Stop the server.
#
# The server is paused and marked :stopping. Once no handlers are in
# progress, the Redis and transport connections are closed and the
# inherited +stop+ is invoked.
def stop
  @logger.warn('stopping')
  pause
  @state = :stopping
  complete_handlers_in_progress do
    @redis.close
    @transport.close
    super
  end
end
|
766
|
-
end
|
767
|
-
end
|