sensu 0.17.2-java → 0.18.0.beta.1-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/lib/sensu/api/process.rb +43 -17
- data/lib/sensu/client/process.rb +26 -4
- data/lib/sensu/constants.rb +1 -1
- data/lib/sensu/daemon.rb +1 -1
- data/lib/sensu/server/process.rb +140 -97
- data/sensu.gemspec +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bdc41be10ee3b0602db3b6152537609e702da877
|
4
|
+
data.tar.gz: 51e6b5517e0f7cbceccce322c6a51ee32d8de425
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5f7d0f1224a44a79a79101719e8a3e3d1dd65550972dd49e611bd5f98b498c5d0a4eb62788c1829ce9da41305defa1549ce29459ceb568c3bfbbb24312996df
|
7
|
+
data.tar.gz: 78952b016c084d464dc4dc323fbb393ec820a25fdaa11cda7ba5d2bf8c5971ead2c1a5bf3530547ea0bf70f354a3f1ed3661a8397e314784239840ba6246f631
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,33 @@
|
|
1
|
+
## 0.18.0 - TBD
|
2
|
+
|
3
|
+
### Features
|
4
|
+
|
5
|
+
Dynamic (or JIT) client creation (in the registry) for check results for a
|
6
|
+
nonexistent client or a check source. Sensu clients can now monitor an
|
7
|
+
external resource on its behalf, using a check `source` to create a JIT
|
8
|
+
client for the resource, used to store the execution history and provide
|
9
|
+
context within event data. JIT client data in the registry can be
|
10
|
+
managed/updated via the Sensu API, POST `/clients`.
|
11
|
+
|
12
|
+
Storing the latest check result for every client/check pair. This data is
|
13
|
+
currently exposed via the API at `/clients/:client/history` and will be
|
14
|
+
used by several upcoming features.
|
15
|
+
|
16
|
+
The Sensu API now listens on TCP port `4567` by default.
|
17
|
+
|
18
|
+
Sensu server leader election lock timestamps now include milliseconds to
|
19
|
+
reduce the chance of a conflict when attempting to elect a new leader.
|
20
|
+
|
21
|
+
### Other
|
22
|
+
|
23
|
+
The Sensu client sockets (TCP/UDP) are now stopped/closed before the
|
24
|
+
process is stopped.
|
25
|
+
|
26
|
+
Sensu server "master" election is now "leader" election.
|
27
|
+
|
28
|
+
Configuration file encoding is now forced to 8-bit ASCII and UTF-8 BOMs
|
29
|
+
are removed if present.
|
30
|
+
|
1
31
|
## 0.17.2 - 2015-04-08
|
2
32
|
|
3
33
|
### Other
|
data/lib/sensu/api/process.rb
CHANGED
@@ -40,7 +40,7 @@ module Sensu
|
|
40
40
|
setup_logger(options)
|
41
41
|
set :logger, @logger
|
42
42
|
load_settings(options)
|
43
|
-
set :api, @settings[:api]
|
43
|
+
set :api, @settings[:api] || {}
|
44
44
|
set :checks, @settings[:checks]
|
45
45
|
set :all_checks, @settings.checks
|
46
46
|
set :cors, @settings[:cors] || {
|
@@ -55,8 +55,13 @@ module Sensu
|
|
55
55
|
|
56
56
|
def start_server
|
57
57
|
Thin::Logging.silent = true
|
58
|
-
bind =
|
59
|
-
|
58
|
+
bind = settings.api[:bind] || "0.0.0.0"
|
59
|
+
port = settings.api[:port] || 4567
|
60
|
+
@logger.info("api listening", {
|
61
|
+
:bind => bind,
|
62
|
+
:port => port
|
63
|
+
})
|
64
|
+
@thin = Thin::Server.new(bind, port, self)
|
60
65
|
@thin.start
|
61
66
|
end
|
62
67
|
|
@@ -170,7 +175,9 @@ module Sensu
|
|
170
175
|
begin
|
171
176
|
data = MultiJson.load(env["rack.input"].read)
|
172
177
|
valid = rules.all? do |key, rule|
|
173
|
-
|
178
|
+
value = data[key]
|
179
|
+
(value.is_a?(rule[:type]) || (rule[:nil_ok] && value.nil?)) &&
|
180
|
+
rule[:regex].nil? || (rule[:regex] && (value =~ rule[:regex]) == 0)
|
174
181
|
end
|
175
182
|
if valid
|
176
183
|
callback.call(data)
|
@@ -302,6 +309,23 @@ module Sensu
|
|
302
309
|
end
|
303
310
|
end
|
304
311
|
|
312
|
+
apost "/clients/?" do
|
313
|
+
rules = {
|
314
|
+
:name => {:type => String, :nil_ok => false, :regex => /^[\w\.-]+$/},
|
315
|
+
:address => {:type => String, :nil_ok => false},
|
316
|
+
:subscriptions => {:type => Array, :nil_ok => false}
|
317
|
+
}
|
318
|
+
read_data(rules) do |data|
|
319
|
+
data[:keepalives] = false
|
320
|
+
data[:timestamp] = Time.now.to_i
|
321
|
+
settings.redis.set("client:#{data[:name]}", MultiJson.dump(data)) do
|
322
|
+
settings.redis.sadd("clients", data[:name]) do
|
323
|
+
created!(MultiJson.dump(:name => data[:name]))
|
324
|
+
end
|
325
|
+
end
|
326
|
+
end
|
327
|
+
end
|
328
|
+
|
305
329
|
aget "/clients/?" do
|
306
330
|
response = Array.new
|
307
331
|
settings.redis.smembers("clients") do |clients|
|
@@ -333,22 +357,25 @@ module Sensu
|
|
333
357
|
|
334
358
|
aget %r{/clients/([\w\.-]+)/history/?$} do |client_name|
|
335
359
|
response = Array.new
|
336
|
-
settings.redis.smembers("
|
360
|
+
settings.redis.smembers("result:#{client_name}") do |checks|
|
337
361
|
unless checks.empty?
|
338
362
|
checks.each_with_index do |check_name, index|
|
339
|
-
|
363
|
+
result_key = "#{client_name}:#{check_name}"
|
364
|
+
history_key = "history:#{result_key}"
|
340
365
|
settings.redis.lrange(history_key, -21, -1) do |history|
|
341
366
|
history.map! do |status|
|
342
367
|
status.to_i
|
343
368
|
end
|
344
|
-
|
345
|
-
|
369
|
+
settings.redis.get("result:#{result_key}") do |result_json|
|
370
|
+
result = MultiJson.load(result_json)
|
371
|
+
last_execution = result[:executed]
|
346
372
|
unless history.empty? || last_execution.nil?
|
347
373
|
item = {
|
348
374
|
:check => check_name,
|
349
375
|
:history => history,
|
350
376
|
:last_execution => last_execution.to_i,
|
351
|
-
:last_status => history.last
|
377
|
+
:last_status => history.last,
|
378
|
+
:last_result => result
|
352
379
|
}
|
353
380
|
response << item
|
354
381
|
end
|
@@ -373,18 +400,17 @@ module Sensu
|
|
373
400
|
end
|
374
401
|
EM::Timer.new(5) do
|
375
402
|
client = MultiJson.load(client_json)
|
376
|
-
settings.logger.info("deleting client",
|
377
|
-
:client => client
|
378
|
-
})
|
403
|
+
settings.logger.info("deleting client", :client => client)
|
379
404
|
settings.redis.srem("clients", client_name) do
|
380
405
|
settings.redis.del("client:#{client_name}")
|
381
406
|
settings.redis.del("events:#{client_name}")
|
382
|
-
settings.redis.smembers("
|
407
|
+
settings.redis.smembers("result:#{client_name}") do |checks|
|
383
408
|
checks.each do |check_name|
|
384
|
-
|
385
|
-
settings.redis.del("
|
409
|
+
result_key = "#{client_name}:#{check_name}"
|
410
|
+
settings.redis.del("result:#{result_key}")
|
411
|
+
settings.redis.del("history:#{result_key}")
|
386
412
|
end
|
387
|
-
settings.redis.del("
|
413
|
+
settings.redis.del("result:#{client_name}")
|
388
414
|
end
|
389
415
|
end
|
390
416
|
end
|
@@ -428,7 +454,7 @@ module Sensu
|
|
428
454
|
:subscribers => subscribers
|
429
455
|
})
|
430
456
|
subscribers.uniq.each do |exchange_name|
|
431
|
-
settings.transport.publish(:fanout, exchange_name, MultiJson.dump(payload)) do |info|
|
457
|
+
settings.transport.publish(:fanout, exchange_name.to_s, MultiJson.dump(payload)) do |info|
|
432
458
|
if info[:error]
|
433
459
|
settings.logger.error("failed to publish check request", {
|
434
460
|
:exchange_name => exchange_name,
|
data/lib/sensu/client/process.rb
CHANGED
@@ -22,13 +22,14 @@ module Sensu
|
|
22
22
|
end
|
23
23
|
|
24
24
|
# Override Daemon initialize() to support Sensu client check
|
25
|
-
# execution safe mode
|
25
|
+
# execution safe mode, checks in progress, and open sockets.
|
26
26
|
#
|
27
27
|
# @param options [Hash]
|
28
28
|
def initialize(options={})
|
29
29
|
super
|
30
30
|
@safe_mode = @settings[:client][:safe_mode] || false
|
31
31
|
@checks_in_progress = []
|
32
|
+
@sockets = []
|
32
33
|
end
|
33
34
|
|
34
35
|
# Create a Sensu client keepalive payload, to be sent over the
|
@@ -315,18 +316,21 @@ module Sensu
|
|
315
316
|
# TCP & UDP port 3030. The socket can be configured via the
|
316
317
|
# client definition, `:socket` with `:bind` and `:port`. The
|
317
318
|
# current instance of the Sensu logger, settings, and transport
|
318
|
-
# are passed to the socket handler, `Sensu::Client::Socket`.
|
319
|
+
# are passed to the socket handler, `Sensu::Client::Socket`. The
|
320
|
+
# TCP socket server signature (Fixnum) and UDP connection object
|
321
|
+
# are stored in `@sockets`, so that they can be managed
|
322
|
+
# elsewhere, eg. `close_sockets()`.
|
319
323
|
def setup_sockets
|
320
324
|
options = @settings[:client][:socket] || Hash.new
|
321
325
|
options[:bind] ||= "127.0.0.1"
|
322
326
|
options[:port] ||= 3030
|
323
327
|
@logger.debug("binding client tcp and udp sockets", :options => options)
|
324
|
-
EM::start_server(options[:bind], options[:port], Socket) do |socket|
|
328
|
+
@sockets << EM::start_server(options[:bind], options[:port], Socket) do |socket|
|
325
329
|
socket.logger = @logger
|
326
330
|
socket.settings = @settings
|
327
331
|
socket.transport = @transport
|
328
332
|
end
|
329
|
-
EM::open_datagram_socket(options[:bind], options[:port], Socket) do |socket|
|
333
|
+
@sockets << EM::open_datagram_socket(options[:bind], options[:port], Socket) do |socket|
|
330
334
|
socket.logger = @logger
|
331
335
|
socket.settings = @settings
|
332
336
|
socket.transport = @transport
|
@@ -352,6 +356,23 @@ module Sensu
|
|
352
356
|
end
|
353
357
|
end
|
354
358
|
|
359
|
+
# Close the Sensu client TCP and UDP sockets. This method
|
360
|
+
# iterates through `@sockets`, which contains socket server
|
361
|
+
# signatures (Fixnum) and connection objects. A signature
|
362
|
+
# indicates a TCP socket server that needs to be stopped. A
|
363
|
+
# connection object indicates a socket connection that needs to
|
364
|
+
# be closed, eg. a UDP datagram socket.
|
365
|
+
def close_sockets
|
366
|
+
@logger.info("closing client tcp and udp sockets")
|
367
|
+
@sockets.each do |socket|
|
368
|
+
if socket.is_a?(Numeric)
|
369
|
+
EM.stop_server(socket)
|
370
|
+
else
|
371
|
+
socket.close_connection
|
372
|
+
end
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
355
376
|
# Bootstrap the Sensu client, setting up client keepalives,
|
356
377
|
# subscriptions, and standalone check executions. This method
|
357
378
|
# sets the process/daemon `@state` to `:running`.
|
@@ -415,6 +436,7 @@ module Sensu
|
|
415
436
|
pause
|
416
437
|
@state = :stopping
|
417
438
|
complete_checks_in_progress do
|
439
|
+
close_sockets
|
418
440
|
@transport.close
|
419
441
|
super
|
420
442
|
end
|
data/lib/sensu/constants.rb
CHANGED
data/lib/sensu/daemon.rb
CHANGED
data/lib/sensu/server/process.rb
CHANGED
@@ -11,7 +11,7 @@ module Sensu
|
|
11
11
|
include Mutate
|
12
12
|
include Handle
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :is_leader, :handling_event_count
|
15
15
|
|
16
16
|
# Create an instance of the Sensu server process, start the
|
17
17
|
# server within the EventMachine event loop, and set up server
|
@@ -26,14 +26,14 @@ module Sensu
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
# Override Daemon initialize() to support Sensu server
|
29
|
+
# Override Daemon initialize() to support Sensu server leader
|
30
30
|
# election and the handling event count.
|
31
31
|
#
|
32
32
|
# @param options [Hash]
|
33
33
|
def initialize(options={})
|
34
34
|
super
|
35
|
-
@
|
36
|
-
@timers[:
|
35
|
+
@is_leader = false
|
36
|
+
@timers[:leader] = Array.new
|
37
37
|
@handling_event_count = 0
|
38
38
|
end
|
39
39
|
|
@@ -49,7 +49,7 @@ module Sensu
|
|
49
49
|
@logger.debug("updating client registry", :client => client)
|
50
50
|
@redis.set("client:#{client[:name]}", MultiJson.dump(client)) do
|
51
51
|
@redis.sadd("clients", client[:name]) do
|
52
|
-
callback.call
|
52
|
+
callback.call(client)
|
53
53
|
end
|
54
54
|
end
|
55
55
|
end
|
@@ -178,13 +178,16 @@ module Sensu
|
|
178
178
|
# check execution across a number of Sensu clients. JSON
|
179
179
|
# serialization is used for storing check result data.
|
180
180
|
#
|
181
|
-
# @param
|
182
|
-
|
183
|
-
|
184
|
-
check
|
181
|
+
# @param client [Hash]
|
182
|
+
# @param check [Hash]
|
183
|
+
def aggregate_check_result(client, check)
|
184
|
+
@logger.debug("adding check result to aggregate", {
|
185
|
+
:client => client,
|
186
|
+
:check => check
|
187
|
+
})
|
185
188
|
result_set = "#{check[:name]}:#{check[:issued]}"
|
186
189
|
result_data = MultiJson.dump(:output => check[:output], :status => check[:status])
|
187
|
-
@redis.hset("aggregation:#{result_set}",
|
190
|
+
@redis.hset("aggregation:#{result_set}", client[:name], result_data) do
|
188
191
|
SEVERITIES.each do |severity|
|
189
192
|
@redis.hsetnx("aggregate:#{result_set}", severity, 0)
|
190
193
|
end
|
@@ -199,31 +202,34 @@ module Sensu
|
|
199
202
|
end
|
200
203
|
end
|
201
204
|
|
202
|
-
# Store check result data. This method stores
|
203
|
-
# check result statuses for a client/check
|
204
|
-
# is used for event context and flap detection.
|
205
|
-
# execution timestamp is also stored, to provide an
|
206
|
-
# of how recent the data is.
|
205
|
+
# Store check result data. This method stores check result data
|
206
|
+
# and the 21 most recent check result statuses for a client/check
|
207
|
+
# pair, this history is used for event context and flap detection.
|
208
|
+
# The check execution timestamp is also stored, to provide an
|
209
|
+
# indication of how recent the data is.
|
207
210
|
#
|
208
211
|
# @param client [Hash]
|
209
212
|
# @param check [Hash]
|
210
213
|
# @param callback [Proc] to call when the check result data has
|
211
214
|
# been stored (history, etc).
|
212
215
|
def store_check_result(client, check, &callback)
|
213
|
-
@
|
216
|
+
@logger.debug("storing check result", :check => check)
|
217
|
+
@redis.sadd("result:#{client[:name]}", check[:name])
|
214
218
|
result_key = "#{client[:name]}:#{check[:name]}"
|
215
|
-
|
216
|
-
@redis.
|
217
|
-
|
218
|
-
@redis.
|
219
|
-
|
219
|
+
check_truncated = check.merge(:output => check[:output][0..256])
|
220
|
+
@redis.set("result:#{result_key}", MultiJson.dump(check_truncated)) do
|
221
|
+
history_key = "history:#{result_key}"
|
222
|
+
@redis.rpush(history_key, check[:status]) do
|
223
|
+
@redis.ltrim(history_key, -21, -1)
|
224
|
+
callback.call
|
225
|
+
end
|
220
226
|
end
|
221
227
|
end
|
222
228
|
|
223
229
|
# Fetch the execution history for a client/check pair, the 21
|
224
230
|
# most recent check result statuses. This method also calculates
|
225
231
|
# the total state change percentage for the history, this value
|
226
|
-
# is use for check state
|
232
|
+
# is used for check state flap detection, using a similar
|
227
233
|
# algorithm to Nagios:
|
228
234
|
# http://nagios.sourceforge.net/docs/3_0/flapping.html
|
229
235
|
#
|
@@ -332,39 +338,73 @@ module Sensu
|
|
332
338
|
end
|
333
339
|
end
|
334
340
|
|
341
|
+
# Create a blank client (data) and add it to the client
|
342
|
+
# registry. Only the client name is known, the other client
|
343
|
+
# attributes must be updated via the API (POST /clients).
|
344
|
+
# Dynamically created clients and those updated via the API will
|
345
|
+
# have client keepalives disabled, `:keepalives` is set to
|
346
|
+
# `false`.
|
347
|
+
#
|
348
|
+
# @param name [String] to use for the client.
|
349
|
+
# @param callback [Proc] to be called with the dynamically
|
350
|
+
# created client data.
|
351
|
+
def create_client(name, &callback)
|
352
|
+
client = {
|
353
|
+
:name => name,
|
354
|
+
:address => "unknown",
|
355
|
+
:subscriptions => [],
|
356
|
+
:keepalives => false
|
357
|
+
}
|
358
|
+
update_client_registry(client, &callback)
|
359
|
+
end
|
360
|
+
|
361
|
+
# Retrieve a client (data) from Redis if it exists. If a client
|
362
|
+
# does not already exist, create one (a blank) using the
|
363
|
+
# `client_key` as the client name. Dynamically created client
|
364
|
+
# data can be updated using the API (POST /clients).
|
365
|
+
#
|
366
|
+
# @param result [Hash] data.
|
367
|
+
# @param callback [Proc] to be called with client data, either
|
368
|
+
# retrieved from Redis, or dynamically created.
|
369
|
+
def retrieve_client(result, &callback)
|
370
|
+
client_key = result[:check][:source] || result[:client]
|
371
|
+
@redis.get("client:#{client_key}") do |client_json|
|
372
|
+
unless client_json.nil?
|
373
|
+
client = MultiJson.load(client_json)
|
374
|
+
callback.call(client)
|
375
|
+
else
|
376
|
+
create_client(client_key, &callback)
|
377
|
+
end
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
335
381
|
# Process a check result, storing its data, inspecting its
|
336
382
|
# contents, and taking the appropriate actions (eg. update the
|
337
383
|
# event registry). A check result must have a valid client name,
|
338
|
-
# associated with a client in the registry
|
339
|
-
#
|
340
|
-
#
|
341
|
-
#
|
342
|
-
# merged with the check result for more context.
|
384
|
+
# associated with a client in the registry or one will be
|
385
|
+
# created. If a local check definition exists for the check
|
386
|
+
# name, and the check result is not from a standalone check
|
387
|
+
# execution, it's merged with the check result for more context.
|
343
388
|
#
|
344
389
|
# @param result [Hash] data.
|
345
390
|
def process_check_result(result)
|
346
391
|
@logger.debug("processing result", :result => result)
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
check
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
update_event_registry(client, check) do |event|
|
362
|
-
process_event(event)
|
363
|
-
end
|
392
|
+
retrieve_client(result) do |client|
|
393
|
+
check = case
|
394
|
+
when @settings.check_exists?(result[:check][:name]) && !result[:check][:standalone]
|
395
|
+
@settings[:checks][result[:check][:name]].merge(result[:check])
|
396
|
+
else
|
397
|
+
result[:check]
|
398
|
+
end
|
399
|
+
aggregate_check_result(client, check) if check[:aggregate]
|
400
|
+
store_check_result(client, check) do
|
401
|
+
check_history(client, check) do |history, total_state_change|
|
402
|
+
check[:history] = history
|
403
|
+
check[:total_state_change] = total_state_change
|
404
|
+
update_event_registry(client, check) do |event|
|
405
|
+
process_event(event)
|
364
406
|
end
|
365
407
|
end
|
366
|
-
else
|
367
|
-
@logger.warn("client not in registry", :client => result[:client])
|
368
408
|
end
|
369
409
|
end
|
370
410
|
end
|
@@ -441,8 +481,8 @@ module Sensu
|
|
441
481
|
|
442
482
|
# Schedule check executions, using EventMachine periodic timers,
|
443
483
|
# using a calculated execution splay. The timers are stored in
|
444
|
-
# the timers hash under `:
|
445
|
-
# is a task for only the Sensu server
|
484
|
+
# the timers hash under `:leader`, as check request publishing
|
485
|
+
# is a task for only the Sensu server leader, so they can be
|
446
486
|
# cancelled etc. Check requests are not published if subdued.
|
447
487
|
#
|
448
488
|
# @param checks [Array] of definitions.
|
@@ -457,9 +497,9 @@ module Sensu
|
|
457
497
|
end
|
458
498
|
execution_splay = testing? ? 0 : calculate_check_execution_splay(check)
|
459
499
|
interval = testing? ? 0.5 : check[:interval]
|
460
|
-
@timers[:
|
500
|
+
@timers[:leader] << EM::Timer.new(execution_splay) do
|
461
501
|
create_check_request.call
|
462
|
-
@timers[:
|
502
|
+
@timers[:leader] << EM::PeriodicTimer.new(interval, &create_check_request)
|
463
503
|
end
|
464
504
|
end
|
465
505
|
end
|
@@ -549,6 +589,7 @@ module Sensu
|
|
549
589
|
@redis.get("client:#{client_name}") do |client_json|
|
550
590
|
unless client_json.nil?
|
551
591
|
client = MultiJson.load(client_json)
|
592
|
+
next if client[:keepalives] == false
|
552
593
|
check = create_keepalive_check(client)
|
553
594
|
time_since_last_keepalive = Time.now.to_i - client[:timestamp]
|
554
595
|
check[:output] = "No keepalive sent from client for "
|
@@ -574,10 +615,10 @@ module Sensu
|
|
574
615
|
|
575
616
|
# Set up the client monitor, a periodic timer to run
|
576
617
|
# `determine_stale_clients()` every 30 seconds. The timer is
|
577
|
-
# stored in the timers hash under `:
|
618
|
+
# stored in the timers hash under `:leader`.
|
578
619
|
def setup_client_monitor
|
579
620
|
@logger.debug("monitoring client keepalives")
|
580
|
-
@timers[:
|
621
|
+
@timers[:leader] << EM::PeriodicTimer.new(30) do
|
581
622
|
determine_stale_clients
|
582
623
|
end
|
583
624
|
end
|
@@ -616,52 +657,53 @@ module Sensu
|
|
616
657
|
# Set up the check result aggregation pruner, using periodic
|
617
658
|
# timer to run `prune_check_result_aggregations()` every 20
|
618
659
|
# seconds. The timer is stored in the timers hash under
|
619
|
-
# `:
|
660
|
+
# `:leader`.
|
620
661
|
def setup_check_result_aggregation_pruner
|
621
662
|
@logger.debug("pruning check result aggregations")
|
622
|
-
@timers[:
|
663
|
+
@timers[:leader] << EM::PeriodicTimer.new(20) do
|
623
664
|
prune_check_result_aggregations
|
624
665
|
end
|
625
666
|
end
|
626
667
|
|
627
|
-
# Set up the
|
668
|
+
# Set up the leader duties, tasks only performed by a single
|
628
669
|
# Sensu server at a time. The duties include publishing check
|
629
670
|
# requests, monitoring for stale clients, and pruning check
|
630
671
|
# result aggregations.
|
631
|
-
def
|
672
|
+
def leader_duties
|
632
673
|
setup_check_request_publisher
|
633
674
|
setup_client_monitor
|
634
675
|
setup_check_result_aggregation_pruner
|
635
676
|
end
|
636
677
|
|
637
|
-
# Request a
|
638
|
-
# current process is the
|
678
|
+
# Request a leader election, a process to determine if the
|
679
|
+
# current process is the leader Sensu server, with its
|
639
680
|
# own/unique duties. A Redis key/value is used as a central
|
640
681
|
# lock, using the "SETNX" Redis command to set the key/value if
|
641
682
|
# it does not exist, using a timestamp for the value. If the
|
642
683
|
# current process was able to create the key/value, it is the
|
643
|
-
#
|
684
|
+
# leader, and must do the duties of the leader. If the current
|
644
685
|
# process was not able to create the key/value, but the current
|
645
686
|
# timestamp value is equal to or over 30 seconds ago, the
|
646
687
|
# "GETSET" Redis command is used to set a new timestamp and
|
647
688
|
# fetch the previous value to compare them, to determine if it
|
648
689
|
# was set by the current process. If the current process is able
|
649
|
-
# to set the timestamp value, it becomes the
|
650
|
-
# has `@
|
651
|
-
def
|
652
|
-
@redis.setnx("lock:
|
690
|
+
# to set the timestamp value, it becomes the leader. The leader
|
691
|
+
# has `@is_leader` set to `true`.
|
692
|
+
def request_leader_election
|
693
|
+
@redis.setnx("lock:leader", Time.now.to_i) do |created|
|
653
694
|
if created
|
654
|
-
@
|
655
|
-
@logger.info("i am the
|
656
|
-
|
695
|
+
@is_leader = true
|
696
|
+
@logger.info("i am the leader")
|
697
|
+
leader_duties
|
657
698
|
else
|
658
|
-
@redis.get("lock:
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
@
|
664
|
-
|
699
|
+
@redis.get("lock:leader") do |current_timestamp|
|
700
|
+
lock_timestamp = (Time.now.to_f * 1000).to_i
|
701
|
+
if lock_timestamp - current_timestamp.to_i >= 30000
|
702
|
+
@redis.getset("lock:leader", lock_timestamp) do |previous_timestamp|
|
703
|
+
if previous_timestamp == current_timestamp
|
704
|
+
@is_leader = true
|
705
|
+
@logger.info("i am now the leader")
|
706
|
+
leader_duties
|
665
707
|
end
|
666
708
|
end
|
667
709
|
end
|
@@ -670,41 +712,42 @@ module Sensu
|
|
670
712
|
end
|
671
713
|
end
|
672
714
|
|
673
|
-
# Set up the
|
674
|
-
# `
|
675
|
-
# used to update the
|
676
|
-
# process is the
|
715
|
+
# Set up the leader monitor. A one-time timer is used to run
|
716
|
+
# `request_leader_election()` in 2 seconds. A periodic timer is
|
717
|
+
# used to update the leader lock timestamp if the current
|
718
|
+
# process is the leader, or to run `request_leader_election()`,
|
677
719
|
# every 10 seconds. The timers are stored in the timers hash
|
678
720
|
# under `:run`.
|
679
|
-
def
|
721
|
+
def setup_leader_monitor
|
680
722
|
@timers[:run] << EM::Timer.new(2) do
|
681
|
-
|
723
|
+
request_leader_election
|
682
724
|
end
|
683
725
|
@timers[:run] << EM::PeriodicTimer.new(10) do
|
684
|
-
if @
|
685
|
-
|
686
|
-
|
726
|
+
if @is_leader
|
727
|
+
lock_timestamp = (Time.now.to_f * 1000).to_i
|
728
|
+
@redis.set("lock:leader", lock_timestamp) do
|
729
|
+
@logger.debug("updated leader lock timestamp")
|
687
730
|
end
|
688
731
|
else
|
689
|
-
|
732
|
+
request_leader_election
|
690
733
|
end
|
691
734
|
end
|
692
735
|
end
|
693
736
|
|
694
|
-
# Resign as
|
695
|
-
#
|
737
|
+
# Resign as leader, if the current process is the Sensu server
|
738
|
+
# leader. This method cancels and clears the leader timers,
|
696
739
|
# those with references stored in the timers hash under
|
697
|
-
# `:
|
698
|
-
def
|
699
|
-
if @
|
700
|
-
@logger.warn("resigning as
|
701
|
-
@timers[:
|
740
|
+
# `:leader`, and `@is_leader` is set to `false`.
|
741
|
+
def resign_as_leader
|
742
|
+
if @is_leader
|
743
|
+
@logger.warn("resigning as leader")
|
744
|
+
@timers[:leader].each do |timer|
|
702
745
|
timer.cancel
|
703
746
|
end
|
704
|
-
@timers[:
|
705
|
-
@
|
747
|
+
@timers[:leader].clear
|
748
|
+
@is_leader = false
|
706
749
|
else
|
707
|
-
@logger.debug("not currently
|
750
|
+
@logger.debug("not currently leader")
|
708
751
|
end
|
709
752
|
end
|
710
753
|
|
@@ -736,13 +779,13 @@ module Sensu
|
|
736
779
|
end
|
737
780
|
|
738
781
|
# Bootstrap the Sensu server process, setting up the keepalive
|
739
|
-
# and check result consumers, and attemping to become the
|
782
|
+
# and check result consumers, and attempting to become the leader
|
740
783
|
# to carry out its duties. This method sets the process/daemon
|
741
784
|
# `@state` to `:running`.
|
742
785
|
def bootstrap
|
743
786
|
setup_keepalives
|
744
787
|
setup_results
|
745
|
-
|
788
|
+
setup_leader_monitor
|
746
789
|
@state = :running
|
747
790
|
end
|
748
791
|
|
@@ -759,7 +802,7 @@ module Sensu
|
|
759
802
|
# set to `:pausing`, to indicate that it's in progress. All run
|
760
803
|
# timers are cancelled, and the references are cleared. The
|
761
804
|
# Sensu server will unsubscribe from all transport
|
762
|
-
# subscriptions, resign as
|
805
|
+
# subscriptions, resign as leader (if currently the leader),
|
763
806
|
# then set the process/daemon `@state` to `:paused`.
|
764
807
|
def pause
|
765
808
|
unless @state == :pausing || @state == :paused
|
@@ -769,7 +812,7 @@ module Sensu
|
|
769
812
|
end
|
770
813
|
@timers[:run].clear
|
771
814
|
unsubscribe
|
772
|
-
|
815
|
+
resign_as_leader
|
773
816
|
@state = :paused
|
774
817
|
end
|
775
818
|
end
|
data/sensu.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.add_dependency "eventmachine", "1.0.3"
|
20
20
|
s.add_dependency "sensu-em", "2.4.1"
|
21
21
|
s.add_dependency "sensu-logger", "1.0.0"
|
22
|
-
s.add_dependency "sensu-settings", "1.
|
22
|
+
s.add_dependency "sensu-settings", "1.7.0"
|
23
23
|
s.add_dependency "sensu-extension", "1.1.2"
|
24
24
|
s.add_dependency "sensu-extensions", "1.2.0"
|
25
25
|
s.add_dependency "sensu-transport", "2.4.0"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sensu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.18.0.beta.1
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sean Porter
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-04-
|
12
|
+
date: 2015-04-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: multi_json
|
@@ -87,12 +87,12 @@ dependencies:
|
|
87
87
|
requirements:
|
88
88
|
- - '='
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: 1.
|
90
|
+
version: 1.7.0
|
91
91
|
requirement: !ruby/object:Gem::Requirement
|
92
92
|
requirements:
|
93
93
|
- - '='
|
94
94
|
- !ruby/object:Gem::Version
|
95
|
-
version: 1.
|
95
|
+
version: 1.7.0
|
96
96
|
prerelease: false
|
97
97
|
type: :runtime
|
98
98
|
- !ruby/object:Gem::Dependency
|
@@ -283,9 +283,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
283
283
|
version: '0'
|
284
284
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
285
285
|
requirements:
|
286
|
-
- - '
|
286
|
+
- - '>'
|
287
287
|
- !ruby/object:Gem::Version
|
288
|
-
version:
|
288
|
+
version: 1.3.1
|
289
289
|
requirements: []
|
290
290
|
rubyforge_project:
|
291
291
|
rubygems_version: 2.1.9
|