sensu 0.23.3-java → 0.24.0.beta-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +58 -0
- data/lib/sensu/api/process.rb +184 -90
- data/lib/sensu/api/validators.rb +37 -0
- data/lib/sensu/cli.rb +3 -0
- data/lib/sensu/client/process.rb +34 -6
- data/lib/sensu/constants.rb +1 -1
- data/lib/sensu/daemon.rb +67 -30
- data/lib/sensu/server/filter.rb +2 -2
- data/lib/sensu/server/handle.rb +12 -12
- data/lib/sensu/server/mutate.rb +4 -4
- data/lib/sensu/server/process.rb +218 -213
- data/sensu.gemspec +5 -5
- metadata +13 -12
@@ -0,0 +1,37 @@
|
|
1
|
+
require "sensu/settings/rules"
|
2
|
+
require "sensu/settings/validators/client"
|
3
|
+
|
4
|
+
module Sensu
  module API
    module Validators
      # Raised internally when a definition fails validation.
      Invalid = Class.new(RuntimeError)

      # Validates client definitions by reusing the Sensu Settings
      # rules and client validator mixins.
      class Client
        include Sensu::Settings::Rules
        include Sensu::Settings::Validators::Client

        # Report whether a client definition passes validation.
        # `validate_client()` is run against the definition; an
        # `Invalid` exception raised along the way means the
        # definition is not valid.
        #
        # @param client [Hash]
        # @return [TrueClass, FalseClass]
        def valid?(client)
          begin
            validate_client(client)
          rescue Invalid
            return false
          end
          true
        end

        private

        # Failure hook: always raises `Invalid`, which `valid?()`
        # rescues. Any arguments supplied are ignored.
        def invalid(*_arguments)
          raise Invalid
        end
      end
    end
  end
end
|
data/lib/sensu/cli.rb
CHANGED
@@ -26,6 +26,9 @@ module Sensu
|
|
26
26
|
opts.on("-d", "--config_dir DIR[,DIR]", "DIR or comma-delimited DIR list for Sensu JSON config files") do |dir|
|
27
27
|
options[:config_dirs] = dir.split(",")
|
28
28
|
end
|
29
|
+
opts.on("--validate_config", "Validate the compiled configuration and exit") do
|
30
|
+
options[:validate_config] = true
|
31
|
+
end
|
29
32
|
opts.on("-P", "--print_config", "Print the compiled configuration and exit") do
|
30
33
|
options[:print_config] = true
|
31
34
|
end
|
data/lib/sensu/client/process.rb
CHANGED
@@ -99,12 +99,40 @@ module Sensu
|
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
|
+
# Perform token substitution for an object. String values are
# passed to `substitute_tokens()` together with the client
# settings, arrays and hashes are processed recursively, and any
# other value (e.g. numeric) is returned untouched.
#
# The provided object is NOT modified: a substituted copy is
# built and returned, so the caller's original definition keeps
# its tokens and can be substituted again later.
#
# @param object [Object]
# @return [Array] containing the substituted copy of the object
#   and an array of unique unmatched tokens.
def object_substitute_tokens(object)
  unmatched_tokens = []
  substituted = case object
  when Hash
    object.each_with_object({}) do |(key, value), result|
      result[key], unmatched = object_substitute_tokens(value)
      unmatched_tokens.push(*unmatched)
    end
  when Array
    object.map do |value|
      value, unmatched = object_substitute_tokens(value)
      unmatched_tokens.push(*unmatched)
      value
    end
  when String
    value, unmatched_tokens = substitute_tokens(object, @settings[:client])
    value
  else
    object
  end
  [substituted, unmatched_tokens.uniq]
end
|
128
|
+
|
102
129
|
# Execute a check command, capturing its output (STDOUT/ERR),
|
103
130
|
# exit status code, execution duration, timestamp, and publish
|
104
131
|
# the result. This method guards against multiple executions for
|
105
|
-
# the same check. Check
|
106
|
-
# associated client attribute values
|
107
|
-
#
|
132
|
+
# the same check. Check attribute value tokens are substituted
|
133
|
+
# with the associated client attribute values, via
|
134
|
+
# `object_substitute_tokens()`. If there are unmatched check
|
135
|
+
# attribute value tokens, the check will not be executed,
|
108
136
|
# instead a check result will be published reporting the
|
109
137
|
# unmatched tokens.
|
110
138
|
#
|
@@ -113,11 +141,11 @@ module Sensu
|
|
113
141
|
@logger.debug("attempting to execute check command", :check => check)
|
114
142
|
unless @checks_in_progress.include?(check[:name])
|
115
143
|
@checks_in_progress << check[:name]
|
116
|
-
|
144
|
+
check, unmatched_tokens = object_substitute_tokens(check)
|
117
145
|
if unmatched_tokens.empty?
|
118
|
-
check[:executed] = Time.now.to_i
|
119
146
|
started = Time.now.to_f
|
120
|
-
|
147
|
+
check[:executed] = started.to_i
|
148
|
+
Spawn.process(check[:command], :timeout => check[:timeout]) do |output, status|
|
121
149
|
check[:duration] = ("%.3f" % (Time.now.to_f - started)).to_f
|
122
150
|
check[:output] = output
|
123
151
|
check[:status] = status
|
data/lib/sensu/constants.rb
CHANGED
data/lib/sensu/daemon.rb
CHANGED
@@ -2,14 +2,14 @@ require "rubygems"
|
|
2
2
|
|
3
3
|
gem "eventmachine", "1.2.0.1"
|
4
4
|
|
5
|
-
gem "sensu-json", "
|
5
|
+
gem "sensu-json", "2.0.0"
|
6
6
|
gem "sensu-logger", "1.2.0"
|
7
|
-
gem "sensu-settings", "
|
7
|
+
gem "sensu-settings", "5.1.0"
|
8
8
|
gem "sensu-extension", "1.5.0"
|
9
9
|
gem "sensu-extensions", "1.5.0"
|
10
|
-
gem "sensu-transport", "
|
10
|
+
gem "sensu-transport", "6.0.0"
|
11
11
|
gem "sensu-spawn", "2.2.0"
|
12
|
-
gem "sensu-redis", "1.
|
12
|
+
gem "sensu-redis", "1.4.0"
|
13
13
|
|
14
14
|
require "time"
|
15
15
|
require "uri"
|
@@ -49,6 +49,7 @@ module Sensu
|
|
49
49
|
setup_logger(options)
|
50
50
|
load_settings(options)
|
51
51
|
load_extensions(options)
|
52
|
+
setup_spawn
|
52
53
|
setup_process(options)
|
53
54
|
end
|
54
55
|
|
@@ -64,56 +65,78 @@ module Sensu
|
|
64
65
|
@logger.setup_signal_traps
|
65
66
|
end
|
66
67
|
|
67
|
-
# Log setting or extension loading
|
68
|
+
# Log setting or extension loading notices, sensitive information
# is redacted. Each notice's `:message` is used as the log
# message and the remaining notice data is passed through
# `redact_sensitive()` and logged as context.
#
# @param notices [Array] of notice hashes to be logged.
# @param level [Symbol] logger method to log the notices with.
def log_notices(notices=[], level=:warn)
  notices.each do |notice|
    # Work on a shallow copy so the caller's notice keeps its
    # `:message` (the same notices may be inspected or dumped
    # again after they have been logged).
    details = notice.dup
    message = details.delete(:message)
    @logger.send(level, message, redact_sensitive(details))
  end
end
|
78
79
|
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#
|
82
|
-
#
|
83
|
-
#
|
80
|
+
# Determine if the Sensu settings are valid and immediately exit
# the process. With no load or validation errors, "configuration
# is valid" is printed and the process exits with status `0`.
# Otherwise "configuration is invalid" is printed, followed by a
# pretty JSON dump of the errors, and the process exits with
# status `2`. Triggered by a CLI argument, e.g.
# `--validate_config`.
#
# The errors that are printed are those of the `settings`
# argument, i.e. the same object that was inspected to decide
# validity (not whatever `@settings` happens to hold).
#
# @param settings [Object] responding to `errors`.
def validate_settings!(settings)
  if settings.errors.empty?
    puts "configuration is valid"
    exit
  else
    puts "configuration is invalid"
    puts Sensu::JSON.dump({:errors => settings.errors}, :pretty => true)
    exit 2
  end
end
|
98
|
+
|
99
|
+
# Dump the compiled Sensu settings as pretty JSON on STDOUT and
# exit the process. The settings hash is passed through
# `redact_sensitive()` before it is printed, and a warning is
# logged first. The exit status is `0` when the settings have no
# errors and `2` otherwise. Triggered by a CLI argument, e.g.
# `--print_config`.
#
# @param settings [Object]
def print_settings!(settings)
  redacted_settings = redact_sensitive(settings.to_hash)
  @logger.warn("outputting compiled configuration and exiting")
  puts Sensu::JSON.dump(redacted_settings, :pretty => true)
  exit_status = settings.errors.empty? ? 0 : 2
  exit(exit_status)
end
|
92
112
|
|
93
|
-
# Load Sensu settings
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
#
|
98
|
-
#
|
113
|
+
# Load Sensu settings into the `@settings` instance variable.
#
# Steps, in order:
# 1. With the `validate_config` option, `validate_settings!()` is
#    called with the loaded settings.
# 2. Setting warnings are logged, then errors are logged at the
#    fatal level, both via `log_notices()`.
# 3. With the `print_config` option, `print_settings!()` is
#    called with the loaded settings.
# 4. If the settings have errors, "SENSU NOT RUNNING!" is logged
#    and the process exits with status `2`.
#
# https://github.com/sensu/sensu-settings
#
# @param options [Hash]
def load_settings(options={})
  @settings = Settings.get(options)
  if options[:validate_config]
    validate_settings!(@settings)
  end
  log_notices(@settings.warnings)
  log_notices(@settings.errors, :fatal)
  if options[:print_config]
    print_settings!(@settings)
  end
  return if @settings.errors.empty?
  @logger.fatal("SENSU NOT RUNNING!")
  exit 2
end
|
115
138
|
|
116
|
-
# Load Sensu extensions and log any
|
139
|
+
# Load Sensu extensions and log any notices. Set the logger and
|
117
140
|
# settings for each extension instance. This method creates the
|
118
141
|
# extensions instance variable: `@extensions`.
|
119
142
|
#
|
@@ -123,7 +146,7 @@ module Sensu
|
|
123
146
|
# @param options [Hash]
|
124
147
|
def load_extensions(options={})
|
125
148
|
@extensions = Extensions.get(options)
|
126
|
-
|
149
|
+
log_notices(@extensions.warnings)
|
127
150
|
extension_settings = @settings.to_hash.dup
|
128
151
|
@extensions.all.each do |extension|
|
129
152
|
extension.logger = @logger
|
@@ -131,6 +154,20 @@ module Sensu
|
|
131
154
|
end
|
132
155
|
end
|
133
156
|
|
157
|
+
# Configure Sensu spawn from the `[:sensu][:spawn]` settings.
# The EventMachine thread pool size is set to the configured
# spawn `:limit` plus `10`, and the same spawn settings are then
# handed to `Spawn.setup()`.
#
# https://github.com/sensu/sensu-spawn
def setup_spawn
  spawn_settings = @settings[:sensu][:spawn]
  @logger.info("configuring sensu spawn", :settings => spawn_settings)
  pool_size = spawn_settings[:limit] + 10
  @logger.debug("setting eventmachine threadpool size", :size => pool_size)
  EM.threadpool_size = pool_size
  Spawn.setup(spawn_settings)
end
|
170
|
+
|
134
171
|
# Manage the current process, optionally daemonize and/or write
|
135
172
|
# the current process ID to a PID file.
|
136
173
|
#
|
data/lib/sensu/server/filter.rb
CHANGED
@@ -345,14 +345,14 @@ module Sensu
|
|
345
345
|
end
|
346
346
|
if filter_message
|
347
347
|
@logger.info(filter_message, details)
|
348
|
-
@
|
348
|
+
@in_progress[:events] -= 1 if @in_progress
|
349
349
|
else
|
350
350
|
event_filtered?(handler, event) do |filtered|
|
351
351
|
unless filtered
|
352
352
|
yield(event)
|
353
353
|
else
|
354
354
|
@logger.info("event was filtered", details)
|
355
|
-
@
|
355
|
+
@in_progress[:events] -= 1 if @in_progress
|
356
356
|
end
|
357
357
|
end
|
358
358
|
end
|
data/lib/sensu/server/handle.rb
CHANGED
@@ -4,7 +4,7 @@ module Sensu
|
|
4
4
|
module Server
|
5
5
|
module Handle
|
6
6
|
# Create a handler error callback, for logging the error and
|
7
|
-
# decrementing the `@
|
7
|
+
# decrementing the `@in_progress[:events]` by `1`.
|
8
8
|
#
|
9
9
|
# @param handler [Object]
|
10
10
|
# @param event_data [Object]
|
@@ -16,14 +16,14 @@ module Sensu
|
|
16
16
|
:event_data => event_data,
|
17
17
|
:error => error.to_s
|
18
18
|
})
|
19
|
-
@
|
19
|
+
@in_progress[:events] -= 1 if @in_progress
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
# Execute a pipe event handler, using the defined handler
|
24
24
|
# command to spawn a process, passing it event data via STDIN.
|
25
25
|
# Log the handler output lines and decrement the
|
26
|
-
# `@
|
26
|
+
# `@in_progress[:events]` by `1` when the handler executes
|
27
27
|
# successfully.
|
28
28
|
#
|
29
29
|
# @param handler [Hash] definition.
|
@@ -36,7 +36,7 @@ module Sensu
|
|
36
36
|
:handler => handler,
|
37
37
|
:output => output.split("\n+")
|
38
38
|
})
|
39
|
-
@
|
39
|
+
@in_progress[:events] -= 1 if @in_progress
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -47,7 +47,7 @@ module Sensu
|
|
47
47
|
# `handler_error()` method is used to create the `on_error`
|
48
48
|
# callback for the connection handler. The `on_error` callback
|
49
49
|
# is called in the event of any error(s). The
|
50
|
-
# `@
|
50
|
+
# `@in_progress[:events]` is decremented by `1` when the data is
|
51
51
|
# transmitted successfully, `on_success`.
|
52
52
|
#
|
53
53
|
# @param handler [Hash] definition.
|
@@ -57,7 +57,7 @@ module Sensu
|
|
57
57
|
begin
|
58
58
|
EM::connect(handler[:socket][:host], handler[:socket][:port], Socket) do |socket|
|
59
59
|
socket.on_success = Proc.new do
|
60
|
-
@
|
60
|
+
@in_progress[:events] -= 1 if @in_progress
|
61
61
|
end
|
62
62
|
socket.on_error = on_error
|
63
63
|
timeout = handler[:timeout] || 10
|
@@ -71,7 +71,7 @@ module Sensu
|
|
71
71
|
end
|
72
72
|
|
73
73
|
# Transmit event data to a UDP socket, then close the
|
74
|
-
# connection. The `@
|
74
|
+
# connection. The `@in_progress[:events]` is decremented by `1`
|
75
75
|
# when the data is assumed to have been transmitted.
|
76
76
|
#
|
77
77
|
# @param handler [Hash] definition.
|
@@ -81,7 +81,7 @@ module Sensu
|
|
81
81
|
EM::open_datagram_socket("0.0.0.0", 0, nil) do |socket|
|
82
82
|
socket.send_datagram(event_data.to_s, handler[:socket][:host], handler[:socket][:port])
|
83
83
|
socket.close_connection_after_writing
|
84
|
-
@
|
84
|
+
@in_progress[:events] -= 1 if @in_progress
|
85
85
|
end
|
86
86
|
rescue => error
|
87
87
|
handler_error(handler, event_data).call(error)
|
@@ -90,7 +90,7 @@ module Sensu
|
|
90
90
|
|
91
91
|
# Publish event data to a Sensu transport pipe. Event data that
|
92
92
|
# is `nil` or empty will not be published, to prevent transport
|
93
|
-
# errors. The `@
|
93
|
+
# errors. The `@in_progress[:events]` is decremented by `1`,
|
94
94
|
# even if the event data is not published.
|
95
95
|
#
|
96
96
|
# @param handler [Hash] definition.
|
@@ -105,14 +105,14 @@ module Sensu
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
end
|
108
|
-
@
|
108
|
+
@in_progress[:events] -= 1 if @in_progress
|
109
109
|
end
|
110
110
|
|
111
111
|
# Run a handler extension, within the Sensu EventMachine reactor
|
112
112
|
# (event loop). The extension API `safe_run()` method is used to
|
113
113
|
# guard against most errors. The `safe_run()` callback is always
|
114
114
|
# called, logging the extension run output and status, and
|
115
|
-
# decrementing the `@
|
115
|
+
# decrementing the `@in_progress[:events]` by `1`.
|
116
116
|
#
|
117
117
|
# @param handler [Hash] definition.
|
118
118
|
# @param event_data [Object] to pass to the handler extension.
|
@@ -123,7 +123,7 @@ module Sensu
|
|
123
123
|
:output => output,
|
124
124
|
:status => status
|
125
125
|
})
|
126
|
-
@
|
126
|
+
@in_progress[:events] -= 1 if @in_progress
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
data/lib/sensu/server/mutate.rb
CHANGED
@@ -6,7 +6,7 @@ module Sensu
|
|
6
6
|
# created callback can be used for standard mutators and mutator
|
7
7
|
# extensions. The provided callback will only be called when the
|
8
8
|
# mutator status is `0` (OK). If the status is not `0`, an error
|
9
|
-
# is logged, and the `@
|
9
|
+
# is logged, and the `@in_progress[:events]` is decremented by
|
10
10
|
# `1`.
|
11
11
|
#
|
12
12
|
# @param mutator [Object] definition or extension.
|
@@ -25,7 +25,7 @@ module Sensu
|
|
25
25
|
:output => output,
|
26
26
|
:status => status
|
27
27
|
})
|
28
|
-
@
|
28
|
+
@in_progress[:events] -= 1 if @in_progress
|
29
29
|
end
|
30
30
|
end
|
31
31
|
end
|
@@ -63,7 +63,7 @@ module Sensu
|
|
63
63
|
# mutator is used, unless the handler specifies another mutator.
|
64
64
|
# If a mutator does not exist, not defined or a missing
|
65
65
|
# extension, an error will be logged and the
|
66
|
-
# `@
|
66
|
+
# `@in_progress[:events]` is decremented by `1`. This method
|
67
67
|
# first checks for the existence of a standard mutator, then
|
68
68
|
# checks for an extension if a standard mutator is not defined.
|
69
69
|
#
|
@@ -84,7 +84,7 @@ module Sensu
|
|
84
84
|
@logger.error("unknown mutator", {
|
85
85
|
:mutator_name => mutator_name
|
86
86
|
})
|
87
|
-
@
|
87
|
+
@in_progress[:events] -= 1 if @in_progress
|
88
88
|
end
|
89
89
|
end
|
90
90
|
end
|
data/lib/sensu/server/process.rb
CHANGED
@@ -11,7 +11,9 @@ module Sensu
|
|
11
11
|
include Mutate
|
12
12
|
include Handle
|
13
13
|
|
14
|
-
attr_reader :is_leader, :
|
14
|
+
attr_reader :is_leader, :in_progress
|
15
|
+
|
16
|
+
STANDARD_CHECK_TYPE = "standard".freeze
|
15
17
|
|
16
18
|
METRIC_CHECK_TYPE = "metric".freeze
|
17
19
|
|
@@ -40,17 +42,23 @@ module Sensu
|
|
40
42
|
super
|
41
43
|
@is_leader = false
|
42
44
|
@timers[:leader] = Array.new
|
43
|
-
@
|
45
|
+
@in_progress = Hash.new(0)
|
44
46
|
end
|
45
47
|
|
46
48
|
# Set up the Redis and Transport connection objects, `@redis`
|
47
|
-
# and `@transport`. This method
|
48
|
-
#
|
49
|
+
# and `@transport`. This method updates the Redis on error
|
50
|
+
# callback to reset the in progress check result counter. This
|
51
|
+
# method "drys" up many instances of `setup_redis()` and
|
52
|
+
# `setup_transport()`, particularly in the specs.
|
49
53
|
#
|
50
54
|
# @yield callback/block called after connecting to Redis and the
|
51
55
|
# Sensu Transport.
|
52
56
|
def setup_connections
|
53
57
|
setup_redis do
|
58
|
+
@redis.on_error do |error|
|
59
|
+
@logger.error("redis connection error", :error => error.to_s)
|
60
|
+
@in_progress[:check_results] = 0
|
61
|
+
end
|
54
62
|
setup_transport do
|
55
63
|
yield
|
56
64
|
end
|
@@ -90,15 +98,11 @@ module Sensu
|
|
90
98
|
#
|
91
99
|
# @param client [Hash] definition.
|
92
100
|
def create_client_registration_event(client)
  # Build the registration check for the client and turn it into
  # an event via `create_event()`.
  registration_check = create_registration_check(client)
  create_event(client, registration_check) do |registration_event|
    # The event is passed to the bridges first, then processed.
    event_bridges(registration_event)
    process_event(registration_event)
  end
end
|
103
107
|
|
104
108
|
# Process an initial client registration, when it is first added
|
@@ -235,8 +239,8 @@ module Sensu
|
|
235
239
|
#
|
236
240
|
# This method determines the appropriate handlers for an event,
|
237
241
|
# filtering and mutating the event data for each of them. The
|
238
|
-
# `@
|
239
|
-
# handler chain (filter -> mutate -> handle).
|
242
|
+
# `@in_progress[:events]` counter is incremented by `1`, for
|
243
|
+
# each event handler chain (filter -> mutate -> handle).
|
240
244
|
#
|
241
245
|
# @param event [Hash]
|
242
246
|
def process_event(event)
|
@@ -245,7 +249,7 @@ module Sensu
|
|
245
249
|
handler_list = Array((event[:check][:handlers] || event[:check][:handler]) || DEFAULT_HANDLER_NAME)
|
246
250
|
handlers = derive_handlers(handler_list)
|
247
251
|
handlers.each do |handler|
|
248
|
-
@
|
252
|
+
@in_progress[:events] += 1
|
249
253
|
filter_event(handler, event) do |event|
|
250
254
|
mutate_event(handler, event) do |event_data|
|
251
255
|
handle_event(handler, event_data)
|
@@ -271,35 +275,27 @@ module Sensu
|
|
271
275
|
end
|
272
276
|
end
|
273
277
|
|
274
|
-
# Add a check result to an aggregate.
|
275
|
-
#
|
276
|
-
#
|
277
|
-
#
|
278
|
-
#
|
279
|
-
#
|
280
|
-
#
|
281
|
-
# serialization is used for storing check result data.
|
278
|
+
# Add a check result to an aggregate. When check `:aggregate` is
# a string it names the aggregate, otherwise the check `:name` is
# used. The member "<client name>:<check name>" is added to the
# Redis set "aggregates:<aggregate>", and once that completes the
# aggregate name is added to the Redis set "aggregates".
#
# @param client [Hash]
# @param check [Hash]
def aggregate_check_result(client, check)
  aggregate_name = check[:name]
  aggregate_name = check[:aggregate] if check[:aggregate].is_a?(String)
  log_data = {
    :aggregate => aggregate_name,
    :client => client,
    :check => check
  }
  @logger.debug("adding check result to aggregate", log_data)
  member = "#{client[:name]}:#{check[:name]}"
  @redis.sadd("aggregates:#{aggregate_name}", member) do
    @redis.sadd("aggregates", aggregate_name)
  end
end
|
304
300
|
|
305
301
|
# Truncate check output. For metric checks, (`"type":
|
@@ -333,7 +329,7 @@ module Sensu
|
|
333
329
|
# @param client [Hash]
|
334
330
|
# @param check [Hash]
|
335
331
|
# @yield [] callback/block called after the check data has been
|
336
|
-
#
|
332
|
+
# stored (history, etc).
|
337
333
|
def store_check_result(client, check)
|
338
334
|
@logger.debug("storing check result", :check => check)
|
339
335
|
result_key = "#{client[:name]}:#{check[:name]}"
|
@@ -342,6 +338,7 @@ module Sensu
|
|
342
338
|
@redis.multi
|
343
339
|
@redis.sadd("result:#{client[:name]}", check[:name])
|
344
340
|
@redis.set("result:#{result_key}", Sensu::JSON.dump(check_truncated))
|
341
|
+
@redis.sadd("ttl", result_key) if check[:ttl]
|
345
342
|
@redis.rpush(history_key, check[:status])
|
346
343
|
@redis.ltrim(history_key, -21, -1)
|
347
344
|
@redis.exec do
|
@@ -418,83 +415,106 @@ module Sensu
|
|
418
415
|
end
|
419
416
|
|
420
417
|
# Update the event registry, stored in Redis, and report whether
# the event needs further processing.
#
# * Check `:status` is not `0`, or the event `:action` is
#   `:flapping`: the JSON serialized event is stored in the
#   "events:<client name>" hash under the check name, then the
#   block is called with `true`.
# * Event `:action` is `:resolve` and check `:auto_resolve` is not
#   `false` (or `:force_resolve` is set): the check name is
#   removed from that hash, then the block is called with `true`.
# * Otherwise the registry is left alone and the block is called
#   with `true` for checks of type `metric`, `false` for the rest.
#
# @param event [Hash]
# @yield callback/block called after the event registry has been
#   updated (or immediately when no update is needed).
# @yieldparam process [TrueClass, FalseClass] indicating if the
#   event requires further processing.
def update_event_registry(event)
  check = event[:check]
  registry_key = "events:#{event[:client][:name]}"
  store_event = check[:status] != 0 || event[:action] == :flapping
  if store_event
    @redis.hset(registry_key, check[:name], Sensu::JSON.dump(event)) { yield(true) }
  elsif event[:action] == :resolve && (check[:auto_resolve] != false || check[:force_resolve])
    @redis.hdel(registry_key, check[:name]) { yield(true) }
  else
    yield(check[:type] == METRIC_CHECK_TYPE)
  end
end
|
450
|
+
|
451
|
+
# Compose an event from the provided client and check result
# data. The check history and total state change are fetched via
# `check_history()` and written onto the check, then any event
# already stored for the client/check pair is loaded from the
# "events:<client name>" Redis hash and used to carry over the
# event `:id`, `:last_state_change`, `:last_ok` and
# `:occurrences`. A new event gets a `random_uuid` as its `:id`.
#
# @param client [Hash]
# @param check [Hash]
# @yield callback/block called with the resulting event.
# @yieldparam event [Hash]
def create_event(client, check)
  check_history(client, check) do |history, total_state_change|
    check[:history] = history
    check[:total_state_change] = total_state_change
    @redis.hget("events:#{client[:name]}", check[:name]) do |event_json|
      previous = event_json ? Sensu::JSON.load(event_json) : nil
      is_flapping = check_flapping?(previous, check)
      now = Time.now.to_i
      event = {
        :client => client,
        :check => check,
        :occurrences => 1,
        :action => (is_flapping ? :flapping : :create),
        :timestamp => now
      }
      if previous
        # Carry over identity and bookkeeping from the stored event.
        event.merge!(
          :id => previous[:id],
          :last_state_change => previous[:last_state_change],
          :last_ok => previous[:last_ok],
          :occurrences => previous[:occurrences]
        )
      else
        event[:id] = random_uuid
      end
      if check[:status] != 0 || is_flapping
        if history[-1] == history[-2]
          # Same status as the previous result: one more occurrence.
          event[:occurrences] += 1
        else
          event[:occurrences] = 1
          event[:last_state_change] = event[:timestamp]
        end
      elsif previous
        # OK result for a previously stored event: resolve it.
        event[:last_state_change] = event[:timestamp]
        event[:action] = :resolve
      end
      event[:last_ok] = event[:timestamp] if check[:status] == 0
      yield(event)
    end
  end
end
|
475
501
|
|
476
|
-
# Create a blank client (data)
|
477
|
-
#
|
478
|
-
#
|
479
|
-
#
|
480
|
-
#
|
481
|
-
# `false`.
|
502
|
+
# Build blank client data for the given name. The address is
# "unknown", there are no subscriptions, `:keepalives` is `false`
# and `:version` is the current `VERSION`. Nothing is stored by
# this method; the hash is simply returned.
#
# @param name [String] to use for the client.
# @return [Hash] client.
def create_client(name)
  blank_client = {
    :name => name,
    :address => "unknown",
    :subscriptions => [],
    :keepalives => false,
    :version => VERSION
  }
  blank_client
end
|
499
519
|
|
500
520
|
# Retrieve a client (data) from Redis if it exists. If a client
|
@@ -527,7 +547,8 @@ module Sensu
|
|
527
547
|
yield(client)
|
528
548
|
end
|
529
549
|
else
|
530
|
-
create_client(client_key)
|
550
|
+
client = create_client(client_key)
|
551
|
+
update_client_registry(client) do
|
531
552
|
yield(client)
|
532
553
|
end
|
533
554
|
end
|
@@ -536,14 +557,18 @@ module Sensu
|
|
536
557
|
|
537
558
|
# Process a check result, storing its data, inspecting its
|
538
559
|
# contents, and taking the appropriate actions (eg. update the
|
539
|
-
# event registry).
|
540
|
-
#
|
541
|
-
#
|
542
|
-
#
|
543
|
-
#
|
560
|
+
# event registry). The `@in_progress[:check_results]` counter is
|
561
|
+
# incremented by `1` prior to check result processing and then
|
562
|
+
# decremented by `1` after updating the event registry. A check
|
563
|
+
# result must have a valid client name, associated with a client
|
564
|
+
# in the registry or one will be created. If a local check
|
565
|
+
# definition exists for the check name, and the check result is
|
566
|
+
# not from a standalone check execution, it's merged with the
|
567
|
+
# check result for more context.
|
544
568
|
#
|
545
569
|
# @param result [Hash] data.
|
546
570
|
def process_check_result(result)
|
571
|
+
@in_progress[:check_results] += 1
|
547
572
|
@logger.debug("processing result", :result => result)
|
548
573
|
retrieve_client(result) do |client|
|
549
574
|
check = case
|
@@ -552,13 +577,15 @@ module Sensu
|
|
552
577
|
else
|
553
578
|
result[:check]
|
554
579
|
end
|
580
|
+
check[:type] ||= STANDARD_CHECK_TYPE
|
581
|
+
check[:origin] = result[:client] if check[:source]
|
555
582
|
aggregate_check_result(client, check) if check[:aggregate]
|
556
583
|
store_check_result(client, check) do
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
584
|
+
create_event(client, check) do |event|
|
585
|
+
event_bridges(event)
|
586
|
+
update_event_registry(event) do |process|
|
587
|
+
process_event(event) if process
|
588
|
+
@in_progress[:check_results] -= 1
|
562
589
|
end
|
563
590
|
end
|
564
591
|
end
|
@@ -755,45 +782,72 @@ module Sensu
|
|
755
782
|
check.merge(:name => "keepalive", :issued => timestamp, :executed => timestamp)
|
756
783
|
end
|
757
784
|
|
785
|
+
# Create client keepalive check results. The named clients are
# fetched from Redis ("client:<name>" keys) with a single MGET.
# Missing clients and clients with `:keepalives` set to `false`
# are skipped. For every other client a keepalive check is built
# with `create_keepalive_check()` and the time since the client
# `:timestamp` is compared with the check `:thresholds`:
#
# * `>=` critical: `:status` is `2`
# * `>=` warning: `:status` is `1`
# * otherwise: `:status` is `0` (OK)
#
# `:output` is set to a matching descriptive message and the
# result is published with `publish_check_result()`.
#
# An empty list of clients is handled without querying Redis
# (MGET needs at least one key); the block is called right away.
#
# @param clients [Array] of client names.
# @yield [] callback/block called after the client keepalive
#   check results have been created.
def create_client_keepalive_check_results(clients)
  return yield if clients.empty?
  client_keys = clients.map { |client_name| "client:#{client_name}" }
  @redis.mget(*client_keys) do |client_json_objects|
    client_json_objects.each do |client_json|
      next if client_json.nil?
      client = Sensu::JSON.load(client_json)
      next if client[:keepalives] == false
      check = create_keepalive_check(client)
      thresholds = check[:thresholds]
      time_since_last_keepalive = Time.now.to_i - client[:timestamp]
      # Messages are built with interpolation instead of appending
      # to string literals; the resulting text is unchanged.
      stale_output = "No keepalive sent from client for #{time_since_last_keepalive} seconds"
      if time_since_last_keepalive >= thresholds[:critical]
        check[:output] = "#{stale_output} (>=#{thresholds[:critical]})"
        check[:status] = 2
      elsif time_since_last_keepalive >= thresholds[:warning]
        check[:output] = "#{stale_output} (>=#{thresholds[:warning]})"
        check[:status] = 1
      else
        check[:output] = "Keepalive sent from client #{time_since_last_keepalive} seconds ago"
        check[:status] = 0
      end
      publish_check_result(client[:name], check)
    end
    yield
  end
end
|
829
|
+
|
758
830
|
# Determine stale clients, those that have not sent a keepalive
|
759
|
-
# in a specified amount of time
|
760
|
-
#
|
761
|
-
#
|
762
|
-
#
|
763
|
-
#
|
764
|
-
#
|
765
|
-
#
|
766
|
-
# value. If a client has been sending keepalives, `:output` and
|
767
|
-
# `:status` are set to indicate an OK state. A check result is
|
768
|
-
# published for every client in the registry.
|
831
|
+
# in a specified amount of time. This method iterates through
|
832
|
+
# the client registry, creating a keepalive check result for
|
833
|
+
# each client. The `create_client_keepalive_check_results()`
|
834
|
+
# method is used to inspect and create keepalive check results
|
835
|
+
# for each slice of clients from the registry. A relatively
|
836
|
+
# small clients slice size (20) is used to reduce the number of
|
837
|
+
# clients inspected within a single tick of the EM reactor.
|
769
838
|
def determine_stale_clients
  @logger.info("determining stale clients")
  @redis.smembers("clients") do |clients|
    # Number of clients handed to
    # `create_client_keepalive_check_results()` per batch.
    slice_size = 20
    process_slice = lambda do |slice_start|
      # Stop once every client has been covered; this also handles
      # an empty client registry.
      next if slice_start >= clients.length
      clients_slice = clients.slice(slice_start, slice_size)
      # The next slice is only started from the completion callback
      # of the current one, so slices are processed one at a time.
      create_client_keepalive_check_results(clients_slice) do
        process_slice.call(slice_start + slice_size)
      end
    end
    process_slice.call(0)
  end
end
|
799
853
|
|
@@ -809,32 +863,29 @@ module Sensu
|
|
809
863
|
|
810
864
|
# Determine stale check results, those that have not executed in
|
811
865
|
# a specified amount of time (check TTL). This method iterates
|
812
|
-
# through
|
813
|
-
#
|
814
|
-
#
|
815
|
-
# is
|
816
|
-
#
|
817
|
-
# published with the appropriate check output.
|
866
|
+
# through stored check results that have a defined TTL value (in
|
867
|
+
# seconds). The time since last check execution (in seconds) is
|
868
|
+
# calculated for each check result. If the time since last
|
869
|
+
# execution is equal to or greater than the check TTL, a warning
|
870
|
+
# check result is published with the appropriate check output.
|
818
871
|
def determine_stale_check_results
|
819
872
|
@logger.info("determining stale check results")
|
820
|
-
@redis.smembers("
|
821
|
-
|
822
|
-
@redis.
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
check[:status] = 1
|
834
|
-
publish_check_result(client_name, check)
|
835
|
-
end
|
836
|
-
end
|
873
|
+
@redis.smembers("ttl") do |result_keys|
|
874
|
+
result_keys.each do |result_key|
|
875
|
+
@redis.get("result:#{result_key}") do |result_json|
|
876
|
+
unless result_json.nil?
|
877
|
+
check = Sensu::JSON.load(result_json)
|
878
|
+
next unless check[:ttl] && check[:executed] && !check[:force_resolve]
|
879
|
+
time_since_last_execution = Time.now.to_i - check[:executed]
|
880
|
+
if time_since_last_execution >= check[:ttl]
|
881
|
+
client_name = result_key.split(":").first
|
882
|
+
check[:output] = "Last check execution was "
|
883
|
+
check[:output] << "#{time_since_last_execution} seconds ago"
|
884
|
+
check[:status] = 1
|
885
|
+
publish_check_result(client_name, check)
|
837
886
|
end
|
887
|
+
else
|
888
|
+
@redis.srem("ttl", result_key)
|
838
889
|
end
|
839
890
|
end
|
840
891
|
end
|
@@ -851,48 +902,6 @@ module Sensu
|
|
851
902
|
end
|
852
903
|
end
|
853
904
|
|
854
|
-
# Prune check result aggregations (aggregates). Sensu only
|
855
|
-
# stores the 20 latest aggregations for a check, to keep the
|
856
|
-
# amount of data stored to a minimum.
|
857
|
-
def prune_check_result_aggregations
|
858
|
-
@logger.info("pruning check result aggregations")
|
859
|
-
@redis.smembers("aggregates") do |checks|
|
860
|
-
checks.each do |check_name|
|
861
|
-
@redis.smembers("aggregates:#{check_name}") do |aggregates|
|
862
|
-
if aggregates.length > 20
|
863
|
-
aggregates.sort!
|
864
|
-
aggregates.take(aggregates.length - 20).each do |check_issued|
|
865
|
-
result_set = "#{check_name}:#{check_issued}"
|
866
|
-
@redis.multi
|
867
|
-
@redis.srem("aggregates:#{check_name}", check_issued)
|
868
|
-
@redis.del("aggregate:#{result_set}")
|
869
|
-
@redis.del("aggregation:#{result_set}")
|
870
|
-
@redis.exec do
|
871
|
-
@logger.debug("pruned aggregation", {
|
872
|
-
:check => {
|
873
|
-
:name => check_name,
|
874
|
-
:issued => check_issued
|
875
|
-
}
|
876
|
-
})
|
877
|
-
end
|
878
|
-
end
|
879
|
-
end
|
880
|
-
end
|
881
|
-
end
|
882
|
-
end
|
883
|
-
end
|
884
|
-
|
885
|
-
# Set up the check result aggregation pruner, using periodic
|
886
|
-
# timer to run `prune_check_result_aggregations()` every 20
|
887
|
-
# seconds. The timer is stored in the timers hash under
|
888
|
-
# `:leader`.
|
889
|
-
def setup_check_result_aggregation_pruner
|
890
|
-
@logger.debug("pruning check result aggregations")
|
891
|
-
@timers[:leader] << EM::PeriodicTimer.new(20) do
|
892
|
-
prune_check_result_aggregations
|
893
|
-
end
|
894
|
-
end
|
895
|
-
|
896
905
|
# Set up the leader duties, tasks only performed by a single
|
897
906
|
# Sensu server at a time. The duties include publishing check
|
898
907
|
# requests, monitoring for stale clients, and pruning check
|
@@ -901,7 +910,6 @@ module Sensu
|
|
901
910
|
setup_check_request_publisher
|
902
911
|
setup_client_monitor
|
903
912
|
setup_check_result_monitor
|
904
|
-
setup_check_result_aggregation_pruner
|
905
913
|
end
|
906
914
|
|
907
915
|
# Create a lock timestamp (integer), current time including
|
@@ -1040,19 +1048,16 @@ module Sensu
|
|
1040
1048
|
@transport.unsubscribe if @transport
|
1041
1049
|
end
|
1042
1050
|
|
1043
|
-
# Complete
|
1044
|
-
#
|
1045
|
-
#
|
1046
|
-
# called when handling is complete.
|
1051
|
+
# Complete in progress work and then call the provided callback.
|
1052
|
+
# This method will wait until all counters stored in the
|
1053
|
+
# `@in_progress` hash equal `0`.
|
1047
1054
|
#
|
1048
|
-
# @yield [] callback/block to call when
|
1049
|
-
#
|
1050
|
-
def
|
1051
|
-
@logger.info("completing
|
1052
|
-
:handling_event_count => @handling_event_count
|
1053
|
-
})
|
1055
|
+
# @yield [] callback/block to call when in progress work is
|
1056
|
+
# completed.
|
1057
|
+
def complete_in_progress
|
1058
|
+
@logger.info("completing work in progress", :in_progress => @in_progress)
|
1054
1059
|
retry_until_true do
|
1055
|
-
if @
|
1060
|
+
if @in_progress.values.all? { |count| count == 0 }
|
1056
1061
|
yield
|
1057
1062
|
true
|
1058
1063
|
end
|
@@ -1124,7 +1129,7 @@ module Sensu
|
|
1124
1129
|
@logger.warn("stopping")
|
1125
1130
|
pause
|
1126
1131
|
@state = :stopping
|
1127
|
-
|
1132
|
+
complete_in_progress do
|
1128
1133
|
@redis.close if @redis
|
1129
1134
|
@transport.close if @transport
|
1130
1135
|
super
|