sensu 0.23.3-java → 0.24.0.beta-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +58 -0
- data/lib/sensu/api/process.rb +184 -90
- data/lib/sensu/api/validators.rb +37 -0
- data/lib/sensu/cli.rb +3 -0
- data/lib/sensu/client/process.rb +34 -6
- data/lib/sensu/constants.rb +1 -1
- data/lib/sensu/daemon.rb +67 -30
- data/lib/sensu/server/filter.rb +2 -2
- data/lib/sensu/server/handle.rb +12 -12
- data/lib/sensu/server/mutate.rb +4 -4
- data/lib/sensu/server/process.rb +218 -213
- data/sensu.gemspec +5 -5
- metadata +13 -12
@@ -0,0 +1,37 @@
|
|
1
|
+
require "sensu/settings/rules"
|
2
|
+
require "sensu/settings/validators/client"
|
3
|
+
|
4
|
+
module Sensu
|
5
|
+
module API
|
6
|
+
module Validators
|
7
|
+
# The error class for validation.
|
8
|
+
class Invalid < RuntimeError; end
|
9
|
+
|
10
|
+
class Client
|
11
|
+
# Include Sensu Settings rules and client validator.
|
12
|
+
include Sensu::Settings::Rules
|
13
|
+
include Sensu::Settings::Validators::Client
|
14
|
+
|
15
|
+
# Determine if a client definition is valid.
|
16
|
+
#
|
17
|
+
# @param client [Hash]
|
18
|
+
# @return [TrueClass, FalseClass]
|
19
|
+
def valid?(client)
|
20
|
+
validate_client(client)
|
21
|
+
true
|
22
|
+
rescue Invalid
|
23
|
+
false
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
# This method is called when `validate_client()` encounters an
|
29
|
+
# invalid definition object. This method raises an exception
|
30
|
+
# to be caught by `valid?()`.
|
31
|
+
def invalid(*arguments)
|
32
|
+
raise Invalid
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/sensu/cli.rb
CHANGED
@@ -26,6 +26,9 @@ module Sensu
|
|
26
26
|
opts.on("-d", "--config_dir DIR[,DIR]", "DIR or comma-delimited DIR list for Sensu JSON config files") do |dir|
|
27
27
|
options[:config_dirs] = dir.split(",")
|
28
28
|
end
|
29
|
+
opts.on("--validate_config", "Validate the compiled configuration and exit") do
|
30
|
+
options[:validate_config] = true
|
31
|
+
end
|
29
32
|
opts.on("-P", "--print_config", "Print the compiled configuration and exit") do
|
30
33
|
options[:print_config] = true
|
31
34
|
end
|
data/lib/sensu/client/process.rb
CHANGED
@@ -99,12 +99,40 @@ module Sensu
|
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
|
+
# Perform token substitution for an object. String values are
|
103
|
+
# passed to `substitute_tokens()`, arrays and sub-hashes are
|
104
|
+
# processed recursively. Numeric values are ignored.
|
105
|
+
#
|
106
|
+
# @param object [Object]
|
107
|
+
# @return [Array] containing the updated object with substituted
|
108
|
+
# values and an array of unmatched tokens.
|
109
|
+
def object_substitute_tokens(object)
|
110
|
+
unmatched_tokens = []
|
111
|
+
case object
|
112
|
+
when Hash
|
113
|
+
object.each do |key, value|
|
114
|
+
object[key], unmatched = object_substitute_tokens(value)
|
115
|
+
unmatched_tokens.push(*unmatched)
|
116
|
+
end
|
117
|
+
when Array
|
118
|
+
object.map! do |value|
|
119
|
+
value, unmatched = object_substitute_tokens(value)
|
120
|
+
unmatched_tokens.push(*unmatched)
|
121
|
+
value
|
122
|
+
end
|
123
|
+
when String
|
124
|
+
object, unmatched_tokens = substitute_tokens(object, @settings[:client])
|
125
|
+
end
|
126
|
+
[object, unmatched_tokens.uniq]
|
127
|
+
end
|
128
|
+
|
102
129
|
# Execute a check command, capturing its output (STDOUT/ERR),
|
103
130
|
# exit status code, execution duration, timestamp, and publish
|
104
131
|
# the result. This method guards against multiple executions for
|
105
|
-
# the same check. Check
|
106
|
-
# associated client attribute values
|
107
|
-
#
|
132
|
+
# the same check. Check attribute value tokens are substituted
|
133
|
+
# with the associated client attribute values, via
|
134
|
+
# `object_substitute_tokens()`. If there are unmatched check
|
135
|
+
# attribute value tokens, the check will not be executed,
|
108
136
|
# instead a check result will be published reporting the
|
109
137
|
# unmatched tokens.
|
110
138
|
#
|
@@ -113,11 +141,11 @@ module Sensu
|
|
113
141
|
@logger.debug("attempting to execute check command", :check => check)
|
114
142
|
unless @checks_in_progress.include?(check[:name])
|
115
143
|
@checks_in_progress << check[:name]
|
116
|
-
|
144
|
+
check, unmatched_tokens = object_substitute_tokens(check)
|
117
145
|
if unmatched_tokens.empty?
|
118
|
-
check[:executed] = Time.now.to_i
|
119
146
|
started = Time.now.to_f
|
120
|
-
|
147
|
+
check[:executed] = started.to_i
|
148
|
+
Spawn.process(check[:command], :timeout => check[:timeout]) do |output, status|
|
121
149
|
check[:duration] = ("%.3f" % (Time.now.to_f - started)).to_f
|
122
150
|
check[:output] = output
|
123
151
|
check[:status] = status
|
data/lib/sensu/constants.rb
CHANGED
data/lib/sensu/daemon.rb
CHANGED
@@ -2,14 +2,14 @@ require "rubygems"
|
|
2
2
|
|
3
3
|
gem "eventmachine", "1.2.0.1"
|
4
4
|
|
5
|
-
gem "sensu-json", "
|
5
|
+
gem "sensu-json", "2.0.0"
|
6
6
|
gem "sensu-logger", "1.2.0"
|
7
|
-
gem "sensu-settings", "
|
7
|
+
gem "sensu-settings", "5.1.0"
|
8
8
|
gem "sensu-extension", "1.5.0"
|
9
9
|
gem "sensu-extensions", "1.5.0"
|
10
|
-
gem "sensu-transport", "
|
10
|
+
gem "sensu-transport", "6.0.0"
|
11
11
|
gem "sensu-spawn", "2.2.0"
|
12
|
-
gem "sensu-redis", "1.
|
12
|
+
gem "sensu-redis", "1.4.0"
|
13
13
|
|
14
14
|
require "time"
|
15
15
|
require "uri"
|
@@ -49,6 +49,7 @@ module Sensu
|
|
49
49
|
setup_logger(options)
|
50
50
|
load_settings(options)
|
51
51
|
load_extensions(options)
|
52
|
+
setup_spawn
|
52
53
|
setup_process(options)
|
53
54
|
end
|
54
55
|
|
@@ -64,56 +65,78 @@ module Sensu
|
|
64
65
|
@logger.setup_signal_traps
|
65
66
|
end
|
66
67
|
|
67
|
-
# Log setting or extension loading
|
68
|
+
# Log setting or extension loading notices, sensitive information
|
68
69
|
# is redacted.
|
69
70
|
#
|
70
|
-
# @param
|
71
|
-
# @param level [Symbol] to log the
|
72
|
-
def
|
73
|
-
|
71
|
+
# @param notices [Array] to be logged.
|
72
|
+
# @param level [Symbol] to log the notices at.
|
73
|
+
def log_notices(notices=[], level=:warn)
|
74
|
+
notices.each do |concern|
|
74
75
|
message = concern.delete(:message)
|
75
76
|
@logger.send(level, message, redact_sensitive(concern))
|
76
77
|
end
|
77
78
|
end
|
78
79
|
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#
|
82
|
-
#
|
83
|
-
#
|
80
|
+
# Determine if the Sensu settings are valid, if there are load or
|
81
|
+
# validation errors, and immediately exit the process with the
|
82
|
+
# appropriate exit status code. This method is used to determine
|
83
|
+
# if the latest configuration changes are valid prior to
|
84
|
+
# restarting the Sensu service, triggered by a CLI argument, e.g.
|
85
|
+
# `--validate_config`.
|
84
86
|
#
|
85
87
|
# @param settings [Object]
|
86
|
-
def
|
88
|
+
def validate_settings!(settings)
|
89
|
+
if settings.errors.empty?
|
90
|
+
puts "configuration is valid"
|
91
|
+
exit
|
92
|
+
else
|
93
|
+
puts "configuration is invalid"
|
94
|
+
puts Sensu::JSON.dump({:errors => @settings.errors}, :pretty => true)
|
95
|
+
exit 2
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Print the Sensu settings (JSON) to STDOUT and immediately exit
|
100
|
+
# the process with the appropriate exit status code. This method
|
101
|
+
# is used while troubleshooting configuration issues, triggered by
|
102
|
+
# a CLI argument, e.g. `--print_config`. Sensu settings with
|
103
|
+
# sensitive values (e.g. passwords) are first redacted.
|
104
|
+
#
|
105
|
+
# @param settings [Object]
|
106
|
+
def print_settings!(settings)
|
87
107
|
redacted_settings = redact_sensitive(settings.to_hash)
|
88
108
|
@logger.warn("outputting compiled configuration and exiting")
|
89
109
|
puts Sensu::JSON.dump(redacted_settings, :pretty => true)
|
90
|
-
exit
|
110
|
+
exit(settings.errors.empty? ? 0 : 2)
|
91
111
|
end
|
92
112
|
|
93
|
-
# Load Sensu settings
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
#
|
98
|
-
#
|
113
|
+
# Load Sensu settings. This method creates the settings instance
|
114
|
+
# variable: `@settings`. If the `validate_config` option is true,
|
115
|
+
# this method calls `validate_settings!()` to validate the latest
|
116
|
+
# compiled configuration settings and will then exit the process.
|
117
|
+
# If the `print_config` option is true, this method calls
|
118
|
+
# `print_settings!()` to output the compiled configuration
|
119
|
+
# settings and will then exit the process. If there are loading or
|
120
|
+
# validation errors, they will be logged (notices), and this
|
121
|
+
# method will exit(2) the process.
|
122
|
+
#
|
99
123
|
#
|
100
124
|
# https://github.com/sensu/sensu-settings
|
101
125
|
#
|
102
126
|
# @param options [Hash]
|
103
127
|
def load_settings(options={})
|
104
128
|
@settings = Settings.get(options)
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
129
|
+
validate_settings!(@settings) if options[:validate_config]
|
130
|
+
log_notices(@settings.warnings)
|
131
|
+
log_notices(@settings.errors, :fatal)
|
132
|
+
print_settings!(@settings) if options[:print_config]
|
133
|
+
unless @settings.errors.empty?
|
110
134
|
@logger.fatal("SENSU NOT RUNNING!")
|
111
135
|
exit 2
|
112
136
|
end
|
113
|
-
print_settings(@settings) if options[:print_config]
|
114
137
|
end
|
115
138
|
|
116
|
-
# Load Sensu extensions and log any
|
139
|
+
# Load Sensu extensions and log any notices. Set the logger and
|
117
140
|
# settings for each extension instance. This method creates the
|
118
141
|
# extensions instance variable: `@extensions`.
|
119
142
|
#
|
@@ -123,7 +146,7 @@ module Sensu
|
|
123
146
|
# @param options [Hash]
|
124
147
|
def load_extensions(options={})
|
125
148
|
@extensions = Extensions.get(options)
|
126
|
-
|
149
|
+
log_notices(@extensions.warnings)
|
127
150
|
extension_settings = @settings.to_hash.dup
|
128
151
|
@extensions.all.each do |extension|
|
129
152
|
extension.logger = @logger
|
@@ -131,6 +154,20 @@ module Sensu
|
|
131
154
|
end
|
132
155
|
end
|
133
156
|
|
157
|
+
# Set up Sensu spawn, creating a worker to create, control, and
|
158
|
+
# limit spawned child processes. This method adjusts the
|
159
|
+
# EventMachine thread pool size to accommodate the concurrent
|
160
|
+
# process spawn limit and other Sensu process operations.
|
161
|
+
#
|
162
|
+
# https://github.com/sensu/sensu-spawn
|
163
|
+
def setup_spawn
|
164
|
+
@logger.info("configuring sensu spawn", :settings => @settings[:sensu][:spawn])
|
165
|
+
threadpool_size = @settings[:sensu][:spawn][:limit] + 10
|
166
|
+
@logger.debug("setting eventmachine threadpool size", :size => threadpool_size)
|
167
|
+
EM.threadpool_size = threadpool_size
|
168
|
+
Spawn.setup(@settings[:sensu][:spawn])
|
169
|
+
end
|
170
|
+
|
134
171
|
# Manage the current process, optionally daemonize and/or write
|
135
172
|
# the current process ID to a PID file.
|
136
173
|
#
|
data/lib/sensu/server/filter.rb
CHANGED
@@ -345,14 +345,14 @@ module Sensu
|
|
345
345
|
end
|
346
346
|
if filter_message
|
347
347
|
@logger.info(filter_message, details)
|
348
|
-
@
|
348
|
+
@in_progress[:events] -= 1 if @in_progress
|
349
349
|
else
|
350
350
|
event_filtered?(handler, event) do |filtered|
|
351
351
|
unless filtered
|
352
352
|
yield(event)
|
353
353
|
else
|
354
354
|
@logger.info("event was filtered", details)
|
355
|
-
@
|
355
|
+
@in_progress[:events] -= 1 if @in_progress
|
356
356
|
end
|
357
357
|
end
|
358
358
|
end
|
data/lib/sensu/server/handle.rb
CHANGED
@@ -4,7 +4,7 @@ module Sensu
|
|
4
4
|
module Server
|
5
5
|
module Handle
|
6
6
|
# Create a handler error callback, for logging the error and
|
7
|
-
# decrementing the `@
|
7
|
+
# decrementing the `@in_progress[:events]` by `1`.
|
8
8
|
#
|
9
9
|
# @param handler [Object]
|
10
10
|
# @param event_data [Object]
|
@@ -16,14 +16,14 @@ module Sensu
|
|
16
16
|
:event_data => event_data,
|
17
17
|
:error => error.to_s
|
18
18
|
})
|
19
|
-
@
|
19
|
+
@in_progress[:events] -= 1 if @in_progress
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
# Execute a pipe event handler, using the defined handler
|
24
24
|
# command to spawn a process, passing it event data via STDIN.
|
25
25
|
# Log the handler output lines and decrement the
|
26
|
-
# `@
|
26
|
+
# `@in_progress[:events]` by `1` when the handler executes
|
27
27
|
# successfully.
|
28
28
|
#
|
29
29
|
# @param handler [Hash] definition.
|
@@ -36,7 +36,7 @@ module Sensu
|
|
36
36
|
:handler => handler,
|
37
37
|
:output => output.split("\n+")
|
38
38
|
})
|
39
|
-
@
|
39
|
+
@in_progress[:events] -= 1 if @in_progress
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -47,7 +47,7 @@ module Sensu
|
|
47
47
|
# `handler_error()` method is used to create the `on_error`
|
48
48
|
# callback for the connection handler. The `on_error` callback
|
49
49
|
# is call in the event of any error(s). The
|
50
|
-
# `@
|
50
|
+
# `@in_progress[:events]` is decremented by `1` when the data is
|
51
51
|
# transmitted successfully, `on_success`.
|
52
52
|
#
|
53
53
|
# @param handler [Hash] definition.
|
@@ -57,7 +57,7 @@ module Sensu
|
|
57
57
|
begin
|
58
58
|
EM::connect(handler[:socket][:host], handler[:socket][:port], Socket) do |socket|
|
59
59
|
socket.on_success = Proc.new do
|
60
|
-
@
|
60
|
+
@in_progress[:events] -= 1 if @in_progress
|
61
61
|
end
|
62
62
|
socket.on_error = on_error
|
63
63
|
timeout = handler[:timeout] || 10
|
@@ -71,7 +71,7 @@ module Sensu
|
|
71
71
|
end
|
72
72
|
|
73
73
|
# Transmit event data to a UDP socket, then close the
|
74
|
-
# connection. The `@
|
74
|
+
# connection. The `@in_progress[:events]` is decremented by `1`
|
75
75
|
# when the data is assumed to have been transmitted.
|
76
76
|
#
|
77
77
|
# @param handler [Hash] definition.
|
@@ -81,7 +81,7 @@ module Sensu
|
|
81
81
|
EM::open_datagram_socket("0.0.0.0", 0, nil) do |socket|
|
82
82
|
socket.send_datagram(event_data.to_s, handler[:socket][:host], handler[:socket][:port])
|
83
83
|
socket.close_connection_after_writing
|
84
|
-
@
|
84
|
+
@in_progress[:events] -= 1 if @in_progress
|
85
85
|
end
|
86
86
|
rescue => error
|
87
87
|
handler_error(handler, event_data).call(error)
|
@@ -90,7 +90,7 @@ module Sensu
|
|
90
90
|
|
91
91
|
# Publish event data to a Sensu transport pipe. Event data that
|
92
92
|
# is `nil` or empty will not be published, to prevent transport
|
93
|
-
# errors. The `@
|
93
|
+
# errors. The `@in_progress[:events]` is decremented by `1`,
|
94
94
|
# even if the event data is not published.
|
95
95
|
#
|
96
96
|
# @param handler [Hash] definition.
|
@@ -105,14 +105,14 @@ module Sensu
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
end
|
108
|
-
@
|
108
|
+
@in_progress[:events] -= 1 if @in_progress
|
109
109
|
end
|
110
110
|
|
111
111
|
# Run a handler extension, within the Sensu EventMachine reactor
|
112
112
|
# (event loop). The extension API `safe_run()` method is used to
|
113
113
|
# guard against most errors. The `safe_run()` callback is always
|
114
114
|
# called, logging the extension run output and status, and
|
115
|
-
# decrementing the `@
|
115
|
+
# decrementing the `@in_progress[:events]` by `1`.
|
116
116
|
#
|
117
117
|
# @param handler [Hash] definition.
|
118
118
|
# @param event_data [Object] to pass to the handler extension.
|
@@ -123,7 +123,7 @@ module Sensu
|
|
123
123
|
:output => output,
|
124
124
|
:status => status
|
125
125
|
})
|
126
|
-
@
|
126
|
+
@in_progress[:events] -= 1 if @in_progress
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
data/lib/sensu/server/mutate.rb
CHANGED
@@ -6,7 +6,7 @@ module Sensu
|
|
6
6
|
# created callback can be used for standard mutators and mutator
|
7
7
|
# extensions. The provided callback will only be called when the
|
8
8
|
# mutator status is `0` (OK). If the status is not `0`, an error
|
9
|
-
# is logged, and the `@
|
9
|
+
# is logged, and the `@in_progress[:events]` is decremented by
|
10
10
|
# `1`.
|
11
11
|
#
|
12
12
|
# @param mutator [Object] definition or extension.
|
@@ -25,7 +25,7 @@ module Sensu
|
|
25
25
|
:output => output,
|
26
26
|
:status => status
|
27
27
|
})
|
28
|
-
@
|
28
|
+
@in_progress[:events] -= 1 if @in_progress
|
29
29
|
end
|
30
30
|
end
|
31
31
|
end
|
@@ -63,7 +63,7 @@ module Sensu
|
|
63
63
|
# mutator is used, unless the handler specifies another mutator.
|
64
64
|
# If a mutator does not exist, not defined or a missing
|
65
65
|
# extension, an error will be logged and the
|
66
|
-
# `@
|
66
|
+
# `@in_progress[:events]` is decremented by `1`. This method
|
67
67
|
# first checks for the existence of a standard mutator, then
|
68
68
|
# checks for an extension if a standard mutator is not defined.
|
69
69
|
#
|
@@ -84,7 +84,7 @@ module Sensu
|
|
84
84
|
@logger.error("unknown mutator", {
|
85
85
|
:mutator_name => mutator_name
|
86
86
|
})
|
87
|
-
@
|
87
|
+
@in_progress[:events] -= 1 if @in_progress
|
88
88
|
end
|
89
89
|
end
|
90
90
|
end
|
data/lib/sensu/server/process.rb
CHANGED
@@ -11,7 +11,9 @@ module Sensu
|
|
11
11
|
include Mutate
|
12
12
|
include Handle
|
13
13
|
|
14
|
-
attr_reader :is_leader, :
|
14
|
+
attr_reader :is_leader, :in_progress
|
15
|
+
|
16
|
+
STANDARD_CHECK_TYPE = "standard".freeze
|
15
17
|
|
16
18
|
METRIC_CHECK_TYPE = "metric".freeze
|
17
19
|
|
@@ -40,17 +42,23 @@ module Sensu
|
|
40
42
|
super
|
41
43
|
@is_leader = false
|
42
44
|
@timers[:leader] = Array.new
|
43
|
-
@
|
45
|
+
@in_progress = Hash.new(0)
|
44
46
|
end
|
45
47
|
|
46
48
|
# Set up the Redis and Transport connection objects, `@redis`
|
47
|
-
# and `@transport`. This method
|
48
|
-
#
|
49
|
+
# and `@transport`. This method updates the Redis on error
|
50
|
+
# callback to reset the in progress check result counter. This
|
51
|
+
# method "drys" up many instances of `setup_redis()` and
|
52
|
+
# `setup_transport()`, particularly in the specs.
|
49
53
|
#
|
50
54
|
# @yield callback/block called after connecting to Redis and the
|
51
55
|
# Sensu Transport.
|
52
56
|
def setup_connections
|
53
57
|
setup_redis do
|
58
|
+
@redis.on_error do |error|
|
59
|
+
@logger.error("redis connection error", :error => error.to_s)
|
60
|
+
@in_progress[:check_results] = 0
|
61
|
+
end
|
54
62
|
setup_transport do
|
55
63
|
yield
|
56
64
|
end
|
@@ -90,15 +98,11 @@ module Sensu
|
|
90
98
|
#
|
91
99
|
# @param client [Hash] definition.
|
92
100
|
def create_client_registration_event(client)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
:action => :create,
|
99
|
-
:timestamp => Time.now.to_i
|
100
|
-
}
|
101
|
-
process_event(event)
|
101
|
+
check = create_registration_check(client)
|
102
|
+
create_event(client, check) do |event|
|
103
|
+
event_bridges(event)
|
104
|
+
process_event(event)
|
105
|
+
end
|
102
106
|
end
|
103
107
|
|
104
108
|
# Process an initial client registration, when it is first added
|
@@ -235,8 +239,8 @@ module Sensu
|
|
235
239
|
#
|
236
240
|
# This method determines the appropriate handlers for an event,
|
237
241
|
# filtering and mutating the event data for each of them. The
|
238
|
-
# `@
|
239
|
-
# handler chain (filter -> mutate -> handle).
|
242
|
+
# `@in_progress[:events]` counter is incremented by `1`, for
|
243
|
+
# each event handler chain (filter -> mutate -> handle).
|
240
244
|
#
|
241
245
|
# @param event [Hash]
|
242
246
|
def process_event(event)
|
@@ -245,7 +249,7 @@ module Sensu
|
|
245
249
|
handler_list = Array((event[:check][:handlers] || event[:check][:handler]) || DEFAULT_HANDLER_NAME)
|
246
250
|
handlers = derive_handlers(handler_list)
|
247
251
|
handlers.each do |handler|
|
248
|
-
@
|
252
|
+
@in_progress[:events] += 1
|
249
253
|
filter_event(handler, event) do |event|
|
250
254
|
mutate_event(handler, event) do |event_data|
|
251
255
|
handle_event(handler, event_data)
|
@@ -271,35 +275,27 @@ module Sensu
|
|
271
275
|
end
|
272
276
|
end
|
273
277
|
|
274
|
-
# Add a check result to an aggregate.
|
275
|
-
#
|
276
|
-
#
|
277
|
-
#
|
278
|
-
#
|
279
|
-
#
|
280
|
-
#
|
281
|
-
# serialization is used for storing check result data.
|
278
|
+
# Add a check result to an aggregate. The aggregate name is
|
279
|
+
# determined by the value of check `:aggregate`. If check
|
280
|
+
# `:aggregate` is `true` (legacy), the check `:name` is used as
|
281
|
+
# the aggregate name. If check `:aggregate` is a string, it is
|
282
|
+
# used as the aggregate name. This method will add the client
|
283
|
+
# name to the aggregate, all other processing (e.g. counters) is
|
284
|
+
# done by the Sensu API on request.
|
282
285
|
#
|
283
286
|
# @param client [Hash]
|
284
287
|
# @param check [Hash]
|
285
288
|
def aggregate_check_result(client, check)
|
289
|
+
aggregate = (check[:aggregate].is_a?(String) ? check[:aggregate] : check[:name])
|
286
290
|
@logger.debug("adding check result to aggregate", {
|
291
|
+
:aggregate => aggregate,
|
287
292
|
:client => client,
|
288
293
|
:check => check
|
289
294
|
})
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
@redis.hset("aggregation:#{result_set}", client[:name], result_data)
|
294
|
-
SEVERITIES.each do |severity|
|
295
|
-
@redis.hsetnx("aggregate:#{result_set}", severity, 0)
|
295
|
+
aggregate_member = "#{client[:name]}:#{check[:name]}"
|
296
|
+
@redis.sadd("aggregates:#{aggregate}", aggregate_member) do
|
297
|
+
@redis.sadd("aggregates", aggregate)
|
296
298
|
end
|
297
|
-
severity = (SEVERITIES[check[:status]] || "unknown")
|
298
|
-
@redis.hincrby("aggregate:#{result_set}", severity, 1)
|
299
|
-
@redis.hincrby("aggregate:#{result_set}", "total", 1)
|
300
|
-
@redis.sadd("aggregates:#{check[:name]}", check[:issued])
|
301
|
-
@redis.sadd("aggregates", check[:name])
|
302
|
-
@redis.exec
|
303
299
|
end
|
304
300
|
|
305
301
|
# Truncate check output. For metric checks, (`"type":
|
@@ -333,7 +329,7 @@ module Sensu
|
|
333
329
|
# @param client [Hash]
|
334
330
|
# @param check [Hash]
|
335
331
|
# @yield [] callback/block called after the check data has been
|
336
|
-
#
|
332
|
+
# stored (history, etc).
|
337
333
|
def store_check_result(client, check)
|
338
334
|
@logger.debug("storing check result", :check => check)
|
339
335
|
result_key = "#{client[:name]}:#{check[:name]}"
|
@@ -342,6 +338,7 @@ module Sensu
|
|
342
338
|
@redis.multi
|
343
339
|
@redis.sadd("result:#{client[:name]}", check[:name])
|
344
340
|
@redis.set("result:#{result_key}", Sensu::JSON.dump(check_truncated))
|
341
|
+
@redis.sadd("ttl", result_key) if check[:ttl]
|
345
342
|
@redis.rpush(history_key, check[:status])
|
346
343
|
@redis.ltrim(history_key, -21, -1)
|
347
344
|
@redis.exec do
|
@@ -418,83 +415,106 @@ module Sensu
|
|
418
415
|
end
|
419
416
|
|
420
417
|
# Update the event registry, stored in Redis. This method
|
421
|
-
# determines if
|
422
|
-
# event data in the registry.
|
423
|
-
# client/check pair is fetched, used in conditionals and the
|
424
|
-
# composition of the new event data. If a check `:status` is not
|
418
|
+
# determines if event data warrants in the creation or update of
|
419
|
+
# event data in the registry. If a check `:status` is not
|
425
420
|
# `0`, or it has been flapping, an event is created/updated in
|
426
|
-
# the registry. If
|
427
|
-
#
|
428
|
-
#
|
429
|
-
#
|
430
|
-
#
|
431
|
-
#
|
432
|
-
#
|
433
|
-
#
|
434
|
-
#
|
421
|
+
# the registry. If the event `:action` is `:resolve`, the event
|
422
|
+
# is removed (resolved) from the registry. If the previous
|
423
|
+
# conditions are not met and check `:type` is `metric`, the
|
424
|
+
# registry is not updated, but further event processing is
|
425
|
+
# required (`yield(true)`). JSON serialization is used when
|
426
|
+
# storing data in the registry.
|
427
|
+
#
|
428
|
+
# @param event [Hash]
|
429
|
+
# @yield callback [event] callback/block called after the event
|
430
|
+
# registry has been updated.
|
431
|
+
# @yieldparam process [TrueClass, FalseClass] indicating if the
|
432
|
+
# event requires further processing.
|
433
|
+
def update_event_registry(event)
|
434
|
+
client_name = event[:client][:name]
|
435
|
+
if event[:check][:status] != 0 || event[:action] == :flapping
|
436
|
+
@redis.hset("events:#{client_name}", event[:check][:name], Sensu::JSON.dump(event)) do
|
437
|
+
yield(true)
|
438
|
+
end
|
439
|
+
elsif event[:action] == :resolve &&
|
440
|
+
(event[:check][:auto_resolve] != false || event[:check][:force_resolve])
|
441
|
+
@redis.hdel("events:#{client_name}", event[:check][:name]) do
|
442
|
+
yield(true)
|
443
|
+
end
|
444
|
+
elsif event[:check][:type] == METRIC_CHECK_TYPE
|
445
|
+
yield(true)
|
446
|
+
else
|
447
|
+
yield(false)
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
# Create an event, using the provided client and check result
|
452
|
+
# data. Existing event data for the client/check pair is fetched
|
453
|
+
# from the event registry to be used in the composition of the
|
454
|
+
# new event.
|
435
455
|
#
|
436
456
|
# @param client [Hash]
|
437
457
|
# @param check [Hash]
|
438
458
|
# @yield callback [event] callback/block called with the
|
439
|
-
# resulting event
|
440
|
-
# the check is of type `:metric`.
|
459
|
+
# resulting event.
|
441
460
|
# @yieldparam event [Hash]
|
442
|
-
def
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
:
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
461
|
+
def create_event(client, check)
|
462
|
+
check_history(client, check) do |history, total_state_change|
|
463
|
+
check[:history] = history
|
464
|
+
check[:total_state_change] = total_state_change
|
465
|
+
@redis.hget("events:#{client[:name]}", check[:name]) do |event_json|
|
466
|
+
stored_event = event_json ? Sensu::JSON.load(event_json) : nil
|
467
|
+
flapping = check_flapping?(stored_event, check)
|
468
|
+
event = {
|
469
|
+
:client => client,
|
470
|
+
:check => check,
|
471
|
+
:occurrences => 1,
|
472
|
+
:action => (flapping ? :flapping : :create),
|
473
|
+
:timestamp => Time.now.to_i
|
474
|
+
}
|
475
|
+
if stored_event
|
476
|
+
event[:id] = stored_event[:id]
|
477
|
+
event[:last_state_change] = stored_event[:last_state_change]
|
478
|
+
event[:last_ok] = stored_event[:last_ok]
|
479
|
+
event[:occurrences] = stored_event[:occurrences]
|
480
|
+
else
|
481
|
+
event[:id] = random_uuid
|
460
482
|
end
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
483
|
+
if check[:status] != 0 || flapping
|
484
|
+
if history[-1] == history[-2]
|
485
|
+
event[:occurrences] += 1
|
486
|
+
else
|
487
|
+
event[:occurrences] = 1
|
488
|
+
event[:last_state_change] = event[:timestamp]
|
467
489
|
end
|
490
|
+
elsif stored_event
|
491
|
+
event[:last_state_change] = event[:timestamp]
|
492
|
+
event[:action] = :resolve
|
493
|
+
end
|
494
|
+
if check[:status] == 0
|
495
|
+
event[:last_ok] = event[:timestamp]
|
468
496
|
end
|
469
|
-
elsif check[:type] == METRIC_CHECK_TYPE
|
470
497
|
yield(event)
|
471
498
|
end
|
472
|
-
event_bridges(event)
|
473
499
|
end
|
474
500
|
end
|
475
501
|
|
476
|
-
# Create a blank client (data)
|
477
|
-
#
|
478
|
-
#
|
479
|
-
#
|
480
|
-
#
|
481
|
-
# `false`.
|
502
|
+
# Create a blank client (data). Only the client name is known,
|
503
|
+
# the other client attributes must be updated via the API (POST
|
504
|
+
# /clients:client). Dynamically created clients and those
|
505
|
+
# updated via the API will have client keepalives disabled by
|
506
|
+
# default, `:keepalives` is set to `false`.
|
482
507
|
#
|
483
|
-
# @param name [
|
484
|
-
# @
|
485
|
-
# dynamically created client data.
|
486
|
-
# @yieldparam client [Hash]
|
508
|
+
# @param name [String] to use for the client.
|
509
|
+
# @return [Hash] client.
|
487
510
|
def create_client(name)
|
488
|
-
|
511
|
+
{
|
489
512
|
:name => name,
|
490
513
|
:address => "unknown",
|
491
514
|
:subscriptions => [],
|
492
515
|
:keepalives => false,
|
493
516
|
:version => VERSION
|
494
517
|
}
|
495
|
-
update_client_registry(client) do
|
496
|
-
yield(client)
|
497
|
-
end
|
498
518
|
end
|
499
519
|
|
500
520
|
# Retrieve a client (data) from Redis if it exists. If a client
|
@@ -527,7 +547,8 @@ module Sensu
|
|
527
547
|
yield(client)
|
528
548
|
end
|
529
549
|
else
|
530
|
-
create_client(client_key)
|
550
|
+
client = create_client(client_key)
|
551
|
+
update_client_registry(client) do
|
531
552
|
yield(client)
|
532
553
|
end
|
533
554
|
end
|
@@ -536,14 +557,18 @@ module Sensu
|
|
536
557
|
|
537
558
|
# Process a check result, storing its data, inspecting its
|
538
559
|
# contents, and taking the appropriate actions (eg. update the
|
539
|
-
# event registry).
|
540
|
-
#
|
541
|
-
#
|
542
|
-
#
|
543
|
-
#
|
560
|
+
# event registry). The `@in_progress[:check_results]` counter is
|
561
|
+
# incremented by `1` prior to check result processing and then
|
562
|
+
# decremented by `1` after updating the event registry. A check
|
563
|
+
# result must have a valid client name, associated with a client
|
564
|
+
# in the registry or one will be created. If a local check
|
565
|
+
# definition exists for the check name, and the check result is
|
566
|
+
# not from a standalone check execution, it's merged with the
|
567
|
+
# check result for more context.
|
544
568
|
#
|
545
569
|
# @param result [Hash] data.
|
546
570
|
def process_check_result(result)
|
571
|
+
@in_progress[:check_results] += 1
|
547
572
|
@logger.debug("processing result", :result => result)
|
548
573
|
retrieve_client(result) do |client|
|
549
574
|
check = case
|
@@ -552,13 +577,15 @@ module Sensu
|
|
552
577
|
else
|
553
578
|
result[:check]
|
554
579
|
end
|
580
|
+
check[:type] ||= STANDARD_CHECK_TYPE
|
581
|
+
check[:origin] = result[:client] if check[:source]
|
555
582
|
aggregate_check_result(client, check) if check[:aggregate]
|
556
583
|
store_check_result(client, check) do
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
584
|
+
create_event(client, check) do |event|
|
585
|
+
event_bridges(event)
|
586
|
+
update_event_registry(event) do |process|
|
587
|
+
process_event(event) if process
|
588
|
+
@in_progress[:check_results] -= 1
|
562
589
|
end
|
563
590
|
end
|
564
591
|
end
|
@@ -755,45 +782,72 @@ module Sensu
|
|
755
782
|
check.merge(:name => "keepalive", :issued => timestamp, :executed => timestamp)
|
756
783
|
end
|
757
784
|
|
785
|
+
# Create client keepalive check results. This method will
|
786
|
+
# retrieve clients from the registry, creating a keepalive
|
787
|
+
# check definition for each client, using the
|
788
|
+
# `create_keepalive_check()` method, containing client specific
|
789
|
+
# keepalive thresholds. If the time since the latest keepalive
|
790
|
+
# is equal to or greater than a threshold, the check `:output`
|
791
|
+
# is set to a descriptive message, and `:status` is set to the
|
792
|
+
# appropriate non-zero value. If a client has been sending
|
793
|
+
# keepalives, `:output` and `:status` are set to indicate an OK
|
794
|
+
# state. The `publish_check_result()` method is used to publish
|
795
|
+
# the client keepalive check results.
|
796
|
+
#
|
797
|
+
# @param clients [Array] of client names.
|
798
|
+
# @yield [] callback/block called after the client keepalive
|
799
|
+
# check results have been created.
|
800
|
+
def create_client_keepalive_check_results(clients)
|
801
|
+
client_keys = clients.map { |client_name| "client:#{client_name}" }
|
802
|
+
@redis.mget(*client_keys) do |client_json_objects|
|
803
|
+
client_json_objects.each do |client_json|
|
804
|
+
unless client_json.nil?
|
805
|
+
client = Sensu::JSON.load(client_json)
|
806
|
+
next if client[:keepalives] == false
|
807
|
+
check = create_keepalive_check(client)
|
808
|
+
time_since_last_keepalive = Time.now.to_i - client[:timestamp]
|
809
|
+
check[:output] = "No keepalive sent from client for "
|
810
|
+
check[:output] << "#{time_since_last_keepalive} seconds"
|
811
|
+
case
|
812
|
+
when time_since_last_keepalive >= check[:thresholds][:critical]
|
813
|
+
check[:output] << " (>=#{check[:thresholds][:critical]})"
|
814
|
+
check[:status] = 2
|
815
|
+
when time_since_last_keepalive >= check[:thresholds][:warning]
|
816
|
+
check[:output] << " (>=#{check[:thresholds][:warning]})"
|
817
|
+
check[:status] = 1
|
818
|
+
else
|
819
|
+
check[:output] = "Keepalive sent from client "
|
820
|
+
check[:output] << "#{time_since_last_keepalive} seconds ago"
|
821
|
+
check[:status] = 0
|
822
|
+
end
|
823
|
+
publish_check_result(client[:name], check)
|
824
|
+
end
|
825
|
+
end
|
826
|
+
yield
|
827
|
+
end
|
828
|
+
end
|
829
|
+
|
758
830
|
# Determine stale clients, those that have not sent a keepalive
|
759
|
-
# in a specified amount of time
|
760
|
-
#
|
761
|
-
#
|
762
|
-
#
|
763
|
-
#
|
764
|
-
#
|
765
|
-
#
|
766
|
-
# value. If a client has been sending keepalives, `:output` and
|
767
|
-
# `:status` are set to indicate an OK state. A check result is
|
768
|
-
# published for every client in the registry.
|
831
|
+
# in a specified amount of time. This method iterates through
|
832
|
+
# the client registry, creating a keepalive check result for
|
833
|
+
# each client. The `create_client_keepalive_check_results()`
|
834
|
+
# method is used to inspect and create keepalive check results
|
835
|
+
# for each slice of clients from the registry. A relatively
|
836
|
+
# small clients slice size (20) is used to reduce the number of
|
837
|
+
# clients inspected within a single tick of the EM reactor.
|
769
838
|
def determine_stale_clients
|
770
839
|
@logger.info("determining stale clients")
|
771
840
|
@redis.smembers("clients") do |clients|
|
772
|
-
clients.
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
time_since_last_keepalive = Time.now.to_i - client[:timestamp]
|
779
|
-
check[:output] = "No keepalive sent from client for "
|
780
|
-
check[:output] << "#{time_since_last_keepalive} seconds"
|
781
|
-
case
|
782
|
-
when time_since_last_keepalive >= check[:thresholds][:critical]
|
783
|
-
check[:output] << " (>=#{check[:thresholds][:critical]})"
|
784
|
-
check[:status] = 2
|
785
|
-
when time_since_last_keepalive >= check[:thresholds][:warning]
|
786
|
-
check[:output] << " (>=#{check[:thresholds][:warning]})"
|
787
|
-
check[:status] = 1
|
788
|
-
else
|
789
|
-
check[:output] = "Keepalive sent from client "
|
790
|
-
check[:output] << "#{time_since_last_keepalive} seconds ago"
|
791
|
-
check[:status] = 0
|
792
|
-
end
|
793
|
-
publish_check_result(client[:name], check)
|
841
|
+
client_count = clients.length
|
842
|
+
keepalive_check_results = Proc.new do |slice_start, slice_size|
|
843
|
+
unless slice_start > client_count - 1
|
844
|
+
clients_slice = clients.slice(slice_start..slice_size)
|
845
|
+
create_client_keepalive_check_results(clients_slice) do
|
846
|
+
keepalive_check_results.call(slice_start + 20, slice_size + 20)
|
794
847
|
end
|
795
848
|
end
|
796
849
|
end
|
850
|
+
keepalive_check_results.call(0, 19)
|
797
851
|
end
|
798
852
|
end
|
799
853
|
|
@@ -809,32 +863,29 @@ module Sensu
|
|
809
863
|
|
810
864
|
# Determine stale check results, those that have not executed in
|
811
865
|
# a specified amount of time (check TTL). This method iterates
|
812
|
-
# through
|
813
|
-
#
|
814
|
-
#
|
815
|
-
# is
|
816
|
-
#
|
817
|
-
# published with the appropriate check output.
|
866
|
+
# through stored check results that have a defined TTL value (in
|
867
|
+
# seconds). The time since last check execution (in seconds) is
|
868
|
+
# calculated for each check result. If the time since last
|
869
|
+
# execution is equal to or greater than the check TTL, a warning
|
870
|
+
# check result is published with the appropriate check output.
|
818
871
|
def determine_stale_check_results
|
819
872
|
@logger.info("determining stale check results")
|
820
|
-
@redis.smembers("
|
821
|
-
|
822
|
-
@redis.
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
check[:status] = 1
|
834
|
-
publish_check_result(client_name, check)
|
835
|
-
end
|
836
|
-
end
|
873
|
+
@redis.smembers("ttl") do |result_keys|
|
874
|
+
result_keys.each do |result_key|
|
875
|
+
@redis.get("result:#{result_key}") do |result_json|
|
876
|
+
unless result_json.nil?
|
877
|
+
check = Sensu::JSON.load(result_json)
|
878
|
+
next unless check[:ttl] && check[:executed] && !check[:force_resolve]
|
879
|
+
time_since_last_execution = Time.now.to_i - check[:executed]
|
880
|
+
if time_since_last_execution >= check[:ttl]
|
881
|
+
client_name = result_key.split(":").first
|
882
|
+
check[:output] = "Last check execution was "
|
883
|
+
check[:output] << "#{time_since_last_execution} seconds ago"
|
884
|
+
check[:status] = 1
|
885
|
+
publish_check_result(client_name, check)
|
837
886
|
end
|
887
|
+
else
|
888
|
+
@redis.srem("ttl", result_key)
|
838
889
|
end
|
839
890
|
end
|
840
891
|
end
|
@@ -851,48 +902,6 @@ module Sensu
|
|
851
902
|
end
|
852
903
|
end
|
853
904
|
|
854
|
-
# Prune check result aggregations (aggregates). Sensu only
|
855
|
-
# stores the 20 latest aggregations for a check, to keep the
|
856
|
-
# amount of data stored to a minimum.
|
857
|
-
def prune_check_result_aggregations
|
858
|
-
@logger.info("pruning check result aggregations")
|
859
|
-
@redis.smembers("aggregates") do |checks|
|
860
|
-
checks.each do |check_name|
|
861
|
-
@redis.smembers("aggregates:#{check_name}") do |aggregates|
|
862
|
-
if aggregates.length > 20
|
863
|
-
aggregates.sort!
|
864
|
-
aggregates.take(aggregates.length - 20).each do |check_issued|
|
865
|
-
result_set = "#{check_name}:#{check_issued}"
|
866
|
-
@redis.multi
|
867
|
-
@redis.srem("aggregates:#{check_name}", check_issued)
|
868
|
-
@redis.del("aggregate:#{result_set}")
|
869
|
-
@redis.del("aggregation:#{result_set}")
|
870
|
-
@redis.exec do
|
871
|
-
@logger.debug("pruned aggregation", {
|
872
|
-
:check => {
|
873
|
-
:name => check_name,
|
874
|
-
:issued => check_issued
|
875
|
-
}
|
876
|
-
})
|
877
|
-
end
|
878
|
-
end
|
879
|
-
end
|
880
|
-
end
|
881
|
-
end
|
882
|
-
end
|
883
|
-
end
|
884
|
-
|
885
|
-
# Set up the check result aggregation pruner, using periodic
|
886
|
-
# timer to run `prune_check_result_aggregations()` every 20
|
887
|
-
# seconds. The timer is stored in the timers hash under
|
888
|
-
# `:leader`.
|
889
|
-
def setup_check_result_aggregation_pruner
|
890
|
-
@logger.debug("pruning check result aggregations")
|
891
|
-
@timers[:leader] << EM::PeriodicTimer.new(20) do
|
892
|
-
prune_check_result_aggregations
|
893
|
-
end
|
894
|
-
end
|
895
|
-
|
896
905
|
# Set up the leader duties, tasks only performed by a single
|
897
906
|
# Sensu server at a time. The duties include publishing check
|
898
907
|
# requests, monitoring for stale clients, and pruning check
|
@@ -901,7 +910,6 @@ module Sensu
|
|
901
910
|
setup_check_request_publisher
|
902
911
|
setup_client_monitor
|
903
912
|
setup_check_result_monitor
|
904
|
-
setup_check_result_aggregation_pruner
|
905
913
|
end
|
906
914
|
|
907
915
|
# Create a lock timestamp (integer), current time including
|
@@ -1040,19 +1048,16 @@ module Sensu
|
|
1040
1048
|
@transport.unsubscribe if @transport
|
1041
1049
|
end
|
1042
1050
|
|
1043
|
-
# Complete
|
1044
|
-
#
|
1045
|
-
#
|
1046
|
-
# called when handling is complete.
|
1051
|
+
# Complete in progress work and then call the provided callback.
|
1052
|
+
# This method will wait until all counters stored in the
|
1053
|
+
# `@in_progress` hash equal `0`.
|
1047
1054
|
#
|
1048
|
-
# @yield [] callback/block to call when
|
1049
|
-
#
|
1050
|
-
def
|
1051
|
-
@logger.info("completing
|
1052
|
-
:handling_event_count => @handling_event_count
|
1053
|
-
})
|
1055
|
+
# @yield [] callback/block to call when in progress work is
|
1056
|
+
# completed.
|
1057
|
+
def complete_in_progress
|
1058
|
+
@logger.info("completing work in progress", :in_progress => @in_progress)
|
1054
1059
|
retry_until_true do
|
1055
|
-
if @
|
1060
|
+
if @in_progress.values.all? { |count| count == 0 }
|
1056
1061
|
yield
|
1057
1062
|
true
|
1058
1063
|
end
|
@@ -1124,7 +1129,7 @@ module Sensu
|
|
1124
1129
|
@logger.warn("stopping")
|
1125
1130
|
pause
|
1126
1131
|
@state = :stopping
|
1127
|
-
|
1132
|
+
complete_in_progress do
|
1128
1133
|
@redis.close if @redis
|
1129
1134
|
@transport.close if @transport
|
1130
1135
|
super
|