sensu 0.23.3-java → 0.24.0.beta-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ require "sensu/settings/rules"
2
+ require "sensu/settings/validators/client"
3
+
4
+ module Sensu
5
+ module API
6
+ module Validators
7
+ # The error class for validation.
8
+ class Invalid < RuntimeError; end
9
+
10
+ class Client
11
+ # Include Sensu Settings rules and client validator.
12
+ include Sensu::Settings::Rules
13
+ include Sensu::Settings::Validators::Client
14
+
15
+ # Determine if a client definition is valid.
16
+ #
17
+ # @param client [Hash]
18
+ # @return [TrueClass, FalseClass]
19
+ def valid?(client)
20
+ validate_client(client)
21
+ true
22
+ rescue Invalid
23
+ false
24
+ end
25
+
26
+ private
27
+
28
+ # This method is called when `validate_client()` encounters an
29
+ # invalid definition object. This method raises an exception
30
+ # to be caught by `valid?()`.
31
+ def invalid(*arguments)
32
+ raise Invalid
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
data/lib/sensu/cli.rb CHANGED
@@ -26,6 +26,9 @@ module Sensu
26
26
  opts.on("-d", "--config_dir DIR[,DIR]", "DIR or comma-delimited DIR list for Sensu JSON config files") do |dir|
27
27
  options[:config_dirs] = dir.split(",")
28
28
  end
29
+ opts.on("--validate_config", "Validate the compiled configuration and exit") do
30
+ options[:validate_config] = true
31
+ end
29
32
  opts.on("-P", "--print_config", "Print the compiled configuration and exit") do
30
33
  options[:print_config] = true
31
34
  end
@@ -99,12 +99,40 @@ module Sensu
99
99
  end
100
100
  end
101
101
 
102
+ # Perform token substitution for an object. String values are
103
+ # passed to `substitute_tokens()`, arrays and sub-hashes are
104
+ # processed recursively. Numeric values are ignored.
105
+ #
106
+ # @param object [Object]
107
+ # @return [Array] containing the updated object with substituted
108
+ # values and an array of unmatched tokens.
109
+ def object_substitute_tokens(object)
110
+ unmatched_tokens = []
111
+ case object
112
+ when Hash
113
+ object.each do |key, value|
114
+ object[key], unmatched = object_substitute_tokens(value)
115
+ unmatched_tokens.push(*unmatched)
116
+ end
117
+ when Array
118
+ object.map! do |value|
119
+ value, unmatched = object_substitute_tokens(value)
120
+ unmatched_tokens.push(*unmatched)
121
+ value
122
+ end
123
+ when String
124
+ object, unmatched_tokens = substitute_tokens(object, @settings[:client])
125
+ end
126
+ [object, unmatched_tokens.uniq]
127
+ end
128
+
102
129
  # Execute a check command, capturing its output (STDOUT/ERR),
103
130
  # exit status code, execution duration, timestamp, and publish
104
131
  # the result. This method guards against multiple executions for
105
- # the same check. Check command tokens are substituted with the
106
- # associated client attribute values. If there are unmatched
107
- # check command tokens, the check command will not be executed,
132
+ # the same check. Check attribute value tokens are substituted
133
+ # with the associated client attribute values, via
134
+ # `object_substitute_tokens()`. If there are unmatched check
135
+ # attribute value tokens, the check will not be executed,
108
136
  # instead a check result will be published reporting the
109
137
  # unmatched tokens.
110
138
  #
@@ -113,11 +141,11 @@ module Sensu
113
141
  @logger.debug("attempting to execute check command", :check => check)
114
142
  unless @checks_in_progress.include?(check[:name])
115
143
  @checks_in_progress << check[:name]
116
- command, unmatched_tokens = substitute_tokens(check[:command], @settings[:client])
144
+ check, unmatched_tokens = object_substitute_tokens(check)
117
145
  if unmatched_tokens.empty?
118
- check[:executed] = Time.now.to_i
119
146
  started = Time.now.to_f
120
- Spawn.process(command, :timeout => check[:timeout]) do |output, status|
147
+ check[:executed] = started.to_i
148
+ Spawn.process(check[:command], :timeout => check[:timeout]) do |output, status|
121
149
  check[:duration] = ("%.3f" % (Time.now.to_f - started)).to_f
122
150
  check[:output] = output
123
151
  check[:status] = status
@@ -1,7 +1,7 @@
1
1
  module Sensu
2
2
  unless defined?(Sensu::VERSION)
3
3
  # Sensu release version.
4
- VERSION = "0.23.3".freeze
4
+ VERSION = "0.24.0.beta".freeze
5
5
 
6
6
  # Sensu check severities.
7
7
  SEVERITIES = %w[ok warning critical unknown].freeze
data/lib/sensu/daemon.rb CHANGED
@@ -2,14 +2,14 @@ require "rubygems"
2
2
 
3
3
  gem "eventmachine", "1.2.0.1"
4
4
 
5
- gem "sensu-json", "1.1.1"
5
+ gem "sensu-json", "2.0.0"
6
6
  gem "sensu-logger", "1.2.0"
7
- gem "sensu-settings", "3.4.0"
7
+ gem "sensu-settings", "5.1.0"
8
8
  gem "sensu-extension", "1.5.0"
9
9
  gem "sensu-extensions", "1.5.0"
10
- gem "sensu-transport", "5.0.0"
10
+ gem "sensu-transport", "6.0.0"
11
11
  gem "sensu-spawn", "2.2.0"
12
- gem "sensu-redis", "1.3.0"
12
+ gem "sensu-redis", "1.4.0"
13
13
 
14
14
  require "time"
15
15
  require "uri"
@@ -49,6 +49,7 @@ module Sensu
49
49
  setup_logger(options)
50
50
  load_settings(options)
51
51
  load_extensions(options)
52
+ setup_spawn
52
53
  setup_process(options)
53
54
  end
54
55
 
@@ -64,56 +65,78 @@ module Sensu
64
65
  @logger.setup_signal_traps
65
66
  end
66
67
 
67
- # Log setting or extension loading concerns, sensitive information
68
+ # Log setting or extension loading notices, sensitive information
68
69
  # is redacted.
69
70
  #
70
- # @param concerns [Array] to be logged.
71
- # @param level [Symbol] to log the concerns at.
72
- def log_concerns(concerns=[], level=:warn)
73
- concerns.each do |concern|
71
+ # @param notices [Array] to be logged.
72
+ # @param level [Symbol] to log the notices at.
73
+ def log_notices(notices=[], level=:warn)
74
+ notices.each do |concern|
74
75
  message = concern.delete(:message)
75
76
  @logger.send(level, message, redact_sensitive(concern))
76
77
  end
77
78
  end
78
79
 
79
- # Print the Sensu settings and immediately exit the process. This
80
- # method is used while troubleshooting configuration issues,
81
- # triggered by a CLI argument, e.g. `--print_config`. Sensu
82
- # settings with sensitive values (e.g. passwords) are first
83
- # redacted.
80
+ # Determine if the Sensu settings are valid, if there are load or
81
+ # validation errors, and immediately exit the process with the
82
+ # appropriate exit status code. This method is used to determine
83
+ # if the latest configuration changes are valid prior to
84
+ # restarting the Sensu service, triggered by a CLI argument, e.g.
85
+ # `--validate_config`.
84
86
  #
85
87
  # @param settings [Object]
86
- def print_settings(settings)
88
+ def validate_settings!(settings)
89
+ if settings.errors.empty?
90
+ puts "configuration is valid"
91
+ exit
92
+ else
93
+ puts "configuration is invalid"
94
+ puts Sensu::JSON.dump({:errors => @settings.errors}, :pretty => true)
95
+ exit 2
96
+ end
97
+ end
98
+
99
+ # Print the Sensu settings (JSON) to STDOUT and immediately exit
100
+ # the process with the appropriate exit status code. This method
101
+ # is used while troubleshooting configuration issues, triggered by
102
+ # a CLI argument, e.g. `--print_config`. Sensu settings with
103
+ # sensitive values (e.g. passwords) are first redacted.
104
+ #
105
+ # @param settings [Object]
106
+ def print_settings!(settings)
87
107
  redacted_settings = redact_sensitive(settings.to_hash)
88
108
  @logger.warn("outputting compiled configuration and exiting")
89
109
  puts Sensu::JSON.dump(redacted_settings, :pretty => true)
90
- exit
110
+ exit(settings.errors.empty? ? 0 : 2)
91
111
  end
92
112
 
93
- # Load Sensu settings and validate them. If there are validation
94
- # failures, log them (concerns), then cause the Sensu process to
95
- # exit (2). This method creates the settings instance variable:
96
- # `@settings`. If the `print_config` option is true, this method
97
- # calls `print_settings()` to output the compiled configuration
98
- # settings and then exit the process.
113
+ # Load Sensu settings. This method creates the settings instance
114
+ # variable: `@settings`. If the `validate_config` option is true,
115
+ # this method calls `validate_settings!()` to validate the latest
116
+ # compiled configuration settings and will then exit the process.
117
+ # If the `print_config` option is true, this method calls
118
+ # `print_settings!()` to output the compiled configuration
119
+ # settings and will then exit the process. If there are loading or
120
+ # validation errors, they will be logged (notices), and this
121
+ # method will exit(2) the process.
122
+ #
99
123
  #
100
124
  # https://github.com/sensu/sensu-settings
101
125
  #
102
126
  # @param options [Hash]
103
127
  def load_settings(options={})
104
128
  @settings = Settings.get(options)
105
- log_concerns(@settings.warnings)
106
- failures = @settings.validate
107
- unless failures.empty?
108
- @logger.fatal("invalid settings")
109
- log_concerns(failures, :fatal)
129
+ validate_settings!(@settings) if options[:validate_config]
130
+ log_notices(@settings.warnings)
131
+ log_notices(@settings.errors, :fatal)
132
+ print_settings!(@settings) if options[:print_config]
133
+ unless @settings.errors.empty?
110
134
  @logger.fatal("SENSU NOT RUNNING!")
111
135
  exit 2
112
136
  end
113
- print_settings(@settings) if options[:print_config]
114
137
  end
115
138
 
116
- # Load Sensu extensions and log any concerns. Set the logger and
139
+ # Load Sensu extensions and log any notices. Set the logger and
117
140
  # settings for each extension instance. This method creates the
118
141
  # extensions instance variable: `@extensions`.
119
142
  #
@@ -123,7 +146,7 @@ module Sensu
123
146
  # @param options [Hash]
124
147
  def load_extensions(options={})
125
148
  @extensions = Extensions.get(options)
126
- log_concerns(@extensions.warnings)
149
+ log_notices(@extensions.warnings)
127
150
  extension_settings = @settings.to_hash.dup
128
151
  @extensions.all.each do |extension|
129
152
  extension.logger = @logger
@@ -131,6 +154,20 @@ module Sensu
131
154
  end
132
155
  end
133
156
 
157
+ # Set up Sensu spawn, creating a worker to create, control, and
158
+ # limit spawned child processes. This method adjusts the
159
+ # EventMachine thread pool size to accommodate the concurrent
160
+ # process spawn limit and other Sensu process operations.
161
+ #
162
+ # https://github.com/sensu/sensu-spawn
163
+ def setup_spawn
164
+ @logger.info("configuring sensu spawn", :settings => @settings[:sensu][:spawn])
165
+ threadpool_size = @settings[:sensu][:spawn][:limit] + 10
166
+ @logger.debug("setting eventmachine threadpool size", :size => threadpool_size)
167
+ EM.threadpool_size = threadpool_size
168
+ Spawn.setup(@settings[:sensu][:spawn])
169
+ end
170
+
134
171
  # Manage the current process, optionally daemonize and/or write
135
172
  # the current process ID to a PID file.
136
173
  #
@@ -345,14 +345,14 @@ module Sensu
345
345
  end
346
346
  if filter_message
347
347
  @logger.info(filter_message, details)
348
- @handling_event_count -= 1 if @handling_event_count
348
+ @in_progress[:events] -= 1 if @in_progress
349
349
  else
350
350
  event_filtered?(handler, event) do |filtered|
351
351
  unless filtered
352
352
  yield(event)
353
353
  else
354
354
  @logger.info("event was filtered", details)
355
- @handling_event_count -= 1 if @handling_event_count
355
+ @in_progress[:events] -= 1 if @in_progress
356
356
  end
357
357
  end
358
358
  end
@@ -4,7 +4,7 @@ module Sensu
4
4
  module Server
5
5
  module Handle
6
6
  # Create a handler error callback, for logging the error and
7
- # decrementing the `@handling_event_count` by `1`.
7
+ # decrementing the `@in_progress[:events]` by `1`.
8
8
  #
9
9
  # @param handler [Object]
10
10
  # @param event_data [Object]
@@ -16,14 +16,14 @@ module Sensu
16
16
  :event_data => event_data,
17
17
  :error => error.to_s
18
18
  })
19
- @handling_event_count -= 1 if @handling_event_count
19
+ @in_progress[:events] -= 1 if @in_progress
20
20
  end
21
21
  end
22
22
 
23
23
  # Execute a pipe event handler, using the defined handler
24
24
  # command to spawn a process, passing it event data via STDIN.
25
25
  # Log the handler output lines and decrement the
26
- # `@handling_event_count` by `1` when the handler executes
26
+ # `@in_progress[:events]` by `1` when the handler executes
27
27
  # successfully.
28
28
  #
29
29
  # @param handler [Hash] definition.
@@ -36,7 +36,7 @@ module Sensu
36
36
  :handler => handler,
37
37
  :output => output.split("\n+")
38
38
  })
39
- @handling_event_count -= 1 if @handling_event_count
39
+ @in_progress[:events] -= 1 if @in_progress
40
40
  end
41
41
  end
42
42
 
@@ -47,7 +47,7 @@ module Sensu
47
47
  # `handler_error()` method is used to create the `on_error`
48
48
  # callback for the connection handler. The `on_error` callback
49
49
  # is called in the event of any error(s). The
50
- # `@handling_event_count` is decremented by `1` when the data is
50
+ # `@in_progress[:events]` is decremented by `1` when the data is
51
51
  # transmitted successfully, `on_success`.
52
52
  #
53
53
  # @param handler [Hash] definition.
@@ -57,7 +57,7 @@ module Sensu
57
57
  begin
58
58
  EM::connect(handler[:socket][:host], handler[:socket][:port], Socket) do |socket|
59
59
  socket.on_success = Proc.new do
60
- @handling_event_count -= 1 if @handling_event_count
60
+ @in_progress[:events] -= 1 if @in_progress
61
61
  end
62
62
  socket.on_error = on_error
63
63
  timeout = handler[:timeout] || 10
@@ -71,7 +71,7 @@ module Sensu
71
71
  end
72
72
 
73
73
  # Transmit event data to a UDP socket, then close the
74
- # connection. The `@handling_event_count` is decremented by `1`
74
+ # connection. The `@in_progress[:events]` is decremented by `1`
75
75
  # when the data is assumed to have been transmitted.
76
76
  #
77
77
  # @param handler [Hash] definition.
@@ -81,7 +81,7 @@ module Sensu
81
81
  EM::open_datagram_socket("0.0.0.0", 0, nil) do |socket|
82
82
  socket.send_datagram(event_data.to_s, handler[:socket][:host], handler[:socket][:port])
83
83
  socket.close_connection_after_writing
84
- @handling_event_count -= 1 if @handling_event_count
84
+ @in_progress[:events] -= 1 if @in_progress
85
85
  end
86
86
  rescue => error
87
87
  handler_error(handler, event_data).call(error)
@@ -90,7 +90,7 @@ module Sensu
90
90
 
91
91
  # Publish event data to a Sensu transport pipe. Event data that
92
92
  # is `nil` or empty will not be published, to prevent transport
93
- # errors. The `@handling_event_count` is decremented by `1`,
93
+ # errors. The `@in_progress[:events]` is decremented by `1`,
94
94
  # even if the event data is not published.
95
95
  #
96
96
  # @param handler [Hash] definition.
@@ -105,14 +105,14 @@ module Sensu
105
105
  end
106
106
  end
107
107
  end
108
- @handling_event_count -= 1 if @handling_event_count
108
+ @in_progress[:events] -= 1 if @in_progress
109
109
  end
110
110
 
111
111
  # Run a handler extension, within the Sensu EventMachine reactor
112
112
  # (event loop). The extension API `safe_run()` method is used to
113
113
  # guard against most errors. The `safe_run()` callback is always
114
114
  # called, logging the extension run output and status, and
115
- # decrementing the `@handling_event_count` by `1`.
115
+ # decrementing the `@in_progress[:events]` by `1`.
116
116
  #
117
117
  # @param handler [Hash] definition.
118
118
  # @param event_data [Object] to pass to the handler extension.
@@ -123,7 +123,7 @@ module Sensu
123
123
  :output => output,
124
124
  :status => status
125
125
  })
126
- @handling_event_count -= 1 if @handling_event_count
126
+ @in_progress[:events] -= 1 if @in_progress
127
127
  end
128
128
  end
129
129
 
@@ -6,7 +6,7 @@ module Sensu
6
6
  # created callback can be used for standard mutators and mutator
7
7
  # extensions. The provided callback will only be called when the
8
8
  # mutator status is `0` (OK). If the status is not `0`, an error
9
- # is logged, and the `@handling_event_count` is decremented by
9
+ # is logged, and the `@in_progress[:events]` is decremented by
10
10
  # `1`.
11
11
  #
12
12
  # @param mutator [Object] definition or extension.
@@ -25,7 +25,7 @@ module Sensu
25
25
  :output => output,
26
26
  :status => status
27
27
  })
28
- @handling_event_count -= 1 if @handling_event_count
28
+ @in_progress[:events] -= 1 if @in_progress
29
29
  end
30
30
  end
31
31
  end
@@ -63,7 +63,7 @@ module Sensu
63
63
  # mutator is used, unless the handler specifies another mutator.
64
64
  # If a mutator does not exist, i.e. it is not defined or is a missing
65
65
  # extension, an error will be logged and the
66
- # `@handling_event_count` is decremented by `1`. This method
66
+ # `@in_progress[:events]` is decremented by `1`. This method
67
67
  # first checks for the existence of a standard mutator, then
68
68
  # checks for an extension if a standard mutator is not defined.
69
69
  #
@@ -84,7 +84,7 @@ module Sensu
84
84
  @logger.error("unknown mutator", {
85
85
  :mutator_name => mutator_name
86
86
  })
87
- @handling_event_count -= 1 if @handling_event_count
87
+ @in_progress[:events] -= 1 if @in_progress
88
88
  end
89
89
  end
90
90
  end
@@ -11,7 +11,9 @@ module Sensu
11
11
  include Mutate
12
12
  include Handle
13
13
 
14
- attr_reader :is_leader, :handling_event_count
14
+ attr_reader :is_leader, :in_progress
15
+
16
+ STANDARD_CHECK_TYPE = "standard".freeze
15
17
 
16
18
  METRIC_CHECK_TYPE = "metric".freeze
17
19
 
@@ -40,17 +42,23 @@ module Sensu
40
42
  super
41
43
  @is_leader = false
42
44
  @timers[:leader] = Array.new
43
- @handling_event_count = 0
45
+ @in_progress = Hash.new(0)
44
46
  end
45
47
 
46
48
  # Set up the Redis and Transport connection objects, `@redis`
47
- # and `@transport`. This method "drys" up many instances of
48
- # `setup_redis()` and `setup_transport()`.
49
+ # and `@transport`. This method updates the Redis on error
50
+ # callback to reset the in progress check result counter. This
51
+ # method "drys" up many instances of `setup_redis()` and
52
+ # `setup_transport()`, particularly in the specs.
49
53
  #
50
54
  # @yield callback/block called after connecting to Redis and the
51
55
  # Sensu Transport.
52
56
  def setup_connections
53
57
  setup_redis do
58
+ @redis.on_error do |error|
59
+ @logger.error("redis connection error", :error => error.to_s)
60
+ @in_progress[:check_results] = 0
61
+ end
54
62
  setup_transport do
55
63
  yield
56
64
  end
@@ -90,15 +98,11 @@ module Sensu
90
98
  #
91
99
  # @param client [Hash] definition.
92
100
  def create_client_registration_event(client)
93
- event = {
94
- :id => random_uuid,
95
- :client => client,
96
- :check => create_registration_check(client),
97
- :occurrences => 1,
98
- :action => :create,
99
- :timestamp => Time.now.to_i
100
- }
101
- process_event(event)
101
+ check = create_registration_check(client)
102
+ create_event(client, check) do |event|
103
+ event_bridges(event)
104
+ process_event(event)
105
+ end
102
106
  end
103
107
 
104
108
  # Process an initial client registration, when it is first added
@@ -235,8 +239,8 @@ module Sensu
235
239
  #
236
240
  # This method determines the appropriate handlers for an event,
237
241
  # filtering and mutating the event data for each of them. The
238
- # `@handling_event_count` is incremented by `1`, for each event
239
- # handler chain (filter -> mutate -> handle).
242
+ # `@in_progress[:events]` counter is incremented by `1`, for
243
+ # each event handler chain (filter -> mutate -> handle).
240
244
  #
241
245
  # @param event [Hash]
242
246
  def process_event(event)
@@ -245,7 +249,7 @@ module Sensu
245
249
  handler_list = Array((event[:check][:handlers] || event[:check][:handler]) || DEFAULT_HANDLER_NAME)
246
250
  handlers = derive_handlers(handler_list)
247
251
  handlers.each do |handler|
248
- @handling_event_count += 1
252
+ @in_progress[:events] += 1
249
253
  filter_event(handler, event) do |event|
250
254
  mutate_event(handler, event) do |event_data|
251
255
  handle_event(handler, event_data)
@@ -271,35 +275,27 @@ module Sensu
271
275
  end
272
276
  end
273
277
 
274
- # Add a check result to an aggregate. A check aggregate uses the
275
- # check `:name` and the `:issued` timestamp as its unique
276
- # identifier. An aggregate uses several counters: the total
277
- # number of results in the aggregate, and a counter for each
278
- # check severity (ok, warning, etc). Check output is also
279
- # stored, to be summarized to aid in identifying outliers for a
280
- # check execution across a number of Sensu clients. JSON
281
- # serialization is used for storing check result data.
278
+ # Add a check result to an aggregate. The aggregate name is
279
+ # determined by the value of check `:aggregate`. If check
280
+ # `:aggregate` is `true` (legacy), the check `:name` is used as
281
+ # the aggregate name. If check `:aggregate` is a string, it is
282
+ # used as the aggregate name. This method will add the client
283
+ # name to the aggregate, all other processing (e.g. counters) is
284
+ # done by the Sensu API on request.
282
285
  #
283
286
  # @param client [Hash]
284
287
  # @param check [Hash]
285
288
  def aggregate_check_result(client, check)
289
+ aggregate = (check[:aggregate].is_a?(String) ? check[:aggregate] : check[:name])
286
290
  @logger.debug("adding check result to aggregate", {
291
+ :aggregate => aggregate,
287
292
  :client => client,
288
293
  :check => check
289
294
  })
290
- result_set = "#{check[:name]}:#{check[:issued]}"
291
- result_data = Sensu::JSON.dump(:output => check[:output], :status => check[:status])
292
- @redis.multi
293
- @redis.hset("aggregation:#{result_set}", client[:name], result_data)
294
- SEVERITIES.each do |severity|
295
- @redis.hsetnx("aggregate:#{result_set}", severity, 0)
295
+ aggregate_member = "#{client[:name]}:#{check[:name]}"
296
+ @redis.sadd("aggregates:#{aggregate}", aggregate_member) do
297
+ @redis.sadd("aggregates", aggregate)
296
298
  end
297
- severity = (SEVERITIES[check[:status]] || "unknown")
298
- @redis.hincrby("aggregate:#{result_set}", severity, 1)
299
- @redis.hincrby("aggregate:#{result_set}", "total", 1)
300
- @redis.sadd("aggregates:#{check[:name]}", check[:issued])
301
- @redis.sadd("aggregates", check[:name])
302
- @redis.exec
303
299
  end
304
300
 
305
301
  # Truncate check output. For metric checks, (`"type":
@@ -333,7 +329,7 @@ module Sensu
333
329
  # @param client [Hash]
334
330
  # @param check [Hash]
335
331
  # @yield [] callback/block called after the check data has been
336
- # stored (history, etc).
332
+ # stored (history, etc).
337
333
  def store_check_result(client, check)
338
334
  @logger.debug("storing check result", :check => check)
339
335
  result_key = "#{client[:name]}:#{check[:name]}"
@@ -342,6 +338,7 @@ module Sensu
342
338
  @redis.multi
343
339
  @redis.sadd("result:#{client[:name]}", check[:name])
344
340
  @redis.set("result:#{result_key}", Sensu::JSON.dump(check_truncated))
341
+ @redis.sadd("ttl", result_key) if check[:ttl]
345
342
  @redis.rpush(history_key, check[:status])
346
343
  @redis.ltrim(history_key, -21, -1)
347
344
  @redis.exec do
@@ -418,83 +415,106 @@ module Sensu
418
415
  end
419
416
 
420
417
  # Update the event registry, stored in Redis. This method
421
- # determines if check data results in the creation or update of
422
- # event data in the registry. Existing event data for a
423
- # client/check pair is fetched, used in conditionals and the
424
- # composition of the new event data. If a check `:status` is not
418
+ # determines if event data warrants the creation or update of
419
+ # event data in the registry. If a check `:status` is not
425
420
  # `0`, or it has been flapping, an event is created/updated in
426
- # the registry. If there was existing event data, but the check
427
- # `:status` is now `0`, the event is removed (resolved) from the
428
- # registry. If the previous conditions are not met, and check
429
- # `:type` is `metric` and the `:status` is `0`, the event
430
- # registry is not updated, but the provided callback is called
431
- # with the event data. All event data is sent to event bridge
432
- # extensions, including events that do not normally produce an
433
- # action. JSON serialization is used when storing data in the
434
- # registry.
421
+ # the registry. If the event `:action` is `:resolve`, the event
422
+ # is removed (resolved) from the registry. If the previous
423
+ # conditions are not met and check `:type` is `metric`, the
424
+ # registry is not updated, but further event processing is
425
+ # required (`yield(true)`). JSON serialization is used when
426
+ # storing data in the registry.
427
+ #
428
+ # @param event [Hash]
429
+ # @yield [process] callback/block called after the event
430
+ # registry has been updated.
431
+ # @yieldparam process [TrueClass, FalseClass] indicating if the
432
+ # event requires further processing.
433
+ def update_event_registry(event)
434
+ client_name = event[:client][:name]
435
+ if event[:check][:status] != 0 || event[:action] == :flapping
436
+ @redis.hset("events:#{client_name}", event[:check][:name], Sensu::JSON.dump(event)) do
437
+ yield(true)
438
+ end
439
+ elsif event[:action] == :resolve &&
440
+ (event[:check][:auto_resolve] != false || event[:check][:force_resolve])
441
+ @redis.hdel("events:#{client_name}", event[:check][:name]) do
442
+ yield(true)
443
+ end
444
+ elsif event[:check][:type] == METRIC_CHECK_TYPE
445
+ yield(true)
446
+ else
447
+ yield(false)
448
+ end
449
+ end
450
+
451
+ # Create an event, using the provided client and check result
452
+ # data. Existing event data for the client/check pair is fetched
453
+ # from the event registry to be used in the composition of the
454
+ # new event.
435
455
  #
436
456
  # @param client [Hash]
437
457
  # @param check [Hash]
438
458
  # @yield callback [event] callback/block called with the
439
- # resulting event data if the event registry is updated, or
440
- # the check is of type `:metric`.
459
+ # resulting event.
441
460
  # @yieldparam event [Hash]
442
- def update_event_registry(client, check)
443
- @redis.hget("events:#{client[:name]}", check[:name]) do |event_json|
444
- stored_event = event_json ? Sensu::JSON.load(event_json) : nil
445
- flapping = check_flapping?(stored_event, check)
446
- event = {
447
- :id => random_uuid,
448
- :client => client,
449
- :check => check,
450
- :occurrences => 1,
451
- :action => (flapping ? :flapping : :create),
452
- :timestamp => Time.now.to_i
453
- }
454
- if check[:status] != 0 || flapping
455
- if stored_event && check[:status] == stored_event[:check][:status]
456
- event[:occurrences] = stored_event[:occurrences] + 1
457
- end
458
- @redis.hset("events:#{client[:name]}", check[:name], Sensu::JSON.dump(event)) do
459
- yield(event)
461
+ def create_event(client, check)
462
+ check_history(client, check) do |history, total_state_change|
463
+ check[:history] = history
464
+ check[:total_state_change] = total_state_change
465
+ @redis.hget("events:#{client[:name]}", check[:name]) do |event_json|
466
+ stored_event = event_json ? Sensu::JSON.load(event_json) : nil
467
+ flapping = check_flapping?(stored_event, check)
468
+ event = {
469
+ :client => client,
470
+ :check => check,
471
+ :occurrences => 1,
472
+ :action => (flapping ? :flapping : :create),
473
+ :timestamp => Time.now.to_i
474
+ }
475
+ if stored_event
476
+ event[:id] = stored_event[:id]
477
+ event[:last_state_change] = stored_event[:last_state_change]
478
+ event[:last_ok] = stored_event[:last_ok]
479
+ event[:occurrences] = stored_event[:occurrences]
480
+ else
481
+ event[:id] = random_uuid
460
482
  end
461
- elsif stored_event
462
- event[:occurrences] = stored_event[:occurrences]
463
- event[:action] = :resolve
464
- unless check[:auto_resolve] == false && !check[:force_resolve]
465
- @redis.hdel("events:#{client[:name]}", check[:name]) do
466
- yield(event)
483
+ if check[:status] != 0 || flapping
484
+ if history[-1] == history[-2]
485
+ event[:occurrences] += 1
486
+ else
487
+ event[:occurrences] = 1
488
+ event[:last_state_change] = event[:timestamp]
467
489
  end
490
+ elsif stored_event
491
+ event[:last_state_change] = event[:timestamp]
492
+ event[:action] = :resolve
493
+ end
494
+ if check[:status] == 0
495
+ event[:last_ok] = event[:timestamp]
468
496
  end
469
- elsif check[:type] == METRIC_CHECK_TYPE
470
497
  yield(event)
471
498
  end
472
- event_bridges(event)
473
499
  end
474
500
  end
475
501
 
476
- # Create a blank client (data) and add it to the client
477
- # registry. Only the client name is known, the other client
478
- # attributes must be updated via the API (POST /clients:client).
479
- # Dynamically created clients and those updated via the API will
480
- # have client keepalives disabled, `:keepalives` is set to
481
- # `false`.
502
+ # Create a blank client (data). Only the client name is known,
503
+ # the other client attributes must be updated via the API (POST
504
+ # /clients:client). Dynamically created clients and those
505
+ # updated via the API will have client keepalives disabled by
506
+ # default, `:keepalives` is set to `false`.
482
507
  #
483
- # @param name [Hash] to use for the client.
484
- # @yield [client] callback/block to be called with the
485
- # dynamically created client data.
486
- # @yieldparam client [Hash]
508
+ # @param name [String] to use for the client.
509
+ # @return [Hash] client.
487
510
  def create_client(name)
488
- client = {
511
+ {
489
512
  :name => name,
490
513
  :address => "unknown",
491
514
  :subscriptions => [],
492
515
  :keepalives => false,
493
516
  :version => VERSION
494
517
  }
495
- update_client_registry(client) do
496
- yield(client)
497
- end
498
518
  end
499
519
 
500
520
  # Retrieve a client (data) from Redis if it exists. If a client
@@ -527,7 +547,8 @@ module Sensu
527
547
  yield(client)
528
548
  end
529
549
  else
530
- create_client(client_key) do |client|
550
+ client = create_client(client_key)
551
+ update_client_registry(client) do
531
552
  yield(client)
532
553
  end
533
554
  end
@@ -536,14 +557,18 @@ module Sensu
536
557
 
537
558
  # Process a check result, storing its data, inspecting its
538
559
  # contents, and taking the appropriate actions (e.g. update the
539
- # event registry). A check result must have a valid client name,
540
- # associated with a client in the registry or one will be
541
- # created. If a local check definition exists for the check
542
- # name, and the check result is not from a standalone check
543
- # execution, it's merged with the check result for more context.
560
+ # event registry). The `@in_progress[:check_results]` counter is
561
+ # incremented by `1` prior to check result processing and then
562
+ # decremented by `1` after updating the event registry. A check
563
+ # result must have a valid client name, associated with a client
564
+ # in the registry or one will be created. If a local check
565
+ # definition exists for the check name, and the check result is
566
+ # not from a standalone check execution, it's merged with the
567
+ # check result for more context.
544
568
  #
545
569
  # @param result [Hash] data.
546
570
  def process_check_result(result)
571
+ @in_progress[:check_results] += 1
547
572
  @logger.debug("processing result", :result => result)
548
573
  retrieve_client(result) do |client|
549
574
  check = case
@@ -552,13 +577,15 @@ module Sensu
552
577
  else
553
578
  result[:check]
554
579
  end
580
+ check[:type] ||= STANDARD_CHECK_TYPE
581
+ check[:origin] = result[:client] if check[:source]
555
582
  aggregate_check_result(client, check) if check[:aggregate]
556
583
  store_check_result(client, check) do
557
- check_history(client, check) do |history, total_state_change|
558
- check[:history] = history
559
- check[:total_state_change] = total_state_change
560
- update_event_registry(client, check) do |event|
561
- process_event(event)
584
+ create_event(client, check) do |event|
585
+ event_bridges(event)
586
+ update_event_registry(event) do |process|
587
+ process_event(event) if process
588
+ @in_progress[:check_results] -= 1
562
589
  end
563
590
  end
564
591
  end
@@ -755,45 +782,72 @@ module Sensu
755
782
  check.merge(:name => "keepalive", :issued => timestamp, :executed => timestamp)
756
783
  end
757
784
 
785
+ # Create client keepalive check results. This method will
786
+ # retrieve clients from the registry, creating a keepalive
787
+ # check definition for each client, using the
788
+ # `create_keepalive_check()` method, containing client specific
789
+ # keepalive thresholds. If the time since the latest keepalive
790
+ # is equal to or greater than a threshold, the check `:output`
791
+ # is set to a descriptive message, and `:status` is set to the
792
+ # appropriate non-zero value. If a client has been sending
793
+ # keepalives, `:output` and `:status` are set to indicate an OK
794
+ # state. The `publish_check_result()` method is used to publish
795
+ # the client keepalive check results.
796
+ #
797
+ # @param clients [Array] of client names.
798
+ # @yield [] callback/block called after the client keepalive
799
+ # check results have been created.
800
+ def create_client_keepalive_check_results(clients)
801
+ client_keys = clients.map { |client_name| "client:#{client_name}" }
802
+ @redis.mget(*client_keys) do |client_json_objects|
803
+ client_json_objects.each do |client_json|
804
+ unless client_json.nil?
805
+ client = Sensu::JSON.load(client_json)
806
+ next if client[:keepalives] == false
807
+ check = create_keepalive_check(client)
808
+ time_since_last_keepalive = Time.now.to_i - client[:timestamp]
809
+ check[:output] = "No keepalive sent from client for "
810
+ check[:output] << "#{time_since_last_keepalive} seconds"
811
+ case
812
+ when time_since_last_keepalive >= check[:thresholds][:critical]
813
+ check[:output] << " (>=#{check[:thresholds][:critical]})"
814
+ check[:status] = 2
815
+ when time_since_last_keepalive >= check[:thresholds][:warning]
816
+ check[:output] << " (>=#{check[:thresholds][:warning]})"
817
+ check[:status] = 1
818
+ else
819
+ check[:output] = "Keepalive sent from client "
820
+ check[:output] << "#{time_since_last_keepalive} seconds ago"
821
+ check[:status] = 0
822
+ end
823
+ publish_check_result(client[:name], check)
824
+ end
825
+ end
826
+ yield
827
+ end
828
+ end
829
+
758
830
  # Determine stale clients, those that have not sent a keepalive
759
- # in a specified amount of time (thresholds). This method
760
- # iterates through the client registry, creating a keepalive
761
- # check definition with the `create_keepalive_check()` method,
762
- # containing client specific staleness thresholds. If the time
763
- # since the latest keepalive is equal to or greater than a
764
- # threshold, the check `:output` is set to a descriptive
765
- # message, and `:status` is set to the appropriate non-zero
766
- # value. If a client has been sending keepalives, `:output` and
767
- # `:status` are set to indicate an OK state. A check result is
768
- # published for every client in the registry.
831
+ # in a specified amount of time. This method iterates through
832
+ # the client registry, creating a keepalive check result for
833
+ # each client. The `create_client_keepalive_check_results()`
834
+ # method is used to inspect and create keepalive check results
835
+ # for each slice of clients from the registry. A relatively
836
+ # small clients slice size (20) is used to reduce the number of
837
+ # clients inspected within a single tick of the EM reactor.
769
838
  def determine_stale_clients
770
839
  @logger.info("determining stale clients")
771
840
  @redis.smembers("clients") do |clients|
772
- clients.each do |client_name|
773
- @redis.get("client:#{client_name}") do |client_json|
774
- unless client_json.nil?
775
- client = Sensu::JSON.load(client_json)
776
- next if client[:keepalives] == false
777
- check = create_keepalive_check(client)
778
- time_since_last_keepalive = Time.now.to_i - client[:timestamp]
779
- check[:output] = "No keepalive sent from client for "
780
- check[:output] << "#{time_since_last_keepalive} seconds"
781
- case
782
- when time_since_last_keepalive >= check[:thresholds][:critical]
783
- check[:output] << " (>=#{check[:thresholds][:critical]})"
784
- check[:status] = 2
785
- when time_since_last_keepalive >= check[:thresholds][:warning]
786
- check[:output] << " (>=#{check[:thresholds][:warning]})"
787
- check[:status] = 1
788
- else
789
- check[:output] = "Keepalive sent from client "
790
- check[:output] << "#{time_since_last_keepalive} seconds ago"
791
- check[:status] = 0
792
- end
793
- publish_check_result(client[:name], check)
841
+ client_count = clients.length
842
+ keepalive_check_results = Proc.new do |slice_start, slice_size|
843
+ unless slice_start > client_count - 1
844
+ clients_slice = clients.slice(slice_start..slice_size)
845
+ create_client_keepalive_check_results(clients_slice) do
846
+ keepalive_check_results.call(slice_start + 20, slice_size + 20)
794
847
  end
795
848
  end
796
849
  end
850
+ keepalive_check_results.call(0, 19)
797
851
  end
798
852
  end
799
853
 
@@ -809,32 +863,29 @@ module Sensu
809
863
 
810
864
  # Determine stale check results, those that have not executed in
811
865
  # a specified amount of time (check TTL). This method iterates
812
- # through the client registry and check results for checks with
813
- # a defined TTL value (in seconds). If a check result has a
814
- # defined TTL, the time since last check execution (in seconds)
815
- # is calculated. If the time since last execution is equal to or
816
- # greater than the check TTL, a warning check result is
817
- # published with the appropriate check output.
866
+ # through stored check results that have a defined TTL value (in
867
+ # seconds). The time since last check execution (in seconds) is
868
+ # calculated for each check result. If the time since last
869
+ # execution is equal to or greater than the check TTL, a warning
870
+ # check result is published with the appropriate check output.
818
871
  def determine_stale_check_results
819
872
  @logger.info("determining stale check results")
820
- @redis.smembers("clients") do |clients|
821
- clients.each do |client_name|
822
- @redis.smembers("result:#{client_name}") do |checks|
823
- checks.each do |check_name|
824
- result_key = "#{client_name}:#{check_name}"
825
- @redis.get("result:#{result_key}") do |result_json|
826
- unless result_json.nil?
827
- check = Sensu::JSON.load(result_json)
828
- next unless check[:ttl] && check[:executed] && !check[:force_resolve]
829
- time_since_last_execution = Time.now.to_i - check[:executed]
830
- if time_since_last_execution >= check[:ttl]
831
- check[:output] = "Last check execution was "
832
- check[:output] << "#{time_since_last_execution} seconds ago"
833
- check[:status] = 1
834
- publish_check_result(client_name, check)
835
- end
836
- end
873
+ @redis.smembers("ttl") do |result_keys|
874
+ result_keys.each do |result_key|
875
+ @redis.get("result:#{result_key}") do |result_json|
876
+ unless result_json.nil?
877
+ check = Sensu::JSON.load(result_json)
878
+ next unless check[:ttl] && check[:executed] && !check[:force_resolve]
879
+ time_since_last_execution = Time.now.to_i - check[:executed]
880
+ if time_since_last_execution >= check[:ttl]
881
+ client_name = result_key.split(":").first
882
+ check[:output] = "Last check execution was "
883
+ check[:output] << "#{time_since_last_execution} seconds ago"
884
+ check[:status] = 1
885
+ publish_check_result(client_name, check)
837
886
  end
887
+ else
888
+ @redis.srem("ttl", result_key)
838
889
  end
839
890
  end
840
891
  end
@@ -851,48 +902,6 @@ module Sensu
851
902
  end
852
903
  end
853
904
 
854
- # Prune check result aggregations (aggregates). Sensu only
855
- # stores the 20 latest aggregations for a check, to keep the
856
- # amount of data stored to a minimum.
857
- def prune_check_result_aggregations
858
- @logger.info("pruning check result aggregations")
859
- @redis.smembers("aggregates") do |checks|
860
- checks.each do |check_name|
861
- @redis.smembers("aggregates:#{check_name}") do |aggregates|
862
- if aggregates.length > 20
863
- aggregates.sort!
864
- aggregates.take(aggregates.length - 20).each do |check_issued|
865
- result_set = "#{check_name}:#{check_issued}"
866
- @redis.multi
867
- @redis.srem("aggregates:#{check_name}", check_issued)
868
- @redis.del("aggregate:#{result_set}")
869
- @redis.del("aggregation:#{result_set}")
870
- @redis.exec do
871
- @logger.debug("pruned aggregation", {
872
- :check => {
873
- :name => check_name,
874
- :issued => check_issued
875
- }
876
- })
877
- end
878
- end
879
- end
880
- end
881
- end
882
- end
883
- end
884
-
885
- # Set up the check result aggregation pruner, using periodic
886
- # timer to run `prune_check_result_aggregations()` every 20
887
- # seconds. The timer is stored in the timers hash under
888
- # `:leader`.
889
- def setup_check_result_aggregation_pruner
890
- @logger.debug("pruning check result aggregations")
891
- @timers[:leader] << EM::PeriodicTimer.new(20) do
892
- prune_check_result_aggregations
893
- end
894
- end
895
-
896
905
  # Set up the leader duties, tasks only performed by a single
897
906
  # Sensu server at a time. The duties include publishing check
898
907
  # requests, monitoring for stale clients, and pruning check
@@ -901,7 +910,6 @@ module Sensu
901
910
  setup_check_request_publisher
902
911
  setup_client_monitor
903
912
  setup_check_result_monitor
904
- setup_check_result_aggregation_pruner
905
913
  end
906
914
 
907
915
  # Create a lock timestamp (integer), current time including
@@ -1040,19 +1048,16 @@ module Sensu
1040
1048
  @transport.unsubscribe if @transport
1041
1049
  end
1042
1050
 
1043
- # Complete event handling currently in progress. The
1044
- # `:handling_event_count` is used to determine if event handling
1045
- # is complete, when it is equal to `0`. The provided callback is
1046
- # called when handling is complete.
1051
+ # Complete in progress work and then call the provided callback.
1052
+ # This method will wait until all counters stored in the
1053
+ # `@in_progress` hash equal `0`.
1047
1054
  #
1048
- # @yield [] callback/block to call when event handling is
1049
- # complete.
1050
- def complete_event_handling
1051
- @logger.info("completing event handling in progress", {
1052
- :handling_event_count => @handling_event_count
1053
- })
1055
+ # @yield [] callback/block to call when in progress work is
1056
+ # completed.
1057
+ def complete_in_progress
1058
+ @logger.info("completing work in progress", :in_progress => @in_progress)
1054
1059
  retry_until_true do
1055
- if @handling_event_count == 0
1060
+ if @in_progress.values.all? { |count| count == 0 }
1056
1061
  yield
1057
1062
  true
1058
1063
  end
@@ -1124,7 +1129,7 @@ module Sensu
1124
1129
  @logger.warn("stopping")
1125
1130
  pause
1126
1131
  @state = :stopping
1127
- complete_event_handling do
1132
+ complete_in_progress do
1128
1133
  @redis.close if @redis
1129
1134
  @transport.close if @transport
1130
1135
  super