sensu 0.23.3-java → 0.24.0.beta-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+ require "sensu/settings/rules"
2
+ require "sensu/settings/validators/client"
3
+
4
+ module Sensu
5
+ module API
6
+ module Validators
7
+ # The error class for validation.
8
+ class Invalid < RuntimeError; end
9
+
10
+ class Client
11
+ # Include Sensu Settings rules and client validator.
12
+ include Sensu::Settings::Rules
13
+ include Sensu::Settings::Validators::Client
14
+
15
+ # Determine if a client definition is valid.
16
+ #
17
+ # @param client [Hash]
18
+ # @return [TrueClass, FalseClass]
19
+ def valid?(client)
20
+ validate_client(client)
21
+ true
22
+ rescue Invalid
23
+ false
24
+ end
25
+
26
+ private
27
+
28
+ # This method is called when `validate_client()` encounters an
29
+ # invalid definition object. This method raises an exception
30
+ # to be caught by `valid?()`.
31
+ def invalid(*arguments)
32
+ raise Invalid
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
data/lib/sensu/cli.rb CHANGED
@@ -26,6 +26,9 @@ module Sensu
26
26
  opts.on("-d", "--config_dir DIR[,DIR]", "DIR or comma-delimited DIR list for Sensu JSON config files") do |dir|
27
27
  options[:config_dirs] = dir.split(",")
28
28
  end
29
+ opts.on("--validate_config", "Validate the compiled configuration and exit") do
30
+ options[:validate_config] = true
31
+ end
29
32
  opts.on("-P", "--print_config", "Print the compiled configuration and exit") do
30
33
  options[:print_config] = true
31
34
  end
@@ -99,12 +99,40 @@ module Sensu
99
99
  end
100
100
  end
101
101
 
102
+ # Perform token substitution for an object. String values are
103
+ # passed to `substitute_tokens()`, arrays and sub-hashes are
104
+ # processed recursively. Numeric values are ignored.
105
+ #
106
+ # @param object [Object]
107
+ # @return [Array] containing the updated object with substituted
108
+ # values and an array of unmatched tokens.
109
+ def object_substitute_tokens(object)
110
+ unmatched_tokens = []
111
+ case object
112
+ when Hash
113
+ object.each do |key, value|
114
+ object[key], unmatched = object_substitute_tokens(value)
115
+ unmatched_tokens.push(*unmatched)
116
+ end
117
+ when Array
118
+ object.map! do |value|
119
+ value, unmatched = object_substitute_tokens(value)
120
+ unmatched_tokens.push(*unmatched)
121
+ value
122
+ end
123
+ when String
124
+ object, unmatched_tokens = substitute_tokens(object, @settings[:client])
125
+ end
126
+ [object, unmatched_tokens.uniq]
127
+ end
128
+
102
129
  # Execute a check command, capturing its output (STDOUT/ERR),
103
130
  # exit status code, execution duration, timestamp, and publish
104
131
  # the result. This method guards against multiple executions for
105
- # the same check. Check command tokens are substituted with the
106
- # associated client attribute values. If there are unmatched
107
- # check command tokens, the check command will not be executed,
132
+ # the same check. Check attribute value tokens are substituted
133
+ # with the associated client attribute values, via
134
+ # `object_substitute_tokens()`. If there are unmatched check
135
+ # attribute value tokens, the check will not be executed,
108
136
  # instead a check result will be published reporting the
109
137
  # unmatched tokens.
110
138
  #
@@ -113,11 +141,11 @@ module Sensu
113
141
  @logger.debug("attempting to execute check command", :check => check)
114
142
  unless @checks_in_progress.include?(check[:name])
115
143
  @checks_in_progress << check[:name]
116
- command, unmatched_tokens = substitute_tokens(check[:command], @settings[:client])
144
+ check, unmatched_tokens = object_substitute_tokens(check)
117
145
  if unmatched_tokens.empty?
118
- check[:executed] = Time.now.to_i
119
146
  started = Time.now.to_f
120
- Spawn.process(command, :timeout => check[:timeout]) do |output, status|
147
+ check[:executed] = started.to_i
148
+ Spawn.process(check[:command], :timeout => check[:timeout]) do |output, status|
121
149
  check[:duration] = ("%.3f" % (Time.now.to_f - started)).to_f
122
150
  check[:output] = output
123
151
  check[:status] = status
@@ -1,7 +1,7 @@
1
1
  module Sensu
2
2
  unless defined?(Sensu::VERSION)
3
3
  # Sensu release version.
4
- VERSION = "0.23.3".freeze
4
+ VERSION = "0.24.0.beta".freeze
5
5
 
6
6
  # Sensu check severities.
7
7
  SEVERITIES = %w[ok warning critical unknown].freeze
data/lib/sensu/daemon.rb CHANGED
@@ -2,14 +2,14 @@ require "rubygems"
2
2
 
3
3
  gem "eventmachine", "1.2.0.1"
4
4
 
5
- gem "sensu-json", "1.1.1"
5
+ gem "sensu-json", "2.0.0"
6
6
  gem "sensu-logger", "1.2.0"
7
- gem "sensu-settings", "3.4.0"
7
+ gem "sensu-settings", "5.1.0"
8
8
  gem "sensu-extension", "1.5.0"
9
9
  gem "sensu-extensions", "1.5.0"
10
- gem "sensu-transport", "5.0.0"
10
+ gem "sensu-transport", "6.0.0"
11
11
  gem "sensu-spawn", "2.2.0"
12
- gem "sensu-redis", "1.3.0"
12
+ gem "sensu-redis", "1.4.0"
13
13
 
14
14
  require "time"
15
15
  require "uri"
@@ -49,6 +49,7 @@ module Sensu
49
49
  setup_logger(options)
50
50
  load_settings(options)
51
51
  load_extensions(options)
52
+ setup_spawn
52
53
  setup_process(options)
53
54
  end
54
55
 
@@ -64,56 +65,78 @@ module Sensu
64
65
  @logger.setup_signal_traps
65
66
  end
66
67
 
67
- # Log setting or extension loading concerns, sensitive information
68
+ # Log setting or extension loading notices, sensitive information
68
69
  # is redacted.
69
70
  #
70
- # @param concerns [Array] to be logged.
71
- # @param level [Symbol] to log the concerns at.
72
- def log_concerns(concerns=[], level=:warn)
73
- concerns.each do |concern|
71
+ # @param notices [Array] to be logged.
72
+ # @param level [Symbol] to log the notices at.
73
+ def log_notices(notices=[], level=:warn)
74
+ notices.each do |concern|
74
75
  message = concern.delete(:message)
75
76
  @logger.send(level, message, redact_sensitive(concern))
76
77
  end
77
78
  end
78
79
 
79
- # Print the Sensu settings and immediately exit the process. This
80
- # method is used while troubleshooting configuration issues,
81
- # triggered by a CLI argument, e.g. `--print_config`. Sensu
82
- # settings with sensitive values (e.g. passwords) are first
83
- # redacted.
80
+ # Determine if the Sensu settings are valid, if there are load or
81
+ # validation errors, and immediately exit the process with the
82
+ # appropriate exit status code. This method is used to determine
83
+ # if the latest configuration changes are valid prior to
84
+ # restarting the Sensu service, triggered by a CLI argument, e.g.
85
+ # `--validate_config`.
84
86
  #
85
87
  # @param settings [Object]
86
- def print_settings(settings)
88
+ def validate_settings!(settings)
89
+ if settings.errors.empty?
90
+ puts "configuration is valid"
91
+ exit
92
+ else
93
+ puts "configuration is invalid"
94
+ puts Sensu::JSON.dump({:errors => @settings.errors}, :pretty => true)
95
+ exit 2
96
+ end
97
+ end
98
+
99
+ # Print the Sensu settings (JSON) to STDOUT and immediately exit
100
+ # the process with the appropriate exit status code. This method
101
+ # is used while troubleshooting configuration issues, triggered by
102
+ # a CLI argument, e.g. `--print_config`. Sensu settings with
103
+ # sensitive values (e.g. passwords) are first redacted.
104
+ #
105
+ # @param settings [Object]
106
+ def print_settings!(settings)
87
107
  redacted_settings = redact_sensitive(settings.to_hash)
88
108
  @logger.warn("outputting compiled configuration and exiting")
89
109
  puts Sensu::JSON.dump(redacted_settings, :pretty => true)
90
- exit
110
+ exit(settings.errors.empty? ? 0 : 2)
91
111
  end
92
112
 
93
- # Load Sensu settings and validate them. If there are validation
94
- # failures, log them (concerns), then cause the Sensu process to
95
- # exit (2). This method creates the settings instance variable:
96
- # `@settings`. If the `print_config` option is true, this method
97
- # calls `print_settings()` to output the compiled configuration
98
- # settings and then exit the process.
113
+ # Load Sensu settings. This method creates the settings instance
114
+ # variable: `@settings`. If the `validate_config` option is true,
115
+ # this method calls `validate_settings!()` to validate the latest
116
+ # compiled configuration settings and will then exit the process.
117
+ # If the `print_config` option is true, this method calls
118
+ # `print_settings!()` to output the compiled configuration
119
+ # settings and will then exit the process. If there are loading or
120
+ # validation errors, they will be logged (notices), and this
121
+ # method will exit(2) the process.
122
+ #
99
123
  #
100
124
  # https://github.com/sensu/sensu-settings
101
125
  #
102
126
  # @param options [Hash]
103
127
  def load_settings(options={})
104
128
  @settings = Settings.get(options)
105
- log_concerns(@settings.warnings)
106
- failures = @settings.validate
107
- unless failures.empty?
108
- @logger.fatal("invalid settings")
109
- log_concerns(failures, :fatal)
129
+ validate_settings!(@settings) if options[:validate_config]
130
+ log_notices(@settings.warnings)
131
+ log_notices(@settings.errors, :fatal)
132
+ print_settings!(@settings) if options[:print_config]
133
+ unless @settings.errors.empty?
110
134
  @logger.fatal("SENSU NOT RUNNING!")
111
135
  exit 2
112
136
  end
113
- print_settings(@settings) if options[:print_config]
114
137
  end
115
138
 
116
- # Load Sensu extensions and log any concerns. Set the logger and
139
+ # Load Sensu extensions and log any notices. Set the logger and
117
140
  # settings for each extension instance. This method creates the
118
141
  # extensions instance variable: `@extensions`.
119
142
  #
@@ -123,7 +146,7 @@ module Sensu
123
146
  # @param options [Hash]
124
147
  def load_extensions(options={})
125
148
  @extensions = Extensions.get(options)
126
- log_concerns(@extensions.warnings)
149
+ log_notices(@extensions.warnings)
127
150
  extension_settings = @settings.to_hash.dup
128
151
  @extensions.all.each do |extension|
129
152
  extension.logger = @logger
@@ -131,6 +154,20 @@ module Sensu
131
154
  end
132
155
  end
133
156
 
157
+ # Set up Sensu spawn, creating a worker to create, control, and
158
+ # limit spawned child processes. This method adjusts the
159
+ # EventMachine thread pool size to accommodate the concurrent
160
+ # process spawn limit and other Sensu process operations.
161
+ #
162
+ # https://github.com/sensu/sensu-spawn
163
+ def setup_spawn
164
+ @logger.info("configuring sensu spawn", :settings => @settings[:sensu][:spawn])
165
+ threadpool_size = @settings[:sensu][:spawn][:limit] + 10
166
+ @logger.debug("setting eventmachine threadpool size", :size => threadpool_size)
167
+ EM.threadpool_size = threadpool_size
168
+ Spawn.setup(@settings[:sensu][:spawn])
169
+ end
170
+
134
171
  # Manage the current process, optionally daemonize and/or write
135
172
  # the current process ID to a PID file.
136
173
  #
@@ -345,14 +345,14 @@ module Sensu
345
345
  end
346
346
  if filter_message
347
347
  @logger.info(filter_message, details)
348
- @handling_event_count -= 1 if @handling_event_count
348
+ @in_progress[:events] -= 1 if @in_progress
349
349
  else
350
350
  event_filtered?(handler, event) do |filtered|
351
351
  unless filtered
352
352
  yield(event)
353
353
  else
354
354
  @logger.info("event was filtered", details)
355
- @handling_event_count -= 1 if @handling_event_count
355
+ @in_progress[:events] -= 1 if @in_progress
356
356
  end
357
357
  end
358
358
  end
@@ -4,7 +4,7 @@ module Sensu
4
4
  module Server
5
5
  module Handle
6
6
  # Create a handler error callback, for logging the error and
7
- # decrementing the `@handling_event_count` by `1`.
7
+ # decrementing the `@in_progress[:events]` by `1`.
8
8
  #
9
9
  # @param handler [Object]
10
10
  # @param event_data [Object]
@@ -16,14 +16,14 @@ module Sensu
16
16
  :event_data => event_data,
17
17
  :error => error.to_s
18
18
  })
19
- @handling_event_count -= 1 if @handling_event_count
19
+ @in_progress[:events] -= 1 if @in_progress
20
20
  end
21
21
  end
22
22
 
23
23
  # Execute a pipe event handler, using the defined handler
24
24
  # command to spawn a process, passing it event data via STDIN.
25
25
  # Log the handler output lines and decrement the
26
- # `@handling_event_count` by `1` when the handler executes
26
+ # `@in_progress[:events]` by `1` when the handler executes
27
27
  # successfully.
28
28
  #
29
29
  # @param handler [Hash] definition.
@@ -36,7 +36,7 @@ module Sensu
36
36
  :handler => handler,
37
37
  :output => output.split("\n+")
38
38
  })
39
- @handling_event_count -= 1 if @handling_event_count
39
+ @in_progress[:events] -= 1 if @in_progress
40
40
  end
41
41
  end
42
42
 
@@ -47,7 +47,7 @@ module Sensu
47
47
  # `handler_error()` method is used to create the `on_error`
48
48
  # callback for the connection handler. The `on_error` callback
49
49
  # is called in the event of any error(s). The
50
- # `@handling_event_count` is decremented by `1` when the data is
50
+ # `@in_progress[:events]` is decremented by `1` when the data is
51
51
  # transmitted successfully, `on_success`.
52
52
  #
53
53
  # @param handler [Hash] definition.
@@ -57,7 +57,7 @@ module Sensu
57
57
  begin
58
58
  EM::connect(handler[:socket][:host], handler[:socket][:port], Socket) do |socket|
59
59
  socket.on_success = Proc.new do
60
- @handling_event_count -= 1 if @handling_event_count
60
+ @in_progress[:events] -= 1 if @in_progress
61
61
  end
62
62
  socket.on_error = on_error
63
63
  timeout = handler[:timeout] || 10
@@ -71,7 +71,7 @@ module Sensu
71
71
  end
72
72
 
73
73
  # Transmit event data to a UDP socket, then close the
74
- # connection. The `@handling_event_count` is decremented by `1`
74
+ # connection. The `@in_progress[:events]` is decremented by `1`
75
75
  # when the data is assumed to have been transmitted.
76
76
  #
77
77
  # @param handler [Hash] definition.
@@ -81,7 +81,7 @@ module Sensu
81
81
  EM::open_datagram_socket("0.0.0.0", 0, nil) do |socket|
82
82
  socket.send_datagram(event_data.to_s, handler[:socket][:host], handler[:socket][:port])
83
83
  socket.close_connection_after_writing
84
- @handling_event_count -= 1 if @handling_event_count
84
+ @in_progress[:events] -= 1 if @in_progress
85
85
  end
86
86
  rescue => error
87
87
  handler_error(handler, event_data).call(error)
@@ -90,7 +90,7 @@ module Sensu
90
90
 
91
91
  # Publish event data to a Sensu transport pipe. Event data that
92
92
  # is `nil` or empty will not be published, to prevent transport
93
- # errors. The `@handling_event_count` is decremented by `1`,
93
+ # errors. The `@in_progress[:events]` is decremented by `1`,
94
94
  # even if the event data is not published.
95
95
  #
96
96
  # @param handler [Hash] definition.
@@ -105,14 +105,14 @@ module Sensu
105
105
  end
106
106
  end
107
107
  end
108
- @handling_event_count -= 1 if @handling_event_count
108
+ @in_progress[:events] -= 1 if @in_progress
109
109
  end
110
110
 
111
111
  # Run a handler extension, within the Sensu EventMachine reactor
112
112
  # (event loop). The extension API `safe_run()` method is used to
113
113
  # guard against most errors. The `safe_run()` callback is always
114
114
  # called, logging the extension run output and status, and
115
- # decrementing the `@handling_event_count` by `1`.
115
+ # decrementing the `@in_progress[:events]` by `1`.
116
116
  #
117
117
  # @param handler [Hash] definition.
118
118
  # @param event_data [Object] to pass to the handler extension.
@@ -123,7 +123,7 @@ module Sensu
123
123
  :output => output,
124
124
  :status => status
125
125
  })
126
- @handling_event_count -= 1 if @handling_event_count
126
+ @in_progress[:events] -= 1 if @in_progress
127
127
  end
128
128
  end
129
129
 
@@ -6,7 +6,7 @@ module Sensu
6
6
  # created callback can be used for standard mutators and mutator
7
7
  # extensions. The provided callback will only be called when the
8
8
  # mutator status is `0` (OK). If the status is not `0`, an error
9
- # is logged, and the `@handling_event_count` is decremented by
9
+ # is logged, and the `@in_progress[:events]` is decremented by
10
10
  # `1`.
11
11
  #
12
12
  # @param mutator [Object] definition or extension.
@@ -25,7 +25,7 @@ module Sensu
25
25
  :output => output,
26
26
  :status => status
27
27
  })
28
- @handling_event_count -= 1 if @handling_event_count
28
+ @in_progress[:events] -= 1 if @in_progress
29
29
  end
30
30
  end
31
31
  end
@@ -63,7 +63,7 @@ module Sensu
63
63
  # mutator is used, unless the handler specifies another mutator.
64
64
  # If a mutator does not exist, not defined or a missing
65
65
  # extension, an error will be logged and the
66
- # `@handling_event_count` is decremented by `1`. This method
66
+ # `@in_progress[:events]` is decremented by `1`. This method
67
67
  # first checks for the existence of a standard mutator, then
68
68
  # checks for an extension if a standard mutator is not defined.
69
69
  #
@@ -84,7 +84,7 @@ module Sensu
84
84
  @logger.error("unknown mutator", {
85
85
  :mutator_name => mutator_name
86
86
  })
87
- @handling_event_count -= 1 if @handling_event_count
87
+ @in_progress[:events] -= 1 if @in_progress
88
88
  end
89
89
  end
90
90
  end
@@ -11,7 +11,9 @@ module Sensu
11
11
  include Mutate
12
12
  include Handle
13
13
 
14
- attr_reader :is_leader, :handling_event_count
14
+ attr_reader :is_leader, :in_progress
15
+
16
+ STANDARD_CHECK_TYPE = "standard".freeze
15
17
 
16
18
  METRIC_CHECK_TYPE = "metric".freeze
17
19
 
@@ -40,17 +42,23 @@ module Sensu
40
42
  super
41
43
  @is_leader = false
42
44
  @timers[:leader] = Array.new
43
- @handling_event_count = 0
45
+ @in_progress = Hash.new(0)
44
46
  end
45
47
 
46
48
  # Set up the Redis and Transport connection objects, `@redis`
47
- # and `@transport`. This method "drys" up many instances of
48
- # `setup_redis()` and `setup_transport()`.
49
+ # and `@transport`. This method updates the Redis on error
50
+ # callback to reset the in progress check result counter. This
51
+ # method "drys" up many instances of `setup_redis()` and
52
+ # `setup_transport()`, particularly in the specs.
49
53
  #
50
54
  # @yield callback/block called after connecting to Redis and the
51
55
  # Sensu Transport.
52
56
  def setup_connections
53
57
  setup_redis do
58
+ @redis.on_error do |error|
59
+ @logger.error("redis connection error", :error => error.to_s)
60
+ @in_progress[:check_results] = 0
61
+ end
54
62
  setup_transport do
55
63
  yield
56
64
  end
@@ -90,15 +98,11 @@ module Sensu
90
98
  #
91
99
  # @param client [Hash] definition.
92
100
  def create_client_registration_event(client)
93
- event = {
94
- :id => random_uuid,
95
- :client => client,
96
- :check => create_registration_check(client),
97
- :occurrences => 1,
98
- :action => :create,
99
- :timestamp => Time.now.to_i
100
- }
101
- process_event(event)
101
+ check = create_registration_check(client)
102
+ create_event(client, check) do |event|
103
+ event_bridges(event)
104
+ process_event(event)
105
+ end
102
106
  end
103
107
 
104
108
  # Process an initial client registration, when it is first added
@@ -235,8 +239,8 @@ module Sensu
235
239
  #
236
240
  # This method determines the appropriate handlers for an event,
237
241
  # filtering and mutating the event data for each of them. The
238
- # `@handling_event_count` is incremented by `1`, for each event
239
- # handler chain (filter -> mutate -> handle).
242
+ # `@in_progress[:events]` counter is incremented by `1`, for
243
+ # each event handler chain (filter -> mutate -> handle).
240
244
  #
241
245
  # @param event [Hash]
242
246
  def process_event(event)
@@ -245,7 +249,7 @@ module Sensu
245
249
  handler_list = Array((event[:check][:handlers] || event[:check][:handler]) || DEFAULT_HANDLER_NAME)
246
250
  handlers = derive_handlers(handler_list)
247
251
  handlers.each do |handler|
248
- @handling_event_count += 1
252
+ @in_progress[:events] += 1
249
253
  filter_event(handler, event) do |event|
250
254
  mutate_event(handler, event) do |event_data|
251
255
  handle_event(handler, event_data)
@@ -271,35 +275,27 @@ module Sensu
271
275
  end
272
276
  end
273
277
 
274
- # Add a check result to an aggregate. A check aggregate uses the
275
- # check `:name` and the `:issued` timestamp as its unique
276
- # identifier. An aggregate uses several counters: the total
277
- # number of results in the aggregate, and a counter for each
278
- # check severity (ok, warning, etc). Check output is also
279
- # stored, to be summarized to aid in identifying outliers for a
280
- # check execution across a number of Sensu clients. JSON
281
- # serialization is used for storing check result data.
278
+ # Add a check result to an aggregate. The aggregate name is
279
+ # determined by the value of check `:aggregate`. If check
280
+ # `:aggregate` is `true` (legacy), the check `:name` is used as
281
+ # the aggregate name. If check `:aggregate` is a string, it is
282
+ # used as the aggregate name. This method will add the client
283
+ # name to the aggregate, all other processing (e.g. counters) is
284
+ # done by the Sensu API on request.
282
285
  #
283
286
  # @param client [Hash]
284
287
  # @param check [Hash]
285
288
  def aggregate_check_result(client, check)
289
+ aggregate = (check[:aggregate].is_a?(String) ? check[:aggregate] : check[:name])
286
290
  @logger.debug("adding check result to aggregate", {
291
+ :aggregate => aggregate,
287
292
  :client => client,
288
293
  :check => check
289
294
  })
290
- result_set = "#{check[:name]}:#{check[:issued]}"
291
- result_data = Sensu::JSON.dump(:output => check[:output], :status => check[:status])
292
- @redis.multi
293
- @redis.hset("aggregation:#{result_set}", client[:name], result_data)
294
- SEVERITIES.each do |severity|
295
- @redis.hsetnx("aggregate:#{result_set}", severity, 0)
295
+ aggregate_member = "#{client[:name]}:#{check[:name]}"
296
+ @redis.sadd("aggregates:#{aggregate}", aggregate_member) do
297
+ @redis.sadd("aggregates", aggregate)
296
298
  end
297
- severity = (SEVERITIES[check[:status]] || "unknown")
298
- @redis.hincrby("aggregate:#{result_set}", severity, 1)
299
- @redis.hincrby("aggregate:#{result_set}", "total", 1)
300
- @redis.sadd("aggregates:#{check[:name]}", check[:issued])
301
- @redis.sadd("aggregates", check[:name])
302
- @redis.exec
303
299
  end
304
300
 
305
301
  # Truncate check output. For metric checks, (`"type":
@@ -333,7 +329,7 @@ module Sensu
333
329
  # @param client [Hash]
334
330
  # @param check [Hash]
335
331
  # @yield [] callback/block called after the check data has been
336
- # stored (history, etc).
332
+ # stored (history, etc).
337
333
  def store_check_result(client, check)
338
334
  @logger.debug("storing check result", :check => check)
339
335
  result_key = "#{client[:name]}:#{check[:name]}"
@@ -342,6 +338,7 @@ module Sensu
342
338
  @redis.multi
343
339
  @redis.sadd("result:#{client[:name]}", check[:name])
344
340
  @redis.set("result:#{result_key}", Sensu::JSON.dump(check_truncated))
341
+ @redis.sadd("ttl", result_key) if check[:ttl]
345
342
  @redis.rpush(history_key, check[:status])
346
343
  @redis.ltrim(history_key, -21, -1)
347
344
  @redis.exec do
@@ -418,83 +415,106 @@ module Sensu
418
415
  end
419
416
 
420
417
  # Update the event registry, stored in Redis. This method
421
- # determines if check data results in the creation or update of
422
- # event data in the registry. Existing event data for a
423
- # client/check pair is fetched, used in conditionals and the
424
- # composition of the new event data. If a check `:status` is not
418
+ # determines if event data warrants the creation or update of
419
+ # event data in the registry. If a check `:status` is not
425
420
  # `0`, or it has been flapping, an event is created/updated in
426
- # the registry. If there was existing event data, but the check
427
- # `:status` is now `0`, the event is removed (resolved) from the
428
- # registry. If the previous conditions are not met, and check
429
- # `:type` is `metric` and the `:status` is `0`, the event
430
- # registry is not updated, but the provided callback is called
431
- # with the event data. All event data is sent to event bridge
432
- # extensions, including events that do not normally produce an
433
- # action. JSON serialization is used when storing data in the
434
- # registry.
421
+ # the registry. If the event `:action` is `:resolve`, the event
422
+ # is removed (resolved) from the registry. If the previous
423
+ # conditions are not met and check `:type` is `metric`, the
424
+ # registry is not updated, but further event processing is
425
+ # required (`yield(true)`). JSON serialization is used when
426
+ # storing data in the registry.
427
+ #
428
+ # @param event [Hash]
429
+ # @yield callback [process] callback/block called after the event
430
+ # registry has been updated.
431
+ # @yieldparam process [TrueClass, FalseClass] indicating if the
432
+ # event requires further processing.
433
+ def update_event_registry(event)
434
+ client_name = event[:client][:name]
435
+ if event[:check][:status] != 0 || event[:action] == :flapping
436
+ @redis.hset("events:#{client_name}", event[:check][:name], Sensu::JSON.dump(event)) do
437
+ yield(true)
438
+ end
439
+ elsif event[:action] == :resolve &&
440
+ (event[:check][:auto_resolve] != false || event[:check][:force_resolve])
441
+ @redis.hdel("events:#{client_name}", event[:check][:name]) do
442
+ yield(true)
443
+ end
444
+ elsif event[:check][:type] == METRIC_CHECK_TYPE
445
+ yield(true)
446
+ else
447
+ yield(false)
448
+ end
449
+ end
450
+
451
+ # Create an event, using the provided client and check result
452
+ # data. Existing event data for the client/check pair is fetched
453
+ # from the event registry to be used in the composition of the
454
+ # new event.
435
455
  #
436
456
  # @param client [Hash]
437
457
  # @param check [Hash]
438
458
  # @yield callback [event] callback/block called with the
439
- # resulting event data if the event registry is updated, or
440
- # the check is of type `:metric`.
459
+ # resulting event.
441
460
  # @yieldparam event [Hash]
442
- def update_event_registry(client, check)
443
- @redis.hget("events:#{client[:name]}", check[:name]) do |event_json|
444
- stored_event = event_json ? Sensu::JSON.load(event_json) : nil
445
- flapping = check_flapping?(stored_event, check)
446
- event = {
447
- :id => random_uuid,
448
- :client => client,
449
- :check => check,
450
- :occurrences => 1,
451
- :action => (flapping ? :flapping : :create),
452
- :timestamp => Time.now.to_i
453
- }
454
- if check[:status] != 0 || flapping
455
- if stored_event && check[:status] == stored_event[:check][:status]
456
- event[:occurrences] = stored_event[:occurrences] + 1
457
- end
458
- @redis.hset("events:#{client[:name]}", check[:name], Sensu::JSON.dump(event)) do
459
- yield(event)
461
+ def create_event(client, check)
462
+ check_history(client, check) do |history, total_state_change|
463
+ check[:history] = history
464
+ check[:total_state_change] = total_state_change
465
+ @redis.hget("events:#{client[:name]}", check[:name]) do |event_json|
466
+ stored_event = event_json ? Sensu::JSON.load(event_json) : nil
467
+ flapping = check_flapping?(stored_event, check)
468
+ event = {
469
+ :client => client,
470
+ :check => check,
471
+ :occurrences => 1,
472
+ :action => (flapping ? :flapping : :create),
473
+ :timestamp => Time.now.to_i
474
+ }
475
+ if stored_event
476
+ event[:id] = stored_event[:id]
477
+ event[:last_state_change] = stored_event[:last_state_change]
478
+ event[:last_ok] = stored_event[:last_ok]
479
+ event[:occurrences] = stored_event[:occurrences]
480
+ else
481
+ event[:id] = random_uuid
460
482
  end
461
- elsif stored_event
462
- event[:occurrences] = stored_event[:occurrences]
463
- event[:action] = :resolve
464
- unless check[:auto_resolve] == false && !check[:force_resolve]
465
- @redis.hdel("events:#{client[:name]}", check[:name]) do
466
- yield(event)
483
+ if check[:status] != 0 || flapping
484
+ if history[-1] == history[-2]
485
+ event[:occurrences] += 1
486
+ else
487
+ event[:occurrences] = 1
488
+ event[:last_state_change] = event[:timestamp]
467
489
  end
490
+ elsif stored_event
491
+ event[:last_state_change] = event[:timestamp]
492
+ event[:action] = :resolve
493
+ end
494
+ if check[:status] == 0
495
+ event[:last_ok] = event[:timestamp]
468
496
  end
469
- elsif check[:type] == METRIC_CHECK_TYPE
470
497
  yield(event)
471
498
  end
472
- event_bridges(event)
473
499
  end
474
500
  end
475
501
 
476
- # Create a blank client (data) and add it to the client
477
- # registry. Only the client name is known, the other client
478
- # attributes must be updated via the API (POST /clients:client).
479
- # Dynamically created clients and those updated via the API will
480
- # have client keepalives disabled, `:keepalives` is set to
481
- # `false`.
502
+ # Create a blank client (data). Only the client name is known,
503
+ # the other client attributes must be updated via the API (POST
504
+ # /clients:client). Dynamically created clients and those
505
+ # updated via the API will have client keepalives disabled by
506
+ # default, `:keepalives` is set to `false`.
482
507
  #
483
- # @param name [Hash] to use for the client.
484
- # @yield [client] callback/block to be called with the
485
- # dynamically created client data.
486
- # @yieldparam client [Hash]
508
+ # @param name [String] to use for the client.
509
+ # @return [Hash] client.
487
510
  def create_client(name)
488
- client = {
511
+ {
489
512
  :name => name,
490
513
  :address => "unknown",
491
514
  :subscriptions => [],
492
515
  :keepalives => false,
493
516
  :version => VERSION
494
517
  }
495
- update_client_registry(client) do
496
- yield(client)
497
- end
498
518
  end
499
519
 
500
520
  # Retrieve a client (data) from Redis if it exists. If a client
@@ -527,7 +547,8 @@ module Sensu
527
547
  yield(client)
528
548
  end
529
549
  else
530
- create_client(client_key) do |client|
550
+ client = create_client(client_key)
551
+ update_client_registry(client) do
531
552
  yield(client)
532
553
  end
533
554
  end
@@ -536,14 +557,18 @@ module Sensu
536
557
 
537
558
  # Process a check result, storing its data, inspecting its
538
559
  # contents, and taking the appropriate actions (eg. update the
539
- # event registry). A check result must have a valid client name,
540
- # associated with a client in the registry or one will be
541
- # created. If a local check definition exists for the check
542
- # name, and the check result is not from a standalone check
543
- # execution, it's merged with the check result for more context.
560
+ # event registry). The `@in_progress[:check_results]` counter is
561
+ # incremented by `1` prior to check result processing and then
562
+ # decremented by `1` after updating the event registry. A check
563
+ # result must have a valid client name, associated with a client
564
+ # in the registry or one will be created. If a local check
565
+ # definition exists for the check name, and the check result is
566
+ # not from a standalone check execution, it's merged with the
567
+ # check result for more context.
544
568
  #
545
569
  # @param result [Hash] data.
546
570
  def process_check_result(result)
571
+ @in_progress[:check_results] += 1
547
572
  @logger.debug("processing result", :result => result)
548
573
  retrieve_client(result) do |client|
549
574
  check = case
@@ -552,13 +577,15 @@ module Sensu
552
577
  else
553
578
  result[:check]
554
579
  end
580
+ check[:type] ||= STANDARD_CHECK_TYPE
581
+ check[:origin] = result[:client] if check[:source]
555
582
  aggregate_check_result(client, check) if check[:aggregate]
556
583
  store_check_result(client, check) do
557
- check_history(client, check) do |history, total_state_change|
558
- check[:history] = history
559
- check[:total_state_change] = total_state_change
560
- update_event_registry(client, check) do |event|
561
- process_event(event)
584
+ create_event(client, check) do |event|
585
+ event_bridges(event)
586
+ update_event_registry(event) do |process|
587
+ process_event(event) if process
588
+ @in_progress[:check_results] -= 1
562
589
  end
563
590
  end
564
591
  end
@@ -755,45 +782,72 @@ module Sensu
755
782
  check.merge(:name => "keepalive", :issued => timestamp, :executed => timestamp)
756
783
  end
757
784
 
785
+ # Create client keepalive check results. This method will
786
+ # retrieve clients from the registry, creating a keepalive
787
+ # check definition for each client, using the
788
+ # `create_keepalive_check()` method, containing client specific
789
+ # keepalive thresholds. If the time since the latest keepalive
790
+ # is equal to or greater than a threshold, the check `:output`
791
+ # is set to a descriptive message, and `:status` is set to the
792
+ # appropriate non-zero value. If a client has been sending
793
+ # keepalives, `:output` and `:status` are set to indicate an OK
794
+ # state. The `publish_check_result()` method is used to publish
795
+ # the client keepalive check results.
796
+ #
797
+ # @param clients [Array] of client names.
798
+ # @yield [] callback/block called after the client keepalive
799
+ # check results have been created.
800
+ def create_client_keepalive_check_results(clients)
801
+ client_keys = clients.map { |client_name| "client:#{client_name}" }
802
+ @redis.mget(*client_keys) do |client_json_objects|
803
+ client_json_objects.each do |client_json|
804
+ unless client_json.nil?
805
+ client = Sensu::JSON.load(client_json)
806
+ next if client[:keepalives] == false
807
+ check = create_keepalive_check(client)
808
+ time_since_last_keepalive = Time.now.to_i - client[:timestamp]
809
+ check[:output] = "No keepalive sent from client for "
810
+ check[:output] << "#{time_since_last_keepalive} seconds"
811
+ case
812
+ when time_since_last_keepalive >= check[:thresholds][:critical]
813
+ check[:output] << " (>=#{check[:thresholds][:critical]})"
814
+ check[:status] = 2
815
+ when time_since_last_keepalive >= check[:thresholds][:warning]
816
+ check[:output] << " (>=#{check[:thresholds][:warning]})"
817
+ check[:status] = 1
818
+ else
819
+ check[:output] = "Keepalive sent from client "
820
+ check[:output] << "#{time_since_last_keepalive} seconds ago"
821
+ check[:status] = 0
822
+ end
823
+ publish_check_result(client[:name], check)
824
+ end
825
+ end
826
+ yield
827
+ end
828
+ end
829
+
758
830
  # Determine stale clients, those that have not sent a keepalive
759
- # in a specified amount of time (thresholds). This method
760
- # iterates through the client registry, creating a keepalive
761
- # check definition with the `create_keepalive_check()` method,
762
- # containing client specific staleness thresholds. If the time
763
- # since the latest keepalive is equal to or greater than a
764
- # threshold, the check `:output` is set to a descriptive
765
- # message, and `:status` is set to the appropriate non-zero
766
- # value. If a client has been sending keepalives, `:output` and
767
- # `:status` are set to indicate an OK state. A check result is
768
- # published for every client in the registry.
831
+ # in a specified amount of time. This method iterates through
832
+ # the client registry, creating a keepalive check result for
833
+ # each client. The `create_client_keepalive_check_results()`
834
+ # method is used to inspect and create keepalive check results
835
+ # for each slice of clients from the registry. A relatively
836
+ # small clients slice size (20) is used to reduce the number of
837
+ # clients inspected within a single tick of the EM reactor.
769
838
  def determine_stale_clients
770
839
  @logger.info("determining stale clients")
771
840
  @redis.smembers("clients") do |clients|
772
- clients.each do |client_name|
773
- @redis.get("client:#{client_name}") do |client_json|
774
- unless client_json.nil?
775
- client = Sensu::JSON.load(client_json)
776
- next if client[:keepalives] == false
777
- check = create_keepalive_check(client)
778
- time_since_last_keepalive = Time.now.to_i - client[:timestamp]
779
- check[:output] = "No keepalive sent from client for "
780
- check[:output] << "#{time_since_last_keepalive} seconds"
781
- case
782
- when time_since_last_keepalive >= check[:thresholds][:critical]
783
- check[:output] << " (>=#{check[:thresholds][:critical]})"
784
- check[:status] = 2
785
- when time_since_last_keepalive >= check[:thresholds][:warning]
786
- check[:output] << " (>=#{check[:thresholds][:warning]})"
787
- check[:status] = 1
788
- else
789
- check[:output] = "Keepalive sent from client "
790
- check[:output] << "#{time_since_last_keepalive} seconds ago"
791
- check[:status] = 0
792
- end
793
- publish_check_result(client[:name], check)
841
+ client_count = clients.length
842
+ keepalive_check_results = Proc.new do |slice_start, slice_size|
843
+ unless slice_start > client_count - 1
844
+ clients_slice = clients.slice(slice_start..slice_size)
845
+ create_client_keepalive_check_results(clients_slice) do
846
+ keepalive_check_results.call(slice_start + 20, slice_size + 20)
794
847
  end
795
848
  end
796
849
  end
850
+ keepalive_check_results.call(0, 19)
797
851
  end
798
852
  end
799
853
 
@@ -809,32 +863,29 @@ module Sensu
809
863
 
810
864
  # Determine stale check results, those that have not executed in
811
865
  # a specified amount of time (check TTL). This method iterates
812
- # through the client registry and check results for checks with
813
- # a defined TTL value (in seconds). If a check result has a
814
- # defined TTL, the time since last check execution (in seconds)
815
- # is calculated. If the time since last execution is equal to or
816
- # greater than the check TTL, a warning check result is
817
- # published with the appropriate check output.
866
+ # through stored check results that have a defined TTL value (in
867
+ # seconds). The time since last check execution (in seconds) is
868
+ # calculated for each check result. If the time since last
869
+ # execution is equal to or greater than the check TTL, a warning
870
+ # check result is published with the appropriate check output.
818
871
  def determine_stale_check_results
819
872
  @logger.info("determining stale check results")
820
- @redis.smembers("clients") do |clients|
821
- clients.each do |client_name|
822
- @redis.smembers("result:#{client_name}") do |checks|
823
- checks.each do |check_name|
824
- result_key = "#{client_name}:#{check_name}"
825
- @redis.get("result:#{result_key}") do |result_json|
826
- unless result_json.nil?
827
- check = Sensu::JSON.load(result_json)
828
- next unless check[:ttl] && check[:executed] && !check[:force_resolve]
829
- time_since_last_execution = Time.now.to_i - check[:executed]
830
- if time_since_last_execution >= check[:ttl]
831
- check[:output] = "Last check execution was "
832
- check[:output] << "#{time_since_last_execution} seconds ago"
833
- check[:status] = 1
834
- publish_check_result(client_name, check)
835
- end
836
- end
873
+ @redis.smembers("ttl") do |result_keys|
874
+ result_keys.each do |result_key|
875
+ @redis.get("result:#{result_key}") do |result_json|
876
+ unless result_json.nil?
877
+ check = Sensu::JSON.load(result_json)
878
+ next unless check[:ttl] && check[:executed] && !check[:force_resolve]
879
+ time_since_last_execution = Time.now.to_i - check[:executed]
880
+ if time_since_last_execution >= check[:ttl]
881
+ client_name = result_key.split(":").first
882
+ check[:output] = "Last check execution was "
883
+ check[:output] << "#{time_since_last_execution} seconds ago"
884
+ check[:status] = 1
885
+ publish_check_result(client_name, check)
837
886
  end
887
+ else
888
+ @redis.srem("ttl", result_key)
838
889
  end
839
890
  end
840
891
  end
@@ -851,48 +902,6 @@ module Sensu
851
902
  end
852
903
  end
853
904
 
854
- # Prune check result aggregations (aggregates). Sensu only
855
- # stores the 20 latest aggregations for a check, to keep the
856
- # amount of data stored to a minimum.
857
- def prune_check_result_aggregations
858
- @logger.info("pruning check result aggregations")
859
- @redis.smembers("aggregates") do |checks|
860
- checks.each do |check_name|
861
- @redis.smembers("aggregates:#{check_name}") do |aggregates|
862
- if aggregates.length > 20
863
- aggregates.sort!
864
- aggregates.take(aggregates.length - 20).each do |check_issued|
865
- result_set = "#{check_name}:#{check_issued}"
866
- @redis.multi
867
- @redis.srem("aggregates:#{check_name}", check_issued)
868
- @redis.del("aggregate:#{result_set}")
869
- @redis.del("aggregation:#{result_set}")
870
- @redis.exec do
871
- @logger.debug("pruned aggregation", {
872
- :check => {
873
- :name => check_name,
874
- :issued => check_issued
875
- }
876
- })
877
- end
878
- end
879
- end
880
- end
881
- end
882
- end
883
- end
884
-
885
- # Set up the check result aggregation pruner, using periodic
886
- # timer to run `prune_check_result_aggregations()` every 20
887
- # seconds. The timer is stored in the timers hash under
888
- # `:leader`.
889
- def setup_check_result_aggregation_pruner
890
- @logger.debug("pruning check result aggregations")
891
- @timers[:leader] << EM::PeriodicTimer.new(20) do
892
- prune_check_result_aggregations
893
- end
894
- end
895
-
896
905
  # Set up the leader duties, tasks only performed by a single
897
906
  # Sensu server at a time. The duties include publishing check
898
907
  # requests, monitoring for stale clients, and pruning check
@@ -901,7 +910,6 @@ module Sensu
901
910
  setup_check_request_publisher
902
911
  setup_client_monitor
903
912
  setup_check_result_monitor
904
- setup_check_result_aggregation_pruner
905
913
  end
906
914
 
907
915
  # Create a lock timestamp (integer), current time including
@@ -1040,19 +1048,16 @@ module Sensu
1040
1048
  @transport.unsubscribe if @transport
1041
1049
  end
1042
1050
 
1043
- # Complete event handling currently in progress. The
1044
- # `:handling_event_count` is used to determine if event handling
1045
- # is complete, when it is equal to `0`. The provided callback is
1046
- # called when handling is complete.
1051
+ # Complete in progress work and then call the provided callback.
1052
+ # This method will wait until all counters stored in the
1053
+ # `@in_progress` hash equal `0`.
1047
1054
  #
1048
- # @yield [] callback/block to call when event handling is
1049
- # complete.
1050
- def complete_event_handling
1051
- @logger.info("completing event handling in progress", {
1052
- :handling_event_count => @handling_event_count
1053
- })
1055
+ # @yield [] callback/block to call when in progress work is
1056
+ # completed.
1057
+ def complete_in_progress
1058
+ @logger.info("completing work in progress", :in_progress => @in_progress)
1054
1059
  retry_until_true do
1055
- if @handling_event_count == 0
1060
+ if @in_progress.values.all? { |count| count == 0 }
1056
1061
  yield
1057
1062
  true
1058
1063
  end
@@ -1124,7 +1129,7 @@ module Sensu
1124
1129
  @logger.warn("stopping")
1125
1130
  pause
1126
1131
  @state = :stopping
1127
- complete_event_handling do
1132
+ complete_in_progress do
1128
1133
  @redis.close if @redis
1129
1134
  @transport.close if @transport
1130
1135
  super