notifu 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ require_relative "sidekiq_init"
2
+ require 'excon'
3
+
4
# Global logger used by the worker classes defined in this file.
$logger = Notifu::Logger.new('processor')

# Server-side Sidekiq setup: queue Redis URL plus a Log4r logger that
# writes either to syslog or to stdout, depending on the Notifu config.
Sidekiq.configure_server do |config|
  config.redis = { url: Notifu::CONFIG[:redis_queues] }

  Sidekiq::Logging.logger = Log4r::Logger.new('sidekiq')
  sidekiq_log = Sidekiq::Logging.logger

  use_syslog = Notifu::CONFIG[:logging][:syslog][:enabled]
  sidekiq_log.outputters =
    if use_syslog
      Log4r::SyslogOutputter.new('sidekiq', ident: 'notifu-processor')
    else
      Log4r::Outputter.stdout
    end

  # Sidekiq::Logging.logger.formatter = Notifu::LogFormatter.new
  sidekiq_log.level = Log4r::INFO
end

# Client-side Sidekiq setup: jobs are pushed to the same queue Redis.
Sidekiq.configure_client do |config|
  config.redis = { url: Notifu::CONFIG[:redis_queues] }
end
21
+
22
+ ###################################################################
23
+ ###################################################################
24
+ ####### PROCESSOR WORKER ##########################################
25
+ ###################################################################
26
+ ###################################################################
27
+
28
+
29
module Notifu
  ##
  # Sidekiq worker bound to the "processor" queue.
  #
  # For every incoming event it loads (or creates) the matching issue, walks
  # the event's group/SLA pairs, decides per pair whether a notification is
  # due, pushes one job per actor when it is, writes an action-log entry and
  # finally stores the refreshed issue state.
  #
  class Processor
    include Sidekiq::Worker
    include Notifu::Util

    sidekiq_options :retry => true
    sidekiq_options :queue => "processor"

    # Duty range written as "HH:MM-HH:MM"; the four groups capture
    # from-hour, from-minute, until-hour and until-minute.
    DUTY_RANGE = /([0-9]{2}):([0-9]{2})-([0-9]{2}):([0-9]{2})/

    attr_accessor :issue
    attr_accessor :event
    attr_accessor :now
    attr_accessor :processing_result

    ###################################################################
    ####### SIDEKIQ GLUE METHOD #######################################
    ###################################################################

    ##
    # Sidekiq entry point.
    #
    # Builds the event from the job arguments, resolves the issue with the
    # same notifu_id (creating it from the event data when none exists),
    # runs #process! and logs how long the task took in milliseconds.
    #
    def perform *args
      # Monotonic clock: the measured duration is immune to wall-clock jumps.
      t_start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
      log "info", "Task start"

      # read event
      self.event = Notifu::Model::Event.new args
      self.now = Time.now
      log "info", "Processing event NID #{self.event.notifu_id}"

      # try to check if we already know about the issue, otherwise save it into DB as a new one
      self.issue = Notifu::Model::Issue.with(:notifu_id, self.event.notifu_id)
      self.issue ||= Notifu::Model::Issue.create(self.event.data)

      # let the magic happen
      process!

      t_finish = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)

      log "info", "Task finish (in #{t_finish - t_start}ms)"
    end

    ###################################################################
    ####### MAIN PROCESSING LOGIC #####################################
    ###################################################################

    ##
    # Evaluates the event against each of its group/SLA pairs, records the
    # outcome on the event and in the action log, then persists the issue
    # and schedules cleanup.
    #
    # A pair whose group or SLA cannot be loaded (lookup raised or returned
    # nil) is logged and skipped so the remaining pairs are still processed.
    #
    def process!
      self.event.group_sla.each do |gs|
        group = sla = nil
        begin
          group = Notifu::Model::Group.with(:name, gs[:group])
          sla = Notifu::Model::Sla.with(:name, gs[:sla])
        rescue
          # Handled together with the "not found" case right below.
          group = sla = nil
        end

        unless group && sla
          log "info", "#{self.event.notifu_id} [#{self.event.host}/#{self.event.service}/#{self.event.code.to_state}]: Object init failed. Is Notifu API running?"
          next
        end

        result, notified = evaluate_group_sla(sla, group)

        self.event.update_process_result!(notified)
        action_log build_action_log_entry(group, sla, result, notified)
      end

      sync_issue!

      # delayed cleanup job
      cleanup!
    end

    ###################################################################
    ####### NOTIFICATION METHOD (method for :process! ) ###############
    ###################################################################

    ##
    # Pushes one Sidekiq job per actor for the given SLA/group and records
    # the notification time on the issue.
    #
    # Primary contacts/actors are always included; secondary and tertiary
    # ones are added when #escalate_to? says the respective level is due.
    #
    # Returns a Hash with :sla, :group, :actors, :contacts and
    # :escalation_level describing what was notified.
    #
    def notify! (sla, group)
      contacts = []
      escalation_level = "primary"

      # SECURITY NOTE(review): sla.actors is evaluated as Ruby code. This is
      # only acceptable while SLA records are written by trusted parties;
      # consider a data-only format (e.g. JSON) instead.
      sla_actors = eval(sla.actors)

      group.primary.each do |contact|
        contacts << contact.name
      end
      # Array() tolerates a missing :primary entry instead of raising.
      actors = [] + Array(sla_actors[:primary])

      # secondary (level 1) and tertiary (level 2) escalation
      { 1 => :secondary, 2 => :tertiary }.each do |level, tier|
        next unless escalate_to?(level, sla)

        group.public_send(tier).each do |contact|
          contacts << contact.name
        end
        actors += Array(sla_actors[tier])
        escalation_level = tier.to_s
      end

      actors.each do |actor|
        Sidekiq::Client.push( 'class' => "Notifu::Actors::#{actor.camelize}",
                              'args'  => [ self.event.notifu_id, contacts ],
                              'queue' => "actor-#{actor}")
      end

      log "info", "Taking action (#{group.name}:#{sla.name}) NID #{self.event.notifu_id} [#{self.event.host}/#{self.event.service}/#{self.event.code.to_state}] actor: #{actors.join(', ')}; contacts: #{contacts.join(', ')}; escalation_level: #{escalation_level}"

      self.issue.time_last_notified!(group.name, sla.name, Time.now.to_i)

      { sla: sla.name, group: group.name, actors: actors, contacts: contacts, escalation_level: escalation_level }
    end


    ###################################################################
    ####### LOGIC BLOCK ###############################################
    ###################################################################

    ##
    # True when the event has occurred at least as often as its trigger
    # threshold. Both values are coerced with to_i, like every other
    # occurrence comparison in this class, so numeric strings are not
    # compared lexicographically.
    #
    def enough_occurrences?
      self.event.occurrences_count.to_i >= self.event.occurrences_trigger.to_i
    end

    ##
    # True when the event is critical and at least level * sla.refresh
    # seconds have passed since the issue's creation time.
    #
    def escalate_to?(level, sla)
      # escalation_interval = self.event.refresh
      # escalation_interval ||= sla.refresh
      escalation_period = level.to_i * sla.refresh.to_i

      is_critical? && self.issue.time_created.to_i + escalation_period <= self.now.to_i
    end

    ##
    # True when the stash list contains the silence path of this event.
    #
    def silenced?
      path = silence_path
      get_stashes.any? { |stash| stash["path"] == path }
    end

    # True when the event code is 0.
    def is_ok?
      self.event.code == 0
    end

    # True when the event code is 1.
    def is_warning?
      self.event.code == 1
    end

    # True when the event code is 2.
    def is_critical?
      self.event.code == 2
    end

    ##
    # True when the issue has no recorded notification time for this
    # group/SLA pair.
    #
    def first_notification? sla, group
      self.issue.time_last_notified?(group.name, sla.name).nil?
    end

    ##
    # True when the event code differs from the code stored on the issue.
    #
    def status_changed?
      self.event.code.to_i != self.issue.code.to_i
    end

    ##
    # True when at least sla.refresh seconds have passed since the last
    # notification recorded for this group/SLA pair.
    #
    def renotify? (sla, group)
      # t_renotify_int = self.event.refresh
      # t_renotify_int ||= sla.refresh
      t_last_notified = self.issue.time_last_notified?(group.name, sla.name)

      t_last_notified.to_i + sla.refresh.to_i <= self.now.to_i
    end

    ##
    # Decides whether self.now falls into the given duty time range.
    #
    # timerange - "ALWAYS", "NEVER", or a String containing "HH:MM-HH:MM".
    #             Any other value counts as on duty.
    #
    # Both ends of an "HH:MM-HH:MM" range are inclusive and are interpreted
    # on the local calendar day of self.now. A range whose start lies after
    # its end (e.g. "22:00-06:00") is treated as wrapping past midnight.
    #
    # NOTE(review): the caller passes sla.timerange_values(now); confirm it
    # returns one of the String forms above.
    #
    def duty_time? (timerange)
      case timerange
      when "ALWAYS"
        true
      when "NEVER"
        false
      when DUTY_RANGE
        # Use the regex captures; indexing the String would yield characters.
        from_h, from_m, until_h, until_m = Regexp.last_match.captures.map(&:to_i)
        t_duty_from  = Time.local(self.now.year, self.now.month, self.now.day, from_h, from_m).to_i
        t_duty_until = Time.local(self.now.year, self.now.month, self.now.day, until_h, until_m).to_i
        t_now = self.now.to_i

        if t_duty_from <= t_duty_until
          t_now.between?(t_duty_from, t_duty_until)
        else
          # overnight range: on duty late in the evening or early in the morning
          t_now >= t_duty_from || t_now <= t_duty_until
        end
      else
        true
      end
    end

    ###################################################################
    ####### HELPER BLOCK (methods for :process! ) #####################
    ###################################################################


    ##
    # cleanup method
    #
    # Enqueues a Notifu::Cleaner job for the issue when the event is OK and
    # the issue's action is "resolve".
    #
    def cleanup!
      if is_ok? && self.issue.action == "resolve"
        Notifu::Cleaner.perform_async(self.issue.notifu_id)
      end
    end

    ##
    # get stashes from Sensu API
    #
    # Fetches "<api_endpoint>/stashes" once per worker instance and memoizes
    # the parsed list. Always returns an Array: a failed request, unparsable
    # body or non-list payload is logged and yields an empty list.
    #
    def get_stashes
      return @stashes if @stashes
      begin
        sensu_api = Excon.get "#{self.event.api_endpoint}/stashes"
        parsed = JSON.parse sensu_api.body
        if parsed.is_a?(Array)
          @stashes = parsed
        else
          @stashes = []
          log "error", "Failed to get stashes #{self.event.api_endpoint}/stashes"
        end
      rescue
        @stashes = []
        log "error", "Failed to get stashes #{self.event.api_endpoint}/stashes"
      end
      # Explicit return value: the rescue path must not leak log's result.
      @stashes
    end


    ##
    # unsilence method
    #
    # Deletes the silence stash of this event when the stash has a negative
    # "expire" value and the event asks for unsilencing; otherwise logs why
    # the stash was left in place. Uses the same path as #silenced?, so
    # keepalive events address the host-level stash.
    #
    def unsilence!
      path = silence_path
      get_stashes.each do |stash|
        next unless stash["path"] == path

        if stash["expire"] < 0
          if self.event.unsilence
            begin
              Excon.delete "#{self.event.api_endpoint}/stashes/#{path}"
              log "info", "Unstashed #{self.event.host}/#{self.event.service} after recovery"
            rescue
              log "warning", "Failed to delete stash from Sensu API: #{self.event.api_endpoint}/stashes/#{path}"
            end
          else
            log "info", "#{self.event.host}/#{self.event.service} left stashed (auto-unstash disabled)"
          end
        else
          log "info", "#{self.event.host}/#{self.event.service} left stashed (auto-unstash doesn't work on checks with defined expiry)"
        end
      end
    end

    ##
    # logging method
    #
    # Prefixes the message with the Sidekiq job id and forwards it to the
    # global logger.
    #
    def log(prio, msg)
      $logger.log prio, "JID-#{self.jid}: " + msg.to_s
    end

    ##
    # action logging method
    #
    def action_log event
      $logger.action_log "processor", event
    end

    private

    # Stash path that silences this event: host-level for keepalive events,
    # host/service otherwise.
    def silence_path
      if self.event.service == "keepalive"
        "silence/#{self.event.host}"
      else
        "silence/#{self.event.host}/#{self.event.service}"
      end
    end

    # Runs the decision logic for one group/SLA pair. Returns [result, notified]
    # where result is the list of decision steps and notified describes the
    # notification that was sent (or the "none" placeholder).
    def evaluate_group_sla(sla, group)
      result = []
      notified = {
        sla: String.new,
        group: String.new,
        actors: [],
        contacts: [],
        escalation_level: "none"
      }

      if enough_occurrences? && self.event.action.to_s == "create"
        result << "enough occurrences have passed"
        notified = decide_on_create(sla, group, result) || notified
      elsif self.event.action.to_s == "resolve" && self.issue.occurrences_count.to_i >= self.event.occurrences_trigger.to_i
        if !silenced?
          result << "recovery of an event"
          notified = notify_and_mark(sla, group, result)
        elsif self.event.unsilence
          result << "recovery of an event (with unsilence)"
          unsilence!
        end
      else
        result << "not enough occurrences of this event" << "IDLE"
      end

      [result, notified]
    end

    # Decision chain for a "create" event that has enough occurrences.
    # Appends each step to result; returns the notify! hash when a
    # notification was sent and nil when the outcome is IDLE.
    def decide_on_create(sla, group, result)
      if silenced?
        result << "issue is silenced" << "IDLE"
        return nil
      end
      result << "issue is not silenced"

      unless duty_time?(sla.timerange_values(self.now))
        result << "duty is not active at this time" << "IDLE"
        return nil
      end
      result << "duty is active"

      if status_changed?
        result << "issue state has changed"
        return notify_and_mark(sla, group, result)
      end
      result << "issue state hasn't changed"

      case self.event.code
      when 0
        result << "issue is in OK state" << "IDLE"
        nil
      when 1
        result << "issue is in WARNING state"
        if first_notification?(sla, group)
          result << "issue is new"
          notify_and_mark(sla, group, result)
        else
          result << "already notified" << "IDLE"
          nil
        end
      when 2
        result << "issue is not a warning"
        if renotify?(sla, group)
          result << "it's time to renotify"
          notify_and_mark(sla, group, result)
        else
          result << "not yet time to renotify or escalate" << "IDLE"
          nil
        end
      else
        result << "unknown state (#{self.event.code})" << "IDLE"
        nil
      end
    end

    # Sends the notification, marks the decision trail with "ACTION" and
    # returns the notify! hash.
    def notify_and_mark(sla, group, result)
      notified = notify!(sla, group)
      result << "ACTION"
      notified
    end

    # Builds the action-log Hash for one processed group/SLA pair.
    def build_action_log_entry(group, sla, result, notified)
      {
        logic: result.join(' -> '),
        result: result[-1],
        reason: result[-2],
        group: group.name,
        sla: sla.name,
        host: self.event.host,
        service: self.event.service,
        message: self.event.message,
        state: self.event.code.to_state,
        contacts: notified[:contacts].to_json,
        actors: notified[:actors].to_json,
        occurrences_trigger: self.event.occurrences_trigger.to_i,
        occurrences_count: self.event.occurrences_count.to_i,
        check_duration: self.event.duration,
        escalation_level: notified[:escalation_level].to_s,
        sidekiq_jid: self.jid,
        notifu_id: self.event.notifu_id,
        :"@timestamp" => self.now.iso8601,
      }
    end

    # Copies the event's state onto the issue and saves it.
    def sync_issue!
      if self.event.process_result.length > 0
        self.issue.message = self.event.message
        self.issue.action = self.event.action
        self.issue.process_result = self.event.process_result
        self.issue.save
      end

      # code and creation time only move when the state actually changed
      if status_changed?
        self.issue.code = self.event.code
        self.issue.time_created = self.event.time_created
      end

      self.issue.occurrences_trigger = self.event.occurrences_trigger
      self.issue.occurrences_count = self.event.occurrences_count
      self.issue.time_last_event = self.event.time_last_event
      self.issue.sgs = self.event.sgs
      self.issue.aspiring_code = self.event.code
      self.issue.api_endpoint = self.event.api_endpoint
      self.issue.duration = self.event.duration

      self.issue.save
    end

  end
end
414
+
415
+ ###################################################################
416
+ ###################################################################
417
+ ####### CLEANER WORKER ############################################
418
+ ###################################################################
419
+ ###################################################################
420
+
421
module Notifu
  ##
  # Sidekiq worker (on the "processor" queue) that deletes the issue with a
  # given notifu_id after a delay.
  #
  class Cleaner
    include Sidekiq::Worker
    include Notifu::Util

    sidekiq_options :retry => true
    sidekiq_options :queue => "processor"

    ##
    # Sleeps for +delay+ seconds, then deletes the issue identified by
    # +notifu_id+ and logs whether it was found.
    #
    # notifu_id - id used to look the issue up.
    # delay     - seconds to wait before the lookup (default 15).
    #
    def perform notifu_id, delay=15
      sleep delay

      # Explicit nil check instead of rescuing NoMethodError, so a genuine
      # NoMethodError raised inside delete is not reported as "not found".
      issue = Notifu::Model::Issue.with(:notifu_id, notifu_id)
      if issue
        issue.delete
        log "info", "Cleanup NID #{notifu_id} - success"
      else
        log "info", "Cleanup NID #{notifu_id} - not found"
      end
    end

    ##
    # Prefixes the message with the Sidekiq job id and forwards it to the
    # global logger.
    #
    def log(prio, msg)
      $logger.log prio, "JID-#{self.jid}: " + msg.to_s
    end

  end
end
@@ -0,0 +1,24 @@
1
+ ##
2
+ # Require block
3
+ #
4
+ require 'rubygems'
5
+ require 'bundler/setup'
6
+ require "ohm"
7
+ require "elasticsearch"
8
+ require "log4r/outputter/syslogoutputter"
9
+ require "log4r/configurator"
10
+ require "log4r"
11
+ require "syslog"
12
+ require "sidekiq"
13
+ require "sidekiq/logging"
14
+ require "notifu"
15
+
16
##
# Config block
#
# The configuration returned by Notifu::Config is published as
# Notifu::CONFIG for the rest of the process.
#
Notifu::CONFIG = Notifu::Config.new.get

##
# Ohm init
#
# Ohm talks to the Redis connection configured under :redis_data.
#
Ohm.redis = Redic.new(Notifu::CONFIG[:redis_data])