flapjack 0.6.24 → 0.6.25

Sign up to get free protection for your applications and to get access to all the features.
@@ -42,6 +42,21 @@ development:
42
42
  rooms:
43
43
  - "gimp@conference.jabber.domain.tld"
44
44
  - "log@conference.jabber.domain.tld"
45
+ oobetet:
46
+ enabled: yes
47
+ server: "jabber.domain.tld"
48
+ port: 5222
49
+ jabberid: "flapjacktest@jabber.domain.tld"
50
+ password: "nuther-good-password"
51
+ alias: "flapjacktest"
52
+ watched_check: "PING"
53
+ watched_entity: "foo.bar.net"
54
+ max_latency: 300
55
+ pagerduty_contact: "11111111111111111111111111111111"
56
+ rooms:
57
+ - "flapjacktest@conference.jabber.domain.tld"
58
+ - "gimp@conference.jabber.domain.tld"
59
+ - "log@conference.jabber.domain.tld"
45
60
  pagerduty_gateway:
46
61
  enabled: yes
47
62
  queue: pagerduty_notifications
@@ -11,7 +11,6 @@ Feature: events
11
11
  Given check 'abc' for entity 'def' is in an ok state
12
12
  When an ok event is received for check 'abc' on entity 'def'
13
13
  Then a notification should not be generated for check 'abc' on entity 'def'
14
- # And show me the output
15
14
 
16
15
  Scenario: Check ok to failed
17
16
  Given check 'abc' for entity 'def' is in an ok state
@@ -40,11 +39,9 @@ Feature: events
40
39
  When a failure event is received for check 'abc' on entity 'def'
41
40
  And 1 minute passes
42
41
  And a failure event is received for check 'abc' on entity 'def'
43
- And show me the notifications
44
42
  Then a notification should be generated for check 'abc' on entity 'def'
45
43
  When 1 minute passes
46
44
  And a failure event is received for check 'abc' on entity 'def'
47
- And show me the notifications
48
45
  Then a notification should not be generated for check 'abc' on entity 'def'
49
46
 
50
47
  @time
@@ -121,6 +118,31 @@ Feature: events
121
118
  And a failure event is received for check 'abc' on entity 'def'
122
119
  Then a notification should be generated for check 'abc' on entity 'def'
123
120
 
121
+ @time
122
+ Scenario: Oscillating state, period of two minutes
123
+ Given check 'abc' for entity 'def' is in an ok state
124
+ When a failure event is received for check 'abc' on entity 'def'
125
+ Then a notification should not be generated for check 'abc' on entity 'def'
126
+ When 50 seconds passes
127
+ And a failure event is received for check 'abc' on entity 'def'
128
+ Then a notification should be generated for check 'abc' on entity 'def'
129
+ When 10 seconds passes
130
+ And an ok event is received for check 'abc' on entity 'def'
131
+ Then a notification should be generated for check 'abc' on entity 'def'
132
+ When 50 seconds passes
133
+ And an ok event is received for check 'abc' on entity 'def'
134
+ Then a notification should not be generated for check 'abc' on entity 'def'
135
+ When 10 seconds passes
136
+ And a failure event is received for check 'abc' on entity 'def'
137
+ Then a notification should not be generated for check 'abc' on entity 'def'
138
+ When 50 seconds passes
139
+ And a failure event is received for check 'abc' on entity 'def'
140
+ #And show me the notifications
141
+ Then a notification should be generated for check 'abc' on entity 'def'
142
+ When 10 seconds passes
143
+ And an ok event is received for check 'abc' on entity 'def'
144
+ Then a notification should be generated for check 'abc' on entity 'def'
145
+
124
146
  Scenario: Acknowledgement when ok
125
147
  Given check 'abc' for entity 'def' is in an ok state
126
148
  When an acknowledgement event is received for check 'abc' on entity 'def'
@@ -248,6 +248,27 @@ module Flapjack
248
248
  lan.to_i
249
249
  end
250
250
 
251
+ def last_notifications_of_each_type
252
+ ln = {:problem => last_problem_notification,
253
+ :recovery => last_recovery_notification,
254
+ :acknowledgement => last_acknowledgement_notification }
255
+ puts "***** last_notifications_of_each_type for #{@key.inspect}: #{ln.inspect}"
256
+ ln
257
+ end
258
+
259
+ # unpredictable results if there are multiple notifications of different
260
+ # types sent at the same time
261
+ def last_notification
262
+ nils = { :type => nil, :timestamp => nil }
263
+ lne = last_notifications_of_each_type
264
+ ln = lne.delete_if {|type, timestamp|
265
+ timestamp.nil? || timestamp.to_i == 0
266
+ }
267
+ return nils unless ln.length > 0
268
+ lns = ln.sort_by { |type, timestamp| timestamp }.last
269
+ { :type => lns[0], :timestamp => lns[1] }
270
+ end
271
+
251
272
  def event_count_at(timestamp)
252
273
  eca = @redis.get("#{@key}:#{timestamp}:count")
253
274
  return unless (eca && eca =~ /^\d+$/)
@@ -9,7 +9,7 @@ module Flapjack
9
9
  # * If the service event’s state is a failure, and the time since the ok => failure state change
10
10
  # is below a threshold (e.g. 30 seconds), then don't alert
11
11
  # * If the service event’s state is a failure, and the time since the last alert is below a
12
- # threshold (5 minutes), then don’t alert
12
+ # threshold (5 minutes), and the last notification was not a recovery, then don’t alert
13
13
  class Delays
14
14
  include Base
15
15
 
@@ -25,20 +25,32 @@ module Flapjack
25
25
  current_time = Time.now.to_i
26
26
 
27
27
  if entity_check.failed?
28
- last_problem_alert = entity_check.last_problem_notification
29
- last_change = entity_check.last_change
28
+ last_problem_alert = entity_check.last_problem_notification
29
+ last_change = entity_check.last_change
30
+ last_notification = entity_check.last_notification
31
+ last_alert_type = last_notification[:type]
32
+ last_alert_timestamp = last_notification[:timestamp]
30
33
 
31
34
  current_failure_duration = current_time - last_change
32
35
  time_since_last_alert = current_time - last_problem_alert unless last_problem_alert.nil?
33
- @log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s}, last_change: #{last_change.to_s}, current_failure_duration: #{current_failure_duration}, time_since_last_alert: #{time_since_last_alert.to_s}")
36
+ @log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s}, " +
37
+ "last_change: #{last_change.to_s}, " +
38
+ "current_failure_duration: #{current_failure_duration}, " +
39
+ "time_since_last_alert: #{time_since_last_alert.to_s}")
34
40
  if (current_failure_duration < failure_delay)
35
41
  result = true
36
- @log.debug("Filter: Delays: blocking because duration of current failure (#{current_failure_duration}) is less than failure_delay (#{failure_delay})")
37
- elsif !last_problem_alert.nil? && (time_since_last_alert < resend_delay)
42
+ @log.debug("Filter: Delays: blocking because duration of current failure " +
43
+ "(#{current_failure_duration}) is less than failure_delay (#{failure_delay})")
44
+ elsif !last_problem_alert.nil? && (time_since_last_alert < resend_delay) &&
45
+ (last_alert_type !~ /recovery/i)
46
+
38
47
  result = true
39
- @log.debug("Filter: Delays: blocking because time since last alert for current problem (#{time_since_last_alert}) is less than resend_delay (#{resend_delay})")
48
+ @log.debug("Filter: Delays: blocking because time since last alert for " +
49
+ "current problem (#{time_since_last_alert}) is less than " +
50
+ "resend_delay (#{resend_delay}) and last alert type (#{last_alert_type}) was not a recovery")
40
51
  else
41
- @log.debug("Filter: Delays: not blocking because neither of the time comparison conditions were met")
52
+ @log.debug("Filter: Delays: not blocking because neither of the time comparison " +
53
+ "conditions were met")
42
54
  end
43
55
  else
44
56
  @log.debug("Filter: Delays: entity_check.failed? returned false ...")
@@ -32,9 +32,13 @@ module Flapjack
32
32
  log.level = Logger::INFO
33
33
  Blather.logger = log
34
34
 
35
+ def initialize
36
+ @buffer = []
37
+ @hostname = Socket.gethostname
38
+ end
39
+
35
40
  def setup
36
41
  @redis = build_redis_connection_pool
37
- @hostname = Socket.gethostname
38
42
  @flapjack_jid = Blather::JID.new((@config['jabberid'] || 'flapjack') + '/' + @hostname)
39
43
 
40
44
  super(@flapjack_jid, @config['password'], @config['server'], @config['port'].to_i)
@@ -96,10 +100,13 @@ module Flapjack
96
100
  say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
97
101
  end
98
102
  end
103
+ return if @buffer.empty?
104
+ while stanza = @buffer.shift
105
+ @logger.debug("Sending a buffered jabber message to: #{stanza.to}, using: #{stanza.type}, message: #{stanza.body}")
106
+ end
99
107
  end
100
108
 
101
109
  def interpreter(command)
102
-
103
110
  msg = nil
104
111
  action = nil
105
112
  entity_check = nil
@@ -219,8 +226,14 @@ module Flapjack
219
226
  end
220
227
 
221
228
  def say(to, msg, using = :chat)
222
- @logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
223
- write Blather::Stanza::Message.new(to, msg, using)
229
+ stanza = Blather::Stanza::Message.new(to, msg, using)
230
+ if connected?
231
+ @logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
232
+ write(stanza)
233
+ else
234
+ @logger.debug("Buffering a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
235
+ @buffer << stanza
236
+ end
224
237
  end
225
238
 
226
239
  def add_shutdown_event(opts = {})
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  module Flapjack
4
- VERSION = "0.6.24"
4
+ VERSION = "0.6.25"
5
5
  end
data/lib/flapjack/web.rb CHANGED
@@ -62,6 +62,7 @@ module Flapjack
62
62
  end
63
63
 
64
64
  get '/check' do
65
+ begin
65
66
  @entity = params[:entity]
66
67
  @check = params[:check]
67
68
 
@@ -74,11 +75,7 @@ module Flapjack
74
75
  @check_last_update = entity_check.last_update
75
76
  @check_last_change = last_change
76
77
  @check_summary = entity_check.summary
77
- @last_notifications =
78
- {:problem => entity_check.last_problem_notification,
79
- :recovery => entity_check.last_recovery_notification,
80
- :acknowledgement => entity_check.last_acknowledgement_notification
81
- }
78
+ @last_notifications = entity_check.last_notifications_of_each_type
82
79
  @in_scheduled_maintenance = entity_check.in_scheduled_maintenance?
83
80
  @in_unscheduled_maintenance = entity_check.in_unscheduled_maintenance?
84
81
  @scheduled_maintenances = entity_check.maintenances(nil, nil, :scheduled => true)
@@ -86,15 +83,20 @@ module Flapjack
86
83
  entity_check.event_count_at(entity_check.last_change) : nil
87
84
 
88
85
  haml :check
86
+ rescue Exception => e
87
+ puts e.message
88
+ puts e.backtrace.join("\n")
89
+ end
90
+
89
91
  end
90
92
 
91
93
  post '/acknowledgements/:entity/:check' do
92
- @entity = params[:entity]
93
- @check = params[:check]
94
- @summary = params[:summary]
94
+ @entity = params[:entity]
95
+ @check = params[:check]
96
+ @summary = params[:summary]
95
97
  @acknowledgement_id = params[:acknowledgement_id]
96
98
 
97
- dur = ChronicDuration.parse(params[:duration] || '')
99
+ dur = ChronicDuration.parse(params[:duration] || '')
98
100
  @duration = (dur.nil? || (dur <= 0)) ? (4 * 60 * 60) : dur
99
101
 
100
102
  entity_check = get_entity_check(@entity, @check)
@@ -102,6 +104,8 @@ module Flapjack
102
104
 
103
105
  ack = entity_check.create_acknowledgement('summary' => (@summary || ''),
104
106
  'acknowledgement_id' => @acknowledgement_id, 'duration' => @duration)
107
+
108
+ # FIXME: make this a flash message on the check page and delete the acknowledge page
105
109
  @acknowledge_success = !!ack
106
110
  [201, haml(:acknowledge)]
107
111
  end
@@ -48,6 +48,7 @@ describe Flapjack::Jabber do
48
48
  fj = Flapjack::Jabber.new
49
49
  fj.bootstrap(:config => config)
50
50
 
51
+ fj.should_receive(:connected?).and_return(true)
51
52
  fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Presence))
52
53
  fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
53
54
 
@@ -79,6 +80,7 @@ describe Flapjack::Jabber do
79
80
  fj.bootstrap(:config => config)
80
81
  fj.instance_variable_set('@redis_handler', redis)
81
82
 
83
+ fj.should_receive(:connected?).and_return(true)
82
84
  fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
83
85
 
84
86
  fj.on_groupchat(stanza)
@@ -93,6 +95,7 @@ describe Flapjack::Jabber do
93
95
  fj = Flapjack::Jabber.new
94
96
  fj.bootstrap(:config => config)
95
97
 
98
+ fj.should_receive(:connected?).and_return(true)
96
99
  fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
97
100
 
98
101
  fj.on_groupchat(stanza)
@@ -132,9 +135,10 @@ describe Flapjack::Jabber do
132
135
 
133
136
  fj = Flapjack::Jabber.new
134
137
  fj.bootstrap(:config => config)
138
+ fj.should_receive(:register_handler).exactly(4).times
135
139
 
136
140
  fj.should_receive(:connect)
137
- fj.should_receive(:connected?).twice.and_return(true)
141
+ fj.should_receive(:connected?).exactly(3).times.and_return(true)
138
142
  fj.should_receive(:should_quit?).exactly(3).times.and_return(false, false, true)
139
143
  redis.should_receive(:blpop).twice.and_return(
140
144
  ["jabber_notifications", %q{{"notification_type":"problem","event_id":"main-example.com:ping","state":"critical","summary":"!!!"}}],
@@ -92,13 +92,15 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
92
92
  it "shows the state of a check for an entity" do
93
93
  time = Time.now.to_i
94
94
 
95
+ last_notifications = {:problem => time - ((3 * 60 * 60) + (5 * 60)),
96
+ :recovery => time - (3 * 60 * 60),
97
+ :acknowledgement => nil }
98
+
95
99
  entity_check.should_receive(:state).and_return('ok')
96
100
  entity_check.should_receive(:last_update).and_return(time - (3 * 60 * 60))
97
101
  entity_check.should_receive(:last_change).and_return(time - (3 * 60 * 60))
98
102
  entity_check.should_receive(:summary).and_return('all good')
99
- entity_check.should_receive(:last_problem_notification).and_return(time - ((3 * 60 * 60) + (5 * 60)))
100
- entity_check.should_receive(:last_recovery_notification).and_return(time - (3 * 60 * 60))
101
- entity_check.should_receive(:last_acknowledgement_notification).and_return(nil)
103
+ entity_check.should_receive(:last_notifications_of_each_type).and_return(last_notifications)
102
104
  entity_check.should_receive(:in_scheduled_maintenance?).and_return(false)
103
105
  entity_check.should_receive(:in_unscheduled_maintenance?).and_return(false)
104
106
  entity_check.should_receive(:maintenances).with(nil, nil, :scheduled => true).and_return([])
@@ -185,4 +187,4 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
185
187
  last_response.status.should == 302
186
188
  end
187
189
 
188
- end
190
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flapjack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.24
4
+ version: 0.6.25
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-09-20 00:00:00.000000000 Z
14
+ date: 2012-09-21 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: daemons