flapjack 0.7.22 → 0.7.25

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. data/CHANGELOG.md +19 -0
  2. data/bin/flapjack +3 -1
  3. data/bin/flapjack-nagios-receiver +5 -4
  4. data/bin/receive-events +2 -2
  5. data/features/events.feature +101 -95
  6. data/features/notification_rules.feature +36 -4
  7. data/features/steps/notifications_steps.rb +4 -0
  8. data/flapjack.gemspec +3 -2
  9. data/lib/flapjack/coordinator.rb +8 -6
  10. data/lib/flapjack/data/entity_check.rb +20 -13
  11. data/lib/flapjack/data/event.rb +4 -7
  12. data/lib/flapjack/data/notification.rb +63 -45
  13. data/lib/flapjack/filters/acknowledgement.rb +26 -24
  14. data/lib/flapjack/filters/delays.rb +46 -42
  15. data/lib/flapjack/filters/ok.rb +31 -34
  16. data/lib/flapjack/filters/scheduled_maintenance.rb +2 -2
  17. data/lib/flapjack/filters/unscheduled_maintenance.rb +2 -3
  18. data/lib/flapjack/gateways/email.rb +111 -114
  19. data/lib/flapjack/gateways/email/alert.html.erb +11 -11
  20. data/lib/flapjack/gateways/email/alert.text.erb +19 -6
  21. data/lib/flapjack/gateways/sms_messagenet.rb +15 -5
  22. data/lib/flapjack/gateways/web.rb +3 -4
  23. data/lib/flapjack/gateways/web/public/css/flapjack.css +0 -2
  24. data/lib/flapjack/gateways/web/public/img/flapjack-favicon-32-16.ico +0 -0
  25. data/lib/flapjack/gateways/web/public/img/flapjack-favicon-64-32-24-16.ico +0 -0
  26. data/lib/flapjack/gateways/web/public/img/flapjack-transparent-300.png +0 -0
  27. data/lib/flapjack/gateways/web/public/img/flapjack-transparent-350-400.png +0 -0
  28. data/lib/flapjack/gateways/web/views/_head.html.erb +1 -0
  29. data/lib/flapjack/gateways/web/views/index.html.erb +1 -1
  30. data/lib/flapjack/notifier.rb +2 -3
  31. data/lib/flapjack/pikelet.rb +5 -4
  32. data/lib/flapjack/processor.rb +39 -27
  33. data/lib/flapjack/version.rb +1 -1
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +5 -0
  35. data/spec/lib/flapjack/data/event_spec.rb +0 -1
  36. data/spec/lib/flapjack/gateways/email_spec.rb +5 -9
  37. data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +80 -1
  38. data/spec/lib/flapjack/gateways/web_spec.rb +1 -1
  39. data/spec/lib/flapjack/pikelet_spec.rb +4 -3
  40. data/spec/lib/flapjack/processor_spec.rb +0 -1
  41. metadata +28 -11
  42. data/lib/flapjack/filters/detect_mass_client_failures.rb +0 -44
  43. data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +0 -6
@@ -40,8 +40,6 @@ module Flapjack
40
40
  message += " is #{state.upcase}" unless ['acknowledgement', 'test'].include?(notification_type)
41
41
  message += " at #{Time.at(time).strftime('%-d %b %H:%M')}, #{summary}"
42
42
 
43
- notification['message'] = message
44
-
45
43
  # TODO log error and skip instead of raising errors
46
44
  if @config.nil? || (@config.respond_to?(:empty?) && @config.empty?)
47
45
  @logger.error "Messagenet config is missing"
@@ -53,13 +51,12 @@ module Flapjack
53
51
  username = @config["username"]
54
52
  password = @config["password"]
55
53
  address = notification['address']
56
- message = notification['message']
54
+ safe_message = truncate(message, 159)
57
55
  notification_id = notification['id']
58
56
 
59
57
  [[username, "Messagenet username is missing"],
60
58
  [password, "Messagenet password is missing"],
61
59
  [address, "SMS address is missing"],
62
- [message, "SMS message is missing"],
63
60
  [notification_id, "Notification id is missing"]].each do |val_err|
64
61
 
65
62
  next unless val_err.first.nil? || (val_err.first.respond_to?(:empty?) && val_err.first.empty?)
@@ -74,7 +71,7 @@ module Flapjack
74
71
  query = {'Username' => username,
75
72
  'Pwd' => password,
76
73
  'PhoneNumber' => address,
77
- 'PhoneMessage' => message}
74
+ 'PhoneMessage' => safe_message}
78
75
 
79
76
  http = EM::HttpRequest.new(MESSAGENET_URL).get(:query => query)
80
77
 
@@ -91,6 +88,19 @@ module Flapjack
91
88
  end
92
89
 
93
90
  end
91
+
92
+ # copied from ActiveSupport
93
+ def truncate(str, length, options = {})
94
+ text = str.dup
95
+ options[:omission] ||= "..."
96
+
97
+ length_with_room_for_omission = length - options[:omission].length
98
+ stop = options[:separator] ?
99
+ (text.rindex(options[:separator], length_with_room_for_omission) || length_with_room_for_omission) : length_with_room_for_omission
100
+
101
+ (text.length > length ? text[0...stop] + options[:omission] : text).to_s
102
+ end
103
+
94
104
  end
95
105
  end
96
106
  end
@@ -259,9 +259,8 @@ module Flapjack
259
259
  entity_check = get_entity_check(params[:entity], params[:check])
260
260
  return 404 if entity_check.nil?
261
261
 
262
- entity_check.create_scheduled_maintenance(:start_time => start_time,
263
- :duration => duration,
264
- :summary => summary)
262
+ entity_check.create_scheduled_maintenance(start_time, duration,
263
+ :summary => summary)
265
264
  redirect back
266
265
  end
267
266
 
@@ -340,7 +339,7 @@ module Flapjack
340
339
  entity_check = Flapjack::Data::EntityCheck.for_entity(entity,
341
340
  check, :redis => redis)
342
341
  summary = entity_check.summary
343
- summary = summary[0..76] + '...' unless summary.length < 81
342
+ summary = summary[0..76] + '...' unless (summary.nil? || (summary.length < 81))
344
343
  latest_notif =
345
344
  {:problem => entity_check.last_notification_for_state(:problem)[:timestamp],
346
345
  :recovery => entity_check.last_notification_for_state(:recovery)[:timestamp],
@@ -42,8 +42,6 @@
42
42
  /* Not required for template or sticky footer method. */
43
43
 
44
44
  .container {
45
- width: auto;
46
- max-width: 1900px;
47
45
  }
48
46
  .container .credit {
49
47
  margin: 10px 0;
@@ -2,3 +2,4 @@
2
2
  <link rel="stylesheet" href="/css/bootstrap.min.css" media="screen">
3
3
  <link rel="stylesheet" href="/css/flapjack.css" media="screen">
4
4
  <link rel="stylesheet" href="/css/bootstrap-responsive.min.css" media="screen">
5
+ <link rel="shortcut icon" href="/img/flapjack-favicon-32-16.ico"> <!-- thank you http://xiconeditor.com/ -->
@@ -16,7 +16,7 @@
16
16
  </div>
17
17
  <h4><a href="/entities_failing" title="failing entities"><%= h @count_failing_entities %></a> out of <a href="/entities_all" title="all entities"><%= h @count_all_entities %></a> entities have failing checks</h4>
18
18
  <h4><a href="/checks_failing" title="failing checks"><%= h @count_failing_checks %></a> out of <a href="/checks_all" title="all checks"><%= h @count_all_checks %></a> checks are failing</h4>
19
- <img src="/img/flapjack_white_bg_400_353.jpeg" width="400" height="353">
19
+ <img src="/img/flapjack-transparent-350-400.png" height="350" width="400">
20
20
  </div>
21
21
  <div id="push"></div>
22
22
  </div>
@@ -121,8 +121,7 @@ module Flapjack
121
121
 
122
122
  contact = message.contact
123
123
 
124
- # was event.ok?
125
- if (notification.event_state == 'ok') || (notification.event_state == 'up')
124
+ if notification.ok?
126
125
  contact.update_sent_alert_keys(
127
126
  :media => media_type,
128
127
  :check => event_id,
@@ -142,7 +141,7 @@ module Flapjack
142
141
  contact.update_sent_alert_keys(
143
142
  :media => media_type,
144
143
  :check => event_id,
145
- :state => notification.event_state)
144
+ :state => notification.state)
146
145
  end
147
146
 
148
147
 
@@ -154,15 +154,16 @@ module Flapjack
154
154
  def initialize(type, pikelet_klass, opts = {})
155
155
  super(type, pikelet_klass, opts)
156
156
 
157
- pikelet_klass.instance_variable_set('@config', @config)
158
- pikelet_klass.instance_variable_set('@redis_config', @redis_config)
159
- pikelet_klass.instance_variable_set('@logger', @logger)
160
-
161
157
  unless defined?(@@resque_pool) && !@@resque_pool.nil?
162
158
  @@resque_pool = Flapjack::RedisPool.new(:config => @redis_config)
163
159
  ::Resque.redis = @@resque_pool
160
+ @@redis_connection = Flapjack::RedisPool.new(:config => @redis_config)
164
161
  end
165
162
 
163
+ pikelet_klass.instance_variable_set('@config', @config)
164
+ pikelet_klass.instance_variable_set('@redis', @@redis_connection)
165
+ pikelet_klass.instance_variable_set('@logger', @logger)
166
+
166
167
  # TODO error if config['queue'].nil?
167
168
 
168
169
  @worker = EM::Resque::Worker.new(@config['queue'])
@@ -8,7 +8,6 @@ require 'flapjack/filters/acknowledgement'
8
8
  require 'flapjack/filters/ok'
9
9
  require 'flapjack/filters/scheduled_maintenance'
10
10
  require 'flapjack/filters/unscheduled_maintenance'
11
- require 'flapjack/filters/detect_mass_client_failures'
12
11
  require 'flapjack/filters/delays'
13
12
 
14
13
  require 'flapjack/data/entity_check'
@@ -47,7 +46,6 @@ module Flapjack
47
46
  @filters << Flapjack::Filters::Ok.new(options)
48
47
  @filters << Flapjack::Filters::ScheduledMaintenance.new(options)
49
48
  @filters << Flapjack::Filters::UnscheduledMaintenance.new(options)
50
- @filters << Flapjack::Filters::DetectMassClientFailures.new(options)
51
49
  @filters << Flapjack::Filters::Delays.new(options)
52
50
  @filters << Flapjack::Filters::Acknowledgement.new(options)
53
51
 
@@ -150,18 +148,18 @@ module Flapjack
150
148
 
151
149
  event.tags = (event.tags || Flapjack::Data::TagSet.new) + entity_check.tags
152
150
 
153
- should_notify = update_keys(event, entity_check, timestamp)
151
+ should_notify, previous_state = update_keys(event, entity_check, timestamp)
154
152
 
155
153
  if !should_notify
156
154
  @logger.debug("Not generating notification for event #{event.id} because filtering was skipped")
157
155
  return
158
- elsif blocker = @filters.find {|filter| filter.block?(event) }
156
+ elsif blocker = @filters.find {|filter| filter.block?(event, entity_check, previous_state) }
159
157
  @logger.debug("Not generating notification for event #{event.id} because this filter blocked: #{blocker.name}")
160
158
  return
161
159
  end
162
160
 
163
161
  @logger.info("Generating notification for event #{event_str}")
164
- generate_notification(event, entity_check, timestamp)
162
+ generate_notification(event, entity_check, timestamp, previous_state)
165
163
  end
166
164
 
167
165
  def update_keys(event, entity_check, timestamp)
@@ -169,6 +167,7 @@ module Flapjack
169
167
  touch_keys
170
168
 
171
169
  result = true
170
+ previous_state = nil
172
171
 
173
172
  event.counter = @redis.hincrby('event_counters', 'all', 1)
174
173
  @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
@@ -181,9 +180,7 @@ module Flapjack
181
180
  case event.type
182
181
  # Service events represent changes in state on monitored systems
183
182
  when 'service'
184
- # Track when we last saw an event for a particular entity:check pair
185
- entity_check.last_update = timestamp
186
-
183
+ @redis.multi
187
184
  if event.ok?
188
185
  @redis.hincrby('event_counters', 'ok', 1)
189
186
  @redis.hincrby("event_counters:#{@instance_id}", 'ok', 1)
@@ -192,10 +189,11 @@ module Flapjack
192
189
  @redis.hincrby("event_counters:#{@instance_id}", 'failure', 1)
193
190
  @redis.hset('unacknowledged_failures', event.counter, event.id)
194
191
  end
192
+ @redis.exec
195
193
 
196
- event.previous_state = entity_check.state
194
+ previous_state = entity_check.state
197
195
 
198
- if event.previous_state.nil?
196
+ if previous_state.nil?
199
197
  @logger.info("No previous state for event #{event.id}")
200
198
 
201
199
  if @ncsm_duration >= 0
@@ -203,28 +201,24 @@ module Flapjack
203
201
  entity_check.create_scheduled_maintenance(timestamp,
204
202
  @ncsm_duration, :summary => 'Automatically created for new check')
205
203
  end
206
- else
207
- event.previous_state_duration = timestamp - entity_check.last_change.to_i
204
+
205
+ # If the service event's state is ok and there was no previous state, don't alert.
206
+ # This stops new checks from alerting as "recovery" after they have been added.
207
+ if event.ok?
208
+ @logger.debug("setting skip_filters to true because there was no previous state and event is ok")
209
+ result = false
210
+ end
208
211
  end
209
212
 
210
213
  entity_check.update_state(event.state, :timestamp => timestamp,
211
- :summary => event.summary, :client => event.client,
212
- :count => event.counter, :details => event.details)
213
-
214
- # No state change, and event is ok, so no need to run through filters
215
- # OR
216
- # If the service event's state is ok and there was no previous state, don't alert.
217
- # This stops new checks from alerting as "recovery" after they have been added.
218
- if !event.previous_state && event.ok?
219
- @logger.debug("setting skip_filters to true because there was no previous state and event is ok")
220
- result = false
221
- end
214
+ :summary => event.summary, :count => event.counter, :details => event.details)
222
215
 
223
216
  entity_check.update_current_scheduled_maintenance
224
217
 
225
218
  # Action events represent human or automated interaction with Flapjack
226
219
  when 'action'
227
220
  # When an action event is processed, store the event.
221
+ @redis.multi
228
222
  @redis.hset(event.id + ':actions', timestamp, event.state)
229
223
  @redis.hincrby('event_counters', 'action', 1)
230
224
  @redis.hincrby("event_counters:#{@instance_id}", 'action', 1)
@@ -232,26 +226,44 @@ module Flapjack
232
226
  if event.acknowledgement? && event.acknowledgement_id
233
227
  @redis.hdel('unacknowledged_failures', event.acknowledgement_id)
234
228
  end
229
+ @redis.exec
235
230
  end
236
231
 
237
- result
232
+ [result, previous_state]
238
233
  end
239
234
 
240
- def generate_notification(event, entity_check, timestamp)
235
+ def generate_notification(event, entity_check, timestamp, previous_state)
241
236
  notification_type = Flapjack::Data::Notification.type_for_event(event)
242
237
  max_notified_severity = entity_check.max_notified_severity_of_current_failure
243
238
 
239
+ @redis.multi
244
240
  @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
245
241
  @redis.set("#{event.id}:last_#{event.state}_notification", timestamp) if event.failure?
246
242
  @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
247
243
  @redis.rpush("#{event.id}:#{event.state}_notifications", timestamp) if event.failure?
244
+ @redis.exec
245
+
248
246
  @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}: " + event.inspect)
249
247
 
250
248
  severity = Flapjack::Data::Notification.severity_for_event(event, max_notified_severity)
251
- last_state = entity_check.historical_state_before(timestamp)
249
+
250
+ historical_state = case entity_check.state
251
+ when previous_state
252
+ # current state
253
+ curr = entity_check.historical_states(nil, nil, :order => 'desc', :limit => 1)
254
+ (curr && (curr.size == 1)) ? curr.first : nil
255
+ else
256
+ # last state
257
+ curr_and_last = entity_check.historical_states(nil, nil, :order => 'desc', :limit => 2)
258
+ (curr_and_last && (curr_and_last.size == 2)) ? curr_and_last.last : nil
259
+ end
260
+
261
+ lc = entity_check.last_change
262
+ state_duration = lc ? (timestamp - lc) : nil
252
263
 
253
264
  Flapjack::Data::Notification.add(@notifier_queue, event,
254
- :type => notification_type, :severity => severity, :last_state => last_state,
265
+ :type => notification_type, :severity => severity,
266
+ :last_state => historical_state, :state_duration => state_duration,
255
267
  :redis => @redis)
256
268
  end
257
269
 
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  module Flapjack
4
- VERSION = "0.7.22"
4
+ VERSION = "0.7.25"
5
5
  end
@@ -373,12 +373,17 @@ describe Flapjack::Data::EntityCheck, :redis => true do
373
373
  it "updates state" do
374
374
  @redis.hset("check:#{name}:#{check}", 'state', 'ok')
375
375
 
376
+ old_timestamp = @redis.hget("check:#{name}:#{check}", 'last_update')
377
+
376
378
  ec = Flapjack::Data::EntityCheck.for_entity_name(name, check, :redis => @redis)
377
379
  ec.update_state('critical')
378
380
 
379
381
  state = @redis.hget("check:#{name}:#{check}", 'state')
380
382
  state.should_not be_nil
381
383
  state.should == 'critical'
384
+
385
+ new_timestamp = @redis.hget("check:#{name}:#{check}", 'last_update')
386
+ new_timestamp.should_not == old_timestamp
382
387
  end
383
388
 
384
389
  it "updates enabled checks" do
@@ -88,7 +88,6 @@ describe Flapjack::Data::Event do
88
88
  its(:duration) { should == event_data['duration'] }
89
89
  its(:time) { should == event_data['time'] }
90
90
  its(:id) { should == 'xyz-example.com:ping' }
91
- its(:client) { should == 'xyz' }
92
91
  its(:type) { should == 'service' }
93
92
 
94
93
  it { should be_a_service }
@@ -3,22 +3,13 @@ require 'flapjack/gateways/email'
3
3
 
4
4
  describe Flapjack::Gateways::Email, :logger => true do
5
5
 
6
- before(:each) do
7
- Flapjack::Gateways::Email.instance_variable_set('@config', {})
8
- Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
9
- Flapjack::Gateways::Email.start
10
- end
11
-
12
6
  it "sends a mail with text and html parts" do
13
7
  email = mock('email')
14
8
 
15
9
  entity_check = mock(Flapjack::Data::EntityCheck)
16
10
  entity_check.should_receive(:in_scheduled_maintenance?).and_return(false)
17
11
  entity_check.should_receive(:in_unscheduled_maintenance?).and_return(false)
18
- entity_check.should_receive(:last_change).and_return(Time.now.to_i)
19
-
20
12
  redis = mock('redis')
21
- ::Resque.should_receive(:redis).and_return(redis)
22
13
 
23
14
  Flapjack::Data::EntityCheck.should_receive(:for_event_id).
24
15
  with('example.com:ping', :redis => redis).and_return(entity_check)
@@ -38,12 +29,17 @@ describe Flapjack::Gateways::Email, :logger => true do
38
29
  'contact_first_name' => 'John',
39
30
  'contact_last_name' => 'Smith',
40
31
  'state' => 'ok',
32
+ 'state_duration' => 2,
41
33
  'summary' => 'smile',
42
34
  'last_state' => 'problem',
43
35
  'last_summary' => 'frown',
44
36
  'time' => Time.now.to_i,
45
37
  'event_id' => 'example.com:ping'}
46
38
 
39
+ Flapjack::Gateways::Email.instance_variable_set('@config', {})
40
+ Flapjack::Gateways::Email.instance_variable_set('@redis', redis)
41
+ Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
42
+ Flapjack::Gateways::Email.start
47
43
  Flapjack::Gateways::Email.perform(notification)
48
44
  end
49
45
 
@@ -1,6 +1,85 @@
1
1
  require 'spec_helper'
2
2
  require 'flapjack/gateways/sms_messagenet'
3
3
 
4
- describe Flapjack::Gateways::SmsMessagenet do
4
+ describe Flapjack::Gateways::SmsMessagenet, :logger => true do
5
+
6
+ let(:lock) { mock(Monitor) }
7
+
8
+ let(:config) { {'username' => 'user',
9
+ 'password' => 'password'
10
+ }
11
+ }
12
+
13
+ let(:time) { Time.now }
14
+
15
+ let(:time_str) { Time.at(time).strftime('%-d %b %H:%M') }
16
+
17
+ let(:message) { {'notification_type' => 'recovery',
18
+ 'contact_first_name' => 'John',
19
+ 'contact_last_name' => 'Smith',
20
+ 'state' => 'ok',
21
+ 'summary' => 'smile',
22
+ 'last_state' => 'problem',
23
+ 'last_summary' => 'frown',
24
+ 'time' => time.to_i,
25
+ 'address' => '555-555555',
26
+ 'event_id' => 'example.com:ping',
27
+ 'id' => '123456789'
28
+ }
29
+ }
30
+
31
+ it "sends an SMS message" do
32
+ req = stub_request(:get, "https://www.messagenet.com.au/dotnet/Lodge.asmx/LodgeSMSMessage").
33
+ with(:query => {'PhoneNumber' => '555-555555',
34
+ 'Username' => 'user', 'Pwd' => 'password',
35
+ 'PhoneMessage' => "RECOVERY: 'ping' on example.com is OK at #{time_str}, smile"}).
36
+ to_return(:status => 200)
37
+
38
+ EM.synchrony do
39
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', config)
40
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
41
+ Flapjack::Gateways::SmsMessagenet.start
42
+ Flapjack::Gateways::SmsMessagenet.perform(message)
43
+ EM.stop
44
+ end
45
+ req.should have_been_requested
46
+ end
47
+
48
+ it "truncates a long message a" do
49
+ long_msg = message.merge('summary' => 'Four score and seven years ago our ' +
50
+ 'fathers brought forth on this continent, a new nation, conceived in ' +
51
+ 'Liberty, and dedicated to the proposition that all men are created equal.')
52
+
53
+ req = stub_request(:get, "https://www.messagenet.com.au/dotnet/Lodge.asmx/LodgeSMSMessage").
54
+ with(:query => {'PhoneNumber' => '555-555555',
55
+ 'Username' => 'user', 'Pwd' => 'password',
56
+ 'PhoneMessage' => "RECOVERY: 'ping' on example.com is " +
57
+ "OK at #{time_str}, Four score and seven years ago " +
58
+ 'our fathers brought forth on this continent, a new ' +
59
+ 'nation, conceived i...'}).
60
+ to_return(:status => 200)
61
+
62
+ EM.synchrony do
63
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', config)
64
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
65
+ Flapjack::Gateways::SmsMessagenet.start
66
+ Flapjack::Gateways::SmsMessagenet.perform(long_msg)
67
+ EM.stop
68
+ end
69
+ req.should have_been_requested
70
+ end
71
+
72
+ it "does not send an SMS message with an invalid config" do
73
+ EM.synchrony do
74
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', config.reject {|k, v| k == 'password'})
75
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
76
+ Flapjack::Gateways::SmsMessagenet.start
77
+ Flapjack::Gateways::SmsMessagenet.perform(message)
78
+ EM.stop
79
+ end
80
+
81
+ WebMock.should_not have_requested(:get,
82
+ "https://www.messagenet.com.au/dotnet/Lodge.asmx/LodgeSMSMessage")
83
+ end
5
84
 
6
85
  end