flapjack 1.2.0 → 1.2.1rc1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 04d31109d0b39ced29a1b027a0894cab07b9d4c0
4
- data.tar.gz: 86e20fc3b3f0d34acca6be7e33468fc3d609e868
3
+ metadata.gz: 1038ee80ed4a85514dbe214c5a50768c3517db8e
4
+ data.tar.gz: 6373cd02ed2d57411ed732bb1b00eb3e449b6683
5
5
  SHA512:
6
- metadata.gz: a838f02684a7cee3ba40cc5be1ecfa2e3b15246912a7bff218944b323f14d43ef64ebce1e9ce978f37795b17d44acc4ec708f4a8768e9788df339a20619f1892
7
- data.tar.gz: 93784a164826ef9af3bf84b947d0debdbf21acf3e601ee809c248534d52e0af2cbea643e5cd7ab2c4af4e14a7b8b6ced725883739ecd148ae11f3492a4771f4a
6
+ metadata.gz: 4ebdf21010802ae6ff6cd737e0ca6be03428fbafd6cc7dd31a65da6fc8fe821c3bd35a375e5a2b0a05ea92a438fdd0e2909b5f6f65ff69a99d1eb5daabc78c7c
7
+ data.tar.gz: 91d761e805b496c781596559c1d32465c875208db46365c31f60e04b19c89ef34faf3d3e0adac3425149d02421433699b8f1b1adcd3fd0b19437f063bbcbe3ba
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  ## Flapjack Changelog
2
2
 
3
+ # 1.2.1rc1 - 2014-12-04
4
+ - Bug: Exception in pagerduty gateway when looking for acknowledgements #721 (@jessereynolds)
5
+ - Bug: Retrieving scheduled maintenance via api fails when old entities without ids are present #711 (@jessereynolds)
6
+ - Bug: Repeated acknowledgement notifications sent after acknowledging on PagerDuty #714 (@jessereynolds)
7
+ - Bug: Exception thrown to browser when you pass no values to "Add Scheduled Maintenance" #719 (@ali-graham)
8
+ - Bug: API doesn't allow contact timezone to be updated #718 (@ali-graham)
9
+ - Bug: IceCube deprecation warning in 1.2.0 - :exrules is deprecated #715 (@ali-graham)
10
+ - Bug: Badly formed error response to scheduled maintenance report with invalid check id #712 (@ali-graham)
11
+ - Bug: Searching for checks in maintenance via CLI doesn't seem to work #710 (@jessereynolds, @Hobbsee)
12
+
3
13
  # 1.2.0 - 2014-11-07
4
14
  - Bug: multi blocks for safe redis connection pool usage #694 (@ali-graham)
5
15
  - Bug: data migration to work around previous notification rule bug #699 (@ali-graham)
data/Gemfile-ruby1.9.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- flapjack (1.2.0)
4
+ flapjack (1.2.1rc1)
5
5
  activesupport (~> 3.2.14)
6
6
  blather (~> 0.8.3)
7
7
  chronic
@@ -172,6 +172,7 @@ production:
172
172
  api_url: "http://localhost:3081/"
173
173
  # Full path to location of logo file, e.g. /etc/flapjack/custom_logo.png
174
174
  #logo_image_path: "/etc/flapjack/web/custom_logo/flapjack-2013-notext-transparent-300-300.png"
175
+ show_exceptions: false
175
176
  logger:
176
177
  level: INFO
177
178
  syslog_errors: yes
@@ -376,6 +377,7 @@ development:
376
377
  api_url: "http://localhost:3081/"
377
378
  # Full path to location of logo file, e.g. /etc/flapjack/custom_logo.png
378
379
  #logo_image_path: "/etc/flapjack/web/custom_logo/flapjack-2013-notext-transparent-300-300.png"
380
+ show_exceptions: true
379
381
  logger:
380
382
  level: DEBUG
381
383
  syslog_errors: yes
@@ -0,0 +1,44 @@
1
+ @rollup @notification_rules @resque @processor @notifier @events
2
+ Feature: Multiple acknowledgements after scheduled maintenance
3
+
4
+ Background:
5
+ Given the following users exist:
6
+ | id | first_name | last_name | email | sms | timezone |
7
+ | 1 | Malak | Al-Musawi | malak@example.com | +61400000001 | Asia/Baghdad |
8
+
9
+ And the following entities exist:
10
+ | id | name | contacts |
11
+ | 1 | foo | 1 |
12
+
13
+ And user 1 has the following notification intervals:
14
+ | email | sms |
15
+ | 15 | 15 |
16
+
17
+ And user 1 has the following notification rollup thresholds:
18
+ | email | sms |
19
+ | 3 | 3 |
20
+
21
+ And user 1 has the following notification rules:
22
+ | entities | unknown_media | warning_media | critical_media |
23
+ | | | email | sms,email |
24
+
25
+ @time
26
+ Scenario: Multiple acks after sched maint ends
27
+ Given the check is check 'ping' on entity 'foo'
28
+ And the check is in an ok state
29
+ And the check is in scheduled maintenance for 1 hour
30
+ When 1 minute passes
31
+ And a critical event is received
32
+ Then no email alerts should be queued for malak@example.com
33
+ When 60 minutes passes
34
+ And a critical event is received
35
+ Then 1 email alert of type problem should be queued for malak@example.com
36
+ And the check should appear in unacknowledged_failing
37
+ When 1 minute passes
38
+ And an acknowledgement event is received
39
+ Then 1 email alert of type acknowledgement should be queued for malak@example.com
40
+ And the check should not appear in unacknowledged_failing
41
+ When 1 minute passes
42
+ And an acknowledgement event is received
43
+ Then 2 email alert of type acknowledgement should be queued for malak@example.com
44
+
@@ -199,6 +199,13 @@ Feature: events
199
199
  When an acknowledgement event is received
200
200
  Then a notification should be generated
201
201
 
202
+ Scenario: Acknowledgement when acknowledged
203
+ Given the check is in a critical state
204
+ When an acknowledgement event is received
205
+ Then a notification should be generated
206
+ When an acknowledgement event is received
207
+ Then a notification should be generated
208
+
202
209
  Scenario: Brief critical then OK
203
210
  Given the check is in an ok state
204
211
  When a critical event is received
@@ -426,3 +426,17 @@ When(/^user (\S+) ceases to be a contact of entity '(.*)'$/) do |contact_id, ent
426
426
  @redis.srem("contacts_for:#{entity.id}", contact_id)
427
427
  end
428
428
 
429
+ Then(/^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') should not appear in unacknowledged_failing$/) do |check, entity|
430
+ check ||= @check
431
+ entity ||= @entity
432
+ unacknowledged_failing_checks = Flapjack::Data::EntityCheck.unacknowledged_failing(:redis => @redis)
433
+ expect(unacknowledged_failing_checks.map {|ec| "#{ec.entity.name}:#{ec.check}"}).to_not include("#{entity}:#{check}")
434
+ end
435
+
436
+ Then(/^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') should appear in unacknowledged_failing$/) do |check, entity|
437
+ check ||= @check
438
+ entity ||= @entity
439
+ unacknowledged_failing_checks = Flapjack::Data::EntityCheck.unacknowledged_failing(:redis => @redis)
440
+ expect(unacknowledged_failing_checks.map {|ec| "#{ec.entity.name}:#{ec.check}"}).to include("#{entity}:#{check}")
441
+ end
442
+
@@ -57,7 +57,6 @@ module Flapjack
57
57
  end
58
58
 
59
59
  def create
60
- exit_now!("Entity & check must be supplied to create a maintenance period") if @options[:entity].nil? || @options[:check].nil?
61
60
  errors = Flapjack::Data::EntityCheck.create_maintenance(@options)
62
61
  (errors.each { |k, v| puts "#{k}: #{v}" }; exit_now!('Failed to create maintenances')) if errors.length > 0
63
62
  puts "The maintenances specified have been created"
@@ -83,28 +82,28 @@ command :maintenance do |maintenance|
83
82
  maintenance.desc 'Show maintenance windows according to criteria (default: all ongoing maintenance)'
84
83
  maintenance.command :show do |show|
85
84
 
86
- show.flag [:e, 'entity'],
85
+ show.flag ['entity', :e],
87
86
  :desc => 'The entity for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'db*\' or \'[[:lower:]]\''
88
87
 
89
- show.flag [:c, 'check'],
88
+ show.flag ['check', :c],
90
89
  :desc => 'The check for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'http*\' or \'[[:lower:]]\''
91
90
 
92
- show.flag [:r, 'reason'],
91
+ show.flag ['reason', :r],
93
92
  :desc => 'The reason for the maintenance window to occur. This can be a string, or a ruby regex of the form \'Downtime for *\' or \'[[:lower:]]\''
94
93
 
95
- show.flag [:s, 'start', 'started', 'starting'],
96
- :desc => 'The start time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between times and time"'
94
+ show.flag ['start', 'started', 'starting', :s],
95
+ :desc => 'The start time for the maintenance window, eg \'before 10am\'. This should be prefixed with \'more than\', \'less than\', \'on\', \'before\', or \'after\', or of the form \'between times and time\''
97
96
 
98
- show.flag [:d, 'duration'],
99
- :desc => 'The total duration of the maintenance window. This should be prefixed with "more than", "less than", "before, "after" or "equal to", or or of the form "between 3 and 4 hours". This should be an interval'
97
+ show.flag ['duration', :d],
98
+ :desc => 'The total duration of the maintenance window, eg \'equal to 5 hours\'. This should be prefixed with \'more than\', \'less than\', \'before, \'after\' or \'equal to\', or or of the form \'between 3 and 4 hours\'. This should be an interval'
100
99
 
101
- show.flag [:f, 'finish', 'finished', 'finishing', 'remain', 'remained', 'remaining', 'end'],
102
- :desc => 'The finishing time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between time and time"'
100
+ show.flag ['finish', 'finished', 'finishing', 'remain', 'remained', 'remaining', 'end', :f],
101
+ :desc => 'The finishing time for the maintenance window, eg \'more than 1 year from now\'. This should be prefixed with \'more than\', \'less than\', \'on\', \'before\', or \'after\', or of the form \'between time and time\''
103
102
 
104
- show.flag [:st, 'state'],
103
+ show.flag ['state', :st],
105
104
  :desc => 'The state that the check is currently in'
106
105
 
107
- show.flag [:t, 'type'],
106
+ show.flag ['type', :t],
108
107
  :desc => 'The type of maintenance scheduled',
109
108
  :default_value => 'scheduled'
110
109
 
@@ -117,32 +116,32 @@ command :maintenance do |maintenance|
117
116
  maintenance.desc 'Delete maintenance windows according to criteria (default: all ongoing maintenance)'
118
117
  maintenance.command :delete do |delete|
119
118
 
120
- delete.flag [:a, 'apply'],
119
+ delete.flag ['apply', :a],
121
120
  :desc => 'Whether this deletion should occur',
122
121
  :default_value => false
123
122
 
124
- delete.flag [:e, 'entity'],
123
+ delete.flag ['entity', :e],
125
124
  :desc => 'The entity for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'db*\' or \'[[:lower:]]\''
126
125
 
127
- delete.flag [:c, 'check'],
126
+ delete.flag ['check', :c],
128
127
  :desc => 'The check for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'http*\' or \'[[:lower:]]\''
129
128
 
130
- delete.flag [:r, 'reason'],
129
+ delete.flag ['reason', :r],
131
130
  :desc => 'The reason for the maintenance window to occur. This can be a string, or a ruby regex of the form \'Downtime for *\' or \'[[:lower:]]\''
132
131
 
133
- delete.flag [:s, 'start', 'started', 'starting'],
134
- :desc => 'The start time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between times and time"'
132
+ delete.flag ['start', 'started', 'starting', :s],
133
+ :desc => 'The start time for the maintenance window, eg \'before 10am\'. This should be prefixed with \'more than\', \'less than\', \'on\', \'before\', or \'after\', or of the form \'between times and time\''
135
134
 
136
- delete.flag [:d, 'duration'],
137
- :desc => 'The total duration of the maintenance window. This should be prefixed with "more than", "less than", "before, "after" or "equal to", or or of the form "between 3 and 4 hours". This should be an interval'
135
+ delete.flag ['duration', :d],
136
+ :desc => 'The total duration of the maintenance window, eg \'equal to 5 hours\'. This should be prefixed with \'more than\', \'less than\', \'before, \'after\' or \'equal to\', or or of the form \'between 3 and 4 hours\'. This should be an interval'
138
137
 
139
- delete.flag [:f, 'finish', 'finished', 'finishing', 'remain', 'remained', 'remaining', 'end'],
140
- :desc => 'The finishing time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between time and time"'
138
+ delete.flag ['finish', 'finished', 'finishing', 'remain', 'remained', 'remaining', 'end', :f],
139
+ :desc => 'The finishing time for the maintenance window, eg \'more than 1 year from now\'. This should be prefixed with \'more than\', \'less than\', \'on\', \'before\', or \'after\', or of the form \'between time and time\''
141
140
 
142
- delete.flag [:st, 'state'],
141
+ delete.flag ['state', :st],
143
142
  :desc => 'The state that the check is currently in'
144
143
 
145
- delete.flag [:t, 'type'],
144
+ delete.flag ['type', :t],
146
145
  :desc => 'The type of maintenance scheduled',
147
146
  :default_value => 'scheduled'
148
147
 
@@ -155,25 +154,30 @@ command :maintenance do |maintenance|
155
154
  maintenance.desc 'Create a maintenance window'
156
155
  maintenance.command :create do |create|
157
156
 
158
- create.flag [:e, 'entity'],
157
+ create.flag ['entity', :e],
159
158
  :desc => 'The entity for the maintenance window to occur on. This can be a comma separated list',
160
- :type => Array
159
+ :type => Array,
160
+ :required => true
161
161
 
162
- create.flag [:c, 'check'],
162
+ create.flag ['check', :c],
163
163
  :desc => 'The check for the maintenance window to occur on. This can be a comma separated list',
164
- :type => Array
164
+ :type => Array,
165
+ :required => true
165
166
 
166
- create.flag [:r, 'reason'],
167
- :desc => 'The reason for the maintenance window to occur'
167
+ create.flag ['reason', :r],
168
+ :desc => 'The reason for the maintenance window to occur',
169
+ :required => true
168
170
 
169
- create.flag [:s, 'start', 'started', 'starting'],
170
- :desc => 'The start time for the maintenance window'
171
+ create.flag ['start', 'started', 'starting', :s],
172
+ :desc => 'The start time for the maintenance window, eg \'now\' or \'in 3 hours\'',
173
+ :required => true
171
174
 
172
- create.flag [:d, 'duration'],
173
- :desc => 'The total duration of the maintenance window. This should be an interval'
175
+ create.flag ['duration', :d],
176
+ :desc => 'The total duration of the maintenance window, eg \'30 minutes\'. This should be an interval',
177
+ :required => true
174
178
 
175
- create.flag [:t, 'type'],
176
- :desc => 'The type of maintenance scheduled ("scheduled")',
179
+ create.flag ['type', :t],
180
+ :desc => 'The type of maintenance scheduled (\'scheduled\')',
177
181
  :default_value => 'scheduled'
178
182
 
179
183
  create.action do |global_options,options,args|
@@ -160,7 +160,7 @@ module Flapjack
160
160
  redis.exists(entity_check + ':unscheduled_maintenance')
161
161
  }.collect {|entity_check|
162
162
  Flapjack::Data::EntityCheck.for_event_id(entity_check, :redis => redis)
163
- }
163
+ }.compact
164
164
  end
165
165
 
166
166
  def self.find_maintenance(options = {})
@@ -983,8 +983,8 @@ module Flapjack
983
983
 
984
984
  def initialize(entity, check, options = {})
985
985
  raise "Redis connection not set" unless @redis = options[:redis]
986
- raise "Invalid entity" unless @entity = entity
987
- raise "Invalid check" unless @check = check
986
+ raise "Invalid entity (#{entity.inspect})" unless @entity = entity
987
+ raise "Invalid check (#{check.inspect} on #{entity.inspect})" unless @check = check
988
988
  @key = "#{entity.name}:#{check}"
989
989
  if @redis.zscore("all_checks", @key).nil?
990
990
  timestamp = options[:timestamp] || Time.now.to_i
@@ -18,10 +18,10 @@ module Flapjack
18
18
  semaphore = nil
19
19
  strikes = 0
20
20
  begin
21
- semaphore = Flapjack::Data::Semaphore.new(resource, :redis => redis, :expiry => 60)
21
+ semaphore = Flapjack::Data::Semaphore.new(resource, :redis => redis, :expiry => 300)
22
22
  rescue Flapjack::Data::Semaphore::ResourceLocked
23
23
  strikes += 1
24
- if strikes < 5
24
+ if strikes < 10
25
25
  sleep 2
26
26
  retry
27
27
  end
@@ -30,66 +30,102 @@ module Flapjack
30
30
  semaphore
31
31
  end
32
32
 
33
- def self.migrate_entity_check_data_if_required(options = {})
33
+ def self.create_entity_ids_if_required(options = {})
34
34
  raise "Redis connection not set" unless redis = options[:redis]
35
-
36
35
  logger = options[:logger]
37
36
 
37
+ if redis.exists('created_ids_for_old_entities_without_ids')
38
+ return
39
+ end
40
+
38
41
  semaphore = obtain_semaphore(ENTITY_DATA_MIGRATION, :redis => redis)
39
42
  if semaphore.nil?
40
43
  unless logger.nil?
41
- logger.fatal "Could not obtain lock for data migration. Ensure that " +
44
+ logger.fatal "Could not obtain lock for data migration (entity id creation). Ensure that " +
42
45
  "no other flapjack processes are running that might be executing " +
43
46
  "migrations, check logs for any exceptions, manually delete the " +
44
47
  "'#{ENTITY_DATA_MIGRATION}' key from your Flapjack Redis " +
45
48
  "database and try running Flapjack again."
46
49
  end
47
- exit
50
+ raise "Unable to obtain semaphore #{ENTITY_DATA_MIGRATION}"
48
51
  end
49
52
 
50
- if redis.exists('all_checks')
53
+ begin
54
+ logger.warn "Ensuring all entities have ids ..." unless logger.nil?
55
+
56
+ Flapjack::Data::EntityCheck.find_current_names_by_entity(:redis => redis, :logger => logger).keys.each {|entity_name|
57
+ entity = Flapjack::Data::Entity.find_by_name(entity_name, :create => true, :redis => redis, :logger => logger)
58
+ }
59
+
60
+ all_checks = Flapjack::Data::EntityCheck.all(:redis => redis, :logger => logger, :create_entity => true)
61
+
62
+ redis.set('created_ids_for_old_entities_without_ids', 'true')
63
+ logger.warn "Entity id creation complete."
64
+ ensure
51
65
  semaphore.release
52
- return
53
66
  end
67
+ end
54
68
 
55
- logger.warn "Upgrading Flapjack's entity/check Redis indexes..." unless logger.nil?
69
+ def self.migrate_entity_check_data_if_required(options = {})
70
+ raise "Redis connection not set" unless redis = options[:redis]
56
71
 
57
- check_names = redis.keys('check:*').map {|c| c.sub(/^check:/, '') } |
58
- Flapjack::Data::EntityCheck.find_current_names(:redis => redis)
72
+ logger = options[:logger]
59
73
 
60
- unless check_names.empty?
61
- timestamp = Time.now.to_i
74
+ if redis.exists('all_checks')
75
+ return
76
+ end
62
77
 
63
- check_names.each do |ecn|
64
- redis.zadd("all_checks", timestamp, ecn)
65
- entity_name, check = ecn.split(':', 2)
66
- redis.zadd("all_checks:#{entity_name}", timestamp, check)
67
- # not deleting the check hashes, they store useful data
78
+ semaphore = obtain_semaphore(ENTITY_DATA_MIGRATION, :redis => redis)
79
+ if semaphore.nil?
80
+ unless logger.nil?
81
+ logger.fatal "Could not obtain lock for entity check data migration. Ensure that " +
82
+ "no other flapjack processes are running that might be executing " +
83
+ "migrations, check logs for any exceptions, manually delete the " +
84
+ "'#{ENTITY_DATA_MIGRATION}' key from your Flapjack Redis " +
85
+ "database and try running Flapjack again."
68
86
  end
87
+ raise "Unable to obtain semaphore #{ENTITY_DATA_MIGRATION}"
69
88
  end
70
89
 
71
- logger.warn "Checks indexed." unless logger.nil?
72
-
73
- entity_name_keys = redis.keys("entity_id:*")
74
- unless entity_name_keys.empty?
75
- ids = redis.mget(*entity_name_keys)
90
+ begin
91
+ logger.warn "Upgrading Flapjack's entity/check Redis indexes..." unless logger.nil?
76
92
 
77
- entity_name_keys.each do |enk|
78
- enk =~ /^entity_id:(.+)$/; entity_name = $1; entity_id = ids.shift
93
+ check_names = redis.keys('check:*').map {|c| c.sub(/^check:/, '') } |
94
+ Flapjack::Data::EntityCheck.find_current_names(:redis => redis)
79
95
 
80
- redis.hset('all_entity_names_by_id', entity_id, entity_name)
81
- redis.hset('all_entity_ids_by_name', entity_name, entity_id)
96
+ unless check_names.empty?
97
+ timestamp = Time.now.to_i
82
98
 
83
- redis.del(enk)
84
- redis.del("entity:#{entity_id}")
99
+ check_names.each do |ecn|
100
+ redis.zadd("all_checks", timestamp, ecn)
101
+ entity_name, check = ecn.split(':', 2)
102
+ redis.zadd("all_checks:#{entity_name}", timestamp, check)
103
+ # not deleting the check hashes, they store useful data
104
+ end
85
105
  end
86
- end
87
106
 
88
- logger.warn "Entities indexed." unless logger.nil?
107
+ logger.warn "Checks indexed." unless logger.nil?
108
+
109
+ entity_name_keys = redis.keys("entity_id:*")
110
+ unless entity_name_keys.empty?
111
+ ids = redis.mget(*entity_name_keys)
112
+
113
+ entity_name_keys.each do |enk|
114
+ enk =~ /^entity_id:(.+)$/; entity_name = $1; entity_id = ids.shift
89
115
 
90
- semaphore.release
116
+ redis.hset('all_entity_names_by_id', entity_id, entity_name)
117
+ redis.hset('all_entity_ids_by_name', entity_name, entity_id)
91
118
 
92
- logger.warn "Indexing complete." unless logger.nil?
119
+ redis.del(enk)
120
+ redis.del("entity:#{entity_id}")
121
+ end
122
+ end
123
+
124
+ logger.warn "Entities indexed." unless logger.nil?
125
+ logger.warn "Indexing complete." unless logger.nil?
126
+ ensure
127
+ semaphore.release
128
+ end
93
129
  end
94
130
 
95
131
  def self.refresh_archive_index(options = {})
@@ -147,4 +183,4 @@ module Flapjack
147
183
 
148
184
  end
149
185
  end
150
- end
186
+ end
@@ -253,6 +253,11 @@ module Flapjack
253
253
  return unless (tr.has_key?(:start_time) || tr.has_key?(:start_date)) &&
254
254
  (tr.has_key?(:end_time) || tr.has_key?(:end_date))
255
255
 
256
+ # exrules is deprecated in latest ice_cube, but may be stored in data
257
+ # serialised from earlier versions of the gem
258
+ # ( https://github.com/flapjack/flapjack/issues/715 )
259
+ tr.delete(:exrules)
260
+
256
261
  parsed_time = proc {|tr, field|
257
262
  if t = tr.delete(field)
258
263
  t = t.dup