flapjack 0.7.27 → 0.7.28
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/CHANGELOG.md +9 -0
- data/bin/flapjack +22 -28
- data/bin/flapjack-nagios-receiver +5 -27
- data/bin/flapjack-populator +2 -2
- data/bin/flapper +13 -14
- data/bin/receive-events +3 -20
- data/bin/simulate-failed-check +3 -20
- data/etc/flapjack_config.yaml.example +119 -86
- data/features/cli.feature +69 -0
- data/features/events.feature +15 -0
- data/features/packaging-lintian.feature +4 -6
- data/features/rollup.feature +198 -0
- data/features/steps/cli_steps.rb +81 -0
- data/features/steps/events_steps.rb +26 -16
- data/features/steps/notifications_steps.rb +2 -2
- data/features/steps/packaging-lintian_steps.rb +2 -2
- data/features/support/daemons.rb +113 -0
- data/features/support/env.rb +26 -4
- data/lib/flapjack/configuration.rb +2 -0
- data/lib/flapjack/data/contact.rb +76 -5
- data/lib/flapjack/data/entity_check.rb +16 -0
- data/lib/flapjack/data/message.rb +11 -8
- data/lib/flapjack/data/notification.rb +31 -3
- data/lib/flapjack/data/notification_rule.rb +1 -1
- data/lib/flapjack/filters/delays.rb +1 -5
- data/lib/flapjack/gateways/api/contact_methods.rb +12 -6
- data/lib/flapjack/gateways/email.rb +35 -26
- data/lib/flapjack/gateways/email/alert.html.erb +4 -4
- data/lib/flapjack/gateways/email/alert.text.erb +2 -2
- data/lib/flapjack/gateways/email/alert_subject.text.erb +14 -0
- data/lib/flapjack/gateways/email/rollup.html.erb +48 -0
- data/lib/flapjack/gateways/email/rollup.text.erb +20 -0
- data/lib/flapjack/gateways/email/rollup_subject.text.erb +19 -0
- data/lib/flapjack/gateways/jabber.rb +97 -47
- data/lib/flapjack/gateways/sms_messagenet.rb +26 -24
- data/lib/flapjack/gateways/sms_messagenet/alert.text.erb +15 -0
- data/lib/flapjack/gateways/sms_messagenet/rollup.text.erb +34 -0
- data/lib/flapjack/gateways/web/views/contact.html.erb +16 -8
- data/lib/flapjack/notifier.rb +17 -4
- data/lib/flapjack/processor.rb +1 -1
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/coordinator_spec.rb +19 -19
- data/spec/lib/flapjack/data/contact_spec.rb +100 -25
- data/spec/lib/flapjack/data/event_spec.rb +1 -1
- data/spec/lib/flapjack/data/message_spec.rb +1 -1
- data/spec/lib/flapjack/data/notification_spec.rb +11 -3
- data/spec/lib/flapjack/gateways/api/contact_methods_spec.rb +36 -17
- data/spec/lib/flapjack/gateways/api/entity_check_presenter_spec.rb +1 -1
- data/spec/lib/flapjack/gateways/api/entity_methods_spec.rb +38 -38
- data/spec/lib/flapjack/gateways/api/entity_presenter_spec.rb +15 -15
- data/spec/lib/flapjack/gateways/email_spec.rb +4 -4
- data/spec/lib/flapjack/gateways/jabber_spec.rb +13 -14
- data/spec/lib/flapjack/gateways/oobetet_spec.rb +2 -2
- data/spec/lib/flapjack/gateways/pagerduty_spec.rb +5 -5
- data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +1 -1
- data/spec/lib/flapjack/gateways/web/views/contact.html.erb_spec.rb +2 -2
- data/spec/lib/flapjack/gateways/web_spec.rb +4 -4
- data/spec/lib/flapjack/logger_spec.rb +3 -3
- data/spec/lib/flapjack/pikelet_spec.rb +10 -10
- data/spec/lib/flapjack/processor_spec.rb +4 -4
- data/spec/lib/flapjack/redis_pool_spec.rb +1 -1
- metadata +70 -5
- checksums.yaml +0 -15
@@ -0,0 +1,69 @@
|
|
1
|
+
@process
|
2
|
+
Feature: command line utility
|
3
|
+
As a systems administrator
|
4
|
+
I should be able to manage Flapjack
|
5
|
+
From the command line
|
6
|
+
|
7
|
+
Background:
|
8
|
+
Given a file named "flapjack_cfg.yml" with:
|
9
|
+
"""
|
10
|
+
test:
|
11
|
+
redis:
|
12
|
+
db: 14
|
13
|
+
processor:
|
14
|
+
enabled: yes
|
15
|
+
logger:
|
16
|
+
level: warn
|
17
|
+
"""
|
18
|
+
And a file named "flapjack_cfg_d.yml" with:
|
19
|
+
"""
|
20
|
+
test:
|
21
|
+
pid_file: tmp/cucumber_cli/flapjack_d.pid
|
22
|
+
log_file: tmp/cucumber_cli/flapjack_d.log
|
23
|
+
redis:
|
24
|
+
db: 14
|
25
|
+
processor:
|
26
|
+
enabled: yes
|
27
|
+
logger:
|
28
|
+
level: warn
|
29
|
+
"""
|
30
|
+
|
31
|
+
Scenario: Starting flapjack
|
32
|
+
When I start flapjack with `flapjack start --config tmp/cucumber_cli/flapjack_cfg.yml`
|
33
|
+
Then flapjack should start within 15 seconds
|
34
|
+
|
35
|
+
Scenario: Stopping flapjack via SIGINT
|
36
|
+
When I start flapjack with `flapjack start --config tmp/cucumber_cli/flapjack_cfg.yml`
|
37
|
+
Then flapjack should start within 15 seconds
|
38
|
+
When I send a SIGINT to the flapjack process
|
39
|
+
Then flapjack should stop within 15 seconds
|
40
|
+
|
41
|
+
Scenario: Starting and stopping flapjack, daemonized
|
42
|
+
When I start flapjack (daemonised) with `flapjack start -d --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
43
|
+
Then flapjack should start within 15 seconds
|
44
|
+
When I stop flapjack with `flapjack stop --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
45
|
+
Then flapjack should stop within 15 seconds
|
46
|
+
|
47
|
+
Scenario: Starting, restarting and stopping flapjack, daemonized
|
48
|
+
When I start flapjack (daemonised) with `flapjack start -d --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
49
|
+
Then flapjack should start within 15 seconds
|
50
|
+
When I restart flapjack with `flapjack restart -d --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
51
|
+
Then flapjack should restart within 15 seconds
|
52
|
+
When I stop flapjack with `flapjack stop --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
53
|
+
Then flapjack should stop within 15 seconds
|
54
|
+
|
55
|
+
Scenario: Reloading flapjack configuration
|
56
|
+
When I start flapjack with `flapjack start --config tmp/cucumber_cli/flapjack_cfg.yml`
|
57
|
+
When I run `mv tmp/cucumber_cli/flapjack_cfg.yml tmp/cucumber_cli/flapjack_cfg.yml.bak`
|
58
|
+
Given a file named "flapjack_cfg.yml" with:
|
59
|
+
"""
|
60
|
+
test:
|
61
|
+
redis:
|
62
|
+
db: 14
|
63
|
+
processor:
|
64
|
+
enabled: no
|
65
|
+
"""
|
66
|
+
When I send a SIGHUP to the flapjack process
|
67
|
+
# TODO how to test for config file change?
|
68
|
+
When I send a SIGINT to the flapjack process
|
69
|
+
Then flapjack should stop within 15 seconds
|
data/features/events.feature
CHANGED
@@ -90,6 +90,21 @@ Feature: events
|
|
90
90
|
And a critical event is received
|
91
91
|
Then a notification should not be generated
|
92
92
|
|
93
|
+
@time
|
94
|
+
Scenario: Alert when coming out of scheduled maintenance
|
95
|
+
Given the check is in an ok state
|
96
|
+
And the check is in scheduled maintenance for 3 hours
|
97
|
+
When a critical event is received
|
98
|
+
And 1 minute passes
|
99
|
+
And a critical event is received
|
100
|
+
Then a notification should not be generated
|
101
|
+
And 2 hours passes
|
102
|
+
And a critical event is received
|
103
|
+
Then a notification should not be generated
|
104
|
+
When 1 hours passes
|
105
|
+
And a critical event is received
|
106
|
+
Then a notification should be generated
|
107
|
+
|
93
108
|
@time
|
94
109
|
Scenario: Check ok to critical for 1 minute when in unscheduled maintenance
|
95
110
|
Given the check is in an ok state
|
@@ -3,15 +3,13 @@ Feature: Packagability
|
|
3
3
|
It must be easily packagable
|
4
4
|
|
5
5
|
Scenario: No rubygems references
|
6
|
-
|
7
|
-
|
8
|
-
Then the exit status should be 1
|
6
|
+
When I run `grep require lib/* bin/* -R |grep rubygems`
|
7
|
+
Then the exit value should be 1
|
9
8
|
And I should see 0 lines of output
|
10
9
|
|
11
10
|
Scenario: A shebang that works everywhere
|
12
|
-
|
13
|
-
|
14
|
-
Then the exit status should be 0
|
11
|
+
When I run `find lib/ -type 'f' -name '*.rb'`
|
12
|
+
Then the exit value should be 0
|
15
13
|
And every file in the output should start with "#!/usr/bin/env ruby"
|
16
14
|
|
17
15
|
|
@@ -0,0 +1,198 @@
|
|
1
|
+
@rollup @notification_rules @resque @processor @notifier @events
|
2
|
+
Feature: Rollup on a per contact, per media basis
|
3
|
+
|
4
|
+
Background:
|
5
|
+
Given the following users exist:
|
6
|
+
| id | first_name | last_name | email | sms | timezone |
|
7
|
+
| 1 | Malak | Al-Musawi | malak@example.com | +61400000001 | Asia/Baghdad |
|
8
|
+
|
9
|
+
And the following entities exist:
|
10
|
+
| id | name | contacts |
|
11
|
+
| 1 | foo | 1 |
|
12
|
+
| 2 | baz | 1 |
|
13
|
+
|
14
|
+
And user 1 has the following notification intervals:
|
15
|
+
| email | sms |
|
16
|
+
| 15 | 15 |
|
17
|
+
|
18
|
+
And user 1 has the following notification rollup thresholds:
|
19
|
+
| email | sms |
|
20
|
+
| 1 | 2 |
|
21
|
+
|
22
|
+
And user 1 has the following notification rules:
|
23
|
+
| entities | unknown_media | warning_media | critical_media |
|
24
|
+
| | | email | sms,email |
|
25
|
+
|
26
|
+
@time
|
27
|
+
Scenario: Rollup threshold of 1 means first alert is a rollup
|
28
|
+
Given the check is check 'ping' on entity 'foo'
|
29
|
+
And the check is in an ok state
|
30
|
+
When a critical event is received
|
31
|
+
Then no email alerts should be queued for malak@example.com
|
32
|
+
When 1 minute passes
|
33
|
+
And a critical event is received
|
34
|
+
Then 1 email alert of type problem and rollup problem should be queued for malak@example.com
|
35
|
+
When 1 minute passes
|
36
|
+
And an ok event is received
|
37
|
+
Then 1 email alert of type recovery and rollup recovery should be queued for malak@example.com
|
38
|
+
|
39
|
+
@time
|
40
|
+
Scenario: Acknowledgement ending rollup generates rollup recovery message ignoring interval
|
41
|
+
Given the check is check 'ping' on entity 'foo'
|
42
|
+
And the check is in an ok state
|
43
|
+
When a critical event is received
|
44
|
+
Then no email alerts should be queued for malak@example.com
|
45
|
+
When 1 minute passes
|
46
|
+
And a critical event is received
|
47
|
+
Then 1 email alert of type problem and rollup problem should be queued for malak@example.com
|
48
|
+
When 10 minutes passes
|
49
|
+
And an acknowledgement event is received
|
50
|
+
Then 1 email alert of rollup recovery should be queued for malak@example.com
|
51
|
+
And 2 email alerts should be queued for malak@example.com
|
52
|
+
|
53
|
+
@time
|
54
|
+
Scenario: Transition to rollup when threshold is met
|
55
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
56
|
+
And check 'ping' for entity 'baz' is in an ok state
|
57
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
58
|
+
Then no sms alerts should be queued for +61400000001
|
59
|
+
When 1 minute passes
|
60
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
61
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
62
|
+
When 5 minutes passes
|
63
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
64
|
+
And 1 minute passes
|
65
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
66
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
67
|
+
And 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
68
|
+
When 1 minute passes
|
69
|
+
And an ok event is received for check 'ping' on entity 'foo'
|
70
|
+
Then no sms alerts of type recovery and rollup none should be queued for +61400000001
|
71
|
+
And 1 sms alert of type recovery and rollup recovery should be queued for +61400000001
|
72
|
+
And 3 sms alerts should be queued for +61400000001
|
73
|
+
When 1 minute passes
|
74
|
+
And an ok event is received for check 'ping' on entity 'baz'
|
75
|
+
Then 1 sms alert of type recovery and rollup none should be queued for +61400000001
|
76
|
+
And 1 sms alert of type recovery and rollup recovery should be queued for +61400000001
|
77
|
+
And 4 sms alerts should be queued for +61400000001
|
78
|
+
|
79
|
+
@time
|
80
|
+
Scenario: Acknowledgement delays rollup kick-in
|
81
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
82
|
+
And check 'ping' for entity 'baz' is in an ok state
|
83
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
84
|
+
Then no sms alerts should be queued for +61400000001
|
85
|
+
When 1 minute passes
|
86
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
87
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
88
|
+
When 5 minutes passes
|
89
|
+
And an acknowledgement event is received for check 'ping' on entity 'foo'
|
90
|
+
Then 1 sms alert of type acknowledgement and rollup none should be queued for +61400000001
|
91
|
+
And 2 sms alerts should be queued for +61400000001
|
92
|
+
When a critical event is received for check 'ping' on entity 'baz'
|
93
|
+
And 1 minute passes
|
94
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
95
|
+
Then 2 sms alerts of type problem and rollup none should be queued for +61400000001
|
96
|
+
And 3 sms alerts should be queued for +61400000001
|
97
|
+
|
98
|
+
@time
|
99
|
+
Scenario: Acknowledgement hastens rollup recovery
|
100
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
101
|
+
And check 'ping' for entity 'baz' is in an ok state
|
102
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
103
|
+
And 1 minute passes
|
104
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
105
|
+
Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
106
|
+
When 5 minutes passes
|
107
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
108
|
+
And 1 minute passes
|
109
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
110
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
111
|
+
And 2 sms alerts should be queued for +61400000001
|
112
|
+
When an acknowledgement event is received for check 'ping' on entity 'foo'
|
113
|
+
Then 1 sms alert of type acknowledgement and rollup recovery should be queued for +61400000001
|
114
|
+
And 3 sms alerts should be queued for +61400000001
|
115
|
+
When 30 minutes passes
|
116
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
117
|
+
Then 2 sms alerts of type problem and rollup none should be queued for +61400000001
|
118
|
+
And 4 sms alerts should be queued for +61400000001
|
119
|
+
|
120
|
+
@time
|
121
|
+
Scenario: Scheduled maintenance hastens rollup recovery
|
122
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
123
|
+
And check 'ping' for entity 'baz' is in an ok state
|
124
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
125
|
+
And 1 minute passes
|
126
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
127
|
+
Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
128
|
+
When 5 minutes passes
|
129
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
130
|
+
And 1 minute passes
|
131
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
132
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
133
|
+
And 2 sms alerts should be queued for +61400000001
|
134
|
+
When check 'ping' for entity 'foo' is in scheduled maintenance for 1 day
|
135
|
+
And 30 minutes passes
|
136
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
137
|
+
Then 1 sms alert of rollup recovery should be queued for +61400000001
|
138
|
+
|
139
|
+
@time
|
140
|
+
Scenario: Unscheduled maintenance ending promotes rollup
|
141
|
+
Given check 'ping' for entity 'foo' is in unscheduled maintenance
|
142
|
+
And check 'ping' for entity 'baz' is in an ok state
|
143
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
144
|
+
And 1 minute passes
|
145
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
146
|
+
Then 0 sms alerts should be queued for +61400000001
|
147
|
+
When 5 minutes passes
|
148
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
149
|
+
And 1 minute passes
|
150
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
151
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
152
|
+
And 1 sms alerts should be queued for +61400000001
|
153
|
+
When 4 hours passes
|
154
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
155
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
156
|
+
And 2 sms alerts should be queued for +61400000001
|
157
|
+
|
158
|
+
@time
|
159
|
+
Scenario: Scheduled maintenance ending promotes rollup
|
160
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
161
|
+
Given check 'ping' for entity 'foo' is in scheduled maintenance for 4 hours
|
162
|
+
And check 'ping' for entity 'baz' is in an ok state
|
163
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
164
|
+
And 1 minute passes
|
165
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
166
|
+
Then 0 sms alerts should be queued for +61400000001
|
167
|
+
When 5 minutes passes
|
168
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
169
|
+
And 1 minute passes
|
170
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
171
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
172
|
+
And 1 sms alerts should be queued for +61400000001
|
173
|
+
When 4 hours passes
|
174
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
175
|
+
And 1 minute passes
|
176
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
177
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
178
|
+
And 2 sms alerts should be queued for +61400000001
|
179
|
+
|
180
|
+
# @time
|
181
|
+
# Scenario: Contact ceases to be a contact on an entity that they were being alerted for
|
182
|
+
# Given check 'ping' for entity 'foo' is in an ok state
|
183
|
+
# And check 'ping' for entity 'baz' is in an ok state
|
184
|
+
# When a critical event is received for check 'ping' on entity 'foo'
|
185
|
+
# And 1 minute passes
|
186
|
+
# And a critical event is received for check 'ping' on entity 'foo'
|
187
|
+
# Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
188
|
+
# When 5 minutes passes
|
189
|
+
# And a critical event is received for check 'ping' on entity 'baz'
|
190
|
+
# And 1 minute passes
|
191
|
+
# And a critical event is received for check 'ping' on entity 'baz'
|
192
|
+
# Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
193
|
+
# And 2 sms alerts should be queued for +61400000001
|
194
|
+
# When 1 minute passes
|
195
|
+
# And user 1 ceases to be a contact of entity 'foo'
|
196
|
+
# And a critical event is received for check 'ping' on entity 'baz'
|
197
|
+
# Then 1 sms alert of rollup recovery should be queued for +61400000001
|
198
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
Given /^PENDING/ do
|
3
|
+
pending
|
4
|
+
end
|
5
|
+
|
6
|
+
Given /^a file named "([^"]*)" with:$/ do |file_name, file_content|
|
7
|
+
write_file(file_name, file_content)
|
8
|
+
end
|
9
|
+
|
10
|
+
When /^I ((?:re)?start|stop) flapjack( \(daemonised\))? with `(.+)`$/ do |start_stop_restart, daemonise, cmd|
|
11
|
+
@root = Pathname.new(File.dirname(__FILE__)).parent.parent.expand_path
|
12
|
+
command = "#{@root.join('bin')}/#{cmd}"
|
13
|
+
|
14
|
+
case start_stop_restart
|
15
|
+
when 'start'
|
16
|
+
@process_h = spawn_process(command,
|
17
|
+
:daemon_pidfile => (daemonise.nil? || daemonise.empty?) ? nil : 'tmp/cucumber_cli/flapjack_d.pid')
|
18
|
+
when 'stop', 'restart'
|
19
|
+
`#{command}`
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
When /^I send a SIG(\w+) to the flapjack process$/ do |signal|
|
24
|
+
process = @process_h[:process]
|
25
|
+
pid = process ? process.pid : @process_h[:pid]
|
26
|
+
Process.kill(signal, pid)
|
27
|
+
end
|
28
|
+
|
29
|
+
Then /^flapjack should ((?:re)?start|stop) within (\d+) seconds$/ do |start_stop_restart, seconds|
|
30
|
+
process = @process_h[:process]
|
31
|
+
pid = process ? process.pid : @process_h[:pid]
|
32
|
+
running = nil
|
33
|
+
attempts = 0
|
34
|
+
max_attempts = seconds.to_i * 200
|
35
|
+
|
36
|
+
case start_stop_restart
|
37
|
+
when 'start'
|
38
|
+
begin
|
39
|
+
Process.kill(0, pid)
|
40
|
+
running = true
|
41
|
+
rescue Errno::EINVAL, Errno::ESRCH, RangeError, Errno::EPERM => e
|
42
|
+
attempts += 1; sleep 0.1; retry if attempts < max_attempts
|
43
|
+
running = false
|
44
|
+
end
|
45
|
+
running.should be_true
|
46
|
+
when 'stop'
|
47
|
+
if process
|
48
|
+
# it's a child process, so we can use waitpid
|
49
|
+
begin
|
50
|
+
Timeout::timeout(seconds.to_i) do
|
51
|
+
Process.waitpid(pid)
|
52
|
+
running = false
|
53
|
+
end
|
54
|
+
rescue Timeout::Error
|
55
|
+
running = true
|
56
|
+
end
|
57
|
+
else
|
58
|
+
# started via dante, so we'll need to monitor externally
|
59
|
+
while (running != false) && (attempts < max_attempts)
|
60
|
+
begin
|
61
|
+
Process.kill(0, pid)
|
62
|
+
attempts += 1; sleep 0.1
|
63
|
+
running = true
|
64
|
+
rescue Errno::EINVAL, Errno::ESRCH, RangeError, Errno::EPERM => e
|
65
|
+
running = false
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
running.should be_false
|
70
|
+
when 'restart'
|
71
|
+
read_pid = nil
|
72
|
+
while attempts < max_attempts
|
73
|
+
time_and_pid = time_and_pid_from_file('tmp/cucumber_cli/flapjack_d.pid')
|
74
|
+
read_pid = time_and_pid.last
|
75
|
+
break if read_pid != pid
|
76
|
+
attempts += 1; sleep 0.1
|
77
|
+
end
|
78
|
+
read_pid.should_not == pid
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
@@ -22,7 +22,7 @@ def submit_event(event)
|
|
22
22
|
@redis.rpush 'events', event.to_json
|
23
23
|
end
|
24
24
|
|
25
|
-
def set_scheduled_maintenance(entity, check, duration
|
25
|
+
def set_scheduled_maintenance(entity, check, duration)
|
26
26
|
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
27
27
|
t = Time.now.to_i
|
28
28
|
entity_check.create_scheduled_maintenance(t, duration, :summary => "upgrading everything")
|
@@ -200,11 +200,12 @@ Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in a criti
|
|
200
200
|
set_critical_state(entity, check)
|
201
201
|
end
|
202
202
|
|
203
|
-
Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in scheduled maintenance
|
203
|
+
Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in scheduled maintenance(?: for (.+))?$/ do |check, entity, duration|
|
204
204
|
check ||= @check
|
205
205
|
entity ||= @entity
|
206
|
+
durn = duration ? ChronicDuration.parse(duration) : 60*60*2
|
206
207
|
remove_unscheduled_maintenance(entity, check)
|
207
|
-
set_scheduled_maintenance(entity, check)
|
208
|
+
set_scheduled_maintenance(entity, check, durn)
|
208
209
|
end
|
209
210
|
|
210
211
|
# TODO set the state directly rather than submit & drain
|
@@ -345,6 +346,14 @@ Given /^user (\d+) has the following notification intervals:$/ do |contact_id, i
|
|
345
346
|
end
|
346
347
|
end
|
347
348
|
|
349
|
+
Given /^user (\d+) has the following notification rollup thresholds:$/ do |contact_id, rollup_thresholds|
|
350
|
+
contact = Flapjack::Data::Contact.find_by_id(contact_id, :redis => @redis)
|
351
|
+
rollup_thresholds.hashes.each do |rollup_threshold|
|
352
|
+
contact.set_rollup_threshold_for_media('email', rollup_threshold['email'].to_i)
|
353
|
+
contact.set_rollup_threshold_for_media('sms', rollup_threshold['sms'].to_i)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
348
357
|
Given /^user (\d+) has the following notification rules:$/ do |contact_id, rules|
|
349
358
|
contact = Flapjack::Data::Contact.find_by_id(contact_id, :redis => @redis)
|
350
359
|
timezone = contact.timezone
|
@@ -395,24 +404,25 @@ Then /^all alert dropping keys for user (\d+) should have expired$/ do |contact_
|
|
395
404
|
@redis.keys("drop_alerts_for_contact:#{contact_id}*").should be_empty
|
396
405
|
end
|
397
406
|
|
398
|
-
Then /^(
|
407
|
+
Then /^(\w+) (\w+) alert(?:s)?(?: of)?(?: type (\w+))?(?: and)?(?: rollup (\w+))? should be queued for (.*)$/ do |num_queued, media, notification_type, rollup, address|
|
399
408
|
check = check ? check : @check
|
400
409
|
entity = entity ? entity : @entity
|
401
410
|
case num_queued
|
402
411
|
when 'no'
|
403
412
|
num_queued = 0
|
404
413
|
end
|
405
|
-
queue
|
406
|
-
queue.find_all {|n|
|
414
|
+
queue = Resque.peek("#{media}_notifications", 0, 30)
|
415
|
+
queue.find_all {|n|
|
416
|
+
type_ok = notification_type ? ( n['args'].first['notification_type'] == notification_type ) : true
|
417
|
+
rollup_ok = true
|
418
|
+
if rollup
|
419
|
+
if rollup == 'none'
|
420
|
+
rollup_ok = n['args'].first['rollup'].nil?
|
421
|
+
else
|
422
|
+
rollup_ok = n['args'].first['rollup'] == rollup
|
423
|
+
end
|
424
|
+
end
|
425
|
+
type_ok && rollup_ok && ( n['args'].first['address'] == address )
|
426
|
+
}.length.should == num_queued.to_i
|
407
427
|
end
|
408
428
|
|
409
|
-
Then /^(.*) sms alert(?:s)? should be queued for (.*)$/ do |num_queued, address|
|
410
|
-
check = check ? check : @check
|
411
|
-
entity = entity ? entity : @entity
|
412
|
-
case num_queued
|
413
|
-
when 'no'
|
414
|
-
num_queued = 0
|
415
|
-
end
|
416
|
-
queue = Resque.peek('sms_notifications', 0, 30)
|
417
|
-
queue.find_all {|n| n['args'].first['address'] == address }.length.should == num_queued.to_i
|
418
|
-
end
|