flapjack 0.7.27 → 0.7.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/CHANGELOG.md +9 -0
- data/bin/flapjack +22 -28
- data/bin/flapjack-nagios-receiver +5 -27
- data/bin/flapjack-populator +2 -2
- data/bin/flapper +13 -14
- data/bin/receive-events +3 -20
- data/bin/simulate-failed-check +3 -20
- data/etc/flapjack_config.yaml.example +119 -86
- data/features/cli.feature +69 -0
- data/features/events.feature +15 -0
- data/features/packaging-lintian.feature +4 -6
- data/features/rollup.feature +198 -0
- data/features/steps/cli_steps.rb +81 -0
- data/features/steps/events_steps.rb +26 -16
- data/features/steps/notifications_steps.rb +2 -2
- data/features/steps/packaging-lintian_steps.rb +2 -2
- data/features/support/daemons.rb +113 -0
- data/features/support/env.rb +26 -4
- data/lib/flapjack/configuration.rb +2 -0
- data/lib/flapjack/data/contact.rb +76 -5
- data/lib/flapjack/data/entity_check.rb +16 -0
- data/lib/flapjack/data/message.rb +11 -8
- data/lib/flapjack/data/notification.rb +31 -3
- data/lib/flapjack/data/notification_rule.rb +1 -1
- data/lib/flapjack/filters/delays.rb +1 -5
- data/lib/flapjack/gateways/api/contact_methods.rb +12 -6
- data/lib/flapjack/gateways/email.rb +35 -26
- data/lib/flapjack/gateways/email/alert.html.erb +4 -4
- data/lib/flapjack/gateways/email/alert.text.erb +2 -2
- data/lib/flapjack/gateways/email/alert_subject.text.erb +14 -0
- data/lib/flapjack/gateways/email/rollup.html.erb +48 -0
- data/lib/flapjack/gateways/email/rollup.text.erb +20 -0
- data/lib/flapjack/gateways/email/rollup_subject.text.erb +19 -0
- data/lib/flapjack/gateways/jabber.rb +97 -47
- data/lib/flapjack/gateways/sms_messagenet.rb +26 -24
- data/lib/flapjack/gateways/sms_messagenet/alert.text.erb +15 -0
- data/lib/flapjack/gateways/sms_messagenet/rollup.text.erb +34 -0
- data/lib/flapjack/gateways/web/views/contact.html.erb +16 -8
- data/lib/flapjack/notifier.rb +17 -4
- data/lib/flapjack/processor.rb +1 -1
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/coordinator_spec.rb +19 -19
- data/spec/lib/flapjack/data/contact_spec.rb +100 -25
- data/spec/lib/flapjack/data/event_spec.rb +1 -1
- data/spec/lib/flapjack/data/message_spec.rb +1 -1
- data/spec/lib/flapjack/data/notification_spec.rb +11 -3
- data/spec/lib/flapjack/gateways/api/contact_methods_spec.rb +36 -17
- data/spec/lib/flapjack/gateways/api/entity_check_presenter_spec.rb +1 -1
- data/spec/lib/flapjack/gateways/api/entity_methods_spec.rb +38 -38
- data/spec/lib/flapjack/gateways/api/entity_presenter_spec.rb +15 -15
- data/spec/lib/flapjack/gateways/email_spec.rb +4 -4
- data/spec/lib/flapjack/gateways/jabber_spec.rb +13 -14
- data/spec/lib/flapjack/gateways/oobetet_spec.rb +2 -2
- data/spec/lib/flapjack/gateways/pagerduty_spec.rb +5 -5
- data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +1 -1
- data/spec/lib/flapjack/gateways/web/views/contact.html.erb_spec.rb +2 -2
- data/spec/lib/flapjack/gateways/web_spec.rb +4 -4
- data/spec/lib/flapjack/logger_spec.rb +3 -3
- data/spec/lib/flapjack/pikelet_spec.rb +10 -10
- data/spec/lib/flapjack/processor_spec.rb +4 -4
- data/spec/lib/flapjack/redis_pool_spec.rb +1 -1
- metadata +70 -5
- checksums.yaml +0 -15
@@ -0,0 +1,69 @@
|
|
1
|
+
@process
|
2
|
+
Feature: command line utility
|
3
|
+
As a systems administrator
|
4
|
+
I should be able to manage Flapjack
|
5
|
+
From the command line
|
6
|
+
|
7
|
+
Background:
|
8
|
+
Given a file named "flapjack_cfg.yml" with:
|
9
|
+
"""
|
10
|
+
test:
|
11
|
+
redis:
|
12
|
+
db: 14
|
13
|
+
processor:
|
14
|
+
enabled: yes
|
15
|
+
logger:
|
16
|
+
level: warn
|
17
|
+
"""
|
18
|
+
And a file named "flapjack_cfg_d.yml" with:
|
19
|
+
"""
|
20
|
+
test:
|
21
|
+
pid_file: tmp/cucumber_cli/flapjack_d.pid
|
22
|
+
log_file: tmp/cucumber_cli/flapjack_d.log
|
23
|
+
redis:
|
24
|
+
db: 14
|
25
|
+
processor:
|
26
|
+
enabled: yes
|
27
|
+
logger:
|
28
|
+
level: warn
|
29
|
+
"""
|
30
|
+
|
31
|
+
Scenario: Starting flapjack
|
32
|
+
When I start flapjack with `flapjack start --config tmp/cucumber_cli/flapjack_cfg.yml`
|
33
|
+
Then flapjack should start within 15 seconds
|
34
|
+
|
35
|
+
Scenario: Stopping flapjack via SIGINT
|
36
|
+
When I start flapjack with `flapjack start --config tmp/cucumber_cli/flapjack_cfg.yml`
|
37
|
+
Then flapjack should start within 15 seconds
|
38
|
+
When I send a SIGINT to the flapjack process
|
39
|
+
Then flapjack should stop within 15 seconds
|
40
|
+
|
41
|
+
Scenario: Starting and stopping flapjack, daemonized
|
42
|
+
When I start flapjack (daemonised) with `flapjack start -d --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
43
|
+
Then flapjack should start within 15 seconds
|
44
|
+
When I stop flapjack with `flapjack stop --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
45
|
+
Then flapjack should stop within 15 seconds
|
46
|
+
|
47
|
+
Scenario: Starting, restarting and stopping flapjack, daemonized
|
48
|
+
When I start flapjack (daemonised) with `flapjack start -d --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
49
|
+
Then flapjack should start within 15 seconds
|
50
|
+
When I restart flapjack with `flapjack restart -d --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
51
|
+
Then flapjack should restart within 15 seconds
|
52
|
+
When I stop flapjack with `flapjack stop --config tmp/cucumber_cli/flapjack_cfg_d.yml`
|
53
|
+
Then flapjack should stop within 15 seconds
|
54
|
+
|
55
|
+
Scenario: Reloading flapjack configuration
|
56
|
+
When I start flapjack with `flapjack start --config tmp/cucumber_cli/flapjack_cfg.yml`
|
57
|
+
When I run `mv tmp/cucumber_cli/flapjack_cfg.yml tmp/cucumber_cli/flapjack_cfg.yml.bak`
|
58
|
+
Given a file named "flapjack_cfg.yml" with:
|
59
|
+
"""
|
60
|
+
test:
|
61
|
+
redis:
|
62
|
+
db: 14
|
63
|
+
processor:
|
64
|
+
enabled: no
|
65
|
+
"""
|
66
|
+
When I send a SIGHUP to the flapjack process
|
67
|
+
# TODO how to test for config file change?
|
68
|
+
When I send a SIGINT to the flapjack process
|
69
|
+
Then flapjack should stop within 15 seconds
|
data/features/events.feature
CHANGED
@@ -90,6 +90,21 @@ Feature: events
|
|
90
90
|
And a critical event is received
|
91
91
|
Then a notification should not be generated
|
92
92
|
|
93
|
+
@time
|
94
|
+
Scenario: Alert when coming out of scheduled maintenance
|
95
|
+
Given the check is in an ok state
|
96
|
+
And the check is in scheduled maintenance for 3 hours
|
97
|
+
When a critical event is received
|
98
|
+
And 1 minute passes
|
99
|
+
And a critical event is received
|
100
|
+
Then a notification should not be generated
|
101
|
+
And 2 hours passes
|
102
|
+
And a critical event is received
|
103
|
+
Then a notification should not be generated
|
104
|
+
When 1 hours passes
|
105
|
+
And a critical event is received
|
106
|
+
Then a notification should be generated
|
107
|
+
|
93
108
|
@time
|
94
109
|
Scenario: Check ok to critical for 1 minute when in unscheduled maintenance
|
95
110
|
Given the check is in an ok state
|
@@ -3,15 +3,13 @@ Feature: Packagability
|
|
3
3
|
It must be easily packagable
|
4
4
|
|
5
5
|
Scenario: No rubygems references
|
6
|
-
|
7
|
-
|
8
|
-
Then the exit status should be 1
|
6
|
+
When I run `grep require lib/* bin/* -R |grep rubygems`
|
7
|
+
Then the exit value should be 1
|
9
8
|
And I should see 0 lines of output
|
10
9
|
|
11
10
|
Scenario: A shebang that works everywhere
|
12
|
-
|
13
|
-
|
14
|
-
Then the exit status should be 0
|
11
|
+
When I run `find lib/ -type 'f' -name '*.rb'`
|
12
|
+
Then the exit value should be 0
|
15
13
|
And every file in the output should start with "#!/usr/bin/env ruby"
|
16
14
|
|
17
15
|
|
@@ -0,0 +1,198 @@
|
|
1
|
+
@rollup @notification_rules @resque @processor @notifier @events
|
2
|
+
Feature: Rollup on a per contact, per media basis
|
3
|
+
|
4
|
+
Background:
|
5
|
+
Given the following users exist:
|
6
|
+
| id | first_name | last_name | email | sms | timezone |
|
7
|
+
| 1 | Malak | Al-Musawi | malak@example.com | +61400000001 | Asia/Baghdad |
|
8
|
+
|
9
|
+
And the following entities exist:
|
10
|
+
| id | name | contacts |
|
11
|
+
| 1 | foo | 1 |
|
12
|
+
| 2 | baz | 1 |
|
13
|
+
|
14
|
+
And user 1 has the following notification intervals:
|
15
|
+
| email | sms |
|
16
|
+
| 15 | 15 |
|
17
|
+
|
18
|
+
And user 1 has the following notification rollup thresholds:
|
19
|
+
| email | sms |
|
20
|
+
| 1 | 2 |
|
21
|
+
|
22
|
+
And user 1 has the following notification rules:
|
23
|
+
| entities | unknown_media | warning_media | critical_media |
|
24
|
+
| | | email | sms,email |
|
25
|
+
|
26
|
+
@time
|
27
|
+
Scenario: Rollup threshold of 1 means first alert is a rollup
|
28
|
+
Given the check is check 'ping' on entity 'foo'
|
29
|
+
And the check is in an ok state
|
30
|
+
When a critical event is received
|
31
|
+
Then no email alerts should be queued for malak@example.com
|
32
|
+
When 1 minute passes
|
33
|
+
And a critical event is received
|
34
|
+
Then 1 email alert of type problem and rollup problem should be queued for malak@example.com
|
35
|
+
When 1 minute passes
|
36
|
+
And an ok event is received
|
37
|
+
Then 1 email alert of type recovery and rollup recovery should be queued for malak@example.com
|
38
|
+
|
39
|
+
@time
|
40
|
+
Scenario: Acknowledgement ending rollup generates rollup recovery message ignoring interval
|
41
|
+
Given the check is check 'ping' on entity 'foo'
|
42
|
+
And the check is in an ok state
|
43
|
+
When a critical event is received
|
44
|
+
Then no email alerts should be queued for malak@example.com
|
45
|
+
When 1 minute passes
|
46
|
+
And a critical event is received
|
47
|
+
Then 1 email alert of type problem and rollup problem should be queued for malak@example.com
|
48
|
+
When 10 minutes passes
|
49
|
+
And an acknowledgement event is received
|
50
|
+
Then 1 email alert of rollup recovery should be queued for malak@example.com
|
51
|
+
And 2 email alerts should be queued for malak@example.com
|
52
|
+
|
53
|
+
@time
|
54
|
+
Scenario: Transition to rollup when threshold is met
|
55
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
56
|
+
And check 'ping' for entity 'baz' is in an ok state
|
57
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
58
|
+
Then no sms alerts should be queued for +61400000001
|
59
|
+
When 1 minute passes
|
60
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
61
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
62
|
+
When 5 minutes passes
|
63
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
64
|
+
And 1 minute passes
|
65
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
66
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
67
|
+
And 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
68
|
+
When 1 minute passes
|
69
|
+
And an ok event is received for check 'ping' on entity 'foo'
|
70
|
+
Then no sms alerts of type recovery and rollup none should be queued for +61400000001
|
71
|
+
And 1 sms alert of type recovery and rollup recovery should be queued for +61400000001
|
72
|
+
And 3 sms alerts should be queued for +61400000001
|
73
|
+
When 1 minute passes
|
74
|
+
And an ok event is received for check 'ping' on entity 'baz'
|
75
|
+
Then 1 sms alert of type recovery and rollup none should be queued for +61400000001
|
76
|
+
And 1 sms alert of type recovery and rollup recovery should be queued for +61400000001
|
77
|
+
And 4 sms alerts should be queued for +61400000001
|
78
|
+
|
79
|
+
@time
|
80
|
+
Scenario: Acknowledgement delays rollup kick-in
|
81
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
82
|
+
And check 'ping' for entity 'baz' is in an ok state
|
83
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
84
|
+
Then no sms alerts should be queued for +61400000001
|
85
|
+
When 1 minute passes
|
86
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
87
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
88
|
+
When 5 minutes passes
|
89
|
+
And an acknowledgement event is received for check 'ping' on entity 'foo'
|
90
|
+
Then 1 sms alert of type acknowledgement and rollup none should be queued for +61400000001
|
91
|
+
And 2 sms alerts should be queued for +61400000001
|
92
|
+
When a critical event is received for check 'ping' on entity 'baz'
|
93
|
+
And 1 minute passes
|
94
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
95
|
+
Then 2 sms alerts of type problem and rollup none should be queued for +61400000001
|
96
|
+
And 3 sms alerts should be queued for +61400000001
|
97
|
+
|
98
|
+
@time
|
99
|
+
Scenario: Acknowledgement hastens rollup recovery
|
100
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
101
|
+
And check 'ping' for entity 'baz' is in an ok state
|
102
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
103
|
+
And 1 minute passes
|
104
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
105
|
+
Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
106
|
+
When 5 minutes passes
|
107
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
108
|
+
And 1 minute passes
|
109
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
110
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
111
|
+
And 2 sms alerts should be queued for +61400000001
|
112
|
+
When an acknowledgement event is received for check 'ping' on entity 'foo'
|
113
|
+
Then 1 sms alert of type acknowledgement and rollup recovery should be queued for +61400000001
|
114
|
+
And 3 sms alerts should be queued for +61400000001
|
115
|
+
When 30 minutes passes
|
116
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
117
|
+
Then 2 sms alerts of type problem and rollup none should be queued for +61400000001
|
118
|
+
And 4 sms alerts should be queued for +61400000001
|
119
|
+
|
120
|
+
@time
|
121
|
+
Scenario: Scheduled maintenance hastens rollup recovery
|
122
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
123
|
+
And check 'ping' for entity 'baz' is in an ok state
|
124
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
125
|
+
And 1 minute passes
|
126
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
127
|
+
Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
128
|
+
When 5 minutes passes
|
129
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
130
|
+
And 1 minute passes
|
131
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
132
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
133
|
+
And 2 sms alerts should be queued for +61400000001
|
134
|
+
When check 'ping' for entity 'foo' is in scheduled maintenance for 1 day
|
135
|
+
And 30 minutes passes
|
136
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
137
|
+
Then 1 sms alert of rollup recovery should be queued for +61400000001
|
138
|
+
|
139
|
+
@time
|
140
|
+
Scenario: Unscheduled maintenance ending promotes rollup
|
141
|
+
Given check 'ping' for entity 'foo' is in unscheduled maintenance
|
142
|
+
And check 'ping' for entity 'baz' is in an ok state
|
143
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
144
|
+
And 1 minute passes
|
145
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
146
|
+
Then 0 sms alerts should be queued for +61400000001
|
147
|
+
When 5 minutes passes
|
148
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
149
|
+
And 1 minute passes
|
150
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
151
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
152
|
+
And 1 sms alerts should be queued for +61400000001
|
153
|
+
When 4 hours passes
|
154
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
155
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
156
|
+
And 2 sms alerts should be queued for +61400000001
|
157
|
+
|
158
|
+
@time
|
159
|
+
Scenario: Scheduled maintenance ending promotes rollup
|
160
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
161
|
+
Given check 'ping' for entity 'foo' is in scheduled maintenance for 4 hours
|
162
|
+
And check 'ping' for entity 'baz' is in an ok state
|
163
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
164
|
+
And 1 minute passes
|
165
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
166
|
+
Then 0 sms alerts should be queued for +61400000001
|
167
|
+
When 5 minutes passes
|
168
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
169
|
+
And 1 minute passes
|
170
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
171
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
172
|
+
And 1 sms alerts should be queued for +61400000001
|
173
|
+
When 4 hours passes
|
174
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
175
|
+
And 1 minute passes
|
176
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
177
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
178
|
+
And 2 sms alerts should be queued for +61400000001
|
179
|
+
|
180
|
+
# @time
|
181
|
+
# Scenario: Contact ceases to be a contact on an entity that they were being alerted for
|
182
|
+
# Given check 'ping' for entity 'foo' is in an ok state
|
183
|
+
# And check 'ping' for entity 'baz' is in an ok state
|
184
|
+
# When a critical event is received for check 'ping' on entity 'foo'
|
185
|
+
# And 1 minute passes
|
186
|
+
# And a critical event is received for check 'ping' on entity 'foo'
|
187
|
+
# Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
188
|
+
# When 5 minutes passes
|
189
|
+
# And a critical event is received for check 'ping' on entity 'baz'
|
190
|
+
# And 1 minute passes
|
191
|
+
# And a critical event is received for check 'ping' on entity 'baz'
|
192
|
+
# Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
193
|
+
# And 2 sms alerts should be queued for +61400000001
|
194
|
+
# When 1 minute passes
|
195
|
+
# And user 1 ceases to be a contact of entity 'foo'
|
196
|
+
# And a critical event is received for check 'ping' on entity 'baz'
|
197
|
+
# Then 1 sms alert of rollup recovery should be queued for +61400000001
|
198
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
Given /^PENDING/ do
|
3
|
+
pending
|
4
|
+
end
|
5
|
+
|
6
|
+
Given /^a file named "([^"]*)" with:$/ do |file_name, file_content|
|
7
|
+
write_file(file_name, file_content)
|
8
|
+
end
|
9
|
+
|
10
|
+
When /^I ((?:re)?start|stop) flapjack( \(daemonised\))? with `(.+)`$/ do |start_stop_restart, daemonise, cmd|
|
11
|
+
@root = Pathname.new(File.dirname(__FILE__)).parent.parent.expand_path
|
12
|
+
command = "#{@root.join('bin')}/#{cmd}"
|
13
|
+
|
14
|
+
case start_stop_restart
|
15
|
+
when 'start'
|
16
|
+
@process_h = spawn_process(command,
|
17
|
+
:daemon_pidfile => (daemonise.nil? || daemonise.empty?) ? nil : 'tmp/cucumber_cli/flapjack_d.pid')
|
18
|
+
when 'stop', 'restart'
|
19
|
+
`#{command}`
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
When /^I send a SIG(\w+) to the flapjack process$/ do |signal|
|
24
|
+
process = @process_h[:process]
|
25
|
+
pid = process ? process.pid : @process_h[:pid]
|
26
|
+
Process.kill(signal, pid)
|
27
|
+
end
|
28
|
+
|
29
|
+
Then /^flapjack should ((?:re)?start|stop) within (\d+) seconds$/ do |start_stop_restart, seconds|
|
30
|
+
process = @process_h[:process]
|
31
|
+
pid = process ? process.pid : @process_h[:pid]
|
32
|
+
running = nil
|
33
|
+
attempts = 0
|
34
|
+
max_attempts = seconds.to_i * 200
|
35
|
+
|
36
|
+
case start_stop_restart
|
37
|
+
when 'start'
|
38
|
+
begin
|
39
|
+
Process.kill(0, pid)
|
40
|
+
running = true
|
41
|
+
rescue Errno::EINVAL, Errno::ESRCH, RangeError, Errno::EPERM => e
|
42
|
+
attempts += 1; sleep 0.1; retry if attempts < max_attempts
|
43
|
+
running = false
|
44
|
+
end
|
45
|
+
running.should be_true
|
46
|
+
when 'stop'
|
47
|
+
if process
|
48
|
+
# it's a child process, so we can use waitpid
|
49
|
+
begin
|
50
|
+
Timeout::timeout(seconds.to_i) do
|
51
|
+
Process.waitpid(pid)
|
52
|
+
running = false
|
53
|
+
end
|
54
|
+
rescue Timeout::Error
|
55
|
+
running = true
|
56
|
+
end
|
57
|
+
else
|
58
|
+
# started via dante, so we'll need to monitor externally
|
59
|
+
while (running != false) && (attempts < max_attempts)
|
60
|
+
begin
|
61
|
+
Process.kill(0, pid)
|
62
|
+
attempts += 1; sleep 0.1
|
63
|
+
running = true
|
64
|
+
rescue Errno::EINVAL, Errno::ESRCH, RangeError, Errno::EPERM => e
|
65
|
+
running = false
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
running.should be_false
|
70
|
+
when 'restart'
|
71
|
+
read_pid = nil
|
72
|
+
while attempts < max_attempts
|
73
|
+
time_and_pid = time_and_pid_from_file('tmp/cucumber_cli/flapjack_d.pid')
|
74
|
+
read_pid = time_and_pid.last
|
75
|
+
break if read_pid != pid
|
76
|
+
attempts += 1; sleep 0.1
|
77
|
+
end
|
78
|
+
read_pid.should_not == pid
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
@@ -22,7 +22,7 @@ def submit_event(event)
|
|
22
22
|
@redis.rpush 'events', event.to_json
|
23
23
|
end
|
24
24
|
|
25
|
-
def set_scheduled_maintenance(entity, check, duration
|
25
|
+
def set_scheduled_maintenance(entity, check, duration)
|
26
26
|
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
27
27
|
t = Time.now.to_i
|
28
28
|
entity_check.create_scheduled_maintenance(t, duration, :summary => "upgrading everything")
|
@@ -200,11 +200,12 @@ Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in a criti
|
|
200
200
|
set_critical_state(entity, check)
|
201
201
|
end
|
202
202
|
|
203
|
-
Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in scheduled maintenance
|
203
|
+
Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in scheduled maintenance(?: for (.+))?$/ do |check, entity, duration|
|
204
204
|
check ||= @check
|
205
205
|
entity ||= @entity
|
206
|
+
durn = duration ? ChronicDuration.parse(duration) : 60*60*2
|
206
207
|
remove_unscheduled_maintenance(entity, check)
|
207
|
-
set_scheduled_maintenance(entity, check)
|
208
|
+
set_scheduled_maintenance(entity, check, durn)
|
208
209
|
end
|
209
210
|
|
210
211
|
# TODO set the state directly rather than submit & drain
|
@@ -345,6 +346,14 @@ Given /^user (\d+) has the following notification intervals:$/ do |contact_id, i
|
|
345
346
|
end
|
346
347
|
end
|
347
348
|
|
349
|
+
Given /^user (\d+) has the following notification rollup thresholds:$/ do |contact_id, rollup_thresholds|
|
350
|
+
contact = Flapjack::Data::Contact.find_by_id(contact_id, :redis => @redis)
|
351
|
+
rollup_thresholds.hashes.each do |rollup_threshold|
|
352
|
+
contact.set_rollup_threshold_for_media('email', rollup_threshold['email'].to_i)
|
353
|
+
contact.set_rollup_threshold_for_media('sms', rollup_threshold['sms'].to_i)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
348
357
|
Given /^user (\d+) has the following notification rules:$/ do |contact_id, rules|
|
349
358
|
contact = Flapjack::Data::Contact.find_by_id(contact_id, :redis => @redis)
|
350
359
|
timezone = contact.timezone
|
@@ -395,24 +404,25 @@ Then /^all alert dropping keys for user (\d+) should have expired$/ do |contact_
|
|
395
404
|
@redis.keys("drop_alerts_for_contact:#{contact_id}*").should be_empty
|
396
405
|
end
|
397
406
|
|
398
|
-
Then /^(
|
407
|
+
Then /^(\w+) (\w+) alert(?:s)?(?: of)?(?: type (\w+))?(?: and)?(?: rollup (\w+))? should be queued for (.*)$/ do |num_queued, media, notification_type, rollup, address|
|
399
408
|
check = check ? check : @check
|
400
409
|
entity = entity ? entity : @entity
|
401
410
|
case num_queued
|
402
411
|
when 'no'
|
403
412
|
num_queued = 0
|
404
413
|
end
|
405
|
-
queue
|
406
|
-
queue.find_all {|n|
|
414
|
+
queue = Resque.peek("#{media}_notifications", 0, 30)
|
415
|
+
queue.find_all {|n|
|
416
|
+
type_ok = notification_type ? ( n['args'].first['notification_type'] == notification_type ) : true
|
417
|
+
rollup_ok = true
|
418
|
+
if rollup
|
419
|
+
if rollup == 'none'
|
420
|
+
rollup_ok = n['args'].first['rollup'].nil?
|
421
|
+
else
|
422
|
+
rollup_ok = n['args'].first['rollup'] == rollup
|
423
|
+
end
|
424
|
+
end
|
425
|
+
type_ok && rollup_ok && ( n['args'].first['address'] == address )
|
426
|
+
}.length.should == num_queued.to_i
|
407
427
|
end
|
408
428
|
|
409
|
-
Then /^(.*) sms alert(?:s)? should be queued for (.*)$/ do |num_queued, address|
|
410
|
-
check = check ? check : @check
|
411
|
-
entity = entity ? entity : @entity
|
412
|
-
case num_queued
|
413
|
-
when 'no'
|
414
|
-
num_queued = 0
|
415
|
-
end
|
416
|
-
queue = Resque.peek('sms_notifications', 0, 30)
|
417
|
-
queue.find_all {|n| n['args'].first['address'] == address }.length.should == num_queued.to_i
|
418
|
-
end
|