flapjack 0.6.53 → 0.6.54

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (53)
  1. data/bin/flapjack +103 -19
  2. data/bin/flapjack-nagios-receiver +166 -52
  3. data/bin/flapper +107 -18
  4. data/etc/flapjack_config.yaml.example +16 -0
  5. data/features/events.feature +63 -0
  6. data/features/steps/events_steps.rb +5 -5
  7. data/features/steps/notifications_steps.rb +8 -6
  8. data/features/steps/time_travel_steps.rb +4 -4
  9. data/features/support/env.rb +1 -2
  10. data/flapjack.gemspec +1 -1
  11. data/lib/flapjack/configuration.rb +11 -13
  12. data/lib/flapjack/coordinator.rb +100 -220
  13. data/lib/flapjack/data/entity_check.rb +2 -2
  14. data/lib/flapjack/data/event.rb +3 -3
  15. data/lib/flapjack/executive.rb +30 -40
  16. data/lib/flapjack/filters/delays.rb +1 -1
  17. data/lib/flapjack/gateways/api.rb +6 -23
  18. data/lib/flapjack/gateways/email.rb +4 -10
  19. data/lib/flapjack/gateways/email/alert.html.haml +0 -5
  20. data/lib/flapjack/gateways/email/alert.text.erb +0 -1
  21. data/lib/flapjack/gateways/jabber.rb +80 -67
  22. data/lib/flapjack/gateways/oobetet.rb +29 -25
  23. data/lib/flapjack/gateways/pagerduty.rb +26 -45
  24. data/lib/flapjack/gateways/sms_messagenet.rb +10 -17
  25. data/lib/flapjack/gateways/web.rb +7 -21
  26. data/lib/flapjack/gateways/web/views/_css.haml +3 -0
  27. data/lib/flapjack/gateways/web/views/check.haml +1 -1
  28. data/lib/flapjack/logger.rb +57 -0
  29. data/lib/flapjack/patches.rb +0 -10
  30. data/lib/flapjack/pikelet.rb +214 -30
  31. data/lib/flapjack/redis_pool.rb +2 -17
  32. data/lib/flapjack/version.rb +1 -1
  33. data/spec/lib/flapjack/coordinator_spec.rb +116 -136
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +3 -3
  35. data/spec/lib/flapjack/executive_spec.rb +33 -34
  36. data/spec/lib/flapjack/gateways/api_spec.rb +4 -2
  37. data/spec/lib/flapjack/gateways/jabber_spec.rb +39 -36
  38. data/spec/lib/flapjack/gateways/oobetet_spec.rb +14 -24
  39. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +43 -45
  40. data/spec/lib/flapjack/gateways/web_spec.rb +42 -35
  41. data/spec/lib/flapjack/logger_spec.rb +32 -0
  42. data/spec/lib/flapjack/pikelet_spec.rb +124 -15
  43. data/spec/lib/flapjack/redis_pool_spec.rb +1 -3
  44. data/spec/spec_helper.rb +34 -1
  45. data/tasks/events.rake +1 -0
  46. data/tmp/create_event_ok.rb +31 -0
  47. data/tmp/create_event_unknown.rb +31 -0
  48. data/tmp/create_events_ok.rb +1 -1
  49. metadata +10 -11
  50. data/bin/flapjack-nagios-receiver-control +0 -15
  51. data/bin/flapper-control +0 -15
  52. data/lib/flapjack/daemonizing.rb +0 -186
  53. data/lib/flapjack/gateways/base.rb +0 -38
@@ -8,15 +8,12 @@ require 'em-synchrony/fiber_iterator'
8
8
  require 'yajl/json_gem'
9
9
 
10
10
  require 'flapjack/utility'
11
- require 'flapjack/gateways/base'
12
11
 
13
12
  module Flapjack
14
13
 
15
14
  module Gateways
16
15
 
17
16
  class Oobetet < Blather::Client
18
-
19
- include Flapjack::Gateways::Generic
20
17
  include Flapjack::Utility
21
18
 
22
19
  log = Logger.new(STDOUT)
@@ -24,13 +21,23 @@ module Flapjack
24
21
  log.level = Logger::INFO
25
22
  Blather.logger = log
26
23
 
24
+ def initialize(opts = {})
25
+ @config = opts[:config]
26
+ @logger = opts[:logger]
27
+ super()
28
+ end
29
+
30
+ def stop
31
+ @should_quit = true
32
+ end
33
+
27
34
  def setup
28
35
  @hostname = Socket.gethostname
29
36
  @flapjacktest_jid = Blather::JID.new((@config['jabberid'] || 'flapjacktest') + "/#{@hostname}:#{Process.pid}")
30
37
 
31
38
  super(@flapjacktest_jid, @config['password'], @config['server'], @config['port'].to_i)
32
39
 
33
- logger.debug("Building jabber connection with jabberid: " +
40
+ @logger.debug("Building jabber connection with jabberid: " +
34
41
  @flapjacktest_jid.to_s + ", port: " + @config['port'].to_s +
35
42
  ", server: " + @config['server'].to_s + ", password: " +
36
43
  @config['password'].to_s)
@@ -80,12 +87,12 @@ module Flapjack
80
87
 
81
88
  # Join the MUC Chat room after connecting.
82
89
  def on_ready(stanza)
83
- return if should_quit?
90
+ return if @should_quit
84
91
  @connected_at = Time.now.to_i
85
- logger.info("Jabber Connected")
92
+ @logger.info("Jabber Connected")
86
93
  if @config['rooms'] && @config['rooms'].length > 0
87
94
  @config['rooms'].each do |room|
88
- logger.info("Joining room #{room}")
95
+ @logger.info("Joining room #{room}")
89
96
  presence = Blather::Stanza::Presence.new
90
97
  presence.from = @flapjacktest_jid
91
98
  presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
@@ -98,8 +105,8 @@ module Flapjack
98
105
 
99
106
  # returning true to prevent the reactor loop from stopping
100
107
  def on_disconnect(stanza)
101
- return true if should_quit?
102
- logger.warn("jabbers disconnected! reconnecting in 1 second ...")
108
+ return true if @should_quit
109
+ @logger.warn("jabbers disconnected! reconnecting in 1 second ...")
103
110
  EventMachine::Timer.new(1) do
104
111
  connect # Blather::Client.connect
105
112
  end
@@ -107,12 +114,12 @@ module Flapjack
107
114
  end
108
115
 
109
116
  def on_groupchat(stanza)
110
- return if should_quit?
117
+ return if @should_quit
111
118
 
112
119
  stanza_body = stanza.body
113
120
 
114
- logger.debug("groupchat stanza body: #{stanza_body}")
115
- logger.debug("groupchat message received: #{stanza.inspect}")
121
+ @logger.debug("groupchat stanza body: #{stanza_body}")
122
+ @logger.debug("groupchat message received: #{stanza.inspect}")
116
123
 
117
124
  if (stanza_body =~ /^(?:problem|recovery|acknowledgement)/i) &&
118
125
  (stanza_body =~ /^(\w+).*#{Regexp.escape(@check_matcher)}/)
@@ -120,21 +127,21 @@ module Flapjack
120
127
  # got something interesting
121
128
  status = $1.downcase
122
129
  t = Time.now.to_i
123
- logger.debug("groupchat found the following state for #{@check_matcher}: #{status}")
130
+ @logger.debug("groupchat found the following state for #{@check_matcher}: #{status}")
124
131
 
125
132
  case status
126
133
  when 'problem'
127
- logger.debug("updating @times last_problem")
134
+ @logger.debug("updating @times last_problem")
128
135
  @times[:last_problem] = t
129
136
  when 'recovery'
130
- logger.debug("updating @times last_recovery")
137
+ @logger.debug("updating @times last_recovery")
131
138
  @times[:last_recovery] = t
132
139
  when 'acknowledgement'
133
- logger.debug("updating @times last_ack")
140
+ @logger.debug("updating @times last_ack")
134
141
  @times[:last_ack] = t
135
142
  end
136
143
  end
137
- logger.debug("@times: #{@times.inspect}")
144
+ @logger.debug("@times: #{@times.inspect}")
138
145
  end
139
146
 
140
147
  def check_timers
@@ -209,15 +216,15 @@ module Flapjack
209
216
  http = EM::HttpRequest.new(@pagerduty_events_api_url).post(options)
210
217
  response = Yajl::Parser.parse(http.response)
211
218
  status = http.response_header.status
212
- logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
219
+ @logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
213
220
  [status, response]
214
221
  end
215
222
 
216
- def main
217
- logger.debug("New oobetet pikelet with the following options: #{@config.inspect}")
223
+ def start
224
+ @logger.debug("New oobetet pikelet with the following options: #{@config.inspect}")
218
225
 
219
226
  keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
220
- logger.debug("calling keepalive on the jabber connection")
227
+ @logger.debug("calling keepalive on the jabber connection")
221
228
  write(' ') if connected?
222
229
  end
223
230
 
@@ -225,7 +232,7 @@ module Flapjack
225
232
  register_handlers
226
233
  connect # Blather::Client.connect
227
234
 
228
- until should_quit?
235
+ until @should_quit
229
236
  EM::Synchrony.sleep(10)
230
237
  check_timers
231
238
  end
@@ -236,6 +243,3 @@ module Flapjack
236
243
  end
237
244
  end
238
245
  end
239
-
240
-
241
-
@@ -9,47 +9,36 @@ require 'flapjack/data/entity_check'
9
9
  require 'flapjack/data/global'
10
10
  require 'flapjack/redis_pool'
11
11
 
12
- require 'flapjack/gateways/base'
13
-
14
12
  module Flapjack
15
13
 
16
14
  module Gateways
17
15
 
18
16
  class Pagerduty
19
- include Flapjack::Gateways::Generic
20
-
21
17
  PAGERDUTY_EVENTS_API_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
22
18
  SEM_PAGERDUTY_ACKS_RUNNING = 'sem_pagerduty_acks_running'
23
19
 
24
- alias_method :generic_bootstrap, :bootstrap
25
- alias_method :generic_cleanup, :cleanup
26
-
27
- def bootstrap(opts = {})
28
- generic_bootstrap(opts)
29
-
20
+ def initialize(opts = {})
21
+ @config = opts[:config]
22
+ @logger = opts[:logger]
30
23
  @redis_config = opts[:redis_config]
31
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
24
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
32
25
 
33
- logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
26
+ @logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
34
27
 
35
28
  @pagerduty_acks_started = nil
29
+ super()
36
30
  end
37
31
 
38
- def cleanup
39
- @redis.empty! if @redis
40
- @redis_timer.empty! if @redis_timer
41
- generic_cleanup
32
+ def stop
33
+ @logger.info("stopping")
34
+ @should_quit = true
35
+ @redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
42
36
  end
43
37
 
44
- def add_shutdown_event(opts = {})
45
- return unless redis = opts[:redis]
46
- redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
47
- end
48
-
49
- def main
50
- logger.debug("pagerduty gateway - commencing main method")
38
+ def start
39
+ @logger.info("starting")
51
40
  while not test_pagerduty_connection do
52
- logger.error("Can't connect to the pagerduty API, retrying after 10 seconds")
41
+ @logger.error("Can't connect to the pagerduty API, retrying after 10 seconds")
53
42
  EM::Synchrony.sleep(10)
54
43
  end
55
44
 
@@ -58,19 +47,18 @@ module Flapjack
58
47
  @redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
59
48
 
60
49
  acknowledgement_timer = EM::Synchrony.add_periodic_timer(10) do
61
- @redis_timer ||= Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
62
50
  find_pagerduty_acknowledgements_if_safe
63
51
  end
64
52
 
65
53
  queue = @config['queue']
66
54
  events = {}
67
55
 
68
- until should_quit?
69
- logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
56
+ until @should_quit
57
+ @logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
70
58
  events[queue] = @redis.blpop(queue, 0)
71
59
  event = Yajl::Parser.parse(events[queue][1])
72
60
  type = event['notification_type']
73
- logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
61
+ @logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
74
62
  unless 'shutdown'.eql?(type)
75
63
  event_id = event['event_id']
76
64
  entity, check = event_id.split(':')
@@ -117,18 +105,18 @@ module Flapjack
117
105
  # timeout of five minutes to guard against stale locks caused by crashing code) either in this
118
106
  # process or in other processes
119
107
  if (@pagerduty_acks_started and @pagerduty_acks_started > (Time.now.to_i - 300)) or
120
- @redis_timer.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
121
- logger.debug("skipping looking for acks in pagerduty as this is already happening")
108
+ @redis.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
109
+ @logger.debug("skipping looking for acks in pagerduty as this is already happening")
122
110
  return
123
111
  end
124
112
 
125
113
  @pagerduty_acks_started = Time.now.to_i
126
- @redis_timer.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
127
- @redis_timer.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)
114
+ @redis.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
115
+ @redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)
128
116
 
129
117
  find_pagerduty_acknowledgements
130
118
 
131
- @redis_timer.del(SEM_PAGERDUTY_ACKS_RUNNING)
119
+ @redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
132
120
  @pagerduty_acks_started = nil
133
121
  end
134
122
 
@@ -141,7 +129,7 @@ module Flapjack
141
129
  "description" => "I love APIs with noops." }
142
130
  code, results = send_pagerduty_event(noop)
143
131
  return true if code == 200 && results['status'] =~ /success/i
144
- logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
132
+ @logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
145
133
  false
146
134
  end
147
135
 
@@ -150,15 +138,14 @@ module Flapjack
150
138
  http = EM::HttpRequest.new(PAGERDUTY_EVENTS_API_URL).post(options)
151
139
  response = Yajl::Parser.parse(http.response)
152
140
  status = http.response_header.status
153
- logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
141
+ @logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
154
142
  [status, response]
155
143
  end
156
144
 
157
145
  def find_pagerduty_acknowledgements
146
+ @logger.debug("looking for acks in pagerduty for unack'd problems")
158
147
 
159
- logger.debug("looking for acks in pagerduty for unack'd problems")
160
-
161
- unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis_timer)
148
+ unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis)
162
149
 
163
150
  @logger.debug "found unacknowledged failing checks as follows: " + unacknowledged_failing_checks.join(', ')
164
151
 
@@ -190,7 +177,7 @@ module Flapjack
190
177
  end
191
178
 
192
179
  pg_acknowledged_by = acknowledged[:pg_acknowledged_by]
193
- @logger.debug "#{entity_check.entity_name}:#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
180
+ @logger.info "#{entity_check.entity_name}:#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
194
181
  who_text = ""
195
182
  if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
196
183
  who_text = " by #{pg_acknowledged_by['name']}"
@@ -223,12 +210,6 @@ module Flapjack
223
210
  @logger.debug("pagerduty_acknowledged?: auth: #{options[:head].inspect}")
224
211
 
225
212
  http = EM::HttpRequest.new(url).get(options)
226
- # DEBUG flapjack-pagerduty: pagerduty_acknowledged?: decoded response as:
227
- # {"incidents"=>[{"incident_number"=>40, "status"=>"acknowledged",
228
- # "last_status_change_by"=>{"id"=>"PO1NWPS", "name"=>"Jesse Reynolds",
229
- # "email"=>"jesse@bulletproof.net",
230
- # "html_url"=>"http://bltprf.pagerduty.com/users/PO1NWPS"}}], "limit"=>100, "offset"=>0,
231
- # "total"=>1}
232
213
  begin
233
214
  response = Yajl::Parser.parse(http.response)
234
215
  rescue Yajl::ParseError
@@ -3,27 +3,20 @@
3
3
  require 'em-synchrony'
4
4
  require 'em-synchrony/em-http'
5
5
 
6
- require 'flapjack/gateways/base'
7
-
8
6
  module Flapjack
9
7
  module Gateways
10
8
  class SmsMessagenet
11
- extend Flapjack::Gateways::Resque
12
9
 
13
10
  MESSAGENET_URL = 'https://www.messagenet.com.au/dotnet/Lodge.asmx/LodgeSMSMessage'
14
11
 
15
12
  class << self
16
13
 
17
- alias_method :orig_bootstrap, :bootstrap
18
-
19
- def bootstrap(opts = {})
20
- return if @bootstrapped
14
+ def start
21
15
  @sent = 0
22
- orig_bootstrap(opts)
23
16
  end
24
17
 
25
18
  def perform(notification)
26
- logger.debug "Woo, got a notification to send out: #{notification.inspect}"
19
+ @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
27
20
 
28
21
  notification_type = notification['notification_type']
29
22
  contact_first_name = notification['contact_first_name']
@@ -50,15 +43,15 @@ module Flapjack
50
43
  notification['message'] = message
51
44
 
52
45
  # TODO log error and skip instead of raising errors
53
- if config.nil? || (config.respond_to?(:empty?) && config.empty?)
54
- logger.error "Messagenet config is missing"
46
+ if @config.nil? || (@config.respond_to?(:empty?) && @config.empty?)
47
+ @logger.error "Messagenet config is missing"
55
48
  return
56
49
  end
57
50
 
58
51
  errors = []
59
52
 
60
- username = config["username"]
61
- password = config["password"]
53
+ username = @config["username"]
54
+ password = @config["password"]
62
55
  address = notification['address']
63
56
  message = notification['message']
64
57
  notification_id = notification['id']
@@ -74,7 +67,7 @@ module Flapjack
74
67
  end
75
68
 
76
69
  unless errors.empty?
77
- errors.each {|err| logger.error err }
70
+ errors.each {|err| @logger.error err }
78
71
  return
79
72
  end
80
73
 
@@ -85,15 +78,15 @@ module Flapjack
85
78
 
86
79
  http = EM::HttpRequest.new(MESSAGENET_URL).get(:query => query)
87
80
 
88
- logger.debug "server response: #{http.response}"
81
+ @logger.debug "server response: #{http.response}"
89
82
 
90
83
  status = (http.nil? || http.response_header.nil?) ? nil : http.response_header.status
91
84
  if (status >= 200) && (status <= 206)
92
85
  @sent += 1
93
- logger.info "Sent SMS via Messagenet, response status is #{status}, " +
86
+ @logger.info "Sent SMS via Messagenet, response status is #{status}, " +
94
87
  "notification_id: #{notification_id}"
95
88
  else
96
- logger.error "Failed to send SMS via Messagenet, response status is #{status}, " +
89
+ @logger.error "Failed to send SMS via Messagenet, response status is #{status}, " +
97
90
  "notification_id: #{notification_id}"
98
91
  end
99
92
 
@@ -13,8 +13,6 @@ require 'flapjack/data/entity_check'
13
13
  require 'flapjack/redis_pool'
14
14
  require 'flapjack/utility'
15
15
 
16
- require 'flapjack/gateways/base'
17
-
18
16
  module Flapjack
19
17
 
20
18
  module Gateways
@@ -34,8 +32,8 @@ module Flapjack
34
32
  s, h, b = printer.call(env)
35
33
  [s, h, b]
36
34
  else
37
- logger.error e.message
38
- logger.error e.backtrace.join("\n")
35
+ @logger.error e.message
36
+ @logger.error e.backtrace.join("\n")
39
37
  [503, {}, ""]
40
38
  end
41
39
  end
@@ -46,29 +44,17 @@ module Flapjack
46
44
  use Rack::MethodOverride
47
45
 
48
46
  class << self
49
- include Flapjack::Gateways::Thin
50
-
51
- attr_accessor :redis
52
-
53
- alias_method :thin_bootstrap, :bootstrap
54
- alias_method :thin_cleanup, :cleanup
47
+ def start
48
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
55
49
 
56
- def bootstrap(opts = {})
57
- thin_bootstrap(opts)
58
- @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1)
50
+ @logger.info "starting web - class"
59
51
 
60
- if config && config['access_log']
61
- access_logger = Flapjack::AsyncLogger.new(config['access_log'])
52
+ if @config && @config['access_log']
53
+ access_logger = Flapjack::AsyncLogger.new(@config['access_log'])
62
54
  use Flapjack::CommonLogger, access_logger
63
55
  end
64
56
 
65
57
  end
66
-
67
- def cleanup
68
- @redis.empty! if @redis
69
- thin_cleanup
70
- end
71
-
72
58
  end
73
59
 
74
60
  include Flapjack::Utility
@@ -24,6 +24,9 @@
24
24
  table td.critical {
25
25
  background-color: #fb9a99;
26
26
  }
27
+ table td.unknown {
28
+ background-color: #fb9a99;
29
+ }
27
30
  table td.down {
28
31
  background-color: #fb9a99;
29
32
  }
@@ -13,7 +13,7 @@
13
13
  %form{:action => "/acknowledgements/#{check_path_escaped}", :method => "post"}
14
14
  %h2
15
15
  State: #{@check_state ? @check_state.upcase : ''}
16
- - if (['warning', 'critical'].include?(@check_state) and !(@current_unscheduled_maintenance || @current_scheduled_maintenance))
16
+ - if (['warning', 'critical', 'unknown'].include?(@check_state) and !(@current_unscheduled_maintenance || @current_scheduled_maintenance))
17
17
  %input{:type => 'hidden', :name => 'acknowledgement_id', :value => "#{@acknowledgement_id}"}
18
18
  %input{:type => 'submit', :value => 'Acknowledge', :class => 'button'}
19
19
  with