flapjack 0.6.53 → 0.6.54

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/bin/flapjack +103 -19
  2. data/bin/flapjack-nagios-receiver +166 -52
  3. data/bin/flapper +107 -18
  4. data/etc/flapjack_config.yaml.example +16 -0
  5. data/features/events.feature +63 -0
  6. data/features/steps/events_steps.rb +5 -5
  7. data/features/steps/notifications_steps.rb +8 -6
  8. data/features/steps/time_travel_steps.rb +4 -4
  9. data/features/support/env.rb +1 -2
  10. data/flapjack.gemspec +1 -1
  11. data/lib/flapjack/configuration.rb +11 -13
  12. data/lib/flapjack/coordinator.rb +100 -220
  13. data/lib/flapjack/data/entity_check.rb +2 -2
  14. data/lib/flapjack/data/event.rb +3 -3
  15. data/lib/flapjack/executive.rb +30 -40
  16. data/lib/flapjack/filters/delays.rb +1 -1
  17. data/lib/flapjack/gateways/api.rb +6 -23
  18. data/lib/flapjack/gateways/email.rb +4 -10
  19. data/lib/flapjack/gateways/email/alert.html.haml +0 -5
  20. data/lib/flapjack/gateways/email/alert.text.erb +0 -1
  21. data/lib/flapjack/gateways/jabber.rb +80 -67
  22. data/lib/flapjack/gateways/oobetet.rb +29 -25
  23. data/lib/flapjack/gateways/pagerduty.rb +26 -45
  24. data/lib/flapjack/gateways/sms_messagenet.rb +10 -17
  25. data/lib/flapjack/gateways/web.rb +7 -21
  26. data/lib/flapjack/gateways/web/views/_css.haml +3 -0
  27. data/lib/flapjack/gateways/web/views/check.haml +1 -1
  28. data/lib/flapjack/logger.rb +57 -0
  29. data/lib/flapjack/patches.rb +0 -10
  30. data/lib/flapjack/pikelet.rb +214 -30
  31. data/lib/flapjack/redis_pool.rb +2 -17
  32. data/lib/flapjack/version.rb +1 -1
  33. data/spec/lib/flapjack/coordinator_spec.rb +116 -136
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +3 -3
  35. data/spec/lib/flapjack/executive_spec.rb +33 -34
  36. data/spec/lib/flapjack/gateways/api_spec.rb +4 -2
  37. data/spec/lib/flapjack/gateways/jabber_spec.rb +39 -36
  38. data/spec/lib/flapjack/gateways/oobetet_spec.rb +14 -24
  39. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +43 -45
  40. data/spec/lib/flapjack/gateways/web_spec.rb +42 -35
  41. data/spec/lib/flapjack/logger_spec.rb +32 -0
  42. data/spec/lib/flapjack/pikelet_spec.rb +124 -15
  43. data/spec/lib/flapjack/redis_pool_spec.rb +1 -3
  44. data/spec/spec_helper.rb +34 -1
  45. data/tasks/events.rake +1 -0
  46. data/tmp/create_event_ok.rb +31 -0
  47. data/tmp/create_event_unknown.rb +31 -0
  48. data/tmp/create_events_ok.rb +1 -1
  49. metadata +10 -11
  50. data/bin/flapjack-nagios-receiver-control +0 -15
  51. data/bin/flapper-control +0 -15
  52. data/lib/flapjack/daemonizing.rb +0 -186
  53. data/lib/flapjack/gateways/base.rb +0 -38
@@ -8,15 +8,12 @@ require 'em-synchrony/fiber_iterator'
8
8
  require 'yajl/json_gem'
9
9
 
10
10
  require 'flapjack/utility'
11
- require 'flapjack/gateways/base'
12
11
 
13
12
  module Flapjack
14
13
 
15
14
  module Gateways
16
15
 
17
16
  class Oobetet < Blather::Client
18
-
19
- include Flapjack::Gateways::Generic
20
17
  include Flapjack::Utility
21
18
 
22
19
  log = Logger.new(STDOUT)
@@ -24,13 +21,23 @@ module Flapjack
24
21
  log.level = Logger::INFO
25
22
  Blather.logger = log
26
23
 
24
+ def initialize(opts = {})
25
+ @config = opts[:config]
26
+ @logger = opts[:logger]
27
+ super()
28
+ end
29
+
30
+ def stop
31
+ @should_quit = true
32
+ end
33
+
27
34
  def setup
28
35
  @hostname = Socket.gethostname
29
36
  @flapjacktest_jid = Blather::JID.new((@config['jabberid'] || 'flapjacktest') + "/#{@hostname}:#{Process.pid}")
30
37
 
31
38
  super(@flapjacktest_jid, @config['password'], @config['server'], @config['port'].to_i)
32
39
 
33
- logger.debug("Building jabber connection with jabberid: " +
40
+ @logger.debug("Building jabber connection with jabberid: " +
34
41
  @flapjacktest_jid.to_s + ", port: " + @config['port'].to_s +
35
42
  ", server: " + @config['server'].to_s + ", password: " +
36
43
  @config['password'].to_s)
@@ -80,12 +87,12 @@ module Flapjack
80
87
 
81
88
  # Join the MUC Chat room after connecting.
82
89
  def on_ready(stanza)
83
- return if should_quit?
90
+ return if @should_quit
84
91
  @connected_at = Time.now.to_i
85
- logger.info("Jabber Connected")
92
+ @logger.info("Jabber Connected")
86
93
  if @config['rooms'] && @config['rooms'].length > 0
87
94
  @config['rooms'].each do |room|
88
- logger.info("Joining room #{room}")
95
+ @logger.info("Joining room #{room}")
89
96
  presence = Blather::Stanza::Presence.new
90
97
  presence.from = @flapjacktest_jid
91
98
  presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
@@ -98,8 +105,8 @@ module Flapjack
98
105
 
99
106
  # returning true to prevent the reactor loop from stopping
100
107
  def on_disconnect(stanza)
101
- return true if should_quit?
102
- logger.warn("jabbers disconnected! reconnecting in 1 second ...")
108
+ return true if @should_quit
109
+ @logger.warn("jabbers disconnected! reconnecting in 1 second ...")
103
110
  EventMachine::Timer.new(1) do
104
111
  connect # Blather::Client.connect
105
112
  end
@@ -107,12 +114,12 @@ module Flapjack
107
114
  end
108
115
 
109
116
  def on_groupchat(stanza)
110
- return if should_quit?
117
+ return if @should_quit
111
118
 
112
119
  stanza_body = stanza.body
113
120
 
114
- logger.debug("groupchat stanza body: #{stanza_body}")
115
- logger.debug("groupchat message received: #{stanza.inspect}")
121
+ @logger.debug("groupchat stanza body: #{stanza_body}")
122
+ @logger.debug("groupchat message received: #{stanza.inspect}")
116
123
 
117
124
  if (stanza_body =~ /^(?:problem|recovery|acknowledgement)/i) &&
118
125
  (stanza_body =~ /^(\w+).*#{Regexp.escape(@check_matcher)}/)
@@ -120,21 +127,21 @@ module Flapjack
120
127
  # got something interesting
121
128
  status = $1.downcase
122
129
  t = Time.now.to_i
123
- logger.debug("groupchat found the following state for #{@check_matcher}: #{status}")
130
+ @logger.debug("groupchat found the following state for #{@check_matcher}: #{status}")
124
131
 
125
132
  case status
126
133
  when 'problem'
127
- logger.debug("updating @times last_problem")
134
+ @logger.debug("updating @times last_problem")
128
135
  @times[:last_problem] = t
129
136
  when 'recovery'
130
- logger.debug("updating @times last_recovery")
137
+ @logger.debug("updating @times last_recovery")
131
138
  @times[:last_recovery] = t
132
139
  when 'acknowledgement'
133
- logger.debug("updating @times last_ack")
140
+ @logger.debug("updating @times last_ack")
134
141
  @times[:last_ack] = t
135
142
  end
136
143
  end
137
- logger.debug("@times: #{@times.inspect}")
144
+ @logger.debug("@times: #{@times.inspect}")
138
145
  end
139
146
 
140
147
  def check_timers
@@ -209,15 +216,15 @@ module Flapjack
209
216
  http = EM::HttpRequest.new(@pagerduty_events_api_url).post(options)
210
217
  response = Yajl::Parser.parse(http.response)
211
218
  status = http.response_header.status
212
- logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
219
+ @logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
213
220
  [status, response]
214
221
  end
215
222
 
216
- def main
217
- logger.debug("New oobetet pikelet with the following options: #{@config.inspect}")
223
+ def start
224
+ @logger.debug("New oobetet pikelet with the following options: #{@config.inspect}")
218
225
 
219
226
  keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
220
- logger.debug("calling keepalive on the jabber connection")
227
+ @logger.debug("calling keepalive on the jabber connection")
221
228
  write(' ') if connected?
222
229
  end
223
230
 
@@ -225,7 +232,7 @@ module Flapjack
225
232
  register_handlers
226
233
  connect # Blather::Client.connect
227
234
 
228
- until should_quit?
235
+ until @should_quit
229
236
  EM::Synchrony.sleep(10)
230
237
  check_timers
231
238
  end
@@ -236,6 +243,3 @@ module Flapjack
236
243
  end
237
244
  end
238
245
  end
239
-
240
-
241
-
@@ -9,47 +9,36 @@ require 'flapjack/data/entity_check'
9
9
  require 'flapjack/data/global'
10
10
  require 'flapjack/redis_pool'
11
11
 
12
- require 'flapjack/gateways/base'
13
-
14
12
  module Flapjack
15
13
 
16
14
  module Gateways
17
15
 
18
16
  class Pagerduty
19
- include Flapjack::Gateways::Generic
20
-
21
17
  PAGERDUTY_EVENTS_API_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
22
18
  SEM_PAGERDUTY_ACKS_RUNNING = 'sem_pagerduty_acks_running'
23
19
 
24
- alias_method :generic_bootstrap, :bootstrap
25
- alias_method :generic_cleanup, :cleanup
26
-
27
- def bootstrap(opts = {})
28
- generic_bootstrap(opts)
29
-
20
+ def initialize(opts = {})
21
+ @config = opts[:config]
22
+ @logger = opts[:logger]
30
23
  @redis_config = opts[:redis_config]
31
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
24
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
32
25
 
33
- logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
26
+ @logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
34
27
 
35
28
  @pagerduty_acks_started = nil
29
+ super()
36
30
  end
37
31
 
38
- def cleanup
39
- @redis.empty! if @redis
40
- @redis_timer.empty! if @redis_timer
41
- generic_cleanup
32
+ def stop
33
+ @logger.info("stopping")
34
+ @should_quit = true
35
+ @redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
42
36
  end
43
37
 
44
- def add_shutdown_event(opts = {})
45
- return unless redis = opts[:redis]
46
- redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
47
- end
48
-
49
- def main
50
- logger.debug("pagerduty gateway - commencing main method")
38
+ def start
39
+ @logger.info("starting")
51
40
  while not test_pagerduty_connection do
52
- logger.error("Can't connect to the pagerduty API, retrying after 10 seconds")
41
+ @logger.error("Can't connect to the pagerduty API, retrying after 10 seconds")
53
42
  EM::Synchrony.sleep(10)
54
43
  end
55
44
 
@@ -58,19 +47,18 @@ module Flapjack
58
47
  @redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
59
48
 
60
49
  acknowledgement_timer = EM::Synchrony.add_periodic_timer(10) do
61
- @redis_timer ||= Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
62
50
  find_pagerduty_acknowledgements_if_safe
63
51
  end
64
52
 
65
53
  queue = @config['queue']
66
54
  events = {}
67
55
 
68
- until should_quit?
69
- logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
56
+ until @should_quit
57
+ @logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
70
58
  events[queue] = @redis.blpop(queue, 0)
71
59
  event = Yajl::Parser.parse(events[queue][1])
72
60
  type = event['notification_type']
73
- logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
61
+ @logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
74
62
  unless 'shutdown'.eql?(type)
75
63
  event_id = event['event_id']
76
64
  entity, check = event_id.split(':')
@@ -117,18 +105,18 @@ module Flapjack
117
105
  # timeout of five minutes to guard against stale locks caused by crashing code) either in this
118
106
  # process or in other processes
119
107
  if (@pagerduty_acks_started and @pagerduty_acks_started > (Time.now.to_i - 300)) or
120
- @redis_timer.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
121
- logger.debug("skipping looking for acks in pagerduty as this is already happening")
108
+ @redis.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
109
+ @logger.debug("skipping looking for acks in pagerduty as this is already happening")
122
110
  return
123
111
  end
124
112
 
125
113
  @pagerduty_acks_started = Time.now.to_i
126
- @redis_timer.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
127
- @redis_timer.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)
114
+ @redis.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
115
+ @redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)
128
116
 
129
117
  find_pagerduty_acknowledgements
130
118
 
131
- @redis_timer.del(SEM_PAGERDUTY_ACKS_RUNNING)
119
+ @redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
132
120
  @pagerduty_acks_started = nil
133
121
  end
134
122
 
@@ -141,7 +129,7 @@ module Flapjack
141
129
  "description" => "I love APIs with noops." }
142
130
  code, results = send_pagerduty_event(noop)
143
131
  return true if code == 200 && results['status'] =~ /success/i
144
- logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
132
+ @logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
145
133
  false
146
134
  end
147
135
 
@@ -150,15 +138,14 @@ module Flapjack
150
138
  http = EM::HttpRequest.new(PAGERDUTY_EVENTS_API_URL).post(options)
151
139
  response = Yajl::Parser.parse(http.response)
152
140
  status = http.response_header.status
153
- logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
141
+ @logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
154
142
  [status, response]
155
143
  end
156
144
 
157
145
  def find_pagerduty_acknowledgements
146
+ @logger.debug("looking for acks in pagerduty for unack'd problems")
158
147
 
159
- logger.debug("looking for acks in pagerduty for unack'd problems")
160
-
161
- unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis_timer)
148
+ unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis)
162
149
 
163
150
  @logger.debug "found unacknowledged failing checks as follows: " + unacknowledged_failing_checks.join(', ')
164
151
 
@@ -190,7 +177,7 @@ module Flapjack
190
177
  end
191
178
 
192
179
  pg_acknowledged_by = acknowledged[:pg_acknowledged_by]
193
- @logger.debug "#{entity_check.entity_name}:#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
180
+ @logger.info "#{entity_check.entity_name}:#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
194
181
  who_text = ""
195
182
  if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
196
183
  who_text = " by #{pg_acknowledged_by['name']}"
@@ -223,12 +210,6 @@ module Flapjack
223
210
  @logger.debug("pagerduty_acknowledged?: auth: #{options[:head].inspect}")
224
211
 
225
212
  http = EM::HttpRequest.new(url).get(options)
226
- # DEBUG flapjack-pagerduty: pagerduty_acknowledged?: decoded response as:
227
- # {"incidents"=>[{"incident_number"=>40, "status"=>"acknowledged",
228
- # "last_status_change_by"=>{"id"=>"PO1NWPS", "name"=>"Jesse Reynolds",
229
- # "email"=>"jesse@bulletproof.net",
230
- # "html_url"=>"http://bltprf.pagerduty.com/users/PO1NWPS"}}], "limit"=>100, "offset"=>0,
231
- # "total"=>1}
232
213
  begin
233
214
  response = Yajl::Parser.parse(http.response)
234
215
  rescue Yajl::ParseError
@@ -3,27 +3,20 @@
3
3
  require 'em-synchrony'
4
4
  require 'em-synchrony/em-http'
5
5
 
6
- require 'flapjack/gateways/base'
7
-
8
6
  module Flapjack
9
7
  module Gateways
10
8
  class SmsMessagenet
11
- extend Flapjack::Gateways::Resque
12
9
 
13
10
  MESSAGENET_URL = 'https://www.messagenet.com.au/dotnet/Lodge.asmx/LodgeSMSMessage'
14
11
 
15
12
  class << self
16
13
 
17
- alias_method :orig_bootstrap, :bootstrap
18
-
19
- def bootstrap(opts = {})
20
- return if @bootstrapped
14
+ def start
21
15
  @sent = 0
22
- orig_bootstrap(opts)
23
16
  end
24
17
 
25
18
  def perform(notification)
26
- logger.debug "Woo, got a notification to send out: #{notification.inspect}"
19
+ @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
27
20
 
28
21
  notification_type = notification['notification_type']
29
22
  contact_first_name = notification['contact_first_name']
@@ -50,15 +43,15 @@ module Flapjack
50
43
  notification['message'] = message
51
44
 
52
45
  # TODO log error and skip instead of raising errors
53
- if config.nil? || (config.respond_to?(:empty?) && config.empty?)
54
- logger.error "Messagenet config is missing"
46
+ if @config.nil? || (@config.respond_to?(:empty?) && @config.empty?)
47
+ @logger.error "Messagenet config is missing"
55
48
  return
56
49
  end
57
50
 
58
51
  errors = []
59
52
 
60
- username = config["username"]
61
- password = config["password"]
53
+ username = @config["username"]
54
+ password = @config["password"]
62
55
  address = notification['address']
63
56
  message = notification['message']
64
57
  notification_id = notification['id']
@@ -74,7 +67,7 @@ module Flapjack
74
67
  end
75
68
 
76
69
  unless errors.empty?
77
- errors.each {|err| logger.error err }
70
+ errors.each {|err| @logger.error err }
78
71
  return
79
72
  end
80
73
 
@@ -85,15 +78,15 @@ module Flapjack
85
78
 
86
79
  http = EM::HttpRequest.new(MESSAGENET_URL).get(:query => query)
87
80
 
88
- logger.debug "server response: #{http.response}"
81
+ @logger.debug "server response: #{http.response}"
89
82
 
90
83
  status = (http.nil? || http.response_header.nil?) ? nil : http.response_header.status
91
84
  if (status >= 200) && (status <= 206)
92
85
  @sent += 1
93
- logger.info "Sent SMS via Messagenet, response status is #{status}, " +
86
+ @logger.info "Sent SMS via Messagenet, response status is #{status}, " +
94
87
  "notification_id: #{notification_id}"
95
88
  else
96
- logger.error "Failed to send SMS via Messagenet, response status is #{status}, " +
89
+ @logger.error "Failed to send SMS via Messagenet, response status is #{status}, " +
97
90
  "notification_id: #{notification_id}"
98
91
  end
99
92
 
@@ -13,8 +13,6 @@ require 'flapjack/data/entity_check'
13
13
  require 'flapjack/redis_pool'
14
14
  require 'flapjack/utility'
15
15
 
16
- require 'flapjack/gateways/base'
17
-
18
16
  module Flapjack
19
17
 
20
18
  module Gateways
@@ -34,8 +32,8 @@ module Flapjack
34
32
  s, h, b = printer.call(env)
35
33
  [s, h, b]
36
34
  else
37
- logger.error e.message
38
- logger.error e.backtrace.join("\n")
35
+ @logger.error e.message
36
+ @logger.error e.backtrace.join("\n")
39
37
  [503, {}, ""]
40
38
  end
41
39
  end
@@ -46,29 +44,17 @@ module Flapjack
46
44
  use Rack::MethodOverride
47
45
 
48
46
  class << self
49
- include Flapjack::Gateways::Thin
50
-
51
- attr_accessor :redis
52
-
53
- alias_method :thin_bootstrap, :bootstrap
54
- alias_method :thin_cleanup, :cleanup
47
+ def start
48
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
55
49
 
56
- def bootstrap(opts = {})
57
- thin_bootstrap(opts)
58
- @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1)
50
+ @logger.info "starting web - class"
59
51
 
60
- if config && config['access_log']
61
- access_logger = Flapjack::AsyncLogger.new(config['access_log'])
52
+ if @config && @config['access_log']
53
+ access_logger = Flapjack::AsyncLogger.new(@config['access_log'])
62
54
  use Flapjack::CommonLogger, access_logger
63
55
  end
64
56
 
65
57
  end
66
-
67
- def cleanup
68
- @redis.empty! if @redis
69
- thin_cleanup
70
- end
71
-
72
58
  end
73
59
 
74
60
  include Flapjack::Utility
@@ -24,6 +24,9 @@
24
24
  table td.critical {
25
25
  background-color: #fb9a99;
26
26
  }
27
+ table td.unknown {
28
+ background-color: #fb9a99;
29
+ }
27
30
  table td.down {
28
31
  background-color: #fb9a99;
29
32
  }
@@ -13,7 +13,7 @@
13
13
  %form{:action => "/acknowledgements/#{check_path_escaped}", :method => "post"}
14
14
  %h2
15
15
  State: #{@check_state ? @check_state.upcase : ''}
16
- - if (['warning', 'critical'].include?(@check_state) and !(@current_unscheduled_maintenance || @current_scheduled_maintenance))
16
+ - if (['warning', 'critical', 'unknown'].include?(@check_state) and !(@current_unscheduled_maintenance || @current_scheduled_maintenance))
17
17
  %input{:type => 'hidden', :name => 'acknowledgement_id', :value => "#{@acknowledgement_id}"}
18
18
  %input{:type => 'submit', :value => 'Acknowledge', :class => 'button'}
19
19
  with