flapjack 0.6.53 → 0.6.54

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/bin/flapjack +103 -19
  2. data/bin/flapjack-nagios-receiver +166 -52
  3. data/bin/flapper +107 -18
  4. data/etc/flapjack_config.yaml.example +16 -0
  5. data/features/events.feature +63 -0
  6. data/features/steps/events_steps.rb +5 -5
  7. data/features/steps/notifications_steps.rb +8 -6
  8. data/features/steps/time_travel_steps.rb +4 -4
  9. data/features/support/env.rb +1 -2
  10. data/flapjack.gemspec +1 -1
  11. data/lib/flapjack/configuration.rb +11 -13
  12. data/lib/flapjack/coordinator.rb +100 -220
  13. data/lib/flapjack/data/entity_check.rb +2 -2
  14. data/lib/flapjack/data/event.rb +3 -3
  15. data/lib/flapjack/executive.rb +30 -40
  16. data/lib/flapjack/filters/delays.rb +1 -1
  17. data/lib/flapjack/gateways/api.rb +6 -23
  18. data/lib/flapjack/gateways/email.rb +4 -10
  19. data/lib/flapjack/gateways/email/alert.html.haml +0 -5
  20. data/lib/flapjack/gateways/email/alert.text.erb +0 -1
  21. data/lib/flapjack/gateways/jabber.rb +80 -67
  22. data/lib/flapjack/gateways/oobetet.rb +29 -25
  23. data/lib/flapjack/gateways/pagerduty.rb +26 -45
  24. data/lib/flapjack/gateways/sms_messagenet.rb +10 -17
  25. data/lib/flapjack/gateways/web.rb +7 -21
  26. data/lib/flapjack/gateways/web/views/_css.haml +3 -0
  27. data/lib/flapjack/gateways/web/views/check.haml +1 -1
  28. data/lib/flapjack/logger.rb +57 -0
  29. data/lib/flapjack/patches.rb +0 -10
  30. data/lib/flapjack/pikelet.rb +214 -30
  31. data/lib/flapjack/redis_pool.rb +2 -17
  32. data/lib/flapjack/version.rb +1 -1
  33. data/spec/lib/flapjack/coordinator_spec.rb +116 -136
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +3 -3
  35. data/spec/lib/flapjack/executive_spec.rb +33 -34
  36. data/spec/lib/flapjack/gateways/api_spec.rb +4 -2
  37. data/spec/lib/flapjack/gateways/jabber_spec.rb +39 -36
  38. data/spec/lib/flapjack/gateways/oobetet_spec.rb +14 -24
  39. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +43 -45
  40. data/spec/lib/flapjack/gateways/web_spec.rb +42 -35
  41. data/spec/lib/flapjack/logger_spec.rb +32 -0
  42. data/spec/lib/flapjack/pikelet_spec.rb +124 -15
  43. data/spec/lib/flapjack/redis_pool_spec.rb +1 -3
  44. data/spec/spec_helper.rb +34 -1
  45. data/tasks/events.rake +1 -0
  46. data/tmp/create_event_ok.rb +31 -0
  47. data/tmp/create_event_unknown.rb +31 -0
  48. data/tmp/create_events_ok.rb +1 -1
  49. metadata +10 -11
  50. data/bin/flapjack-nagios-receiver-control +0 -15
  51. data/bin/flapper-control +0 -15
  52. data/lib/flapjack/daemonizing.rb +0 -186
  53. data/lib/flapjack/gateways/base.rb +0 -38
@@ -238,7 +238,7 @@ module Flapjack
238
238
  @redis.zadd("#{@key}:sorted_state_timestamps", timestamp, timestamp)
239
239
 
240
240
  case state
241
- when STATE_WARNING, STATE_CRITICAL
241
+ when STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN
242
242
  @redis.zadd('failed_checks', timestamp, @key)
243
243
  # FIXME: Iterate through a list of tags associated with an entity:check pair, and update counters
244
244
  @redis.zadd("failed_checks:client:#{client}", timestamp, @key) if client
@@ -310,7 +310,7 @@ module Flapjack
310
310
  end
311
311
 
312
312
  def failed?
313
- [STATE_WARNING, STATE_CRITICAL].include?( state )
313
+ [STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN].include?( state )
314
314
  end
315
315
 
316
316
  def ok?
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'yajl'
3
+ require 'yajl/json_gem'
4
4
 
5
5
  module Flapjack
6
6
  module Data
@@ -43,7 +43,7 @@ module Flapjack
43
43
  raise "Redis connection not set" unless redis = opts[:redis]
44
44
 
45
45
  evt['time'] = Time.now.to_i if evt['time'].nil?
46
- redis.rpush('events', Yajl::Encoder.encode(evt))
46
+ redis.rpush('events', ::Yajl::Encoder.encode(evt))
47
47
  end
48
48
 
49
49
  # Provide a count of the number of events on the queue to be processed.
@@ -147,7 +147,7 @@ module Flapjack
147
147
  end
148
148
 
149
149
  def failure?
150
- warning? or critical?
150
+ warning? or critical? or unknown?
151
151
  end
152
152
 
153
153
  end
@@ -3,7 +3,6 @@
3
3
  require 'log4r'
4
4
  require 'log4r/outputter/fileoutputter'
5
5
 
6
- require 'flapjack'
7
6
  require 'flapjack/filters/acknowledgement'
8
7
  require 'flapjack/filters/ok'
9
8
  require 'flapjack/filters/scheduled_maintenance'
@@ -14,7 +13,6 @@ require 'flapjack/data/contact'
14
13
  require 'flapjack/data/entity_check'
15
14
  require 'flapjack/data/notification'
16
15
  require 'flapjack/data/event'
17
- require 'flapjack/pikelet'
18
16
  require 'flapjack/redis_pool'
19
17
 
20
18
  require 'flapjack/gateways/email'
@@ -23,15 +21,12 @@ require 'flapjack/gateways/sms_messagenet'
23
21
  module Flapjack
24
22
 
25
23
  class Executive
26
- include Flapjack::GenericPikelet
27
24
 
28
- alias_method :generic_bootstrap, :bootstrap
29
- alias_method :generic_cleanup, :cleanup
30
-
31
- def bootstrap(opts = {})
32
- generic_bootstrap(opts)
33
-
34
- @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1)
25
+ def initialize(opts = {})
26
+ @config = opts[:config]
27
+ @redis_config = opts[:redis_config]
28
+ @logger = opts[:logger]
29
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
35
30
 
36
31
  @queues = {:email => @config['email_queue'],
37
32
  :sms => @config['sms_queue'],
@@ -44,7 +39,7 @@ module Flapjack
44
39
 
45
40
  # FIXME: Put loading filters into separate method
46
41
  # FIXME: should we make the filters more configurable by the end user?
47
- options = { :log => @logger, :persistence => @redis }
42
+ options = { :log => opts[:logger], :persistence => @redis }
48
43
  @filters = []
49
44
  @filters << Flapjack::Filters::Ok.new(options)
50
45
  @filters << Flapjack::Filters::ScheduledMaintenance.new(options)
@@ -80,16 +75,11 @@ module Flapjack
80
75
  @redis.hset("event_counters:#{@instance_id}", 'action', 0)
81
76
  end
82
77
 
83
- def cleanup
84
- @redis.empty! if @redis
85
- generic_cleanup
86
- end
87
-
88
- def main
78
+ def start
89
79
  @logger.info("Booting main loop.")
90
80
 
91
- until should_quit? && @received_shutdown
92
- @logger.info("Waiting for event...")
81
+ until @should_quit
82
+ @logger.debug("Waiting for event...")
93
83
  event = Flapjack::Data::Event.next(:redis => @redis)
94
84
  process_event(event) unless event.nil?
95
85
  end
@@ -99,12 +89,12 @@ module Flapjack
99
89
 
100
90
  # this must use a separate connection to the main Executive one, as it's running
101
91
  # from a different fiber while the main one is blocking.
102
- def add_shutdown_event(opts = {})
103
- return unless redis = opts[:redis]
104
- redis.rpush('events', JSON.generate('type' => 'shutdown',
105
- 'host' => '',
106
- 'service' => '',
107
- 'state' => ''))
92
+ def stop
93
+ @should_quit = true
94
+ @redis.rpush('events', JSON.generate('type' => 'shutdown',
95
+ 'host' => '',
96
+ 'service' => '',
97
+ 'state' => ''))
108
98
  end
109
99
 
110
100
  private
@@ -115,29 +105,29 @@ module Flapjack
115
105
  @logger.debug("Raw event received: #{event.inspect}")
116
106
  time_at = event.time
117
107
  time_at_str = time_at ? ", #{Time.at(time_at).to_s}" : ''
118
- @logger.info("Processing Event: #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")
108
+ @logger.debug("Processing Event: #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")
119
109
 
120
- entity_check = (event.type == 'shutdown') ? nil :
110
+ entity_check = ('shutdown' == event.type) ? nil :
121
111
  Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @redis)
122
112
 
123
- result = update_keys(event, entity_check)
113
+ result = update_keys(event, entity_check)
124
114
  return if result[:shutdown]
125
- skip_filters = result[:skip_filters]
126
115
 
127
- blocker = @filters.find {|filter| filter.block?(event) } unless skip_filters
116
+ blocker = nil
128
117
 
129
- if skip_filters
130
- @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because filtering was skipped")
118
+ if result[:skip_filters]
119
+ @logger.debug("Not generating notifications for event #{event.id} because filtering was skipped")
131
120
  return
121
+ else
122
+ blocker = @filters.find {|filter| filter.block?(event) }
132
123
  end
133
124
 
134
125
  if blocker
135
- blocker_names = [ blocker.name ]
136
- @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because these filters blocked: #{blocker_names.join(', ')}")
126
+ @logger.debug("Not generating notifications for event #{event.id} because this filter blocked: #{blocker.name}")
137
127
  return
138
128
  end
139
129
 
140
- @logger.info("#{Time.now}: Sending notifications for event #{event.id}")
130
+ @logger.info("Generating notifications for event #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")
141
131
  send_notification_messages(event, entity_check)
142
132
  end
143
133
 
@@ -201,7 +191,7 @@ module Flapjack
201
191
  end
202
192
  when 'shutdown'
203
193
  # should this be logged as an action instead? being minimally invasive for now
204
- result[:shutdown] = @received_shutdown = true
194
+ result[:shutdown] = true
205
195
  end
206
196
 
207
197
  result
@@ -215,9 +205,9 @@ module Flapjack
215
205
  case event.type
216
206
  when 'service'
217
207
  case event.state
218
- when 'ok', 'unknown'
208
+ when 'ok'
219
209
  notification_type = 'recovery'
220
- when 'warning', 'critical'
210
+ when 'warning', 'critical', 'unknown'
221
211
  notification_type = 'problem'
222
212
  end
223
213
  when 'action'
@@ -237,7 +227,7 @@ module Flapjack
237
227
  if contacts.empty?
238
228
  @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS")
239
229
  return
240
- end
230
+ end
241
231
 
242
232
  notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
243
233
 
@@ -248,7 +238,7 @@ module Flapjack
248
238
  "#{notification_type} | #{msg.contact.id} | #{media_type.to_s} | #{msg.address}")
249
239
 
250
240
  unless @queues[media_type]
251
- # TODO log error
241
+ @logger.error("no queue for media type: #{media_type}")
252
242
  next
253
243
  end
254
244
 
@@ -19,7 +19,7 @@ module Flapjack
19
19
 
20
20
  result = false
21
21
 
22
- if (event.type == 'service') and (event.critical? or event.warning?)
22
+ if (event.type == 'service') and (event.failure?)
23
23
 
24
24
  entity_check = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
25
25
  current_time = Time.now.to_i
@@ -19,8 +19,6 @@ require 'flapjack/gateways/api/entity_presenter'
19
19
  require 'flapjack/rack_logger'
20
20
  require 'flapjack/redis_pool'
21
21
 
22
- require 'flapjack/gateways/base'
23
-
24
22
  # from https://github.com/sinatra/sinatra/issues/501
25
23
  # TODO move to its own file
26
24
  module Rack
@@ -59,8 +57,8 @@ module Flapjack
59
57
  else
60
58
  # doesn't work with Rack::Test unless we wrap tests in EM.synchrony blocks
61
59
  rescue_exception = Proc.new { |env, exception|
62
- logger.error exception.message
63
- logger.error exception.backtrace.join("\n")
60
+ @logger.error exception.message
61
+ @logger.error exception.backtrace.join("\n")
64
62
  [503, {}, {:errors => [exception.message]}.to_json]
65
63
  }
66
64
 
@@ -70,28 +68,13 @@ module Flapjack
70
68
  use Rack::JsonParamsParser
71
69
 
72
70
  class << self
73
- include Flapjack::Gateways::Thin
74
-
75
- attr_accessor :redis
76
-
77
- alias_method :thin_bootstrap, :bootstrap
78
- alias_method :thin_cleanup, :cleanup
79
-
80
- def bootstrap(opts = {})
81
- thin_bootstrap(opts)
82
- @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1)
83
-
84
- if config && config['access_log']
85
- access_logger = Flapjack::AsyncLogger.new(config['access_log'])
71
+ def start
72
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
73
+ if @config && @config['access_log']
74
+ access_logger = Flapjack::AsyncLogger.new(@config['access_log'])
86
75
  use Flapjack::CommonLogger, access_logger
87
76
  end
88
77
  end
89
-
90
- def cleanup
91
- @redis.empty! if @redis
92
- thin_cleanup
93
- end
94
-
95
78
  end
96
79
 
97
80
  def redis
@@ -9,23 +9,17 @@ require 'em-synchrony'
9
9
  require 'em/protocols/smtpclient'
10
10
 
11
11
  require 'flapjack/data/entity_check'
12
- require 'flapjack/gateways/base'
13
12
 
14
13
  module Flapjack
15
14
  module Gateways
16
15
 
17
16
  class Email
18
- extend Flapjack::Gateways::Resque
19
17
 
20
18
  class << self
21
19
 
22
- alias_method :orig_bootstrap, :bootstrap
23
-
24
- def bootstrap(opts = {})
25
- return if @bootstrapped
26
- @smtp_config = opts[:config].delete('smtp_config')
20
+ def start
21
+ @smtp_config = @config.delete('smtp_config')
27
22
  @sent = 0
28
- orig_bootstrap(opts)
29
23
  end
30
24
 
31
25
  def perform(notification)
@@ -61,7 +55,7 @@ module Flapjack
61
55
 
62
56
  fqdn = `/bin/hostname -f`.chomp
63
57
  m_from = "flapjack@#{fqdn}"
64
- logger.debug("flapjack_mailer: set from to #{m_from}")
58
+ @logger.debug("flapjack_mailer: set from to #{m_from}")
65
59
  m_reply_to = m_from
66
60
  m_to = notification['address']
67
61
 
@@ -92,7 +86,7 @@ module Flapjack
92
86
  @logger.info "Email response: #{response.inspect}"
93
87
 
94
88
  rescue Exception => e
95
- @logger.error "Error delivering email to #{mail.to}: #{e.message}"
89
+ @logger.error "Error delivering email to #{m_to}: #{e.message}"
96
90
  @logger.error e.backtrace.join("\n")
97
91
  end
98
92
  end
@@ -37,11 +37,6 @@
37
37
  %td
38
38
  %strong Time
39
39
  %td= Time.at(@time.to_i).to_s
40
- %tr
41
- %td
42
- %strong Flapjack
43
- %td
44
- %a(href="http://127.0.0.1:9292/check?entity=#{@entity}&amp;check=#{@check}") http://127.0.0.1:9292/check?entity=#{@entity}&amp;check=#{@check}
45
40
 
46
41
  %p Cheers,
47
42
  %p Flapjack
@@ -7,7 +7,6 @@ Check: <%= @check %>
7
7
  State: <%= @state %>
8
8
  Summary: <%= @summary %>
9
9
  Time: <%= Time.at(@time.to_i).to_s %>
10
- Flapjack: http://127.0.0.1:9292/check?entity=<%= @entity %>&check=<%= @check %>
11
10
 
12
11
  Cheers,
13
12
  Flapjack
@@ -21,14 +21,11 @@ require 'flapjack/redis_pool'
21
21
  require 'flapjack/utility'
22
22
  require 'flapjack/version'
23
23
 
24
- require 'flapjack/gateways/base'
25
-
26
24
  module Flapjack
27
25
 
28
26
  module Gateways
29
27
 
30
28
  class Jabber < Blather::Client
31
- include Flapjack::Gateways::Generic
32
29
  include Flapjack::Utility
33
30
 
34
31
  log = Logger.new(STDOUT)
@@ -36,23 +33,21 @@ module Flapjack
36
33
  log.level = Logger::INFO
37
34
  Blather.logger = log
38
35
 
39
- alias_method :generic_bootstrap, :bootstrap
40
- alias_method :generic_cleanup, :cleanup
41
-
42
- def bootstrap(opts = {})
43
- generic_bootstrap(opts)
44
-
36
+ def initialize(opts = {})
37
+ @config = opts[:config]
45
38
  @redis_config = opts[:redis_config]
46
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
39
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
40
+
41
+ @logger = opts[:logger]
47
42
 
48
43
  @buffer = []
49
44
  @hostname = Socket.gethostname
45
+ super()
50
46
  end
51
47
 
52
- def cleanup
53
- @redis.empty! if @redis
54
- @redis_handler.empty! if @redis_handler
55
- generic_cleanup
48
+ def stop
49
+ @should_quit = true
50
+ @redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
56
51
  end
57
52
 
58
53
  def setup
@@ -60,7 +55,7 @@ module Flapjack
60
55
 
61
56
  super(@flapjack_jid, @config['password'], @config['server'], @config['port'].to_i)
62
57
 
63
- logger.debug("Building jabber connection with jabberid: " +
58
+ @logger.debug("Building jabber connection with jabberid: " +
64
59
  @flapjack_jid.to_s + ", port: " + @config['port'].to_s +
65
60
  ", server: " + @config['server'].to_s + ", password: " +
66
61
  @config['password'].to_s)
@@ -94,25 +89,28 @@ module Flapjack
94
89
 
95
90
  # Join the MUC Chat room after connecting.
96
91
  def on_ready(stanza)
97
- return if should_quit? && @shutting_down
98
- @redis_handler ||= Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
92
+ return if @should_quit
99
93
  @connected_at = Time.now.to_i
100
- logger.info("Jabber Connected")
94
+ @logger.info("Jabber Connected")
101
95
  if @config['rooms'] && @config['rooms'].length > 0
102
96
  @config['rooms'].each do |room|
103
- logger.info("Joining room #{room}")
97
+ @logger.info("Joining room #{room}")
104
98
  presence = Blather::Stanza::Presence.new
105
99
  presence.from = @flapjack_jid
106
100
  presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
107
101
  presence << "<x xmlns='http://jabber.org/protocol/muc'/>"
108
- write presence
109
- say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
102
+ EventMachine::Synchrony.next_tick do
103
+ write presence
104
+ say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
105
+ end
110
106
  end
111
107
  end
112
108
  return if @buffer.empty?
113
109
  while stanza = @buffer.shift
114
110
  @logger.debug("Sending a buffered jabber message to: #{stanza.to}, using: #{stanza.type}, message: #{stanza.body}")
115
- write(stanza)
111
+ EventMachine::Synchrony.next_tick do
112
+ write(stanza)
113
+ end
116
114
  end
117
115
  end
118
116
 
@@ -139,12 +137,12 @@ module Flapjack
139
137
  four_hours = 4 * 60 * 60
140
138
  duration = (dur.nil? || (dur <= 0)) ? four_hours : dur
141
139
 
142
- event_id = @redis_handler.hget('unacknowledged_failures', ackid)
140
+ event_id = @redis.hget('unacknowledged_failures', ackid)
143
141
 
144
142
  if event_id.nil?
145
143
  error = "not found"
146
144
  else
147
- entity_check = Flapjack::Data::EntityCheck.for_event_id(event_id, :redis => @redis_handler)
145
+ entity_check = Flapjack::Data::EntityCheck.for_event_id(event_id, :redis => @redis)
148
146
  error = "unknown entity" if entity_check.nil?
149
147
  end
150
148
 
@@ -172,7 +170,7 @@ module Flapjack
172
170
 
173
171
  when command =~ /^identify$/
174
172
  t = Process.times
175
- boot_time = Time.at(@redis_handler.get('boot_time').to_i)
173
+ boot_time = Time.at(@redis.get('boot_time').to_i)
176
174
  msg = "Flapjack #{Flapjack::VERSION} process #{Process.pid} on #{`hostname -f`.chomp} \n"
177
175
  msg += "Boot time: #{boot_time}\n"
178
176
  msg += "User CPU Time: #{t.utime}\n"
@@ -185,11 +183,11 @@ module Flapjack
185
183
 
186
184
  msg = "so you want me to test notifications for entity: #{entity_name}, check: #{check_name} eh? ... well OK!"
187
185
 
188
- entity = Flapjack::Data::Entity.find_by_name(entity_name, :redis => @redis_handler)
186
+ entity = Flapjack::Data::Entity.find_by_name(entity_name, :redis => @redis)
189
187
  if entity
190
188
  summary = "Testing notifications to all contacts interested in entity: #{entity.name}, check: #{check_name}"
191
189
 
192
- entity_check = Flapjack::Data::EntityCheck.for_entity(entity, check_name, :redis => @redis_handler)
190
+ entity_check = Flapjack::Data::EntityCheck.for_entity(entity, check_name, :redis => @redis)
193
191
  puts entity_check.inspect
194
192
  entity_check.test_notifications('summary' => summary)
195
193
 
@@ -199,7 +197,7 @@ module Flapjack
199
197
 
200
198
  when command =~ /^(find )?entities matching\s+\/(.*)\/.*$/i
201
199
  pattern = $2.chomp.strip
202
- entity_list = Flapjack::Data::Entity.find_all_name_matching(pattern, :redis => @redis_handler)
200
+ entity_list = Flapjack::Data::Entity.find_all_name_matching(pattern, :redis => @redis)
203
201
  max_showable = 30
204
202
  number_found = entity_list.length
205
203
  entity_list = entity_list[0..(max_showable - 1)] if number_found > max_showable
@@ -226,8 +224,8 @@ module Flapjack
226
224
  end
227
225
 
228
226
  def on_groupchat(stanza)
229
- return if should_quit? && @shutting_down
230
- logger.debug("groupchat message received: #{stanza.inspect}")
227
+ return if @should_quit
228
+ @logger.debug("groupchat message received: #{stanza.inspect}")
231
229
 
232
230
  if stanza.body =~ /^flapjack:\s+(.*)/
233
231
  command = $1
@@ -238,15 +236,17 @@ module Flapjack
238
236
  action = results[:action]
239
237
 
240
238
  if msg || action
241
- say(stanza.from.stripped, msg, :groupchat)
242
- logger.debug("Sent to group chat: #{msg}")
243
- action.call if action
239
+ EventMachine::Synchrony.next_tick do
240
+ @logger.info("sending to group chat: #{msg}")
241
+ say(stanza.from.stripped, msg, :groupchat)
242
+ action.call if action
243
+ end
244
244
  end
245
245
  end
246
246
 
247
247
  def on_chat(stanza)
248
- return if should_quit? && @shutting_down
249
- logger.debug("chat message received: #{stanza.inspect}")
248
+ return if @should_quit
249
+ @logger.debug("chat message received: #{stanza.inspect}")
250
250
 
251
251
  if stanza.body =~ /^flapjack:\s+(.*)/
252
252
  command = $1
@@ -259,23 +259,42 @@ module Flapjack
259
259
  action = results[:action]
260
260
 
261
261
  if msg || action
262
- say(stanza.from.stripped, msg, :chat)
263
- logger.debug("Sent to #{stanza.from.stripped}: #{msg}")
264
- action.call if action
262
+ EventMachine::Synchrony.next_tick do
263
+ @logger.info("Sending to #{stanza.from.stripped}: #{msg}")
264
+ say(stanza.from.stripped, msg, :chat)
265
+ action.call if action
266
+ end
265
267
  end
266
268
  end
267
269
 
268
- # returning true to prevent the reactor loop from stopping
269
- def on_disconnect(stanza)
270
- return true if should_quit? && @shutting_down
271
- logger.warn("jabbers disconnected! reconnecting in 1 second ...")
272
- EventMachine::Timer.new(1) do
270
+ def connect_with_retry
271
+ attempt = 0
272
+ delay = 2
273
+ begin
274
+ attempt += 1
275
+ delay = 10 if attempt > 10
276
+ delay = 60 if attempt > 60
277
+ EventMachine::Synchrony.sleep(delay || 3) if attempt > 1
278
+ @logger.debug("attempting connection to the jabber server")
273
279
  connect # Blather::Client.connect
280
+ rescue StandardError => detail
281
+ @logger.error("unable to connect to the jabber server (attempt #{attempt}), retrying in #{delay} seconds ...")
282
+ @logger.error("detail: #{detail.message}")
283
+ @logger.debug(detail.backtrace.join("\n"))
284
+ retry unless @should_quit
274
285
  end
286
+ end
287
+
288
+ # returning true to prevent the reactor loop from stopping
289
+ def on_disconnect(stanza)
290
+ @logger.warn("disconnect handler called")
291
+ return true if @should_quit
292
+ @logger.warn("jabbers disconnected! reconnecting after a short deley ...")
293
+ connect_with_retry
275
294
  true
276
295
  end
277
296
 
278
- def say(to, msg, using = :chat)
297
+ def say(to, msg, using = :chat, tick = true)
279
298
  stanza = Blather::Stanza::Message.new(to, msg, using)
280
299
  if connected?
281
300
  @logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
@@ -286,45 +305,40 @@ module Flapjack
286
305
  end
287
306
  end
288
307
 
289
- def add_shutdown_event(opts = {})
290
- return unless redis = opts[:redis]
291
- redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
292
- end
293
-
294
- def main
295
- logger.debug("New Jabber pikelet with the following options: #{@config.inspect}")
296
-
297
- count_timer = EM::Synchrony.add_periodic_timer(30) do
298
- logger.debug("connection count: #{EM.connection_count} #{Time.now.to_s}.#{Time.now.usec.to_s}")
299
- end
308
+ def start
309
+ @logger.info("starting")
310
+ @logger.debug("new jabber pikelet with the following options: #{@config.inspect}")
300
311
 
301
312
  keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
302
- logger.debug("calling keepalive on the jabber connection")
303
- write(' ') if connected?
313
+ @logger.debug("calling keepalive on the jabber connection")
314
+ if connected?
315
+ EventMachine::Synchrony.next_tick do
316
+ write(' ')
317
+ end
318
+ end
304
319
  end
305
320
 
306
321
  setup
307
- connect # Blather::Client.connect
322
+ connect_with_retry
308
323
 
309
324
  # simplified to use a single queue only as it makes the shutdown logic easier
310
325
  queue = @config['queue']
311
326
  events = {}
312
327
 
313
- until should_quit? && @shutting_down
328
+ until @should_quit
314
329
 
315
330
  # FIXME: should also check if presence has been established in any group chat rooms that are
316
331
  # configured before starting to process events, otherwise the first few may get lost (send
317
332
  # before joining the group chat rooms)
318
333
  if connected?
319
- logger.debug("jabber is connected so commencing blpop on #{queue}")
334
+ @logger.debug("jabber is connected so commencing blpop on #{queue}")
320
335
  events[queue] = @redis.blpop(queue, 0)
321
336
  event = Yajl::Parser.parse(events[queue][1])
322
337
  type = event['notification_type'] || 'unknown'
323
- logger.debug('jabber notification event received')
324
- logger.debug(event.inspect)
338
+ @logger.debug('jabber notification event received')
339
+ @logger.debug(event.inspect)
325
340
  if 'shutdown'.eql?(type)
326
- if should_quit?
327
- @shutting_down = true
341
+ if @should_quit
328
342
  EventMachine::Synchrony.next_tick do
329
343
  # get delays without the next_tick
330
344
  close # Blather::Client.close
@@ -337,7 +351,7 @@ module Flapjack
337
351
  duration = event['duration'] ? time_period_in_words(event['duration']) : '4 hours'
338
352
  address = event['address']
339
353
 
340
- logger.debug("processing jabber notification address: #{address}, event: #{entity}:#{check}, state: #{state}, summary: #{summary}")
354
+ @logger.debug("processing jabber notification address: #{address}, event: #{entity}:#{check}, state: #{state}, summary: #{summary}")
341
355
 
342
356
  ack_str =
343
357
  event['event_count'] &&
@@ -371,12 +385,11 @@ module Flapjack
371
385
  end
372
386
  end
373
387
  else
374
- logger.debug("not connected, sleep 1 before retry")
388
+ @logger.debug("not connected, sleep 1 before retry")
375
389
  EM::Synchrony.sleep(1)
376
390
  end
377
391
  end
378
392
 
379
- count_timer.cancel
380
393
  keepalive_timer.cancel
381
394
  end
382
395