flapjack 0.6.39 → 0.6.40

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/.gitignore +2 -2
  2. data/Gemfile +5 -1
  3. data/README.md +3 -2
  4. data/Rakefile +2 -1
  5. data/bin/flapjack +2 -2
  6. data/bin/flapjack-nagios-receiver +2 -8
  7. data/bin/flapjack-populator +11 -11
  8. data/etc/flapjack_config.yaml.example +28 -0
  9. data/features/steps/events_steps.rb +1 -1
  10. data/features/steps/notifications_steps.rb +7 -4
  11. data/features/support/env.rb +17 -6
  12. data/flapjack.gemspec +1 -0
  13. data/lib/flapjack/api.rb +72 -28
  14. data/lib/flapjack/configuration.rb +9 -1
  15. data/lib/flapjack/coordinator.rb +138 -162
  16. data/lib/flapjack/data/contact.rb +3 -1
  17. data/lib/flapjack/data/entity.rb +10 -1
  18. data/lib/flapjack/data/entity_check.rb +19 -21
  19. data/lib/flapjack/data/event.rb +26 -27
  20. data/lib/flapjack/data/message.rb +45 -0
  21. data/lib/flapjack/data/notification.rb +49 -0
  22. data/lib/flapjack/executive.rb +53 -74
  23. data/lib/flapjack/filters/acknowledgement.rb +14 -11
  24. data/lib/flapjack/jabber.rb +84 -18
  25. data/lib/flapjack/notification/email.rb +67 -37
  26. data/lib/flapjack/notification/sms.rb +40 -28
  27. data/lib/flapjack/oobetet.rb +1 -1
  28. data/lib/flapjack/pagerduty.rb +24 -15
  29. data/lib/flapjack/patches.rb +3 -1
  30. data/lib/flapjack/pikelet.rb +51 -20
  31. data/lib/flapjack/rack_logger.rb +8 -0
  32. data/lib/flapjack/version.rb +1 -1
  33. data/lib/flapjack/web.rb +51 -27
  34. data/spec/lib/flapjack/api_spec.rb +28 -3
  35. data/spec/lib/flapjack/coordinator_spec.rb +69 -43
  36. data/spec/lib/flapjack/data/contact_spec.rb +17 -9
  37. data/spec/lib/flapjack/data/entity_check_spec.rb +0 -25
  38. data/spec/lib/flapjack/data/entity_spec.rb +4 -0
  39. data/spec/lib/flapjack/data/global_spec.rb +6 -0
  40. data/spec/lib/flapjack/data/message_spec.rb +6 -0
  41. data/spec/lib/flapjack/data/notification_spec.rb +6 -0
  42. data/spec/lib/flapjack/executive_spec.rb +2 -2
  43. data/spec/lib/flapjack/jabber_spec.rb +8 -9
  44. data/spec/lib/flapjack/pagerduty_spec.rb +53 -45
  45. data/spec/lib/flapjack/utility_spec.rb +55 -0
  46. data/spec/lib/flapjack/web_spec.rb +7 -5
  47. data/tasks/events.rake +26 -59
  48. data/tasks/profile.rake +366 -0
  49. metadata +30 -19
  50. data/lib/flapjack/notification/common.rb +0 -23
  51. data/lib/flapjack/persistence/couch.rb +0 -5
  52. data/lib/flapjack/persistence/couch/connection.rb +0 -66
  53. data/lib/flapjack/persistence/couch/couch.rb +0 -63
  54. data/lib/flapjack/persistence/data_mapper.rb +0 -3
  55. data/lib/flapjack/persistence/data_mapper/data_mapper.rb +0 -67
  56. data/lib/flapjack/persistence/data_mapper/models/check.rb +0 -90
  57. data/lib/flapjack/persistence/data_mapper/models/check_template.rb +0 -20
  58. data/lib/flapjack/persistence/data_mapper/models/event.rb +0 -19
  59. data/lib/flapjack/persistence/data_mapper/models/node.rb +0 -18
  60. data/lib/flapjack/persistence/data_mapper/models/related_check.rb +0 -15
  61. data/lib/flapjack/persistence/sqlite3.rb +0 -3
  62. data/lib/flapjack/persistence/sqlite3/sqlite3.rb +0 -166
  63. data/lib/flapjack/transports/beanstalkd.rb +0 -50
  64. data/lib/flapjack/transports/result.rb +0 -58
  65. data/lib/flapjack/worker/application.rb +0 -121
  66. data/lib/flapjack/worker/cli.rb +0 -49
@@ -56,7 +56,15 @@ module Flapjack
56
56
  config_env['redis'][k] = v
57
57
  end
58
58
 
59
- config_env
59
+ redis_path = (config_env['redis']['path'] || nil)
60
+ base_opts = {:db => (config_env['redis']['db'] || 0)}
61
+ redis_config = base_opts.merge(
62
+ (redis_path ? { :path => redis_path } :
63
+ { :host => (config_env['redis']['host'] || '127.0.0.1'),
64
+ :port => (config_env['redis']['port'] || 6379)})
65
+ )
66
+
67
+ return config_env, redis_config
60
68
  end
61
69
 
62
70
  end
@@ -30,8 +30,9 @@ module Flapjack
30
30
 
31
31
  include Flapjack::Daemonizable
32
32
 
33
- def initialize(config = {})
33
+ def initialize(config, redis_options)
34
34
  @config = config
35
+ @redis_options = redis_options
35
36
  @pikelets = []
36
37
 
37
38
  @logger = Log4r::Logger.new("flapjack-coordinator")
@@ -42,59 +43,50 @@ module Flapjack
42
43
  def start(options = {})
43
44
  @signals = options[:signals]
44
45
  if options[:daemonize]
46
+ @signals = options[:signals]
45
47
  daemonize
46
48
  else
47
- setup
49
+ run(:signals => options[:signals])
48
50
  end
49
51
  end
50
52
 
51
53
  def after_daemonize
52
- setup
54
+ run(:signals => @signals)
53
55
  end
54
56
 
55
57
  def stop
58
+ return if @stopping
59
+ @stopping = true
56
60
  shutdown
57
61
  end
58
62
 
59
63
  private
60
64
 
61
- def setup
65
+ # map from config key to pikelet class
66
+ PIKELET_TYPES = {'executive' => Flapjack::Executive,
67
+ 'jabber_gateway' => Flapjack::Jabber,
68
+ 'pagerduty_gateway' => Flapjack::Pagerduty,
69
+ 'oobetet' => Flapjack::Oobetet,
62
70
 
63
- # FIXME: the following is currently repeated in flapjack-populator and
64
- # flapjack-nagios-receiver - move to a method in a module and include it
65
- redis_host = @config['redis']['host'] || '127.0.0.1'
66
- redis_port = @config['redis']['port'] || 6379
67
- redis_path = @config['redis']['path'] || nil
68
- redis_db = @config['redis']['db'] || 0
71
+ 'web' => Flapjack::Web,
72
+ 'api' => Flapjack::API,
69
73
 
70
- if redis_path
71
- @redis_options = { :db => redis_db, :path => redis_path }
72
- else
73
- @redis_options = { :db => redis_db, :host => redis_host, :port => redis_port }
74
- end
74
+ 'email_notifier' => Flapjack::Notification::Email,
75
+ 'sms_notifier' => Flapjack::Notification::Sms}
76
+
77
+ def run(options = {})
75
78
 
76
79
  EM.synchrony do
77
80
  @logger.debug "config keys: #{@config.keys}"
78
81
 
79
- pikelet_keys = ['executive', 'jabber_gateway', 'pagerduty_gateway',
80
- 'email_notifier', 'sms_notifier', 'web', 'api',
81
- 'oobetet']
82
-
83
82
  @config.keys.each do |pikelet_type|
84
- next unless pikelet_keys.include?(pikelet_type) &&
83
+ next unless PIKELET_TYPES.keys.include?(pikelet_type) &&
85
84
  @config[pikelet_type].is_a?(Hash) &&
86
85
  @config[pikelet_type]['enabled']
87
86
  @logger.debug "coordinator is now initialising the #{pikelet_type} pikelet"
88
87
  pikelet_cfg = @config[pikelet_type]
89
88
 
90
- case pikelet_type
91
- when 'executive', 'jabber_gateway', 'pagerduty_gateway', 'oobetet'
92
- build_pikelet(pikelet_type, pikelet_cfg)
93
- when 'web', 'api'
94
- build_thin_pikelet(pikelet_type, pikelet_cfg)
95
- when 'email_notifier', 'sms_notifier'
96
- build_resque_pikelet(pikelet_type, pikelet_cfg)
97
- end
89
+ build_pikelet(pikelet_type, pikelet_cfg)
98
90
  end
99
91
 
100
92
  setup_signals if @signals
@@ -102,186 +94,166 @@ module Flapjack
102
94
 
103
95
  end
104
96
 
97
+ # the global nature of this seems at odds with it calling stop
98
+ # within a single coordinator instance. Coordinator is essentially
99
+ # a singleton anyway...
105
100
  def setup_signals
106
- trap('INT') { stop }
107
- trap('TERM') { stop }
108
- unless RUBY_PLATFORM =~ /mswin/
109
- trap('QUIT') { stop }
110
- # trap('HUP') { }
101
+ Kernel.trap('INT') { stop }
102
+ Kernel.trap('TERM') { stop }
103
+ unless RbConfig::CONFIG['host_os'] =~ /mswin|windows|cygwin/i
104
+ Kernel.trap('QUIT') { stop }
105
+ # Kernel.trap('HUP') { }
111
106
  end
112
107
  end
113
108
 
114
109
  def build_pikelet(pikelet_type, pikelet_cfg)
115
- pikelet_class = case pikelet_type
116
- when 'executive'
117
- Flapjack::Executive
118
- when 'jabber_gateway'
119
- Flapjack::Jabber
120
- when 'pagerduty_gateway'
121
- Flapjack::Pagerduty
122
- when 'oobetet'
123
- Flapjack::Oobetet
124
- end
125
- return unless pikelet_class
126
-
127
- pikelet = pikelet_class.new
128
- f = Fiber.new {
129
- begin
130
- pikelet.bootstrap(:redis => @redis_options, :config => pikelet_cfg)
131
- pikelet.main
132
- rescue Exception => e
133
- trace = e.backtrace.join("\n")
134
- @logger.fatal "#{e.message}\n#{trace}"
135
- stop
136
- end
137
- }
138
- @pikelets << {:fiber => f, :type => pikelet_type, :instance => pikelet}
139
- f.resume
140
- @logger.debug "new fiber created for #{pikelet_type}"
141
- end
110
+ return unless pikelet_class = PIKELET_TYPES[pikelet_type]
142
111
 
143
- def build_thin_pikelet(pikelet_type, pikelet_cfg)
144
- pikelet_class = case pikelet_type
145
- when 'web'
146
- Flapjack::Web
147
- when 'api'
148
- Flapjack::API
149
- end
150
- return unless pikelet_class
151
-
152
- port = nil
153
- if pikelet_cfg['port']
154
- port = pikelet_cfg['port'].to_i
155
- end
112
+ inc_mod = pikelet_class.included_modules
113
+ ext_mod = extended_modules(pikelet_class)
156
114
 
157
- port = 3001 if (port.nil? || port <= 0 || port > 65535)
115
+ pikelet = nil
116
+ fiber = nil
158
117
 
159
- pikelet_class.class_variable_set('@@redis',
160
- Flapjack::RedisPool.new(:config => @redis_options))
118
+ if inc_mod.include?(Flapjack::GenericPikelet)
119
+ pikelet = pikelet_class.new
120
+ pikelet.bootstrap(:config => pikelet_cfg, :redis_config => @redis_options)
161
121
 
162
- Thin::Logging.silent = true
122
+ else
123
+ pikelet_class.bootstrap(:config => pikelet_cfg, :redis_config => @redis_options)
163
124
 
164
- pikelet = Thin::Server.new('0.0.0.0', port, pikelet_class, :signals => false)
165
- @pikelets << {:instance => pikelet, :type => pikelet_type}
166
- pikelet.start
167
- @logger.debug "new thin server instance started for #{pikelet_type}"
168
- end
125
+ if ext_mod.include?(Flapjack::ThinPikelet)
169
126
 
170
- def build_resque_pikelet(pikelet_type, pikelet_cfg)
171
- pikelet_class = case pikelet_type
172
- when 'email_notifier'
173
- Flapjack::Notification::Email
174
- when 'sms_notifier'
175
- Flapjack::Notification::Sms
176
- end
177
- return unless pikelet_class
178
-
179
- # set up connection pooling, stop resque errors (ensure that it's only
180
- # done once)
181
- @resque_pool = nil
182
- if (['email_notifier', 'sms_notifier'] & @pikelets.collect {|p| p[:type]}).empty?
183
- pool = Flapjack::RedisPool.new(:config => @redis_options)
184
- ::Resque.redis = pool
185
- @resque_pool = pool
186
- ## NB: can override the default 'resque' namespace like this
187
- #::Resque.redis.namespace = 'flapjack'
188
- end
127
+ unless @thin_silenced
128
+ Thin::Logging.silent = true
129
+ @thin_silenced = true
130
+ end
189
131
 
190
- # See https://github.com/mikel/mail/blob/master/lib/mail/mail.rb#L53
191
- # & https://github.com/mikel/mail/blob/master/spec/mail/configuration_spec.rb
192
- # for details of configuring mail gem. defaults to SMTP, localhost, port 25
132
+ pikelet = Thin::Server.new('0.0.0.0',
133
+ pikelet_class.instance_variable_get('@port'),
134
+ pikelet_class, :signals => false)
193
135
 
194
- if pikelet_type.eql?('email_notifier')
195
- smtp_config = {}
136
+ elsif ext_mod.include?(Flapjack::ResquePikelet)
196
137
 
197
- if pikelet_cfg['smtp_config']
198
- smtp_config = pikelet_cfg['smtp_config'].keys.inject({}) do |ret,obj|
199
- ret[obj.to_sym] = pikelet_cfg['smtp_config'][obj]
200
- ret
138
+ # set up connection pooling, stop resque errors
139
+ unless @resque_pool
140
+ @resque_pool = Flapjack::RedisPool.new(:config => @redis_options)
141
+ ::Resque.redis = @resque_pool
142
+ ## NB: can override the default 'resque' namespace like this
143
+ #::Resque.redis.namespace = 'flapjack'
201
144
  end
145
+
146
+ # TODO error if pikelet_cfg['queue'].nil?
147
+ pikelet = EM::Resque::Worker.new(pikelet_cfg['queue'])
148
+ # # Use these to debug the resque workers
149
+ # pikelet.verbose = true
150
+ # pikelet.very_verbose = true
202
151
  end
203
152
 
204
- Mail.defaults {
205
- delivery_method :smtp, {:enable_starttls_auto => false}.merge(smtp_config)
153
+ end
154
+
155
+ pikelet_info = {:class => pikelet_class, :instance => pikelet}
156
+
157
+ if inc_mod.include?(Flapjack::GenericPikelet) ||
158
+ ext_mod.include?(Flapjack::ResquePikelet)
159
+
160
+ fiber = Fiber.new {
161
+ begin
162
+ # Can't use local inc_mod/ext_mod variables in the new fiber
163
+ if pikelet.is_a?(Flapjack::GenericPikelet)
164
+ pikelet.main
165
+ elsif extended_modules(pikelet_class).include?(Flapjack::ResquePikelet)
166
+ pikelet.work(0.1)
167
+ end
168
+ rescue Exception => e
169
+ trace = e.backtrace.join("\n")
170
+ @logger.fatal "#{e.message}\n#{trace}"
171
+ stop
172
+ end
206
173
  }
174
+
175
+ pikelet_info[:fiber] = fiber
176
+ fiber.resume
177
+ @logger.debug "new fiber created for #{pikelet_type}"
178
+ elsif ext_mod.include?(Flapjack::ThinPikelet)
179
+ pikelet.start
180
+ @logger.debug "new thin server instance started for #{pikelet_type}"
207
181
  end
208
182
 
209
- pikelet_class.class_variable_set('@@config', pikelet_cfg)
210
-
211
- # TODO error if pikelet_cfg['queue'].nil?
212
- pikelet = EM::Resque::Worker.new(pikelet_cfg['queue'])
213
- # # Use these to debug the resque workers
214
- # pikelet.verbose = true
215
- # pikelet.very_verbose = true
216
-
217
- f = Fiber.new {
218
- begin
219
- pikelet.work(0.1)
220
- rescue Exception => e
221
- trace = e.backtrace.join("\n")
222
- @logger.fatal "#{e.message}\n#{trace}"
223
- stop
183
+ @pikelets << pikelet_info
184
+ end
185
+
186
+ # only prints state changes, otherwise pikelets not closing promptly can
187
+ # cause everything else to be spammy
188
+ def health_check
189
+ @pikelets.each do |pik|
190
+ status = if extended_modules(pik[:class]).include?(Flapjack::ThinPikelet)
191
+ pik[:instance].backend.size > 0 ? 'running' : 'stopped'
192
+ elsif pik[:fiber]
193
+ pik[:fiber].alive? ? 'running' : 'stopped'
224
194
  end
225
- }
226
- @pikelets << {:fiber => f, :type => pikelet_type, :instance => pikelet}
227
- f.resume
228
- @logger.debug "new fiber created for #{pikelet_type}"
195
+ next if pik.has_key?(:status) && pik[:status].eql?(status)
196
+ @logger.info "#{pik[:class].name}: #{status}"
197
+ pik[:status] = status
198
+ end
229
199
  end
230
200
 
231
- # # TODO rewrite to be less spammy -- print only initial state and changes
232
- # def health_check
233
- # @pikelets.each do |pik|
234
- # if pik[:instance].is_a?(Thin::Server)
235
- # s = pik[:instance].backend.size
236
- # @logger.debug "thin on port #{pik[:instance].port} - #{s} connections"
237
- # elsif pik[:fiber]
238
- # @logger.debug "#{pik[:type]}: #{pik[:fiber].alive? ? 'alive' : 'dead'}"
239
- # end
240
- # end
241
- # end
242
-
243
- # TODO whem merged with other changes, have this check pik[:class] instead,
244
- # makes tests neater
245
201
  def shutdown
246
202
  @pikelets.each do |pik|
247
- case pik[:instance]
248
- when Flapjack::Executive, Flapjack::Jabber, Flapjack::Pagerduty
203
+
204
+ pik_inst = pik[:instance]
205
+ ext_mod = extended_modules(pik[:class])
206
+
207
+ # would be neater if we could use something similar for the class << self
208
+ # included pikelets as well
209
+ if pik_inst.is_a?(Flapjack::GenericPikelet)
249
210
  if pik[:fiber] && pik[:fiber].alive?
250
- pik[:instance].stop
211
+ pik_inst.stop
251
212
  Fiber.new {
252
213
  # this needs to use a separate Redis connection from the pikelet's
253
214
  # one, as that's in the middle of its blpop
254
215
  r = Redis.new(@redis_options.merge(:driver => 'synchrony'))
255
- pik[:instance].add_shutdown_event(:redis => r)
216
+ pik_inst.add_shutdown_event(:redis => r)
256
217
  r.quit
257
218
  }.resume
258
219
  end
259
- when EM::Resque::Worker
220
+ elsif ext_mod.include?(Flapjack::ResquePikelet)
260
221
  # resque is polling, so we don't need a shutdown object
261
- pik[:instance].shutdown if pik[:fiber] && pik[:fiber].alive?
262
- when Thin::Server # web, api
222
+ pik_inst.shutdown if pik[:fiber] && pik[:fiber].alive?
223
+ elsif ext_mod.include?(Flapjack::ThinPikelet)
263
224
  # drop from this side, as HTTP keepalive etc. means browsers
264
225
  # keep connections alive for ages, and we'd be hanging around
265
226
  # waiting for them to drop
266
- pik[:instance].stop!
227
+ pik_inst.stop!
267
228
  end
268
229
  end
269
230
 
270
- fibers = @pikelets.collect {|p| p[:fiber] }.compact
271
- thin_pikelets = @pikelets.collect {|p| p[:instance]}.select {|i| i.is_a?(Thin::Server) }
272
-
273
231
  Fiber.new {
232
+
274
233
  loop do
275
- # health_check
276
- if fibers.any?(&:alive?) || thin_pikelets.any?{|tp| !tp.backend.empty? }
234
+ health_check
235
+
236
+ if @pikelets.any? {|p| p[:status] == 'running'}
277
237
  EM::Synchrony.sleep 0.25
278
238
  else
279
239
  @resque_pool.empty! if @resque_pool
280
240
 
281
- [Flapjack::Web, Flapjack::API].each do |klass|
282
- next unless klass.class_variable_defined?('@@redis') &&
283
- redis = klass.class_variable_get('@@redis')
284
- redis.empty!
241
+ @pikelets.each do |pik|
242
+
243
+ pik_inst = pik[:instance]
244
+ ext_mod = extended_modules(pik[:class])
245
+
246
+ if pik_inst.is_a?(Flapjack::GenericPikelet)
247
+
248
+ pik_inst.cleanup
249
+
250
+ elsif [Flapjack::ResquePikelet, Flapjack::ThinPikelet].any?{|fp|
251
+ ext_mod.include?(fp)
252
+ }
253
+
254
+ pik[:class].cleanup
255
+
256
+ end
285
257
  end
286
258
 
287
259
  EM.stop
@@ -291,6 +263,10 @@ module Flapjack
291
263
  }.resume
292
264
  end
293
265
 
266
+ def extended_modules(klass)
267
+ (class << klass; self; end).included_modules
268
+ end
269
+
294
270
  end
295
271
 
296
272
  end
@@ -24,7 +24,7 @@ module Flapjack
24
24
  contact = self.find_by_id(id, :redis => redis)
25
25
  ret << contact if contact
26
26
  ret
27
- }
27
+ }.sort_by {|c| [c.last_name, c.first_name]}
28
28
  end
29
29
 
30
30
  def self.delete_all(options = {})
@@ -41,6 +41,8 @@ module Flapjack
41
41
  raise "No id value passed" unless id
42
42
  logger = options[:logger]
43
43
 
44
+ return unless redis.hexists("contact:#{id}", 'first_name')
45
+
44
46
  fn, ln, em = redis.hmget("contact:#{id}", 'first_name', 'last_name', 'email')
45
47
  me = redis.hgetall("contact_media:#{id}")
46
48
 
@@ -13,7 +13,7 @@ module Flapjack
13
13
  redis.keys("entity_id:*").collect {|k|
14
14
  k =~ /^entity_id:(.+)$/; entity_name = $1
15
15
  self.new(:name => entity_name, :id => redis.get("entity_id:#{entity_name}"), :redis => redis)
16
- }
16
+ }.sort_by(&:name)
17
17
  end
18
18
 
19
19
  # NB: should probably be called in the context of a Redis multi block; not doing so
@@ -62,6 +62,15 @@ module Flapjack
62
62
  self.new(:name => entity_name, :id => entity_id, :redis => redis)
63
63
  end
64
64
 
65
+ def self.find_all_name_matching(pattern, options = {})
66
+ raise "Redis connection not set" unless redis = options[:redis]
67
+ matched_entities = redis.keys('check:*').collect {|check|
68
+ a, entity, c = check.split(':')
69
+ match = (entity =~ /#{pattern}/) ? entity : nil
70
+ }
71
+ matched_entities.compact.sort.uniq
72
+ end
73
+
65
74
  def check_list
66
75
  @redis.keys("check:#{@name}:*").map {|k| k =~ /^check:#{@name}:(.+)$/; $1}
67
76
  end