flapjack 0.6.37 → 0.6.38

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml CHANGED
@@ -5,6 +5,6 @@ gemfile:
5
5
  - Gemfile
6
6
  services:
7
7
  - redis-server
8
- # uncomment this line if your project needs to run something other than `rake`:
9
- # script: bundle exec rspec spec
10
-
8
+ before_script:
9
+ - mkdir -p ./log
10
+ script: bundle exec rspec spec && bundle exec cucumber features
data/README.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Flapjack
2
2
  ========
3
3
 
4
+ [![Travis CI Status][id_travis_img]][id_travis_link]
5
+
6
+ [id_travis_link]: https://secure.travis-ci.org/#!/flpjck/flapjack
7
+ [id_travis_img]: https://secure.travis-ci.org/flpjck/flapjack.png
8
+
4
9
  Flapjack is a highly scalable and distributed monitoring notification system.
5
10
 
6
11
  Flapjack provides a scalable method for dealing with events representing changes in system state (OK -> WARNING -> CRITICAL transitions) and alerting appropriate people as necessary.
@@ -21,6 +21,7 @@ require 'flapjack/oobetet'
21
21
  require 'flapjack/pagerduty'
22
22
  require 'flapjack/notification/email'
23
23
  require 'flapjack/notification/sms'
24
+ require 'flapjack/redis_pool'
24
25
  require 'flapjack/web'
25
26
 
26
27
  module Flapjack
@@ -158,12 +159,13 @@ module Flapjack
158
159
 
159
160
  port = 3001 if (port.nil? || port <= 0 || port > 65535)
160
161
 
161
- pikelet_class.class_variable_set('@@redis', build_redis_connection_pool)
162
+ pool = Flapjack::RedisPool.new(:config => @redis_options)
163
+ pikelet_class.class_variable_set('@@redis', pool)
162
164
 
163
165
  Thin::Logging.silent = true
164
166
 
165
167
  pikelet = Thin::Server.new('0.0.0.0', port, pikelet_class, :signals => false)
166
- @pikelets << {:instance => pikelet, :type => pikelet_type}
168
+ @pikelets << {:instance => pikelet, :type => pikelet_type, :pool => pool}
167
169
  pikelet.start
168
170
  @logger.debug "new thin server instance started for #{pikelet_type}"
169
171
  end
@@ -179,8 +181,10 @@ module Flapjack
179
181
 
180
182
  # set up connection pooling, stop resque errors (ensure that it's only
181
183
  # done once)
184
+ pool = nil
182
185
  if (['email_notifier', 'sms_notifier'] & @pikelets.collect {|p| p[:type]}).empty?
183
- ::Resque.redis = build_redis_connection_pool
186
+ pool = Flapjack::RedisPool.new(:config => @redis_options)
187
+ ::Resque.redis = pool
184
188
  ## NB: can override the default 'resque' namespace like this
185
189
  #::Resque.redis.namespace = 'flapjack'
186
190
  end
@@ -221,17 +225,13 @@ module Flapjack
221
225
  stop
222
226
  end
223
227
  }
224
- @pikelets << {:fiber => f, :type => pikelet_type}
228
+ pikelet_values = {:fiber => f, :type => pikelet_type}
229
+ pikelet_values[:pool] = pool if pool
230
+ @pikelets << pikelet_values
225
231
  f.resume
226
232
  @logger.debug "new fiber created for #{pikelet_type}"
227
233
  end
228
234
 
229
- def build_redis_connection_pool(options = {})
230
- EventMachine::Synchrony::ConnectionPool.new(:size => options[:size] || 5) do
231
- ::Redis.new(@redis_options)
232
- end
233
- end
234
-
235
235
  # # TODO rewrite to be less spammy -- print only initial state and changes
236
236
  # def health_check
237
237
  # @pikelets.each do |pik|
@@ -259,10 +259,8 @@ module Flapjack
259
259
  }.resume
260
260
  end
261
261
  when EM::Resque::Worker
262
- if pik[:fiber] && pik[:fiber].alive?
263
- # resque is polling, so we don't need a shutdown object
264
- pik[:instance].shutdown
265
- end
262
+ # resque is polling, so we don't need a shutdown object
263
+ pik[:instance].shutdown if pik[:fiber] && pik[:fiber].alive?
266
264
  when Thin::Server # web, api
267
265
  # drop from this side, as HTTP keepalive etc. means browsers
268
266
  # keep connections alive for ages, and we'd be hanging around
@@ -280,6 +278,16 @@ module Flapjack
280
278
  if fibers.any?(&:alive?) || thin_pikelets.any?{|tp| !tp.backend.empty? }
281
279
  EM::Synchrony.sleep 0.25
282
280
  else
281
+ @pikelets.select {|p| thin_pikelets.include?(p) }.each do |tp|
282
+ tp[:pool].empty!
283
+ end
284
+
285
+ rsq_p = @pikelets.detect {|p|
286
+ ['email_notifier', 'sms_notifier'].include?(p[:type]) && !p[:pool].nil?
287
+ }
288
+
289
+ rsq_p[:pool].empty! if rsq_p
290
+
283
291
  EM.stop
284
292
  break
285
293
  end
@@ -3,6 +3,8 @@
3
3
  # NB: use of redis.keys probably indicates we should maintain a data
4
4
  # structure to avoid the need for this type of query
5
5
 
6
+ require 'set'
7
+
6
8
  module Flapjack
7
9
 
8
10
  module Data
@@ -85,18 +87,35 @@ module Flapjack
85
87
  merge('service_key' => service_key)
86
88
  end
87
89
 
88
- def entities
89
- @redis.keys('contacts_for:*').inject([]) {|ret, k|
90
+ def entities_and_checks
91
+ @redis.keys('contacts_for:*').inject({}) {|ret, k|
90
92
  if @redis.sismember(k, self.id)
91
- k =~ /^contacts_for:(.+)$/
92
- entity_id = $1
93
- if entity_name = @redis.hget("entity:#{entity_id}", 'name')
94
- ret << Flapjack::Data::Entity.new(:name => entity_name,
95
- :id => entity_id, :redis => @redis)
93
+ if k =~ /^contacts_for:([a-zA-Z0-9][a-zA-Z0-9\.\-]*[a-zA-Z0-9])(?::(\w+))?$/
94
+ entity_id = $1
95
+ check = $2
96
+
97
+ unless ret.has_key?(entity_id)
98
+ ret[entity_id] = {}
99
+ if entity_name = @redis.hget("entity:#{entity_id}", 'name')
100
+ entity = Flapjack::Data::Entity.new(:name => entity_name,
101
+ :id => entity_id, :redis => @redis)
102
+ ret[entity_id][:entity] = entity
103
+ end
104
+ # using a set to ensure unique check values
105
+ ret[entity_id][:checks] = Set.new
106
+ end
107
+
108
+ if check
109
+ # just add this check for the entity
110
+ ret[entity_id][:checks] |= check
111
+ else
112
+ # registered for the entity so add all checks
113
+ ret[entity_id][:checks] |= entity.check_list
114
+ end
96
115
  end
97
116
  end
98
117
  ret
99
- }
118
+ }.values
100
119
  end
101
120
 
102
121
  def name
@@ -45,7 +45,10 @@ module Flapjack
45
45
  @filters << Flapjack::Filters::Delays.new(options)
46
46
  @filters << Flapjack::Filters::Acknowledgement.new(options)
47
47
 
48
- @boot_time = Time.now
48
+ @boot_time = Time.now
49
+ @fqdn = `/bin/hostname -f`.chomp
50
+ @pid = Process.pid
51
+ @instance_id = "#{@fqdn}:#{@pid}"
49
52
 
50
53
  # FIXME: all of the below keys assume there is only ever one executive running;
51
54
  # we could generate a fuid and save it to disk, and prepend it from that
@@ -61,6 +64,12 @@ module Flapjack
61
64
  @redis.hset('event_counters', 'failure', 0)
62
65
  @redis.hset('event_counters', 'action', 0)
63
66
  end
67
+
68
+ @redis.zadd('executive_instances', @boot_time.to_i, @instance_id)
69
+ @redis.hset("event_counters:#{@instance_id}", 'all', 0)
70
+ @redis.hset("event_counters:#{@instance_id}", 'ok', 0)
71
+ @redis.hset("event_counters:#{@instance_id}", 'failure', 0)
72
+ @redis.hset("event_counters:#{@instance_id}", 'action', 0)
64
73
  end
65
74
 
66
75
  def main
@@ -73,6 +82,9 @@ module Flapjack
73
82
  event = Flapjack::Data::Event.next(:persistence => @redis)
74
83
  process_event(event) unless event.nil?
75
84
  end
85
+
86
+ @redis.empty! if @redis
87
+
76
88
  @logger.info("Exiting main loop.")
77
89
  end
78
90
 
@@ -123,6 +135,7 @@ module Flapjack
123
135
  result = { :skip_filters => false }
124
136
  timestamp = Time.now.to_i
125
137
  @event_count = @redis.hincrby('event_counters', 'all', 1)
138
+ @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
126
139
 
127
140
  # FIXME skip if entity_check.nil?
128
141
 
@@ -137,8 +150,10 @@ module Flapjack
137
150
 
138
151
  if event.ok?
139
152
  @redis.hincrby('event_counters', 'ok', 1)
153
+ @redis.hincrby("event_counters:#{@instance_id}", 'ok', 1)
140
154
  elsif event.failure?
141
155
  @redis.hincrby('event_counters', 'failure', 1)
156
+ @redis.hincrby("event_counters:#{@instance_id}", 'failure', 1)
142
157
  @redis.hset('unacknowledged_failures', @event_count, event.id)
143
158
  end
144
159
 
@@ -167,7 +182,8 @@ module Flapjack
167
182
  when 'action'
168
183
  # When an action event is processed, store the event.
169
184
  @redis.hset(event.id + ':actions', timestamp, event.state)
170
- @redis.hincrby('event_counters', 'action', 1) if event.ok?
185
+ @redis.hincrby('event_counters', 'action', 1)
186
+ @redis.hincrby("event_counters:#{@instance_id}", 'action', 1)
171
187
 
172
188
  if event.acknowledgement? && event.acknowledgement_id
173
189
  @redis.hdel('unacknowledged_failures', event.acknowledgement_id)
@@ -315,6 +315,9 @@ module Flapjack
315
315
 
316
316
  count_timer.cancel
317
317
  keepalive_timer.cancel
318
+
319
+ @redis.empty! if @redis
320
+ @redis_handler.empty! if @redis_handler
318
321
  end
319
322
 
320
323
  end
@@ -3,19 +3,12 @@
3
3
  #require 'socket'
4
4
 
5
5
  require 'eventmachine'
6
- # the redis/synchrony gems need to be required in this particular order, see # the redis-rb README for details
7
- #require 'hiredis'
8
6
  require 'em-synchrony'
9
- #require 'redis/connection/synchrony'
10
- #require 'redis'
11
-
12
- #require 'chronic_duration'
13
7
 
14
8
  require 'blather/client/client'
15
9
  require 'em-synchrony/fiber_iterator'
16
10
  require 'yajl/json_gem'
17
11
 
18
- #require 'flapjack/data/entity_check'
19
12
  require 'flapjack/pikelet'
20
13
  require 'flapjack/utility'
21
14
 
@@ -223,6 +223,9 @@ module Flapjack
223
223
  end
224
224
 
225
225
  acknowledgement_timer.cancel
226
+
227
+ @redis.empty! if @redis
228
+ @redis_timer.empty! if @redis_timer
226
229
  end
227
230
 
228
231
  end
@@ -12,6 +12,8 @@ require 'log4r'
12
12
  require 'log4r/outputter/consoleoutputters'
13
13
  require 'log4r/outputter/syslogoutputter'
14
14
 
15
+ require 'flapjack/redis_pool'
16
+
15
17
  module Flapjack
16
18
  module Pikelet
17
19
  attr_accessor :logger, :redis, :config
@@ -27,9 +29,7 @@ module Flapjack
27
29
  def build_redis_connection_pool(options = {})
28
30
  return unless @bootstrapped
29
31
  if defined?(EventMachine) && defined?(EventMachine::Synchrony)
30
- EventMachine::Synchrony::ConnectionPool.new(:size => options[:size] || 5) do
31
- ::Redis.new(@redis_config)
32
- end
32
+ Flapjack::RedisPool.new(:config => @redis_config, :size => options[:size])
33
33
  else
34
34
  ::Redis.new(@redis_config)
35
35
  end
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'eventmachine'
4
+ # the redis/synchrony gems need to be required in this particular order, see
5
+ # the redis-rb README for details
6
+ require 'hiredis'
7
+ require 'em-synchrony'
8
+ require 'redis/connection/synchrony'
9
+ require 'redis'
10
+
11
+ # require 'eventmachine/synchrony/connection_pool'
12
+
13
+ module Flapjack
14
+ class RedisPool < EventMachine::Synchrony::ConnectionPool
15
+
16
+ def initialize(opts = {})
17
+ config = opts.delete(:config)
18
+ super(:size => opts[:size] || 5) {
19
+ ::Redis.new(config)
20
+ }
21
+ end
22
+
23
+ def empty!
24
+ f = Fiber.current
25
+
26
+ until @available.empty? && @pending.empty?
27
+ begin
28
+ conn = acquire(f)
29
+ conn.quit
30
+ @available.delete(conn)
31
+ ensure
32
+ if pending = @pending.shift
33
+ pending.resume
34
+ end
35
+ end
36
+ end
37
+ end
38
+
39
+ end
40
+ end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  module Flapjack
4
- VERSION = "0.6.37"
4
+ VERSION = "0.6.38"
5
5
  end
data/lib/flapjack/web.rb CHANGED
@@ -196,7 +196,9 @@ module Flapjack
196
196
  @pagerduty_credentials = @contact.pagerduty_credentials
197
197
  end
198
198
 
199
- @entities = @contact.entities
199
+ @entities_and_checks = @contact.entities_and_checks.sort_by {|ec|
200
+ ec[:entity].name
201
+ }
200
202
 
201
203
  haml :contact
202
204
  end
@@ -232,18 +234,21 @@ module Flapjack
232
234
  end
233
235
 
234
236
  def self_stats
237
+ @fqdn = `/bin/hostname -f`.chomp
238
+ @pid = Process.pid
239
+ @instance_id = "#{@fqdn}:#{@pid}"
240
+
235
241
  @keys = @@redis.keys '*'
236
- @count_failing_checks = @@redis.zcard 'failed_checks'
237
- @count_all_checks = @@redis.keys('check:*:*').length
238
- @event_counter_all = @@redis.hget('event_counters', 'all')
239
- @event_counter_ok = @@redis.hget('event_counters', 'ok')
240
- @event_counter_failure = @@redis.hget('event_counters', 'failure')
241
- @event_counter_action = @@redis.hget('event_counters', 'action')
242
- @boot_time = Time.at(@@redis.get('boot_time').to_i)
243
- @uptime = Time.now.to_i - @boot_time.to_i
244
- @uptime_string = time_period_in_words(@uptime)
245
- @event_rate_all = (@uptime > 0) ?
246
- (@event_counter_all.to_f / @uptime) : 0
242
+ @count_failing_checks = @@redis.zcard 'failed_checks'
243
+ @count_all_checks = @@redis.keys('check:*:*').length
244
+ @executive_instances = @@redis.zrange('executive_instances', '0', '-1', :withscores => true)
245
+ @event_counters = @@redis.hgetall('event_counters')
246
+ @event_counters_instance = @@redis.hgetall("event_counters:#{@instance_id}")
247
+ @boot_time = Time.at(@@redis.zscore('executive_instances', @instance_id).to_i)
248
+ @uptime = Time.now.to_i - @boot_time.to_i
249
+ @uptime_string = time_period_in_words(@uptime)
250
+ @event_rate_all = (@uptime > 0) ?
251
+ (@event_counters_instance['all'].to_f / @uptime) : 0
247
252
  @events_queued = @@redis.llen('events')
248
253
  end
249
254
 
@@ -11,40 +11,36 @@
11
11
  %div#wrapper
12
12
  = nav
13
13
  %h1= @contact.name
14
- - if @entities && !@entities.empty?
14
+ - if @contact.media && !@contact.media.empty?
15
+ %ul
16
+ - @contact.media.each_pair do |mk, mv|
17
+ - if 'pagerduty'.eql?(mk)
18
+ %li= "PagerDuty: "
19
+ %ul
20
+ - @pagerduty_credentials.each_pair do |pk, pv|
21
+ - if 'password'.eql?(pk)
22
+ %li= "#{pk}: ..."
23
+ - else
24
+ %li= "#{pk}: #{pv}"
25
+ - else
26
+ %li= "#{mk.capitalize}: #{mv}"
27
+ - else
28
+ %p No media
29
+ - if @entities_and_checks && !@entities_and_checks.empty?
15
30
  %table
16
31
  %tr
17
32
  %th Entity name
18
33
  %th Checks
19
- %th Media
20
- - @entities.sort_by(&:name).each do |entity|
34
+ - @entities_and_checks.each do |ec|
35
+ - entity = ec[:entity]
36
+ - checks = ec[:checks]
21
37
  %tr
22
38
  %td
23
39
  %p= entity.name
24
40
  %td
25
- - checks = entity.check_list
26
- - if !checks.empty?
27
- - checks.each do |check|
28
- - link = "/check?entity=#{entity.name}&check=#{check}"
29
- %p
30
- %a(title='check status' href=link) #{check}
31
- - else
32
- %p No checks
33
- %td
34
- - if @contact.media && !@contact.media.empty?
35
- %ul
36
- - @contact.media.each_pair do |mk, mv|
37
- - if 'pagerduty'.eql?(mk)
38
- %li= "PagerDuty: "
39
- %ul
40
- - @pagerduty_credentials.each_pair do |pk, pv|
41
- - if 'password'.eql?(pk)
42
- %li= "#{pk}: ..."
43
- - else
44
- %li= "#{pk}: #{pv}"
45
- - else
46
- %li= "#{mk.capitalize}: #{mv}"
47
- - else
48
- %p No media
41
+ - checks.each do |check|
42
+ - link = "/check?entity=#{entity.name}&check=#{check}"
43
+ %p
44
+ %a(title='check status' href=link) #{check}
49
45
  - else
50
46
  %p No entities
@@ -17,9 +17,11 @@
17
17
  %h4
18
18
  Number of failing services:
19
19
  = @count
20
- %p Events processed: #{@event_counter_all} (ok: #{@event_counter_ok}, failure: #{@event_counter_failure}, action: #{@event_counter_action})
21
- %p Average rate: #{@event_rate_all} events per second
20
+ %p Events processed (all time): #{@event_counters['all']} (ok: #{@event_counters['ok']}, failure: #{@event_counters['failure']}, action: #{@event_counters['action']})
21
+ %p Events processed (this instance): #{@event_counters_instance['all']} (ok: #{@event_counters_instance['ok']}, failure: #{@event_counters_instance['failure']}, action: #{@event_counters_instance['action']})
22
+ %p Average rate (this instance): #{@event_rate_all} events per second
22
23
  %p Total keys in redis: #{@keys.length}
23
24
  %p Uptime: #{@uptime_string}
24
25
  %p Boot time: #{@boot_time}
25
26
  %p Current time: #{Time.now}
27
+ %p Executive Instances: #{@executive_instances.inspect}
@@ -36,6 +36,7 @@ describe Flapjack::Executive, :redis => true do
36
36
 
37
37
  executive = Flapjack::Executive.new
38
38
  executive.bootstrap(:config => {})
39
+ @redis.should_receive(:empty!)
39
40
  executive.should_receive(:build_redis_connection_pool).and_return(@redis)
40
41
 
41
42
  # hacky, but the behaviour it's mimicking (shutdown from another thread) isn't
@@ -134,6 +134,7 @@ describe Flapjack::Jabber do
134
134
  EM::Synchrony.should_receive(:add_periodic_timer).with(60).and_return(timer_2)
135
135
 
136
136
  redis = mock('redis')
137
+ redis.should_receive(:empty!)
137
138
 
138
139
  fj = Flapjack::Jabber.new
139
140
  fj.bootstrap(:config => config)
@@ -25,11 +25,9 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
25
25
  @redis.should_receive(:keys).with('*').and_return([])
26
26
  @redis.should_receive(:zcard).with('failed_checks')
27
27
  @redis.should_receive(:keys).with('check:*:*').and_return([])
28
- @redis.should_receive(:hget).with('event_counters', 'all')
29
- @redis.should_receive(:hget).with('event_counters', 'ok')
30
- @redis.should_receive(:hget).with('event_counters', 'failure')
31
- @redis.should_receive(:hget).with('event_counters', 'action')
32
- @redis.should_receive(:get).with('boot_time').and_return(0)
28
+ @redis.should_receive(:zscore).with('executive_instances', anything).and_return(Time.now.to_i)
29
+ @redis.should_receive(:hgetall).twice.and_return({'all' => '8001', 'ok' => '8002'},
30
+ {'all' => '9001', 'ok' => '9002'}) #
33
31
  @redis.should_receive(:llen).with('events')
34
32
  end
35
33
 
@@ -67,6 +65,7 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
67
65
  end
68
66
 
69
67
  it "shows a page listing failing checks" do
68
+ @redis.should_receive(:zrange).with("executive_instances", "0", "-1", :withscores => true)
70
69
  @redis.should_receive(:zrange).with('failed_checks', 0, -1).and_return(["#{entity_name}:#{check}:states"])
71
70
 
72
71
  expect_stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flapjack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.37
4
+ version: 0.6.38
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-10-03 00:00:00.000000000 Z
14
+ date: 2012-10-09 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: daemons
@@ -453,6 +453,7 @@ files:
453
453
  - lib/flapjack/persistence/sqlite3.rb
454
454
  - lib/flapjack/persistence/sqlite3/sqlite3.rb
455
455
  - lib/flapjack/pikelet.rb
456
+ - lib/flapjack/redis_pool.rb
456
457
  - lib/flapjack/transports/beanstalkd.rb
457
458
  - lib/flapjack/transports/result.rb
458
459
  - lib/flapjack/utility.rb