flapjack 0.6.37 → 0.6.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml CHANGED
@@ -5,6 +5,6 @@ gemfile:
5
5
  - Gemfile
6
6
  services:
7
7
  - redis-server
8
- # uncomment this line if your project needs to run something other than `rake`:
9
- # script: bundle exec rspec spec
10
-
8
+ before_script:
9
+ - mkdir -p ./log
10
+ script: bundle exec rspec spec && bundle exec cucumber features
data/README.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Flapjack
2
2
  ========
3
3
 
4
+ [![Travis CI Status][id_travis_img]][id_travis_link]
5
+
6
+ [id_travis_link]: https://secure.travis-ci.org/#!/flpjck/flapjack
7
+ [id_travis_img]: https://secure.travis-ci.org/flpjck/flapjack.png
8
+
4
9
  Flapjack is a highly scalable and distributed monitoring notification system.
5
10
 
6
11
  Flapjack provides a scalable method for dealing with events representing changes in system state (OK -> WARNING -> CRITICAL transitions) and alerting appropriate people as necessary.
@@ -21,6 +21,7 @@ require 'flapjack/oobetet'
21
21
  require 'flapjack/pagerduty'
22
22
  require 'flapjack/notification/email'
23
23
  require 'flapjack/notification/sms'
24
+ require 'flapjack/redis_pool'
24
25
  require 'flapjack/web'
25
26
 
26
27
  module Flapjack
@@ -158,12 +159,13 @@ module Flapjack
158
159
 
159
160
  port = 3001 if (port.nil? || port <= 0 || port > 65535)
160
161
 
161
- pikelet_class.class_variable_set('@@redis', build_redis_connection_pool)
162
+ pool = Flapjack::RedisPool.new(:config => @redis_options)
163
+ pikelet_class.class_variable_set('@@redis', pool)
162
164
 
163
165
  Thin::Logging.silent = true
164
166
 
165
167
  pikelet = Thin::Server.new('0.0.0.0', port, pikelet_class, :signals => false)
166
- @pikelets << {:instance => pikelet, :type => pikelet_type}
168
+ @pikelets << {:instance => pikelet, :type => pikelet_type, :pool => pool}
167
169
  pikelet.start
168
170
  @logger.debug "new thin server instance started for #{pikelet_type}"
169
171
  end
@@ -179,8 +181,10 @@ module Flapjack
179
181
 
180
182
  # set up connection pooling, stop resque errors (ensure that it's only
181
183
  # done once)
184
+ pool = nil
182
185
  if (['email_notifier', 'sms_notifier'] & @pikelets.collect {|p| p[:type]}).empty?
183
- ::Resque.redis = build_redis_connection_pool
186
+ pool = Flapjack::RedisPool.new(:config => @redis_options)
187
+ ::Resque.redis = pool
184
188
  ## NB: can override the default 'resque' namespace like this
185
189
  #::Resque.redis.namespace = 'flapjack'
186
190
  end
@@ -221,17 +225,13 @@ module Flapjack
221
225
  stop
222
226
  end
223
227
  }
224
- @pikelets << {:fiber => f, :type => pikelet_type}
228
+ pikelet_values = {:fiber => f, :type => pikelet_type}
229
+ pikelet_values[:pool] = pool if pool
230
+ @pikelets << pikelet_values
225
231
  f.resume
226
232
  @logger.debug "new fiber created for #{pikelet_type}"
227
233
  end
228
234
 
229
- def build_redis_connection_pool(options = {})
230
- EventMachine::Synchrony::ConnectionPool.new(:size => options[:size] || 5) do
231
- ::Redis.new(@redis_options)
232
- end
233
- end
234
-
235
235
  # # TODO rewrite to be less spammy -- print only initial state and changes
236
236
  # def health_check
237
237
  # @pikelets.each do |pik|
@@ -259,10 +259,8 @@ module Flapjack
259
259
  }.resume
260
260
  end
261
261
  when EM::Resque::Worker
262
- if pik[:fiber] && pik[:fiber].alive?
263
- # resque is polling, so we don't need a shutdown object
264
- pik[:instance].shutdown
265
- end
262
+ # resque is polling, so we don't need a shutdown object
263
+ pik[:instance].shutdown if pik[:fiber] && pik[:fiber].alive?
266
264
  when Thin::Server # web, api
267
265
  # drop from this side, as HTTP keepalive etc. means browsers
268
266
  # keep connections alive for ages, and we'd be hanging around
@@ -280,6 +278,16 @@ module Flapjack
280
278
  if fibers.any?(&:alive?) || thin_pikelets.any?{|tp| !tp.backend.empty? }
281
279
  EM::Synchrony.sleep 0.25
282
280
  else
281
+ @pikelets.select {|p| thin_pikelets.include?(p) }.each do |tp|
282
+ tp[:pool].empty!
283
+ end
284
+
285
+ rsq_p = @pikelets.detect {|p|
286
+ ['email_notifier', 'sms_notifier'].include?(p[:type]) && !p[:pool].nil?
287
+ }
288
+
289
+ rsq_p[:pool].empty! if rsq_p
290
+
283
291
  EM.stop
284
292
  break
285
293
  end
@@ -3,6 +3,8 @@
3
3
  # NB: use of redis.keys probably indicates we should maintain a data
4
4
  # structure to avoid the need for this type of query
5
5
 
6
+ require 'set'
7
+
6
8
  module Flapjack
7
9
 
8
10
  module Data
@@ -85,18 +87,35 @@ module Flapjack
85
87
  merge('service_key' => service_key)
86
88
  end
87
89
 
88
- def entities
89
- @redis.keys('contacts_for:*').inject([]) {|ret, k|
90
+ def entities_and_checks
91
+ @redis.keys('contacts_for:*').inject({}) {|ret, k|
90
92
  if @redis.sismember(k, self.id)
91
- k =~ /^contacts_for:(.+)$/
92
- entity_id = $1
93
- if entity_name = @redis.hget("entity:#{entity_id}", 'name')
94
- ret << Flapjack::Data::Entity.new(:name => entity_name,
95
- :id => entity_id, :redis => @redis)
93
+ if k =~ /^contacts_for:([a-zA-Z0-9][a-zA-Z0-9\.\-]*[a-zA-Z0-9])(?::(\w+))?$/
94
+ entity_id = $1
95
+ check = $2
96
+
97
+ unless ret.has_key?(entity_id)
98
+ ret[entity_id] = {}
99
+ if entity_name = @redis.hget("entity:#{entity_id}", 'name')
100
+ entity = Flapjack::Data::Entity.new(:name => entity_name,
101
+ :id => entity_id, :redis => @redis)
102
+ ret[entity_id][:entity] = entity
103
+ end
104
+ # using a set to ensure unique check values
105
+ ret[entity_id][:checks] = Set.new
106
+ end
107
+
108
+ if check
109
+ # just add this check for the entity
110
+ ret[entity_id][:checks] |= check
111
+ else
112
+ # registered for the entity so add all checks
113
+ ret[entity_id][:checks] |= entity.check_list
114
+ end
96
115
  end
97
116
  end
98
117
  ret
99
- }
118
+ }.values
100
119
  end
101
120
 
102
121
  def name
@@ -45,7 +45,10 @@ module Flapjack
45
45
  @filters << Flapjack::Filters::Delays.new(options)
46
46
  @filters << Flapjack::Filters::Acknowledgement.new(options)
47
47
 
48
- @boot_time = Time.now
48
+ @boot_time = Time.now
49
+ @fqdn = `/bin/hostname -f`.chomp
50
+ @pid = Process.pid
51
+ @instance_id = "#{@fqdn}:#{@pid}"
49
52
 
50
53
  # FIXME: all of the below keys assume there is only ever one executive running;
51
54
  # we could generate a fuid and save it to disk, and prepend it from that
@@ -61,6 +64,12 @@ module Flapjack
61
64
  @redis.hset('event_counters', 'failure', 0)
62
65
  @redis.hset('event_counters', 'action', 0)
63
66
  end
67
+
68
+ @redis.zadd('executive_instances', @boot_time.to_i, @instance_id)
69
+ @redis.hset("event_counters:#{@instance_id}", 'all', 0)
70
+ @redis.hset("event_counters:#{@instance_id}", 'ok', 0)
71
+ @redis.hset("event_counters:#{@instance_id}", 'failure', 0)
72
+ @redis.hset("event_counters:#{@instance_id}", 'action', 0)
64
73
  end
65
74
 
66
75
  def main
@@ -73,6 +82,9 @@ module Flapjack
73
82
  event = Flapjack::Data::Event.next(:persistence => @redis)
74
83
  process_event(event) unless event.nil?
75
84
  end
85
+
86
+ @redis.empty! if @redis
87
+
76
88
  @logger.info("Exiting main loop.")
77
89
  end
78
90
 
@@ -123,6 +135,7 @@ module Flapjack
123
135
  result = { :skip_filters => false }
124
136
  timestamp = Time.now.to_i
125
137
  @event_count = @redis.hincrby('event_counters', 'all', 1)
138
+ @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
126
139
 
127
140
  # FIXME skip if entity_check.nil?
128
141
 
@@ -137,8 +150,10 @@ module Flapjack
137
150
 
138
151
  if event.ok?
139
152
  @redis.hincrby('event_counters', 'ok', 1)
153
+ @redis.hincrby("event_counters:#{@instance_id}", 'ok', 1)
140
154
  elsif event.failure?
141
155
  @redis.hincrby('event_counters', 'failure', 1)
156
+ @redis.hincrby("event_counters:#{@instance_id}", 'failure', 1)
142
157
  @redis.hset('unacknowledged_failures', @event_count, event.id)
143
158
  end
144
159
 
@@ -167,7 +182,8 @@ module Flapjack
167
182
  when 'action'
168
183
  # When an action event is processed, store the event.
169
184
  @redis.hset(event.id + ':actions', timestamp, event.state)
170
- @redis.hincrby('event_counters', 'action', 1) if event.ok?
185
+ @redis.hincrby('event_counters', 'action', 1)
186
+ @redis.hincrby("event_counters:#{@instance_id}", 'action', 1)
171
187
 
172
188
  if event.acknowledgement? && event.acknowledgement_id
173
189
  @redis.hdel('unacknowledged_failures', event.acknowledgement_id)
@@ -315,6 +315,9 @@ module Flapjack
315
315
 
316
316
  count_timer.cancel
317
317
  keepalive_timer.cancel
318
+
319
+ @redis.empty! if @redis
320
+ @redis_handler.empty! if @redis_handler
318
321
  end
319
322
 
320
323
  end
@@ -3,19 +3,12 @@
3
3
  #require 'socket'
4
4
 
5
5
  require 'eventmachine'
6
- # the redis/synchrony gems need to be required in this particular order, see # the redis-rb README for details
7
- #require 'hiredis'
8
6
  require 'em-synchrony'
9
- #require 'redis/connection/synchrony'
10
- #require 'redis'
11
-
12
- #require 'chronic_duration'
13
7
 
14
8
  require 'blather/client/client'
15
9
  require 'em-synchrony/fiber_iterator'
16
10
  require 'yajl/json_gem'
17
11
 
18
- #require 'flapjack/data/entity_check'
19
12
  require 'flapjack/pikelet'
20
13
  require 'flapjack/utility'
21
14
 
@@ -223,6 +223,9 @@ module Flapjack
223
223
  end
224
224
 
225
225
  acknowledgement_timer.cancel
226
+
227
+ @redis.empty! if @redis
228
+ @redis_timer.empty! if @redis_timer
226
229
  end
227
230
 
228
231
  end
@@ -12,6 +12,8 @@ require 'log4r'
12
12
  require 'log4r/outputter/consoleoutputters'
13
13
  require 'log4r/outputter/syslogoutputter'
14
14
 
15
+ require 'flapjack/redis_pool'
16
+
15
17
  module Flapjack
16
18
  module Pikelet
17
19
  attr_accessor :logger, :redis, :config
@@ -27,9 +29,7 @@ module Flapjack
27
29
  def build_redis_connection_pool(options = {})
28
30
  return unless @bootstrapped
29
31
  if defined?(EventMachine) && defined?(EventMachine::Synchrony)
30
- EventMachine::Synchrony::ConnectionPool.new(:size => options[:size] || 5) do
31
- ::Redis.new(@redis_config)
32
- end
32
+ Flapjack::RedisPool.new(:config => @redis_config, :size => options[:size])
33
33
  else
34
34
  ::Redis.new(@redis_config)
35
35
  end
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'eventmachine'
4
+ # the redis/synchrony gems need to be required in this particular order, see
5
+ # the redis-rb README for details
6
+ require 'hiredis'
7
+ require 'em-synchrony'
8
+ require 'redis/connection/synchrony'
9
+ require 'redis'
10
+
11
+ # require 'eventmachine/synchrony/connection_pool'
12
+
13
+ module Flapjack
14
+ class RedisPool < EventMachine::Synchrony::ConnectionPool
15
+
16
+ def initialize(opts = {})
17
+ config = opts.delete(:config)
18
+ super(:size => opts[:size] || 5) {
19
+ ::Redis.new(config)
20
+ }
21
+ end
22
+
23
+ def empty!
24
+ f = Fiber.current
25
+
26
+ until @available.empty? && @pending.empty?
27
+ begin
28
+ conn = acquire(f)
29
+ conn.quit
30
+ @available.delete(conn)
31
+ ensure
32
+ if pending = @pending.shift
33
+ pending.resume
34
+ end
35
+ end
36
+ end
37
+ end
38
+
39
+ end
40
+ end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  module Flapjack
4
- VERSION = "0.6.37"
4
+ VERSION = "0.6.38"
5
5
  end
data/lib/flapjack/web.rb CHANGED
@@ -196,7 +196,9 @@ module Flapjack
196
196
  @pagerduty_credentials = @contact.pagerduty_credentials
197
197
  end
198
198
 
199
- @entities = @contact.entities
199
+ @entities_and_checks = @contact.entities_and_checks.sort_by {|ec|
200
+ ec[:entity].name
201
+ }
200
202
 
201
203
  haml :contact
202
204
  end
@@ -232,18 +234,21 @@ module Flapjack
232
234
  end
233
235
 
234
236
  def self_stats
237
+ @fqdn = `/bin/hostname -f`.chomp
238
+ @pid = Process.pid
239
+ @instance_id = "#{@fqdn}:#{@pid}"
240
+
235
241
  @keys = @@redis.keys '*'
236
- @count_failing_checks = @@redis.zcard 'failed_checks'
237
- @count_all_checks = @@redis.keys('check:*:*').length
238
- @event_counter_all = @@redis.hget('event_counters', 'all')
239
- @event_counter_ok = @@redis.hget('event_counters', 'ok')
240
- @event_counter_failure = @@redis.hget('event_counters', 'failure')
241
- @event_counter_action = @@redis.hget('event_counters', 'action')
242
- @boot_time = Time.at(@@redis.get('boot_time').to_i)
243
- @uptime = Time.now.to_i - @boot_time.to_i
244
- @uptime_string = time_period_in_words(@uptime)
245
- @event_rate_all = (@uptime > 0) ?
246
- (@event_counter_all.to_f / @uptime) : 0
242
+ @count_failing_checks = @@redis.zcard 'failed_checks'
243
+ @count_all_checks = @@redis.keys('check:*:*').length
244
+ @executive_instances = @@redis.zrange('executive_instances', '0', '-1', :withscores => true)
245
+ @event_counters = @@redis.hgetall('event_counters')
246
+ @event_counters_instance = @@redis.hgetall("event_counters:#{@instance_id}")
247
+ @boot_time = Time.at(@@redis.zscore('executive_instances', @instance_id).to_i)
248
+ @uptime = Time.now.to_i - @boot_time.to_i
249
+ @uptime_string = time_period_in_words(@uptime)
250
+ @event_rate_all = (@uptime > 0) ?
251
+ (@event_counters_instance['all'].to_f / @uptime) : 0
247
252
  @events_queued = @@redis.llen('events')
248
253
  end
249
254
 
@@ -11,40 +11,36 @@
11
11
  %div#wrapper
12
12
  = nav
13
13
  %h1= @contact.name
14
- - if @entities && !@entities.empty?
14
+ - if @contact.media && !@contact.media.empty?
15
+ %ul
16
+ - @contact.media.each_pair do |mk, mv|
17
+ - if 'pagerduty'.eql?(mk)
18
+ %li= "PagerDuty: "
19
+ %ul
20
+ - @pagerduty_credentials.each_pair do |pk, pv|
21
+ - if 'password'.eql?(pk)
22
+ %li= "#{pk}: ..."
23
+ - else
24
+ %li= "#{pk}: #{pv}"
25
+ - else
26
+ %li= "#{mk.capitalize}: #{mv}"
27
+ - else
28
+ %p No media
29
+ - if @entities_and_checks && !@entities_and_checks.empty?
15
30
  %table
16
31
  %tr
17
32
  %th Entity name
18
33
  %th Checks
19
- %th Media
20
- - @entities.sort_by(&:name).each do |entity|
34
+ - @entities_and_checks.each do |ec|
35
+ - entity = ec[:entity]
36
+ - checks = ec[:checks]
21
37
  %tr
22
38
  %td
23
39
  %p= entity.name
24
40
  %td
25
- - checks = entity.check_list
26
- - if !checks.empty?
27
- - checks.each do |check|
28
- - link = "/check?entity=#{entity.name}&check=#{check}"
29
- %p
30
- %a(title='check status' href=link) #{check}
31
- - else
32
- %p No checks
33
- %td
34
- - if @contact.media && !@contact.media.empty?
35
- %ul
36
- - @contact.media.each_pair do |mk, mv|
37
- - if 'pagerduty'.eql?(mk)
38
- %li= "PagerDuty: "
39
- %ul
40
- - @pagerduty_credentials.each_pair do |pk, pv|
41
- - if 'password'.eql?(pk)
42
- %li= "#{pk}: ..."
43
- - else
44
- %li= "#{pk}: #{pv}"
45
- - else
46
- %li= "#{mk.capitalize}: #{mv}"
47
- - else
48
- %p No media
41
+ - checks.each do |check|
42
+ - link = "/check?entity=#{entity.name}&check=#{check}"
43
+ %p
44
+ %a(title='check status' href=link) #{check}
49
45
  - else
50
46
  %p No entities
@@ -17,9 +17,11 @@
17
17
  %h4
18
18
  Number of failing services:
19
19
  = @count
20
- %p Events processed: #{@event_counter_all} (ok: #{@event_counter_ok}, failure: #{@event_counter_failure}, action: #{@event_counter_action})
21
- %p Average rate: #{@event_rate_all} events per second
20
+ %p Events processed (all time): #{@event_counters['all']} (ok: #{@event_counters['ok']}, failure: #{@event_counters['failure']}, action: #{@event_counters['action']})
21
+ %p Events processed (this instance): #{@event_counters_instance['all']} (ok: #{@event_counters_instance['ok']}, failure: #{@event_counters_instance['failure']}, action: #{@event_counters_instance['action']})
22
+ %p Average rate (this instance): #{@event_rate_all} events per second
22
23
  %p Total keys in redis: #{@keys.length}
23
24
  %p Uptime: #{@uptime_string}
24
25
  %p Boot time: #{@boot_time}
25
26
  %p Current time: #{Time.now}
27
+ %p Executive Instances: #{@executive_instances.inspect}
@@ -36,6 +36,7 @@ describe Flapjack::Executive, :redis => true do
36
36
 
37
37
  executive = Flapjack::Executive.new
38
38
  executive.bootstrap(:config => {})
39
+ @redis.should_receive(:empty!)
39
40
  executive.should_receive(:build_redis_connection_pool).and_return(@redis)
40
41
 
41
42
  # hacky, but the behaviour it's mimicking (shutdown from another thread) isn't
@@ -134,6 +134,7 @@ describe Flapjack::Jabber do
134
134
  EM::Synchrony.should_receive(:add_periodic_timer).with(60).and_return(timer_2)
135
135
 
136
136
  redis = mock('redis')
137
+ redis.should_receive(:empty!)
137
138
 
138
139
  fj = Flapjack::Jabber.new
139
140
  fj.bootstrap(:config => config)
@@ -25,11 +25,9 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
25
25
  @redis.should_receive(:keys).with('*').and_return([])
26
26
  @redis.should_receive(:zcard).with('failed_checks')
27
27
  @redis.should_receive(:keys).with('check:*:*').and_return([])
28
- @redis.should_receive(:hget).with('event_counters', 'all')
29
- @redis.should_receive(:hget).with('event_counters', 'ok')
30
- @redis.should_receive(:hget).with('event_counters', 'failure')
31
- @redis.should_receive(:hget).with('event_counters', 'action')
32
- @redis.should_receive(:get).with('boot_time').and_return(0)
28
+ @redis.should_receive(:zscore).with('executive_instances', anything).and_return(Time.now.to_i)
29
+ @redis.should_receive(:hgetall).twice.and_return({'all' => '8001', 'ok' => '8002'},
30
+ {'all' => '9001', 'ok' => '9002'}) #
33
31
  @redis.should_receive(:llen).with('events')
34
32
  end
35
33
 
@@ -67,6 +65,7 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
67
65
  end
68
66
 
69
67
  it "shows a page listing failing checks" do
68
+ @redis.should_receive(:zrange).with("executive_instances", "0", "-1", :withscores => true)
70
69
  @redis.should_receive(:zrange).with('failed_checks', 0, -1).and_return(["#{entity_name}:#{check}:states"])
71
70
 
72
71
  expect_stats
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flapjack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.37
4
+ version: 0.6.38
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-10-03 00:00:00.000000000 Z
14
+ date: 2012-10-09 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: daemons
@@ -453,6 +453,7 @@ files:
453
453
  - lib/flapjack/persistence/sqlite3.rb
454
454
  - lib/flapjack/persistence/sqlite3/sqlite3.rb
455
455
  - lib/flapjack/pikelet.rb
456
+ - lib/flapjack/redis_pool.rb
456
457
  - lib/flapjack/transports/beanstalkd.rb
457
458
  - lib/flapjack/transports/result.rb
458
459
  - lib/flapjack/utility.rb