flapjack 0.5.5 → 0.6.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
data/lib/flapjack/patches.rb
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
1
3
|
require 'ostruct'
|
|
2
4
|
require 'daemons'
|
|
3
|
-
require '
|
|
5
|
+
require 'thin'
|
|
6
|
+
require 'resque'
|
|
7
|
+
# require 'log4r'
|
|
4
8
|
|
|
5
9
|
class OpenStruct
|
|
6
10
|
def to_h
|
|
@@ -9,33 +13,33 @@ class OpenStruct
|
|
|
9
13
|
end
|
|
10
14
|
|
|
11
15
|
module Daemons
|
|
12
|
-
class PidFile
|
|
16
|
+
class PidFile
|
|
13
17
|
# we override this method so creating pid files is fork-safe
|
|
14
|
-
def filename
|
|
18
|
+
def filename
|
|
15
19
|
File.join(@dir, "#{@progname}#{Process.pid}.pid")
|
|
16
20
|
end
|
|
17
21
|
end
|
|
18
22
|
end
|
|
19
23
|
|
|
20
|
-
module Log4r
|
|
21
|
-
class Logger
|
|
22
|
-
def error(args)
|
|
23
|
-
err(args)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def warning(args)
|
|
27
|
-
warn(args)
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
end
|
|
24
|
+
#module Log4r
|
|
25
|
+
# class Logger
|
|
26
|
+
# def error(args)
|
|
27
|
+
# err(args)
|
|
28
|
+
# end
|
|
29
|
+
#
|
|
30
|
+
# def warning(args)
|
|
31
|
+
# warn(args)
|
|
32
|
+
# end
|
|
33
|
+
# end
|
|
34
|
+
#end
|
|
31
35
|
|
|
32
|
-
# extracted from Extlib.
|
|
36
|
+
# extracted from Extlib.
|
|
33
37
|
# FIXME: what's the licensing here?
|
|
34
38
|
class String
|
|
35
|
-
def camel_case
|
|
36
|
-
return self if self !~ /_/ && self =~ /[A-Z]+.*/
|
|
37
|
-
split('_').map{|e| e.capitalize}.join
|
|
38
|
-
end
|
|
39
|
+
def camel_case
|
|
40
|
+
return self if self !~ /_/ && self =~ /[A-Z]+.*/
|
|
41
|
+
split('_').map{|e| e.capitalize}.join
|
|
42
|
+
end
|
|
39
43
|
end
|
|
40
44
|
|
|
41
45
|
# http://gist.github.com/151324
|
|
@@ -49,3 +53,88 @@ class Hash
|
|
|
49
53
|
end
|
|
50
54
|
end
|
|
51
55
|
end
|
|
56
|
+
|
|
57
|
+
# we don't want to stop the entire EM reactor when we stop a web server
|
|
58
|
+
module Thin
  module Backends
    class Base
      # Force-stops this backend only: clears the running/stopping flags,
      # closes every tracked connection and then closes the backend itself.
      # The EventMachine reactor is intentionally left running, so other
      # components sharing it are unaffected.
      def stop!
        @stopping = @running = false

        # EventMachine.stop if EventMachine.reactor_running?
        @connections.each(&:close_connection)
        close
      end
    end
  end
end
|
|
72
|
+
|
|
73
|
+
# Resque is really designed around a multiprocess model, so here we
|
|
74
|
+
# stub some of that behaviour away.
|
|
75
|
+
module Resque

  class Worker

    # Intentionally a no-op: the stock implementation rewrites $0 (the
    # process title); here the title is left untouched.
    def procline(string)
      # $0 = "resque-#{Resque::Version}: #{string}"
      # log! $0
    end

    # Main worker loop, redefined in full so that it never assigns to $0.
    #
    # interval - polling period in seconds (coerced with Float()); when it is
    #            zero the loop exits as soon as no job can be reserved.
    # block    - optional block forwarded to #perform for each job.
    #
    # Always calls #unregister_worker on the way out, even on an exception.
    def work(interval = 5.0, &block)
      interval = Float(interval)
      # $0 = "resque: Starting"
      startup

      loop do
        break if shutdown?

        # Only try to reserve a job while the worker is not paused.
        job = paused? ? nil : reserve

        if job
          log "got: #{job.inspect}"
          job.worker = self
          run_hook :before_fork, job
          working_on job

          @child = fork
          if @child
            # Parent side: wait for the forked child to finish the job.
            srand # Reseeding
            procline "Forked #{@child} at #{Time.now.to_i}"
            Process.wait(@child)
          else
            # Child side, or the same process when forking is unavailable.
            unregister_signal_handlers if !@cant_fork && term_child
            procline "Processing #{job.queue} since #{Time.now.to_i}"
            redis.client.reconnect unless @cant_fork # Don't share connection with parent
            perform(job, &block)
            exit! unless @cant_fork
          end

          done_working
          @child = nil
        else
          break if interval.zero?
          log! "Sleeping for #{interval} seconds"
          status = paused? ? "Paused" : "Waiting for #{@queues.join(',')}"
          procline status
          sleep interval
        end
      end

    ensure
      unregister_worker
    end

  end
end
|
|
127
|
+
|
|
128
|
+
# As Redis::Future objects inherit from BasicObject, it's difficult to
|
|
129
|
+
# distinguish between them and other objects in collected data from
|
|
130
|
+
# pipelined queries.
|
|
131
|
+
#
|
|
132
|
+
# (One alternative would be to put other values in Futures ourselves, and
|
|
133
|
+
# evaluate everything...)
|
|
134
|
+
class Redis
  class Future
    # Reports the class of a future explicitly, so that futures collected
    # from pipelined queries can be told apart from ordinary values by
    # asking for their #class.
    def class
      ::Redis::Future
    end
  end
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
1
3
|
class Check
|
|
2
4
|
include DataMapper::Resource
|
|
3
5
|
|
|
@@ -6,9 +8,9 @@ class Check
|
|
|
6
8
|
has n, :related_checks, :child_key => [:child_id, :parent_id]
|
|
7
9
|
has n, :events
|
|
8
10
|
|
|
9
|
-
#has n, :parent_checks, :through => :related_checks,
|
|
11
|
+
#has n, :parent_checks, :through => :related_checks,
|
|
10
12
|
# :child_key => :child_id, :class_name => "Check"
|
|
11
|
-
#has n, :child_checks, :through => :related_checks,
|
|
13
|
+
#has n, :child_checks, :through => :related_checks,
|
|
12
14
|
# :child_key => :parent_id, :class_name => "Check"
|
|
13
15
|
|
|
14
16
|
belongs_to :node
|
|
@@ -27,7 +29,7 @@ class Check
|
|
|
27
29
|
property :deleted_at, ParanoidDateTime
|
|
28
30
|
|
|
29
31
|
# copy command onto check
|
|
30
|
-
before :valid? do
|
|
32
|
+
before :valid? do
|
|
31
33
|
if self.check_template && self.command.blank?
|
|
32
34
|
self.command = self.check_template.command
|
|
33
35
|
self.name = self.check_template.name
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
1
3
|
class Node
|
|
2
4
|
include DataMapper::Resource
|
|
3
5
|
|
|
@@ -6,7 +8,7 @@ class Node
|
|
|
6
8
|
property :fqdn, String, :key => true
|
|
7
9
|
|
|
8
10
|
validates_is_unique :fqdn
|
|
9
|
-
validates_format :fqdn, :with => /^[0-9|a-z|A-Z|\-|\.]*$/,
|
|
11
|
+
validates_format :fqdn, :with => /^[0-9|a-z|A-Z|\-|\.]*$/,
|
|
10
12
|
:message => "not a RFC1035-formatted FQDN (see section 2.3.1)"
|
|
11
13
|
|
|
12
14
|
def hostname
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# This class encapsulates the config data and environmental setup used
|
|
4
|
+
# by the various Flapjack components.
|
|
5
|
+
#
|
|
6
|
+
# "In Australia and New Zealand, small pancakes (about 75 mm in diameter) known as pikelets
|
|
7
|
+
# are also eaten. They are traditionally served with jam and/or whipped cream, or solely
|
|
8
|
+
# with butter, at afternoon tea, but can also be served at morning tea."
|
|
9
|
+
# from http://en.wikipedia.org/wiki/Pancake
|
|
10
|
+
|
|
11
|
+
require 'log4r'
|
|
12
|
+
require 'log4r/outputter/consoleoutputters'
|
|
13
|
+
require 'log4r/outputter/syslogoutputter'
|
|
14
|
+
|
|
15
|
+
module Flapjack
  # Shared setup for Flapjack components: holds the logger, Redis settings
  # and configuration, plus a simple "should quit" flag. The module can be
  # mixed into instances (include) or into a class itself (extend).
  module Pikelet
    attr_accessor :logger, :redis, :config

    # True once #stop has been called; false after #bootstrap; nil before
    # the component has been bootstrapped.
    def should_quit?
      @should_quit
    end

    # Flags the component as needing to shut down.
    def stop
      @should_quit = true
    end

    # Builds a Redis connection from the settings captured by #bootstrap.
    #
    # options - Hash; :size sets the pool size (default 5) when
    #           EventMachine::Synchrony is loaded.
    #
    # Returns nil when #bootstrap has not run yet; otherwise a Synchrony
    # connection pool of Redis clients if EventMachine::Synchrony is
    # defined, or a single plain Redis client if it is not.
    def build_redis_connection_pool(options = {})
      return unless @bootstrapped
      if defined?(EventMachine) && defined?(EventMachine::Synchrony)
        EventMachine::Synchrony::ConnectionPool.new(:size => options[:size] || 5) do
          ::Redis.new(@redis_config.merge(:driver => 'synchrony'))
        end
      else
        ::Redis.new(@redis_config)
      end
    end

    # One-time initialisation; later calls return immediately.
    #
    # opts - Hash with optional keys:
    #        :logger - logger to use; when absent a Log4r logger writing to
    #                  stdout and syslog is created
    #        :redis  - Hash of Redis connection settings (default {})
    #        :config - component configuration (default {})
    def bootstrap(opts = {})
      return if @bootstrapped

      unless @logger = opts[:logger]
        # When this module has been mixed in with `extend`, self is the
        # class itself and self.class would just be Class, so name the
        # logger after self in that case.
        owner = is_a?(Module) ? self : self.class
        @logger = Log4r::Logger.new(owner.to_s.downcase.gsub('::', '-'))
        @logger.add(Log4r::StdoutOutputter.new("flapjack"))
        @logger.add(Log4r::SyslogOutputter.new("flapjack"))
      end

      @redis_config = opts[:redis] || {}
      @config = opts[:config] || {}

      @should_quit = false

      @bootstrapped = true
    end

  end
end
|
|
@@ -23,7 +23,7 @@ module Flapjack
|
|
|
23
23
|
begin
|
|
24
24
|
@queue = Beanstalk::Pool.new(["#{@config.host}:#{@config.port}"], @config.queue_name)
|
|
25
25
|
rescue Beanstalk::NotConnected => e
|
|
26
|
-
@log.error("Couldn't connect to the '#{@config.queue_name}' Beanstalk queue.
|
|
26
|
+
@log.error("Couldn't connect to the '#{@config.queue_name}' Beanstalk queue. Retrying in 5 seconds.")
|
|
27
27
|
sleep 5
|
|
28
28
|
retry
|
|
29
29
|
end
|
|
@@ -10,22 +10,22 @@ module Flapjack
|
|
|
10
10
|
@job = options[:job]
|
|
11
11
|
@result = OpenStruct.new(options[:result])
|
|
12
12
|
end
|
|
13
|
-
|
|
14
|
-
# Whether a check returns an ok status.
|
|
13
|
+
|
|
14
|
+
# Whether a check returns an ok status.
|
|
15
15
|
def ok?
|
|
16
16
|
@result.retval == 0
|
|
17
17
|
end
|
|
18
|
-
|
|
18
|
+
|
|
19
19
|
# Whether a check has a warning status.
|
|
20
20
|
def warning?
|
|
21
21
|
@result.retval == 1
|
|
22
22
|
end
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
# Whether a check has a critical status.
|
|
25
25
|
def critical?
|
|
26
26
|
@result.retval == 2
|
|
27
27
|
end
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
# Human readable representation of the check's return value.
|
|
30
30
|
def status
|
|
31
31
|
case @result.retval
|
|
@@ -34,7 +34,7 @@ module Flapjack
|
|
|
34
34
|
when 2 ; "critical"
|
|
35
35
|
end
|
|
36
36
|
end
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
# FIXME: there is a *lot* of duplication here - implement a proxy
|
|
39
39
|
# object pattern?
|
|
40
40
|
def id
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
module Flapjack
  # Small formatting helpers for durations, relative times and timezones.
  module Utility

    # Renders a duration in seconds as days, hours, minutes and seconds,
    # omitting any unit whose value is zero,
    #   time_period_in_words(3661) => '1 hours, 1 minutes, 1 seconds'
    # A period of zero yields an empty string.
    def time_period_in_words(period)
      period_mm, period_ss  = period.divmod(60)
      period_hh, period_mm  = period_mm.divmod(60)
      period_dd, period_hh  = period_hh.divmod(24)
      ["#{period_dd} days",
       "#{period_hh} hours",
       "#{period_mm} minutes",
       "#{period_ss} seconds"].reject {|s| s =~ /^0 /}.join(', ')
    end

    # Returns the distance between now and the given time in words (no
    # "ago" suffix is appended; the direction of the difference is ignored),
    #   relative_time_ago(Time.now) => 'about a minute'
    def relative_time_ago(from_time)
      distance_in_minutes = (((Time.now - from_time.to_time).abs)/60).round
      case distance_in_minutes
        when 0..1           then 'about a minute'
        when 2..44          then "#{distance_in_minutes} minutes"
        when 45..89         then 'about 1 hour'
        when 90..1439       then "about #{(distance_in_minutes.to_f / 60.0).round} hours"
        when 1440..2439     then '1 day'
        when 2440..2879     then 'about 2 days'
        # Divide as floats so that #round really rounds to the nearest unit
        # (integer division would silently truncate), matching the hours case.
        when 2880..43199    then "#{(distance_in_minutes.to_f / 1440.0).round} days"
        when 43200..86399   then 'about 1 month'
        when 86400..525599  then "#{(distance_in_minutes.to_f / 43200.0).round} months"
        when 525600..1051199 then 'about 1 year'
        # "over N years" wants the whole number of elapsed years, so
        # integer division is the intended behaviour here.
        else "over #{distance_in_minutes / 525600} years"
      end
    end

    # returns a string showing the local timezone we're running in
    # eg "CST (UTC+09:30)"
    def local_timezone
      now    = Time.new
      offset = now.utc_offset
      sign   = (offset < 0) ? '-' : '+'
      # Split the magnitude of the offset: divmod on a negative offset floors
      # towards -infinity and would turn -09:30 into "-10:30".
      hours, seconds = offset.abs.divmod(3600)
      tzoffset = format('%s%02d:%02d', sign, hours, seconds / 60)
      "#{now.zone} (UTC#{tzoffset})"
    end

  end
end
|
data/lib/flapjack/web.rb
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'fiber'
|
|
4
|
+
|
|
5
|
+
require 'chronic'
|
|
6
|
+
require 'chronic_duration'
|
|
7
|
+
require 'sinatra/base'
|
|
8
|
+
require 'haml'
|
|
9
|
+
require 'rack/fiber_pool'
|
|
10
|
+
|
|
11
|
+
require 'flapjack/pikelet'
|
|
12
|
+
require 'flapjack/data/entity_check'
|
|
13
|
+
require 'flapjack/utility'
|
|
14
|
+
|
|
15
|
+
module Flapjack
  # Sinatra application for Flapjack's web interface: lists check states,
  # shows details for a single check, and accepts acknowledgements and
  # scheduled maintenance changes. All data is read through @@redis.
  class Web < Sinatra::Base

    # doesn't work with Rack::Test for some reason
    unless 'test'.eql?(FLAPJACK_ENV)
      # Prints the failure and answers with a 503 carrying the message.
      rescue_exception = Proc.new { |env, exception|
        p exception.message
        puts exception.backtrace.join("\n")
        # Rack response bodies must respond to #each, so wrap the message
        # in an Array rather than returning a bare String.
        [503, {}, [exception.message]]
      }

      use Rack::FiberPool, :size => 25, :rescue_exception => rescue_exception
    end
    use Rack::MethodOverride
    extend Flapjack::Pikelet
    include Flapjack::Utility

    set :views, settings.root + '/web/views'

    before do
      # will only initialise the first time it's run
      Flapjack::Web.bootstrap
    end

    # Overview of every check that has a '<entity>:<check>:states' key.
    get '/' do
      self_stats

      # TODO (?) recast as Entity.all do |e|; e.checks.do |ec|; ...
      @states = check_states(@@redis.keys('*:*:states'))
      haml :index
    end

    # Same overview, restricted to members of the 'failed_checks' sorted set.
    get '/failing' do
      self_stats
      @states = check_states(@@redis.zrange('failed_checks', 0, -1))
      haml :index
    end

    get '/self_stats' do
      self_stats
      haml :self_stats
    end

    # Detail page for one check; 404 when the entity or check is unknown.
    get '/check' do
      @entity = params[:entity]
      @check  = params[:check]

      entity_check = get_entity_check(@entity, @check)
      return 404 if entity_check.nil?

      last_change = entity_check.last_change

      @check_state                = entity_check.state
      @check_last_update          = entity_check.last_update
      @check_last_change          = last_change
      @check_summary              = entity_check.summary
      @last_notifications         =
        {:problem         => entity_check.last_problem_notification,
         :recovery        => entity_check.last_recovery_notification,
         :acknowledgement => entity_check.last_acknowledgement_notification
        }
      @in_scheduled_maintenance   = entity_check.in_scheduled_maintenance?
      @in_unscheduled_maintenance = entity_check.in_unscheduled_maintenance?
      @scheduled_maintenances     = entity_check.maintenances(nil, nil, :scheduled => true)
      # Only a failing check can be acknowledged.
      @acknowledgement_id         = entity_check.failed? ?
        entity_check.event_count_at(entity_check.last_change) : nil

      haml :check
    end

    # Creates an acknowledgement for a check. A missing, unparseable or
    # non-positive duration falls back to four hours.
    post '/acknowledgements/:entity/:check' do
      @entity             = params[:entity]
      @check              = params[:check]
      @summary            = params[:summary]
      @acknowledgement_id = params[:acknowledgement_id]

      dur = ChronicDuration.parse(params[:duration] || '')
      @duration = (dur.nil? || (dur <= 0)) ? (4 * 60 * 60) : dur

      entity_check = get_entity_check(@entity, @check)
      return 404 if entity_check.nil?

      ack = entity_check.create_acknowledgement('summary' => (@summary || ''),
        'acknowledgement_id' => @acknowledgement_id, 'duration' => @duration)
      @acknowledge_success = !!ack
      [201, haml(:acknowledge)]
    end

    # FIXME: there is bound to be a more idiomatic / restful way of doing this
    post '/end_unscheduled_maintenance/:entity/:check' do
      @entity = params[:entity]
      @check  = params[:check]

      entity_check = get_entity_check(@entity, @check)
      return 404 if entity_check.nil?

      entity_check.end_unscheduled_maintenance

      redirect back
    end

    # create scheduled maintenance
    post '/scheduled_maintenances/:entity/:check' do
      # An unparseable start time yields nil, and nil.to_i is 0.
      start_time = Chronic.parse(params[:start_time]).to_i
      raise ArgumentError, "start time parsed to zero" unless start_time > 0
      duration   = ChronicDuration.parse(params[:duration])
      summary    = params[:summary]

      entity_check = get_entity_check(params[:entity], params[:check])
      return 404 if entity_check.nil?

      entity_check.create_scheduled_maintenance(:start_time => start_time,
                                                :duration   => duration,
                                                :summary    => summary)
      redirect back
    end

    # delete a scheduled maintenance
    delete '/scheduled_maintenances/:entity/:check' do
      entity_check = get_entity_check(params[:entity], params[:check])
      return 404 if entity_check.nil?

      entity_check.delete_scheduled_maintenance(:start_time => params[:start_time].to_i)
      redirect back
    end

  private

    # Looks up the EntityCheck for the given entity name and check name.
    # Returns nil when either name is blank or the entity cannot be found.
    def get_entity_check(entity, check)
      entity_obj = (entity && entity.length > 0) ?
        Flapjack::Data::Entity.find_by_name(entity, :redis => @@redis) : nil
      return if entity_obj.nil? || (check.nil? || check.length == 0)
      Flapjack::Data::EntityCheck.for_entity(entity_obj, check, :redis => @@redis)
    end

    # Builds the rows for the index view from colon-separated Redis keys
    # whose first two segments are the entity name and the check name.
    # Keys whose entity cannot be found are dropped instead of raising a
    # TypeError from adding nil to an Array. Rows are ordered by entity
    # name, then check name.
    def check_states(keys)
      keys.map { |key|
        parts = key.split(':')[0..1]
        state = entity_check_state(parts[0], parts[1])
        state.nil? ? nil : ([parts[0], parts[1]] + state)
      }.compact.sort_by {|parts| parts[0..1] }
    end

    # Summarises one check as
    #   [state, last_change, last_update, in_unscheduled_maintenance,
    #    in_scheduled_maintenance, latest_notification_type,
    #    latest_notification_value]
    # with '-' standing in for a missing state or timestamp. Returns nil
    # when the entity cannot be found.
    def entity_check_state(entity_name, check)
      entity = Flapjack::Data::Entity.find_by_name(entity_name,
        :redis => @@redis)
      return if entity.nil?
      entity_check = Flapjack::Data::EntityCheck.for_entity(entity,
        check, :redis => @@redis)
      # Pick the notification type with the greatest value; types that have
      # no value count as 0.
      latest_notif =
        {:problem         => entity_check.last_problem_notification,
         :recovery        => entity_check.last_recovery_notification,
         :acknowledgement => entity_check.last_acknowledgement_notification
        }.max_by {|n| n[1] || 0}
      [(entity_check.state       || '-'),
       (entity_check.last_change || '-'),
       (entity_check.last_update || '-'),
       entity_check.in_unscheduled_maintenance?,
       entity_check.in_scheduled_maintenance?,
       latest_notif[0],
       latest_notif[1]
      ]
    end

    # Loads the counters shown on the statistics views into instance
    # variables: key listing, check counts, event counters, boot time,
    # uptime, overall event rate and the length of the 'events' list.
    def self_stats
      @keys = @@redis.keys '*'
      @count_failing_checks  = @@redis.zcard 'failed_checks'
      @count_all_checks      = @@redis.keys('check:*:*').length
      @event_counter_all     = @@redis.hget('event_counters', 'all')
      @event_counter_ok      = @@redis.hget('event_counters', 'ok')
      @event_counter_failure = @@redis.hget('event_counters', 'failure')
      @event_counter_action  = @@redis.hget('event_counters', 'action')
      @boot_time             = Time.at(@@redis.get('boot_time').to_i)
      @uptime                = Time.now.to_i - @boot_time.to_i
      @uptime_string         = time_period_in_words(@uptime)
      # Guard against dividing by zero right after boot.
      @event_rate_all        = (@uptime > 0) ?
                                 (@event_counter_all.to_f / @uptime) : 0
      @events_queued = @@redis.llen('events')
    end

    # Instance-level access to the class-level logger.
    def logger
      Flapjack::Web.logger
    end

  end
end
|