flapjack 1.0.0rc3 → 1.0.0rc5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -2
  3. data/.ruby-version +1 -0
  4. data/CHANGELOG.md +20 -0
  5. data/CONTRIBUTING.md +2 -2
  6. data/Gemfile +1 -1
  7. data/README.md +6 -16
  8. data/build.sh +13 -1
  9. data/etc/flapjack_config.yaml.example +98 -12
  10. data/features/cli.feature +8 -8
  11. data/features/cli_flapjack-nagios-receiver.feature +29 -37
  12. data/features/cli_flapper.feature +24 -12
  13. data/features/cli_simulate-failed-check.feature +2 -2
  14. data/features/notifications.feature +18 -1
  15. data/features/steps/cli_steps.rb +2 -2
  16. data/features/steps/notifications_steps.rb +71 -0
  17. data/features/support/env.rb +7 -6
  18. data/flapjack.gemspec +3 -1
  19. data/lib/flapjack/cli/flapper.rb +74 -25
  20. data/lib/flapjack/cli/import.rb +3 -4
  21. data/lib/flapjack/cli/maintenance.rb +182 -0
  22. data/lib/flapjack/cli/receiver.rb +110 -121
  23. data/lib/flapjack/cli/server.rb +30 -26
  24. data/lib/flapjack/cli/simulate.rb +2 -3
  25. data/lib/flapjack/data/contact.rb +1 -1
  26. data/lib/flapjack/data/entity.rb +425 -32
  27. data/lib/flapjack/data/entity_check.rb +212 -14
  28. data/lib/flapjack/data/event.rb +1 -1
  29. data/lib/flapjack/gateways/aws_sns.rb +134 -0
  30. data/lib/flapjack/gateways/aws_sns/alert.text.erb +5 -0
  31. data/lib/flapjack/gateways/aws_sns/rollup.text.erb +2 -0
  32. data/lib/flapjack/gateways/jabber.rb +2 -2
  33. data/lib/flapjack/gateways/jsonapi/check_methods.rb +1 -1
  34. data/lib/flapjack/gateways/jsonapi/contact_methods.rb +1 -1
  35. data/lib/flapjack/gateways/jsonapi/entity_methods.rb +15 -1
  36. data/lib/flapjack/gateways/jsonapi/metrics_methods.rb +4 -3
  37. data/lib/flapjack/gateways/jsonapi/report_methods.rb +1 -1
  38. data/lib/flapjack/gateways/web.rb +35 -16
  39. data/lib/flapjack/gateways/web/public/css/tablesort.css +0 -16
  40. data/lib/flapjack/gateways/web/public/js/backbone.jsonapi.js +1 -1
  41. data/lib/flapjack/gateways/web/public/js/jquery.tablesorter.widgets.js +0 -45
  42. data/lib/flapjack/gateways/web/public/js/modules/contact.js +2 -2
  43. data/lib/flapjack/gateways/web/public/js/modules/entity.js +2 -2
  44. data/lib/flapjack/gateways/web/public/js/modules/medium.js +4 -4
  45. data/lib/flapjack/gateways/web/public/js/self_stats.js +1 -1
  46. data/lib/flapjack/gateways/web/views/check.html.erb +10 -10
  47. data/lib/flapjack/gateways/web/views/checks.html.erb +1 -1
  48. data/lib/flapjack/gateways/web/views/contact.html.erb +5 -1
  49. data/lib/flapjack/gateways/web/views/edit_contacts.html.erb +3 -4
  50. data/lib/flapjack/gateways/web/views/entities.html.erb +1 -1
  51. data/lib/flapjack/gateways/web/views/index.html.erb +2 -2
  52. data/lib/flapjack/gateways/web/views/layout.erb +3 -3
  53. data/lib/flapjack/gateways/web/views/self_stats.html.erb +5 -6
  54. data/lib/flapjack/notifier.rb +4 -1
  55. data/lib/flapjack/patches.rb +8 -2
  56. data/lib/flapjack/pikelet.rb +3 -1
  57. data/lib/flapjack/version.rb +1 -1
  58. data/libexec/httpbroker.go +1 -1
  59. data/spec/lib/flapjack/coordinator_spec.rb +3 -3
  60. data/spec/lib/flapjack/data/contact_spec.rb +2 -2
  61. data/spec/lib/flapjack/data/entity_check_spec.rb +805 -53
  62. data/spec/lib/flapjack/data/entity_spec.rb +661 -0
  63. data/spec/lib/flapjack/gateways/aws_sns_spec.rb +123 -0
  64. data/spec/lib/flapjack/gateways/jabber_spec.rb +1 -1
  65. data/spec/lib/flapjack/gateways/jsonapi/check_methods_spec.rb +1 -1
  66. data/spec/lib/flapjack/gateways/jsonapi/entity_methods_spec.rb +2 -2
  67. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +1 -1
  68. data/spec/lib/flapjack/gateways/web_spec.rb +11 -11
  69. data/spec/support/profile_all_formatter.rb +10 -10
  70. data/spec/support/uncolored_doc_formatter.rb +66 -4
  71. data/src/flapjack/event.go +1 -1
  72. data/tasks/benchmarks.rake +24 -20
  73. data/tasks/entities.rake +148 -0
  74. data/tmp/dummy_contacts.json +43 -0
  75. data/tmp/dummy_entities.json +37 -1
  76. metadata +43 -7
  77. data/tmp/test_entities.json +0 -1
@@ -23,14 +23,40 @@ module Flapjack
23
23
  def initialize(global_options, options)
24
24
  @global_options = global_options
25
25
  @options = options
26
+
27
+ @config = Flapjack::Configuration.new
28
+ @config.load(global_options[:config])
29
+ @config_env = @config.all
30
+
31
+ if @config_env.nil? || @config_env.empty?
32
+ exit_now! "No config data for environment '#{FLAPJACK_ENV}' found in '#{global_options[:config]}'"
33
+ end
34
+
35
+ @pidfile = case
36
+ when !@options[:pidfile].nil?
37
+ @options[:pidfile]
38
+ when !@config_env['pid_dir'].nil?
39
+ @config_env['pid_dir'] + 'flapper.pid'
40
+ else
41
+ "/var/run/flapjack/flapper.pid"
42
+ end
43
+
44
+ @logfile = case
45
+ when !@options[:logfile].nil?
46
+ @options[:logfile]
47
+ when !@config_env['log_dir'].nil?
48
+ @config_env['log_dir'] + 'flapper.log'
49
+ else
50
+ "/var/run/flapjack/flapper.log"
51
+ end
26
52
  end
27
53
 
28
54
  def start
29
55
  if runner.daemon_running?
30
56
  puts "flapper is already running."
31
- exit 1
32
57
  else
33
58
  print "flapper starting..."
59
+ print "\n" unless @options[:daemonize]
34
60
  runner.execute(:daemonize => @options[:daemonize]) do
35
61
  main(@options['bind-ip'], @options['bind-port'].to_i, @options[:frequency])
36
62
  end
@@ -39,14 +65,15 @@ module Flapjack
39
65
  end
40
66
 
41
67
  def stop
68
+ pid = get_pid
42
69
  if runner.daemon_running?
43
70
  print "flapper stopping..."
44
71
  runner.execute(:kill => true)
45
72
  puts " done."
46
73
  else
47
74
  puts "flapper is not running."
48
- exit 1
49
75
  end
76
+ exit_now! unless wait_pid_gone(pid)
50
77
  end
51
78
 
52
79
  def restart
@@ -58,12 +85,12 @@ module Flapjack
58
85
  end
59
86
 
60
87
  def status
61
- uptime = (runner.daemon_running?) ? (Time.now - File.stat(@options[:pidfile]).ctime) : 0
62
88
  if runner.daemon_running?
63
- puts "flapper is running: #{uptime}"
89
+ pid = get_pid
90
+ uptime = Time.now - File.stat(@pidfile).ctime
91
+ puts "flapper is running: pid #{pid}, uptime #{uptime}"
64
92
  else
65
- puts "flapper is not running"
66
- exit 3
93
+ exit_now! "flapper is not running"
67
94
  end
68
95
  end
69
96
 
@@ -79,8 +106,8 @@ module Flapjack
79
106
  def runner
80
107
  return @runner if @runner
81
108
 
82
- @runner = Dante::Runner.new('flapper', :pid_path => @options[:pidfile],
83
- :log_path => @options[:logfile])
109
+ @runner = Dante::Runner.new('flapper', :pid_path => @pidfile,
110
+ :log_path => @logfile)
84
111
  @runner
85
112
  end
86
113
 
@@ -109,12 +136,42 @@ module Flapjack
109
136
  end
110
137
  end
111
138
 
139
+ def process_exists(pid)
140
+ return unless pid
141
+ begin
142
+ Process.kill(0, pid)
143
+ return true
144
+ rescue Errno::ESRCH
145
+ return false
146
+ end
147
+ end
148
+
149
+ # wait until the specified pid no longer exists, or until a timeout is reached
150
+ def wait_pid_gone(pid, timeout = 30)
151
+ print "waiting for a max of #{timeout} seconds for process #{pid} to exit" if process_exists(pid)
152
+ started_at = Time.now.to_i
153
+ while process_exists(pid)
154
+ break unless (Time.now.to_i - started_at < timeout)
155
+ print '.'
156
+ sleep 1
157
+ end
158
+ puts ''
159
+ !process_exists(pid)
160
+ end
161
+
162
+ def get_pid
163
+ IO.read(@pidfile).chomp.to_i
164
+ rescue StandardError
165
+ pid = nil
166
+ end
167
+
168
+
112
169
  end
113
170
  end
114
171
  end
115
172
 
116
173
 
117
- desc 'Artificial service that oscillates up and down'
174
+ desc 'Artificial service that oscillates up and down, for use in http://flapjack.io/docs/1.0/usage/oobetet'
118
175
  command :flapper do |flapper|
119
176
 
120
177
  flapper.desc 'start flapper'
@@ -123,11 +180,9 @@ command :flapper do |flapper|
123
180
  start.switch [:d, 'daemonize'], :desc => 'Daemonize',
124
181
  :default_value => true
125
182
 
126
- start.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to',
127
- :default_value => "/var/run/flapjack/flapper.pid"
183
+ start.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to'
128
184
 
129
- start.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to',
130
- :default_value => "/var/log/flapjack/flapper.log"
185
+ start.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
131
186
 
132
187
  start.flag [:b, 'bind-ip'], :desc => 'ADDRESS (IPv4 or IPv6) for flapper to bind to',
133
188
  :default_value => Flapjack::CLI::Flapper.local_ip
@@ -147,11 +202,9 @@ command :flapper do |flapper|
147
202
  flapper.desc 'stop flapper'
148
203
  flapper.command :stop do |stop|
149
204
 
150
- stop.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to',
151
- :default_value => "/var/run/flapjack/flapper.pid"
205
+ stop.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to'
152
206
 
153
- stop.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to',
154
- :default_value => "/var/log/flapjack/flapper.log"
207
+ stop.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
155
208
 
156
209
  stop.action do |global_options, options, args|
157
210
  cli_flapper = Flapjack::CLI::Flapper.new(global_options, options)
@@ -162,11 +215,9 @@ command :flapper do |flapper|
162
215
  flapper.desc 'restart flapper'
163
216
  flapper.command :restart do |restart|
164
217
 
165
- restart.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to',
166
- :default_value => "/var/run/flapjack/flapper.pid"
218
+ restart.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to'
167
219
 
168
- restart.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to',
169
- :default_value => "/var/log/flapjack/flapper.log"
220
+ restart.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
170
221
 
171
222
  restart.flag [:b, 'bind-ip'], :desc => 'ADDRESS (IPv4 or IPv6) for flapper to bind to',
172
223
  :default_value => Flapjack::CLI::Flapper.local_ip
@@ -186,11 +237,9 @@ command :flapper do |flapper|
186
237
  flapper.desc 'flapper status'
187
238
  flapper.command :status do |status|
188
239
 
189
- status.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to',
190
- :default_value => "/var/run/flapjack/flapper.pid"
240
+ status.flag [:p, 'pidfile'], :desc => 'PATH of the pidfile to write to'
191
241
 
192
- status.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to',
193
- :default_value => "/var/log/flapjack/flapper.log"
242
+ status.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
194
243
 
195
244
  status.action do |global_options, options, args|
196
245
  cli_flapper = Flapjack::CLI::Flapper.new(global_options, options)
@@ -22,8 +22,7 @@ module Flapjack
22
22
  @config_env = config.all
23
23
 
24
24
  if @config_env.nil? || @config_env.empty?
25
- puts "No config data for environment '#{FLAPJACK_ENV}' found in '#{global_options[:config]}'"
26
- exit 1
25
+ exit_now! "No config data for environment '#{FLAPJACK_ENV}' found in '#{global_options[:config]}'"
27
26
  end
28
27
 
29
28
  @redis_options = config.for_redis
@@ -65,14 +64,14 @@ module Flapjack
65
64
  private
66
65
 
67
66
  def redis
68
- @redis ||= Redis.new(@redis_options)
67
+ @redis ||= Redis.new(@redis_options.merge(:driver => :ruby))
69
68
  end
70
69
 
71
70
  end
72
71
  end
73
72
  end
74
73
 
75
- desc 'Bulk import data from an external source'
74
+ desc 'Bulk import data from an external source, reading from JSON formatted data files'
76
75
  command :import do |import|
77
76
 
78
77
  import.desc 'Import contacts'
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'eventmachine'
4
+ require 'em-synchrony'
5
+ require 'redis'
6
+ require 'redis/connection/synchrony'
7
+
8
+ require 'flapjack/configuration'
9
+ require 'flapjack/data/event'
10
+ require 'flapjack/data/entity_check'
11
+ require 'terminal-table'
12
+
13
+ module Flapjack
14
+ module CLI
15
+ class Maintenance
16
+
17
+ def initialize(global_options, options)
18
+ @global_options = global_options
19
+ @options = options
20
+
21
+ config = Flapjack::Configuration.new
22
+ config.load(global_options[:config])
23
+ @config_env = config.all
24
+
25
+ if @config_env.nil? || @config_env.empty?
26
+ exit_now! "No config data for environment '#{FLAPJACK_ENV}' found in '#{global_options[:config]}'"
27
+ end
28
+
29
+ @redis_options = config.for_redis.merge(:driver => :ruby)
30
+ @options[:redis] = redis
31
+ end
32
+
33
+ def show
34
+ exit_now!("state must be one of 'ok', 'warning', 'critical', 'unknown'") unless @options[:state].nil? || %w(ok warning critical unknown).include?(@options[:state].downcase)
35
+ exit_now!("type must be one of 'scheduled', 'unscheduled'") unless %w(scheduled unscheduled).include?(@options[:type].downcase)
36
+ %w(started finishing).each do |time|
37
+ exit_now!("#{time.capitalize} time must start with 'more than', 'less than', 'on', 'before', 'after' or between") if @options[time] && !@options[time].downcase.start_with?('more than', 'less than', 'on', 'before', 'after', 'between')
38
+ end
39
+ @options[:finishing] ||= 'after now'
40
+ maintenances = Flapjack::Data::EntityCheck.find_maintenance(@options)
41
+ rows = []
42
+ maintenances.each do |m|
43
+ row = []
44
+ # Convert the unix timestamps of the start and end time back into readable times
45
+ m.each { |k, v| row.push(k.to_s.end_with?('time') ? Time.at(v) : v) }
46
+ rows.push(row)
47
+ end
48
+ puts Terminal::Table.new :headings => ['Entity', 'Check', 'State', 'Start', 'Duration (s)', 'Reason', 'End'], :rows => rows
49
+ maintenances
50
+ end
51
+
52
+ def delete
53
+ maintenances = show
54
+ exit_now!('The following maintenances would be deleted. Run this command again with --apply true to remove them.') unless @options[:apply]
55
+ errors = Flapjack::Data::EntityCheck.delete_maintenance(@options)
56
+ (errors.each { |k, v| puts "#{k}: #{v}" }; exit_now!('Failed to delete maintenances')) if errors.length > 0
57
+ puts "The maintenances above have been deleted"
58
+ end
59
+
60
+ def create
61
+ exit_now!("Entity & check must be supplied to create a maintenance period") if @options[:entity].nil? || @options[:check].nil?
62
+ errors = Flapjack::Data::EntityCheck.create_maintenance(@options)
63
+ (errors.each { |k, v| puts "#{k}: #{v}" }; exit_now!('Failed to create maintenances')) if errors.length > 0
64
+ puts "The maintenances specified have been created"
65
+ end
66
+
67
+ private
68
+
69
+ def redis
70
+ @redis ||= Redis.new(@redis_options)
71
+ end
72
+
73
+ end
74
+ end
75
+ end
76
+
77
+ desc 'Show, create and delete maintenance windows'
78
+ command :maintenance do |maintenance|
79
+
80
+
81
+ maintenance.desc 'Show maintenance windows according to criteria (default: all ongoing maintenance)'
82
+ maintenance.command :show do |show|
83
+
84
+ show.flag [:e, 'entity'],
85
+ :desc => 'The entity for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'db*\' or \'[[:lower:]]\''
86
+
87
+ show.flag [:c, 'check'],
88
+ :desc => 'The check for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'http*\' or \'[[:lower:]]\''
89
+
90
+ show.flag [:r, 'reason'],
91
+ :desc => 'The reason for the maintenance window to occur. This can be a string, or a ruby regex of the form \'Downtime for *\' or \'[[:lower:]]\''
92
+
93
+ show.flag [:s, 'start', 'started', 'starting'],
94
+ :desc => 'The start time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between times and time"'
95
+
96
+ show.flag [:d, 'duration'],
97
+ :desc => 'The total duration of the maintenance window. This should be prefixed with "more than", "less than", "before, "after" or "equal to", or or of the form "between 3 and 4 hours". This should be an interval'
98
+
99
+ show.flag [:f, 'finish', 'finished', 'finishing', 'remain', 'remained', 'remaining', 'end'],
100
+ :desc => 'The finishing time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between time and time"'
101
+
102
+ show.flag [:st, 'state'],
103
+ :desc => 'The state that the check is currently in'
104
+
105
+ show.flag [:t, 'type'],
106
+ :desc => 'The type of maintenance scheduled',
107
+ :default_value => 'scheduled'
108
+
109
+ show.action do |global_options,options,args|
110
+ maintenance = Flapjack::CLI::Maintenance.new(global_options, options)
111
+ maintenance.show
112
+ end
113
+ end
114
+
115
+ maintenance.desc 'Delete maintenance windows according to criteria (default: all ongoing maintenance)'
116
+ maintenance.command :delete do |delete|
117
+
118
+ delete.flag [:a, 'apply'],
119
+ :desc => 'Whether this deletion should occur',
120
+ :default_value => false
121
+
122
+ delete.flag [:e, 'entity'],
123
+ :desc => 'The entity for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'db*\' or \'[[:lower:]]\''
124
+
125
+ delete.flag [:c, 'check'],
126
+ :desc => 'The check for the maintenance window to occur on. This can be a string, or a ruby regex of the form \'http*\' or \'[[:lower:]]\''
127
+
128
+ delete.flag [:r, 'reason'],
129
+ :desc => 'The reason for the maintenance window to occur. This can be a string, or a ruby regex of the form \'Downtime for *\' or \'[[:lower:]]\''
130
+
131
+ delete.flag [:s, 'start', 'started', 'starting'],
132
+ :desc => 'The start time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between times and time"'
133
+
134
+ delete.flag [:d, 'duration'],
135
+ :desc => 'The total duration of the maintenance window. This should be prefixed with "more than", "less than", "before, "after" or "equal to", or or of the form "between 3 and 4 hours". This should be an interval'
136
+
137
+ delete.flag [:f, 'finish', 'finished', 'finishing', 'remain', 'remained', 'remaining', 'end'],
138
+ :desc => 'The finishing time for the maintenance window. This should be prefixed with "more than", "less than", "on", "before", or "after", or of the form "between time and time"'
139
+
140
+ delete.flag [:st, 'state'],
141
+ :desc => 'The state that the check is currently in'
142
+
143
+ delete.flag [:t, 'type'],
144
+ :desc => 'The type of maintenance scheduled',
145
+ :default_value => 'scheduled'
146
+
147
+ delete.action do |global_options,options,args|
148
+ maintenance = Flapjack::CLI::Maintenance.new(global_options, options)
149
+ maintenance.delete
150
+ end
151
+ end
152
+
153
+ maintenance.desc 'Create a maintenance window'
154
+ maintenance.command :create do |create|
155
+
156
+ create.flag [:e, 'entity'],
157
+ :desc => 'The entity for the maintenance window to occur on. This can be a comma separated list',
158
+ :type => Array
159
+
160
+ create.flag [:c, 'check'],
161
+ :desc => 'The check for the maintenance window to occur on. This can be a comma separated list',
162
+ :type => Array
163
+
164
+ create.flag [:r, 'reason'],
165
+ :desc => 'The reason for the maintenance window to occur'
166
+
167
+ create.flag [:s, 'start', 'started', 'starting'],
168
+ :desc => 'The start time for the maintenance window'
169
+
170
+ create.flag [:d, 'duration'],
171
+ :desc => 'The total duration of the maintenance window. This should be an interval'
172
+
173
+ create.flag [:t, 'type'],
174
+ :desc => 'The type of maintenance scheduled ("scheduled")',
175
+ :default_value => 'scheduled'
176
+
177
+ create.action do |global_options,options,args|
178
+ maintenance = Flapjack::CLI::Maintenance.new(global_options, options)
179
+ maintenance.create
180
+ end
181
+ end
182
+ end
@@ -20,123 +20,87 @@ module Flapjack
20
20
  @global_options = global_options
21
21
  @options = options
22
22
 
23
- config = Flapjack::Configuration.new
24
- config.load(global_options[:config])
25
- @config_env = config.all
23
+ @config = Flapjack::Configuration.new
24
+ @config.load(global_options[:config])
25
+ @config_env = @config.all
26
26
 
27
27
  if @config_env.nil? || @config_env.empty?
28
- puts "No config data for environment '#{FLAPJACK_ENV}' found in '#{global_options[:config]}'"
29
- exit 1
28
+ exit_now! "No config data for environment '#{FLAPJACK_ENV}' found in '#{global_options[:config]}'"
30
29
  end
31
30
 
32
- @redis_options = config.for_redis
33
- end
31
+ @config_runner = @config_env["#{@options[:type]}-receiver"] || {}
34
32
 
35
- # For nagios-receiver:
36
- #
37
- # nagios.cfg must contain the following perfdata templates for host and service data (modified from the default
38
- # to include hoststate / servicestate, and a fake service 'HOST' for hostperfdata, so that the
39
- # fields match up:
40
- #
41
- # host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\tHOST\t$HOSTSTATE$\t$HOSTEXECUTIONTIME$\t$HOSTLATENCY$\t$HOSTOUTPUT$\t$HOSTPERFDATA$
42
- #
43
- # service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$
44
- #
45
-
46
- def nagios_start
47
- if runner('nagios').daemon_running?
48
- puts "nagios-receiver is already running."
49
- exit 1
33
+ @pidfile = case
34
+ when !@options[:pidfile].nil?
35
+ @options[:pidfile]
36
+ when !@config_env['pid_dir'].nil?
37
+ @config_env['pid_dir'] + "#{@options[:type]}-receiver.pid"
50
38
  else
51
- print "nagios-receiver starting..."
52
- runner('nagios').execute(:daemonize => @options[:daemonize]) do
53
- begin
54
- main(:fifo => @options[:fifo], :nagios => true)
55
- rescue Exception => e
56
- p e.message
57
- puts e.backtrace.join("\n")
58
- end
59
- end
60
- puts " done."
39
+ "/var/run/flapjack/#{@options[:type]}-receiver.pid"
61
40
  end
62
- end
63
41
 
64
- def nagios_stop
65
- if runner('nagios').daemon_running?
66
- print "nagios-receiver stopping..."
67
- runner('nagios').execute(:kill => true)
68
- puts " done."
42
+ @logfile = case
43
+ when !@options[:logfile].nil?
44
+ @options[:logfile]
45
+ when !@config_env['log_dir'].nil?
46
+ @config_env['log_dir'] + "#{@options[:type]}-receiver.log"
69
47
  else
70
- puts "nagios-receiver is not running."
71
- exit 1
48
+ "/var/run/flapjack/#{@options[:type]}-receiver.log"
72
49
  end
73
- end
74
50
 
75
- def nagios_restart
76
- print "nagios-receiver restarting..."
77
- runner('nagios').execute(:daemonize => true, :restart => true) do
78
- main(:fifo => @options[:fifo], :nagios => true)
79
- end
80
- puts " done."
51
+ @redis_options = @config.for_redis
81
52
  end
82
53
 
83
- def nagios_status
84
- config_runner = @config_env["nagios-receiver"] || {}
85
- pidfile = @options[:pidfile] || config_runner['pid_file'] ||
86
- "/var/run/flapjack/nagios-receiver.pid"
87
- uptime = (runner('nagios').daemon_running?) ? (Time.now - File.stat(pidfile).ctime) : 0
88
- if runner('nagios').daemon_running?
89
- puts "nagios-receiver is running: #{uptime}"
54
+ def start
55
+ if runner(@options[:type]).daemon_running?
56
+ puts "#{@options[:type]}-receiver is already running."
90
57
  else
91
- puts "nagios-receiver is not running"
92
- exit 3
93
- end
94
- end
95
-
96
- def nsca_start
97
- if runner('nsca').daemon_running?
98
- puts "nsca-receiver is already running."
99
- exit 1
100
- else
101
- print "nsca-receiver starting..."
102
- runner('nsca').execute(:daemonize => @options[:daemonize]) do
103
- main(:fifo => @options[:fifo], :nsca => true)
58
+ print "#{@options[:type]}-receiver starting..."
59
+ print "\n" unless @options[:daemonize]
60
+ runner(@options[:type]).execute(:daemonize => @options[:daemonize]) do
61
+ begin
62
+ main(:fifo => @options[:fifo], :type => @options[:type])
63
+ rescue Exception => e
64
+ p e.message
65
+ puts e.backtrace.join("\n")
66
+ end
104
67
  end
105
68
  puts " done."
106
69
  end
107
70
  end
108
71
 
109
- def nsca_stop
110
- if runner('nsca').daemon_running?
111
- print "nsca-receiver stopping..."
112
- runner('nsca').execute(:kill => true)
72
+ def stop
73
+ pid = get_pid
74
+ if runner(@options[:type]).daemon_running?
75
+ print "#{@options[:type]}-receiver stopping..."
76
+ runner(@options[:type]).execute(:kill => true)
113
77
  puts " done."
114
78
  else
115
- puts "nsca-receiver is not running."
116
- exit 1
79
+ puts "#{@options[:type]}-receiver is not running."
117
80
  end
81
+ exit_now! unless wait_pid_gone(pid)
118
82
  end
119
83
 
120
- def nsca_restart
121
- print "nsca-receiver restarting..."
122
- runner('nsca').execute(:daemonize => true, :restart => true) do
123
- main(:fifo => @options[:fifo], :nsca => true)
84
+ def restart
85
+ print "#{@options[:type]}-receiver restarting..."
86
+ runner(@options[:type]).execute(:daemonize => true, :restart => true) do
87
+ begin
88
+ main(:fifo => @options[:fifo], :type => @options[:type])
89
+ rescue Exception => e
90
+ p e.message
91
+ puts e.backtrace.join("\n")
92
+ end
124
93
  end
125
94
  puts " done."
126
95
  end
127
96
 
128
- def nsca_status
129
- config_runner = @config_env["nsca-receiver"] || {}
130
-
131
- pidfile = @options[:pidfile] || config_runner['pid_file'] ||
132
- "/var/run/flapjack/nsca-receiver.pid"
133
-
134
- uptime = (runner('nsca').daemon_running?) ? (Time.now - File.stat(pidfile).ctime) : 0
135
- if runner('nsca').daemon_running?
136
- puts "nsca-receiver is running: #{uptime}"
97
+ def status
98
+ if runner(@options[:type]).daemon_running?
99
+ pid = get_pid
100
+ uptime = Time.now - File.stat(@pidfile).ctime
101
+ puts "#{@options[:type]}-receiver is running: pid #{pid}, uptime #{uptime}"
137
102
  else
138
- puts "nsca-receiver is not running"
139
- exit 3
103
+ exit_now! "#{@options[:type]}-receiver is not running"
140
104
  end
141
105
  end
142
106
 
@@ -159,25 +123,16 @@ module Flapjack
159
123
  def runner(type)
160
124
  return @runner if @runner
161
125
 
162
- config_runner = @config_env["#{type}-receiver"] || {}
163
-
164
- pidfile = @options[:pidfile].nil? ?
165
- (config_runner['pid_file'] || "/var/run/flapjack/#{type}-receiver.pid") :
166
- @options[:pidfile]
167
-
168
- logfile = @options[:logfile].nil? ?
169
- (config_runner['log_file'] || "/var/log/flapjack/#{type}-receiver.log") :
170
- @options[:logfile]
171
-
172
- @runner = Dante::Runner.new("#{type}-receiver", :pid_path => pidfile,
173
- :log_path => logfile)
126
+ @runner = Dante::Runner.new("#{@options[:type]}-receiver", :pid_path => @pidfile,
127
+ :log_path => @logfile)
174
128
  @runner
175
129
  end
176
130
 
177
131
  def process_input(opts)
178
- config_rec = if opts[:nagios]
132
+ config_rec = case opts[:type]
133
+ when /nagios/
179
134
  @config_env['nagios-receiver'] || {}
180
- elsif opts[:nsca]
135
+ when /nsca/
181
136
  @config_env['nsca-receiver'] || {}
182
137
  else
183
138
  raise "Unknown receiver type"
@@ -204,8 +159,8 @@ module Flapjack
204
159
  check_latency, check_output, check_perfdata, check_long_output =
205
160
  [nil] * 10
206
161
 
207
- if opts[:nagios]
208
-
162
+ case opts[:type]
163
+ when /nagios/
209
164
  object_type, timestamp, entity, check, state, check_time,
210
165
  check_latency, check_output, check_perfdata, check_long_output = split_line
211
166
 
@@ -221,7 +176,7 @@ module Flapjack
221
176
  next
222
177
  end
223
178
 
224
- elsif opts[:nsca]
179
+ when /nsca/
225
180
 
226
181
  timestamp, passivecheck = split_line
227
182
  split_passive = passivecheck.split(";")
@@ -269,12 +224,40 @@ module Flapjack
269
224
  def main(opts)
270
225
  fifo = opts[:fifo]
271
226
  while true
272
- process_input(:fifo => fifo, :nagios => opts[:nagios], :nsca => opts[:nsca])
227
+ process_input(:fifo => fifo, :type => opts[:type])
273
228
  puts "Whoops with the fifo, restarting main loop in 10 seconds"
274
229
  sleep 10
275
230
  end
276
231
  end
277
232
 
233
+ def process_exists(pid)
234
+ return unless pid
235
+ begin
236
+ Process.kill(0, pid)
237
+ return true
238
+ rescue Errno::ESRCH
239
+ return false
240
+ end
241
+ end
242
+
243
+ # wait until the specified pid no longer exists, or until a timeout is reached
244
+ def wait_pid_gone(pid, timeout = 30)
245
+ print "waiting for a max of #{timeout} seconds for process #{pid} to exit" if process_exists(pid)
246
+ started_at = Time.now.to_i
247
+ while process_exists(pid)
248
+ break unless (Time.now.to_i - started_at < timeout)
249
+ print '.'
250
+ sleep 1
251
+ end
252
+ puts ''
253
+ !process_exists(pid)
254
+ end
255
+
256
+ def get_pid
257
+ IO.read(@pidfile).chomp.to_i
258
+ rescue StandardError
259
+ pid = nil
260
+ end
278
261
 
279
262
  class EventFeedHandler < Oj::ScHandler
280
263
 
@@ -319,8 +302,7 @@ module Flapjack
319
302
  input = if opts[:from]
320
303
  File.open(opts[:from]) # Explodes if file does not exist.
321
304
  elsif $stdin.tty?
322
- puts "No file provided, and STDIN is from terminal! Exiting..."
323
- exit(1)
305
+ exit_now! "No file provided, and STDIN is from terminal! Exiting..."
324
306
  else
325
307
  $stdin
326
308
  end
@@ -349,8 +331,7 @@ module Flapjack
349
331
 
350
332
  def mirror_receive(opts)
351
333
  unless opts[:follow] || opts[:all]
352
- puts "one or both of --follow or --all is required"
353
- exit 1
334
+ exit_now! "one or both of --follow or --all is required"
354
335
  end
355
336
 
356
337
  source_redis = Redis.new(:url => opts[:source])
@@ -422,7 +403,7 @@ module Flapjack
422
403
  end
423
404
  end
424
405
 
425
- desc 'Receive events from external systems and send them to Flapjack'
406
+ desc 'Receive events from external systems and sends them to Flapjack'
426
407
  arg_name 'receiver'
427
408
  command :receiver do |receiver|
428
409
 
@@ -453,7 +434,7 @@ command :receiver do |receiver|
453
434
  # host_perfdata_file_mode=p
454
435
  # service_perfdata_file_mode=p
455
436
 
456
- # Details on the wiki: https://github.com/flapjack/flapjack/wiki/USING#configuring-nagios
437
+ # Details on the wiki: http://flapjack.io/docs/1.0/usage/USING#configuring-nagios
457
438
  # '
458
439
 
459
440
  nagios.command :start do |start|
@@ -468,8 +449,9 @@ command :receiver do |receiver|
468
449
  start.flag [:f, 'fifo'], :desc => 'PATH of the nagios perfdata named pipe'
469
450
 
470
451
  start.action do |global_options,options,args|
452
+ options.merge!(:type => 'nagios')
471
453
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
472
- receiver.nagios_start
454
+ receiver.start
473
455
  end
474
456
  end
475
457
 
@@ -480,8 +462,9 @@ command :receiver do |receiver|
480
462
  stop.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
481
463
 
482
464
  stop.action do |global_options,options,args|
465
+ options.merge!(:type => 'nagios')
483
466
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
484
- receiver.nagios_stop
467
+ receiver.stop
485
468
  end
486
469
  end
487
470
 
@@ -494,8 +477,9 @@ command :receiver do |receiver|
494
477
  restart.flag [:f, 'fifo'], :desc => 'PATH of the nagios perfdata named pipe'
495
478
 
496
479
  restart.action do |global_options,options,args|
480
+ options.merge!(:type => 'nagios')
497
481
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
498
- receiver.nagios_restart
482
+ receiver.restart
499
483
  end
500
484
  end
501
485
 
@@ -506,8 +490,9 @@ command :receiver do |receiver|
506
490
  status.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
507
491
 
508
492
  status.action do |global_options,options,args|
493
+ options.merge!(:type => 'nagios')
509
494
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
510
- receiver.nagios_status
495
+ receiver.status
511
496
  end
512
497
  end
513
498
 
@@ -535,7 +520,7 @@ command :receiver do |receiver|
535
520
  # The Nsca daemon is optionally writing to a tempfile if the named pipe does
536
521
  # not exist.
537
522
 
538
- # Details on the wiki: https://github.com/flapjack/flapjack/wiki/USING#XXX
523
+ # Details on the wiki: http://flapjack.io/docs/1.0/usage/USING#XXX
539
524
  # '
540
525
 
541
526
  start.switch [:d, 'daemonize'], :desc => 'Daemonize',
@@ -548,8 +533,9 @@ command :receiver do |receiver|
548
533
  start.flag [:f, 'fifo'], :desc => 'PATH of the nagios perfdata named pipe'
549
534
 
550
535
  start.action do |global_options,options,args|
536
+ options.merge!(:type => 'nsca')
551
537
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
552
- receiver.nsca_start
538
+ receiver.start
553
539
  end
554
540
  end
555
541
 
@@ -560,8 +546,9 @@ command :receiver do |receiver|
560
546
  stop.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
561
547
 
562
548
  stop.action do |global_options,options,args|
549
+ options.merge!(:type => 'nsca')
563
550
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
564
- receiver.nsca_stop
551
+ receiver.stop
565
552
  end
566
553
  end
567
554
 
@@ -574,8 +561,9 @@ command :receiver do |receiver|
574
561
  restart.flag [:f, 'fifo'], :desc => 'PATH of the nagios perfdata named pipe'
575
562
 
576
563
  restart.action do |global_options,options,args|
564
+ options.merge!(:type => 'nsca')
577
565
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
578
- receiver.nsca_restart
566
+ receiver.restart
579
567
  end
580
568
  end
581
569
 
@@ -586,8 +574,9 @@ command :receiver do |receiver|
586
574
  status.flag [:l, 'logfile'], :desc => 'PATH of the logfile to write to'
587
575
 
588
576
  status.action do |global_options,options,args|
577
+ options.merge!(:type => 'nsca')
589
578
  receiver = Flapjack::CLI::Receiver.new(global_options, options)
590
- receiver.nsca_status
579
+ receiver.status
591
580
  end
592
581
  end
593
582