mcproc 2016.2.20

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
data/lib/god/task.rb ADDED
@@ -0,0 +1,552 @@
1
+ module God
2
+
3
+ class Task
4
+ # Public: Gets/Sets the String name of the task.
5
+ attr_accessor :name
6
+
7
+ # Public: Gets/Sets the Numeric default interval to be used between poll
8
+ # events.
9
+ attr_accessor :interval
10
+
11
+ # Public: Gets/Sets the String group name of the task.
12
+ attr_accessor :group
13
+
14
+ # Public: Gets/Sets the Array of Symbol valid states for the state machine.
15
+ attr_accessor :valid_states
16
+
17
+ # Public: Gets/Sets the Symbol initial state of the state machine.
18
+ attr_accessor :initial_state
19
+
20
+ # Gets/Sets the Driver for this task.
21
+ attr_accessor :driver
22
+
23
+ # Public: Sets whether the task should autostart when god starts. Defaults
24
+ # to true (enabled).
25
+ attr_writer :autostart
26
+
27
+ # Returns true if autostart is enabled, false if not.
28
def autostart?
  # Plain reader for the autostart flag; the stored value is handed back
  # unchanged.
  return @autostart
end
31
+
32
+ # api
33
+ attr_accessor :state, :behaviors, :metrics, :directory
34
+
35
def initialize
  # Autostart is switched on unless a truthy value is already present.
  @autostart = true unless @autostart

  # A fresh task starts out unmonitored.
  self.state = :unmonitored

  # Behaviors attached to this task.
  self.behaviors = []

  # Metrics bucketed by state. The nil bucket is the one lifecycle metrics
  # are appended to.
  self.metrics = { nil => [], :unmonitored => [], :stop => [] }

  # Reverse lookup: condition -> the metric that owns it.
  self.directory = {}

  # The driver is built last and receives this task.
  self.driver = Driver.new(self)
end
53
+
54
+ # Initialize the metrics to an empty state.
55
+ #
56
+ # Returns nothing.
57
def prepare
  # Give every declared state a metrics bucket, leaving buckets that already
  # exist (and whatever they hold) untouched.
  valid_states.each { |state_name| metrics[state_name] ||= [] }
end
62
+
63
+ # Verify that the minimum set of configuration requirements has been met.
64
+ #
65
+ # Returns true if valid, false if not.
66
def valid?
  # Each required setting paired with the error that is logged when it is nil.
  requirements = [
    [self.name,          "No name String was specified."],
    [self.valid_states,  "No valid_states Array or Symbols was specified."],
    [self.initial_state, "No initial_state Symbol was specified."]
  ]

  # Every requirement is checked (no short-circuit) so that all problems are
  # reported in one pass, in the order listed above.
  missing = requirements.select { |value, _complaint| value.nil? }
  missing.each { |_value, complaint| applog(self, :error, complaint) }

  missing.empty?
end
89
+
90
+ ###########################################################################
91
+ #
92
+ # Advanced mode
93
+ #
94
+ ###########################################################################
95
+
96
+ # Convert the given input into canonical hash form which looks like:
97
+ #
98
+ # { true => :state } or { true => :state, false => :otherstate }
99
+ #
100
+ # to - The Symbol or Hash destination.
101
+ #
102
+ # Returns the canonical Hash.
103
def canonical_hash_form(to)
  # Anything that is not exactly a Symbol is handed back untouched; the
  # caller is expected to have supplied it in hash form already.
  return to unless to.instance_of?(Symbol)

  # A bare Symbol becomes the destination for a true result.
  { true => to }
end
106
+
107
+ # Public: Define a transition handler which consists of a set of conditions
108
+ #
109
+ # start_states - The Symbol or Array of Symbols start state(s).
110
+ # end_states - The Symbol or Hash end states.
111
+ #
112
+ # Yields the Metric for this transition.
113
+ #
114
+ # Returns nothing.
115
+ def transition(start_states, end_states)
116
+ # Convert end_states into canonical hash form.
117
+ canonical_end_states = canonical_hash_form(end_states)
118
+
119
+ Array(start_states).each do |start_state|
120
+ # Validate start state.
121
+ unless self.valid_states.include?(start_state)
122
+ abort "Invalid state :#{start_state}. Must be one of the symbols #{self.valid_states.map{|x| ":#{x}"}.join(', ')}"
123
+ end
124
+
125
+ # Create a new metric to hold the task, end states, and conditions.
126
+ m = Metric.new(self, canonical_end_states)
127
+
128
+ if block_given?
129
+ # Let the config file define some conditions on the metric.
130
+ yield(m)
131
+ else
132
+ # Add an :always condition if no block was given.
133
+ m.condition(:always) do |c|
134
+ c.what = true
135
+ end
136
+ end
137
+
138
+ # Populate the condition -> metric directory.
139
+ m.conditions.each do |c|
140
+ self.directory[c] = m
141
+ end
142
+
143
+ # Record the metric.
144
+ self.metrics[start_state] ||= []
145
+ self.metrics[start_state] << m
146
+ end
147
+ end
148
+
149
+ # Public: Define a lifecycle handler. Conditions that belong to a
150
+ # lifecycle are active as long as the process is being monitored.
151
+ #
152
+ # Returns nothing.
153
def lifecycle
  # A lifecycle metric carries no destination states, only the task.
  metric = Metric.new(self)

  # The caller's block attaches conditions to the metric.
  yield(metric)

  # Index each condition so it can be mapped back to its metric.
  metric.conditions.each { |cond| self.directory[cond] = metric }

  # Lifecycle metrics live in the nil bucket.
  self.metrics[nil] << metric
end
168
+
169
+ ###########################################################################
170
+ #
171
+ # Lifecycle
172
+ #
173
+ ###########################################################################
174
+
175
+ # Enable monitoring.
176
+ #
177
+ # Returns nothing.
178
def monitor
  # Monitoring starts by moving to the configured initial state.
  starting_state = self.initial_state
  self.move(starting_state)
end
181
+
182
+ # Disable monitoring.
183
+ #
184
+ # Returns nothing.
185
def unmonitor
  # Monitoring stops by moving to the :unmonitored state.
  target = :unmonitored
  self.move(target)
end
188
+
189
+ # Move to the given state.
190
+ #
191
+ # to_state - The Symbol representing the state to move to.
192
+ #
193
+ # Returns this Task.
194
def move(to_state)
  # Outside the driver: queue the move as a driver message and return at once.
  unless self.driver.in_driver_context?
    self.driver.message(:move, [to_state])
    return self
  end

  # Inside the driver. Remember the requested target, because to_state may be
  # rewritten to :up below, and remember where we are coming from.
  requested_state = to_state
  previous_state = self.state

  applog(self, :info, "#{self.name} move '#{previous_state}' to '#{to_state}'")

  # Tear down the current state: drop pending driver events and disable the
  # metrics of the state being left. Lifecycle metrics (nil bucket) are only
  # disabled when monitoring stops altogether.
  self.driver.clear_events
  self.metrics[previous_state].each { |m| m.disable }
  self.metrics[nil].each { |m| m.disable } if to_state == :unmonitored

  # Run the action associated with the target state.
  self.action(to_state)

  # Simple mode: a :start/:restart state without metrics of its own falls
  # straight through to :up. The state check comes first so the metrics
  # bucket is only consulted for those two states.
  to_state = :up if [:start, :restart].include?(to_state) && self.metrics[to_state].empty?

  # Bring up the new state's metrics.
  self.metrics[to_state].each { |m| m.enable }

  # Coming out of :unmonitored also switches the lifecycle metrics on.
  self.metrics[nil].each { |m| m.enable } if previous_state == :unmonitored

  self.state = to_state

  # Listeners are told the originally requested target, not the rewritten one.
  Trigger.broadcast(self, :state_change, [previous_state, requested_state])

  applog(self, :info, "#{self.name} moved '#{previous_state}' to '#{to_state}'")

  self
end
243
+
244
+ # Notify the Driver that an EventCondition has triggered.
245
+ #
246
+ # condition - The Condition.
247
+ #
248
+ # Returns nothing.
249
def trigger(condition)
  # Hand the condition to the driver as a :handle_event message.
  payload = [condition]
  self.driver.message(:handle_event, payload)
end
252
+
253
def signal(sig)
  # Intentionally a no-op: the argument is ignored and nil is returned.
  nil
end
256
+
257
+ ###########################################################################
258
+ #
259
+ # Actions
260
+ #
261
+ ###########################################################################
262
+
263
def method_missing(sym, *args)
  # Lazily creates accessors for state-named attributes (e.g. `start=`).
  #
  # sym  - The Symbol name of the missing method.
  # args - The arguments passed to it.
  #
  # Raises NoMethodError (via super) for anything that is not an assignment
  # to one of the valid states.

  # Only assignment-style calls are handled here.
  return super unless sym.to_s =~ /=$/

  # Strip the trailing "=" to get the attribute name.
  base = sym.to_s.chop.intern

  # valid_states may still be nil while the task is being configured. Treat
  # that as "no states" so the caller gets a NoMethodError naming the method
  # it actually called, instead of one about nil.
  return super unless Array(self.valid_states).include?(base)

  # Define a real reader/writer pair on the class so later calls bypass
  # method_missing, then replay the assignment through the new writer.
  self.class.send(:attr_accessor, base)
  self.send(sym, *args)
end
277
+
278
+ # Perform the given action.
279
+ #
280
+ # a - The Symbol action.
281
+ # c - The Condition.
282
+ #
283
+ # Returns this Task.
284
def action(a, c = nil)
  # Performs the action named by `a`.
  #
  # a - The Symbol action; also the name of the attribute holding its command.
  # c - The Condition; only forwarded when the call is queued for the driver.
  #
  # Returns this Task in every branch.
  # Raises NotImplementedError if the configured command is neither a String
  # nor a Proc.

  # Outside the driver: queue the action as a driver message.
  unless self.driver.in_driver_context?
    self.driver.message(:action, [a, c])
    return self
  end

  # Inside the driver. Actions the task does not respond to are skipped.
  if self.respond_to?(a)
    command = self.send(a)

    case command
    when String
      applog(self, :info, "#{self.name} #{a}: #{command}")

      # Strings are run as commands via Kernel#system.
      system(command)
    when Proc
      applog(self, :info, "#{self.name} #{a}: lambda")

      command.call
    else
      raise NotImplementedError,
            "#{self.name} #{a}: command must be a String or a Proc, got #{command.class}"
    end
  end

  self
end
310
+
311
+ ###########################################################################
312
+ #
313
+ # Events
314
+ #
315
+ ###########################################################################
316
+
317
def attach(condition)
  # Poll conditions are scheduled on the driver with a delay of 0; event and
  # trigger conditions register themselves. Anything else is ignored.
  if condition.is_a?(PollCondition)
    self.driver.schedule(condition, 0)
  elsif condition.is_a?(EventCondition) || condition.is_a?(TriggerCondition)
    condition.register
  end
end
325
+
326
def detach(condition)
  # Poll conditions are reset; event and trigger conditions deregister
  # themselves. Anything else is ignored.
  if condition.is_a?(PollCondition)
    condition.reset
  elsif condition.is_a?(EventCondition) || condition.is_a?(TriggerCondition)
    condition.deregister
  end
end
334
+
335
+ ###########################################################################
336
+ #
337
+ # Registration
338
+ #
339
+ ###########################################################################
340
+
341
def register!
  # Nothing to register at this level; meant to be overridden where needed.
  nil
end
344
+
345
def unregister!
  # Unregistering a task shuts its driver down.
  self.driver.shutdown
end
348
+
349
+ ###########################################################################
350
+ #
351
+ # Handlers
352
+ #
353
+ ###########################################################################
354
+
355
+ # Evaluate and handle the given poll condition. Handles logging
356
+ # notifications, and moving to the new state if necessary.
357
+ #
358
+ # condition - The Condition to handle.
359
+ #
360
+ # Returns nothing.
361
def handle_poll(condition)
  # The metric that owns this condition.
  metric = self.directory[condition]

  # Run the test. A raising condition is logged and counted as a false result.
  # NOTE(review): `rescue Object` matches every exception class, including
  # SystemExit and Interrupt - confirm that breadth is intended.
  result =
    begin
      condition.test
    rescue Object => e
      cname = condition.class.to_s.split('::').last
      message = format("Unhandled exception in %s condition - (%s): %s\n%s",
                       cname, e.class, e.message, e.backtrace.join("\n"))
      applog(self, :error, message)
      false
    end

  # Log the outcome; the last logged line doubles as the notification text.
  messages = self.log_line(self, metric, condition, result)

  # Notify only on a truthy result and when the condition asks for it.
  self.notify(condition, messages.last) if result && condition.notify

  # After-condition hook.
  condition.after

  # Destination: a truthy result lets the condition's own transition override
  # the metric's destination table.
  dest =
    if !(result && condition.transition)
      metric.destination && metric.destination[result]
    else
      condition.transition
    end

  # No destination: just poll again later.
  return self.driver.schedule(condition) unless dest

  begin
    self.move(dest)
  rescue EventRegistrationFailedError
    msg = self.name + ' Event registration failed, moving back to previous state'
    applog(self, :info, msg)

    # Retry the move against the state the task is currently in.
    # NOTE(review): this retry has no upper bound.
    dest = self.state
    retry
  end
end
414
+
415
+ # Asynchronously evaluate and handle the given event condition. Handles
416
+ # logging notifications, and moving to the new state if necessary.
417
+ #
418
+ # condition - The Condition to handle.
419
+ #
420
+ # Returns nothing.
421
def handle_event(condition)
  # The metric that owns this condition.
  metric = self.directory[condition]

  # An event always counts as a true result for logging.
  messages = self.log_line(self, metric, condition, true)

  # The last logged line doubles as the notification text.
  self.notify(condition, messages.last) if condition.notify

  # The condition's own transition wins over the metric's destination for a
  # true result.
  dest = condition.transition ? condition.transition : (metric.destination && metric.destination[true])

  self.move(dest) if dest
end
447
+
448
+ # Determine whether a trigger happened.
449
+ #
450
+ # metric - The Metric.
451
+ # result - The Boolean result from the condition's test.
452
+ #
453
+ # Returns Boolean
454
def trigger?(metric, result)
  # Truthy only when the metric has a destination table and that table maps
  # this result to something. The looked-up value itself is returned, not a
  # strict true/false.
  destinations = metric.destination
  destinations && destinations[result]
end
457
+
458
+ # Log info about the condition and return the list of messages logged.
459
+ #
460
+ # watch - The Watch.
461
+ # metric - The Metric.
462
+ # condition - The Condition.
463
+ # result - The Boolean result of the condition test evaluation.
464
+ #
465
+ # Returns the Array of String messages.
466
def log_line(watch, metric, condition, result)
  # Logs the outcome of a condition check.
  #
  # watch     - The object whose name prefixes each line and which is passed
  #             to applog.
  # metric    - The Metric owning the condition.
  # condition - The Condition that was evaluated.
  # result    - The result of the condition test.
  #
  # Returns the Array of String info messages that were logged (empty when
  # condition.info is an empty Array).

  status = self.trigger?(metric, result) ? "[trigger]" : "[ok]"

  # One info line per piece of condition info, or a single bare line when the
  # condition has no info at all.
  messages =
    if condition.info
      Array(condition.info).map do |condition_info|
        "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
      end
    else
      ["#{watch.name} #{status} (#{condition.base_name})"]
    end
  messages.each { |line| applog(watch, :info, line) }

  # Debug line. Built by interpolation like the info lines above, so a name
  # that is not a String (e.g. a Symbol) is logged instead of raising.
  debug_message = "#{watch.name} #{condition.base_name} [#{result}] #{self.dest_desc(metric, condition)}"
  applog(watch, :debug, debug_message)

  messages
end
493
+
494
+ # Format the destination specification for use in debug logging.
495
+ #
496
+ # metric - The Metric.
497
+ # condition - The Condition.
498
+ #
499
+ # Returns the formatted String.
500
def dest_desc(metric, condition)
  # A transition set on the condition takes precedence and is shown in the
  # same hash form as a metric destination.
  return({ true => condition.transition }.inspect) if condition.transition

  # Otherwise describe the metric's destination table, if it has one.
  return metric.destination.inspect if metric.destination

  'none'
end
511
+
512
+ # Notify all recipients of the given condition with the specified message.
513
+ #
514
+ # condition - The Condition.
515
+ # message - The String message to send.
516
+ #
517
+ # Returns nothing.
518
def notify(condition, message)
  # Sends `message` to every contact named by the condition's notify setting.
  #
  # condition - The Condition; its notify value is normalized into a spec and
  #             its watch is used for logging.
  # message   - The String message to send.
  #
  # Returns nothing meaningful.

  spec = Contact.normalize(condition.notify)
  unmatched = []

  # Resolve each entry to contacts: a contact of that name first, otherwise a
  # contact group of that name. Entries matching neither are collected.
  resolved_contacts = []
  spec[:contacts].each do |contact_name_or_group|
    cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
    unmatched << contact_name_or_group if cons.empty?
    resolved_contacts.concat(cons)
  end

  # Warn about unmatched contacts.
  unless unmatched.empty?
    msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
    applog(condition.watch, :warn, msg)
  end

  return if resolved_contacts.empty?

  # Look the hostname up once for the whole batch rather than shelling out
  # per contact; fall back to 'none' if the command cannot be run.
  host =
    begin
      `hostname`.chomp
    rescue StandardError
      'none'
    end

  resolved_contacts.each do |c|
    begin
      c.notify(message, Time.now, spec[:priority], spec[:category], host)

      # The text is logged verbatim. It must not be used as a format string,
      # because contact info or names may contain '%'.
      msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
      applog(condition.watch, :info, msg)
    rescue Exception => e
      # Deliberately broad: one failing contact must not stop the others.
      # NOTE(review): this also catches SystemExit and signals - consider
      # narrowing it.
      applog(condition.watch, :error, "#{e.message} #{e.backtrace}")
      msg = "#{condition.watch.name} Failed to deliver notification for contact: #{c.name} (#{c.base_name})"
      applog(condition.watch, :error, msg)
    end
  end
end
551
+ end
552
+ end