firenxis-god 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. data/Announce.txt +135 -0
  2. data/History.txt +393 -0
  3. data/README.txt +59 -0
  4. data/Rakefile +142 -0
  5. data/bin/god +132 -0
  6. data/ext/god/.gitignore +5 -0
  7. data/ext/god/extconf.rb +55 -0
  8. data/ext/god/kqueue_handler.c +125 -0
  9. data/ext/god/netlink_handler.c +168 -0
  10. data/god.gemspec +164 -0
  11. data/lib/god.rb +701 -0
  12. data/lib/god/behavior.rb +52 -0
  13. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  14. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  15. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  16. data/lib/god/cli/command.rb +256 -0
  17. data/lib/god/cli/run.rb +172 -0
  18. data/lib/god/cli/version.rb +23 -0
  19. data/lib/god/compat19.rb +36 -0
  20. data/lib/god/condition.rb +96 -0
  21. data/lib/god/conditions/always.rb +23 -0
  22. data/lib/god/conditions/complex.rb +86 -0
  23. data/lib/god/conditions/cpu_usage.rb +80 -0
  24. data/lib/god/conditions/degrading_lambda.rb +52 -0
  25. data/lib/god/conditions/disk_usage.rb +32 -0
  26. data/lib/god/conditions/file_mtime.rb +28 -0
  27. data/lib/god/conditions/flapping.rb +128 -0
  28. data/lib/god/conditions/http_response_code.rb +168 -0
  29. data/lib/god/conditions/lambda.rb +25 -0
  30. data/lib/god/conditions/memory_usage.rb +82 -0
  31. data/lib/god/conditions/process_exits.rb +72 -0
  32. data/lib/god/conditions/process_running.rb +74 -0
  33. data/lib/god/conditions/tries.rb +44 -0
  34. data/lib/god/configurable.rb +57 -0
  35. data/lib/god/contact.rb +114 -0
  36. data/lib/god/contacts/campfire.rb +121 -0
  37. data/lib/god/contacts/email.rb +136 -0
  38. data/lib/god/contacts/jabber.rb +75 -0
  39. data/lib/god/contacts/prowl.rb +57 -0
  40. data/lib/god/contacts/scout.rb +55 -0
  41. data/lib/god/contacts/twitter.rb +51 -0
  42. data/lib/god/contacts/webhook.rb +73 -0
  43. data/lib/god/dependency_graph.rb +41 -0
  44. data/lib/god/diagnostics.rb +37 -0
  45. data/lib/god/driver.rb +206 -0
  46. data/lib/god/errors.rb +24 -0
  47. data/lib/god/event_handler.rb +108 -0
  48. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  49. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  50. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  51. data/lib/god/logger.rb +109 -0
  52. data/lib/god/metric.rb +59 -0
  53. data/lib/god/process.rb +363 -0
  54. data/lib/god/registry.rb +32 -0
  55. data/lib/god/simple_logger.rb +59 -0
  56. data/lib/god/socket.rb +107 -0
  57. data/lib/god/sugar.rb +47 -0
  58. data/lib/god/sys_logger.rb +45 -0
  59. data/lib/god/system/portable_poller.rb +42 -0
  60. data/lib/god/system/process.rb +50 -0
  61. data/lib/god/system/slash_proc_poller.rb +92 -0
  62. data/lib/god/task.rb +503 -0
  63. data/lib/god/timeline.rb +25 -0
  64. data/lib/god/trigger.rb +43 -0
  65. data/lib/god/watch.rb +188 -0
  66. data/test/configs/child_events/child_events.god +44 -0
  67. data/test/configs/child_events/simple_server.rb +3 -0
  68. data/test/configs/child_polls/child_polls.god +37 -0
  69. data/test/configs/child_polls/simple_server.rb +12 -0
  70. data/test/configs/complex/complex.god +59 -0
  71. data/test/configs/complex/simple_server.rb +3 -0
  72. data/test/configs/contact/contact.god +108 -0
  73. data/test/configs/contact/simple_server.rb +3 -0
  74. data/test/configs/daemon_events/daemon_events.god +37 -0
  75. data/test/configs/daemon_events/simple_server.rb +8 -0
  76. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  77. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  78. data/test/configs/daemon_polls/simple_server.rb +6 -0
  79. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  80. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  81. data/test/configs/lifecycle/lifecycle.god +25 -0
  82. data/test/configs/matias/matias.god +50 -0
  83. data/test/configs/real.rb +59 -0
  84. data/test/configs/running_load/running_load.god +16 -0
  85. data/test/configs/stop_options/simple_server.rb +12 -0
  86. data/test/configs/stop_options/stop_options.god +39 -0
  87. data/test/configs/stress/simple_server.rb +3 -0
  88. data/test/configs/stress/stress.god +15 -0
  89. data/test/configs/task/logs/.placeholder +0 -0
  90. data/test/configs/task/task.god +26 -0
  91. data/test/configs/test.rb +61 -0
  92. data/test/helper.rb +141 -0
  93. data/test/suite.rb +6 -0
  94. data/test/test_behavior.rb +18 -0
  95. data/test/test_campfire.rb +23 -0
  96. data/test/test_condition.rb +50 -0
  97. data/test/test_conditions_disk_usage.rb +50 -0
  98. data/test/test_conditions_http_response_code.rb +109 -0
  99. data/test/test_conditions_process_running.rb +40 -0
  100. data/test/test_conditions_tries.rb +67 -0
  101. data/test/test_contact.rb +109 -0
  102. data/test/test_dependency_graph.rb +62 -0
  103. data/test/test_driver.rb +11 -0
  104. data/test/test_email.rb +34 -0
  105. data/test/test_event_handler.rb +80 -0
  106. data/test/test_god.rb +570 -0
  107. data/test/test_handlers_kqueue_handler.rb +16 -0
  108. data/test/test_jabber.rb +29 -0
  109. data/test/test_logger.rb +55 -0
  110. data/test/test_metric.rb +72 -0
  111. data/test/test_process.rb +247 -0
  112. data/test/test_prowl.rb +15 -0
  113. data/test/test_registry.rb +15 -0
  114. data/test/test_socket.rb +34 -0
  115. data/test/test_sugar.rb +42 -0
  116. data/test/test_system_portable_poller.rb +17 -0
  117. data/test/test_system_process.rb +30 -0
  118. data/test/test_task.rb +246 -0
  119. data/test/test_timeline.rb +37 -0
  120. data/test/test_trigger.rb +59 -0
  121. data/test/test_watch.rb +279 -0
  122. data/test/test_webhook.rb +15 -0
  123. metadata +362 -0
data/lib/god/task.rb ADDED
@@ -0,0 +1,503 @@
1
+ module God
2
+
3
+ class Task
4
+ attr_accessor :name, :interval, :group, :valid_states, :initial_state, :driver
5
+
6
+ attr_writer :autostart
7
+ def autostart?; @autostart; end
8
+
9
+ # api
10
+ attr_accessor :state, :behaviors, :metrics, :directory
11
+
12
+ def initialize
13
+ @autostart ||= true
14
+
15
+ # initial state is unmonitored
16
+ self.state = :unmonitored
17
+
18
+ # the list of behaviors
19
+ self.behaviors = []
20
+
21
+ # the list of conditions for each action
22
+ self.metrics = {nil => [], :unmonitored => [], :stop => []}
23
+
24
+ # the condition -> metric lookup
25
+ self.directory = {}
26
+
27
+ # driver
28
+ self.driver = Driver.new(self)
29
+ end
30
+
31
+ def prepare
32
+ self.valid_states.each do |state|
33
+ self.metrics[state] ||= []
34
+ end
35
+ end
36
+
37
+ def valid?
38
+ valid = true
39
+
40
+ # a name must be specified
41
+ if self.name.nil?
42
+ valid = false
43
+ applog(self, :error, "No name was specified")
44
+ end
45
+
46
+ # valid_states must be specified
47
+ if self.valid_states.nil?
48
+ valid = false
49
+ applog(self, :error, "No valid_states array was specified")
50
+ end
51
+
52
+ # valid_states must be specified
53
+ if self.initial_state.nil?
54
+ valid = false
55
+ applog(self, :error, "No initial_state was specified")
56
+ end
57
+
58
+ valid
59
+ end
60
+
61
+ ###########################################################################
62
+ #
63
+ # Advanced mode
64
+ #
65
+ ###########################################################################
66
+
67
+ def canonical_hash_form(to)
68
+ to.instance_of?(Symbol) ? {true => to} : to
69
+ end
70
+
71
+ # Define a transition handler which consists of a set of conditions
72
+ def transition(start_states, end_states)
73
+ # convert end_states into canonical hash form
74
+ canonical_end_states = canonical_hash_form(end_states)
75
+
76
+ Array(start_states).each do |start_state|
77
+ # validate start state
78
+ unless self.valid_states.include?(start_state)
79
+ abort "Invalid state :#{start_state}. Must be one of the symbols #{self.valid_states.map{|x| ":#{x}"}.join(', ')}"
80
+ end
81
+
82
+ # create a new metric to hold the watch, end states, and conditions
83
+ m = Metric.new(self, canonical_end_states)
84
+
85
+ if block_given?
86
+ # let the config file define some conditions on the metric
87
+ yield(m)
88
+ else
89
+ # add an :always condition if no block
90
+ m.condition(:always) do |c|
91
+ c.what = true
92
+ end
93
+ end
94
+
95
+ # populate the condition -> metric directory
96
+ m.conditions.each do |c|
97
+ self.directory[c] = m
98
+ end
99
+
100
+ # record the metric
101
+ self.metrics[start_state] ||= []
102
+ self.metrics[start_state] << m
103
+ end
104
+ end
105
+
106
+ def lifecycle
107
+ # create a new metric to hold the watch and conditions
108
+ m = Metric.new(self)
109
+
110
+ # let the config file define some conditions on the metric
111
+ yield(m)
112
+
113
+ # populate the condition -> metric directory
114
+ m.conditions.each do |c|
115
+ self.directory[c] = m
116
+ end
117
+
118
+ # record the metric
119
+ self.metrics[nil] << m
120
+ end
121
+
122
+ ###########################################################################
123
+ #
124
+ # Lifecycle
125
+ #
126
+ ###########################################################################
127
+
128
+ # Enable monitoring
129
+ #
130
+ # Returns nothing
131
+ def monitor
132
+ self.move(self.initial_state)
133
+ end
134
+
135
+ # Disable monitoring
136
+ #
137
+ # Returns nothing
138
+ def unmonitor
139
+ self.move(:unmonitored)
140
+ end
141
+
142
+ # Move to the givent state
143
+ # +to_state+ is the Symbol representing the state to move to
144
+ #
145
+ # Returns Task (self)
146
+ def move(to_state)
147
+ if !self.driver.in_driver_context?
148
+ # called from outside Driver
149
+
150
+ # send an async message to Driver
151
+ self.driver.message(:move, [to_state])
152
+ else
153
+ # called from within Driver
154
+
155
+ # record original info
156
+ orig_to_state = to_state
157
+ from_state = self.state
158
+
159
+ # log
160
+ msg = "#{self.name} move '#{from_state}' to '#{to_state}'"
161
+ applog(self, :info, msg)
162
+
163
+ # cleanup from current state
164
+ self.driver.clear_events
165
+ self.metrics[from_state].each { |m| m.disable }
166
+ if to_state == :unmonitored
167
+ self.metrics[nil].each { |m| m.disable }
168
+ end
169
+
170
+ # perform action
171
+ self.action(to_state)
172
+
173
+ # enable simple mode
174
+ if [:start, :restart].include?(to_state) && self.metrics[to_state].empty?
175
+ to_state = :up
176
+ end
177
+
178
+ # move to new state
179
+ self.metrics[to_state].each { |m| m.enable }
180
+
181
+ # if no from state, enable lifecycle metric
182
+ if from_state == :unmonitored
183
+ self.metrics[nil].each { |m| m.enable }
184
+ end
185
+
186
+ # set state
187
+ self.state = to_state
188
+
189
+ # broadcast to interested TriggerConditions
190
+ Trigger.broadcast(self, :state_change, [from_state, orig_to_state])
191
+
192
+ # log
193
+ msg = "#{self.name} moved '#{from_state}' to '#{to_state}'"
194
+ applog(self, :info, msg)
195
+ end
196
+
197
+ self
198
+ end
199
+
200
+ # Notify the Driver that an EventCondition has triggered
201
+ #
202
+ # Returns nothing
203
+ def trigger(condition)
204
+ self.driver.message(:handle_event, [condition])
205
+ end
206
+
207
+ def signal(sig)
208
+ # noop
209
+ end
210
+
211
+ ###########################################################################
212
+ #
213
+ # Actions
214
+ #
215
+ ###########################################################################
216
+
217
+ def method_missing(sym, *args)
218
+ unless (sym.to_s =~ /=$/)
219
+ super
220
+ end
221
+
222
+ base = sym.to_s.chop.intern
223
+
224
+ unless self.valid_states.include?(base)
225
+ super
226
+ end
227
+
228
+ self.class.send(:attr_accessor, base)
229
+ self.send(sym, *args)
230
+ end
231
+
232
+ # Perform the given action
233
+ # +a+ is the action Symbol
234
+ # +c+ is the Condition
235
+ #
236
+ # Returns Task (self)
237
+ def action(a, c = nil)
238
+ if !self.driver.in_driver_context?
239
+ # called from outside Driver
240
+
241
+ # send an async message to Driver
242
+ self.driver.message(:action, [a, c])
243
+ else
244
+ # called from within Driver
245
+
246
+ if self.respond_to?(a)
247
+ command = self.send(a)
248
+
249
+ case command
250
+ when String
251
+ msg = "#{self.name} #{a}: #{command}"
252
+ applog(self, :info, msg)
253
+
254
+ system(command)
255
+ when Proc
256
+ msg = "#{self.name} #{a}: lambda"
257
+ applog(self, :info, msg)
258
+
259
+ command.call
260
+ else
261
+ raise NotImplementedError
262
+ end
263
+ end
264
+ end
265
+ end
266
+
267
+ ###########################################################################
268
+ #
269
+ # Events
270
+ #
271
+ ###########################################################################
272
+
273
+ def attach(condition)
274
+ case condition
275
+ when PollCondition
276
+ self.driver.schedule(condition, 0)
277
+ when EventCondition, TriggerCondition
278
+ condition.register
279
+ end
280
+ end
281
+
282
+ def detach(condition)
283
+ case condition
284
+ when PollCondition
285
+ condition.reset
286
+ when EventCondition, TriggerCondition
287
+ condition.deregister
288
+ end
289
+ end
290
+
291
+ ###########################################################################
292
+ #
293
+ # Registration
294
+ #
295
+ ###########################################################################
296
+
297
+ def register!
298
+ # override if necessary
299
+ end
300
+
301
+ def unregister!
302
+ driver.shutdown
303
+ end
304
+
305
+ ###########################################################################
306
+ #
307
+ # Handlers
308
+ #
309
+ ###########################################################################
310
+
311
+ # Evaluate and handle the given poll condition. Handles logging
312
+ # notifications, and moving to the new state if necessary
313
+ # +condition+ is the Condition to handle
314
+ #
315
+ # Returns nothing
316
+ def handle_poll(condition)
317
+ # lookup metric
318
+ metric = self.directory[condition]
319
+
320
+ # run the test
321
+ begin
322
+ result = condition.test
323
+ rescue Object => e
324
+ cname = condition.class.to_s.split('::').last
325
+ message = format("Unhandled exception in %s condition - (%s): %s\n%s",
326
+ cname, e.class, e.message, e.backtrace.join("\n"))
327
+ applog(self, :error, message)
328
+ result = false
329
+ end
330
+
331
+ # log
332
+ messages = self.log_line(self, metric, condition, result)
333
+
334
+ # notify
335
+ if result && condition.notify
336
+ self.notify(condition, messages.last)
337
+ end
338
+
339
+ # after-condition
340
+ condition.after
341
+
342
+ # get the destination
343
+ dest =
344
+ if result && condition.transition
345
+ # condition override
346
+ condition.transition
347
+ else
348
+ # regular
349
+ metric.destination && metric.destination[result]
350
+ end
351
+
352
+ # transition or reschedule
353
+ if dest
354
+ # transition
355
+ begin
356
+ self.move(dest)
357
+ rescue EventRegistrationFailedError
358
+ msg = self.name + ' Event registration failed, moving back to previous state'
359
+ applog(self, :info, msg)
360
+
361
+ dest = self.state
362
+ retry
363
+ end
364
+ else
365
+ # reschedule
366
+ self.driver.schedule(condition)
367
+ end
368
+ end
369
+
370
+ # Asynchronously evaluate and handle the given event condition. Handles logging
371
+ # notifications, and moving to the new state if necessary
372
+ # +condition+ is the Condition to handle
373
+ #
374
+ # Returns nothing
375
+ def handle_event(condition)
376
+ # lookup metric
377
+ metric = self.directory[condition]
378
+
379
+ # log
380
+ messages = self.log_line(self, metric, condition, true)
381
+
382
+ # notify
383
+ if condition.notify
384
+ self.notify(condition, messages.last)
385
+ end
386
+
387
+ # get the destination
388
+ dest =
389
+ if condition.transition
390
+ # condition override
391
+ condition.transition
392
+ else
393
+ # regular
394
+ metric.destination && metric.destination[true]
395
+ end
396
+
397
+ if dest
398
+ self.move(dest)
399
+ end
400
+ end
401
+
402
+ # Determine whether a trigger happened
403
+ # +metric+ is the Metric
404
+ # +result+ is the result from the condition's test
405
+ #
406
+ # Returns Boolean
407
+ def trigger?(metric, result)
408
+ metric.destination && metric.destination[result]
409
+ end
410
+
411
+ # Log info about the condition and return the list of messages logged
412
+ # +watch+ is the Watch
413
+ # +metric+ is the Metric
414
+ # +condition+ is the Condition
415
+ # +result+ is the Boolean result of the condition test evaluation
416
+ #
417
+ # Returns String[]
418
+ def log_line(watch, metric, condition, result)
419
+ status =
420
+ if self.trigger?(metric, result)
421
+ "[trigger]"
422
+ else
423
+ "[ok]"
424
+ end
425
+
426
+ messages = []
427
+
428
+ # log info if available
429
+ if condition.info
430
+ Array(condition.info).each do |condition_info|
431
+ messages << "#{watch.name} #{status} #{condition_info} (#{condition.base_name})"
432
+ applog(watch, :info, messages.last)
433
+ end
434
+ else
435
+ messages << "#{watch.name} #{status} (#{condition.base_name})"
436
+ applog(watch, :info, messages.last)
437
+ end
438
+
439
+ # log
440
+ debug_message = watch.name + ' ' + condition.base_name + " [#{result}] " + self.dest_desc(metric, condition)
441
+ applog(watch, :debug, debug_message)
442
+
443
+ messages
444
+ end
445
+
446
+ # Format the destination specification for use in debug logging
447
+ # +metric+ is the Metric
448
+ # +condition+ is the Condition
449
+ #
450
+ # Returns String
451
+ def dest_desc(metric, condition)
452
+ if condition.transition
453
+ {true => condition.transition}.inspect
454
+ else
455
+ if metric.destination
456
+ metric.destination.inspect
457
+ else
458
+ 'none'
459
+ end
460
+ end
461
+ end
462
+
463
+ # Notify all recipeients of the given condition with the specified message
464
+ # +condition+ is the Condition
465
+ # +message+ is the String message to send
466
+ #
467
+ # Returns nothing
468
+ def notify(condition, message)
469
+ spec = Contact.normalize(condition.notify)
470
+ unmatched = []
471
+
472
+ # resolve contacts
473
+ resolved_contacts =
474
+ spec[:contacts].inject([]) do |acc, contact_name_or_group|
475
+ cons = Array(God.contacts[contact_name_or_group] || God.contact_groups[contact_name_or_group])
476
+ unmatched << contact_name_or_group if cons.empty?
477
+ acc += cons
478
+ acc
479
+ end
480
+
481
+ # warn about unmatched contacts
482
+ unless unmatched.empty?
483
+ msg = "#{condition.watch.name} no matching contacts for '#{unmatched.join(", ")}'"
484
+ applog(condition.watch, :warn, msg)
485
+ end
486
+
487
+ # notify each contact
488
+ resolved_contacts.each do |c|
489
+ host = `hostname`.chomp rescue 'none'
490
+ begin
491
+ c.notify(message, Time.now, spec[:priority], spec[:category], host)
492
+ msg = "#{condition.watch.name} #{c.info ? c.info : "notification sent for contact: #{c.name}"} (#{c.base_name})"
493
+ applog(condition.watch, :info, msg % [])
494
+ rescue Exception => e
495
+ applog(condition.watch, :error, "#{e.message} #{e.backtrace}")
496
+ msg = "#{condition.watch.name} Failed to deliver notification for contact: #{c.name} (#{c.base_name})"
497
+ applog(condition.watch, :error, msg % [])
498
+ end
499
+ end
500
+ end
501
+ end
502
+
503
+ end