samhendley-god 0.7.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +293 -0
  2. data/Manifest.txt +114 -0
  3. data/README.txt +60 -0
  4. data/Rakefile +35 -0
  5. data/bin/god +128 -0
  6. data/examples/events.god +84 -0
  7. data/examples/gravatar.god +54 -0
  8. data/examples/single.god +66 -0
  9. data/ext/god/extconf.rb +55 -0
  10. data/ext/god/kqueue_handler.c +123 -0
  11. data/ext/god/netlink_handler.c +167 -0
  12. data/init/god +42 -0
  13. data/lib/god.rb +667 -0
  14. data/lib/god/behavior.rb +52 -0
  15. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  16. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  17. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  18. data/lib/god/cli/command.rb +229 -0
  19. data/lib/god/cli/run.rb +176 -0
  20. data/lib/god/cli/version.rb +23 -0
  21. data/lib/god/condition.rb +96 -0
  22. data/lib/god/conditions/always.rb +23 -0
  23. data/lib/god/conditions/complex.rb +86 -0
  24. data/lib/god/conditions/cpu_usage.rb +80 -0
  25. data/lib/god/conditions/degrading_lambda.rb +52 -0
  26. data/lib/god/conditions/disk_usage.rb +27 -0
  27. data/lib/god/conditions/file_mtime.rb +28 -0
  28. data/lib/god/conditions/flapping.rb +128 -0
  29. data/lib/god/conditions/http_response_code.rb +168 -0
  30. data/lib/god/conditions/lambda.rb +25 -0
  31. data/lib/god/conditions/memory_usage.rb +82 -0
  32. data/lib/god/conditions/process_exits.rb +72 -0
  33. data/lib/god/conditions/process_running.rb +74 -0
  34. data/lib/god/conditions/tries.rb +44 -0
  35. data/lib/god/configurable.rb +57 -0
  36. data/lib/god/contact.rb +106 -0
  37. data/lib/god/contacts/campfire.rb +82 -0
  38. data/lib/god/contacts/email.rb +95 -0
  39. data/lib/god/contacts/jabber.rb +65 -0
  40. data/lib/god/contacts/twitter.rb +39 -0
  41. data/lib/god/contacts/webhook.rb +47 -0
  42. data/lib/god/dependency_graph.rb +41 -0
  43. data/lib/god/diagnostics.rb +37 -0
  44. data/lib/god/driver.rb +206 -0
  45. data/lib/god/errors.rb +24 -0
  46. data/lib/god/event_handler.rb +111 -0
  47. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  48. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  49. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  50. data/lib/god/logger.rb +120 -0
  51. data/lib/god/metric.rb +59 -0
  52. data/lib/god/process.rb +342 -0
  53. data/lib/god/registry.rb +32 -0
  54. data/lib/god/simple_logger.rb +53 -0
  55. data/lib/god/socket.rb +96 -0
  56. data/lib/god/sugar.rb +47 -0
  57. data/lib/god/system/portable_poller.rb +42 -0
  58. data/lib/god/system/process.rb +42 -0
  59. data/lib/god/system/slash_proc_poller.rb +92 -0
  60. data/lib/god/task.rb +491 -0
  61. data/lib/god/timeline.rb +25 -0
  62. data/lib/god/trigger.rb +43 -0
  63. data/lib/god/watch.rb +184 -0
  64. data/test/configs/child_events/child_events.god +44 -0
  65. data/test/configs/child_events/simple_server.rb +3 -0
  66. data/test/configs/child_polls/child_polls.god +37 -0
  67. data/test/configs/child_polls/simple_server.rb +12 -0
  68. data/test/configs/complex/complex.god +59 -0
  69. data/test/configs/complex/simple_server.rb +3 -0
  70. data/test/configs/contact/contact.god +84 -0
  71. data/test/configs/contact/simple_server.rb +3 -0
  72. data/test/configs/daemon_events/daemon_events.god +37 -0
  73. data/test/configs/daemon_events/simple_server.rb +8 -0
  74. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  75. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  76. data/test/configs/daemon_polls/simple_server.rb +6 -0
  77. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  78. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  79. data/test/configs/matias/matias.god +50 -0
  80. data/test/configs/real.rb +59 -0
  81. data/test/configs/running_load/running_load.god +16 -0
  82. data/test/configs/stress/simple_server.rb +3 -0
  83. data/test/configs/stress/stress.god +15 -0
  84. data/test/configs/task/logs/.placeholder +0 -0
  85. data/test/configs/task/task.god +26 -0
  86. data/test/configs/test.rb +61 -0
  87. data/test/helper.rb +151 -0
  88. data/test/suite.rb +6 -0
  89. data/test/test_behavior.rb +21 -0
  90. data/test/test_campfire.rb +41 -0
  91. data/test/test_condition.rb +50 -0
  92. data/test/test_conditions_disk_usage.rb +56 -0
  93. data/test/test_conditions_http_response_code.rb +109 -0
  94. data/test/test_conditions_process_running.rb +44 -0
  95. data/test/test_conditions_tries.rb +67 -0
  96. data/test/test_contact.rb +109 -0
  97. data/test/test_dependency_graph.rb +62 -0
  98. data/test/test_driver.rb +11 -0
  99. data/test/test_email.rb +45 -0
  100. data/test/test_event_handler.rb +80 -0
  101. data/test/test_god.rb +598 -0
  102. data/test/test_handlers_kqueue_handler.rb +16 -0
  103. data/test/test_logger.rb +63 -0
  104. data/test/test_metric.rb +72 -0
  105. data/test/test_process.rb +246 -0
  106. data/test/test_registry.rb +15 -0
  107. data/test/test_socket.rb +42 -0
  108. data/test/test_sugar.rb +42 -0
  109. data/test/test_system_portable_poller.rb +17 -0
  110. data/test/test_system_process.rb +30 -0
  111. data/test/test_task.rb +262 -0
  112. data/test/test_timeline.rb +37 -0
  113. data/test/test_trigger.rb +59 -0
  114. data/test/test_watch.rb +279 -0
  115. metadata +193 -0
@@ -0,0 +1,167 @@
1
+ #ifdef __linux__ /* only build on linux */
2
+
3
+ #include <ruby.h>
4
+ #include <sys/types.h>
5
+ #include <unistd.h>
6
+ #include <sys/socket.h>
7
+ #include <linux/netlink.h>
8
+ #include <linux/connector.h>
9
+ #include <linux/cn_proc.h>
10
+ #include <errno.h>
11
+
12
+ static VALUE mGod;
13
+ static VALUE cNetlinkHandler;
14
+ static VALUE cEventHandler;
15
+
16
+ static ID proc_exit;
17
+ static ID proc_fork;
18
+ static ID m_call;
19
+ static ID m_watching_pid;
20
+
21
+ static int nl_sock; /* socket for netlink connection */
22
+
23
+
24
+ VALUE
25
+ nlh_handle_events()
26
+ {
27
+ char buff[CONNECTOR_MAX_MSG_SIZE];
28
+ struct nlmsghdr *hdr;
29
+ struct proc_event *event;
30
+
31
+ VALUE extra_data;
32
+
33
+ fd_set fds;
34
+
35
+ FD_ZERO(&fds);
36
+ FD_SET(nl_sock, &fds);
37
+
38
+ if (0 > rb_thread_select(nl_sock + 1, &fds, NULL, NULL, NULL)) {
39
+ rb_raise(rb_eStandardError, strerror(errno));
40
+ }
41
+
42
+ /* If there were no events detected, return */
43
+ if (! FD_ISSET(nl_sock, &fds)) {
44
+ return INT2FIX(0);
45
+ }
46
+
47
+ /* if there are events, make calls */
48
+ if (-1 == recv(nl_sock, buff, sizeof(buff), 0)) {
49
+ rb_raise(rb_eStandardError, strerror(errno));
50
+ }
51
+
52
+ hdr = (struct nlmsghdr *)buff;
53
+
54
+ if (NLMSG_ERROR == hdr->nlmsg_type) {
55
+ rb_raise(rb_eStandardError, strerror(errno));
56
+ } else if (NLMSG_DONE == hdr->nlmsg_type) {
57
+
58
+ event = (struct proc_event *)((struct cn_msg *)NLMSG_DATA(hdr))->data;
59
+
60
+ switch(event->what) {
61
+ case PROC_EVENT_EXIT:
62
+ if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.exit.process_pid))) {
63
+ return INT2FIX(0);
64
+ }
65
+
66
+ extra_data = rb_hash_new();
67
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("pid")), INT2FIX(event->event_data.exit.process_pid));
68
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_code")), INT2FIX(event->event_data.exit.exit_code));
69
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_signal")), INT2FIX(event->event_data.exit.exit_signal));
70
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("thread_group_id")), INT2FIX(event->event_data.exit.process_tgid));
71
+
72
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit), extra_data);
73
+ return INT2FIX(1);
74
+
75
+ case PROC_EVENT_FORK:
76
+ if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.fork.parent_pid))) {
77
+ return INT2FIX(0);
78
+ }
79
+
80
+ extra_data = rb_hash_new();
81
+ rb_hash_aset(extra_data, rb_intern("parent_pid"), INT2FIX(event->event_data.fork.parent_pid));
82
+ rb_hash_aset(extra_data, rb_intern("parent_thread_group_id"), INT2FIX(event->event_data.fork.parent_tgid));
83
+ rb_hash_aset(extra_data, rb_intern("child_pid"), INT2FIX(event->event_data.fork.child_pid));
84
+ rb_hash_aset(extra_data, rb_intern("child_thread_group_id"), INT2FIX(event->event_data.fork.child_tgid));
85
+
86
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork), extra_data);
87
+ return INT2FIX(1);
88
+
89
+ case PROC_EVENT_NONE:
90
+ case PROC_EVENT_EXEC:
91
+ case PROC_EVENT_UID:
92
+ case PROC_EVENT_GID:
93
+ break;
94
+ }
95
+ }
96
+
97
+ return Qnil;
98
+ }
99
+
100
+
101
+ #define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
102
+ sizeof(int))
103
+
104
+ void
105
+ connect_to_netlink()
106
+ {
107
+ struct sockaddr_nl sa_nl; /* netlink interface info */
108
+ char buff[NL_MESSAGE_SIZE];
109
+ struct nlmsghdr *hdr; /* for telling netlink what we want */
110
+ struct cn_msg *msg; /* the actual connector message */
111
+
112
+ /* connect to netlink socket */
113
+ nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
114
+
115
+ if (-1 == nl_sock) {
116
+ rb_raise(rb_eStandardError, strerror(errno));
117
+ }
118
+
119
+ bzero(&sa_nl, sizeof(sa_nl));
120
+ sa_nl.nl_family = AF_NETLINK;
121
+ sa_nl.nl_groups = CN_IDX_PROC;
122
+ sa_nl.nl_pid = getpid();
123
+
124
+ if (-1 == bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl))) {
125
+ rb_raise(rb_eStandardError, strerror(errno));
126
+ }
127
+
128
+ /* Fill header */
129
+ hdr = (struct nlmsghdr *)buff;
130
+ hdr->nlmsg_len = NL_MESSAGE_SIZE;
131
+ hdr->nlmsg_type = NLMSG_DONE;
132
+ hdr->nlmsg_flags = 0;
133
+ hdr->nlmsg_seq = 0;
134
+ hdr->nlmsg_pid = getpid();
135
+
136
+ /* Fill message */
137
+ msg = (struct cn_msg *)NLMSG_DATA(hdr);
138
+ msg->id.idx = CN_IDX_PROC; /* Connecting to process information */
139
+ msg->id.val = CN_VAL_PROC;
140
+ msg->seq = 0;
141
+ msg->ack = 0;
142
+ msg->flags = 0;
143
+ msg->len = sizeof(int);
144
+ *(int*)msg->data = PROC_CN_MCAST_LISTEN;
145
+
146
+ if (-1 == send(nl_sock, hdr, hdr->nlmsg_len, 0)) {
147
+ rb_raise(rb_eStandardError, strerror(errno));
148
+ }
149
+ }
150
+
151
+ void
152
+ Init_netlink_handler_ext()
153
+ {
154
+ proc_exit = rb_intern("proc_exit");
155
+ proc_fork = rb_intern("proc_fork");
156
+ m_call = rb_intern("call");
157
+ m_watching_pid = rb_intern("watching_pid?");
158
+
159
+ mGod = rb_const_get(rb_cObject, rb_intern("God"));
160
+ cEventHandler = rb_const_get(mGod, rb_intern("EventHandler"));
161
+ cNetlinkHandler = rb_define_class_under(mGod, "NetlinkHandler", rb_cObject);
162
+ rb_define_singleton_method(cNetlinkHandler, "handle_events", nlh_handle_events, 0);
163
+
164
+ connect_to_netlink();
165
+ }
166
+
167
+ #endif
@@ -0,0 +1,42 @@
1
#!/bin/bash
#
# god       Startup script for god (http://god.rubyforge.org)
#
# chkconfig: - 85 15
# description: God is an easy to configure, easy to extend monitoring \
#              framework written in Ruby.
#
# Usage: god {start|stop|restart|status}
# The script exits with the status of the last god command it ran.

CONF_DIR=/etc/god

RETVAL=0

# Go no further if config directory is missing.
[ -d "$CONF_DIR" ] || exit 0

case "$1" in
    start)
      # Launch god with the master configuration file.
      ruby /usr/bin/god -c "$CONF_DIR/master.conf"
      RETVAL=$?
  ;;
    stop)
      ruby /usr/bin/god terminate
      RETVAL=$?
  ;;
    restart)
      # The exit status reported is that of the start step.
      ruby /usr/bin/god terminate
      ruby /usr/bin/god -c "$CONF_DIR/master.conf"
      RETVAL=$?
  ;;
    status)
      ruby /usr/bin/god status
      RETVAL=$?
  ;;
    *)
      echo "Usage: god {start|stop|restart|status}"
      exit 1
  ;;
esac

exit "$RETVAL"
@@ -0,0 +1,667 @@
1
+ $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # core
7
+ require 'stringio'
8
+ require 'fileutils'
9
+
10
+ begin
11
+ require 'fastthread'
12
+ rescue LoadError
13
+ ensure
14
+ require 'thread'
15
+ end
16
+
17
+ # stdlib
18
+
19
+ # internal requires
20
+ require 'god/errors'
21
+ require 'god/simple_logger'
22
+ require 'god/logger'
23
+
24
+ require 'god/system/process'
25
+ require 'god/system/portable_poller'
26
+ require 'god/system/slash_proc_poller'
27
+
28
+ require 'god/dependency_graph'
29
+ require 'god/timeline'
30
+ require 'god/configurable'
31
+
32
+ require 'god/task'
33
+
34
+ require 'god/behavior'
35
+ require 'god/behaviors/clean_pid_file'
36
+ require 'god/behaviors/clean_unix_socket'
37
+ require 'god/behaviors/notify_when_flapping'
38
+
39
+ require 'god/condition'
40
+ require 'god/conditions/process_running'
41
+ require 'god/conditions/process_exits'
42
+ require 'god/conditions/tries'
43
+ require 'god/conditions/memory_usage'
44
+ require 'god/conditions/cpu_usage'
45
+ require 'god/conditions/always'
46
+ require 'god/conditions/lambda'
47
+ require 'god/conditions/degrading_lambda'
48
+ require 'god/conditions/flapping'
49
+ require 'god/conditions/http_response_code'
50
+ require 'god/conditions/disk_usage'
51
+ require 'god/conditions/complex'
52
+ require 'god/conditions/file_mtime'
53
+
54
+ require 'god/contact'
55
+ require 'god/contacts/email'
56
+ require 'god/contacts/webhook'
57
+ begin
58
+ require 'god/contacts/twitter'
59
+ rescue LoadError
60
+ end
61
+ begin
62
+ require 'god/contacts/jabber'
63
+ rescue LoadError
64
+ end
65
+ begin
66
+ require 'god/contacts/campfire'
67
+ rescue LoadError
68
+ end
69
+
70
+ require 'god/socket'
71
+ require 'god/driver'
72
+
73
+ require 'god/metric'
74
+ require 'god/watch'
75
+
76
+ require 'god/trigger'
77
+ require 'god/event_handler'
78
+ require 'god/registry'
79
+ require 'god/process'
80
+
81
+ require 'god/sugar'
82
+
83
+ require 'god/cli/version'
84
+ require 'god/cli/command'
85
+
86
+ require 'god/diagnostics'
87
+
88
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
89
+
90
# Application-wide logger shared by all of god.
LOG = God::Logger.new

# Log +text+ at +level+ on behalf of +watch+ through the shared LOG.
# The three arguments are forwarded unchanged to LOG.log.
def applog(watch, level, text)
  LOG.log(watch, level, text)
end

# The $run global determines whether god is started when the program would
# normally end. It should be true when god is meant to start
# (e.g. `god -c <config file>`) and false otherwise (e.g. `god status`).
# A value assigned before this file is loaded is preserved.
$run ||= nil

# Absolute path of the directory one level above this file's directory.
GOD_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Return a binding captured inside this top-level method, i.e. the binding
# of god's root level.
def root_binding
  binding
end
109
+
110
module Kernel
  # Keep the built-in implementations reachable under *_orig names before
  # they are redefined below.
  alias_method :abort_orig, :abort
  alias_method :exit_orig, :exit

  # Replacement for Kernel#abort: clears $run so the at_exit hook does not
  # start god, logs +text+ as an error through applog when one is given,
  # then exits with status 1 (via the redefined #exit).
  def abort(text = nil)
    $run = false
    text && applog(nil, :error, text)
    exit(1)
  end

  # Replacement for Kernel#exit: clears $run and then delegates to the
  # original exit with +code+.
  def exit(code = 0)
    $run = false
    exit_orig(code)
  end
end
126
+
127
class Module
  # Define a reader and a guarded writer for each name in +args+.
  #
  # The reader returns the instance variable of the same name.  The writer
  # consults the receiver's +inited+ and +running+ values first:
  #   * inited and not running -> abort (the setting came too late)
  #   * inited and running     -> log a warning and leave the value alone
  #   * otherwise              -> store the value
  def safe_attr_accessor(*args)
    args.each do |arg|
      ivar = "@#{arg}".intern
      writer = "#{arg}=".intern

      define_method(writer) do |other|
        if self.inited
          if self.running
            applog(nil, :warn, "God.#{arg} can't be set while god is running")
            return
          end

          abort "God.#{arg} must be set before any Tasks are defined"
        end

        instance_variable_set(ivar, other)
      end

      define_method(arg) { instance_variable_get(ivar) }
    end
  end
end
149
+
150
+ module God
151
+ VERSION = '0.7.13'
152
+
153
+ LOG_BUFFER_SIZE_DEFAULT = 100
154
+ PID_FILE_DIRECTORY_DEFAULTS = ['/var/run/god', '~/.god/pids']
155
+ DRB_PORT_DEFAULT = 17165
156
+ DRB_ALLOW_DEFAULT = ['127.0.0.1']
157
+ LOG_LEVEL_DEFAULT = :info
158
+
159
+ class << self
160
+ # user configurable
161
+ safe_attr_accessor :pid,
162
+ :host,
163
+ :port,
164
+ :allow,
165
+ :log_buffer_size,
166
+ :pid_file_directory,
167
+ :log_file,
168
+ :log_level,
169
+ :use_events
170
+
171
+ # internal
172
+ attr_accessor :inited,
173
+ :running,
174
+ :pending_watches,
175
+ :pending_watch_states,
176
+ :server,
177
+ :watches,
178
+ :groups,
179
+ :contacts,
180
+ :contact_groups,
181
+ :main
182
+ end
183
+
184
+ # initialize class instance variables
185
+ self.pid = nil
186
+ self.host = nil
187
+ self.port = nil
188
+ self.allow = nil
189
+ self.log_buffer_size = nil
190
+ self.pid_file_directory = nil
191
+ self.log_level = nil
192
+
193
# One-time initialisation of god's internal state.  Does nothing when
# +inited+ is already set.
#
# Creates the empty task/group/contact registries, fills in defaults for
# unset user options, runs +setup+, applies the configured log level to
# LOG, and finally marks god as inited and not running.
#
# Returns nothing
def self.internal_init
  # only do this once
  return if self.inited

  # empty registries
  self.watches = {}
  self.groups = {}
  self.pending_watches = []
  self.pending_watch_states = {}
  self.contacts = {}
  self.contact_groups = {}

  # defaults for anything the config left unset
  self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
  self.port ||= DRB_PORT_DEFAULT
  self.allow ||= DRB_ALLOW_DEFAULT
  self.log_level ||= LOG_LEVEL_DEFAULT

  # additional setup
  self.setup

  # translate the symbolic log level into the Logger constant
  severity_for = {
    :debug => Logger::DEBUG,
    :info => Logger::INFO,
    :warn => Logger::WARN,
    :error => Logger::ERROR,
    :fatal => Logger::FATAL
  }
  LOG.level = severity_for[self.log_level]

  # from here on the guarded setters refuse changes
  self.inited = true

  # not yet running
  self.running = false
end
231
+
232
# Create a new, empty Watch and hand it to the mandatory block, where the
# configuration file sets its attributes.  This is +task+ called with the
# Watch class.
#
# Aborts on duplicate watch name
#           invalid watch
#           conflicting group name
#
# Returns nothing
def self.watch(&block)
  task(Watch, &block)
end
244
+
245
# Instantiate a new, empty Task object and yield it to the mandatory
# block. The attributes of the task will be set by the configuration
# file.
# +klass+ is the class to instantiate (defaults to Task)
#
# All validation (unique name, +valid?+, group name) happens before the
# task is added to any registry, so a rejected task leaves +watches+,
# +pending_watches+ and +groups+ untouched.
#
# Aborts on duplicate task name
#           invalid task
#           conflicting group name
#
# Returns nothing
def self.task(klass = Task)
  self.internal_init

  t = klass.new
  yield(t)

  # do the post-configuration
  t.prepare

  # if running, completely remove the watch (if necessary) to
  # prepare for the reload
  existing_watch = self.watches[t.name]
  if self.running && existing_watch
    self.pending_watch_states[existing_watch.name] = existing_watch.state
    self.unwatch(existing_watch)
  end

  # ensure the new watch has a unique name
  if self.watches[t.name] || self.groups[t.name]
    abort "Task name '#{t.name}' already used for a Task or Group"
  end

  # ensure watch is internally valid
  t.valid? || abort("Task '#{t.name}' is not valid (see above)")

  # ensure the group name is not a task name (including this task's own
  # name) before anything is recorded
  if t.group && (self.watches[t.group] || t.group == t.name)
    abort "Group name '#{t.group}' already used for a Task"
  end

  # add to list of watches
  self.watches[t.name] = t

  # add to pending watches
  self.pending_watches << t

  # add to group if specified
  if t.group
    self.groups[t.group] ||= []
    self.groups[t.group] << t
  end

  # register watch
  t.register!

  # log
  if self.running && existing_watch
    applog(t, :info, "#{t.name} Reloaded config")
  elsif self.running
    applog(t, :info, "#{t.name} Loaded config")
  end
end
306
+
307
# Unmonitor and remove the given watch from god.
# +watch+ is the Watch to remove
#
# The watch is unmonitored (unless it already is), unregistered, and
# dropped from +watches+ and from its group.  A group left without
# members is removed as well, so its name does not stay reserved.
#
# Returns nothing
def self.unwatch(watch)
  # unmonitor
  watch.unmonitor unless watch.state == :unmonitored

  # unregister
  watch.unregister!

  # remove from watches
  self.watches.delete(watch.name)

  # remove from groups; tolerate a group that is not (or no longer) listed
  if watch.group
    members = self.groups[watch.group]
    if members
      members.delete(watch)
      self.groups.delete(watch.group) if members.empty?
    end
  end

  applog(watch, :info, "#{watch.name} unwatched")
end
328
+
329
# Instantiate a new Contact of the given kind and send it to the block.
# Then prepare, validate, and record the Contact.
# +kind+ is the contact class specifier
#
# A contact whose name is already used by a Contact or Contact Group is
# not recorded; a warning is logged instead.  All checks run before the
# contact is added to +contacts+ or +contact_groups+, so a rejected
# contact leaves both untouched.
#
# Aborts on invalid kind
#           invalid contact
#           conflicting group name
#
# Returns nothing
def self.contact(kind)
  self.internal_init

  # create the contact
  begin
    c = Contact.generate(kind)
  rescue NoSuchContactError => e
    abort e.message
  end

  # send to block so config can set attributes
  yield(c) if block_given?

  # call prepare on the contact
  c.prepare

  # remove existing contacts of same name
  existing_contact = self.contacts[c.name]
  if self.running && existing_contact
    self.uncontact(existing_contact)
  end

  # warn and noop if the contact has been defined before
  if self.contacts[c.name] || self.contact_groups[c.name]
    applog(nil, :warn, "Contact name '#{c.name}' already used for a Contact or Contact Group")
    return
  end

  # abort if the Contact is invalid, the Contact will have printed
  # out its own error messages by now
  unless Contact.valid?(c) && c.valid?
    abort "Exiting on invalid contact"
  end

  # ensure the group name is not a contact name (including this contact's
  # own name) before anything is recorded
  if c.group && (self.contacts[c.group] || c.group == c.name)
    abort "Contact Group name '#{c.group}' already used for a Contact"
  end

  # add to list of contacts
  self.contacts[c.name] = c

  # add to contact group if specified
  if c.group
    self.contact_groups[c.group] ||= []
    self.contact_groups[c.group] << c
  end
end
387
+
388
# Remove the given contact from god.
# +contact+ is the Contact to remove
#
# The contact is dropped from +contacts+ and from its contact group.  A
# contact group left without members is removed as well, so its name
# does not stay reserved.
#
# Returns nothing
def self.uncontact(contact)
  self.contacts.delete(contact.name)

  if contact.group
    # tolerate a group that is not (or no longer) listed
    members = self.contact_groups[contact.group]
    if members
      members.delete(contact)
      self.contact_groups.delete(contact.group) if members.empty?
    end
  end
end
398
+
399
# Control the lifecycle of the given task(s).
# +name+ is the name of a task/group (String)
# +command+ is the command to run (String)
#           one of: "start"
#                   "monitor"
#                   "restart"
#                   "stop"
#                   "unmonitor"
#                   "remove"
#
# Every command except "remove" runs one thread per task and waits for
# all of them; "remove" unwatches the tasks one after another.
#
# Raises InvalidCommandError for any other command
#
# Returns String[]:task_names
def self.control(name, command)
  # a single task or every member of a group; copied because "remove"
  # changes the underlying group list
  targets = Array(self.watches[name] || self.groups[name]).dup

  workers =
    case command
    when "start", "monitor"
      targets.map { |w| Thread.new { w.monitor if w.state != :up } }
    when "restart"
      targets.map { |w| Thread.new { w.move(:restart) } }
    when "stop"
      targets.map do |w|
        Thread.new do
          w.action(:stop)
          w.unmonitor if w.state != :unmonitored
        end
      end
    when "unmonitor"
      targets.map { |w| Thread.new { w.unmonitor if w.state != :unmonitored } }
    when "remove"
      targets.each { |w| self.unwatch(w) }
      []
    else
      raise InvalidCommandError.new
    end

  workers.each { |worker| worker.join }

  targets.map { |w| w.name }
end
436
+
437
# Unmonitor and stop all tasks.
#
# Each task is handled in its own thread (unmonitor unless already
# unmonitored, then the :stop action if it is alive).  The tasks are then
# polled once a second, at most ten times, with one last check after the
# final wait.
#
# Returns true once no task reports alive?
#         false if some task is still alive after the ten waits
def self.stop_all
  self.watches.sort.each do |name, w|
    Thread.new do
      w.unmonitor if w.state != :unmonitored
      w.action(:stop) if w.alive?
    end
  end

  10.times do
    return true unless self.watches.any? { |name, w| w.alive? }
    sleep 1
  end

  # a task may have stopped during the last sleep; check once more
  # instead of reporting failure unconditionally
  !self.watches.any? { |name, w| w.alive? }
end
456
+
457
# Force the termination of god.
#   * Remove the pid file when a pid path is configured
#   * Stop the server when one has been created
#   * Hard exit with status 0 using exit!
#
# Never returns because the process will no longer exist!
def self.terminate
  pid_path = self.pid
  FileUtils.rm_f(pid_path) if pid_path

  running_server = self.server
  running_server.stop if running_server

  exit!(0)
end
468
+
469
# Gather the state and group of each task.
#
# Examples
#   God.status
#   # => { 'mongrel' => { :state => :up, :group => 'web' },
#   #      'nginx'   => { :state => :up, :group => nil } }
#
# Returns { String:task_name => { :state => Symbol, :group => String }, ... }
def self.status
  info = {}
  self.watches.each do |name, w|
    info[name] = {:state => w.state, :group => w.group}
  end
  info
end
483
+
484
# Send a signal to each task.
# +name+ is the String name of the task or group
# +signal+ is the signal to send. e.g. HUP, 9
#
# One thread per task delivers the signal; all threads are joined before
# returning.
#
# Returns String[]:task_names
def self.signal(name, signal)
  targets = Array(self.watches[name] || self.groups[name]).dup

  senders = targets.map do |w|
    Thread.new { w.signal(signal) }
  end
  senders.each { |sender| sender.join }

  targets.map { |w| w.name }
end
496
+
497
# Log lines for the given task since the specified time.
# +watch_name+ is the name of the task (may be abbreviated)
# +since+ is the Time since which to report log lines
#
# The abbreviation is resolved with pattern_match against the known task
# names; the first match is used.
#
# Raises God::NoSuchWatchError if no tasks matched
#
# Returns String:joined_log_lines
def self.running_log(watch_name, since)
  resolved_name = pattern_match(watch_name, self.watches.keys).first
  raise NoSuchWatchError.new unless resolved_name

  LOG.watch_log_since(resolved_name, since)
end
513
+
514
# Load a config file into a running god instance. Rescues any exceptions
# that the config may raise and reports these back to the caller.
# +code+ is a String containing the config file
# +filename+ is the filename of the config file
#
# Tasks defined by the config are monitored afterwards: a reloaded task
# unless its previous state was :unmonitored, a new task when it is set
# to autostart.
#
# NOTE(review): +code+ is passed to eval as-is; callers must only hand in
# trusted configuration.
#
# Returns [String[]:task_names, String:errors]
def self.running_load(code, filename)
  errors = ""
  loaded = []

  begin
    LOG.start_capture

    Gem.clear_paths
    eval(code, root_binding, filename)

    self.pending_watches.each do |w|
      previous_state = self.pending_watch_states[w.name]
      if previous_state
        w.monitor unless previous_state == :unmonitored
      elsif w.autostart?
        w.monitor
      end
    end

    loaded = self.pending_watches.dup
    self.pending_watches.clear
    self.pending_watch_states.clear
  rescue Exception => e
    # don't ever let running_load take down god; this deliberately
    # includes the SystemExit raised by abort inside the config
    errors << LOG.finish_capture

    unless e.instance_of?(SystemExit)
      errors << e.message << "\n"
      errors << e.backtrace.join("\n")
    end
  end

  [loaded.map { |w| w.name }, errors]
end
552
+
553
# Load the given file(s) according to the given glob.
# +glob+ is the glob-enabled path to load
#
# Every path the glob expands to is passed to Kernel.load.
#
# Returns nothing
def self.load(glob)
  Dir[glob].each { |path| Kernel.load(path) }
end
562
+
563
# Make sure a writable pid file directory is available.
#
# When pid_file_directory is configured it is created if missing and must
# be writable.  Otherwise each entry of PID_FILE_DIRECTORY_DEFAULTS is
# tried in order and the first one that can be created and written to
# becomes pid_file_directory.
#
# Aborts when the configured directory cannot be created (permission
#           denied) or is not writable
#           when none of the default directories is usable
#
# Returns nothing
def self.setup
  if self.pid_file_directory
    # pid file dir was specified, ensure it is created and writable
    unless File.exist?(self.pid_file_directory)
      begin
        FileUtils.mkdir_p(self.pid_file_directory)
      rescue Errno::EACCES => e
        abort "Failed to create pid file directory: #{e.message}"
      end
    end

    unless File.writable?(self.pid_file_directory)
      # Etc is only needed for this message; load it here so the lookup
      # cannot fail with a NameError
      require 'etc'
      abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
    end
  else
    # no pid file dir specified, try defaults
    PID_FILE_DIRECTORY_DEFAULTS.each do |idir|
      dir = File.expand_path(idir)
      begin
        FileUtils.mkdir_p(dir)
        if File.writable?(dir)
          self.pid_file_directory = dir
          break
        end
      rescue Errno::EACCES
        # not allowed to create this candidate; try the next one
      end
    end

    unless self.pid_file_directory
      dirs = PID_FILE_DIRECTORY_DEFAULTS.map { |x| File.expand_path(x) }
      abort "No pid file directory exists, could be created, or is writable at any of #{dirs.join(', ')}"
    end
  end

  applog(nil, :info, "Using pid file directory: #{self.pid_file_directory}")
end
599
+
600
# Initialize and startup the machinery that makes god work: run the
# internal initialisation, create the server on the configured port,
# monitor every task set to autostart, clear the pending list and mark
# god as running.  The call then blocks on a thread that sleeps forever.
#
# Returns nothing
def self.start
  self.internal_init

  # instantiate server
  self.server = Socket.new(self.port)

  # start monitoring any watches set to autostart
  self.watches.values.each do |w|
    w.monitor if w.autostart?
  end

  # clear pending watches
  self.pending_watches.clear

  # mark as running
  self.running = true

  # don't exit: park on a thread that only sleeps
  self.main = Thread.new { loop { sleep 60 } }
  self.main.join
end
628
+
629
# To be called on program exit to start god; simply delegates to +start+.
#
# Returns nothing
def self.at_exit
  start
end
635
+
636
+ # private
637
+
638
# Match a shortened pattern against a list of String candidates.
# The pattern is expanded into a regular expression by
# inserting .* between each character. Every character of the pattern is
# matched literally, so names containing characters such as '.', '+' or
# '(' can be abbreviated safely.
# +pattern+ is the String containing the abbreviation
# +list+ is the Array of Strings to match against
#
# Examples
#
#   list = %w{ foo bar bars }
#   pattern = 'br'
#   God.pattern_match(pattern, list)
#   # => ['bar', 'bars']
#
# Returns String[]:matched_elements
def self.pattern_match(pattern, list)
  # escape each character so regexp metacharacters in the abbreviation
  # neither change the match nor raise a RegexpError
  fuzzy = pattern.split('').map { |char| Regexp.escape(char) }.join('.*')

  # compile once instead of once per candidate
  regex = Regexp.new(fuzzy)

  list.select { |item| item =~ regex }
end
659
+ end
660
+
661
# Exit hook: runs immediately before the program exits. When $run is true
# god is started through God.at_exit; otherwise the program exits
# normally.
#
# Returns nothing
at_exit { God.at_exit if $run }