jwilkins-god 0.7.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/History.txt +261 -0
  2. data/Manifest.txt +107 -0
  3. data/README.txt +59 -0
  4. data/Rakefile +35 -0
  5. data/bin/god +127 -0
  6. data/examples/events.god +84 -0
  7. data/examples/gravatar.god +54 -0
  8. data/examples/single.god +66 -0
  9. data/ext/god/extconf.rb +55 -0
  10. data/ext/god/kqueue_handler.c +123 -0
  11. data/ext/god/netlink_handler.c +167 -0
  12. data/init/god +42 -0
  13. data/lib/god.rb +649 -0
  14. data/lib/god/behavior.rb +52 -0
  15. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  16. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  17. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  18. data/lib/god/cli/command.rb +206 -0
  19. data/lib/god/cli/run.rb +177 -0
  20. data/lib/god/cli/version.rb +23 -0
  21. data/lib/god/condition.rb +96 -0
  22. data/lib/god/conditions/always.rb +23 -0
  23. data/lib/god/conditions/complex.rb +86 -0
  24. data/lib/god/conditions/cpu_usage.rb +80 -0
  25. data/lib/god/conditions/degrading_lambda.rb +52 -0
  26. data/lib/god/conditions/disk_usage.rb +27 -0
  27. data/lib/god/conditions/flapping.rb +128 -0
  28. data/lib/god/conditions/http_response_code.rb +168 -0
  29. data/lib/god/conditions/lambda.rb +25 -0
  30. data/lib/god/conditions/memory_usage.rb +82 -0
  31. data/lib/god/conditions/process_exits.rb +72 -0
  32. data/lib/god/conditions/process_running.rb +74 -0
  33. data/lib/god/conditions/tries.rb +44 -0
  34. data/lib/god/configurable.rb +57 -0
  35. data/lib/god/contact.rb +106 -0
  36. data/lib/god/contacts/email.rb +95 -0
  37. data/lib/god/dependency_graph.rb +41 -0
  38. data/lib/god/diagnostics.rb +37 -0
  39. data/lib/god/driver.rb +206 -0
  40. data/lib/god/errors.rb +24 -0
  41. data/lib/god/event_handler.rb +111 -0
  42. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  43. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  44. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  45. data/lib/god/logger.rb +120 -0
  46. data/lib/god/metric.rb +59 -0
  47. data/lib/god/process.rb +327 -0
  48. data/lib/god/registry.rb +32 -0
  49. data/lib/god/simple_logger.rb +53 -0
  50. data/lib/god/socket.rb +96 -0
  51. data/lib/god/sugar.rb +47 -0
  52. data/lib/god/system/portable_poller.rb +42 -0
  53. data/lib/god/system/process.rb +42 -0
  54. data/lib/god/system/slash_proc_poller.rb +82 -0
  55. data/lib/god/task.rb +487 -0
  56. data/lib/god/timeline.rb +25 -0
  57. data/lib/god/trigger.rb +43 -0
  58. data/lib/god/watch.rb +183 -0
  59. data/test/configs/child_events/child_events.god +44 -0
  60. data/test/configs/child_events/simple_server.rb +3 -0
  61. data/test/configs/child_polls/child_polls.god +37 -0
  62. data/test/configs/child_polls/simple_server.rb +12 -0
  63. data/test/configs/complex/complex.god +59 -0
  64. data/test/configs/complex/simple_server.rb +3 -0
  65. data/test/configs/contact/contact.god +74 -0
  66. data/test/configs/contact/simple_server.rb +3 -0
  67. data/test/configs/daemon_events/daemon_events.god +37 -0
  68. data/test/configs/daemon_events/simple_server.rb +8 -0
  69. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  70. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  71. data/test/configs/daemon_polls/simple_server.rb +6 -0
  72. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  73. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  74. data/test/configs/matias/matias.god +50 -0
  75. data/test/configs/real.rb +59 -0
  76. data/test/configs/running_load/running_load.god +16 -0
  77. data/test/configs/stress/simple_server.rb +3 -0
  78. data/test/configs/stress/stress.god +15 -0
  79. data/test/configs/task/logs/.placeholder +0 -0
  80. data/test/configs/task/task.god +26 -0
  81. data/test/configs/test.rb +61 -0
  82. data/test/helper.rb +151 -0
  83. data/test/suite.rb +6 -0
  84. data/test/test_behavior.rb +21 -0
  85. data/test/test_condition.rb +50 -0
  86. data/test/test_conditions_disk_usage.rb +56 -0
  87. data/test/test_conditions_http_response_code.rb +109 -0
  88. data/test/test_conditions_process_running.rb +44 -0
  89. data/test/test_conditions_tries.rb +67 -0
  90. data/test/test_contact.rb +109 -0
  91. data/test/test_dependency_graph.rb +62 -0
  92. data/test/test_driver.rb +11 -0
  93. data/test/test_event_handler.rb +80 -0
  94. data/test/test_god.rb +598 -0
  95. data/test/test_handlers_kqueue_handler.rb +16 -0
  96. data/test/test_logger.rb +63 -0
  97. data/test/test_metric.rb +72 -0
  98. data/test/test_process.rb +246 -0
  99. data/test/test_registry.rb +15 -0
  100. data/test/test_socket.rb +42 -0
  101. data/test/test_sugar.rb +42 -0
  102. data/test/test_system_portable_poller.rb +17 -0
  103. data/test/test_system_process.rb +30 -0
  104. data/test/test_task.rb +262 -0
  105. data/test/test_timeline.rb +37 -0
  106. data/test/test_trigger.rb +59 -0
  107. data/test/test_watch.rb +279 -0
  108. metadata +186 -0
@@ -0,0 +1,167 @@
1
+ #ifdef __linux__ /* only build on linux */
2
+
3
+ #include <ruby.h>
4
+ #include <sys/types.h>
5
+ #include <unistd.h>
6
+ #include <sys/socket.h>
7
+ #include <linux/netlink.h>
8
+ #include <linux/connector.h>
9
+ #include <linux/cn_proc.h>
10
+ #include <errno.h>
11
+
12
+ static VALUE mGod;
13
+ static VALUE cNetlinkHandler;
14
+ static VALUE cEventHandler;
15
+
16
+ static ID proc_exit;
17
+ static ID proc_fork;
18
+ static ID m_call;
19
+ static ID m_watching_pid;
20
+
21
+ static int nl_sock; /* socket for netlink connection */
22
+
23
+
24
+ VALUE
25
+ nlh_handle_events()
26
+ {
27
+ char buff[CONNECTOR_MAX_MSG_SIZE];
28
+ struct nlmsghdr *hdr;
29
+ struct proc_event *event;
30
+
31
+ VALUE extra_data;
32
+
33
+ fd_set fds;
34
+
35
+ FD_ZERO(&fds);
36
+ FD_SET(nl_sock, &fds);
37
+
38
+ if (0 > rb_thread_select(nl_sock + 1, &fds, NULL, NULL, NULL)) {
39
+ rb_raise(rb_eStandardError, strerror(errno));
40
+ }
41
+
42
+ /* If there were no events detected, return */
43
+ if (! FD_ISSET(nl_sock, &fds)) {
44
+ return INT2FIX(0);
45
+ }
46
+
47
+ /* if there are events, make calls */
48
+ if (-1 == recv(nl_sock, buff, sizeof(buff), 0)) {
49
+ rb_raise(rb_eStandardError, strerror(errno));
50
+ }
51
+
52
+ hdr = (struct nlmsghdr *)buff;
53
+
54
+ if (NLMSG_ERROR == hdr->nlmsg_type) {
55
+ rb_raise(rb_eStandardError, strerror(errno));
56
+ } else if (NLMSG_DONE == hdr->nlmsg_type) {
57
+
58
+ event = (struct proc_event *)((struct cn_msg *)NLMSG_DATA(hdr))->data;
59
+
60
+ switch(event->what) {
61
+ case PROC_EVENT_EXIT:
62
+ if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.exit.process_pid))) {
63
+ return INT2FIX(0);
64
+ }
65
+
66
+ extra_data = rb_hash_new();
67
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("pid")), INT2FIX(event->event_data.exit.process_pid));
68
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_code")), INT2FIX(event->event_data.exit.exit_code));
69
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("exit_signal")), INT2FIX(event->event_data.exit.exit_signal));
70
+ rb_hash_aset(extra_data, ID2SYM(rb_intern("thread_group_id")), INT2FIX(event->event_data.exit.process_tgid));
71
+
72
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.exit.process_pid), ID2SYM(proc_exit), extra_data);
73
+ return INT2FIX(1);
74
+
75
+ case PROC_EVENT_FORK:
76
+ if (Qnil == rb_funcall(cEventHandler, m_watching_pid, 1, INT2FIX(event->event_data.fork.parent_pid))) {
77
+ return INT2FIX(0);
78
+ }
79
+
80
+ extra_data = rb_hash_new();
81
+ rb_hash_aset(extra_data, rb_intern("parent_pid"), INT2FIX(event->event_data.fork.parent_pid));
82
+ rb_hash_aset(extra_data, rb_intern("parent_thread_group_id"), INT2FIX(event->event_data.fork.parent_tgid));
83
+ rb_hash_aset(extra_data, rb_intern("child_pid"), INT2FIX(event->event_data.fork.child_pid));
84
+ rb_hash_aset(extra_data, rb_intern("child_thread_group_id"), INT2FIX(event->event_data.fork.child_tgid));
85
+
86
+ rb_funcall(cEventHandler, m_call, 3, INT2FIX(event->event_data.fork.parent_pid), ID2SYM(proc_fork), extra_data);
87
+ return INT2FIX(1);
88
+
89
+ case PROC_EVENT_NONE:
90
+ case PROC_EVENT_EXEC:
91
+ case PROC_EVENT_UID:
92
+ case PROC_EVENT_GID:
93
+ break;
94
+ }
95
+ }
96
+
97
+ return Qnil;
98
+ }
99
+
100
+
101
+ #define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
102
+ sizeof(int))
103
+
104
+ void
105
+ connect_to_netlink()
106
+ {
107
+ struct sockaddr_nl sa_nl; /* netlink interface info */
108
+ char buff[NL_MESSAGE_SIZE];
109
+ struct nlmsghdr *hdr; /* for telling netlink what we want */
110
+ struct cn_msg *msg; /* the actual connector message */
111
+
112
+ /* connect to netlink socket */
113
+ nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
114
+
115
+ if (-1 == nl_sock) {
116
+ rb_raise(rb_eStandardError, strerror(errno));
117
+ }
118
+
119
+ bzero(&sa_nl, sizeof(sa_nl));
120
+ sa_nl.nl_family = AF_NETLINK;
121
+ sa_nl.nl_groups = CN_IDX_PROC;
122
+ sa_nl.nl_pid = getpid();
123
+
124
+ if (-1 == bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl))) {
125
+ rb_raise(rb_eStandardError, strerror(errno));
126
+ }
127
+
128
+ /* Fill header */
129
+ hdr = (struct nlmsghdr *)buff;
130
+ hdr->nlmsg_len = NL_MESSAGE_SIZE;
131
+ hdr->nlmsg_type = NLMSG_DONE;
132
+ hdr->nlmsg_flags = 0;
133
+ hdr->nlmsg_seq = 0;
134
+ hdr->nlmsg_pid = getpid();
135
+
136
+ /* Fill message */
137
+ msg = (struct cn_msg *)NLMSG_DATA(hdr);
138
+ msg->id.idx = CN_IDX_PROC; /* Connecting to process information */
139
+ msg->id.val = CN_VAL_PROC;
140
+ msg->seq = 0;
141
+ msg->ack = 0;
142
+ msg->flags = 0;
143
+ msg->len = sizeof(int);
144
+ *(int*)msg->data = PROC_CN_MCAST_LISTEN;
145
+
146
+ if (-1 == send(nl_sock, hdr, hdr->nlmsg_len, 0)) {
147
+ rb_raise(rb_eStandardError, strerror(errno));
148
+ }
149
+ }
150
+
151
+ void
152
+ Init_netlink_handler_ext()
153
+ {
154
+ proc_exit = rb_intern("proc_exit");
155
+ proc_fork = rb_intern("proc_fork");
156
+ m_call = rb_intern("call");
157
+ m_watching_pid = rb_intern("watching_pid?");
158
+
159
+ mGod = rb_const_get(rb_cObject, rb_intern("God"));
160
+ cEventHandler = rb_const_get(mGod, rb_intern("EventHandler"));
161
+ cNetlinkHandler = rb_define_class_under(mGod, "NetlinkHandler", rb_cObject);
162
+ rb_define_singleton_method(cNetlinkHandler, "handle_events", nlh_handle_events, 0);
163
+
164
+ connect_to_netlink();
165
+ }
166
+
167
+ #endif
@@ -0,0 +1,42 @@
1
#!/bin/bash
#
# god       Startup script for god (http://god.rubyforge.org)
#
# chkconfig: - 85 15
# description: God is an easy to configure, easy to extend monitoring \
#              framework written in Ruby.
#

# Directory holding master.conf, and the god executable run through ruby.
CONF_DIR=/etc/god
GOD_BIN=/usr/bin/god

RETVAL=0

# Go no further if config directory is missing.
[ -d "$CONF_DIR" ] || exit 0

case "$1" in
    start)
      # Launch god with the master configuration
      ruby "$GOD_BIN" -c "$CONF_DIR/master.conf"
      RETVAL=$?
  ;;
    stop)
      ruby "$GOD_BIN" terminate
      RETVAL=$?
  ;;
    restart)
      # The exit status reported is that of the start step only
      ruby "$GOD_BIN" terminate
      ruby "$GOD_BIN" -c "$CONF_DIR/master.conf"
      RETVAL=$?
  ;;
    status)
      ruby "$GOD_BIN" status
      RETVAL=$?
  ;;
    *)
      echo "Usage: god {start|stop|restart|status}"
      exit 1
  ;;
esac

exit $RETVAL
@@ -0,0 +1,649 @@
1
+ $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # core
7
+ require 'stringio'
8
+ require 'fileutils'
9
+
10
+ begin
11
+ require 'fastthread'
12
+ rescue LoadError
13
+ ensure
14
+ require 'thread'
15
+ end
16
+
17
+ # stdlib
18
+
19
+ # internal requires
20
+ require 'god/errors'
21
+ require 'god/simple_logger'
22
+ require 'god/logger'
23
+
24
+ require 'god/system/process'
25
+ require 'god/system/portable_poller'
26
+ require 'god/system/slash_proc_poller'
27
+
28
+ require 'god/dependency_graph'
29
+ require 'god/timeline'
30
+ require 'god/configurable'
31
+
32
+ require 'god/task'
33
+
34
+ require 'god/behavior'
35
+ require 'god/behaviors/clean_pid_file'
36
+ require 'god/behaviors/clean_unix_socket'
37
+ require 'god/behaviors/notify_when_flapping'
38
+
39
+ require 'god/condition'
40
+ require 'god/conditions/process_running'
41
+ require 'god/conditions/process_exits'
42
+ require 'god/conditions/tries'
43
+ require 'god/conditions/memory_usage'
44
+ require 'god/conditions/cpu_usage'
45
+ require 'god/conditions/always'
46
+ require 'god/conditions/lambda'
47
+ require 'god/conditions/degrading_lambda'
48
+ require 'god/conditions/flapping'
49
+ require 'god/conditions/http_response_code'
50
+ require 'god/conditions/disk_usage'
51
+ require 'god/conditions/complex'
52
+ require 'god/conditions/file_mtime'
53
+
54
+ require 'god/contact'
55
+ require 'god/contacts/email'
56
+ begin
57
+ require 'god/contacts/twitterer'
58
+ rescue LoadError
59
+ end
60
+ begin
61
+ require 'god/contacts/jabber'
62
+ rescue LoadError
63
+ end
64
+
65
+ require 'god/socket'
66
+ require 'god/driver'
67
+
68
+ require 'god/metric'
69
+ require 'god/watch'
70
+
71
+ require 'god/trigger'
72
+ require 'god/event_handler'
73
+ require 'god/registry'
74
+ require 'god/process'
75
+
76
+ require 'god/sugar'
77
+
78
+ require 'god/cli/version'
79
+ require 'god/cli/command'
80
+
81
+ require 'god/diagnostics'
82
+
83
+ $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
84
+
85
+ # App wide logging system
86
+ LOG = God::Logger.new
87
+
88
+ def applog(watch, level, text)
89
+ LOG.log(watch, level, text)
90
+ end
91
+
92
+ # The $run global determines whether god should be started when the
93
+ # program would normally end. This should be set to true when god
94
+ # should be started (e.g. `god -c <config file>`) and false otherwise
95
+ # (e.g. `god status`)
96
+ $run ||= nil
97
+
98
+ GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
99
+
100
+ # Return the binding of god's root level
101
+ def root_binding
102
+ binding
103
+ end
104
+
105
+ module Kernel
106
+ alias_method :abort_orig, :abort
107
+
108
+ def abort(text = nil)
109
+ $run = false
110
+ applog(nil, :error, text) if text
111
+ exit(1)
112
+ end
113
+
114
+ alias_method :exit_orig, :exit
115
+
116
+ def exit(code = 0)
117
+ $run = false
118
+ exit_orig(code)
119
+ end
120
+ end
121
+
122
+ class Module
123
+ def safe_attr_accessor(*args)
124
+ args.each do |arg|
125
+ define_method((arg.to_s + "=").intern) do |other|
126
+ if !self.running && self.inited
127
+ abort "God.#{arg} must be set before any Tasks are defined"
128
+ end
129
+
130
+ if self.running && self.inited
131
+ applog(nil, :warn, "God.#{arg} can't be set while god is running")
132
+ return
133
+ end
134
+
135
+ instance_variable_set(('@' + arg.to_s).intern, other)
136
+ end
137
+
138
+ define_method(arg) do
139
+ instance_variable_get(('@' + arg.to_s).intern)
140
+ end
141
+ end
142
+ end
143
+ end
144
+
145
+ module God
146
+ VERSION = '0.7.9'
147
+
148
+ LOG_BUFFER_SIZE_DEFAULT = 100
149
+ PID_FILE_DIRECTORY_DEFAULTS = ['/var/run/god', '~/.god/pids']
150
+ DRB_PORT_DEFAULT = 17165
151
+ DRB_ALLOW_DEFAULT = ['127.0.0.1']
152
+ LOG_LEVEL_DEFAULT = :info
153
+
154
+ class << self
155
+ # user configurable
156
+ safe_attr_accessor :pid,
157
+ :host,
158
+ :port,
159
+ :allow,
160
+ :log_buffer_size,
161
+ :pid_file_directory,
162
+ :log_file,
163
+ :log_level,
164
+ :use_events
165
+
166
+ # internal
167
+ attr_accessor :inited,
168
+ :running,
169
+ :pending_watches,
170
+ :pending_watch_states,
171
+ :server,
172
+ :watches,
173
+ :groups,
174
+ :contacts,
175
+ :contact_groups,
176
+ :main
177
+ end
178
+
179
+ # initialize class instance variables
180
+ self.pid = nil
181
+ self.host = nil
182
+ self.port = nil
183
+ self.allow = nil
184
+ self.log_buffer_size = nil
185
+ self.pid_file_directory = nil
186
+ self.log_level = nil
187
+
188
+ # Initialize internal data.
189
+ #
190
+ # Returns nothing
191
+ def self.internal_init
192
+ # only do this once
193
+ return if self.inited
194
+
195
+ # variable init
196
+ self.watches = {}
197
+ self.groups = {}
198
+ self.pending_watches = []
199
+ self.pending_watch_states = {}
200
+ self.contacts = {}
201
+ self.contact_groups = {}
202
+
203
+ # set defaults
204
+ self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
205
+ self.port ||= DRB_PORT_DEFAULT
206
+ self.allow ||= DRB_ALLOW_DEFAULT
207
+ self.log_level ||= LOG_LEVEL_DEFAULT
208
+
209
+ # additional setup
210
+ self.setup
211
+
212
+ # log level
213
+ log_level_map = {:debug => Logger::DEBUG,
214
+ :info => Logger::INFO,
215
+ :warn => Logger::WARN,
216
+ :error => Logger::ERROR,
217
+ :fatal => Logger::FATAL}
218
+ LOG.level = log_level_map[self.log_level]
219
+
220
+ # init has been executed
221
+ self.inited = true
222
+
223
+ # not yet running
224
+ self.running = false
225
+ end
226
+
227
+ # Instantiate a new, empty Watch object and pass it to the mandatory
228
+ # block. The attributes of the watch will be set by the configuration
229
+ # file.
230
+ #
231
+ # Aborts on duplicate watch name
232
+ # invalid watch
233
+ # conflicting group name
234
+ #
235
+ # Returns nothing
236
+ def self.watch(&block)
237
+ self.task(Watch, &block)
238
+ end
239
+
240
+ # Instantiate a new, empty Task object and yield it to the mandatory
241
+ # block. The attributes of the task will be set by the configuration
242
+ # file.
243
+ #
244
+ # Aborts on duplicate task name
245
+ # invalid task
246
+ # conflicting group name
247
+ #
248
+ # Returns nothing
249
+ def self.task(klass = Task)
250
+ self.internal_init
251
+
252
+ t = klass.new
253
+ yield(t)
254
+
255
+ # do the post-configuration
256
+ t.prepare
257
+
258
+ # if running, completely remove the watch (if necessary) to
259
+ # prepare for the reload
260
+ existing_watch = self.watches[t.name]
261
+ if self.running && existing_watch
262
+ self.pending_watch_states[existing_watch.name] = existing_watch.state
263
+ self.unwatch(existing_watch)
264
+ end
265
+
266
+ # ensure the new watch has a unique name
267
+ if self.watches[t.name] || self.groups[t.name]
268
+ abort "Task name '#{t.name}' already used for a Task or Group"
269
+ end
270
+
271
+ # ensure watch is internally valid
272
+ t.valid? || abort("Task '#{t.name}' is not valid (see above)")
273
+
274
+ # add to list of watches
275
+ self.watches[t.name] = t
276
+
277
+ # add to pending watches
278
+ self.pending_watches << t
279
+
280
+ # add to group if specified
281
+ if t.group
282
+ # ensure group name hasn't been used for a watch already
283
+ if self.watches[t.group]
284
+ abort "Group name '#{t.group}' already used for a Task"
285
+ end
286
+
287
+ self.groups[t.group] ||= []
288
+ self.groups[t.group] << t
289
+ end
290
+
291
+ # register watch
292
+ t.register!
293
+
294
+ # log
295
+ if self.running && existing_watch
296
+ applog(t, :info, "#{t.name} Reloaded config")
297
+ elsif self.running
298
+ applog(t, :info, "#{t.name} Loaded config")
299
+ end
300
+ end
301
+
302
+ # Unmonitor and remove the given watch from god.
303
+ # +watch+ is the Watch to remove
304
+ #
305
+ # Returns nothing
306
+ def self.unwatch(watch)
307
+ # unmonitor
308
+ watch.unmonitor unless watch.state == :unmonitored
309
+
310
+ # unregister
311
+ watch.unregister!
312
+
313
+ # remove from watches
314
+ self.watches.delete(watch.name)
315
+
316
+ # remove from groups
317
+ if watch.group
318
+ self.groups[watch.group].delete(watch)
319
+ end
320
+
321
+ applog(watch, :info, "#{watch.name} unwatched")
322
+ end
323
+
324
+ # Instantiate a new Contact of the given kind and send it to the block.
325
+ # Then prepare, validate, and record the Contact.
326
+ # +kind+ is the contact class specifier
327
+ #
328
+ # Aborts on invalid kind
329
+ # duplicate contact name
330
+ # invalid contact
331
+ # conflicting group name
332
+ #
333
+ # Returns nothing
334
+ def self.contact(kind)
335
+ self.internal_init
336
+
337
+ # create the contact
338
+ begin
339
+ c = Contact.generate(kind)
340
+ rescue NoSuchContactError => e
341
+ abort e.message
342
+ end
343
+
344
+ # send to block so config can set attributes
345
+ yield(c) if block_given?
346
+
347
+ # call prepare on the contact
348
+ c.prepare
349
+
350
+ # remove existing contacts of same name
351
+ existing_contact = self.contacts[c.name]
352
+ if self.running && existing_contact
353
+ self.uncontact(existing_contact)
354
+ end
355
+
356
+ # warn and noop if the contact has been defined before
357
+ if self.contacts[c.name] || self.contact_groups[c.name]
358
+ applog(nil, :warn, "Contact name '#{c.name}' already used for a Contact or Contact Group")
359
+ return
360
+ end
361
+
362
+ # abort if the Contact is invalid, the Contact will have printed
363
+ # out its own error messages by now
364
+ unless Contact.valid?(c) && c.valid?
365
+ abort "Exiting on invalid contact"
366
+ end
367
+
368
+ # add to list of contacts
369
+ self.contacts[c.name] = c
370
+
371
+ # add to contact group if specified
372
+ if c.group
373
+ # ensure group name hasn't been used for a contact already
374
+ if self.contacts[c.group]
375
+ abort "Contact Group name '#{c.group}' already used for a Contact"
376
+ end
377
+
378
+ self.contact_groups[c.group] ||= []
379
+ self.contact_groups[c.group] << c
380
+ end
381
+ end
382
+
383
+ # Remove the given contact from god.
384
+ # +contact+ is the Contact to remove
385
+ #
386
+ # Returns nothing
387
+ def self.uncontact(contact)
388
+ self.contacts.delete(contact.name)
389
+ if contact.group
390
+ self.contact_groups[contact.group].delete(contact)
391
+ end
392
+ end
393
+
394
+ # Control the lifecycle of the given task(s).
395
+ # +name+ is the name of a task/group (String)
396
+ # +command+ is the command to run (String)
397
+ # one of: "start"
398
+ # "monitor"
399
+ # "restart"
400
+ # "stop"
401
+ # "unmonitor"
402
+ # "remove"
403
+ #
404
+ # Returns String[]:task_names
405
+ def self.control(name, command)
406
+ # get the list of items
407
+ items = Array(self.watches[name] || self.groups[name]).dup
408
+
409
+ jobs = []
410
+
411
+ # do the command
412
+ case command
413
+ when "start", "monitor"
414
+ items.each { |w| jobs << Thread.new { w.monitor if w.state != :up } }
415
+ when "restart"
416
+ items.each { |w| jobs << Thread.new { w.move(:restart) } }
417
+ when "stop"
418
+ items.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
419
+ when "unmonitor"
420
+ items.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
421
+ when "remove"
422
+ items.each { |w| self.unwatch(w) }
423
+ else
424
+ raise InvalidCommandError.new
425
+ end
426
+
427
+ jobs.each { |j| j.join }
428
+
429
+ items.map { |x| x.name }
430
+ end
431
+
432
+ # Unmonitor and stop all tasks.
433
+ #
434
+ # Returns true on success
435
+ # false if all tasks could not be stopped within 10 seconds
436
+ def self.stop_all
437
+ self.watches.sort.each do |name, w|
438
+ Thread.new do
439
+ w.unmonitor if w.state != :unmonitored
440
+ w.action(:stop) if w.alive?
441
+ end
442
+ end
443
+
444
+ 10.times do
445
+ return true unless self.watches.map { |name, w| w.alive? }.any?
446
+ sleep 1
447
+ end
448
+
449
+ return false
450
+ end
451
+
452
+ # Force the termination of god.
453
+ # * Clean up pid file if one exists
454
+ # * Stop DRb service
455
+ # * Hard exit using exit!
456
+ #
457
+ # Never returns because the process will no longer exist!
458
+ def self.terminate
459
+ FileUtils.rm_f(self.pid) if self.pid
460
+ self.server.stop if self.server
461
+ exit!(0)
462
+ end
463
+
464
# Gather the state and group of each task.
#
# Examples
#   God.status
#   # => { 'mongrel' => { :state => :up, :group => 'mongrels' },
#   #      'nginx'   => { :state => :up, :group => nil } }
#
# Returns { String:task_name => { :state => state, :group => group }, ... }
# where state and group are whatever the task reports for them
def self.status
  info = {}
  # each, not map: the block is run only for its side effect on info
  self.watches.each do |name, w|
    info[name] = {:state => w.state, :group => w.group}
  end
  info
end
478
+
479
+ # Log lines for the given task since the specified time.
480
+ # +watch_name+ is the name of the task (may be abbreviated)
481
+ # +since+ is the Time since which to report log lines
482
+ #
483
+ # Raises God::NoSuchWatchError if no tasks matched
484
+ #
485
+ # Returns String:joined_log_lines
486
+ def self.running_log(watch_name, since)
487
+ matches = pattern_match(watch_name, self.watches.keys)
488
+
489
+ unless matches.first
490
+ raise NoSuchWatchError.new
491
+ end
492
+
493
+ LOG.watch_log_since(matches.first, since)
494
+ end
495
+
496
+ # Load a config file into a running god instance. Rescues any exceptions
497
+ # that the config may raise and reports these back to the caller.
498
+ # +code+ is a String containing the config file
499
+ # +filename+ is the filename of the config file
500
+ #
501
+ # Returns [String[]:task_names, String:errors]
502
+ def self.running_load(code, filename)
503
+ errors = ""
504
+ watches = []
505
+
506
+ begin
507
+ LOG.start_capture
508
+
509
+ Gem.clear_paths
510
+ eval(code, root_binding, filename)
511
+ self.pending_watches.each do |w|
512
+ if previous_state = self.pending_watch_states[w.name]
513
+ w.monitor unless previous_state == :unmonitored
514
+ else
515
+ w.monitor if w.autostart?
516
+ end
517
+ end
518
+ watches = self.pending_watches.dup
519
+ self.pending_watches.clear
520
+ self.pending_watch_states.clear
521
+ rescue Exception => e
522
+ # don't ever let running_load take down god
523
+ errors << LOG.finish_capture
524
+
525
+ unless e.instance_of?(SystemExit)
526
+ errors << e.message << "\n"
527
+ errors << e.backtrace.join("\n")
528
+ end
529
+ end
530
+
531
+ names = watches.map { |x| x.name }
532
+ [names, errors]
533
+ end
534
+
535
+ # Load the given file(s) according to the given glob.
536
+ # +glob+ is the glob-enabled path to load
537
+ #
538
+ # Returns nothing
539
+ def self.load(glob)
540
+ Dir[glob].each do |f|
541
+ Kernel.load f
542
+ end
543
+ end
544
+
545
# Make sure god has a usable pid file directory.
#
# When God.pid_file_directory is already set, create it if it does not
# exist and verify that it is writable. Otherwise try each entry of
# PID_FILE_DIRECTORY_DEFAULTS in order and adopt the first one that can be
# created and is writable.
#
# Aborts on failure to create the configured directory
#           configured directory not writable
#           no usable default directory
#
# Returns nothing
def self.setup
  if self.pid_file_directory
    # pid file dir was specified, ensure it is created and writable
    unless File.exist?(self.pid_file_directory)
      begin
        FileUtils.mkdir_p(self.pid_file_directory)
      rescue SystemCallError => e
        # any OS-level failure (EACCES, EROFS, ENOTDIR, ...) is reported
        # the same way instead of escaping as a raw exception
        abort "Failed to create pid file directory: #{e.message}"
      end
    end

    unless File.writable?(self.pid_file_directory)
      # Etc is only needed for this message; load it at the point of use
      require 'etc'
      abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
    end
  else
    # no pid file dir specified, try defaults
    PID_FILE_DIRECTORY_DEFAULTS.each do |idir|
      dir = File.expand_path(idir)
      begin
        FileUtils.mkdir_p(dir)
        if File.writable?(dir)
          self.pid_file_directory = dir
          break
        end
      rescue SystemCallError
        # this candidate is unusable; fall through to the next default
      end
    end

    unless self.pid_file_directory
      dirs = PID_FILE_DIRECTORY_DEFAULTS.map { |x| File.expand_path(x) }
      abort "No pid file directory exists, could be created, or is writable at any of #{dirs.join(', ')}"
    end
  end

  applog(nil, :info, "Using pid file directory: #{self.pid_file_directory}")
end
581
+
582
+ # Initialize and startup the machinery that makes god work.
583
+ #
584
+ # Returns nothing
585
+ def self.start
586
+ self.internal_init
587
+
588
+ # instantiate server
589
+ self.server = Socket.new(self.port)
590
+
591
+ # start monitoring any watches set to autostart
592
+ self.watches.values.each { |w| w.monitor if w.autostart? }
593
+
594
+ # clear pending watches
595
+ self.pending_watches.clear
596
+
597
+ # mark as running
598
+ self.running = true
599
+
600
+ # don't exit
601
+ self.main =
602
+ Thread.new do
603
+ loop do
604
+ sleep 60
605
+ end
606
+ end
607
+
608
+ self.main.join
609
+ end
610
+
611
+ # To be called on program exit to start god
612
+ #
613
+ # Returns nothing
614
+ def self.at_exit
615
+ self.start
616
+ end
617
+
618
+ # private
619
+
620
# Match a shortened pattern against a list of String candidates.
# The pattern is expanded into a regular expression by
# inserting .* between each character. Every character of the
# pattern is matched literally, so regexp metacharacters such as
# '.' or '(' in a task name are safe to use.
#   +pattern+ is the String containing the abbreviation
#   +list+ is the Array of Strings to match against
#
# Examples
#
#   list = %w{ foo bar bars }
#   pattern = 'br'
#   God.pattern_match(pattern, list)
#   # => ['bar', 'bars']
#
# Returns String[]:matched_elements
def self.pattern_match(pattern, list)
  # escape each character so it cannot act as a regexp operator, and
  # build the expression once rather than once per candidate
  escaped = pattern.split('').map { |char| Regexp.escape(char) }
  regex = Regexp.new(escaped.join('.*'))

  list.select do |item|
    item =~ regex
  end
end
641
+ end
642
+
643
+ # Runs immediately before the program exits. If $run is true,
644
+ # start god, if $run is false, exit normally.
645
+ #
646
+ # Returns nothing
647
+ at_exit do
648
+ God.at_exit if $run
649
+ end