mcproc 2016.2.20

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +7 -0
  2. data/Announce.txt +135 -0
  3. data/Gemfile +9 -0
  4. data/History.txt +469 -0
  5. data/LICENSE +22 -0
  6. data/README.md +37 -0
  7. data/Rakefile +185 -0
  8. data/TODO.md +37 -0
  9. data/bin/mcproc +134 -0
  10. data/doc/intro.asciidoc +20 -0
  11. data/doc/mcproc.asciidoc +1592 -0
  12. data/ext/god/.gitignore +5 -0
  13. data/ext/god/extconf.rb +56 -0
  14. data/ext/god/kqueue_handler.c +133 -0
  15. data/ext/god/netlink_handler.c +182 -0
  16. data/lib/god.rb +780 -0
  17. data/lib/god/behavior.rb +52 -0
  18. data/lib/god/behaviors/clean_pid_file.rb +21 -0
  19. data/lib/god/behaviors/clean_unix_socket.rb +21 -0
  20. data/lib/god/behaviors/notify_when_flapping.rb +51 -0
  21. data/lib/god/cli/command.rb +268 -0
  22. data/lib/god/cli/run.rb +170 -0
  23. data/lib/god/cli/version.rb +23 -0
  24. data/lib/god/compat19.rb +33 -0
  25. data/lib/god/condition.rb +96 -0
  26. data/lib/god/conditions/always.rb +36 -0
  27. data/lib/god/conditions/complex.rb +86 -0
  28. data/lib/god/conditions/cpu_usage.rb +80 -0
  29. data/lib/god/conditions/degrading_lambda.rb +52 -0
  30. data/lib/god/conditions/disk_usage.rb +32 -0
  31. data/lib/god/conditions/file_mtime.rb +28 -0
  32. data/lib/god/conditions/file_touched.rb +44 -0
  33. data/lib/god/conditions/flapping.rb +128 -0
  34. data/lib/god/conditions/http_response_code.rb +184 -0
  35. data/lib/god/conditions/lambda.rb +25 -0
  36. data/lib/god/conditions/memory_usage.rb +82 -0
  37. data/lib/god/conditions/process_exits.rb +66 -0
  38. data/lib/god/conditions/process_running.rb +63 -0
  39. data/lib/god/conditions/socket_responding.rb +142 -0
  40. data/lib/god/conditions/tries.rb +44 -0
  41. data/lib/god/configurable.rb +57 -0
  42. data/lib/god/contact.rb +114 -0
  43. data/lib/god/contacts/airbrake.rb +44 -0
  44. data/lib/god/contacts/campfire.rb +121 -0
  45. data/lib/god/contacts/email.rb +130 -0
  46. data/lib/god/contacts/hipchat.rb +117 -0
  47. data/lib/god/contacts/jabber.rb +75 -0
  48. data/lib/god/contacts/prowl.rb +57 -0
  49. data/lib/god/contacts/scout.rb +55 -0
  50. data/lib/god/contacts/sensu.rb +59 -0
  51. data/lib/god/contacts/slack.rb +98 -0
  52. data/lib/god/contacts/statsd.rb +46 -0
  53. data/lib/god/contacts/twitter.rb +51 -0
  54. data/lib/god/contacts/webhook.rb +74 -0
  55. data/lib/god/driver.rb +238 -0
  56. data/lib/god/errors.rb +24 -0
  57. data/lib/god/event_handler.rb +112 -0
  58. data/lib/god/event_handlers/dummy_handler.rb +13 -0
  59. data/lib/god/event_handlers/kqueue_handler.rb +17 -0
  60. data/lib/god/event_handlers/netlink_handler.rb +13 -0
  61. data/lib/god/logger.rb +109 -0
  62. data/lib/god/metric.rb +87 -0
  63. data/lib/god/process.rb +381 -0
  64. data/lib/god/registry.rb +32 -0
  65. data/lib/god/simple_logger.rb +59 -0
  66. data/lib/god/socket.rb +113 -0
  67. data/lib/god/sugar.rb +62 -0
  68. data/lib/god/sys_logger.rb +45 -0
  69. data/lib/god/system/portable_poller.rb +42 -0
  70. data/lib/god/system/process.rb +50 -0
  71. data/lib/god/system/slash_proc_poller.rb +92 -0
  72. data/lib/god/task.rb +552 -0
  73. data/lib/god/timeline.rb +25 -0
  74. data/lib/god/trigger.rb +43 -0
  75. data/lib/god/watch.rb +340 -0
  76. data/mcproc.gemspec +192 -0
  77. data/test/configs/child_events/child_events.god +44 -0
  78. data/test/configs/child_events/simple_server.rb +3 -0
  79. data/test/configs/child_polls/child_polls.god +37 -0
  80. data/test/configs/child_polls/simple_server.rb +12 -0
  81. data/test/configs/complex/complex.god +59 -0
  82. data/test/configs/complex/simple_server.rb +3 -0
  83. data/test/configs/contact/contact.god +118 -0
  84. data/test/configs/contact/simple_server.rb +3 -0
  85. data/test/configs/daemon_events/daemon_events.god +37 -0
  86. data/test/configs/daemon_events/simple_server.rb +8 -0
  87. data/test/configs/daemon_events/simple_server_stop.rb +11 -0
  88. data/test/configs/daemon_polls/daemon_polls.god +17 -0
  89. data/test/configs/daemon_polls/simple_server.rb +6 -0
  90. data/test/configs/degrading_lambda/degrading_lambda.god +31 -0
  91. data/test/configs/degrading_lambda/tcp_server.rb +15 -0
  92. data/test/configs/keepalive/keepalive.god +9 -0
  93. data/test/configs/keepalive/keepalive.rb +12 -0
  94. data/test/configs/lifecycle/lifecycle.god +25 -0
  95. data/test/configs/matias/matias.god +50 -0
  96. data/test/configs/real.rb +59 -0
  97. data/test/configs/running_load/running_load.god +16 -0
  98. data/test/configs/stop_options/simple_server.rb +12 -0
  99. data/test/configs/stop_options/stop_options.god +39 -0
  100. data/test/configs/stress/simple_server.rb +3 -0
  101. data/test/configs/stress/stress.god +15 -0
  102. data/test/configs/task/logs/.placeholder +0 -0
  103. data/test/configs/task/task.god +26 -0
  104. data/test/configs/test.rb +61 -0
  105. data/test/configs/usr1_trapper.rb +10 -0
  106. data/test/helper.rb +172 -0
  107. data/test/suite.rb +6 -0
  108. data/test/test_airbrake.rb +14 -0
  109. data/test/test_behavior.rb +18 -0
  110. data/test/test_campfire.rb +22 -0
  111. data/test/test_condition.rb +52 -0
  112. data/test/test_conditions_disk_usage.rb +50 -0
  113. data/test/test_conditions_http_response_code.rb +109 -0
  114. data/test/test_conditions_process_running.rb +40 -0
  115. data/test/test_conditions_socket_responding.rb +176 -0
  116. data/test/test_conditions_tries.rb +67 -0
  117. data/test/test_contact.rb +109 -0
  118. data/test/test_driver.rb +26 -0
  119. data/test/test_email.rb +34 -0
  120. data/test/test_event_handler.rb +82 -0
  121. data/test/test_god.rb +710 -0
  122. data/test/test_god_system.rb +201 -0
  123. data/test/test_handlers_kqueue_handler.rb +16 -0
  124. data/test/test_hipchat.rb +23 -0
  125. data/test/test_jabber.rb +29 -0
  126. data/test/test_logger.rb +55 -0
  127. data/test/test_metric.rb +74 -0
  128. data/test/test_process.rb +263 -0
  129. data/test/test_prowl.rb +15 -0
  130. data/test/test_registry.rb +15 -0
  131. data/test/test_sensu.rb +11 -0
  132. data/test/test_slack.rb +57 -0
  133. data/test/test_socket.rb +34 -0
  134. data/test/test_statsd.rb +22 -0
  135. data/test/test_sugar.rb +42 -0
  136. data/test/test_system_portable_poller.rb +17 -0
  137. data/test/test_system_process.rb +30 -0
  138. data/test/test_task.rb +246 -0
  139. data/test/test_timeline.rb +37 -0
  140. data/test/test_trigger.rb +63 -0
  141. data/test/test_watch.rb +286 -0
  142. data/test/test_webhook.rb +22 -0
  143. metadata +475 -0
@@ -0,0 +1,32 @@
1
module God
  # Lazily builds the module-wide Registry and hands back the same instance
  # on every later call.
  #
  # Returns the shared God::Registry
  def self.registry
    @registry ||= Registry.new
  end

  # A small name-keyed store. Items only need to respond to +name+; no type
  # check is performed on what gets added.
  class Registry
    def initialize
      @storage = Hash.new
    end

    # Store +item+ under its name, replacing any entry already using that name.
    #
    # Returns the item
    def add(item)
      @storage.store(item.name, item)
    end

    # Drop the entry stored under the name of +item+.
    #
    # Returns the removed entry, or nil if nothing was stored under that name
    def remove(item)
      key = item.name
      @storage.delete(key)
    end

    # Returns the Integer number of stored entries
    def size
      @storage.length
    end

    # Look up an entry by +name+.
    #
    # Returns the entry, or nil if the name is unknown
    def [](name)
      @storage.fetch(name, nil)
    end

    # Remove every entry.
    def reset
      @storage.clear
    end
  end
end
@@ -0,0 +1,59 @@
1
module God

  # A minimal severity-filtered logger that prints one formatted line per
  # message to the IO object it was given.
  class SimpleLogger
    DEBUG = 2
    INFO = 4
    WARN = 8
    ERROR = 16
    FATAL = 32

    # Severity constant => label printed in each log line.
    SEV_LABEL = {DEBUG => 'DEBUG',
                 INFO => 'INFO',
                 WARN => 'WARN',
                 ERROR => 'ERROR',
                 FATAL => 'FATAL'}.freeze

    # Severity constant => symbolic level name.
    CONSTANT_TO_SYMBOL = { DEBUG => :debug,
                           INFO => :info,
                           WARN => :warn,
                           ERROR => :error,
                           FATAL => :fatal }.freeze

    attr_accessor :datetime_format, :level

    # Create a logger writing to +io+ (anything responding to +print+).
    # The level starts at INFO and timestamps use "%Y-%m-%d %H:%M:%S".
    def initialize(io)
      @io = io
      @level = INFO
      @datetime_format = "%Y-%m-%d %H:%M:%S"
    end

    # Write +msg+ at severity +level+ unless +level+ is below the current
    # threshold.
    # +level+ is one of the severity constants; a value without an entry in
    #         SEV_LABEL is printed with the label 'ANY' instead of raising
    # +msg+ is the message to print
    #
    # Returns nothing meaningful
    def output(level, msg)
      return if level < self.level

      time = Time.now.strftime(self.datetime_format)
      label = SEV_LABEL.fetch(level, 'ANY')
      @io.print("#{label[0..0]} [#{time}] #{label.rjust(5)}: #{msg}\n")
    end

    # Log +msg+ at FATAL severity.
    def fatal(msg)
      self.output(FATAL, msg)
    end

    # Log +msg+ at ERROR severity.
    def error(msg)
      self.output(ERROR, msg)
    end

    # Log +msg+ at WARN severity.
    def warn(msg)
      self.output(WARN, msg)
    end

    # Log +msg+ at INFO severity.
    def info(msg)
      self.output(INFO, msg)
    end

    # Log +msg+ at DEBUG severity.
    def debug(msg)
      self.output(DEBUG, msg)
    end
  end

end
data/lib/god/socket.rb ADDED
@@ -0,0 +1,113 @@
1
+ require 'drb'
2
+
3
module God

  # God::Socket oversees the DRb server which dishes out info on this God
  # daemon over a unix domain socket.
  class Socket
    attr_reader :port

    # The location of the socket for a given port
    # +port+ is the port number
    #
    # Returns String (file location)
    def self.socket_file(port)
      "/tmp/god.#{port}.sock"
    end

    # The address of the socket for a given port
    # +port+ is the port number
    #
    # Returns String (drb address)
    def self.socket(port)
      "drbunix://#{self.socket_file(port)}"
    end

    # The location of the socket for this instance
    #
    # Returns String (file location)
    def socket_file
      self.class.socket_file(@port)
    end

    # The address of the socket for this instance
    #
    # Returns String (drb address)
    def socket
      self.class.socket(@port)
    end

    # Create a new instance and start the DRb server
    # +port+ is the port on which to start the DRb service (default nil)
    # +user+ is the user name (String) or uid (Integer) that should own the
    #        socket file (default nil, ownership untouched)
    # +group+ is the group name (String) or gid (Integer) that should own the
    #         socket file (default nil)
    # +perm+ is the permission mode for the socket file, anything accepted by
    #        Kernel#Integer (default nil, mode untouched)
    def initialize(port = nil, user = nil, group = nil, perm = nil)
      @port = port
      @user = user
      @group = group
      @perm = perm
      start
    end

    # Returns true
    def ping
      true
    end

    # Forward API calls to God
    #
    # Returns whatever the forwarded call returns
    def method_missing(*args, &block)
      God.send(*args, &block)
    end

    # Stop the DRb server and delete the socket file
    #
    # Returns nothing
    def stop
      DRb.stop_service
      FileUtils.rm_f(self.socket_file)
    end

    private

    # Start the DRb server. Abort if there is already a running god instance
    # on the socket. If the socket is in use but nothing answers a ping within
    # 5 seconds, the socket file is treated as stale, deleted, and reopened.
    # Afterwards the requested permissions and ownership are applied to the
    # socket file.
    #
    # Returns nothing
    def start
      begin
        @drb ||= DRb.start_service(self.socket, self)
        applog(nil, :info, "Started on #{DRb.uri}")
      rescue Errno::EADDRINUSE
        applog(nil, :info, "Socket already in use")
        server = DRbObject.new(nil, self.socket)

        begin
          Timeout.timeout(5) do
            server.ping
          end
          # abort raises SystemExit, which the rescue below does not catch.
          abort "Socket #{self.socket} already in use by another instance of god"
        rescue StandardError, Timeout::Error
          applog(nil, :info, "Socket is stale, reopening")
          File.delete(self.socket_file) rescue nil
          @drb ||= DRb.start_service(self.socket, self)
          applog(nil, :info, "Started on #{DRb.uri}")
        end
      end

      # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
      if File.exist?(self.socket_file)
        uid = nil
        gid = nil

        if @user
          user_method = @user.is_a?(Integer) ? :getpwuid : :getpwnam
          passwd = Etc.send(user_method, @user)
          uid = passwd.uid
          gid = passwd.gid
        end
        if @group
          # An explicit group overrides the user's primary group.
          group_method = @group.is_a?(Integer) ? :getgrgid : :getgrnam
          gid = Etc.send(group_method, @group).gid
        end

        File.chmod(Integer(@perm), socket_file) if @perm
        File.chown(uid, gid, socket_file) if uid || gid
      end
    end
  end

end
data/lib/god/sugar.rb ADDED
@@ -0,0 +1,62 @@
1
class Numeric
  # Public: Seconds are the base time unit, so the receiver comes back as-is.
  def seconds
    self
  end
  alias_method :second, :seconds

  # Public: Minutes expressed in seconds (x 60).
  def minutes
    self * 60
  end
  alias_method :minute, :minutes

  # Public: Hours expressed in seconds (x 3600).
  def hours
    self * 3_600
  end
  alias_method :hour, :hours

  # Public: Days expressed in seconds (x 86400).
  def days
    self * 86_400
  end
  alias_method :day, :days

  # Kilobytes are the base size unit, so the receiver comes back as-is.
  def kilobytes
    self
  end
  alias_method :kilobyte, :kilobytes

  # Megabytes expressed in kilobytes (x 1024).
  def megabytes
    self * 1_024
  end
  alias_method :megabyte, :megabytes

  # Gigabytes expressed in kilobytes (x 1,048,576).
  def gigabytes
    self * 1_048_576
  end
  alias_method :gigabyte, :gigabytes

  # Readability helper for percentages, e.g. 50.percent; returns the receiver.
  def percent
    self
  end
end
@@ -0,0 +1,45 @@
1
# Load syslog support and define God::SysLogger. If anything goes wrong (for
# example the syslog library cannot be loaded) a notice is printed and
# God::SysLogger is left undefined.
begin
  require 'syslog'

  # Ensure that Syslog is open
  begin
    Syslog.open('god')
  rescue RuntimeError
    # Syslog.open raises RuntimeError when syslog is already open.
    Syslog.reopen('god')
  end

  Syslog.info("Syslog enabled.")

  module God

    # Thin wrapper that translates god's symbolic log levels to syslog
    # priorities.
    class SysLogger
      # Symbolic level => syslog priority constant.
      SYMBOL_EQUIVALENTS = { :fatal => Syslog::LOG_CRIT,
                             :error => Syslog::LOG_ERR,
                             :warn => Syslog::LOG_WARNING,
                             :info => Syslog::LOG_INFO,
                             :debug => Syslog::LOG_DEBUG }.freeze

      # Set the log level
      # +level+ is the Symbol level to set as maximum. One of:
      #         [:fatal | :error | :warn | :info | :debug ]
      #
      # Returns Nothing
      def self.level=(level)
        Syslog.mask = Syslog::LOG_UPTO(SYMBOL_EQUIVALENTS[level])
      end

      # Log a message to syslog.
      # +level+ is the Symbol level of the message. One of:
      #         [:fatal | :error | :warn | :info | :debug ]
      # +text+ is the String text of the message
      #
      # Returns Nothing
      def self.log(level, text)
        # '%s' keeps any % sequences in +text+ from being read as format
        # directives.
        Syslog.log(SYMBOL_EQUIVALENTS[level], '%s', text)
      end
    end

  end
# ScriptError covers LoadError (syslog unavailable); unlike `rescue Object`
# this does not swallow Interrupt or SystemExit.
rescue ScriptError, StandardError => e
  puts "Syslog could not be enabled: #{e.message}"
end
@@ -0,0 +1,42 @@
1
module God
  module System
    # Polls per-process resource usage by running the +ps+ command for the
    # pid given at construction time.
    class PortablePoller
      # +pid+ is the process id to poll
      def initialize(pid)
        @pid = pid
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        ps_int('rss')
      end

      # Percentage memory usage
      def percent_memory
        ps_float('%mem')
      end

      # Percentage CPU usage
      def percent_cpu
        ps_float('%cpu')
      end

      private

      # Returns the +keyword+ column of ps as an Integer (0 if there is no
      # numeric output)
      def ps_int(keyword)
        ps_output(keyword).to_i
      end

      # Returns the +keyword+ column of ps as a Float (0.0 if there is no
      # numeric output)
      def ps_float(keyword)
        ps_output(keyword).to_f
      end

      # Returns the +keyword+ column of ps as a whitespace-stripped String
      def ps_string(keyword)
        ps_output(keyword).strip
      end

      # Run `ps -o <keyword>= -p <pid>` and return its raw standard output.
      # The command is passed as an argument list so no shell is involved and
      # neither +keyword+ nor the pid can be interpreted as shell syntax.
      def ps_output(keyword)
        IO.popen(['ps', '-o', "#{keyword}=", '-p', @pid.to_s], &:read)
      end

      # Convert a "<minutes>:<ss>.<ss>" time string to whole seconds; the
      # fractional part is ignored.
      #
      # Returns Integer seconds (0 when +text+ does not match)
      def time_string_to_seconds(text)
        match = text.match(/(\d+):(\d{2}).(\d{2})/)
        return 0 unless match

        (match[1].to_i * 60) + match[2].to_i
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module God
  module System

    # Represents a single OS process by pid and reports its liveness and
    # resource usage through whichever poller class is usable on this system.
    class Process
      # Pick the poller class once and reuse the choice for every instance.
      #
      # Returns SlashProcPoller if it reports itself usable, else PortablePoller
      def self.fetch_system_poller
        @@poller ||= if SlashProcPoller.usable?
          SlashProcPoller
        else
          PortablePoller
        end
      end

      # +pid+ is the process id; it is coerced with to_i
      def initialize(pid)
        @pid = pid.to_i
        @poller = self.class.fetch_system_poller.new(@pid)
      end

      # Return true if this process is running, false otherwise
      def exists?
        # Signal 0 to pid 0 or a negative pid targets process groups rather
        # than a single process, so it says nothing about "this process".
        return false unless @pid > 0

        ::Process.kill(0, @pid)
        true
      rescue Errno::EPERM
        # The process exists but belongs to a user we may not signal.
        true
      rescue StandardError
        # Errno::ESRCH (no such process) and anything else: not running.
        false
      end

      # Memory usage in kilobytes (resident set size)
      def memory
        @poller.memory
      end

      # Percentage memory usage
      def percent_memory
        @poller.percent_memory
      end

      # Percentage CPU usage
      def percent_cpu
        @poller.percent_cpu
      end

      private

      # Instance-level access to the poller class; delegates to the class
      # method so the selection logic lives in one place.
      def fetch_system_poller
        self.class.fetch_system_poller
      end
    end

  end
end
@@ -0,0 +1,92 @@
1
module God
  module System
    # Polls per-process resource usage by reading the /proc filesystem.
    class SlashProcPoller < PortablePoller
      @@kb_per_page = 4 # TODO: Need to make this portable
      @@hertz = 100
      @@total_mem = nil

      MeminfoPath = '/proc/meminfo'
      UptimePath = '/proc/uptime'

      RequiredPaths = [MeminfoPath, UptimePath]

      # Keys for the whitespace-separated fields of /proc/<pid>/stat, in file
      # order. The nil entry marks a field that is read but not stored.
      StatFields = [
        :pid, :comm, :state, :ppid, :pgrp,
        :session, :tty_nr, :tpgid, :flags,
        :minflt, :cminflt, :majflt, :cmajflt,
        :utime, :stime, :cutime, :cstime,
        :priority, :nice, nil, :itrealvalue,
        :starttime, :vsize, :rss, :rlim,
        :startcode, :endcode, :startstack, :kstkesp,
        :kstkeip, :signal, :blocked, :sigignore,
        :sigcatch, :wchan, :nswap, :cnswap,
        :exit_signal, :processor, :rt_priority,
        :policy
      ].freeze

      # FreeBSD has /proc by default, but nothing mounted there!
      # So we should check for the actual required paths!
      # Returns true if +RequiredPaths+ are readable.
      def self.usable?
        RequiredPaths.all? do |path|
          File.readable?(path) && readable?(path)
        end
      end

      # Some systems (CentOS?) have a /proc, but they can hang when trying to
      # read from them. Try to use this sparingly as it is expensive.
      #
      # Returns the file contents (truthy) if +path+ could be read within one
      # second, false if the read timed out
      def self.readable?(path)
        # Timeout.timeout, not the bare Kernel#timeout, which is deprecated
        # and absent from modern Ruby.
        Timeout.timeout(1) { File.read(path) }
      rescue Timeout::Error
        false
      end

      # +pid+ is the process id to poll. The first instance also caches the
      # second token of the first line of /proc/meminfo as the total memory.
      def initialize(pid)
        super(pid)

        unless @@total_mem # in K
          File.open(MeminfoPath) do |f|
            @@total_mem = f.gets.split[1]
          end
        end
      end

      # Memory usage in kilobytes (resident pages * assumed page size)
      def memory
        stat[:rss].to_i * @@kb_per_page
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # Percentage memory usage
      def percent_memory
        (memory / @@total_mem.to_f) * 100
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      # Percentage CPU usage, averaged over the lifetime of the process
      #
      # TODO: Change this to calculate the wma instead
      def percent_cpu
        stats = stat
        total_time = stats[:utime].to_i + stats[:stime].to_i # in jiffies
        seconds = uptime - stats[:starttime].to_i / @@hertz
        if seconds == 0
          0
        else
          ((total_time * 1000 / @@hertz) / seconds) / 10
        end
      rescue # This shouldn't fail if there's an error (or proc doesn't exist)
        0
      end

      private

      # in seconds
      def uptime
        File.read(UptimePath).split[0].to_f
      end

      # Read and parse /proc/<pid>/stat for this poller's pid.
      #
      # Returns Hash of Symbol field name => String value (nil if missing)
      def stat
        parse_stat(File.read("/proc/#{@pid}/stat"))
      end

      # Split the raw contents of a stat file into named fields.
      # The command name is the span from the first "(" to the LAST ")", so a
      # name that itself contains spaces or parentheses stays a single field.
      # The remaining fields are split on whitespace, which keeps any minus
      # sign attached to its value.
      #
      # Returns Hash of Symbol field name => String value (nil if missing)
      def parse_stat(raw)
        open_paren = raw.index('(')
        close_paren = raw.rindex(')')

        values = if open_paren && close_paren && open_paren < close_paren
          [raw[0...open_paren].strip, raw[open_paren..close_paren]] +
            raw[(close_paren + 1)..-1].split
        else
          raw.split
        end

        stats = {}
        StatFields.each_with_index do |field, index|
          stats[field] = values[index] if field
        end
        stats
      end
    end
  end
end
+ end