epi 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/epi +16 -0
- data/lib/epi.rb +52 -0
- data/lib/epi/cli.rb +22 -0
- data/lib/epi/cli/command.rb +27 -0
- data/lib/epi/cli/commands/config.rb +28 -0
- data/lib/epi/cli/commands/job.rb +19 -0
- data/lib/epi/cli/commands/server.rb +38 -0
- data/lib/epi/cli/commands/status.rb +13 -0
- data/lib/epi/configuration_file.rb +56 -0
- data/lib/epi/core_ext.rb +1 -0
- data/lib/epi/core_ext/inflector.rb +11 -0
- data/lib/epi/data.rb +152 -0
- data/lib/epi/exceptions.rb +10 -0
- data/lib/epi/exceptions/base.rb +7 -0
- data/lib/epi/exceptions/fatal.rb +12 -0
- data/lib/epi/exceptions/invalid_configuration_file.rb +14 -0
- data/lib/epi/exceptions/shutdown.rb +7 -0
- data/lib/epi/job.rb +110 -0
- data/lib/epi/job_description.rb +107 -0
- data/lib/epi/jobs.rb +83 -0
- data/lib/epi/launch.rb +59 -0
- data/lib/epi/process_status.rb +71 -0
- data/lib/epi/running_process.rb +159 -0
- data/lib/epi/server.rb +104 -0
- data/lib/epi/server/receiver.rb +46 -0
- data/lib/epi/server/responder.rb +44 -0
- data/lib/epi/server/responders/command.rb +15 -0
- data/lib/epi/server/responders/config.rb +30 -0
- data/lib/epi/server/responders/job.rb +70 -0
- data/lib/epi/server/responders/shutdown.rb +15 -0
- data/lib/epi/server/responders/status.rb +52 -0
- data/lib/epi/server/sender.rb +64 -0
- data/lib/epi/version.rb +3 -0
- metadata +106 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
require 'time'
require 'timeout'
|
2
|
+
|
3
|
+
module Epi
  # A snapshot of one operating-system process, parsed from a single line of
  # `ps -o PS_FORMAT` output. Individual fields are parsed lazily and cached
  # until {#reload!} is called.
  #
  # noinspection RubyTooManyInstanceVariablesInspection
  class RunningProcess

    # Default number of seconds {#kill} waits after TERM before sending KILL
    DEFAULT_TIMEOUT = 20

    # Cache of user names, keyed by numeric user ID
    @users = {}

    class << self

      # Looks up the name of a user by running `id -un`, caching the answer.
      # @param uid [#to_i] Numeric user ID
      # @return [String] Output of `id -un` without its trailing newline
      def user_name(uid)
        # Interpolate the integer form so only digits ever reach the shell
        @users[uid.to_i] ||= `id -un #{uid.to_i}`.chomp
      end

      # Looks up the name of a group in the table read from /etc/group.
      # @param gid [#to_i] Numeric group ID
      # @return [String, nil] `nil` when /etc/group has no matching entry
      def group_name(gid)
        groups[gid.to_i]
      end

      private

      # Group table, read from disk on first use and then kept for the
      # lifetime of the class
      def groups
        @groups ||= read_groups
      end

      # Parses /etc/group into a hash of `gid => name`.
      # The second (password) field is allowed to be empty, so entries of the
      # form `name::gid:members` are included as well.
      def read_groups
        File.readlines('/etc/group').each_with_object({}) do |line, result|
          match = /^([^:]+):[^:]*:(-?\d+):/.match(line)
          result[match[2].to_i] = match[1] if match
        end
      end

    end

    # Columns requested from `ps`. `lstart` is read as five whitespace-separated
    # tokens (see {#started_at}), which is why uid, gid and command are read
    # from tokens 10, 11 and 12.
    PS_FORMAT = 'pid,%cpu,%mem,rss,vsz,lstart,uid,gid,command'

    attr_reader :pid

    def logger
      Epi.logger
    end

    # @param pid [Integer] ID of the process to describe
    # @param ps_line [String, nil] A pre-fetched line of `ps` output in
    #   {PS_FORMAT}; when omitted, `ps` is run for `pid` immediately
    def initialize(pid, ps_line = nil)
      @pid = pid
      @ps_line = ps_line
      @props = {}
      reload! unless ps_line
    end

    # Re-runs `ps` for this PID and discards every cached field, so subsequent
    # reads reflect the fresh output.
    # @return [String, nil] The new `ps` line, or `nil` if `ps` printed nothing
    #   after its header line
    def reload!
      @props = {}
      @parts = nil
      @ps_line = `ps -p #{pid} -o #{PS_FORMAT}`.lines[1]
    end

    # Returns `true` if a `ps` line was available for the process when this
    # instance was created or last reloaded
    def was_alive?
      !@ps_line.nil?
    end

    # CPU usage as a percentage
    # @return [Float]
    def cpu_percentage
      @props[:cpu_percentage] ||= parts[1].to_f
    end

    # Physical memory usage as a percentage
    # @return [Float]
    def memory_percentage
      @props[:memory_percentage] ||= parts[2].to_f
    end

    # Physical memory usage in bytes (rounded to the nearest kilobyte)
    # @return [Fixnum]
    def physical_memory
      @props[:physical_memory] ||= parts[3].to_i * 1024
    end

    # Virtual memory usage in bytes (rounded to the nearest kilobyte)
    # @return [Fixnum]
    def virtual_memory
      @props[:virtual_memory] ||= parts[4].to_i * 1024
    end

    # Sum of {#physical_memory} and {#virtual_memory}
    # @return [Fixnum]
    def total_memory
      @props[:total_memory] ||= physical_memory + virtual_memory
    end

    # Time at which the process was started
    # @return [Time]
    def started_at
      @props[:started_at] ||= Time.parse(parts[5..9].join(' '))
    end

    # Name of the user that owns the process
    # @return [String]
    def user
      @props[:user] ||= self.class.user_name(parts[10])
    end

    # Name of the group that owns the process
    # @return [String]
    def group
      @props[:group] ||= self.class.group_name(parts[11])
    end

    # The command that was used to start the process, including its arguments
    # @return [String]
    def command
      @props[:command] ||= parts[12]
    end

    # Whether the process is root-owned
    # @return [TrueClass|FalseClass]
    def root?
      user == 'root'
    end

    # Kill a running process
    # @param timeout [TrueClass|FalseClass|Numeric] `true` to kill immediately (KILL),
    #   `false` to kill gracefully (TERM), or a number of seconds to wait between trying
    #   both (first TERM, then KILL).
    # @return [RunningProcess]
    def kill(timeout = DEFAULT_TIMEOUT)
      if timeout.is_a? Numeric
        begin
          # `unless` yields nil (interpolated as '') for exactly one second
          logger.info "Will wait #{timeout} second#{'s' unless timeout == 1} for process to terminate gracefully"
          Timeout.timeout(timeout) { kill false }
        rescue Timeout::Error
          # The process ignored TERM for the whole grace period
          kill true
        end
      else
        signal = timeout ? 'KILL' : 'TERM'
        logger.info "Sending #{signal} to process #{pid}"
        Process.kill signal, pid
        # Block until `ps` no longer reports the PID
        sleep 0.2 while still_running?
        logger.info "Process #{pid} terminated by signal #{signal}"
      end
      self
    end

    # Kill the process immediately (KILL), without a grace period
    # @return [RunningProcess]
    def kill!
      kill true
    end

    private

    # Whether `ps -p` currently exits with status 0 for this PID
    def still_running?
      `ps -p #{pid} > /dev/null 2>&1; echo $?`.chomp.to_i == 0
    end

    # Whitespace-separated tokens of the `ps` line; the 13th token keeps the
    # whole command with its arguments.
    # @raise [RuntimeError] if no `ps` line is available for the process
    def parts
      raise 'Tried to access details of a non-running process' unless String === @ps_line
      @parts ||= @ps_line.strip.split(/\s+/, 13)
    end

  end

end
|
data/lib/epi/server.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
|
3
|
+
require_relative 'server/sender'
|
4
|
+
require_relative 'server/receiver'
|
5
|
+
require_relative 'server/responder'
|
6
|
+
|
7
|
+
module Epi
  # Controls the background Epi server: starting it, locating its socket,
  # sending it messages and shutting it down.
  module Server

    class << self

      # @return [Time, nil] Assigned at the end of {#run}
      attr_reader :start_time

      def logger
        Epi.logger
      end

      # Makes sure a server is running, launching one if necessary.
      # If root data is in use but the running server is not root-owned, that
      # server is asked to shut down.
      # @raise [Exceptions::Fatal] if a root server is needed but this process
      #   is not root, or the socket does not appear within 5 seconds
      def ensure_running
        should_run_as_root = Data.root?

        if running? && should_run_as_root && !process.root?
          logger.info "Server needs to run as root, but is running as #{process.user}"
          shutdown
        end

        # NOTE(review): `running?` reads the memoised process snapshot, so right
        # after the shutdown request above this presumably still reports the old
        # server as running -- confirm whether a relaunch is expected here.
        unless running?
          if should_run_as_root && !Epi.root?
            raise Exceptions::Fatal, 'Found root data but not running as root. Either run again as root, ' +
              'or specify EPI_HOME as a directory other than /etc/epi'
          end

          logger.info 'Starting server'
          Epi.launch [$0, 'server', 'run'],
                     stdout: Data.home + 'server.log',
                     stderr: Data.home + 'server_errors.log'

          # Poll for the socket file, which {#run} creates once it is listening
          begin
            Timeout::timeout(5) { sleep 0.05 until socket_path.exist? }
          rescue Timeout::Error
            raise Exceptions::Fatal, 'Server not started after 5 seconds'
          end unless socket_path.exist?
        end
      end

      # Location of the server's UNIX domain socket inside the data directory
      def socket_path
        Data.home + 'socket'
      end

      # Runs the server inside the current process.
      # @raise [Exceptions::Fatal] if a server is already running
      def run
        raise Exceptions::Fatal, 'Server already running' if running?

        # Save the server PID
        Data.server_pid = Process.pid

        # Run an initial beat
        Jobs.beat!

        # Start a server
        EventMachine.start_unix_domain_server socket_path.to_s, Receiver
        logger.info "Listening on socket #{socket_path}"

        # Make sure other users can connect to the server
        socket_path.chmod 0777 #TODO: make configurable

        # Ensure the socket is destroyed when the server exits
        EventMachine.add_shutdown_hook { socket_path.delete }

        @start_time = Time.now
      end

      # Sends a message to the server, starting the server first if needed.
      # Note that this replaces `Object#send` on the Server module itself.
      # @param args Passed through unchanged to `Sender.send`
      def send(*args)
        ensure_running
        Sender.send(*args)
      end

      # Shuts the server down: stops the event loop when called inside the
      # server process, otherwise sends the server a shutdown message.
      # @param process [RunningProcess, nil] Defaults to the recorded server process
      # @raise [Exceptions::Fatal] if no server is running
      def shutdown(process = nil)
        process ||= self.process
        raise Exceptions::Fatal, 'Attempted to shut down server when no server is running' unless running?
        if process.pid == Process.pid
          EventMachine.next_tick do
            EventMachine.stop_event_loop
            Data.server_pid = nil
            logger.info 'Server has shut down'
          end
        else
          logger.info 'Server will shut down'
          # Talk to the sender directly: going through #send would re-enter
          # #ensure_running, which calls #shutdown again when the server runs
          # as the wrong user, and recurse without end.
          Sender.send :shutdown
        end
      end

      # Whether the recorded server process was alive when it was inspected
      def running?
        process && process.was_alive?
      end

      # The server process recorded in Data, memoised per PID.
      # @return [RunningProcess, nil] `nil` when no server PID is recorded
      def process
        server_pid = Data.server_pid
        # Drop the cached snapshot once the recorded PID changes
        @process = nil if @process && @process.pid != server_pid
        @process ||= server_pid && RunningProcess.new(server_pid)
      end

    end

  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
require 'bson'
|
3
|
+
|
4
|
+
module Epi
  module Server
    # EventMachine connection that decodes an incoming BSON message, hands it
    # to the matching responder and writes the BSON-encoded reply back.
    class Receiver < EventMachine::Connection

      def logger
        Epi.logger
      end

      # Handles a chunk of incoming data as one BSON document and replies with
      # a document flagged `complete: true`.
      # @param data [String] Raw bytes received on the connection
      def receive_data(data)
        reply = build_reply(data)
        reply[:complete] = true
        send_data reply.to_bson
        # Only stop the server once the final reply has been handed over
        Server.shutdown if should_shut_down
      end

      # Sends a line of intermediate output to the client, flagged
      # `complete: false`.
      # @param text [#to_s] Text to send; a newline is appended
      def puts(text)
        send_data({result: "#{text}\n", complete: false}.to_bson)
      end

      private

      attr_accessor :should_shut_down

      # Decodes the message and runs its responder.
      # @return [Hash] `{result: ...}` on success, or `{error: {...}}`
      #   describing the exception that was raised
      def build_reply(raw)
        message = Hash.from_bson StringIO.new raw
        logger.debug "Received message of type '#{message['type']}'"
        type = message.delete('type').to_s
        {result: Responder.run(self, type, message)}
      rescue Exceptions::Shutdown
        # Remember the request; #receive_data acts on it after replying
        self.should_shut_down = true
        {result: 'Server is shutting down'}
      rescue => error
        details = {
          class: error.class.name,
          message: error.message,
          backtrace: error.backtrace
        }
        {error: details}
      end

    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Epi
  module Server
    unless defined? Responder
      # Base class for message responders. A subclass lives in the Responders
      # namespace, declares accessors for the data it expects and implements #run.
      class Responder
        include Exceptions

        # Runs a responder by name.
        #
        # @param receiver [Receiver] The receiver that is running the responder
        # @param name [String] Name of the responder to invoke, e.g. 'command'
        # @param data [Hash] Data included in the message, to be extracted onto the responder before it is run
        # @return The value returned by the responder's #run
        # @raise [Fatal] if no responder class matches `name`
        def self.run(receiver, name, data)
          klass_name = name.camelize.to_sym
          klass = begin
            # Look only inside Responders itself, never in Object's constants
            Responders.const_defined?(klass_name, false) && Responders.const_get(klass_name, false)
          rescue NameError
            # `name` does not form a legal constant name, so it cannot be a responder
            nil
          end
          raise Fatal, 'Unknown message type' unless Class === klass && klass < Responder
          responder = klass.new(receiver)
          # Keys come from the client, so only public writers may be invoked
          data.each { |key, value| responder.public_send :"#{key}=", value }
          responder.run
        end

        attr_reader :receiver

        def logger
          Epi.logger
        end

        # @param receiver [Receiver] Connection on whose behalf this responder runs
        def initialize(receiver)
          @receiver = receiver
        end

        # Performs the responder's work; must be overridden by subclasses.
        # @raise [NotImplementedError] always, in this base class
        def run
          raise NotImplementedError, "You need to define #run for class #{self.class.name}"
        end

        # Forwards intermediate output to the receiver
        # @param text Passed through to the receiver's #puts
        def puts(text)
          receiver.puts text
        end

      end

      # Load every responder that sits next to this file
      Dir[File.expand_path '../responders/*.rb', __FILE__].each { |f| require f }
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Epi
  module Server
    module Responders
      # Responder that registers additional configuration paths, saves the
      # data and triggers a beat.
      class Config < Responder

        # Paths to add; nothing is added when this is nil
        attr_accessor :add_paths

        # @return [String] One message per requested path, joined by spaces
        def run
          known_paths = Data.configuration_paths
          messages = []
          if add_paths
            add_paths.each { |entry| messages << register(known_paths, entry.to_s) }
          end
          Data.save
          Jobs.beat!
          messages.join ' '
        end

        private

        # Appends `path` to `known_paths` unless it is already present.
        # @return [String] Message describing what happened
        def register(known_paths, path)
          if known_paths.include?(path)
            logger.warn "Tried to re-add config path: #{path}"
            "Config path already loaded: #{path}"
          else
            logger.info "Adding config path: #{path}"
            known_paths << path
            "Added config path: #{path}"
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Epi
  module Server
    module Responders
      # Responder that changes how many processes a job is expected to run.
      class Job < Responder

        # Argument-less instructions a client may request by name. Anything
        # else is rejected rather than dispatched, because the instruction
        # text is supplied by the client.
        INSTRUCTIONS = %w[max min pause resume reset restart].freeze

        attr_accessor :id, :instruction

        # Applies the instruction to the job identified by `id`.
        #
        # Accepted instructions: a bare number (set the process count),
        # `more` / `less` optionally prefixed by a number and a space
        # (e.g. `2 more`), or one of {INSTRUCTIONS}.
        #
        # @return [String] Message describing the change
        # @raise [Exceptions::Fatal] if the job ID or instruction is unknown
        def run
          Jobs.beat!
          raise Exceptions::Fatal, 'Unknown job ID' unless Epi::Job === job
          requested = instruction.to_s.strip
          case requested
          when /\A\d+\z/ then set requested.to_i
          when /\A(\d+ )?(more|less)\z/ then __send__ $2, ($1 || 1).to_i
          else
            unless INSTRUCTIONS.include?(requested)
              raise Exceptions::Fatal, "Unknown instruction '#{requested}'"
            end
            __send__ requested
          end
        end

        private

        # The job named by `id`, looked up once per responder
        def job
          @job ||= Jobs[id]
        end

        # Sets the expected process count and syncs the job.
        # @param count [Integer] New expected number of processes
        # @param validate [Boolean] When false, skips the range and no-change checks
        # @return [String] Message describing the change
        # @raise [Exceptions::Fatal] if validating and `count` is out of range
        #   or equal to the current count
        def set(count, validate = true)
          allowed = job.allowed_processes
          raise Exceptions::Fatal, "Requested count #{count} is outside allowed range #{allowed}" unless !validate || allowed === count
          original = job.expected_count
          raise Exceptions::Fatal, "Already running #{count} process#{count != 1 ? 'es' : ''}" unless !validate || original != count
          job.expected_count = count
          job.sync!
          "#{count < original ? 'De' : 'In'}creasing '#{job.name}' processes by #{(original - count).abs} (from #{original} to #{count})"
        end

        # Raises the expected count by `increase`
        def more(increase)
          set job.expected_count + increase
        end

        # Lowers the expected count by `decrease`
        def less(decrease)
          set job.expected_count - decrease
        end

        # Runs the maximum allowed number of processes
        def max
          set job.allowed_processes.max
        end

        # Runs the minimum allowed number of processes
        def min
          set job.allowed_processes.min
        end

        # Stops all of the job's processes by setting the count to zero
        def pause
          set 0
        end

        # Returns to the job description's initial process count
        def resume
          set job.job_description.initial_processes
        end
        alias_method :reset, :resume

        # Replaces every process: drops the count to zero without validation,
        # then restores the previous count.
        # @raise [Exceptions::Fatal] if the job currently expects no processes
        def restart
          count = job.expected_count
          raise Exceptions::Fatal, 'This job has no processes to restart' if count == 0
          set 0, false
          set count
          "Replacing #{count} '#{job.name}' process#{count != 1 ? 'es' : ''}"
        end

      end
    end
  end
end
|