ominfra 0.0.0.dev76__py3-none-any.whl → 0.0.0.dev78__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ominfra/clouds/aws/auth.py +97 -92
- ominfra/clouds/aws/dataclasses.py +149 -0
- ominfra/clouds/aws/journald2aws/__init__.py +0 -0
- ominfra/clouds/aws/journald2aws/journald.py +67 -0
- ominfra/clouds/aws/logs.py +173 -0
- ominfra/deploy/_executor.py +17 -0
- ominfra/pyremote/_runcommands.py +17 -0
- ominfra/scripts/__init__.py +0 -0
- ominfra/scripts/supervisor.py +3323 -0
- ominfra/supervisor/__init__.py +1 -0
- ominfra/supervisor/__main__.py +4 -0
- ominfra/supervisor/compat.py +208 -0
- ominfra/supervisor/configs.py +110 -0
- ominfra/supervisor/context.py +405 -0
- ominfra/supervisor/datatypes.py +171 -0
- ominfra/supervisor/dispatchers.py +307 -0
- ominfra/supervisor/events.py +304 -0
- ominfra/supervisor/exceptions.py +22 -0
- ominfra/supervisor/poller.py +232 -0
- ominfra/supervisor/process.py +782 -0
- ominfra/supervisor/states.py +78 -0
- ominfra/supervisor/supervisor.py +390 -0
- ominfra/supervisor/types.py +49 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev78.dist-info}/METADATA +3 -3
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev78.dist-info}/RECORD +29 -9
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev78.dist-info}/LICENSE +0 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev78.dist-info}/WHEEL +0 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev78.dist-info}/entry_points.txt +0 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev78.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,782 @@
|
|
1
|
+
# ruff: noqa: UP006 UP007
|
2
|
+
import errno
|
3
|
+
import functools
|
4
|
+
import logging
|
5
|
+
import os
|
6
|
+
import shlex
|
7
|
+
import signal
|
8
|
+
import time
|
9
|
+
import traceback
|
10
|
+
import typing as ta
|
11
|
+
|
12
|
+
from .compat import as_bytes
|
13
|
+
from .compat import as_string
|
14
|
+
from .compat import close_fd
|
15
|
+
from .compat import compact_traceback
|
16
|
+
from .compat import decode_wait_status
|
17
|
+
from .compat import get_path
|
18
|
+
from .compat import real_exit
|
19
|
+
from .compat import signame
|
20
|
+
from .configs import ProcessConfig
|
21
|
+
from .configs import ProcessGroupConfig
|
22
|
+
from .context import ServerContext
|
23
|
+
from .context import check_execv_args
|
24
|
+
from .context import close_child_pipes
|
25
|
+
from .context import close_parent_pipes
|
26
|
+
from .context import drop_privileges
|
27
|
+
from .context import make_pipes
|
28
|
+
from .datatypes import RestartUnconditionally
|
29
|
+
from .dispatchers import Dispatcher
|
30
|
+
from .dispatchers import InputDispatcher
|
31
|
+
from .dispatchers import OutputDispatcher
|
32
|
+
from .events import EventRejectedEvent
|
33
|
+
from .events import ProcessCommunicationEvent
|
34
|
+
from .events import ProcessCommunicationStderrEvent
|
35
|
+
from .events import ProcessCommunicationStdoutEvent
|
36
|
+
from .events import ProcessStateBackoffEvent
|
37
|
+
from .events import ProcessStateEvent
|
38
|
+
from .events import ProcessStateExitedEvent
|
39
|
+
from .events import ProcessStateFatalEvent
|
40
|
+
from .events import ProcessStateRunningEvent
|
41
|
+
from .events import ProcessStateStartingEvent
|
42
|
+
from .events import ProcessStateStoppedEvent
|
43
|
+
from .events import ProcessStateStoppingEvent
|
44
|
+
from .events import ProcessStateUnknownEvent
|
45
|
+
from .events import notify_event
|
46
|
+
from .exceptions import BadCommandError
|
47
|
+
from .exceptions import ProcessError
|
48
|
+
from .states import STOPPED_STATES
|
49
|
+
from .states import ProcessState
|
50
|
+
from .states import ProcessStates
|
51
|
+
from .states import SupervisorStates
|
52
|
+
from .states import get_process_state_description
|
53
|
+
from .types import AbstractServerContext
|
54
|
+
from .types import AbstractSubprocess
|
55
|
+
|
56
|
+
|
57
|
+
log = logging.getLogger(__name__)
|
58
|
+
|
59
|
+
|
60
|
+
@functools.total_ordering
class Subprocess(AbstractSubprocess):
    """
    A class to manage a subprocess.

    Tracks the child's pid, lifecycle state (ProcessStates), start/stop
    timestamps, restart backoff, and the fd-keyed I/O dispatchers wired to the
    child's stdio pipes. Ordered by configured priority (total_ordering from
    __lt__/__eq__ below).
    """

    # Initial state; overridden by instance variables

    # pid = 0 # Subprocess pid; 0 when not running
    # config = None # ProcessConfig instance
    # state = None # process state code
    listener_state = None  # listener state code (if we're an event listener)
    event = None  # event currently being processed (if we're an event listener)
    laststart = 0.  # Last time the subprocess was started; 0 if never
    laststop = 0.  # Last time the subprocess was stopped; 0 if never
    last_stop_report = 0.  # Last time "waiting for x to stop" logged, to throttle
    delay = 0.  # If nonzero, delay starting or killing until this time
    administrative_stop = False  # true if process has been stopped by an admin
    system_stop = False  # true if process has been stopped by the system
    killing = False  # true if we are trying to kill this process
    backoff = 0  # backoff counter (to startretries)
    dispatchers = None  # asyncore output dispatchers (keyed by fd)
    pipes = None  # map of channel name to file descriptor #
    exitstatus = None  # status attached to dead process by finish()
    spawn_err = None  # error message attached by spawn() if any
    group = None  # ProcessGroup instance if process is in the group
|
84
|
+
|
85
|
+
def __init__(self, config: ProcessConfig, group: 'ProcessGroup', context: AbstractServerContext) -> None:
    """
    :param config: the per-process configuration (command, uid, retries, ...)
    :param group: the ProcessGroup this process belongs to
    :param context: the server context (pid history, global config, state)
    """
    super().__init__()
    self._config = config
    self.group = group
    self._context = context
    # fd -> Dispatcher and channel-name -> fd maps; populated by spawn()
    self._dispatchers: dict = {}
    self._pipes: dict = {}
    self.state = ProcessStates.STOPPED
    self._pid = 0  # 0 when not running
|
94
|
+
|
95
|
+
@property
def pid(self) -> int:
    """The child's pid, or 0 when not running."""
    return self._pid
|
98
|
+
|
99
|
+
@property
def config(self) -> ProcessConfig:
    """The immutable per-process configuration."""
    return self._config
|
102
|
+
|
103
|
+
@property
def context(self) -> AbstractServerContext:
    """The owning server context."""
    return self._context
|
106
|
+
|
107
|
+
def remove_logs(self) -> None:
    """Ask each dispatcher that supports it to delete its log files."""
    for d in self._dispatchers.values():
        fn = getattr(d, 'remove_logs', None)
        if fn is not None:
            fn()
|
111
|
+
|
112
|
+
def reopen_logs(self) -> None:
    """Ask each dispatcher that supports it to reopen its log files (e.g. after rotation)."""
    for d in self._dispatchers.values():
        fn = getattr(d, 'reopen_logs', None)
        if fn is not None:
            fn()
|
116
|
+
|
117
|
+
def drain(self) -> None:
    """Run one round of read/write event handling over every dispatcher."""
    for d in self._dispatchers.values():
        # readable() must be called on every dispatcher even if the result is
        # unused elsewhere, as it may have side effects (eg. it calls
        # handle_listener_state_change for event listener processes).
        wants_read = d.readable()
        if wants_read:
            d.handle_read_event()
        wants_write = d.writable()
        if wants_write:
            d.handle_write_event()
|
125
|
+
|
126
|
+
def write(self, chars: ta.Union[bytes, str]) -> None:
    """
    Queue *chars* for delivery to the child's stdin.

    Raises OSError(EPIPE) when the process is not running, is being killed,
    has no stdin pipe, or its stdin dispatcher is closed. flush() may also
    raise EPIPE if the pipe is closed by the child.
    """
    if not self.pid or self.killing:
        raise OSError(errno.EPIPE, 'Process already closed')

    fd = self._pipes['stdin']
    if fd is None:
        raise OSError(errno.EPIPE, 'Process has no stdin channel')

    d = self._dispatchers[fd]
    if d.closed:
        raise OSError(errno.EPIPE, "Process' stdin channel is closed")

    d.input_buffer += chars
    d.flush()  # this must raise EPIPE if the pipe is closed
|
140
|
+
|
141
|
+
def _get_execv_args(self) -> ta.Tuple[str, ta.Sequence[str]]:
    """
    Internal: turn a program name into a file name, using $PATH, make sure it exists / is executable, raising a
    ProcessError if not

    :return: (filename, argv) suitable for os.execve
    :raises BadCommandError: if the command string can't be shlex-parsed or is empty
    :raises ProcessError: (via check_execv_args) if the resolved file is missing or not executable
    """
    try:
        commandargs = shlex.split(self.config.command)
    except ValueError as e:
        raise BadCommandError(f"can't parse command {self.config.command!r}: {e}")  # noqa

    if commandargs:
        program = commandargs[0]
    else:
        raise BadCommandError('command is empty')

    if '/' in program:
        # explicit path given; do not search $PATH
        filename = program
        try:
            st = os.stat(filename)
        except OSError:
            st = None

    else:
        # search each $PATH entry; first stat() that succeeds wins
        path = get_path()
        found = None
        st = None
        for dir in path:  # noqa
            found = os.path.join(dir, program)
            try:
                st = os.stat(found)
            except OSError:
                pass
            else:
                break
        if st is None:
            # not found anywhere; keep the bare name so the error mentions it
            filename = program
        else:
            filename = found  # type: ignore

    # check_execv_args will raise a ProcessError if the execv args are bogus, we break it out into a separate
    # options method call here only to service unit tests
    check_execv_args(filename, commandargs, st)

    return filename, commandargs
|
185
|
+
|
186
|
+
# Maps a destination ProcessStates value to the event class fired by
# change_state() when entering that state.
event_map: ta.ClassVar[ta.Mapping[int, ta.Type[ProcessStateEvent]]] = {
    ProcessStates.BACKOFF: ProcessStateBackoffEvent,
    ProcessStates.FATAL: ProcessStateFatalEvent,
    ProcessStates.UNKNOWN: ProcessStateUnknownEvent,
    ProcessStates.STOPPED: ProcessStateStoppedEvent,
    ProcessStates.EXITED: ProcessStateExitedEvent,
    ProcessStates.RUNNING: ProcessStateRunningEvent,
    ProcessStates.STARTING: ProcessStateStartingEvent,
    ProcessStates.STOPPING: ProcessStateStoppingEvent,
}
|
196
|
+
|
197
|
+
def change_state(self, new_state: ProcessState, expected: bool = True) -> bool:
    """
    Transition to *new_state* and notify the matching ProcessStateEvent.

    :param new_state: the destination state
    :param expected: passed through to the event; marks whether the transition was anticipated
    :return: False (no-op) when already in *new_state*, True otherwise
    """
    old_state = self.state
    if new_state is old_state:
        return False

    self.state = new_state
    if new_state == ProcessStates.BACKOFF:
        # linear backoff: each failed start delays the next attempt by one more second
        now = time.time()
        self.backoff += 1
        self.delay = now + self.backoff

    event_class = self.event_map.get(new_state)
    if event_class is not None:
        event = event_class(self, old_state, expected)
        notify_event(event)

    return True
|
214
|
+
|
215
|
+
def _check_in_state(self, *states: ProcessState) -> None:
    """
    Assert that the current state is one of *states*.

    :raises AssertionError: with a descriptive message naming the process,
        its current state, and the allowed states
    """
    if self.state not in states:
        current_state = get_process_state_description(self.state)
        allowable_states = ' '.join(map(get_process_state_description, states))
        processname = as_string(self.config.name)
        # f-string for consistency with the rest of this module (was old-style %-formatting)
        raise AssertionError(
            f'Assertion failed for {processname}: {current_state} not in {allowable_states}')
|
221
|
+
|
222
|
+
def _record_spawn_err(self, msg: str) -> None:
    """Remember *msg* as the most recent spawn failure and log it."""
    self.spawn_err = msg
    log.info('spawn_err: %s', msg)
|
225
|
+
|
226
|
+
def spawn(self) -> ta.Optional[int]:
    """
    Start the subprocess: resolve the command, set up pipes/dispatchers, and fork.

    :return: the child's pid in the parent on success; None if already
        running, on any failure (state moves to BACKOFF), or in the child
        branch (which never returns normally — _spawn_as_child exec's or exits)
    """
    processname = as_string(self.config.name)

    if self.pid:
        log.warning('process \'%s\' already running', processname)
        return None

    # reset per-run bookkeeping before attempting a start
    self.killing = False
    self.spawn_err = None
    self.exitstatus = None
    self.system_stop = False
    self.administrative_stop = False

    self.laststart = time.time()

    # starting is only legal from a non-running state
    self._check_in_state(
        ProcessStates.EXITED,
        ProcessStates.FATAL,
        ProcessStates.BACKOFF,
        ProcessStates.STOPPED,
    )

    self.change_state(ProcessStates.STARTING)

    try:
        filename, argv = self._get_execv_args()
    except ProcessError as what:
        # bad command: STARTING -> BACKOFF
        self._record_spawn_err(what.args[0])
        self._check_in_state(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        return None

    try:
        self._dispatchers, self._pipes = self._make_dispatchers()  # type: ignore
    except OSError as why:
        code = why.args[0]
        if code == errno.EMFILE:
            # too many file descriptors open
            msg = f"too many open files to spawn '{processname}'"
        else:
            msg = f"unknown error making dispatchers for '{processname}': {errno.errorcode.get(code, code)}"
        self._record_spawn_err(msg)
        self._check_in_state(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        return None

    try:
        pid = os.fork()
    except OSError as why:
        code = why.args[0]
        if code == errno.EAGAIN:
            # process table full
            msg = f'Too many processes in process table to spawn \'{processname}\''
        else:
            msg = f'unknown error during fork for \'{processname}\': {errno.errorcode.get(code, code)}'
        self._record_spawn_err(msg)
        self._check_in_state(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        # the pipes were created but no child exists; close both ends
        close_parent_pipes(self._pipes)
        close_child_pipes(self._pipes)
        return None

    if pid != 0:
        return self._spawn_as_parent(pid)

    else:
        self._spawn_as_child(filename, argv)
        return None
|
294
|
+
|
295
|
+
def _make_dispatchers(self) -> ta.Tuple[ta.Mapping[int, Dispatcher], ta.Mapping[str, int]]:
    """
    Create the stdio pipes and one Dispatcher per open parent-side channel.

    :return: (fd -> Dispatcher map, channel-name -> fd pipe map)
    """
    # a stderr pipe is only needed when stderr is not redirected into stdout
    pipes = make_pipes(not self.config.redirect_stderr)

    dispatchers: ta.Dict[int, Dispatcher] = {}
    etype: ta.Type[ProcessCommunicationEvent]

    fd = pipes['stdout']
    if fd is not None:
        etype = ProcessCommunicationStdoutEvent
        dispatchers[fd] = OutputDispatcher(self, etype, fd)

    fd = pipes['stderr']
    if fd is not None:
        etype = ProcessCommunicationStderrEvent
        dispatchers[fd] = OutputDispatcher(self, etype, fd)

    fd = pipes['stdin']
    if fd is not None:
        dispatchers[fd] = InputDispatcher(self, 'stdin', fd)

    return dispatchers, pipes
|
310
|
+
|
311
|
+
def _spawn_as_parent(self, pid: int) -> int:
    """
    Parent-side bookkeeping after fork: record the pid, close the child's
    pipe ends, arm the startsecs timer, and register in pid_history.

    :return: the child's pid
    """
    # Parent
    self._pid = pid
    close_child_pipes(self._pipes)
    log.info('spawned: \'%s\' with pid %s', as_string(self.config.name), pid)
    self.spawn_err = None
    # the process must stay up until this deadline to count as RUNNING
    self.delay = time.time() + self.config.startsecs
    self.context.pid_history[pid] = self
    return pid
|
320
|
+
|
321
|
+
def _prepare_child_fds(self) -> None:
    """
    Child-side fd setup: wire the pipe ends onto fds 0/1/2 and close
    everything else up to the configured minfds limit.
    """
    os.dup2(self._pipes['child_stdin'], 0)
    os.dup2(self._pipes['child_stdout'], 1)
    if self.config.redirect_stderr:
        # stderr shares the stdout pipe
        os.dup2(self._pipes['child_stdout'], 2)
    else:
        os.dup2(self._pipes['child_stderr'], 2)
    # close all other inherited descriptors so the child starts clean
    for i in range(3, self.context.config.minfds):
        close_fd(i)
|
330
|
+
|
331
|
+
def _spawn_as_child(self, filename: str, argv: ta.Sequence[str]) -> None:
    """
    Child-side of spawn: detach from the parent's process group, set up fds,
    uid, environment, cwd, and umask, then execve the target.

    Never returns to the caller: on success the image is replaced by execve;
    on any failure the finally clause hard-exits the child with code 127.
    """
    try:
        # prevent child from receiving signals sent to the parent by calling os.setpgrp to create a new process
        # group for the child; this prevents, for instance, the case of child processes being sent a SIGINT when
        # running supervisor in foreground mode and Ctrl-C in the terminal window running supervisord is pressed.
        # Presumably it also prevents HUP, etc received by supervisord from being sent to children.
        os.setpgrp()

        self._prepare_child_fds()
        # sending to fd 2 will put this output in the stderr log

        # set user
        setuid_msg = self.set_uid()
        if setuid_msg:
            uid = self.config.uid
            msg = f"couldn't setuid to {uid}: {setuid_msg}\n"
            os.write(2, as_bytes('supervisor: ' + msg))
            return  # finally clause will exit the child process

        # set environment
        env = os.environ.copy()
        env['SUPERVISOR_ENABLED'] = '1'
        env['SUPERVISOR_PROCESS_NAME'] = self.config.name
        if self.group:
            env['SUPERVISOR_GROUP_NAME'] = self.group.config.name
        if self.config.environment is not None:
            env.update(self.config.environment)

        # change directory
        cwd = self.config.directory
        try:
            if cwd is not None:
                os.chdir(cwd)
        except OSError as why:
            code = errno.errorcode.get(why.args[0], why.args[0])
            msg = f"couldn't chdir to {cwd}: {code}\n"
            os.write(2, as_bytes('supervisor: ' + msg))
            return  # finally clause will exit the child process

        # set umask, then execve
        try:
            if self.config.umask is not None:
                os.umask(self.config.umask)
            os.execve(filename, list(argv), env)
        except OSError as why:
            code = errno.errorcode.get(why.args[0], why.args[0])
            msg = f"couldn't exec {argv[0]}: {code}\n"
            os.write(2, as_bytes('supervisor: ' + msg))
        except Exception:  # noqa
            # non-OSError failure: report a compact traceback on fd 2
            (file, fun, line), t, v, tbinfo = compact_traceback()
            error = f'{t}, {v}: file: {file} line: {line}'
            msg = f"couldn't exec (unknown): {error}\n"
            os.write(2, as_bytes('supervisor: ' + msg))

        # this point should only be reached if execve failed. the finally clause will exit the child process.

    finally:
        os.write(2, as_bytes('supervisor: child process was not spawned\n'))
        real_exit(127)  # exit process with code for spawn failure
|
390
|
+
|
391
|
+
def _check_and_adjust_for_system_clock_rollback(self, test_time: float) -> None:
    """
    Check if system clock has rolled backward beyond test_time. If so, set affected timestamps to test_time.

    Keeps the per-state timers (laststart, last_stop_report, delay) sane so
    state transitions are not stalled indefinitely by a clock jump.
    """
    if self.state == ProcessStates.STARTING:
        self.laststart = min(test_time, self.laststart)
        if self.delay > 0 and test_time < (self.delay - self.config.startsecs):
            self.delay = test_time + self.config.startsecs

    elif self.state == ProcessStates.RUNNING:
        # pretend the process started startsecs ago so it still counts as "up long enough"
        if test_time > self.laststart and test_time < (self.laststart + self.config.startsecs):
            self.laststart = test_time - self.config.startsecs

    elif self.state == ProcessStates.STOPPING:
        self.last_stop_report = min(test_time, self.last_stop_report)
        if self.delay > 0 and test_time < (self.delay - self.config.stopwaitsecs):
            self.delay = test_time + self.config.stopwaitsecs

    elif self.state == ProcessStates.BACKOFF:
        if self.delay > 0 and test_time < (self.delay - self.backoff):
            self.delay = test_time + self.backoff
|
412
|
+
|
413
|
+
def stop(self) -> ta.Optional[str]:
    """
    Administratively stop the process: flag the stop, reset the stop-report
    throttle, and send the configured stop signal.

    :return: None on success, or an error message string from kill()
    """
    self.administrative_stop = True
    self.last_stop_report = 0
    sig = self.config.stopsignal
    return self.kill(sig)
|
417
|
+
|
418
|
+
def stop_report(self) -> None:
    """Log a 'waiting for x to stop' message with throttling (every 2 seconds)."""
    if self.state != ProcessStates.STOPPING:
        return

    now = time.time()
    self._check_and_adjust_for_system_clock_rollback(now)

    if now > (self.last_stop_report + 2):  # every 2 seconds
        log.info('waiting for %s to stop', as_string(self.config.name))
        self.last_stop_report = now
|
428
|
+
|
429
|
+
def give_up(self) -> None:
    """Abandon restart attempts: clear timers and transition BACKOFF -> FATAL."""
    self.delay = 0
    self.backoff = 0
    self.system_stop = True  # stopped by the system, not an admin
    self._check_in_state(ProcessStates.BACKOFF)
    self.change_state(ProcessStates.FATAL)
|
435
|
+
|
436
|
+
def kill(self, sig: int) -> ta.Optional[str]:
    """
    Send a signal to the subprocess with the intention to kill it (to make it exit). This may or may not actually
    kill it.

    Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
    running.
    """
    now = time.time()

    processname = as_string(self.config.name)
    # If the process is in BACKOFF and we want to stop or kill it, then BACKOFF -> STOPPED. This is needed because
    # if startretries is a large number and the process isn't starting successfully, the stop request would be
    # blocked for a long time waiting for the retries.
    if self.state == ProcessStates.BACKOFF:
        log.debug('Attempted to kill %s, which is in BACKOFF state.', processname)
        self.change_state(ProcessStates.STOPPED)
        return None

    args: tuple
    if not self.pid:
        fmt, args = "attempted to kill %s with sig %s but it wasn't running", (processname, signame(sig))
        log.debug(fmt, *args)
        return fmt % args

    # If we're in the stopping state, then we've already sent the stop signal and this is the kill signal
    if self.state == ProcessStates.STOPPING:
        killasgroup = self.config.killasgroup
    else:
        killasgroup = self.config.stopasgroup

    as_group = ''
    if killasgroup:
        as_group = 'process group '

    log.debug('killing %s (pid %s) %swith signal %s', processname, self.pid, as_group, signame(sig))

    # RUNNING/STARTING/STOPPING -> STOPPING
    self.killing = True
    # deadline after which transition() escalates to SIGKILL
    self.delay = now + self.config.stopwaitsecs
    # we will already be in the STOPPING state if we're doing a SIGKILL as a result of overrunning stopwaitsecs
    self._check_in_state(ProcessStates.RUNNING, ProcessStates.STARTING, ProcessStates.STOPPING)
    self.change_state(ProcessStates.STOPPING)

    pid = self.pid
    if killasgroup:
        # send to the whole process group instead
        pid = -self.pid

    try:
        try:
            os.kill(pid, sig)
        except OSError as exc:
            if exc.errno == errno.ESRCH:
                log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s', processname, self.pid, str(exc))  # noqa
                # we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
                # processing.
                return None
            raise
    except Exception:  # noqa
        # unexpected failure: flag the process state as UNKNOWN and unwind the kill attempt
        tb = traceback.format_exc()
        fmt, args = 'unknown problem killing %s (%s):%s', (processname, self.pid, tb)
        log.critical(fmt, *args)
        self.change_state(ProcessStates.UNKNOWN)
        self.killing = False
        self.delay = 0
        return fmt % args

    return None
|
505
|
+
|
506
|
+
def signal(self, sig: int) -> ta.Optional[str]:
    """
    Send a signal to the subprocess, without intending to kill it.

    Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
    running.
    """
    processname = as_string(self.config.name)
    args: tuple
    if not self.pid:
        fmt, args = "attempted to send %s sig %s but it wasn't running", (processname, signame(sig))
        log.debug(fmt, *args)
        return fmt % args

    log.debug('sending %s (pid %s) sig %s', processname, self.pid, signame(sig))

    self._check_in_state(ProcessStates.RUNNING, ProcessStates.STARTING, ProcessStates.STOPPING)

    try:
        try:
            os.kill(self.pid, sig)
        except OSError as exc:
            if exc.errno == errno.ESRCH:
                log.debug('unable to signal %s (pid %s), it probably just now exited '
                          'on its own: %s', processname, self.pid, str(exc))
                # we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
                # processing.
                return None
            raise
    except Exception:  # noqa
        tb = traceback.format_exc()
        # NOTE(review): the format reads 'sending sig %s' but the first arg is the
        # process name, not the signal — the message wording looks off; confirm intent.
        fmt, args = 'unknown problem sending sig %s (%s):%s', (processname, self.pid, tb)
        log.critical(fmt, *args)
        self.change_state(ProcessStates.UNKNOWN)
        return fmt % args

    return None
|
543
|
+
|
544
|
+
def finish(self, sts: int) -> None:
    """
    The process was reaped and we need to report and manage its state

    :param sts: raw wait status from waitpid; decoded into (exitstatus, message)

    Transitions: STOPPING -> STOPPED (if we were killing), STARTING -> BACKOFF
    (exited too quickly), otherwise RUNNING -> EXITED. Also resets pid/pipes/
    dispatchers and rejects any in-flight event-listener event.
    """
    self.drain()

    es, msg = decode_wait_status(sts)

    now = time.time()

    self._check_and_adjust_for_system_clock_rollback(now)

    self.laststop = now
    processname = as_string(self.config.name)

    if now > self.laststart:
        too_quickly = now - self.laststart < self.config.startsecs
    else:
        # laststart in the future: can't measure uptime, assume it was long enough
        too_quickly = False
        log.warning(
            "process '%s' (%s) laststart time is in the future, don't "
            "know how long process was running so assuming it did "
            "not exit too quickly", processname, self.pid)

    exit_expected = es in self.config.exitcodes

    if self.killing:
        # likely the result of a stop request implies STOPPING -> STOPPED
        self.killing = False
        self.delay = 0
        self.exitstatus = es

        fmt, args = 'stopped: %s (%s)', (processname, msg)
        self._check_in_state(ProcessStates.STOPPING)
        self.change_state(ProcessStates.STOPPED)
        if exit_expected:
            log.info(fmt, *args)
        else:
            log.warning(fmt, *args)

    elif too_quickly:
        # the program did not stay up long enough to make it to RUNNING implies STARTING -> BACKOFF
        self.exitstatus = None
        self.spawn_err = 'Exited too quickly (process log may have details)'
        self._check_in_state(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        log.warning('exited: %s (%s)', processname, msg + '; not expected')

    else:
        # this finish was not the result of a stop request, the program was in the RUNNING state but exited implies
        # RUNNING -> EXITED normally but see next comment
        self.delay = 0
        self.backoff = 0
        self.exitstatus = es

        # if the process was STARTING but a system time change causes self.laststart to be in the future, the normal
        # STARTING->RUNNING transition can be subverted so we perform the transition here.
        if self.state == ProcessStates.STARTING:
            self.change_state(ProcessStates.RUNNING)

        self._check_in_state(ProcessStates.RUNNING)

        if exit_expected:
            # expected exit code
            self.change_state(ProcessStates.EXITED, expected=True)
            log.info('exited: %s (%s)', processname, msg + '; expected')
        else:
            # unexpected exit code
            self.spawn_err = f'Bad exit code {es}'
            self.change_state(ProcessStates.EXITED, expected=False)
            log.warning('exited: %s (%s)', processname, msg + '; not expected')

    # the child is gone: drop pid and tear down the parent-side pipes/dispatchers
    self._pid = 0
    close_parent_pipes(self._pipes)
    self._pipes = {}
    self._dispatchers = {}

    # if we died before we processed the current event (only happens if we're an event listener), notify the event
    # system that this event was rejected so it can be processed again.
    if self.event is not None:
        # Note: this should only be true if we were in the BUSY state when finish() was called.
        notify_event(EventRejectedEvent(self, self.event))  # type: ignore
        self.event = None
|
625
|
+
|
626
|
+
def set_uid(self) -> ta.Optional[str]:
    """
    Drop privileges to the configured uid, if any.

    :return: an error message string on failure, None on success or when no uid is configured
    """
    uid = self.config.uid
    if uid is None:
        return None
    return drop_privileges(uid)
|
631
|
+
|
632
|
+
def __lt__(self, other):
    """Order subprocesses by configured priority (completed by total_ordering)."""
    mine = self.config.priority
    theirs = other.config.priority
    return mine < theirs
|
634
|
+
|
635
|
+
def __eq__(self, other):
    """Subprocesses compare equal when their configured priorities match."""
    mine = self.config.priority
    theirs = other.config.priority
    return mine == theirs
|
637
|
+
|
638
|
+
def __repr__(self):
    # repr can't return anything other than a native string, but the name might be unicode - a problem on Python 2.
    name = self.config.name
    state_desc = get_process_state_description(self.get_state())
    return f'<Subprocess at {id(self)} with name {name} in state {state_desc}>'
|
642
|
+
|
643
|
+
def get_state(self) -> ProcessState:
    """Return the current process state code."""
    return self.state
|
645
|
+
|
646
|
+
def transition(self) -> None:
    """
    Advance the process state machine one tick based on the current time:
    restart EXITED/STOPPED/BACKOFF processes as configured, promote STARTING
    to RUNNING after startsecs, give up after too many retries, and escalate
    an overdue STOPPING process to SIGKILL.
    """
    now = time.time()
    state = self.state

    self._check_and_adjust_for_system_clock_rollback(now)

    logger = log

    # NOTE(review): '>' RESTARTING presumably selects the RUNNING state —
    # confirm against the ordering of SupervisorStates values.
    if self.context.state > SupervisorStates.RESTARTING:
        # dont start any processes if supervisor is shutting down
        if state == ProcessStates.EXITED:
            if self.config.autorestart:
                if self.config.autorestart is RestartUnconditionally:
                    # EXITED -> STARTING
                    self.spawn()
                elif self.exitstatus not in self.config.exitcodes:  # type: ignore
                    # EXITED -> STARTING
                    self.spawn()

        elif state == ProcessStates.STOPPED and not self.laststart:
            # never started before; honor autostart
            if self.config.autostart:
                # STOPPED -> STARTING
                self.spawn()

        elif state == ProcessStates.BACKOFF:
            if self.backoff <= self.config.startretries:
                if now > self.delay:
                    # BACKOFF -> STARTING
                    self.spawn()

    processname = as_string(self.config.name)
    if state == ProcessStates.STARTING:
        if now - self.laststart > self.config.startsecs:
            # STARTING -> RUNNING if the proc has started successfully and it has stayed up for at least
            # proc.config.startsecs,
            self.delay = 0
            self.backoff = 0
            self._check_in_state(ProcessStates.STARTING)
            self.change_state(ProcessStates.RUNNING)
            msg = ('entered RUNNING state, process has stayed up for > than %s seconds (startsecs)' % self.config.startsecs)  # noqa
            logger.info('success: %s %s', processname, msg)

    if state == ProcessStates.BACKOFF:
        if self.backoff > self.config.startretries:
            # BACKOFF -> FATAL if the proc has exceeded its number of retries
            self.give_up()
            msg = ('entered FATAL state, too many start retries too quickly')
            logger.info('gave up: %s %s', processname, msg)

    elif state == ProcessStates.STOPPING:
        time_left = self.delay - now
        if time_left <= 0:
            # kill processes which are taking too long to stop with a final sigkill. if this doesn't kill it, the
            # process will be stuck in the STOPPING state forever.
            log.warning('killing \'%s\' (%s) with SIGKILL', processname, self.pid)
            self.kill(signal.SIGKILL)
|
702
|
+
|
703
|
+
def create_auto_child_logs(self) -> None:
    """Placeholder for creating temporary auto child logfiles (erased at start time); currently a no-op."""
    # temporary logfiles which are erased at start time
    # get_autoname = self.context.get_auto_child_log_name  # noqa
    # sid = self.context.config.identifier  # noqa
    # name = self.config.name  # noqa
    # if self.stdout_logfile is Automatic:
    #     self.stdout_logfile = get_autoname(name, sid, 'stdout')
    # if self.stderr_logfile is Automatic:
    #     self.stderr_logfile = get_autoname(name, sid, 'stderr')
    pass
|
713
|
+
|
714
|
+
|
715
|
+
@functools.total_ordering
class ProcessGroup:
    """
    A named collection of Subprocess instances sharing a ProcessGroupConfig.
    Ordered by configured priority (total_ordering from __lt__/__eq__ below).
    """

    def __init__(self, config: ProcessGroupConfig, context: ServerContext):
        """
        :param config: the group configuration, including its process configs
        :param context: the server context shared by all processes
        """
        super().__init__()
        self.config = config
        self.context = context
        # name -> Subprocess, one per configured process
        self.processes = {}
        for pconfig in self.config.processes or []:
            process = Subprocess(pconfig, self, self.context)
            self.processes[pconfig.name] = process
|
725
|
+
|
726
|
+
def __lt__(self, other):
    """Order groups by configured priority (completed by total_ordering)."""
    mine = self.config.priority
    theirs = other.config.priority
    return mine < theirs
|
728
|
+
|
729
|
+
def __eq__(self, other):
    """Groups compare equal when their configured priorities match."""
    mine = self.config.priority
    theirs = other.config.priority
    return mine == theirs
|
731
|
+
|
732
|
+
def __repr__(self):
    # repr can't return anything other than a native string, but the name might be unicode - a problem on Python 2.
    name = self.config.name
    cls = self.__class__.__name__
    return f'<{cls} instance at {id(self)} named {name}>'
|
736
|
+
|
737
|
+
def remove_logs(self) -> None:
    """Delete the log files of every process in the group."""
    for proc in self.processes.values():
        proc.remove_logs()
|
740
|
+
|
741
|
+
def reopen_logs(self) -> None:
    """Reopen the log files of every process in the group (e.g. after rotation)."""
    for proc in self.processes.values():
        proc.reopen_logs()
|
744
|
+
|
745
|
+
def stop_all(self) -> None:
    """Stop every process in the group, in descending priority order."""
    ordered = sorted(self.processes.values())
    ordered.reverse()  # stop in desc priority order

    for proc in ordered:
        state = proc.get_state()
        if state in (ProcessStates.RUNNING, ProcessStates.STARTING):
            # RUNNING/STARTING -> STOPPING
            proc.stop()
        elif state == ProcessStates.BACKOFF:
            # BACKOFF -> FATAL
            proc.give_up()
|
763
|
+
|
764
|
+
def get_unstopped_processes(self) -> ta.List[Subprocess]:
    """Return the processes whose current state is not one of STOPPED_STATES."""
    unstopped = []
    for proc in self.processes.values():
        if proc.get_state() not in STOPPED_STATES:
            unstopped.append(proc)
    return unstopped
|
766
|
+
|
767
|
+
def get_dispatchers(self) -> ta.Dict[int, Dispatcher]:
    """Merge every process's fd-keyed dispatcher map into a single dict."""
    merged: ta.Dict[int, Dispatcher] = {}
    for proc in self.processes.values():
        merged.update(proc._dispatchers)  # noqa
    return merged
|
772
|
+
|
773
|
+
def before_remove(self) -> None:
    """Hook invoked before the group is removed from the supervisor; no-op by default."""
    pass
|
775
|
+
|
776
|
+
def transition(self) -> None:
    """Advance the state machine of every process in the group by one tick."""
    for process in self.processes.values():
        process.transition()
|
779
|
+
|
780
|
+
def after_setuid(self) -> None:
    """After privileges are dropped, (re)create auto child logs for each process."""
    for process in self.processes.values():
        process.create_auto_child_logs()
|