ominfra 0.0.0.dev126__py3-none-any.whl → 0.0.0.dev128__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- ominfra/clouds/aws/auth.py +1 -1
- ominfra/deploy/_executor.py +1 -1
- ominfra/deploy/poly/_main.py +1 -1
- ominfra/pyremote/_runcommands.py +1 -1
- ominfra/scripts/journald2aws.py +2 -2
- ominfra/scripts/supervisor.py +4736 -4166
- ominfra/supervisor/configs.py +34 -11
- ominfra/supervisor/context.py +7 -345
- ominfra/supervisor/dispatchers.py +21 -324
- ominfra/supervisor/dispatchersimpl.py +343 -0
- ominfra/supervisor/groups.py +33 -111
- ominfra/supervisor/groupsimpl.py +86 -0
- ominfra/supervisor/inject.py +45 -20
- ominfra/supervisor/main.py +3 -3
- ominfra/supervisor/pipes.py +85 -0
- ominfra/supervisor/poller.py +42 -38
- ominfra/supervisor/privileges.py +65 -0
- ominfra/supervisor/process.py +6 -742
- ominfra/supervisor/processimpl.py +516 -0
- ominfra/supervisor/setup.py +38 -0
- ominfra/supervisor/setupimpl.py +262 -0
- ominfra/supervisor/spawning.py +32 -0
- ominfra/supervisor/spawningimpl.py +350 -0
- ominfra/supervisor/supervisor.py +67 -84
- ominfra/supervisor/types.py +101 -47
- ominfra/supervisor/utils/__init__.py +0 -0
- ominfra/supervisor/utils/collections.py +52 -0
- ominfra/supervisor/utils/diag.py +31 -0
- ominfra/supervisor/utils/fds.py +46 -0
- ominfra/supervisor/utils/fs.py +47 -0
- ominfra/supervisor/utils/os.py +45 -0
- ominfra/supervisor/utils/ostypes.py +9 -0
- ominfra/supervisor/utils/signals.py +60 -0
- ominfra/supervisor/utils/strings.py +105 -0
- ominfra/supervisor/utils/users.py +67 -0
- {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/METADATA +3 -3
- {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/RECORD +41 -25
- ominfra/supervisor/datatypes.py +0 -175
- ominfra/supervisor/signals.py +0 -52
- ominfra/supervisor/utils.py +0 -206
- {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/LICENSE +0 -0
- {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/WHEEL +0 -0
- {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/entry_points.txt +0 -0
- {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/top_level.txt +0 -0
ominfra/supervisor/process.py
CHANGED
@@ -1,755 +1,19 @@
|
|
1
1
|
# ruff: noqa: UP006 UP007
|
2
|
-
import errno
|
3
|
-
import os.path
|
4
|
-
import shlex
|
5
|
-
import signal
|
6
|
-
import time
|
7
|
-
import traceback
|
8
2
|
import typing as ta
|
9
3
|
|
10
|
-
from omlish.lite.check import check_isinstance
|
11
|
-
from omlish.lite.logs import log
|
12
|
-
from omlish.lite.typing import Func
|
13
|
-
|
14
|
-
from .configs import ProcessConfig
|
15
|
-
from .context import check_execv_args
|
16
|
-
from .context import close_child_pipes
|
17
|
-
from .context import close_parent_pipes
|
18
|
-
from .context import drop_privileges
|
19
|
-
from .context import make_pipes
|
20
|
-
from .datatypes import RestartUnconditionally
|
21
|
-
from .events import PROCESS_STATE_EVENT_MAP
|
22
|
-
from .events import EventCallbacks
|
23
|
-
from .events import ProcessCommunicationEvent
|
24
|
-
from .events import ProcessCommunicationStderrEvent
|
25
|
-
from .events import ProcessCommunicationStdoutEvent
|
26
|
-
from .exceptions import BadCommandError
|
27
|
-
from .exceptions import ProcessError
|
28
|
-
from .signals import sig_name
|
29
|
-
from .states import ProcessState
|
30
|
-
from .states import SupervisorState
|
31
|
-
from .types import Dispatcher
|
32
|
-
from .types import InputDispatcher
|
33
|
-
from .types import OutputDispatcher
|
34
4
|
from .types import Process
|
35
|
-
from .types import ProcessGroup
|
36
|
-
from .types import ServerContext
|
37
|
-
from .utils import as_bytes
|
38
|
-
from .utils import as_string
|
39
|
-
from .utils import close_fd
|
40
|
-
from .utils import compact_traceback
|
41
|
-
from .utils import decode_wait_status
|
42
|
-
from .utils import get_path
|
43
|
-
from .utils import real_exit
|
44
|
-
|
45
|
-
|
46
|
-
# (process: Process, event_type: ta.Type[ProcessCommunicationEvent], fd: int)
|
47
|
-
OutputDispatcherFactory = ta.NewType('OutputDispatcherFactory', Func[OutputDispatcher])
|
48
|
-
|
49
|
-
# (process: Process, event_type: ta.Type[ProcessCommunicationEvent], fd: int)
|
50
|
-
InputDispatcherFactory = ta.NewType('InputDispatcherFactory', Func[InputDispatcher])
|
51
|
-
|
52
|
-
InheritedFds = ta.NewType('InheritedFds', ta.FrozenSet[int])
|
5
|
+
from .utils.ostypes import Pid
|
53
6
|
|
54
7
|
|
55
8
|
##
|
56
9
|
|
57
10
|
|
58
|
-
class
|
59
|
-
|
60
|
-
|
61
|
-
def __init__(
|
62
|
-
self,
|
63
|
-
config: ProcessConfig,
|
64
|
-
group: ProcessGroup,
|
65
|
-
*,
|
66
|
-
context: ServerContext,
|
67
|
-
event_callbacks: EventCallbacks,
|
68
|
-
|
69
|
-
output_dispatcher_factory: OutputDispatcherFactory,
|
70
|
-
input_dispatcher_factory: InputDispatcherFactory,
|
71
|
-
|
72
|
-
inherited_fds: ta.Optional[InheritedFds] = None,
|
73
|
-
|
74
|
-
) -> None:
|
75
|
-
super().__init__()
|
76
|
-
|
77
|
-
self._config = config
|
78
|
-
self._group = group
|
79
|
-
|
80
|
-
self._context = context
|
81
|
-
self._event_callbacks = event_callbacks
|
82
|
-
|
83
|
-
self._output_dispatcher_factory = output_dispatcher_factory
|
84
|
-
self._input_dispatcher_factory = input_dispatcher_factory
|
85
|
-
|
86
|
-
self._inherited_fds = InheritedFds(frozenset(inherited_fds or []))
|
87
|
-
|
88
|
-
self._dispatchers: ta.Dict[int, Dispatcher] = {}
|
89
|
-
self._pipes: ta.Dict[str, int] = {}
|
90
|
-
|
91
|
-
self._state = ProcessState.STOPPED
|
92
|
-
self._pid = 0 # 0 when not running
|
93
|
-
|
94
|
-
self._last_start = 0. # Last time the subprocess was started; 0 if never
|
95
|
-
self._last_stop = 0. # Last time the subprocess was stopped; 0 if never
|
96
|
-
self._last_stop_report = 0. # Last time "waiting for x to stop" logged, to throttle
|
97
|
-
self._delay = 0. # If nonzero, delay starting or killing until this time
|
98
|
-
|
99
|
-
self._administrative_stop = False # true if process has been stopped by an admin
|
100
|
-
self._system_stop = False # true if process has been stopped by the system
|
101
|
-
|
102
|
-
self._killing = False # true if we are trying to kill this process
|
103
|
-
|
104
|
-
self._backoff = 0 # backoff counter (to startretries)
|
105
|
-
|
106
|
-
self._exitstatus: ta.Optional[int] = None # status attached to dead process by finish()
|
107
|
-
self._spawn_err: ta.Optional[str] = None # error message attached by spawn() if any
|
108
|
-
|
109
|
-
@property
|
110
|
-
def pid(self) -> int:
|
111
|
-
return self._pid
|
112
|
-
|
113
|
-
@property
|
114
|
-
def group(self) -> ProcessGroup:
|
115
|
-
return self._group
|
116
|
-
|
117
|
-
@property
|
118
|
-
def config(self) -> ProcessConfig:
|
119
|
-
return self._config
|
120
|
-
|
121
|
-
@property
|
122
|
-
def context(self) -> ServerContext:
|
123
|
-
return self._context
|
124
|
-
|
125
|
-
@property
|
126
|
-
def state(self) -> ProcessState:
|
127
|
-
return self._state
|
128
|
-
|
129
|
-
@property
|
130
|
-
def backoff(self) -> int:
|
131
|
-
return self._backoff
|
132
|
-
|
133
|
-
def get_dispatchers(self) -> ta.Mapping[int, Dispatcher]:
|
134
|
-
return self._dispatchers
|
135
|
-
|
136
|
-
def remove_logs(self) -> None:
|
137
|
-
for dispatcher in self._dispatchers.values():
|
138
|
-
if hasattr(dispatcher, 'remove_logs'):
|
139
|
-
dispatcher.remove_logs()
|
140
|
-
|
141
|
-
def reopen_logs(self) -> None:
|
142
|
-
for dispatcher in self._dispatchers.values():
|
143
|
-
if hasattr(dispatcher, 'reopen_logs'):
|
144
|
-
dispatcher.reopen_logs()
|
145
|
-
|
146
|
-
def drain(self) -> None:
|
147
|
-
for dispatcher in self._dispatchers.values():
|
148
|
-
# note that we *must* call readable() for every dispatcher, as it may have side effects for a given
|
149
|
-
# dispatcher (eg. call handle_listener_state_change for event listener processes)
|
150
|
-
if dispatcher.readable():
|
151
|
-
dispatcher.handle_read_event()
|
152
|
-
if dispatcher.writable():
|
153
|
-
dispatcher.handle_write_event()
|
154
|
-
|
155
|
-
def write(self, chars: ta.Union[bytes, str]) -> None:
|
156
|
-
if not self.pid or self._killing:
|
157
|
-
raise OSError(errno.EPIPE, 'Process already closed')
|
158
|
-
|
159
|
-
stdin_fd = self._pipes['stdin']
|
160
|
-
if stdin_fd is None:
|
161
|
-
raise OSError(errno.EPIPE, 'Process has no stdin channel')
|
162
|
-
|
163
|
-
dispatcher = check_isinstance(self._dispatchers[stdin_fd], InputDispatcher)
|
164
|
-
if dispatcher.closed:
|
165
|
-
raise OSError(errno.EPIPE, "Process' stdin channel is closed")
|
166
|
-
|
167
|
-
dispatcher.write(chars)
|
168
|
-
dispatcher.flush() # this must raise EPIPE if the pipe is closed
|
169
|
-
|
170
|
-
def _get_execv_args(self) -> ta.Tuple[str, ta.Sequence[str]]:
|
171
|
-
"""
|
172
|
-
Internal: turn a program name into a file name, using $PATH, make sure it exists / is executable, raising a
|
173
|
-
ProcessError if not
|
174
|
-
"""
|
175
|
-
|
176
|
-
try:
|
177
|
-
commandargs = shlex.split(self._config.command)
|
178
|
-
except ValueError as e:
|
179
|
-
raise BadCommandError(f"can't parse command {self._config.command!r}: {e}") # noqa
|
180
|
-
|
181
|
-
if commandargs:
|
182
|
-
program = commandargs[0]
|
183
|
-
else:
|
184
|
-
raise BadCommandError('command is empty')
|
185
|
-
|
186
|
-
if '/' in program:
|
187
|
-
filename = program
|
188
|
-
try:
|
189
|
-
st = os.stat(filename)
|
190
|
-
except OSError:
|
191
|
-
st = None
|
192
|
-
|
193
|
-
else:
|
194
|
-
path = get_path()
|
195
|
-
found = None
|
196
|
-
st = None
|
197
|
-
for dir in path: # noqa
|
198
|
-
found = os.path.join(dir, program)
|
199
|
-
try:
|
200
|
-
st = os.stat(found)
|
201
|
-
except OSError:
|
202
|
-
pass
|
203
|
-
else:
|
204
|
-
break
|
205
|
-
if st is None:
|
206
|
-
filename = program
|
207
|
-
else:
|
208
|
-
filename = found # type: ignore
|
209
|
-
|
210
|
-
# check_execv_args will raise a ProcessError if the execv args are bogus, we break it out into a separate
|
211
|
-
# options method call here only to service unit tests
|
212
|
-
check_execv_args(filename, commandargs, st)
|
213
|
-
|
214
|
-
return filename, commandargs
|
215
|
-
|
216
|
-
def change_state(self, new_state: ProcessState, expected: bool = True) -> bool:
|
217
|
-
old_state = self._state
|
218
|
-
if new_state is old_state:
|
219
|
-
return False
|
220
|
-
|
221
|
-
self._state = new_state
|
222
|
-
if new_state == ProcessState.BACKOFF:
|
223
|
-
now = time.time()
|
224
|
-
self._backoff += 1
|
225
|
-
self._delay = now + self._backoff
|
226
|
-
|
227
|
-
event_class = PROCESS_STATE_EVENT_MAP.get(new_state)
|
228
|
-
if event_class is not None:
|
229
|
-
event = event_class(self, old_state, expected)
|
230
|
-
self._event_callbacks.notify(event)
|
231
|
-
|
232
|
-
return True
|
233
|
-
|
234
|
-
def _check_in_state(self, *states: ProcessState) -> None:
|
235
|
-
if self._state not in states:
|
236
|
-
current_state = self._state.name
|
237
|
-
allowable_states = ' '.join(s.name for s in states)
|
238
|
-
process_name = as_string(self._config.name)
|
239
|
-
raise RuntimeError('Assertion failed for %s: %s not in %s' % (process_name, current_state, allowable_states)) # noqa
|
240
|
-
|
241
|
-
def _record_spawn_err(self, msg: str) -> None:
|
242
|
-
self._spawn_err = msg
|
243
|
-
log.info('_spawn_err: %s', msg)
|
244
|
-
|
245
|
-
def spawn(self) -> ta.Optional[int]:
|
246
|
-
process_name = as_string(self._config.name)
|
247
|
-
|
248
|
-
if self.pid:
|
249
|
-
log.warning('process \'%s\' already running', process_name)
|
250
|
-
return None
|
251
|
-
|
252
|
-
self._killing = False
|
253
|
-
self._spawn_err = None
|
254
|
-
self._exitstatus = None
|
255
|
-
self._system_stop = False
|
256
|
-
self._administrative_stop = False
|
257
|
-
|
258
|
-
self._last_start = time.time()
|
259
|
-
|
260
|
-
self._check_in_state(
|
261
|
-
ProcessState.EXITED,
|
262
|
-
ProcessState.FATAL,
|
263
|
-
ProcessState.BACKOFF,
|
264
|
-
ProcessState.STOPPED,
|
265
|
-
)
|
266
|
-
|
267
|
-
self.change_state(ProcessState.STARTING)
|
268
|
-
|
269
|
-
try:
|
270
|
-
filename, argv = self._get_execv_args()
|
271
|
-
except ProcessError as what:
|
272
|
-
self._record_spawn_err(what.args[0])
|
273
|
-
self._check_in_state(ProcessState.STARTING)
|
274
|
-
self.change_state(ProcessState.BACKOFF)
|
275
|
-
return None
|
276
|
-
|
277
|
-
try:
|
278
|
-
self._dispatchers, self._pipes = self._make_dispatchers() # type: ignore
|
279
|
-
except OSError as why:
|
280
|
-
code = why.args[0]
|
281
|
-
if code == errno.EMFILE:
|
282
|
-
# too many file descriptors open
|
283
|
-
msg = f"too many open files to spawn '{process_name}'"
|
284
|
-
else:
|
285
|
-
msg = f"unknown error making dispatchers for '{process_name}': {errno.errorcode.get(code, code)}"
|
286
|
-
self._record_spawn_err(msg)
|
287
|
-
self._check_in_state(ProcessState.STARTING)
|
288
|
-
self.change_state(ProcessState.BACKOFF)
|
289
|
-
return None
|
290
|
-
|
291
|
-
try:
|
292
|
-
pid = os.fork()
|
293
|
-
except OSError as why:
|
294
|
-
code = why.args[0]
|
295
|
-
if code == errno.EAGAIN:
|
296
|
-
# process table full
|
297
|
-
msg = f'Too many processes in process table to spawn \'{process_name}\''
|
298
|
-
else:
|
299
|
-
msg = f'unknown error during fork for \'{process_name}\': {errno.errorcode.get(code, code)}'
|
300
|
-
self._record_spawn_err(msg)
|
301
|
-
self._check_in_state(ProcessState.STARTING)
|
302
|
-
self.change_state(ProcessState.BACKOFF)
|
303
|
-
close_parent_pipes(self._pipes)
|
304
|
-
close_child_pipes(self._pipes)
|
305
|
-
return None
|
306
|
-
|
307
|
-
if pid != 0:
|
308
|
-
return self._spawn_as_parent(pid)
|
309
|
-
|
310
|
-
else:
|
311
|
-
self._spawn_as_child(filename, argv)
|
312
|
-
return None
|
313
|
-
|
314
|
-
def _make_dispatchers(self) -> ta.Tuple[ta.Mapping[int, Dispatcher], ta.Mapping[str, int]]:
|
315
|
-
use_stderr = not self._config.redirect_stderr
|
316
|
-
|
317
|
-
p = make_pipes(use_stderr)
|
318
|
-
stdout_fd, stderr_fd, stdin_fd = p['stdout'], p['stderr'], p['stdin']
|
319
|
-
|
320
|
-
dispatchers: ta.Dict[int, Dispatcher] = {}
|
321
|
-
|
322
|
-
dispatcher_kw = dict(
|
323
|
-
event_callbacks=self._event_callbacks,
|
324
|
-
)
|
325
|
-
|
326
|
-
etype: ta.Type[ProcessCommunicationEvent]
|
327
|
-
if stdout_fd is not None:
|
328
|
-
etype = ProcessCommunicationStdoutEvent
|
329
|
-
dispatchers[stdout_fd] = check_isinstance(self._output_dispatcher_factory(
|
330
|
-
self,
|
331
|
-
etype,
|
332
|
-
stdout_fd,
|
333
|
-
**dispatcher_kw,
|
334
|
-
), OutputDispatcher)
|
335
|
-
|
336
|
-
if stderr_fd is not None:
|
337
|
-
etype = ProcessCommunicationStderrEvent
|
338
|
-
dispatchers[stderr_fd] = check_isinstance(self._output_dispatcher_factory(
|
339
|
-
self,
|
340
|
-
etype,
|
341
|
-
stderr_fd,
|
342
|
-
**dispatcher_kw,
|
343
|
-
), OutputDispatcher)
|
344
|
-
|
345
|
-
if stdin_fd is not None:
|
346
|
-
dispatchers[stdin_fd] = check_isinstance(self._input_dispatcher_factory(
|
347
|
-
self,
|
348
|
-
'stdin',
|
349
|
-
stdin_fd,
|
350
|
-
**dispatcher_kw,
|
351
|
-
), InputDispatcher)
|
352
|
-
|
353
|
-
return dispatchers, p
|
11
|
+
class ProcessStateError(RuntimeError):
|
12
|
+
pass
|
354
13
|
|
355
|
-
def _spawn_as_parent(self, pid: int) -> int:
|
356
|
-
# Parent
|
357
|
-
self._pid = pid
|
358
|
-
close_child_pipes(self._pipes)
|
359
|
-
log.info('spawned: \'%s\' with pid %s', as_string(self._config.name), pid)
|
360
|
-
self._spawn_err = None
|
361
|
-
self._delay = time.time() + self._config.startsecs
|
362
|
-
self.context.pid_history[pid] = self
|
363
|
-
return pid
|
364
14
|
|
365
|
-
|
366
|
-
os.dup2(self._pipes['child_stdin'], 0)
|
367
|
-
os.dup2(self._pipes['child_stdout'], 1)
|
368
|
-
if self._config.redirect_stderr:
|
369
|
-
os.dup2(self._pipes['child_stdout'], 2)
|
370
|
-
else:
|
371
|
-
os.dup2(self._pipes['child_stderr'], 2)
|
372
|
-
|
373
|
-
for i in range(3, self.context.config.minfds):
|
374
|
-
if i in self._inherited_fds:
|
375
|
-
continue
|
376
|
-
close_fd(i)
|
377
|
-
|
378
|
-
def _spawn_as_child(self, filename: str, argv: ta.Sequence[str]) -> None:
|
379
|
-
try:
|
380
|
-
# prevent child from receiving signals sent to the parent by calling os.setpgrp to create a new process
|
381
|
-
# group for the child; this prevents, for instance, the case of child processes being sent a SIGINT when
|
382
|
-
# running supervisor in foreground mode and Ctrl-C in the terminal window running supervisord is pressed.
|
383
|
-
# Presumably it also prevents HUP, etc received by supervisord from being sent to children.
|
384
|
-
os.setpgrp()
|
385
|
-
|
386
|
-
self._prepare_child_fds()
|
387
|
-
# sending to fd 2 will put this output in the stderr log
|
388
|
-
|
389
|
-
# set user
|
390
|
-
setuid_msg = self.set_uid()
|
391
|
-
if setuid_msg:
|
392
|
-
uid = self._config.uid
|
393
|
-
msg = f"couldn't setuid to {uid}: {setuid_msg}\n"
|
394
|
-
os.write(2, as_bytes('supervisor: ' + msg))
|
395
|
-
return # finally clause will exit the child process
|
396
|
-
|
397
|
-
# set environment
|
398
|
-
env = os.environ.copy()
|
399
|
-
env['SUPERVISOR_ENABLED'] = '1'
|
400
|
-
env['SUPERVISOR_PROCESS_NAME'] = self._config.name
|
401
|
-
if self._group:
|
402
|
-
env['SUPERVISOR_GROUP_NAME'] = self._group.config.name
|
403
|
-
if self._config.environment is not None:
|
404
|
-
env.update(self._config.environment)
|
405
|
-
|
406
|
-
# change directory
|
407
|
-
cwd = self._config.directory
|
408
|
-
try:
|
409
|
-
if cwd is not None:
|
410
|
-
os.chdir(os.path.expanduser(cwd))
|
411
|
-
except OSError as why:
|
412
|
-
code = errno.errorcode.get(why.args[0], why.args[0])
|
413
|
-
msg = f"couldn't chdir to {cwd}: {code}\n"
|
414
|
-
os.write(2, as_bytes('supervisor: ' + msg))
|
415
|
-
return # finally clause will exit the child process
|
416
|
-
|
417
|
-
# set umask, then execve
|
418
|
-
try:
|
419
|
-
if self._config.umask is not None:
|
420
|
-
os.umask(self._config.umask)
|
421
|
-
os.execve(filename, list(argv), env)
|
422
|
-
except OSError as why:
|
423
|
-
code = errno.errorcode.get(why.args[0], why.args[0])
|
424
|
-
msg = f"couldn't exec {argv[0]}: {code}\n"
|
425
|
-
os.write(2, as_bytes('supervisor: ' + msg))
|
426
|
-
except Exception: # noqa
|
427
|
-
(file, fun, line), t, v, tbinfo = compact_traceback()
|
428
|
-
error = f'{t}, {v}: file: {file} line: {line}'
|
429
|
-
msg = f"couldn't exec {filename}: {error}\n"
|
430
|
-
os.write(2, as_bytes('supervisor: ' + msg))
|
431
|
-
|
432
|
-
# this point should only be reached if execve failed. the finally clause will exit the child process.
|
433
|
-
|
434
|
-
finally:
|
435
|
-
os.write(2, as_bytes('supervisor: child process was not spawned\n'))
|
436
|
-
real_exit(127) # exit process with code for spawn failure
|
437
|
-
|
438
|
-
def _check_and_adjust_for_system_clock_rollback(self, test_time):
|
439
|
-
"""
|
440
|
-
Check if system clock has rolled backward beyond test_time. If so, set affected timestamps to test_time.
|
441
|
-
"""
|
442
|
-
|
443
|
-
if self._state == ProcessState.STARTING:
|
444
|
-
self._last_start = min(test_time, self._last_start)
|
445
|
-
if self._delay > 0 and test_time < (self._delay - self._config.startsecs):
|
446
|
-
self._delay = test_time + self._config.startsecs
|
447
|
-
|
448
|
-
elif self._state == ProcessState.RUNNING:
|
449
|
-
if test_time > self._last_start and test_time < (self._last_start + self._config.startsecs):
|
450
|
-
self._last_start = test_time - self._config.startsecs
|
451
|
-
|
452
|
-
elif self._state == ProcessState.STOPPING:
|
453
|
-
self._last_stop_report = min(test_time, self._last_stop_report)
|
454
|
-
if self._delay > 0 and test_time < (self._delay - self._config.stopwaitsecs):
|
455
|
-
self._delay = test_time + self._config.stopwaitsecs
|
456
|
-
|
457
|
-
elif self._state == ProcessState.BACKOFF:
|
458
|
-
if self._delay > 0 and test_time < (self._delay - self._backoff):
|
459
|
-
self._delay = test_time + self._backoff
|
460
|
-
|
461
|
-
def stop(self) -> ta.Optional[str]:
|
462
|
-
self._administrative_stop = True
|
463
|
-
self._last_stop_report = 0
|
464
|
-
return self.kill(self._config.stopsignal)
|
465
|
-
|
466
|
-
def stop_report(self) -> None:
|
467
|
-
"""Log a 'waiting for x to stop' message with throttling."""
|
468
|
-
|
469
|
-
if self._state == ProcessState.STOPPING:
|
470
|
-
now = time.time()
|
471
|
-
|
472
|
-
self._check_and_adjust_for_system_clock_rollback(now)
|
473
|
-
|
474
|
-
if now > (self._last_stop_report + 2): # every 2 seconds
|
475
|
-
log.info('waiting for %s to stop', as_string(self._config.name))
|
476
|
-
self._last_stop_report = now
|
477
|
-
|
478
|
-
def give_up(self) -> None:
|
479
|
-
self._delay = 0
|
480
|
-
self._backoff = 0
|
481
|
-
self._system_stop = True
|
482
|
-
self._check_in_state(ProcessState.BACKOFF)
|
483
|
-
self.change_state(ProcessState.FATAL)
|
484
|
-
|
485
|
-
def kill(self, sig: int) -> ta.Optional[str]:
|
486
|
-
"""
|
487
|
-
Send a signal to the subprocess with the intention to kill it (to make it exit). This may or may not actually
|
488
|
-
kill it.
|
489
|
-
|
490
|
-
Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
|
491
|
-
running.
|
492
|
-
"""
|
493
|
-
now = time.time()
|
494
|
-
|
495
|
-
process_name = as_string(self._config.name)
|
496
|
-
# If the process is in BACKOFF and we want to stop or kill it, then BACKOFF -> STOPPED. This is needed because
|
497
|
-
# if startretries is a large number and the process isn't starting successfully, the stop request would be
|
498
|
-
# blocked for a long time waiting for the retries.
|
499
|
-
if self._state == ProcessState.BACKOFF:
|
500
|
-
log.debug('Attempted to kill %s, which is in BACKOFF state.', process_name)
|
501
|
-
self.change_state(ProcessState.STOPPED)
|
502
|
-
return None
|
503
|
-
|
504
|
-
args: tuple
|
505
|
-
if not self.pid:
|
506
|
-
fmt, args = "attempted to kill %s with sig %s but it wasn't running", (process_name, sig_name(sig))
|
507
|
-
log.debug(fmt, *args)
|
508
|
-
return fmt % args
|
509
|
-
|
510
|
-
# If we're in the stopping state, then we've already sent the stop signal and this is the kill signal
|
511
|
-
if self._state == ProcessState.STOPPING:
|
512
|
-
killasgroup = self._config.killasgroup
|
513
|
-
else:
|
514
|
-
killasgroup = self._config.stopasgroup
|
515
|
-
|
516
|
-
as_group = ''
|
517
|
-
if killasgroup:
|
518
|
-
as_group = 'process group '
|
519
|
-
|
520
|
-
log.debug('killing %s (pid %s) %s with signal %s', process_name, self.pid, as_group, sig_name(sig))
|
521
|
-
|
522
|
-
# RUNNING/STARTING/STOPPING -> STOPPING
|
523
|
-
self._killing = True
|
524
|
-
self._delay = now + self._config.stopwaitsecs
|
525
|
-
# we will already be in the STOPPING state if we're doing a SIGKILL as a result of overrunning stopwaitsecs
|
526
|
-
self._check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
|
527
|
-
self.change_state(ProcessState.STOPPING)
|
528
|
-
|
529
|
-
pid = self.pid
|
530
|
-
if killasgroup:
|
531
|
-
# send to the whole process group instead
|
532
|
-
pid = -self.pid
|
533
|
-
|
534
|
-
try:
|
535
|
-
try:
|
536
|
-
os.kill(pid, sig)
|
537
|
-
except OSError as exc:
|
538
|
-
if exc.errno == errno.ESRCH:
|
539
|
-
log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s', process_name, self.pid, str(exc)) # noqa
|
540
|
-
# we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
|
541
|
-
# processing.
|
542
|
-
return None
|
543
|
-
raise
|
544
|
-
except Exception: # noqa
|
545
|
-
tb = traceback.format_exc()
|
546
|
-
fmt, args = 'unknown problem killing %s (%s):%s', (process_name, self.pid, tb)
|
547
|
-
log.critical(fmt, *args)
|
548
|
-
self.change_state(ProcessState.UNKNOWN)
|
549
|
-
self._killing = False
|
550
|
-
self._delay = 0
|
551
|
-
return fmt % args
|
552
|
-
|
553
|
-
return None
|
554
|
-
|
555
|
-
def signal(self, sig: int) -> ta.Optional[str]:
|
556
|
-
"""
|
557
|
-
Send a signal to the subprocess, without intending to kill it.
|
558
|
-
|
559
|
-
Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
|
560
|
-
running.
|
561
|
-
"""
|
562
|
-
process_name = as_string(self._config.name)
|
563
|
-
args: tuple
|
564
|
-
if not self.pid:
|
565
|
-
fmt, args = "attempted to send %s sig %s but it wasn't running", (process_name, sig_name(sig))
|
566
|
-
log.debug(fmt, *args)
|
567
|
-
return fmt % args
|
568
|
-
|
569
|
-
log.debug('sending %s (pid %s) sig %s', process_name, self.pid, sig_name(sig))
|
570
|
-
|
571
|
-
self._check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
|
572
|
-
|
573
|
-
try:
|
574
|
-
try:
|
575
|
-
os.kill(self.pid, sig)
|
576
|
-
except OSError as exc:
|
577
|
-
if exc.errno == errno.ESRCH:
|
578
|
-
log.debug(
|
579
|
-
'unable to signal %s (pid %s), it probably just now exited on its own: %s',
|
580
|
-
process_name,
|
581
|
-
self.pid,
|
582
|
-
str(exc),
|
583
|
-
)
|
584
|
-
# we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
|
585
|
-
# processing.
|
586
|
-
return None
|
587
|
-
raise
|
588
|
-
except Exception: # noqa
|
589
|
-
tb = traceback.format_exc()
|
590
|
-
fmt, args = 'unknown problem sending sig %s (%s):%s', (process_name, self.pid, tb)
|
591
|
-
log.critical(fmt, *args)
|
592
|
-
self.change_state(ProcessState.UNKNOWN)
|
593
|
-
return fmt % args
|
594
|
-
|
595
|
-
return None
|
596
|
-
|
597
|
-
def finish(self, sts: int) -> None:
|
598
|
-
"""The process was reaped and we need to report and manage its state."""
|
599
|
-
|
600
|
-
self.drain()
|
601
|
-
|
602
|
-
es, msg = decode_wait_status(sts)
|
603
|
-
|
604
|
-
now = time.time()
|
605
|
-
|
606
|
-
self._check_and_adjust_for_system_clock_rollback(now)
|
607
|
-
|
608
|
-
self._last_stop = now
|
609
|
-
process_name = as_string(self._config.name)
|
610
|
-
|
611
|
-
if now > self._last_start:
|
612
|
-
too_quickly = now - self._last_start < self._config.startsecs
|
613
|
-
else:
|
614
|
-
too_quickly = False
|
615
|
-
log.warning(
|
616
|
-
"process '%s' (%s) last_start time is in the future, don't know how long process was running so "
|
617
|
-
"assuming it did not exit too quickly",
|
618
|
-
process_name,
|
619
|
-
self.pid,
|
620
|
-
)
|
621
|
-
|
622
|
-
exit_expected = es in self._config.exitcodes
|
623
|
-
|
624
|
-
if self._killing:
|
625
|
-
# likely the result of a stop request implies STOPPING -> STOPPED
|
626
|
-
self._killing = False
|
627
|
-
self._delay = 0
|
628
|
-
self._exitstatus = es
|
629
|
-
|
630
|
-
fmt, args = 'stopped: %s (%s)', (process_name, msg)
|
631
|
-
self._check_in_state(ProcessState.STOPPING)
|
632
|
-
self.change_state(ProcessState.STOPPED)
|
633
|
-
if exit_expected:
|
634
|
-
log.info(fmt, *args)
|
635
|
-
else:
|
636
|
-
log.warning(fmt, *args)
|
637
|
-
|
638
|
-
elif too_quickly:
|
639
|
-
# the program did not stay up long enough to make it to RUNNING implies STARTING -> BACKOFF
|
640
|
-
self._exitstatus = None
|
641
|
-
self._spawn_err = 'Exited too quickly (process log may have details)'
|
642
|
-
self._check_in_state(ProcessState.STARTING)
|
643
|
-
self.change_state(ProcessState.BACKOFF)
|
644
|
-
log.warning('exited: %s (%s)', process_name, msg + '; not expected')
|
645
|
-
|
646
|
-
else:
|
647
|
-
# this finish was not the result of a stop request, the program was in the RUNNING state but exited implies
|
648
|
-
# RUNNING -> EXITED normally but see next comment
|
649
|
-
self._delay = 0
|
650
|
-
self._backoff = 0
|
651
|
-
self._exitstatus = es
|
652
|
-
|
653
|
-
# if the process was STARTING but a system time change causes self.last_start to be in the future, the
|
654
|
-
# normal STARTING->RUNNING transition can be subverted so we perform the transition here.
|
655
|
-
if self._state == ProcessState.STARTING:
|
656
|
-
self.change_state(ProcessState.RUNNING)
|
657
|
-
|
658
|
-
self._check_in_state(ProcessState.RUNNING)
|
659
|
-
|
660
|
-
if exit_expected:
|
661
|
-
# expected exit code
|
662
|
-
self.change_state(ProcessState.EXITED, expected=True)
|
663
|
-
log.info('exited: %s (%s)', process_name, msg + '; expected')
|
664
|
-
else:
|
665
|
-
# unexpected exit code
|
666
|
-
self._spawn_err = f'Bad exit code {es}'
|
667
|
-
self.change_state(ProcessState.EXITED, expected=False)
|
668
|
-
log.warning('exited: %s (%s)', process_name, msg + '; not expected')
|
669
|
-
|
670
|
-
self._pid = 0
|
671
|
-
close_parent_pipes(self._pipes)
|
672
|
-
self._pipes = {}
|
673
|
-
self._dispatchers = {}
|
674
|
-
|
675
|
-
def set_uid(self) -> ta.Optional[str]:
|
676
|
-
if self._config.uid is None:
|
677
|
-
return None
|
678
|
-
msg = drop_privileges(self._config.uid)
|
679
|
-
return msg
|
680
|
-
|
681
|
-
def __repr__(self) -> str:
|
682
|
-
# repr can't return anything other than a native string, but the name might be unicode - a problem on Python 2.
|
683
|
-
name = self._config.name
|
684
|
-
return f'<Subprocess at {id(self)} with name {name} in state {self.get_state().name}>'
|
685
|
-
|
686
|
-
def get_state(self) -> ProcessState:
|
687
|
-
return self._state
|
688
|
-
|
689
|
-
def transition(self) -> None:
|
690
|
-
now = time.time()
|
691
|
-
state = self._state
|
692
|
-
|
693
|
-
self._check_and_adjust_for_system_clock_rollback(now)
|
694
|
-
|
695
|
-
logger = log
|
696
|
-
|
697
|
-
if self.context.state > SupervisorState.RESTARTING:
|
698
|
-
# dont start any processes if supervisor is shutting down
|
699
|
-
if state == ProcessState.EXITED:
|
700
|
-
if self._config.autorestart:
|
701
|
-
if self._config.autorestart is RestartUnconditionally:
|
702
|
-
# EXITED -> STARTING
|
703
|
-
self.spawn()
|
704
|
-
elif self._exitstatus not in self._config.exitcodes:
|
705
|
-
# EXITED -> STARTING
|
706
|
-
self.spawn()
|
707
|
-
|
708
|
-
elif state == ProcessState.STOPPED and not self._last_start:
|
709
|
-
if self._config.autostart:
|
710
|
-
# STOPPED -> STARTING
|
711
|
-
self.spawn()
|
712
|
-
|
713
|
-
elif state == ProcessState.BACKOFF:
|
714
|
-
if self._backoff <= self._config.startretries:
|
715
|
-
if now > self._delay:
|
716
|
-
# BACKOFF -> STARTING
|
717
|
-
self.spawn()
|
718
|
-
|
719
|
-
process_name = as_string(self._config.name)
|
720
|
-
if state == ProcessState.STARTING:
|
721
|
-
if now - self._last_start > self._config.startsecs:
|
722
|
-
# STARTING -> RUNNING if the proc has started successfully and it has stayed up for at least
|
723
|
-
# proc.config.startsecs,
|
724
|
-
self._delay = 0
|
725
|
-
self._backoff = 0
|
726
|
-
self._check_in_state(ProcessState.STARTING)
|
727
|
-
self.change_state(ProcessState.RUNNING)
|
728
|
-
msg = ('entered RUNNING state, process has stayed up for > than %s seconds (startsecs)' % self._config.startsecs) # noqa
|
729
|
-
logger.info('success: %s %s', process_name, msg)
|
730
|
-
|
731
|
-
if state == ProcessState.BACKOFF:
|
732
|
-
if self._backoff > self._config.startretries:
|
733
|
-
# BACKOFF -> FATAL if the proc has exceeded its number of retries
|
734
|
-
self.give_up()
|
735
|
-
msg = ('entered FATAL state, too many start retries too quickly')
|
736
|
-
logger.info('gave up: %s %s', process_name, msg)
|
15
|
+
##
|
737
16
|
|
738
|
-
elif state == ProcessState.STOPPING:
|
739
|
-
time_left = self._delay - now
|
740
|
-
if time_left <= 0:
|
741
|
-
# kill processes which are taking too long to stop with a final sigkill. if this doesn't kill it, the
|
742
|
-
# process will be stuck in the STOPPING state forever.
|
743
|
-
log.warning('killing \'%s\' (%s) with SIGKILL', process_name, self.pid)
|
744
|
-
self.kill(signal.SIGKILL)
|
745
17
|
|
746
|
-
|
747
|
-
|
748
|
-
# get_autoname = self.context.get_auto_child_log_name # noqa
|
749
|
-
# sid = self.context.config.identifier # noqa
|
750
|
-
# name = self._config.name # noqa
|
751
|
-
# if self.stdout_logfile is Automatic:
|
752
|
-
# self.stdout_logfile = get_autoname(name, sid, 'stdout')
|
753
|
-
# if self.stderr_logfile is Automatic:
|
754
|
-
# self.stderr_logfile = get_autoname(name, sid, 'stderr')
|
755
|
-
pass
|
18
|
+
class PidHistory(ta.Dict[Pid, Process]):
|
19
|
+
pass
|