ominfra 0.0.0.dev76__py3-none-any.whl → 0.0.0.dev77__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ominfra/supervisor/__init__.py +1 -0
- ominfra/supervisor/__main__.py +4 -0
- ominfra/supervisor/_supervisor.py +3305 -0
- ominfra/supervisor/compat.py +208 -0
- ominfra/supervisor/configs.py +110 -0
- ominfra/supervisor/context.py +405 -0
- ominfra/supervisor/datatypes.py +171 -0
- ominfra/supervisor/dispatchers.py +304 -0
- ominfra/supervisor/events.py +304 -0
- ominfra/supervisor/exceptions.py +22 -0
- ominfra/supervisor/poller.py +232 -0
- ominfra/supervisor/process.py +782 -0
- ominfra/supervisor/states.py +78 -0
- ominfra/supervisor/supervisor.py +390 -0
- ominfra/supervisor/types.py +49 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev77.dist-info}/METADATA +3 -3
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev77.dist-info}/RECORD +21 -6
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev77.dist-info}/LICENSE +0 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev77.dist-info}/WHEEL +0 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev77.dist-info}/entry_points.txt +0 -0
- {ominfra-0.0.0.dev76.dist-info → ominfra-0.0.0.dev77.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,3305 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# noinspection DuplicatedCode
|
3
|
+
# @omlish-lite
|
4
|
+
# @omlish-script
|
5
|
+
# @omlish-amalg-output supervisor.py
|
6
|
+
# ruff: noqa: N802 UP006 UP007 UP036
|
7
|
+
import abc
|
8
|
+
import contextlib
|
9
|
+
import dataclasses as dc
|
10
|
+
import datetime
|
11
|
+
import errno
|
12
|
+
import fcntl
|
13
|
+
import functools
|
14
|
+
import grp
|
15
|
+
import json
|
16
|
+
import logging
|
17
|
+
import os
|
18
|
+
import pwd
|
19
|
+
import re
|
20
|
+
import resource
|
21
|
+
import select
|
22
|
+
import shlex
|
23
|
+
import signal
|
24
|
+
import stat
|
25
|
+
import sys
|
26
|
+
import tempfile
|
27
|
+
import threading
|
28
|
+
import time
|
29
|
+
import traceback
|
30
|
+
import types
|
31
|
+
import typing as ta
|
32
|
+
import warnings
|
33
|
+
|
34
|
+
|
35
|
+
########################################
|
36
|
+
|
37
|
+
|
38
|
+
# Fail fast on unsupported interpreters: this file targets Python 3.8+.
if sys.version_info < (3, 8):
    raise OSError(
        f'Requires python (3, 8), got {sys.version_info} from {sys.executable}')  # noqa
|
41
|
+
|
42
|
+
|
43
|
+
########################################
|
44
|
+
|
45
|
+
|
46
|
+
# ../compat.py
T = ta.TypeVar('T')  # generic type variable used by the lite check_* helpers

# ../states.py
# Plain ints (not enums) keep the amalgamated script dependency-free.
ProcessState = int  # ta.TypeAlias
SupervisorState = int  # ta.TypeAlias
|
52
|
+
|
53
|
+
|
54
|
+
########################################
|
55
|
+
# ../compat.py
|
56
|
+
|
57
|
+
|
58
|
+
def as_bytes(s: ta.Union[str, bytes], encoding: str = 'utf8') -> bytes:
    """Coerce *s* to bytes, encoding str input with *encoding*."""
    return s if isinstance(s, bytes) else s.encode(encoding)
|
63
|
+
|
64
|
+
|
65
|
+
def as_string(s: ta.Union[str, bytes], encoding='utf8') -> str:
    """Coerce *s* to str, decoding bytes input with *encoding*."""
    return s if isinstance(s, str) else s.decode(encoding)
|
70
|
+
|
71
|
+
|
72
|
+
def compact_traceback() -> ta.Tuple[
    ta.Tuple[str, str, str],
    ta.Type[BaseException],
    BaseException,
    str,
]:
    """
    Summarize the current exception's traceback.

    Returns ((file, function, line), exc_type, exc_value, info) where the
    first element describes the innermost frame, `line` is the line number as
    a string, and `info` is a compact '[file|func|line]' rendering of every
    frame.  (The original annotation claimed an int line number and a
    TracebackType fourth element; both were wrong — the code builds strings.)

    Raises:
        RuntimeError: when called with no active traceback.
    """
    t, v, tb = sys.exc_info()
    if not tb:
        raise RuntimeError('No traceback')

    tbinfo = []
    while tb:
        tbinfo.append((
            tb.tb_frame.f_code.co_filename,
            tb.tb_frame.f_code.co_name,
            str(tb.tb_lineno),
        ))
        tb = tb.tb_next

    # just to be safe: drop the reference so frames aren't pinned alive
    del tb

    file, function, line = tbinfo[-1]
    info = ' '.join(['[%s|%s|%s]' % x for x in tbinfo])  # noqa
    return (file, function, line), t, v, info  # type: ignore
|
96
|
+
|
97
|
+
|
98
|
+
def find_prefix_at_end(haystack: bytes, needle: bytes) -> int:
    """Return the length of the longest proper prefix of *needle* found at the end of *haystack*."""
    n = len(needle) - 1
    while n and not haystack.endswith(needle[:n]):
        n -= 1
    return n
|
103
|
+
|
104
|
+
|
105
|
+
class ExitNow(Exception):  # noqa
    """Control-flow exception requesting an immediate exit (raising sites are outside this chunk)."""
    pass
|
107
|
+
|
108
|
+
|
109
|
+
##
|
110
|
+
|
111
|
+
|
112
|
+
def decode_wait_status(sts: int) -> ta.Tuple[int, str]:
    """
    Decode the status returned by wait() or waitpid().

    Return a tuple (exitstatus, message) where exitstatus is the exit status,
    or -1 if the process was killed by a signal; and message is a message
    telling what happened.  It is the caller's responsibility to display the
    message.
    """
    if os.WIFEXITED(sts):
        code = os.WEXITSTATUS(sts) & 0xffff
        return code, f'exit status {code}'

    if os.WIFSIGNALED(sts):
        sig = os.WTERMSIG(sts)
        msg = f'terminated by {signame(sig)}'
        if hasattr(os, 'WCOREDUMP'):
            dumped = os.WCOREDUMP(sts)
        else:
            # Fall back to the conventional core-dump bit.
            dumped = bool(sts & 0x80)
        if dumped:
            msg += ' (core dumped)'
        return -1, msg

    return -1, 'unknown termination cause 0x%04x' % sts  # noqa
|
136
|
+
|
137
|
+
|
138
|
+
_signames: ta.Optional[ta.Mapping[int, str]] = None  # lazily-built number->name cache


def signame(sig: int) -> str:
    """Return the symbolic name for *sig* (e.g. 'SIGTERM'), or 'signal N' when unknown."""
    global _signames
    names = _signames
    if names is None:
        names = _init_signames()
        _signames = names
    return names.get(sig) or 'signal %d' % sig
|
146
|
+
|
147
|
+
|
148
|
+
def _init_signames() -> ta.Dict[int, str]:
    """Build a {signal number: 'SIGxxx' name} map from the signal module's namespace."""
    out: ta.Dict[int, str] = {}
    for name, value in signal.__dict__.items():
        starts = getattr(name, 'startswith', None)
        if starts is None:
            continue
        # Keep real signal names, but skip SIG_DFL / SIG_IGN handler constants.
        if starts('SIG') and not starts('SIG_'):
            out[value] = name
    return out
|
157
|
+
|
158
|
+
|
159
|
+
class SignalReceiver:
    """Queues delivered signals so the main loop can drain them one at a time."""

    def __init__(self) -> None:
        super().__init__()
        self._signals_recvd: ta.List[int] = []

    def receive(self, sig: int, frame: ta.Any) -> None:
        # Dedupe: a signal that is already pending is not queued twice.
        if sig not in self._signals_recvd:
            self._signals_recvd.append(sig)

    def install(self, *sigs: int) -> None:
        """Register self.receive as the handler for each of *sigs*."""
        for sig in sigs:
            signal.signal(sig, self.receive)

    def get_signal(self) -> ta.Optional[int]:
        """Pop and return the oldest pending signal, or None when empty."""
        return self._signals_recvd.pop(0) if self._signals_recvd else None
|
178
|
+
|
179
|
+
|
180
|
+
def readfd(fd: int) -> bytes:
    """Read up to 128K from *fd*; EWOULDBLOCK/EBADF/EINTR yield b'' instead of raising."""
    try:
        return os.read(fd, 2 << 16)  # 128K
    except OSError as exc:
        if exc.args[0] not in (errno.EWOULDBLOCK, errno.EBADF, errno.EINTR):
            raise
        return b''
|
188
|
+
|
189
|
+
|
190
|
+
def try_unlink(path: str) -> bool:
    """Best-effort os.unlink(); returns True on success, False on any OSError."""
    try:
        os.unlink(path)
        return True
    except OSError:
        return False
|
196
|
+
|
197
|
+
|
198
|
+
def close_fd(fd: int) -> bool:
    """Best-effort os.close(); returns True on success, False on any OSError."""
    try:
        os.close(fd)
        return True
    except OSError:
        return False
|
204
|
+
|
205
|
+
|
206
|
+
def mktempfile(suffix: str, prefix: str, dir: str) -> str:  # noqa
    """Create an empty temp file (left on disk) and return its path."""
    fd, path = tempfile.mkstemp(suffix, prefix, dir)
    os.close(fd)
    return path
|
210
|
+
|
211
|
+
|
212
|
+
def real_exit(code: int) -> None:
    """Terminate the process immediately via os._exit, bypassing atexit handlers and cleanup."""
    os._exit(code)  # noqa
|
214
|
+
|
215
|
+
|
216
|
+
def get_path() -> ta.Sequence[str]:
    """Return a list corresponding to $PATH, or a default when it is unset or empty."""
    p = os.environ.get('PATH', '')
    if p:
        return p.split(os.pathsep)
    return ['/bin', '/usr/bin', '/usr/local/bin']
|
224
|
+
|
225
|
+
|
226
|
+
def normalize_path(v: str) -> str:
    """Expand a leading '~', make the path absolute, then normalize it."""
    expanded = os.path.expanduser(v)
    return os.path.normpath(os.path.abspath(expanded))
|
228
|
+
|
229
|
+
|
230
|
+
# CSI (Control Sequence Introducer) prefix for the ANSI escapes we strip.
ANSI_ESCAPE_BEGIN = b'\x1b['
# Final bytes that terminate the escape sequences handled by strip_escapes().
ANSI_TERMINATORS = (b'H', b'f', b'A', b'B', b'C', b'D', b'R', b's', b'u', b'J', b'K', b'h', b'l', b'p', b'm')


def strip_escapes(s):
    """Remove all ANSI color escapes from the given bytes."""
    out = b''
    visible = True
    i = 0
    end = len(s)
    while i < end:
        if not visible:
            # Inside an escape sequence: skip bytes until a terminator.
            if s[i:i + 1] in ANSI_TERMINATORS:
                visible = True
        else:
            nxt = s.find(ANSI_ESCAPE_BEGIN, i)
            if nxt == -1:
                # No more escapes: the rest is all visible text.
                return out + s[i:]
            out += s[i:nxt]
            i = nxt
            visible = False
        i += 1
    return out
|
253
|
+
|
254
|
+
|
255
|
+
########################################
|
256
|
+
# ../datatypes.py
|
257
|
+
|
258
|
+
|
259
|
+
class Automatic:
    """Sentinel: the logfile location should be chosen automatically."""


class Syslog:
    """TODO deprecated; remove this special 'syslog' filename in the future"""


# Accepted spellings for each special logfile value (compared lowercased).
LOGFILE_NONES = ('none', 'off', None)
LOGFILE_AUTOS = (Automatic, 'auto')
LOGFILE_SYSLOGS = (Syslog, 'syslog')


def logfile_name(val):
    """Map a configured logfile value to None, Automatic, Syslog, or a validated path."""
    key = val.lower() if hasattr(val, 'lower') else val

    if key in LOGFILE_NONES:
        return None
    if key in LOGFILE_AUTOS:
        return Automatic
    if key in LOGFILE_SYSLOGS:
        return Syslog
    return existing_dirpath(val)
|
286
|
+
|
287
|
+
|
288
|
+
def name_to_uid(name: str) -> int:
    """Resolve *name* (a user name or a numeric uid string) to a validated uid."""
    try:
        uid = int(name)
    except ValueError:
        # Not numeric: look it up as a user name.
        try:
            rec = pwd.getpwnam(name)
        except KeyError:
            raise ValueError(f'Invalid user name {name}')  # noqa
        return rec[2]
    try:
        pwd.getpwuid(uid)  # check if uid is valid
    except KeyError:
        raise ValueError(f'Invalid user id {name}')  # noqa
    return uid
|
303
|
+
|
304
|
+
|
305
|
+
def name_to_gid(name: str) -> int:
    """Resolve *name* (a group name or a numeric gid string) to a validated gid."""
    try:
        gid = int(name)
    except ValueError:
        # Not numeric: look it up as a group name.
        try:
            rec = grp.getgrnam(name)
        except KeyError:
            raise ValueError(f'Invalid group name {name}')  # noqa
        return rec[2]
    try:
        grp.getgrgid(gid)  # check if gid is valid
    except KeyError:
        raise ValueError(f'Invalid group id {name}')  # noqa
    return gid
|
320
|
+
|
321
|
+
|
322
|
+
def gid_for_uid(uid: int) -> int:
    """Return the primary group id from the passwd entry for *uid*."""
    return pwd.getpwuid(uid)[3]
|
325
|
+
|
326
|
+
|
327
|
+
def octal_type(arg: ta.Union[str, int]) -> int:
    """Parse *arg* as an octal literal string; ints pass through unchanged."""
    if isinstance(arg, int):
        return arg
    try:
        value = int(arg, 8)
    except (TypeError, ValueError):
        raise ValueError(f'{arg} can not be converted to an octal type')  # noqa
    return value
|
334
|
+
|
335
|
+
|
336
|
+
def existing_directory(v: str) -> str:
    """Expand '~' in *v* and return the result; raise ValueError unless it names a directory."""
    expanded = os.path.expanduser(v)
    if not os.path.isdir(expanded):
        raise ValueError(f'{v} is not an existing directory')
    return expanded
|
341
|
+
|
342
|
+
|
343
|
+
def existing_dirpath(v: str) -> str:
    """Validate that the directory part of path *v* exists ('~' is expanded first)."""
    expanded = os.path.expanduser(v)
    head = os.path.dirname(expanded)  # noqa
    # A bare filename (no directory component) is implicitly relative to cwd.
    if not head or os.path.isdir(head):
        return expanded
    raise ValueError(f'The directory named as part of the path {v} does not exist')
|
352
|
+
|
353
|
+
|
354
|
+
def logging_level(value: ta.Union[str, int]) -> int:
    """
    Coerce a logging level name or number to its numeric value.

    Ints pass through unchanged; names are matched case-insensitively
    ('info', 'DEBUG', ...).  Raises ValueError for unknown names.
    """
    if isinstance(value, int):
        return value
    # BUG FIX: logging.getLevelNamesMapping() only exists on Python 3.12+,
    # but this file supports 3.8+.  logging.getLevelName() maps a valid level
    # NAME to its int on all supported versions (and returns a str for
    # unknown names, which we reject).
    level = logging.getLevelName(str(value).upper())
    if not isinstance(level, int):
        raise ValueError(f'bad logging level name {value!r}')
    return level
|
362
|
+
|
363
|
+
|
364
|
+
class SuffixMultiplier:
    """
    Converts strings like '10kb' to ints via a suffix -> multiplier table.

    *d* maps suffixes to integer multipliers; all keys must share one length
    and matching is case-insensitive.  Values with no recognized suffix are
    scaled by *default*.  Ints pass through unchanged.
    """

    def __init__(self, d, default=1):
        super().__init__()
        self._d = d
        self._default = default
        self._keysz = None  # common key length; every key must agree
        for k in d:
            if self._keysz is None:
                self._keysz = len(k)
            elif self._keysz != len(k):  # type: ignore
                raise ValueError(k)

    def __call__(self, v: ta.Union[str, int]) -> int:
        if isinstance(v, int):
            return v
        v = v.lower()
        sz = self._keysz
        for suffix, mult in self._d.items():
            if v[-sz:] == suffix:  # type: ignore
                return int(v[:-sz]) * mult  # type: ignore
        return int(v) * self._default
|
387
|
+
|
388
|
+
|
389
|
+
# Parses '50kb'/'10MB'-style size strings into byte counts; plain numbers pass through.
byte_size = SuffixMultiplier({
    'kb': 1024,
    'mb': 1024 * 1024,
    'gb': 1024 * 1024 * 1024,
})
|
394
|
+
|
395
|
+
|
396
|
+
# all valid signal numbers
# NOTE(review): the startswith('SIG') filter also picks up the SIG_DFL/SIG_IGN
# handler constants, so membership tests below are slightly permissive.
SIGNUMS = [getattr(signal, k) for k in dir(signal) if k.startswith('SIG')]


def signal_number(value: ta.Union[int, str]) -> int:
    """Coerce a signal number or name ('TERM', 'SIGTERM', '15', 15) to its number."""
    try:
        num = int(value)
    except (ValueError, TypeError):
        # Not numeric: treat as a name, tolerating a missing 'SIG' prefix.
        name = value.strip().upper()  # type: ignore
        attr = name if name.startswith('SIG') else f'SIG{name}'
        num = getattr(signal, attr, None)  # type: ignore
        if num is None:
            raise ValueError(f'value {value!r} is not a valid signal name')  # noqa
    if num not in SIGNUMS:
        raise ValueError(f'value {value!r} is not a valid signal number')
    return num
|
413
|
+
|
414
|
+
|
415
|
+
class RestartWhenExitUnexpected:
    """Sentinel marker value (presumably an autorestart policy; usage is outside this chunk)."""
    pass


class RestartUnconditionally:
    """Sentinel marker value (presumably an autorestart policy; usage is outside this chunk)."""
    pass
|
421
|
+
|
422
|
+
|
423
|
+
########################################
|
424
|
+
# ../exceptions.py
|
425
|
+
|
426
|
+
|
427
|
+
class ProcessError(Exception):
    """ Specialized exceptions used when attempting to start a process """


class BadCommandError(ProcessError):
    """ Indicates the command could not be parsed properly. """


class NotExecutableError(ProcessError):
    """ Indicates that the filespec cannot be executed because its path
    resolves to a file which is not executable, or which is a directory. """


class NotFoundError(ProcessError):
    """ Indicates that the filespec cannot be executed because it could not be found """


class NoPermissionError(ProcessError):
    """
    Indicates that the file cannot be executed because the supervisor process does not possess the appropriate UNIX
    filesystem permission to execute the file.
    """
|
449
|
+
|
450
|
+
|
451
|
+
########################################
|
452
|
+
# ../poller.py
|
453
|
+
|
454
|
+
|
455
|
+
log = logging.getLogger(__name__)  # module logger used by the poller classes below
|
456
|
+
|
457
|
+
|
458
|
+
class BasePoller(abc.ABC):
    """Abstract fd-readiness poller; concrete backends wrap select/poll/kqueue."""

    def __init__(self) -> None:
        super().__init__()

    @abc.abstractmethod
    def register_readable(self, fd: int) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def register_writable(self, fd: int) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def unregister_readable(self, fd: int) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def unregister_writable(self, fd: int) -> None:
        raise NotImplementedError

    @abc.abstractmethod
    def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[int], ta.List[int]]:
        """Wait up to *timeout* seconds; return ([readable fds], [writable fds])."""
        raise NotImplementedError

    def before_daemonize(self) -> None:  # noqa
        # Hook: backends holding kernel objects (e.g. kqueue) close them here.
        pass

    def after_daemonize(self) -> None:  # noqa
        # Hook: backends re-create kernel objects after daemonization here.
        pass

    def close(self) -> None:  # noqa
        pass
|
491
|
+
|
492
|
+
|
493
|
+
class SelectPoller(BasePoller):
    """Poller backed by select.select(); fd interest is tracked in two sets."""

    def __init__(self) -> None:
        super().__init__()

        self._readables: ta.Set[int] = set()
        self._writables: ta.Set[int] = set()

    def register_readable(self, fd: int) -> None:
        self._readables.add(fd)

    def register_writable(self, fd: int) -> None:
        self._writables.add(fd)

    def unregister_readable(self, fd: int) -> None:
        self._readables.discard(fd)

    def unregister_writable(self, fd: int) -> None:
        self._writables.discard(fd)

    def unregister_all(self) -> None:
        self._readables.clear()
        self._writables.clear()

    def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[int], ta.List[int]]:
        try:
            readable, writable, _ = select.select(
                self._readables,
                self._writables,
                [], timeout,
            )
        except OSError as exc:
            code = exc.args[0]
            if code == errno.EINTR:
                log.debug('EINTR encountered in poll')
                return [], []
            if code == errno.EBADF:
                # A registered fd was closed out from under us: drop all
                # registrations and let callers re-register what they need.
                log.debug('EBADF encountered in poll')
                self.unregister_all()
                return [], []
            raise
        return readable, writable
|
534
|
+
|
535
|
+
|
536
|
+
class PollPoller(BasePoller):
    """Poller backed by select.poll()."""

    _READ = select.POLLIN | select.POLLPRI | select.POLLHUP
    _WRITE = select.POLLOUT

    def __init__(self) -> None:
        super().__init__()

        self._poller = select.poll()
        self._readables: ta.Set[int] = set()
        self._writables: ta.Set[int] = set()

    def register_readable(self, fd: int) -> None:
        self._poller.register(fd, self._READ)
        self._readables.add(fd)

    def register_writable(self, fd: int) -> None:
        self._poller.register(fd, self._WRITE)
        self._writables.add(fd)

    def unregister_readable(self, fd: int) -> None:
        self._readables.discard(fd)
        self._poller.unregister(fd)
        # poll() keeps one registration per fd: re-register if still wanted for writing.
        if fd in self._writables:
            self._poller.register(fd, self._WRITE)

    def unregister_writable(self, fd: int) -> None:
        self._writables.discard(fd)
        self._poller.unregister(fd)
        if fd in self._readables:
            self._poller.register(fd, self._READ)

    def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[int], ta.List[int]]:
        readables: ta.List[int] = []
        writables: ta.List[int] = []
        for fd, eventmask in self._poll_fds(timeout):  # type: ignore
            if self._ignore_invalid(fd, eventmask):
                continue
            if eventmask & self._READ:
                readables.append(fd)
            if eventmask & self._WRITE:
                writables.append(fd)
        return readables, writables

    def _poll_fds(self, timeout: float) -> ta.List[ta.Tuple[int, int]]:
        # NOTE(review): a None timeout would raise here (None * 1000);
        # callers appear to pass floats despite poll()'s Optional signature.
        try:
            return self._poller.poll(timeout * 1000)  # poll() takes milliseconds
        except OSError as exc:
            if exc.args[0] == errno.EINTR:
                log.debug('EINTR encountered in poll')
                return []
            raise

    def _ignore_invalid(self, fd: int, eventmask: int) -> bool:
        if eventmask & select.POLLNVAL:
            # POLLNVAL means `fd` value is invalid, not open. When a process quits it's `fd`s are closed so there is no
            # more reason to keep this `fd` registered If the process restarts it's `fd`s are registered again.
            self._poller.unregister(fd)
            self._readables.discard(fd)
            self._writables.discard(fd)
            return True
        return False
|
597
|
+
|
598
|
+
|
599
|
+
class KqueuePoller(BasePoller):
    """Poller backed by kqueue (BSD/macOS; select.kqueue is absent on Linux)."""

    # Max events fetched per kqueue.control() call.
    max_events = 1000

    def __init__(self) -> None:
        super().__init__()

        # Optional because the kqueue is dropped/recreated around daemonization.
        self._kqueue: ta.Optional[ta.Any] = select.kqueue()
        self._readables: set[int] = set()
        self._writables: set[int] = set()

    def register_readable(self, fd: int) -> None:
        self._readables.add(fd)
        kevent = select.kevent(fd, filter=select.KQ_FILTER_READ, flags=select.KQ_EV_ADD)
        self._kqueue_control(fd, kevent)

    def register_writable(self, fd: int) -> None:
        self._writables.add(fd)
        kevent = select.kevent(fd, filter=select.KQ_FILTER_WRITE, flags=select.KQ_EV_ADD)
        self._kqueue_control(fd, kevent)

    def unregister_readable(self, fd: int) -> None:
        kevent = select.kevent(fd, filter=select.KQ_FILTER_READ, flags=select.KQ_EV_DELETE)
        self._readables.discard(fd)
        self._kqueue_control(fd, kevent)

    def unregister_writable(self, fd: int) -> None:
        kevent = select.kevent(fd, filter=select.KQ_FILTER_WRITE, flags=select.KQ_EV_DELETE)
        self._writables.discard(fd)
        self._kqueue_control(fd, kevent)

    def _kqueue_control(self, fd: int, kevent: 'select.kevent') -> None:
        # Apply one change; EBADF just means the fd is already gone, which is fine.
        try:
            self._kqueue.control([kevent], 0)  # type: ignore
        except OSError as error:
            if error.errno == errno.EBADF:
                log.debug('EBADF encountered in kqueue. Invalid file descriptor %s', fd)
            else:
                raise

    def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[int], ta.List[int]]:
        readables, writables = [], []  # type: ignore

        try:
            kevents = self._kqueue.control(None, self.max_events, timeout)  # type: ignore
        except OSError as error:
            if error.errno == errno.EINTR:
                log.debug('EINTR encountered in poll')
                return readables, writables
            raise

        for kevent in kevents:
            if kevent.filter == select.KQ_FILTER_READ:
                readables.append(kevent.ident)
            if kevent.filter == select.KQ_FILTER_WRITE:
                writables.append(kevent.ident)

        return readables, writables

    def before_daemonize(self) -> None:
        # kqueue descriptors do not survive fork: drop ours before daemonizing.
        self.close()

    def after_daemonize(self) -> None:
        # Rebuild the kqueue and re-register everything we were watching.
        self._kqueue = select.kqueue()
        for fd in self._readables:
            self.register_readable(fd)
        for fd in self._writables:
            self.register_writable(fd)

    def close(self) -> None:
        self._kqueue.close()  # type: ignore
        self._kqueue = None
|
670
|
+
|
671
|
+
|
672
|
+
# Pick the best available backend at import time: kqueue (BSD/macOS),
# then poll, falling back to select everywhere else.
Poller: ta.Type[BasePoller]
if hasattr(select, 'kqueue'):
    Poller = KqueuePoller
elif hasattr(select, 'poll'):
    Poller = PollPoller
else:
    Poller = SelectPoller
|
679
|
+
|
680
|
+
|
681
|
+
########################################
|
682
|
+
# ../../../omlish/lite/check.py
|
683
|
+
|
684
|
+
|
685
|
+
def check_isinstance(v: T, spec: ta.Union[ta.Type[T], tuple]) -> T:
    """Return *v* unchanged, raising TypeError unless it is an instance of *spec*."""
    if isinstance(v, spec):
        return v
    raise TypeError(v)
|
689
|
+
|
690
|
+
|
691
|
+
def check_not_isinstance(v: T, spec: ta.Union[type, tuple]) -> T:
    """Return *v* unchanged, raising TypeError when it IS an instance of *spec*."""
    if not isinstance(v, spec):
        return v
    raise TypeError(v)
|
695
|
+
|
696
|
+
|
697
|
+
def check_not_none(v: ta.Optional[T]) -> T:
    """Return *v*, raising ValueError when it is None."""
    if v is not None:
        return v
    raise ValueError
|
701
|
+
|
702
|
+
|
703
|
+
def check_not(v: ta.Any) -> ta.Any:
    """
    Return *v*, raising ValueError when it is truthy.

    (The original annotation said `-> None`, but the function returns the
    falsy value itself — 0, '', [], None, ... — so the annotation is fixed
    to ta.Any; runtime behavior is unchanged.)
    """
    if v:
        raise ValueError(v)
    return v
|
707
|
+
|
708
|
+
|
709
|
+
def check_non_empty_str(v: ta.Optional[str]) -> str:
    """Return *v*, raising ValueError when it is None or empty."""
    if v:
        return v
    raise ValueError
|
713
|
+
|
714
|
+
|
715
|
+
def check_state(v: bool, msg: str = 'Illegal state') -> None:
    """Raise ValueError(msg) when *v* is falsy; otherwise do nothing."""
    if v:
        return
    raise ValueError(msg)
|
718
|
+
|
719
|
+
|
720
|
+
########################################
|
721
|
+
# ../../../omlish/lite/json.py
|
722
|
+
|
723
|
+
|
724
|
+
##
|
725
|
+
|
726
|
+
|
727
|
+
JSON_PRETTY_INDENT = 2

JSON_PRETTY_KWARGS: ta.Mapping[str, ta.Any] = dict(
    indent=JSON_PRETTY_INDENT,
)

# NOTE(review): json.dump returns None (it writes to a file object); the
# Callable[..., bytes] annotations on the *dump* partials mirror json.dumps
# and do not reflect the actual (None) return value.
json_dump_pretty: ta.Callable[..., bytes] = functools.partial(json.dump, **JSON_PRETTY_KWARGS)  # type: ignore
json_dumps_pretty: ta.Callable[..., str] = functools.partial(json.dumps, **JSON_PRETTY_KWARGS)


##


JSON_COMPACT_SEPARATORS = (',', ':')  # no spaces after ',' or ':'

JSON_COMPACT_KWARGS: ta.Mapping[str, ta.Any] = dict(
    indent=None,
    separators=JSON_COMPACT_SEPARATORS,
)

json_dump_compact: ta.Callable[..., bytes] = functools.partial(json.dump, **JSON_COMPACT_KWARGS)  # type: ignore
json_dumps_compact: ta.Callable[..., str] = functools.partial(json.dumps, **JSON_COMPACT_KWARGS)
|
749
|
+
|
750
|
+
|
751
|
+
########################################
|
752
|
+
# ../configs.py
|
753
|
+
|
754
|
+
|
755
|
+
@dc.dataclass(frozen=True)
class ServerConfig:
    """Top-level supervisor server configuration; ServerConfig.new() coerces/validates string forms."""

    user: ta.Optional[str] = None
    nodaemon: bool = False
    umask: int = 0o22
    directory: ta.Optional[str] = None
    logfile: str = 'supervisord.log'
    logfile_maxbytes: int = 50 * 1024 * 1024
    logfile_backups: int = 10
    loglevel: int = logging.INFO
    pidfile: str = 'supervisord.pid'
    identifier: str = 'supervisor'
    child_logdir: str = '/dev/null'
    minfds: int = 1024
    minprocs: int = 200
    nocleanup: bool = False
    strip_ansi: bool = False
    silent: bool = False

    groups: ta.Optional[ta.Sequence['ProcessGroupConfig']] = None

    @classmethod
    def new(
            cls,
            umask: ta.Union[int, str] = 0o22,
            directory: ta.Optional[str] = None,
            logfile: str = 'supervisord.log',
            logfile_maxbytes: ta.Union[int, str] = 50 * 1024 * 1024,
            loglevel: ta.Union[int, str] = logging.INFO,
            pidfile: str = 'supervisord.pid',
            child_logdir: ta.Optional[str] = None,
            **kwargs: ta.Any,
    ) -> 'ServerConfig':
        """
        Build a ServerConfig, accepting user-facing string forms.

        Coercions: octal umask strings, '50MB'-style sizes, level names, and
        path validation for logfile/pidfile directories.  Raises ValueError
        (from the coercion helpers) for invalid values.
        """
        return cls(
            umask=octal_type(umask),
            directory=existing_directory(directory) if directory is not None else None,
            logfile=existing_dirpath(logfile),
            logfile_maxbytes=byte_size(logfile_maxbytes),
            loglevel=logging_level(loglevel),
            pidfile=existing_dirpath(pidfile),
            # Default the child log dir to the system temp dir when unset/empty.
            child_logdir=child_logdir if child_logdir else tempfile.gettempdir(),
            **kwargs,
        )
|
798
|
+
|
799
|
+
|
800
|
+
@dc.dataclass(frozen=True)
class ProcessGroupConfig:
    """Configuration for a named group of supervised processes."""

    name: str

    # NOTE(review): priority ordering semantics are not visible in this chunk.
    priority: int = 999

    processes: ta.Optional[ta.Sequence['ProcessConfig']] = None
|
807
|
+
|
808
|
+
|
809
|
+
@dc.dataclass(frozen=True)
class ProcessConfig:
    """Configuration for a single supervised process."""

    name: str
    command: str

    uid: ta.Optional[int] = None
    directory: ta.Optional[str] = None
    umask: ta.Optional[int] = None
    priority: int = 999

    autostart: bool = True
    autorestart: str = 'unexpected'

    startsecs: int = 1
    startretries: int = 3

    numprocs: int = 1
    numprocs_start: int = 0

    @dc.dataclass(frozen=True)
    class Log:
        # Per-stream (stdout/stderr) logging settings.
        file: ta.Optional[str] = None
        capture_maxbytes: ta.Optional[int] = None
        events_enabled: bool = False
        syslog: bool = False
        backups: ta.Optional[int] = None
        maxbytes: ta.Optional[int] = None

    stdout: Log = Log()
    stderr: Log = Log()

    stopsignal: int = signal.SIGTERM
    stopwaitsecs: int = 10
    stopasgroup: bool = False

    killasgroup: bool = False

    # Exit codes treated as an expected (non-failure) exit.
    exitcodes: ta.Iterable[int] = (0,)

    redirect_stderr: bool = False

    environment: ta.Optional[ta.Mapping[str, str]] = None
|
851
|
+
|
852
|
+
|
853
|
+
########################################
|
854
|
+
# ../states.py
|
855
|
+
|
856
|
+
|
857
|
+
##
|
858
|
+
|
859
|
+
|
860
|
+
def _names_by_code(states: ta.Any) -> ta.Dict[int, str]:
    """Invert a states-holder class into {code: NAME} for its non-dunder attributes."""
    return {
        getattr(states, name): name
        for name in states.__dict__
        if not name.startswith('__')
    }
|
867
|
+
|
868
|
+
|
869
|
+
##
|
870
|
+
|
871
|
+
|
872
|
+
class ProcessStates:
    """Numeric codes for the process lifecycle state machine."""

    STOPPED = 0
    STARTING = 10
    RUNNING = 20
    BACKOFF = 30
    STOPPING = 40
    EXITED = 100
    FATAL = 200
    UNKNOWN = 1000


# States in which the process is not running.
STOPPED_STATES = (
    ProcessStates.STOPPED,
    ProcessStates.EXITED,
    ProcessStates.FATAL,
    ProcessStates.UNKNOWN,
)

# States counted as running (including the startup/backoff transitions).
RUNNING_STATES = (
    ProcessStates.RUNNING,
    ProcessStates.BACKOFF,
    ProcessStates.STARTING,
)

# States in which a signal may be delivered to the process (note: excludes BACKOFF).
SIGNALLABLE_STATES = (
    ProcessStates.RUNNING,
    ProcessStates.STARTING,
    ProcessStates.STOPPING,
)
|
901
|
+
|
902
|
+
|
903
|
+
# code -> name lookup, computed once at import time.
_process_states_by_code = _names_by_code(ProcessStates)


def get_process_state_description(code: ProcessState) -> str:
    """Return the symbolic name for a process state code (ValueError via check_not_none for unknown codes)."""
    return check_not_none(_process_states_by_code.get(code))
|
908
|
+
|
909
|
+
|
910
|
+
##
|
911
|
+
|
912
|
+
|
913
|
+
class SupervisorStates:
    """Numeric codes for the top-level supervisor state."""

    FATAL = 2
    RUNNING = 1
    RESTARTING = 0
    SHUTDOWN = -1


# code -> name lookup, computed once at import time.
_supervisor_states_by_code = _names_by_code(SupervisorStates)


def get_supervisor_state_description(code: SupervisorState) -> str:
    """Return the symbolic name for a supervisor state code (ValueError via check_not_none for unknown codes)."""
    return check_not_none(_supervisor_states_by_code.get(code))
|
925
|
+
|
926
|
+
|
927
|
+
########################################
|
928
|
+
# ../../../omlish/lite/logs.py
|
929
|
+
"""
|
930
|
+
TODO:
|
931
|
+
- translate json keys
|
932
|
+
- debug
|
933
|
+
"""
|
934
|
+
|
935
|
+
|
936
|
+
log = logging.getLogger(__name__)  # re-bound module logger (artifact of the amalgamated sections)
|
937
|
+
|
938
|
+
|
939
|
+
##
|
940
|
+
|
941
|
+
|
942
|
+
class TidLogFilter(logging.Filter):
    """Logging filter that stamps each record with the native OS thread id as `tid`."""

    def filter(self, record):
        # Attach the id so formats can reference %(tid)s; never drops records.
        record.tid = threading.get_native_id()
        return True
|
947
|
+
|
948
|
+
|
949
|
+
##
|
950
|
+
|
951
|
+
|
952
|
+
class JsonLogFormatter(logging.Formatter):
    """Formats each record as a compact JSON object of selected record attributes."""

    # Record attribute name -> 'omit when None' flag.
    KEYS: ta.Mapping[str, bool] = {
        'name': False,
        'msg': False,
        'args': False,
        'levelname': False,
        'levelno': False,
        'pathname': False,
        'filename': False,
        'module': False,
        'exc_info': True,
        'exc_text': True,
        'stack_info': True,
        'lineno': False,
        'funcName': False,
        'created': False,
        'msecs': False,
        'relativeCreated': False,
        'thread': False,
        'threadName': False,
        'processName': False,
        'process': False,
    }

    def format(self, record: logging.LogRecord) -> str:
        dct = {}
        for key, omit_none in self.KEYS.items():
            value = getattr(record, key)
            if omit_none and value is None:
                continue
            dct[key] = value
        return json_dumps_compact(dct)
|
985
|
+
|
986
|
+
|
987
|
+
##
|
988
|
+
|
989
|
+
|
990
|
+
# (key, %-format fragment) pairs; StandardLogFormatter.build_log_format joins the fragments.
STANDARD_LOG_FORMAT_PARTS = [
    ('asctime', '%(asctime)-15s'),
    ('process', 'pid=%(process)-6s'),
    ('thread', 'tid=%(thread)x'),
    ('levelname', '%(levelname)s'),
    ('name', '%(name)s'),
    ('separator', '::'),
    ('message', '%(message)s'),
]
|
999
|
+
|
1000
|
+
|
1001
|
+
class StandardLogFormatter(logging.Formatter):
    """Single-line text formatter with millisecond-precision timestamps."""

    @staticmethod
    def build_log_format(parts: ta.Iterable[ta.Tuple[str, str]]) -> str:
        """Join the format fragments of (name, fragment) `parts` with spaces."""
        fragments = [fragment for _, fragment in parts]
        return ' '.join(fragments)

    # Convert record.created via datetime so strftime formatting is available.
    converter = datetime.datetime.fromtimestamp  # type: ignore

    def formatTime(self, record, datefmt=None):
        ct = self.converter(record.created)  # type: ignore
        if datefmt:
            return ct.strftime(datefmt)  # noqa
        base = ct.strftime("%Y-%m-%d %H:%M:%S")  # noqa
        # Append zero-padded milliseconds, matching logging's default style.
        return '%s.%03d' % (base, record.msecs)
|
1016
|
+
|
1017
|
+
|
1018
|
+
##
|
1019
|
+
|
1020
|
+
|
1021
|
+
class ProxyLogFilterer(logging.Filterer):
    """Filterer that forwards every operation to a wrapped `underlying` Filterer.

    Lets callers swap the real filterer at one indirection point while code
    elsewhere keeps a stable reference to the proxy.
    """

    def __init__(self, underlying: logging.Filterer) -> None:  # noqa
        # Deliberately does not call super().__init__(): all state lives on
        # the underlying object.
        self._underlying = underlying

    @property
    def underlying(self) -> logging.Filterer:
        return self._underlying

    @property
    def filters(self):
        return self._underlying.filters

    @filters.setter
    def filters(self, filters):
        self._underlying.filters = filters

    def addFilter(self, filter):  # noqa
        self._underlying.addFilter(filter)

    def removeFilter(self, filter):  # noqa
        self._underlying.removeFilter(filter)

    def filter(self, record):
        return self._underlying.filter(record)
|
1045
|
+
|
1046
|
+
|
1047
|
+
class ProxyLogHandler(ProxyLogFilterer, logging.Handler):
    """Handler that forwards the full logging.Handler surface to `underlying`.

    Like ProxyLogFilterer, this holds no handler state of its own - every
    attribute and method delegates, so replacing the underlying handler
    changes behavior everywhere the proxy is installed.
    """

    def __init__(self, underlying: logging.Handler) -> None:  # noqa
        # Only the filterer half is initialized; logging.Handler.__init__ is
        # intentionally skipped so no local lock/level/formatter is created.
        ProxyLogFilterer.__init__(self, underlying)

    _underlying: logging.Handler

    @property
    def underlying(self) -> logging.Handler:
        return self._underlying

    def get_name(self):
        return self._underlying.get_name()

    def set_name(self, name):
        self._underlying.set_name(name)

    @property
    def name(self):
        return self._underlying.name

    @property
    def level(self):
        return self._underlying.level

    @level.setter
    def level(self, level):
        self._underlying.level = level

    @property
    def formatter(self):
        return self._underlying.formatter

    @formatter.setter
    def formatter(self, formatter):
        self._underlying.formatter = formatter

    def createLock(self):
        self._underlying.createLock()

    def acquire(self):
        self._underlying.acquire()

    def release(self):
        self._underlying.release()

    def setLevel(self, level):
        self._underlying.setLevel(level)

    def format(self, record):
        return self._underlying.format(record)

    def emit(self, record):
        self._underlying.emit(record)

    def handle(self, record):
        return self._underlying.handle(record)

    def setFormatter(self, fmt):
        self._underlying.setFormatter(fmt)

    def flush(self):
        self._underlying.flush()

    def close(self):
        self._underlying.close()

    def handleError(self, record):
        self._underlying.handleError(record)
|
1115
|
+
|
1116
|
+
|
1117
|
+
##
|
1118
|
+
|
1119
|
+
|
1120
|
+
class StandardLogHandler(ProxyLogHandler):
    """Marker subclass: identifies the handler installed by configure_standard_logging."""

    pass
|
1122
|
+
|
1123
|
+
|
1124
|
+
##
|
1125
|
+
|
1126
|
+
|
1127
|
+
@contextlib.contextmanager
|
1128
|
+
def _locking_logging_module_lock() -> ta.Iterator[None]:
|
1129
|
+
if hasattr(logging, '_acquireLock'):
|
1130
|
+
logging._acquireLock() # noqa
|
1131
|
+
try:
|
1132
|
+
yield
|
1133
|
+
finally:
|
1134
|
+
logging._releaseLock() # type: ignore # noqa
|
1135
|
+
|
1136
|
+
elif hasattr(logging, '_lock'):
|
1137
|
+
# https://github.com/python/cpython/commit/74723e11109a320e628898817ab449b3dad9ee96
|
1138
|
+
with logging._lock: # noqa
|
1139
|
+
yield
|
1140
|
+
|
1141
|
+
else:
|
1142
|
+
raise Exception("Can't find lock in logging module")
|
1143
|
+
|
1144
|
+
|
1145
|
+
def configure_standard_logging(
    level: ta.Union[int, str] = logging.INFO,
    *,
    json: bool = False,
    target: ta.Optional[logging.Logger] = None,
    force: bool = False,
) -> ta.Optional[StandardLogHandler]:
    """Install a StandardLogHandler (stream handler + formatter + tid filter) on `target`.

    :param level: level applied to the target logger (skipped only if None).
    :param json: emit JSON lines instead of the standard text format.
    :param target: logger to configure; defaults to the root logger.
    :param force: install even if a StandardLogHandler is already present.
    :return: the installed handler, or None when one already existed and
        `force` is false.
    """
    # Serialize against logging's own module lock so concurrent configuration
    # can't interleave handler installation.
    with _locking_logging_module_lock():
        if target is None:
            target = logging.root

        #

        if not force:
            # Idempotence: a prior call already installed our handler.
            if any(isinstance(h, StandardLogHandler) for h in list(target.handlers)):
                return None

        #

        handler = logging.StreamHandler()

        #

        formatter: logging.Formatter
        if json:
            formatter = JsonLogFormatter()
        else:
            formatter = StandardLogFormatter(StandardLogFormatter.build_log_format(STANDARD_LOG_FORMAT_PARTS))
        handler.setFormatter(formatter)

        #

        # Attach the thread-id attribute so formats may reference it.
        handler.addFilter(TidLogFilter())

        #

        target.addHandler(handler)

        #

        if level is not None:
            target.setLevel(level)

        #

        # Wrapped in the marker proxy so later calls can detect installation.
        return StandardLogHandler(handler)
|
1191
|
+
|
1192
|
+
|
1193
|
+
########################################
|
1194
|
+
# ../events.py
|
1195
|
+
|
1196
|
+
|
1197
|
+
class EventCallbacks:
    """Registry of (event type, callback) pairs with isinstance-based dispatch.

    A callback subscribed for a type fires for that type and any subclass of it.
    """

    def __init__(self) -> None:
        super().__init__()

        self._callbacks: ta.List[ta.Tuple[type, ta.Callable]] = []

    def subscribe(self, type, callback):  # noqa
        self._callbacks.append((type, callback))

    def unsubscribe(self, type, callback):  # noqa
        # Removes the first exactly-matching (type, callback) pair.
        self._callbacks.remove((type, callback))

    def notify(self, event):
        for cls, cb in self._callbacks:
            if isinstance(event, cls):
                cb(event)

    def clear(self):
        # Clear in place so external references to the list stay valid.
        del self._callbacks[:]
|
1216
|
+
|
1217
|
+
|
1218
|
+
# Global callback registry shared by the whole supervisor process.
EVENT_CALLBACKS = EventCallbacks()

# Module-level convenience aliases bound to the global registry's methods.
notify_event = EVENT_CALLBACKS.notify
clear_events = EVENT_CALLBACKS.clear
|
1222
|
+
|
1223
|
+
|
1224
|
+
class Event:
    """Abstract base type for all supervisor events."""
|
1226
|
+
|
1227
|
+
|
1228
|
+
class ProcessLogEvent(Event):
    """Abstract base for events carrying a chunk of a child's log output."""

    # 'stdout' or 'stderr'; set by the concrete subclasses below.
    channel: ta.Optional[str] = None

    def __init__(self, process, pid, data):
        super().__init__()
        self.process = process
        self.pid = pid
        # Raw output chunk from the child; may be bytes or str (see payload()).
        self.data = data

    def payload(self):
        groupname = ''
        if self.process.group is not None:
            groupname = self.process.group.config.name
        try:
            data = as_string(self.data)
        except UnicodeDecodeError:
            # Keep the event deliverable even for undecodable output.
            data = f'Undecodable: {self.data!r}'
        fmt = as_string('processname:%s groupname:%s pid:%s channel:%s\n%s')
        result = fmt % (
            as_string(self.process.config.name),
            as_string(groupname),
            self.pid,
            as_string(self.channel),  # type: ignore
            data,
        )
        return result
|
1255
|
+
|
1256
|
+
|
1257
|
+
class ProcessLogStdoutEvent(ProcessLogEvent):
    # Log output captured from the child's stdout pipe.
    channel = 'stdout'


class ProcessLogStderrEvent(ProcessLogEvent):
    # Log output captured from the child's stderr pipe.
    channel = 'stderr'
|
1263
|
+
|
1264
|
+
|
1265
|
+
class ProcessCommunicationEvent(Event):
    """ Abstract """

    # Event mode tokens: children bracket communication payloads with these
    # markers on their output stream.
    BEGIN_TOKEN = b'<!--XSUPERVISOR:BEGIN-->'
    END_TOKEN = b'<!--XSUPERVISOR:END-->'

    def __init__(self, process, pid, data):
        super().__init__()
        self.process = process
        self.pid = pid
        # Payload captured between BEGIN_TOKEN and END_TOKEN; bytes or str.
        self.data = data

    def payload(self):
        groupname = ''
        if self.process.group is not None:
            groupname = self.process.group.config.name
        try:
            data = as_string(self.data)
        except UnicodeDecodeError:
            data = f'Undecodable: {self.data!r}'
        return f'processname:{self.process.config.name} groupname:{groupname} pid:{self.pid}\n{data}'
|
1286
|
+
|
1287
|
+
|
1288
|
+
class ProcessCommunicationStdoutEvent(ProcessCommunicationEvent):
    # Communication payload read from the child's stdout.
    channel = 'stdout'


class ProcessCommunicationStderrEvent(ProcessCommunicationEvent):
    # Communication payload read from the child's stderr.
    channel = 'stderr'
|
1294
|
+
|
1295
|
+
|
1296
|
+
class RemoteCommunicationEvent(Event):
    """Event carrying a (type, data) payload received from a remote caller."""

    def __init__(self, type, data):  # noqa
        super().__init__()
        self.data = data
        self.type = type

    def payload(self):
        header = f'type:{self.type}'
        return f'{header}\n{self.data}'
|
1304
|
+
|
1305
|
+
|
1306
|
+
class SupervisorStateChangeEvent(Event):
    """ Abstract class """

    def payload(self):
        # State changes carry no extra data; the concrete class is the signal.
        return ''


class SupervisorRunningEvent(SupervisorStateChangeEvent):
    pass


class SupervisorStoppingEvent(SupervisorStateChangeEvent):
    pass
|
1319
|
+
|
1320
|
+
|
1321
|
+
class EventRejectedEvent:  # purposely does not subclass Event
    """Notification that `process` refused to handle `event`.

    Not an Event subclass on purpose: rejection notifications must never be
    re-dispatched through the normal event machinery.
    """

    def __init__(self, process, event):
        super().__init__()
        self.event = event
        self.process = process
|
1326
|
+
|
1327
|
+
|
1328
|
+
class ProcessStateEvent(Event):
    """ Abstract class, never raised directly """

    # Declared for subclasses/introspection; not referenced in this class.
    frm = None
    to = None

    def __init__(self, process, from_state, expected=True):
        super().__init__()
        self.process = process
        self.from_state = from_state
        self.expected = expected
        # we eagerly render these so if the process pid, etc changes beneath
        # us, we stash the values at the time the event was sent
        self.extra_values = self.get_extra_values()

    def payload(self):
        groupname = ''
        if self.process.group is not None:
            groupname = self.process.group.config.name
        l = [
            ('processname', self.process.config.name),
            ('groupname', groupname),
            ('from_state', get_process_state_description(self.from_state)),
        ]
        l.extend(self.extra_values)
        s = ' '.join([f'{name}:{val}' for name, val in l])
        return s

    def get_extra_values(self):
        # Subclass hook: extra (name, value) pairs appended to the payload.
        return []
|
1357
|
+
|
1358
|
+
|
1359
|
+
class ProcessStateFatalEvent(ProcessStateEvent):
    pass


class ProcessStateUnknownEvent(ProcessStateEvent):
    pass


class ProcessStateStartingOrBackoffEvent(ProcessStateEvent):
    def get_extra_values(self):
        # Current retry count for the starting/backoff cycle.
        return [('tries', int(self.process.backoff))]


class ProcessStateBackoffEvent(ProcessStateStartingOrBackoffEvent):
    pass


class ProcessStateStartingEvent(ProcessStateStartingOrBackoffEvent):
    pass
|
1378
|
+
|
1379
|
+
|
1380
|
+
class ProcessStateExitedEvent(ProcessStateEvent):
    def get_extra_values(self):
        # `expected` records whether the exit code was an anticipated one.
        return [('expected', int(self.expected)), ('pid', self.process.pid)]


class ProcessStateRunningEvent(ProcessStateEvent):
    def get_extra_values(self):
        return [('pid', self.process.pid)]


class ProcessStateStoppingEvent(ProcessStateEvent):
    def get_extra_values(self):
        return [('pid', self.process.pid)]


class ProcessStateStoppedEvent(ProcessStateEvent):
    def get_extra_values(self):
        return [('pid', self.process.pid)]
|
1398
|
+
|
1399
|
+
|
1400
|
+
class ProcessGroupEvent(Event):
    """Abstract base for events about a whole process group."""

    def __init__(self, group):
        super().__init__()
        self.group = group

    def payload(self):
        return f'groupname:{self.group}\n'


class ProcessGroupAddedEvent(ProcessGroupEvent):
    pass


class ProcessGroupRemovedEvent(ProcessGroupEvent):
    pass
|
1415
|
+
|
1416
|
+
|
1417
|
+
class TickEvent(Event):
    """ Abstract """

    def __init__(self, when, supervisord):
        super().__init__()
        # Timestamp the tick fired for, and the owning supervisor instance.
        self.when = when
        self.supervisord = supervisord

    def payload(self):
        return f'when:{self.when}'


class Tick5Event(TickEvent):
    # Fires every 5 seconds.
    period = 5


class Tick60Event(TickEvent):
    # Fires every 60 seconds.
    period = 60


class Tick3600Event(TickEvent):
    # Fires every hour.
    period = 3600


TICK_EVENTS = [  # imported elsewhere
    Tick5Event,
    Tick60Event,
    Tick3600Event,
]
|
1446
|
+
|
1447
|
+
|
1448
|
+
class EventTypes:
    """Namespace mapping canonical event names to event classes.

    Attribute names double as the public event-name strings (see
    get_event_name_by_type / register below).
    """

    EVENT = Event  # abstract

    PROCESS_STATE = ProcessStateEvent  # abstract
    PROCESS_STATE_STOPPED = ProcessStateStoppedEvent
    PROCESS_STATE_EXITED = ProcessStateExitedEvent
    PROCESS_STATE_STARTING = ProcessStateStartingEvent
    PROCESS_STATE_STOPPING = ProcessStateStoppingEvent
    PROCESS_STATE_BACKOFF = ProcessStateBackoffEvent
    PROCESS_STATE_FATAL = ProcessStateFatalEvent
    PROCESS_STATE_RUNNING = ProcessStateRunningEvent
    PROCESS_STATE_UNKNOWN = ProcessStateUnknownEvent

    PROCESS_COMMUNICATION = ProcessCommunicationEvent  # abstract
    PROCESS_COMMUNICATION_STDOUT = ProcessCommunicationStdoutEvent
    PROCESS_COMMUNICATION_STDERR = ProcessCommunicationStderrEvent

    PROCESS_LOG = ProcessLogEvent
    PROCESS_LOG_STDOUT = ProcessLogStdoutEvent
    PROCESS_LOG_STDERR = ProcessLogStderrEvent

    REMOTE_COMMUNICATION = RemoteCommunicationEvent

    SUPERVISOR_STATE_CHANGE = SupervisorStateChangeEvent  # abstract
    SUPERVISOR_STATE_CHANGE_RUNNING = SupervisorRunningEvent
    SUPERVISOR_STATE_CHANGE_STOPPING = SupervisorStoppingEvent

    TICK = TickEvent  # abstract
    TICK_5 = Tick5Event
    TICK_60 = Tick60Event
    TICK_3600 = Tick3600Event

    PROCESS_GROUP = ProcessGroupEvent  # abstract
    PROCESS_GROUP_ADDED = ProcessGroupAddedEvent
    PROCESS_GROUP_REMOVED = ProcessGroupRemovedEvent
|
1483
|
+
|
1484
|
+
|
1485
|
+
def get_event_name_by_type(requested):
    """Return the EventTypes attribute name bound to `requested`, or None."""
    return next(
        (name for name, typ in EventTypes.__dict__.items() if typ is requested),
        None,
    )
|
1490
|
+
|
1491
|
+
|
1492
|
+
def register(name, event):
    # Expose an event class (e.g. plugin-defined) as EventTypes.<name>.
    setattr(EventTypes, name, event)
|
1494
|
+
|
1495
|
+
|
1496
|
+
########################################
|
1497
|
+
# ../types.py
|
1498
|
+
|
1499
|
+
|
1500
|
+
class AbstractServerContext(abc.ABC):
    """Interface to process-wide supervisor state shared across components."""

    @property
    @abc.abstractmethod
    def config(self) -> ServerConfig:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def state(self) -> SupervisorState:
        raise NotImplementedError

    @abc.abstractmethod
    def set_state(self, state: SupervisorState) -> None:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def pid_history(self) -> ta.Dict[int, 'AbstractSubprocess']:
        """Map of child pid -> subprocess object."""
        raise NotImplementedError
|
1519
|
+
|
1520
|
+
|
1521
|
+
class AbstractSubprocess(abc.ABC):
    """Interface to a single managed child process."""

    @property
    @abc.abstractmethod
    def pid(self) -> int:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def config(self) -> ProcessConfig:
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def context(self) -> AbstractServerContext:
        raise NotImplementedError

    @abc.abstractmethod
    def finish(self, sts: int) -> None:
        """Handle the process having exited with wait status `sts`."""
        raise NotImplementedError
|
1540
|
+
|
1541
|
+
|
1542
|
+
########################################
|
1543
|
+
# ../context.py
|
1544
|
+
|
1545
|
+
|
1546
|
+
log = logging.getLogger(__name__)


class ServerContext(AbstractServerContext):
    """Concrete process-wide state: config, signal handling, poller, uid/gid."""

    # Flags retained from supervisord's options object; defaults only here.
    first = False
    test = False

    ##
|
1554
|
+
|
1555
|
+
    def __init__(self, config: ServerConfig) -> None:
        """Build process-wide state from `config`, resolving the configured user."""
        super().__init__()

        self._config = config

        # Maps child pid -> subprocess object, used when reaping children.
        self._pid_history: ta.Dict[int, AbstractSubprocess] = {}
        self._state: SupervisorState = SupervisorStates.RUNNING

        self.signal_receiver = SignalReceiver()

        self.poller = Poller()

        if self.config.user is not None:
            # Resolve the configured user to numeric ids up front.
            uid = name_to_uid(self.config.user)
            self.uid = uid
            self.gid = gid_for_uid(uid)
        else:
            # No user configured: run as whoever started us.
            self.uid = None
            self.gid = None

        # Set True once the pidfile is written, so cleanup() removes it.
        self.unlink_pidfile = False
|
1576
|
+
|
1577
|
+
    @property
    def config(self) -> ServerConfig:
        return self._config

    @property
    def state(self) -> SupervisorState:
        return self._state

    def set_state(self, state: SupervisorState) -> None:
        self._state = state

    @property
    def pid_history(self) -> ta.Dict[int, AbstractSubprocess]:
        return self._pid_history

    # Resolved numeric ids of the configured user; None when no user is set.
    uid: ta.Optional[int]
    gid: ta.Optional[int]
|
1594
|
+
|
1595
|
+
##
|
1596
|
+
|
1597
|
+
    def set_signals(self) -> None:
        """Install SignalReceiver handlers for the signals supervisord responds to."""
        self.signal_receiver.install(
            signal.SIGTERM,
            signal.SIGINT,
            signal.SIGQUIT,
            signal.SIGHUP,
            signal.SIGCHLD,
            signal.SIGUSR2,
        )
|
1606
|
+
|
1607
|
+
    def waitpid(self) -> ta.Tuple[ta.Optional[int], ta.Optional[int]]:
        """Non-blocking reap of one exited child; returns (pid, status) or (None, None)."""
        # Need pthread_sigmask here to avoid concurrent sigchld, but Python doesn't offer in Python < 3.4. There is
        # still a race condition here; we can get a sigchld while we're sitting in the waitpid call. However, AFAICT, if
        # waitpid is interrupted by SIGCHLD, as long as we call waitpid again (which happens every so often during the
        # normal course in the mainloop), we'll eventually reap the child that we tried to reap during the interrupted
        # call. At least on Linux, this appears to be true, or at least stopping 50 processes at once never left zombies
        # lying around.
        try:
            pid, sts = os.waitpid(-1, os.WNOHANG)
        except OSError as exc:
            code = exc.args[0]
            # ECHILD (no children) and EINTR are expected; anything else may
            # leave a zombie behind, so shout about it.
            if code not in (errno.ECHILD, errno.EINTR):
                log.critical('waitpid error %r; a process may not be cleaned up properly', code)
            if code == errno.EINTR:
                log.debug('EINTR during reap')
            pid, sts = None, None
        return pid, sts
|
1624
|
+
|
1625
|
+
    def set_uid_or_exit(self) -> None:
        """
        Set the uid of the supervisord process. Called during supervisord startup only. No return value. Exits the
        process via usage() if privileges could not be dropped.
        """
        if self.uid is None:
            if os.getuid() == 0:
                # Running as root with no user configured: allowed, but warn.
                warnings.warn(
                    'Supervisor is running as root. Privileges were not dropped because no user is specified in the '
                    'config file. If you intend to run as root, you can set user=root in the config file to avoid '
                    'this message.',
                )
        else:
            msg = drop_privileges(self.uid)
            if msg is None:
                log.info('Set uid to user %s succeeded', self.uid)
            else:  # failed to drop privileges
                raise RuntimeError(msg)
|
1643
|
+
|
1644
|
+
    def set_rlimits_or_exit(self) -> None:
        """
        Set the rlimits of the supervisord process. Called during supervisord startup only. No return value. Exits
        the process via usage() if any rlimits could not be set.
        """

        # Each entry: the resource to raise, the configured minimum, and the
        # error template shown when raising it fails.
        limits = []

        if hasattr(resource, 'RLIMIT_NOFILE'):
            limits.append({
                'msg': (
                    'The minimum number of file descriptors required to run this process is %(min_limit)s as per the '
                    '"minfds" command-line argument or config file setting. The current environment will only allow '
                    'you to open %(hard)s file descriptors. Either raise the number of usable file descriptors in '
                    'your environment (see README.rst) or lower the minfds setting in the config file to allow the '
                    'process to start.'
                ),
                'min': self.config.minfds,
                'resource': resource.RLIMIT_NOFILE,
                'name': 'RLIMIT_NOFILE',
            })

        if hasattr(resource, 'RLIMIT_NPROC'):
            limits.append({
                'msg': (
                    'The minimum number of available processes required to run this program is %(min_limit)s as per '
                    'the "minprocs" command-line argument or config file setting. The current environment will only '
                    'allow you to open %(hard)s processes. Either raise the number of usable processes in your '
                    'environment (see README.rst) or lower the minprocs setting in the config file to allow the '
                    'program to start.'
                ),
                'min': self.config.minprocs,
                'resource': resource.RLIMIT_NPROC,
                'name': 'RLIMIT_NPROC',
            })

        for limit in limits:
            min_limit = limit['min']
            res = limit['resource']
            msg = limit['msg']
            name = limit['name']

            soft, hard = resource.getrlimit(res)  # type: ignore

            # -1 means unlimited
            if soft < min_limit and soft != -1:  # type: ignore
                if hard < min_limit and hard != -1:  # type: ignore
                    # setrlimit should increase the hard limit if we are root, if not then setrlimit raises and we print
                    # usage
                    hard = min_limit  # type: ignore

                try:
                    resource.setrlimit(res, (min_limit, hard))  # type: ignore
                    log.info('Increased %s limit to %s', name, min_limit)
                except (resource.error, ValueError):
                    raise RuntimeError(msg % dict(  # type: ignore  # noqa
                        min_limit=min_limit,
                        res=res,
                        name=name,
                        soft=soft,
                        hard=hard,
                    ))
|
1706
|
+
|
1707
|
+
    def cleanup(self) -> None:
        """Remove the pidfile (if we wrote it) and close the poller on shutdown."""
        if self.unlink_pidfile:
            try_unlink(self.config.pidfile)
        self.poller.close()

    def cleanup_fds(self) -> None:
        # try to close any leaked file descriptors (for reload)
        start = 5
        os.closerange(start, self.config.minfds)

    def clear_auto_child_logdir(self) -> None:
        """Delete stale auto-generated child log files for this identifier."""
        # must be called after realize()
        child_logdir = self.config.child_logdir
        # Matches names produced by get_auto_child_log_name (plus rotation suffixes).
        fnre = re.compile(rf'.+?---{self.config.identifier}-\S+\.log\.?\d{{0,4}}')
        try:
            filenames = os.listdir(child_logdir)
        except OSError:
            log.warning('Could not clear child_log dir')
            return

        for filename in filenames:
            if fnre.match(filename):
                pathname = os.path.join(child_logdir, filename)
                try:
                    os.remove(pathname)
                except OSError:
                    # Best-effort cleanup; a busy/locked file is not fatal.
                    log.warning('Failed to clean up %r', pathname)
|
1734
|
+
|
1735
|
+
    def daemonize(self) -> None:
        # Give the poller hooks around the fork - some polling mechanisms do
        # not survive fork (see the poller implementation).
        self.poller.before_daemonize()
        self._daemonize()
        self.poller.after_daemonize()
|
1739
|
+
|
1740
|
+
def _daemonize(self) -> None:
|
1741
|
+
# To daemonize, we need to become the leader of our own session (process) group. If we do not, signals sent to
|
1742
|
+
# our parent process will also be sent to us. This might be bad because signals such as SIGINT can be sent to
|
1743
|
+
# our parent process during normal (uninteresting) operations such as when we press Ctrl-C in the parent
|
1744
|
+
# terminal window to escape from a logtail command. To disassociate ourselves from our parent's session group we
|
1745
|
+
# use os.setsid. It means "set session id", which has the effect of disassociating a process from is current
|
1746
|
+
# session and process group and setting itself up as a new session leader.
|
1747
|
+
#
|
1748
|
+
# Unfortunately we cannot call setsid if we're already a session group leader, so we use "fork" to make a copy
|
1749
|
+
# of ourselves that is guaranteed to not be a session group leader.
|
1750
|
+
#
|
1751
|
+
# We also change directories, set stderr and stdout to null, and change our umask.
|
1752
|
+
#
|
1753
|
+
# This explanation was (gratefully) garnered from
|
1754
|
+
# http://www.cems.uwe.ac.uk/~irjohnso/coursenotes/lrc/system/daemons/d3.htm
|
1755
|
+
|
1756
|
+
pid = os.fork()
|
1757
|
+
if pid != 0:
|
1758
|
+
# Parent
|
1759
|
+
log.debug('supervisord forked; parent exiting')
|
1760
|
+
real_exit(0)
|
1761
|
+
# Child
|
1762
|
+
log.info('daemonizing the supervisord process')
|
1763
|
+
if self.config.directory:
|
1764
|
+
try:
|
1765
|
+
os.chdir(self.config.directory)
|
1766
|
+
except OSError as err:
|
1767
|
+
log.critical("can't chdir into %r: %s", self.config.directory, err)
|
1768
|
+
else:
|
1769
|
+
log.info('set current directory: %r', self.config.directory)
|
1770
|
+
os.dup2(0, os.open('/dev/null', os.O_RDONLY))
|
1771
|
+
os.dup2(1, os.open('/dev/null', os.O_WRONLY))
|
1772
|
+
os.dup2(2, os.open('/dev/null', os.O_WRONLY))
|
1773
|
+
os.setsid()
|
1774
|
+
os.umask(self.config.umask)
|
1775
|
+
# XXX Stevens, in his Advanced Unix book, section 13.3 (page 417) recommends calling umask(0) and closing unused
|
1776
|
+
# file descriptors. In his Network Programming book, he additionally recommends ignoring SIGHUP and forking
|
1777
|
+
# again after the setsid() call, for obscure SVR4 reasons.
|
1778
|
+
|
1779
|
+
    def get_auto_child_log_name(self, name: str, identifier: str, channel: str) -> str:
        """Create (and return the path of) a unique log file for a child's output channel."""
        prefix = f'{name}-{channel}---{identifier}-'
        logfile = mktempfile(
            suffix='.log',
            prefix=prefix,
            dir=self.config.child_logdir,
        )
        return logfile

    def get_signal(self) -> ta.Optional[int]:
        # Pop the next queued signal number, or None if none is pending.
        return self.signal_receiver.get_signal()

    def write_pidfile(self) -> None:
        """Write our pid to the configured pidfile; arm cleanup() to unlink it."""
        pid = os.getpid()
        try:
            with open(self.config.pidfile, 'w') as f:
                f.write(f'{pid}\n')
        except OSError:
            log.critical('could not write pidfile %s', self.config.pidfile)
        else:
            self.unlink_pidfile = True
            log.info('supervisord started with pid %s', pid)
|
1801
|
+
|
1802
|
+
|
1803
|
+
def drop_privileges(user: ta.Union[int, str, None]) -> ta.Optional[str]:
    """
    Drop privileges to become the specified user, which may be a username or uid. Called for supervisord startup
    and when spawning subprocesses. Returns None on success or a string error message if privileges could not be
    dropped.
    """
    if user is None:
        return 'No user specified to setuid to!'

    # Resolve `user` (numeric uid or username) into a passwd record + uid.
    try:
        uid = int(user)
    except ValueError:
        try:
            pwrec = pwd.getpwnam(user)  # type: ignore
        except KeyError:
            return f"Can't find username {user!r}"
        uid = pwrec.pw_uid
    else:
        try:
            pwrec = pwd.getpwuid(uid)
        except KeyError:
            return f"Can't find uid {uid!r}"

    current_uid = os.getuid()

    if current_uid == uid:
        # do nothing and return successfully if the uid is already the current one. this allows a supervisord
        # running as an unprivileged user "foo" to start a process where the config has "user=foo" (same user) in
        # it.
        return None

    if current_uid != 0:
        return "Can't drop privilege as nonroot user"

    gid = pwrec.pw_gid
    if hasattr(os, 'setgroups'):
        user = pwrec.pw_name
        groups = [grprec.gr_gid for grprec in grp.getgrall() if user in grprec.gr_mem]

        # always put our primary gid first in this list, otherwise we can lose group info since sometimes the first
        # group in the setgroups list gets overwritten on the subsequent setgid call (at least on freebsd 9 with
        # python 2.7 - this will be safe though for all unix /python version combos)
        groups.insert(0, gid)
        try:
            os.setgroups(groups)
        except OSError:
            return 'Could not set groups of effective user'

    try:
        os.setgid(gid)
    except OSError:
        return 'Could not set group id of effective user'
    os.setuid(uid)
    return None
|
1857
|
+
|
1858
|
+
|
1859
|
+
def make_pipes(stderr=True) -> ta.Mapping[str, int]:
    """
    Create pipes for parent to child stdin/stdout/stderr communications. Open fd in non-blocking mode so we can
    read them in the mainloop without blocking. If stderr is False, don't create a pipe for stderr.
    """

    pipes: ta.Dict[str, ta.Optional[int]] = dict.fromkeys((
        'child_stdin',
        'stdin',
        'stdout',
        'child_stdout',
        'stderr',
        'child_stderr',
    ))
    try:
        # Note the naming: the child reads its stdin from the pipe's read end,
        # while the parent holds the write end under the 'stdin' key.
        read_end, write_end = os.pipe()
        pipes['child_stdin'], pipes['stdin'] = read_end, write_end

        read_end, write_end = os.pipe()
        pipes['stdout'], pipes['child_stdout'] = read_end, write_end

        if stderr:
            read_end, write_end = os.pipe()
            pipes['stderr'], pipes['child_stderr'] = read_end, write_end

        # Make the parent-side ends non-blocking for the mainloop.
        for fd in (pipes['stdout'], pipes['stderr'], pipes['stdin']):
            if fd is not None:
                flags = fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NDELAY
                fcntl.fcntl(fd, fcntl.F_SETFL, flags)

        return pipes  # type: ignore

    except OSError:
        # Don't leak any fds created before the failure.
        for fd in pipes.values():
            if fd is not None:
                close_fd(fd)
        raise
|
1891
|
+
|
1892
|
+
|
1893
|
+
def close_parent_pipes(pipes: ta.Mapping[str, int]) -> None:
    """Close the parent-side fds of a make_pipes() mapping, skipping absent ones."""
    for name in ('stdin', 'stdout', 'stderr'):
        if (fd := pipes.get(name)) is not None:
            close_fd(fd)
|
1898
|
+
|
1899
|
+
|
1900
|
+
def close_child_pipes(pipes: ta.Mapping[str, int]) -> None:
    """Close the child-side fds of a make_pipes() mapping, skipping absent ones."""
    for name in ('child_stdin', 'child_stdout', 'child_stderr'):
        if (fd := pipes.get(name)) is not None:
            close_fd(fd)
|
1905
|
+
|
1906
|
+
|
1907
|
+
def check_execv_args(filename, argv, st) -> None:
    """
    Validate that `filename` (with os.stat result `st`, or None) can be exec'd.

    Raises NotFoundError when `st` is None, NotExecutableError for a directory or a file with no execute bits,
    and NoPermissionError when the current user may not execute the file. `argv` is accepted for signature
    compatibility but is not inspected here. Returns None on success.
    """
    if st is None:
        raise NotFoundError(f"can't find command {filename!r}")

    mode = st[stat.ST_MODE]
    if stat.S_ISDIR(mode):
        raise NotExecutableError(f'command at {filename!r} is a directory')

    if not (stat.S_IMODE(mode) & 0o111):
        raise NotExecutableError(f'command at {filename!r} is not executable')

    if not os.access(filename, os.X_OK):
        raise NoPermissionError(f'no permission to run command {filename!r}')
|
1919
|
+
|
1920
|
+
|
1921
|
+
########################################
# ../dispatchers.py


# Module-level logger for the dispatcher classes below. Note: some methods shadow this name
# with a local `log` loop variable when iterating child loggers.
log = logging.getLogger(__name__)
|
1926
|
+
|
1927
|
+
|
1928
|
+
class Dispatcher(abc.ABC):
    """
    Base class for a single fd's event handler, bound to one subprocess channel.

    Subclasses declare readability/writability and override the matching handle_*_event hook;
    the base implementations raise TypeError if invoked.
    """

    def __init__(self, process: AbstractSubprocess, channel: str, fd: int) -> None:
        super().__init__()

        self._process = process  # the subprocess that owns this dispatcher
        self._channel = channel  # channel name, e.g. 'stdout' or 'stderr'
        self._fd = fd  # file descriptor being monitored
        self._closed = False  # set once close() has run

    def __repr__(self) -> str:
        return f'<{type(self).__name__} at {id(self)} for {self._process} ({self._channel})>'

    @property
    def process(self) -> AbstractSubprocess:
        return self._process

    @property
    def channel(self) -> str:
        return self._channel

    @property
    def fd(self) -> int:
        return self._fd

    @property
    def closed(self) -> bool:
        return self._closed

    @abc.abstractmethod
    def readable(self) -> bool:
        raise NotImplementedError

    @abc.abstractmethod
    def writable(self) -> bool:
        raise NotImplementedError

    def handle_read_event(self) -> None:
        # Subclasses that report readable() must override this.
        raise TypeError

    def handle_write_event(self) -> None:
        # Subclasses that report writable() must override this.
        raise TypeError

    def handle_error(self) -> None:
        # Log the active exception's summary and stop monitoring this fd.
        _, exc_type, exc_value, tb_info = compact_traceback()

        log.critical('uncaptured python exception, closing channel %s (%s:%s %s)', repr(self), exc_type, exc_value, tb_info)  # noqa
        self.close()

    def close(self) -> None:
        if self._closed:
            return
        log.debug('fd %s closed, stopped monitoring %s', self._fd, self)
        self._closed = True

    def flush(self) -> None:  # noqa
        pass
|
1984
|
+
|
1985
|
+
|
1986
|
+
class OutputDispatcher(Dispatcher):
    """
    Dispatcher for one channel (stdout or stderr) of one process. Serves several purposes:

    - capture output sent within <!--XSUPERVISOR:BEGIN--> and <!--XSUPERVISOR:END--> tags and signal a
      ProcessCommunicationEvent by calling notify_event(event).
    - route the output to the appropriate log handlers as specified in the config.
    """

    child_log = None  # the current logger (normal_log or capture_log)
    normal_log = None  # the "normal" (non-capture) logger
    capture_log = None  # the logger used while we're in capture_mode
    capture_mode = False  # are we capturing process event data
    output_buffer = b''  # data waiting to be logged

    def __init__(self, process: AbstractSubprocess, event_type, fd):
        """
        Initialize the dispatcher.

        `event_type` should be one of ProcessLogStdoutEvent or ProcessLogStderrEvent
        """
        super().__init__(process, event_type.channel, fd)
        self.event_type = event_type

        # Per-channel log configuration (the `stdout` or `stderr` section of the process config).
        self.lc: ProcessConfig.Log = getattr(process.config, self._channel)

        self._init_normal_log()
        self._init_capture_log()

        self.child_log = self.normal_log

        # all code below is purely for minor speedups
        begin_token = self.event_type.BEGIN_TOKEN
        end_token = self.event_type.END_TOKEN
        self.begin_token_data = (begin_token, len(begin_token))
        self.end_token_data = (end_token, len(end_token))
        self.main_log_level = logging.DEBUG
        config = self._process.config
        self.log_to_main_log = process.context.config.loglevel <= self.main_log_level
        self.stdout_events_enabled = config.stdout.events_enabled
        self.stderr_events_enabled = config.stderr.events_enabled

    def _init_normal_log(self) -> None:
        """
        Configure the "normal" (non-capture) log for this channel of this process. Sets self.normal_log if logging is
        enabled.
        """
        config = self._process.config  # noqa
        channel = self._channel  # noqa

        logfile = self.lc.file
        maxbytes = self.lc.maxbytes  # noqa
        backups = self.lc.backups  # noqa
        to_syslog = self.lc.syslog

        if logfile or to_syslog:
            self.normal_log = logging.getLogger(__name__)

            # NOTE(review): file/syslog handler wiring is currently disabled, so normal_log is a bare logger.
            # if logfile:
            #     loggers.handle_file(
            #         self.normal_log,
            #         filename=logfile,
            #         fmt='%(message)s',
            #         rotating=bool(maxbytes),  # optimization
            #         maxbytes=maxbytes,
            #         backups=backups,
            #     )
            #
            # if to_syslog:
            #     loggers.handle_syslog(
            #         self.normal_log,
            #         fmt=config.name + ' %(message)s',
            #     )

    def _init_capture_log(self):
        """
        Configure the capture log for this process. This log is used to temporarily capture output when special output
        is detected. Sets self.capture_log if capturing is enabled.
        """
        capture_maxbytes = self.lc.capture_maxbytes
        if capture_maxbytes:
            self.capture_log = logging.getLogger(__name__)
            # NOTE(review): bounded-buffer handler wiring is currently disabled.
            # loggers.handle_boundIO(
            #     self.capture_log,
            #     fmt='%(message)s',
            #     maxbytes=capture_maxbytes,
            # )

    def remove_logs(self):
        # Remove then reopen every handler on both loggers (`log` here shadows the module-level logger).
        for log in (self.normal_log, self.capture_log):
            if log is not None:
                for handler in log.handlers:
                    handler.remove()  # type: ignore
                    handler.reopen()  # type: ignore

    def reopen_logs(self):
        # Reopen every handler on both loggers (used e.g. after log rotation).
        for log in (self.normal_log, self.capture_log):
            if log is not None:
                for handler in log.handlers:
                    handler.reopen()  # type: ignore

    def _log(self, data):
        # Route one chunk of child output to the child log, the main log, and/or PROCESS_LOG events.
        if data:
            if self._process.context.config.strip_ansi:
                data = strip_escapes(data)
            if self.child_log:
                self.child_log.info(data)  # type: ignore
            if self.log_to_main_log:
                if not isinstance(data, bytes):
                    text = data
                else:
                    try:
                        text = data.decode('utf-8')
                    except UnicodeDecodeError:
                        text = f'Undecodable: {data!r}'
                log.log(self.main_log_level, '%r %s output:\n%s', self._process.config.name, self._channel, text)  # noqa
            if self._channel == 'stdout':
                if self.stdout_events_enabled:
                    notify_event(ProcessLogStdoutEvent(self._process, self._process.pid, data))
            elif self.stderr_events_enabled:
                notify_event(ProcessLogStderrEvent(self._process, self._process.pid, data))

    def record_output(self):
        # Consume output_buffer, splitting on the BEGIN/END capture tokens and logging the remainder.
        if self.capture_log is None:
            # shortcut trying to find capture data
            data = self.output_buffer
            self.output_buffer = b''
            self._log(data)
            return

        # While capturing we look for the END token; otherwise for the BEGIN token.
        if self.capture_mode:
            token, tokenlen = self.end_token_data
        else:
            token, tokenlen = self.begin_token_data

        if len(self.output_buffer) <= tokenlen:
            return  # not enough data

        data = self.output_buffer
        self.output_buffer = b''

        try:
            before, after = data.split(token, 1)
        except ValueError:
            # No complete token found; keep any partial token suffix buffered for the next read.
            after = None
            index = find_prefix_at_end(data, token)
            if index:
                self.output_buffer = self.output_buffer + data[-index:]
                data = data[:-index]
            self._log(data)
        else:
            self._log(before)
            self.toggle_capture_mode()
            self.output_buffer = after  # type: ignore

        if after:
            # The remainder may contain further tokens; recurse until the buffer is drained.
            self.record_output()

    def toggle_capture_mode(self):
        # Flip capture mode; when leaving capture, emit the captured data as a communication event.
        self.capture_mode = not self.capture_mode

        if self.capture_log is not None:
            if self.capture_mode:
                self.child_log = self.capture_log
            else:
                for handler in self.capture_log.handlers:
                    handler.flush()
                data = self.capture_log.getvalue()  # type: ignore
                channel = self._channel
                procname = self._process.config.name
                event = self.event_type(self._process, self._process.pid, data)
                notify_event(event)

                log.debug('%r %s emitted a comm event', procname, channel)
                for handler in self.capture_log.handlers:
                    handler.remove()  # type: ignore
                    handler.reopen()  # type: ignore
                self.child_log = self.normal_log

    def writable(self) -> bool:
        return False

    def readable(self) -> bool:
        if self._closed:
            return False
        return True

    def handle_read_event(self) -> None:
        data = readfd(self._fd)
        self.output_buffer += data
        self.record_output()
        if not data:
            # if we get no data back from the pipe, it means that the child process has ended. See
            # mail.python.org/pipermail/python-dev/2004-August/046850.html
            self.close()
|
2181
|
+
|
2182
|
+
|
2183
|
+
class InputDispatcher(Dispatcher):
    """Dispatcher that feeds buffered bytes into a subprocess' stdin pipe."""

    def __init__(self, process: AbstractSubprocess, channel: str, fd: int) -> None:
        super().__init__(process, channel, fd)
        self._input_buffer = b''  # bytes queued for delivery to the child

    def writable(self) -> bool:
        # Only interested in write events while data is pending and the fd is still open.
        return bool(self._input_buffer) and not self._closed

    def readable(self) -> bool:
        return False

    def flush(self) -> None:
        # other code depends on this raising EPIPE if the pipe is closed
        sent = os.write(self._fd, as_bytes(self._input_buffer))
        self._input_buffer = self._input_buffer[sent:]

    def handle_write_event(self) -> None:
        if not self._input_buffer:
            return
        try:
            self.flush()
        except OSError as exc:
            if exc.args[0] != errno.EPIPE:
                raise
            # The child's stdin is gone; discard pending data and stop monitoring.
            self._input_buffer = b''
            self.close()
|
2212
|
+
|
2213
|
+
|
2214
|
+
########################################
# ../process.py


# Module-level logger for the subprocess-management code below.
log = logging.getLogger(__name__)
|
2219
|
+
|
2220
|
+
|
2221
|
+
@functools.total_ordering
|
2222
|
+
class Subprocess(AbstractSubprocess):
|
2223
|
+
"""A class to manage a subprocess."""
|
2224
|
+
|
2225
|
+
# Initial state; overridden by instance variables
|
2226
|
+
|
2227
|
+
# pid = 0 # Subprocess pid; 0 when not running
|
2228
|
+
# config = None # ProcessConfig instance
|
2229
|
+
# state = None # process state code
|
2230
|
+
listener_state = None # listener state code (if we're an event listener)
|
2231
|
+
event = None # event currently being processed (if we're an event listener)
|
2232
|
+
laststart = 0. # Last time the subprocess was started; 0 if never
|
2233
|
+
laststop = 0. # Last time the subprocess was stopped; 0 if never
|
2234
|
+
last_stop_report = 0. # Last time "waiting for x to stop" logged, to throttle
|
2235
|
+
delay = 0. # If nonzero, delay starting or killing until this time
|
2236
|
+
administrative_stop = False # true if process has been stopped by an admin
|
2237
|
+
system_stop = False # true if process has been stopped by the system
|
2238
|
+
killing = False # true if we are trying to kill this process
|
2239
|
+
backoff = 0 # backoff counter (to startretries)
|
2240
|
+
dispatchers = None # asyncore output dispatchers (keyed by fd)
|
2241
|
+
pipes = None # map of channel name to file descriptor #
|
2242
|
+
exitstatus = None # status attached to dead process by finish()
|
2243
|
+
spawn_err = None # error message attached by spawn() if any
|
2244
|
+
group = None # ProcessGroup instance if process is in the group
|
2245
|
+
|
2246
|
+
    def __init__(self, config: ProcessConfig, group: 'ProcessGroup', context: AbstractServerContext) -> None:
        """
        Bind this subprocess to its static `config`, its owning process `group`, and the server `context`.

        The process begins in the STOPPED state with no pid and no pipes or dispatchers allocated.
        """
        super().__init__()
        self._config = config
        self.group = group
        self._context = context
        self._dispatchers: dict = {}  # fd -> Dispatcher, populated by _make_dispatchers()
        self._pipes: dict = {}  # channel name -> fd, populated by _make_dispatchers()
        self.state = ProcessStates.STOPPED
        self._pid = 0  # 0 while not running
|
2255
|
+
|
2256
|
+
    @property
    def pid(self) -> int:
        """Pid of the running child process, or 0 when not running."""
        return self._pid
|
2259
|
+
|
2260
|
+
    @property
    def config(self) -> ProcessConfig:
        """Static configuration for this process."""
        return self._config
|
2263
|
+
|
2264
|
+
    @property
    def context(self) -> AbstractServerContext:
        """The server context this process runs under."""
        return self._context
|
2267
|
+
|
2268
|
+
def remove_logs(self) -> None:
|
2269
|
+
for dispatcher in self._dispatchers.values():
|
2270
|
+
if hasattr(dispatcher, 'remove_logs'):
|
2271
|
+
dispatcher.remove_logs()
|
2272
|
+
|
2273
|
+
def reopen_logs(self) -> None:
|
2274
|
+
for dispatcher in self._dispatchers.values():
|
2275
|
+
if hasattr(dispatcher, 'reopen_logs'):
|
2276
|
+
dispatcher.reopen_logs()
|
2277
|
+
|
2278
|
+
def drain(self) -> None:
|
2279
|
+
for dispatcher in self._dispatchers.values():
|
2280
|
+
# note that we *must* call readable() for every dispatcher, as it may have side effects for a given
|
2281
|
+
# dispatcher (eg. call handle_listener_state_change for event listener processes)
|
2282
|
+
if dispatcher.readable():
|
2283
|
+
dispatcher.handle_read_event()
|
2284
|
+
if dispatcher.writable():
|
2285
|
+
dispatcher.handle_write_event()
|
2286
|
+
|
2287
|
+
def write(self, chars: ta.Union[bytes, str]) -> None:
|
2288
|
+
if not self.pid or self.killing:
|
2289
|
+
raise OSError(errno.EPIPE, 'Process already closed')
|
2290
|
+
|
2291
|
+
stdin_fd = self._pipes['stdin']
|
2292
|
+
if stdin_fd is None:
|
2293
|
+
raise OSError(errno.EPIPE, 'Process has no stdin channel')
|
2294
|
+
|
2295
|
+
dispatcher = self._dispatchers[stdin_fd]
|
2296
|
+
if dispatcher.closed:
|
2297
|
+
raise OSError(errno.EPIPE, "Process' stdin channel is closed")
|
2298
|
+
|
2299
|
+
dispatcher.input_buffer += chars
|
2300
|
+
dispatcher.flush() # this must raise EPIPE if the pipe is closed
|
2301
|
+
|
2302
|
+
def _get_execv_args(self) -> ta.Tuple[str, ta.Sequence[str]]:
|
2303
|
+
"""
|
2304
|
+
Internal: turn a program name into a file name, using $PATH, make sure it exists / is executable, raising a
|
2305
|
+
ProcessError if not
|
2306
|
+
"""
|
2307
|
+
try:
|
2308
|
+
commandargs = shlex.split(self.config.command)
|
2309
|
+
except ValueError as e:
|
2310
|
+
raise BadCommandError(f"can't parse command {self.config.command!r}: {e}") # noqa
|
2311
|
+
|
2312
|
+
if commandargs:
|
2313
|
+
program = commandargs[0]
|
2314
|
+
else:
|
2315
|
+
raise BadCommandError('command is empty')
|
2316
|
+
|
2317
|
+
if '/' in program:
|
2318
|
+
filename = program
|
2319
|
+
try:
|
2320
|
+
st = os.stat(filename)
|
2321
|
+
except OSError:
|
2322
|
+
st = None
|
2323
|
+
|
2324
|
+
else:
|
2325
|
+
path = get_path()
|
2326
|
+
found = None
|
2327
|
+
st = None
|
2328
|
+
for dir in path: # noqa
|
2329
|
+
found = os.path.join(dir, program)
|
2330
|
+
try:
|
2331
|
+
st = os.stat(found)
|
2332
|
+
except OSError:
|
2333
|
+
pass
|
2334
|
+
else:
|
2335
|
+
break
|
2336
|
+
if st is None:
|
2337
|
+
filename = program
|
2338
|
+
else:
|
2339
|
+
filename = found # type: ignore
|
2340
|
+
|
2341
|
+
# check_execv_args will raise a ProcessError if the execv args are bogus, we break it out into a separate
|
2342
|
+
# options method call here only to service unit tests
|
2343
|
+
check_execv_args(filename, commandargs, st)
|
2344
|
+
|
2345
|
+
return filename, commandargs
|
2346
|
+
|
2347
|
+
    # Maps a target ProcessState to the event class that change_state() emits when entering that state.
    event_map: ta.ClassVar[ta.Mapping[int, ta.Type[ProcessStateEvent]]] = {
        ProcessStates.BACKOFF: ProcessStateBackoffEvent,
        ProcessStates.FATAL: ProcessStateFatalEvent,
        ProcessStates.UNKNOWN: ProcessStateUnknownEvent,
        ProcessStates.STOPPED: ProcessStateStoppedEvent,
        ProcessStates.EXITED: ProcessStateExitedEvent,
        ProcessStates.RUNNING: ProcessStateRunningEvent,
        ProcessStates.STARTING: ProcessStateStartingEvent,
        ProcessStates.STOPPING: ProcessStateStoppingEvent,
    }
|
2357
|
+
|
2358
|
+
    def change_state(self, new_state: ProcessState, expected: bool = True) -> bool:
        """
        Move to `new_state`, emitting the corresponding ProcessStateEvent (per event_map).

        Returns False (and does nothing) if already in `new_state`. `expected` is passed through to the event so
        listeners can distinguish planned transitions from surprises.
        """
        old_state = self.state
        if new_state is old_state:
            return False

        self.state = new_state
        if new_state == ProcessStates.BACKOFF:
            # Each BACKOFF entry lengthens the delay before the next start attempt by one more second.
            now = time.time()
            self.backoff += 1
            self.delay = now + self.backoff

        event_class = self.event_map.get(new_state)
        if event_class is not None:
            event = event_class(self, old_state, expected)
            notify_event(event)

        return True
|
2375
|
+
|
2376
|
+
def _check_in_state(self, *states: ProcessState) -> None:
|
2377
|
+
if self.state not in states:
|
2378
|
+
current_state = get_process_state_description(self.state)
|
2379
|
+
allowable_states = ' '.join(map(get_process_state_description, states))
|
2380
|
+
processname = as_string(self.config.name)
|
2381
|
+
raise AssertionError('Assertion failed for %s: %s not in %s' % (processname, current_state, allowable_states)) # noqa
|
2382
|
+
|
2383
|
+
    def _record_spawn_err(self, msg: str) -> None:
        """Remember the spawn failure message on the process and log it."""
        self.spawn_err = msg
        log.info('spawn_err: %s', msg)
|
2386
|
+
|
2387
|
+
    def spawn(self) -> ta.Optional[int]:
        """
        Start the process: set up dispatchers/pipes, fork, and exec.

        Returns the child pid in the parent on success; returns None if already running, on any setup/fork
        failure (transitioning to BACKOFF), or in the child (which never returns normally).
        """
        processname = as_string(self.config.name)

        if self.pid:
            log.warning('process \'%s\' already running', processname)
            return None

        # Reset per-run bookkeeping before attempting the start.
        self.killing = False
        self.spawn_err = None
        self.exitstatus = None
        self.system_stop = False
        self.administrative_stop = False

        self.laststart = time.time()

        self._check_in_state(
            ProcessStates.EXITED,
            ProcessStates.FATAL,
            ProcessStates.BACKOFF,
            ProcessStates.STOPPED,
        )

        self.change_state(ProcessStates.STARTING)

        try:
            filename, argv = self._get_execv_args()
        except ProcessError as what:
            self._record_spawn_err(what.args[0])
            self._check_in_state(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)
            return None

        try:
            self._dispatchers, self._pipes = self._make_dispatchers()  # type: ignore
        except OSError as why:
            code = why.args[0]
            if code == errno.EMFILE:
                # too many file descriptors open
                msg = f"too many open files to spawn '{processname}'"
            else:
                msg = f"unknown error making dispatchers for '{processname}': {errno.errorcode.get(code, code)}"
            self._record_spawn_err(msg)
            self._check_in_state(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)
            return None

        try:
            pid = os.fork()
        except OSError as why:
            code = why.args[0]
            if code == errno.EAGAIN:
                # process table full
                msg = f'Too many processes in process table to spawn \'{processname}\''
            else:
                msg = f'unknown error during fork for \'{processname}\': {errno.errorcode.get(code, code)}'
            self._record_spawn_err(msg)
            self._check_in_state(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)
            # Fork failed after the pipes were created: close both ends to avoid leaking fds.
            close_parent_pipes(self._pipes)
            close_child_pipes(self._pipes)
            return None

        if pid != 0:
            return self._spawn_as_parent(pid)

        else:
            # In the child: exec (or exit 127); this call does not return.
            self._spawn_as_child(filename, argv)
            return None
|
2455
|
+
|
2456
|
+
def _make_dispatchers(self) -> ta.Tuple[ta.Mapping[int, Dispatcher], ta.Mapping[str, int]]:
|
2457
|
+
use_stderr = not self.config.redirect_stderr
|
2458
|
+
p = make_pipes(use_stderr)
|
2459
|
+
stdout_fd, stderr_fd, stdin_fd = p['stdout'], p['stderr'], p['stdin']
|
2460
|
+
dispatchers: ta.Dict[int, Dispatcher] = {}
|
2461
|
+
etype: ta.Type[ProcessCommunicationEvent]
|
2462
|
+
if stdout_fd is not None:
|
2463
|
+
etype = ProcessCommunicationStdoutEvent
|
2464
|
+
dispatchers[stdout_fd] = OutputDispatcher(self, etype, stdout_fd)
|
2465
|
+
if stderr_fd is not None:
|
2466
|
+
etype = ProcessCommunicationStderrEvent
|
2467
|
+
dispatchers[stderr_fd] = OutputDispatcher(self, etype, stderr_fd)
|
2468
|
+
if stdin_fd is not None:
|
2469
|
+
dispatchers[stdin_fd] = InputDispatcher(self, 'stdin', stdin_fd)
|
2470
|
+
return dispatchers, p
|
2471
|
+
|
2472
|
+
    def _spawn_as_parent(self, pid: int) -> int:
        """In the parent after fork: record the child pid, drop child-side pipe ends, and start the startsecs clock."""
        # Parent
        self._pid = pid
        close_child_pipes(self._pipes)
        log.info('spawned: \'%s\' with pid %s', as_string(self.config.name), pid)
        self.spawn_err = None
        # The child must stay up until this deadline to be promoted from STARTING to RUNNING.
        self.delay = time.time() + self.config.startsecs
        self.context.pid_history[pid] = self
        return pid
|
2481
|
+
|
2482
|
+
    def _prepare_child_fds(self) -> None:
        """In the child after fork: wire the child-side pipe ends onto fds 0-2 and close every other fd."""
        os.dup2(self._pipes['child_stdin'], 0)
        os.dup2(self._pipes['child_stdout'], 1)
        if self.config.redirect_stderr:
            # Child stderr shares the stdout pipe.
            os.dup2(self._pipes['child_stdout'], 2)
        else:
            os.dup2(self._pipes['child_stderr'], 2)
        # Close everything else up to the configured fd limit so the child inherits no stray descriptors.
        for i in range(3, self.context.config.minfds):
            close_fd(i)
|
2491
|
+
|
2492
|
+
    def _spawn_as_child(self, filename: str, argv: ta.Sequence[str]) -> None:
        """
        In the child after fork: drop into a new process group, set up fds/uid/env/cwd/umask, then execve.

        Never returns: on any failure the finally clause exits the child with status 127. Error text is written
        to fd 2, which _prepare_child_fds has pointed at the stderr (or stdout) pipe.
        """
        try:
            # prevent child from receiving signals sent to the parent by calling os.setpgrp to create a new process
            # group for the child; this prevents, for instance, the case of child processes being sent a SIGINT when
            # running supervisor in foreground mode and Ctrl-C in the terminal window running supervisord is pressed.
            # Presumably it also prevents HUP, etc received by supervisord from being sent to children.
            os.setpgrp()

            self._prepare_child_fds()
            # sending to fd 2 will put this output in the stderr log

            # set user
            setuid_msg = self.set_uid()
            if setuid_msg:
                uid = self.config.uid
                msg = f"couldn't setuid to {uid}: {setuid_msg}\n"
                os.write(2, as_bytes('supervisor: ' + msg))
                return  # finally clause will exit the child process

            # set environment
            env = os.environ.copy()
            env['SUPERVISOR_ENABLED'] = '1'
            env['SUPERVISOR_PROCESS_NAME'] = self.config.name
            if self.group:
                env['SUPERVISOR_GROUP_NAME'] = self.group.config.name
            if self.config.environment is not None:
                env.update(self.config.environment)

            # change directory
            cwd = self.config.directory
            try:
                if cwd is not None:
                    os.chdir(cwd)
            except OSError as why:
                code = errno.errorcode.get(why.args[0], why.args[0])
                msg = f"couldn't chdir to {cwd}: {code}\n"
                os.write(2, as_bytes('supervisor: ' + msg))
                return  # finally clause will exit the child process

            # set umask, then execve
            try:
                if self.config.umask is not None:
                    os.umask(self.config.umask)
                os.execve(filename, list(argv), env)
            except OSError as why:
                code = errno.errorcode.get(why.args[0], why.args[0])
                msg = f"couldn't exec {argv[0]}: {code}\n"
                os.write(2, as_bytes('supervisor: ' + msg))
            except Exception:  # noqa
                (file, fun, line), t, v, tbinfo = compact_traceback()
                error = f'{t}, {v}: file: {file} line: {line}'
                msg = f"couldn't exec (unknown): {error}\n"
                os.write(2, as_bytes('supervisor: ' + msg))

            # this point should only be reached if execve failed. the finally clause will exit the child process.

        finally:
            os.write(2, as_bytes('supervisor: child process was not spawned\n'))
            real_exit(127)  # exit process with code for spawn failure
|
2551
|
+
|
2552
|
+
    def _check_and_adjust_for_system_clock_rollback(self, test_time):
        """
        Check if system clock has rolled backward beyond test_time. If so, set affected timestamps to test_time.

        Keeps laststart/last_stop_report/delay self-consistent so start/stop timeouts still fire.
        """
        if self.state == ProcessStates.STARTING:
            self.laststart = min(test_time, self.laststart)
            if self.delay > 0 and test_time < (self.delay - self.config.startsecs):
                # The RUNNING-promotion deadline is in the far future; re-anchor it to now.
                self.delay = test_time + self.config.startsecs

        elif self.state == ProcessStates.RUNNING:
            if test_time > self.laststart and test_time < (self.laststart + self.config.startsecs):
                # Pretend the process started a full startsecs ago so it isn't judged "exited too quickly".
                self.laststart = test_time - self.config.startsecs

        elif self.state == ProcessStates.STOPPING:
            self.last_stop_report = min(test_time, self.last_stop_report)
            if self.delay > 0 and test_time < (self.delay - self.config.stopwaitsecs):
                # Re-anchor the SIGKILL escalation deadline.
                self.delay = test_time + self.config.stopwaitsecs

        elif self.state == ProcessStates.BACKOFF:
            if self.delay > 0 and test_time < (self.delay - self.backoff):
                # Re-anchor the next retry time.
                self.delay = test_time + self.backoff
|
2573
|
+
|
2574
|
+
    def stop(self) -> ta.Optional[str]:
        """Administratively stop the process; returns None on success or an error message string from kill()."""
        self.administrative_stop = True
        # Reset throttling so the first "waiting for x to stop" line logs immediately.
        self.last_stop_report = 0
        return self.kill(self.config.stopsignal)
|
2578
|
+
|
2579
|
+
def stop_report(self) -> None:
|
2580
|
+
""" Log a 'waiting for x to stop' message with throttling. """
|
2581
|
+
if self.state == ProcessStates.STOPPING:
|
2582
|
+
now = time.time()
|
2583
|
+
|
2584
|
+
self._check_and_adjust_for_system_clock_rollback(now)
|
2585
|
+
|
2586
|
+
if now > (self.last_stop_report + 2): # every 2 seconds
|
2587
|
+
log.info('waiting for %s to stop', as_string(self.config.name))
|
2588
|
+
self.last_stop_report = now
|
2589
|
+
|
2590
|
+
    def give_up(self) -> None:
        """Stop retrying a failing process: BACKOFF -> FATAL, clearing any pending delay/backoff."""
        self.delay = 0
        self.backoff = 0
        self.system_stop = True
        self._check_in_state(ProcessStates.BACKOFF)
        self.change_state(ProcessStates.FATAL)
|
2596
|
+
|
2597
|
+
    def kill(self, sig: int) -> ta.Optional[str]:
        """
        Send a signal to the subprocess with the intention to kill it (to make it exit). This may or may not actually
        kill it.

        Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
        running.
        """
        now = time.time()

        processname = as_string(self.config.name)
        # If the process is in BACKOFF and we want to stop or kill it, then BACKOFF -> STOPPED. This is needed because
        # if startretries is a large number and the process isn't starting successfully, the stop request would be
        # blocked for a long time waiting for the retries.
        if self.state == ProcessStates.BACKOFF:
            log.debug('Attempted to kill %s, which is in BACKOFF state.', processname)
            self.change_state(ProcessStates.STOPPED)
            return None

        args: tuple
        if not self.pid:
            fmt, args = "attempted to kill %s with sig %s but it wasn't running", (processname, signame(sig))
            log.debug(fmt, *args)
            return fmt % args

        # If we're in the stopping state, then we've already sent the stop signal and this is the kill signal
        if self.state == ProcessStates.STOPPING:
            killasgroup = self.config.killasgroup
        else:
            killasgroup = self.config.stopasgroup

        as_group = ''
        if killasgroup:
            as_group = 'process group '

        log.debug('killing %s (pid %s) %swith signal %s', processname, self.pid, as_group, signame(sig))

        # RUNNING/STARTING/STOPPING -> STOPPING
        self.killing = True
        # Deadline after which the supervisor escalates (e.g. to SIGKILL).
        self.delay = now + self.config.stopwaitsecs
        # we will already be in the STOPPING state if we're doing a SIGKILL as a result of overrunning stopwaitsecs
        self._check_in_state(ProcessStates.RUNNING, ProcessStates.STARTING, ProcessStates.STOPPING)
        self.change_state(ProcessStates.STOPPING)

        pid = self.pid
        if killasgroup:
            # send to the whole process group instead
            pid = -self.pid

        try:
            try:
                os.kill(pid, sig)
            except OSError as exc:
                if exc.errno == errno.ESRCH:
                    log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s', processname, self.pid, str(exc))  # noqa
                    # we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
                    # processing.
                    return None
                raise
        except Exception:  # noqa
            # Unexpected failure while signalling: record it and mark the process state unknown.
            tb = traceback.format_exc()
            fmt, args = 'unknown problem killing %s (%s):%s', (processname, self.pid, tb)
            log.critical(fmt, *args)
            self.change_state(ProcessStates.UNKNOWN)
            self.killing = False
            self.delay = 0
            return fmt % args

        return None
|
2666
|
+
|
2667
|
+
    def signal(self, sig: int) -> ta.Optional[str]:
        """
        Send a signal to the subprocess, without intending to kill it.

        Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
        running.
        """
        processname = as_string(self.config.name)
        args: tuple
        if not self.pid:
            fmt, args = "attempted to send %s sig %s but it wasn't running", (processname, signame(sig))
            log.debug(fmt, *args)
            return fmt % args

        log.debug('sending %s (pid %s) sig %s', processname, self.pid, signame(sig))

        self._check_in_state(ProcessStates.RUNNING, ProcessStates.STARTING, ProcessStates.STOPPING)

        try:
            try:
                os.kill(self.pid, sig)
            except OSError as exc:
                if exc.errno == errno.ESRCH:
                    log.debug('unable to signal %s (pid %s), it probably just now exited '
                              'on its own: %s', processname, self.pid, str(exc))
                    # we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
                    # processing.
                    return None
                raise
        except Exception:  # noqa
            # Unexpected failure while signalling: record it and mark the process state unknown.
            tb = traceback.format_exc()
            fmt, args = 'unknown problem sending sig %s (%s):%s', (processname, self.pid, tb)
            log.critical(fmt, *args)
            self.change_state(ProcessStates.UNKNOWN)
            return fmt % args

        return None
|
2704
|
+
|
2705
|
+
def finish(self, sts: int) -> None:
    """The process was reaped and we need to report and manage its state.

    *sts* is the raw wait status from waitpid(); it is decoded into an exit code
    and a human-readable message, then one of three transitions is applied:
    STOPPING -> STOPPED (stop requested), STARTING -> BACKOFF (exited too
    quickly), or RUNNING -> EXITED (normal exit, expected or not).
    """
    self.drain()

    es, msg = decode_wait_status(sts)

    now = time.time()

    self._check_and_adjust_for_system_clock_rollback(now)

    self.laststop = now
    processname = as_string(self.config.name)

    if now > self.laststart:
        # "too quickly" means it did not stay up for at least config.startsecs.
        too_quickly = now - self.laststart < self.config.startsecs
    else:
        too_quickly = False
        log.warning(
            "process '%s' (%s) laststart time is in the future, don't "
            "know how long process was running so assuming it did "
            "not exit too quickly", processname, self.pid)

    exit_expected = es in self.config.exitcodes

    if self.killing:
        # likely the result of a stop request implies STOPPING -> STOPPED
        self.killing = False
        self.delay = 0
        self.exitstatus = es

        fmt, args = 'stopped: %s (%s)', (processname, msg)
        self._check_in_state(ProcessStates.STOPPING)
        self.change_state(ProcessStates.STOPPED)
        if exit_expected:
            log.info(fmt, *args)
        else:
            log.warning(fmt, *args)

    elif too_quickly:
        # the program did not stay up long enough to make it to RUNNING implies STARTING -> BACKOFF
        self.exitstatus = None
        self.spawn_err = 'Exited too quickly (process log may have details)'
        self._check_in_state(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        log.warning('exited: %s (%s)', processname, msg + '; not expected')

    else:
        # this finish was not the result of a stop request, the program was in the RUNNING state but exited implies
        # RUNNING -> EXITED normally but see next comment
        self.delay = 0
        self.backoff = 0
        self.exitstatus = es

        # if the process was STARTING but a system time change causes self.laststart to be in the future, the normal
        # STARTING->RUNNING transition can be subverted so we perform the transition here.
        if self.state == ProcessStates.STARTING:
            self.change_state(ProcessStates.RUNNING)

        self._check_in_state(ProcessStates.RUNNING)

        if exit_expected:
            # expected exit code
            self.change_state(ProcessStates.EXITED, expected=True)
            log.info('exited: %s (%s)', processname, msg + '; expected')
        else:
            # unexpected exit code
            self.spawn_err = f'Bad exit code {es}'
            self.change_state(ProcessStates.EXITED, expected=False)
            log.warning('exited: %s (%s)', processname, msg + '; not expected')

    # The child is gone: clear the pid and tear down the parent side of the pipes/dispatchers.
    self._pid = 0
    close_parent_pipes(self._pipes)
    self._pipes = {}
    self._dispatchers = {}

    # if we died before we processed the current event (only happens if we're an event listener), notify the event
    # system that this event was rejected so it can be processed again.
    if self.event is not None:
        # Note: this should only be true if we were in the BUSY state when finish() was called.
        notify_event(EventRejectedEvent(self, self.event))  # type: ignore
        self.event = None
def set_uid(self) -> ta.Optional[str]:
    """Drop privileges to the configured uid, if one is set.

    Returns None when no uid is configured, otherwise whatever message
    drop_privileges() produces (None on success, an error string on failure).
    """
    uid = self.config.uid
    if uid is None:
        return None
    return drop_privileges(uid)
def __lt__(self, other):
    """Order subprocesses by their configured priority (ascending)."""
    mine = self.config.priority
    theirs = other.config.priority
    return mine < theirs
def __eq__(self, other):
    """Subprocesses compare equal when their configured priorities match."""
    mine = self.config.priority
    theirs = other.config.priority
    return mine == theirs
def __repr__(self):
    # repr can't return anything other than a native string, but the name might be unicode - a problem on Python 2.
    state_desc = get_process_state_description(self.get_state())
    return '<Subprocess at %s with name %s in state %s>' % (id(self), self.config.name, state_desc)
def get_state(self) -> ProcessState:
    """Return the current process state (see ProcessStates)."""
    current = self.state
    return current
def transition(self):
    """Drive this process's state machine forward based on the current time.

    Handles the timed transitions: (re)spawning EXITED/STOPPED/BACKOFF
    processes when allowed, promoting STARTING -> RUNNING after startsecs,
    giving up BACKOFF -> FATAL after too many retries, and escalating a
    lingering STOPPING process with SIGKILL.
    """
    now = time.time()
    state = self.state

    self._check_and_adjust_for_system_clock_rollback(now)

    logger = log

    if self.context.state > SupervisorStates.RESTARTING:
        # dont start any processes if supervisor is shutting down
        if state == ProcessStates.EXITED:
            if self.config.autorestart:
                if self.config.autorestart is RestartUnconditionally:
                    # EXITED -> STARTING
                    self.spawn()
                elif self.exitstatus not in self.config.exitcodes:  # type: ignore
                    # EXITED -> STARTING
                    self.spawn()

        elif state == ProcessStates.STOPPED and not self.laststart:
            if self.config.autostart:
                # STOPPED -> STARTING
                self.spawn()

        elif state == ProcessStates.BACKOFF:
            if self.backoff <= self.config.startretries:
                if now > self.delay:
                    # BACKOFF -> STARTING
                    self.spawn()

    processname = as_string(self.config.name)
    if state == ProcessStates.STARTING:
        if now - self.laststart > self.config.startsecs:
            # STARTING -> RUNNING if the proc has started successfully and it has stayed up for at least
            # proc.config.startsecs,
            self.delay = 0
            self.backoff = 0
            self._check_in_state(ProcessStates.STARTING)
            self.change_state(ProcessStates.RUNNING)
            msg = ('entered RUNNING state, process has stayed up for > than %s seconds (startsecs)' % self.config.startsecs)  # noqa
            logger.info('success: %s %s', processname, msg)

    if state == ProcessStates.BACKOFF:
        if self.backoff > self.config.startretries:
            # BACKOFF -> FATAL if the proc has exceeded its number of retries
            self.give_up()
            msg = ('entered FATAL state, too many start retries too quickly')
            logger.info('gave up: %s %s', processname, msg)

    elif state == ProcessStates.STOPPING:
        time_left = self.delay - now
        if time_left <= 0:
            # kill processes which are taking too long to stop with a final sigkill. if this doesn't kill it, the
            # process will be stuck in the STOPPING state forever.
            log.warning('killing \'%s\' (%s) with SIGKILL', processname, self.pid)
            self.kill(signal.SIGKILL)
def create_auto_child_logs(self):
    """Create temporary logfiles which are erased at start time.

    Currently a no-op; the original implementation is preserved below as a
    reference for a future port.
    """
    # get_autoname = self.context.get_auto_child_log_name  # noqa
    # sid = self.context.config.identifier  # noqa
    # name = self.config.name  # noqa
    # if self.stdout_logfile is Automatic:
    #     self.stdout_logfile = get_autoname(name, sid, 'stdout')
    # if self.stderr_logfile is Automatic:
    #     self.stderr_logfile = get_autoname(name, sid, 'stderr')
@functools.total_ordering
class ProcessGroup:
    """A named collection of Subprocesses built from one ProcessGroupConfig.

    Groups compare by their configured priority; total_ordering derives the
    remaining comparison operators from __lt__ and __eq__.
    """

    def __init__(self, config: ProcessGroupConfig, context: ServerContext):
        super().__init__()
        self.config = config
        self.context = context
        # Map of process name -> Subprocess, one per configured process.
        self.processes = {}
        for pconfig in self.config.processes or []:
            process = Subprocess(pconfig, self, self.context)
            self.processes[pconfig.name] = process

    def __lt__(self, other):
        return self.config.priority < other.config.priority

    def __eq__(self, other):
        return self.config.priority == other.config.priority

    def __repr__(self):
        # repr can't return anything other than a native string, but the name might be unicode - a problem on Python 2.
        name = self.config.name
        return f'<{self.__class__.__name__} instance at {id(self)} named {name}>'

    def remove_logs(self) -> None:
        """Remove log files for every process in the group."""
        for process in self.processes.values():
            process.remove_logs()

    def reopen_logs(self) -> None:
        """Reopen log files for every process in the group (e.g. after rotation)."""
        for process in self.processes.values():
            process.reopen_logs()

    def stop_all(self) -> None:
        """Request a stop for every non-stopped process in the group."""
        processes = list(self.processes.values())
        processes.sort()
        processes.reverse()  # stop in desc priority order

        for proc in processes:
            state = proc.get_state()
            if state == ProcessStates.RUNNING:
                # RUNNING -> STOPPING
                proc.stop()

            elif state == ProcessStates.STARTING:
                # STARTING -> STOPPING
                proc.stop()

            elif state == ProcessStates.BACKOFF:
                # BACKOFF -> FATAL
                proc.give_up()

    def get_unstopped_processes(self) -> ta.List[Subprocess]:
        """Return the processes in this group not yet in a stopped state."""
        return [x for x in self.processes.values() if x.get_state() not in STOPPED_STATES]

    def get_dispatchers(self) -> ta.Dict[int, Dispatcher]:
        """Return the merged fd -> Dispatcher map of all member processes."""
        dispatchers = {}
        for process in self.processes.values():
            dispatchers.update(process._dispatchers)  # noqa
        return dispatchers

    def before_remove(self) -> None:
        # Hook invoked just before the group is removed; intentionally a no-op here.
        pass

    def transition(self) -> None:
        """Advance the state machine of every process in the group."""
        for proc in self.processes.values():
            proc.transition()

    def after_setuid(self) -> None:
        """Post-privilege-drop setup: create auto child logs for each process."""
        for proc in self.processes.values():
            proc.create_auto_child_logs()
########################################
# supervisor.py


# Module-level logger shared by the Supervisor machinery below.
log = logging.getLogger(__name__)
class Supervisor:
    """Top-level server object: owns the process groups and runs the main poll loop."""

    def __init__(self, context: ServerContext) -> None:
        super().__init__()

        self._context = context
        # Map of tick period -> last emitted timeslice, used by _tick().
        self._ticks: ta.Dict[int, float] = {}
        self._process_groups: ta.Dict[str, ProcessGroup] = {}  # map of process group name to process group object
        self._stop_groups: ta.Optional[ta.List[ProcessGroup]] = None  # list used for priority ordered shutdown
        self._stopping = False  # set after we detect that we are handling a stop request
        self._last_shutdown_report = 0.  # throttle for delayed process error reports at stop

    @property
    def context(self) -> ServerContext:
        return self._context

    def get_state(self) -> SupervisorState:
        return self._context.state

    def main(self) -> None:
        """One-time startup (fd cleanup, setuid, rlimits, log dir cleanup), then run()."""
        if not self._context.first:
            # prevent crash on libdispatch-based systems, at least for the first request
            self._context.cleanup_fds()

        self._context.set_uid_or_exit()

        if self._context.first:
            self._context.set_rlimits_or_exit()

        # this sets the options.logger object delay logger instantiation until after setuid
        if not self._context.config.nocleanup:
            # clean up old automatic logs
            self._context.clear_auto_child_logdir()

        self.run()

    def run(self) -> None:
        """Build the process groups, install signal handling, daemonize if needed, and loop."""
        self._process_groups = {}  # clear
        self._stop_groups = None  # clear

        clear_events()

        try:
            for config in self._context.config.groups or []:
                self.add_process_group(config)

            self._context.set_signals()

            if not self._context.config.nodaemon and self._context.first:
                self._context.daemonize()

            # writing pid file needs to come *after* daemonizing or pid will be wrong
            self._context.write_pidfile()

            self.runforever()

        finally:
            self._context.cleanup()

    def diff_to_active(self):
        """Compare configured groups against active ones; return (added, changed, removed)."""
        new = self._context.config.groups or []
        cur = [group.config for group in self._process_groups.values()]

        curdict = dict(zip([cfg.name for cfg in cur], cur))
        newdict = dict(zip([cfg.name for cfg in new], new))

        added = [cand for cand in new if cand.name not in curdict]
        removed = [cand for cand in cur if cand.name not in newdict]

        changed = [cand for cand in new if cand != curdict.get(cand.name, cand)]

        return added, changed, removed

    def add_process_group(self, config: ProcessGroupConfig) -> bool:
        """Register a new process group; returns False if the name already exists."""
        name = config.name
        if name in self._process_groups:
            return False

        group = self._process_groups[name] = ProcessGroup(config, self._context)
        group.after_setuid()

        notify_event(ProcessGroupAddedEvent(name))
        return True

    def remove_process_group(self, name: str) -> bool:
        """Remove a process group; returns False if it still has unstopped processes."""
        if self._process_groups[name].get_unstopped_processes():
            return False

        self._process_groups[name].before_remove()

        del self._process_groups[name]

        notify_event(ProcessGroupRemovedEvent(name))
        return True

    def get_process_map(self) -> ta.Dict[int, Dispatcher]:
        """Return the merged fd -> Dispatcher map across all process groups."""
        process_map = {}
        for group in self._process_groups.values():
            process_map.update(group.get_dispatchers())
        return process_map

    def shutdown_report(self) -> ta.List[Subprocess]:
        """Return processes still unstopped during shutdown, logging progress (throttled)."""
        unstopped: ta.List[Subprocess] = []

        for group in self._process_groups.values():
            unstopped.extend(group.get_unstopped_processes())

        if unstopped:
            # throttle 'waiting for x to die' reports
            now = time.time()
            if now > (self._last_shutdown_report + 3):  # every 3 secs
                names = [as_string(p.config.name) for p in unstopped]
                namestr = ', '.join(names)
                log.info('waiting for %s to die', namestr)
                self._last_shutdown_report = now
                for proc in unstopped:
                    state = get_process_state_description(proc.get_state())
                    log.debug('%s state: %s', proc.config.name, state)

        return unstopped

    def _ordered_stop_groups_phase_1(self) -> None:
        if self._stop_groups:
            # stop the last group (the one with the "highest" priority)
            self._stop_groups[-1].stop_all()

    def _ordered_stop_groups_phase_2(self) -> None:
        # after phase 1 we've transitioned and reaped, let's see if we can remove the group we stopped from the
        # stop_groups queue.
        if self._stop_groups:
            # pop the last group (the one with the "highest" priority)
            group = self._stop_groups.pop()
            if group.get_unstopped_processes():
                # if any processes in the group aren't yet in a stopped state, we're not yet done shutting this group
                # down, so push it back on to the end of the stop group queue
                self._stop_groups.append(group)

    def runforever(self) -> None:
        """Main event loop: poll dispatcher fds, dispatch read/write events, transition
        process state machines, reap children, handle signals, and emit tick events.
        Exits by raising ExitNow once shutdown completes (or breaks in test mode)."""
        notify_event(SupervisorRunningEvent())
        timeout = 1  # this cannot be fewer than the smallest TickEvent (5)

        while True:
            combined_map = {}
            combined_map.update(self.get_process_map())

            pgroups = list(self._process_groups.values())
            pgroups.sort()

            if self._context.state < SupervisorStates.RUNNING:
                if not self._stopping:
                    # first time, set the stopping flag, do a notification and set stop_groups
                    self._stopping = True
                    self._stop_groups = pgroups[:]
                    notify_event(SupervisorStoppingEvent())

                self._ordered_stop_groups_phase_1()

                if not self.shutdown_report():
                    # if there are no unstopped processes (we're done killing everything), it's OK to shutdown or reload
                    raise ExitNow

            for fd, dispatcher in combined_map.items():
                if dispatcher.readable():
                    self._context.poller.register_readable(fd)
                if dispatcher.writable():
                    self._context.poller.register_writable(fd)

            r, w = self._context.poller.poll(timeout)

            for fd in r:
                if fd in combined_map:
                    try:
                        dispatcher = combined_map[fd]
                        log.debug('read event caused by %r', dispatcher)
                        dispatcher.handle_read_event()
                        if not dispatcher.readable():
                            self._context.poller.unregister_readable(fd)
                    except ExitNow:
                        raise
                    except Exception:  # noqa
                        combined_map[fd].handle_error()
                else:
                    # if the fd is not in combined_map, we should unregister it. otherwise, it will be polled every
                    # time, which may cause 100% cpu usage
                    log.debug('unexpected read event from fd %r', fd)
                    try:
                        self._context.poller.unregister_readable(fd)
                    except Exception:  # noqa
                        pass

            for fd in w:
                if fd in combined_map:
                    try:
                        dispatcher = combined_map[fd]
                        log.debug('write event caused by %r', dispatcher)
                        dispatcher.handle_write_event()
                        if not dispatcher.writable():
                            self._context.poller.unregister_writable(fd)
                    except ExitNow:
                        raise
                    except Exception:  # noqa
                        combined_map[fd].handle_error()
                else:
                    log.debug('unexpected write event from fd %r', fd)
                    try:
                        self._context.poller.unregister_writable(fd)
                    except Exception:  # noqa
                        pass

            for group in pgroups:
                group.transition()

            self._reap()
            self._handle_signal()
            self._tick()

            if self._context.state < SupervisorStates.RUNNING:
                self._ordered_stop_groups_phase_2()

            if self._context.test:
                break

    def _tick(self, now: ta.Optional[float] = None) -> None:
        """Send one or more 'tick' events when the timeslice related to the period for the event type rolls over"""

        if now is None:
            # now won't be None in unit tests
            now = time.time()

        for event in TICK_EVENTS:
            period = event.period  # type: ignore

            last_tick = self._ticks.get(period)
            if last_tick is None:
                # we just started up
                last_tick = self._ticks[period] = timeslice(period, now)

            this_tick = timeslice(period, now)
            if this_tick != last_tick:
                self._ticks[period] = this_tick
                notify_event(event(this_tick, self))

    def _reap(self, *, once: bool = False, depth: int = 0) -> None:
        """Reap one (or, by recursion, all pending) exited children and route the
        wait status to the owning Subprocess.finish(); bounded at depth 100."""
        if depth >= 100:
            return

        pid, sts = self._context.waitpid()
        if not pid:
            return

        process = self._context.pid_history.get(pid, None)
        if process is None:
            # A child we don't know about (e.g. inherited) — log and forget.
            _, msg = decode_wait_status(check_not_none(sts))
            log.info('reaped unknown pid %s (%s)', pid, msg)
        else:
            process.finish(check_not_none(sts))
            del self._context.pid_history[pid]

        if not once:
            # keep reaping until no more kids to reap, but don't recurse infinitely
            self._reap(once=False, depth=depth + 1)

    def _handle_signal(self) -> None:
        """Consume one pending signal from the context and react to it."""
        sig = self._context.get_signal()
        if not sig:
            return

        if sig in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
            log.warning('received %s indicating exit request', signame(sig))
            self._context.set_state(SupervisorStates.SHUTDOWN)

        elif sig == signal.SIGHUP:
            if self._context.state == SupervisorStates.SHUTDOWN:
                log.warning('ignored %s indicating restart request (shutdown in progress)', signame(sig))  # noqa
            else:
                log.warning('received %s indicating restart request', signame(sig))  # noqa
                self._context.set_state(SupervisorStates.RESTARTING)

        elif sig == signal.SIGCHLD:
            log.debug('received %s indicating a child quit', signame(sig))

        elif sig == signal.SIGUSR2:
            log.info('received %s indicating log reopen request', signame(sig))
            # self._context.reopen_logs()
            for group in self._process_groups.values():
                group.reopen_logs()

        else:
            log.debug('received %s indicating nothing', signame(sig))
def timeslice(period, when):
    """Return the start of the period-sized time slice containing *when*.

    E.g. timeslice(5, 12) == 10. Used to detect when a tick period rolls over.
    """
    # Floor-divide to the slice index, then scale back up; equivalent to
    # int(when - (when % period)) for all signs since // floors.
    return int(when // period) * period
def main(args=None, test=False):
    """Demo entry point: run a supervisor over a hard-coded two-process config.

    Loops, constructing a fresh ServerContext each pass, so that a SIGHUP
    (which sets the RESTARTING state) restarts with a new Supervisor; exits
    once the state drops below RESTARTING or after one pass in test mode.
    """
    configure_standard_logging('INFO')

    # if we hup, restart by making a new Supervisor()
    first = True
    while True:
        config = ServerConfig.new(
            nodaemon=True,
            groups=[
                ProcessGroupConfig(
                    name='default',
                    processes=[
                        ProcessConfig(
                            name='sleep',
                            command='sleep 600',
                            stdout=ProcessConfig.Log(
                                file='/dev/fd/1',
                                maxbytes=0,
                            ),
                            redirect_stderr=True,
                        ),
                        ProcessConfig(
                            name='ls',
                            command='ls -al',
                            stdout=ProcessConfig.Log(
                                file='/dev/fd/1',
                                maxbytes=0,
                            ),
                            redirect_stderr=True,
                        ),
                    ],
                ),
            ],
        )

        context = ServerContext(
            config,
        )

        context.first = first
        context.test = test
        go(context)
        # options.close_logger()
        first = False
        if test or (context.state < SupervisorStates.RESTARTING):
            break
def go(context):  # pragma: no cover
    """Run a Supervisor over *context*, swallowing its ExitNow termination signal."""
    with contextlib.suppress(ExitNow):
        Supervisor(context).main()
# Script entry point: run the demo supervisor loop.
if __name__ == '__main__':
    main()