ominfra 0.0.0.dev127__py3-none-any.whl → 0.0.0.dev129__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- ominfra/deploy/_executor.py +24 -0
- ominfra/pyremote/_runcommands.py +24 -0
- ominfra/scripts/journald2aws.py +24 -0
- ominfra/scripts/supervisor.py +1320 -1225
- ominfra/supervisor/configs.py +34 -11
- ominfra/supervisor/dispatchers.py +7 -6
- ominfra/supervisor/dispatchersimpl.py +29 -22
- ominfra/supervisor/groups.py +1 -1
- ominfra/supervisor/groupsimpl.py +2 -2
- ominfra/supervisor/inject.py +22 -17
- ominfra/supervisor/io.py +82 -0
- ominfra/supervisor/main.py +6 -7
- ominfra/supervisor/pipes.py +15 -13
- ominfra/supervisor/poller.py +36 -35
- ominfra/supervisor/{processes.py → process.py} +2 -1
- ominfra/supervisor/{processesimpl.py → processimpl.py} +42 -54
- ominfra/supervisor/setup.py +1 -1
- ominfra/supervisor/setupimpl.py +4 -3
- ominfra/supervisor/signals.py +56 -50
- ominfra/supervisor/spawning.py +2 -1
- ominfra/supervisor/spawningimpl.py +24 -21
- ominfra/supervisor/supervisor.py +72 -134
- ominfra/supervisor/types.py +45 -34
- ominfra/supervisor/utils/__init__.py +0 -0
- ominfra/supervisor/utils/diag.py +31 -0
- ominfra/supervisor/utils/fds.py +46 -0
- ominfra/supervisor/utils/fs.py +47 -0
- ominfra/supervisor/utils/os.py +45 -0
- ominfra/supervisor/utils/ostypes.py +9 -0
- ominfra/supervisor/utils/signals.py +60 -0
- ominfra/supervisor/utils/strings.py +105 -0
- ominfra/supervisor/{users.py → utils/users.py} +11 -8
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/METADATA +3 -3
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/RECORD +39 -33
- ominfra/supervisor/context.py +0 -84
- ominfra/supervisor/datatypes.py +0 -113
- ominfra/supervisor/utils.py +0 -206
- /ominfra/supervisor/{collections.py → utils/collections.py} +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/LICENSE +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/WHEEL +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/entry_points.txt +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/top_level.txt +0 -0
ominfra/supervisor/poller.py
CHANGED
@@ -8,6 +8,7 @@ import typing as ta
|
|
8
8
|
from omlish.lite.logs import log
|
9
9
|
|
10
10
|
from .setup import DaemonizeListener
|
11
|
+
from .utils.ostypes import Fd
|
11
12
|
|
12
13
|
|
13
14
|
class Poller(DaemonizeListener, abc.ABC):
|
@@ -15,23 +16,23 @@ class Poller(DaemonizeListener, abc.ABC):
|
|
15
16
|
super().__init__()
|
16
17
|
|
17
18
|
@abc.abstractmethod
|
18
|
-
def register_readable(self, fd:
|
19
|
+
def register_readable(self, fd: Fd) -> None:
|
19
20
|
raise NotImplementedError
|
20
21
|
|
21
22
|
@abc.abstractmethod
|
22
|
-
def register_writable(self, fd:
|
23
|
+
def register_writable(self, fd: Fd) -> None:
|
23
24
|
raise NotImplementedError
|
24
25
|
|
25
26
|
@abc.abstractmethod
|
26
|
-
def unregister_readable(self, fd:
|
27
|
+
def unregister_readable(self, fd: Fd) -> None:
|
27
28
|
raise NotImplementedError
|
28
29
|
|
29
30
|
@abc.abstractmethod
|
30
|
-
def unregister_writable(self, fd:
|
31
|
+
def unregister_writable(self, fd: Fd) -> None:
|
31
32
|
raise NotImplementedError
|
32
33
|
|
33
34
|
@abc.abstractmethod
|
34
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
35
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
35
36
|
raise NotImplementedError
|
36
37
|
|
37
38
|
def before_daemonize(self) -> None: # noqa
|
@@ -48,37 +49,37 @@ class SelectPoller(Poller):
|
|
48
49
|
def __init__(self) -> None:
|
49
50
|
super().__init__()
|
50
51
|
|
51
|
-
self._readable: ta.Set[
|
52
|
-
self._writable: ta.Set[
|
52
|
+
self._readable: ta.Set[Fd] = set()
|
53
|
+
self._writable: ta.Set[Fd] = set()
|
53
54
|
|
54
|
-
def register_readable(self, fd:
|
55
|
+
def register_readable(self, fd: Fd) -> None:
|
55
56
|
self._readable.add(fd)
|
56
57
|
|
57
|
-
def register_writable(self, fd:
|
58
|
+
def register_writable(self, fd: Fd) -> None:
|
58
59
|
self._writable.add(fd)
|
59
60
|
|
60
|
-
def unregister_readable(self, fd:
|
61
|
+
def unregister_readable(self, fd: Fd) -> None:
|
61
62
|
self._readable.discard(fd)
|
62
63
|
|
63
|
-
def unregister_writable(self, fd:
|
64
|
+
def unregister_writable(self, fd: Fd) -> None:
|
64
65
|
self._writable.discard(fd)
|
65
66
|
|
66
67
|
def unregister_all(self) -> None:
|
67
68
|
self._readable.clear()
|
68
69
|
self._writable.clear()
|
69
70
|
|
70
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
71
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
71
72
|
try:
|
72
73
|
r, w, x = select.select(
|
73
74
|
self._readable,
|
74
75
|
self._writable,
|
75
76
|
[], timeout,
|
76
77
|
)
|
77
|
-
except OSError as
|
78
|
-
if
|
78
|
+
except OSError as exc:
|
79
|
+
if exc.args[0] == errno.EINTR:
|
79
80
|
log.debug('EINTR encountered in poll')
|
80
81
|
return [], []
|
81
|
-
if
|
82
|
+
if exc.args[0] == errno.EBADF:
|
82
83
|
log.debug('EBADF encountered in poll')
|
83
84
|
self.unregister_all()
|
84
85
|
return [], []
|
@@ -94,30 +95,30 @@ class PollPoller(Poller):
|
|
94
95
|
super().__init__()
|
95
96
|
|
96
97
|
self._poller = select.poll()
|
97
|
-
self._readable: set[
|
98
|
-
self._writable: set[
|
98
|
+
self._readable: set[Fd] = set()
|
99
|
+
self._writable: set[Fd] = set()
|
99
100
|
|
100
|
-
def register_readable(self, fd:
|
101
|
+
def register_readable(self, fd: Fd) -> None:
|
101
102
|
self._poller.register(fd, self._READ)
|
102
103
|
self._readable.add(fd)
|
103
104
|
|
104
|
-
def register_writable(self, fd:
|
105
|
+
def register_writable(self, fd: Fd) -> None:
|
105
106
|
self._poller.register(fd, self._WRITE)
|
106
107
|
self._writable.add(fd)
|
107
108
|
|
108
|
-
def unregister_readable(self, fd:
|
109
|
+
def unregister_readable(self, fd: Fd) -> None:
|
109
110
|
self._readable.discard(fd)
|
110
111
|
self._poller.unregister(fd)
|
111
112
|
if fd in self._writable:
|
112
113
|
self._poller.register(fd, self._WRITE)
|
113
114
|
|
114
|
-
def unregister_writable(self, fd:
|
115
|
+
def unregister_writable(self, fd: Fd) -> None:
|
115
116
|
self._writable.discard(fd)
|
116
117
|
self._poller.unregister(fd)
|
117
118
|
if fd in self._readable:
|
118
119
|
self._poller.register(fd, self._READ)
|
119
120
|
|
120
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
121
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
121
122
|
fds = self._poll_fds(timeout) # type: ignore
|
122
123
|
readable, writable = [], []
|
123
124
|
for fd, eventmask in fds:
|
@@ -129,16 +130,16 @@ class PollPoller(Poller):
|
|
129
130
|
writable.append(fd)
|
130
131
|
return readable, writable
|
131
132
|
|
132
|
-
def _poll_fds(self, timeout: float) -> ta.List[ta.Tuple[
|
133
|
+
def _poll_fds(self, timeout: float) -> ta.List[ta.Tuple[Fd, Fd]]:
|
133
134
|
try:
|
134
|
-
return self._poller.poll(timeout * 1000)
|
135
|
-
except OSError as
|
136
|
-
if
|
135
|
+
return self._poller.poll(timeout * 1000) # type: ignore
|
136
|
+
except OSError as exc:
|
137
|
+
if exc.args[0] == errno.EINTR:
|
137
138
|
log.debug('EINTR encountered in poll')
|
138
139
|
return []
|
139
140
|
raise
|
140
141
|
|
141
|
-
def _ignore_invalid(self, fd:
|
142
|
+
def _ignore_invalid(self, fd: Fd, eventmask: int) -> bool:
|
142
143
|
if eventmask & select.POLLNVAL:
|
143
144
|
# POLLNVAL means `fd` value is invalid, not open. When a process quits its `fd`s are closed so there is no
|
144
145
|
# more reason to keep this `fd` registered. If the process restarts its `fd`s are registered again.
|
@@ -157,30 +158,30 @@ if sys.platform == 'darwin' or sys.platform.startswith('freebsd'):
|
|
157
158
|
super().__init__()
|
158
159
|
|
159
160
|
self._kqueue: ta.Optional[ta.Any] = select.kqueue()
|
160
|
-
self._readable: set[
|
161
|
-
self._writable: set[
|
161
|
+
self._readable: set[Fd] = set()
|
162
|
+
self._writable: set[Fd] = set()
|
162
163
|
|
163
|
-
def register_readable(self, fd:
|
164
|
+
def register_readable(self, fd: Fd) -> None:
|
164
165
|
self._readable.add(fd)
|
165
166
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_READ, flags=select.KQ_EV_ADD)
|
166
167
|
self._kqueue_control(fd, kevent)
|
167
168
|
|
168
|
-
def register_writable(self, fd:
|
169
|
+
def register_writable(self, fd: Fd) -> None:
|
169
170
|
self._writable.add(fd)
|
170
171
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_WRITE, flags=select.KQ_EV_ADD)
|
171
172
|
self._kqueue_control(fd, kevent)
|
172
173
|
|
173
|
-
def unregister_readable(self, fd:
|
174
|
+
def unregister_readable(self, fd: Fd) -> None:
|
174
175
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_READ, flags=select.KQ_EV_DELETE)
|
175
176
|
self._readable.discard(fd)
|
176
177
|
self._kqueue_control(fd, kevent)
|
177
178
|
|
178
|
-
def unregister_writable(self, fd:
|
179
|
+
def unregister_writable(self, fd: Fd) -> None:
|
179
180
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_WRITE, flags=select.KQ_EV_DELETE)
|
180
181
|
self._writable.discard(fd)
|
181
182
|
self._kqueue_control(fd, kevent)
|
182
183
|
|
183
|
-
def _kqueue_control(self, fd:
|
184
|
+
def _kqueue_control(self, fd: Fd, kevent: 'select.kevent') -> None:
|
184
185
|
try:
|
185
186
|
self._kqueue.control([kevent], 0) # type: ignore
|
186
187
|
except OSError as error:
|
@@ -189,7 +190,7 @@ if sys.platform == 'darwin' or sys.platform.startswith('freebsd'):
|
|
189
190
|
else:
|
190
191
|
raise
|
191
192
|
|
192
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
193
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
193
194
|
readable, writable = [], [] # type: ignore
|
194
195
|
|
195
196
|
try:
|
@@ -2,6 +2,7 @@
|
|
2
2
|
import typing as ta
|
3
3
|
|
4
4
|
from .types import Process
|
5
|
+
from .utils.ostypes import Pid
|
5
6
|
|
6
7
|
|
7
8
|
##
|
@@ -14,5 +15,5 @@ class ProcessStateError(RuntimeError):
|
|
14
15
|
##
|
15
16
|
|
16
17
|
|
17
|
-
class PidHistory(ta.Dict[
|
18
|
+
class PidHistory(ta.Dict[Pid, Process]):
|
18
19
|
pass
|
@@ -11,24 +11,25 @@ from omlish.lite.logs import log
|
|
11
11
|
from omlish.lite.typing import Func1
|
12
12
|
|
13
13
|
from .configs import ProcessConfig
|
14
|
-
from .
|
14
|
+
from .configs import RestartUnconditionally
|
15
15
|
from .dispatchers import Dispatchers
|
16
16
|
from .events import PROCESS_STATE_EVENT_MAP
|
17
17
|
from .events import EventCallbacks
|
18
18
|
from .pipes import ProcessPipes
|
19
19
|
from .pipes import close_parent_pipes
|
20
|
-
from .
|
21
|
-
from .signals import sig_name
|
20
|
+
from .process import ProcessStateError
|
22
21
|
from .spawning import ProcessSpawnError
|
23
22
|
from .spawning import ProcessSpawning
|
24
23
|
from .states import ProcessState
|
25
24
|
from .states import SupervisorState
|
26
|
-
from .types import InputDispatcher
|
27
25
|
from .types import Process
|
28
26
|
from .types import ProcessGroup
|
29
|
-
from .types import
|
30
|
-
from .
|
31
|
-
from .utils import decode_wait_status
|
27
|
+
from .types import ProcessInputDispatcher
|
28
|
+
from .types import SupervisorStateManager
|
29
|
+
from .utils.os import decode_wait_status
|
30
|
+
from .utils.ostypes import Pid
|
31
|
+
from .utils.ostypes import Rc
|
32
|
+
from .utils.signals import sig_name
|
32
33
|
|
33
34
|
|
34
35
|
class ProcessSpawningFactory(Func1[Process, ProcessSpawning]):
|
@@ -46,7 +47,7 @@ class ProcessImpl(Process):
|
|
46
47
|
config: ProcessConfig,
|
47
48
|
group: ProcessGroup,
|
48
49
|
*,
|
49
|
-
|
50
|
+
supervisor_states: SupervisorStateManager,
|
50
51
|
event_callbacks: EventCallbacks,
|
51
52
|
process_spawning_factory: ProcessSpawningFactory,
|
52
53
|
) -> None:
|
@@ -55,7 +56,7 @@ class ProcessImpl(Process):
|
|
55
56
|
self._config = config
|
56
57
|
self._group = group
|
57
58
|
|
58
|
-
self.
|
59
|
+
self._supervisor_states = supervisor_states
|
59
60
|
self._event_callbacks = event_callbacks
|
60
61
|
|
61
62
|
self._spawning = process_spawning_factory(self)
|
@@ -66,7 +67,7 @@ class ProcessImpl(Process):
|
|
66
67
|
self._pipes = ProcessPipes()
|
67
68
|
|
68
69
|
self._state = ProcessState.STOPPED
|
69
|
-
self._pid = 0 # 0 when not running
|
70
|
+
self._pid = Pid(0) # 0 when not running
|
70
71
|
|
71
72
|
self._last_start = 0. # Last time the subprocess was started; 0 if never
|
72
73
|
self._last_stop = 0. # Last time the subprocess was stopped; 0 if never
|
@@ -80,13 +81,13 @@ class ProcessImpl(Process):
|
|
80
81
|
|
81
82
|
self._backoff = 0 # backoff counter (to startretries)
|
82
83
|
|
83
|
-
self._exitstatus: ta.Optional[
|
84
|
+
self._exitstatus: ta.Optional[Rc] = None # status attached to dead process by finish()
|
84
85
|
self._spawn_err: ta.Optional[str] = None # error message attached by spawn() if any
|
85
86
|
|
86
87
|
#
|
87
88
|
|
88
89
|
def __repr__(self) -> str:
|
89
|
-
return f'<Subprocess at {id(self)} with name {self._config.name} in state {self.
|
90
|
+
return f'<Subprocess at {id(self)} with name {self._config.name} in state {self._state.name}>'
|
90
91
|
|
91
92
|
#
|
92
93
|
|
@@ -103,15 +104,11 @@ class ProcessImpl(Process):
|
|
103
104
|
return self._group
|
104
105
|
|
105
106
|
@property
|
106
|
-
def pid(self) ->
|
107
|
+
def pid(self) -> Pid:
|
107
108
|
return self._pid
|
108
109
|
|
109
110
|
#
|
110
111
|
|
111
|
-
@property
|
112
|
-
def context(self) -> ServerContext:
|
113
|
-
return self._context
|
114
|
-
|
115
112
|
@property
|
116
113
|
def state(self) -> ProcessState:
|
117
114
|
return self._state
|
@@ -122,11 +119,9 @@ class ProcessImpl(Process):
|
|
122
119
|
|
123
120
|
#
|
124
121
|
|
125
|
-
def spawn(self) -> ta.Optional[
|
126
|
-
process_name = as_string(self._config.name)
|
127
|
-
|
122
|
+
def spawn(self) -> ta.Optional[Pid]:
|
128
123
|
if self.pid:
|
129
|
-
log.warning('process \'%s\' already running',
|
124
|
+
log.warning('process \'%s\' already running', self.name)
|
130
125
|
return None
|
131
126
|
|
132
127
|
self.check_in_state(
|
@@ -176,7 +171,7 @@ class ProcessImpl(Process):
|
|
176
171
|
if stdin_fd is None:
|
177
172
|
raise OSError(errno.EPIPE, 'Process has no stdin channel')
|
178
173
|
|
179
|
-
dispatcher = check_isinstance(self._dispatchers[stdin_fd],
|
174
|
+
dispatcher = check_isinstance(self._dispatchers[stdin_fd], ProcessInputDispatcher)
|
180
175
|
if dispatcher.closed:
|
181
176
|
raise OSError(errno.EPIPE, "Process' stdin channel is closed")
|
182
177
|
|
@@ -249,7 +244,7 @@ class ProcessImpl(Process):
|
|
249
244
|
self._check_and_adjust_for_system_clock_rollback(now)
|
250
245
|
|
251
246
|
if now > (self._last_stop_report + 2): # every 2 seconds
|
252
|
-
log.info('waiting for %s to stop',
|
247
|
+
log.info('waiting for %s to stop', self.name)
|
253
248
|
self._last_stop_report = now
|
254
249
|
|
255
250
|
def give_up(self) -> None:
|
@@ -269,18 +264,17 @@ class ProcessImpl(Process):
|
|
269
264
|
"""
|
270
265
|
now = time.time()
|
271
266
|
|
272
|
-
process_name = as_string(self._config.name)
|
273
267
|
# If the process is in BACKOFF and we want to stop or kill it, then BACKOFF -> STOPPED. This is needed because
|
274
268
|
# if startretries is a large number and the process isn't starting successfully, the stop request would be
|
275
269
|
# blocked for a long time waiting for the retries.
|
276
270
|
if self._state == ProcessState.BACKOFF:
|
277
|
-
log.debug('Attempted to kill %s, which is in BACKOFF state.',
|
271
|
+
log.debug('Attempted to kill %s, which is in BACKOFF state.', self.name)
|
278
272
|
self.change_state(ProcessState.STOPPED)
|
279
273
|
return None
|
280
274
|
|
281
275
|
args: tuple
|
282
276
|
if not self.pid:
|
283
|
-
fmt, args = "attempted to kill %s with sig %s but it wasn't running", (
|
277
|
+
fmt, args = "attempted to kill %s with sig %s but it wasn't running", (self.name, sig_name(sig))
|
284
278
|
log.debug(fmt, *args)
|
285
279
|
return fmt % args
|
286
280
|
|
@@ -294,7 +288,7 @@ class ProcessImpl(Process):
|
|
294
288
|
if killasgroup:
|
295
289
|
as_group = 'process group '
|
296
290
|
|
297
|
-
log.debug('killing %s (pid %s) %s with signal %s',
|
291
|
+
log.debug('killing %s (pid %s) %s with signal %s', self.name, self.pid, as_group, sig_name(sig))
|
298
292
|
|
299
293
|
# RUNNING/STARTING/STOPPING -> STOPPING
|
300
294
|
self._killing = True
|
@@ -303,24 +297,24 @@ class ProcessImpl(Process):
|
|
303
297
|
self.check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
|
304
298
|
self.change_state(ProcessState.STOPPING)
|
305
299
|
|
306
|
-
|
300
|
+
kpid = int(self.pid)
|
307
301
|
if killasgroup:
|
308
302
|
# send to the whole process group instead
|
309
|
-
|
303
|
+
kpid = -kpid
|
310
304
|
|
311
305
|
try:
|
312
306
|
try:
|
313
|
-
os.kill(
|
307
|
+
os.kill(kpid, sig)
|
314
308
|
except OSError as exc:
|
315
309
|
if exc.errno == errno.ESRCH:
|
316
|
-
log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s',
|
310
|
+
log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s', self.name, self.pid, str(exc)) # noqa
|
317
311
|
# we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
|
318
312
|
# processing.
|
319
313
|
return None
|
320
314
|
raise
|
321
315
|
except Exception: # noqa
|
322
316
|
tb = traceback.format_exc()
|
323
|
-
fmt, args = 'unknown problem killing %s (%s):%s', (
|
317
|
+
fmt, args = 'unknown problem killing %s (%s):%s', (self.name, self.pid, tb)
|
324
318
|
log.critical(fmt, *args)
|
325
319
|
self.change_state(ProcessState.UNKNOWN)
|
326
320
|
self._killing = False
|
@@ -336,14 +330,13 @@ class ProcessImpl(Process):
|
|
336
330
|
Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
|
337
331
|
running.
|
338
332
|
"""
|
339
|
-
process_name = as_string(self._config.name)
|
340
333
|
args: tuple
|
341
334
|
if not self.pid:
|
342
|
-
fmt, args = "
|
335
|
+
fmt, args = "Attempted to send %s sig %s but it wasn't running", (self.name, sig_name(sig))
|
343
336
|
log.debug(fmt, *args)
|
344
337
|
return fmt % args
|
345
338
|
|
346
|
-
log.debug('sending %s (pid %s) sig %s',
|
339
|
+
log.debug('sending %s (pid %s) sig %s', self.name, self.pid, sig_name(sig))
|
347
340
|
|
348
341
|
self.check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
|
349
342
|
|
@@ -354,7 +347,7 @@ class ProcessImpl(Process):
|
|
354
347
|
if exc.errno == errno.ESRCH:
|
355
348
|
log.debug(
|
356
349
|
'unable to signal %s (pid %s), it probably just now exited on its own: %s',
|
357
|
-
|
350
|
+
self.name,
|
358
351
|
self.pid,
|
359
352
|
str(exc),
|
360
353
|
)
|
@@ -364,14 +357,14 @@ class ProcessImpl(Process):
|
|
364
357
|
raise
|
365
358
|
except Exception: # noqa
|
366
359
|
tb = traceback.format_exc()
|
367
|
-
fmt, args = 'unknown problem sending sig %s (%s):%s', (
|
360
|
+
fmt, args = 'unknown problem sending sig %s (%s):%s', (self.name, self.pid, tb)
|
368
361
|
log.critical(fmt, *args)
|
369
362
|
self.change_state(ProcessState.UNKNOWN)
|
370
363
|
return fmt % args
|
371
364
|
|
372
365
|
return None
|
373
366
|
|
374
|
-
def finish(self, sts:
|
367
|
+
def finish(self, sts: Rc) -> None:
|
375
368
|
"""The process was reaped and we need to report and manage its state."""
|
376
369
|
|
377
370
|
self._dispatchers.drain()
|
@@ -383,7 +376,6 @@ class ProcessImpl(Process):
|
|
383
376
|
self._check_and_adjust_for_system_clock_rollback(now)
|
384
377
|
|
385
378
|
self._last_stop = now
|
386
|
-
process_name = as_string(self._config.name)
|
387
379
|
|
388
380
|
if now > self._last_start:
|
389
381
|
too_quickly = now - self._last_start < self._config.startsecs
|
@@ -392,7 +384,7 @@ class ProcessImpl(Process):
|
|
392
384
|
log.warning(
|
393
385
|
"process '%s' (%s) last_start time is in the future, don't know how long process was running so "
|
394
386
|
"assuming it did not exit too quickly",
|
395
|
-
|
387
|
+
self.name,
|
396
388
|
self.pid,
|
397
389
|
)
|
398
390
|
|
@@ -402,9 +394,9 @@ class ProcessImpl(Process):
|
|
402
394
|
# likely the result of a stop request implies STOPPING -> STOPPED
|
403
395
|
self._killing = False
|
404
396
|
self._delay = 0
|
405
|
-
self._exitstatus = es
|
397
|
+
self._exitstatus = Rc(es)
|
406
398
|
|
407
|
-
fmt, args = 'stopped: %s (%s)', (
|
399
|
+
fmt, args = 'stopped: %s (%s)', (self.name, msg)
|
408
400
|
self.check_in_state(ProcessState.STOPPING)
|
409
401
|
self.change_state(ProcessState.STOPPED)
|
410
402
|
if exit_expected:
|
@@ -418,7 +410,7 @@ class ProcessImpl(Process):
|
|
418
410
|
self._spawn_err = 'Exited too quickly (process log may have details)'
|
419
411
|
self.check_in_state(ProcessState.STARTING)
|
420
412
|
self.change_state(ProcessState.BACKOFF)
|
421
|
-
log.warning('exited: %s (%s)',
|
413
|
+
log.warning('exited: %s (%s)', self.name, msg + '; not expected')
|
422
414
|
|
423
415
|
else:
|
424
416
|
# this finish was not the result of a stop request, the program was in the RUNNING state but exited implies
|
@@ -437,21 +429,18 @@ class ProcessImpl(Process):
|
|
437
429
|
if exit_expected:
|
438
430
|
# expected exit code
|
439
431
|
self.change_state(ProcessState.EXITED, expected=True)
|
440
|
-
log.info('exited: %s (%s)',
|
432
|
+
log.info('exited: %s (%s)', self.name, msg + '; expected')
|
441
433
|
else:
|
442
434
|
# unexpected exit code
|
443
435
|
self._spawn_err = f'Bad exit code {es}'
|
444
436
|
self.change_state(ProcessState.EXITED, expected=False)
|
445
|
-
log.warning('exited: %s (%s)',
|
437
|
+
log.warning('exited: %s (%s)', self.name, msg + '; not expected')
|
446
438
|
|
447
|
-
self._pid = 0
|
439
|
+
self._pid = Pid(0)
|
448
440
|
close_parent_pipes(self._pipes)
|
449
441
|
self._pipes = ProcessPipes()
|
450
442
|
self._dispatchers = Dispatchers([])
|
451
443
|
|
452
|
-
def get_state(self) -> ProcessState:
|
453
|
-
return self._state
|
454
|
-
|
455
444
|
def transition(self) -> None:
|
456
445
|
now = time.time()
|
457
446
|
state = self._state
|
@@ -460,7 +449,7 @@ class ProcessImpl(Process):
|
|
460
449
|
|
461
450
|
logger = log
|
462
451
|
|
463
|
-
if self.
|
452
|
+
if self._supervisor_states.state > SupervisorState.RESTARTING:
|
464
453
|
# don't start any processes if supervisor is shutting down
|
465
454
|
if state == ProcessState.EXITED:
|
466
455
|
if self._config.autorestart:
|
@@ -482,7 +471,6 @@ class ProcessImpl(Process):
|
|
482
471
|
# BACKOFF -> STARTING
|
483
472
|
self.spawn()
|
484
473
|
|
485
|
-
process_name = as_string(self._config.name)
|
486
474
|
if state == ProcessState.STARTING:
|
487
475
|
if now - self._last_start > self._config.startsecs:
|
488
476
|
# STARTING -> RUNNING if the proc has started successfully and it has stayed up for at least
|
@@ -492,21 +480,21 @@ class ProcessImpl(Process):
|
|
492
480
|
self.check_in_state(ProcessState.STARTING)
|
493
481
|
self.change_state(ProcessState.RUNNING)
|
494
482
|
msg = ('entered RUNNING state, process has stayed up for > than %s seconds (startsecs)' % self._config.startsecs) # noqa
|
495
|
-
logger.info('success: %s %s',
|
483
|
+
logger.info('success: %s %s', self.name, msg)
|
496
484
|
|
497
485
|
if state == ProcessState.BACKOFF:
|
498
486
|
if self._backoff > self._config.startretries:
|
499
487
|
# BACKOFF -> FATAL if the proc has exceeded its number of retries
|
500
488
|
self.give_up()
|
501
489
|
msg = ('entered FATAL state, too many start retries too quickly')
|
502
|
-
logger.info('gave up: %s %s',
|
490
|
+
logger.info('gave up: %s %s', self.name, msg)
|
503
491
|
|
504
492
|
elif state == ProcessState.STOPPING:
|
505
493
|
time_left = self._delay - now
|
506
494
|
if time_left <= 0:
|
507
495
|
# kill processes which are taking too long to stop with a final sigkill. if this doesn't kill it, the
|
508
496
|
# process will be stuck in the STOPPING state forever.
|
509
|
-
log.warning('killing \'%s\' (%s) with SIGKILL',
|
497
|
+
log.warning('killing \'%s\' (%s) with SIGKILL', self.name, self.pid)
|
510
498
|
self.kill(signal.SIGKILL)
|
511
499
|
|
512
500
|
def after_setuid(self) -> None:
|
ominfra/supervisor/setup.py
CHANGED
ominfra/supervisor/setupimpl.py
CHANGED
@@ -14,8 +14,9 @@ from .setup import DaemonizeListeners
|
|
14
14
|
from .setup import SupervisorSetup
|
15
15
|
from .setup import SupervisorUser
|
16
16
|
from .types import ServerEpoch
|
17
|
-
from .utils import
|
18
|
-
from .utils import
|
17
|
+
from .utils.fs import try_unlink
|
18
|
+
from .utils.os import real_exit
|
19
|
+
from .utils.ostypes import Rc
|
19
20
|
|
20
21
|
|
21
22
|
##
|
@@ -238,7 +239,7 @@ class SupervisorSetupImpl(SupervisorSetup):
|
|
238
239
|
if pid != 0:
|
239
240
|
# Parent
|
240
241
|
log.debug('supervisord forked; parent exiting')
|
241
|
-
real_exit(0)
|
242
|
+
real_exit(Rc(0))
|
242
243
|
|
243
244
|
# Child
|
244
245
|
log.info('daemonizing the supervisord process')
|
ominfra/supervisor/signals.py
CHANGED
@@ -1,60 +1,66 @@
|
|
1
1
|
# ruff: noqa: UP006 UP007
|
2
2
|
import signal
|
3
|
-
import typing as ta
|
4
3
|
|
4
|
+
from omlish.lite.logs import log
|
5
5
|
|
6
|
-
|
6
|
+
from .groups import ProcessGroupManager
|
7
|
+
from .states import SupervisorState
|
8
|
+
from .types import ProcessOutputDispatcher
|
9
|
+
from .types import SupervisorStateManager
|
10
|
+
from .utils.signals import SignalReceiver
|
11
|
+
from .utils.signals import sig_name
|
7
12
|
|
8
13
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
except (ValueError, TypeError):
|
18
|
-
name = value.strip().upper() # type: ignore
|
19
|
-
if not name.startswith('SIG'):
|
20
|
-
name = f'SIG{name}'
|
21
|
-
|
22
|
-
if (sn := _SIGS_BY_NAME.get(name)) is None:
|
23
|
-
raise ValueError(f'value {value!r} is not a valid signal name') # noqa
|
24
|
-
num = sn
|
25
|
-
|
26
|
-
if num not in _SIGS_BY_NUM:
|
27
|
-
raise ValueError(f'value {value!r} is not a valid signal number')
|
28
|
-
|
29
|
-
return num
|
30
|
-
|
31
|
-
|
32
|
-
def sig_name(num: int) -> str:
|
33
|
-
if (sig := _SIGS_BY_NUM.get(num)) is not None:
|
34
|
-
return sig.name
|
35
|
-
return f'signal {sig}'
|
36
|
-
|
37
|
-
|
38
|
-
##
|
39
|
-
|
40
|
-
|
41
|
-
class SignalReceiver:
|
42
|
-
def __init__(self) -> None:
|
14
|
+
class SignalHandler:
|
15
|
+
def __init__(
|
16
|
+
self,
|
17
|
+
*,
|
18
|
+
states: SupervisorStateManager,
|
19
|
+
signal_receiver: SignalReceiver,
|
20
|
+
process_groups: ProcessGroupManager,
|
21
|
+
) -> None:
|
43
22
|
super().__init__()
|
44
23
|
|
45
|
-
self.
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
signal.
|
24
|
+
self._states = states
|
25
|
+
self._signal_receiver = signal_receiver
|
26
|
+
self._process_groups = process_groups
|
27
|
+
|
28
|
+
def set_signals(self) -> None:
|
29
|
+
self._signal_receiver.install(
|
30
|
+
signal.SIGTERM,
|
31
|
+
signal.SIGINT,
|
32
|
+
signal.SIGQUIT,
|
33
|
+
signal.SIGHUP,
|
34
|
+
signal.SIGCHLD,
|
35
|
+
signal.SIGUSR2,
|
36
|
+
)
|
37
|
+
|
38
|
+
def handle_signals(self) -> None:
|
39
|
+
sig = self._signal_receiver.get_signal()
|
40
|
+
if not sig:
|
41
|
+
return
|
42
|
+
|
43
|
+
if sig in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
|
44
|
+
log.warning('received %s indicating exit request', sig_name(sig))
|
45
|
+
self._states.set_state(SupervisorState.SHUTDOWN)
|
46
|
+
|
47
|
+
elif sig == signal.SIGHUP:
|
48
|
+
if self._states.state == SupervisorState.SHUTDOWN:
|
49
|
+
log.warning('ignored %s indicating restart request (shutdown in progress)', sig_name(sig)) # noqa
|
50
|
+
else:
|
51
|
+
log.warning('received %s indicating restart request', sig_name(sig)) # noqa
|
52
|
+
self._states.set_state(SupervisorState.RESTARTING)
|
53
|
+
|
54
|
+
elif sig == signal.SIGCHLD:
|
55
|
+
log.debug('received %s indicating a child quit', sig_name(sig))
|
56
|
+
|
57
|
+
elif sig == signal.SIGUSR2:
|
58
|
+
log.info('received %s indicating log reopen request', sig_name(sig))
|
59
|
+
|
60
|
+
for p in self._process_groups.all_processes():
|
61
|
+
for d in p.get_dispatchers():
|
62
|
+
if isinstance(d, ProcessOutputDispatcher):
|
63
|
+
d.reopen_logs()
|
54
64
|
|
55
|
-
def get_signal(self) -> ta.Optional[int]:
|
56
|
-
if self._signals_recvd:
|
57
|
-
sig = self._signals_recvd.pop(0)
|
58
65
|
else:
|
59
|
-
|
60
|
-
return sig
|
66
|
+
log.debug('received %s indicating nothing', sig_name(sig))
|