ominfra 0.0.0.dev127__py3-none-any.whl → 0.0.0.dev129__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ominfra/deploy/_executor.py +24 -0
- ominfra/pyremote/_runcommands.py +24 -0
- ominfra/scripts/journald2aws.py +24 -0
- ominfra/scripts/supervisor.py +1320 -1225
- ominfra/supervisor/configs.py +34 -11
- ominfra/supervisor/dispatchers.py +7 -6
- ominfra/supervisor/dispatchersimpl.py +29 -22
- ominfra/supervisor/groups.py +1 -1
- ominfra/supervisor/groupsimpl.py +2 -2
- ominfra/supervisor/inject.py +22 -17
- ominfra/supervisor/io.py +82 -0
- ominfra/supervisor/main.py +6 -7
- ominfra/supervisor/pipes.py +15 -13
- ominfra/supervisor/poller.py +36 -35
- ominfra/supervisor/{processes.py → process.py} +2 -1
- ominfra/supervisor/{processesimpl.py → processimpl.py} +42 -54
- ominfra/supervisor/setup.py +1 -1
- ominfra/supervisor/setupimpl.py +4 -3
- ominfra/supervisor/signals.py +56 -50
- ominfra/supervisor/spawning.py +2 -1
- ominfra/supervisor/spawningimpl.py +24 -21
- ominfra/supervisor/supervisor.py +72 -134
- ominfra/supervisor/types.py +45 -34
- ominfra/supervisor/utils/__init__.py +0 -0
- ominfra/supervisor/utils/diag.py +31 -0
- ominfra/supervisor/utils/fds.py +46 -0
- ominfra/supervisor/utils/fs.py +47 -0
- ominfra/supervisor/utils/os.py +45 -0
- ominfra/supervisor/utils/ostypes.py +9 -0
- ominfra/supervisor/utils/signals.py +60 -0
- ominfra/supervisor/utils/strings.py +105 -0
- ominfra/supervisor/{users.py → utils/users.py} +11 -8
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/METADATA +3 -3
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/RECORD +39 -33
- ominfra/supervisor/context.py +0 -84
- ominfra/supervisor/datatypes.py +0 -113
- ominfra/supervisor/utils.py +0 -206
- ominfra/supervisor/{collections.py → utils/collections.py} +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/LICENSE +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/WHEEL +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/entry_points.txt +0 -0
- {ominfra-0.0.0.dev127.dist-info → ominfra-0.0.0.dev129.dist-info}/top_level.txt +0 -0
ominfra/supervisor/poller.py
CHANGED
@@ -8,6 +8,7 @@ import typing as ta
|
|
8
8
|
from omlish.lite.logs import log
|
9
9
|
|
10
10
|
from .setup import DaemonizeListener
|
11
|
+
from .utils.ostypes import Fd
|
11
12
|
|
12
13
|
|
13
14
|
class Poller(DaemonizeListener, abc.ABC):
|
@@ -15,23 +16,23 @@ class Poller(DaemonizeListener, abc.ABC):
|
|
15
16
|
super().__init__()
|
16
17
|
|
17
18
|
@abc.abstractmethod
|
18
|
-
def register_readable(self, fd:
|
19
|
+
def register_readable(self, fd: Fd) -> None:
|
19
20
|
raise NotImplementedError
|
20
21
|
|
21
22
|
@abc.abstractmethod
|
22
|
-
def register_writable(self, fd:
|
23
|
+
def register_writable(self, fd: Fd) -> None:
|
23
24
|
raise NotImplementedError
|
24
25
|
|
25
26
|
@abc.abstractmethod
|
26
|
-
def unregister_readable(self, fd:
|
27
|
+
def unregister_readable(self, fd: Fd) -> None:
|
27
28
|
raise NotImplementedError
|
28
29
|
|
29
30
|
@abc.abstractmethod
|
30
|
-
def unregister_writable(self, fd:
|
31
|
+
def unregister_writable(self, fd: Fd) -> None:
|
31
32
|
raise NotImplementedError
|
32
33
|
|
33
34
|
@abc.abstractmethod
|
34
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
35
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
35
36
|
raise NotImplementedError
|
36
37
|
|
37
38
|
def before_daemonize(self) -> None: # noqa
|
@@ -48,37 +49,37 @@ class SelectPoller(Poller):
|
|
48
49
|
def __init__(self) -> None:
|
49
50
|
super().__init__()
|
50
51
|
|
51
|
-
self._readable: ta.Set[
|
52
|
-
self._writable: ta.Set[
|
52
|
+
self._readable: ta.Set[Fd] = set()
|
53
|
+
self._writable: ta.Set[Fd] = set()
|
53
54
|
|
54
|
-
def register_readable(self, fd:
|
55
|
+
def register_readable(self, fd: Fd) -> None:
|
55
56
|
self._readable.add(fd)
|
56
57
|
|
57
|
-
def register_writable(self, fd:
|
58
|
+
def register_writable(self, fd: Fd) -> None:
|
58
59
|
self._writable.add(fd)
|
59
60
|
|
60
|
-
def unregister_readable(self, fd:
|
61
|
+
def unregister_readable(self, fd: Fd) -> None:
|
61
62
|
self._readable.discard(fd)
|
62
63
|
|
63
|
-
def unregister_writable(self, fd:
|
64
|
+
def unregister_writable(self, fd: Fd) -> None:
|
64
65
|
self._writable.discard(fd)
|
65
66
|
|
66
67
|
def unregister_all(self) -> None:
|
67
68
|
self._readable.clear()
|
68
69
|
self._writable.clear()
|
69
70
|
|
70
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
71
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
71
72
|
try:
|
72
73
|
r, w, x = select.select(
|
73
74
|
self._readable,
|
74
75
|
self._writable,
|
75
76
|
[], timeout,
|
76
77
|
)
|
77
|
-
except OSError as
|
78
|
-
if
|
78
|
+
except OSError as exc:
|
79
|
+
if exc.args[0] == errno.EINTR:
|
79
80
|
log.debug('EINTR encountered in poll')
|
80
81
|
return [], []
|
81
|
-
if
|
82
|
+
if exc.args[0] == errno.EBADF:
|
82
83
|
log.debug('EBADF encountered in poll')
|
83
84
|
self.unregister_all()
|
84
85
|
return [], []
|
@@ -94,30 +95,30 @@ class PollPoller(Poller):
|
|
94
95
|
super().__init__()
|
95
96
|
|
96
97
|
self._poller = select.poll()
|
97
|
-
self._readable: set[
|
98
|
-
self._writable: set[
|
98
|
+
self._readable: set[Fd] = set()
|
99
|
+
self._writable: set[Fd] = set()
|
99
100
|
|
100
|
-
def register_readable(self, fd:
|
101
|
+
def register_readable(self, fd: Fd) -> None:
|
101
102
|
self._poller.register(fd, self._READ)
|
102
103
|
self._readable.add(fd)
|
103
104
|
|
104
|
-
def register_writable(self, fd:
|
105
|
+
def register_writable(self, fd: Fd) -> None:
|
105
106
|
self._poller.register(fd, self._WRITE)
|
106
107
|
self._writable.add(fd)
|
107
108
|
|
108
|
-
def unregister_readable(self, fd:
|
109
|
+
def unregister_readable(self, fd: Fd) -> None:
|
109
110
|
self._readable.discard(fd)
|
110
111
|
self._poller.unregister(fd)
|
111
112
|
if fd in self._writable:
|
112
113
|
self._poller.register(fd, self._WRITE)
|
113
114
|
|
114
|
-
def unregister_writable(self, fd:
|
115
|
+
def unregister_writable(self, fd: Fd) -> None:
|
115
116
|
self._writable.discard(fd)
|
116
117
|
self._poller.unregister(fd)
|
117
118
|
if fd in self._readable:
|
118
119
|
self._poller.register(fd, self._READ)
|
119
120
|
|
120
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
121
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
121
122
|
fds = self._poll_fds(timeout) # type: ignore
|
122
123
|
readable, writable = [], []
|
123
124
|
for fd, eventmask in fds:
|
@@ -129,16 +130,16 @@ class PollPoller(Poller):
|
|
129
130
|
writable.append(fd)
|
130
131
|
return readable, writable
|
131
132
|
|
132
|
-
def _poll_fds(self, timeout: float) -> ta.List[ta.Tuple[
|
133
|
+
def _poll_fds(self, timeout: float) -> ta.List[ta.Tuple[Fd, Fd]]:
|
133
134
|
try:
|
134
|
-
return self._poller.poll(timeout * 1000)
|
135
|
-
except OSError as
|
136
|
-
if
|
135
|
+
return self._poller.poll(timeout * 1000) # type: ignore
|
136
|
+
except OSError as exc:
|
137
|
+
if exc.args[0] == errno.EINTR:
|
137
138
|
log.debug('EINTR encountered in poll')
|
138
139
|
return []
|
139
140
|
raise
|
140
141
|
|
141
|
-
def _ignore_invalid(self, fd:
|
142
|
+
def _ignore_invalid(self, fd: Fd, eventmask: int) -> bool:
|
142
143
|
if eventmask & select.POLLNVAL:
|
143
144
|
# POLLNVAL means `fd` value is invalid, not open. When a process quits its `fd`s are closed so there is no
|
144
145
|
# more reason to keep this `fd` registered. If the process restarts its `fd`s are registered again.
|
@@ -157,30 +158,30 @@ if sys.platform == 'darwin' or sys.platform.startswith('freebsd'):
|
|
157
158
|
super().__init__()
|
158
159
|
|
159
160
|
self._kqueue: ta.Optional[ta.Any] = select.kqueue()
|
160
|
-
self._readable: set[
|
161
|
-
self._writable: set[
|
161
|
+
self._readable: set[Fd] = set()
|
162
|
+
self._writable: set[Fd] = set()
|
162
163
|
|
163
|
-
def register_readable(self, fd:
|
164
|
+
def register_readable(self, fd: Fd) -> None:
|
164
165
|
self._readable.add(fd)
|
165
166
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_READ, flags=select.KQ_EV_ADD)
|
166
167
|
self._kqueue_control(fd, kevent)
|
167
168
|
|
168
|
-
def register_writable(self, fd:
|
169
|
+
def register_writable(self, fd: Fd) -> None:
|
169
170
|
self._writable.add(fd)
|
170
171
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_WRITE, flags=select.KQ_EV_ADD)
|
171
172
|
self._kqueue_control(fd, kevent)
|
172
173
|
|
173
|
-
def unregister_readable(self, fd:
|
174
|
+
def unregister_readable(self, fd: Fd) -> None:
|
174
175
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_READ, flags=select.KQ_EV_DELETE)
|
175
176
|
self._readable.discard(fd)
|
176
177
|
self._kqueue_control(fd, kevent)
|
177
178
|
|
178
|
-
def unregister_writable(self, fd:
|
179
|
+
def unregister_writable(self, fd: Fd) -> None:
|
179
180
|
kevent = select.kevent(fd, filter=select.KQ_FILTER_WRITE, flags=select.KQ_EV_DELETE)
|
180
181
|
self._writable.discard(fd)
|
181
182
|
self._kqueue_control(fd, kevent)
|
182
183
|
|
183
|
-
def _kqueue_control(self, fd:
|
184
|
+
def _kqueue_control(self, fd: Fd, kevent: 'select.kevent') -> None:
|
184
185
|
try:
|
185
186
|
self._kqueue.control([kevent], 0) # type: ignore
|
186
187
|
except OSError as error:
|
@@ -189,7 +190,7 @@ if sys.platform == 'darwin' or sys.platform.startswith('freebsd'):
|
|
189
190
|
else:
|
190
191
|
raise
|
191
192
|
|
192
|
-
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[
|
193
|
+
def poll(self, timeout: ta.Optional[float]) -> ta.Tuple[ta.List[Fd], ta.List[Fd]]:
|
193
194
|
readable, writable = [], [] # type: ignore
|
194
195
|
|
195
196
|
try:
|
@@ -2,6 +2,7 @@
|
|
2
2
|
import typing as ta
|
3
3
|
|
4
4
|
from .types import Process
|
5
|
+
from .utils.ostypes import Pid
|
5
6
|
|
6
7
|
|
7
8
|
##
|
@@ -14,5 +15,5 @@ class ProcessStateError(RuntimeError):
|
|
14
15
|
##
|
15
16
|
|
16
17
|
|
17
|
-
class PidHistory(ta.Dict[
|
18
|
+
class PidHistory(ta.Dict[Pid, Process]):
|
18
19
|
pass
|
@@ -11,24 +11,25 @@ from omlish.lite.logs import log
|
|
11
11
|
from omlish.lite.typing import Func1
|
12
12
|
|
13
13
|
from .configs import ProcessConfig
|
14
|
-
from .
|
14
|
+
from .configs import RestartUnconditionally
|
15
15
|
from .dispatchers import Dispatchers
|
16
16
|
from .events import PROCESS_STATE_EVENT_MAP
|
17
17
|
from .events import EventCallbacks
|
18
18
|
from .pipes import ProcessPipes
|
19
19
|
from .pipes import close_parent_pipes
|
20
|
-
from .
|
21
|
-
from .signals import sig_name
|
20
|
+
from .process import ProcessStateError
|
22
21
|
from .spawning import ProcessSpawnError
|
23
22
|
from .spawning import ProcessSpawning
|
24
23
|
from .states import ProcessState
|
25
24
|
from .states import SupervisorState
|
26
|
-
from .types import InputDispatcher
|
27
25
|
from .types import Process
|
28
26
|
from .types import ProcessGroup
|
29
|
-
from .types import
|
30
|
-
from .
|
31
|
-
from .utils import decode_wait_status
|
27
|
+
from .types import ProcessInputDispatcher
|
28
|
+
from .types import SupervisorStateManager
|
29
|
+
from .utils.os import decode_wait_status
|
30
|
+
from .utils.ostypes import Pid
|
31
|
+
from .utils.ostypes import Rc
|
32
|
+
from .utils.signals import sig_name
|
32
33
|
|
33
34
|
|
34
35
|
class ProcessSpawningFactory(Func1[Process, ProcessSpawning]):
|
@@ -46,7 +47,7 @@ class ProcessImpl(Process):
|
|
46
47
|
config: ProcessConfig,
|
47
48
|
group: ProcessGroup,
|
48
49
|
*,
|
49
|
-
|
50
|
+
supervisor_states: SupervisorStateManager,
|
50
51
|
event_callbacks: EventCallbacks,
|
51
52
|
process_spawning_factory: ProcessSpawningFactory,
|
52
53
|
) -> None:
|
@@ -55,7 +56,7 @@ class ProcessImpl(Process):
|
|
55
56
|
self._config = config
|
56
57
|
self._group = group
|
57
58
|
|
58
|
-
self.
|
59
|
+
self._supervisor_states = supervisor_states
|
59
60
|
self._event_callbacks = event_callbacks
|
60
61
|
|
61
62
|
self._spawning = process_spawning_factory(self)
|
@@ -66,7 +67,7 @@ class ProcessImpl(Process):
|
|
66
67
|
self._pipes = ProcessPipes()
|
67
68
|
|
68
69
|
self._state = ProcessState.STOPPED
|
69
|
-
self._pid = 0 # 0 when not running
|
70
|
+
self._pid = Pid(0) # 0 when not running
|
70
71
|
|
71
72
|
self._last_start = 0. # Last time the subprocess was started; 0 if never
|
72
73
|
self._last_stop = 0. # Last time the subprocess was stopped; 0 if never
|
@@ -80,13 +81,13 @@ class ProcessImpl(Process):
|
|
80
81
|
|
81
82
|
self._backoff = 0 # backoff counter (to startretries)
|
82
83
|
|
83
|
-
self._exitstatus: ta.Optional[
|
84
|
+
self._exitstatus: ta.Optional[Rc] = None # status attached to dead process by finish()
|
84
85
|
self._spawn_err: ta.Optional[str] = None # error message attached by spawn() if any
|
85
86
|
|
86
87
|
#
|
87
88
|
|
88
89
|
def __repr__(self) -> str:
|
89
|
-
return f'<Subprocess at {id(self)} with name {self._config.name} in state {self.
|
90
|
+
return f'<Subprocess at {id(self)} with name {self._config.name} in state {self._state.name}>'
|
90
91
|
|
91
92
|
#
|
92
93
|
|
@@ -103,15 +104,11 @@ class ProcessImpl(Process):
|
|
103
104
|
return self._group
|
104
105
|
|
105
106
|
@property
|
106
|
-
def pid(self) ->
|
107
|
+
def pid(self) -> Pid:
|
107
108
|
return self._pid
|
108
109
|
|
109
110
|
#
|
110
111
|
|
111
|
-
@property
|
112
|
-
def context(self) -> ServerContext:
|
113
|
-
return self._context
|
114
|
-
|
115
112
|
@property
|
116
113
|
def state(self) -> ProcessState:
|
117
114
|
return self._state
|
@@ -122,11 +119,9 @@ class ProcessImpl(Process):
|
|
122
119
|
|
123
120
|
#
|
124
121
|
|
125
|
-
def spawn(self) -> ta.Optional[
|
126
|
-
process_name = as_string(self._config.name)
|
127
|
-
|
122
|
+
def spawn(self) -> ta.Optional[Pid]:
|
128
123
|
if self.pid:
|
129
|
-
log.warning('process \'%s\' already running',
|
124
|
+
log.warning('process \'%s\' already running', self.name)
|
130
125
|
return None
|
131
126
|
|
132
127
|
self.check_in_state(
|
@@ -176,7 +171,7 @@ class ProcessImpl(Process):
|
|
176
171
|
if stdin_fd is None:
|
177
172
|
raise OSError(errno.EPIPE, 'Process has no stdin channel')
|
178
173
|
|
179
|
-
dispatcher = check_isinstance(self._dispatchers[stdin_fd],
|
174
|
+
dispatcher = check_isinstance(self._dispatchers[stdin_fd], ProcessInputDispatcher)
|
180
175
|
if dispatcher.closed:
|
181
176
|
raise OSError(errno.EPIPE, "Process' stdin channel is closed")
|
182
177
|
|
@@ -249,7 +244,7 @@ class ProcessImpl(Process):
|
|
249
244
|
self._check_and_adjust_for_system_clock_rollback(now)
|
250
245
|
|
251
246
|
if now > (self._last_stop_report + 2): # every 2 seconds
|
252
|
-
log.info('waiting for %s to stop',
|
247
|
+
log.info('waiting for %s to stop', self.name)
|
253
248
|
self._last_stop_report = now
|
254
249
|
|
255
250
|
def give_up(self) -> None:
|
@@ -269,18 +264,17 @@ class ProcessImpl(Process):
|
|
269
264
|
"""
|
270
265
|
now = time.time()
|
271
266
|
|
272
|
-
process_name = as_string(self._config.name)
|
273
267
|
# If the process is in BACKOFF and we want to stop or kill it, then BACKOFF -> STOPPED. This is needed because
|
274
268
|
# if startretries is a large number and the process isn't starting successfully, the stop request would be
|
275
269
|
# blocked for a long time waiting for the retries.
|
276
270
|
if self._state == ProcessState.BACKOFF:
|
277
|
-
log.debug('Attempted to kill %s, which is in BACKOFF state.',
|
271
|
+
log.debug('Attempted to kill %s, which is in BACKOFF state.', self.name)
|
278
272
|
self.change_state(ProcessState.STOPPED)
|
279
273
|
return None
|
280
274
|
|
281
275
|
args: tuple
|
282
276
|
if not self.pid:
|
283
|
-
fmt, args = "attempted to kill %s with sig %s but it wasn't running", (
|
277
|
+
fmt, args = "attempted to kill %s with sig %s but it wasn't running", (self.name, sig_name(sig))
|
284
278
|
log.debug(fmt, *args)
|
285
279
|
return fmt % args
|
286
280
|
|
@@ -294,7 +288,7 @@ class ProcessImpl(Process):
|
|
294
288
|
if killasgroup:
|
295
289
|
as_group = 'process group '
|
296
290
|
|
297
|
-
log.debug('killing %s (pid %s) %s with signal %s',
|
291
|
+
log.debug('killing %s (pid %s) %s with signal %s', self.name, self.pid, as_group, sig_name(sig))
|
298
292
|
|
299
293
|
# RUNNING/STARTING/STOPPING -> STOPPING
|
300
294
|
self._killing = True
|
@@ -303,24 +297,24 @@ class ProcessImpl(Process):
|
|
303
297
|
self.check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
|
304
298
|
self.change_state(ProcessState.STOPPING)
|
305
299
|
|
306
|
-
|
300
|
+
kpid = int(self.pid)
|
307
301
|
if killasgroup:
|
308
302
|
# send to the whole process group instead
|
309
|
-
|
303
|
+
kpid = -kpid
|
310
304
|
|
311
305
|
try:
|
312
306
|
try:
|
313
|
-
os.kill(
|
307
|
+
os.kill(kpid, sig)
|
314
308
|
except OSError as exc:
|
315
309
|
if exc.errno == errno.ESRCH:
|
316
|
-
log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s',
|
310
|
+
log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s', self.name, self.pid, str(exc)) # noqa
|
317
311
|
# we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
|
318
312
|
# processing.
|
319
313
|
return None
|
320
314
|
raise
|
321
315
|
except Exception: # noqa
|
322
316
|
tb = traceback.format_exc()
|
323
|
-
fmt, args = 'unknown problem killing %s (%s):%s', (
|
317
|
+
fmt, args = 'unknown problem killing %s (%s):%s', (self.name, self.pid, tb)
|
324
318
|
log.critical(fmt, *args)
|
325
319
|
self.change_state(ProcessState.UNKNOWN)
|
326
320
|
self._killing = False
|
@@ -336,14 +330,13 @@ class ProcessImpl(Process):
|
|
336
330
|
Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
|
337
331
|
running.
|
338
332
|
"""
|
339
|
-
process_name = as_string(self._config.name)
|
340
333
|
args: tuple
|
341
334
|
if not self.pid:
|
342
|
-
fmt, args = "
|
335
|
+
fmt, args = "Attempted to send %s sig %s but it wasn't running", (self.name, sig_name(sig))
|
343
336
|
log.debug(fmt, *args)
|
344
337
|
return fmt % args
|
345
338
|
|
346
|
-
log.debug('sending %s (pid %s) sig %s',
|
339
|
+
log.debug('sending %s (pid %s) sig %s', self.name, self.pid, sig_name(sig))
|
347
340
|
|
348
341
|
self.check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
|
349
342
|
|
@@ -354,7 +347,7 @@ class ProcessImpl(Process):
|
|
354
347
|
if exc.errno == errno.ESRCH:
|
355
348
|
log.debug(
|
356
349
|
'unable to signal %s (pid %s), it probably just now exited on its own: %s',
|
357
|
-
|
350
|
+
self.name,
|
358
351
|
self.pid,
|
359
352
|
str(exc),
|
360
353
|
)
|
@@ -364,14 +357,14 @@ class ProcessImpl(Process):
|
|
364
357
|
raise
|
365
358
|
except Exception: # noqa
|
366
359
|
tb = traceback.format_exc()
|
367
|
-
fmt, args = 'unknown problem sending sig %s (%s):%s', (
|
360
|
+
fmt, args = 'unknown problem sending sig %s (%s):%s', (self.name, self.pid, tb)
|
368
361
|
log.critical(fmt, *args)
|
369
362
|
self.change_state(ProcessState.UNKNOWN)
|
370
363
|
return fmt % args
|
371
364
|
|
372
365
|
return None
|
373
366
|
|
374
|
-
def finish(self, sts:
|
367
|
+
def finish(self, sts: Rc) -> None:
|
375
368
|
"""The process was reaped and we need to report and manage its state."""
|
376
369
|
|
377
370
|
self._dispatchers.drain()
|
@@ -383,7 +376,6 @@ class ProcessImpl(Process):
|
|
383
376
|
self._check_and_adjust_for_system_clock_rollback(now)
|
384
377
|
|
385
378
|
self._last_stop = now
|
386
|
-
process_name = as_string(self._config.name)
|
387
379
|
|
388
380
|
if now > self._last_start:
|
389
381
|
too_quickly = now - self._last_start < self._config.startsecs
|
@@ -392,7 +384,7 @@ class ProcessImpl(Process):
|
|
392
384
|
log.warning(
|
393
385
|
"process '%s' (%s) last_start time is in the future, don't know how long process was running so "
|
394
386
|
"assuming it did not exit too quickly",
|
395
|
-
|
387
|
+
self.name,
|
396
388
|
self.pid,
|
397
389
|
)
|
398
390
|
|
@@ -402,9 +394,9 @@ class ProcessImpl(Process):
|
|
402
394
|
# likely the result of a stop request implies STOPPING -> STOPPED
|
403
395
|
self._killing = False
|
404
396
|
self._delay = 0
|
405
|
-
self._exitstatus = es
|
397
|
+
self._exitstatus = Rc(es)
|
406
398
|
|
407
|
-
fmt, args = 'stopped: %s (%s)', (
|
399
|
+
fmt, args = 'stopped: %s (%s)', (self.name, msg)
|
408
400
|
self.check_in_state(ProcessState.STOPPING)
|
409
401
|
self.change_state(ProcessState.STOPPED)
|
410
402
|
if exit_expected:
|
@@ -418,7 +410,7 @@ class ProcessImpl(Process):
|
|
418
410
|
self._spawn_err = 'Exited too quickly (process log may have details)'
|
419
411
|
self.check_in_state(ProcessState.STARTING)
|
420
412
|
self.change_state(ProcessState.BACKOFF)
|
421
|
-
log.warning('exited: %s (%s)',
|
413
|
+
log.warning('exited: %s (%s)', self.name, msg + '; not expected')
|
422
414
|
|
423
415
|
else:
|
424
416
|
# this finish was not the result of a stop request, the program was in the RUNNING state but exited implies
|
@@ -437,21 +429,18 @@ class ProcessImpl(Process):
|
|
437
429
|
if exit_expected:
|
438
430
|
# expected exit code
|
439
431
|
self.change_state(ProcessState.EXITED, expected=True)
|
440
|
-
log.info('exited: %s (%s)',
|
432
|
+
log.info('exited: %s (%s)', self.name, msg + '; expected')
|
441
433
|
else:
|
442
434
|
# unexpected exit code
|
443
435
|
self._spawn_err = f'Bad exit code {es}'
|
444
436
|
self.change_state(ProcessState.EXITED, expected=False)
|
445
|
-
log.warning('exited: %s (%s)',
|
437
|
+
log.warning('exited: %s (%s)', self.name, msg + '; not expected')
|
446
438
|
|
447
|
-
self._pid = 0
|
439
|
+
self._pid = Pid(0)
|
448
440
|
close_parent_pipes(self._pipes)
|
449
441
|
self._pipes = ProcessPipes()
|
450
442
|
self._dispatchers = Dispatchers([])
|
451
443
|
|
452
|
-
def get_state(self) -> ProcessState:
|
453
|
-
return self._state
|
454
|
-
|
455
444
|
def transition(self) -> None:
|
456
445
|
now = time.time()
|
457
446
|
state = self._state
|
@@ -460,7 +449,7 @@ class ProcessImpl(Process):
|
|
460
449
|
|
461
450
|
logger = log
|
462
451
|
|
463
|
-
if self.
|
452
|
+
if self._supervisor_states.state > SupervisorState.RESTARTING:
|
464
453
|
# dont start any processes if supervisor is shutting down
|
465
454
|
if state == ProcessState.EXITED:
|
466
455
|
if self._config.autorestart:
|
@@ -482,7 +471,6 @@ class ProcessImpl(Process):
|
|
482
471
|
# BACKOFF -> STARTING
|
483
472
|
self.spawn()
|
484
473
|
|
485
|
-
process_name = as_string(self._config.name)
|
486
474
|
if state == ProcessState.STARTING:
|
487
475
|
if now - self._last_start > self._config.startsecs:
|
488
476
|
# STARTING -> RUNNING if the proc has started successfully and it has stayed up for at least
|
@@ -492,21 +480,21 @@ class ProcessImpl(Process):
|
|
492
480
|
self.check_in_state(ProcessState.STARTING)
|
493
481
|
self.change_state(ProcessState.RUNNING)
|
494
482
|
msg = ('entered RUNNING state, process has stayed up for > than %s seconds (startsecs)' % self._config.startsecs) # noqa
|
495
|
-
logger.info('success: %s %s',
|
483
|
+
logger.info('success: %s %s', self.name, msg)
|
496
484
|
|
497
485
|
if state == ProcessState.BACKOFF:
|
498
486
|
if self._backoff > self._config.startretries:
|
499
487
|
# BACKOFF -> FATAL if the proc has exceeded its number of retries
|
500
488
|
self.give_up()
|
501
489
|
msg = ('entered FATAL state, too many start retries too quickly')
|
502
|
-
logger.info('gave up: %s %s',
|
490
|
+
logger.info('gave up: %s %s', self.name, msg)
|
503
491
|
|
504
492
|
elif state == ProcessState.STOPPING:
|
505
493
|
time_left = self._delay - now
|
506
494
|
if time_left <= 0:
|
507
495
|
# kill processes which are taking too long to stop with a final sigkill. if this doesn't kill it, the
|
508
496
|
# process will be stuck in the STOPPING state forever.
|
509
|
-
log.warning('killing \'%s\' (%s) with SIGKILL',
|
497
|
+
log.warning('killing \'%s\' (%s) with SIGKILL', self.name, self.pid)
|
510
498
|
self.kill(signal.SIGKILL)
|
511
499
|
|
512
500
|
def after_setuid(self) -> None:
|
ominfra/supervisor/setup.py
CHANGED
ominfra/supervisor/setupimpl.py
CHANGED
@@ -14,8 +14,9 @@ from .setup import DaemonizeListeners
|
|
14
14
|
from .setup import SupervisorSetup
|
15
15
|
from .setup import SupervisorUser
|
16
16
|
from .types import ServerEpoch
|
17
|
-
from .utils import
|
18
|
-
from .utils import
|
17
|
+
from .utils.fs import try_unlink
|
18
|
+
from .utils.os import real_exit
|
19
|
+
from .utils.ostypes import Rc
|
19
20
|
|
20
21
|
|
21
22
|
##
|
@@ -238,7 +239,7 @@ class SupervisorSetupImpl(SupervisorSetup):
|
|
238
239
|
if pid != 0:
|
239
240
|
# Parent
|
240
241
|
log.debug('supervisord forked; parent exiting')
|
241
|
-
real_exit(0)
|
242
|
+
real_exit(Rc(0))
|
242
243
|
|
243
244
|
# Child
|
244
245
|
log.info('daemonizing the supervisord process')
|
ominfra/supervisor/signals.py
CHANGED
@@ -1,60 +1,66 @@
|
|
1
1
|
# ruff: noqa: UP006 UP007
|
2
2
|
import signal
|
3
|
-
import typing as ta
|
4
3
|
|
4
|
+
from omlish.lite.logs import log
|
5
5
|
|
6
|
-
|
6
|
+
from .groups import ProcessGroupManager
|
7
|
+
from .states import SupervisorState
|
8
|
+
from .types import ProcessOutputDispatcher
|
9
|
+
from .types import SupervisorStateManager
|
10
|
+
from .utils.signals import SignalReceiver
|
11
|
+
from .utils.signals import sig_name
|
7
12
|
|
8
13
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
except (ValueError, TypeError):
|
18
|
-
name = value.strip().upper() # type: ignore
|
19
|
-
if not name.startswith('SIG'):
|
20
|
-
name = f'SIG{name}'
|
21
|
-
|
22
|
-
if (sn := _SIGS_BY_NAME.get(name)) is None:
|
23
|
-
raise ValueError(f'value {value!r} is not a valid signal name') # noqa
|
24
|
-
num = sn
|
25
|
-
|
26
|
-
if num not in _SIGS_BY_NUM:
|
27
|
-
raise ValueError(f'value {value!r} is not a valid signal number')
|
28
|
-
|
29
|
-
return num
|
30
|
-
|
31
|
-
|
32
|
-
def sig_name(num: int) -> str:
|
33
|
-
if (sig := _SIGS_BY_NUM.get(num)) is not None:
|
34
|
-
return sig.name
|
35
|
-
return f'signal {sig}'
|
36
|
-
|
37
|
-
|
38
|
-
##
|
39
|
-
|
40
|
-
|
41
|
-
class SignalReceiver:
|
42
|
-
def __init__(self) -> None:
|
14
|
+
class SignalHandler:
|
15
|
+
def __init__(
|
16
|
+
self,
|
17
|
+
*,
|
18
|
+
states: SupervisorStateManager,
|
19
|
+
signal_receiver: SignalReceiver,
|
20
|
+
process_groups: ProcessGroupManager,
|
21
|
+
) -> None:
|
43
22
|
super().__init__()
|
44
23
|
|
45
|
-
self.
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
signal.
|
24
|
+
self._states = states
|
25
|
+
self._signal_receiver = signal_receiver
|
26
|
+
self._process_groups = process_groups
|
27
|
+
|
28
|
+
def set_signals(self) -> None:
|
29
|
+
self._signal_receiver.install(
|
30
|
+
signal.SIGTERM,
|
31
|
+
signal.SIGINT,
|
32
|
+
signal.SIGQUIT,
|
33
|
+
signal.SIGHUP,
|
34
|
+
signal.SIGCHLD,
|
35
|
+
signal.SIGUSR2,
|
36
|
+
)
|
37
|
+
|
38
|
+
def handle_signals(self) -> None:
|
39
|
+
sig = self._signal_receiver.get_signal()
|
40
|
+
if not sig:
|
41
|
+
return
|
42
|
+
|
43
|
+
if sig in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
|
44
|
+
log.warning('received %s indicating exit request', sig_name(sig))
|
45
|
+
self._states.set_state(SupervisorState.SHUTDOWN)
|
46
|
+
|
47
|
+
elif sig == signal.SIGHUP:
|
48
|
+
if self._states.state == SupervisorState.SHUTDOWN:
|
49
|
+
log.warning('ignored %s indicating restart request (shutdown in progress)', sig_name(sig)) # noqa
|
50
|
+
else:
|
51
|
+
log.warning('received %s indicating restart request', sig_name(sig)) # noqa
|
52
|
+
self._states.set_state(SupervisorState.RESTARTING)
|
53
|
+
|
54
|
+
elif sig == signal.SIGCHLD:
|
55
|
+
log.debug('received %s indicating a child quit', sig_name(sig))
|
56
|
+
|
57
|
+
elif sig == signal.SIGUSR2:
|
58
|
+
log.info('received %s indicating log reopen request', sig_name(sig))
|
59
|
+
|
60
|
+
for p in self._process_groups.all_processes():
|
61
|
+
for d in p.get_dispatchers():
|
62
|
+
if isinstance(d, ProcessOutputDispatcher):
|
63
|
+
d.reopen_logs()
|
54
64
|
|
55
|
-
def get_signal(self) -> ta.Optional[int]:
|
56
|
-
if self._signals_recvd:
|
57
|
-
sig = self._signals_recvd.pop(0)
|
58
65
|
else:
|
59
|
-
|
60
|
-
return sig
|
66
|
+
log.debug('received %s indicating nothing', sig_name(sig))
|