ominfra 0.0.0.dev126__py3-none-any.whl → 0.0.0.dev128__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. ominfra/clouds/aws/auth.py +1 -1
  2. ominfra/deploy/_executor.py +1 -1
  3. ominfra/deploy/poly/_main.py +1 -1
  4. ominfra/pyremote/_runcommands.py +1 -1
  5. ominfra/scripts/journald2aws.py +2 -2
  6. ominfra/scripts/supervisor.py +4736 -4166
  7. ominfra/supervisor/configs.py +34 -11
  8. ominfra/supervisor/context.py +7 -345
  9. ominfra/supervisor/dispatchers.py +21 -324
  10. ominfra/supervisor/dispatchersimpl.py +343 -0
  11. ominfra/supervisor/groups.py +33 -111
  12. ominfra/supervisor/groupsimpl.py +86 -0
  13. ominfra/supervisor/inject.py +45 -20
  14. ominfra/supervisor/main.py +3 -3
  15. ominfra/supervisor/pipes.py +85 -0
  16. ominfra/supervisor/poller.py +42 -38
  17. ominfra/supervisor/privileges.py +65 -0
  18. ominfra/supervisor/process.py +6 -742
  19. ominfra/supervisor/processimpl.py +516 -0
  20. ominfra/supervisor/setup.py +38 -0
  21. ominfra/supervisor/setupimpl.py +262 -0
  22. ominfra/supervisor/spawning.py +32 -0
  23. ominfra/supervisor/spawningimpl.py +350 -0
  24. ominfra/supervisor/supervisor.py +67 -84
  25. ominfra/supervisor/types.py +101 -47
  26. ominfra/supervisor/utils/__init__.py +0 -0
  27. ominfra/supervisor/utils/collections.py +52 -0
  28. ominfra/supervisor/utils/diag.py +31 -0
  29. ominfra/supervisor/utils/fds.py +46 -0
  30. ominfra/supervisor/utils/fs.py +47 -0
  31. ominfra/supervisor/utils/os.py +45 -0
  32. ominfra/supervisor/utils/ostypes.py +9 -0
  33. ominfra/supervisor/utils/signals.py +60 -0
  34. ominfra/supervisor/utils/strings.py +105 -0
  35. ominfra/supervisor/utils/users.py +67 -0
  36. {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/METADATA +3 -3
  37. {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/RECORD +41 -25
  38. ominfra/supervisor/datatypes.py +0 -175
  39. ominfra/supervisor/signals.py +0 -52
  40. ominfra/supervisor/utils.py +0 -206
  41. {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/LICENSE +0 -0
  42. {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/WHEEL +0 -0
  43. {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/entry_points.txt +0 -0
  44. {ominfra-0.0.0.dev126.dist-info → ominfra-0.0.0.dev128.dist-info}/top_level.txt +0 -0
@@ -1,755 +1,19 @@
1
1
  # ruff: noqa: UP006 UP007
2
- import errno
3
- import os.path
4
- import shlex
5
- import signal
6
- import time
7
- import traceback
8
2
  import typing as ta
9
3
 
10
- from omlish.lite.check import check_isinstance
11
- from omlish.lite.logs import log
12
- from omlish.lite.typing import Func
13
-
14
- from .configs import ProcessConfig
15
- from .context import check_execv_args
16
- from .context import close_child_pipes
17
- from .context import close_parent_pipes
18
- from .context import drop_privileges
19
- from .context import make_pipes
20
- from .datatypes import RestartUnconditionally
21
- from .events import PROCESS_STATE_EVENT_MAP
22
- from .events import EventCallbacks
23
- from .events import ProcessCommunicationEvent
24
- from .events import ProcessCommunicationStderrEvent
25
- from .events import ProcessCommunicationStdoutEvent
26
- from .exceptions import BadCommandError
27
- from .exceptions import ProcessError
28
- from .signals import sig_name
29
- from .states import ProcessState
30
- from .states import SupervisorState
31
- from .types import Dispatcher
32
- from .types import InputDispatcher
33
- from .types import OutputDispatcher
34
4
  from .types import Process
35
- from .types import ProcessGroup
36
- from .types import ServerContext
37
- from .utils import as_bytes
38
- from .utils import as_string
39
- from .utils import close_fd
40
- from .utils import compact_traceback
41
- from .utils import decode_wait_status
42
- from .utils import get_path
43
- from .utils import real_exit
44
-
45
-
46
- # (process: Process, event_type: ta.Type[ProcessCommunicationEvent], fd: int)
47
- OutputDispatcherFactory = ta.NewType('OutputDispatcherFactory', Func[OutputDispatcher])
48
-
49
- # (process: Process, event_type: ta.Type[ProcessCommunicationEvent], fd: int)
50
- InputDispatcherFactory = ta.NewType('InputDispatcherFactory', Func[InputDispatcher])
51
-
52
- InheritedFds = ta.NewType('InheritedFds', ta.FrozenSet[int])
5
+ from .utils.ostypes import Pid
53
6
 
54
7
 
55
8
  ##
56
9
 
57
10
 
58
- class ProcessImpl(Process):
59
- """A class to manage a subprocess."""
60
-
61
- def __init__(
62
- self,
63
- config: ProcessConfig,
64
- group: ProcessGroup,
65
- *,
66
- context: ServerContext,
67
- event_callbacks: EventCallbacks,
68
-
69
- output_dispatcher_factory: OutputDispatcherFactory,
70
- input_dispatcher_factory: InputDispatcherFactory,
71
-
72
- inherited_fds: ta.Optional[InheritedFds] = None,
73
-
74
- ) -> None:
75
- super().__init__()
76
-
77
- self._config = config
78
- self._group = group
79
-
80
- self._context = context
81
- self._event_callbacks = event_callbacks
82
-
83
- self._output_dispatcher_factory = output_dispatcher_factory
84
- self._input_dispatcher_factory = input_dispatcher_factory
85
-
86
- self._inherited_fds = InheritedFds(frozenset(inherited_fds or []))
87
-
88
- self._dispatchers: ta.Dict[int, Dispatcher] = {}
89
- self._pipes: ta.Dict[str, int] = {}
90
-
91
- self._state = ProcessState.STOPPED
92
- self._pid = 0 # 0 when not running
93
-
94
- self._last_start = 0. # Last time the subprocess was started; 0 if never
95
- self._last_stop = 0. # Last time the subprocess was stopped; 0 if never
96
- self._last_stop_report = 0. # Last time "waiting for x to stop" logged, to throttle
97
- self._delay = 0. # If nonzero, delay starting or killing until this time
98
-
99
- self._administrative_stop = False # true if process has been stopped by an admin
100
- self._system_stop = False # true if process has been stopped by the system
101
-
102
- self._killing = False # true if we are trying to kill this process
103
-
104
- self._backoff = 0 # backoff counter (to startretries)
105
-
106
- self._exitstatus: ta.Optional[int] = None # status attached to dead process by finish()
107
- self._spawn_err: ta.Optional[str] = None # error message attached by spawn() if any
108
-
109
- @property
110
- def pid(self) -> int:
111
- return self._pid
112
-
113
- @property
114
- def group(self) -> ProcessGroup:
115
- return self._group
116
-
117
- @property
118
- def config(self) -> ProcessConfig:
119
- return self._config
120
-
121
- @property
122
- def context(self) -> ServerContext:
123
- return self._context
124
-
125
- @property
126
- def state(self) -> ProcessState:
127
- return self._state
128
-
129
- @property
130
- def backoff(self) -> int:
131
- return self._backoff
132
-
133
- def get_dispatchers(self) -> ta.Mapping[int, Dispatcher]:
134
- return self._dispatchers
135
-
136
- def remove_logs(self) -> None:
137
- for dispatcher in self._dispatchers.values():
138
- if hasattr(dispatcher, 'remove_logs'):
139
- dispatcher.remove_logs()
140
-
141
- def reopen_logs(self) -> None:
142
- for dispatcher in self._dispatchers.values():
143
- if hasattr(dispatcher, 'reopen_logs'):
144
- dispatcher.reopen_logs()
145
-
146
- def drain(self) -> None:
147
- for dispatcher in self._dispatchers.values():
148
- # note that we *must* call readable() for every dispatcher, as it may have side effects for a given
149
- # dispatcher (eg. call handle_listener_state_change for event listener processes)
150
- if dispatcher.readable():
151
- dispatcher.handle_read_event()
152
- if dispatcher.writable():
153
- dispatcher.handle_write_event()
154
-
155
- def write(self, chars: ta.Union[bytes, str]) -> None:
156
- if not self.pid or self._killing:
157
- raise OSError(errno.EPIPE, 'Process already closed')
158
-
159
- stdin_fd = self._pipes['stdin']
160
- if stdin_fd is None:
161
- raise OSError(errno.EPIPE, 'Process has no stdin channel')
162
-
163
- dispatcher = check_isinstance(self._dispatchers[stdin_fd], InputDispatcher)
164
- if dispatcher.closed:
165
- raise OSError(errno.EPIPE, "Process' stdin channel is closed")
166
-
167
- dispatcher.write(chars)
168
- dispatcher.flush() # this must raise EPIPE if the pipe is closed
169
-
170
- def _get_execv_args(self) -> ta.Tuple[str, ta.Sequence[str]]:
171
- """
172
- Internal: turn a program name into a file name, using $PATH, make sure it exists / is executable, raising a
173
- ProcessError if not
174
- """
175
-
176
- try:
177
- commandargs = shlex.split(self._config.command)
178
- except ValueError as e:
179
- raise BadCommandError(f"can't parse command {self._config.command!r}: {e}") # noqa
180
-
181
- if commandargs:
182
- program = commandargs[0]
183
- else:
184
- raise BadCommandError('command is empty')
185
-
186
- if '/' in program:
187
- filename = program
188
- try:
189
- st = os.stat(filename)
190
- except OSError:
191
- st = None
192
-
193
- else:
194
- path = get_path()
195
- found = None
196
- st = None
197
- for dir in path: # noqa
198
- found = os.path.join(dir, program)
199
- try:
200
- st = os.stat(found)
201
- except OSError:
202
- pass
203
- else:
204
- break
205
- if st is None:
206
- filename = program
207
- else:
208
- filename = found # type: ignore
209
-
210
- # check_execv_args will raise a ProcessError if the execv args are bogus, we break it out into a separate
211
- # options method call here only to service unit tests
212
- check_execv_args(filename, commandargs, st)
213
-
214
- return filename, commandargs
215
-
216
- def change_state(self, new_state: ProcessState, expected: bool = True) -> bool:
217
- old_state = self._state
218
- if new_state is old_state:
219
- return False
220
-
221
- self._state = new_state
222
- if new_state == ProcessState.BACKOFF:
223
- now = time.time()
224
- self._backoff += 1
225
- self._delay = now + self._backoff
226
-
227
- event_class = PROCESS_STATE_EVENT_MAP.get(new_state)
228
- if event_class is not None:
229
- event = event_class(self, old_state, expected)
230
- self._event_callbacks.notify(event)
231
-
232
- return True
233
-
234
- def _check_in_state(self, *states: ProcessState) -> None:
235
- if self._state not in states:
236
- current_state = self._state.name
237
- allowable_states = ' '.join(s.name for s in states)
238
- process_name = as_string(self._config.name)
239
- raise RuntimeError('Assertion failed for %s: %s not in %s' % (process_name, current_state, allowable_states)) # noqa
240
-
241
- def _record_spawn_err(self, msg: str) -> None:
242
- self._spawn_err = msg
243
- log.info('_spawn_err: %s', msg)
244
-
245
- def spawn(self) -> ta.Optional[int]:
246
- process_name = as_string(self._config.name)
247
-
248
- if self.pid:
249
- log.warning('process \'%s\' already running', process_name)
250
- return None
251
-
252
- self._killing = False
253
- self._spawn_err = None
254
- self._exitstatus = None
255
- self._system_stop = False
256
- self._administrative_stop = False
257
-
258
- self._last_start = time.time()
259
-
260
- self._check_in_state(
261
- ProcessState.EXITED,
262
- ProcessState.FATAL,
263
- ProcessState.BACKOFF,
264
- ProcessState.STOPPED,
265
- )
266
-
267
- self.change_state(ProcessState.STARTING)
268
-
269
- try:
270
- filename, argv = self._get_execv_args()
271
- except ProcessError as what:
272
- self._record_spawn_err(what.args[0])
273
- self._check_in_state(ProcessState.STARTING)
274
- self.change_state(ProcessState.BACKOFF)
275
- return None
276
-
277
- try:
278
- self._dispatchers, self._pipes = self._make_dispatchers() # type: ignore
279
- except OSError as why:
280
- code = why.args[0]
281
- if code == errno.EMFILE:
282
- # too many file descriptors open
283
- msg = f"too many open files to spawn '{process_name}'"
284
- else:
285
- msg = f"unknown error making dispatchers for '{process_name}': {errno.errorcode.get(code, code)}"
286
- self._record_spawn_err(msg)
287
- self._check_in_state(ProcessState.STARTING)
288
- self.change_state(ProcessState.BACKOFF)
289
- return None
290
-
291
- try:
292
- pid = os.fork()
293
- except OSError as why:
294
- code = why.args[0]
295
- if code == errno.EAGAIN:
296
- # process table full
297
- msg = f'Too many processes in process table to spawn \'{process_name}\''
298
- else:
299
- msg = f'unknown error during fork for \'{process_name}\': {errno.errorcode.get(code, code)}'
300
- self._record_spawn_err(msg)
301
- self._check_in_state(ProcessState.STARTING)
302
- self.change_state(ProcessState.BACKOFF)
303
- close_parent_pipes(self._pipes)
304
- close_child_pipes(self._pipes)
305
- return None
306
-
307
- if pid != 0:
308
- return self._spawn_as_parent(pid)
309
-
310
- else:
311
- self._spawn_as_child(filename, argv)
312
- return None
313
-
314
- def _make_dispatchers(self) -> ta.Tuple[ta.Mapping[int, Dispatcher], ta.Mapping[str, int]]:
315
- use_stderr = not self._config.redirect_stderr
316
-
317
- p = make_pipes(use_stderr)
318
- stdout_fd, stderr_fd, stdin_fd = p['stdout'], p['stderr'], p['stdin']
319
-
320
- dispatchers: ta.Dict[int, Dispatcher] = {}
321
-
322
- dispatcher_kw = dict(
323
- event_callbacks=self._event_callbacks,
324
- )
325
-
326
- etype: ta.Type[ProcessCommunicationEvent]
327
- if stdout_fd is not None:
328
- etype = ProcessCommunicationStdoutEvent
329
- dispatchers[stdout_fd] = check_isinstance(self._output_dispatcher_factory(
330
- self,
331
- etype,
332
- stdout_fd,
333
- **dispatcher_kw,
334
- ), OutputDispatcher)
335
-
336
- if stderr_fd is not None:
337
- etype = ProcessCommunicationStderrEvent
338
- dispatchers[stderr_fd] = check_isinstance(self._output_dispatcher_factory(
339
- self,
340
- etype,
341
- stderr_fd,
342
- **dispatcher_kw,
343
- ), OutputDispatcher)
344
-
345
- if stdin_fd is not None:
346
- dispatchers[stdin_fd] = check_isinstance(self._input_dispatcher_factory(
347
- self,
348
- 'stdin',
349
- stdin_fd,
350
- **dispatcher_kw,
351
- ), InputDispatcher)
352
-
353
- return dispatchers, p
11
+ class ProcessStateError(RuntimeError):
12
+ pass
354
13
 
355
- def _spawn_as_parent(self, pid: int) -> int:
356
- # Parent
357
- self._pid = pid
358
- close_child_pipes(self._pipes)
359
- log.info('spawned: \'%s\' with pid %s', as_string(self._config.name), pid)
360
- self._spawn_err = None
361
- self._delay = time.time() + self._config.startsecs
362
- self.context.pid_history[pid] = self
363
- return pid
364
14
 
365
- def _prepare_child_fds(self) -> None:
366
- os.dup2(self._pipes['child_stdin'], 0)
367
- os.dup2(self._pipes['child_stdout'], 1)
368
- if self._config.redirect_stderr:
369
- os.dup2(self._pipes['child_stdout'], 2)
370
- else:
371
- os.dup2(self._pipes['child_stderr'], 2)
372
-
373
- for i in range(3, self.context.config.minfds):
374
- if i in self._inherited_fds:
375
- continue
376
- close_fd(i)
377
-
378
- def _spawn_as_child(self, filename: str, argv: ta.Sequence[str]) -> None:
379
- try:
380
- # prevent child from receiving signals sent to the parent by calling os.setpgrp to create a new process
381
- # group for the child; this prevents, for instance, the case of child processes being sent a SIGINT when
382
- # running supervisor in foreground mode and Ctrl-C in the terminal window running supervisord is pressed.
383
- # Presumably it also prevents HUP, etc received by supervisord from being sent to children.
384
- os.setpgrp()
385
-
386
- self._prepare_child_fds()
387
- # sending to fd 2 will put this output in the stderr log
388
-
389
- # set user
390
- setuid_msg = self.set_uid()
391
- if setuid_msg:
392
- uid = self._config.uid
393
- msg = f"couldn't setuid to {uid}: {setuid_msg}\n"
394
- os.write(2, as_bytes('supervisor: ' + msg))
395
- return # finally clause will exit the child process
396
-
397
- # set environment
398
- env = os.environ.copy()
399
- env['SUPERVISOR_ENABLED'] = '1'
400
- env['SUPERVISOR_PROCESS_NAME'] = self._config.name
401
- if self._group:
402
- env['SUPERVISOR_GROUP_NAME'] = self._group.config.name
403
- if self._config.environment is not None:
404
- env.update(self._config.environment)
405
-
406
- # change directory
407
- cwd = self._config.directory
408
- try:
409
- if cwd is not None:
410
- os.chdir(os.path.expanduser(cwd))
411
- except OSError as why:
412
- code = errno.errorcode.get(why.args[0], why.args[0])
413
- msg = f"couldn't chdir to {cwd}: {code}\n"
414
- os.write(2, as_bytes('supervisor: ' + msg))
415
- return # finally clause will exit the child process
416
-
417
- # set umask, then execve
418
- try:
419
- if self._config.umask is not None:
420
- os.umask(self._config.umask)
421
- os.execve(filename, list(argv), env)
422
- except OSError as why:
423
- code = errno.errorcode.get(why.args[0], why.args[0])
424
- msg = f"couldn't exec {argv[0]}: {code}\n"
425
- os.write(2, as_bytes('supervisor: ' + msg))
426
- except Exception: # noqa
427
- (file, fun, line), t, v, tbinfo = compact_traceback()
428
- error = f'{t}, {v}: file: {file} line: {line}'
429
- msg = f"couldn't exec {filename}: {error}\n"
430
- os.write(2, as_bytes('supervisor: ' + msg))
431
-
432
- # this point should only be reached if execve failed. the finally clause will exit the child process.
433
-
434
- finally:
435
- os.write(2, as_bytes('supervisor: child process was not spawned\n'))
436
- real_exit(127) # exit process with code for spawn failure
437
-
438
- def _check_and_adjust_for_system_clock_rollback(self, test_time):
439
- """
440
- Check if system clock has rolled backward beyond test_time. If so, set affected timestamps to test_time.
441
- """
442
-
443
- if self._state == ProcessState.STARTING:
444
- self._last_start = min(test_time, self._last_start)
445
- if self._delay > 0 and test_time < (self._delay - self._config.startsecs):
446
- self._delay = test_time + self._config.startsecs
447
-
448
- elif self._state == ProcessState.RUNNING:
449
- if test_time > self._last_start and test_time < (self._last_start + self._config.startsecs):
450
- self._last_start = test_time - self._config.startsecs
451
-
452
- elif self._state == ProcessState.STOPPING:
453
- self._last_stop_report = min(test_time, self._last_stop_report)
454
- if self._delay > 0 and test_time < (self._delay - self._config.stopwaitsecs):
455
- self._delay = test_time + self._config.stopwaitsecs
456
-
457
- elif self._state == ProcessState.BACKOFF:
458
- if self._delay > 0 and test_time < (self._delay - self._backoff):
459
- self._delay = test_time + self._backoff
460
-
461
- def stop(self) -> ta.Optional[str]:
462
- self._administrative_stop = True
463
- self._last_stop_report = 0
464
- return self.kill(self._config.stopsignal)
465
-
466
- def stop_report(self) -> None:
467
- """Log a 'waiting for x to stop' message with throttling."""
468
-
469
- if self._state == ProcessState.STOPPING:
470
- now = time.time()
471
-
472
- self._check_and_adjust_for_system_clock_rollback(now)
473
-
474
- if now > (self._last_stop_report + 2): # every 2 seconds
475
- log.info('waiting for %s to stop', as_string(self._config.name))
476
- self._last_stop_report = now
477
-
478
- def give_up(self) -> None:
479
- self._delay = 0
480
- self._backoff = 0
481
- self._system_stop = True
482
- self._check_in_state(ProcessState.BACKOFF)
483
- self.change_state(ProcessState.FATAL)
484
-
485
- def kill(self, sig: int) -> ta.Optional[str]:
486
- """
487
- Send a signal to the subprocess with the intention to kill it (to make it exit). This may or may not actually
488
- kill it.
489
-
490
- Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
491
- running.
492
- """
493
- now = time.time()
494
-
495
- process_name = as_string(self._config.name)
496
- # If the process is in BACKOFF and we want to stop or kill it, then BACKOFF -> STOPPED. This is needed because
497
- # if startretries is a large number and the process isn't starting successfully, the stop request would be
498
- # blocked for a long time waiting for the retries.
499
- if self._state == ProcessState.BACKOFF:
500
- log.debug('Attempted to kill %s, which is in BACKOFF state.', process_name)
501
- self.change_state(ProcessState.STOPPED)
502
- return None
503
-
504
- args: tuple
505
- if not self.pid:
506
- fmt, args = "attempted to kill %s with sig %s but it wasn't running", (process_name, sig_name(sig))
507
- log.debug(fmt, *args)
508
- return fmt % args
509
-
510
- # If we're in the stopping state, then we've already sent the stop signal and this is the kill signal
511
- if self._state == ProcessState.STOPPING:
512
- killasgroup = self._config.killasgroup
513
- else:
514
- killasgroup = self._config.stopasgroup
515
-
516
- as_group = ''
517
- if killasgroup:
518
- as_group = 'process group '
519
-
520
- log.debug('killing %s (pid %s) %s with signal %s', process_name, self.pid, as_group, sig_name(sig))
521
-
522
- # RUNNING/STARTING/STOPPING -> STOPPING
523
- self._killing = True
524
- self._delay = now + self._config.stopwaitsecs
525
- # we will already be in the STOPPING state if we're doing a SIGKILL as a result of overrunning stopwaitsecs
526
- self._check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
527
- self.change_state(ProcessState.STOPPING)
528
-
529
- pid = self.pid
530
- if killasgroup:
531
- # send to the whole process group instead
532
- pid = -self.pid
533
-
534
- try:
535
- try:
536
- os.kill(pid, sig)
537
- except OSError as exc:
538
- if exc.errno == errno.ESRCH:
539
- log.debug('unable to signal %s (pid %s), it probably just exited on its own: %s', process_name, self.pid, str(exc)) # noqa
540
- # we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
541
- # processing.
542
- return None
543
- raise
544
- except Exception: # noqa
545
- tb = traceback.format_exc()
546
- fmt, args = 'unknown problem killing %s (%s):%s', (process_name, self.pid, tb)
547
- log.critical(fmt, *args)
548
- self.change_state(ProcessState.UNKNOWN)
549
- self._killing = False
550
- self._delay = 0
551
- return fmt % args
552
-
553
- return None
554
-
555
- def signal(self, sig: int) -> ta.Optional[str]:
556
- """
557
- Send a signal to the subprocess, without intending to kill it.
558
-
559
- Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not
560
- running.
561
- """
562
- process_name = as_string(self._config.name)
563
- args: tuple
564
- if not self.pid:
565
- fmt, args = "attempted to send %s sig %s but it wasn't running", (process_name, sig_name(sig))
566
- log.debug(fmt, *args)
567
- return fmt % args
568
-
569
- log.debug('sending %s (pid %s) sig %s', process_name, self.pid, sig_name(sig))
570
-
571
- self._check_in_state(ProcessState.RUNNING, ProcessState.STARTING, ProcessState.STOPPING)
572
-
573
- try:
574
- try:
575
- os.kill(self.pid, sig)
576
- except OSError as exc:
577
- if exc.errno == errno.ESRCH:
578
- log.debug(
579
- 'unable to signal %s (pid %s), it probably just now exited on its own: %s',
580
- process_name,
581
- self.pid,
582
- str(exc),
583
- )
584
- # we could change the state here but we intentionally do not. we will do it during normal SIGCHLD
585
- # processing.
586
- return None
587
- raise
588
- except Exception: # noqa
589
- tb = traceback.format_exc()
590
- fmt, args = 'unknown problem sending sig %s (%s):%s', (process_name, self.pid, tb)
591
- log.critical(fmt, *args)
592
- self.change_state(ProcessState.UNKNOWN)
593
- return fmt % args
594
-
595
- return None
596
-
597
- def finish(self, sts: int) -> None:
598
- """The process was reaped and we need to report and manage its state."""
599
-
600
- self.drain()
601
-
602
- es, msg = decode_wait_status(sts)
603
-
604
- now = time.time()
605
-
606
- self._check_and_adjust_for_system_clock_rollback(now)
607
-
608
- self._last_stop = now
609
- process_name = as_string(self._config.name)
610
-
611
- if now > self._last_start:
612
- too_quickly = now - self._last_start < self._config.startsecs
613
- else:
614
- too_quickly = False
615
- log.warning(
616
- "process '%s' (%s) last_start time is in the future, don't know how long process was running so "
617
- "assuming it did not exit too quickly",
618
- process_name,
619
- self.pid,
620
- )
621
-
622
- exit_expected = es in self._config.exitcodes
623
-
624
- if self._killing:
625
- # likely the result of a stop request implies STOPPING -> STOPPED
626
- self._killing = False
627
- self._delay = 0
628
- self._exitstatus = es
629
-
630
- fmt, args = 'stopped: %s (%s)', (process_name, msg)
631
- self._check_in_state(ProcessState.STOPPING)
632
- self.change_state(ProcessState.STOPPED)
633
- if exit_expected:
634
- log.info(fmt, *args)
635
- else:
636
- log.warning(fmt, *args)
637
-
638
- elif too_quickly:
639
- # the program did not stay up long enough to make it to RUNNING implies STARTING -> BACKOFF
640
- self._exitstatus = None
641
- self._spawn_err = 'Exited too quickly (process log may have details)'
642
- self._check_in_state(ProcessState.STARTING)
643
- self.change_state(ProcessState.BACKOFF)
644
- log.warning('exited: %s (%s)', process_name, msg + '; not expected')
645
-
646
- else:
647
- # this finish was not the result of a stop request, the program was in the RUNNING state but exited implies
648
- # RUNNING -> EXITED normally but see next comment
649
- self._delay = 0
650
- self._backoff = 0
651
- self._exitstatus = es
652
-
653
- # if the process was STARTING but a system time change causes self.last_start to be in the future, the
654
- # normal STARTING->RUNNING transition can be subverted so we perform the transition here.
655
- if self._state == ProcessState.STARTING:
656
- self.change_state(ProcessState.RUNNING)
657
-
658
- self._check_in_state(ProcessState.RUNNING)
659
-
660
- if exit_expected:
661
- # expected exit code
662
- self.change_state(ProcessState.EXITED, expected=True)
663
- log.info('exited: %s (%s)', process_name, msg + '; expected')
664
- else:
665
- # unexpected exit code
666
- self._spawn_err = f'Bad exit code {es}'
667
- self.change_state(ProcessState.EXITED, expected=False)
668
- log.warning('exited: %s (%s)', process_name, msg + '; not expected')
669
-
670
- self._pid = 0
671
- close_parent_pipes(self._pipes)
672
- self._pipes = {}
673
- self._dispatchers = {}
674
-
675
- def set_uid(self) -> ta.Optional[str]:
676
- if self._config.uid is None:
677
- return None
678
- msg = drop_privileges(self._config.uid)
679
- return msg
680
-
681
- def __repr__(self) -> str:
682
- # repr can't return anything other than a native string, but the name might be unicode - a problem on Python 2.
683
- name = self._config.name
684
- return f'<Subprocess at {id(self)} with name {name} in state {self.get_state().name}>'
685
-
686
- def get_state(self) -> ProcessState:
687
- return self._state
688
-
689
- def transition(self) -> None:
690
- now = time.time()
691
- state = self._state
692
-
693
- self._check_and_adjust_for_system_clock_rollback(now)
694
-
695
- logger = log
696
-
697
- if self.context.state > SupervisorState.RESTARTING:
698
- # dont start any processes if supervisor is shutting down
699
- if state == ProcessState.EXITED:
700
- if self._config.autorestart:
701
- if self._config.autorestart is RestartUnconditionally:
702
- # EXITED -> STARTING
703
- self.spawn()
704
- elif self._exitstatus not in self._config.exitcodes:
705
- # EXITED -> STARTING
706
- self.spawn()
707
-
708
- elif state == ProcessState.STOPPED and not self._last_start:
709
- if self._config.autostart:
710
- # STOPPED -> STARTING
711
- self.spawn()
712
-
713
- elif state == ProcessState.BACKOFF:
714
- if self._backoff <= self._config.startretries:
715
- if now > self._delay:
716
- # BACKOFF -> STARTING
717
- self.spawn()
718
-
719
- process_name = as_string(self._config.name)
720
- if state == ProcessState.STARTING:
721
- if now - self._last_start > self._config.startsecs:
722
- # STARTING -> RUNNING if the proc has started successfully and it has stayed up for at least
723
- # proc.config.startsecs,
724
- self._delay = 0
725
- self._backoff = 0
726
- self._check_in_state(ProcessState.STARTING)
727
- self.change_state(ProcessState.RUNNING)
728
- msg = ('entered RUNNING state, process has stayed up for > than %s seconds (startsecs)' % self._config.startsecs) # noqa
729
- logger.info('success: %s %s', process_name, msg)
730
-
731
- if state == ProcessState.BACKOFF:
732
- if self._backoff > self._config.startretries:
733
- # BACKOFF -> FATAL if the proc has exceeded its number of retries
734
- self.give_up()
735
- msg = ('entered FATAL state, too many start retries too quickly')
736
- logger.info('gave up: %s %s', process_name, msg)
15
+ ##
737
16
 
738
- elif state == ProcessState.STOPPING:
739
- time_left = self._delay - now
740
- if time_left <= 0:
741
- # kill processes which are taking too long to stop with a final sigkill. if this doesn't kill it, the
742
- # process will be stuck in the STOPPING state forever.
743
- log.warning('killing \'%s\' (%s) with SIGKILL', process_name, self.pid)
744
- self.kill(signal.SIGKILL)
745
17
 
746
- def create_auto_child_logs(self) -> None:
747
- # temporary logfiles which are erased at start time
748
- # get_autoname = self.context.get_auto_child_log_name # noqa
749
- # sid = self.context.config.identifier # noqa
750
- # name = self._config.name # noqa
751
- # if self.stdout_logfile is Automatic:
752
- # self.stdout_logfile = get_autoname(name, sid, 'stdout')
753
- # if self.stderr_logfile is Automatic:
754
- # self.stderr_logfile = get_autoname(name, sid, 'stderr')
755
- pass
18
+ class PidHistory(ta.Dict[Pid, Process]):
19
+ pass