ominfra 0.0.0.dev75__py3-none-any.whl → 0.0.0.dev77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ # ruff: noqa: UP006
2
+ import typing as ta
3
+
4
+ from omlish.lite.check import check_not_none
5
+
6
+
7
+ ProcessState = int # ta.TypeAlias
8
+ SupervisorState = int # ta.TypeAlias
9
+
10
+
11
+ ##
12
+
13
+
14
+ def _names_by_code(states: ta.Any) -> ta.Dict[int, str]:
15
+ d = {}
16
+ for name in states.__dict__:
17
+ if not name.startswith('__'):
18
+ code = getattr(states, name)
19
+ d[code] = name
20
+ return d
21
+
22
+
23
+ ##
24
+
25
+
26
class ProcessStates:
    """Named integer codes for the states of a supervised process."""

    # Every non-dunder attribute defined here ends up in the code -> name table built by `_names_by_code`.
    STOPPED = 0
    STARTING = 10
    RUNNING = 20
    BACKOFF = 30
    STOPPING = 40
    EXITED = 100
    FATAL = 200
    UNKNOWN = 1000
35
+
36
+
37
# Process state codes grouped as 'stopped'.
STOPPED_STATES = (ProcessStates.STOPPED, ProcessStates.EXITED, ProcessStates.FATAL, ProcessStates.UNKNOWN)

# Process state codes grouped as 'running'.
RUNNING_STATES = (ProcessStates.RUNNING, ProcessStates.BACKOFF, ProcessStates.STARTING)

# Process state codes grouped as 'signallable'.
SIGNALLABLE_STATES = (ProcessStates.RUNNING, ProcessStates.STARTING, ProcessStates.STOPPING)
55
+
56
+
57
# code -> name table for ProcessStates, computed once at import time
_process_states_by_code = _names_by_code(ProcessStates)


def get_process_state_description(code: ProcessState) -> str:
    """
    Return the ProcessStates attribute name registered for `code`.

    The lookup result is passed through `check_not_none`, so an unregistered code is presumably rejected there rather
    than returned as None - confirm against omlish.lite.check.
    """

    name = _process_states_by_code.get(code)
    return check_not_none(name)
62
+
63
+
64
+ ##
65
+
66
+
67
class SupervisorStates:
    """Named integer codes for the states of the supervisor itself."""

    # The numeric values are compared with `<` elsewhere in the package (e.g. `state < SupervisorStates.RUNNING`), so
    # they must not be renumbered casually.
    FATAL = 2
    RUNNING = 1
    RESTARTING = 0
    SHUTDOWN = -1
72
+
73
+
74
# code -> name table for SupervisorStates, computed once at import time
_supervisor_states_by_code = _names_by_code(SupervisorStates)


def get_supervisor_state_description(code: SupervisorState) -> str:
    """
    Return the SupervisorStates attribute name registered for `code`.

    The lookup result is passed through `check_not_none`, so an unregistered code is presumably rejected there rather
    than returned as None - confirm against omlish.lite.check.
    """

    name = _supervisor_states_by_code.get(code)
    return check_not_none(name)
@@ -0,0 +1,390 @@
1
+ #!/usr/bin/env python3
2
+ # ruff: noqa: UP006 UP007
3
+ # @omlish-amalg _supervisor.py
4
+ import logging
5
+ import signal
6
+ import time
7
+ import typing as ta
8
+
9
+ from omlish.lite.check import check_not_none
10
+ from omlish.lite.logs import configure_standard_logging
11
+
12
+ from .compat import ExitNow
13
+ from .compat import as_string
14
+ from .compat import decode_wait_status
15
+ from .compat import signame
16
+ from .configs import ProcessConfig
17
+ from .configs import ProcessGroupConfig
18
+ from .configs import ServerConfig
19
+ from .context import ServerContext
20
+ from .dispatchers import Dispatcher
21
+ from .events import TICK_EVENTS
22
+ from .events import ProcessGroupAddedEvent
23
+ from .events import ProcessGroupRemovedEvent
24
+ from .events import SupervisorRunningEvent
25
+ from .events import SupervisorStoppingEvent
26
+ from .events import clear_events
27
+ from .events import notify_event
28
+ from .process import ProcessGroup
29
+ from .process import Subprocess
30
+ from .states import SupervisorState
31
+ from .states import SupervisorStates
32
+ from .states import get_process_state_description
33
+
34
+
35
+ log = logging.getLogger(__name__)
36
+
37
+
38
class Supervisor:
    """
    Drives the configured process groups from a single poll loop.

    One ProcessGroup is created per configured group. The loop in `runforever` then repeatedly polls the groups'
    dispatchers, transitions the groups, reaps exited children, reacts to a pending signal and emits tick events. Once
    the context state drops below SupervisorStates.RUNNING the groups are stopped one at a time, and ExitNow is raised
    as soon as no unstopped process is left.
    """

    def __init__(self, context: ServerContext) -> None:
        super().__init__()

        self._context = context

        # last timeslice seen per tick period
        self._ticks: ta.Dict[int, float] = {}

        # process group name -> process group object
        self._process_groups: ta.Dict[str, ProcessGroup] = {}

        # groups still waiting to be stopped, used for priority ordered shutdown
        self._stop_groups: ta.Optional[ta.List[ProcessGroup]] = None

        # set once we detect that we are handling a stop request
        self._stopping = False

        # time of the last 'waiting for x to die' report, used to throttle those reports at stop
        self._last_shutdown_report = 0.

    @property
    def context(self) -> ServerContext:
        """The server context this supervisor was constructed with."""

        return self._context

    def get_state(self) -> SupervisorState:
        """Return the current supervisor state as held by the context."""

        return self._context.state

    def main(self) -> None:
        """Prepare the process environment through the context, then hand over to `run`."""

        ctx = self._context

        if not ctx.first:
            # prevent crash on libdispatch-based systems, at least for the first request
            ctx.cleanup_fds()

        ctx.set_uid_or_exit()

        if ctx.first:
            ctx.set_rlimits_or_exit()

        if not ctx.config.nocleanup:
            # clean up old automatic logs
            ctx.clear_auto_child_logdir()

        self.run()

    def run(self) -> None:
        """
        Reset group bookkeeping, create the configured groups and enter the main loop.

        `context.cleanup()` is always invoked on the way out, whether the loop ends normally or by exception.
        """

        self._process_groups = {}
        self._stop_groups = None

        clear_events()

        ctx = self._context
        try:
            for group_config in ctx.config.groups or []:
                self.add_process_group(group_config)

            ctx.set_signals()

            if not ctx.config.nodaemon and ctx.first:
                ctx.daemonize()

            # the pid file has to be written *after* daemonizing or the recorded pid will be wrong
            ctx.write_pidfile()

            self.runforever()

        finally:
            ctx.cleanup()

    def diff_to_active(self):
        """
        Compare the configured groups with the currently active ones.

        Returns a tuple `(added, changed, removed)` of group configs: configured but not active, configured and active
        but comparing unequal, and active but no longer configured.
        """

        new = self._context.config.groups or []
        cur = [group.config for group in self._process_groups.values()]

        cur_by_name = {cfg.name: cfg for cfg in cur}
        new_by_name = {cfg.name: cfg for cfg in new}

        added = [cfg for cfg in new if cfg.name not in cur_by_name]
        removed = [cfg for cfg in cur if cfg.name not in new_by_name]

        # a config that is not active yet is compared against itself and therefore never counts as changed
        changed = [cfg for cfg in new if cfg != cur_by_name.get(cfg.name, cfg)]

        return added, changed, removed

    def add_process_group(self, config: ProcessGroupConfig) -> bool:
        """Create and register a group for `config`; returns False when a group of that name already exists."""

        name = config.name
        if name in self._process_groups:
            return False

        group = ProcessGroup(config, self._context)
        self._process_groups[name] = group
        group.after_setuid()

        notify_event(ProcessGroupAddedEvent(name))
        return True

    def remove_process_group(self, name: str) -> bool:
        """
        Unregister the group called `name`; returns False while it still has unstopped processes.

        Raises KeyError when no group of that name is registered.
        """

        group = self._process_groups[name]
        if group.get_unstopped_processes():
            return False

        group.before_remove()

        del self._process_groups[name]

        notify_event(ProcessGroupRemovedEvent(name))
        return True

    def get_process_map(self) -> ta.Dict[int, Dispatcher]:
        """Return a fresh dict merging the dispatchers of every registered group."""

        dispatchers: ta.Dict[int, Dispatcher] = {}
        for group in self._process_groups.values():
            dispatchers.update(group.get_dispatchers())
        return dispatchers

    def shutdown_report(self) -> ta.List[Subprocess]:
        """
        Collect the processes that are not stopped yet and log about them at most once every 3 seconds.

        Returns the list of unstopped processes (empty when everything is stopped).
        """

        unstopped: ta.List[Subprocess] = []
        for group in self._process_groups.values():
            unstopped.extend(group.get_unstopped_processes())

        if not unstopped:
            return unstopped

        # throttle 'waiting for x to die' reports
        now = time.time()
        if now > (self._last_shutdown_report + 3):  # every 3 secs
            namestr = ', '.join([as_string(proc.config.name) for proc in unstopped])
            log.info('waiting for %s to die', namestr)
            self._last_shutdown_report = now
            for proc in unstopped:
                state = get_process_state_description(proc.get_state())
                log.debug('%s state: %s', proc.config.name, state)

        return unstopped

    def _ordered_stop_groups_phase_1(self) -> None:
        """Ask the group at the end of the stop queue (the one with the "highest" priority) to stop."""

        if not self._stop_groups:
            return

        self._stop_groups[-1].stop_all()

    def _ordered_stop_groups_phase_2(self) -> None:
        """Drop the group at the end of the stop queue once all of its processes are stopped."""

        # after phase 1 we've transitioned and reaped, so check whether the group we asked to stop is done
        if not self._stop_groups:
            return

        # take the last group (the one with the "highest" priority) off the queue ...
        candidate = self._stop_groups.pop()
        if candidate.get_unstopped_processes():
            # ... and put it back if any of its processes isn't in a stopped state yet
            self._stop_groups.append(candidate)

    def runforever(self) -> None:
        """
        Run the main loop until ExitNow is raised or, when `context.test` is set, after a single iteration.

        Each iteration: start or continue the ordered shutdown if the state is below RUNNING, register dispatchers with
        the poller, poll, dispatch read and write events, transition every group, reap children, handle one pending
        signal and emit tick events.
        """

        notify_event(SupervisorRunningEvent())

        ctx = self._context

        # Timeout handed to the poller on every iteration.
        # NOTE(review): upstream annotates this as being bounded by the smallest TickEvent period (5) - confirm.
        timeout = 1

        while True:
            dispatchers = dict(self.get_process_map())
            pgroups = sorted(self._process_groups.values())

            if ctx.state < SupervisorStates.RUNNING:
                if not self._stopping:
                    # first time: set the stopping flag, snapshot the groups to stop and notify
                    self._stopping = True
                    self._stop_groups = list(pgroups)
                    notify_event(SupervisorStoppingEvent())

                self._ordered_stop_groups_phase_1()

                if not self.shutdown_report():
                    # no unstopped processes left (we're done killing everything): OK to shutdown or reload
                    raise ExitNow

            for fd, dispatcher in dispatchers.items():
                if dispatcher.readable():
                    ctx.poller.register_readable(fd)
                if dispatcher.writable():
                    ctx.poller.register_writable(fd)

            readable_fds, writable_fds = ctx.poller.poll(timeout)

            self._dispatch_read_events(readable_fds, dispatchers)
            self._dispatch_write_events(writable_fds, dispatchers)

            for group in pgroups:
                group.transition()

            self._reap()
            self._handle_signal()
            self._tick()

            if ctx.state < SupervisorStates.RUNNING:
                self._ordered_stop_groups_phase_2()

            if ctx.test:
                break

    def _dispatch_read_events(self, fds: ta.Iterable[int], dispatchers: ta.Dict[int, Dispatcher]) -> None:
        """Deliver read events for `fds`; ExitNow propagates, any other error goes to the dispatcher."""

        poller = self._context.poller
        for fd in fds:
            if fd not in dispatchers:
                # an fd we know nothing about has to be unregistered, otherwise it is reported on every poll, which
                # may cause 100% cpu usage
                log.debug('unexpected read event from fd %r', fd)
                try:
                    poller.unregister_readable(fd)
                except Exception:  # noqa
                    pass
                continue

            try:
                dispatcher = dispatchers[fd]
                log.debug('read event caused by %r', dispatcher)
                dispatcher.handle_read_event()
                if not dispatcher.readable():
                    poller.unregister_readable(fd)
            except ExitNow:
                raise
            except Exception:  # noqa
                dispatchers[fd].handle_error()

    def _dispatch_write_events(self, fds: ta.Iterable[int], dispatchers: ta.Dict[int, Dispatcher]) -> None:
        """Deliver write events for `fds`; ExitNow propagates, any other error goes to the dispatcher."""

        poller = self._context.poller
        for fd in fds:
            if fd not in dispatchers:
                log.debug('unexpected write event from fd %r', fd)
                try:
                    poller.unregister_writable(fd)
                except Exception:  # noqa
                    pass
                continue

            try:
                dispatcher = dispatchers[fd]
                log.debug('write event caused by %r', dispatcher)
                dispatcher.handle_write_event()
                if not dispatcher.writable():
                    poller.unregister_writable(fd)
            except ExitNow:
                raise
            except Exception:  # noqa
                dispatchers[fd].handle_error()

    def _tick(self, now: ta.Optional[float] = None) -> None:
        """Send one or more 'tick' events when the timeslice related to the period for the event type rolls over"""

        if now is None:
            # callers that want a deterministic clock (e.g. unit tests) pass `now` explicitly
            now = time.time()

        for event in TICK_EVENTS:
            period = event.period  # type: ignore

            this_tick = timeslice(period, now)
            last_tick = self._ticks.get(period)

            if last_tick is None:
                # we just started up: remember the current slice, nothing has rolled over yet
                self._ticks[period] = this_tick
                continue

            if this_tick != last_tick:
                self._ticks[period] = this_tick
                notify_event(event(this_tick, self))

    def _reap(self, *, once: bool = False, depth: int = 0) -> None:
        """
        Collect exited children via `context.waitpid()` and finish the matching processes.

        Keeps reaping until there are no more kids to reap, but handles at most `100 - depth` children per call; with
        `once` set only a single child is handled.
        """

        while depth < 100:
            pid, sts = self._context.waitpid()
            if not pid:
                return

            process = self._context.pid_history.get(pid, None)
            if process is None:
                _, msg = decode_wait_status(check_not_none(sts))
                log.info('reaped unknown pid %s (%s)', pid, msg)
            else:
                process.finish(check_not_none(sts))
                del self._context.pid_history[pid]

            if once:
                return

            depth += 1

    def _handle_signal(self) -> None:
        """Fetch one pending signal from the context, if any, and act on it."""

        sig = self._context.get_signal()
        if not sig:
            return

        name = signame(sig)

        if sig in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
            log.warning('received %s indicating exit request', name)
            self._context.set_state(SupervisorStates.SHUTDOWN)
            return

        if sig == signal.SIGHUP:
            if self._context.state == SupervisorStates.SHUTDOWN:
                log.warning('ignored %s indicating restart request (shutdown in progress)', name)
            else:
                log.warning('received %s indicating restart request', name)
                self._context.set_state(SupervisorStates.RESTARTING)
            return

        if sig == signal.SIGCHLD:
            log.debug('received %s indicating a child quit', name)
            return

        if sig == signal.SIGUSR2:
            log.info('received %s indicating log reopen request', name)
            # only the groups' logs are reopened here; a context-level reopen is not wired up
            for group in self._process_groups.values():
                group.reopen_logs()
            return

        log.debug('received %s indicating nothing', name)
327
+
328
+
329
+ def timeslice(period, when):
330
+ return int(when - (when % period))
331
+
332
+
333
+ def main(args=None, test=False):
334
+ configure_standard_logging('INFO')
335
+
336
+ # if we hup, restart by making a new Supervisor()
337
+ first = True
338
+ while True:
339
+ config = ServerConfig.new(
340
+ nodaemon=True,
341
+ groups=[
342
+ ProcessGroupConfig(
343
+ name='default',
344
+ processes=[
345
+ ProcessConfig(
346
+ name='sleep',
347
+ command='sleep 600',
348
+ stdout=ProcessConfig.Log(
349
+ file='/dev/fd/1',
350
+ maxbytes=0,
351
+ ),
352
+ redirect_stderr=True,
353
+ ),
354
+ ProcessConfig(
355
+ name='ls',
356
+ command='ls -al',
357
+ stdout=ProcessConfig.Log(
358
+ file='/dev/fd/1',
359
+ maxbytes=0,
360
+ ),
361
+ redirect_stderr=True,
362
+ ),
363
+ ],
364
+ ),
365
+ ],
366
+ )
367
+
368
+ context = ServerContext(
369
+ config,
370
+ )
371
+
372
+ context.first = first
373
+ context.test = test
374
+ go(context)
375
+ # options.close_logger()
376
+ first = False
377
+ if test or (context.state < SupervisorStates.RESTARTING):
378
+ break
379
+
380
+
381
+ def go(context): # pragma: no cover
382
+ d = Supervisor(context)
383
+ try:
384
+ d.main()
385
+ except ExitNow:
386
+ pass
387
+
388
+
389
+ if __name__ == '__main__':
390
+ main()
@@ -0,0 +1,49 @@
1
+ # ruff: noqa: UP006
2
+ import abc
3
+ import typing as ta
4
+
5
+ from .configs import ProcessConfig
6
+ from .configs import ServerConfig
7
+ from .states import SupervisorState
8
+
9
+
10
class AbstractServerContext(abc.ABC):
    """Abstract interface of a server context: its config, its supervisor state and its pid history."""

    @property
    @abc.abstractmethod
    def config(self) -> ServerConfig:
        """The server configuration."""

        raise NotImplementedError

    @property
    @abc.abstractmethod
    def state(self) -> SupervisorState:
        """The current supervisor state code."""

        raise NotImplementedError

    @abc.abstractmethod
    def set_state(self, state: SupervisorState) -> None:
        """Replace the current supervisor state code with `state`."""

        raise NotImplementedError

    @property
    @abc.abstractmethod
    def pid_history(self) -> ta.Dict[int, 'AbstractSubprocess']:
        """Mapping of pid to the subprocess recorded for it."""

        raise NotImplementedError
29
+
30
+
31
class AbstractSubprocess(abc.ABC):
    """Abstract interface of a supervised subprocess: its pid, config, owning context and a finish hook."""

    @property
    @abc.abstractmethod
    def pid(self) -> int:
        """The process id."""

        raise NotImplementedError

    @property
    @abc.abstractmethod
    def config(self) -> ProcessConfig:
        """The configuration of this process."""

        raise NotImplementedError

    @property
    @abc.abstractmethod
    def context(self) -> AbstractServerContext:
        """The server context this process belongs to."""

        raise NotImplementedError

    @abc.abstractmethod
    def finish(self, sts: int) -> None:
        """Handle the termination of this process; `sts` is the integer status reported for it."""

        raise NotImplementedError
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ominfra
3
- Version: 0.0.0.dev75
3
+ Version: 0.0.0.dev77
4
4
  Summary: ominfra
5
5
  Author: wrmsr
6
6
  License: BSD-3-Clause
@@ -12,8 +12,8 @@ Classifier: Operating System :: OS Independent
12
12
  Classifier: Operating System :: POSIX
13
13
  Requires-Python: ~=3.12
14
14
  License-File: LICENSE
15
- Requires-Dist: omdev ==0.0.0.dev75
16
- Requires-Dist: omlish ==0.0.0.dev75
15
+ Requires-Dist: omdev ==0.0.0.dev77
16
+ Requires-Dist: omlish ==0.0.0.dev77
17
17
  Provides-Extra: all
18
18
  Requires-Dist: paramiko ~=3.5 ; extra == 'all'
19
19
  Requires-Dist: asyncssh ~=2.17 ; (python_version < "3.13") and extra == 'all'
@@ -42,13 +42,28 @@ ominfra/pyremote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
42
42
  ominfra/pyremote/_runcommands.py,sha256=nPtvx_oFHIViYb7V0ius23TF_2_SmEfDzdxa1_5LISc,26405
43
43
  ominfra/pyremote/bootstrap.py,sha256=RvMO3YGaN1E4sgUi1JEtiPak8cjvqtc_vRCq1yqbeZg,3370
44
44
  ominfra/pyremote/runcommands.py,sha256=bviS0_TDIoZVAe4h-_iavbvJtVSFu8lnk7fQ5iasCWE,1571
45
+ ominfra/supervisor/__init__.py,sha256=Y3l4WY4JRi2uLG6kgbGp93fuGfkxkKwZDvhsa0Rwgtk,15
46
+ ominfra/supervisor/__main__.py,sha256=usW9jjq5JPe_2SL8H5PrjDdksO75MX85Ir0HFfb35eM,72
47
+ ominfra/supervisor/_supervisor.py,sha256=ONztv2soL9y73S9Ax8zyG_-0KvEcHzFIbZb73XFkCao,105313
48
+ ominfra/supervisor/compat.py,sha256=sqsvlCNF2iMFdrc0LuTfyCBxXSVACtQx2wCfeHwWvAQ,5044
49
+ ominfra/supervisor/configs.py,sha256=FjgsFijC_ivqJkLua4ZV0UWjDxP3JeDua3aVy4_CnbM,2970
50
+ ominfra/supervisor/context.py,sha256=xh03VN8e4hHj5udjtgUvYnqUticTzCCXdIA0Xp4Ba2c,15335
51
+ ominfra/supervisor/datatypes.py,sha256=cq2p7wnLN0nvKT-jZxaBByqsnCIUz6pX9dPtm69h18Q,4428
52
+ ominfra/supervisor/dispatchers.py,sha256=ye-gPdZ4RnOD2pE0mt2buEwtYdRR1vNa1xvCAMDwsTw,10212
53
+ ominfra/supervisor/events.py,sha256=wT-gPfvv2HCAyQXMq3jiek17Jq6kAZb0U2hekzjf3ks,7743
54
+ ominfra/supervisor/exceptions.py,sha256=jq8Md--zmAHri1BB2XeDPFcTur81IRwArOcZoP7-6W0,746
55
+ ominfra/supervisor/poller.py,sha256=oqNEA7i2XXtERBv552sr29a6mlogmosWjeGOZSul5Kg,7273
56
+ ominfra/supervisor/process.py,sha256=phucIv2a-LHXypY3kJ9fCsKEki9G9XdiJtXCVMgGcZI,31291
57
+ ominfra/supervisor/states.py,sha256=JMxXYTZhJkMNQZ2tTV6wId7wrvnWgiZteskACprKskM,1374
58
+ ominfra/supervisor/supervisor.py,sha256=VAClZWVrZzZ6P0i6TIEKcyzI7WwWl5LRbnXngA4HPH4,13801
59
+ ominfra/supervisor/types.py,sha256=ec62QG0CDJc0XNxCnf3lXxhsxrr4CCScLPI-1SpQjlc,1141
45
60
  ominfra/tailscale/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
61
  ominfra/tailscale/cli.py,sha256=Ltg6RVFsMLLPjLzoGwM6sxjmwjEVEYHAdrqmCc4N1HM,3174
47
62
  ominfra/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
63
  ominfra/tools/listresources.py,sha256=L4t5rszm9ulcdWyr7n48_R9d5Etg4S2a4WQhlbHDtnQ,6106
49
- ominfra-0.0.0.dev75.dist-info/LICENSE,sha256=B_hVtavaA8zCYDW99DYdcpDLKz1n3BBRjZrcbv8uG8c,1451
50
- ominfra-0.0.0.dev75.dist-info/METADATA,sha256=hVfKEKRj6MjkPNueUDTbFdLAdaP9WKvmfRRBeUsa5V0,799
51
- ominfra-0.0.0.dev75.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
52
- ominfra-0.0.0.dev75.dist-info/entry_points.txt,sha256=kgecQ2MgGrM9qK744BoKS3tMesaC3yjLnl9pa5CRczg,37
53
- ominfra-0.0.0.dev75.dist-info/top_level.txt,sha256=E-b2OHkk_AOBLXHYZQ2EOFKl-_6uOGd8EjeG-Zy6h_w,8
54
- ominfra-0.0.0.dev75.dist-info/RECORD,,
64
+ ominfra-0.0.0.dev77.dist-info/LICENSE,sha256=B_hVtavaA8zCYDW99DYdcpDLKz1n3BBRjZrcbv8uG8c,1451
65
+ ominfra-0.0.0.dev77.dist-info/METADATA,sha256=F3x-psxct-590MisOe_YAKVshGjbDwiVnPHTs8CVYpw,799
66
+ ominfra-0.0.0.dev77.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
67
+ ominfra-0.0.0.dev77.dist-info/entry_points.txt,sha256=kgecQ2MgGrM9qK744BoKS3tMesaC3yjLnl9pa5CRczg,37
68
+ ominfra-0.0.0.dev77.dist-info/top_level.txt,sha256=E-b2OHkk_AOBLXHYZQ2EOFKl-_6uOGd8EjeG-Zy6h_w,8
69
+ ominfra-0.0.0.dev77.dist-info/RECORD,,