cgse-common 2024.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
egse/process.py ADDED
@@ -0,0 +1,460 @@
1
+ """
2
+ This module provides functions and classes to work with processes and sub-processes.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import contextlib
7
+ import datetime
8
+ import logging
9
+ import os
10
+ import subprocess
11
+ import threading
12
+ import time
13
+ import uuid
14
+ from typing import List
15
+ from typing import Optional
16
+
17
+ import psutil
18
+ from prometheus_client import Gauge
19
+
20
+ from egse.bits import humanize_bytes
21
+ from egse.system import humanize_seconds
22
+
23
+ LOGGER = logging.getLogger(__name__)
24
+
25
+
26
class ProcessStatus:
    """
    The ProcessStatus holds the status information of the currently running process.

    The information is queried from `psutil` for the process that creates this object and is
    mirrored in a set of Prometheus gauges that are available from the `metrics` attribute.

    The available information is the following:

    * pid: the process identifier
    * uptime: the process up-time as a floating point number expressed in seconds
    * uuid: the UUID1 that was generated when this object was created
    * memory info: memory information on the process (RSS and USS)
    * cpu usage, percentage and count (number of physical cores)

    Args:
        metrics_prefix: optional prefix for the names of the Prometheus metrics. The prefix is
            converted to lower case and separated from the metric name by an underscore.
    """

    def __init__(self, *, metrics_prefix: Optional[str] = None):
        self._process = psutil.Process()
        # psutil returns None here when the number of physical cores cannot be determined
        self._cpu_count = psutil.cpu_count(logical=False)
        with self._process.oneshot():
            self._pid: int = self._process.pid
            self._create_time: float = self._process.create_time()
            # not sure if we need to use interval=0.1 as an argument in the next call
            self._cpu_percent: float = self._process.cpu_percent()
            self._cpu_times = self._process.cpu_times()
            self._uptime = (
                datetime.datetime.now(tz=datetime.timezone.utc).timestamp() - self._create_time
            )
            self._memory_info = self._process.memory_full_info()
        self._uuid: uuid.UUID = uuid.uuid1()

        metrics_prefix = f"{metrics_prefix.lower()}_" if metrics_prefix else ""

        self.metrics = dict(
            PSUTIL_NUMBER_OF_CPU=Gauge(
                f"{metrics_prefix}psutil_number_of_cpu",
                "Number of physical cores, excluding hyper thread CPUs"
            ),
            PSUTIL_CPU_TIMES=Gauge(
                f"{metrics_prefix}psutil_cpu_times_seconds",
                "Accumulated process time in seconds", ["type"]
            ),
            PSUTIL_CPU_PERCENT=Gauge(
                f"{metrics_prefix}psutil_cpu_percent",
                "The current process CPU utilization as a percentage"
            ),
            PSUTIL_PID=Gauge(
                f"{metrics_prefix}psutil_pid", "Process ID"
            ),
            PSUTIL_MEMORY_INFO=Gauge(
                f"{metrics_prefix}psutil_memory_info_bytes",
                "Memory info for this instrumented process",
                ["type"]
            ),
            PSUTIL_NUMBER_OF_THREADS=Gauge(
                f"{metrics_prefix}psutil_number_of_threads",
                "Return the number of Thread objects currently alive"
            ),
            # NOTE: the spelling of this metric name is left untouched, renaming it would change
            # the name under which the metric is exported.
            PSUTIL_PROC_UPTIME=Gauge(
                f"{metrics_prefix}psutil_proccess_uptime",
                "Return the time in seconds that the process is up and running"
            ),
        )

        # A Gauge converts its value to a float, which fails for None. Leave the gauge at its
        # initial value when the number of physical cores is unknown.
        if self._cpu_count is not None:
            self.metrics["PSUTIL_NUMBER_OF_CPU"].set(self._cpu_count)
        self.metrics["PSUTIL_PID"].set(self._process.pid)

        self.update()

    def update_metrics(self):
        """
        Updates the metrics that are taken from the psutil module.

        The gauges are set from the values that were cached by the last call to `update()`,
        this method does not query the process itself.

        The following metrics are never updated since they are not changed during a
        process execution:

        * PSUTIL_NUMBER_OF_CPU
        * PSUTIL_PID
        """

        self.metrics["PSUTIL_MEMORY_INFO"].labels(type="rss").set(self._memory_info.rss)
        self.metrics["PSUTIL_MEMORY_INFO"].labels(type="uss").set(self._memory_info.uss)
        self.metrics["PSUTIL_CPU_TIMES"].labels(type="user").set(self._cpu_times.user)
        self.metrics["PSUTIL_CPU_TIMES"].labels(type="system").set(self._cpu_times.system)
        self.metrics["PSUTIL_CPU_PERCENT"].set(self._cpu_percent)
        self.metrics["PSUTIL_NUMBER_OF_THREADS"].set(threading.active_count())
        self.metrics["PSUTIL_PROC_UPTIME"].set(self._uptime)

    def update(self):
        """
        Updates those values that change during execution, like memory usage, CPU times, ...

        This call will also update the metrics!

        Returns:
            the ProcessStatus object, self.
        """
        # oneshot() lets psutil collect the different values in one go instead of
        # querying the process separately for each of them.
        with self._process.oneshot():
            self._cpu_percent = self._process.cpu_percent()
            self._cpu_times = self._process.cpu_times()
            self._memory_info = self._process.memory_full_info()
        self._uptime = time.time() - self._create_time

        self.update_metrics()

        return self

    def as_dict(self):
        """Returns all process information as a dictionary.

        This runs the `update()` method first to bring the numbers up-to-date.
        """
        self.update()
        return {
            "PID": self._pid,
            "Up": self._uptime,
            "UUID": self._uuid,
            "RSS": self._memory_info.rss,
            "USS": self._memory_info.uss,
            "CPU User": self._cpu_times.user,
            "CPU System": self._cpu_times.system,
            "CPU count": self._cpu_count,
            "CPU%": self._cpu_percent,
        }

    def __str__(self):
        # bring the numbers up-to-date before they are formatted
        self.update()
        msg = (
            f"PID: {self._pid}, "
            f"Up: {humanize_seconds(self._uptime)}, "
            f"UUID: {self._uuid}, "
            f"RSS: {humanize_bytes(self._memory_info.rss)}, "
            f"USS: {humanize_bytes(self._memory_info.uss)}, "
            f"CPU User: {humanize_seconds(self._cpu_times.user)}, "
            f"CPU System: {humanize_seconds(self._cpu_times.system)}, "
            f"CPU Count: {self._cpu_count}, "
            f"CPU%: {self._cpu_percent}"
        )
        return msg
164
+
165
+
166
+ # * can we restart the same sub process?
167
+ # * do we need to pass the additional arguments to the constructor or to the execute method?
168
+ # When we can restart/re-execute a subprocess, we might want to do that with additional
169
+ # arguments, e.g. to set a debugging flag or to start in simulator mode. Then we will need to
170
+ # do that in the execute method.
171
+ # * Process should have a notion of UUID, which it can request at start-up to communicate to the
172
+ # process manager which can then check if it's known already or a new process that was started
173
+ # (possibly on another computer)
174
+
175
+
176
class SubProcess:
    """
    A SubProcess that is usually started by the ProcessManager.

    Usage:

        hexapod_ui = SubProcess("MyApp", [sys.executable, "-m", "egse.hexapod.hexapod_ui"])
        hexapod_ui.execute()

    The sub-process is only started by the `execute()` method. Before that call, the query
    methods report that there is no process (None, False, an empty list or an empty string).

    Args:
        name: a name for this sub-process, only used for identification and in log messages
        cmd: the command to execute as a list of command line parts
        args: additional command line arguments that are appended to the command
        shell: if True (default), the command is joined into one string and run through the shell
        stdout: passed as `stdout` to `subprocess.Popen`
        stderr: passed as `stderr` to `subprocess.Popen`
    """

    def __init__(
        self, name: str, cmd: List, args: List = None, shell: bool = True,
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    ):
        self._popen = None
        self._sub_process: psutil.Process | None = None
        self._name = name
        self._cmd = cmd
        self._args = args or []
        self._shell = shell
        self._stdout = stdout
        self._stderr = stderr

    def _popen_command(self):
        """Returns the command in the form that `subprocess.Popen` expects for the shell setting."""
        parts = [str(part) for part in (*self._cmd, *self._args)]
        # The shell takes one command line string. Without a shell the parts must be passed as a
        # sequence, otherwise the complete string is interpreted as the name of the program.
        return " ".join(parts) if self._shell else parts

    def execute(self, detach_from_parent=False) -> bool:
        """ Execute the sub-process.

        Args:
            - detach_from_parent: Boolean indicating whether the sub-process should be detached from the
                                  parent process. If set to False, the sub-process will be killed whenever the
                                  parent process is interrupted or stopped.
                                  NOTE: this argument is currently not used by the implementation.

        Returns:
            True if the sub-process was started, False if the command could not be executed.
        """

        try:
            command = self._popen_command()
            LOGGER.debug(f"SubProcess command: {command}")
            self._popen = subprocess.Popen(
                command,
                env=os.environ,
                shell=self._shell,  # executable='/bin/bash',
                stdout=self._stdout,
                stderr=self._stderr,
                stdin=subprocess.DEVNULL,
            )
            self._sub_process = psutil.Process(self._popen.pid)

            LOGGER.debug(
                f"SubProcess started: {command}, pid={self._popen.pid}, sub_process="
                f"{self._sub_process} [pid={self._sub_process.pid}]"
            )
        except KeyError:
            LOGGER.error(f"Unknown client process: {self._name}", exc_info=True)
            return False
        except (PermissionError, FileNotFoundError) as exc:
            # This error is raised when the command is not an executable or is not found
            LOGGER.error(f"Could not execute sub-process: {exc}", exc_info=True)
            return False
        return True

    @property
    def name(self):
        """The name that was given to this sub-process."""
        return self._name

    @property
    def pid(self) -> Optional[int]:
        """The process ID of the sub-process, or None when it has not been started."""
        return self._sub_process.pid if self._sub_process is not None else None

    def cmdline(self) -> str:
        """Returns the command line of the sub-process, empty when it has not been started."""
        if self._sub_process is None:
            return ""
        return " ".join(self._sub_process.cmdline())

    def children(self, recursive: bool = True) -> List:
        """Returns the children of the sub-process, an empty list when it has not been started."""
        if self._sub_process is None:
            return []
        return self._sub_process.children(recursive=recursive)

    def is_child(self, pid: int):
        """Returns True if the given process ID belongs to a (grand)child of the sub-process."""
        if self._sub_process is None:
            return False
        return any(pid == p.pid for p in self._sub_process.children(recursive=True))

    def is_running(self):
        """
        Check if this process is still running.

        * checks if process exists
        * checks if process is not a zombie and is not dead

        Returns:
            True if the process is running.
        """
        if self._sub_process is None:
            return False
        try:
            if self._sub_process.is_running():
                # it still might be a zombie process
                if self._sub_process.status() in [psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD]:
                    LOGGER.warning("The sub-process is dead or a zombie.")
                    return False
                return True
        except psutil.NoSuchProcess:
            # the process disappeared between the two checks
            return False
        LOGGER.debug(f"Return value of the sub-process: {self._popen.returncode}")
        return False

    def exists(self) -> bool:
        """
        Checks if the sub-process exists by checking if its process ID exists.

        Returns:
            True if the sub-process exists, False if not or when it has not been started.
        """
        pid = self.pid
        return pid is not None and psutil.pid_exists(pid)

    def quit(self):
        """
        Terminate the sub-process and its children.

        This delegates to `reap_children()` with its default timeout.

        Returns:
            The number of processes that are still alive, i.e. zero when all processes are gone.
        """
        return self.reap_children()

    def reap_children(self, timeout=3):
        """Tries hard to terminate and ultimately kill this process and its direct children.

        The processes are first asked to terminate. Those that are still alive after `timeout`
        seconds are killed and given another `timeout` seconds to disappear.

        Args:
            timeout: the number of seconds to wait after each of the two attempts

        Returns:
            The number of processes that survived, zero when nothing is left running or when the
            sub-process has not been started.
        """
        if self._sub_process is None:
            return 0

        def on_terminate(proc):
            LOGGER.info(f"process {proc} terminated with exit code {proc.returncode}")

        procs = [self._sub_process]
        try:
            procs.extend(self._sub_process.children())
        except psutil.NoSuchProcess:
            # the sub-process is gone already, wait_procs() below will report it as gone
            pass

        LOGGER.info(f"Processes: {procs}")

        # send SIGTERM
        for p in procs:
            try:
                LOGGER.info(f"Terminating process {p}")
                p.terminate()
            except psutil.NoSuchProcess:
                pass
        gone, alive = psutil.wait_procs(procs, timeout=timeout, callback=on_terminate)
        if not alive:
            return 0

        # send SIGKILL
        for p in alive:
            LOGGER.info(f"process {p} survived SIGTERM; trying SIGKILL")
            try:
                p.kill()
            except psutil.NoSuchProcess:
                pass
        gone, alive = psutil.wait_procs(alive, timeout=timeout, callback=on_terminate)

        # give up
        for p in alive:
            LOGGER.info(f"process {p} survived SIGKILL; giving up")

        # return code indicates how many processes are still running
        return len(alive)

    def returncode(self):
        """
        Check if the sub-process is terminated and return its return code or None when the process
        is still running or has not been started.
        """
        if self._popen is None:
            return None
        return self._popen.poll()

    def communicate(self) -> tuple[Optional[str], Optional[str]]:
        """
        Wait for the sub-process to terminate and return its decoded output.

        Returns:
            A tuple (output, error). An element is None when nothing was captured for that stream,
            both are None when the sub-process has not been started.
        """
        if self._popen is None:
            return None, None
        output, error = self._popen.communicate()
        return output.decode() if output else None, error.decode() if error else None
344
+
345
+
346
def _cmdline_matches(
    cmdline: List[str], items: List[str], contains: bool = True, case_sensitive: bool = False
) -> bool:
    """Returns True if every item in 'items' matches at least one part of the command line."""
    if not case_sensitive:
        cmdline = [part.lower() for part in cmdline]
        items = [item.lower() for item in items]

    if contains:
        return all(any(item in part for part in cmdline) for item in items)

    # without 'contains' an item must be equal to one of the command line parts
    return all(item in cmdline for item in items)


def is_process_running(items: List[str] | str, contains: bool = True, case_sensitive: bool = False):
    """
    Check if there is any running process that contains the given items in its commandline.

    Loops over all running processes and tries to match all items in 'items' to the command line
    of the process. If all 'items' can be matched to a process, the function returns True.

    Processes that disappear while they are inspected, that are zombies, or for which access is
    denied are skipped.

    Args:
        items: a string or a list of strings that should match command line parts
        contains: if True, the match is done with 'in' otherwise '=='
        case_sensitive: if True, the match shall be case-sensitive

    Returns:
        True if there exists a running process with the given items, False otherwise.
    """

    if not items:
        LOGGER.warning("Expected at least one item in 'items', none were given. False returned.")
        return False

    items = [items] if isinstance(items, str) else items

    for proc in psutil.process_iter():
        with contextlib.suppress(psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # the command line is requested only once for each process
            if _cmdline_matches(proc.cmdline(), items, contains, case_sensitive):
                return True

    return False


def get_process_info(items: List[str] | str, contains: bool = True, case_sensitive: bool = False) -> List:
    """
    Loops over all running processes and tries to match each item in 'items' to the command line
    of the process. Any process where all 'items' can be matched will end up in the response.

    Returns a list with the process info (PID, cmdline, create_time) for any processes where all 'items' match
    the process command line. An empty list is returned when not 'all the items' match for any of the
    processes.

    Processes that disappear while they are inspected, that are zombies, or for which access is
    denied are skipped.

    Examples:
        >>> get_process_info(items=["feesim"])
        [
            {
                'pid': 10166,
                'cmdline': [
                    '/Library/Frameworks/Python.framework/Versions/3.8/Resources/Python.app/Contents/MacOS/Python',
                    '/Users/rik/git/plato-common-egse/venv38/bin/feesim',
                    'start',
                    '--zeromq'
                ],
                'create_time': 1664898231.915995
            }
        ]

        >>> get_process_info(items=["dpu_cs", "--zeromq"])
        [
            {
                'pid': 11595,
                'cmdline': [
                    '/Library/Frameworks/Python.framework/Versions/3.8/Resources/Python.app/Contents/MacOS/Python',
                    '/Users/rik/git/plato-common-egse/venv38/bin/dpu_cs',
                    'start',
                    '--zeromq'
                ],
                'create_time': 1664898973.542281
            }
        ]

    Args:
        items: a string or a list of strings that should match command line items
        contains: if True, the match is done with 'in' otherwise '=='
        case_sensitive: if True, the match shall be case-sensitive

    Returns:
        A list of process info entries.

    """
    response = []

    if not items:
        LOGGER.warning("Expected at least one item in 'items', none were given. Empty list returned.")
        return response

    items = [items] if isinstance(items, str) else items

    for proc in psutil.process_iter():
        with contextlib.suppress(psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # the command line is requested only once for each process
            if _cmdline_matches(proc.cmdline(), items, contains, case_sensitive):
                response.append(proc.as_dict(attrs=['pid', 'cmdline', 'create_time']))

    return response