nv-ingest 2025.8.18.dev20250818__py3-none-any.whl → 2025.8.20.dev20250820__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ that the pipeline requires, such as message brokers and other infrastructure.
 """

 import logging
+import os
 import multiprocessing
 import socket
 from nv_ingest_api.util.message_brokers.simple_message_broker.broker import SimpleMessageBroker
@@ -35,21 +36,42 @@ def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
         The process running the SimpleMessageBroker server.
     """

+    # Resolve host/port early for pre-flight checks
+    broker_params = broker_client.get("broker_params", {})
+    max_queue_size = broker_params.get("max_queue_size", 10000)
+    server_host = broker_client.get("host", "0.0.0.0")
+    server_port = broker_client.get("port", 7671)
+
+    # Pre-flight: if something is already listening on the target port, warn instead of failing.
+    # This avoids noisy stack traces from a failing child process when tests/pipeline are run repeatedly.
+    def _is_port_open(host: str, port: int) -> bool:
+        check_host = "127.0.0.1" if host in ("0.0.0.0", "::") else host
+        try:
+            with socket.create_connection((check_host, port), timeout=0.5):
+                return True
+        except Exception:
+            return False
+
+    if _is_port_open(server_host, server_port):
+        logger.warning(
+            f"SimpleMessageBroker port already in use at {server_host}:{server_port}; "
+            f"continuing to spawn a broker process (tests expect a Process to be returned)"
+        )
+
     def broker_server():
-        # Use max_queue_size from broker_params or default to 10000.
-        broker_params = broker_client.get("broker_params", {})
-        max_queue_size = broker_params.get("max_queue_size", 10000)
-        server_host = broker_client.get("host", "0.0.0.0")
-        server_port = broker_client.get("port", 7671)
-        # Optionally, set socket options here for reuse.
+        # Optionally, set socket options here for reuse (note: binding occurs in server __init__).
         server = SimpleMessageBroker(server_host, server_port, max_queue_size)
-        # Enable address reuse on the server socket.
-        server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        try:
+            server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        except Exception:
+            pass
         server.serve_forever()

     p = multiprocessing.Process(target=broker_server)
-    p.daemon = False
+    # If we're launching from inside the pipeline subprocess, mark daemon so the
+    # broker dies automatically when the subprocess exits.
+    p.daemon = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1"
     p.start()
-    logger.info(f"Started SimpleMessageBroker server in separate process on port {broker_client.get('port', 7671)}")
+    logger.info(f"Started SimpleMessageBroker server in separate process on port {server_port}")

     return p
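
The pre-flight check added above probes the port by attempting a short-lived TCP connection; wildcard bind addresses ("0.0.0.0", "::") are not connectable, so the probe targets loopback instead. A minimal, self-contained sketch of the same idiom (names here are illustrative, not part of the package):

    import socket

    def is_port_open(host: str, port: int, timeout: float = 0.5) -> bool:
        # Wildcard bind addresses cannot be connected to; probe loopback instead.
        check_host = "127.0.0.1" if host in ("0.0.0.0", "::") else host
        try:
            with socket.create_connection((check_host, port), timeout=timeout):
                return True  # something accepted the connection: the port is taken
        except OSError:
            return False  # refused or timed out: the port looks free

    if __name__ == "__main__":
        print(is_port_open("0.0.0.0", 7671))  # 7671 is the broker's default port

Note that the check races with other launchers (a port free now can be taken a moment later), which is consistent with the code above only warning rather than refusing to spawn.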
@@ -17,14 +17,15 @@ import sys
 import time
 from ctypes import CDLL
 from datetime import datetime
-from typing import Union, Tuple, Optional, TextIO
+from typing import Union, Tuple, Optional, TextIO, Any
 import json

 import ray
 from ray import LoggingConfig

-from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
-    RayPipeline,
+from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker
+from nv_ingest.framework.orchestration.process.termination import (
+    kill_pipeline_process_group as _kill_pipeline_process_group,
 )
 from nv_ingest.pipeline.ingest_pipeline import IngestPipelineBuilder
 from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
@@ -250,7 +251,7 @@ def launch_pipeline(
     block: bool = True,
     disable_dynamic_scaling: Optional[bool] = None,
     dynamic_memory_threshold: Optional[float] = None,
-) -> Tuple[Union[RayPipeline, None], Optional[float]]:
+) -> Tuple[Union[Any, None], Optional[float]]:
     """
     Launch a pipeline using the provided configuration.

@@ -270,8 +271,8 @@ def launch_pipeline(

     Returns
     -------
-    Tuple[Union[RayPipeline, None], Optional[float]]
-        Raw RayPipeline object and elapsed time. For blocking execution,
+    Tuple[Union[Any, None], Optional[float]]
+        Raw pipeline object (type elided to avoid circular import) and elapsed time. For blocking execution,
         returns (None, elapsed_time). For non-blocking, returns (pipeline, None).
     """
     logger.info("Starting pipeline setup")
@@ -328,17 +329,37 @@ def launch_pipeline(

     # Set up the ingestion pipeline
     start_abs = datetime.now()
-    ingest_pipeline = IngestPipelineBuilder(pipeline_config)
-    ingest_pipeline.build()
+    ingest_pipeline = None
+    try:
+        ingest_pipeline = IngestPipelineBuilder(pipeline_config)
+        ingest_pipeline.build()

-    # Record setup time
-    end_setup = start_run = datetime.now()
-    setup_time = (end_setup - start_abs).total_seconds()
-    logger.info(f"Pipeline setup complete in {setup_time:.2f} seconds")
+        # Record setup time
+        end_setup = start_run = datetime.now()
+        setup_time = (end_setup - start_abs).total_seconds()
+        logger.info(f"Pipeline setup complete in {setup_time:.2f} seconds")

-    # Run the pipeline
-    logger.debug("Running pipeline")
-    ingest_pipeline.start()
+        # Run the pipeline
+        logger.debug("Running pipeline")
+        ingest_pipeline.start()
+    except Exception as e:
+        # Ensure any partial startup is torn down
+        logger.error(f"Pipeline startup failed, initiating cleanup: {e}", exc_info=True)
+        try:
+            if ingest_pipeline is not None:
+                try:
+                    ingest_pipeline.stop()
+                except Exception:
+                    pass
+        finally:
+            try:
+                if ray.is_initialized():
+                    ray.shutdown()
+                    logger.info("Ray shutdown complete after startup failure.")
+            finally:
+                pass
+        # Re-raise to surface failure to caller
+        raise

     if block:
         try:
@@ -350,6 +371,14 @@ def launch_pipeline(
             ingest_pipeline.stop()
             ray.shutdown()
             logger.info("Ray shutdown complete.")
+        except Exception as e:
+            logger.error(f"Unexpected error during pipeline run: {e}", exc_info=True)
+            try:
+                ingest_pipeline.stop()
+            finally:
+                if ray.is_initialized():
+                    ray.shutdown()
+            raise

         # Record execution times
         end_run = datetime.now()
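
Both new except blocks in launch_pipeline follow the same teardown shape: a best-effort stop of whatever was partially started, Ray shutdown only if Ray was actually initialized, then a re-raise so the caller still observes the original failure. A stripped-down sketch of that pattern, using stand-in callables rather than the real builder:

    import logging

    logger = logging.getLogger(__name__)

    def start_with_teardown(build, start, stop, shutdown_runtime):
        """Start a resource; on any failure, tear down partial state and re-raise."""
        resource = None
        try:
            resource = build()
            start(resource)
            return resource
        except Exception:
            logger.exception("Startup failed; cleaning up partial state")
            try:
                if resource is not None:
                    stop(resource)  # best-effort; must not mask the original error
            finally:
                shutdown_runtime()  # e.g. ray.shutdown() guarded by ray.is_initialized()
            raise  # surface the original failure to the caller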
@@ -392,12 +421,34 @@ def run_pipeline_process(
     if stderr:
         sys.stderr = stderr

+    # Ensure the subprocess is killed if the parent dies to avoid hangs
+    try:
+        set_pdeathsig(signal.SIGKILL)
+    except Exception as e:
+        logger.debug(f"set_pdeathsig not available or failed: {e}")
+
     # Create a new process group so we can terminate the entire subtree cleanly
     try:
         os.setpgrp()
     except Exception as e:
         logger.debug(f"os.setpgrp() not available or failed: {e}")

+    # Install signal handlers for graceful shutdown in the subprocess
+    def _handle_signal(signum, frame):
+        try:
+            _safe_log(logging.INFO, f"Received signal {signum}; shutting down Ray and exiting...")
+            if ray.is_initialized():
+                ray.shutdown()
+        finally:
+            # Exit immediately after best-effort cleanup
+            os._exit(0)
+
+    try:
+        signal.signal(signal.SIGINT, _handle_signal)
+        signal.signal(signal.SIGTERM, _handle_signal)
+    except Exception as e:
+        logger.debug(f"Signal handlers not set: {e}")
+
     # Test output redirection
     print("DEBUG: Direct print to stdout - should appear in parent process")
     sys.stderr.write("DEBUG: Direct write to stderr - should appear in parent process\n")
@@ -405,93 +456,40 @@ def run_pipeline_process(
     # Test logging output
     logger.info("DEBUG: Logger info - may not appear if logging handlers not redirected")

+    # If requested, start the simple broker inside this subprocess so it shares the process group
+    broker_proc = None
     try:
+        if os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1":
+            try:
+                # Only launch if the config requests it
+                if getattr(pipeline_config, "pipeline", None) and getattr(
+                    pipeline_config.pipeline, "launch_simple_broker", False
+                ):
+                    _safe_log(logging.INFO, "Starting SimpleMessageBroker inside subprocess")
+                    broker_proc = start_simple_message_broker({})
+            except Exception as e:
+                _safe_log(logging.ERROR, f"Failed to start SimpleMessageBroker in subprocess: {e}")
+                # Continue without broker; launch will fail fast if required
+
         # Launch the pipeline (blocking)
         launch_pipeline(pipeline_config, block=True)

     except Exception as e:
         logger.error(f"Subprocess pipeline execution failed: {e}")
         raise
-
-
-def kill_pipeline_process_group(process: multiprocessing.Process) -> None:
-    """
-    Kill a pipeline process and its entire process group.
-
-    Note: Although the type annotation specifies a multiprocessing.Process for
-    compatibility with existing tests and public API, this function is robust
-    to also being passed a raw PID (int) at runtime.
-
-    Behavior:
-    - Send SIGTERM to the process group; if still alive after grace period, escalate to SIGKILL.
-    - If a Process object is provided, attempt to join() with timeouts.
-    - If only a PID is provided, skip joins and just signal the process group with grace/force.
-
-    Parameters
-    ----------
-    process : multiprocessing.Process
-        Process handle (or a raw PID int) for the process whose process group should be terminated.
-    """
-    # Resolve PID and optional Process handle
-    proc: Optional[object] = None
-    pid: Optional[int] = None
-
-    if isinstance(process, int):
-        pid = process
-    elif hasattr(process, "pid"):
-        # Duck-type any object that exposes a pid (e.g., multiprocessing.Process or Mock)
-        proc = process
-        try:
-            pid = int(getattr(proc, "pid"))
-        except Exception as e:
-            raise AttributeError(f"Invalid process-like object without usable pid: {e}")
-    else:
-        raise AttributeError(
-            "kill_pipeline_process_group expects a multiprocessing.Process or a PID int (process-like object with .pid)"
-        )
-
-    # If we have a Process handle and it's already dead, nothing to do
-    if proc is not None and hasattr(proc, "is_alive") and not proc.is_alive():
-        _safe_log(logging.DEBUG, "Process already terminated")
-        return
-
-    if pid is None:
-        # Defensive guard; should not happen
-        raise AttributeError("Unable to determine PID for process group termination")
-
-    _safe_log(logging.INFO, f"Terminating pipeline process group (PID: {pid})")
-    try:
-        # Send graceful termination to the entire process group
-        os.killpg(os.getpgid(pid), signal.SIGTERM)
-
-        # If we have a Process handle, give it a chance to exit cleanly
-        if proc is not None and hasattr(proc, "join"):
+    finally:
+        # Best-effort: if we created a broker here and the pipeline exits normally,
+        # attempt a graceful terminate. In failure/termination paths the process group kill
+        # from parent or signal handler will take care of it.
+        if broker_proc is not None:
             try:
-                proc.join(timeout=5.0)
+                if hasattr(broker_proc, "is_alive") and broker_proc.is_alive():
+                    broker_proc.terminate()
             except Exception:
                 pass
-            still_alive = getattr(proc, "is_alive", lambda: True)()
-        else:
-            # Without a handle, provide a small grace period
-            time.sleep(2.0)
-            # Best-effort check: if getpgid fails, it's gone
-            try:
-                _ = os.getpgid(pid)
-                still_alive = True
-            except Exception:
-                still_alive = False

-        if still_alive:
-            _safe_log(logging.WARNING, "Process group did not terminate gracefully, using SIGKILL")
-            try:
-                os.killpg(os.getpgid(pid), signal.SIGKILL)
-            finally:
-                if proc is not None and hasattr(proc, "join"):
-                    try:
-                        proc.join(timeout=3.0)
-                    except Exception:
-                        pass
-
-    except (ProcessLookupError, OSError) as e:
-        # Process or group may already be gone
-        _safe_log(logging.DEBUG, f"Process group already terminated or not found: {e}")
+
+def kill_pipeline_process_group(process: multiprocessing.Process) -> None:
+    """Backward-compatible shim that delegates to process.termination implementation."""
+    _safe_log(logging.DEBUG, "Delegating kill_pipeline_process_group to process.termination module")
+    _kill_pipeline_process_group(process)
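
The removed function body now lives in nv_ingest.framework.orchestration.process.termination (the new file at the end of this diff), while the shim keeps the old import path working. Both of these should resolve after the change:

    # Legacy import path, served by the shim above:
    from nv_ingest.framework.orchestration.process.execution import kill_pipeline_process_group

    # New canonical location:
    from nv_ingest.framework.orchestration.process.termination import kill_pipeline_process_group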
@@ -11,11 +11,16 @@ using the configured strategy pattern.
 """

 import logging
+import atexit
+import multiprocessing
+import os
+import signal
 from typing import Optional

 from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
 from nv_ingest.framework.orchestration.execution.options import ExecutionOptions, ExecutionResult
 from nv_ingest.framework.orchestration.process.strategies import ProcessExecutionStrategy
+from nv_ingest.framework.orchestration.process.strategies import SubprocessStrategy
 from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker

 logger = logging.getLogger(__name__)
@@ -45,6 +50,8 @@ class PipelineLifecycleManager:
             The strategy to use for pipeline execution.
         """
         self.strategy = strategy
+        # Track broker process so we can terminate it during teardown
+        self._broker_process: Optional[multiprocessing.Process] = None

     def start(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
         """
@@ -74,8 +81,18 @@ class PipelineLifecycleManager:
        """
        logger.info("Starting pipeline lifecycle")

+        # If running the pipeline in a subprocess with the broker enabled, ensure the broker
+        # is launched in the child process group by signaling via environment variable
+        prev_env = None
+        set_env = False
+        if getattr(config, "pipeline", None) and getattr(config.pipeline, "launch_simple_broker", False):
+            if isinstance(self.strategy, SubprocessStrategy):
+                prev_env = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS")
+                os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = "1"
+                set_env = True
+
         try:
-            # Start message broker if configured
+            # Start message broker if configured (may defer to subprocess based on env)
             self._setup_message_broker(config)

             # Execute pipeline using the configured strategy
@@ -87,6 +104,15 @@ class PipelineLifecycleManager:
         except Exception as e:
             logger.error(f"Failed to start pipeline lifecycle: {e}")
             raise RuntimeError(f"Pipeline startup failed: {e}") from e
+        finally:
+            if set_env:
+                if prev_env is None:
+                    try:
+                        del os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"]
+                    except KeyError:
+                        pass
+                else:
+                    os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = prev_env

     def _setup_message_broker(self, config: PipelineConfigSchema) -> None:
         """
@@ -98,8 +124,21 @@ class PipelineLifecycleManager:
             Pipeline configuration containing broker settings.
         """
         if config.pipeline.launch_simple_broker:
+            # If requested to launch the broker inside the subprocess, skip here
+            if os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1":
+                logger.info("Deferring SimpleMessageBroker launch to subprocess")
+                return
             logger.info("Starting simple message broker")
-            start_simple_message_broker({})
+            # Start the broker and retain a handle for cleanup.
+            # Use defaults (host=0.0.0.0, port=7671) as set by the broker implementation.
+            try:
+                self._broker_process = start_simple_message_broker({})
+                # Ensure cleanup at interpreter shutdown in case caller forgets
+                atexit.register(self._terminate_broker_atexit)
+                logger.info(f"SimpleMessageBroker started (pid={getattr(self._broker_process, 'pid', None)})")
+            except Exception as e:
+                logger.error(f"Failed to start SimpleMessageBroker: {e}")
+                raise
         else:
             logger.debug("Simple broker launch not required")

@@ -109,6 +148,8 @@ class PipelineLifecycleManager:

         This method provides a hook for future pipeline stopping functionality.
         Currently, pipeline stopping is handled by the individual interfaces.
+        Additionally, it ensures any dependent services (like the simple
+        message broker) are terminated to avoid lingering processes.

         Parameters
         ----------
@@ -116,7 +157,58 @@ class PipelineLifecycleManager:
             Identifier of the pipeline to stop. Currently unused.
         """
         logger.info("Pipeline stop requested")
-        # TODO: Implement pipeline stopping logic when needed
-        # This would involve coordinating with the execution strategy
-        # to gracefully shut down running pipelines
-        pass
+        # Best-effort termination of broker if we started one
+        self._terminate_broker()
+
+    # --- Internal helpers ---
+    def _terminate_broker_atexit(self) -> None:
+        """Atexit-safe broker termination.
+
+        Avoids raising exceptions during interpreter shutdown.
+        """
+        try:
+            self._terminate_broker()
+        except Exception:
+            # Swallow errors at atexit to avoid noisy shutdowns
+            pass
+
+    def _terminate_broker(self) -> None:
+        """Terminate the SimpleMessageBroker process if running."""
+        proc = self._broker_process
+        if not proc:
+            return
+        try:
+            if hasattr(proc, "is_alive") and not proc.is_alive():
+                return
+        except Exception:
+            # If querying state fails, continue with termination attempt
+            pass
+
+        pid = getattr(proc, "pid", None)
+        logger.info(f"Stopping SimpleMessageBroker (pid={pid})")
+        try:
+            # First, try graceful terminate
+            proc.terminate()
+            try:
+                proc.join(timeout=3.0)
+            except Exception:
+                pass
+
+            # If still alive, escalate to SIGKILL on the single process
+            still_alive = False
+            try:
+                still_alive = hasattr(proc, "is_alive") and proc.is_alive()
+            except Exception:
+                still_alive = True
+            if still_alive and pid is not None:
+                try:
+                    os.kill(pid, signal.SIGKILL)
+                except Exception:
+                    pass
+                try:
+                    proc.join(timeout=2.0)
+                except Exception:
+                    pass
+        finally:
+            # Clear handle to avoid repeated attempts
+            self._broker_process = None
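
_terminate_broker follows the usual multiprocessing escalation ladder: terminate() (SIGTERM on POSIX), a bounded join, then SIGKILL as a last resort, with atexit registration as a safety net. A self-contained sketch of the same ladder (illustrative names, POSIX assumed):

    import atexit
    import multiprocessing
    import os
    import signal
    import time

    def stop_child(proc: multiprocessing.Process) -> None:
        """SIGTERM first, short join, then SIGKILL if the child is still alive."""
        if not proc.is_alive():
            return
        proc.terminate()            # sends SIGTERM on POSIX
        proc.join(timeout=3.0)
        if proc.is_alive() and proc.pid is not None:
            os.kill(proc.pid, signal.SIGKILL)  # last resort; cannot be caught
            proc.join(timeout=2.0)

    if __name__ == "__main__":
        child = multiprocessing.Process(target=time.sleep, args=(60,))
        child.start()
        atexit.register(stop_child, child)  # safety net if the caller forgets
        stop_child(child)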
@@ -11,6 +11,7 @@ Strategy pattern for clean separation of execution concerns.
 """

 import atexit
+import os
 import logging
 import multiprocessing
 import time
@@ -25,6 +26,8 @@ from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
 from nv_ingest.framework.orchestration.process.execution import (
     launch_pipeline,
     run_pipeline_process,
+)
+from nv_ingest.framework.orchestration.process.termination import (
     kill_pipeline_process_group,
 )

@@ -140,16 +143,45 @@ class SubprocessStrategy(ProcessExecutionStrategy):
             daemon=False,
         )

-        process.start()
+        # Hint to the lifecycle manager to skip starting the broker in the parent
+        prev_val = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS")
+        os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = "1"
+        try:
+            process.start()
+        finally:
+            # Restore original env to avoid affecting other code paths
+            if prev_val is None:
+                try:
+                    del os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"]
+                except KeyError:
+                    pass
+            else:
+                os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = prev_val
         interface = RayPipelineSubprocessInterface(process)

         if options.block:
-            # Block until subprocess completes
+            # Block until subprocess completes, handling Ctrl+C to ensure teardown
             start_time = time.time()
             logger.info("Waiting for subprocess pipeline to complete...")
-            process.join()
-            logger.info("Pipeline subprocess completed.")
+            try:
+                process.join()
+            except KeyboardInterrupt:
+                logger.info("KeyboardInterrupt in parent; terminating subprocess group...")
+                try:
+                    pid = int(process.pid)
+                    kill_pipeline_process_group(pid)
+                finally:
+                    # Best-effort wait for process to exit
+                    try:
+                        process.join(timeout=5.0)
+                    except Exception:
+                        pass
+            finally:
+                logger.info("Pipeline subprocess completed or terminated.")
             elapsed_time = time.time() - start_time
+            # If process ended with failure, surface it
+            if hasattr(process, "exitcode") and process.exitcode not in (0, None):
+                raise RuntimeError(f"Pipeline subprocess exited with code {process.exitcode}")
             return ExecutionResult(interface=None, elapsed_time=elapsed_time)
         else:
             # Return interface for non-blocking execution
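
Process.join() returns normally even when the child failed; multiprocessing only records the result in process.exitcode (negative values mean the child was killed by that signal). The added exitcode check turns a silent subprocess failure into an exception in the parent. A minimal illustration of that behavior:

    import multiprocessing
    import sys

    def _fail():
        sys.exit(3)

    if __name__ == "__main__":
        p = multiprocessing.Process(target=_fail)
        p.start()
        p.join()  # returns without raising, even though the child failed
        if p.exitcode not in (0, None):
            raise RuntimeError(f"Pipeline subprocess exited with code {p.exitcode}")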
@@ -0,0 +1,107 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Process termination utilities, isolated to avoid circular imports.
+
+This module provides functions to terminate a process and its entire process
+group safely, without depending on pipeline construction or Ray types.
+"""
+
+import logging
+import os
+import signal
+import time
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+def _safe_log(level: int, msg: str) -> None:
+    """Best-effort logging that won't crash during interpreter shutdown."""
+    try:
+        logger.log(level, msg)
+    except Exception:
+        try:
+            # Fallback to stderr if available
+            import sys
+
+            if hasattr(sys, "__stderr__") and sys.__stderr__:
+                sys.__stderr__.write(msg + "\n")
+                sys.__stderr__.flush()
+        except Exception:
+            pass
+
+
+def kill_pipeline_process_group(process) -> None:
+    """
+    Kill a process and its entire process group.
+
+    Accepts either a multiprocessing.Process-like object exposing a ``pid`` attribute
+    or a raw PID integer. Sends SIGTERM to the process group first, and escalates
+    to SIGKILL if it does not terminate within a short grace period.
+
+    Parameters
+    ----------
+    process : multiprocessing.Process | int
+        Process handle (or a raw PID int) for the process whose process group should be terminated.
+    """
+    proc: Optional[object] = None
+    pid: Optional[int] = None
+
+    if isinstance(process, int):
+        pid = process
+    elif hasattr(process, "pid"):
+        proc = process
+        try:
+            pid = int(getattr(proc, "pid"))
+        except Exception as e:
+            raise AttributeError(f"Invalid process-like object without usable pid: {e}")
+    else:
+        raise AttributeError(
+            "kill_pipeline_process_group expects a multiprocessing.Process or a PID int (process-like object with .pid)"
+        )
+
+    if proc is not None and hasattr(proc, "is_alive") and not proc.is_alive():
+        _safe_log(logging.DEBUG, "Process already terminated")
+        return
+
+    if pid is None:
+        raise AttributeError("Unable to determine PID for process group termination")
+
+    _safe_log(logging.INFO, f"Terminating pipeline process group (PID: {pid})")
+
+    try:
+        # Send graceful termination to the entire process group
+        os.killpg(os.getpgid(pid), signal.SIGTERM)
+
+        # If we have a Process handle, give it a chance to exit cleanly
+        if proc is not None and hasattr(proc, "join"):
+            try:
+                proc.join(timeout=5.0)
+            except Exception:
+                pass
+            still_alive = getattr(proc, "is_alive", lambda: True)()
+        else:
+            # Without a handle, provide a small grace period
+            time.sleep(2.0)
+            try:
+                _ = os.getpgid(pid)
+                still_alive = True
+            except Exception:
+                still_alive = False
+
+        if still_alive:
+            _safe_log(logging.WARNING, "Process group did not terminate gracefully, using SIGKILL")
+            try:
+                os.killpg(os.getpgid(pid), signal.SIGKILL)
+            finally:
+                if proc is not None and hasattr(proc, "join"):
+                    try:
+                        proc.join(timeout=3.0)
+                    except Exception:
+                        pass
+
+    except (ProcessLookupError, OSError) as e:
+        _safe_log(logging.DEBUG, f"Process group already terminated or not found: {e}")