nv-ingest 2025.8.4.dev20250804__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nv_ingest/api/__init__.py +6 -0
- nv_ingest/api/main.py +2 -0
- nv_ingest/api/tracing.py +82 -0
- nv_ingest/api/v2/README.md +203 -0
- nv_ingest/api/v2/__init__.py +3 -0
- nv_ingest/api/v2/ingest.py +1300 -0
- nv_ingest/framework/orchestration/execution/__init__.py +3 -0
- nv_ingest/framework/orchestration/execution/helpers.py +85 -0
- nv_ingest/framework/orchestration/execution/options.py +112 -0
- nv_ingest/framework/orchestration/process/__init__.py +3 -0
- nv_ingest/framework/orchestration/process/dependent_services.py +84 -0
- nv_ingest/framework/orchestration/process/execution.py +495 -0
- nv_ingest/framework/orchestration/process/lifecycle.py +214 -0
- nv_ingest/framework/orchestration/process/strategies.py +218 -0
- nv_ingest/framework/orchestration/process/termination.py +147 -0
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +3 -3
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +32 -38
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +10 -7
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +17 -14
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +11 -6
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +12 -7
- nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
- nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +19 -15
- nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +16 -14
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +16 -13
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +92 -4
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +12 -8
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +12 -9
- nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
- nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +116 -69
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +79 -11
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +12 -6
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +17 -18
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +21 -14
- nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
- nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
- nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
- nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
- nv_ingest/pipeline/__init__.py +3 -0
- nv_ingest/pipeline/config/__init__.py +3 -0
- nv_ingest/pipeline/config/loaders.py +229 -0
- nv_ingest/pipeline/config/replica_resolver.py +237 -0
- nv_ingest/pipeline/default_libmode_pipeline_impl.py +528 -0
- nv_ingest/pipeline/default_pipeline_impl.py +557 -0
- nv_ingest/pipeline/ingest_pipeline.py +389 -0
- nv_ingest/pipeline/pipeline_schema.py +398 -0
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +6 -3
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +64 -43
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Pipeline lifecycle management for declarative execution.
|
|
7
|
+
|
|
8
|
+
This module provides high-level lifecycle management for pipelines,
|
|
9
|
+
orchestrating configuration resolution, broker setup, and execution
|
|
10
|
+
using the configured strategy pattern.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import atexit
|
|
15
|
+
import multiprocessing
|
|
16
|
+
import os
|
|
17
|
+
import signal
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
|
|
21
|
+
from nv_ingest.framework.orchestration.execution.options import ExecutionOptions, ExecutionResult
|
|
22
|
+
from nv_ingest.framework.orchestration.process.strategies import ProcessExecutionStrategy
|
|
23
|
+
from nv_ingest.framework.orchestration.process.strategies import SubprocessStrategy
|
|
24
|
+
from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PipelineLifecycleManager:
    """
    High-level manager for pipeline lifecycle operations.

    This class orchestrates the complete pipeline lifecycle including
    broker setup, configuration validation, and execution using the
    configured execution strategy.

    Attributes
    ----------
    strategy : ProcessExecutionStrategy
        The execution strategy to use for running pipelines.
    """

    def __init__(self, strategy: ProcessExecutionStrategy):
        """
        Initialize the lifecycle manager with an execution strategy.

        Parameters
        ----------
        strategy : ProcessExecutionStrategy
            The strategy to use for pipeline execution.
        """
        self.strategy = strategy
        # Track broker process so we can terminate it during teardown
        self._broker_process: Optional[multiprocessing.Process] = None

    def start(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Start a pipeline using the configured execution strategy.

        This method handles the complete pipeline startup process:
        1. Validate configuration
        2. Start message broker if required
        3. Execute pipeline using the configured strategy

        Parameters
        ----------
        config : PipelineConfigSchema
            Validated pipeline configuration to execute.
        options : ExecutionOptions
            Execution options controlling blocking behavior and output.

        Returns
        -------
        ExecutionResult
            Result containing pipeline interface and/or timing information.

        Raises
        ------
        RuntimeError
            If pipeline startup fails.
        """
        logger.info("Starting pipeline lifecycle")

        # If running pipeline in a subprocess and broker is enabled, ensure the broker
        # is launched in the child process group by signaling via environment variable.
        # _setup_message_broker() reads this same variable and skips the parent-side
        # launch when it is set, so the broker lives (and dies) with the subprocess.
        prev_env = None
        set_env = False
        if getattr(config, "pipeline", None) and getattr(config.pipeline, "launch_simple_broker", False):
            if isinstance(self.strategy, SubprocessStrategy):
                prev_env = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS")
                os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = "1"
                set_env = True

        try:
            # Start message broker if configured (may defer to subprocess based on env)
            self._setup_message_broker(config)

            # Execute pipeline using the configured strategy
            result = self.strategy.execute(config, options)

            logger.info("Pipeline lifecycle started successfully")
            return result

        except Exception as e:
            logger.error(f"Failed to start pipeline lifecycle: {e}")
            raise RuntimeError(f"Pipeline startup failed: {e}") from e
        finally:
            # Restore the environment exactly as we found it so the flag does not
            # leak into unrelated launches performed later by this process.
            if set_env:
                if prev_env is None:
                    try:
                        del os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"]
                    except KeyError:
                        pass
                else:
                    os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = prev_env

    def _setup_message_broker(self, config: PipelineConfigSchema) -> None:
        """
        Set up message broker if required by configuration.

        Parameters
        ----------
        config : PipelineConfigSchema
            Pipeline configuration containing broker settings.
        """
        if config.pipeline.launch_simple_broker:
            # If requested to launch broker inside the subprocess, skip here
            if os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1":
                logger.info("Deferring SimpleMessageBroker launch to subprocess")
                return
            logger.info("Starting simple message broker")
            # Start the broker and retain a handle for cleanup.
            # Use defaults (host=0.0.0.0, port=7671) as set by the broker implementation.
            try:
                self._broker_process = start_simple_message_broker({})
                # Ensure cleanup at interpreter shutdown in case caller forgets.
                # NOTE(review): registered on every successful launch; harmless because
                # _terminate_broker() clears the handle and becomes a no-op afterwards.
                atexit.register(self._terminate_broker_atexit)
                logger.info(f"SimpleMessageBroker started (pid={getattr(self._broker_process, 'pid', None)})")
            except Exception as e:
                logger.error(f"Failed to start SimpleMessageBroker: {e}")
                raise
        else:
            logger.debug("Simple broker launch not required")

    def stop(self, pipeline_id: Optional[str] = None) -> None:
        """
        Stop a running pipeline.

        This method provides a hook for future pipeline stopping functionality.
        Currently, pipeline stopping is handled by the individual interfaces.
        Additionally, it ensures any dependent services (like the simple
        message broker) are terminated to avoid lingering processes.

        Parameters
        ----------
        pipeline_id : Optional[str]
            Identifier of the pipeline to stop. Currently unused.
        """
        logger.info("Pipeline stop requested")
        # Best-effort termination of broker if we started one
        self._terminate_broker()

    # --- Internal helpers ---
    def _terminate_broker_atexit(self) -> None:
        """Atexit-safe broker termination.

        Avoids raising exceptions during interpreter shutdown.
        """
        try:
            self._terminate_broker()
        except Exception:
            # Swallow errors at atexit to avoid noisy shutdowns
            pass

    def _terminate_broker(self) -> None:
        """Terminate the SimpleMessageBroker process if running."""
        proc = self._broker_process
        if not proc:
            return
        try:
            if hasattr(proc, "is_alive") and not proc.is_alive():
                return
        except Exception:
            # If querying state fails, continue with termination attempt
            pass

        pid = getattr(proc, "pid", None)
        logger.info(f"Stopping SimpleMessageBroker (pid={pid})")
        try:
            # First, try graceful terminate (SIGTERM) and give it a short window.
            proc.terminate()
            try:
                proc.join(timeout=3.0)
            except Exception:
                pass

            # If still alive, escalate to SIGKILL on the single process.
            # NOTE(review): signal.SIGKILL is POSIX-only; this path assumes a
            # non-Windows host — confirm against deployment targets.
            still_alive = False
            try:
                still_alive = hasattr(proc, "is_alive") and proc.is_alive()
            except Exception:
                still_alive = True
            if still_alive and pid is not None:
                try:
                    os.kill(pid, signal.SIGKILL)
                except Exception:
                    pass
                try:
                    proc.join(timeout=2.0)
                except Exception:
                    pass
        finally:
            # Clear handle to avoid repeated attempts
            self._broker_process = None
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Process execution strategies for pipeline deployment.
|
|
7
|
+
|
|
8
|
+
This module defines abstract and concrete strategies for executing pipelines
|
|
9
|
+
in different process contexts (in-process vs subprocess), implementing the
|
|
10
|
+
Strategy pattern for clean separation of execution concerns.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import atexit
|
|
14
|
+
import logging
|
|
15
|
+
import multiprocessing
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
import time
|
|
19
|
+
from abc import ABC, abstractmethod
|
|
20
|
+
|
|
21
|
+
from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
|
|
22
|
+
from nv_ingest.framework.orchestration.execution.options import ExecutionOptions, ExecutionResult
|
|
23
|
+
from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
|
|
24
|
+
RayPipelineInterface,
|
|
25
|
+
RayPipelineSubprocessInterface,
|
|
26
|
+
)
|
|
27
|
+
from nv_ingest.framework.orchestration.process.execution import (
|
|
28
|
+
launch_pipeline,
|
|
29
|
+
run_pipeline_process,
|
|
30
|
+
)
|
|
31
|
+
from nv_ingest.framework.orchestration.process.termination import (
|
|
32
|
+
kill_pipeline_process_group,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ProcessExecutionStrategy(ABC):
    """
    Abstract interface for pipeline execution strategies.

    Each concrete subclass encapsulates one way of running a pipeline
    (in-process, subprocess, ...) following the Strategy pattern, keeping
    execution concerns cleanly separated from pipeline construction.
    """

    @abstractmethod
    def execute(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Run a pipeline according to this strategy.

        Parameters
        ----------
        config : PipelineConfigSchema
            Validated pipeline configuration to execute.
        options : ExecutionOptions
            Execution options controlling blocking behavior and output redirection.

        Returns
        -------
        ExecutionResult
            Result containing pipeline interface and/or timing information.
        """
        ...
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class InProcessStrategy(ProcessExecutionStrategy):
    """
    Execute the pipeline directly inside the current Python process.

    Running in-process is the most direct execution path with minimal
    overhead; no process isolation or output redirection is involved.
    """

    def execute(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Run the pipeline in the current process.

        Parameters
        ----------
        config : PipelineConfigSchema
            Pipeline configuration to execute.
        options : ExecutionOptions
            Execution options. stdout/stderr are ignored for in-process execution.

        Returns
        -------
        ExecutionResult
            Result with pipeline interface (non-blocking) or elapsed time (blocking).
        """
        logger.info("Executing pipeline in current process")

        # launch_pipeline hands back the raw RayPipeline object (not wrapped in
        # an interface) together with the total elapsed time when blocking.
        # Dynamic-scaling preferences were already folded into ``config``.
        pipeline, total_elapsed = launch_pipeline(
            config,
            block=options.block,
            disable_dynamic_scaling=None,  # Already applied in config
        )

        if not options.block:
            # Non-blocking: wrap the raw RayPipeline so the caller can manage it.
            wrapped = RayPipelineInterface(pipeline)
            return ExecutionResult(interface=wrapped, elapsed_time=None)

        logger.debug(f"Pipeline execution completed successfully in {total_elapsed:.2f} seconds.")
        return ExecutionResult(interface=None, elapsed_time=total_elapsed)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class SubprocessStrategy(ProcessExecutionStrategy):
    """
    Strategy for executing pipelines in a separate subprocess.

    This strategy launches the pipeline in a separate Python process using
    multiprocessing, providing process isolation and output redirection.
    """

    def execute(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Execute pipeline in a separate subprocess.

        Parameters
        ----------
        config : PipelineConfigSchema
            Pipeline configuration to execute.
        options : ExecutionOptions
            Execution options including output redirection streams.

        Returns
        -------
        ExecutionResult
            Result with subprocess interface (non-blocking) or elapsed time (blocking).
        """
        logger.info("Launching pipeline in Python subprocess using multiprocessing.")

        # Create subprocess using fork context; macOS defaults to spawn because
        # fork is unsafe there with threaded parents (and deprecated by CPython).
        start_method = "fork"
        if sys.platform.lower() == "darwin":
            start_method = "spawn"
        ctx = multiprocessing.get_context(start_method)
        process = ctx.Process(
            target=run_pipeline_process,
            args=(
                config,
                options.stdout,  # raw_stdout
                options.stderr,  # raw_stderr
            ),
            daemon=False,
        )

        # Hint to the lifecycle manager to skip starting the broker in the parent.
        # The variable is set only around process.start() so the child inherits it
        # while the parent's environment is restored immediately afterwards.
        prev_val = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS")
        os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = "1"
        try:
            process.start()
        finally:
            # Restore original env to avoid affecting other code paths
            if prev_val is None:
                try:
                    del os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"]
                except KeyError:
                    pass
            else:
                os.environ["NV_INGEST_BROKER_IN_SUBPROCESS"] = prev_val
        interface = RayPipelineSubprocessInterface(process)

        if options.block:
            # Block until subprocess completes, handling Ctrl+C to ensure teardown
            start_time = time.time()
            logger.info("Waiting for subprocess pipeline to complete...")
            try:
                process.join()
            except KeyboardInterrupt:
                logger.info("KeyboardInterrupt in parent; terminating subprocess group...")
                try:
                    pid = int(process.pid)
                    kill_pipeline_process_group(pid)
                finally:
                    # Best-effort wait for process to exit
                    try:
                        process.join(timeout=5.0)
                    except Exception:
                        pass
            finally:
                logger.info("Pipeline subprocess completed or terminated.")
            elapsed_time = time.time() - start_time
            # If process ended with failure, surface it.
            # NOTE(review): after a Ctrl+C teardown the child's exitcode is
            # negative (killed by signal), so this raises RuntimeError rather
            # than re-raising KeyboardInterrupt — confirm that is intended.
            if hasattr(process, "exitcode") and process.exitcode not in (0, None):
                raise RuntimeError(f"Pipeline subprocess exited with code {process.exitcode}")
            return ExecutionResult(interface=None, elapsed_time=elapsed_time)
        else:
            # Return interface for non-blocking execution
            logger.info(f"Pipeline subprocess started (PID={process.pid})")
            # Ensure we pass the Process object, not just the PID, to avoid AttributeError
            # kill_pipeline_process_group expects a multiprocessing.Process instance
            # Capture raw PID to avoid using multiprocessing APIs during interpreter shutdown
            pid = int(process.pid)
            atexit.register(kill_pipeline_process_group, pid)
            return ExecutionResult(interface=interface, elapsed_time=None)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def create_execution_strategy(run_in_subprocess: bool) -> ProcessExecutionStrategy:
    """
    Build the execution strategy matching the requested process model.

    Parameters
    ----------
    run_in_subprocess : bool
        If True, creates SubprocessStrategy. If False, creates InProcessStrategy.

    Returns
    -------
    ProcessExecutionStrategy
        Configured execution strategy instance.
    """
    return SubprocessStrategy() if run_in_subprocess else InProcessStrategy()
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Process termination utilities, isolated to avoid circular imports.
|
|
7
|
+
|
|
8
|
+
This module provides functions to terminate a process and its entire process
|
|
9
|
+
group safely, without depending on pipeline construction or Ray types.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import signal
|
|
15
|
+
import time
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _safe_log(level: int, msg: str) -> None:
    """Best-effort logging that won't emit handler tracebacks on closed streams.

    Temporarily disables ``logging.raiseExceptions`` so the logging module does
    not print "--- Logging error ---" to stderr when a handler's stream is
    already closed (common during process teardown). If the module logger has
    no handlers, or logging itself fails, falls back to writing directly to
    ``sys.__stderr__`` when that stream is still available.
    """
    try:
        import logging as _logging

        saved_flag = getattr(_logging, "raiseExceptions", True)
        # Keep handler errors from being printed to stderr while we log.
        _logging.raiseExceptions = False

        # Only use the logger when it has handlers of its own; otherwise fall
        # through to the stderr fallback below.
        if logger.handlers:
            logger.log(level, msg)
            return
    except Exception:
        # Deliberately swallowed — the stderr fallback below is the last resort.
        pass
    finally:
        try:
            import logging as _logging  # re-import safe even if earlier failed

            _logging.raiseExceptions = saved_flag  # type: ignore[name-defined]
        except Exception:
            pass

    # Fallback: write straight to the original stderr if it still exists.
    try:
        import sys

        fallback = getattr(sys, "__stderr__", None)
        if fallback:
            fallback.write(msg + "\n")
            fallback.flush()
    except Exception:
        pass
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def kill_pipeline_process_group(process) -> None:
    """
    Kill a process and its entire process group.

    Accepts either a multiprocessing.Process-like object exposing a ``pid`` attribute
    or a raw PID integer. Sends SIGTERM to the process group first, and escalates
    to SIGKILL if it does not terminate within a short grace period.

    NOTE(review): POSIX-only — relies on ``os.getpgid``/``os.killpg`` and
    ``signal.SIGKILL``, none of which exist on Windows.

    Parameters
    ----------
    process : multiprocessing.Process | int
        Process handle (or a raw PID int) for the process whose process group should be terminated.
    """
    # Normalize the argument into (proc handle or None, numeric pid).
    proc: Optional[object] = None
    pid: Optional[int] = None

    if isinstance(process, int):
        pid = process
    elif hasattr(process, "pid"):
        proc = process
        try:
            pid = int(getattr(proc, "pid"))
        except Exception as e:
            raise AttributeError(f"Invalid process-like object without usable pid: {e}")
    else:
        raise AttributeError(
            "kill_pipeline_process_group expects a multiprocessing.Process or a PID int (process-like object with .pid)"
        )

    # Fast path: nothing to do if the handle already reports the process dead.
    if proc is not None and hasattr(proc, "is_alive") and not proc.is_alive():
        _safe_log(logging.DEBUG, "Process already terminated")
        return

    if pid is None:
        raise AttributeError("Unable to determine PID for process group termination")

    _safe_log(logging.INFO, f"Terminating pipeline process group (PID: {pid})")

    try:
        # Phase 1: send graceful termination (SIGTERM) to the entire process group.
        try:
            pgid = os.getpgid(pid)
        except Exception:
            # Process already gone
            _safe_log(logging.DEBUG, f"Process group for PID {pid} not found during SIGTERM phase")
            return
        try:
            os.killpg(pgid, signal.SIGTERM)
        except ProcessLookupError:
            _safe_log(logging.DEBUG, f"Process group for PID {pid} no longer exists (SIGTERM)")
            return

        # If we have a Process handle, give it a chance to exit cleanly.
        if proc is not None and hasattr(proc, "join"):
            try:
                proc.join(timeout=5.0)
            except Exception:
                pass
            # If is_alive is unavailable, assume alive so we escalate below.
            still_alive = getattr(proc, "is_alive", lambda: True)()
        else:
            # Without a handle, provide a small grace period, then probe the
            # group's existence via getpgid: success means it is still alive.
            time.sleep(2.0)
            try:
                _ = os.getpgid(pid)
                still_alive = True
            except Exception:
                still_alive = False

        # Phase 2: escalate to SIGKILL for anything that survived SIGTERM.
        if still_alive:
            _safe_log(logging.WARNING, "Process group did not terminate gracefully, using SIGKILL")
            try:
                try:
                    pgid2 = os.getpgid(pid)
                except Exception:
                    _safe_log(logging.DEBUG, f"Process group for PID {pid} vanished before SIGKILL")
                    return
                os.killpg(pgid2, signal.SIGKILL)
            finally:
                # Reap the child if we still hold a handle, even when SIGKILL
                # itself raised or the early return above fired.
                if proc is not None and hasattr(proc, "join"):
                    try:
                        proc.join(timeout=3.0)
                    except Exception:
                        pass

    except (ProcessLookupError, OSError) as e:
        _safe_log(logging.DEBUG, f"Process group already terminated or not found: {e}")
|
|
@@ -29,8 +29,8 @@ from nv_ingest.framework.orchestration.ray.stages.sinks.message_broker_task_sink
|
|
|
29
29
|
from nv_ingest.framework.orchestration.ray.stages.sources.message_broker_task_source import (
|
|
30
30
|
MessageBrokerTaskSourceStage,
|
|
31
31
|
MessageBrokerTaskSourceConfig,
|
|
32
|
-
start_simple_message_broker,
|
|
33
32
|
)
|
|
33
|
+
from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker
|
|
34
34
|
from nv_ingest.framework.orchestration.ray.stages.storage.image_storage import ImageStorageStage
|
|
35
35
|
from nv_ingest.framework.orchestration.ray.stages.storage.store_embeddings import EmbeddingStorageStage
|
|
36
36
|
from nv_ingest.framework.orchestration.ray.stages.transforms.image_caption import ImageCaptionTransformStage
|
|
@@ -152,11 +152,11 @@ if __name__ == "__main__":
|
|
|
152
152
|
os.environ["OCR_MODEL_NAME"] = "paddle"
|
|
153
153
|
os.environ["NEMORETRIEVER_PARSE_HTTP_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
154
154
|
os.environ["VLM_CAPTION_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
155
|
-
os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/
|
|
155
|
+
os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/nemotron-nano-12b-v2-vl"
|
|
156
156
|
logger.info("Environment variables set.")
|
|
157
157
|
|
|
158
158
|
image_caption_endpoint_url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
159
|
-
model_name = "nvidia/
|
|
159
|
+
model_name = "nvidia/nemotron-nano-12b-v2-vl"
|
|
160
160
|
yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_nim_service("yolox")
|
|
161
161
|
(
|
|
162
162
|
yolox_table_structure_grpc,
|
|
@@ -183,7 +183,7 @@ class PipelineTopology:
|
|
|
183
183
|
"""Marks an actor as pending removal, to be cleaned up by the background thread."""
|
|
184
184
|
with self._lock:
|
|
185
185
|
self._actors_pending_removal.add((stage_name, actor))
|
|
186
|
-
logger.
|
|
186
|
+
logger.debug(f"Marked actor {actor} from stage {stage_name} for removal.")
|
|
187
187
|
|
|
188
188
|
def start_cleanup_thread(self, interval: int = 5) -> None:
|
|
189
189
|
"""Starts the background thread for periodic cleanup tasks."""
|
|
@@ -191,14 +191,14 @@ class PipelineTopology:
|
|
|
191
191
|
self._stop_cleanup.clear()
|
|
192
192
|
self._cleanup_thread = threading.Thread(target=self._cleanup_loop, args=(interval,), daemon=True)
|
|
193
193
|
self._cleanup_thread.start()
|
|
194
|
-
logger.
|
|
194
|
+
logger.debug("Topology cleanup thread started.")
|
|
195
195
|
|
|
196
196
|
def stop_cleanup_thread(self) -> None:
|
|
197
197
|
"""Stops the background cleanup thread."""
|
|
198
198
|
if self._cleanup_thread and self._cleanup_thread.is_alive():
|
|
199
199
|
self._stop_cleanup.set()
|
|
200
200
|
self._cleanup_thread.join(timeout=5)
|
|
201
|
-
logger.
|
|
201
|
+
logger.debug("Topology cleanup thread stopped.")
|
|
202
202
|
|
|
203
203
|
def _cleanup_loop(self, interval: int) -> None:
|
|
204
204
|
"""Periodically checks for and removes actors that have completed shutdown."""
|
|
@@ -235,7 +235,7 @@ class PipelineTopology:
|
|
|
235
235
|
self._actors_pending_removal.remove((stage_name, actor))
|
|
236
236
|
if actor in self._stage_actors.get(stage_name, []):
|
|
237
237
|
self._stage_actors[stage_name].remove(actor)
|
|
238
|
-
logger.
|
|
238
|
+
logger.debug(f"Successfully removed actor {actor} from stage {stage_name} in topology.")
|
|
239
239
|
|
|
240
240
|
time.sleep(interval)
|
|
241
241
|
|