nv-ingest 2025.8.14.dev20250814__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of nv-ingest might be problematic.
- nv_ingest/framework/orchestration/execution/__init__.py +3 -0
- nv_ingest/framework/orchestration/execution/helpers.py +85 -0
- nv_ingest/framework/orchestration/execution/options.py +112 -0
- nv_ingest/framework/orchestration/process/__init__.py +3 -0
- nv_ingest/framework/orchestration/process/dependent_services.py +55 -0
- nv_ingest/framework/orchestration/process/execution.py +497 -0
- nv_ingest/framework/orchestration/process/lifecycle.py +122 -0
- nv_ingest/framework/orchestration/process/strategies.py +182 -0
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +1 -1
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +23 -23
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +16 -16
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +9 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +10 -6
- nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +18 -17
- nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +14 -13
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +15 -13
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +22 -13
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +10 -7
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +10 -8
- nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
- nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +71 -61
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +7 -5
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +7 -5
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +13 -14
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +18 -12
- nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
- nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
- nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
- nv_ingest/pipeline/__init__.py +3 -0
- nv_ingest/pipeline/config/__init__.py +3 -0
- nv_ingest/pipeline/config/loaders.py +198 -0
- nv_ingest/pipeline/config/replica_resolver.py +227 -0
- nv_ingest/pipeline/default_pipeline_impl.py +517 -0
- nv_ingest/pipeline/ingest_pipeline.py +389 -0
- nv_ingest/pipeline/pipeline_schema.py +398 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/METADATA +1 -1
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/RECORD +54 -40
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/top_level.txt +0 -0
nv_ingest/framework/orchestration/process/execution.py
@@ -0,0 +1,497 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Low-level pipeline execution functions.
+
+This module contains the core pipeline execution functions that are shared
+between different execution strategies, extracted to avoid circular imports.
+"""
+
+import logging
+import multiprocessing
+import os
+import signal
+import sys
+import time
+from ctypes import CDLL
+from datetime import datetime
+from typing import Union, Tuple, Optional, TextIO
+import json
+
+import ray
+from ray import LoggingConfig
+
+from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
+    RayPipeline,
+)
+from nv_ingest.pipeline.ingest_pipeline import IngestPipelineBuilder
+from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
+from nv_ingest.pipeline.config.replica_resolver import resolve_static_replicas
+from nv_ingest_api.util.string_processing.configuration import pretty_print_pipeline_config
+
+logger = logging.getLogger(__name__)
+
+
+def _safe_log(level: int, msg: str) -> None:
+    """Best-effort logging that won't crash during interpreter shutdown.
+
+    Attempts to emit via the module logger, but if logging handlers/streams
+    have already been closed (common in atexit during CI/pytest teardown),
+    falls back to writing to sys.__stderr__ and never raises.
+    """
+    try:
+        logger.log(level, msg)
+        return
+    except Exception:
+        pass
+    try:
+        # Use the original un-captured stderr if available
+        if hasattr(sys, "__stderr__") and sys.__stderr__:
+            sys.__stderr__.write(msg + "\n")
+            sys.__stderr__.flush()
+    except Exception:
+        # Last resort: swallow any error to avoid noisy shutdowns
+        pass
+
+
+def str_to_bool(value: str) -> bool:
+    """Convert string to boolean value."""
+    return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def redirect_os_fds(stdout: Optional[TextIO] = None, stderr: Optional[TextIO] = None):
+    """
+    Redirect OS-level stdout (fd=1) and stderr (fd=2) to the given file-like objects,
+    or to /dev/null if not provided.
+
+    Parameters
+    ----------
+    stdout : Optional[TextIO]
+        Stream to receive OS-level stdout. If None, redirected to /dev/null.
+    stderr : Optional[TextIO]
+        Stream to receive OS-level stderr. If None, redirected to /dev/null.
+    """
+    import os
+
+    # Get file descriptors for stdout and stderr, or use /dev/null
+    stdout_fd = stdout.fileno() if stdout else os.open(os.devnull, os.O_WRONLY)
+    stderr_fd = stderr.fileno() if stderr else os.open(os.devnull, os.O_WRONLY)
+
+    # Redirect OS-level file descriptors
+    os.dup2(stdout_fd, 1)  # Redirect stdout (fd=1)
+    os.dup2(stderr_fd, 2)  # Redirect stderr (fd=2)
+
+
+def set_pdeathsig(sig=signal.SIGKILL):
+    """Set parent death signal to kill child when parent dies."""
+    libc = CDLL("libc.so.6")
+    libc.prctl(1, sig)  # PR_SET_PDEATHSIG = 1
+
+
+def build_logging_config_from_env() -> LoggingConfig:
+    """
+    Build Ray LoggingConfig from environment variables.
+    Package-level preset (sets all defaults):
+    - INGEST_RAY_LOG_LEVEL: PRODUCTION, DEVELOPMENT, DEBUG. Default: DEVELOPMENT
+    Individual environment variables (override preset defaults):
+    - RAY_LOGGING_LEVEL: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default: INFO
+    - RAY_LOGGING_ENCODING: Log encoding format (TEXT, JSON). Default: TEXT
+    - RAY_LOGGING_ADDITIONAL_ATTRS: Comma-separated list of additional standard logger attributes
+    - RAY_DEDUP_LOGS: Enable/disable log deduplication (0/1). Default: 1 (enabled)
+    - RAY_LOG_TO_DRIVER: Enable/disable logging to driver (true/false). Default: true
+    - RAY_LOGGING_ROTATE_BYTES: Maximum log file size before rotation (bytes). Default: 1GB
+    - RAY_LOGGING_ROTATE_BACKUP_COUNT: Number of backup log files to keep. Default: 19
+    - RAY_DISABLE_IMPORT_WARNING: Disable Ray import warnings (0/1). Default: 0
+    - RAY_USAGE_STATS_ENABLED: Enable/disable usage stats collection (0/1). Default: 1
+    """
+
+    # Apply package-level preset defaults first
+    preset_level = os.environ.get("INGEST_RAY_LOG_LEVEL", "DEVELOPMENT").upper()
+
+    # Define preset configurations
+    presets = {
+        "PRODUCTION": {
+            "RAY_LOGGING_LEVEL": "ERROR",
+            "RAY_LOGGING_ENCODING": "TEXT",
+            "RAY_LOGGING_ADDITIONAL_ATTRS": "",
+            "RAY_DEDUP_LOGS": "1",
+            "RAY_LOG_TO_DRIVER": "0",  # false
+            "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
+            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "9",  # 10GB total
+            "RAY_DISABLE_IMPORT_WARNING": "1",
+            "RAY_USAGE_STATS_ENABLED": "0",
+        },
+        "DEVELOPMENT": {
+            "RAY_LOGGING_LEVEL": "INFO",
+            "RAY_LOGGING_ENCODING": "TEXT",
+            "RAY_LOGGING_ADDITIONAL_ATTRS": "",
+            "RAY_DEDUP_LOGS": "1",
+            "RAY_LOG_TO_DRIVER": "0",  # false
+            "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
+            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "19",  # 20GB total
+            "RAY_DISABLE_IMPORT_WARNING": "0",
+            "RAY_USAGE_STATS_ENABLED": "1",
+        },
+        "DEBUG": {
+            "RAY_LOGGING_LEVEL": "DEBUG",
+            "RAY_LOGGING_ENCODING": "JSON",
+            "RAY_LOGGING_ADDITIONAL_ATTRS": "name,funcName,lineno",
+            "RAY_DEDUP_LOGS": "0",
+            "RAY_LOG_TO_DRIVER": "0",  # false
+            "RAY_LOGGING_ROTATE_BYTES": "536870912",  # 512MB
+            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "39",  # 20GB total
+            "RAY_DISABLE_IMPORT_WARNING": "0",
+            "RAY_USAGE_STATS_ENABLED": "1",
+        },
+    }
+
+    # Validate preset level
+    if preset_level not in presets:
+        logger.warning(
+            f"Invalid INGEST_RAY_LOG_LEVEL '{preset_level}', using DEVELOPMENT. "
+            f"Valid presets: {list(presets.keys())}"
+        )
+        preset_level = "DEVELOPMENT"
+
+    # Apply preset defaults (only if env var not already set)
+    preset_config = presets[preset_level]
+    for key, default_value in preset_config.items():
+        if key not in os.environ:
+            os.environ[key] = default_value
+
+    logger.info(f"Applied Ray logging preset: {preset_level}")
+
+    # Get log level from environment, default to INFO
+    log_level = os.environ.get("RAY_LOGGING_LEVEL", "INFO").upper()
+
+    # Validate log level
+    valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
+    if log_level not in valid_levels:
+        logger.warning(f"Invalid RAY_LOGGING_LEVEL '{log_level}', using INFO. Valid levels: {valid_levels}")
+        log_level = "INFO"
+
+    # Get encoding format from environment, default to TEXT
+    encoding = os.environ.get("RAY_LOGGING_ENCODING", "TEXT").upper()
+
+    # Validate encoding
+    valid_encodings = ["TEXT", "JSON"]
+    if encoding not in valid_encodings:
+        logger.warning(f"Invalid RAY_LOGGING_ENCODING '{encoding}', using TEXT. Valid encodings: {valid_encodings}")
+        encoding = "TEXT"
+
+    # Get additional standard logger attributes
+    additional_attrs_str = os.environ.get("RAY_LOGGING_ADDITIONAL_ATTRS", "")
+    additional_log_standard_attrs = []
+    if additional_attrs_str:
+        additional_log_standard_attrs = [attr.strip() for attr in additional_attrs_str.split(",") if attr.strip()]
+
+    # Set log deduplication environment variable if specified
+    dedup_logs = os.environ.get("RAY_DEDUP_LOGS", "1")
+    if dedup_logs is not None:
+        os.environ["RAY_DEDUP_LOGS"] = str(dedup_logs)
+
+    # Set log to driver environment variable if specified
+    log_to_driver = os.environ.get("RAY_LOG_TO_DRIVER", "0")
+    if log_to_driver is not None:
+        os.environ["RAY_LOG_TO_DRIVER"] = str(log_to_driver)
+
+    # Configure log rotation settings
+    rotate_bytes = os.environ.get("RAY_LOGGING_ROTATE_BYTES", "1073741824")  # Default: 1GB per file
+    if rotate_bytes is not None:
+        try:
+            rotate_bytes_int = int(rotate_bytes)
+            os.environ["RAY_LOGGING_ROTATE_BYTES"] = str(rotate_bytes_int)
+        except ValueError:
+            logger.warning(f"Invalid RAY_LOGGING_ROTATE_BYTES '{rotate_bytes}', using default (1GB)")
+            os.environ["RAY_LOGGING_ROTATE_BYTES"] = "1073741824"
+
+    rotate_backup_count = os.environ.get("RAY_LOGGING_ROTATE_BACKUP_COUNT", "19")  # Default: 19 backups (20GB Max)
+    if rotate_backup_count is not None:
+        try:
+            backup_count_int = int(rotate_backup_count)
+            os.environ["RAY_LOGGING_ROTATE_BACKUP_COUNT"] = str(backup_count_int)
+        except ValueError:
+            logger.warning(f"Invalid RAY_LOGGING_ROTATE_BACKUP_COUNT '{rotate_backup_count}', using default (19)")
+            os.environ["RAY_LOGGING_ROTATE_BACKUP_COUNT"] = "19"
+
+    # Configure Ray internal logging verbosity
+    disable_import_warning = os.environ.get("RAY_DISABLE_IMPORT_WARNING", "0")
+    if disable_import_warning is not None:
+        os.environ["RAY_DISABLE_IMPORT_WARNING"] = str(disable_import_warning)
+
+    # Configure usage stats collection
+    usage_stats_enabled = os.environ.get("RAY_USAGE_STATS_ENABLED", "1")
+    if usage_stats_enabled is not None:
+        os.environ["RAY_USAGE_STATS_ENABLED"] = str(usage_stats_enabled)
+
+    # Create LoggingConfig with validated parameters
+    logging_config = LoggingConfig(
+        encoding=encoding,
+        log_level=log_level,
+        additional_log_standard_attrs=additional_log_standard_attrs,
+    )
+
+    logger.info(
+        f"Ray logging configured: preset={preset_level}, level={log_level}, encoding={encoding}, "
+        f"additional_attrs={additional_log_standard_attrs}, "
+        f"dedup_logs={os.environ.get('RAY_DEDUP_LOGS', '1')}, "
+        f"log_to_driver={os.environ.get('RAY_LOG_TO_DRIVER', '0')}, "
+        f"rotate_bytes={os.environ.get('RAY_LOGGING_ROTATE_BYTES', '1073741824')}, "
+        f"rotate_backup_count={os.environ.get('RAY_LOGGING_ROTATE_BACKUP_COUNT', '19')}"
+    )
+
+    return logging_config
+
+
+def launch_pipeline(
+    pipeline_config: PipelineConfigSchema,
+    block: bool = True,
+    disable_dynamic_scaling: Optional[bool] = None,
+    dynamic_memory_threshold: Optional[float] = None,
+) -> Tuple[Union[RayPipeline, None], Optional[float]]:
+    """
+    Launch a pipeline using the provided configuration.
+
+    This function handles the core pipeline launching logic including Ray
+    initialization, pipeline building, and execution loop.
+
+    Parameters
+    ----------
+    pipeline_config : PipelineConfigSchema
+        Validated pipeline configuration to execute.
+    block : bool, optional
+        Whether to block until pipeline completes, by default True.
+    disable_dynamic_scaling : Optional[bool], optional
+        Override for dynamic scaling behavior, by default None.
+    dynamic_memory_threshold : Optional[float], optional
+        Override for memory threshold, by default None.
+
+    Returns
+    -------
+    Tuple[Union[RayPipeline, None], Optional[float]]
+        Raw RayPipeline object and elapsed time. For blocking execution,
+        returns (None, elapsed_time). For non-blocking, returns (pipeline, None).
+    """
+    logger.info("Starting pipeline setup")
+
+    # Initialize Ray if not already initialized
+    if not ray.is_initialized():
+        # Build Ray logging configuration
+        logging_config = build_logging_config_from_env()
+
+        # Clear existing handlers from root logger before Ray adds its handler
+        # This prevents duplicate logging caused by multiple handlers on the root logger
+        root_logger = logging.getLogger()
+        for handler in root_logger.handlers[:]:
+            root_logger.removeHandler(handler)
+        logger.info("Cleared existing root logger handlers to prevent Ray logging duplicates")
+
+        ray.init(
+            namespace="nv_ingest_ray",
+            ignore_reinit_error=True,
+            dashboard_host="0.0.0.0",
+            dashboard_port=8265,
+            logging_config=logging_config,  # Ray will add its own StreamHandler
+            _system_config={
+                "local_fs_capacity_threshold": 0.9,
+                "object_spilling_config": json.dumps(
+                    {
+                        "type": "filesystem",
+                        "params": {
+                            "directory_path": [
+                                "/tmp/ray_spill_testing_0",
+                                "/tmp/ray_spill_testing_1",
+                                "/tmp/ray_spill_testing_2",
+                                "/tmp/ray_spill_testing_3",
+                            ],
+                            "buffer_size": 100_000_000,
+                        },
+                    },
+                ),
+            },
+        )
+
+    # Handle disable_dynamic_scaling parameter override
+    if disable_dynamic_scaling and not pipeline_config.pipeline.disable_dynamic_scaling:
+        # Directly modify the pipeline config to disable dynamic scaling
+        pipeline_config.pipeline.disable_dynamic_scaling = True
+        logger.info("Dynamic scaling disabled via function parameter override")
+
+    # Resolve static replicas
+    pipeline_config = resolve_static_replicas(pipeline_config)
+
+    # Pretty print the final pipeline configuration (after replica resolution)
+    pretty_output = pretty_print_pipeline_config(pipeline_config, config_path=None)
+    logger.info("\n" + pretty_output)
+
+    # Set up the ingestion pipeline
+    start_abs = datetime.now()
+    ingest_pipeline = IngestPipelineBuilder(pipeline_config)
+    ingest_pipeline.build()
+
+    # Record setup time
+    end_setup = start_run = datetime.now()
+    setup_time = (end_setup - start_abs).total_seconds()
+    logger.info(f"Pipeline setup complete in {setup_time:.2f} seconds")
+
+    # Run the pipeline
+    logger.debug("Running pipeline")
+    ingest_pipeline.start()
+
+    if block:
+        try:
+            # Block indefinitely until a KeyboardInterrupt is received
+            while True:
+                time.sleep(5)
+        except KeyboardInterrupt:
+            logger.info("Interrupt received, shutting down pipeline.")
+            ingest_pipeline.stop()
+            ray.shutdown()
+            logger.info("Ray shutdown complete.")
+
+        # Record execution times
+        end_run = datetime.now()
+        run_time = (end_run - start_run).total_seconds()
+        total_elapsed = (end_run - start_abs).total_seconds()
+
+        logger.info(f"Pipeline execution time: {run_time:.2f} seconds")
+        logger.info(f"Total time elapsed: {total_elapsed:.2f} seconds")
+
+        return None, total_elapsed
+    else:
+        # Non-blocking - return the pipeline interface
+        # Access the internal RayPipeline from IngestPipelineBuilder
+        return ingest_pipeline._pipeline, None
+
+
+def run_pipeline_process(
+    pipeline_config: PipelineConfigSchema,
+    stdout: Optional[TextIO] = None,
+    stderr: Optional[TextIO] = None,
+) -> None:
+    """
+    Entry point for running a pipeline in a subprocess.
+
+    This function is designed to be the target of a multiprocessing.Process,
+    handling output redirection and process group management.
+
+    Parameters
+    ----------
+    pipeline_config : PipelineConfigSchema
+        Pipeline configuration object.
+    stdout : Optional[TextIO], optional
+        Output stream for subprocess stdout, by default None.
+    stderr : Optional[TextIO], optional
+        Error stream for subprocess stderr, by default None.
+    """
+    # Set up output redirection
+    if stdout:
+        sys.stdout = stdout
+    if stderr:
+        sys.stderr = stderr
+
+    # Create a new process group so we can terminate the entire subtree cleanly
+    try:
+        os.setpgrp()
+    except Exception as e:
+        logger.debug(f"os.setpgrp() not available or failed: {e}")
+
+    # Test output redirection
+    print("DEBUG: Direct print to stdout - should appear in parent process")
+    sys.stderr.write("DEBUG: Direct write to stderr - should appear in parent process\n")
+
+    # Test logging output
+    logger.info("DEBUG: Logger info - may not appear if logging handlers not redirected")
+
+    try:
+        # Launch the pipeline (blocking)
+        launch_pipeline(pipeline_config, block=True)
+
+    except Exception as e:
+        logger.error(f"Subprocess pipeline execution failed: {e}")
+        raise
+
+
+def kill_pipeline_process_group(process: multiprocessing.Process) -> None:
+    """
+    Kill a pipeline process and its entire process group.
+
+    Note: Although the type annotation specifies a multiprocessing.Process for
+    compatibility with existing tests and public API, this function is robust
+    to also being passed a raw PID (int) at runtime.
+
+    Behavior:
+    - Send SIGTERM to the process group; if still alive after grace period, escalate to SIGKILL.
+    - If a Process object is provided, attempt to join() with timeouts.
+    - If only a PID is provided, skip joins and just signal the process group with grace/force.
+
+    Parameters
+    ----------
+    process : multiprocessing.Process
+        Process handle (or a raw PID int) for the process whose process group should be terminated.
+    """
+    # Resolve PID and optional Process handle
+    proc: Optional[object] = None
+    pid: Optional[int] = None
+
+    if isinstance(process, int):
+        pid = process
+    elif hasattr(process, "pid"):
+        # Duck-type any object that exposes a pid (e.g., multiprocessing.Process or Mock)
+        proc = process
+        try:
+            pid = int(getattr(proc, "pid"))
+        except Exception as e:
+            raise AttributeError(f"Invalid process-like object without usable pid: {e}")
+    else:
+        raise AttributeError(
+            "kill_pipeline_process_group expects a multiprocessing.Process or a PID int (process-like object with .pid)"
+        )
+
+    # If we have a Process handle and it's already dead, nothing to do
+    if proc is not None and hasattr(proc, "is_alive") and not proc.is_alive():
+        _safe_log(logging.DEBUG, "Process already terminated")
+        return
+
+    if pid is None:
+        # Defensive guard; should not happen
+        raise AttributeError("Unable to determine PID for process group termination")
+
+    _safe_log(logging.INFO, f"Terminating pipeline process group (PID: {pid})")
+    try:
+        # Send graceful termination to the entire process group
+        os.killpg(os.getpgid(pid), signal.SIGTERM)
+
+        # If we have a Process handle, give it a chance to exit cleanly
+        if proc is not None and hasattr(proc, "join"):
+            try:
+                proc.join(timeout=5.0)
+            except Exception:
+                pass
+            still_alive = getattr(proc, "is_alive", lambda: True)()
+        else:
+            # Without a handle, provide a small grace period
+            time.sleep(2.0)
+            # Best-effort check: if getpgid fails, it's gone
+            try:
+                _ = os.getpgid(pid)
+                still_alive = True
+            except Exception:
+                still_alive = False
+
+        if still_alive:
+            _safe_log(logging.WARNING, "Process group did not terminate gracefully, using SIGKILL")
+            try:
+                os.killpg(os.getpgid(pid), signal.SIGKILL)
+            finally:
+                if proc is not None and hasattr(proc, "join"):
+                    try:
+                        proc.join(timeout=3.0)
+                    except Exception:
+                        pass
+
+    except (ProcessLookupError, OSError) as e:
+        # Process or group may already be gone
+        _safe_log(logging.DEBUG, f"Process group already terminated or not found: {e}")
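For orientation, here is a minimal usage sketch of the subprocess entry points added above. It is an illustration, not part of the diff: load_pipeline_config is a hypothetical stand-in for whatever the new nv_ingest.pipeline.config.loaders module actually exposes, and the config path is likewise assumed.

import multiprocessing
import os

from nv_ingest.framework.orchestration.process.execution import (
    kill_pipeline_process_group,
    run_pipeline_process,
)
from nv_ingest.pipeline.config.loaders import load_pipeline_config  # hypothetical name

# Choose the PRODUCTION Ray logging preset before the child initializes Ray;
# individually set RAY_* variables still override the preset defaults.
os.environ["INGEST_RAY_LOG_LEVEL"] = "PRODUCTION"

pipeline_config = load_pipeline_config("pipeline.yaml")  # assumed loader API

# run_pipeline_process calls os.setpgrp(), so the child and the Ray workers it
# spawns share one process group that kill_pipeline_process_group can signal.
proc = multiprocessing.Process(target=run_pipeline_process, args=(pipeline_config,))
proc.start()
try:
    proc.join(timeout=60.0)
finally:
    if proc.is_alive():
        kill_pipeline_process_group(proc)  # SIGTERM first, SIGKILL after the grace period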
nv_ingest/framework/orchestration/process/lifecycle.py
@@ -0,0 +1,122 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Pipeline lifecycle management for declarative execution.
+
+This module provides high-level lifecycle management for pipelines,
+orchestrating configuration resolution, broker setup, and execution
+using the configured strategy pattern.
+"""
+
+import logging
+from typing import Optional
+
+from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
+from nv_ingest.framework.orchestration.execution.options import ExecutionOptions, ExecutionResult
+from nv_ingest.framework.orchestration.process.strategies import ProcessExecutionStrategy
+from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineLifecycleManager:
+    """
+    High-level manager for pipeline lifecycle operations.
+
+    This class orchestrates the complete pipeline lifecycle including
+    broker setup, configuration validation, and execution using the
+    configured execution strategy.
+
+    Attributes
+    ----------
+    strategy : ProcessExecutionStrategy
+        The execution strategy to use for running pipelines.
+    """
+
+    def __init__(self, strategy: ProcessExecutionStrategy):
+        """
+        Initialize the lifecycle manager with an execution strategy.
+
+        Parameters
+        ----------
+        strategy : ProcessExecutionStrategy
+            The strategy to use for pipeline execution.
+        """
+        self.strategy = strategy
+
+    def start(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
+        """
+        Start a pipeline using the configured execution strategy.
+
+        This method handles the complete pipeline startup process:
+        1. Validate configuration
+        2. Start message broker if required
+        3. Execute pipeline using the configured strategy
+
+        Parameters
+        ----------
+        config : PipelineConfigSchema
+            Validated pipeline configuration to execute.
+        options : ExecutionOptions
+            Execution options controlling blocking behavior and output.
+
+        Returns
+        -------
+        ExecutionResult
+            Result containing pipeline interface and/or timing information.
+
+        Raises
+        ------
+        RuntimeError
+            If pipeline startup fails.
+        """
+        logger.info("Starting pipeline lifecycle")
+
+        try:
+            # Start message broker if configured
+            self._setup_message_broker(config)
+
+            # Execute pipeline using the configured strategy
+            result = self.strategy.execute(config, options)
+
+            logger.info("Pipeline lifecycle started successfully")
+            return result
+
+        except Exception as e:
+            logger.error(f"Failed to start pipeline lifecycle: {e}")
+            raise RuntimeError(f"Pipeline startup failed: {e}") from e
+
+    def _setup_message_broker(self, config: PipelineConfigSchema) -> None:
+        """
+        Set up message broker if required by configuration.
+
+        Parameters
+        ----------
+        config : PipelineConfigSchema
+            Pipeline configuration containing broker settings.
+        """
+        if config.pipeline.launch_simple_broker:
+            logger.info("Starting simple message broker")
+            start_simple_message_broker({})
+        else:
+            logger.debug("Simple broker launch not required")
+
+    def stop(self, pipeline_id: Optional[str] = None) -> None:
+        """
+        Stop a running pipeline.
+
+        This method provides a hook for future pipeline stopping functionality.
+        Currently, pipeline stopping is handled by the individual interfaces.
+
+        Parameters
+        ----------
+        pipeline_id : Optional[str]
+            Identifier of the pipeline to stop. Currently unused.
+        """
+        logger.info("Pipeline stop requested")
+        # TODO: Implement pipeline stopping logic when needed
+        # This would involve coordinating with the execution strategy
+        # to gracefully shut down running pipelines
+        pass
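A similarly hedged sketch of the lifecycle manager in use. The diff does not show the constructors of ProcessExecutionStrategy or ExecutionOptions, so the no-argument strategy and the block field below are assumptions, as is the loader call carried over from the previous sketch.

from nv_ingest.framework.orchestration.execution.options import ExecutionOptions
from nv_ingest.framework.orchestration.process.lifecycle import PipelineLifecycleManager
from nv_ingest.framework.orchestration.process.strategies import ProcessExecutionStrategy
from nv_ingest.pipeline.config.loaders import load_pipeline_config  # hypothetical, as above

pipeline_config = load_pipeline_config("pipeline.yaml")  # assumed loader API

# Constructor arguments are assumptions; this diff only shows the lifecycle module.
manager = PipelineLifecycleManager(strategy=ProcessExecutionStrategy())

# start() launches the simple broker when config.pipeline.launch_simple_broker is set,
# then delegates to strategy.execute(config, options); failures re-raise as RuntimeError.
options = ExecutionOptions(block=True)  # the `block` field is an assumption
result = manager.start(pipeline_config, options)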