nv-ingest 2025.8.4.dev20250804__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nv_ingest/api/__init__.py +6 -0
- nv_ingest/api/main.py +2 -0
- nv_ingest/api/tracing.py +82 -0
- nv_ingest/api/v2/README.md +203 -0
- nv_ingest/api/v2/__init__.py +3 -0
- nv_ingest/api/v2/ingest.py +1300 -0
- nv_ingest/framework/orchestration/execution/__init__.py +3 -0
- nv_ingest/framework/orchestration/execution/helpers.py +85 -0
- nv_ingest/framework/orchestration/execution/options.py +112 -0
- nv_ingest/framework/orchestration/process/__init__.py +3 -0
- nv_ingest/framework/orchestration/process/dependent_services.py +84 -0
- nv_ingest/framework/orchestration/process/execution.py +495 -0
- nv_ingest/framework/orchestration/process/lifecycle.py +214 -0
- nv_ingest/framework/orchestration/process/strategies.py +218 -0
- nv_ingest/framework/orchestration/process/termination.py +147 -0
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +3 -3
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +32 -38
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +10 -7
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +17 -14
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +11 -6
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +12 -7
- nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
- nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +19 -15
- nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +16 -14
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +16 -13
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +92 -4
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +12 -8
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +12 -9
- nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
- nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +116 -69
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +79 -11
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +12 -6
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +17 -18
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +21 -14
- nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
- nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
- nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
- nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
- nv_ingest/pipeline/__init__.py +3 -0
- nv_ingest/pipeline/config/__init__.py +3 -0
- nv_ingest/pipeline/config/loaders.py +229 -0
- nv_ingest/pipeline/config/replica_resolver.py +237 -0
- nv_ingest/pipeline/default_libmode_pipeline_impl.py +528 -0
- nv_ingest/pipeline/default_pipeline_impl.py +557 -0
- nv_ingest/pipeline/ingest_pipeline.py +389 -0
- nv_ingest/pipeline/pipeline_schema.py +398 -0
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +6 -3
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +64 -43
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.8.4.dev20250804.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Low-level pipeline execution functions.
|
|
7
|
+
|
|
8
|
+
This module contains the core pipeline execution functions that are shared
|
|
9
|
+
between different execution strategies, extracted to avoid circular imports.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import multiprocessing
|
|
14
|
+
import os
|
|
15
|
+
import signal
|
|
16
|
+
import sys
|
|
17
|
+
import time
|
|
18
|
+
from ctypes import CDLL
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from typing import Union, Tuple, Optional, TextIO, Any
|
|
21
|
+
import json
|
|
22
|
+
|
|
23
|
+
import ray
|
|
24
|
+
from ray import LoggingConfig
|
|
25
|
+
|
|
26
|
+
from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker
|
|
27
|
+
from nv_ingest.framework.orchestration.process.termination import (
|
|
28
|
+
kill_pipeline_process_group as _kill_pipeline_process_group,
|
|
29
|
+
)
|
|
30
|
+
from nv_ingest.pipeline.ingest_pipeline import IngestPipelineBuilder
|
|
31
|
+
from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
|
|
32
|
+
from nv_ingest.pipeline.config.replica_resolver import resolve_static_replicas
|
|
33
|
+
from nv_ingest_api.util.string_processing.configuration import pretty_print_pipeline_config
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _safe_log(level: int, msg: str) -> None:
    """Log ``msg`` at ``level`` on a best-effort basis; never raises.

    The module logger is tried first. If that call raises for any reason
    (for example because handlers or streams were already closed during
    interpreter shutdown), the message is written to ``sys.__stderr__``
    instead. Failures of the fallback are swallowed as well.
    """
    try:
        logger.log(level, msg)
    except Exception:
        # Logging machinery is unusable; fall back to the original stderr stream.
        try:
            raw_stderr = getattr(sys, "__stderr__", None)
            if raw_stderr:
                raw_stderr.write(msg + "\n")
                raw_stderr.flush()
        except Exception:
            # Nothing left to try; stay silent so shutdown is not noisy.
            pass
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def str_to_bool(value: str) -> bool:
    """Interpret a textual flag as a boolean.

    Surrounding whitespace and letter case are ignored. The strings
    "1", "true", "yes" and "on" yield True; any other string yields False.
    """
    normalized = value.strip().lower()
    return normalized in ("1", "true", "yes", "on")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def redirect_os_fds(stdout: Optional[TextIO] = None, stderr: Optional[TextIO] = None):
    """
    Redirect OS-level stdout (fd=1) and stderr (fd=2) to the given file-like objects,
    or to /dev/null if not provided.

    The redirection is done with ``os.dup2``, so it affects everything that writes to
    file descriptors 1 and 2, not only Python's ``sys.stdout`` / ``sys.stderr`` objects.
    Any descriptor opened here on /dev/null is closed again once it has been duplicated,
    so repeated calls do not leak descriptors.

    Parameters
    ----------
    stdout : Optional[TextIO]
        Stream to receive OS-level stdout. If None, redirected to /dev/null.
    stderr : Optional[TextIO]
        Stream to receive OS-level stderr. If None, redirected to /dev/null.
    """
    # Opened lazily and shared by both streams when neither is supplied.
    devnull_fd: Optional[int] = None

    def _target_fd(stream: Optional[TextIO]) -> int:
        """Return the descriptor to duplicate for ``stream`` (or /dev/null)."""
        nonlocal devnull_fd
        if stream:
            return stream.fileno()
        if devnull_fd is None:
            devnull_fd = os.open(os.devnull, os.O_WRONLY)
        return devnull_fd

    try:
        # Resolve both descriptors before touching fd 1/2, as a failure here
        # should leave the current redirection untouched.
        stdout_fd = _target_fd(stdout)
        stderr_fd = _target_fd(stderr)

        os.dup2(stdout_fd, 1)  # Redirect stdout (fd=1)
        os.dup2(stderr_fd, 2)  # Redirect stderr (fd=2)
    finally:
        # fd 1 and 2 now hold their own duplicates; the temporary /dev/null
        # descriptor is no longer needed. Never close 1 or 2 themselves, which
        # os.open could have returned if they were closed beforehand.
        if devnull_fd is not None and devnull_fd not in (1, 2):
            os.close(devnull_fd)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def set_pdeathsig(sig=signal.SIGKILL):
    """Register ``sig`` as this process's parent-death signal.

    Loads ``libc.so.6`` through ctypes and calls ``prctl`` with option 1
    (PR_SET_PDEATHSIG). The return value of ``prctl`` is not inspected.
    """
    pr_set_pdeathsig = 1  # PR_SET_PDEATHSIG
    CDLL("libc.so.6").prctl(pr_set_pdeathsig, sig)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def build_logging_config_from_env() -> LoggingConfig:
    """
    Build Ray LoggingConfig from environment variables.

    Package-level preset (sets all defaults):
    - INGEST_RAY_LOG_LEVEL: PRODUCTION, DEVELOPMENT, DEBUG. Default: DEVELOPMENT

    Individual environment variables (override preset defaults; the defaults shown
    are those of the DEVELOPMENT preset, see the ``presets`` table for the others):
    - RAY_LOGGING_LEVEL: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default: INFO
    - RAY_LOGGING_ENCODING: Log encoding format (TEXT, JSON). Default: TEXT
    - RAY_LOGGING_ADDITIONAL_ATTRS: Comma-separated list of additional standard logger attributes
    - RAY_DEDUP_LOGS: Enable/disable log deduplication (0/1). Default: 1 (enabled)
    - RAY_LOG_TO_DRIVER: Enable/disable logging to driver (0/1). Default: 0 (disabled)
    - RAY_LOGGING_ROTATE_BYTES: Maximum log file size before rotation (bytes). Default: 1GB
    - RAY_LOGGING_ROTATE_BACKUP_COUNT: Number of backup log files to keep. Default: 19
    - RAY_DISABLE_IMPORT_WARNING: Disable Ray import warnings (0/1). Default: 0
    - RAY_USAGE_STATS_ENABLED: Enable/disable usage stats collection (0/1). Default: 1

    Side effects
    ------------
    Every variable listed above that is not already set is written into ``os.environ``
    with the preset value. The two rotation variables are additionally rewritten in
    normalised integer form, or replaced by 1GB / 19 when they cannot be parsed.

    Returns
    -------
    LoggingConfig
        Built from the validated level, encoding and additional attributes only; the
        remaining settings are exposed solely through ``os.environ``.
    """
    presets = {
        "PRODUCTION": {
            "RAY_LOGGING_LEVEL": "ERROR",
            "RAY_LOGGING_ENCODING": "TEXT",
            "RAY_LOGGING_ADDITIONAL_ATTRS": "",
            "RAY_DEDUP_LOGS": "1",
            "RAY_LOG_TO_DRIVER": "0",  # false
            "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "9",  # 10GB total
            "RAY_DISABLE_IMPORT_WARNING": "1",
            "RAY_USAGE_STATS_ENABLED": "0",
        },
        "DEVELOPMENT": {
            "RAY_LOGGING_LEVEL": "INFO",
            "RAY_LOGGING_ENCODING": "TEXT",
            "RAY_LOGGING_ADDITIONAL_ATTRS": "",
            "RAY_DEDUP_LOGS": "1",
            "RAY_LOG_TO_DRIVER": "0",  # false
            "RAY_LOGGING_ROTATE_BYTES": "1073741824",  # 1GB
            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "19",  # 20GB total
            "RAY_DISABLE_IMPORT_WARNING": "0",
            "RAY_USAGE_STATS_ENABLED": "1",
        },
        "DEBUG": {
            "RAY_LOGGING_LEVEL": "DEBUG",
            "RAY_LOGGING_ENCODING": "JSON",
            "RAY_LOGGING_ADDITIONAL_ATTRS": "name,funcName,lineno",
            "RAY_DEDUP_LOGS": "0",
            "RAY_LOG_TO_DRIVER": "0",  # false
            "RAY_LOGGING_ROTATE_BYTES": "536870912",  # 512MB
            "RAY_LOGGING_ROTATE_BACKUP_COUNT": "39",  # 20GB total
            "RAY_DISABLE_IMPORT_WARNING": "0",
            "RAY_USAGE_STATS_ENABLED": "1",
        },
    }

    # Pick the preset, falling back to DEVELOPMENT on an unknown name.
    preset_level = os.environ.get("INGEST_RAY_LOG_LEVEL", "DEVELOPMENT").upper()
    if preset_level not in presets:
        logger.warning(
            f"Invalid INGEST_RAY_LOG_LEVEL '{preset_level}', using DEVELOPMENT. "
            f"Valid presets: {list(presets.keys())}"
        )
        preset_level = "DEVELOPMENT"

    # Explicitly exported variables win; the preset only fills the gaps.
    for key, default_value in presets[preset_level].items():
        os.environ.setdefault(key, default_value)

    logger.info(f"Applied Ray logging preset: {preset_level}")

    # Log level: validated against the names accepted here, else INFO.
    log_level = os.environ.get("RAY_LOGGING_LEVEL", "INFO").upper()
    valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    if log_level not in valid_levels:
        logger.warning(f"Invalid RAY_LOGGING_LEVEL '{log_level}', using INFO. Valid levels: {valid_levels}")
        log_level = "INFO"

    # Encoding: TEXT or JSON, else TEXT.
    encoding = os.environ.get("RAY_LOGGING_ENCODING", "TEXT").upper()
    valid_encodings = ["TEXT", "JSON"]
    if encoding not in valid_encodings:
        logger.warning(f"Invalid RAY_LOGGING_ENCODING '{encoding}', using TEXT. Valid encodings: {valid_encodings}")
        encoding = "TEXT"

    # Additional standard logger attributes; blank entries are dropped.
    additional_attrs_str = os.environ.get("RAY_LOGGING_ADDITIONAL_ATTRS", "")
    additional_log_standard_attrs = [attr.strip() for attr in additional_attrs_str.split(",") if attr.strip()]

    # Pass-through settings consumed via the environment. The preset step above
    # already guarantees they are present; setdefault keeps that guarantee local.
    for env_name, fallback in (
        ("RAY_DEDUP_LOGS", "1"),
        ("RAY_LOG_TO_DRIVER", "0"),
        ("RAY_DISABLE_IMPORT_WARNING", "0"),
        ("RAY_USAGE_STATS_ENABLED", "1"),
    ):
        os.environ.setdefault(env_name, fallback)

    # Rotation settings must be integers: normalise valid values, replace invalid
    # ones with the fixed fallback (which is independent of the chosen preset).
    for env_name, fallback, fallback_label in (
        ("RAY_LOGGING_ROTATE_BYTES", "1073741824", "1GB"),
        ("RAY_LOGGING_ROTATE_BACKUP_COUNT", "19", "19"),
    ):
        raw_value = os.environ.get(env_name, fallback)
        try:
            os.environ[env_name] = str(int(raw_value))
        except ValueError:
            logger.warning(f"Invalid {env_name} '{raw_value}', using default ({fallback_label})")
            os.environ[env_name] = fallback

    # Create LoggingConfig with validated parameters
    logging_config = LoggingConfig(
        encoding=encoding,
        log_level=log_level,
        additional_log_standard_attrs=additional_log_standard_attrs,
    )

    logger.info(
        f"Ray logging configured: preset={preset_level}, level={log_level}, encoding={encoding}, "
        f"additional_attrs={additional_log_standard_attrs}, "
        f"dedup_logs={os.environ.get('RAY_DEDUP_LOGS', '1')}, "
        f"log_to_driver={os.environ.get('RAY_LOG_TO_DRIVER', '0')}, "
        f"rotate_bytes={os.environ.get('RAY_LOGGING_ROTATE_BYTES', '1073741824')}, "
        f"rotate_backup_count={os.environ.get('RAY_LOGGING_ROTATE_BACKUP_COUNT', '19')}"
    )

    return logging_config
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def launch_pipeline(
    pipeline_config: PipelineConfigSchema,
    block: bool = True,
    disable_dynamic_scaling: Optional[bool] = None,
    dynamic_memory_threshold: Optional[float] = None,
) -> Tuple[Union[Any, None], Optional[float]]:
    """
    Launch a pipeline using the provided configuration.

    This function handles the core pipeline launching logic including Ray
    initialization, pipeline building, and execution loop.

    Parameters
    ----------
    pipeline_config : PipelineConfigSchema
        Validated pipeline configuration to execute. Note that
        ``pipeline_config.pipeline.disable_dynamic_scaling`` is set to True in place
        when ``disable_dynamic_scaling`` requests it.
    block : bool, optional
        Whether to block until pipeline completes, by default True.
    disable_dynamic_scaling : Optional[bool], optional
        Override for dynamic scaling behavior, by default None. Only a truthy value
        has an effect; it can turn scaling off but never back on.
    dynamic_memory_threshold : Optional[float], optional
        Override for memory threshold, by default None. Accepted for interface
        compatibility; it is not read anywhere in this function.

    Returns
    -------
    Tuple[Union[Any, None], Optional[float]]
        Raw pipeline object (type elided to avoid circular import) and elapsed time. For blocking execution,
        returns (None, elapsed_time). For non-blocking, returns (pipeline, None).

    Raises
    ------
    Exception
        Any exception raised while building or starting the pipeline is re-raised
        after best-effort cleanup (pipeline stop and Ray shutdown). Cleanup errors are
        logged and never replace the original startup exception.
    """
    logger.info("Starting pipeline setup")

    # Initialize Ray if not already initialized
    if not ray.is_initialized():
        # Build Ray logging configuration
        logging_config = build_logging_config_from_env()

        # Clear existing handlers from root logger before Ray adds its handler
        # This prevents duplicate logging caused by multiple handlers on the root logger
        root_logger = logging.getLogger()
        for handler in root_logger.handlers[:]:
            root_logger.removeHandler(handler)
        logger.info("Cleared existing root logger handlers to prevent Ray logging duplicates")

        # NOTE(review): the dashboard binds to all interfaces and the spill
        # directories are fixed "/tmp/ray_spill_testing_*" paths; confirm both are
        # intended outside of test environments.
        ray.init(
            namespace="nv_ingest_ray",
            ignore_reinit_error=True,
            dashboard_host="0.0.0.0",
            dashboard_port=8265,
            logging_config=logging_config,  # Ray will add its own StreamHandler
            _system_config={
                "local_fs_capacity_threshold": 0.9,
                "object_spilling_config": json.dumps(
                    {
                        "type": "filesystem",
                        "params": {
                            "directory_path": [
                                "/tmp/ray_spill_testing_0",
                                "/tmp/ray_spill_testing_1",
                                "/tmp/ray_spill_testing_2",
                                "/tmp/ray_spill_testing_3",
                            ],
                            "buffer_size": 100_000_000,
                        },
                    },
                ),
            },
        )

    # Handle disable_dynamic_scaling parameter override
    if disable_dynamic_scaling and not pipeline_config.pipeline.disable_dynamic_scaling:
        # Directly modify the pipeline config to disable dynamic scaling
        pipeline_config.pipeline.disable_dynamic_scaling = True
        logger.info("Dynamic scaling disabled via function parameter override")

    # Resolve static replicas
    pipeline_config = resolve_static_replicas(pipeline_config)

    # Pretty print the final pipeline configuration (after replica resolution)
    pretty_output = pretty_print_pipeline_config(pipeline_config, config_path=None)
    logger.info("\n" + pretty_output)

    # Set up the ingestion pipeline
    start_abs = datetime.now()
    ingest_pipeline = None
    try:
        ingest_pipeline = IngestPipelineBuilder(pipeline_config)
        ingest_pipeline.build()

        # Record setup time
        end_setup = start_run = datetime.now()
        setup_time = (end_setup - start_abs).total_seconds()
        logger.info(f"Pipeline setup complete in {setup_time:.2f} seconds")

        # Run the pipeline
        logger.debug("Running pipeline")
        ingest_pipeline.start()
    except Exception as e:
        # Ensure any partial startup is torn down
        logger.error(f"Pipeline startup failed, initiating cleanup: {e}", exc_info=True)
        if ingest_pipeline is not None:
            try:
                ingest_pipeline.stop()
            except Exception as stop_err:
                # Best-effort: the pipeline may be only partially constructed.
                logger.warning(f"Ignoring error while stopping partially started pipeline: {stop_err}")
        try:
            if ray.is_initialized():
                ray.shutdown()
                logger.info("Ray shutdown complete after startup failure.")
        except Exception as shutdown_err:
            # A failing shutdown must not hide the startup error re-raised below.
            logger.warning(f"Ignoring error during Ray shutdown after startup failure: {shutdown_err}")
        # Re-raise to surface failure to caller
        raise

    if not block:
        # Non-blocking - return the pipeline interface
        # Access the internal RayPipeline from IngestPipelineBuilder
        return ingest_pipeline._pipeline, None

    try:
        # Block indefinitely until a KeyboardInterrupt is received
        while True:
            time.sleep(5)
    except KeyboardInterrupt:
        logger.info("Interrupt received, shutting down pipeline.")
        try:
            ingest_pipeline.stop()
        finally:
            # Shut Ray down even when stopping the pipeline fails.
            ray.shutdown()
            logger.info("Ray shutdown complete.")
    except Exception as e:
        logger.error(f"Unexpected error during pipeline run: {e}", exc_info=True)
        try:
            ingest_pipeline.stop()
        finally:
            if ray.is_initialized():
                ray.shutdown()
        raise

    # Record execution times
    end_run = datetime.now()
    run_time = (end_run - start_run).total_seconds()
    total_elapsed = (end_run - start_abs).total_seconds()

    logger.info(f"Pipeline execution time: {run_time:.2f} seconds")
    logger.info(f"Total time elapsed: {total_elapsed:.2f} seconds")

    return None, total_elapsed
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def run_pipeline_process(
    pipeline_config: PipelineConfigSchema,
    stdout: Optional[TextIO] = None,
    stderr: Optional[TextIO] = None,
) -> None:
    """
    Run a pipeline to completion inside a child process.

    Intended as the ``target`` of a ``multiprocessing.Process``. In order, it
    rebinds ``sys.stdout`` / ``sys.stderr`` when streams are given, requests
    SIGKILL on parent death, moves the process into its own process group,
    installs SIGINT/SIGTERM handlers that shut Ray down and exit, optionally
    starts the simple message broker, and finally calls ``launch_pipeline``
    in blocking mode.

    Parameters
    ----------
    pipeline_config : PipelineConfigSchema
        Pipeline configuration object.
    stdout : Optional[TextIO], optional
        Replacement for ``sys.stdout`` in this process, by default None (unchanged).
    stderr : Optional[TextIO], optional
        Replacement for ``sys.stderr`` in this process, by default None (unchanged).

    Raises
    ------
    Exception
        Anything raised by ``launch_pipeline`` is logged and re-raised.
    """
    # Python-level stream replacement only; OS descriptors are left alone here.
    if stdout:
        sys.stdout = stdout
    if stderr:
        sys.stderr = stderr

    # Die with the parent so an orphaned pipeline cannot hang around.
    try:
        set_pdeathsig(signal.SIGKILL)
    except Exception as pdeath_err:
        logger.debug(f"set_pdeathsig not available or failed: {pdeath_err}")

    # Own process group: lets the whole subtree be signalled at once.
    try:
        os.setpgrp()
    except Exception as pgrp_err:
        logger.debug(f"os.setpgrp() not available or failed: {pgrp_err}")

    def _shutdown_on_signal(signum, frame):
        """Best-effort Ray shutdown, then leave the process immediately."""
        try:
            _safe_log(logging.INFO, f"Received signal {signum}; shutting down Ray and exiting...")
            if ray.is_initialized():
                ray.shutdown()
        finally:
            # Hard exit regardless of how the cleanup went.
            os._exit(0)

    try:
        for handled_signal in (signal.SIGINT, signal.SIGTERM):
            signal.signal(handled_signal, _shutdown_on_signal)
    except Exception as signal_err:
        logger.debug(f"Signal handlers not set: {signal_err}")

    # Probe messages on each output channel (stdout, stderr, logger).
    print("DEBUG: Direct print to stdout - should appear in parent process")
    sys.stderr.write("DEBUG: Direct write to stderr - should appear in parent process\n")
    logger.info("DEBUG: Logger info - may not appear if logging handlers not redirected")

    # The broker, when started here, shares this subprocess's process group.
    broker = None
    try:
        broker_in_subprocess = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1"
        if broker_in_subprocess:
            try:
                # Start it only when the configuration asks for one.
                pipeline_section = getattr(pipeline_config, "pipeline", None)
                if pipeline_section and getattr(pipeline_section, "launch_simple_broker", False):
                    _safe_log(logging.INFO, "Starting SimpleMessageBroker inside subprocess")
                    broker = start_simple_message_broker({})
            except Exception as broker_err:
                # Carry on without a broker; the launch below fails fast if one is required.
                _safe_log(logging.ERROR, f"Failed to start SimpleMessageBroker in subprocess: {broker_err}")

        # Blocks until the pipeline is interrupted or fails.
        launch_pipeline(pipeline_config, block=True)

    except Exception as run_err:
        logger.error(f"Subprocess pipeline execution failed: {run_err}")
        raise
    finally:
        # Normal-exit path only: politely terminate a broker we started. On
        # failure/termination paths the process-group kill issued by the parent
        # or the signal handler is relied upon instead.
        if broker is not None:
            try:
                is_alive = getattr(broker, "is_alive", None)
                if is_alive is not None and is_alive():
                    broker.terminate()
            except Exception:
                pass
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def kill_pipeline_process_group(process: multiprocessing.Process) -> None:
    """Compatibility wrapper kept for existing callers of this module.

    Emits a debug note through the best-effort logger and then forwards
    ``process`` unchanged to the implementation imported from
    ``nv_ingest.framework.orchestration.process.termination``.
    """
    # _safe_log is used because this may run while logging is being torn down.
    _safe_log(logging.DEBUG, "Delegating kill_pipeline_process_group to process.termination module")
    _kill_pipeline_process_group(process)
|