nv-ingest 2025.8.14.dev20250814__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- nv_ingest/framework/orchestration/execution/__init__.py +3 -0
- nv_ingest/framework/orchestration/execution/helpers.py +85 -0
- nv_ingest/framework/orchestration/execution/options.py +112 -0
- nv_ingest/framework/orchestration/process/__init__.py +3 -0
- nv_ingest/framework/orchestration/process/dependent_services.py +55 -0
- nv_ingest/framework/orchestration/process/execution.py +497 -0
- nv_ingest/framework/orchestration/process/lifecycle.py +122 -0
- nv_ingest/framework/orchestration/process/strategies.py +182 -0
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +1 -1
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +23 -23
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +16 -16
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +9 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +10 -6
- nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +18 -17
- nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +14 -13
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +15 -13
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +22 -13
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +10 -7
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +10 -8
- nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
- nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +71 -61
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +7 -5
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +7 -5
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +13 -14
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +18 -12
- nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
- nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
- nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
- nv_ingest/pipeline/__init__.py +3 -0
- nv_ingest/pipeline/config/__init__.py +3 -0
- nv_ingest/pipeline/config/loaders.py +198 -0
- nv_ingest/pipeline/config/replica_resolver.py +227 -0
- nv_ingest/pipeline/default_pipeline_impl.py +517 -0
- nv_ingest/pipeline/ingest_pipeline.py +389 -0
- nv_ingest/pipeline/pipeline_schema.py +398 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/METADATA +1 -1
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/RECORD +54 -40
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/top_level.txt +0 -0
|
@@ -3,12 +3,13 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
|
|
7
8
|
import ray
|
|
8
9
|
|
|
9
10
|
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
10
11
|
from nv_ingest.framework.util.flow_control import filter_by_task
|
|
11
|
-
from nv_ingest_api.internal.primitives.ingest_control_message import
|
|
12
|
+
from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage, remove_task_by_type
|
|
12
13
|
from nv_ingest_api.internal.primitives.tracing.tagging import traceable
|
|
13
14
|
from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
|
|
14
15
|
from nv_ingest_api.internal.transform.split_text import transform_text_split_and_tokenize_internal
|
|
@@ -16,6 +17,8 @@ from nv_ingest_api.util.exception_handlers.decorators import (
|
|
|
16
17
|
nv_ingest_node_failure_try_except,
|
|
17
18
|
)
|
|
18
19
|
|
|
20
|
+
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
21
|
+
|
|
19
22
|
logger = logging.getLogger(__name__)
|
|
20
23
|
|
|
21
24
|
|
|
@@ -29,15 +32,16 @@ class TextSplitterStage(RayActorStage):
|
|
|
29
32
|
and tokenization logic. The updated DataFrame is then set back into the message.
|
|
30
33
|
"""
|
|
31
34
|
|
|
32
|
-
def __init__(self, config: TextSplitterSchema) -> None:
|
|
33
|
-
super().__init__(config)
|
|
35
|
+
def __init__(self, config: TextSplitterSchema, stage_name: Optional[str] = None) -> None:
|
|
36
|
+
super().__init__(config, stage_name=stage_name)
|
|
34
37
|
# Store the validated configuration (assumed to be an instance of TextSplitterSchema)
|
|
35
38
|
self.validated_config: TextSplitterSchema = config
|
|
36
|
-
logger.
|
|
39
|
+
logger.debug("TextSplitterStage initialized with config: %s", config)
|
|
37
40
|
|
|
38
|
-
@
|
|
39
|
-
@
|
|
40
|
-
@
|
|
41
|
+
@nv_ingest_node_failure_try_except()
|
|
42
|
+
@traceable()
|
|
43
|
+
@udf_intercept_hook()
|
|
44
|
+
@filter_by_task(required_tasks=["split"])
|
|
41
45
|
def on_data(self, message: Any) -> Any:
|
|
42
46
|
"""
|
|
43
47
|
Process an incoming IngestControlMessage by splitting and tokenizing its text.
|
|
@@ -68,11 +72,13 @@ class TextSplitterStage(RayActorStage):
|
|
|
68
72
|
transform_config=self.validated_config,
|
|
69
73
|
execution_trace_log=None,
|
|
70
74
|
)
|
|
71
|
-
logger.
|
|
75
|
+
logger.debug(
|
|
76
|
+
"TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated)
|
|
77
|
+
)
|
|
72
78
|
|
|
73
79
|
# Update the message payload.
|
|
74
80
|
message.payload(df_updated)
|
|
75
|
-
logger.
|
|
81
|
+
logger.debug("TextSplitterStage.on_data: Finished processing, returning updated message.")
|
|
76
82
|
|
|
77
83
|
return message
|
|
78
84
|
|
|
@@ -110,10 +116,10 @@ def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSp
|
|
|
110
116
|
transform_config=stage_config,
|
|
111
117
|
execution_trace_log=None,
|
|
112
118
|
)
|
|
113
|
-
logger.
|
|
119
|
+
logger.debug("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
|
|
114
120
|
|
|
115
121
|
# Update the message payload.
|
|
116
122
|
control_message.payload(df_updated)
|
|
117
|
-
logger.
|
|
123
|
+
logger.debug("TextSplitterStage.on_data: Finished processing, returning updated message.")
|
|
118
124
|
|
|
119
125
|
return control_message
|
|
@@ -4,11 +4,16 @@
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
import time
|
|
7
|
-
from typing import Any
|
|
7
|
+
from typing import Any, Optional
|
|
8
8
|
from pydantic import BaseModel
|
|
9
9
|
import ray
|
|
10
10
|
|
|
11
11
|
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
12
|
+
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
13
|
+
from nv_ingest_api.internal.primitives.tracing.tagging import traceable
|
|
14
|
+
from nv_ingest_api.util.exception_handlers.decorators import (
|
|
15
|
+
nv_ingest_node_failure_try_except,
|
|
16
|
+
)
|
|
12
17
|
|
|
13
18
|
logger = logging.getLogger(__name__)
|
|
14
19
|
|
|
@@ -22,12 +27,15 @@ class ThroughputMonitorStage(RayActorStage):
|
|
|
22
27
|
It also adds the throughput as metadata on the control message before passing it on.
|
|
23
28
|
"""
|
|
24
29
|
|
|
25
|
-
def __init__(self, config: BaseModel) -> None:
|
|
30
|
+
def __init__(self, config: BaseModel, stage_name: Optional[str] = None) -> None:
|
|
26
31
|
# Initialize base attributes (e.g., self._running, self.start_time) via the base class.
|
|
27
|
-
super().__init__(config)
|
|
32
|
+
super().__init__(config, stage_name=stage_name)
|
|
28
33
|
self.count = 0
|
|
29
34
|
self.last_emit_time = None # Timestamp when the last throughput measure was emitted
|
|
30
35
|
|
|
36
|
+
@nv_ingest_node_failure_try_except()
|
|
37
|
+
@traceable()
|
|
38
|
+
@udf_intercept_hook()
|
|
31
39
|
async def on_data(self, message: Any) -> Any:
|
|
32
40
|
"""
|
|
33
41
|
Process an incoming control message. Increment the internal counter and, every 100 messages,
|
|
@@ -10,7 +10,6 @@ from typing import Dict, Any, List, Tuple, Optional
|
|
|
10
10
|
|
|
11
11
|
from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
|
|
12
12
|
|
|
13
|
-
logging.basicConfig(level=logging.INFO)
|
|
14
13
|
logger = logging.getLogger(__name__)
|
|
15
14
|
|
|
16
15
|
# --- Constants ---
|
|
@@ -259,7 +258,7 @@ class ResourceConstraintManager:
|
|
|
259
258
|
else:
|
|
260
259
|
self.core_based_replica_limit = None # Treat as unlimited if detection failed
|
|
261
260
|
|
|
262
|
-
logger.
|
|
261
|
+
logger.debug(
|
|
263
262
|
f"[ConstraintMgr] Initialized. MaxReplicas={max_replicas}, "
|
|
264
263
|
f"EffectiveCoreLimit={self.available_cores:.2f} " # Log the potentially fractional value
|
|
265
264
|
f"(Method: {self.core_detection_details.get('detection_method')}), "
|
|
@@ -2,302 +2,39 @@
|
|
|
2
2
|
# All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
import atexit
|
|
6
5
|
import logging
|
|
7
|
-
import
|
|
8
|
-
import os
|
|
9
|
-
import signal
|
|
10
|
-
import sys
|
|
11
|
-
import time
|
|
12
|
-
from ctypes import CDLL, c_int
|
|
13
|
-
from datetime import datetime
|
|
14
|
-
from typing import Union, Tuple, Optional, TextIO
|
|
6
|
+
from typing import Union, Optional, TextIO
|
|
15
7
|
|
|
16
|
-
import ray
|
|
17
|
-
from pydantic import BaseModel, ConfigDict
|
|
18
8
|
|
|
19
9
|
from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
|
|
20
|
-
RayPipeline,
|
|
21
|
-
ScalingConfig,
|
|
22
10
|
RayPipelineSubprocessInterface,
|
|
23
11
|
RayPipelineInterface,
|
|
24
12
|
)
|
|
25
|
-
from nv_ingest.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
DYNAMIC_MEMORY_PENALTY_FACTOR,
|
|
34
|
-
DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
|
|
35
|
-
DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
|
|
13
|
+
from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
|
|
14
|
+
|
|
15
|
+
from nv_ingest.pipeline.config.loaders import resolve_pipeline_config, apply_runtime_overrides
|
|
16
|
+
from nv_ingest.framework.orchestration.process.lifecycle import PipelineLifecycleManager
|
|
17
|
+
from nv_ingest.framework.orchestration.execution.helpers import (
|
|
18
|
+
create_runtime_overrides,
|
|
19
|
+
create_execution_options,
|
|
20
|
+
select_execution_strategy,
|
|
36
21
|
)
|
|
37
22
|
|
|
38
23
|
logger = logging.getLogger(__name__)
|
|
39
24
|
|
|
40
25
|
|
|
41
|
-
class PipelineCreationSchema(BaseModel):
|
|
42
|
-
"""
|
|
43
|
-
Schema for pipeline creation configuration.
|
|
44
|
-
|
|
45
|
-
Contains all parameters required to set up and execute the pipeline,
|
|
46
|
-
including endpoints, API keys, and processing options.
|
|
47
|
-
"""
|
|
48
|
-
|
|
49
|
-
arrow_default_memory_pool: str = os.getenv("ARROW_DEFAULT_MEMORY_POOL", "system")
|
|
50
|
-
|
|
51
|
-
# Audio processing settings
|
|
52
|
-
audio_grpc_endpoint: str = os.getenv("AUDIO_GRPC_ENDPOINT", "grpc.nvcf.nvidia.com:443")
|
|
53
|
-
audio_function_id: str = os.getenv("AUDIO_FUNCTION_ID", "1598d209-5e27-4d3c-8079-4751568b1081")
|
|
54
|
-
audio_infer_protocol: str = os.getenv("AUDIO_INFER_PROTOCOL", "grpc")
|
|
55
|
-
|
|
56
|
-
# Embedding model settings
|
|
57
|
-
embedding_nim_endpoint: str = os.getenv("EMBEDDING_NIM_ENDPOINT", "https://integrate.api.nvidia.com/v1")
|
|
58
|
-
embedding_nim_model_name: str = os.getenv("EMBEDDING_NIM_MODEL_NAME", "nvidia/llama-3.2-nv-embedqa-1b-v2")
|
|
59
|
-
|
|
60
|
-
# General pipeline settings
|
|
61
|
-
ingest_log_level: str = os.getenv("INGEST_LOG_LEVEL", "INFO")
|
|
62
|
-
max_ingest_process_workers: str = os.getenv("MAX_INGEST_PROCESS_WORKERS", "16")
|
|
63
|
-
|
|
64
|
-
# Messaging configuration
|
|
65
|
-
message_client_host: str = os.getenv("MESSAGE_CLIENT_HOST", "localhost")
|
|
66
|
-
message_client_port: str = os.getenv("MESSAGE_CLIENT_PORT", "7671")
|
|
67
|
-
message_client_type: str = os.getenv("MESSAGE_CLIENT_TYPE", "simple")
|
|
68
|
-
|
|
69
|
-
# NeMo Retriever settings
|
|
70
|
-
nemoretriever_parse_http_endpoint: str = os.getenv(
|
|
71
|
-
"NEMORETRIEVER_PARSE_HTTP_ENDPOINT", "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
72
|
-
)
|
|
73
|
-
nemoretriever_parse_infer_protocol: str = os.getenv("NEMORETRIEVER_PARSE_INFER_PROTOCOL", "http")
|
|
74
|
-
nemoretriever_parse_model_name: str = os.getenv("NEMORETRIEVER_PARSE_MODEL_NAME", "nvidia/nemoretriever-parse")
|
|
75
|
-
|
|
76
|
-
# API keys
|
|
77
|
-
ngc_api_key: str = os.getenv("NGC_API_KEY", "")
|
|
78
|
-
nvidia_api_key: str = os.getenv("NVIDIA_API_KEY", "")
|
|
79
|
-
|
|
80
|
-
# Observability settings
|
|
81
|
-
otel_exporter_otlp_endpoint: str = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317")
|
|
82
|
-
|
|
83
|
-
# OCR settings
|
|
84
|
-
ocr_http_endpoint: str = os.getenv("OCR_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
|
|
85
|
-
ocr_infer_protocol: str = os.getenv("OCR_INFER_PROTOCOL", "http")
|
|
86
|
-
ocr_model_name: str = os.getenv("OCR_MODEL_NAME", "paddle")
|
|
87
|
-
|
|
88
|
-
# Task queue settings
|
|
89
|
-
REDIS_INGEST_TASK_QUEUE: str = "ingest_task_queue"
|
|
90
|
-
|
|
91
|
-
# Vision language model settings
|
|
92
|
-
vlm_caption_endpoint: str = os.getenv(
|
|
93
|
-
"VLM_CAPTION_ENDPOINT",
|
|
94
|
-
"https://integrate.api.nvidia.com/v1/chat/completions",
|
|
95
|
-
)
|
|
96
|
-
vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
|
|
97
|
-
|
|
98
|
-
# YOLOX image processing settings
|
|
99
|
-
yolox_graphic_elements_http_endpoint: str = os.getenv(
|
|
100
|
-
"YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT",
|
|
101
|
-
"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1",
|
|
102
|
-
)
|
|
103
|
-
yolox_graphic_elements_infer_protocol: str = os.getenv("YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL", "http")
|
|
104
|
-
|
|
105
|
-
# YOLOX page elements settings
|
|
106
|
-
yolox_http_endpoint: str = os.getenv(
|
|
107
|
-
"YOLOX_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
|
|
108
|
-
)
|
|
109
|
-
yolox_infer_protocol: str = os.getenv("YOLOX_INFER_PROTOCOL", "http")
|
|
110
|
-
|
|
111
|
-
# YOLOX table structure settings
|
|
112
|
-
yolox_table_structure_http_endpoint: str = os.getenv(
|
|
113
|
-
"YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1"
|
|
114
|
-
)
|
|
115
|
-
yolox_table_structure_infer_protocol: str = os.getenv("YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL", "http")
|
|
116
|
-
|
|
117
|
-
model_config = ConfigDict(extra="forbid")
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def redirect_os_fds(stdout: Optional[TextIO] = None, stderr: Optional[TextIO] = None):
|
|
121
|
-
"""
|
|
122
|
-
Redirect OS-level stdout (fd=1) and stderr (fd=2) to the given file-like objects,
|
|
123
|
-
or to /dev/null if not provided.
|
|
124
|
-
|
|
125
|
-
Parameters
|
|
126
|
-
----------
|
|
127
|
-
stdout : Optional[TextIO]
|
|
128
|
-
Stream to receive OS-level stdout. If None, redirected to /dev/null.
|
|
129
|
-
stderr : Optional[TextIO]
|
|
130
|
-
Stream to receive OS-level stderr. If None, redirected to /dev/null.
|
|
131
|
-
"""
|
|
132
|
-
devnull_fd = os.open(os.devnull, os.O_WRONLY)
|
|
133
|
-
|
|
134
|
-
if stdout is not None:
|
|
135
|
-
os.dup2(stdout.fileno(), 1)
|
|
136
|
-
else:
|
|
137
|
-
os.dup2(devnull_fd, 1)
|
|
138
|
-
|
|
139
|
-
if stderr is not None:
|
|
140
|
-
os.dup2(stderr.fileno(), 2)
|
|
141
|
-
else:
|
|
142
|
-
os.dup2(devnull_fd, 2)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def set_pdeathsig(sig=signal.SIGKILL):
|
|
146
|
-
libc = CDLL("libc.so.6")
|
|
147
|
-
PR_SET_PDEATHSIG = 1
|
|
148
|
-
libc.prctl(PR_SET_PDEATHSIG, c_int(sig))
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def kill_pipeline_process_group(pid: int):
|
|
152
|
-
"""
|
|
153
|
-
Kill the process group associated with the given PID, if it exists and is alive.
|
|
154
|
-
|
|
155
|
-
Parameters
|
|
156
|
-
----------
|
|
157
|
-
pid : int
|
|
158
|
-
The PID of the process whose group should be killed.
|
|
159
|
-
"""
|
|
160
|
-
try:
|
|
161
|
-
# Get the process group ID
|
|
162
|
-
pgid = os.getpgid(pid)
|
|
163
|
-
|
|
164
|
-
# Check if the group is still alive by sending signal 0
|
|
165
|
-
os.killpg(pgid, 0) # Does not kill, just checks if it's alive
|
|
166
|
-
|
|
167
|
-
# If no exception, the group is alive — kill it
|
|
168
|
-
os.killpg(pgid, signal.SIGKILL)
|
|
169
|
-
print(f"Killed subprocess group {pgid}")
|
|
170
|
-
|
|
171
|
-
except ProcessLookupError:
|
|
172
|
-
print(f"Process group for PID {pid} no longer exists.")
|
|
173
|
-
except PermissionError:
|
|
174
|
-
print(f"Permission denied to kill process group for PID {pid}.")
|
|
175
|
-
except Exception as e:
|
|
176
|
-
print(f"Failed to kill subprocess group: {e}")
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
def _run_pipeline_process(
|
|
180
|
-
ingest_config: PipelineCreationSchema,
|
|
181
|
-
disable_dynamic_scaling: Optional[bool],
|
|
182
|
-
dynamic_memory_threshold: Optional[float],
|
|
183
|
-
raw_stdout: Optional[TextIO] = None,
|
|
184
|
-
raw_stderr: Optional[TextIO] = None,
|
|
185
|
-
):
|
|
186
|
-
"""
|
|
187
|
-
Subprocess entrypoint to launch the pipeline. Redirects all output to the provided
|
|
188
|
-
file-like streams or /dev/null if not specified.
|
|
189
|
-
|
|
190
|
-
Parameters
|
|
191
|
-
----------
|
|
192
|
-
ingest_config : PipelineCreationSchema
|
|
193
|
-
Validated pipeline configuration.
|
|
194
|
-
disable_dynamic_scaling : Optional[bool]
|
|
195
|
-
Whether to disable dynamic scaling.
|
|
196
|
-
dynamic_memory_threshold : Optional[float]
|
|
197
|
-
Threshold for triggering scaling.
|
|
198
|
-
raw_stdout : Optional[TextIO]
|
|
199
|
-
Destination for stdout. Defaults to /dev/null.
|
|
200
|
-
raw_stderr : Optional[TextIO]
|
|
201
|
-
Destination for stderr. Defaults to /dev/null.
|
|
202
|
-
"""
|
|
203
|
-
# Set the death signal for the subprocess
|
|
204
|
-
set_pdeathsig()
|
|
205
|
-
os.setsid() # Creates new process group so it can be SIGKILLed as a group
|
|
206
|
-
|
|
207
|
-
# Redirect OS-level file descriptors
|
|
208
|
-
redirect_os_fds(stdout=raw_stdout, stderr=raw_stderr)
|
|
209
|
-
|
|
210
|
-
# Redirect Python-level sys.stdout/sys.stderr
|
|
211
|
-
sys.stdout = raw_stdout or open(os.devnull, "w")
|
|
212
|
-
sys.stderr = raw_stderr or open(os.devnull, "w")
|
|
213
|
-
|
|
214
|
-
try:
|
|
215
|
-
_launch_pipeline(
|
|
216
|
-
ingest_config,
|
|
217
|
-
block=True,
|
|
218
|
-
disable_dynamic_scaling=disable_dynamic_scaling,
|
|
219
|
-
dynamic_memory_threshold=dynamic_memory_threshold,
|
|
220
|
-
)
|
|
221
|
-
except Exception as e:
|
|
222
|
-
sys.__stderr__.write(f"Subprocess pipeline run failed: {e}\n")
|
|
223
|
-
raise
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def _launch_pipeline(
|
|
227
|
-
ingest_config: PipelineCreationSchema,
|
|
228
|
-
block: bool,
|
|
229
|
-
disable_dynamic_scaling: bool = None,
|
|
230
|
-
dynamic_memory_threshold: float = None,
|
|
231
|
-
) -> Tuple[Union[RayPipeline, None], float]:
|
|
232
|
-
logger.info("Starting pipeline setup")
|
|
233
|
-
|
|
234
|
-
dynamic_memory_scaling = not DISABLE_DYNAMIC_SCALING
|
|
235
|
-
if disable_dynamic_scaling is not None:
|
|
236
|
-
dynamic_memory_scaling = not disable_dynamic_scaling
|
|
237
|
-
|
|
238
|
-
dynamic_memory_threshold = dynamic_memory_threshold if dynamic_memory_threshold else DYNAMIC_MEMORY_THRESHOLD
|
|
239
|
-
|
|
240
|
-
scaling_config = ScalingConfig(
|
|
241
|
-
dynamic_memory_scaling=dynamic_memory_scaling,
|
|
242
|
-
dynamic_memory_threshold=dynamic_memory_threshold,
|
|
243
|
-
pid_kp=DYNAMIC_MEMORY_KP,
|
|
244
|
-
pid_ki=DYNAMIC_MEMORY_KI,
|
|
245
|
-
pid_ema_alpha=DYNAMIC_MEMORY_EMA_ALPHA,
|
|
246
|
-
pid_target_queue_depth=DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
|
|
247
|
-
pid_penalty_factor=DYNAMIC_MEMORY_PENALTY_FACTOR,
|
|
248
|
-
pid_error_boost_factor=DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
|
|
249
|
-
rcm_memory_safety_buffer_fraction=DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
|
|
250
|
-
)
|
|
251
|
-
|
|
252
|
-
pipeline = RayPipeline(scaling_config=scaling_config)
|
|
253
|
-
start_abs = datetime.now()
|
|
254
|
-
|
|
255
|
-
# Set up the ingestion pipeline
|
|
256
|
-
_ = setup_ingestion_pipeline(pipeline, ingest_config.model_dump())
|
|
257
|
-
|
|
258
|
-
# Record setup time
|
|
259
|
-
end_setup = start_run = datetime.now()
|
|
260
|
-
setup_elapsed = (end_setup - start_abs).total_seconds()
|
|
261
|
-
logger.info(f"Pipeline setup completed in {setup_elapsed:.2f} seconds")
|
|
262
|
-
|
|
263
|
-
# Run the pipeline
|
|
264
|
-
logger.debug("Running pipeline")
|
|
265
|
-
pipeline.start()
|
|
266
|
-
|
|
267
|
-
if block:
|
|
268
|
-
try:
|
|
269
|
-
while True:
|
|
270
|
-
time.sleep(5)
|
|
271
|
-
except KeyboardInterrupt:
|
|
272
|
-
logger.info("Interrupt received, shutting down pipeline.")
|
|
273
|
-
pipeline.stop()
|
|
274
|
-
ray.shutdown()
|
|
275
|
-
logger.info("Ray shutdown complete.")
|
|
276
|
-
|
|
277
|
-
# Record execution times
|
|
278
|
-
end_run = datetime.now()
|
|
279
|
-
run_elapsed = (end_run - start_run).total_seconds()
|
|
280
|
-
total_elapsed = (end_run - start_abs).total_seconds()
|
|
281
|
-
|
|
282
|
-
logger.info(f"Pipeline run completed in {run_elapsed:.2f} seconds")
|
|
283
|
-
logger.info(f"Total time elapsed: {total_elapsed:.2f} seconds")
|
|
284
|
-
|
|
285
|
-
return None, total_elapsed
|
|
286
|
-
else:
|
|
287
|
-
return pipeline, 0.0
|
|
288
|
-
|
|
289
|
-
|
|
290
26
|
def run_pipeline(
|
|
291
|
-
|
|
27
|
+
pipeline_config: Optional[PipelineConfigSchema] = None,
|
|
292
28
|
block: bool = True,
|
|
293
29
|
disable_dynamic_scaling: Optional[bool] = None,
|
|
294
30
|
dynamic_memory_threshold: Optional[float] = None,
|
|
295
31
|
run_in_subprocess: bool = False,
|
|
296
32
|
stdout: Optional[TextIO] = None,
|
|
297
33
|
stderr: Optional[TextIO] = None,
|
|
34
|
+
libmode: bool = True,
|
|
298
35
|
) -> Union[RayPipelineInterface, float, RayPipelineSubprocessInterface]:
|
|
299
36
|
"""
|
|
300
|
-
Launch and manage a pipeline
|
|
37
|
+
Launch and manage a pipeline using configuration.
|
|
301
38
|
|
|
302
39
|
This function is the primary entry point for executing a Ray pipeline,
|
|
303
40
|
either within the current process or in a separate Python subprocess.
|
|
@@ -306,17 +43,16 @@ def run_pipeline(
|
|
|
306
43
|
|
|
307
44
|
Parameters
|
|
308
45
|
----------
|
|
309
|
-
|
|
46
|
+
pipeline_config : Optional[PipelineConfigSchema], default=None
|
|
310
47
|
The validated configuration object used to construct and launch the pipeline.
|
|
48
|
+
If None and libmode is True, loads the default libmode pipeline.
|
|
311
49
|
block : bool, default=True
|
|
312
50
|
If True, blocks until the pipeline completes.
|
|
313
51
|
If False, returns an interface to control the pipeline externally.
|
|
314
52
|
disable_dynamic_scaling : Optional[bool], default=None
|
|
315
|
-
If
|
|
316
|
-
If None, uses the default or globally defined behavior.
|
|
53
|
+
If provided, overrides the `disable_dynamic_scaling` setting from the pipeline config.
|
|
317
54
|
dynamic_memory_threshold : Optional[float], default=None
|
|
318
|
-
|
|
319
|
-
if dynamic scaling is enabled. Defaults to the globally configured value if None.
|
|
55
|
+
If provided, overrides the `dynamic_memory_threshold` setting from the pipeline config.
|
|
320
56
|
run_in_subprocess : bool, default=False
|
|
321
57
|
If True, launches the pipeline in a separate Python subprocess using `multiprocessing.Process`.
|
|
322
58
|
If False, runs the pipeline in the current process.
|
|
@@ -326,6 +62,9 @@ def run_pipeline(
|
|
|
326
62
|
stderr : Optional[TextIO], default=None
|
|
327
63
|
Optional file-like stream to which subprocess stderr should be redirected.
|
|
328
64
|
If None, stderr is redirected to /dev/null.
|
|
65
|
+
libmode : bool, default=True
|
|
66
|
+
If True and pipeline_config is None, loads the default libmode pipeline configuration.
|
|
67
|
+
If False, requires pipeline_config to be provided.
|
|
329
68
|
|
|
330
69
|
Returns
|
|
331
70
|
-------
|
|
@@ -337,57 +76,25 @@ def run_pipeline(
|
|
|
337
76
|
|
|
338
77
|
Raises
|
|
339
78
|
------
|
|
79
|
+
ValueError
|
|
80
|
+
If pipeline_config is None and libmode is False.
|
|
340
81
|
RuntimeError
|
|
341
82
|
If the subprocess fails to start or exits with an error.
|
|
342
83
|
Exception
|
|
343
84
|
Any other exceptions raised during pipeline launch or configuration.
|
|
344
85
|
"""
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
):
|
|
350
|
-
logger.warning("NGC_API_KEY or NVIDIA_API_KEY are not set. NIM Related functions will not work.")
|
|
351
|
-
|
|
352
|
-
ctx = multiprocessing.get_context("fork")
|
|
353
|
-
process = ctx.Process(
|
|
354
|
-
target=_run_pipeline_process,
|
|
355
|
-
args=(
|
|
356
|
-
ingest_config,
|
|
357
|
-
disable_dynamic_scaling,
|
|
358
|
-
dynamic_memory_threshold,
|
|
359
|
-
stdout, # raw_stdout
|
|
360
|
-
stderr, # raw_stderr
|
|
361
|
-
),
|
|
362
|
-
daemon=False,
|
|
363
|
-
)
|
|
364
|
-
|
|
365
|
-
process.start()
|
|
366
|
-
|
|
367
|
-
interface = RayPipelineSubprocessInterface(process)
|
|
368
|
-
|
|
369
|
-
if block:
|
|
370
|
-
start_time = time.time()
|
|
371
|
-
logger.info("Waiting for subprocess pipeline to complete...")
|
|
372
|
-
process.join()
|
|
373
|
-
logger.info("Pipeline subprocess completed.")
|
|
374
|
-
return time.time() - start_time
|
|
375
|
-
else:
|
|
376
|
-
logger.info(f"Pipeline subprocess started (PID={process.pid})")
|
|
377
|
-
atexit.register(lambda: kill_pipeline_process_group(process.pid))
|
|
86
|
+
# Resolve configuration
|
|
87
|
+
config = resolve_pipeline_config(pipeline_config, libmode)
|
|
88
|
+
overrides = create_runtime_overrides(disable_dynamic_scaling, dynamic_memory_threshold)
|
|
89
|
+
final_config = apply_runtime_overrides(config, overrides)
|
|
378
90
|
|
|
379
|
-
|
|
91
|
+
# Select execution strategy
|
|
92
|
+
strategy = select_execution_strategy(run_in_subprocess)
|
|
93
|
+
options = create_execution_options(block, stdout, stderr)
|
|
380
94
|
|
|
381
|
-
#
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
block=block,
|
|
385
|
-
disable_dynamic_scaling=disable_dynamic_scaling,
|
|
386
|
-
dynamic_memory_threshold=dynamic_memory_threshold,
|
|
387
|
-
)
|
|
95
|
+
# Execute using lifecycle manager
|
|
96
|
+
lifecycle_manager = PipelineLifecycleManager(strategy)
|
|
97
|
+
result = lifecycle_manager.start(final_config, options)
|
|
388
98
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
return total_elapsed
|
|
392
|
-
else:
|
|
393
|
-
return RayPipelineInterface(pipeline)
|
|
99
|
+
# Return in expected format
|
|
100
|
+
return result.get_return_value()
|
|
@@ -4,15 +4,18 @@
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
import uuid
|
|
7
|
-
|
|
7
|
+
import inspect
|
|
8
|
+
from typing import Callable, Optional, Union, Dict, Type, List
|
|
8
9
|
|
|
9
10
|
import ray
|
|
10
11
|
from pydantic import BaseModel
|
|
11
12
|
|
|
12
13
|
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
13
|
-
from nv_ingest.framework.util.flow_control import filter_by_task
|
|
14
14
|
from nv_ingest_api.internal.primitives.tracing.tagging import traceable
|
|
15
15
|
from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
|
|
16
|
+
from nv_ingest_api.util.imports.callable_signatures import (
|
|
17
|
+
ingest_stage_callable_signature,
|
|
18
|
+
)
|
|
16
19
|
|
|
17
20
|
logger = logging.getLogger(__name__)
|
|
18
21
|
|
|
@@ -54,6 +57,7 @@ def wrap_callable_as_stage(
|
|
|
54
57
|
- Only `.remote(config)` and `.options(...)` (chained with `.remote(config)`) are supported.
|
|
55
58
|
All other class/actor patterns will raise `NotImplementedError`.
|
|
56
59
|
"""
|
|
60
|
+
ingest_stage_callable_signature(inspect.signature(fn))
|
|
57
61
|
trace_name = trace_id or fn.__name__
|
|
58
62
|
|
|
59
63
|
def make_actor_class():
|
|
@@ -90,7 +94,6 @@ def wrap_callable_as_stage(
|
|
|
90
94
|
|
|
91
95
|
@traceable(trace_name)
|
|
92
96
|
@nv_ingest_node_failure_try_except(annotation_id=trace_name, raise_on_failure=False)
|
|
93
|
-
@filter_by_task(required_tasks=required_tasks) if required_tasks else (lambda f: f)
|
|
94
97
|
def on_data(self, control_message):
|
|
95
98
|
"""
|
|
96
99
|
Processes a control message using the wrapped function.
|
|
@@ -105,6 +108,13 @@ def wrap_callable_as_stage(
|
|
|
105
108
|
IngestControlMessage
|
|
106
109
|
The processed message, or the original on failure.
|
|
107
110
|
"""
|
|
111
|
+
# Apply task filtering if required_tasks is specified and not empty
|
|
112
|
+
if required_tasks:
|
|
113
|
+
# Check if message has any of the required tasks
|
|
114
|
+
message_tasks = {task.type for task in control_message.get_tasks()}
|
|
115
|
+
if not any(task in message_tasks for task in required_tasks):
|
|
116
|
+
return control_message
|
|
117
|
+
|
|
108
118
|
try:
|
|
109
119
|
return fn(control_message, self.validated_config)
|
|
110
120
|
except Exception as e:
|