nv-ingest 2025.8.14.dev20250814__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest might be problematic. Click here for more details.
- nv_ingest/framework/orchestration/execution/__init__.py +3 -0
- nv_ingest/framework/orchestration/execution/helpers.py +85 -0
- nv_ingest/framework/orchestration/execution/options.py +112 -0
- nv_ingest/framework/orchestration/process/__init__.py +3 -0
- nv_ingest/framework/orchestration/process/dependent_services.py +55 -0
- nv_ingest/framework/orchestration/process/execution.py +497 -0
- nv_ingest/framework/orchestration/process/lifecycle.py +122 -0
- nv_ingest/framework/orchestration/process/strategies.py +182 -0
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +1 -1
- nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
- nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +23 -23
- nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +16 -16
- nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +9 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +10 -6
- nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
- nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +18 -17
- nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
- nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +14 -13
- nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +15 -13
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
- nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +22 -13
- nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +10 -7
- nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +10 -8
- nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
- nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +71 -61
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +7 -5
- nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
- nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
- nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +7 -5
- nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +13 -14
- nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +18 -12
- nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
- nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +33 -326
- nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
- nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
- nv_ingest/pipeline/__init__.py +3 -0
- nv_ingest/pipeline/config/__init__.py +3 -0
- nv_ingest/pipeline/config/loaders.py +198 -0
- nv_ingest/pipeline/config/replica_resolver.py +227 -0
- nv_ingest/pipeline/default_pipeline_impl.py +517 -0
- nv_ingest/pipeline/ingest_pipeline.py +389 -0
- nv_ingest/pipeline/pipeline_schema.py +398 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/METADATA +1 -1
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/RECORD +54 -40
- nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
- nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.8.14.dev20250814.dist-info → nv_ingest-2025.8.15.dev20250815.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Process execution strategies for pipeline deployment.
|
|
7
|
+
|
|
8
|
+
This module defines abstract and concrete strategies for executing pipelines
|
|
9
|
+
in different process contexts (in-process vs subprocess), implementing the
|
|
10
|
+
Strategy pattern for clean separation of execution concerns.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import atexit
|
|
14
|
+
import logging
|
|
15
|
+
import multiprocessing
|
|
16
|
+
import time
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
|
|
19
|
+
from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
|
|
20
|
+
from nv_ingest.framework.orchestration.execution.options import ExecutionOptions, ExecutionResult
|
|
21
|
+
from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
|
|
22
|
+
RayPipelineInterface,
|
|
23
|
+
RayPipelineSubprocessInterface,
|
|
24
|
+
)
|
|
25
|
+
from nv_ingest.framework.orchestration.process.execution import (
|
|
26
|
+
launch_pipeline,
|
|
27
|
+
run_pipeline_process,
|
|
28
|
+
kill_pipeline_process_group,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ProcessExecutionStrategy(ABC):
    """
    Common interface for the different ways a pipeline can be executed.

    Each concrete subclass encapsulates one execution context (in-process,
    subprocess, etc.), following the Strategy pattern so callers only depend
    on ``execute``.
    """

    @abstractmethod
    def execute(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Run the pipeline described by ``config`` using this strategy.

        Parameters
        ----------
        config : PipelineConfigSchema
            Validated pipeline configuration to execute.
        options : ExecutionOptions
            Execution options controlling blocking behavior and output redirection.

        Returns
        -------
        ExecutionResult
            Result containing pipeline interface and/or timing information.
        """
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class InProcessStrategy(ProcessExecutionStrategy):
    """
    Execution strategy that runs the pipeline inside the calling process.

    No additional Python process is created; the pipeline is launched
    directly from the current interpreter.
    """

    def execute(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Launch the pipeline in the current process.

        Parameters
        ----------
        config : PipelineConfigSchema
            Pipeline configuration to execute.
        options : ExecutionOptions
            Execution options. Only ``block`` is read here; stdout/stderr are
            ignored for in-process execution.

        Returns
        -------
        ExecutionResult
            For non-blocking runs, a result carrying a ``RayPipelineInterface``
            and no elapsed time. For blocking runs, a result carrying the
            elapsed time and no interface.
        """
        logger.info("Executing pipeline in current process")

        # launch_pipeline hands back the raw pipeline object (not yet wrapped
        # in an interface) together with the elapsed time. Dynamic scaling is
        # not overridden here because it is already reflected in `config`.
        launch_kwargs = {
            "block": options.block,
            "disable_dynamic_scaling": None,
        }
        pipeline, total_elapsed = launch_pipeline(config, **launch_kwargs)

        if not options.block:
            # Non-blocking: hand the caller a handle to the running pipeline.
            wrapped = RayPipelineInterface(pipeline)
            return ExecutionResult(interface=wrapped, elapsed_time=None)

        # Blocking: the pipeline already ran to completion; report timing only.
        logger.debug(f"Pipeline execution completed successfully in {total_elapsed:.2f} seconds.")
        return ExecutionResult(interface=None, elapsed_time=total_elapsed)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class SubprocessStrategy(ProcessExecutionStrategy):
    """
    Strategy for executing pipelines in a separate subprocess.

    The pipeline is launched in a child Python process created through
    ``multiprocessing`` with the ``fork`` start method; the configured
    stdout/stderr streams are forwarded to the child entry point.
    """

    def execute(self, config: PipelineConfigSchema, options: ExecutionOptions) -> ExecutionResult:
        """
        Execute pipeline in a separate subprocess.

        Parameters
        ----------
        config : PipelineConfigSchema
            Pipeline configuration to execute.
        options : ExecutionOptions
            Execution options including output redirection streams.

        Returns
        -------
        ExecutionResult
            For non-blocking runs, a result carrying a
            ``RayPipelineSubprocessInterface`` and no elapsed time. For blocking
            runs, a result carrying the time spent waiting for the child to
            exit and no interface.
        """
        logger.info("Launching pipeline in Python subprocess using multiprocessing.")

        # Create subprocess using fork context
        ctx = multiprocessing.get_context("fork")
        process = ctx.Process(
            target=run_pipeline_process,
            args=(
                config,
                options.stdout,  # raw_stdout
                options.stderr,  # raw_stderr
            ),
            daemon=False,
        )

        process.start()
        interface = RayPipelineSubprocessInterface(process)

        if options.block:
            # Block until subprocess completes
            start_time = time.time()
            logger.info("Waiting for subprocess pipeline to complete...")
            process.join()
            logger.info("Pipeline subprocess completed.")
            elapsed_time = time.time() - start_time

            # Surface abnormal termination: a non-zero exit code means the
            # child failed, and a negative value means it was killed by a
            # signal. The return contract is unchanged; this only logs.
            exit_code = process.exitcode
            if exit_code != 0:
                logger.error(f"Pipeline subprocess exited with non-zero exit code {exit_code}.")

            return ExecutionResult(interface=None, elapsed_time=elapsed_time)

        # Return interface for non-blocking execution
        logger.info(f"Pipeline subprocess started (PID={process.pid})")
        # Register cleanup with the raw integer PID (not the Process object) so
        # the atexit hook does not depend on multiprocessing APIs during
        # interpreter shutdown.
        # NOTE(review): this assumes kill_pipeline_process_group accepts an int
        # PID -- confirm against its definition.
        pid = int(process.pid)
        atexit.register(kill_pipeline_process_group, pid)
        return ExecutionResult(interface=interface, elapsed_time=None)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def create_execution_strategy(run_in_subprocess: bool) -> ProcessExecutionStrategy:
    """
    Build the execution strategy matching the requested process model.

    Parameters
    ----------
    run_in_subprocess : bool
        Truthy selects SubprocessStrategy; falsy selects InProcessStrategy.

    Returns
    -------
    ProcessExecutionStrategy
        A new instance of the selected strategy class.
    """
    # Pick the class first, then instantiate it once.
    strategy_cls = SubprocessStrategy if run_in_subprocess else InProcessStrategy
    return strategy_cls()
|
|
@@ -29,8 +29,8 @@ from nv_ingest.framework.orchestration.ray.stages.sinks.message_broker_task_sink
|
|
|
29
29
|
from nv_ingest.framework.orchestration.ray.stages.sources.message_broker_task_source import (
|
|
30
30
|
MessageBrokerTaskSourceStage,
|
|
31
31
|
MessageBrokerTaskSourceConfig,
|
|
32
|
-
start_simple_message_broker,
|
|
33
32
|
)
|
|
33
|
+
from nv_ingest.framework.orchestration.process.dependent_services import start_simple_message_broker
|
|
34
34
|
from nv_ingest.framework.orchestration.ray.stages.storage.image_storage import ImageStorageStage
|
|
35
35
|
from nv_ingest.framework.orchestration.ray.stages.storage.store_embeddings import EmbeddingStorageStage
|
|
36
36
|
from nv_ingest.framework.orchestration.ray.stages.transforms.image_caption import ImageCaptionTransformStage
|
|
@@ -183,7 +183,7 @@ class PipelineTopology:
|
|
|
183
183
|
"""Marks an actor as pending removal, to be cleaned up by the background thread."""
|
|
184
184
|
with self._lock:
|
|
185
185
|
self._actors_pending_removal.add((stage_name, actor))
|
|
186
|
-
logger.
|
|
186
|
+
logger.debug(f"Marked actor {actor} from stage {stage_name} for removal.")
|
|
187
187
|
|
|
188
188
|
def start_cleanup_thread(self, interval: int = 5) -> None:
|
|
189
189
|
"""Starts the background thread for periodic cleanup tasks."""
|
|
@@ -191,14 +191,14 @@ class PipelineTopology:
|
|
|
191
191
|
self._stop_cleanup.clear()
|
|
192
192
|
self._cleanup_thread = threading.Thread(target=self._cleanup_loop, args=(interval,), daemon=True)
|
|
193
193
|
self._cleanup_thread.start()
|
|
194
|
-
logger.
|
|
194
|
+
logger.debug("Topology cleanup thread started.")
|
|
195
195
|
|
|
196
196
|
def stop_cleanup_thread(self) -> None:
|
|
197
197
|
"""Stops the background cleanup thread."""
|
|
198
198
|
if self._cleanup_thread and self._cleanup_thread.is_alive():
|
|
199
199
|
self._stop_cleanup.set()
|
|
200
200
|
self._cleanup_thread.join(timeout=5)
|
|
201
|
-
logger.
|
|
201
|
+
logger.debug("Topology cleanup thread stopped.")
|
|
202
202
|
|
|
203
203
|
def _cleanup_loop(self, interval: int) -> None:
|
|
204
204
|
"""Periodically checks for and removes actors that have completed shutdown."""
|
|
@@ -235,7 +235,7 @@ class PipelineTopology:
|
|
|
235
235
|
self._actors_pending_removal.remove((stage_name, actor))
|
|
236
236
|
if actor in self._stage_actors.get(stage_name, []):
|
|
237
237
|
self._stage_actors[stage_name].remove(actor)
|
|
238
|
-
logger.
|
|
238
|
+
logger.debug(f"Successfully removed actor {actor} from stage {stage_name} in topology.")
|
|
239
239
|
|
|
240
240
|
time.sleep(interval)
|
|
241
241
|
|
|
@@ -252,7 +252,7 @@ class RayPipeline(PipelineInterface):
|
|
|
252
252
|
penalty_factor=self.scaling_config.pid_penalty_factor,
|
|
253
253
|
error_boost_factor=self.scaling_config.pid_error_boost_factor,
|
|
254
254
|
)
|
|
255
|
-
logger.
|
|
255
|
+
logger.debug("PIDController initialized using ScalingConfig.")
|
|
256
256
|
|
|
257
257
|
try:
|
|
258
258
|
total_system_memory_bytes = psutil.virtual_memory().total
|
|
@@ -270,7 +270,7 @@ class RayPipeline(PipelineInterface):
|
|
|
270
270
|
memory_threshold=absolute_memory_threshold_mb,
|
|
271
271
|
memory_safety_buffer_fraction=self.scaling_config.rcm_memory_safety_buffer_fraction,
|
|
272
272
|
)
|
|
273
|
-
logger.
|
|
273
|
+
logger.debug("ResourceConstraintManager initialized using ScalingConfig.")
|
|
274
274
|
|
|
275
275
|
# --- Instantiate Stats Collector ---
|
|
276
276
|
self._stats_collection_interval_seconds = self.stats_config.collection_interval_seconds
|
|
@@ -282,7 +282,7 @@ class RayPipeline(PipelineInterface):
|
|
|
282
282
|
ema_alpha=self.scaling_config.pid_ema_alpha,
|
|
283
283
|
)
|
|
284
284
|
|
|
285
|
-
logger.
|
|
285
|
+
logger.debug("RayStatsCollector initialized using StatsConfig.")
|
|
286
286
|
|
|
287
287
|
# --- Accessor Methods for Stat Collector (and internal use) ---
|
|
288
288
|
|
|
@@ -349,11 +349,11 @@ class RayPipeline(PipelineInterface):
|
|
|
349
349
|
# Update constraint manager
|
|
350
350
|
self.constraint_manager.max_replicas = total_max_replicas
|
|
351
351
|
|
|
352
|
-
logger.
|
|
352
|
+
logger.debug(f"[Build-Configure] Autoscalers configured. Total Max Replicas: {total_max_replicas}")
|
|
353
353
|
|
|
354
354
|
def _instantiate_initial_actors(self) -> None:
|
|
355
355
|
"""Instantiates initial actors and updates topology."""
|
|
356
|
-
logger.
|
|
356
|
+
logger.debug("[Build-Actors] Instantiating initial stage actors (min_replicas)...")
|
|
357
357
|
# Use topology accessor
|
|
358
358
|
current_stages = self.topology.get_stages_info()
|
|
359
359
|
|
|
@@ -377,7 +377,7 @@ class RayPipeline(PipelineInterface):
|
|
|
377
377
|
)
|
|
378
378
|
try:
|
|
379
379
|
actor = stage.callable.options(name=actor_name, max_concurrency=1, max_restarts=0).remote(
|
|
380
|
-
config=stage.config
|
|
380
|
+
config=stage.config, stage_name=stage.name
|
|
381
381
|
)
|
|
382
382
|
replicas.append(actor)
|
|
383
383
|
except Exception as e:
|
|
@@ -388,7 +388,7 @@ class RayPipeline(PipelineInterface):
|
|
|
388
388
|
self.topology.set_actors_for_stage(stage.name, replicas)
|
|
389
389
|
logger.debug(f"[Build-Actors] Stage '{stage.name}' initial actors set in topology: count={len(replicas)}")
|
|
390
390
|
|
|
391
|
-
logger.
|
|
391
|
+
logger.debug("[Build-Actors] Initial actor instantiation complete.")
|
|
392
392
|
|
|
393
393
|
def _create_and_wire_edges(self) -> List[ray.ObjectRef]:
|
|
394
394
|
"""
|
|
@@ -399,7 +399,7 @@ class RayPipeline(PipelineInterface):
|
|
|
399
399
|
List[ray.ObjectRef]
|
|
400
400
|
A list of object references for the remote wiring calls.
|
|
401
401
|
"""
|
|
402
|
-
logger.
|
|
402
|
+
logger.debug("[Build-Wiring] Creating and wiring edges...")
|
|
403
403
|
wiring_refs = []
|
|
404
404
|
new_edge_queues: Dict[str, Tuple[Any, int]] = {}
|
|
405
405
|
|
|
@@ -628,7 +628,7 @@ class RayPipeline(PipelineInterface):
|
|
|
628
628
|
Dict[str, List[Any]]
|
|
629
629
|
A dictionary mapping stage names to lists of actor handles.
|
|
630
630
|
"""
|
|
631
|
-
logger.
|
|
631
|
+
logger.debug("--- Starting Pipeline Build Process ---")
|
|
632
632
|
try:
|
|
633
633
|
if not self.topology.get_stages_info():
|
|
634
634
|
logger.error("Build failed: No stages defined in topology.")
|
|
@@ -640,7 +640,7 @@ class RayPipeline(PipelineInterface):
|
|
|
640
640
|
wiring_futures = self._create_and_wire_edges()
|
|
641
641
|
self._wait_for_wiring(wiring_futures)
|
|
642
642
|
|
|
643
|
-
logger.
|
|
643
|
+
logger.debug("--- Pipeline Build Completed Successfully ---")
|
|
644
644
|
return self.topology.get_stage_actors() # Return actors from topology
|
|
645
645
|
|
|
646
646
|
except RuntimeError as e:
|
|
@@ -673,7 +673,7 @@ class RayPipeline(PipelineInterface):
|
|
|
673
673
|
logger.debug(f"[ScaleUtil] Creating new actor '{actor_name}' for stage '{stage_info.name}'")
|
|
674
674
|
try:
|
|
675
675
|
new_actor = stage_info.callable.options(name=actor_name, max_concurrency=1, max_restarts=0).remote(
|
|
676
|
-
config=stage_info.config
|
|
676
|
+
config=stage_info.config, stage_name=stage_info.name
|
|
677
677
|
)
|
|
678
678
|
|
|
679
679
|
return new_actor
|
|
@@ -861,7 +861,7 @@ class RayPipeline(PipelineInterface):
|
|
|
861
861
|
# Select actors to remove (e.g., the most recently added)
|
|
862
862
|
actors_to_remove = current_replicas[-num_to_remove:]
|
|
863
863
|
|
|
864
|
-
logger.
|
|
864
|
+
logger.debug(f"[ScaleDown-{stage_name}] Selected {len(actors_to_remove)} actors for removal.")
|
|
865
865
|
|
|
866
866
|
# Signal each actor to stop and mark it for removal by the topology.
|
|
867
867
|
# The topology's cleanup thread will handle polling and final removal.
|
|
@@ -966,7 +966,7 @@ class RayPipeline(PipelineInterface):
|
|
|
966
966
|
True if the pipeline drained successfully, False otherwise.
|
|
967
967
|
"""
|
|
968
968
|
start_time = time.time()
|
|
969
|
-
logger.
|
|
969
|
+
logger.debug(f"Waiting for pipeline drain (Timeout: {timeout_seconds}s)...")
|
|
970
970
|
last_in_flight = -1
|
|
971
971
|
drain_check_interval = 1.0 # Check every second
|
|
972
972
|
|
|
@@ -1172,7 +1172,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1172
1172
|
force : bool, optional
|
|
1173
1173
|
Whether to force the flush, by default False.
|
|
1174
1174
|
"""
|
|
1175
|
-
logger.
|
|
1175
|
+
logger.debug(f"Manual queue flush requested (force={force}).")
|
|
1176
1176
|
|
|
1177
1177
|
if self.topology.get_is_flushing() or self._stopping: # Check topology
|
|
1178
1178
|
logger.warning("Flush already in progress or pipeline is stopping.")
|
|
@@ -1183,7 +1183,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1183
1183
|
# For now, run synchronously:
|
|
1184
1184
|
self._execute_queue_flush()
|
|
1185
1185
|
else:
|
|
1186
|
-
logger.
|
|
1186
|
+
logger.debug("Manual flush denied: pipeline not quiet or interval not met.")
|
|
1187
1187
|
|
|
1188
1188
|
def _gather_controller_metrics(
|
|
1189
1189
|
self, current_stage_stats: Dict[str, Dict[str, int]], global_in_flight: int
|
|
@@ -1409,7 +1409,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1409
1409
|
self._consecutive_quiet_cycles += 1
|
|
1410
1410
|
logger.debug(f"Pipeline is quiet. Consecutive quiet cycles: {self._consecutive_quiet_cycles}")
|
|
1411
1411
|
if self._consecutive_quiet_cycles >= self.consecutive_quiet_cycles_for_flush:
|
|
1412
|
-
logger.
|
|
1412
|
+
logger.debug(
|
|
1413
1413
|
f"Pipeline has been quiet for {self._consecutive_quiet_cycles} cycles. "
|
|
1414
1414
|
"Initiating queue flush."
|
|
1415
1415
|
)
|
|
@@ -1423,7 +1423,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1423
1423
|
)
|
|
1424
1424
|
else:
|
|
1425
1425
|
if self._consecutive_quiet_cycles > 0:
|
|
1426
|
-
logger.
|
|
1426
|
+
logger.debug(
|
|
1427
1427
|
f"Pipeline is no longer quiet. Resetting consecutive quiet cycle count "
|
|
1428
1428
|
f"from {self._consecutive_quiet_cycles} to 0."
|
|
1429
1429
|
)
|
|
@@ -1479,7 +1479,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1479
1479
|
interval : float
|
|
1480
1480
|
The interval in seconds.
|
|
1481
1481
|
"""
|
|
1482
|
-
logger.
|
|
1482
|
+
logger.debug(f"Scaling loop started. Interval: {interval}s")
|
|
1483
1483
|
while self._scaling_monitoring:
|
|
1484
1484
|
try:
|
|
1485
1485
|
self._perform_scaling_and_maintenance()
|
|
@@ -1490,7 +1490,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1490
1490
|
if not self._scaling_monitoring:
|
|
1491
1491
|
break
|
|
1492
1492
|
time.sleep(sleep_time)
|
|
1493
|
-
logger.
|
|
1493
|
+
logger.debug("Scaling loop finished.")
|
|
1494
1494
|
|
|
1495
1495
|
def _start_scaling(self, poll_interval: float = 10.0) -> None:
|
|
1496
1496
|
"""
|
|
@@ -1505,7 +1505,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1505
1505
|
self._scaling_monitoring = True
|
|
1506
1506
|
self._scaling_thread = threading.Thread(target=self._scaling_loop, args=(poll_interval,), daemon=True)
|
|
1507
1507
|
self._scaling_thread.start()
|
|
1508
|
-
logger.
|
|
1508
|
+
logger.debug(f"Scaling/Maintenance thread launched (Interval: {poll_interval}s).")
|
|
1509
1509
|
|
|
1510
1510
|
def _stop_scaling(self) -> None:
|
|
1511
1511
|
"""
|
|
@@ -1519,7 +1519,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1519
1519
|
if self._scaling_thread.is_alive():
|
|
1520
1520
|
logger.warning("Scaling thread did not exit cleanly.")
|
|
1521
1521
|
self._scaling_thread = None
|
|
1522
|
-
logger.
|
|
1522
|
+
logger.debug("Scaling/Maintenance stopped.")
|
|
1523
1523
|
|
|
1524
1524
|
# --- Pipeline Start/Stop ---
|
|
1525
1525
|
def start(self, monitor_poll_interval: float = 5.0, scaling_poll_interval: float = 30.0) -> None:
|
|
@@ -1548,7 +1548,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1548
1548
|
logger.debug(f"Waiting for {len(start_futures)} actors to start...")
|
|
1549
1549
|
try:
|
|
1550
1550
|
ray.get(start_futures, timeout=60.0)
|
|
1551
|
-
logger.
|
|
1551
|
+
logger.debug(f"{len(start_futures)} actors started.")
|
|
1552
1552
|
except Exception as e:
|
|
1553
1553
|
logger.error(f"Error/Timeout starting actors: {e}", exc_info=True)
|
|
1554
1554
|
self.stop() # Attempt cleanup
|
|
@@ -1593,7 +1593,7 @@ class RayPipeline(PipelineInterface):
|
|
|
1593
1593
|
logger.warning(
|
|
1594
1594
|
f"Timeout waiting for {len(not_ready)} actors to stop. " f"Proceeding with shutdown."
|
|
1595
1595
|
)
|
|
1596
|
-
logger.
|
|
1596
|
+
logger.debug(f"{len(ready)} actors confirmed stop.")
|
|
1597
1597
|
except Exception as e:
|
|
1598
1598
|
logger.error(f"An unexpected error occurred during actor shutdown: {e}", exc_info=True)
|
|
1599
1599
|
|
|
@@ -72,7 +72,7 @@ class RayStatsCollector:
|
|
|
72
72
|
self._cumulative_stats: Dict[str, Dict[str, int]] = defaultdict(lambda: {"processed": 0})
|
|
73
73
|
self.ema_memory_per_replica: Dict[str, float] = {} # EMA of memory per replica
|
|
74
74
|
|
|
75
|
-
logger.
|
|
75
|
+
logger.debug(
|
|
76
76
|
f"RayStatsCollector initialized (Interval: {self._interval}s, "
|
|
77
77
|
f"Actor Timeout: {self._actor_timeout}s, Queue Timeout: {self._queue_timeout}s, "
|
|
78
78
|
f"EMA Alpha: {self.ema_alpha})"
|
|
@@ -111,7 +111,7 @@ class RayStatsCollector:
|
|
|
111
111
|
self._running = False # Correct inconsistent state
|
|
112
112
|
|
|
113
113
|
if not self._running:
|
|
114
|
-
logger.
|
|
114
|
+
logger.debug("Starting stats collector thread...")
|
|
115
115
|
self._running = True
|
|
116
116
|
with self._lock:
|
|
117
117
|
self._last_update_successful = False # Mark as stale until first collection
|
|
@@ -129,7 +129,7 @@ class RayStatsCollector:
|
|
|
129
129
|
def stop(self) -> None:
|
|
130
130
|
"""Signals the background stats collection thread to stop and waits for it."""
|
|
131
131
|
if self._running:
|
|
132
|
-
logger.
|
|
132
|
+
logger.debug("Stopping stats collector thread...")
|
|
133
133
|
self._running = False # Signal loop to stop
|
|
134
134
|
|
|
135
135
|
if self._thread is not None:
|
|
@@ -150,7 +150,7 @@ class RayStatsCollector:
|
|
|
150
150
|
with self._lock:
|
|
151
151
|
self._last_update_successful = False
|
|
152
152
|
self._collected_stats = {} # Clear last collected stats
|
|
153
|
-
logger.
|
|
153
|
+
logger.debug("Stats collector thread stopped.")
|
|
154
154
|
else:
|
|
155
155
|
logger.debug("Stats collector thread already stopped or never started.")
|
|
156
156
|
|
|
@@ -230,7 +230,7 @@ class RayStatsCollector:
|
|
|
230
230
|
# but time.sleep is simpler for now.
|
|
231
231
|
time.sleep(sleep_time)
|
|
232
232
|
|
|
233
|
-
logger.
|
|
233
|
+
logger.debug("Stats collector loop finished.")
|
|
234
234
|
|
|
235
235
|
def collect_stats_now(self) -> Tuple[Dict[str, Dict[str, int]], int, bool]:
|
|
236
236
|
"""
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
+
from typing import Optional
|
|
7
8
|
|
|
8
9
|
import ray
|
|
9
10
|
|
|
@@ -17,6 +18,8 @@ from nv_ingest_api.util.exception_handlers.decorators import (
|
|
|
17
18
|
nv_ingest_node_failure_try_except,
|
|
18
19
|
)
|
|
19
20
|
|
|
21
|
+
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
22
|
+
|
|
20
23
|
logger = logging.getLogger(__name__)
|
|
21
24
|
|
|
22
25
|
|
|
@@ -31,8 +34,8 @@ class AudioExtractorStage(RayActorStage):
|
|
|
31
34
|
3. Updates the message payload with the extracted text DataFrame.
|
|
32
35
|
"""
|
|
33
36
|
|
|
34
|
-
def __init__(self, config: AudioExtractorSchema) -> None:
|
|
35
|
-
super().__init__(config, log_to_stdout=False)
|
|
37
|
+
def __init__(self, config: AudioExtractorSchema, stage_name: Optional[str] = None) -> None:
|
|
38
|
+
super().__init__(config, log_to_stdout=False, stage_name=stage_name)
|
|
36
39
|
try:
|
|
37
40
|
self.validated_config = config
|
|
38
41
|
self._logger.info("AudioExtractorStage configuration validated successfully.")
|
|
@@ -40,9 +43,10 @@ class AudioExtractorStage(RayActorStage):
|
|
|
40
43
|
self._logger.exception(f"Error validating Audio Extractor config: {e}")
|
|
41
44
|
raise
|
|
42
45
|
|
|
43
|
-
@
|
|
46
|
+
@nv_ingest_node_failure_try_except()
|
|
47
|
+
@traceable()
|
|
48
|
+
@udf_intercept_hook()
|
|
44
49
|
@filter_by_task(required_tasks=[("extract", {"document_type": "regex:^(mp3|wav)$"})])
|
|
45
|
-
@nv_ingest_node_failure_try_except(annotation_id="audio_extractor", raise_on_failure=False)
|
|
46
50
|
def on_data(self, control_message: IngestControlMessage) -> IngestControlMessage:
|
|
47
51
|
"""
|
|
48
52
|
Process the control message by extracting text from audio.
|
|
@@ -3,19 +3,18 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
import ray
|
|
9
|
-
|
|
10
|
-
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
11
|
-
from nv_ingest.framework.util.flow_control import filter_by_task
|
|
12
9
|
from nv_ingest_api.internal.extract.image.chart_extractor import extract_chart_data_from_image_internal
|
|
13
10
|
from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type
|
|
11
|
+
from nv_ingest_api.internal.primitives.tracing.tagging import set_trace_timestamps_with_parent_context
|
|
12
|
+
from nv_ingest.framework.util.flow_control import filter_by_task
|
|
13
|
+
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
14
14
|
from nv_ingest_api.internal.primitives.tracing.tagging import traceable
|
|
15
15
|
from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
|
|
16
|
-
from nv_ingest_api.util.exception_handlers.decorators import
|
|
17
|
-
|
|
18
|
-
)
|
|
16
|
+
from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
|
|
17
|
+
from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
|
|
19
18
|
|
|
20
19
|
logger = logging.getLogger(__name__)
|
|
21
20
|
|
|
@@ -31,8 +30,8 @@ class ChartExtractorStage(RayActorStage):
|
|
|
31
30
|
and annotates the message metadata with extraction info.
|
|
32
31
|
"""
|
|
33
32
|
|
|
34
|
-
def __init__(self, config: ChartExtractorSchema) -> None:
|
|
35
|
-
super().__init__(config)
|
|
33
|
+
def __init__(self, config: ChartExtractorSchema, stage_name: Optional[str] = None) -> None:
|
|
34
|
+
super().__init__(config, stage_name=stage_name)
|
|
36
35
|
try:
|
|
37
36
|
self.validated_config = config
|
|
38
37
|
# logger.warning(
|
|
@@ -42,9 +41,10 @@ class ChartExtractorStage(RayActorStage):
|
|
|
42
41
|
logger.exception("Error validating chart extractor config")
|
|
43
42
|
raise e
|
|
44
43
|
|
|
45
|
-
@
|
|
44
|
+
@nv_ingest_node_failure_try_except()
|
|
45
|
+
@traceable()
|
|
46
|
+
@udf_intercept_hook()
|
|
46
47
|
@filter_by_task(required_tasks=["chart_data_extract"])
|
|
47
|
-
@nv_ingest_node_failure_try_except(annotation_id="chart_extraction", raise_on_failure=False)
|
|
48
48
|
def on_data(self, control_message: Any) -> Any:
|
|
49
49
|
"""
|
|
50
50
|
Process the control message by extracting chart data.
|
|
@@ -59,7 +59,7 @@ class ChartExtractorStage(RayActorStage):
|
|
|
59
59
|
IngestControlMessage
|
|
60
60
|
The updated message with the extracted chart data and extraction info in metadata.
|
|
61
61
|
"""
|
|
62
|
-
logger.
|
|
62
|
+
logger.debug("ChartExtractorStage.on_data: Starting chart extraction.")
|
|
63
63
|
# Extract the DataFrame payload.
|
|
64
64
|
df_payload = control_message.payload()
|
|
65
65
|
logger.debug("ChartExtractorStage: Extracted payload with %d rows.", len(df_payload))
|
|
@@ -76,17 +76,17 @@ class ChartExtractorStage(RayActorStage):
|
|
|
76
76
|
extraction_config=self.validated_config,
|
|
77
77
|
execution_trace_log=execution_trace_log,
|
|
78
78
|
)
|
|
79
|
-
logger.
|
|
79
|
+
logger.debug("ChartExtractorStage: Chart extraction completed. New payload has %d rows.", len(new_df))
|
|
80
80
|
|
|
81
81
|
# Update the control message with the new DataFrame.
|
|
82
82
|
control_message.payload(new_df)
|
|
83
83
|
# Annotate the message with extraction info.
|
|
84
84
|
control_message.set_metadata("chart_extraction_info", extraction_info)
|
|
85
|
-
logger.
|
|
85
|
+
logger.debug("ChartExtractorStage: Metadata injection complete. Returning updated control message.")
|
|
86
86
|
|
|
87
87
|
do_trace_tagging = control_message.get_metadata("config::add_trace_tagging") is True
|
|
88
88
|
if do_trace_tagging and execution_trace_log:
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
parent_name = self.stage_name if self.stage_name else "chart_extractor"
|
|
90
|
+
set_trace_timestamps_with_parent_context(control_message, execution_trace_log, parent_name, logger)
|
|
91
91
|
|
|
92
92
|
return control_message
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
from typing import Optional
|
|
6
7
|
|
|
7
8
|
import ray
|
|
8
9
|
|
|
@@ -16,6 +17,8 @@ from nv_ingest_api.util.exception_handlers.decorators import (
|
|
|
16
17
|
nv_ingest_node_failure_try_except,
|
|
17
18
|
)
|
|
18
19
|
|
|
20
|
+
from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
|
|
21
|
+
|
|
19
22
|
logger = logging.getLogger(__name__)
|
|
20
23
|
|
|
21
24
|
|
|
@@ -26,12 +29,12 @@ class DocxExtractorStage(RayActorStage):
|
|
|
26
29
|
|
|
27
30
|
It expects an IngestControlMessage containing a DataFrame with DOCX document data. It then:
|
|
28
31
|
1. Removes the "docx-extract" task from the message.
|
|
29
|
-
2. Calls the DOCX extraction logic (via
|
|
32
|
+
2. Calls the DOCX extraction logic (via extract_docx_internal) using a validated configuration.
|
|
30
33
|
3. Updates the message payload with the extracted content DataFrame.
|
|
31
34
|
"""
|
|
32
35
|
|
|
33
|
-
def __init__(self, config: DocxExtractorSchema) -> None:
|
|
34
|
-
super().__init__(config, log_to_stdout=False)
|
|
36
|
+
def __init__(self, config: DocxExtractorSchema, stage_name: Optional[str] = None) -> None:
|
|
37
|
+
super().__init__(config, log_to_stdout=False, stage_name=stage_name)
|
|
35
38
|
try:
|
|
36
39
|
self.validated_config = config
|
|
37
40
|
logger.info("DocxExtractorStage configuration validated successfully.")
|
|
@@ -39,9 +42,10 @@ class DocxExtractorStage(RayActorStage):
|
|
|
39
42
|
logger.exception(f"Error validating DOCX Extractor config: {e}")
|
|
40
43
|
raise
|
|
41
44
|
|
|
42
|
-
@
|
|
45
|
+
@nv_ingest_node_failure_try_except()
|
|
46
|
+
@traceable()
|
|
47
|
+
@udf_intercept_hook()
|
|
43
48
|
@filter_by_task(required_tasks=[("extract", {"document_type": "docx"})])
|
|
44
|
-
@nv_ingest_node_failure_try_except(annotation_id="docx_extractor", raise_on_failure=True)
|
|
45
49
|
def on_data(self, control_message: IngestControlMessage) -> IngestControlMessage:
|
|
46
50
|
"""
|
|
47
51
|
Process the control message by extracting content from DOCX documents.
|