PyPI - nv-ingest - Versions diffs - 2025.8.13.dev20250813__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl - Mend

nv-ingest 2025.8.13.dev20250813__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nv-ingest might be problematic. Click here for more details.

Files changed (56) hide show

nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py CHANGED Viewed

@@ -3,12 +3,13 @@
 # SPDX-License-Identifier: Apache-2.0
 import logging
-from typing import Any
+from typing import Any, Optional
 import ray
 from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
 from nv_ingest.framework.util.flow_control import filter_by_task
-from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type, IngestControlMessage
+from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage, remove_task_by_type
 from nv_ingest_api.internal.primitives.tracing.tagging import traceable
 from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
 from nv_ingest_api.internal.transform.split_text import transform_text_split_and_tokenize_internal
@@ -16,6 +17,8 @@ from nv_ingest_api.util.exception_handlers.decorators import (
     nv_ingest_node_failure_try_except,
 )
+from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
 logger = logging.getLogger(__name__)
@@ -29,15 +32,16 @@ class TextSplitterStage(RayActorStage):
     and tokenization logic. The updated DataFrame is then set back into the message.
     """
-    def __init__(self, config: TextSplitterSchema) -> None:
-        super().__init__(config)
+    def __init__(self, config: TextSplitterSchema, stage_name: Optional[str] = None) -> None:
+        super().__init__(config, stage_name=stage_name)
         # Store the validated configuration (assumed to be an instance of TextSplitterSchema)
         self.validated_config: TextSplitterSchema = config
-        logger.info("TextSplitterStage initialized with config: %s", config)
+        logger.debug("TextSplitterStage initialized with config: %s", config)
-    @traceable("text_splitter")
-    @filter_by_task(["split"])
-    @nv_ingest_node_failure_try_except(annotation_id="text_splitter", raise_on_failure=False)
+    @nv_ingest_node_failure_try_except()
+    @traceable()
+    @udf_intercept_hook()
+    @filter_by_task(required_tasks=["split"])
     def on_data(self, message: Any) -> Any:
         """
         Process an incoming IngestControlMessage by splitting and tokenizing its text.
@@ -68,11 +72,13 @@ class TextSplitterStage(RayActorStage):
             transform_config=self.validated_config,
             execution_trace_log=None,
         )
-        logger.info("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
+        logger.debug(
+            "TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated)
+        )
         # Update the message payload.
         message.payload(df_updated)
-        logger.info("TextSplitterStage.on_data: Finished processing, returning updated message.")
+        logger.debug("TextSplitterStage.on_data: Finished processing, returning updated message.")
         return message
@@ -110,10 +116,10 @@ def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSp
         transform_config=stage_config,
         execution_trace_log=None,
     )
-    logger.info("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
+    logger.debug("TextSplitterStage.on_data: Transformation complete. Updated payload has %d rows.", len(df_updated))
     # Update the message payload.
     control_message.payload(df_updated)
-    logger.info("TextSplitterStage.on_data: Finished processing, returning updated message.")
+    logger.debug("TextSplitterStage.on_data: Finished processing, returning updated message.")
     return control_message

nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py CHANGED Viewed

@@ -4,11 +4,16 @@
 import logging
 import time
-from typing import Any
+from typing import Any, Optional
 from pydantic import BaseModel
 import ray
 from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
+from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
+from nv_ingest_api.internal.primitives.tracing.tagging import traceable
+from nv_ingest_api.util.exception_handlers.decorators import (
+    nv_ingest_node_failure_try_except,
+)
 logger = logging.getLogger(__name__)
@@ -22,12 +27,15 @@ class ThroughputMonitorStage(RayActorStage):
     It also adds the throughput as metadata on the control message before passing it on.
     """
-    def __init__(self, config: BaseModel) -> None:
+    def __init__(self, config: BaseModel, stage_name: Optional[str] = None) -> None:
         # Initialize base attributes (e.g., self._running, self.start_time) via the base class.
-        super().__init__(config)
+        super().__init__(config, stage_name=stage_name)
         self.count = 0
         self.last_emit_time = None  # Timestamp when the last throughput measure was emitted
+    @nv_ingest_node_failure_try_except()
+    @traceable()
+    @udf_intercept_hook()
     async def on_data(self, message: Any) -> Any:
         """
         Process an incoming control message. Increment the internal counter and, every 100 messages,

nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py CHANGED Viewed

@@ -10,7 +10,6 @@ from typing import Dict, Any, List, Tuple, Optional
 from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # --- Constants ---
@@ -259,7 +258,7 @@ class ResourceConstraintManager:
         else:
             self.core_based_replica_limit = None  # Treat as unlimited if detection failed
-        logger.info(
+        logger.debug(
             f"[ConstraintMgr] Initialized. MaxReplicas={max_replicas}, "
             f"EffectiveCoreLimit={self.available_cores:.2f} "  # Log the potentially fractional value
             f"(Method: {self.core_detection_details.get('detection_method')}), "

nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py CHANGED Viewed

@@ -2,302 +2,39 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-import atexit
 import logging
-import multiprocessing
-import os
-import signal
-import sys
-import time
-from ctypes import CDLL, c_int
-from datetime import datetime
-from typing import Union, Tuple, Optional, TextIO
+from typing import Union, Optional, TextIO
-import ray
-from pydantic import BaseModel, ConfigDict
 from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
-    RayPipeline,
-    ScalingConfig,
     RayPipelineSubprocessInterface,
     RayPipelineInterface,
 )
-from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_builders import setup_ingestion_pipeline
-from nv_ingest.framework.orchestration.ray.util.env_config import (
-    DISABLE_DYNAMIC_SCALING,
-    DYNAMIC_MEMORY_THRESHOLD,
-    DYNAMIC_MEMORY_KP,
-    DYNAMIC_MEMORY_KI,
-    DYNAMIC_MEMORY_EMA_ALPHA,
-    DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
-    DYNAMIC_MEMORY_PENALTY_FACTOR,
-    DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
-    DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
+from nv_ingest.pipeline.pipeline_schema import PipelineConfigSchema
+from nv_ingest.pipeline.config.loaders import resolve_pipeline_config, apply_runtime_overrides
+from nv_ingest.framework.orchestration.process.lifecycle import PipelineLifecycleManager
+from nv_ingest.framework.orchestration.execution.helpers import (
+    create_runtime_overrides,
+    create_execution_options,
+    select_execution_strategy,
 )
 logger = logging.getLogger(__name__)
-class PipelineCreationSchema(BaseModel):
-    """
-    Schema for pipeline creation configuration.
-    Contains all parameters required to set up and execute the pipeline,
-    including endpoints, API keys, and processing options.
-    """
-    arrow_default_memory_pool: str = os.getenv("ARROW_DEFAULT_MEMORY_POOL", "system")
-    # Audio processing settings
-    audio_grpc_endpoint: str = os.getenv("AUDIO_GRPC_ENDPOINT", "grpc.nvcf.nvidia.com:443")
-    audio_function_id: str = os.getenv("AUDIO_FUNCTION_ID", "1598d209-5e27-4d3c-8079-4751568b1081")
-    audio_infer_protocol: str = os.getenv("AUDIO_INFER_PROTOCOL", "grpc")
-    # Embedding model settings
-    embedding_nim_endpoint: str = os.getenv("EMBEDDING_NIM_ENDPOINT", "https://integrate.api.nvidia.com/v1")
-    embedding_nim_model_name: str = os.getenv("EMBEDDING_NIM_MODEL_NAME", "nvidia/llama-3.2-nv-embedqa-1b-v2")
-    # General pipeline settings
-    ingest_log_level: str = os.getenv("INGEST_LOG_LEVEL", "INFO")
-    max_ingest_process_workers: str = os.getenv("MAX_INGEST_PROCESS_WORKERS", "16")
-    # Messaging configuration
-    message_client_host: str = os.getenv("MESSAGE_CLIENT_HOST", "localhost")
-    message_client_port: str = os.getenv("MESSAGE_CLIENT_PORT", "7671")
-    message_client_type: str = os.getenv("MESSAGE_CLIENT_TYPE", "simple")
-    # NeMo Retriever settings
-    nemoretriever_parse_http_endpoint: str = os.getenv(
-        "NEMORETRIEVER_PARSE_HTTP_ENDPOINT", "https://integrate.api.nvidia.com/v1/chat/completions"
-    )
-    nemoretriever_parse_infer_protocol: str = os.getenv("NEMORETRIEVER_PARSE_INFER_PROTOCOL", "http")
-    nemoretriever_parse_model_name: str = os.getenv("NEMORETRIEVER_PARSE_MODEL_NAME", "nvidia/nemoretriever-parse")
-    # API keys
-    ngc_api_key: str = os.getenv("NGC_API_KEY", "")
-    nvidia_api_key: str = os.getenv("NVIDIA_API_KEY", "")
-    # Observability settings
-    otel_exporter_otlp_endpoint: str = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317")
-    # OCR settings
-    ocr_http_endpoint: str = os.getenv("OCR_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
-    ocr_infer_protocol: str = os.getenv("OCR_INFER_PROTOCOL", "http")
-    ocr_model_name: str = os.getenv("OCR_MODEL_NAME", "paddle")
-    # Task queue settings
-    REDIS_INGEST_TASK_QUEUE: str = "ingest_task_queue"
-    # Vision language model settings
-    vlm_caption_endpoint: str = os.getenv(
-        "VLM_CAPTION_ENDPOINT",
-        "https://integrate.api.nvidia.com/v1/chat/completions",
-    )
-    vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
-    # YOLOX image processing settings
-    yolox_graphic_elements_http_endpoint: str = os.getenv(
-        "YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT",
-        "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1",
-    )
-    yolox_graphic_elements_infer_protocol: str = os.getenv("YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL", "http")
-    # YOLOX page elements settings
-    yolox_http_endpoint: str = os.getenv(
-        "YOLOX_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
-    )
-    yolox_infer_protocol: str = os.getenv("YOLOX_INFER_PROTOCOL", "http")
-    # YOLOX table structure settings
-    yolox_table_structure_http_endpoint: str = os.getenv(
-        "YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1"
-    )
-    yolox_table_structure_infer_protocol: str = os.getenv("YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL", "http")
-    model_config = ConfigDict(extra="forbid")
-def redirect_os_fds(stdout: Optional[TextIO] = None, stderr: Optional[TextIO] = None):
-    """
-    Redirect OS-level stdout (fd=1) and stderr (fd=2) to the given file-like objects,
-    or to /dev/null if not provided.
-    Parameters
-    ----------
-    stdout : Optional[TextIO]
-        Stream to receive OS-level stdout. If None, redirected to /dev/null.
-    stderr : Optional[TextIO]
-        Stream to receive OS-level stderr. If None, redirected to /dev/null.
-    """
-    devnull_fd = os.open(os.devnull, os.O_WRONLY)
-    if stdout is not None:
-        os.dup2(stdout.fileno(), 1)
-    else:
-        os.dup2(devnull_fd, 1)
-    if stderr is not None:
-        os.dup2(stderr.fileno(), 2)
-    else:
-        os.dup2(devnull_fd, 2)
-def set_pdeathsig(sig=signal.SIGKILL):
-    libc = CDLL("libc.so.6")
-    PR_SET_PDEATHSIG = 1
-    libc.prctl(PR_SET_PDEATHSIG, c_int(sig))
-def kill_pipeline_process_group(pid: int):
-    """
-    Kill the process group associated with the given PID, if it exists and is alive.
-    Parameters
-    ----------
-    pid : int
-        The PID of the process whose group should be killed.
-    """
-    try:
-        # Get the process group ID
-        pgid = os.getpgid(pid)
-        # Check if the group is still alive by sending signal 0
-        os.killpg(pgid, 0)  # Does not kill, just checks if it's alive
-        # If no exception, the group is alive — kill it
-        os.killpg(pgid, signal.SIGKILL)
-        print(f"Killed subprocess group {pgid}")
-    except ProcessLookupError:
-        print(f"Process group for PID {pid} no longer exists.")
-    except PermissionError:
-        print(f"Permission denied to kill process group for PID {pid}.")
-    except Exception as e:
-        print(f"Failed to kill subprocess group: {e}")
-def _run_pipeline_process(
-    ingest_config: PipelineCreationSchema,
-    disable_dynamic_scaling: Optional[bool],
-    dynamic_memory_threshold: Optional[float],
-    raw_stdout: Optional[TextIO] = None,
-    raw_stderr: Optional[TextIO] = None,
-):
-    """
-    Subprocess entrypoint to launch the pipeline. Redirects all output to the provided
-    file-like streams or /dev/null if not specified.
-    Parameters
-    ----------
-    ingest_config : PipelineCreationSchema
-        Validated pipeline configuration.
-    disable_dynamic_scaling : Optional[bool]
-        Whether to disable dynamic scaling.
-    dynamic_memory_threshold : Optional[float]
-        Threshold for triggering scaling.
-    raw_stdout : Optional[TextIO]
-        Destination for stdout. Defaults to /dev/null.
-    raw_stderr : Optional[TextIO]
-        Destination for stderr. Defaults to /dev/null.
-    """
-    # Set the death signal for the subprocess
-    set_pdeathsig()
-    os.setsid()  # Creates new process group so it can be SIGKILLed as a group
-    # Redirect OS-level file descriptors
-    redirect_os_fds(stdout=raw_stdout, stderr=raw_stderr)
-    # Redirect Python-level sys.stdout/sys.stderr
-    sys.stdout = raw_stdout or open(os.devnull, "w")
-    sys.stderr = raw_stderr or open(os.devnull, "w")
-    try:
-        _launch_pipeline(
-            ingest_config,
-            block=True,
-            disable_dynamic_scaling=disable_dynamic_scaling,
-            dynamic_memory_threshold=dynamic_memory_threshold,
-        )
-    except Exception as e:
-        sys.__stderr__.write(f"Subprocess pipeline run failed: {e}\n")
-        raise
-def _launch_pipeline(
-    ingest_config: PipelineCreationSchema,
-    block: bool,
-    disable_dynamic_scaling: bool = None,
-    dynamic_memory_threshold: float = None,
-) -> Tuple[Union[RayPipeline, None], float]:
-    logger.info("Starting pipeline setup")
-    dynamic_memory_scaling = not DISABLE_DYNAMIC_SCALING
-    if disable_dynamic_scaling is not None:
-        dynamic_memory_scaling = not disable_dynamic_scaling
-    dynamic_memory_threshold = dynamic_memory_threshold if dynamic_memory_threshold else DYNAMIC_MEMORY_THRESHOLD
-    scaling_config = ScalingConfig(
-        dynamic_memory_scaling=dynamic_memory_scaling,
-        dynamic_memory_threshold=dynamic_memory_threshold,
-        pid_kp=DYNAMIC_MEMORY_KP,
-        pid_ki=DYNAMIC_MEMORY_KI,
-        pid_ema_alpha=DYNAMIC_MEMORY_EMA_ALPHA,
-        pid_target_queue_depth=DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
-        pid_penalty_factor=DYNAMIC_MEMORY_PENALTY_FACTOR,
-        pid_error_boost_factor=DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
-        rcm_memory_safety_buffer_fraction=DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
-    )
-    pipeline = RayPipeline(scaling_config=scaling_config)
-    start_abs = datetime.now()
-    # Set up the ingestion pipeline
-    _ = setup_ingestion_pipeline(pipeline, ingest_config.model_dump())
-    # Record setup time
-    end_setup = start_run = datetime.now()
-    setup_elapsed = (end_setup - start_abs).total_seconds()
-    logger.info(f"Pipeline setup completed in {setup_elapsed:.2f} seconds")
-    # Run the pipeline
-    logger.debug("Running pipeline")
-    pipeline.start()
-    if block:
-        try:
-            while True:
-                time.sleep(5)
-        except KeyboardInterrupt:
-            logger.info("Interrupt received, shutting down pipeline.")
-            pipeline.stop()
-            ray.shutdown()
-            logger.info("Ray shutdown complete.")
-        # Record execution times
-        end_run = datetime.now()
-        run_elapsed = (end_run - start_run).total_seconds()
-        total_elapsed = (end_run - start_abs).total_seconds()
-        logger.info(f"Pipeline run completed in {run_elapsed:.2f} seconds")
-        logger.info(f"Total time elapsed: {total_elapsed:.2f} seconds")
-        return None, total_elapsed
-    else:
-        return pipeline, 0.0
 def run_pipeline(
-    ingest_config: PipelineCreationSchema,
+    pipeline_config: Optional[PipelineConfigSchema] = None,
     block: bool = True,
     disable_dynamic_scaling: Optional[bool] = None,
     dynamic_memory_threshold: Optional[float] = None,
     run_in_subprocess: bool = False,
     stdout: Optional[TextIO] = None,
     stderr: Optional[TextIO] = None,
+    libmode: bool = True,
 ) -> Union[RayPipelineInterface, float, RayPipelineSubprocessInterface]:
     """
-    Launch and manage a pipeline, optionally in a subprocess.
+    Launch and manage a pipeline using configuration.
     This function is the primary entry point for executing a Ray pipeline,
     either within the current process or in a separate Python subprocess.
@@ -306,17 +43,16 @@ def run_pipeline(
     Parameters
     ----------
-    ingest_config : PipelineCreationSchema
+    pipeline_config : Optional[PipelineConfigSchema], default=None
         The validated configuration object used to construct and launch the pipeline.
+        If None and libmode is True, loads the default libmode pipeline.
     block : bool, default=True
         If True, blocks until the pipeline completes.
         If False, returns an interface to control the pipeline externally.
     disable_dynamic_scaling : Optional[bool], default=None
-        If True, disables dynamic memory scaling. Overrides global configuration if set.
-        If None, uses the default or globally defined behavior.
+        If provided, overrides the `disable_dynamic_scaling` setting from the pipeline config.
     dynamic_memory_threshold : Optional[float], default=None
-        The memory usage threshold (as a float between 0 and 1) that triggers autoscaling,
-        if dynamic scaling is enabled. Defaults to the globally configured value if None.
+        If provided, overrides the `dynamic_memory_threshold` setting from the pipeline config.
     run_in_subprocess : bool, default=False
         If True, launches the pipeline in a separate Python subprocess using `multiprocessing.Process`.
         If False, runs the pipeline in the current process.
@@ -326,6 +62,9 @@ def run_pipeline(
     stderr : Optional[TextIO], default=None
         Optional file-like stream to which subprocess stderr should be redirected.
         If None, stderr is redirected to /dev/null.
+    libmode : bool, default=True
+        If True and pipeline_config is None, loads the default libmode pipeline configuration.
+        If False, requires pipeline_config to be provided.
     Returns
     -------
@@ -337,57 +76,25 @@ def run_pipeline(
     Raises
     ------
+    ValueError
+        If pipeline_config is None and libmode is False.
     RuntimeError
         If the subprocess fails to start or exits with an error.
     Exception
         Any other exceptions raised during pipeline launch or configuration.
     """
-    if run_in_subprocess:
-        logger.info("Launching pipeline in Python subprocess using multiprocessing.")
-        if (ingest_config.ngc_api_key is None or ingest_config.ngc_api_key == "") and (
-            ingest_config.nvidia_api_key is None or ingest_config.nvidia_api_key == ""
-        ):
-            logger.warning("NGC_API_KEY or NVIDIA_API_KEY are not set. NIM Related functions will not work.")
-        ctx = multiprocessing.get_context("fork")
-        process = ctx.Process(
-            target=_run_pipeline_process,
-            args=(
-                ingest_config,
-                disable_dynamic_scaling,
-                dynamic_memory_threshold,
-                stdout,  # raw_stdout
-                stderr,  # raw_stderr
-            ),
-            daemon=False,
-        )
-        process.start()
-        interface = RayPipelineSubprocessInterface(process)
-        if block:
-            start_time = time.time()
-            logger.info("Waiting for subprocess pipeline to complete...")
-            process.join()
-            logger.info("Pipeline subprocess completed.")
-            return time.time() - start_time
-        else:
-            logger.info(f"Pipeline subprocess started (PID={process.pid})")
-            atexit.register(lambda: kill_pipeline_process_group(process.pid))
+    # Resolve configuration
+    config = resolve_pipeline_config(pipeline_config, libmode)
+    overrides = create_runtime_overrides(disable_dynamic_scaling, dynamic_memory_threshold)
+    final_config = apply_runtime_overrides(config, overrides)
-            return interface
+    # Select execution strategy
+    strategy = select_execution_strategy(run_in_subprocess)
+    options = create_execution_options(block, stdout, stderr)
-    # Run inline
-    pipeline, total_elapsed = _launch_pipeline(
-        ingest_config,
-        block=block,
-        disable_dynamic_scaling=disable_dynamic_scaling,
-        dynamic_memory_threshold=dynamic_memory_threshold,
-    )
+    # Execute using lifecycle manager
+    lifecycle_manager = PipelineLifecycleManager(strategy)
+    result = lifecycle_manager.start(final_config, options)
-    if block:
-        logger.debug(f"Pipeline execution completed successfully in {total_elapsed:.2f} seconds.")
-        return total_elapsed
-    else:
-        return RayPipelineInterface(pipeline)
+    # Return in expected format
+    return result.get_return_value()

nv_ingest/framework/orchestration/ray/util/pipeline/tools.py CHANGED Viewed

@@ -4,15 +4,18 @@
 import logging
 import uuid
-from typing import Callable, Optional, Union, Dict, List, Type
+import inspect
+from typing import Callable, Optional, Union, Dict, Type, List
 import ray
 from pydantic import BaseModel
 from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
-from nv_ingest.framework.util.flow_control import filter_by_task
 from nv_ingest_api.internal.primitives.tracing.tagging import traceable
 from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
+from nv_ingest_api.util.imports.callable_signatures import (
+    ingest_stage_callable_signature,
+)
 logger = logging.getLogger(__name__)
@@ -54,6 +57,7 @@ def wrap_callable_as_stage(
     - Only `.remote(config)` and `.options(...)` (chained with `.remote(config)`) are supported.
       All other class/actor patterns will raise `NotImplementedError`.
     """
+    ingest_stage_callable_signature(inspect.signature(fn))
     trace_name = trace_id or fn.__name__
     def make_actor_class():
@@ -90,7 +94,6 @@ def wrap_callable_as_stage(
         @traceable(trace_name)
         @nv_ingest_node_failure_try_except(annotation_id=trace_name, raise_on_failure=False)
-        @filter_by_task(required_tasks=required_tasks) if required_tasks else (lambda f: f)
         def on_data(self, control_message):
             """
             Processes a control message using the wrapped function.
@@ -105,6 +108,13 @@ def wrap_callable_as_stage(
             IngestControlMessage
                 The processed message, or the original on failure.
             """
+            # Apply task filtering if required_tasks is specified and not empty
+            if required_tasks:
+                # Check if message has any of the required tasks
+                message_tasks = {task.type for task in control_message.get_tasks()}
+                if not any(task in message_tasks for task in required_tasks):
+                    return control_message
             try:
                 return fn(control_message, self.validated_config)
             except Exception as e:

nv-ingest 2025.8.13.dev20250813__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl

Potentially problematic release.

nv-ingest 2025.8.13.dev20250813__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl