nv-ingest-api 2025.4.21.dev20250421__py3-none-any.whl → 2025.4.23.dev20250423__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of nv-ingest-api might be problematic.
- nv_ingest_api/__init__.py +3 -0
- nv_ingest_api/interface/__init__.py +215 -0
- nv_ingest_api/interface/extract.py +972 -0
- nv_ingest_api/interface/mutate.py +154 -0
- nv_ingest_api/interface/store.py +218 -0
- nv_ingest_api/interface/transform.py +382 -0
- nv_ingest_api/interface/utility.py +200 -0
- nv_ingest_api/internal/enums/__init__.py +3 -0
- nv_ingest_api/internal/enums/common.py +494 -0
- nv_ingest_api/internal/extract/__init__.py +3 -0
- nv_ingest_api/internal/extract/audio/__init__.py +3 -0
- nv_ingest_api/internal/extract/audio/audio_extraction.py +149 -0
- nv_ingest_api/internal/extract/docx/__init__.py +5 -0
- nv_ingest_api/internal/extract/docx/docx_extractor.py +205 -0
- nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +3 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +122 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +895 -0
- nv_ingest_api/internal/extract/image/__init__.py +3 -0
- nv_ingest_api/internal/extract/image/chart_extractor.py +353 -0
- nv_ingest_api/internal/extract/image/image_extractor.py +204 -0
- nv_ingest_api/internal/extract/image/image_helpers/__init__.py +3 -0
- nv_ingest_api/internal/extract/image/image_helpers/common.py +403 -0
- nv_ingest_api/internal/extract/image/infographic_extractor.py +253 -0
- nv_ingest_api/internal/extract/image/table_extractor.py +344 -0
- nv_ingest_api/internal/extract/pdf/__init__.py +3 -0
- nv_ingest_api/internal/extract/pdf/engines/__init__.py +19 -0
- nv_ingest_api/internal/extract/pdf/engines/adobe.py +484 -0
- nv_ingest_api/internal/extract/pdf/engines/llama.py +243 -0
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +597 -0
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +146 -0
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +603 -0
- nv_ingest_api/internal/extract/pdf/engines/tika.py +96 -0
- nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +426 -0
- nv_ingest_api/internal/extract/pdf/pdf_extractor.py +74 -0
- nv_ingest_api/internal/extract/pptx/__init__.py +5 -0
- nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +799 -0
- nv_ingest_api/internal/extract/pptx/pptx_extractor.py +187 -0
- nv_ingest_api/internal/mutate/__init__.py +3 -0
- nv_ingest_api/internal/mutate/deduplicate.py +110 -0
- nv_ingest_api/internal/mutate/filter.py +133 -0
- nv_ingest_api/internal/primitives/__init__.py +0 -0
- nv_ingest_api/{primitives → internal/primitives}/control_message_task.py +4 -0
- nv_ingest_api/{primitives → internal/primitives}/ingest_control_message.py +5 -2
- nv_ingest_api/internal/primitives/nim/__init__.py +8 -0
- nv_ingest_api/internal/primitives/nim/default_values.py +15 -0
- nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +3 -0
- nv_ingest_api/internal/primitives/nim/model_interface/cached.py +274 -0
- nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +56 -0
- nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +270 -0
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +275 -0
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +238 -0
- nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +462 -0
- nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +367 -0
- nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +132 -0
- nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +152 -0
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +1400 -0
- nv_ingest_api/internal/primitives/nim/nim_client.py +344 -0
- nv_ingest_api/internal/primitives/nim/nim_model_interface.py +81 -0
- nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- nv_ingest_api/internal/primitives/tracing/latency.py +69 -0
- nv_ingest_api/internal/primitives/tracing/logging.py +96 -0
- nv_ingest_api/internal/primitives/tracing/tagging.py +197 -0
- nv_ingest_api/internal/schemas/__init__.py +3 -0
- nv_ingest_api/internal/schemas/extract/__init__.py +3 -0
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +130 -0
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +135 -0
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +128 -0
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +218 -0
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +129 -0
- nv_ingest_api/internal/schemas/message_brokers/__init__.py +3 -0
- nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +23 -0
- nv_ingest_api/internal/schemas/message_brokers/request_schema.py +34 -0
- nv_ingest_api/internal/schemas/message_brokers/response_schema.py +19 -0
- nv_ingest_api/internal/schemas/meta/__init__.py +3 -0
- nv_ingest_api/internal/schemas/meta/base_model_noext.py +11 -0
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +237 -0
- nv_ingest_api/internal/schemas/meta/metadata_schema.py +221 -0
- nv_ingest_api/internal/schemas/mutate/__init__.py +3 -0
- nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +16 -0
- nv_ingest_api/internal/schemas/store/__init__.py +3 -0
- nv_ingest_api/internal/schemas/store/store_embedding_schema.py +28 -0
- nv_ingest_api/internal/schemas/store/store_image_schema.py +30 -0
- nv_ingest_api/internal/schemas/transform/__init__.py +3 -0
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +15 -0
- nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +17 -0
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +25 -0
- nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +22 -0
- nv_ingest_api/internal/store/__init__.py +3 -0
- nv_ingest_api/internal/store/embed_text_upload.py +236 -0
- nv_ingest_api/internal/store/image_upload.py +232 -0
- nv_ingest_api/internal/transform/__init__.py +3 -0
- nv_ingest_api/internal/transform/caption_image.py +205 -0
- nv_ingest_api/internal/transform/embed_text.py +496 -0
- nv_ingest_api/internal/transform/split_text.py +157 -0
- nv_ingest_api/util/__init__.py +0 -0
- nv_ingest_api/util/control_message/__init__.py +0 -0
- nv_ingest_api/util/control_message/validators.py +47 -0
- nv_ingest_api/util/converters/__init__.py +0 -0
- nv_ingest_api/util/converters/bytetools.py +78 -0
- nv_ingest_api/util/converters/containers.py +65 -0
- nv_ingest_api/util/converters/datetools.py +90 -0
- nv_ingest_api/util/converters/dftools.py +127 -0
- nv_ingest_api/util/converters/formats.py +64 -0
- nv_ingest_api/util/converters/type_mappings.py +27 -0
- nv_ingest_api/util/detectors/__init__.py +5 -0
- nv_ingest_api/util/detectors/language.py +38 -0
- nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- nv_ingest_api/util/exception_handlers/converters.py +72 -0
- nv_ingest_api/util/exception_handlers/decorators.py +223 -0
- nv_ingest_api/util/exception_handlers/detectors.py +74 -0
- nv_ingest_api/util/exception_handlers/pdf.py +116 -0
- nv_ingest_api/util/exception_handlers/schemas.py +68 -0
- nv_ingest_api/util/image_processing/__init__.py +5 -0
- nv_ingest_api/util/image_processing/clustering.py +260 -0
- nv_ingest_api/util/image_processing/processing.py +179 -0
- nv_ingest_api/util/image_processing/table_and_chart.py +449 -0
- nv_ingest_api/util/image_processing/transforms.py +407 -0
- nv_ingest_api/util/logging/__init__.py +0 -0
- nv_ingest_api/util/logging/configuration.py +31 -0
- nv_ingest_api/util/message_brokers/__init__.py +3 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +9 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +465 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +71 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +451 -0
- nv_ingest_api/util/metadata/__init__.py +5 -0
- nv_ingest_api/util/metadata/aggregators.py +469 -0
- nv_ingest_api/util/multi_processing/__init__.py +8 -0
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py +194 -0
- nv_ingest_api/util/nim/__init__.py +56 -0
- nv_ingest_api/util/pdf/__init__.py +3 -0
- nv_ingest_api/util/pdf/pdfium.py +427 -0
- nv_ingest_api/util/schema/__init__.py +0 -0
- nv_ingest_api/util/schema/schema_validator.py +10 -0
- nv_ingest_api/util/service_clients/__init__.py +3 -0
- nv_ingest_api/util/service_clients/client_base.py +86 -0
- nv_ingest_api/util/service_clients/kafka/__init__.py +3 -0
- nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- nv_ingest_api/util/service_clients/redis/redis_client.py +823 -0
- nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +531 -0
- nv_ingest_api/util/string_processing/__init__.py +51 -0
- {nv_ingest_api-2025.4.21.dev20250421.dist-info → nv_ingest_api-2025.4.23.dev20250423.dist-info}/METADATA +1 -1
- nv_ingest_api-2025.4.23.dev20250423.dist-info/RECORD +152 -0
- {nv_ingest_api-2025.4.21.dev20250421.dist-info → nv_ingest_api-2025.4.23.dev20250423.dist-info}/WHEEL +1 -1
- nv_ingest_api-2025.4.21.dev20250421.dist-info/RECORD +0 -9
- /nv_ingest_api/{primitives → internal}/__init__.py +0 -0
- {nv_ingest_api-2025.4.21.dev20250421.dist-info → nv_ingest_api-2025.4.23.dev20250423.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.4.21.dev20250421.dist-info → nv_ingest_api-2025.4.23.dev20250423.dist-info}/top_level.txt +0 -0
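The headline change in this diff is the relocation of `nv_ingest_api/primitives` to `nv_ingest_api/internal/primitives` (see the rename entries above). A minimal sketch of the import change a downstream consumer would make; the old path is inferred from the rename entries, not verified against the 2025.4.21 wheel:

```python
# Old import path (2025.4.21, per the rename entries above):
# from nv_ingest_api.primitives.ingest_control_message import IngestControlMessage

# New import path (2025.4.23):
from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage
```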
nv_ingest_api/internal/primitives/nim/nim_client.py

@@ -0,0 +1,344 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any
+from typing import Optional
+from typing import Tuple
+
+import numpy as np
+import requests
+import tritonclient.grpc as grpcclient
+
+from nv_ingest_api.internal.primitives.tracing.tagging import traceable_func
+from nv_ingest_api.util.string_processing import generate_url
+
+logger = logging.getLogger(__name__)
+
+
+class NimClient:
+    """
+    A client for interfacing with a model inference server using gRPC or HTTP protocols.
+    """
+
+    def __init__(
+        self,
+        model_interface,
+        protocol: str,
+        endpoints: Tuple[str, str],
+        auth_token: Optional[str] = None,
+        timeout: float = 120.0,
+        max_retries: int = 5,
+    ):
+        """
+        Initialize the NimClient with the specified model interface, protocol, and server endpoints.
+
+        Parameters
+        ----------
+        model_interface : ModelInterface
+            The model interface implementation to use.
+        protocol : str
+            The protocol to use ("grpc" or "http").
+        endpoints : tuple
+            A tuple containing the gRPC and HTTP endpoints.
+        auth_token : str, optional
+            Authorization token for HTTP requests (default: None).
+        timeout : float, optional
+            Timeout for HTTP requests in seconds (default: 120.0).
+        max_retries : int, optional
+            Maximum number of attempts for retryable HTTP failures (default: 5).
+
+        Raises
+        ------
+        ValueError
+            If an invalid protocol is specified or if required endpoints are missing.
+        """
+
+        self.client = None
+        self.model_interface = model_interface
+        self.protocol = protocol.lower()
+        self.auth_token = auth_token
+        self.timeout = timeout  # Timeout for HTTP requests
+        self.max_retries = max_retries
+        self._grpc_endpoint, self._http_endpoint = endpoints
+        self._max_batch_sizes = {}
+        self._lock = threading.Lock()
+
+        if self.protocol == "grpc":
+            if not self._grpc_endpoint:
+                raise ValueError("gRPC endpoint must be provided for gRPC protocol")
+            logger.debug(f"Creating gRPC client with {self._grpc_endpoint}")
+            self.client = grpcclient.InferenceServerClient(url=self._grpc_endpoint)
+        elif self.protocol == "http":
+            if not self._http_endpoint:
+                raise ValueError("HTTP endpoint must be provided for HTTP protocol")
+            logger.debug(f"Creating HTTP client with {self._http_endpoint}")
+            self.endpoint_url = generate_url(self._http_endpoint)
+            self.headers = {"accept": "application/json", "content-type": "application/json"}
+            if self.auth_token:
+                self.headers["Authorization"] = f"Bearer {self.auth_token}"
+        else:
+            raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.")
+
+    def _fetch_max_batch_size(self, model_name, model_version: str = "") -> int:
+        """Fetch the maximum batch size from the Triton model configuration in a thread-safe manner."""
+        if model_name in self._max_batch_sizes:
+            return self._max_batch_sizes[model_name]
+
+        with self._lock:
+            # Double-check, in case another thread set the value while we were waiting
+            if model_name in self._max_batch_sizes:
+                return self._max_batch_sizes[model_name]
+
+            if not self._grpc_endpoint:
+                self._max_batch_sizes[model_name] = 1
+                return 1
+
+            try:
+                client = self.client if self.client else grpcclient.InferenceServerClient(url=self._grpc_endpoint)
+                model_config = client.get_model_config(model_name=model_name, model_version=model_version)
+                self._max_batch_sizes[model_name] = model_config.config.max_batch_size
+                logger.debug(f"Max batch size for model '{model_name}': {self._max_batch_sizes[model_name]}")
+            except Exception as e:
+                self._max_batch_sizes[model_name] = 1
+                logger.warning(f"Failed to retrieve max batch size: {e}, defaulting to 1")
+
+        return self._max_batch_sizes[model_name]
+
+    def _process_batch(self, batch_input, *, batch_data, model_name, **kwargs):
+        """
+        Process a single batch input for inference using its corresponding batch_data.
+
+        Parameters
+        ----------
+        batch_input : Any
+            The input data for this batch.
+        batch_data : Any
+            The corresponding scratch-pad data for this batch as returned by format_input.
+        model_name : str
+            The model name for inference.
+        kwargs : dict
+            Additional parameters.
+
+        Returns
+        -------
+        tuple
+            A tuple (parsed_output, batch_data) for subsequent post-processing.
+        """
+        if self.protocol == "grpc":
+            logger.debug("Performing gRPC inference for a batch...")
+            response = self._grpc_infer(batch_input, model_name)
+            logger.debug("gRPC inference received response for a batch")
+        elif self.protocol == "http":
+            logger.debug("Performing HTTP inference for a batch...")
+            response = self._http_infer(batch_input)
+            logger.debug("HTTP inference received response for a batch")
+        else:
+            raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.")
+
+        parsed_output = self.model_interface.parse_output(response, protocol=self.protocol, data=batch_data, **kwargs)
+        return parsed_output, batch_data
+
+    def try_set_max_batch_size(self, model_name, model_version: str = ""):
+        """Attempt to set the max batch size for the model if it is not already set, ensuring thread safety."""
+        self._fetch_max_batch_size(model_name, model_version)
+
+    @traceable_func(trace_name="{stage_name}::{model_name}")
+    def infer(self, data: dict, model_name: str, **kwargs) -> Any:
+        """
+        Perform inference using the specified model and input data.
+
+        Parameters
+        ----------
+        data : dict
+            The input data for inference.
+        model_name : str
+            The model name.
+        kwargs : dict
+            Additional parameters for inference.
+
+        Returns
+        -------
+        Any
+            The processed inference results, coalesced in the same order as the input images.
+        """
+        try:
+            # 1. Retrieve or default to the model's maximum batch size.
+            batch_size = self._fetch_max_batch_size(model_name)
+            max_requested_batch_size = kwargs.get("max_batch_size", batch_size)
+            force_requested_batch_size = kwargs.get("force_max_batch_size", False)
+            max_batch_size = (
+                min(batch_size, max_requested_batch_size)
+                if not force_requested_batch_size
+                else max_requested_batch_size
+            )
+
+            # 2. Prepare data for inference.
+            data = self.model_interface.prepare_data_for_inference(data)
+
+            # 3. Format the input based on protocol.
+            formatted_batches, formatted_batch_data = self.model_interface.format_input(
+                data, protocol=self.protocol, max_batch_size=max_batch_size, model_name=model_name
+            )
+
+            # Check for a custom maximum pool worker count, and remove it from kwargs.
+            max_pool_workers = kwargs.pop("max_pool_workers", 16)
+
+            # 4. Process each batch concurrently using a thread pool.
+            # We enumerate the batches so that we can later reassemble results in order.
+            results = [None] * len(formatted_batches)
+            with ThreadPoolExecutor(max_workers=max_pool_workers) as executor:
+                futures = []
+                for idx, (batch, batch_data) in enumerate(zip(formatted_batches, formatted_batch_data)):
+                    future = executor.submit(
+                        self._process_batch, batch, batch_data=batch_data, model_name=model_name, **kwargs
+                    )
+                    futures.append((idx, future))
+                for idx, future in futures:
+                    results[idx] = future.result()
+
+            # 5. Process the parsed outputs for each batch using its corresponding batch_data.
+            # As the batches are in order, we coalesce their outputs accordingly.
+            all_results = []
+            for parsed_output, batch_data in results:
+                batch_results = self.model_interface.process_inference_results(
+                    parsed_output,
+                    original_image_shapes=batch_data.get("original_image_shapes"),
+                    protocol=self.protocol,
+                    **kwargs,
+                )
+                if isinstance(batch_results, list):
+                    all_results.extend(batch_results)
+                else:
+                    all_results.append(batch_results)
+
+        except Exception as err:
+            error_str = f"Error during NimClient inference [{self.model_interface.name()}, {self.protocol}]: {err}"
+            logger.error(error_str)
+            raise RuntimeError(error_str)
+
+        return all_results
+
+    def _grpc_infer(self, formatted_input: np.ndarray, model_name: str) -> np.ndarray:
+        """
+        Perform inference using the gRPC protocol.
+
+        Parameters
+        ----------
+        formatted_input : np.ndarray
+            The input data formatted as a numpy array.
+        model_name : str
+            The name of the model to use for inference.
+
+        Returns
+        -------
+        np.ndarray
+            The output of the model as a numpy array.
+        """
+
+        input_tensors = [grpcclient.InferInput("input", formatted_input.shape, datatype="FP32")]
+        input_tensors[0].set_data_from_numpy(formatted_input)
+
+        outputs = [grpcclient.InferRequestedOutput("output")]
+        response = self.client.infer(model_name=model_name, inputs=input_tensors, outputs=outputs)
+        logger.debug(f"gRPC inference response: {response}")
+
+        # TODO(self.client.has_error(response)) => raise error
+
+        return response.as_numpy("output")
+
+    def _http_infer(self, formatted_input: dict) -> dict:
+        """
+        Perform inference using the HTTP protocol, retrying timeouts and 5xx errors up to max_retries times.
+
+        Parameters
+        ----------
+        formatted_input : dict
+            The input data formatted as a dictionary.
+
+        Returns
+        -------
+        dict
+            The output of the model as a dictionary.
+
+        Raises
+        ------
+        TimeoutError
+            If the HTTP request times out repeatedly, up to the max retries.
+        requests.RequestException
+            For other HTTP-related errors that persist after max retries.
+        """
+
+        base_delay = 2.0
+        attempt = 0
+
+        while attempt < self.max_retries:
+            try:
+                response = requests.post(
+                    self.endpoint_url, json=formatted_input, headers=self.headers, timeout=self.timeout
+                )
+                status_code = response.status_code
+
+                # Check for server-side or rate-limit type errors,
+                # e.g. 5xx => server error, 429 => too many requests
+                if status_code == 429 or status_code == 503 or (500 <= status_code < 600):
+                    logger.warning(
+                        f"Received HTTP {status_code} ({response.reason}) from "
+                        f"{self.model_interface.name()}. Attempt {attempt + 1} of {self.max_retries}."
+                    )
+                    if attempt == self.max_retries - 1:
+                        # No more retries left
+                        logger.error(f"Max retries exceeded after receiving HTTP {status_code}.")
+                        response.raise_for_status()  # raise the appropriate HTTPError
+                    else:
+                        # Exponential backoff
+                        backoff_time = base_delay * (2**attempt)
+                        time.sleep(backoff_time)
+                        attempt += 1
+                        continue
+                else:
+                    # Not in our "retry" category => just raise_for_status or return
+                    response.raise_for_status()
+                    logger.debug(f"HTTP inference response: {response.json()}")
+                    return response.json()
+
+            except requests.Timeout:
+                # Treat timeouts similarly to 5xx => attempt a retry
+                logger.warning(
+                    f"HTTP request timed out after {self.timeout} seconds during {self.model_interface.name()} "
+                    f"inference. Attempt {attempt + 1} of {self.max_retries}."
+                )
+                if attempt == self.max_retries - 1:
+                    logger.error("Max retries exceeded after repeated timeouts.")
+                    raise TimeoutError(
+                        f"Repeated timeouts for {self.model_interface.name()} after {attempt + 1} attempts."
+                    )
+                # Exponential backoff
+                backoff_time = base_delay * (2**attempt)
+                time.sleep(backoff_time)
+                attempt += 1
+
+            except requests.HTTPError as http_err:
+                # If we ended up here, it's a non-retryable 4xx or final 5xx after final attempt
+                logger.error(f"HTTP request failed with status code {response.status_code}: {http_err}")
+                raise
+
+            except requests.RequestException as e:
+                # ConnectionError or other non-HTTPError
+                logger.error(f"HTTP request encountered a network issue: {e}")
+                if attempt == self.max_retries - 1:
+                    raise
+                # Else retry on next loop iteration
+                backoff_time = base_delay * (2**attempt)
+                time.sleep(backoff_time)
+                attempt += 1
+
+        # If we exit the loop without returning, we've exhausted all attempts
+        logger.error(f"Failed to get a successful response after {self.max_retries} retries.")
+        raise Exception(f"Failed to get a successful response after {self.max_retries} retries.")
+
+    def close(self):
+        if self.protocol == "grpc" and hasattr(self.client, "close"):
+            self.client.close()
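A minimal usage sketch for this client; everything here is illustrative (the endpoint address, payload, and model name are placeholders, and EchoModelInterface is the hypothetical subclass sketched after the next hunk):

```python
from nv_ingest_api.internal.primitives.nim.nim_client import NimClient

# EchoModelInterface is the illustrative ModelInterface subclass shown below.
client = NimClient(
    model_interface=EchoModelInterface(),
    protocol="http",
    endpoints=("", "http://localhost:8000/v1/infer"),  # (grpc, http) placeholder addresses
    timeout=120.0,
    max_retries=5,
)
try:
    # The payload shape depends entirely on the concrete model interface.
    results = client.infer({"prompt": "hello"}, model_name="my_model")  # hypothetical payload and model name
finally:
    client.close()
```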
nv_ingest_api/internal/primitives/nim/nim_model_interface.py

@@ -0,0 +1,81 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+from typing import Optional
+
+
+logger = logging.getLogger(__name__)
+
+
+class ModelInterface:
+    """
+    Base class for defining a model interface that supports preparing input data, formatting it for
+    inference, parsing output, and processing inference results.
+    """
+
+    def format_input(self, data: dict, protocol: str, max_batch_size: int):
+        """
+        Format the input data for the specified protocol.
+
+        Parameters
+        ----------
+        data : dict
+            The input data to format.
+        protocol : str
+            The protocol to format the data for.
+        max_batch_size : int
+            The maximum number of items to place in a single batch.
+        """
+
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def parse_output(self, response, protocol: str, data: Optional[dict] = None, **kwargs):
+        """
+        Parse the output data from the model's inference response.
+
+        Parameters
+        ----------
+        response : Any
+            The response from the model inference.
+        protocol : str
+            The protocol used ("grpc" or "http").
+        data : dict, optional
+            Additional input data passed to the function.
+        """
+
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def prepare_data_for_inference(self, data: dict):
+        """
+        Prepare input data for inference by processing or transforming it as required.
+
+        Parameters
+        ----------
+        data : dict
+            The input data to prepare.
+        """
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def process_inference_results(self, output_array, protocol: str, **kwargs):
+        """
+        Process the inference results from the model.
+
+        Parameters
+        ----------
+        output_array : Any
+            The raw output from the model.
+        protocol : str
+            The protocol used ("grpc" or "http").
+        kwargs : dict
+            Additional parameters for processing.
+        """
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def name(self) -> str:
+        """
+        Get the name of the model interface.
+
+        Returns
+        -------
+        str
+            The name of the model interface.
+        """
+        raise NotImplementedError("Subclasses should implement this method")
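To make the contract concrete, a skeletal pass-through subclass. The behavior is illustrative only; note that NimClient.infer passes model_name as an extra keyword to format_input, so concrete implementations need to accept it even though the base signature above does not declare it:

```python
from typing import Optional

from nv_ingest_api.internal.primitives.nim.nim_model_interface import ModelInterface


class EchoModelInterface(ModelInterface):
    """Illustrative interface that passes data through unchanged."""

    def name(self) -> str:
        return "echo"

    def prepare_data_for_inference(self, data: dict):
        return data

    def format_input(self, data: dict, protocol: str, max_batch_size: int, **kwargs):
        # NimClient.infer expects (batches, batch_data) lists of equal length.
        return [data], [data]

    def parse_output(self, response, protocol: str, data: Optional[dict] = None, **kwargs):
        return response

    def process_inference_results(self, output_array, protocol: str, **kwargs):
        return output_array
```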
nv_ingest_api/internal/primitives/tracing/latency.py

@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import logging
+from datetime import datetime
+from functools import wraps
+
+logger = logging.getLogger(__name__)
+
+
+# Define ANSI color codes
+class ColorCodes:
+    RED = "\033[91m"
+    GREEN = "\033[92m"
+    YELLOW = "\033[93m"
+    BLUE = "\033[94m"  # Added Blue
+    RESET = "\033[0m"
+
+
+# Function to apply color to a message
+def colorize(message, color_code):
+    return f"{color_code}{message}{ColorCodes.RESET}"
+
+
+def latency_logger(name=None):
+    """
+    A decorator to log the elapsed time of function execution. If available, it also logs
+    the latency based on 'latency::ts_send' metadata in an IngestControlMessage object.
+
+    Parameters
+    ----------
+    name : str, optional
+        Custom name to use in the log message. Defaults to the function's name.
+    """
+
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            # Ensure there's at least one argument and it has timestamp handling capabilities
+            if args and hasattr(args[0], "get_timestamp"):
+                message = args[0]
+                start_time = datetime.now()
+
+                result = func(*args, **kwargs)
+
+                end_time = datetime.now()
+                elapsed_time = end_time - start_time
+
+                func_name = name if name else func.__name__
+
+                # Log latency from ts_send if available
+                if message.filter_timestamp("latency::ts_send"):
+                    ts_send = message.get_timestamp("latency::ts_send")
+                    latency_ms = (start_time - ts_send).total_seconds() * 1e3
+                    logger.debug(f"{func_name} since ts_send: {latency_ms} msec.")
+
+                message.set_timestamp("latency::ts_send", datetime.now())
+                message.set_timestamp(f"latency::{func_name}::elapsed_time", elapsed_time)
+                return result
+            else:
+                raise ValueError(
+                    "The first argument must be an IngestControlMessage object with metadata capabilities."
+                )
+
+        return wrapper
+
+    return decorator
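A short usage sketch for the decorator, assuming a stage function whose first argument is an IngestControlMessage (which supplies the get_timestamp/set_timestamp/filter_timestamp methods the wrapper relies on); the stage name is a placeholder:

```python
from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage
from nv_ingest_api.internal.primitives.tracing.latency import latency_logger


@latency_logger(name="pdf_extraction")  # hypothetical stage name
def run_stage(message: IngestControlMessage) -> IngestControlMessage:
    ...  # stage work happens here
    return message

# Calling run_stage(cm) stamps "latency::ts_send" and
# "latency::pdf_extraction::elapsed_time" on the message.
```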
nv_ingest_api/internal/primitives/tracing/tagging.py

@@ -0,0 +1,96 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import inspect
+import uuid
+from datetime import datetime
+from enum import Enum
+
+from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage
+
+
+class TaskResultStatus(Enum):
+    SUCCESS = "SUCCESS"
+    FAILURE = "FAILURE"
+
+
+def annotate_cm(control_message: IngestControlMessage, source_id=None, **kwargs):
+    """
+    Annotate an IngestControlMessage object with arbitrary metadata, a source ID, and a timestamp.
+    Each annotation will be uniquely identified by a UUID.
+
+    Parameters:
+    - control_message: The IngestControlMessage object to be annotated.
+    - source_id: A unique identifier for the source of the annotation. If None, uses the caller's __name__.
+    - **kwargs: Arbitrary key-value pairs to be included in the annotation.
+    """
+    if source_id is None:
+        # Determine the __name__ of the parent caller's module
+        frame = inspect.currentframe()
+        caller_frame = inspect.getouterframes(frame)[2]
+        module = inspect.getmodule(caller_frame[0])
+        source_id = module.__name__ if module is not None else "UnknownModule"
+
+    # Ensure 'annotation_timestamp' is not overridden by kwargs
+    if "annotation_timestamp" in kwargs:
+        raise ValueError("'annotation_timestamp' is a reserved key and cannot be specified.")
+
+    message = kwargs.get("message")
+    annotation_key = f"annotation::{message}" if message else f"annotation::{uuid.uuid4()}"
+
+    annotation_timestamp = datetime.now()
+    try:
+        control_message.set_timestamp(annotation_key, annotation_timestamp)
+    except Exception as e:
+        print(f"Failed to set annotation timestamp: {e}")
+
+    # Construct the metadata key uniquely identified by a UUID.
+    metadata_key = f"annotation::{uuid.uuid4()}"
+
+    # Construct the metadata value with the source_id and any provided kwargs.
+    metadata_value = {
+        "source_id": source_id,
+    }
+    metadata_value.update(kwargs)
+
+    try:
+        # Attempt to set the annotated metadata on the IngestControlMessage object.
+        control_message.set_metadata(metadata_key, metadata_value)
+    except Exception as e:
+        # Handle any exceptions that occur when setting metadata.
+        print(f"Failed to annotate IngestControlMessage: {e}")
+
+
+def annotate_task_result(control_message, result, task_id, source_id=None, **kwargs):
+    """
+    Annotate an IngestControlMessage object with the result of a task, identified by a task_id,
+    and an arbitrary number of additional key-value pairs. The result can be a TaskResultStatus
+    enum or a string that will be converted to the corresponding enum.
+
+    Parameters:
+    - control_message: The IngestControlMessage object to be annotated.
+    - result: The result of the task, either SUCCESS or FAILURE, as an enum or string.
+    - task_id: A unique identifier for the task.
+    - source_id: A unique identifier for the source of the annotation; passed through to annotate_cm.
+    - **kwargs: Arbitrary additional key-value pairs to be included in the annotation.
+    """
+    # Convert result to TaskResultStatus enum if it's a string
+    if isinstance(result, str):
+        try:
+            result = TaskResultStatus[result.upper()]
+        except KeyError:
+            raise ValueError(
+                f"Invalid result string: {result}. Must be one of {[status.name for status in TaskResultStatus]}."
+            )
+    elif not isinstance(result, TaskResultStatus):
+        raise ValueError("result must be an instance of TaskResultStatus Enum or a valid result string.")
+
+    # Annotate the control message with task-related information, including the result and task_id.
+    annotate_cm(
+        control_message,
+        source_id=source_id,
+        task_result=result.value,
+        task_id=task_id,
+        **kwargs,
+    )
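And a corresponding sketch for task-result annotation; `cm` is assumed to be an existing IngestControlMessage, and the task identifier, source, and extra keyword are placeholders:

```python
from nv_ingest_api.internal.primitives.tracing.tagging import annotate_task_result

# The string "SUCCESS" is coerced to TaskResultStatus.SUCCESS by annotate_task_result.
annotate_task_result(
    cm,
    result="SUCCESS",
    task_id="extract-0001",      # placeholder task identifier
    source_id="pdf_extractor",   # placeholder source
    pages_processed=12,          # arbitrary extra annotation data
)
```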