PyPI - nv-ingest-api - Versions diffs - 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl - Mend

nv-ingest-api 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (25) hide show

nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py CHANGED Viewed

@@ -355,6 +355,10 @@ def create_audio_inference_client(
     if (infer_protocol is None) and (grpc_endpoint and grpc_endpoint.strip()):
         infer_protocol = "grpc"
+    # Normalize protocol to lowercase for case-insensitive comparison
+    if infer_protocol:
+        infer_protocol = infer_protocol.lower()
     if infer_protocol == "http":
         raise ValueError("`http` endpoints are not supported for audio. Use `grpc`.")

nv_ingest_api/internal/primitives/nim/nim_client.py CHANGED Viewed

@@ -5,6 +5,7 @@
 import hashlib
 import json
 import logging
+import re
 import threading
 import time
 import queue
@@ -24,6 +25,12 @@ from nv_ingest_api.util.string_processing import generate_url
 logger = logging.getLogger(__name__)
+# Regex pattern to detect CUDA-related errors in Triton gRPC responses.
+# Compiled once at import time and matched case-insensitively (re.IGNORECASE).
+# NOTE(review): the last alternative ("TritonModelException: failed to copy data: ...") looks already
+# covered by the generic "failed to (copy|load|perform) ...: ..." alternative - confirm before simplifying.
+CUDA_ERROR_REGEX = re.compile(
+    r"(illegal memory access|invalid argument|failed to (copy|load|perform) .*: .*|TritonModelException: failed to copy data: .*)",  # noqa: E501
+    re.IGNORECASE,
+)
 # A simple structure to hold a request's data and its Future for the result
 InferenceRequest = namedtuple("InferenceRequest", ["data", "future", "model_name", "dims", "kwargs"])
@@ -40,7 +47,7 @@ class NimClient:
         endpoints: Tuple[str, str],
         auth_token: Optional[str] = None,
         timeout: float = 120.0,
-        max_retries: int = 5,
+        max_retries: int = 10,
         max_429_retries: int = 5,
         enable_dynamic_batching: bool = False,
         dynamic_batch_timeout: float = 0.1,  # 100 milliseconds
@@ -60,11 +67,11 @@ class NimClient:
         auth_token : str, optional
             Authorization token for HTTP requests (default: None).
         timeout : float, optional
-            Timeout for HTTP requests in seconds (default: 30.0).
+            Timeout for HTTP requests in seconds (default: 120.0).
         max_retries : int, optional
-            The maximum number of retries for non-429 server-side errors (default: 5).
+            The maximum number of retries for non-429 server-side errors (default: 10).
         max_429_retries : int, optional
-            The maximum number of retries specifically for 429 errors (default: 10).
+            The maximum number of retries specifically for 429 errors (default: 5).
         Raises
         ------
@@ -323,7 +330,7 @@ class NimClient:
         outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
-        base_delay = 0.5
+        base_delay = 2.0
         attempt = 0
         retries_429 = 0
         max_grpc_retries = self.max_429_retries
@@ -342,8 +349,58 @@ class NimClient:
                     return [response.as_numpy(output.name()) for output in outputs]
             except grpcclient.InferenceServerException as e:
-                status = e.status()
-                if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in e.message().lower():
+                status = str(e.status())
+                message = e.message()
+                # Handle CUDA memory errors
+                if status == "StatusCode.INTERNAL":
+                    if CUDA_ERROR_REGEX.search(message):
+                        logger.warning(
+                            f"Received gRPC INTERNAL error with CUDA-related message for model '{model_name}'. "
+                            f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
+                        )
+                        if attempt >= self.max_retries - 1:
+                            logger.error(f"Max retries exceeded for CUDA errors on model '{model_name}'.")
+                            raise e
+                        # Try to reload models before retrying
+                        model_reload_succeeded = reload_models(client=self.client, client_timeout=self.timeout)
+                        if not model_reload_succeeded:
+                            logger.error(f"Failed to reload models for model '{model_name}'.")
+                    else:
+                        logger.warning(
+                            f"Received gRPC INTERNAL error for model '{model_name}'. "
+                            f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
+                        )
+                        if attempt >= self.max_retries - 1:
+                            logger.error(f"Max retries exceeded for INTERNAL error on model '{model_name}'.")
+                            raise e
+                    # Common retry logic for both CUDA and non-CUDA INTERNAL errors
+                    backoff_time = base_delay * (2**attempt)
+                    time.sleep(backoff_time)
+                    attempt += 1
+                    continue
+                # Handle errors that can occur after model reload (NOT_FOUND, model not loaded)
+                if status == "StatusCode.NOT_FOUND":
+                    logger.warning(
+                        f"Received gRPC {status} error for model '{model_name}'. "
+                        f"Attempt {attempt + 1} of {self.max_retries}. Message: {message[:500]}"
+                    )
+                    if attempt >= self.max_retries - 1:
+                        logger.error(f"Max retries exceeded for model not found errors on model '{model_name}'.")
+                        raise e
+                    # Retry with exponential backoff WITHOUT reloading
+                    backoff_time = base_delay * (2**attempt)
+                    logger.info(
+                        f"Retrying after {backoff_time}s backoff for model not found error on model '{model_name}'."
+                    )
+                    time.sleep(backoff_time)
+                    attempt += 1
+                    continue
+                if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in message.lower():
                     retries_429 += 1
                     logger.warning(
                         f"Received gRPC {status} for model '{model_name}'. "
@@ -357,13 +414,12 @@ class NimClient:
                     time.sleep(backoff_time)
                     continue
-                else:
-                    # For other server-side errors (e.g., INVALID_ARGUMENT, NOT_FOUND),
-                    # retrying will not help. We should fail fast.
-                    logger.error(
-                        f"Received non-retryable gRPC error from Triton for model '{model_name}': {e.message()}"
-                    )
-                    raise
+                # For other server-side errors (e.g., INVALID_ARGUMENT, etc.),
+                # fail fast as retrying will not help
+                logger.error(
+                    f"Received non-retryable gRPC error {status} from Triton for model '{model_name}': {message}"
+                )
+                raise
             except Exception as e:
                 # Catch any other unexpected exceptions (e.g., network issues not caught by Triton client)
@@ -681,3 +737,57 @@ class NimClientManager:
 def get_nim_client_manager(*args, **kwargs) -> NimClientManager:
     """Returns the singleton instance of the NimClientManager."""
     return NimClientManager(*args, **kwargs)
+def reload_models(client: grpcclient.InferenceServerClient, exclude: list[str] = [], client_timeout: int = 120) -> bool:
+    """
+    Reloads all models in the Triton server except for the models in the exclude list.
+    Parameters
+    ----------
+    client : grpcclient.InferenceServerClient
+        The gRPC client connected to the Triton server.
+    exclude : list[str], optional
+        A list of model names to exclude from reloading.
+    client_timeout : int, optional
+        Timeout for client operations in seconds (default: 120).
+    Returns
+    -------
+    bool
+        True if all models were successfully reloaded, False otherwise.
+    """
+    model_index = client.get_model_repository_index()
+    exclude = set(exclude)
+    names = [m.name for m in model_index.models if m.name not in exclude]
+    logger.info(f"Reloading {len(names)} model(s): {', '.join(names) if names else '(none)'}")
+    # 1) Unload
+    for name in names:
+        try:
+            client.unload_model(name)
+        except grpcclient.InferenceServerException as e:
+            msg = e.message()
+            if "explicit model load / unload" in msg.lower():
+                status = e.status()
+                logger.warning(
+                    f"[SKIP Model Reload] Explicit model control disabled; cannot unload '{name}'. Status: {status}."
+                )
+                return False
+            logger.error(f"[ERROR] Failed to unload '{name}': {msg}")
+            return False
+    # 2) Load
+    for name in names:
+        client.load_model(name)
+    # 3) Readiness check
+    for name in names:
+        ready = client.is_model_ready(model_name=name, client_timeout=client_timeout)
+        if not ready:
+            logger.warning(f"[Warning] Triton Not ready: {name}")
+            return False
+    logger.info("✅ Reload of models complete.")
+    return True

nv_ingest_api/internal/schemas/extract/extract_audio_schema.py CHANGED Viewed

@@ -10,10 +10,12 @@ from typing import Tuple
 from pydantic import BaseModel, Field
 from pydantic import root_validator
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class AudioConfigSchema(BaseModel):
+class AudioConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for audio extraction endpoints and options.
@@ -87,13 +89,13 @@ class AudioConfigSchema(BaseModel):
         values[endpoint_name] = (grpc_service, http_service)
+        # Auto-infer protocol from endpoints if not specified
         protocol_name = "audio_infer_protocol"
         protocol_value = values.get(protocol_name)
         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value
         return values

nv_ingest_api/internal/schemas/extract/extract_chart_schema.py CHANGED Viewed

@@ -8,10 +8,12 @@ from typing import Tuple
 from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class ChartExtractorConfigSchema(BaseModel):
+class ChartExtractorConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for chart extraction service endpoints and options.
@@ -96,6 +98,13 @@ class ChartExtractorConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
+            protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
+            protocol_value = values.get(protocol_name)
+            if not protocol_value:
+                protocol_value = "http" if http_service else "grpc" if grpc_service else ""
+            values[protocol_name] = protocol_value
         return values
     model_config = ConfigDict(extra="forbid")

nv_ingest_api/internal/schemas/extract/extract_docx_schema.py CHANGED Viewed

@@ -9,10 +9,12 @@ from typing import Tuple
 from pydantic import model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class DocxConfigSchema(BaseModel):
+class DocxConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for docx extraction endpoints and options.
@@ -85,11 +87,11 @@ class DocxConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
             protocol_name = f"{model_name}_infer_protocol"
             protocol_value = values.get(protocol_name)
             if not protocol_value:
                 protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-            protocol_value = protocol_value.lower()
             values[protocol_name] = protocol_value
         return values

nv_ingest_api/internal/schemas/extract/extract_image_schema.py CHANGED Viewed

@@ -9,10 +9,12 @@ from typing import Tuple
 from pydantic import model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class ImageConfigSchema(BaseModel):
+class ImageConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for image extraction endpoints and options.
@@ -85,11 +87,11 @@ class ImageConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
             protocol_name = f"{model_name}_infer_protocol"
             protocol_value = values.get(protocol_name)
             if not protocol_value:
                 protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-            protocol_value = protocol_value.lower()
             values[protocol_name] = protocol_value
         return values

nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py CHANGED Viewed

@@ -8,10 +8,12 @@ from typing import Tuple
 from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class InfographicExtractorConfigSchema(BaseModel):
+class InfographicExtractorConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for infographic extraction service endpoints and options.
@@ -89,6 +91,13 @@ class InfographicExtractorConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
+            protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
+            protocol_value = values.get(protocol_name)
+            if not protocol_value:
+                protocol_value = "http" if http_service else "grpc" if grpc_service else ""
+            values[protocol_name] = protocol_value
         return values
     model_config = ConfigDict(extra="forbid")

nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py CHANGED Viewed

@@ -9,10 +9,12 @@ from typing import Tuple
 from pydantic import model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class PDFiumConfigSchema(BaseModel):
+class PDFiumConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for PDFium endpoints and options.
@@ -82,11 +84,11 @@ class PDFiumConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
             protocol_name = f"{model_name}_infer_protocol"
             protocol_value = values.get(protocol_name)
             if not protocol_value:
                 protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-            protocol_value = protocol_value.lower()
             values[protocol_name] = protocol_value
         return values
@@ -94,7 +96,7 @@ class PDFiumConfigSchema(BaseModel):
     model_config = ConfigDict(extra="forbid")
-class NemoRetrieverParseConfigSchema(BaseModel):
+class NemoRetrieverParseConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for NemoRetrieverParse endpoints and options.
@@ -170,11 +172,11 @@ class NemoRetrieverParseConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
             protocol_name = f"{model_name}_infer_protocol"
             protocol_value = values.get(protocol_name)
             if not protocol_value:
                 protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-            protocol_value = protocol_value.lower()
             values[protocol_name] = protocol_value
         return values

nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py CHANGED Viewed

@@ -9,10 +9,12 @@ from typing import Tuple
 from pydantic import model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class PPTXConfigSchema(BaseModel):
+class PPTXConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for docx extraction endpoints and options.
@@ -85,11 +87,11 @@ class PPTXConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
             protocol_name = f"{model_name}_infer_protocol"
             protocol_value = values.get(protocol_name)
             if not protocol_value:
                 protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-            protocol_value = protocol_value.lower()
             values[protocol_name] = protocol_value
         return values

nv_ingest_api/internal/schemas/extract/extract_table_schema.py CHANGED Viewed

@@ -9,11 +9,12 @@ from typing import Tuple
 from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
 logger = logging.getLogger(__name__)
-class TableExtractorConfigSchema(BaseModel):
+class TableExtractorConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for the table extraction stage settings.
@@ -91,6 +92,13 @@ class TableExtractorConfigSchema(BaseModel):
             values[endpoint_name] = (grpc_service, http_service)
+            # Auto-infer protocol from endpoints if not specified
+            protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
+            protocol_value = values.get(protocol_name)
+            if not protocol_value:
+                protocol_value = "http" if http_service else "grpc" if grpc_service else ""
+            values[protocol_name] = protocol_value
         return values
     model_config = ConfigDict(extra="forbid")

nv_ingest_api/internal/schemas/meta/ingest_job_schema.py CHANGED Viewed

@@ -43,6 +43,24 @@ class PdfConfigSchema(BaseModelNoExt):
     split_page_count: Annotated[int, Field(ge=1)] = 32
+class RoutingOptionsSchema(BaseModelNoExt):
+    # Queue routing hint for QoS scheduler
+    queue_hint: Optional[str] = None
+    @field_validator("queue_hint")
+    @classmethod
+    def validate_queue_hint(cls, v):
+        if v is None:
+            return v
+        if not isinstance(v, str):
+            raise ValueError("queue_hint must be a string")
+        s = v.lower()
+        allowed = {"default", "immediate", "micro", "small", "medium", "large"}
+        if s not in allowed:
+            raise ValueError("queue_hint must be one of: default, immediate, micro, small, medium, large")
+        return s
 # Ingest Task Schemas
@@ -126,6 +144,8 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
     image_elements_modality: Optional[str] = None
     structured_elements_modality: Optional[str] = None
     audio_elements_modality: Optional[str] = None
+    custom_content_field: Optional[str] = None
+    result_target_field: Optional[str] = None
 class IngestTaskVdbUploadSchema(BaseModelNoExt):
@@ -281,8 +301,27 @@ class IngestJobSchema(BaseModelNoExt):
     job_id: Union[str, int]
     tasks: List[IngestTaskSchema]
     tracing_options: Optional[TracingOptionsSchema] = None
+    routing_options: Optional[RoutingOptionsSchema] = None
     pdf_config: Optional[PdfConfigSchema] = None
+    @model_validator(mode="before")
+    @classmethod
+    def migrate_queue_hint(cls, values):
+        """
+        Backward-compatibility shim: if a legacy client sends
+        tracing_options.queue_hint, move it into routing_options.queue_hint.
+        """
+        try:
+            topt = values.get("tracing_options") or {}
+            ropt = values.get("routing_options") or {}
+            if isinstance(topt, dict) and "queue_hint" in topt and "queue_hint" not in ropt:
+                ropt["queue_hint"] = topt.pop("queue_hint")
+                values["routing_options"] = ropt
+                values["tracing_options"] = topt
+        except Exception:
+            pass
+        return values
 # ------------------------------------------------------------------------------
 # Utility Functions

nv_ingest_api/internal/schemas/meta/metadata_schema.py CHANGED Viewed

@@ -352,6 +352,15 @@ class MetadataSchema(BaseModelNoExt):
     raise_on_failure: bool = False
     """If True, indicates that processing should halt on failure."""
+    total_pages: Optional[int] = None
+    """Total number of pages in the source document (V2 API)."""
+    original_source_id: Optional[str] = None
+    """The original source identifier before any splitting or chunking (V2 API)."""
+    original_source_name: Optional[str] = None
+    """The original source name before any splitting or chunking (V2 API)."""
     custom_content: Optional[Dict[str, Any]] = None
     @model_validator(mode="before")

nv_ingest_api/internal/schemas/mixins.py ADDED Viewed

@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Shared mixins for Pydantic schemas.
+"""
+from typing import Any
+from pydantic import BaseModel, field_validator
+class LowercaseProtocolMixin(BaseModel):
+    """
+    Mixin that automatically lowercases any field ending with '_infer_protocol'.
+    This ensures case-insensitive handling of protocol values (e.g., "HTTP" -> "http").
+    Apply this mixin to any schema that has protocol fields to normalize user input.
+    Examples
+    --------
+    >>> class MyConfigSchema(LowercaseProtocolMixin):
+    ...     yolox_infer_protocol: str = ""
+    ...     ocr_infer_protocol: str = ""
+    >>>
+    >>> config = MyConfigSchema(yolox_infer_protocol="GRPC", ocr_infer_protocol="HTTP")
+    >>> config.yolox_infer_protocol
+    'grpc'
+    >>> config.ocr_infer_protocol
+    'http'
+    """
+    @field_validator("*", mode="before")
+    @classmethod
+    def _lowercase_protocol_fields(cls, v: Any, info):
+        """Lowercase any field ending with '_infer_protocol'."""
+        if info.field_name.endswith("_infer_protocol") and v is not None:
+            return str(v).strip().lower()
+        return v

nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py CHANGED Viewed

@@ -7,6 +7,8 @@ import logging
 from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
+from typing import Optional
 from nv_ingest_api.util.logging.configuration import LogLevel
 logger = logging.getLogger(__name__)
@@ -26,6 +28,8 @@ class TextEmbeddingSchema(BaseModel):
     image_elements_modality: str = Field(default="text")
     structured_elements_modality: str = Field(default="text")
     audio_elements_modality: str = Field(default="text")
+    custom_content_field: Optional[str] = None
+    result_target_field: Optional[str] = None
     model_config = ConfigDict(extra="forbid")

nv-ingest-api 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl

Potentially problematic release.

nv-ingest-api 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl