PyPI - nv-ingest-api - Versions diffs - 2025.10.4.dev20251004__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl - Mend

nv-ingest-api 2025.10.4.dev20251004__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (34)

nv_ingest_api/internal/schemas/meta/ingest_job_schema.py CHANGED Viewed

@@ -24,8 +24,41 @@ logger = logging.getLogger(__name__)
 # Tracing Options Schema
 class TracingOptionsSchema(BaseModelNoExt):
     trace: bool = False
-    ts_send: int
+    ts_send: Optional[int] = None
     trace_id: Optional[str] = None
+    # V2 PDF splitting support
+    parent_job_id: Optional[str] = None
+    page_num: Optional[int] = None
+    total_pages: Optional[int] = None
+# PDF Configuration Schema
+class PdfConfigSchema(BaseModelNoExt):
+    """PDF-specific configuration options for job submission.
+    Note: split_page_count accepts any positive integer but will be clamped
+    to [1, 128] range by the server at runtime.
+    """
+    split_page_count: Annotated[int, Field(ge=1)] = 32
+class RoutingOptionsSchema(BaseModelNoExt):
+    # Queue routing hint for QoS scheduler
+    queue_hint: Optional[str] = None
+    @field_validator("queue_hint")
+    @classmethod
+    def validate_queue_hint(cls, v):
+        if v is None:
+            return v
+        if not isinstance(v, str):
+            raise ValueError("queue_hint must be a string")
+        s = v.lower()
+        allowed = {"default", "immediate", "micro", "small", "medium", "large"}
+        if s not in allowed:
+            raise ValueError("queue_hint must be one of: default, immediate, micro, small, medium, large")
+        return s
 # Ingest Task Schemas
@@ -111,6 +144,8 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
     image_elements_modality: Optional[str] = None
     structured_elements_modality: Optional[str] = None
     audio_elements_modality: Optional[str] = None
+    custom_content_field: Optional[str] = None
+    result_target_field: Optional[str] = None
 class IngestTaskVdbUploadSchema(BaseModelNoExt):
@@ -266,6 +301,26 @@ class IngestJobSchema(BaseModelNoExt):
     job_id: Union[str, int]
     tasks: List[IngestTaskSchema]
     tracing_options: Optional[TracingOptionsSchema] = None
+    routing_options: Optional[RoutingOptionsSchema] = None
+    pdf_config: Optional[PdfConfigSchema] = None
+    @model_validator(mode="before")
+    @classmethod
+    def migrate_queue_hint(cls, values):
+        """
+        Backward-compatibility shim: if a legacy client sends
+        tracing_options.queue_hint, move it into routing_options.queue_hint.
+        """
+        try:
+            topt = values.get("tracing_options") or {}
+            ropt = values.get("routing_options") or {}
+            if isinstance(topt, dict) and "queue_hint" in topt and "queue_hint" not in ropt:
+                ropt["queue_hint"] = topt.pop("queue_hint")
+                values["routing_options"] = ropt
+                values["tracing_options"] = topt
+        except Exception:
+            pass
+        return values
 # ------------------------------------------------------------------------------

nv_ingest_api/internal/schemas/meta/metadata_schema.py CHANGED Viewed

@@ -352,6 +352,15 @@ class MetadataSchema(BaseModelNoExt):
     raise_on_failure: bool = False
     """If True, indicates that processing should halt on failure."""
+    total_pages: Optional[int] = None
+    """Total number of pages in the source document (V2 API)."""
+    original_source_id: Optional[str] = None
+    """The original source identifier before any splitting or chunking (V2 API)."""
+    original_source_name: Optional[str] = None
+    """The original source name before any splitting or chunking (V2 API)."""
     custom_content: Optional[Dict[str, Any]] = None
     @model_validator(mode="before")

nv_ingest_api/internal/schemas/mixins.py ADDED Viewed

@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Shared mixins for Pydantic schemas.
+"""
+from typing import Any
+from pydantic import BaseModel, field_validator
+class LowercaseProtocolMixin(BaseModel):
+    """
+    Mixin that automatically lowercases any field ending with '_infer_protocol'.
+    This ensures case-insensitive handling of protocol values (e.g., "HTTP" -> "http").
+    Apply this mixin to any schema that has protocol fields to normalize user input.
+    Examples
+    --------
+    >>> class MyConfigSchema(LowercaseProtocolMixin):
+    ...     yolox_infer_protocol: str = ""
+    ...     ocr_infer_protocol: str = ""
+    >>>
+    >>> config = MyConfigSchema(yolox_infer_protocol="GRPC", ocr_infer_protocol="HTTP")
+    >>> config.yolox_infer_protocol
+    'grpc'
+    >>> config.ocr_infer_protocol
+    'http'
+    """
+    @field_validator("*", mode="before")
+    @classmethod
+    def _lowercase_protocol_fields(cls, v: Any, info):
+        """Lowercase any field ending with '_infer_protocol'."""
+        if info.field_name.endswith("_infer_protocol") and v is not None:
+            return str(v).strip().lower()
+        return v

nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py CHANGED Viewed

@@ -7,6 +7,8 @@ import logging
 from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
+from typing import Optional
 from nv_ingest_api.util.logging.configuration import LogLevel
 logger = logging.getLogger(__name__)
@@ -26,6 +28,8 @@ class TextEmbeddingSchema(BaseModel):
     image_elements_modality: str = Field(default="text")
     structured_elements_modality: str = Field(default="text")
     audio_elements_modality: str = Field(default="text")
+    custom_content_field: Optional[str] = None
+    result_target_field: Optional[str] = None
     model_config = ConfigDict(extra="forbid")

nv_ingest_api/internal/transform/embed_text.py CHANGED Viewed

@@ -7,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from typing import Any, Dict, Tuple, Optional, Iterable, List
+import glom
 import pandas as pd
 from openai import OpenAI
@@ -282,6 +283,33 @@ def _add_embeddings(row, embeddings, info_msgs):
     return row
+def _add_custom_embeddings(row, embeddings, result_target_field):
+    """
+    Updates a DataFrame row with embedding data and associated error info
+    based on a user supplied custom content field.
+    Parameters
+    ----------
+    row : pandas.Series
+        A row of the DataFrame.
+    embeddings : dict
+        Dictionary mapping row indices to embeddings.
+    result_target_field: str
+        The field in custom_content to output the embeddings to
+    Returns
+    -------
+    pandas.Series
+        The updated row
+    """
+    embedding = embeddings.get(row.name, None)
+    if embedding is not None:
+        row["metadata"] = glom.assign(row["metadata"], "custom_content." + result_target_field, embedding, missing=dict)
+    return row
 def _format_image_input_string(image_b64: Optional[str]) -> str:
     if not image_b64:
         return
@@ -381,6 +409,20 @@ def _get_pandas_audio_content(row, modality="text"):
     return row.get("audio_metadata", {}).get("audio_transcript")
+def _get_pandas_custom_content(row, custom_content_field):
+    custom_content = row.get("custom_content", {})
+    content = glom.glom(custom_content, custom_content_field, default=None)
+    if content is None:
+        logger.warning(f"Custom content field: {custom_content_field} not found")
+        return None
+    try:
+        return str(content)
+    except (TypeError, ValueError):
+        logger.warning(f"Cannot convert custom content field: {custom_content_field} to string")
+        return None
 # ------------------------------------------------------------------------------
 # Batch Processing Utilities
 # ------------------------------------------------------------------------------
@@ -519,6 +561,7 @@ def transform_create_text_embeddings_internal(
     api_key = task_config.get("api_key") or transform_config.api_key
     endpoint_url = task_config.get("endpoint_url") or transform_config.embedding_nim_endpoint
     model_name = task_config.get("model_name") or transform_config.embedding_model
+    custom_content_field = task_config.get("custom_content_field") or transform_config.custom_content_field
     if execution_trace_log is None:
         execution_trace_log = {}
@@ -612,4 +655,43 @@ def transform_create_text_embeddings_internal(
         content_masks.append(content_mask)
     combined_df = _concatenate_extractions_pandas(df_transform_ledger, embedding_dataframes, content_masks)
+    # Embed custom content
+    if custom_content_field is not None:
+        result_target_field = task_config.get("result_target_field") or custom_content_field + "_embedding"
+        extracted_custom_content = (
+            combined_df["metadata"]
+            .apply(partial(_get_pandas_custom_content, custom_content_field=custom_content_field))
+            .apply(lambda x: x.strip() if isinstance(x, str) and x.strip() else None)
+        )
+        valid_custom_content_mask = extracted_custom_content.notna()
+        if valid_custom_content_mask.any():
+            custom_content_list = extracted_custom_content[valid_custom_content_mask].to_list()
+            custom_content_batches = _generate_batches(custom_content_list, batch_size=transform_config.batch_size)
+            custom_content_embeddings = _async_runner(
+                custom_content_batches,
+                api_key,
+                endpoint_url,
+                model_name,
+                transform_config.encoding_format,
+                transform_config.input_type,
+                transform_config.truncate,
+                False,
+            )
+            custom_embeddings_dict = dict(
+                zip(
+                    extracted_custom_content.loc[valid_custom_content_mask].index,
+                    custom_content_embeddings.get("embeddings", []),
+                )
+            )
+        else:
+            custom_embeddings_dict = {}
+        combined_df = combined_df.apply(
+            _add_custom_embeddings, embeddings=custom_embeddings_dict, result_target_field=result_target_field, axis=1
+        )
     return combined_df, {"trace_info": execution_trace_log}

nv_ingest_api/util/dataloader/dataloader.py CHANGED Viewed

@@ -254,22 +254,29 @@ else:
         file = None
         try:
             for file in paths:
+                if thread_stop.is_set():
+                    return
                 if isinstance(file, tuple):
                     video_file, audio_file = file
+                    if thread_stop.is_set():
+                        return
                     with open(video_file, "rb") as f:
                         video = f.read()
+                    if thread_stop.is_set():
+                        return
                     with open(audio_file, "rb") as f:
                         audio = f.read()
                     queue.put((video, audio))
                 else:
-                    if thread_stop:
+                    if thread_stop.is_set():
                         return
                     with open(file, "rb") as f:
                         queue.put(f.read())
         except Exception as e:
             logging.error(f"Error processing file {file}: {e}")
             queue.put(RuntimeError(f"Error processing file {file}: {e}"))
-        queue.put(StopIteration)
+        finally:
+            queue.put(StopIteration)
     class DataLoader:
         """
@@ -290,7 +297,7 @@ else:
         ):
             interface = interface if interface else MediaInterface()
             self.thread = None
-            self.thread_stop = False
+            self.thread_stop = threading.Event()
             self.queue = queue.Queue(size)
             self.path = Path(path)
             self.output_dir = output_dir
@@ -323,16 +330,20 @@ else:
             Reset itertor by stopping the thread and clearing the queue.
             """
             if self.thread:
-                self.thread_stop = True
+                self.thread_stop.set()
                 self.thread.join()
-            self.thread_stop = False
-            while self.queue.qsize() != 0:
-                with self.queue.mutex:
-                    self.queue.queue.clear()
+                self.thread = None
+            try:
+                while True:
+                    self.queue.get_nowait()
+            except Exception:
+                pass
+            finally:
+                self.thread_stop.clear()
         def __iter__(self):
             self.stop()
-            self.thread_stop = False
+            self.thread_stop.clear()
             self.thread = threading.Thread(
                 target=load_data,
                 args=(

nv_ingest_api/util/image_processing/transforms.py CHANGED Viewed

@@ -49,6 +49,68 @@ def _resize_image_opencv(
     return cv2.resize(array, target_size, interpolation=interpolation)
+def rgba_to_rgb_white_bg(rgba_image):
+    """
+    Convert RGBA image to RGB by blending with a white background.
+    This function properly handles transparency by alpha-blending transparent
+    and semi-transparent pixels with a white background, producing visually
+    accurate results that match how the image would appear when displayed.
+    Parameters
+    ----------
+    rgba_image : numpy.ndarray
+        Input image array with shape (height, width, 4) where the channels
+        are Red, Green, Blue, Alpha. Alpha values can be in range [0, 1]
+        (float) or [0, 255] (uint8).
+    Returns
+    -------
+    numpy.ndarray
+        RGB image array with shape (height, width, 3) and dtype uint8.
+        Values are in range [0, 255] representing Red, Green, Blue channels.
+    Notes
+    -----
+    The alpha blending formula used is:
+        RGB_out = RGB_in * alpha + background * (1 - alpha)
+    Where background is white (255, 255, 255).
+    For pixels with alpha = 1.0 (fully opaque), the original RGB values
+    are preserved. For pixels with alpha = 0.0 (fully transparent), the
+    result is white. Semi-transparent pixels are blended proportionally.
+    Examples
+    --------
+    >>> import numpy as np
+    >>> # Create a sample RGBA image with some transparency
+    >>> rgba = np.random.randint(0, 256, (100, 100, 4), dtype=np.uint8)
+    >>> rgb = rgba_to_rgb_white_bg(rgba)
+    >>> print(rgb.shape)  # (100, 100, 3)
+    >>> print(rgb.dtype)  # uint8
+    >>> # Example with float alpha values [0, 1]
+    >>> rgba_float = np.random.rand(50, 50, 4).astype(np.float32)
+    >>> rgb_float = rgba_to_rgb_white_bg(rgba_float)
+    >>> print(rgb_float.dtype)  # uint8
+    """
+    # Extract RGB and alpha channels
+    rgb = rgba_image[:, :, :3]  # RGB channels (H, W, 3)
+    alpha = rgba_image[:, :, 3:4]  # Alpha channel (H, W, 1)
+    # Normalize alpha to [0, 1] range if it's in [0, 255] range
+    if alpha.max() > 1.0:
+        alpha = alpha / 255.0
+    # Alpha blend with white background using the formula:
+    # result = foreground * alpha + background * (1 - alpha)
+    rgb_image = rgb * alpha + 255 * (1 - alpha)
+    # Convert to uint8 format for standard image representation
+    return rgb_image.astype(np.uint8)
 def scale_image_to_encoding_size(
     base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9, format: str = "PNG", **kwargs
 ) -> Tuple[str, Tuple[int, int]]:
@@ -93,7 +155,7 @@ def scale_image_to_encoding_size(
         # Check initial size
         if len(base64_image) <= max_base64_size:
-            return base64_image, original_size
+            return numpy_to_base64(img_array, format=format, **kwargs), original_size
         # Initial reduction step
         reduction_step = initial_reduction
@@ -621,6 +683,10 @@ def base64_to_numpy(base64_string: str) -> np.ndarray:
         if img is None:
             raise ValueError("OpenCV failed to decode image")
+        # Convert 4 channel to 3 channel if necessary
+        if img.shape[2] == 4:
+            img = rgba_to_rgb_white_bg(img)
         # Convert BGR to RGB for consistent processing (OpenCV loads as BGR)
         # Only convert if it's a 3-channel color image
         if img.ndim == 3 and img.shape[2] == 3:

nv-ingest-api 2025.10.4.dev20251004__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl

Potentially problematic release.

nv-ingest-api 2025.10.4.dev20251004__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl