nv-ingest-api 2025.7.14.dev20250714__py3-none-any.whl → 2025.7.16.dev20250716__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nv-ingest-api might be problematic.

@@ -52,6 +52,8 @@ class ContentDescriptionEnum(str, Enum):
  Description for image extracted from PDF document.
  PDF_INFOGRAPHIC : str
  Description for structured infographic extracted from PDF document.
+ PDF_PAGE_IMAGE : str
+ Description for a full-page image rendered from a PDF document.
  PDF_TABLE : str
  Description for structured table extracted from PDF document.
  PDF_TEXT : str

@@ -70,6 +72,7 @@ class ContentDescriptionEnum(str, Enum):
  PDF_CHART: str = "Structured chart extracted from PDF document."
  PDF_IMAGE: str = "Image extracted from PDF document."
  PDF_INFOGRAPHIC: str = "Structured infographic extracted from PDF document."
+ PDF_PAGE_IMAGE: str = "Full-page image rendered from a PDF document."
  PDF_TABLE: str = "Structured table extracted from PDF document."
  PDF_TEXT: str = "Unstructured text from PDF document."
  PPTX_IMAGE: str = "Image extracted from PPTX presentation."

@@ -94,6 +97,8 @@ class ContentTypeEnum(str, Enum):
  Represents image content.
  INFO_MSG : str
  Represents an informational message.
+ PAGE_IMAGE : str
+ Represents a full-page image rendered from a document.
  STRUCTURED : str
  Represents structured content.
  TEXT : str

@@ -111,6 +116,7 @@ class ContentTypeEnum(str, Enum):
  INFOGRAPHIC: str = "infographic"
  INFO_MSG: str = "info_message"
  NONE: str = "none"
+ PAGE_IMAGE: str = "page_image"
  STRUCTURED: str = "structured"
  TABLE: str = "table"
  TEXT: str = "text"
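
Illustrative note (not part of the diff): the new members give full-page renders their own content type alongside the existing image and structured types. Because ContentTypeEnum subclasses str, a member compares equal to its literal value:

    from nv_ingest_api.internal.enums.common import ContentTypeEnum

    # str-backed enum: the member and its literal value are interchangeable.
    assert ContentTypeEnum.PAGE_IMAGE == "page_image"
    assert ContentTypeEnum.PAGE_IMAGE.value == "page_image"
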
@@ -40,6 +40,7 @@ from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadat
  from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
  YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
  YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
+ YOLOX_PAGE_IMAGE_FORMAT,
  )
  from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import NemoRetrieverParseConfigSchema
  from nv_ingest_api.util.metadata.aggregators import (

@@ -355,7 +356,7 @@ def nemoretriever_parse_extractor(
  img_numpy = crop_image(page_image, transformed_bbox)

  if img_numpy is not None:
- base64_img = numpy_to_base64(img_numpy)
+ base64_img = numpy_to_base64(img_numpy, format=YOLOX_PAGE_IMAGE_FORMAT)
  image = Base64Image(
  image=base64_img,
  bbox=transformed_bbox,
@@ -4,20 +4,21 @@
  # Copyright (c) 2024, NVIDIA CORPORATION.

  import base64
+ import inspect
  import io
-
- import pandas as pd
- from typing import Any, Dict, List, Optional
  import logging
+ from typing import Any
+ from typing import Dict
+ from typing import List
+ from typing import Optional

- from nv_ingest_api.internal.extract.pdf.engines import (
- adobe_extractor,
- llama_parse_extractor,
- nemoretriever_parse_extractor,
- pdfium_extractor,
- tika_extractor,
- unstructured_io_extractor,
- )
+ import pandas as pd
+ from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
+ from nv_ingest_api.internal.extract.pdf.engines import llama_parse_extractor
+ from nv_ingest_api.internal.extract.pdf.engines import nemoretriever_parse_extractor
+ from nv_ingest_api.internal.extract.pdf.engines import pdfium_extractor
+ from nv_ingest_api.internal.extract.pdf.engines import tika_extractor
+ from nv_ingest_api.internal.extract.pdf.engines import unstructured_io_extractor
  from nv_ingest_api.util.exception_handlers.decorators import unified_exception_handler

  # Import extraction functions for different engines.

@@ -43,6 +44,7 @@ def _work_extract_pdf(
  extract_infographics: bool,
  extract_tables: bool,
  extract_charts: bool,
+ extract_page_as_image: bool,
  extractor_config: dict,
  execution_trace_log=None,
  ) -> Any:

@@ -52,17 +54,25 @@

  extract_method = extractor_config["extract_method"]
  extractor_fn = EXTRACTOR_LOOKUP.get(extract_method, pdfium_extractor)
- return extractor_fn(
- pdf_stream,
- extract_text,
- extract_images,
- extract_infographics,
- extract_tables,
- extract_charts,
- extractor_config,
- execution_trace_log,
+
+ extractor_fn_args = dict(
+ pdf_stream=pdf_stream,
+ extract_text=extract_text,
+ extract_images=extract_images,
+ extract_infographics=extract_infographics,
+ extract_tables=extract_tables,
+ extract_charts=extract_charts,
+ extractor_config=extractor_config,
+ execution_trace_log=execution_trace_log,
  )

+ if "extract_page_as_image" in inspect.signature(extractor_fn).parameters:
+ extractor_fn_args["extract_page_as_image"] = extract_page_as_image
+ elif extract_page_as_image:
+ logger.warning(f"`extract_page_as_image` is set to True, but {extract_method} does not support it.")
+
+ return extractor_fn(**extractor_fn_args)
+

  @unified_exception_handler
  def _orchestrate_row_extraction(
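
Illustrative sketch (not part of the package): the rewritten dispatch builds the keyword arguments once and forwards extract_page_as_image only to engines whose signature declares it, logging a warning otherwise. The same pattern in isolation, with made-up extractor functions:

    import inspect
    import logging

    logger = logging.getLogger(__name__)

    def new_style_extractor(pdf_stream, extract_page_as_image=False, **_):  # hypothetical engine
        return f"page_image={extract_page_as_image}"

    def old_style_extractor(pdf_stream, **_):  # hypothetical engine without the new flag
        return "no page-image support"

    def dispatch(extractor_fn, pdf_stream, extract_page_as_image):
        kwargs = {"pdf_stream": pdf_stream}
        # Forward the flag only if the callee's signature accepts it.
        if "extract_page_as_image" in inspect.signature(extractor_fn).parameters:
            kwargs["extract_page_as_image"] = extract_page_as_image
        elif extract_page_as_image:
            logger.warning("extract_page_as_image requested but unsupported by this engine")
        return extractor_fn(**kwargs)

    print(dispatch(new_style_extractor, b"%PDF-", True))  # page_image=True
    print(dispatch(old_style_extractor, b"%PDF-", True))  # no page-image support (warning logged)
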
@@ -97,6 +107,7 @@ def _orchestrate_row_extraction(
  extract_tables = params.pop("extract_tables", False)
  extract_charts = params.pop("extract_charts", False)
  extract_infographics = params.pop("extract_infographics", False)
+ extract_page_as_image = params.pop("extract_page_as_image", False)
  extract_method = params.get("extract_method", "pdfium")
  except KeyError as e:
  raise ValueError(f"Missing required extraction flag: {e}")

@@ -137,6 +148,7 @@ def _orchestrate_row_extraction(
  extract_text=extract_text,
  extract_images=extract_images,
  extract_infographics=extract_infographics,
+ extract_page_as_image=extract_page_as_image,
  extract_tables=extract_tables,
  extract_charts=extract_charts,
  extractor_config=extractor_config,
@@ -24,16 +24,19 @@ import numpy as np
  import pandas as pd
  import pypdfium2 as libpdfium

+ from nv_ingest_api.internal.enums.common import ContentTypeEnum
  from nv_ingest_api.internal.primitives.nim.default_values import YOLOX_MAX_BATCH_SIZE
  from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
  YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
  YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
+ YOLOX_PAGE_IMAGE_FORMAT,
  get_yolox_model_name,
  YoloxPageElementsModelInterface,
  )
  from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema
  from nv_ingest_api.internal.enums.common import TableFormatEnum, TextTypeEnum, AccessLevelEnum
  from nv_ingest_api.util.metadata.aggregators import (
+ construct_image_metadata_from_base64,
  construct_image_metadata_from_pdf_image,
  extract_pdf_metadata,
  construct_text_metadata,

@@ -46,6 +49,7 @@ from nv_ingest_api.util.pdf.pdfium import (
  extract_image_like_objects_from_pdfium_page,
  )
  from nv_ingest_api.util.pdf.pdfium import pdfium_pages_to_numpy
+ from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
  from nv_ingest_api.util.image_processing.transforms import numpy_to_base64, crop_image

  logger = logging.getLogger(__name__)

@@ -186,7 +190,7 @@ def _extract_page_element_images(
  if cropped is None:
  continue

- base64_img = numpy_to_base64(cropped)
+ base64_img = numpy_to_base64(cropped, format=YOLOX_PAGE_IMAGE_FORMAT)

  bbox_in_orig_coord = (
  int(w1) - pad_width,

@@ -384,6 +388,7 @@ def pdfium_extractor(
  extract_infographics: bool,
  extract_tables: bool,
  extract_charts: bool,
+ extract_page_as_image: bool,
  extractor_config: dict,
  execution_trace_log: Optional[List[Any]] = None,
  ) -> pd.DataFrame:

@@ -524,6 +529,24 @@
  )
  extracted_data.extend(image_data)

+ # Full page image extraction
+ if extract_page_as_image:
+ page_text = _extract_page_text(page)
+ image, _ = pdfium_pages_to_numpy([page], scale_tuple=(16384, 16384), trace_info=execution_trace_log)
+ base64_image = numpy_to_base64(image[0])
+ if len(base64_image) > 2**24 - 1:
+ base64_image, _ = scale_image_to_encoding_size(base64_image, max_base64_size=2**24 - 1)
+ image_meta = construct_image_metadata_from_base64(
+ base64_image,
+ page_idx,
+ page_count,
+ source_metadata,
+ base_unified_metadata,
+ subtype=ContentTypeEnum.PAGE_IMAGE,
+ text=page_text,
+ )
+ extracted_data.append(image_meta)
+
  # If we want tables or charts, rasterize the page and store it
  if extract_tables or extract_charts or extract_infographics:
  image, padding_offsets = pdfium_pages_to_numpy(
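
Illustrative note (not part of the diff): the page is rendered at up to 16384 px per side and the base64 payload is only downscaled when it exceeds 2**24 - 1 characters. Rough arithmetic on that cap (an interpretation of the constant above, not a documented limit):

    MAX_B64_CHARS = 2**24 - 1                    # 16_777_215 characters
    approx_raw_bytes = MAX_B64_CHARS * 3 // 4    # base64 expands bytes by roughly 4/3
    print(MAX_B64_CHARS, approx_raw_bytes)       # 16777215 12582911
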
@@ -574,6 +597,7 @@ def pdfium_extractor(
  execution_trace_log=execution_trace_log,
  )
  futures.append(future)
+
  pages_for_tables.clear()

  # Wait for all asynchronous jobs to complete.
@@ -120,6 +120,7 @@ class NemoRetrieverParseModelInterface(ModelInterface):
  logger.debug("Formatting input for HTTP NemoRetrieverParse model")
  # Prepare payload for HTTP request

+ ## TODO: Ask @Edward Kim if we want to switch to JPEG/PNG here
  if "images" in data:
  base64_list = [numpy_to_base64(img) for img in data["images"]]
  else:
@@ -2,9 +2,7 @@
  # All rights reserved.
  # SPDX-License-Identifier: Apache-2.0

-
- import base64
- import io
+ import os
  import logging
  import warnings
  from math import log

@@ -20,11 +18,11 @@ import packaging
  import pandas as pd
  import torch
  import torchvision
- from PIL import Image

  from nv_ingest_api.internal.primitives.nim import ModelInterface
  from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_model_name
  from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
+ from nv_ingest_api.util.image_processing.transforms import numpy_to_base64

  logger = logging.getLogger(__name__)

@@ -35,6 +33,7 @@ YOLOX_PAGE_MIN_SCORE = 0.1
  YOLOX_PAGE_NIM_MAX_IMAGE_SIZE = 512_000
  YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
  YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
+ YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")

  # yolox-page-elements-v1 contants
  YOLOX_PAGE_V1_NUM_CLASSES = 4
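
Illustrative sketch (not part of the package): YOLOX_PAGE_IMAGE_FORMAT is read from the environment at import time with a PNG default, so switching extracted images to JPEG is an environment-level choice. Assuming the module path used elsewhere in this diff:

    import os

    # Must be set before the yolox module is first imported, since the constant
    # is evaluated at import time.
    os.environ["YOLOX_PAGE_IMAGE_FORMAT"] = "JPEG"

    from nv_ingest_api.internal.primitives.nim.model_interface.yolox import YOLOX_PAGE_IMAGE_FORMAT
    print(YOLOX_PAGE_IMAGE_FORMAT)  # "JPEG"
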
@@ -239,15 +238,11 @@ class YoloxModelInterfaceBase(ModelInterface):
  # Convert to uint8 if needed.
  if image.dtype != np.uint8:
  image = (image * 255).astype(np.uint8)
- # Convert the numpy array to a PIL Image.
- image_pil = Image.fromarray(image)
- original_size = image_pil.size
-
- # Save the image to a buffer and encode to base64.
- buffered = io.BytesIO()
- image_pil.save(buffered, format="PNG")
- image_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

+ # Get original size directly from numpy array (width, height)
+ original_size = (image.shape[1], image.shape[0])
+ # Convert numpy array directly to base64 using OpenCV
+ image_b64 = numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT)
  # Scale the image if necessary.
  scaled_image_b64, new_size = scale_image_to_encoding_size(
  image_b64, max_base64_size=self.nim_max_image_size
@@ -107,6 +107,10 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
  model_name: Optional[str] = None
  api_key: Optional[str] = None
  filter_errors: bool = False
+ text_elements_modality: Optional[str] = None
+ image_elements_modality: Optional[str] = None
+ structured_elements_modality: Optional[str] = None
+ audio_elements_modality: Optional[str] = None


  class IngestTaskVdbUploadSchema(BaseModelNoExt):

@@ -195,6 +199,7 @@ class IngestTaskSchema(BaseModelNoExt):
  validated_task_properties = expected_schema_cls(**task_properties)
  values["type"] = task_type # ensure type is now always the enum
  values["task_properties"] = validated_task_properties
+
  return values

  @field_validator("type", mode="before")
@@ -22,5 +22,9 @@ class TextEmbeddingSchema(BaseModel):
  input_type: str = Field(default="passage")
  raise_on_failure: bool = Field(default=False)
  truncate: str = Field(default="END")
+ text_elements_modality: str = Field(default="text")
+ image_elements_modality: str = Field(default="text")
+ structured_elements_modality: str = Field(default="text")
+ audio_elements_modality: str = Field(default="text")

  model_config = ConfigDict(extra="forbid")
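
Illustrative note (not part of the diff): the four new fields let an embed task pick an input representation per element type; the defaults keep today's text-only behavior, and the "text", "image", and "text_image" values are inferred from the modality branches added later in this diff. A hypothetical task-properties payload:

    # Hypothetical "embed" task properties; keys mirror the new schema fields.
    embed_task_properties = {
        "text_elements_modality": "text",               # plain text stays text
        "structured_elements_modality": "text_image",   # tables/charts: extracted text plus rendered image
        "image_elements_modality": "image",             # images: data-URI of the image content
        "audio_elements_modality": "text",              # transcripts stay text
    }
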
@@ -4,6 +4,7 @@

  import logging
  from concurrent.futures import ThreadPoolExecutor
+ from functools import partial
  from typing import Any, Dict, Tuple, Optional, Iterable, List

  import pandas as pd

@@ -19,6 +20,9 @@ from nv_ingest_api.util.schema.schema_validator import validate_schema
  logger = logging.getLogger(__name__)


+ MULTI_MODAL_MODELS = ["llama-3.2-nemoretriever-1b-vlm-embed-v1"]
+
+
  # ------------------------------------------------------------------------------
  # Asynchronous Embedding Requests
  # ------------------------------------------------------------------------------

@@ -33,6 +37,7 @@ def _make_async_request(
  input_type: str,
  truncate: str,
  filter_errors: bool,
+ modalities: Optional[List[str]] = None,
  ) -> list:
  """
  Interacts directly with the NIM embedding service to calculate embeddings for a batch of prompts.

@@ -74,11 +79,18 @@ def _make_async_request(
  base_url=embedding_nim_endpoint,
  )

+ extra_body = {
+ "input_type": input_type,
+ "truncate": truncate,
+ }
+ if modalities:
+ extra_body["modality"] = modalities
+
  resp = client.embeddings.create(
  input=prompts,
  model=embedding_model,
  encoding_format=encoding_format,
- extra_body={"input_type": input_type, "truncate": truncate},
+ extra_body=extra_body,
  )

  response["embedding"] = resp.data
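
Illustrative sketch (not part of the package): the request remains an OpenAI-compatible embeddings call; the per-item modality list simply rides along in extra_body. A minimal reproduction of the payload shape, with the endpoint, API key, and inputs as placeholder assumptions:

    from openai import OpenAI  # same client used in the hunk above

    client = OpenAI(api_key="<api-key>", base_url="http://localhost:8000/v1")  # placeholder endpoint

    prompts = ["first passage", "data:image/png;base64,...."]  # one text and one image item
    resp = client.embeddings.create(
        input=prompts,
        model="llama-3.2-nemoretriever-1b-vlm-embed-v1",
        encoding_format="float",
        # Non-standard parameters are passed through verbatim to the NIM service.
        extra_body={"input_type": "passage", "truncate": "END", "modality": ["text", "image"]},
    )
    print(len(resp.data))
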
@@ -110,6 +122,7 @@ def _async_request_handler(
  input_type: str,
  truncate: str,
  filter_errors: bool,
+ modalities: Optional[List[str]] = None,
  ) -> List[dict]:
  """
  Gathers calculated embedding results from the NIM embedding service concurrently.

@@ -138,6 +151,9 @@
  List[dict]
  A list of response dictionaries from the embedding service.
  """
+ if modalities is None:
+ modalities = [None] * len(prompts)
+
  with ThreadPoolExecutor() as executor:
  futures = [
  executor.submit(

@@ -150,8 +166,9 @@
  input_type=input_type,
  truncate=truncate,
  filter_errors=filter_errors,
+ modalities=modality_batch,
  )
- for prompt_batch in prompts
+ for prompt_batch, modality_batch in zip(prompts, modalities)
  ]
  results = [future.result() for future in futures]

@@ -167,6 +184,7 @@ def _async_runner(
  input_type: str,
  truncate: str,
  filter_errors: bool,
+ modalities: Optional[List[str]] = None,
  ) -> dict:
  """
  Concurrently launches all NIM embedding requests and flattens the results.

@@ -204,6 +222,7 @@
  input_type,
  truncate,
  filter_errors,
+ modalities=modalities,
  )

  flat_results = {"embeddings": [], "info_msgs": []}

@@ -263,7 +282,19 @@ def _add_embeddings(row, embeddings, info_msgs):
  return row


- def _get_pandas_text_content(row):
+ def _format_image_input_string(image_b64: Optional[str]) -> str:
+ if not image_b64:
+ return
+ return f"data:image/png;base64,{image_b64}"
+
+
+ def _format_text_image_pair_input_string(text: Optional[str], image_b64: Optional[str]) -> str:
+ if (not text) or (not text.strip()) or (not image_b64):
+ return
+ return f"{text.strip()} {_format_image_input_string(image_b64)}"
+
+
+ def _get_pandas_text_content(row, modality="text"):
  """
  Extracts text content from a DataFrame row.

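
Illustrative note (not part of the diff): the formatting helpers turn a base64 payload into a data-URI prompt string, and the text_image variant prefixes it with the element's text. A small sketch of the strings they produce (inputs are made up):

    image_b64 = "iVBORw0KGgoAAA"  # truncated example payload

    # "image" modality -> a data URI the embedding service can decode:
    image_prompt = f"data:image/png;base64,{image_b64}"

    # "text_image" modality -> element text followed by the data URI, space separated:
    caption = "Quarterly revenue by region"
    pair_prompt = f"{caption.strip()} {image_prompt}"
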
@@ -280,7 +311,7 @@
  return row["content"]


- def _get_pandas_table_content(row):
+ def _get_pandas_table_content(row, modality="text"):
  """
  Extracts table/chart content from a DataFrame row.

@@ -294,10 +325,19 @@ def _get_pandas_table_content(row):
  str
  The table/chart content from the row.
  """
- return row.get("table_metadata", {}).get("table_content")
+ if modality == "text":
+ content = row.get("table_metadata", {}).get("table_content")
+ elif modality == "image":
+ content = _format_image_input_string(row.get("content"))
+ elif modality == "text_image":
+ text = row.get("table_metadata", {}).get("table_content")
+ image = row.get("content")
+ content = _format_text_image_pair_input_string(text, image)
+
+ return content


- def _get_pandas_image_content(row):
+ def _get_pandas_image_content(row, modality="text"):
  """
  Extracts image caption content from a DataFrame row.

@@ -311,10 +351,28 @@ def _get_pandas_image_content(row):
  str
  The image caption from the row.
  """
- return row.get("image_metadata", {}).get("caption")
+ subtype = row.get("content_metadata", {}).get("subtype")
+ if modality == "text":
+ if subtype == "page_image":
+ content = row.get("image_metadata", {}).get("text")
+ else:
+ content = row.get("image_metadata", {}).get("caption")
+ elif modality == "image":
+ content = _format_image_input_string(row.get("content"))
+ elif modality == "text_image":
+ if subtype == "page_image":
+ text = row.get("image_metadata", {}).get("text")
+ else:
+ text = row.get("image_metadata", {}).get("caption")
+ image = row.get("content")
+ content = _format_text_image_pair_input_string(text, image)

+ # A workaround to save memory.
+ row["content"] = ""
+ return content

- def _get_pandas_audio_content(row):
+
+ def _get_pandas_audio_content(row, modality="text"):
  """
  A pandas UDF used to select extracted audio transcription to be used to create embeddings.
  """
@@ -408,6 +466,23 @@ def _concatenate_extractions_pandas(
  # ------------------------------------------------------------------------------


+ def does_model_support_multimodal_embeddings(model: str) -> bool:
+ """
+ Checks if a given model supports multi-modal embeddings.
+
+ Parameters
+ ----------
+ model : str
+ The name of the model.
+
+ Returns
+ -------
+ bool
+ True if the model supports multi-modal embeddings, False otherwise.
+ """
+ return model in MULTI_MODAL_MODELS
+
+
  def transform_create_text_embeddings_internal(
  df_transform_ledger: pd.DataFrame,
  task_config: Dict[str, Any],
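
Illustrative note (not part of the diff): the multi-modal gate is a plain membership check against MULTI_MODAL_MODELS, which currently lists only the llama-3.2-nemoretriever-1b-vlm-embed-v1 embedder:

    # Copied from the hunks above so the sketch runs standalone.
    MULTI_MODAL_MODELS = ["llama-3.2-nemoretriever-1b-vlm-embed-v1"]

    def does_model_support_multimodal_embeddings(model: str) -> bool:
        return model in MULTI_MODAL_MODELS

    print(does_model_support_multimodal_embeddings("llama-3.2-nemoretriever-1b-vlm-embed-v1"))  # True
    print(does_model_support_multimodal_embeddings("some-text-only-embedder"))                  # False
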
@@ -460,6 +535,15 @@ def transform_create_text_embeddings_internal(
  ContentTypeEnum.AUDIO: _get_pandas_audio_content,
  ContentTypeEnum.VIDEO: lambda x: None, # Not supported yet.
  }
+ task_type_to_modality = {
+ ContentTypeEnum.TEXT: task_config.get("text_elements_modality") or transform_config.text_elements_modality,
+ ContentTypeEnum.STRUCTURED: (
+ task_config.get("structured_elements_modality") or transform_config.structured_elements_modality
+ ),
+ ContentTypeEnum.IMAGE: task_config.get("image_elements_modality") or transform_config.image_elements_modality,
+ ContentTypeEnum.AUDIO: task_config.get("audio_elements_modality") or transform_config.audio_elements_modality,
+ ContentTypeEnum.VIDEO: lambda x: None, # Not supported yet.
+ }

  def _content_type_getter(row):
  return row["content_metadata"]["type"]

@@ -480,7 +564,7 @@
  # Extract content and normalize empty or non-str to None
  extracted_content = (
  df_content["metadata"]
- .apply(content_getter)
+ .apply(partial(content_getter, modality=task_type_to_modality[content_type]))
  .apply(lambda x: x.strip() if isinstance(x, str) and x.strip() else None)
  )
  df_content["_content"] = extracted_content

@@ -488,9 +572,15 @@
  # Prepare batches for only valid (non-None) content
  valid_content_mask = df_content["_content"].notna()
  if valid_content_mask.any():
- filtered_content_batches = _generate_batches(
- df_content.loc[valid_content_mask, "_content"].tolist(), batch_size=transform_config.batch_size
- )
+ filtered_content_list = df_content.loc[valid_content_mask, "_content"].tolist()
+ filtered_content_batches = _generate_batches(filtered_content_list, batch_size=transform_config.batch_size)
+
+ if model_name in MULTI_MODAL_MODELS:
+ modality_list = [task_type_to_modality[content_type]] * len(filtered_content_list)
+ modality_batches = _generate_batches(modality_list, batch_size=transform_config.batch_size)
+ else:
+ modality_batches = None
+
  content_embeddings = _async_runner(
  filtered_content_batches,
  api_key,

@@ -500,6 +590,7 @@
  transform_config.input_type,
  transform_config.truncate,
  False,
+ modalities=modality_batches,
  )
  # Build a simple row index -> embedding map
  embeddings_dict = dict(
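
Illustrative sketch (not part of the package): for multi-modal models, each content batch is paired with a modality batch of equal length so the request handler can zip them. A toy version of that pairing, with a simplified stand-in for the module's _generate_batches helper:

    def generate_batches(items, batch_size):
        # Simplified stand-in for _generate_batches.
        return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    contents = ["passage one", "data:image/png;base64,AAA", "passage two"]
    modality_list = ["text_image"] * len(contents)

    content_batches = generate_batches(contents, batch_size=2)
    modality_batches = generate_batches(modality_list, batch_size=2)

    for prompt_batch, modality_batch in zip(content_batches, modality_batches):
        print(prompt_batch, modality_batch)
    # ['passage one', 'data:image/png;base64,AAA'] ['text_image', 'text_image']
    # ['passage two'] ['text_image']
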
@@ -2,29 +2,55 @@
  # All rights reserved.
  # SPDX-License-Identifier: Apache-2.0

- import base64
- import io
  import logging
- from io import BytesIO
  from math import ceil
  from math import floor
  from typing import Optional
  from typing import Tuple

+ import cv2
  import numpy as np
+ from io import BytesIO
  from PIL import Image
- from PIL import UnidentifiedImageError

  from nv_ingest_api.util.converters import bytetools

+ # Configure OpenCV to use a single thread for image processing
+ cv2.setNumThreads(1)
  DEFAULT_MAX_WIDTH = 1024
  DEFAULT_MAX_HEIGHT = 1280

+ # Workaround for PIL.Image.DecompressionBombError
+ Image.MAX_IMAGE_PIXELS = None
+
  logger = logging.getLogger(__name__)


+ def _resize_image_opencv(
+ array: np.ndarray, target_size: Tuple[int, int], interpolation=cv2.INTER_LANCZOS4
+ ) -> np.ndarray:
+ """
+ Resizes a NumPy array representing an image using OpenCV.
+
+ Parameters
+ ----------
+ array : np.ndarray
+ The input image as a NumPy array.
+ target_size : Tuple[int, int]
+ The target size as (width, height).
+ interpolation : int, optional
+ OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
+
+ Returns
+ -------
+ np.ndarray
+ The resized image as a NumPy array.
+ """
+ return cv2.resize(array, target_size, interpolation=interpolation)
+
+
  def scale_image_to_encoding_size(
- base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9
+ base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9, format: str = "PNG", **kwargs
  ) -> Tuple[str, Tuple[int, int]]:
  """
  Decodes a base64-encoded image, resizes it if needed, and re-encodes it as base64.

@@ -38,12 +64,19 @@ def scale_image_to_encoding_size(
  Maximum allowable size for the base64-encoded image, by default 180,000 characters.
  initial_reduction : float, optional
  Initial reduction step for resizing, by default 0.9.
+ format : str, optional
+ The image format to use for encoding. Supported formats are "PNG" and "JPEG".
+ Defaults to "PNG".
+ **kwargs
+ Additional keyword arguments passed to the format-specific encoding function.
+ For JPEG: quality (int, default=100) - JPEG quality (1-100).
+ For PNG: compression (int, default=3) - PNG compression level (0-9).

  Returns
  -------
  Tuple[str, Tuple[int, int]]
  A tuple containing:
- - Base64-encoded PNG image string, resized if necessary.
+ - Base64-encoded image string in the specified format, resized if necessary.
  - The new size as a tuple (width, height).

  Raises

@@ -52,12 +85,11 @@
  If the image cannot be resized below the specified max_base64_size.
  """
  try:
- # Decode the base64 image and open it as a PIL image
- image_data = base64.b64decode(base64_image)
- img = Image.open(io.BytesIO(image_data)).convert("RGB")
+ # Decode the base64 image using OpenCV (returns RGB format)
+ img_array = base64_to_numpy(base64_image)

- # Initial image size
- original_size = img.size
+ # Initial image size (height, width, channels) -> (width, height)
+ original_size = (img_array.shape[1], img_array.shape[0])

  # Check initial size
  if len(base64_image) <= max_base64_size:

@@ -66,23 +98,24 @@
  # Initial reduction step
  reduction_step = initial_reduction
  new_size = original_size
+ current_img = img_array.copy()
+ original_width, original_height = original_size
+
  while len(base64_image) > max_base64_size:
- width, height = img.size
- new_size = (int(width * reduction_step), int(height * reduction_step))
+ new_size = (int(original_width * reduction_step), int(original_height * reduction_step))
+ if new_size[0] < 1 or new_size[1] < 1:
+ raise ValueError("Image cannot be resized further without becoming too small.")

- img_resized = img.resize(new_size, Image.LANCZOS)
- buffered = io.BytesIO()
- img_resized.save(buffered, format="PNG")
- base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
+ # Resize the image using OpenCV
+ current_img = _resize_image_opencv(img_array, new_size)
+
+ # Re-encode as base64 using the specified format
+ base64_image = numpy_to_base64(current_img, format=format, **kwargs)

  # Adjust the reduction step if necessary
  if len(base64_image) > max_base64_size:
  reduction_step *= 0.95 # Reduce size further if needed

- # Safety check
- if new_size[0] < 1 or new_size[1] < 1:
- raise Exception("Image cannot be resized further without becoming too small.")
-
  return base64_image, new_size

  except Exception as e:
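
Illustrative note (not part of the diff): in the rewritten loop each failed attempt shrinks the reduction factor by 5%, and the new dimensions are always computed from the original size rather than the previous iteration's result. A small sketch of that size schedule (an interpretation of the hunk above):

    reduction_step = 0.9
    original = (3000, 2000)
    for attempt in range(4):
        new_size = (int(original[0] * reduction_step), int(original[1] * reduction_step))
        print(attempt, new_size)
        reduction_step *= 0.95
    # (2700, 1800), (2565, 1710), (2436, 1624), (2314, 1543)
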
@@ -90,36 +123,84 @@
  raise


- def ensure_base64_is_png(base64_image: str) -> str:
+ def _detect_base64_image_format(base64_string: str) -> Optional[str]:
  """
- Ensures the given base64-encoded image is in PNG format. Converts to PNG if necessary.
+ Detects the format of a base64-encoded image using Pillow.

  Parameters
  ----------
- base64_image : str
+ base64_string : str
  Base64-encoded image string.

  Returns
  -------
- str
- Base64-encoded PNG image string.
+ The detected format ("PNG", "JPEG", "UNKNOWN")
  """
  try:
- # Decode the base64 string and load the image
- image_data = base64.b64decode(base64_image)
- image = Image.open(io.BytesIO(image_data))
+ image_bytes = bytetools.bytesfrombase64(base64_string)
+ except Exception as e:
+ logger.error(f"Invalid base64 string: {e}")
+ raise ValueError(f"Invalid base64 string: {e}") from e
+
+ try:
+ with Image.open(BytesIO(image_bytes)) as img:
+ return img.format.upper()
+ except ImportError:
+ raise ImportError("Pillow library not available")
+ except Exception as e:
+ logger.error(f"Error detecting image format: {e}")
+ return "UNKNOWN"
+
+
+ def ensure_base64_format(base64_image: str, target_format: str = "PNG", **kwargs) -> str:
+ """
+ Ensures the given base64-encoded image is in the specified format. Converts if necessary.
+ Skips conversion if the image is already in the target format.
+
+ Parameters
+ ----------
+ base64_image : str
+ Base64-encoded image string.
+ target_format : str, optional
+ The target image format. Supported formats are "PNG" and "JPEG". Defaults to "PNG".
+ **kwargs
+ Additional keyword arguments passed to the format-specific encoding function.
+ For JPEG: quality (int, default=100) - JPEG quality (1-100).
+ For PNG: compression (int, default=3) - PNG compression level (0-9).

- # Check if the image is already in PNG format
- if image.format != "PNG":
- # Convert the image to PNG
- buffered = io.BytesIO()
- image.convert("RGB").save(buffered, format="PNG")
- base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
+ Returns
+ -------
+ str
+ Base64-encoded image string in the specified format.

+ Raises
+ ------
+ ValueError
+ If there is an error during format conversion.
+ """
+ target_format = target_format.upper()
+ if target_format == "JPG":
+ target_format = "JPEG"
+
+ current_format = _detect_base64_image_format(base64_image)
+ if current_format == "UNKNOWN":
+ raise ValueError(
+ f"Unable to decode image from base64 string: {base64_image}, because current format could not be detected."
+ )
+ if current_format == target_format:
+ logger.debug(f"Image already in {target_format} format, skipping conversion")
  return base64_image
+
+ try:
+ # Decode the base64 image using OpenCV (returns RGB format)
+ img_array = base64_to_numpy(base64_image)
+ # Re-encode in the target format
+ return numpy_to_base64(img_array, format=target_format, **kwargs)
+ except ImportError as e:
+ raise e
  except Exception as e:
- logger.error(f"Error ensuring PNG format: {e}")
- return None
+ logger.error(f"Error converting image to {target_format} format: {e}")
+ raise ValueError(f"Failed to convert image to {target_format} format: {e}") from e


  def pad_image(
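
Illustrative sketch (not part of the package): ensure_base64_format replaces ensure_base64_is_png, skips work when the payload is already in the target format, and raises instead of returning None on failure. A minimal usage sketch, assuming the helpers are importable from nv_ingest_api.util.image_processing.transforms, the module these hunks modify:

    import numpy as np
    from nv_ingest_api.util.image_processing.transforms import ensure_base64_format, numpy_to_base64

    image_b64 = numpy_to_base64(np.zeros((32, 32, 3), dtype=np.uint8), format="PNG")

    jpeg_b64 = ensure_base64_format(image_b64, target_format="JPEG", quality=90)  # PNG -> JPEG
    same_b64 = ensure_base64_format(jpeg_b64, target_format="JPEG")               # already JPEG: returned unchanged
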
@@ -302,66 +383,193 @@
  return output_array


- def numpy_to_base64(array: np.ndarray) -> str:
+ def _preprocess_numpy_array(array: np.ndarray) -> np.ndarray:
+ """
+ Preprocesses a NumPy array for image encoding by ensuring proper format and data type.
+ Also handles color space conversion for OpenCV encoding.
+
+ Parameters
+ ----------
+ array : np.ndarray
+ The input image as a NumPy array.
+
+ Returns
+ -------
+ np.ndarray
+ The preprocessed array in uint8 format, ready for OpenCV encoding (BGR color order for color images).
+
+ Raises
+ ------
+ ValueError
+ If the input array cannot be converted into a valid image format.
+ """
+ # Check if the array is valid and can be converted to an image
+ try:
+ # If the array represents a grayscale image, drop the redundant axis in
+ # (h, w, 1). cv2 expects (h, w) for grayscale.
+ if array.ndim == 3 and array.shape[2] == 1:
+ array = np.squeeze(array, axis=2)
+
+ # Ensure uint8 data type
+ processed_array = array.astype(np.uint8)
+
+ # OpenCV uses BGR color order, so convert RGB to BGR if needed
+ if processed_array.ndim == 3 and processed_array.shape[2] == 3:
+ # Assume input is RGB and convert to BGR for OpenCV
+ processed_array = cv2.cvtColor(processed_array, cv2.COLOR_RGB2BGR)
+
+ return processed_array
+ except Exception as e:
+ raise ValueError(f"Failed to preprocess NumPy array for image encoding: {e}")
+
+
+ def _encode_opencv_jpeg(array: np.ndarray, *, quality: int = 100) -> bytes:
+ """NumPy array -> JPEG bytes using OpenCV."""
+ ok, buf = cv2.imencode(".jpg", array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
+ if not ok:
+ raise RuntimeError("cv2.imencode failed")
+ return buf.tobytes()
+
+
+ def _encode_opencv_png(array: np.ndarray, *, compression: int = 6) -> bytes:
+ """NumPy array -> PNG bytes using OpenCV"""
+ encode_params = [
+ cv2.IMWRITE_PNG_COMPRESSION,
+ compression,
+ cv2.IMWRITE_PNG_STRATEGY,
+ cv2.IMWRITE_PNG_STRATEGY_DEFAULT,
+ ]
+ ok, buf = cv2.imencode(".png", array, encode_params)
+ if not ok:
+ raise RuntimeError("cv2.imencode(.png) failed")
+ return buf.tobytes()
+
+
+ def numpy_to_base64_png(array: np.ndarray) -> str:
+ """
+ Converts a preprocessed NumPy array representing an image to a base64-encoded PNG string using OpenCV.
+
+ Parameters
+ ----------
+ array : np.ndarray
+ The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
+
+ Returns
+ -------
+ str
+ The base64-encoded PNG string representation of the input NumPy array.
+
+ Raises
+ ------
+ RuntimeError
+ If there is an issue during the image conversion or base64 encoding process.
+ """
+ try:
+ # Encode to PNG bytes using OpenCV
+ png_bytes = _encode_opencv_png(array)
+
+ # Convert to base64
+ base64_img = bytetools.base64frombytes(png_bytes)
+ except Exception as e:
+ raise RuntimeError(f"Failed to encode image to base64 PNG: {e}")
+
+ return base64_img
+
+
+ def numpy_to_base64_jpeg(array: np.ndarray, quality: int = 100) -> str:
+ """
+ Converts a preprocessed NumPy array representing an image to a base64-encoded JPEG string using OpenCV.
+
+ Parameters
+ ----------
+ array : np.ndarray
+ The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
+ quality : int, optional
+ JPEG quality (1-100), by default 100. Higher values mean better quality but larger file size.
+
+ Returns
+ -------
+ str
+ The base64-encoded JPEG string representation of the input NumPy array.
+
+ Raises
+ ------
+ RuntimeError
+ If there is an issue during the image conversion or base64 encoding process.
+ """
+ try:
+ # Encode to JPEG bytes using OpenCV
+ jpeg_bytes = _encode_opencv_jpeg(array, quality=quality)
+
+ # Convert to base64
+ base64_img = bytetools.base64frombytes(jpeg_bytes)
+ except Exception as e:
+ raise RuntimeError(f"Failed to encode image to base64 JPEG: {e}")
+
+ return base64_img
+
+
+ def numpy_to_base64(array: np.ndarray, format: str = "PNG", **kwargs) -> str:
  """
  Converts a NumPy array representing an image to a base64-encoded string.

- The function takes a NumPy array, converts it to a PIL image, and then encodes
- the image as a PNG in a base64 string format. The input array is expected to be in
- a format that can be converted to a valid image, such as having a shape of (H, W, C)
- where C is the number of channels (e.g., 3 for RGB).
+ The function takes a NumPy array, preprocesses it, and then encodes
+ the image in the specified format as a base64 string. The input array is expected
+ to be in a format that can be converted to a valid image, such as having a shape
+ of (H, W, C) where C is the number of channels (e.g., 3 for RGB).

  Parameters
  ----------
  array : np.ndarray
  The input image as a NumPy array. Must have a shape compatible with image data.
+ format : str, optional
+ The image format to use for encoding. Supported formats are "PNG" and "JPEG".
+ Defaults to "PNG".
+ **kwargs
+ Additional keyword arguments passed to the format-specific encoding function.
+ For JPEG: quality (int, default=100) - JPEG quality (1-100).

  Returns
  -------
  str
- The base64-encoded string representation of the input NumPy array as a PNG image.
+ The base64-encoded string representation of the input NumPy array in the specified format.

  Raises
  ------
  ValueError
- If the input array cannot be converted into a valid image format.
+ If the input array cannot be converted into a valid image format, or if an
+ unsupported format is specified.
  RuntimeError
  If there is an issue during the image conversion or base64 encoding process.

  Examples
  --------
  >>> array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
- >>> encoded_str = numpy_to_base64(array)
+ >>> encoded_str = numpy_to_base64(array, format="PNG")
  >>> isinstance(encoded_str, str)
  True
+ >>> encoded_str_jpeg = numpy_to_base64(array, format="JPEG", quality=90)
+ >>> isinstance(encoded_str_jpeg, str)
+ True
  """
- # If the array represents a grayscale image, drop the redundant axis in
- # (h, w, 1). PIL.Image.fromarray() expects an array of form (h, w) if it's
- # a grayscale image.
- if array.ndim == 3 and array.shape[2] == 1:
- array = np.squeeze(array, axis=2)
+ # Centralized preprocessing of the numpy array
+ processed_array = _preprocess_numpy_array(array)

- # Check if the array is valid and can be converted to an image
- try:
- # Convert the NumPy array to a PIL image
- pil_image = Image.fromarray(array.astype(np.uint8))
- except Exception as e:
- raise ValueError(f"Failed to convert NumPy array to image: {e}")
+ format = format.upper()

- try:
- # Convert the PIL image to a base64-encoded string
- with BytesIO() as buffer:
- pil_image.save(buffer, format="PNG")
- base64_img = bytetools.base64frombytes(buffer.getvalue())
- except Exception as e:
- raise RuntimeError(f"Failed to encode image to base64: {e}")
-
- return base64_img
+ if format == "PNG":
+ return numpy_to_base64_png(processed_array)
+ elif format == "JPEG" or format == "JPG":
+ quality = kwargs.get("quality", 100)
+ return numpy_to_base64_jpeg(processed_array, quality=quality)
+ else:
+ raise ValueError(f"Unsupported format: {format}. Supported formats are 'PNG' and 'JPEG'.")


  def base64_to_numpy(base64_string: str) -> np.ndarray:
  """
- Convert a base64-encoded image string to a NumPy array.
+ Convert a base64-encoded image string to a NumPy array using OpenCV.
+ Returns images in RGB format for consistency.

  Parameters
  ----------

@@ -371,37 +579,82 @@ def base64_to_numpy(base64_string: str) -> np.ndarray:
  Returns
  -------
  numpy.ndarray
- NumPy array representation of the decoded image.
+ NumPy array representation of the decoded image in RGB format (for color images).
+ Grayscale images are returned as-is.

  Raises
  ------
  ValueError
  If the base64 string is invalid or cannot be decoded into an image.
- ImportError
- If required libraries are not installed.

  Examples
  --------
  >>> base64_str = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...'
  >>> img_array = base64_to_numpy(base64_str)
+ >>> # img_array is now in RGB format (for color images)
  """
  try:
- # Decode the base64 string
- image_data = base64.b64decode(base64_string)
- except (base64.binascii.Error, ValueError) as e:
+ # Decode the base64 string to bytes using bytetools
+ image_bytes = bytetools.bytesfrombase64(base64_string)
+ except Exception as e:
  raise ValueError("Invalid base64 string") from e

+ # Create numpy buffer from bytes and decode using OpenCV
+ buf = np.frombuffer(image_bytes, dtype=np.uint8)
  try:
- # Convert the bytes into a BytesIO object
- image_bytes = BytesIO(image_data)
-
- # Open the image using PIL
- image = Image.open(image_bytes)
- image.load()
- except UnidentifiedImageError as e:
+ img = cv2.imdecode(buf, cv2.IMREAD_UNCHANGED)
+ if img is None:
+ raise ValueError("OpenCV failed to decode image")
+
+ # Convert BGR to RGB for consistent processing (OpenCV loads as BGR)
+ # Only convert if it's a 3-channel color image
+ if img.ndim == 3 and img.shape[2] == 3:
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ except ImportError:
+ raise
+ except Exception as e:
  raise ValueError("Unable to decode image from base64 string") from e

- # Convert the image to a NumPy array
- image_array = np.array(image)
+ # Convert to numpy array
+ img = np.array(img)
+ # Assert that 3-channel images are in RGB format after conversion
+ assert img.ndim <= 3, f"Image has unexpected number of dimensions: {img.ndim}"
+ assert img.ndim != 3 or img.shape[2] == 3, f"3-channel image should have 3 channels, got: {img.shape[2]}"
+
+ return img
+
+
+ def scale_numpy_image(
+ img_arr: np.ndarray, scale_tuple: Optional[Tuple[int, int]] = None, interpolation=Image.LANCZOS
+ ) -> np.ndarray:
+ """
+ Scales a NumPy image array using OpenCV with aspect ratio preservation.

- return image_array
+ This function provides OpenCV-based image scaling that mimics PIL's thumbnail behavior
+ by maintaining aspect ratio and scaling to fit within the specified dimensions.
+
+ Parameters
+ ----------
+ img_arr : np.ndarray
+ The input image as a NumPy array.
+ scale_tuple : Optional[Tuple[int, int]], optional
+ A tuple (width, height) to resize the image to. If provided, the image
+ will be resized to fit within these dimensions while maintaining aspect ratio
+ (similar to PIL's thumbnail method). Defaults to None.
+ interpolation : int, optional
+ OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
+
+ Returns
+ -------
+ np.ndarray
+ A NumPy array representing the scaled image data.
+ """
+ # Apply scaling using OpenCV if specified
+ # Using PIL for scaling as CV2 seems to lead to different results
+ # TODO: Remove when we move to YOLOX Ensemble Models
+ if scale_tuple:
+ image = Image.fromarray(img_arr)
+ image.thumbnail(scale_tuple, interpolation)
+ img_arr = np.array(image)
+ # Ensure we return a copy
+ return img_arr.copy()
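
Illustrative sketch (not part of the package): with the OpenCV-backed helpers, arrays stay RGB at the Python boundary and BGR conversion happens only inside encode/decode. A minimal round trip under that assumption, using the same module path as above:

    import numpy as np
    from nv_ingest_api.util.image_processing.transforms import base64_to_numpy, numpy_to_base64

    rgb = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)  # RGB in, per the new contract

    b64_png = numpy_to_base64(rgb, format="PNG")                  # PNG is lossless
    assert np.array_equal(base64_to_numpy(b64_png), rgb)

    b64_jpeg = numpy_to_base64(rgb, format="JPEG", quality=90)    # JPEG is lossy; values may shift
    assert base64_to_numpy(b64_jpeg).shape == rgb.shape
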
@@ -201,6 +201,8 @@ def construct_image_metadata_from_base64(
  page_count: int,
  source_metadata: Dict[str, Any],
  base_unified_metadata: Dict[str, Any],
+ subtype: None | ContentTypeEnum | str = "",
+ text: str = "",
  ) -> List[Any]:
  """
  Extracts image data from a base64-encoded image string, decodes the image to get

@@ -252,6 +254,7 @@
  "line": -1,
  "span": -1,
  },
+ "subtype": subtype or "",
  }

  # Construct image metadata

@@ -259,7 +262,7 @@
  "image_type": DocumentTypeEnum.PNG,
  "structured_image_type": ContentTypeEnum.UNKNOWN,
  "caption": "",
- "text": "",
+ "text": text,
  "image_location": bbox,
  "image_location_max_dimensions": (width, height),
  "height": height,
@@ -7,7 +7,6 @@ from typing import List, Any
  from typing import Optional
  from typing import Tuple

- import PIL
  import numpy as np
  import pypdfium2 as pdfium
  import pypdfium2.raw as pdfium_c

@@ -20,8 +19,9 @@ from nv_ingest_api.util.image_processing.clustering import (
  combine_groups_into_bboxes,
  remove_superset_bboxes,
  )
- from nv_ingest_api.util.image_processing.transforms import pad_image, numpy_to_base64, crop_image
+ from nv_ingest_api.util.image_processing.transforms import pad_image, numpy_to_base64, crop_image, scale_numpy_image
  from nv_ingest_api.util.metadata.aggregators import Base64Image
+ from nv_ingest_api.internal.primitives.nim.model_interface.yolox import YOLOX_PAGE_IMAGE_FORMAT

  logger = logging.getLogger(__name__)

@@ -176,18 +176,10 @@ def pdfium_pages_to_numpy(
  for idx, page in enumerate(pages):
  # Render the page as a bitmap with the specified scale and rotation
  page_bitmap = page.render(scale=scale, rotation=rotation)
-
- # Convert the bitmap to a PIL image
- pil_image = page_bitmap.to_pil()
-
+ img_arr = convert_bitmap_to_corrected_numpy(page_bitmap)
  # Apply scaling using the thumbnail approach if specified
  if scale_tuple:
- pil_image.thumbnail(scale_tuple, PIL.Image.LANCZOS)
-
- # Convert the PIL image to a NumPy array and force a full copy,
- # ensuring the returned array is entirely independent of the original buffer.
- img_arr = np.array(pil_image).copy()
-
+ img_arr = scale_numpy_image(img_arr, scale_tuple)
  # Apply padding if specified
  if padding_tuple:
  img_arr, (pad_width, pad_height) = pad_image(

@@ -250,7 +242,7 @@ def extract_simple_images_from_pdfium_page(page, max_depth):
  try:
  # Attempt to retrieve the image bitmap
  image_numpy: np.ndarray = pdfium_try_get_bitmap_as_numpy(obj) # noqa
- image_base64: str = numpy_to_base64(image_numpy)
+ image_base64: str = numpy_to_base64(image_numpy, format=YOLOX_PAGE_IMAGE_FORMAT)
  image_bbox = obj.get_pos()
  image_size = obj.get_size()
  if image_size[0] < 10 and image_size[1] < 10:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nv-ingest-api
- Version: 2025.7.14.dev20250714
+ Version: 2025.7.16.dev20250716
  Summary: Python module with core document ingestion functions.
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
  License: Apache License

@@ -217,6 +217,7 @@ Requires-Dist: backoff==2.2.1
  Requires-Dist: pandas>=2.0
  Requires-Dist: pydantic>2.0.0
  Requires-Dist: pydantic-settings>2.0.0
+ Requires-Dist: tritonclient
  Dynamic: license-file

  # nv-ingest-api
@@ -7,7 +7,7 @@ nv_ingest_api/interface/transform.py,sha256=g6YnFR7TpEU0xNtzCvv6kqnFbuCwQ6vRMjjB
  nv_ingest_api/interface/utility.py,sha256=AL4l0cJNvTjG1MAe1YNTk1jbbPED3g4HCewzx6Ffcio,7296
  nv_ingest_api/internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nv_ingest_api/internal/enums/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest_api/internal/enums/common.py,sha256=HSj7qqNr6KXu_FIyK_Wvel24R-r8lV7dLA173z5XFBc,12321
+ nv_ingest_api/internal/enums/common.py,sha256=lzDJ35VWfIwlL_Lx_q0dfHUuwEB7CXudHIQAilpjoRw,12611
  nv_ingest_api/internal/extract/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
  nv_ingest_api/internal/extract/audio/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/internal/extract/audio/audio_extraction.py,sha256=_jf_UC_FTqZr-xEpwG8edwBzdDjM01gGhqm9ulOsDcY,6973

@@ -31,11 +31,11 @@ nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfS
  nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
  nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
  nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=Uqj1NH7yWga9P6_vCzgny1WKALfF--UdAaGHUF8K_aQ,22926
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=fDbrZwJ-lgeHYOq107WXehzdSvyF8zEDza_9UkDm5aE,22360
+ nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=XNYz4S2tMFBv0KFzXNERrVs-1raxJ_iIIXpBGlJFcD0,22987
+ nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=8hUJUdpx6FhOBgabFmGhJiAQdl12kR8YoSbUfN-geOk,23506
  nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
  nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=Jk3wrQ2CZs167juvEZ-uV6qXWQjR08hhIu8otk2MWj4,4931
+ nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=4bvN6LsPksLicI6jM0JqbJFiOZNHEcuc8MVVW4XfgV8,5875
  nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
  nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
  nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -55,12 +55,12 @@ nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1E
  nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubkHs4WjnexM6rI0wkjWCsrVNEbA4Wjk2oKL9OYCU,1499
  nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
  nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=x35a9AyTYxpESQflLo_YnhVOKblQKVen6vGGFaXmNiE,9927
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=MFWPqMTXs_MZG3ripRR21o7f_mVeoE46Q10yvJ8KNr0,7023
+ nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=WysjDZeegclO3mZgVcGOwzWbr8wSI4pWRiYD4iC2EXo,7098
  nv_ingest_api/internal/primitives/nim/model_interface/paddle.py,sha256=rSUPwl5XOrqneoS6aKhatVjrNBg_LhP3nwUWS_aTwz0,17950
  nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=5PqD2JuHY2rwd-6SSB4axr2Dd79vm95sAEkcmI3U7ME,12977
  nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=lFhppNqrq5X_fzbCWKphvZQMzaJd3gHrkWsyJORzFrU,5010
  nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=uYXqdvqgkyS4Yfr9ZoikRDX4e94OV3ch3Xhv3JVg-3s,49581
+ nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=nsfDQgeupBe9Tdf3S5sfNpYcObEwVlzCZdfg1ObAW88,49584
  nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nv_ingest_api/internal/primitives/tracing/latency.py,sha256=5kVTeYRbRdTlT_aI4MeS20N_S7mqCcLqZR6YHtxhXkY,2215
  nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagYDmkkA6rTXmQ-bmtLjoEguhg,3851

@@ -82,7 +82,7 @@ nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDx
  nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
  nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=szDvgc2A_JetD2Jyewyl4ac4lwpy3NiLxD9dOYz42sM,8116
+ nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=ceYQjRjhBSDbbZ6q-Db7Y6GHVOvWPdGAMb3TX1vMWfY,8321
  nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=VnAzkSFat_ckI19mlwQTlFrvP6EZVCwyNl9bt51b8oU,7193
  nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py,sha256=k1JOdlPPpsipc0XhHf-9YxJ_-W0HvpVE1ZhYmr7fzj0,395

@@ -92,14 +92,14 @@ nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RY
  nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=OtM1iPw26uioC3mghbOJQurKGg641uQfhASH462VqOY,578
  nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=ongmHkJA2953f9_RI7ZYzf5BUnFzVL6Al5E8WKyfgw4,885
+ nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=RZCISA8CUqKiY8eJuk4uWxzo4PZ-fuYdzMO7_LYFkoM,1117
  nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
  nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
  nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
  nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
- nv_ingest_api/internal/transform/embed_text.py,sha256=A8JMotTkC8KQ0pmz4AIJhaKebza6JzhQ0aEnHX2oHY8,16539
+ nv_ingest_api/internal/transform/embed_text.py,sha256=kvVGlNH1S91UENXWLD31uh3KzlfJYOlYitpIFMsyowU,20033
  nv_ingest_api/internal/transform/split_text.py,sha256=-kwpRWSVZrPldm1hn3-tVz_TkzuKM-kPvNU3HTp9zOY,7476
  nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -123,7 +123,7 @@ nv_ingest_api/util/image_processing/__init__.py,sha256=Jiy8C1ZuSrNb_eBM1ZTV9IKFI
  nv_ingest_api/util/image_processing/clustering.py,sha256=sUGlZI4cx1q8h4Pns1N9JVpdfSM2BOH8zRmn9QFCtzI,9236
  nv_ingest_api/util/image_processing/processing.py,sha256=LSoDDEmahr7a-qSS12McVcowRe3dOrAZwa1h-PD_JPQ,6554
  nv_ingest_api/util/image_processing/table_and_chart.py,sha256=bxOu9PZYkG_WFCDGw_JLaO60S2pDSN8EOWK3xkIwr2A,14376
- nv_ingest_api/util/image_processing/transforms.py,sha256=Kz9hrizV314Hy7cRCYK9ZmhmBbVUOZ_z0HEpzZYcslQ,14081
+ nv_ingest_api/util/image_processing/transforms.py,sha256=3-xeUerc2AaXJTYuR23EjwdtjRQ8F85pS5D9zxR4cLA,23452
  nv_ingest_api/util/imports/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/util/imports/callable_signatures.py,sha256=e2bJB1pmkN4Ee-Bf-VggOSBaQ4RXofWF5eKkWXgIj2U,1855
  nv_ingest_api/util/imports/dynamic_resolvers.py,sha256=7GByV_-8z2X0tnVoabCxVioxOP3sYMros3ZllVAW-wY,4343

@@ -135,12 +135,12 @@ nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=h9Q4q_
  nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py,sha256=3p-LRqG8qLnsfEhBNf73_DG22C08JKahTqUvPLS2Apg,2554
  nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py,sha256=fh7Q0wO5H_FtrHV1VdT6V66aZNqglOh_2XdkfLt8hgg,15722
  nv_ingest_api/util/metadata/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
- nv_ingest_api/util/metadata/aggregators.py,sha256=Y5JSKuLhhk_ldpzT3eRIcVg7QM7cTNhfQZn4g5bcbq4,15884
+ nv_ingest_api/util/metadata/aggregators.py,sha256=YYdvJ1E04eGFZKKHUxXoH6mzLg8nor9Smvnv0qzqK5w,15988
  nv_ingest_api/util/multi_processing/__init__.py,sha256=4fojP8Rp_5Hu1YAkqGylqTyEZ-HBVVEunn5Z9I99swA,242
  nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3jywTO8rNlLZUniD4FFzwv84_giE,7372
  nv_ingest_api/util/nim/__init__.py,sha256=UqbiXFCqjWcjNvoduXd_0gOUOGBT8JvppiYHOmMyneA,1775
  nv_ingest_api/util/pdf/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
- nv_ingest_api/util/pdf/pdfium.py,sha256=Ch9Gh5jRLcBr3stjCckqWwTUL-T0sI50PlQnZHo_9NA,15761
+ nv_ingest_api/util/pdf/pdfium.py,sha256=qTiTlSaiCk_rxm_eoQBoAFKq_5OQrioHVSbPbGDxVkE,15668
  nv_ingest_api/util/schema/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_api/util/schema/schema_validator.py,sha256=H0yZ_i_HZaiBRUCGmTBfRB9-hURhVqyd10aS_ynM1_0,321
  nv_ingest_api/util/service_clients/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143

@@ -153,8 +153,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
  nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
- nv_ingest_api-2025.7.14.dev20250714.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- nv_ingest_api-2025.7.14.dev20250714.dist-info/METADATA,sha256=ZSDiSF9iqAtQvebMJ1Xp4Y_Uee8FqaZwEshVsywq_5I,13919
- nv_ingest_api-2025.7.14.dev20250714.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nv_ingest_api-2025.7.14.dev20250714.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
- nv_ingest_api-2025.7.14.dev20250714.dist-info/RECORD,,
+ nv_ingest_api-2025.7.16.dev20250716.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ nv_ingest_api-2025.7.16.dev20250716.dist-info/METADATA,sha256=RaPAkQ4Dtkkrn6hi9Va1t2XDpDgRbe-bFqmCVL3IlEA,13947
+ nv_ingest_api-2025.7.16.dev20250716.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nv_ingest_api-2025.7.16.dev20250716.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
+ nv_ingest_api-2025.7.16.dev20250716.dist-info/RECORD,,