nv-ingest-api 2025.7.17.dev20250717__py3-none-any.whl → 2025.7.19.dev20250719__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


nv_ingest_api/internal/extract/image/chart_extractor.py

@@ -13,7 +13,6 @@ from typing import Tuple
 import numpy as np
 import pandas as pd
 
-from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_version
 from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
 from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskChartExtraction
 from nv_ingest_api.util.image_processing.table_and_chart import join_yolox_graphic_elements_and_ocr_output
@@ -79,10 +78,13 @@ def _run_chart_inference(
 
     future_yolox_kwargs = dict(
         data=data_yolox,
-        model_name="yolox",
+        model_name="yolox_ensemble",
         stage_name="chart_extraction",
-        max_batch_size=8,
+        input_names=["INPUT_IMAGES", "THRESHOLDS"],
+        dtypes=["BYTES", "FP32"],
+        output_names=["OUTPUT"],
         trace_info=trace_info,
+        max_batch_size=8,
     )
     future_ocr_kwargs = dict(
         data=data_ocr,
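
The new input_names, dtypes, and output_names keyword arguments spell out the Triton ensemble's I/O signature instead of relying on client-side defaults. As a rough sketch, the equivalent raw tritonclient request would look something like the following (the endpoint and image payload are illustrative, not values from this package):

    # Sketch: how the "yolox_ensemble" kwargs map onto a Triton gRPC request.
    import numpy as np
    import tritonclient.grpc as grpcclient

    client = grpcclient.InferenceServerClient("localhost:8001")  # hypothetical endpoint

    b64_images = np.array([b"<base64-encoded image>"], dtype=np.object_)  # BYTES, shape (B,)
    thresholds = np.array([[0.01, 0.5]], dtype=np.float32)                # FP32, shape (B, 2)

    inputs = [
        grpcclient.InferInput("INPUT_IMAGES", list(b64_images.shape), "BYTES"),
        grpcclient.InferInput("THRESHOLDS", list(thresholds.shape), "FP32"),
    ]
    inputs[0].set_data_from_numpy(b64_images)
    inputs[1].set_data_from_numpy(thresholds)

    outputs = [grpcclient.InferRequestedOutput("OUTPUT")]
    response = client.infer(model_name="yolox_ensemble", inputs=inputs, outputs=outputs)
    annotations = response.as_numpy("OUTPUT")  # one JSON-encoded result per image
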
@@ -211,24 +213,7 @@ def _create_clients(
     ocr_protocol: str,
     auth_token: str,
 ) -> Tuple[NimClient, NimClient]:
-    # Obtain yolox_version
-    # Assuming that the grpc endpoint is at index 0
-    yolox_http_endpoint = yolox_endpoints[1]
-
-    try:
-        yolox_version = get_version(yolox_http_endpoint)
-        if not yolox_version:
-            logger.warning(
-                "Failed to obtain yolox-page-elements version from the endpoint. Falling back to the latest version."
-            )
-            yolox_version = None  # Default to the latest version
-    except Exception:
-        logger.warning(
-            "Failed to get yolox-page-elements version after 30 seconds. Falling back to the latest version."
-        )
-        yolox_version = None  # Default to the latest version
-
-    yolox_model_interface = YoloxGraphicElementsModelInterface(yolox_version=yolox_version)
+    yolox_model_interface = YoloxGraphicElementsModelInterface()
     ocr_model_interface = OCRModelInterface()
 
     logger.debug(f"Inference protocols: yolox={yolox_protocol}, ocr={ocr_protocol}")
nv_ingest_api/internal/extract/image/image_helpers/common.py

@@ -33,7 +33,6 @@ from PIL import Image
 from nv_ingest_api.internal.enums.common import AccessLevelEnum
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
     YoloxPageElementsModelInterface,
-    get_yolox_model_name,
 )
 from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageConfigSchema
 from nv_ingest_api.util.image_processing.transforms import crop_image, numpy_to_base64
@@ -202,11 +201,8 @@ def extract_page_elements_from_images(
 
     # Obtain yolox_version
     # Assuming that the http endpoint is at index 1
-    yolox_http_endpoint = config.yolox_endpoints[1]
-    yolox_model_name = get_yolox_model_name(yolox_http_endpoint)
-
     try:
-        model_interface = YoloxPageElementsModelInterface(yolox_model_name=yolox_model_name)
+        model_interface = YoloxPageElementsModelInterface()
         yolox_client = create_inference_client(
             config.yolox_endpoints,
             model_interface,
@@ -220,8 +216,11 @@ def extract_page_elements_from_images(
         # Perform inference in a single call. The NimClient handles batching internally.
         inference_results = yolox_client.infer(
             data,
-            model_name="yolox",
+            model_name="yolox_ensemble",
             max_batch_size=YOLOX_MAX_BATCH_SIZE,
+            input_names=["INPUT_IMAGES", "THRESHOLDS"],
+            dtypes=["BYTES", "FP32"],
+            output_names=["OUTPUT"],
             trace_info=trace_info,
             stage_name="pdf_extraction",
         )
nv_ingest_api/internal/extract/image/table_extractor.py

@@ -77,9 +77,12 @@ def _run_inference(
     data_yolox = {"images": valid_arrays}
     future_yolox_kwargs = dict(
         data=data_yolox,
-        model_name="yolox",
+        model_name="yolox_ensemble",
         stage_name="table_extraction",
         max_batch_size=8,
+        input_names=["INPUT_IMAGES", "THRESHOLDS"],
+        dtypes=["BYTES", "FP32"],
+        output_names=["OUTPUT"],
         trace_info=trace_info,
     )
 
@@ -102,11 +105,10 @@ def _run_inference(
     )
 
     with ThreadPoolExecutor(max_workers=2) as executor:
+        future_ocr = executor.submit(ocr_client.infer, **future_ocr_kwargs)
         future_yolox = None
         if enable_yolox:
             future_yolox = executor.submit(yolox_client.infer, **future_yolox_kwargs)
-        future_ocr = executor.submit(ocr_client.infer, **future_ocr_kwargs)
-
        if enable_yolox:
            try:
                yolox_results = future_yolox.result()
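
The executor change submits the OCR request before the conditional YOLOX submit, so both inferences are queued before either result is awaited. A minimal sketch of the resulting pattern (function names are illustrative):

    from concurrent.futures import ThreadPoolExecutor

    def run_pair(ocr_task, yolox_task, enable_yolox: bool):
        # Queue OCR unconditionally, then YOLOX only if enabled,
        # and only afterwards block on the results.
        with ThreadPoolExecutor(max_workers=2) as executor:
            future_ocr = executor.submit(ocr_task)
            future_yolox = executor.submit(yolox_task) if enable_yolox else None
            yolox_result = future_yolox.result() if future_yolox is not None else None
            return future_ocr.result(), yolox_result
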
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py

@@ -258,6 +258,9 @@ def nemoretriever_parse_extractor(
             nemoretriever_parse_config.yolox_endpoints,
             nemoretriever_parse_config.yolox_infer_protocol,
             nemoretriever_parse_config.auth_token,
+            input_names=["INPUT_IMAGES", "THRESHOLDS"],
+            dtypes=["BYTES", "FP32"],
+            output_names=["OUTPUT"],
             execution_trace_log=execution_trace_log,
         )
         futures.append(future_yolox)
nv_ingest_api/internal/extract/pdf/engines/pdfium.py

@@ -29,9 +29,8 @@ from nv_ingest_api.internal.primitives.nim.default_values import YOLOX_MAX_BATCH
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
     YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
     YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
-    YOLOX_PAGE_IMAGE_FORMAT,
-    get_yolox_model_name,
     YoloxPageElementsModelInterface,
+    YOLOX_PAGE_IMAGE_FORMAT,
 )
 from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema
 from nv_ingest_api.internal.enums.common import TableFormatEnum, TextTypeEnum, AccessLevelEnum
@@ -58,7 +57,6 @@ logger = logging.getLogger(__name__)
 def _extract_page_elements_using_image_ensemble(
     pages: List[Tuple[int, np.ndarray, Tuple[int, int]]],
     yolox_client,
-    yolox_model_name: str = "yolox",
     execution_trace_log: Optional[List] = None,
 ) -> List[Tuple[int, object]]:
     """
@@ -72,8 +70,6 @@ def _extract_page_elements_using_image_ensemble(
         and optional padding offset information.
     yolox_client : object
         A pre-configured client instance for the YOLOX inference service.
-    yolox_model_name : str, default="yolox"
-        The name of the YOLOX model to use for inference.
     execution_trace_log : Optional[List], default=None
         List for accumulating execution trace information.
 
@@ -106,8 +102,11 @@ def _extract_page_elements_using_image_ensemble(
     # Perform inference using the NimClient.
     inference_results = yolox_client.infer(
         data,
-        model_name="yolox",
+        model_name="yolox_ensemble",
         max_batch_size=YOLOX_MAX_BATCH_SIZE,
+        input_names=["INPUT_IMAGES", "THRESHOLDS"],
+        dtypes=["BYTES", "FP32"],
+        output_names=["OUTPUT"],
         trace_info=execution_trace_log,
         stage_name="pdf_extraction",
     )
@@ -317,19 +316,7 @@ def _extract_page_elements(
 
     try:
         # Default model name
-        yolox_model_name = "yolox"
-
-        # Get the HTTP endpoint to determine the model name if needed
-        yolox_http_endpoint = yolox_endpoints[1]
-        if yolox_http_endpoint:
-            try:
-                yolox_model_name = get_yolox_model_name(yolox_http_endpoint)
-            except Exception as e:
-                logger.warning(f"Failed to get YOLOX model name from endpoint: {e}. Using default.")
-
-        # Create the model interface
-        model_interface = YoloxPageElementsModelInterface(yolox_model_name=yolox_model_name)
-
+        model_interface = YoloxPageElementsModelInterface()
         # Create the inference client
         yolox_client = create_inference_client(
             yolox_endpoints,
@@ -340,7 +327,7 @@ def _extract_page_elements(
 
         # Extract page elements using the client
         page_element_results = _extract_page_elements_using_image_ensemble(
-            pages, yolox_client, yolox_model_name, execution_trace_log=execution_trace_log
+            pages, yolox_client, execution_trace_log=execution_trace_log
        )
 
        # Process each extracted element based on extraction flags
nv_ingest_api/internal/primitives/nim/model_interface/helpers.py

@@ -14,6 +14,7 @@ from nv_ingest_api.internal.primitives.nim.model_interface.decorators import mul
 from nv_ingest_api.util.image_processing.transforms import pad_image, normalize_image
 from nv_ingest_api.util.string_processing import generate_url, remove_url_endpoints
 
+cv2.setNumThreads(1)
 logger = logging.getLogger(__name__)
 
 
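
Capping OpenCV at one internal thread at import time is a common guard against oversubscription when the pipeline already fans work out across its own workers; each worker's cv2 calls then run single-threaded. A sketch of the pattern this protects (the preprocess function is hypothetical):

    from concurrent.futures import ProcessPoolExecutor

    import cv2
    import numpy as np

    cv2.setNumThreads(1)  # keep OpenCV single-threaded inside each worker

    def preprocess(image: np.ndarray) -> np.ndarray:
        # Hypothetical per-image work; without the cap, every worker would
        # also spin up OpenCV's own internal thread pool.
        return cv2.resize(image, (1024, 1024), interpolation=cv2.INTER_LINEAR)

    if __name__ == "__main__":
        images = [np.zeros((768, 768, 3), dtype=np.uint8) for _ in range(8)]
        with ProcessPoolExecutor(max_workers=4) as pool:
            resized = list(pool.map(preprocess, images))
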
nv_ingest_api/internal/primitives/nim/model_interface/yolox.py

@@ -12,15 +12,14 @@ from typing import List
 from typing import Optional
 from typing import Tuple
 
-import cv2
+import backoff
 import numpy as np
-import packaging
+import json
 import pandas as pd
-import torch
-import torchvision
 
 from nv_ingest_api.internal.primitives.nim import ModelInterface
-from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_model_name
+import tritonclient.grpc as grpcclient
+from nv_ingest_api.internal.primitives.nim.model_interface.decorators import multiprocessing_cache
 from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
 from nv_ingest_api.util.image_processing.transforms import numpy_to_base64
 
@@ -35,15 +34,6 @@ YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
 YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
 YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")
 
-# yolox-page-elements-v1 constants
-YOLOX_PAGE_V1_NUM_CLASSES = 4
-YOLOX_PAGE_V1_FINAL_SCORE = {"table": 0.48, "chart": 0.48}
-YOLOX_PAGE_V1_CLASS_LABELS = [
-    "table",
-    "chart",
-    "title",
-]
-
 # yolox-page-elements-v2 constants
 YOLOX_PAGE_V2_NUM_CLASSES = 4
 YOLOX_PAGE_V2_FINAL_SCORE = {"table": 0.1, "chart": 0.01, "infographic": 0.01}
@@ -63,11 +53,6 @@ YOLOX_GRAPHIC_MIN_SCORE = 0.1
 YOLOX_GRAPHIC_FINAL_SCORE = 0.0
 YOLOX_GRAPHIC_NIM_MAX_IMAGE_SIZE = 512_000
 
-# TODO(Devin): Legacy items aren't working right for me. Double check these.
-LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT = 1024
-LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH = 1024
-YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT = 1024
-YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH = 1024
 
 YOLOX_GRAPHIC_CLASS_LABELS = [
     "chart_title",
@@ -111,8 +96,6 @@ class YoloxModelInterfaceBase(ModelInterface):
 
     def __init__(
         self,
-        image_preproc_width: Optional[int] = None,
-        image_preproc_height: Optional[int] = None,
         nim_max_image_size: Optional[int] = None,
         num_classes: Optional[int] = None,
         conf_threshold: Optional[float] = None,
@@ -126,8 +109,6 @@ class YoloxModelInterfaceBase(ModelInterface):
         Parameters
         ----------
         """
-        self.image_preproc_width = image_preproc_width
-        self.image_preproc_height = image_preproc_height
         self.nim_max_image_size = nim_max_image_size
         self.num_classes = num_classes
         self.conf_threshold = conf_threshold
@@ -199,6 +180,7 @@ class YoloxModelInterfaceBase(ModelInterface):
 
         # Helper functions to chunk a list into sublists of length up to chunk_size.
         def chunk_list(lst: list, chunk_size: int) -> List[list]:
+            chunk_size = max(1, chunk_size)
             return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
 
         def chunk_list_geometrically(lst: list, max_size: int) -> List[list]:
@@ -206,29 +188,28 @@ class YoloxModelInterfaceBase(ModelInterface):
             chunks = []
             i = 0
             while i < len(lst):
-                chunk_size = min(2 ** int(log(len(lst) - i, 2)), max_size)
+                chunk_size = max(1, min(2 ** int(log(len(lst) - i, 2)), max_size))
                 chunks.append(lst[i : i + chunk_size])
                 i += chunk_size
             return chunks
 
         if protocol == "grpc":
-            logger.debug("Formatting input for gRPC Yolox model")
-            # Resize images for model input (Yolox expects 1024x1024).
-            resized_images = [
-                resize_image(image, (self.image_preproc_width, self.image_preproc_height)) for image in data["images"]
-            ]
-            # Chunk the resized images, the original images, and their shapes.
-            resized_chunks = chunk_list_geometrically(resized_images, max_batch_size)
+            logger.debug("Formatting input for gRPC Yolox Ensemble model")
+            b64_images = [numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT) for image in data["images"]]
+            b64_chunks = chunk_list_geometrically(b64_images, max_batch_size)
             original_chunks = chunk_list_geometrically(data["images"], max_batch_size)
             shape_chunks = chunk_list_geometrically(data["original_image_shapes"], max_batch_size)
 
             batched_inputs = []
             formatted_batch_data = []
-            for r_chunk, orig_chunk, shapes in zip(resized_chunks, original_chunks, shape_chunks):
-                # Reorder axes from (B, H, W, C) to (B, C, H, W) as expected by the model.
-                input_array = np.einsum("bijk->bkij", r_chunk).astype(np.float32)
-                batched_inputs.append(input_array)
+            for b64_chunk, orig_chunk, shapes in zip(b64_chunks, original_chunks, shape_chunks):
+                input_array = np.array(b64_chunk, dtype=np.object_)
+                current_batch_size = input_array.shape[0]
+                single_threshold_pair = [self.conf_threshold, self.iou_threshold]
+                thresholds = np.tile(single_threshold_pair, (current_batch_size, 1)).astype(np.float32)
+                batched_inputs.append([input_array, thresholds])
                 formatted_batch_data.append({"images": orig_chunk, "original_image_shapes": shapes})
+
             return batched_inputs, formatted_batch_data
 
         elif protocol == "http":
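
The reworked gRPC path no longer resizes and transposes pixel arrays on the client; it ships base64 strings plus a per-image [conf_threshold, iou_threshold] pair and leaves preprocessing and NMS to the server-side ensemble. A self-contained sketch of the shapes one formatted batch ends up with (threshold values are illustrative):

    import base64
    import numpy as np

    conf_threshold, iou_threshold = 0.01, 0.5  # illustrative values

    b64_chunk = [
        base64.b64encode(b"<image bytes 0>").decode("utf-8"),
        base64.b64encode(b"<image bytes 1>").decode("utf-8"),
    ]

    input_array = np.array(b64_chunk, dtype=np.object_)  # BYTES input, shape (B,)
    thresholds = np.tile(
        [conf_threshold, iou_threshold], (input_array.shape[0], 1)
    ).astype(np.float32)                                  # FP32 input, shape (B, 2)

    batched_input = [input_array, thresholds]
    assert input_array.shape == (2,) and thresholds.shape == (2, 2)
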
@@ -337,32 +318,20 @@ class YoloxModelInterfaceBase(ModelInterface):
         list[dict]
             A list of annotation dictionaries for each image in the batch.
         """
-        original_image_shapes = kwargs.get("original_image_shapes", [])
-
         if protocol == "http":
             # For http, the output already has postprocessing applied. Skip to table/chart expansion.
             results = output
 
         elif protocol == "grpc":
+            results = []
             # For grpc, apply the same NIM postprocessing.
-            pred = postprocess_model_prediction(
-                output,
-                self.num_classes,
-                self.conf_threshold,
-                self.iou_threshold,
-                class_agnostic=False,
-            )
-            results = postprocess_results(
-                pred,
-                original_image_shapes,
-                self.image_preproc_width,
-                self.image_preproc_height,
-                self.class_labels,
-                min_score=self.min_score,
-            )
-
+            for out in output:
+                if isinstance(out, bytes):
+                    out = out.decode("utf-8")
+                if isinstance(out, dict):
+                    continue
+                results.append(json.loads(out))
         inference_results = self.postprocess_annotations(results, **kwargs)
-
         return inference_results
 
     def postprocess_annotations(self, annotation_dicts, **kwargs):
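
With postprocessing moved server-side, each gRPC output element is now a JSON document (possibly delivered as raw bytes) rather than a prediction tensor. Decoded, it has the same annotation-dictionary shape the removed client-side postprocess_results used to build, per that function's docstring (values here are illustrative):

    import json

    # One OUTPUT element from the ensemble, delivered as bytes:
    raw = b'{"table": [[0.0107, 0.0859, 0.7537, 0.1219, 0.9861]], "figure": [], "title": []}'

    annotation = json.loads(raw.decode("utf-8"))
    # Each entry is [x1, y1, x2, y2, confidence] with normalized coordinates.
    print(annotation["table"][0])
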
@@ -396,22 +365,15 @@ class YoloxPageElementsModelInterface(YoloxModelInterfaceBase):
     An interface for handling inference with yolox-page-elements model, supporting both gRPC and HTTP protocols.
     """
 
-    def __init__(self, yolox_model_name: str = "nemoretriever-page-elements-v2"):
+    def __init__(self):
         """
         Initialize the yolox-page-elements model interface.
         """
-        if yolox_model_name.endswith("-v1"):
-            num_classes = YOLOX_PAGE_V1_NUM_CLASSES
-            final_score = YOLOX_PAGE_V1_FINAL_SCORE
-            class_labels = YOLOX_PAGE_V1_CLASS_LABELS
-        else:
-            num_classes = YOLOX_PAGE_V2_NUM_CLASSES
-            final_score = YOLOX_PAGE_V2_FINAL_SCORE
-            class_labels = YOLOX_PAGE_V2_CLASS_LABELS
+        num_classes = YOLOX_PAGE_V2_NUM_CLASSES
+        final_score = YOLOX_PAGE_V2_FINAL_SCORE
+        class_labels = YOLOX_PAGE_V2_CLASS_LABELS
 
         super().__init__(
-            image_preproc_width=YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
-            image_preproc_height=YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
             nim_max_image_size=YOLOX_PAGE_NIM_MAX_IMAGE_SIZE,
             num_classes=num_classes,
             conf_threshold=YOLOX_PAGE_CONF_THRESHOLD,
@@ -478,22 +440,11 @@ class YoloxGraphicElementsModelInterface(YoloxModelInterfaceBase):
     An interface for handling inference with yolox-graphic-elements model, supporting both gRPC and HTTP protocols.
     """
 
-    def __init__(self, yolox_version: Optional[str] = None):
+    def __init__(self):
         """
         Initialize the yolox-graphic-elements model interface.
         """
-        if yolox_version and (
-            packaging.version.Version(yolox_version) >= packaging.version.Version("1.2.0-rc5")  # gtc release
-        ):
-            image_preproc_width = YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH
-            image_preproc_height = YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT
-        else:
-            image_preproc_width = LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH
-            image_preproc_height = LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT
-
         super().__init__(
-            image_preproc_width=image_preproc_width,
-            image_preproc_height=image_preproc_height,
             nim_max_image_size=YOLOX_GRAPHIC_NIM_MAX_IMAGE_SIZE,
             num_classes=YOLOX_GRAPHIC_NUM_CLASSES,
             conf_threshold=YOLOX_GRAPHIC_CONF_THRESHOLD,
@@ -551,8 +502,6 @@ class YoloxTableStructureModelInterface(YoloxModelInterfaceBase):
         Initialize the yolox-table-structure model interface.
         """
         super().__init__(
-            image_preproc_width=YOLOX_TABLE_IMAGE_PREPROC_HEIGHT,
-            image_preproc_height=YOLOX_TABLE_IMAGE_PREPROC_HEIGHT,
             nim_max_image_size=YOLOX_TABLE_NIM_MAX_IMAGE_SIZE,
             num_classes=YOLOX_TABLE_NUM_CLASSES,
             conf_threshold=YOLOX_TABLE_CONF_THRESHOLD,
@@ -600,144 +549,6 @@ class YoloxTableStructureModelInterface(YoloxModelInterfaceBase):
         return inference_results
 
 
-def postprocess_model_prediction(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
-    # Convert numpy array to torch tensor
-    prediction = torch.from_numpy(prediction.copy())
-
-    # Compute box corners
-    box_corner = prediction.new(prediction.shape)
-    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
-    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
-    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
-    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
-    prediction[:, :, :4] = box_corner[:, :, :4]
-
-    output = [None for _ in range(len(prediction))]
-
-    for i, image_pred in enumerate(prediction):
-        # If no detections, continue to the next image
-        if not image_pred.size(0):
-            continue
-
-        # Ensure image_pred is 2D
-        if image_pred.ndim == 1:
-            image_pred = image_pred.unsqueeze(0)
-
-        # Get score and class with highest confidence
-        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
-
-        # Confidence mask
-        squeezed_conf = class_conf.squeeze(dim=1)
-        conf_mask = image_pred[:, 4] * squeezed_conf >= conf_thre
-
-        # Apply confidence mask
-        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
-        detections = detections[conf_mask]
-
-        if not detections.size(0):
-            continue
-
-        # Apply Non-Maximum Suppression (NMS)
-        if class_agnostic:
-            nms_out_index = torchvision.ops.nms(
-                detections[:, :4],
-                detections[:, 4] * detections[:, 5],
-                nms_thre,
-            )
-        else:
-            nms_out_index = torchvision.ops.batched_nms(
-                detections[:, :4],
-                detections[:, 4] * detections[:, 5],
-                detections[:, 6],
-                nms_thre,
-            )
-        detections = detections[nms_out_index]
-
-        # Append detections to output
-        output[i] = detections
-
-    return output
-
-
-def postprocess_results(
-    results, original_image_shapes, image_preproc_width, image_preproc_height, class_labels, min_score=0.0
-):
-    """
-    For each item (== image) in results, computes annotations in the form
-
-    {"table": [[0.0107, 0.0859, 0.7537, 0.1219, 0.9861], ...],
-     "figure": [...],
-     "title": [...]
-    }
-    where each list of 5 floats represents a bounding box in the format [x1, y1, x2, y2, confidence]
-
-    Keep only bboxes with high enough confidence.
-    """
-    out = []
-
-    for original_image_shape, result in zip(original_image_shapes, results):
-        annotation_dict = {label: [] for label in class_labels}
-
-        if result is None:
-            out.append(annotation_dict)
-            continue
-
-        try:
-            result = result.cpu().numpy()
-            scores = result[:, 4] * result[:, 5]
-            result = result[scores > min_score]
-
-            # ratio is used when image was padded
-            ratio = min(
-                image_preproc_width / original_image_shape[0],
-                image_preproc_height / original_image_shape[1],
-            )
-            bboxes = result[:, :4] / ratio
-
-            bboxes[:, [0, 2]] /= original_image_shape[1]
-            bboxes[:, [1, 3]] /= original_image_shape[0]
-            bboxes = np.clip(bboxes, 0.0, 1.0)
-
-            labels = result[:, 6]
-            scores = scores[scores > min_score]
-        except Exception as e:
-            raise ValueError(f"Error in postprocessing {result.shape} and {original_image_shape}: {e}")
-
-        for box, score, label in zip(bboxes, scores, labels):
-            # TODO(Devin): Sometimes we get back unexpected class labels?
-            if (label < 0) or (label >= len(class_labels)):
-                logger.warning(f"Invalid class label {label} found in postprocessing")
-                continue
-            else:
-                class_name = class_labels[int(label)]
-
-            annotation_dict[class_name].append([round(float(x), 4) for x in np.concatenate((box, [score]))])
-
-        out.append(annotation_dict)
-
-    return out
-
-
-def resize_image(image, target_img_size):
-    w, h, _ = np.array(image).shape
-
-    if target_img_size is not None:  # Resize + Pad
-        r = min(target_img_size[0] / w, target_img_size[1] / h)
-        image = cv2.resize(
-            image,
-            (int(h * r), int(w * r)),
-            interpolation=cv2.INTER_LINEAR,
-        ).astype(np.uint8)
-        image = np.pad(
-            image,
-            ((0, target_img_size[0] - image.shape[0]), (0, target_img_size[1] - image.shape[1]), (0, 0)),
-            mode="constant",
-            constant_values=114,
-        )
-
-    return image
-
-
 def expand_table_bboxes(annotation_dict, labels=None):
     """
     Additional preprocessing for tables: extend the upper bounds to capture titles if any.
@@ -1383,14 +1194,16 @@ def get_bbox_dict_yolox_table(preds, shape, class_labels, threshold=0.1, delta=0
     return bbox_dict
 
 
-def get_yolox_model_name(yolox_http_endpoint, default_model_name="nemoretriever-page-elements-v2"):
+@multiprocessing_cache(max_calls=100)  # Cache results first to avoid redundant retries from backoff
+@backoff.on_predicate(backoff.expo, max_time=30)
+def get_yolox_model_name(yolox_grpc_endpoint, default_model_name="yolox"):
     try:
-        yolox_model_name = get_model_name(yolox_http_endpoint, default_model_name)
-        if not yolox_model_name:
-            logger.warning(
-                "Failed to obtain yolox-page-elements model name from the endpoint. "
-                f"Falling back to '{default_model_name}'."
-            )
+        client = grpcclient.InferenceServerClient(yolox_grpc_endpoint)
+        model_index = client.get_model_repository_index(as_json=True)
+        model_names = [x["name"] for x in model_index.get("models", [])]
+        if "yolox_ensemble" in model_names:
+            yolox_model_name = "yolox_ensemble"
+        else:
            yolox_model_name = default_model_name
    except Exception:
        logger.warning(
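
The rewritten helper now probes the Triton model repository over gRPC and prefers "yolox_ensemble" whenever it is deployed. The backoff.on_predicate decorator retries with exponential delay, for up to 30 seconds, as long as the call returns a falsy result, and the multiprocessing cache memoizes the answer so workers do not repeat the probe. A usage sketch of the underlying lookup (the endpoint is hypothetical):

    import tritonclient.grpc as grpcclient

    client = grpcclient.InferenceServerClient("localhost:8001")  # hypothetical endpoint
    index = client.get_model_repository_index(as_json=True)
    names = [m["name"] for m in index.get("models", [])]
    model_name = "yolox_ensemble" if "yolox_ensemble" in names else "yolox"
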
nv_ingest_api/internal/primitives/nim/nim_client.py

@@ -8,7 +8,7 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
 from typing import Optional
-from typing import Tuple
+from typing import Tuple, Union
 
 import numpy as np
 import requests
@@ -90,6 +90,10 @@ class NimClient:
 
     def _fetch_max_batch_size(self, model_name, model_version: str = "") -> int:
         """Fetch the maximum batch size from the Triton model configuration in a thread-safe manner."""
+
+        if model_name == "yolox_ensemble":
+            model_name = "yolox"
+
         if model_name in self._max_batch_sizes:
             return self._max_batch_sizes[model_name]
@@ -178,7 +182,7 @@ class NimClient:
         max_requested_batch_size = kwargs.pop("max_batch_size", batch_size)
         force_requested_batch_size = kwargs.pop("force_max_batch_size", False)
         max_batch_size = (
-            min(batch_size, max_requested_batch_size)
+            max(1, min(batch_size, max_requested_batch_size))
             if not force_requested_batch_size
             else max_requested_batch_size
         )
@@ -233,7 +237,9 @@ class NimClient:
 
         return all_results
 
-    def _grpc_infer(self, formatted_input: np.ndarray, model_name: str, **kwargs) -> np.ndarray:
+    def _grpc_infer(
+        self, formatted_input: Union[list, list[np.ndarray]], model_name: str, **kwargs
+    ) -> Union[list, list[np.ndarray]]:
         """
         Perform inference using the gRPC protocol.
METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nv-ingest-api
-Version: 2025.7.17.dev20250717
+Version: 2025.7.19.dev20250719
 Summary: Python module with core document ingestion functions.
 Author-email: Jeremy Dyer <jdyer@nvidia.com>
 License: Apache License
RECORD

@@ -20,19 +20,19 @@ nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py,sha
 nv_ingest_api/internal/extract/html/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/extract/html/html_extractor.py,sha256=I9oWfj6_As4898GDDh0zsSuKxO3lBsvyYzhvUotjzJI,3282
 nv_ingest_api/internal/extract/image/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=KLCSkLUczIlaqIQxyXzKQnwm-HQqTJKMPafnTobOEQs,13873
+nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=gk-O-9wjZBoaLVE_6Erb4gMwsSFk4UtPQ2QLpMCW4H4,13212
 nv_ingest_api/internal/extract/image/image_extractor.py,sha256=4tUWinuFMN3ukWa2tZa2_LtzRiTyUAUCBF6BDkUEvm0,8705
 nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=i7zt_ow1gytU4hK2JCRg7T1wlbokaeuUpXX69LIQkzY,9687
-nv_ingest_api/internal/extract/image/table_extractor.py,sha256=djTRYgGpwhqWBaVFOerh8J6kVH-xGbUtIelcOFecx4o,13641
+nv_ingest_api/internal/extract/image/table_extractor.py,sha256=O0m3N2Tz9W6X7TBI4o-rbBXc8dFOf9zSZq1v9qC1U4M,13780
 nv_ingest_api/internal/extract/image/image_helpers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=P8rcl4YPyeWeMJg7u1yejD3k9EnDVEbJgfYEnJ4WO5c,15025
+nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=80jRhGzisHvQ9Ky3MKUMM7soKUmvZ5LqRVzwNYjgdPY,14988
 nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfSQ9T-Bn87beBkCOuGGjxGt8,2934
 nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
 nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
 nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
-nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=IVbNcH_phMiRSxnkZ04pGfQrPJ-x1zVR3hXyhxv7juc,22977
-nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=hUqLzQgCJOZIVXrP0JLH4jXLKDHQkXmLXh1Nc4KI3nI,23494
+nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=VrIfhDXzYVOwvuoQ9dkiYX-y0CHYhy_Ludadq0YpHqY,23132
+nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=SKmias2iZmAE6Q8WXxmFEjvLOZy-vXRoaRIPpi7Tuhs,22962
 nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
 nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
 nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=4bvN6LsPksLicI6jM0JqbJFiOZNHEcuc8MVVW4XfgV8,5875
@@ -48,19 +48,19 @@ nv_ingest_api/internal/primitives/control_message_task.py,sha256=nWVB3QsP6p8BKwH
 nv_ingest_api/internal/primitives/ingest_control_message.py,sha256=rvipBiiUaHuRhupFCFDCG8rv0PylSJibCiJ7rDeb98A,8514
 nv_ingest_api/internal/primitives/nim/__init__.py,sha256=i_i_fBR2EcRCh2Y19DF6GM3s_Q0VPgo_thPnhEIJUyg,266
 nv_ingest_api/internal/primitives/nim/default_values.py,sha256=W92XjfyeC6uuVxut6J7p00x1kpNsnXIDb97gSVytZJk,380
-nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=rtzqwHpYsEPuzL7aGIdke5P3_Gu1Z8MbxKDIuJA-L8I,16336
+nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=3jXCNIjVTILsATooxA5Yj0EIyyFo5PrzUzmhT4iYxNI,16481
 nv_ingest_api/internal/primitives/nim/nim_model_interface.py,sha256=wMEgoi79YQn_4338MVemkeZgM1J-vnz0aZWpvqDhib4,2392
 nv_ingest_api/internal/primitives/nim/model_interface/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1ExW5V6pXC1ZiHdobeG_BmbPr3rBbVJef13s,11003
 nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubkHs4WjnexM6rI0wkjWCsrVNEbA4Wjk2oKL9OYCU,1499
 nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
-nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=wgcwZJnIn3gKsqe4qhUa9V0gYp3NkIFV8R1qW7Zag1w,11571
+nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=iyGxAr4tG2UZ7LtXXoWO_kF-KsObhPrmZ46Nl0Mi-Ag,11592
 nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=WysjDZeegclO3mZgVcGOwzWbr8wSI4pWRiYD4iC2EXo,7098
 nv_ingest_api/internal/primitives/nim/model_interface/ocr.py,sha256=Vhim3py_rc5jA0BoKubwfekEqOwxUUePzcmc59pRuOk,21458
 nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=5PqD2JuHY2rwd-6SSB4axr2Dd79vm95sAEkcmI3U7ME,12977
 nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=lFhppNqrq5X_fzbCWKphvZQMzaJd3gHrkWsyJORzFrU,5010
 nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
-nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=nsfDQgeupBe9Tdf3S5sfNpYcObEwVlzCZdfg1ObAW88,49584
+nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=zpfEZIPctWhNfREnP6e77zffU8vs_RfnMprBj-2jXXk,42847
 nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/internal/primitives/tracing/latency.py,sha256=5kVTeYRbRdTlT_aI4MeS20N_S7mqCcLqZR6YHtxhXkY,2215
 nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagYDmkkA6rTXmQ-bmtLjoEguhg,3851
@@ -153,8 +153,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
 nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
 nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
-nv_ingest_api-2025.7.17.dev20250717.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-nv_ingest_api-2025.7.17.dev20250717.dist-info/METADATA,sha256=NkhXN1NC8IKy8OWMV5uJHFdcHY8BOj0dQ2IwnvidETk,13947
-nv_ingest_api-2025.7.17.dev20250717.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nv_ingest_api-2025.7.17.dev20250717.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
-nv_ingest_api-2025.7.17.dev20250717.dist-info/RECORD,,
+nv_ingest_api-2025.7.19.dev20250719.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nv_ingest_api-2025.7.19.dev20250719.dist-info/METADATA,sha256=SnVoo7ElFj94vFFFOFpqqPZWpq35LEkZuFZk0rhpxRw,13947
+nv_ingest_api-2025.7.19.dev20250719.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nv_ingest_api-2025.7.19.dev20250719.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
+nv_ingest_api-2025.7.19.dev20250719.dist-info/RECORD,,