nv-ingest-api 25.7.6.dev20250706__py3-none-any.whl → 25.8.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of nv-ingest-api has been flagged as a potentially problematic release.
- nv_ingest_api/interface/extract.py +18 -18
- nv_ingest_api/internal/enums/common.py +6 -0
- nv_ingest_api/internal/extract/image/chart_extractor.py +80 -75
- nv_ingest_api/internal/extract/image/image_helpers/common.py +5 -6
- nv_ingest_api/internal/extract/image/infographic_extractor.py +59 -35
- nv_ingest_api/internal/extract/image/table_extractor.py +84 -64
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +9 -8
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +32 -20
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +40 -29
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +59 -0
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +1 -0
- nv_ingest_api/internal/primitives/nim/model_interface/{paddle.py → ocr.py} +132 -39
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +44 -236
- nv_ingest_api/internal/primitives/nim/nim_client.py +61 -18
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +6 -6
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +6 -6
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +5 -5
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +5 -0
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +2 -2
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
- nv_ingest_api/internal/transform/embed_text.py +103 -12
- nv_ingest_api/internal/transform/split_text.py +13 -8
- nv_ingest_api/util/image_processing/table_and_chart.py +97 -42
- nv_ingest_api/util/image_processing/transforms.py +351 -87
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +1 -1
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +51 -48
- nv_ingest_api/util/metadata/aggregators.py +4 -1
- nv_ingest_api/util/pdf/pdfium.py +6 -14
- {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/METADATA +2 -1
- {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/RECORD +33 -33
- {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/WHEEL +0 -0
- {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/top_level.txt +0 -0
nv_ingest_api/internal/primitives/nim/model_interface/yolox.py

@@ -2,9 +2,7 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-
-import base64
-import io
+import os
 import logging
 import warnings
 from math import log
@@ -14,17 +12,16 @@ from typing import List
 from typing import Optional
 from typing import Tuple
 
-import cv2
+import backoff
 import numpy as np
-import packaging
+import json
 import pandas as pd
-import torch
-import torchvision
-from PIL import Image
 
 from nv_ingest_api.internal.primitives.nim import ModelInterface
-
+import tritonclient.grpc as grpcclient
+from nv_ingest_api.internal.primitives.nim.model_interface.decorators import multiprocessing_cache
 from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
+from nv_ingest_api.util.image_processing.transforms import numpy_to_base64
 
 logger = logging.getLogger(__name__)
 
@@ -35,15 +32,7 @@ YOLOX_PAGE_MIN_SCORE = 0.1
 YOLOX_PAGE_NIM_MAX_IMAGE_SIZE = 512_000
 YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
 YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
-
-# yolox-page-elements-v1 contants
-YOLOX_PAGE_V1_NUM_CLASSES = 4
-YOLOX_PAGE_V1_FINAL_SCORE = {"table": 0.48, "chart": 0.48}
-YOLOX_PAGE_V1_CLASS_LABELS = [
-    "table",
-    "chart",
-    "title",
-]
+YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")
 
 # yolox-page-elements-v2 contants
 YOLOX_PAGE_V2_NUM_CLASSES = 4
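`YOLOX_PAGE_IMAGE_FORMAT` is read once at module import, so deployments can switch the page-image encoding without code changes. A minimal sketch of overriding the default, assuming only what the diff shows (the constant name and its `os.getenv` default):

```python
import os

# Hypothetical override: JPEG shrinks request payloads at some cost in fidelity.
# Must be set before the yolox module is imported, since the value is read at import time.
os.environ["YOLOX_PAGE_IMAGE_FORMAT"] = "JPEG"

from nv_ingest_api.internal.primitives.nim.model_interface import yolox  # noqa: E402

print(yolox.YOLOX_PAGE_IMAGE_FORMAT)  # -> "JPEG"
```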
@@ -64,11 +53,6 @@ YOLOX_GRAPHIC_MIN_SCORE = 0.1
 YOLOX_GRAPHIC_FINAL_SCORE = 0.0
 YOLOX_GRAPHIC_NIM_MAX_IMAGE_SIZE = 512_000
 
-# TODO(Devin): Legacy items aren't working right for me. Double check these.
-LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT = 1024
-LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH = 1024
-YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT = 1024
-YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH = 1024
 
 YOLOX_GRAPHIC_CLASS_LABELS = [
     "chart_title",
@@ -112,8 +96,6 @@ class YoloxModelInterfaceBase(ModelInterface):
 
     def __init__(
         self,
-        image_preproc_width: Optional[int] = None,
-        image_preproc_height: Optional[int] = None,
        nim_max_image_size: Optional[int] = None,
        num_classes: Optional[int] = None,
        conf_threshold: Optional[float] = None,
@@ -127,8 +109,6 @@ class YoloxModelInterfaceBase(ModelInterface):
        Parameters
        ----------
        """
-        self.image_preproc_width = image_preproc_width
-        self.image_preproc_height = image_preproc_height
        self.nim_max_image_size = nim_max_image_size
        self.num_classes = num_classes
        self.conf_threshold = conf_threshold
@@ -200,6 +180,7 @@ class YoloxModelInterfaceBase(ModelInterface):
 
        # Helper functions to chunk a list into sublists of length up to chunk_size.
        def chunk_list(lst: list, chunk_size: int) -> List[list]:
+            chunk_size = max(1, chunk_size)
            return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
 
        def chunk_list_geometrically(lst: list, max_size: int) -> List[list]:
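The new `max(1, chunk_size)` guard is not cosmetic: `range(0, len(lst), 0)` raises `ValueError: range() arg 3 must not be zero`, so a degenerate batch size of 0 previously crashed the formatter. The guarded helper in isolation:

```python
from typing import List


def chunk_list(lst: list, chunk_size: int) -> List[list]:
    chunk_size = max(1, chunk_size)  # a step of 0 would make range() raise ValueError
    return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]


print(chunk_list([1, 2, 3, 4, 5], 2))  # [[1, 2], [3, 4], [5]]
print(chunk_list([1, 2, 3], 0))        # [[1], [2], [3]] rather than a crash
```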
@@ -207,29 +188,28 @@ class YoloxModelInterfaceBase(ModelInterface):
            chunks = []
            i = 0
            while i < len(lst):
-                chunk_size = min(2 ** int(log(len(lst) - i, 2)), max_size)
+                chunk_size = max(1, min(2 ** int(log(len(lst) - i, 2)), max_size))
                chunks.append(lst[i : i + chunk_size])
                i += chunk_size
            return chunks
 
        if protocol == "grpc":
-            logger.debug("Formatting input for gRPC Yolox model")
-
-            resized_images = [
-                resize_image(image, (self.image_preproc_width, self.image_preproc_height)) for image in data["images"]
-            ]
-            # Chunk the resized images, the original images, and their shapes.
-            resized_chunks = chunk_list_geometrically(resized_images, max_batch_size)
+            logger.debug("Formatting input for gRPC Yolox Ensemble model")
+            b64_images = [numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT) for image in data["images"]]
+            b64_chunks = chunk_list_geometrically(b64_images, max_batch_size)
            original_chunks = chunk_list_geometrically(data["images"], max_batch_size)
            shape_chunks = chunk_list_geometrically(data["original_image_shapes"], max_batch_size)
 
            batched_inputs = []
            formatted_batch_data = []
-            for resized_chunk, orig_chunk, shapes in zip(resized_chunks, original_chunks, shape_chunks):
-
-
-
+            for b64_chunk, orig_chunk, shapes in zip(b64_chunks, original_chunks, shape_chunks):
+                input_array = np.array(b64_chunk, dtype=np.object_)
+                current_batch_size = input_array.shape[0]
+                single_threshold_pair = [self.conf_threshold, self.iou_threshold]
+                thresholds = np.tile(single_threshold_pair, (current_batch_size, 1)).astype(np.float32)
+                batched_inputs.append([input_array, thresholds])
                formatted_batch_data.append({"images": orig_chunk, "original_image_shapes": shapes})
+
            return batched_inputs, formatted_batch_data
 
        elif protocol == "http":
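The gRPC path no longer resizes images client-side; each chunk becomes a pair of arrays, the base64-encoded pages and a per-image `[conf_threshold, iou_threshold]` row, leaving preprocessing and NMS to the server-side ensemble. A sketch of the shapes one chunk produces (the threshold values here are illustrative, not the module defaults):

```python
import numpy as np

# Hypothetical chunk of three base64-encoded page images.
b64_chunk = ["iVBORw0KGgo...", "iVBORw0KGgo...", "iVBORw0KGgo..."]

input_array = np.array(b64_chunk, dtype=np.object_)  # shape (3,), sent as a BYTES tensor
thresholds = np.tile([0.01, 0.5], (input_array.shape[0], 1)).astype(np.float32)

print(input_array.shape, thresholds.shape)  # (3,) (3, 2)
```

For sizing, `chunk_list_geometrically` keeps chunks at power-of-two sizes where it can: 10 images with `max_size=4` split into chunks of 4, 4, and 2.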
@@ -239,15 +219,11 @@ class YoloxModelInterfaceBase(ModelInterface):
            # Convert to uint8 if needed.
            if image.dtype != np.uint8:
                image = (image * 255).astype(np.uint8)
-            # Convert the numpy array to a PIL Image.
-            image_pil = Image.fromarray(image)
-            original_size = image_pil.size
-
-            # Save the image to a buffer and encode to base64.
-            buffered = io.BytesIO()
-            image_pil.save(buffered, format="PNG")
-            image_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
+            # Get original size directly from numpy array (width, height)
+            original_size = (image.shape[1], image.shape[0])
+            # Convert numpy array directly to base64 using OpenCV
+            image_b64 = numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT)
            # Scale the image if necessary.
            scaled_image_b64, new_size = scale_image_to_encoding_size(
                image_b64, max_base64_size=self.nim_max_image_size
@@ -342,32 +318,20 @@ class YoloxModelInterfaceBase(ModelInterface):
        list[dict]
            A list of annotation dictionaries for each image in the batch.
        """
-        original_image_shapes = kwargs.get("original_image_shapes", [])
-
        if protocol == "http":
            # For http, the output already has postprocessing applied. Skip to table/chart expansion.
            results = output
 
        elif protocol == "grpc":
+            results = []
            # For grpc, apply the same NIM postprocessing.
-            pred = postprocess_model_prediction(
-
-
-
-
-
-            )
-            results = postprocess_results(
-                pred,
-                original_image_shapes,
-                self.image_preproc_width,
-                self.image_preproc_height,
-                self.class_labels,
-                min_score=self.min_score,
-            )
-
+            for out in output:
+                if isinstance(out, bytes):
+                    out = out.decode("utf-8")
+                if isinstance(out, dict):
+                    continue
+                results.append(json.loads(out))
        inference_results = self.postprocess_annotations(results, **kwargs)
-
        return inference_results
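Since the ensemble returns fully postprocessed annotations, the gRPC branch of `parse_output` reduces to a JSON decode. A minimal sketch, assuming the server returns one UTF-8 JSON document per image:

```python
import json

# Hypothetical raw gRPC output: one serialized annotation dict per image.
output = [b'{"table": [[0.0107, 0.0859, 0.7537, 0.1219, 0.9861]], "chart": []}']

results = []
for out in output:
    if isinstance(out, bytes):
        out = out.decode("utf-8")
    if isinstance(out, dict):  # already-parsed entries are skipped, mirroring the diff
        continue
    results.append(json.loads(out))

print(results[0]["table"][0][:4])  # normalized [x1, y1, x2, y2]
```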
@@ -401,22 +365,15 @@ class YoloxPageElementsModelInterface(YoloxModelInterfaceBase):
    An interface for handling inference with yolox-page-elements model, supporting both gRPC and HTTP protocols.
    """
 
-    def __init__(self, yolox_version: Optional[str] = None):
+    def __init__(self):
        """
        Initialize the yolox-page-elements model interface.
        """
-
-
-
-            class_labels = YOLOX_PAGE_V1_CLASS_LABELS
-        else:
-            num_classes = YOLOX_PAGE_V2_NUM_CLASSES
-            final_score = YOLOX_PAGE_V2_FINAL_SCORE
-            class_labels = YOLOX_PAGE_V2_CLASS_LABELS
+        num_classes = YOLOX_PAGE_V2_NUM_CLASSES
+        final_score = YOLOX_PAGE_V2_FINAL_SCORE
+        class_labels = YOLOX_PAGE_V2_CLASS_LABELS
 
        super().__init__(
-            image_preproc_width=YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
-            image_preproc_height=YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
            nim_max_image_size=YOLOX_PAGE_NIM_MAX_IMAGE_SIZE,
            num_classes=num_classes,
            conf_threshold=YOLOX_PAGE_CONF_THRESHOLD,
@@ -483,22 +440,11 @@ class YoloxGraphicElementsModelInterface(YoloxModelInterfaceBase):
    An interface for handling inference with yolox-graphic-elemenents model, supporting both gRPC and HTTP protocols.
    """
 
-    def __init__(self, yolox_version: Optional[str] = None):
+    def __init__(self):
        """
        Initialize the yolox-graphic-elements model interface.
        """
-        if yolox_version and (
-            packaging.version.Version(yolox_version) >= packaging.version.Version("1.2.0-rc5")  # gtc release
-        ):
-            image_preproc_width = YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH
-            image_preproc_height = YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT
-        else:
-            image_preproc_width = LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH
-            image_preproc_height = LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT
-
        super().__init__(
-            image_preproc_width=image_preproc_width,
-            image_preproc_height=image_preproc_height,
            nim_max_image_size=YOLOX_GRAPHIC_NIM_MAX_IMAGE_SIZE,
            num_classes=YOLOX_GRAPHIC_NUM_CLASSES,
            conf_threshold=YOLOX_GRAPHIC_CONF_THRESHOLD,
@@ -556,8 +502,6 @@ class YoloxTableStructureModelInterface(YoloxModelInterfaceBase):
        Initialize the yolox-graphic-elements model interface.
        """
        super().__init__(
-            image_preproc_width=YOLOX_TABLE_IMAGE_PREPROC_HEIGHT,
-            image_preproc_height=YOLOX_TABLE_IMAGE_PREPROC_HEIGHT,
            nim_max_image_size=YOLOX_TABLE_NIM_MAX_IMAGE_SIZE,
            num_classes=YOLOX_TABLE_NUM_CLASSES,
            conf_threshold=YOLOX_TABLE_CONF_THRESHOLD,
@@ -605,144 +549,6 @@ class YoloxTableStructureModelInterface(YoloxModelInterfaceBase):
        return inference_results
 
 
-def postprocess_model_prediction(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
-    # Convert numpy array to torch tensor
-    prediction = torch.from_numpy(prediction.copy())
-
-    # Compute box corners
-    box_corner = prediction.new(prediction.shape)
-    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
-    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
-    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
-    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
-    prediction[:, :, :4] = box_corner[:, :, :4]
-
-    output = [None for _ in range(len(prediction))]
-
-    for i, image_pred in enumerate(prediction):
-        # If no detections, continue to the next image
-        if not image_pred.size(0):
-            continue
-
-        # Ensure image_pred is 2D
-        if image_pred.ndim == 1:
-            image_pred = image_pred.unsqueeze(0)
-
-        # Get score and class with highest confidence
-        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
-
-        # Confidence mask
-        squeezed_conf = class_conf.squeeze(dim=1)
-        conf_mask = image_pred[:, 4] * squeezed_conf >= conf_thre
-
-        # Apply confidence mask
-        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
-        detections = detections[conf_mask]
-
-        if not detections.size(0):
-            continue
-
-        # Apply Non-Maximum Suppression (NMS)
-        if class_agnostic:
-            nms_out_index = torchvision.ops.nms(
-                detections[:, :4],
-                detections[:, 4] * detections[:, 5],
-                nms_thre,
-            )
-        else:
-            nms_out_index = torchvision.ops.batched_nms(
-                detections[:, :4],
-                detections[:, 4] * detections[:, 5],
-                detections[:, 6],
-                nms_thre,
-            )
-        detections = detections[nms_out_index]
-
-        # Append detections to output
-        output[i] = detections
-
-    return output
-
-
-def postprocess_results(
-    results, original_image_shapes, image_preproc_width, image_preproc_height, class_labels, min_score=0.0
-):
-    """
-    For each item (==image) in results, computes annotations in the form
-
-        {"table": [[0.0107, 0.0859, 0.7537, 0.1219, 0.9861], ...],
-         "figure": [...],
-         "title": [...]
-        }
-    where each list of 5 floats represents a bounding box in the format [x1, y1, x2, y2, confidence]
-
-    Keep only bboxes with high enough confidence.
-    """
-    out = []
-
-    for original_image_shape, result in zip(original_image_shapes, results):
-        annotation_dict = {label: [] for label in class_labels}
-
-        if result is None:
-            out.append(annotation_dict)
-            continue
-
-        try:
-            result = result.cpu().numpy()
-            scores = result[:, 4] * result[:, 5]
-            result = result[scores > min_score]
-
-            # ratio is used when image was padded
-            ratio = min(
-                image_preproc_width / original_image_shape[0],
-                image_preproc_height / original_image_shape[1],
-            )
-            bboxes = result[:, :4] / ratio
-
-            bboxes[:, [0, 2]] /= original_image_shape[1]
-            bboxes[:, [1, 3]] /= original_image_shape[0]
-            bboxes = np.clip(bboxes, 0.0, 1.0)
-
-            labels = result[:, 6]
-            scores = scores[scores > min_score]
-        except Exception as e:
-            raise ValueError(f"Error in postprocessing {result.shape} and {original_image_shape}: {e}")
-
-        for box, score, label in zip(bboxes, scores, labels):
-            # TODO(Devin): Sometimes we get back unexpected class labels?
-            if (label < 0) or (label >= len(class_labels)):
-                logger.warning(f"Invalid class label {label} found in postprocessing")
-                continue
-            else:
-                class_name = class_labels[int(label)]
-
-            annotation_dict[class_name].append([round(float(x), 4) for x in np.concatenate((box, [score]))])
-
-        out.append(annotation_dict)
-
-    return out
-
-
-def resize_image(image, target_img_size):
-    w, h, _ = np.array(image).shape
-
-    if target_img_size is not None:  # Resize + Pad
-        r = min(target_img_size[0] / w, target_img_size[1] / h)
-        image = cv2.resize(
-            image,
-            (int(h * r), int(w * r)),
-            interpolation=cv2.INTER_LINEAR,
-        ).astype(np.uint8)
-        image = np.pad(
-            image,
-            ((0, target_img_size[0] - image.shape[0]), (0, target_img_size[1] - image.shape[1]), (0, 0)),
-            mode="constant",
-            constant_values=114,
-        )
-
-    return image
-
-
 def expand_table_bboxes(annotation_dict, labels=None):
    """
    Additional preprocessing for tables: extend the upper bounds to capture titles if any.
@@ -1388,14 +1194,16 @@ def get_bbox_dict_yolox_table(preds, shape, class_labels, threshold=0.1, delta=0
    return bbox_dict
 
 
-
+@multiprocessing_cache(max_calls=100)  # Cache results first to avoid redundant retries from backoff
+@backoff.on_predicate(backoff.expo, max_time=30)
+def get_yolox_model_name(yolox_grpc_endpoint, default_model_name="yolox"):
    try:
-
-
-
-
-
-
+        client = grpcclient.InferenceServerClient(yolox_grpc_endpoint)
+        model_index = client.get_model_repository_index(as_json=True)
+        model_names = [x["name"] for x in model_index.get("models", [])]
+        if "yolox_ensemble" in model_names:
+            yolox_model_name = "yolox_ensemble"
+        else:
            yolox_model_name = default_model_name
    except Exception:
        logger.warning(
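The decorator order on `get_yolox_model_name` is deliberate: `multiprocessing_cache` sits outside `backoff.on_predicate`, so a cached result short-circuits before the retry loop ever runs, while an uncached falsy return keeps being retried with exponential spacing for up to 30 seconds. The same pattern reduced to its skeleton, with `probe_model_name` standing in for the Triton repository query:

```python
import backoff


def probe_model_name():
    # Placeholder for the Triton model-repository query in the diff.
    return None  # falsy -> "not resolved yet"


@backoff.on_predicate(backoff.expo, max_time=30)
def resolve_model_name():
    # Retried with exponentially spaced attempts while the return value is
    # falsy; after 30 seconds the last (falsy) value is returned as-is.
    return probe_model_name()
```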
nv_ingest_api/internal/primitives/nim/nim_client.py

@@ -5,10 +5,10 @@
 import logging
 import threading
 import time
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any
 from typing import Optional
-from typing import Tuple
+from typing import Tuple, Union
 
 import numpy as np
 import requests
@@ -33,6 +33,7 @@ class NimClient:
        auth_token: Optional[str] = None,
        timeout: float = 120.0,
        max_retries: int = 5,
+        max_429_retries: int = 5,
    ):
        """
        Initialize the NimClient with the specified model interface, protocol, and server endpoints.
@@ -49,6 +50,10 @@ class NimClient:
            Authorization token for HTTP requests (default: None).
        timeout : float, optional
            Timeout for HTTP requests in seconds (default: 30.0).
+        max_retries : int, optional
+            The maximum number of retries for non-429 server-side errors (default: 5).
+        max_429_retries : int, optional
+            The maximum number of retries specifically for 429 errors (default: 10).
 
        Raises
        ------
@@ -62,6 +67,7 @@ class NimClient:
        self.auth_token = auth_token
        self.timeout = timeout  # Timeout for HTTP requests
        self.max_retries = max_retries
+        self.max_429_retries = max_429_retries
        self._grpc_endpoint, self._http_endpoint = endpoints
        self._max_batch_sizes = {}
        self._lock = threading.Lock()
@@ -84,6 +90,10 @@ class NimClient:
 
    def _fetch_max_batch_size(self, model_name, model_version: str = "") -> int:
        """Fetch the maximum batch size from the Triton model configuration in a thread-safe manner."""
+
+        if model_name == "yolox_ensemble":
+            model_name = "yolox"
+
        if model_name in self._max_batch_sizes:
            return self._max_batch_sizes[model_name]
 
@@ -138,7 +148,9 @@ class NimClient:
        else:
            raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.")
 
-        parsed_output = self.model_interface.parse_output(
+        parsed_output = self.model_interface.parse_output(
+            response, protocol=self.protocol, data=batch_data, model_name=model_name, **kwargs
+        )
        return parsed_output, batch_data
 
    def try_set_max_batch_size(self, model_name, model_version: str = ""):
@@ -167,10 +179,10 @@ class NimClient:
        try:
            # 1. Retrieve or default to the model's maximum batch size.
            batch_size = self._fetch_max_batch_size(model_name)
-            max_requested_batch_size = kwargs.
-            force_requested_batch_size = kwargs.
+            max_requested_batch_size = kwargs.pop("max_batch_size", batch_size)
+            force_requested_batch_size = kwargs.pop("force_max_batch_size", False)
            max_batch_size = (
-                min(batch_size, max_requested_batch_size)
+                max(1, min(batch_size, max_requested_batch_size))
                if not force_requested_batch_size
                else max_requested_batch_size
            )
@@ -180,7 +192,11 @@ class NimClient:
 
            # 3. Format the input based on protocol.
            formatted_batches, formatted_batch_data = self.model_interface.format_input(
-                data,
+                data,
+                protocol=self.protocol,
+                max_batch_size=max_batch_size,
+                model_name=model_name,
+                **kwargs,
            )
 
            # Check for a custom maximum pool worker count, and remove it from kwargs.
@@ -190,13 +206,15 @@ class NimClient:
            # We enumerate the batches so that we can later reassemble results in order.
            results = [None] * len(formatted_batches)
            with ThreadPoolExecutor(max_workers=max_pool_workers) as executor:
-
+                future_to_idx = {}
                for idx, (batch, batch_data) in enumerate(zip(formatted_batches, formatted_batch_data)):
                    future = executor.submit(
                        self._process_batch, batch, batch_data=batch_data, model_name=model_name, **kwargs
                    )
-
-
+                    future_to_idx[future] = idx
+
+                for future in as_completed(future_to_idx.keys()):
+                    idx = future_to_idx[future]
                    results[idx] = future.result()
 
        # 5. Process the parsed outputs for each batch using its corresponding batch_data.
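The `future_to_idx` map is the standard idiom for combining `as_completed`, which yields futures as soon as they finish, with a results list that preserves submission order. The pattern in isolation:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed


def work(x: int) -> int:
    return x * x


items = [3, 1, 4, 1, 5]
results = [None] * len(items)
with ThreadPoolExecutor(max_workers=4) as executor:
    future_to_idx = {executor.submit(work, x): i for i, x in enumerate(items)}
    for future in as_completed(future_to_idx):
        results[future_to_idx[future]] = future.result()

print(results)  # [9, 1, 16, 1, 25], regardless of completion order
```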
@@ -221,7 +239,9 @@ class NimClient:
 
        return all_results
 
-    def _grpc_infer(
+    def _grpc_infer(
+        self, formatted_input: Union[list, list[np.ndarray]], model_name: str, **kwargs
+    ) -> Union[list, list[np.ndarray]]:
        """
        Perform inference using the gRPC protocol.
 
@@ -237,19 +257,27 @@ class NimClient:
        np.ndarray
            The output of the model as a numpy array.
        """
+        if not isinstance(formatted_input, list):
+            formatted_input = [formatted_input]
 
        parameters = kwargs.get("parameters", {})
-        output_names = kwargs.get("
-
-
+        output_names = kwargs.get("output_names", ["output"])
+        dtypes = kwargs.get("dtypes", ["FP32"])
+        input_names = kwargs.get("input_names", ["input"])
+
+        input_tensors = []
+        for input_name, input_data, dtype in zip(input_names, formatted_input, dtypes):
+            input_tensors.append(grpcclient.InferInput(input_name, input_data.shape, datatype=dtype))
 
-
-
+        for idx, input_data in enumerate(formatted_input):
+            input_tensors[idx].set_data_from_numpy(input_data)
 
        outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
+
        response = self.client.infer(
-            model_name=model_name, parameters=parameters, inputs=
+            model_name=model_name, parameters=parameters, inputs=input_tensors, outputs=outputs
        )
+
        logger.debug(f"gRPC inference response: {response}")
 
        if len(outputs) == 1:
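`_grpc_infer` now pairs a list of arrays positionally with `input_names` and `dtypes`, which lets one code path serve both single-tensor models and the two-input ensemble. A sketch of the request the ensemble path would build; the tensor names and the thresholds layout are assumptions (only `input` appears as a default in the diff):

```python
import numpy as np
import tritonclient.grpc as grpcclient

b64_pages = np.array(["iVBORw0KGgo...", "iVBORw0KGgo..."], dtype=np.object_)  # BYTES
thresholds = np.array([[0.01, 0.5], [0.01, 0.5]], dtype=np.float32)           # FP32

input_names = ["input", "thresholds"]  # hypothetical ensemble tensor names
dtypes = ["BYTES", "FP32"]

tensors = []
for name, data, dtype in zip(input_names, [b64_pages, thresholds], dtypes):
    tensor = grpcclient.InferInput(name, data.shape, datatype=dtype)
    tensor.set_data_from_numpy(data)
    tensors.append(tensor)

outputs = [grpcclient.InferRequestedOutput("output")]
# client.infer(model_name="yolox_ensemble", inputs=tensors, outputs=outputs)
```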
@@ -281,6 +309,7 @@ class NimClient:
 
        base_delay = 2.0
        attempt = 0
+        retries_429 = 0
 
        while attempt < self.max_retries:
            try:
@@ -291,7 +320,21 @@ class NimClient:
 
                # Check for server-side or rate-limit type errors
                # e.g. 5xx => server error, 429 => too many requests
-                if status_code == 429 or status_code == 503 or (500 <= status_code < 600):
+                if status_code == 429:
+                    retries_429 += 1
+                    logger.warning(
+                        f"Received HTTP 429 (Too Many Requests) from {self.model_interface.name()}. "
+                        f"Attempt {retries_429} of {self.max_429_retries}."
+                    )
+                    if retries_429 >= self.max_429_retries:
+                        logger.error("Max retries for HTTP 429 exceeded.")
+                        response.raise_for_status()
+                    else:
+                        backoff_time = base_delay * (2**retries_429)
+                        time.sleep(backoff_time)
+                        continue  # Retry without incrementing the main attempt counter
+
+                if status_code == 503 or (500 <= status_code < 600):
                    logger.warning(
                        f"Received HTTP {status_code} ({response.reason}) from "
                        f"{self.model_interface.name()}. Attempt {attempt + 1} of {self.max_retries}."
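With `base_delay = 2.0` and the counter incremented before the sleep, the 429 waits grow as 4 s, 8 s, 16 s, ... and are tracked separately from the main `attempt` counter, so throttling does not eat into the budget for 5xx retries. The schedule for the default of five 429 retries:

```python
base_delay = 2.0
max_429_retries = 5

delays = [base_delay * (2 ** n) for n in range(1, max_429_retries)]
print(delays)  # [4.0, 8.0, 16.0, 32.0]; the fifth 429 raises instead of sleeping
```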
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py

@@ -24,8 +24,8 @@ class ChartExtractorConfigSchema(BaseModel):
        A tuple containing the gRPC and HTTP services for the yolox endpoint.
        Either the gRPC or HTTP service can be empty, but not both.
 
-    paddle_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
-        A tuple containing the gRPC and HTTP services for the paddle endpoint.
+    ocr_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
+        A tuple containing the gRPC and HTTP services for the ocr endpoint.
        Either the gRPC or HTTP service can be empty, but not both.
 
    Methods
@@ -49,8 +49,8 @@ class ChartExtractorConfigSchema(BaseModel):
    yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
    yolox_infer_protocol: str = ""
 
-    paddle_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
-    paddle_infer_protocol: str = ""
+    ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
+    ocr_infer_protocol: str = ""
 
    nim_batch_size: int = 2
    workers_per_progress_engine: int = 5
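For callers migrating from 25.7.x, the only schema change here is the rename of `paddle_endpoints` and `paddle_infer_protocol` to the `ocr_*` fields. A hypothetical instantiation (endpoint addresses are illustrative; the validator still requires at least one non-empty service per endpoint tuple):

```python
from nv_ingest_api.internal.schemas.extract.extract_chart_schema import (
    ChartExtractorConfigSchema,
)

config = ChartExtractorConfigSchema(
    yolox_endpoints=("localhost:8001", None),                # gRPC only
    yolox_infer_protocol="grpc",
    ocr_endpoints=(None, "http://localhost:8000/v1/infer"),  # HTTP only
    ocr_infer_protocol="http",
)
```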
@@ -86,7 +86,7 @@ class ChartExtractorConfigSchema(BaseModel):
                return None
            return service
 
-        for endpoint_name in ["yolox_endpoints", "paddle_endpoints"]:
+        for endpoint_name in ["yolox_endpoints", "ocr_endpoints"]:
            grpc_service, http_service = values.get(endpoint_name, (None, None))
            grpc_service = clean_service(grpc_service)
            http_service = clean_service(http_service)
@@ -117,7 +117,7 @@ class ChartExtractorSchema(BaseModel):
        A flag indicating whether to raise an exception if a failure occurs during chart extraction.
 
    extraction_config: Optional[ChartExtractorConfigSchema], default=None
-        Configuration for the chart extraction stage, including yolox and paddle service endpoints.
+        Configuration for the chart extraction stage, including yolox and ocr service endpoints.
    """
 
    max_queue_size: int = 1

nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py

@@ -20,8 +20,8 @@ class InfographicExtractorConfigSchema(BaseModel):
    auth_token : Optional[str], default=None
        Authentication token required for secure services.
 
-    paddle_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
-        A tuple containing the gRPC and HTTP services for the paddle endpoint.
+    ocr_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
+        A tuple containing the gRPC and HTTP services for the ocr endpoint.
        Either the gRPC or HTTP service can be empty, but not both.
 
    Methods
@@ -42,8 +42,8 @@ class InfographicExtractorConfigSchema(BaseModel):
 
    auth_token: Optional[str] = None
 
-    paddle_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
-    paddle_infer_protocol: str = ""
+    ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
+    ocr_infer_protocol: str = ""
 
    nim_batch_size: int = 2
    workers_per_progress_engine: int = 5
@@ -79,7 +79,7 @@ class InfographicExtractorConfigSchema(BaseModel):
                return None
            return service
 
-        for endpoint_name in ["paddle_endpoints"]:
+        for endpoint_name in ["ocr_endpoints"]:
            grpc_service, http_service = values.get(endpoint_name, (None, None))
            grpc_service = clean_service(grpc_service)
            http_service = clean_service(http_service)
@@ -110,7 +110,7 @@ class InfographicExtractorSchema(BaseModel):
        A flag indicating whether to raise an exception if a failure occurs during infographic extraction.
 
    stage_config : Optional[InfographicExtractorConfigSchema], default=None
-        Configuration for the infographic extraction stage, including yolox and paddle service endpoints.
+        Configuration for the infographic extraction stage, including yolox and ocr service endpoints.
    """
 
    max_queue_size: int = 1