nv-ingest-api 2025.7.17.dev20250717__py3-none-any.whl → 2025.7.19.dev20250719__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


nv_ingest_api/internal/extract/image/chart_extractor.py

@@ -13,7 +13,6 @@ from typing import Tuple
 import numpy as np
 import pandas as pd
 
-from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_version
 from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
 from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskChartExtraction
 from nv_ingest_api.util.image_processing.table_and_chart import join_yolox_graphic_elements_and_ocr_output
@@ -79,10 +78,13 @@ def _run_chart_inference(
 
     future_yolox_kwargs = dict(
         data=data_yolox,
-        model_name="yolox",
+        model_name="yolox_ensemble",
         stage_name="chart_extraction",
-        max_batch_size=8,
+        input_names=["INPUT_IMAGES", "THRESHOLDS"],
+        dtypes=["BYTES", "FP32"],
+        output_names=["OUTPUT"],
         trace_info=trace_info,
+        max_batch_size=8,
     )
     future_ocr_kwargs = dict(
         data=data_ocr,
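
The new input_names, dtypes, and output_names keyword arguments spell out the Triton ensemble's I/O signature instead of relying on client-side defaults. As a rough sketch, the equivalent raw tritonclient request would look something like the following (the endpoint and image payload are illustrative, not values from this package):

    # Sketch: how the "yolox_ensemble" kwargs map onto a Triton gRPC request.
    import numpy as np
    import tritonclient.grpc as grpcclient

    client = grpcclient.InferenceServerClient("localhost:8001")  # hypothetical endpoint

    b64_images = np.array([b"<base64-encoded image>"], dtype=np.object_)  # BYTES, shape (B,)
    thresholds = np.array([[0.01, 0.5]], dtype=np.float32)                # FP32, shape (B, 2)

    inputs = [
        grpcclient.InferInput("INPUT_IMAGES", list(b64_images.shape), "BYTES"),
        grpcclient.InferInput("THRESHOLDS", list(thresholds.shape), "FP32"),
    ]
    inputs[0].set_data_from_numpy(b64_images)
    inputs[1].set_data_from_numpy(thresholds)

    outputs = [grpcclient.InferRequestedOutput("OUTPUT")]
    response = client.infer(model_name="yolox_ensemble", inputs=inputs, outputs=outputs)
    annotations = response.as_numpy("OUTPUT")  # one JSON-encoded result per image
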
@@ -211,24 +213,7 @@ def _create_clients(
     ocr_protocol: str,
     auth_token: str,
 ) -> Tuple[NimClient, NimClient]:
-    # Obtain yolox_version
-    # Assuming that the grpc endpoint is at index 0
-    yolox_http_endpoint = yolox_endpoints[1]
-
-    try:
-        yolox_version = get_version(yolox_http_endpoint)
-        if not yolox_version:
-            logger.warning(
-                "Failed to obtain yolox-page-elements version from the endpoint. Falling back to the latest version."
-            )
-            yolox_version = None  # Default to the latest version
-    except Exception:
-        logger.warning(
-            "Failed to get yolox-page-elements version after 30 seconds. Falling back to the latest version."
-        )
-        yolox_version = None  # Default to the latest version
-
-    yolox_model_interface = YoloxGraphicElementsModelInterface(yolox_version=yolox_version)
+    yolox_model_interface = YoloxGraphicElementsModelInterface()
     ocr_model_interface = OCRModelInterface()
 
     logger.debug(f"Inference protocols: yolox={yolox_protocol}, ocr={ocr_protocol}")
nv_ingest_api/internal/extract/image/image_helpers/common.py

@@ -33,7 +33,6 @@ from PIL import Image
 from nv_ingest_api.internal.enums.common import AccessLevelEnum
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
     YoloxPageElementsModelInterface,
-    get_yolox_model_name,
 )
 from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageConfigSchema
 from nv_ingest_api.util.image_processing.transforms import crop_image, numpy_to_base64
@@ -202,11 +201,8 @@ def extract_page_elements_from_images(
 
     # Obtain yolox_version
     # Assuming that the http endpoint is at index 1
-    yolox_http_endpoint = config.yolox_endpoints[1]
-    yolox_model_name = get_yolox_model_name(yolox_http_endpoint)
-
     try:
-        model_interface = YoloxPageElementsModelInterface(yolox_model_name=yolox_model_name)
+        model_interface = YoloxPageElementsModelInterface()
         yolox_client = create_inference_client(
             config.yolox_endpoints,
             model_interface,
@@ -220,8 +216,11 @@ def extract_page_elements_from_images(
         # Perform inference in a single call. The NimClient handles batching internally.
         inference_results = yolox_client.infer(
             data,
-            model_name="yolox",
+            model_name="yolox_ensemble",
             max_batch_size=YOLOX_MAX_BATCH_SIZE,
+            input_names=["INPUT_IMAGES", "THRESHOLDS"],
+            dtypes=["BYTES", "FP32"],
+            output_names=["OUTPUT"],
             trace_info=trace_info,
             stage_name="pdf_extraction",
         )
nv_ingest_api/internal/extract/image/table_extractor.py

@@ -77,9 +77,12 @@ def _run_inference(
     data_yolox = {"images": valid_arrays}
     future_yolox_kwargs = dict(
         data=data_yolox,
-        model_name="yolox",
+        model_name="yolox_ensemble",
         stage_name="table_extraction",
         max_batch_size=8,
+        input_names=["INPUT_IMAGES", "THRESHOLDS"],
+        dtypes=["BYTES", "FP32"],
+        output_names=["OUTPUT"],
         trace_info=trace_info,
     )
 
@@ -102,11 +105,10 @@ def _run_inference(
     )
 
     with ThreadPoolExecutor(max_workers=2) as executor:
+        future_ocr = executor.submit(ocr_client.infer, **future_ocr_kwargs)
         future_yolox = None
         if enable_yolox:
             future_yolox = executor.submit(yolox_client.infer, **future_yolox_kwargs)
-        future_ocr = executor.submit(ocr_client.infer, **future_ocr_kwargs)
-
        if enable_yolox:
            try:
                yolox_results = future_yolox.result()
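
The executor change submits the OCR request before the conditional YOLOX submit, so both inferences are queued before either result is awaited. A minimal sketch of the resulting pattern (function names are illustrative):

    from concurrent.futures import ThreadPoolExecutor

    def run_pair(ocr_task, yolox_task, enable_yolox: bool):
        # Queue OCR unconditionally, then YOLOX only if enabled,
        # and only afterwards block on the results.
        with ThreadPoolExecutor(max_workers=2) as executor:
            future_ocr = executor.submit(ocr_task)
            future_yolox = executor.submit(yolox_task) if enable_yolox else None
            yolox_result = future_yolox.result() if future_yolox is not None else None
            return future_ocr.result(), yolox_result
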
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py

@@ -258,6 +258,9 @@ def nemoretriever_parse_extractor(
             nemoretriever_parse_config.yolox_endpoints,
             nemoretriever_parse_config.yolox_infer_protocol,
             nemoretriever_parse_config.auth_token,
+            input_names=["INPUT_IMAGES", "THRESHOLDS"],
+            dtypes=["BYTES", "FP32"],
+            output_names=["OUTPUT"],
             execution_trace_log=execution_trace_log,
         )
         futures.append(future_yolox)
nv_ingest_api/internal/extract/pdf/engines/pdfium.py

@@ -29,9 +29,8 @@ from nv_ingest_api.internal.primitives.nim.default_values import YOLOX_MAX_BATCH
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
     YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
     YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
-    YOLOX_PAGE_IMAGE_FORMAT,
-    get_yolox_model_name,
     YoloxPageElementsModelInterface,
+    YOLOX_PAGE_IMAGE_FORMAT,
 )
 from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema
 from nv_ingest_api.internal.enums.common import TableFormatEnum, TextTypeEnum, AccessLevelEnum
@@ -58,7 +57,6 @@ logger = logging.getLogger(__name__)
 def _extract_page_elements_using_image_ensemble(
     pages: List[Tuple[int, np.ndarray, Tuple[int, int]]],
     yolox_client,
-    yolox_model_name: str = "yolox",
     execution_trace_log: Optional[List] = None,
 ) -> List[Tuple[int, object]]:
     """
@@ -72,8 +70,6 @@ def _extract_page_elements_using_image_ensemble(
         and optional padding offset information.
     yolox_client : object
         A pre-configured client instance for the YOLOX inference service.
-    yolox_model_name : str, default="yolox"
-        The name of the YOLOX model to use for inference.
     execution_trace_log : Optional[List], default=None
         List for accumulating execution trace information.
 
@@ -106,8 +102,11 @@ def _extract_page_elements_using_image_ensemble(
     # Perform inference using the NimClient.
     inference_results = yolox_client.infer(
         data,
-        model_name="yolox",
+        model_name="yolox_ensemble",
         max_batch_size=YOLOX_MAX_BATCH_SIZE,
+        input_names=["INPUT_IMAGES", "THRESHOLDS"],
+        dtypes=["BYTES", "FP32"],
+        output_names=["OUTPUT"],
         trace_info=execution_trace_log,
         stage_name="pdf_extraction",
     )
@@ -317,19 +316,7 @@ def _extract_page_elements(
 
     try:
         # Default model name
-        yolox_model_name = "yolox"
-
-        # Get the HTTP endpoint to determine the model name if needed
-        yolox_http_endpoint = yolox_endpoints[1]
-        if yolox_http_endpoint:
-            try:
-                yolox_model_name = get_yolox_model_name(yolox_http_endpoint)
-            except Exception as e:
-                logger.warning(f"Failed to get YOLOX model name from endpoint: {e}. Using default.")
-
-        # Create the model interface
-        model_interface = YoloxPageElementsModelInterface(yolox_model_name=yolox_model_name)
-
+        model_interface = YoloxPageElementsModelInterface()
         # Create the inference client
         yolox_client = create_inference_client(
             yolox_endpoints,
@@ -340,7 +327,7 @@ def _extract_page_elements(
 
         # Extract page elements using the client
         page_element_results = _extract_page_elements_using_image_ensemble(
-            pages, yolox_client, yolox_model_name, execution_trace_log=execution_trace_log
+            pages, yolox_client, execution_trace_log=execution_trace_log
        )
 
        # Process each extracted element based on extraction flags
nv_ingest_api/internal/primitives/nim/model_interface/helpers.py

@@ -14,6 +14,7 @@ from nv_ingest_api.internal.primitives.nim.model_interface.decorators import mul
 from nv_ingest_api.util.image_processing.transforms import pad_image, normalize_image
 from nv_ingest_api.util.string_processing import generate_url, remove_url_endpoints
 
+cv2.setNumThreads(1)
 logger = logging.getLogger(__name__)
 
 
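
Capping OpenCV at one internal thread at import time is a common guard against oversubscription when the pipeline already fans work out across its own workers; each worker's cv2 calls then run single-threaded. A sketch of the pattern this protects (the preprocess function is hypothetical):

    from concurrent.futures import ProcessPoolExecutor

    import cv2
    import numpy as np

    cv2.setNumThreads(1)  # keep OpenCV single-threaded inside each worker

    def preprocess(image: np.ndarray) -> np.ndarray:
        # Hypothetical per-image work; without the cap, every worker would
        # also spin up OpenCV's own internal thread pool.
        return cv2.resize(image, (1024, 1024), interpolation=cv2.INTER_LINEAR)

    if __name__ == "__main__":
        images = [np.zeros((768, 768, 3), dtype=np.uint8) for _ in range(8)]
        with ProcessPoolExecutor(max_workers=4) as pool:
            resized = list(pool.map(preprocess, images))
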
nv_ingest_api/internal/primitives/nim/model_interface/yolox.py

@@ -12,15 +12,14 @@ from typing import List
 from typing import Optional
 from typing import Tuple
 
-import cv2
+import backoff
 import numpy as np
-import packaging
+import json
 import pandas as pd
-import torch
-import torchvision
 
 from nv_ingest_api.internal.primitives.nim import ModelInterface
-from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_model_name
+import tritonclient.grpc as grpcclient
+from nv_ingest_api.internal.primitives.nim.model_interface.decorators import multiprocessing_cache
 from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
 from nv_ingest_api.util.image_processing.transforms import numpy_to_base64
 
@@ -35,15 +34,6 @@ YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
 YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
 YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")
 
-# yolox-page-elements-v1 constants
-YOLOX_PAGE_V1_NUM_CLASSES = 4
-YOLOX_PAGE_V1_FINAL_SCORE = {"table": 0.48, "chart": 0.48}
-YOLOX_PAGE_V1_CLASS_LABELS = [
-    "table",
-    "chart",
-    "title",
-]
-
 # yolox-page-elements-v2 constants
 YOLOX_PAGE_V2_NUM_CLASSES = 4
 YOLOX_PAGE_V2_FINAL_SCORE = {"table": 0.1, "chart": 0.01, "infographic": 0.01}
@@ -63,11 +53,6 @@ YOLOX_GRAPHIC_MIN_SCORE = 0.1
 YOLOX_GRAPHIC_FINAL_SCORE = 0.0
 YOLOX_GRAPHIC_NIM_MAX_IMAGE_SIZE = 512_000
 
-# TODO(Devin): Legacy items aren't working right for me. Double check these.
-LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT = 1024
-LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH = 1024
-YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT = 1024
-YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH = 1024
 
 YOLOX_GRAPHIC_CLASS_LABELS = [
     "chart_title",
@@ -111,8 +96,6 @@ class YoloxModelInterfaceBase(ModelInterface):
 
     def __init__(
         self,
-        image_preproc_width: Optional[int] = None,
-        image_preproc_height: Optional[int] = None,
         nim_max_image_size: Optional[int] = None,
         num_classes: Optional[int] = None,
         conf_threshold: Optional[float] = None,
@@ -126,8 +109,6 @@ class YoloxModelInterfaceBase(ModelInterface):
         Parameters
         ----------
         """
-        self.image_preproc_width = image_preproc_width
-        self.image_preproc_height = image_preproc_height
         self.nim_max_image_size = nim_max_image_size
         self.num_classes = num_classes
         self.conf_threshold = conf_threshold
@@ -199,6 +180,7 @@ class YoloxModelInterfaceBase(ModelInterface):
 
         # Helper functions to chunk a list into sublists of length up to chunk_size.
         def chunk_list(lst: list, chunk_size: int) -> List[list]:
+            chunk_size = max(1, chunk_size)
             return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
 
         def chunk_list_geometrically(lst: list, max_size: int) -> List[list]:
@@ -206,29 +188,28 @@ class YoloxModelInterfaceBase(ModelInterface):
             chunks = []
             i = 0
             while i < len(lst):
-                chunk_size = min(2 ** int(log(len(lst) - i, 2)), max_size)
+                chunk_size = max(1, min(2 ** int(log(len(lst) - i, 2)), max_size))
                 chunks.append(lst[i : i + chunk_size])
                 i += chunk_size
             return chunks
 
         if protocol == "grpc":
-            logger.debug("Formatting input for gRPC Yolox model")
-            # Resize images for model input (Yolox expects 1024x1024).
-            resized_images = [
-                resize_image(image, (self.image_preproc_width, self.image_preproc_height)) for image in data["images"]
-            ]
-            # Chunk the resized images, the original images, and their shapes.
-            resized_chunks = chunk_list_geometrically(resized_images, max_batch_size)
+            logger.debug("Formatting input for gRPC Yolox Ensemble model")
+            b64_images = [numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT) for image in data["images"]]
+            b64_chunks = chunk_list_geometrically(b64_images, max_batch_size)
             original_chunks = chunk_list_geometrically(data["images"], max_batch_size)
             shape_chunks = chunk_list_geometrically(data["original_image_shapes"], max_batch_size)
 
             batched_inputs = []
             formatted_batch_data = []
-            for r_chunk, orig_chunk, shapes in zip(resized_chunks, original_chunks, shape_chunks):
-                # Reorder axes from (B, H, W, C) to (B, C, H, W) as expected by the model.
-                input_array = np.einsum("bijk->bkij", r_chunk).astype(np.float32)
-                batched_inputs.append(input_array)
+            for b64_chunk, orig_chunk, shapes in zip(b64_chunks, original_chunks, shape_chunks):
+                input_array = np.array(b64_chunk, dtype=np.object_)
+                current_batch_size = input_array.shape[0]
+                single_threshold_pair = [self.conf_threshold, self.iou_threshold]
+                thresholds = np.tile(single_threshold_pair, (current_batch_size, 1)).astype(np.float32)
+                batched_inputs.append([input_array, thresholds])
                 formatted_batch_data.append({"images": orig_chunk, "original_image_shapes": shapes})
+
             return batched_inputs, formatted_batch_data
 
         elif protocol == "http":
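
The reworked gRPC path no longer resizes and transposes pixel arrays on the client; it ships base64 strings plus a per-image [conf_threshold, iou_threshold] pair and leaves preprocessing and NMS to the server-side ensemble. A self-contained sketch of the shapes one formatted batch ends up with (threshold values are illustrative):

    import base64
    import numpy as np

    conf_threshold, iou_threshold = 0.01, 0.5  # illustrative values

    b64_chunk = [
        base64.b64encode(b"<image bytes 0>").decode("utf-8"),
        base64.b64encode(b"<image bytes 1>").decode("utf-8"),
    ]

    input_array = np.array(b64_chunk, dtype=np.object_)  # BYTES input, shape (B,)
    thresholds = np.tile(
        [conf_threshold, iou_threshold], (input_array.shape[0], 1)
    ).astype(np.float32)                                  # FP32 input, shape (B, 2)

    batched_input = [input_array, thresholds]
    assert input_array.shape == (2,) and thresholds.shape == (2, 2)
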
@@ -337,32 +318,20 @@ class YoloxModelInterfaceBase(ModelInterface):
         list[dict]
             A list of annotation dictionaries for each image in the batch.
         """
-        original_image_shapes = kwargs.get("original_image_shapes", [])
-
         if protocol == "http":
             # For http, the output already has postprocessing applied. Skip to table/chart expansion.
             results = output
 
         elif protocol == "grpc":
+            results = []
             # For grpc, apply the same NIM postprocessing.
-            pred = postprocess_model_prediction(
-                output,
-                self.num_classes,
-                self.conf_threshold,
-                self.iou_threshold,
-                class_agnostic=False,
-            )
-            results = postprocess_results(
-                pred,
-                original_image_shapes,
-                self.image_preproc_width,
-                self.image_preproc_height,
-                self.class_labels,
-                min_score=self.min_score,
-            )
-
+            for out in output:
+                if isinstance(out, bytes):
+                    out = out.decode("utf-8")
+                if isinstance(out, dict):
+                    continue
+                results.append(json.loads(out))
         inference_results = self.postprocess_annotations(results, **kwargs)
-
         return inference_results
 
     def postprocess_annotations(self, annotation_dicts, **kwargs):
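
With postprocessing moved server-side, each gRPC output element is now a JSON document (possibly delivered as raw bytes) rather than a prediction tensor. Decoded, it has the same annotation-dictionary shape the removed client-side postprocess_results used to build, per that function's docstring (values here are illustrative):

    import json

    # One OUTPUT element from the ensemble, delivered as bytes:
    raw = b'{"table": [[0.0107, 0.0859, 0.7537, 0.1219, 0.9861]], "figure": [], "title": []}'

    annotation = json.loads(raw.decode("utf-8"))
    # Each entry is [x1, y1, x2, y2, confidence] with normalized coordinates.
    print(annotation["table"][0])
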
@@ -396,22 +365,15 @@ class YoloxPageElementsModelInterface(YoloxModelInterfaceBase):
     An interface for handling inference with yolox-page-elements model, supporting both gRPC and HTTP protocols.
     """
 
-    def __init__(self, yolox_model_name: str = "nemoretriever-page-elements-v2"):
+    def __init__(self):
         """
         Initialize the yolox-page-elements model interface.
         """
-        if yolox_model_name.endswith("-v1"):
-            num_classes = YOLOX_PAGE_V1_NUM_CLASSES
-            final_score = YOLOX_PAGE_V1_FINAL_SCORE
-            class_labels = YOLOX_PAGE_V1_CLASS_LABELS
-        else:
-            num_classes = YOLOX_PAGE_V2_NUM_CLASSES
-            final_score = YOLOX_PAGE_V2_FINAL_SCORE
-            class_labels = YOLOX_PAGE_V2_CLASS_LABELS
+        num_classes = YOLOX_PAGE_V2_NUM_CLASSES
+        final_score = YOLOX_PAGE_V2_FINAL_SCORE
+        class_labels = YOLOX_PAGE_V2_CLASS_LABELS
 
         super().__init__(
-            image_preproc_width=YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
-            image_preproc_height=YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
             nim_max_image_size=YOLOX_PAGE_NIM_MAX_IMAGE_SIZE,
             num_classes=num_classes,
             conf_threshold=YOLOX_PAGE_CONF_THRESHOLD,
@@ -478,22 +440,11 @@ class YoloxGraphicElementsModelInterface(YoloxModelInterfaceBase):
     An interface for handling inference with yolox-graphic-elements model, supporting both gRPC and HTTP protocols.
     """
 
-    def __init__(self, yolox_version: Optional[str] = None):
+    def __init__(self):
         """
         Initialize the yolox-graphic-elements model interface.
         """
-        if yolox_version and (
-            packaging.version.Version(yolox_version) >= packaging.version.Version("1.2.0-rc5")  # gtc release
-        ):
-            image_preproc_width = YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH
-            image_preproc_height = YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT
-        else:
-            image_preproc_width = LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_WIDTH
-            image_preproc_height = LEGACY_YOLOX_GRAPHIC_IMAGE_PREPROC_HEIGHT
-
         super().__init__(
-            image_preproc_width=image_preproc_width,
-            image_preproc_height=image_preproc_height,
             nim_max_image_size=YOLOX_GRAPHIC_NIM_MAX_IMAGE_SIZE,
             num_classes=YOLOX_GRAPHIC_NUM_CLASSES,
             conf_threshold=YOLOX_GRAPHIC_CONF_THRESHOLD,
@@ -551,8 +502,6 @@ class YoloxTableStructureModelInterface(YoloxModelInterfaceBase):
         Initialize the yolox-table-structure model interface.
         """
         super().__init__(
-            image_preproc_width=YOLOX_TABLE_IMAGE_PREPROC_HEIGHT,
-            image_preproc_height=YOLOX_TABLE_IMAGE_PREPROC_HEIGHT,
             nim_max_image_size=YOLOX_TABLE_NIM_MAX_IMAGE_SIZE,
             num_classes=YOLOX_TABLE_NUM_CLASSES,
             conf_threshold=YOLOX_TABLE_CONF_THRESHOLD,
@@ -600,144 +549,6 @@ class YoloxTableStructureModelInterface(YoloxModelInterfaceBase):
         return inference_results
 
 
-def postprocess_model_prediction(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
-    # Convert numpy array to torch tensor
-    prediction = torch.from_numpy(prediction.copy())
-
-    # Compute box corners
-    box_corner = prediction.new(prediction.shape)
-    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
-    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
-    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
-    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
-    prediction[:, :, :4] = box_corner[:, :, :4]
-
-    output = [None for _ in range(len(prediction))]
-
-    for i, image_pred in enumerate(prediction):
-        # If no detections, continue to the next image
-        if not image_pred.size(0):
-            continue
-
-        # Ensure image_pred is 2D
-        if image_pred.ndim == 1:
-            image_pred = image_pred.unsqueeze(0)
-
-        # Get score and class with highest confidence
-        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
-
-        # Confidence mask
-        squeezed_conf = class_conf.squeeze(dim=1)
-        conf_mask = image_pred[:, 4] * squeezed_conf >= conf_thre
-
-        # Apply confidence mask
-        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
-        detections = detections[conf_mask]
-
-        if not detections.size(0):
-            continue
-
-        # Apply Non-Maximum Suppression (NMS)
-        if class_agnostic:
-            nms_out_index = torchvision.ops.nms(
-                detections[:, :4],
-                detections[:, 4] * detections[:, 5],
-                nms_thre,
-            )
-        else:
-            nms_out_index = torchvision.ops.batched_nms(
-                detections[:, :4],
-                detections[:, 4] * detections[:, 5],
-                detections[:, 6],
-                nms_thre,
-            )
-        detections = detections[nms_out_index]
-
-        # Append detections to output
-        output[i] = detections
-
-    return output
-
-
-def postprocess_results(
-    results, original_image_shapes, image_preproc_width, image_preproc_height, class_labels, min_score=0.0
-):
-    """
-    For each item (== image) in results, computes annotations in the form
-
-    {"table": [[0.0107, 0.0859, 0.7537, 0.1219, 0.9861], ...],
-     "figure": [...],
-     "title": [...]
-    }
-    where each list of 5 floats represents a bounding box in the format [x1, y1, x2, y2, confidence]
-
-    Keep only bboxes with high enough confidence.
-    """
-    out = []
-
-    for original_image_shape, result in zip(original_image_shapes, results):
-        annotation_dict = {label: [] for label in class_labels}
-
-        if result is None:
-            out.append(annotation_dict)
-            continue
-
-        try:
-            result = result.cpu().numpy()
-            scores = result[:, 4] * result[:, 5]
-            result = result[scores > min_score]
-
-            # ratio is used when image was padded
-            ratio = min(
-                image_preproc_width / original_image_shape[0],
-                image_preproc_height / original_image_shape[1],
-            )
-            bboxes = result[:, :4] / ratio
-
-            bboxes[:, [0, 2]] /= original_image_shape[1]
-            bboxes[:, [1, 3]] /= original_image_shape[0]
-            bboxes = np.clip(bboxes, 0.0, 1.0)
-
-            labels = result[:, 6]
-            scores = scores[scores > min_score]
-        except Exception as e:
-            raise ValueError(f"Error in postprocessing {result.shape} and {original_image_shape}: {e}")
-
-        for box, score, label in zip(bboxes, scores, labels):
-            # TODO(Devin): Sometimes we get back unexpected class labels?
-            if (label < 0) or (label >= len(class_labels)):
-                logger.warning(f"Invalid class label {label} found in postprocessing")
-                continue
-            else:
-                class_name = class_labels[int(label)]
-
-            annotation_dict[class_name].append([round(float(x), 4) for x in np.concatenate((box, [score]))])
-
-        out.append(annotation_dict)
-
-    return out
-
-
-def resize_image(image, target_img_size):
-    w, h, _ = np.array(image).shape
-
-    if target_img_size is not None:  # Resize + Pad
-        r = min(target_img_size[0] / w, target_img_size[1] / h)
-        image = cv2.resize(
-            image,
-            (int(h * r), int(w * r)),
-            interpolation=cv2.INTER_LINEAR,
-        ).astype(np.uint8)
-        image = np.pad(
-            image,
-            ((0, target_img_size[0] - image.shape[0]), (0, target_img_size[1] - image.shape[1]), (0, 0)),
-            mode="constant",
-            constant_values=114,
-        )
-
-    return image
-
-
 def expand_table_bboxes(annotation_dict, labels=None):
     """
     Additional preprocessing for tables: extend the upper bounds to capture titles if any.
@@ -1383,14 +1194,16 @@ def get_bbox_dict_yolox_table(preds, shape, class_labels, threshold=0.1, delta=0
     return bbox_dict
 
 
-def get_yolox_model_name(yolox_http_endpoint, default_model_name="nemoretriever-page-elements-v2"):
+@multiprocessing_cache(max_calls=100)  # Cache results first to avoid redundant retries from backoff
+@backoff.on_predicate(backoff.expo, max_time=30)
+def get_yolox_model_name(yolox_grpc_endpoint, default_model_name="yolox"):
     try:
-        yolox_model_name = get_model_name(yolox_http_endpoint, default_model_name)
-        if not yolox_model_name:
-            logger.warning(
-                "Failed to obtain yolox-page-elements model name from the endpoint. "
-                f"Falling back to '{default_model_name}'."
-            )
+        client = grpcclient.InferenceServerClient(yolox_grpc_endpoint)
+        model_index = client.get_model_repository_index(as_json=True)
+        model_names = [x["name"] for x in model_index.get("models", [])]
+        if "yolox_ensemble" in model_names:
+            yolox_model_name = "yolox_ensemble"
+        else:
            yolox_model_name = default_model_name
    except Exception:
        logger.warning(
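
The rewritten helper now probes the Triton model repository over gRPC and prefers "yolox_ensemble" whenever it is deployed. The backoff.on_predicate decorator retries with exponential delay, for up to 30 seconds, as long as the call returns a falsy result, and the multiprocessing cache memoizes the answer so workers do not repeat the probe. A usage sketch of the underlying lookup (the endpoint is hypothetical):

    import tritonclient.grpc as grpcclient

    client = grpcclient.InferenceServerClient("localhost:8001")  # hypothetical endpoint
    index = client.get_model_repository_index(as_json=True)
    names = [m["name"] for m in index.get("models", [])]
    model_name = "yolox_ensemble" if "yolox_ensemble" in names else "yolox"
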
nv_ingest_api/internal/primitives/nim/nim_client.py

@@ -8,7 +8,7 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
 from typing import Optional
-from typing import Tuple
+from typing import Tuple, Union
 
 import numpy as np
 import requests
@@ -90,6 +90,10 @@ class NimClient:
 
     def _fetch_max_batch_size(self, model_name, model_version: str = "") -> int:
         """Fetch the maximum batch size from the Triton model configuration in a thread-safe manner."""
+
+        if model_name == "yolox_ensemble":
+            model_name = "yolox"
+
         if model_name in self._max_batch_sizes:
             return self._max_batch_sizes[model_name]
@@ -178,7 +182,7 @@ class NimClient:
         max_requested_batch_size = kwargs.pop("max_batch_size", batch_size)
         force_requested_batch_size = kwargs.pop("force_max_batch_size", False)
         max_batch_size = (
-            min(batch_size, max_requested_batch_size)
+            max(1, min(batch_size, max_requested_batch_size))
             if not force_requested_batch_size
             else max_requested_batch_size
         )
@@ -233,7 +237,9 @@ class NimClient:
 
         return all_results
 
-    def _grpc_infer(self, formatted_input: np.ndarray, model_name: str, **kwargs) -> np.ndarray:
+    def _grpc_infer(
+        self, formatted_input: Union[list, list[np.ndarray]], model_name: str, **kwargs
+    ) -> Union[list, list[np.ndarray]]:
         """
         Perform inference using the gRPC protocol.
METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nv-ingest-api
-Version: 2025.7.17.dev20250717
+Version: 2025.7.19.dev20250719
 Summary: Python module with core document ingestion functions.
 Author-email: Jeremy Dyer <jdyer@nvidia.com>
 License: Apache License
RECORD

@@ -20,19 +20,19 @@ nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py,sha
 nv_ingest_api/internal/extract/html/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/extract/html/html_extractor.py,sha256=I9oWfj6_As4898GDDh0zsSuKxO3lBsvyYzhvUotjzJI,3282
 nv_ingest_api/internal/extract/image/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=KLCSkLUczIlaqIQxyXzKQnwm-HQqTJKMPafnTobOEQs,13873
+nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=gk-O-9wjZBoaLVE_6Erb4gMwsSFk4UtPQ2QLpMCW4H4,13212
 nv_ingest_api/internal/extract/image/image_extractor.py,sha256=4tUWinuFMN3ukWa2tZa2_LtzRiTyUAUCBF6BDkUEvm0,8705
 nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=i7zt_ow1gytU4hK2JCRg7T1wlbokaeuUpXX69LIQkzY,9687
-nv_ingest_api/internal/extract/image/table_extractor.py,sha256=djTRYgGpwhqWBaVFOerh8J6kVH-xGbUtIelcOFecx4o,13641
+nv_ingest_api/internal/extract/image/table_extractor.py,sha256=O0m3N2Tz9W6X7TBI4o-rbBXc8dFOf9zSZq1v9qC1U4M,13780
 nv_ingest_api/internal/extract/image/image_helpers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=P8rcl4YPyeWeMJg7u1yejD3k9EnDVEbJgfYEnJ4WO5c,15025
+nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=80jRhGzisHvQ9Ky3MKUMM7soKUmvZ5LqRVzwNYjgdPY,14988
 nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfSQ9T-Bn87beBkCOuGGjxGt8,2934
 nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
 nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
 nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
-nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=IVbNcH_phMiRSxnkZ04pGfQrPJ-x1zVR3hXyhxv7juc,22977
-nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=hUqLzQgCJOZIVXrP0JLH4jXLKDHQkXmLXh1Nc4KI3nI,23494
+nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=VrIfhDXzYVOwvuoQ9dkiYX-y0CHYhy_Ludadq0YpHqY,23132
+nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=SKmias2iZmAE6Q8WXxmFEjvLOZy-vXRoaRIPpi7Tuhs,22962
 nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
 nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
 nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=4bvN6LsPksLicI6jM0JqbJFiOZNHEcuc8MVVW4XfgV8,5875
@@ -48,19 +48,19 @@ nv_ingest_api/internal/primitives/control_message_task.py,sha256=nWVB3QsP6p8BKwH
 nv_ingest_api/internal/primitives/ingest_control_message.py,sha256=rvipBiiUaHuRhupFCFDCG8rv0PylSJibCiJ7rDeb98A,8514
 nv_ingest_api/internal/primitives/nim/__init__.py,sha256=i_i_fBR2EcRCh2Y19DF6GM3s_Q0VPgo_thPnhEIJUyg,266
 nv_ingest_api/internal/primitives/nim/default_values.py,sha256=W92XjfyeC6uuVxut6J7p00x1kpNsnXIDb97gSVytZJk,380
-nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=rtzqwHpYsEPuzL7aGIdke5P3_Gu1Z8MbxKDIuJA-L8I,16336
+nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=3jXCNIjVTILsATooxA5Yj0EIyyFo5PrzUzmhT4iYxNI,16481
 nv_ingest_api/internal/primitives/nim/nim_model_interface.py,sha256=wMEgoi79YQn_4338MVemkeZgM1J-vnz0aZWpvqDhib4,2392
 nv_ingest_api/internal/primitives/nim/model_interface/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1ExW5V6pXC1ZiHdobeG_BmbPr3rBbVJef13s,11003
 nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubkHs4WjnexM6rI0wkjWCsrVNEbA4Wjk2oKL9OYCU,1499
 nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
-nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=wgcwZJnIn3gKsqe4qhUa9V0gYp3NkIFV8R1qW7Zag1w,11571
+nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=iyGxAr4tG2UZ7LtXXoWO_kF-KsObhPrmZ46Nl0Mi-Ag,11592
 nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=WysjDZeegclO3mZgVcGOwzWbr8wSI4pWRiYD4iC2EXo,7098
 nv_ingest_api/internal/primitives/nim/model_interface/ocr.py,sha256=Vhim3py_rc5jA0BoKubwfekEqOwxUUePzcmc59pRuOk,21458
 nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=5PqD2JuHY2rwd-6SSB4axr2Dd79vm95sAEkcmI3U7ME,12977
 nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=lFhppNqrq5X_fzbCWKphvZQMzaJd3gHrkWsyJORzFrU,5010
 nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
-nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=nsfDQgeupBe9Tdf3S5sfNpYcObEwVlzCZdfg1ObAW88,49584
+nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=zpfEZIPctWhNfREnP6e77zffU8vs_RfnMprBj-2jXXk,42847
 nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/internal/primitives/tracing/latency.py,sha256=5kVTeYRbRdTlT_aI4MeS20N_S7mqCcLqZR6YHtxhXkY,2215
 nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagYDmkkA6rTXmQ-bmtLjoEguhg,3851
@@ -153,8 +153,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
 nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
 nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
-nv_ingest_api-2025.7.17.dev20250717.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-nv_ingest_api-2025.7.17.dev20250717.dist-info/METADATA,sha256=NkhXN1NC8IKy8OWMV5uJHFdcHY8BOj0dQ2IwnvidETk,13947
-nv_ingest_api-2025.7.17.dev20250717.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nv_ingest_api-2025.7.17.dev20250717.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
-nv_ingest_api-2025.7.17.dev20250717.dist-info/RECORD,,
+nv_ingest_api-2025.7.19.dev20250719.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nv_ingest_api-2025.7.19.dev20250719.dist-info/METADATA,sha256=SnVoo7ElFj94vFFFOFpqqPZWpq35LEkZuFZk0rhpxRw,13947
+nv_ingest_api-2025.7.19.dev20250719.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nv_ingest_api-2025.7.19.dev20250719.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
+nv_ingest_api-2025.7.19.dev20250719.dist-info/RECORD,,