nv-ingest-api 2025.10.4.dev20251004__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nv_ingest_api/internal/extract/image/chart_extractor.py +7 -3
- nv_ingest_api/internal/extract/image/infographic_extractor.py +7 -3
- nv_ingest_api/internal/extract/image/table_extractor.py +7 -3
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +6 -4
- nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +11 -4
- nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +4 -0
- nv_ingest_api/internal/primitives/nim/nim_client.py +158 -15
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +10 -1
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +10 -1
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +6 -4
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +9 -1
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +56 -1
- nv_ingest_api/internal/schemas/meta/metadata_schema.py +9 -0
- nv_ingest_api/internal/schemas/mixins.py +39 -0
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
- nv_ingest_api/internal/transform/embed_text.py +82 -0
- nv_ingest_api/util/dataloader/dataloader.py +20 -9
- nv_ingest_api/util/image_processing/transforms.py +67 -1
- nv_ingest_api/util/message_brokers/qos_scheduler.py +283 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +1 -0
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py +8 -2
- nv_ingest_api/util/service_clients/redis/redis_client.py +160 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +42 -3
- nv_ingest_api/util/string_processing/yaml.py +41 -4
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/METADATA +2 -1
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/RECORD +34 -32
- udfs/llm_summarizer_udf.py +132 -137
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/top_level.txt +0 -0
nv_ingest_api/internal/extract/image/chart_extractor.py

@@ -97,7 +97,7 @@ def _run_chart_inference(
             model_name="paddle",
             max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
         )
-    elif ocr_model_name
+    elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
         future_ocr_kwargs.update(
             model_name=ocr_model_name,
             input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
@@ -237,7 +237,9 @@ def _create_ocr_client(
     auth_token: str,
 ) -> NimClient:
     ocr_model_interface = (
-        NemoRetrieverOCRModelInterface()
+        NemoRetrieverOCRModelInterface()
+        if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
+        else PaddleOCRModelInterface()
     )

     ocr_client = create_inference_client(
@@ -245,7 +247,9 @@ def _create_ocr_client(
         model_interface=ocr_model_interface,
         auth_token=auth_token,
         infer_protocol=ocr_protocol,
-        enable_dynamic_batching=(
+        enable_dynamic_batching=(
+            True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
+        ),
         dynamic_batch_memory_budget_mb=32,
     )

nv_ingest_api/internal/extract/image/infographic_extractor.py

@@ -107,7 +107,7 @@ def _update_infographic_metadata(
             model_name="paddle",
             max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
         )
-    elif ocr_model_name
+    elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
         infer_kwargs.update(
             model_name=ocr_model_name,
             input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
@@ -152,7 +152,9 @@ def _create_ocr_client(
     auth_token: str,
 ) -> NimClient:
     ocr_model_interface = (
-        NemoRetrieverOCRModelInterface()
+        NemoRetrieverOCRModelInterface()
+        if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
+        else PaddleOCRModelInterface()
     )

     ocr_client = create_inference_client(
@@ -160,7 +162,9 @@ def _create_ocr_client(
         model_interface=ocr_model_interface,
         auth_token=auth_token,
         infer_protocol=ocr_protocol,
-        enable_dynamic_batching=(
+        enable_dynamic_batching=(
+            True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
+        ),
         dynamic_batch_memory_budget_mb=32,
     )

nv_ingest_api/internal/extract/image/table_extractor.py

@@ -99,7 +99,7 @@ def _run_inference(
             model_name="paddle",
             max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
         )
-    elif ocr_model_name
+    elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
         future_ocr_kwargs.update(
             model_name=ocr_model_name,
             input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
@@ -246,7 +246,9 @@ def _create_ocr_client(
     auth_token: str,
 ) -> NimClient:
     ocr_model_interface = (
-        NemoRetrieverOCRModelInterface()
+        NemoRetrieverOCRModelInterface()
+        if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
+        else PaddleOCRModelInterface()
     )

     ocr_client = create_inference_client(
@@ -254,7 +256,9 @@ def _create_ocr_client(
         model_interface=ocr_model_interface,
         auth_token=auth_token,
         infer_protocol=ocr_protocol,
-        enable_dynamic_batching=(
+        enable_dynamic_batching=(
+            True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
+        ),
         dynamic_batch_memory_budget_mb=32,
     )

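All three extractor diffs above (chart, infographic, table) apply the same scene-text routing rule. A minimal sketch of that rule in isolation, with a hypothetical helper name:

    SCENE_TEXT_MODELS = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}

    def uses_nemoretriever_ocr(ocr_model_name: str) -> bool:
        # Hypothetical helper: these three names route to NemoRetrieverOCRModelInterface
        # (with dynamic batching enabled); anything else keeps the PaddleOCRModelInterface path.
        return ocr_model_name in SCENE_TEXT_MODELS

    assert uses_nemoretriever_ocr("scene_text_wrapper")
    assert not uses_nemoretriever_ocr("paddle")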
nv_ingest_api/internal/extract/pdf/engines/pdfium.py

@@ -332,6 +332,7 @@ def _extract_page_elements(

     # Process each extracted element based on extraction flags
     for page_idx, page_element in page_element_results:
+        page_reading_index = page_idx + 1
         # Skip elements that shouldn't be extracted based on flags
         if (not extract_tables) and (page_element.type_string == "table"):
             continue
@@ -347,7 +348,7 @@ def _extract_page_elements(
         # Construct metadata for the page element
         page_element_meta = construct_page_element_metadata(
             page_element,
-
+            page_reading_index,
             page_count,
             source_metadata,
             base_unified_metadata,
@@ -473,6 +474,7 @@ def pdfium_extractor(
     for page_idx in range(page_count):
         page = doc.get_page(page_idx)
         page_width, page_height = page.get_size()
+        page_reading_index = page_idx + 1

         # Text extraction
         if extract_text:
@@ -481,7 +483,7 @@ def pdfium_extractor(
             text_meta = construct_text_metadata(
                 [page_text],
                 pdf_metadata.keywords,
-
+                page_reading_index,
                 -1,
                 -1,
                 -1,
@@ -499,7 +501,7 @@ def pdfium_extractor(
             image_data = _extract_page_images(
                 extract_images_method,
                 page,
-
+                page_reading_index,
                 page_width,
                 page_height,
                 page_count,
@@ -518,7 +520,7 @@ def pdfium_extractor(
                 base64_image, _ = scale_image_to_encoding_size(base64_image, max_base64_size=2**24 - 1)
                 image_meta = construct_image_metadata_from_base64(
                     base64_image,
-
+                    page_reading_index,
                     page_count,
                     source_metadata,
                     base_unified_metadata,
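The pdfium changes above thread a 1-based page_reading_index into the text, image, and page-element metadata constructors. A minimal sketch of the indexing convention these hunks establish:

    page_count = 3
    for page_idx in range(page_count):       # 0, 1, 2: pdfium's 0-based page index
        page_reading_index = page_idx + 1    # 1, 2, 3: human-facing page number used in metadata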
nv_ingest_api/internal/primitives/nim/model_interface/ocr.py

@@ -21,7 +21,10 @@ from nv_ingest_api.internal.primitives.nim.model_interface.helpers import prepro
 from nv_ingest_api.util.image_processing.transforms import base64_to_numpy

 DEFAULT_OCR_MODEL_NAME = "paddle"
-NEMORETRIEVER_OCR_MODEL_NAME = "
+NEMORETRIEVER_OCR_MODEL_NAME = "scene_text_wrapper"
+NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME = "scene_text_ensemble"
+NEMORETRIEVER_OCR_BLS_MODEL_NAME = "scene_text_python"
+

 logger = logging.getLogger(__name__)

@@ -231,7 +234,11 @@ class OCRModelInterfaceBase(ModelInterface):
         if not isinstance(response, np.ndarray):
             raise ValueError("Unexpected response format: response is not a NumPy array.")

-        if model_name
+        if model_name in [
+            NEMORETRIEVER_OCR_MODEL_NAME,
+            NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME,
+            NEMORETRIEVER_OCR_BLS_MODEL_NAME,
+        ]:
             response = response.transpose((1, 0))

         # If we have shape (3,), convert to (3, 1)
@@ -751,8 +758,8 @@ def get_ocr_model_name(ocr_grpc_endpoint=None, default_model_name=DEFAULT_OCR_MO
    if ocr_model_name is not None:
        return ocr_model_name

-    # 2. If no gRPC endpoint is provided, fall back to the default immediately.
-    if not ocr_grpc_endpoint:
+    # 2. If no gRPC endpoint is provided or the endpoint is a NVCF endpoint, fall back to the default immediately.
+    if (not ocr_grpc_endpoint) or ("grpc.nvcf.nvidia.com" in ocr_grpc_endpoint):
        logger.debug(f"No OCR gRPC endpoint provided. Falling back to default model name '{default_model_name}'.")
        return default_model_name

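The get_ocr_model_name change above widens the early-fallback condition to cover NVCF-hosted gRPC endpoints, presumably because those endpoints cannot be probed for model metadata the way a local Triton endpoint can. The predicate in isolation (function name and endpoint values hypothetical):

    def falls_back_to_default(ocr_grpc_endpoint):
        # Hypothetical standalone version of the new condition.
        return (not ocr_grpc_endpoint) or ("grpc.nvcf.nvidia.com" in ocr_grpc_endpoint)

    assert falls_back_to_default(None)
    assert falls_back_to_default("grpc.nvcf.nvidia.com:443")
    assert not falls_back_to_default("localhost:8001")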
nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py

@@ -355,6 +355,10 @@ def create_audio_inference_client(
     if (infer_protocol is None) and (grpc_endpoint and grpc_endpoint.strip()):
         infer_protocol = "grpc"

+    # Normalize protocol to lowercase for case-insensitive comparison
+    if infer_protocol:
+        infer_protocol = infer_protocol.lower()
+
     if infer_protocol == "http":
         raise ValueError("`http` endpoints are not supported for audio. Use `grpc`.")

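With the normalization above, mixed-case protocol values now behave like their lowercase forms; for example (value hypothetical):

    infer_protocol = "GRPC"
    if infer_protocol:
        infer_protocol = infer_protocol.lower()
    assert infer_protocol == "grpc"   # previously "GRPC" would have missed the "grpc" branches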
nv_ingest_api/internal/primitives/nim/nim_client.py

@@ -5,6 +5,7 @@
 import hashlib
 import json
 import logging
+import re
 import threading
 import time
 import queue
@@ -24,6 +25,12 @@ from nv_ingest_api.util.string_processing import generate_url

 logger = logging.getLogger(__name__)

+# Regex pattern to detect CUDA-related errors in Triton gRPC responses
+CUDA_ERROR_REGEX = re.compile(
+    r"(illegal memory access|invalid argument|failed to (copy|load|perform) .*: .*|TritonModelException: failed to copy data: .*)",  # noqa: E501
+    re.IGNORECASE,
+)
+
 # A simple structure to hold a request's data and its Future for the result
 InferenceRequest = namedtuple("InferenceRequest", ["data", "future", "model_name", "dims", "kwargs"])

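A quick illustration of the strings the new CUDA_ERROR_REGEX is meant to catch (sample messages hypothetical, modeled on typical Triton error text):

    import re

    CUDA_ERROR_REGEX = re.compile(
        r"(illegal memory access|invalid argument|failed to (copy|load|perform) .*: .*|TritonModelException: failed to copy data: .*)",
        re.IGNORECASE,
    )

    assert CUDA_ERROR_REGEX.search("an illegal memory access was encountered")
    assert CUDA_ERROR_REGEX.search("Failed to copy output tensor: cudaMemcpy failed")
    assert not CUDA_ERROR_REGEX.search("model is not ready")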
nv_ingest_api/internal/primitives/nim/nim_client.py (continued)

@@ -40,7 +47,7 @@ class NimClient:
         endpoints: Tuple[str, str],
         auth_token: Optional[str] = None,
         timeout: float = 120.0,
-        max_retries: int =
+        max_retries: int = 10,
         max_429_retries: int = 5,
         enable_dynamic_batching: bool = False,
         dynamic_batch_timeout: float = 0.1,  # 100 milliseconds
@@ -60,11 +67,11 @@ class NimClient:
         auth_token : str, optional
             Authorization token for HTTP requests (default: None).
         timeout : float, optional
-            Timeout for HTTP requests in seconds (default:
+            Timeout for HTTP requests in seconds (default: 120.0).
         max_retries : int, optional
-            The maximum number of retries for non-429 server-side errors (default:
+            The maximum number of retries for non-429 server-side errors (default: 10).
         max_429_retries : int, optional
-            The maximum number of retries specifically for 429 errors (default:
+            The maximum number of retries specifically for 429 errors (default: 5).

         Raises
         ------
@@ -121,9 +128,6 @@ class NimClient:
         if model_name == "yolox_ensemble":
             model_name = "yolox"

-        if model_name == "scene_text_ensemble":
-            model_name = "scene_text_pre"
-
         if model_name in self._max_batch_sizes:
             return self._max_batch_sizes[model_name]

@@ -326,16 +330,101 @@ class NimClient:

         outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]

-
-
-
+        base_delay = 2.0
+        attempt = 0
+        retries_429 = 0
+        max_grpc_retries = self.max_429_retries

-
+        while attempt < self.max_retries:
+            try:
+                response = self.client.infer(
+                    model_name=model_name, parameters=parameters, inputs=input_tensors, outputs=outputs
+                )

-
-
-
-
+                logger.debug(f"gRPC inference response: {response}")
+
+                if len(outputs) == 1:
+                    return response.as_numpy(outputs[0].name())
+                else:
+                    return [response.as_numpy(output.name()) for output in outputs]
+
+            except grpcclient.InferenceServerException as e:
+                status = str(e.status())
+                message = e.message()
+
+                # Handle CUDA memory errors
+                if status == "StatusCode.INTERNAL":
+                    if CUDA_ERROR_REGEX.search(message):
+                        logger.warning(
+                            f"Received gRPC INTERNAL error with CUDA-related message for model '{model_name}'. "
+                            f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
+                        )
+                        if attempt >= self.max_retries - 1:
+                            logger.error(f"Max retries exceeded for CUDA errors on model '{model_name}'.")
+                            raise e
+                        # Try to reload models before retrying
+                        model_reload_succeeded = reload_models(client=self.client, client_timeout=self.timeout)
+                        if not model_reload_succeeded:
+                            logger.error(f"Failed to reload models for model '{model_name}'.")
+                    else:
+                        logger.warning(
+                            f"Received gRPC INTERNAL error for model '{model_name}'. "
+                            f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
+                        )
+                        if attempt >= self.max_retries - 1:
+                            logger.error(f"Max retries exceeded for INTERNAL error on model '{model_name}'.")
+                            raise e
+
+                    # Common retry logic for both CUDA and non-CUDA INTERNAL errors
+                    backoff_time = base_delay * (2**attempt)
+                    time.sleep(backoff_time)
+                    attempt += 1
+                    continue
+
+                # Handle errors that can occur after model reload (NOT_FOUND, model not loaded)
+                if status == "StatusCode.NOT_FOUND":
+                    logger.warning(
+                        f"Received gRPC {status} error for model '{model_name}'. "
+                        f"Attempt {attempt + 1} of {self.max_retries}. Message: {message[:500]}"
+                    )
+                    if attempt >= self.max_retries - 1:
+                        logger.error(f"Max retries exceeded for model not found errors on model '{model_name}'.")
+                        raise e
+
+                    # Retry with exponential backoff WITHOUT reloading
+                    backoff_time = base_delay * (2**attempt)
+                    logger.info(
+                        f"Retrying after {backoff_time}s backoff for model not found error on model '{model_name}'."
+                    )
+                    time.sleep(backoff_time)
+                    attempt += 1
+                    continue
+
+                if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in message.lower():
+                    retries_429 += 1
+                    logger.warning(
+                        f"Received gRPC {status} for model '{model_name}'. "
+                        f"Attempt {retries_429} of {max_grpc_retries}."
+                    )
+                    if retries_429 >= max_grpc_retries:
+                        logger.error(f"Max retries for gRPC {status} exceeded for model '{model_name}'.")
+                        raise
+
+                    backoff_time = base_delay * (2**retries_429)
+                    time.sleep(backoff_time)
+                    continue
+
+                # For other server-side errors (e.g., INVALID_ARGUMENT, etc.),
+                # fail fast as retrying will not help
+                logger.error(
+                    f"Received non-retryable gRPC error {status} from Triton for model '{model_name}': {message}"
+                )
+                raise
+
+            except Exception as e:
+                # Catch any other unexpected exceptions (e.g., network issues not caught by Triton client)
+                logger.error(f"An unexpected error occurred during gRPC inference for model '{model_name}': {e}")
+                raise

     def _http_infer(self, formatted_input: dict) -> dict:
         """
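The retry loop above uses exponential backoff from a fixed base_delay of 2.0 seconds; the delay schedule it produces on the INTERNAL and NOT_FOUND paths (attempt is 0-based):

    base_delay = 2.0
    for attempt in range(4):
        backoff_time = base_delay * (2**attempt)
        print(backoff_time)   # 2.0, 4.0, 8.0, 16.0 seconds, slept after failed attempts 1..4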
nv_ingest_api/internal/primitives/nim/nim_client.py (continued)

@@ -648,3 +737,57 @@ class NimClientManager:
 def get_nim_client_manager(*args, **kwargs) -> NimClientManager:
     """Returns the singleton instance of the NimClientManager."""
     return NimClientManager(*args, **kwargs)
+
+
+def reload_models(client: grpcclient.InferenceServerClient, exclude: list[str] = [], client_timeout: int = 120) -> bool:
+    """
+    Reloads all models in the Triton server except for the models in the exclude list.
+
+    Parameters
+    ----------
+    client : grpcclient.InferenceServerClient
+        The gRPC client connected to the Triton server.
+    exclude : list[str], optional
+        A list of model names to exclude from reloading.
+    client_timeout : int, optional
+        Timeout for client operations in seconds (default: 120).
+
+    Returns
+    -------
+    bool
+        True if all models were successfully reloaded, False otherwise.
+    """
+    model_index = client.get_model_repository_index()
+    exclude = set(exclude)
+    names = [m.name for m in model_index.models if m.name not in exclude]
+
+    logger.info(f"Reloading {len(names)} model(s): {', '.join(names) if names else '(none)'}")
+
+    # 1) Unload
+    for name in names:
+        try:
+            client.unload_model(name)
+        except grpcclient.InferenceServerException as e:
+            msg = e.message()
+            if "explicit model load / unload" in msg.lower():
+                status = e.status()
+                logger.warning(
+                    f"[SKIP Model Reload] Explicit model control disabled; cannot unload '{name}'. Status: {status}."
+                )
+                return False
+            logger.error(f"[ERROR] Failed to unload '{name}': {msg}")
+            return False
+
+    # 2) Load
+    for name in names:
+        client.load_model(name)
+
+    # 3) Readiness check
+    for name in names:
+        ready = client.is_model_ready(model_name=name, client_timeout=client_timeout)
+        if not ready:
+            logger.warning(f"[Warning] Triton Not ready: {name}")
+            return False
+
+    logger.info("✅ Reload of models complete.")
+    return True
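A minimal usage sketch for the new reload_models helper, assuming a locally reachable Triton server with explicit model control enabled (URL and exclude list hypothetical):

    import tritonclient.grpc as grpcclient

    from nv_ingest_api.internal.primitives.nim.nim_client import reload_models

    client = grpcclient.InferenceServerClient(url="localhost:8001")
    ok = reload_models(client=client, exclude=["yolox"], client_timeout=120)
    if not ok:
        # Explicit model control may be disabled, or a model failed its readiness check.
        print("model reload skipped or failed")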
nv_ingest_api/internal/schemas/extract/extract_audio_schema.py

@@ -10,10 +10,12 @@ from typing import Tuple
 from pydantic import BaseModel, Field
 from pydantic import root_validator

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class AudioConfigSchema(BaseModel):
+class AudioConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for audio extraction endpoints and options.

@@ -87,13 +89,13 @@ class AudioConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
         protocol_name = "audio_infer_protocol"
         protocol_value = values.get(protocol_name)

         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""

-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value

         return values
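The LowercaseProtocolMixin itself lives in the new nv_ingest_api/internal/schemas/mixins.py (+39 lines, not shown in this diff). A plausible minimal sketch, assuming pydantic v2 and that the mixin lowercases any *_infer_protocol string field; the actual implementation may differ:

    from pydantic import BaseModel, field_validator

    class LowercaseProtocolMixin(BaseModel):
        # Hypothetical sketch only: normalize protocol fields so "GRPC" == "grpc".
        @field_validator("*", mode="before")
        @classmethod
        def _lowercase_protocol_fields(cls, value, info):
            if info.field_name and info.field_name.endswith("_infer_protocol") and isinstance(value, str):
                return value.lower()
            return value

Such a mixin would explain why each schema's explicit protocol_value.lower() call is removed in the hunks above and below.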
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py

@@ -8,10 +8,12 @@ from typing import Tuple

 from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class ChartExtractorConfigSchema(BaseModel):
+class ChartExtractorConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for chart extraction service endpoints and options.

@@ -96,6 +98,13 @@ class ChartExtractorConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
+        protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
+        protocol_value = values.get(protocol_name)
+        if not protocol_value:
+            protocol_value = "http" if http_service else "grpc" if grpc_service else ""
+        values[protocol_name] = protocol_value
+
         return values

     model_config = ConfigDict(extra="forbid")
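The auto-inference block added to the chart schema (and repeated in the infographic and table schemas below) derives the protocol field name from the endpoint field name and picks a protocol from whichever endpoint is set, preferring HTTP. In isolation (values hypothetical):

    endpoint_name = "yolox_endpoints"                     # hypothetical field name
    protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
    assert protocol_name == "yolox_infer_protocol"

    grpc_service, http_service = "", "http://ocr:8000/v1/infer"
    protocol_value = "http" if http_service else "grpc" if grpc_service else ""
    assert protocol_value == "http"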
nv_ingest_api/internal/schemas/extract/extract_docx_schema.py

@@ -9,10 +9,12 @@ from typing import Tuple

 from pydantic import model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class DocxConfigSchema(BaseModel):
+class DocxConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for docx extraction endpoints and options.

@@ -85,11 +87,11 @@ class DocxConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
         protocol_name = f"{model_name}_infer_protocol"
         protocol_value = values.get(protocol_name)
         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value

         return values
nv_ingest_api/internal/schemas/extract/extract_image_schema.py

@@ -9,10 +9,12 @@ from typing import Tuple

 from pydantic import model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class ImageConfigSchema(BaseModel):
+class ImageConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for image extraction endpoints and options.

@@ -85,11 +87,11 @@ class ImageConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
         protocol_name = f"{model_name}_infer_protocol"
         protocol_value = values.get(protocol_name)
         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value

         return values
nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py

@@ -8,10 +8,12 @@ from typing import Tuple

 from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class InfographicExtractorConfigSchema(BaseModel):
+class InfographicExtractorConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for infographic extraction service endpoints and options.

@@ -89,6 +91,13 @@ class InfographicExtractorConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
+        protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
+        protocol_value = values.get(protocol_name)
+        if not protocol_value:
+            protocol_value = "http" if http_service else "grpc" if grpc_service else ""
+        values[protocol_name] = protocol_value
+
         return values

     model_config = ConfigDict(extra="forbid")
nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py

@@ -9,10 +9,12 @@ from typing import Tuple

 from pydantic import model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class PDFiumConfigSchema(BaseModel):
+class PDFiumConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for PDFium endpoints and options.

@@ -82,11 +84,11 @@ class PDFiumConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
         protocol_name = f"{model_name}_infer_protocol"
         protocol_value = values.get(protocol_name)
         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value

         return values
@@ -94,7 +96,7 @@ class PDFiumConfigSchema(BaseModel):
     model_config = ConfigDict(extra="forbid")


-class NemoRetrieverParseConfigSchema(BaseModel):
+class NemoRetrieverParseConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for NemoRetrieverParse endpoints and options.

@@ -170,11 +172,11 @@ class NemoRetrieverParseConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
         protocol_name = f"{model_name}_infer_protocol"
         protocol_value = values.get(protocol_name)
         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value

         return values
nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py

@@ -9,10 +9,12 @@ from typing import Tuple

 from pydantic import model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
+
 logger = logging.getLogger(__name__)


-class PPTXConfigSchema(BaseModel):
+class PPTXConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for docx extraction endpoints and options.

@@ -85,11 +87,11 @@ class PPTXConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
         protocol_name = f"{model_name}_infer_protocol"
         protocol_value = values.get(protocol_name)
         if not protocol_value:
             protocol_value = "http" if http_service else "grpc" if grpc_service else ""
-        protocol_value = protocol_value.lower()
         values[protocol_name] = protocol_value

         return values
nv_ingest_api/internal/schemas/extract/extract_table_schema.py

@@ -9,11 +9,12 @@ from typing import Tuple

 from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field

+from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin

 logger = logging.getLogger(__name__)


-class TableExtractorConfigSchema(BaseModel):
+class TableExtractorConfigSchema(LowercaseProtocolMixin):
     """
     Configuration schema for the table extraction stage settings.

@@ -91,6 +92,13 @@ class TableExtractorConfigSchema(BaseModel):

         values[endpoint_name] = (grpc_service, http_service)

+        # Auto-infer protocol from endpoints if not specified
+        protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
+        protocol_value = values.get(protocol_name)
+        if not protocol_value:
+            protocol_value = "http" if http_service else "grpc" if grpc_service else ""
+        values[protocol_name] = protocol_value
+
         return values

     model_config = ConfigDict(extra="forbid")