nv-ingest-api 2025.4.20.dev20250420__py3-none-any.whl → 2025.4.22.dev20250422__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of nv-ingest-api might be problematic.
- nv_ingest_api/__init__.py +3 -0
- nv_ingest_api/interface/__init__.py +215 -0
- nv_ingest_api/interface/extract.py +972 -0
- nv_ingest_api/interface/mutate.py +154 -0
- nv_ingest_api/interface/store.py +218 -0
- nv_ingest_api/interface/transform.py +382 -0
- nv_ingest_api/interface/utility.py +200 -0
- nv_ingest_api/internal/enums/__init__.py +3 -0
- nv_ingest_api/internal/enums/common.py +494 -0
- nv_ingest_api/internal/extract/__init__.py +3 -0
- nv_ingest_api/internal/extract/audio/__init__.py +3 -0
- nv_ingest_api/internal/extract/audio/audio_extraction.py +149 -0
- nv_ingest_api/internal/extract/docx/__init__.py +5 -0
- nv_ingest_api/internal/extract/docx/docx_extractor.py +205 -0
- nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +3 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +122 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +895 -0
- nv_ingest_api/internal/extract/image/__init__.py +3 -0
- nv_ingest_api/internal/extract/image/chart_extractor.py +353 -0
- nv_ingest_api/internal/extract/image/image_extractor.py +204 -0
- nv_ingest_api/internal/extract/image/image_helpers/__init__.py +3 -0
- nv_ingest_api/internal/extract/image/image_helpers/common.py +403 -0
- nv_ingest_api/internal/extract/image/infographic_extractor.py +253 -0
- nv_ingest_api/internal/extract/image/table_extractor.py +344 -0
- nv_ingest_api/internal/extract/pdf/__init__.py +3 -0
- nv_ingest_api/internal/extract/pdf/engines/__init__.py +19 -0
- nv_ingest_api/internal/extract/pdf/engines/adobe.py +484 -0
- nv_ingest_api/internal/extract/pdf/engines/llama.py +243 -0
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +597 -0
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +146 -0
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +603 -0
- nv_ingest_api/internal/extract/pdf/engines/tika.py +96 -0
- nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +426 -0
- nv_ingest_api/internal/extract/pdf/pdf_extractor.py +74 -0
- nv_ingest_api/internal/extract/pptx/__init__.py +5 -0
- nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +799 -0
- nv_ingest_api/internal/extract/pptx/pptx_extractor.py +187 -0
- nv_ingest_api/internal/mutate/__init__.py +3 -0
- nv_ingest_api/internal/mutate/deduplicate.py +110 -0
- nv_ingest_api/internal/mutate/filter.py +133 -0
- nv_ingest_api/internal/primitives/__init__.py +0 -0
- nv_ingest_api/{primitives → internal/primitives}/control_message_task.py +4 -0
- nv_ingest_api/{primitives → internal/primitives}/ingest_control_message.py +5 -2
- nv_ingest_api/internal/primitives/nim/__init__.py +8 -0
- nv_ingest_api/internal/primitives/nim/default_values.py +15 -0
- nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +3 -0
- nv_ingest_api/internal/primitives/nim/model_interface/cached.py +274 -0
- nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +56 -0
- nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +270 -0
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +275 -0
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +238 -0
- nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +462 -0
- nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +367 -0
- nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +132 -0
- nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +152 -0
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +1400 -0
- nv_ingest_api/internal/primitives/nim/nim_client.py +344 -0
- nv_ingest_api/internal/primitives/nim/nim_model_interface.py +81 -0
- nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- nv_ingest_api/internal/primitives/tracing/latency.py +69 -0
- nv_ingest_api/internal/primitives/tracing/logging.py +96 -0
- nv_ingest_api/internal/primitives/tracing/tagging.py +197 -0
- nv_ingest_api/internal/schemas/__init__.py +3 -0
- nv_ingest_api/internal/schemas/extract/__init__.py +3 -0
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +130 -0
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +135 -0
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +128 -0
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +218 -0
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +129 -0
- nv_ingest_api/internal/schemas/message_brokers/__init__.py +3 -0
- nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +23 -0
- nv_ingest_api/internal/schemas/message_brokers/request_schema.py +34 -0
- nv_ingest_api/internal/schemas/message_brokers/response_schema.py +19 -0
- nv_ingest_api/internal/schemas/meta/__init__.py +3 -0
- nv_ingest_api/internal/schemas/meta/base_model_noext.py +11 -0
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +237 -0
- nv_ingest_api/internal/schemas/meta/metadata_schema.py +221 -0
- nv_ingest_api/internal/schemas/mutate/__init__.py +3 -0
- nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +16 -0
- nv_ingest_api/internal/schemas/store/__init__.py +3 -0
- nv_ingest_api/internal/schemas/store/store_embedding_schema.py +28 -0
- nv_ingest_api/internal/schemas/store/store_image_schema.py +30 -0
- nv_ingest_api/internal/schemas/transform/__init__.py +3 -0
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +15 -0
- nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +17 -0
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +25 -0
- nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +22 -0
- nv_ingest_api/internal/store/__init__.py +3 -0
- nv_ingest_api/internal/store/embed_text_upload.py +236 -0
- nv_ingest_api/internal/store/image_upload.py +232 -0
- nv_ingest_api/internal/transform/__init__.py +3 -0
- nv_ingest_api/internal/transform/caption_image.py +205 -0
- nv_ingest_api/internal/transform/embed_text.py +496 -0
- nv_ingest_api/internal/transform/split_text.py +157 -0
- nv_ingest_api/util/__init__.py +0 -0
- nv_ingest_api/util/control_message/__init__.py +0 -0
- nv_ingest_api/util/control_message/validators.py +47 -0
- nv_ingest_api/util/converters/__init__.py +0 -0
- nv_ingest_api/util/converters/bytetools.py +78 -0
- nv_ingest_api/util/converters/containers.py +65 -0
- nv_ingest_api/util/converters/datetools.py +90 -0
- nv_ingest_api/util/converters/dftools.py +127 -0
- nv_ingest_api/util/converters/formats.py +64 -0
- nv_ingest_api/util/converters/type_mappings.py +27 -0
- nv_ingest_api/util/detectors/__init__.py +5 -0
- nv_ingest_api/util/detectors/language.py +38 -0
- nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- nv_ingest_api/util/exception_handlers/converters.py +72 -0
- nv_ingest_api/util/exception_handlers/decorators.py +223 -0
- nv_ingest_api/util/exception_handlers/detectors.py +74 -0
- nv_ingest_api/util/exception_handlers/pdf.py +116 -0
- nv_ingest_api/util/exception_handlers/schemas.py +68 -0
- nv_ingest_api/util/image_processing/__init__.py +5 -0
- nv_ingest_api/util/image_processing/clustering.py +260 -0
- nv_ingest_api/util/image_processing/processing.py +179 -0
- nv_ingest_api/util/image_processing/table_and_chart.py +449 -0
- nv_ingest_api/util/image_processing/transforms.py +407 -0
- nv_ingest_api/util/logging/__init__.py +0 -0
- nv_ingest_api/util/logging/configuration.py +31 -0
- nv_ingest_api/util/message_brokers/__init__.py +3 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +9 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +465 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +71 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +451 -0
- nv_ingest_api/util/metadata/__init__.py +5 -0
- nv_ingest_api/util/metadata/aggregators.py +469 -0
- nv_ingest_api/util/multi_processing/__init__.py +8 -0
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py +194 -0
- nv_ingest_api/util/nim/__init__.py +56 -0
- nv_ingest_api/util/pdf/__init__.py +3 -0
- nv_ingest_api/util/pdf/pdfium.py +427 -0
- nv_ingest_api/util/schema/__init__.py +0 -0
- nv_ingest_api/util/schema/schema_validator.py +10 -0
- nv_ingest_api/util/service_clients/__init__.py +3 -0
- nv_ingest_api/util/service_clients/client_base.py +86 -0
- nv_ingest_api/util/service_clients/kafka/__init__.py +3 -0
- nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- nv_ingest_api/util/service_clients/redis/redis_client.py +823 -0
- nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +531 -0
- nv_ingest_api/util/string_processing/__init__.py +51 -0
- {nv_ingest_api-2025.4.20.dev20250420.dist-info → nv_ingest_api-2025.4.22.dev20250422.dist-info}/METADATA +1 -1
- nv_ingest_api-2025.4.22.dev20250422.dist-info/RECORD +152 -0
- nv_ingest_api-2025.4.20.dev20250420.dist-info/RECORD +0 -9
- /nv_ingest_api/{primitives → internal}/__init__.py +0 -0
- {nv_ingest_api-2025.4.20.dev20250420.dist-info → nv_ingest_api-2025.4.22.dev20250422.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.4.20.dev20250420.dist-info → nv_ingest_api-2025.4.22.dev20250422.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.4.20.dev20250420.dist-info → nv_ingest_api-2025.4.22.dev20250422.dist-info}/top_level.txt +0 -0

nv_ingest_api/util/service_clients/rest/rest_client.py
@@ -0,0 +1,531 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
import re
import time
from typing import Any, Union, Tuple, Optional, Dict, Callable
from urllib.parse import urlparse

import requests

from nv_ingest_api.internal.schemas.message_brokers.response_schema import ResponseSchema
from nv_ingest_api.util.service_clients.client_base import MessageBrokerClientBase

logger = logging.getLogger(__name__)

# HTTP Response Statuses that result in marking submission as failed
# 4XX - Any 4XX status is considered a client derived error and will result in failure
# 5XX - Not all 500's are terminal but most are. Those which are listed below
_TERMINAL_RESPONSE_STATUSES = [
    400,
    401,
    402,
    403,
    404,
    405,
    406,
    407,
    408,
    409,
    410,
    411,
    412,
    413,
    414,
    415,
    416,
    417,
    418,
    421,
    422,
    423,
    424,
    425,
    426,
    428,
    429,
    431,
    451,
    500,
    501,
    503,
    505,
    506,
    507,
    508,
    510,
    511,
]


class RestClient(MessageBrokerClientBase):
    """
    A client for interfacing with an HTTP endpoint (e.g., nv-ingest), providing mechanisms for sending
    and receiving messages with retry logic using the `requests` library by default, but allowing a custom
    HTTP client allocator.

    Extends MessageBrokerClientBase for interface compatibility.
    """

    def __init__(
        self,
        host: str,
        port: int,
        max_retries: int = 0,
        max_backoff: int = 32,
        default_connect_timeout: float = 300.0,
        default_read_timeout: Optional[float] = None,
        http_allocator: Optional[Callable[[], Any]] = None,
        **kwargs,
    ) -> None:
        """
        Initializes the RestClient.

        By default, uses `requests.Session`. If `http_allocator` is provided, it will be called to instantiate
        the client. If a custom allocator is used, the internal methods (`fetch_message`, `submit_message`)
        might need adjustments if the allocated client's API differs significantly from `requests.Session`.

        Parameters
        ----------
        host : str
            The hostname or IP address of the HTTP server.
        port : int
            The port number of the HTTP server.
        max_retries : int, optional
            Maximum number of retry attempts for connection errors or specific retryable HTTP statuses. Default is 0.
        max_backoff : int, optional
            Maximum backoff delay between retries, in seconds. Default is 32.
        default_connect_timeout : float, optional
            Default timeout in seconds for establishing a connection. Default is 300.0.
        default_read_timeout : float, optional
            Default timeout in seconds for waiting for data after connection. Default is None.
        http_allocator : Optional[Callable[[], Any]], optional
            A callable that returns an HTTP client instance. If None, `requests.Session()` is used.

        Returns
        -------
        None
        """
        self._host: str = host
        self._port: int = port
        self._max_retries: int = max_retries
        self._max_backoff: int = max_backoff
        self._default_connect_timeout: float = default_connect_timeout
        self._default_read_timeout: Optional[float] = default_read_timeout
        self._http_allocator: Optional[Callable[[], Any]] = http_allocator

        self._timeout: Tuple[float, Optional[float]] = (self._default_connect_timeout, default_read_timeout)

        if self._http_allocator is None:
            self._client: Any = requests.Session()
            logger.debug("RestClient initialized using default requests.Session.")
        else:
            try:
                self._client = self._http_allocator()
                logger.debug(f"RestClient initialized using provided http_allocator: {self._http_allocator.__name__}")
                if not isinstance(self._client, requests.Session):
                    logger.warning(
                        "Provided http_allocator does not create a requests.Session. "
                        "Internal HTTP calls may fail if the client API is incompatible."
                    )
            except Exception as e:
                logger.exception(
                    f"Failed to instantiate client using provided http_allocator: {e}. "
                    f"Falling back to requests.Session."
                )
                self._client = requests.Session()

        self._submit_endpoint: str = "/v1/submit_job"
        self._fetch_endpoint: str = "/v1/fetch_job"
        self._base_url: str = kwargs.get("base_url") or self._generate_url(self._host, self._port)
        self._headers = kwargs.get("headers", {})
        self._auth = kwargs.get("auth", None)

        logger.debug(f"RestClient base URL set to: {self._base_url}")

    @staticmethod
    def _generate_url(host: str, port: int) -> str:
        """
        Constructs a base URL from host and port, intelligently handling schemes and existing ports.

        Parameters
        ----------
        host : str
            Hostname, IP address, or full URL (e.g., "localhost", "192.168.1.100",
            "http://example.com", "https://api.example.com:8443/v1").
        port : int
            The default port number to use if the host string does not explicitly specify one.

        Returns
        -------
        str
            A fully constructed base URL string, including scheme, hostname, port,
            and any original path, without a trailing slash.

        Raises
        ------
        ValueError
            If the host string appears to be a URL but lacks a valid hostname.
        """
        url_str: str = str(host).strip()
        scheme: str = "http"
        parsed_path: Optional[str] = None
        effective_port: int = port
        hostname: Optional[str] = None

        if re.match(r"^https?://", url_str, re.IGNORECASE):
            parsed_url = urlparse(url_str)
            hostname = parsed_url.hostname
            if hostname is None:
                raise ValueError(f"Invalid URL provided in host string: '{url_str}'. Could not parse a valid hostname.")
            scheme = parsed_url.scheme
            if parsed_url.port is not None:
                effective_port = parsed_url.port
            else:
                effective_port = port
            if parsed_url.path and parsed_url.path.strip("/"):
                parsed_path = parsed_url.path
        else:
            hostname = url_str
            effective_port = port

        if not hostname:
            raise ValueError(f"Could not determine a valid hostname from input: '{host}'")

        base_url: str = f"{scheme}://{hostname}:{effective_port}"
        if parsed_path:
            if not parsed_path.startswith("/"):
                parsed_path = "/" + parsed_path
            base_url += parsed_path

        final_url: str = base_url.rstrip("/")
        logger.debug(f"Generated base URL: {final_url}")
        return final_url

    @property
    def max_retries(self) -> int:
        """
        Maximum number of retry attempts configured for operations.

        Returns
        -------
        int
            The maximum number of retries.
        """
        return self._max_retries

    @max_retries.setter
    def max_retries(self, value: int) -> None:
        """
        Sets the maximum number of retry attempts.

        Parameters
        ----------
        value : int
            The new maximum number of retries. Must be a non-negative integer.

        Raises
        ------
        ValueError
            If value is not a non-negative integer.
        """
        if not isinstance(value, int) or value < 0:
            raise ValueError("max_retries must be a non-negative integer.")
        self._max_retries = value

    def get_client(self) -> Any:
        """
        Returns the underlying HTTP client instance.

        Returns
        -------
        Any
            The active HTTP client instance.
        """
        return self._client

    def ping(self) -> "ResponseSchema":
        """
        Checks if the HTTP server endpoint is responsive using an HTTP GET request.

        Returns
        -------
        ResponseSchema
            An object encapsulating the outcome:
            - response_code = 0 indicates success (HTTP status code < 400).
            - response_code = 1 indicates failure, with details in response_reason.
        """
        ping_timeout: Tuple[float, float] = (min(self._default_connect_timeout, 5.0), 10.0)
        logger.debug(f"Attempting to ping server at {self._base_url} with timeout {ping_timeout}")
        try:
            if isinstance(self._client, requests.Session):
                response: requests.Response = self._client.get(self._base_url, timeout=ping_timeout)
                response.raise_for_status()
                logger.debug(f"Ping successful to {self._base_url} (Status: {response.status_code})")
                return ResponseSchema(response_code=0, response_reason="Ping OK")
        except requests.exceptions.RequestException as e:
            error_reason: str = f"Ping failed due to RequestException for {self._base_url}: {e}"
            logger.warning(error_reason)
            return ResponseSchema(response_code=1, response_reason=error_reason)
        except Exception as e:
            error_reason: str = f"Unexpected error during ping to {self._base_url}: {e}"
            logger.exception(error_reason)
            return ResponseSchema(response_code=1, response_reason=error_reason)

    def fetch_message(
        self, job_id: str, timeout: Optional[Union[float, Tuple[float, float]]] = None
    ) -> "ResponseSchema":
        """
        Fetches a job result message from the server's fetch endpoint.

        Handles retries for connection errors and non-terminal HTTP errors based on the max_retries configuration.
        Specific HTTP statuses are treated as immediate failures (terminal) or as job not ready (HTTP 202).

        Parameters
        ----------
        job_id : str
            The server-assigned identifier of the job to fetch.
        timeout : float or tuple of float, optional
            Specific timeout override for this request.

        Returns
        -------
        ResponseSchema
            - response_code = 0: Success (HTTP 200) with the job result.
            - response_code = 1: Terminal failure (e.g., 404, 400, 5xx, or max retries exceeded).
            - response_code = 2: Job not ready (HTTP 202).

        Raises
        ------
        TypeError
            If the configured client does not support the required HTTP GET method.
        """
        # Ensure headers are included
        headers = {"Content-Type": "application/json"}
        headers.update(self._headers)

        retries: int = 0
        url: str = f"{self._base_url}{self._fetch_endpoint}/{job_id}"
        req_timeout: Tuple[float, Optional[float]] = self._timeout

        while True:
            result: Optional[Any] = None
            trace_id: Optional[str] = None
            response_code: int = -1

            try:
                if isinstance(self._client, requests.Session):
                    with self._client.get(
                        url, timeout=req_timeout, headers=headers, stream=True, auth=self._auth
                    ) as result:
                        response_code = result.status_code
                        response_text = result.text

                        if response_code in _TERMINAL_RESPONSE_STATUSES:
                            error_reason: str = f"Terminal response code {response_code} fetching {job_id}."
                            logger.error(f"{error_reason} Response: {response_text[:200]}")
                            return ResponseSchema(
                                response_code=1, response_reason=error_reason, response=response_text, trace_id=trace_id
                            )
                        elif response_code == 200:
                            try:
                                full_response: str = b"".join(c for c in result.iter_content(1024 * 1024) if c).decode(
                                    "utf-8"
                                )
                                return ResponseSchema(
                                    response_code=0, response_reason="OK", response=full_response, trace_id=trace_id
                                )
                            except Exception as e:
                                logger.error(f"Stream processing error for {job_id}: {e}")
                                return ResponseSchema(
                                    response_code=1, response_reason=f"Stream processing error: {e}", trace_id=trace_id
                                )
                        elif response_code == 202:
                            logger.debug(f"Job {job_id} not ready (202)")
                            return ResponseSchema(
                                response_code=2, response_reason="Job not ready yet. Retry later.", trace_id=trace_id
                            )
                        else:
                            logger.warning(f"Unexpected status {response_code} for {job_id}. Retrying if possible.")
                else:
                    raise TypeError(
                        f"Unsupported client type for fetch_message: {type(self._client)}. "
                        f"Requires a requests.Session compatible API."
                    )
            except requests.exceptions.RequestException as err:
                logger.debug(
                    f"RequestException fetching {job_id}: {err}. "
                    f"Attempting retry ({retries + 1}/{self._max_retries})..."
                )
                try:
                    retries = self.perform_retry_backoff(retries)
                    continue
                except RuntimeError as rte:
                    logger.error(f"Max retries hit fetching {job_id} after RequestException: {rte}")
                    return ResponseSchema(response_code=1, response_reason=str(rte), response=str(err))
            except Exception as e:
                logger.exception(f"Unexpected error fetching {job_id}: {e}")
                return ResponseSchema(response_code=1, response_reason=f"Unexpected fetch error: {e}")

            try:
                retries = self.perform_retry_backoff(retries)
                continue
            except RuntimeError as rte:
                logger.error(f"Max retries hit fetching {job_id} after HTTP {response_code}: {rte}")
                resp_text_snippet: Optional[str] = response_text[:500] if "response_text" in locals() else None
                return ResponseSchema(
                    response_code=1,
                    response_reason=f"Max retries after HTTP {response_code}: {rte}",
                    response=resp_text_snippet,
                    trace_id=trace_id,
                )

    def submit_message(
        self,
        channel_name: str,
        message: str,
        for_nv_ingest: bool = False,
        timeout: Optional[Union[float, Tuple[float, float]]] = None,
    ) -> "ResponseSchema":
        """
        Submits a job message payload to the server's submit endpoint.

        Handles retries for connection errors and non-terminal HTTP errors based on the max_retries configuration.
        Specific HTTP statuses are treated as immediate failures.

        Parameters
        ----------
        channel_name : str
            Not used by RestClient; included for interface compatibility.
        message : str
            The JSON string representing the job specification payload.
        for_nv_ingest : bool, optional
            Not used by RestClient. Default is False.
        timeout : float or tuple of float, optional
            Specific timeout override for this request.

        Returns
        -------
        ResponseSchema
            - response_code = 0: Success (HTTP 200) with a successful job submission.
            - response_code = 1: Terminal failure (e.g., 422, 400, 5xx, or max retries exceeded).

        Raises
        ------
        TypeError
            If the configured client does not support the required HTTP POST method.
        """
        retries: int = 0
        url: str = f"{self._base_url}{self._submit_endpoint}"
        headers: Dict[str, str] = {"Content-Type": "application/json"}
        request_payload: Dict[str, str] = {"payload": message}
        req_timeout: Tuple[float, Optional[float]] = self._timeout

        # Ensure content-type is present
        headers = {"Content-Type": "application/json"}
        headers.update(self._headers)

        while True:
            result: Optional[Any] = None
            trace_id: Optional[str] = None
            response_code: int = -1

            try:
                if isinstance(self._client, requests.Session):
                    result = self._client.post(
                        url,
                        json=request_payload,
                        headers=headers,
                        auth=self._auth,
                        timeout=req_timeout,
                    )
                    response_code = result.status_code
                    trace_id = result.headers.get("x-trace-id")
                    response_text: str = result.text

                    if response_code in _TERMINAL_RESPONSE_STATUSES:
                        error_reason: str = f"Terminal response code {response_code} submitting job."
                        logger.error(f"{error_reason} Response: {response_text[:200]}")
                        return ResponseSchema(
                            response_code=1, response_reason=error_reason, response=response_text, trace_id=trace_id
                        )
                    elif response_code == 200:
                        server_job_id_raw: str = response_text
                        cleaned_job_id: str = server_job_id_raw.strip('"')
                        logger.debug(f"Submit successful. Server Job ID: {cleaned_job_id}, Trace: {trace_id}")
                        return ResponseSchema(
                            response_code=0,
                            response_reason="OK",
                            response=server_job_id_raw,
                            transaction_id=cleaned_job_id,
                            trace_id=trace_id,
                        )
                    else:
                        logger.warning(f"Unexpected status {response_code} on submit. Retrying if possible.")
                else:
                    raise TypeError(
                        f"Unsupported client type for submit_message: {type(self._client)}. "
                        f"Requires a requests.Session compatible API."
                    )
            except requests.exceptions.RequestException as err:
                logger.warning(
                    f"RequestException submitting job: {err}. Attempting retry ({retries + 1}/{self._max_retries})..."
                )
                try:
                    retries = self.perform_retry_backoff(retries)
                    continue
                except RuntimeError as rte:
                    logger.error(f"Max retries hit submitting job after RequestException: {rte}")
                    return ResponseSchema(response_code=1, response_reason=str(rte), response=str(err))
            except Exception as e:
                logger.exception(f"Unexpected error submitting job: {e}")
                return ResponseSchema(response_code=1, response_reason=f"Unexpected submit error: {e}")

            try:
                retries = self.perform_retry_backoff(retries)
                continue
            except RuntimeError as rte:
                logger.error(f"Max retries hit submitting job after HTTP {response_code}: {rte}")
                resp_text_snippet: Optional[str] = response_text[:500] if "response_text" in locals() else None
                return ResponseSchema(
                    response_code=1,
                    response_reason=f"Max retries after HTTP {response_code}: {rte}",
                    response=resp_text_snippet,
                    trace_id=trace_id,
                )

    def perform_retry_backoff(self, existing_retries: int) -> int:
        """
        Performs exponential backoff sleep if retries are permitted.

        Calculates the delay using exponential backoff (2^existing_retries) capped by self._max_backoff.
        Sleeps for the calculated delay if the number of existing_retries is less than max_retries.

        Parameters
        ----------
        existing_retries : int
            The number of retries already attempted for the current operation.

        Returns
        -------
        int
            The incremented retry count (existing_retries + 1).

        Raises
        ------
        RuntimeError
            If existing_retries is greater than or equal to max_retries (when max_retries > 0).
        """
        if self._max_retries > 0 and existing_retries >= self._max_retries:
            raise RuntimeError(f"Max retry attempts ({self._max_retries}) reached")
        backoff_delay: int = min(2**existing_retries, self._max_backoff)
        retry_attempt_num: int = existing_retries + 1
        logger.debug(
            f"Operation failed. Retrying attempt "
            f"{retry_attempt_num}/{self._max_retries if self._max_retries > 0 else 'infinite'} "
            f"in {backoff_delay:.2f}s..."
        )
        time.sleep(backoff_delay)
        return retry_attempt_num
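
The RestClient above is the main new client in this release: it wraps the nv-ingest REST endpoints (/v1/submit_job and /v1/fetch_job) with retry and exponential backoff handling. A minimal usage sketch against the class as published, assuming a locally running nv-ingest REST service; the host, port, and job payload below are illustrative placeholders, not values taken from this diff:

from nv_ingest_api.util.service_clients.rest.rest_client import RestClient

# Illustrative values only; point these at wherever the nv-ingest REST service actually runs.
client = RestClient(host="localhost", port=7670, max_retries=3)

if client.ping().response_code == 0:
    job_spec_json = "{}"  # placeholder; a real job spec JSON string is built elsewhere in nv-ingest
    submit_resp = client.submit_message(channel_name="", message=job_spec_json)
    if submit_resp.response_code == 0:
        # transaction_id carries the server-assigned job id with surrounding quotes stripped
        fetch_resp = client.fetch_message(submit_resp.transaction_id)
        # response_code 2 means HTTP 202: the job was accepted but is not ready yet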

nv_ingest_api/util/string_processing/__init__.py
@@ -0,0 +1,51 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging
import re

logger = logging.getLogger(__name__)

DEPLOT_MAX_TOKENS = 128
DEPLOT_TEMPERATURE = 1.0
DEPLOT_TOP_P = 1.0


def remove_url_endpoints(url) -> str:
    """Some configurations provide the full endpoint in the URL.
    Ex: http://deplot:8000/v1/chat/completions. For hitting the
    health endpoint we need to get just the hostname:port combo
    that we can append the health/ready endpoint to so we attempt
    to parse that information here.

    Args:
        url str: Incoming URL

    Returns:
        str: URL with just the hostname:port portion remaining
    """
    if "/v1" in url:
        url = url.split("/v1")[0]

    return url


def generate_url(url) -> str:
    """Examines the user defined URL for http*://. If that
    pattern is detected the URL is used as provided by the user.
    If that pattern does not exist then the assumption is made that
    the endpoint is simply `http://` and that is prepended
    to the user supplied endpoint.

    Args:
        url str: Endpoint where the Rest service is running

    Returns:
        str: Fully validated URL
    """
    if not re.match(r"^https?://", url):
        # Add the default `http://` if it's not already present in the URL
        url = f"http://{url}"

    return url
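
These two helpers normalize user-supplied service endpoints: generate_url ensures a scheme is present, and remove_url_endpoints strips everything from /v1 onward so a health endpoint can be appended. A small sketch, assuming this hunk is the +51-line nv_ingest_api/util/string_processing/__init__.py listed above; the deplot endpoint and health path are the illustrative examples from the docstrings, not fixed values:

from nv_ingest_api.util.string_processing import generate_url, remove_url_endpoints

raw = "deplot:8000/v1/chat/completions"   # endpoint as a user might configure it
full = generate_url(raw)                  # -> "http://deplot:8000/v1/chat/completions"
base = remove_url_endpoints(full)         # -> "http://deplot:8000"
ready_url = f"{base}/v1/health/ready"     # base suitable for appending a health/ready check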