nv-ingest-api 2025.4.17.dev20250417__py3-none-any.whl → 2025.4.18.dev20250418__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +22 -6
- nv_ingest_api/util/service_clients/client_base.py +16 -2
- nv_ingest_api/util/service_clients/redis/redis_client.py +669 -180
- nv_ingest_api/util/service_clients/rest/rest_client.py +356 -223
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.18.dev20250418.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.18.dev20250418.dist-info}/RECORD +9 -9
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.18.dev20250418.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.18.dev20250418.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.18.dev20250418.dist-info}/top_level.txt +0 -0
|
@@ -2,14 +2,12 @@
|
|
|
2
2
|
# All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
# pylint: skip-file
|
|
6
|
-
|
|
7
5
|
import logging
|
|
8
6
|
import re
|
|
9
7
|
import time
|
|
10
|
-
from typing import Any
|
|
8
|
+
from typing import Any, Union, Tuple, Optional, Dict, Callable
|
|
9
|
+
from urllib.parse import urlparse
|
|
11
10
|
|
|
12
|
-
import httpx
|
|
13
11
|
import requests
|
|
14
12
|
|
|
15
13
|
from nv_ingest_api.internal.schemas.message_brokers.response_schema import ResponseSchema
|
|
@@ -64,28 +62,11 @@ _TERMINAL_RESPONSE_STATUSES = [
|
|
|
64
62
|
|
|
65
63
|
class RestClient(MessageBrokerClientBase):
|
|
66
64
|
"""
|
|
67
|
-
A client for interfacing with
|
|
68
|
-
with retry logic
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
host : str
|
|
73
|
-
The hostname of the HTTP server.
|
|
74
|
-
port : int
|
|
75
|
-
The port number of the HTTP server.
|
|
76
|
-
max_retries : int, optional
|
|
77
|
-
The maximum number of retry attempts for operations. Default is 0 (no retries).
|
|
78
|
-
max_backoff : int, optional
|
|
79
|
-
The maximum backoff delay between retries in seconds. Default is 32 seconds.
|
|
80
|
-
connection_timeout : int, optional
|
|
81
|
-
The timeout in seconds for connecting to the HTTP server. Default is 300 seconds.
|
|
82
|
-
http_allocator : Any, optional
|
|
83
|
-
The HTTP client allocator.
|
|
84
|
-
|
|
85
|
-
Attributes
|
|
86
|
-
----------
|
|
87
|
-
client : Any
|
|
88
|
-
The HTTP client instance used for operations.
|
|
65
|
+
A client for interfacing with an HTTP endpoint (e.g., nv-ingest), providing mechanisms for sending
|
|
66
|
+
and receiving messages with retry logic using the `requests` library by default, but allowing a custom
|
|
67
|
+
HTTP client allocator.
|
|
68
|
+
|
|
69
|
+
Extends MessageBrokerClientBase for interface compatibility.
|
|
89
70
|
"""
|
|
90
71
|
|
|
91
72
|
def __init__(
|
|
@@ -94,305 +75,457 @@ class RestClient(MessageBrokerClientBase):
|
|
|
94
75
|
port: int,
|
|
95
76
|
max_retries: int = 0,
|
|
96
77
|
max_backoff: int = 32,
|
|
97
|
-
|
|
98
|
-
|
|
78
|
+
default_connect_timeout: float = 300.0,
|
|
79
|
+
default_read_timeout: Optional[float] = None,
|
|
80
|
+
http_allocator: Optional[Callable[[], Any]] = None,
|
|
99
81
|
**kwargs,
|
|
100
|
-
):
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
82
|
+
) -> None:
|
|
83
|
+
"""
|
|
84
|
+
Initializes the RestClient.
|
|
85
|
+
|
|
86
|
+
By default, uses `requests.Session`. If `http_allocator` is provided, it will be called to instantiate
|
|
87
|
+
the client. If a custom allocator is used, the internal methods (`fetch_message`, `submit_message`)
|
|
88
|
+
might need adjustments if the allocated client's API differs significantly from `requests.Session`.
|
|
89
|
+
|
|
90
|
+
Parameters
|
|
91
|
+
----------
|
|
92
|
+
host : str
|
|
93
|
+
The hostname or IP address of the HTTP server.
|
|
94
|
+
port : int
|
|
95
|
+
The port number of the HTTP server.
|
|
96
|
+
max_retries : int, optional
|
|
97
|
+
Maximum number of retry attempts for connection errors or specific retryable HTTP statuses. Default is 0.
|
|
98
|
+
max_backoff : int, optional
|
|
99
|
+
Maximum backoff delay between retries, in seconds. Default is 32.
|
|
100
|
+
default_connect_timeout : float, optional
|
|
101
|
+
Default timeout in seconds for establishing a connection. Default is 300.0.
|
|
102
|
+
default_read_timeout : float, optional
|
|
103
|
+
Default timeout in seconds for waiting for data after connection. Default is None.
|
|
104
|
+
http_allocator : Optional[Callable[[], Any]], optional
|
|
105
|
+
A callable that returns an HTTP client instance. If None, `requests.Session()` is used.
|
|
106
|
+
|
|
107
|
+
Returns
|
|
108
|
+
-------
|
|
109
|
+
None
|
|
110
|
+
"""
|
|
111
|
+
self._host: str = host
|
|
112
|
+
self._port: int = port
|
|
113
|
+
self._max_retries: int = max_retries
|
|
114
|
+
self._max_backoff: int = max_backoff
|
|
115
|
+
self._default_connect_timeout: float = default_connect_timeout
|
|
116
|
+
self._default_read_timeout: Optional[float] = default_read_timeout
|
|
117
|
+
self._http_allocator: Optional[Callable[[], Any]] = http_allocator
|
|
118
|
+
|
|
119
|
+
self._timeout: Tuple[float, Optional[float]] = (self._default_connect_timeout, default_read_timeout)
|
|
120
|
+
|
|
121
|
+
if self._http_allocator is None:
|
|
122
|
+
self._client: Any = requests.Session()
|
|
123
|
+
logger.debug("RestClient initialized using default requests.Session.")
|
|
124
|
+
else:
|
|
125
|
+
try:
|
|
126
|
+
self._client = self._http_allocator()
|
|
127
|
+
logger.debug(f"RestClient initialized using provided http_allocator: {self._http_allocator.__name__}")
|
|
128
|
+
if not isinstance(self._client, requests.Session):
|
|
129
|
+
logger.warning(
|
|
130
|
+
"Provided http_allocator does not create a requests.Session. "
|
|
131
|
+
"Internal HTTP calls may fail if the client API is incompatible."
|
|
132
|
+
)
|
|
133
|
+
except Exception as e:
|
|
134
|
+
logger.exception(
|
|
135
|
+
f"Failed to instantiate client using provided http_allocator: {e}. "
|
|
136
|
+
f"Falling back to requests.Session."
|
|
137
|
+
)
|
|
138
|
+
self._client = requests.Session()
|
|
139
|
+
|
|
140
|
+
self._submit_endpoint: str = "/v1/submit_job"
|
|
141
|
+
self._fetch_endpoint: str = "/v1/fetch_job"
|
|
142
|
+
self._base_url: str = kwargs.get("base_url") or self._generate_url(self._host, self._port)
|
|
117
143
|
self._headers = kwargs.get("headers", {})
|
|
118
144
|
self._auth = kwargs.get("auth", None)
|
|
119
145
|
|
|
120
|
-
|
|
146
|
+
logger.debug(f"RestClient base URL set to: {self._base_url}")
|
|
147
|
+
|
|
148
|
+
@staticmethod
|
|
149
|
+
def _generate_url(host: str, port: int) -> str:
|
|
121
150
|
"""
|
|
122
|
-
|
|
151
|
+
Constructs a base URL from host and port, intelligently handling schemes and existing ports.
|
|
152
|
+
|
|
153
|
+
Parameters
|
|
154
|
+
----------
|
|
155
|
+
host : str
|
|
156
|
+
Hostname, IP address, or full URL (e.g., "localhost", "192.168.1.100",
|
|
157
|
+
"http://example.com", "https://api.example.com:8443/v1").
|
|
158
|
+
port : int
|
|
159
|
+
The default port number to use if the host string does not explicitly specify one.
|
|
160
|
+
|
|
161
|
+
Returns
|
|
162
|
+
-------
|
|
163
|
+
str
|
|
164
|
+
A fully constructed base URL string, including scheme, hostname, port,
|
|
165
|
+
and any original path, without a trailing slash.
|
|
166
|
+
|
|
167
|
+
Raises
|
|
168
|
+
------
|
|
169
|
+
ValueError
|
|
170
|
+
If the host string appears to be a URL but lacks a valid hostname.
|
|
123
171
|
"""
|
|
124
|
-
|
|
172
|
+
url_str: str = str(host).strip()
|
|
173
|
+
scheme: str = "http"
|
|
174
|
+
parsed_path: Optional[str] = None
|
|
175
|
+
effective_port: int = port
|
|
176
|
+
hostname: Optional[str] = None
|
|
177
|
+
|
|
178
|
+
if re.match(r"^https?://", url_str, re.IGNORECASE):
|
|
179
|
+
parsed_url = urlparse(url_str)
|
|
180
|
+
hostname = parsed_url.hostname
|
|
181
|
+
if hostname is None:
|
|
182
|
+
raise ValueError(f"Invalid URL provided in host string: '{url_str}'. Could not parse a valid hostname.")
|
|
183
|
+
scheme = parsed_url.scheme
|
|
184
|
+
if parsed_url.port is not None:
|
|
185
|
+
effective_port = parsed_url.port
|
|
186
|
+
else:
|
|
187
|
+
effective_port = port
|
|
188
|
+
if parsed_url.path and parsed_url.path.strip("/"):
|
|
189
|
+
parsed_path = parsed_url.path
|
|
190
|
+
else:
|
|
191
|
+
hostname = url_str
|
|
192
|
+
effective_port = port
|
|
193
|
+
|
|
194
|
+
if not hostname:
|
|
195
|
+
raise ValueError(f"Could not determine a valid hostname from input: '{host}'")
|
|
196
|
+
|
|
197
|
+
base_url: str = f"{scheme}://{hostname}:{effective_port}"
|
|
198
|
+
if parsed_path:
|
|
199
|
+
if not parsed_path.startswith("/"):
|
|
200
|
+
parsed_path = "/" + parsed_path
|
|
201
|
+
base_url += parsed_path
|
|
125
202
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
203
|
+
final_url: str = base_url.rstrip("/")
|
|
204
|
+
logger.debug(f"Generated base URL: {final_url}")
|
|
205
|
+
return final_url
|
|
129
206
|
|
|
130
207
|
@property
|
|
131
208
|
def max_retries(self) -> int:
|
|
209
|
+
"""
|
|
210
|
+
Maximum number of retry attempts configured for operations.
|
|
211
|
+
|
|
212
|
+
Returns
|
|
213
|
+
-------
|
|
214
|
+
int
|
|
215
|
+
The maximum number of retries.
|
|
216
|
+
"""
|
|
132
217
|
return self._max_retries
|
|
133
218
|
|
|
134
219
|
@max_retries.setter
|
|
135
220
|
def max_retries(self, value: int) -> None:
|
|
221
|
+
"""
|
|
222
|
+
Sets the maximum number of retry attempts.
|
|
223
|
+
|
|
224
|
+
Parameters
|
|
225
|
+
----------
|
|
226
|
+
value : int
|
|
227
|
+
The new maximum number of retries. Must be a non-negative integer.
|
|
228
|
+
|
|
229
|
+
Raises
|
|
230
|
+
------
|
|
231
|
+
ValueError
|
|
232
|
+
If value is not a non-negative integer.
|
|
233
|
+
"""
|
|
234
|
+
if not isinstance(value, int) or value < 0:
|
|
235
|
+
raise ValueError("max_retries must be a non-negative integer.")
|
|
136
236
|
self._max_retries = value
|
|
137
237
|
|
|
138
238
|
def get_client(self) -> Any:
|
|
139
239
|
"""
|
|
140
|
-
Returns
|
|
240
|
+
Returns the underlying HTTP client instance.
|
|
141
241
|
|
|
142
242
|
Returns
|
|
143
243
|
-------
|
|
144
244
|
Any
|
|
145
|
-
The HTTP client instance.
|
|
245
|
+
The active HTTP client instance.
|
|
146
246
|
"""
|
|
147
|
-
if self._client is None:
|
|
148
|
-
self._connect()
|
|
149
247
|
return self._client
|
|
150
248
|
|
|
151
|
-
def ping(self) -> ResponseSchema:
|
|
249
|
+
def ping(self) -> "ResponseSchema":
|
|
152
250
|
"""
|
|
153
|
-
Checks if the HTTP server is responsive.
|
|
251
|
+
Checks if the HTTP server endpoint is responsive using an HTTP GET request.
|
|
154
252
|
|
|
155
253
|
Returns
|
|
156
254
|
-------
|
|
157
|
-
|
|
158
|
-
|
|
255
|
+
ResponseSchema
|
|
256
|
+
An object encapsulating the outcome:
|
|
257
|
+
- response_code = 0 indicates success (HTTP status code < 400).
|
|
258
|
+
- response_code = 1 indicates failure, with details in response_reason.
|
|
159
259
|
"""
|
|
260
|
+
ping_timeout: Tuple[float, float] = (min(self._default_connect_timeout, 5.0), 10.0)
|
|
261
|
+
logger.debug(f"Attempting to ping server at {self._base_url} with timeout {ping_timeout}")
|
|
160
262
|
try:
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
Returns:
|
|
179
|
-
str: Fully validated URL
|
|
263
|
+
if isinstance(self._client, requests.Session):
|
|
264
|
+
response: requests.Response = self._client.get(self._base_url, timeout=ping_timeout)
|
|
265
|
+
response.raise_for_status()
|
|
266
|
+
logger.debug(f"Ping successful to {self._base_url} (Status: {response.status_code})")
|
|
267
|
+
return ResponseSchema(response_code=0, response_reason="Ping OK")
|
|
268
|
+
except requests.exceptions.RequestException as e:
|
|
269
|
+
error_reason: str = f"Ping failed due to RequestException for {self._base_url}: {e}"
|
|
270
|
+
logger.warning(error_reason)
|
|
271
|
+
return ResponseSchema(response_code=1, response_reason=error_reason)
|
|
272
|
+
except Exception as e:
|
|
273
|
+
error_reason: str = f"Unexpected error during ping to {self._base_url}: {e}"
|
|
274
|
+
logger.exception(error_reason)
|
|
275
|
+
return ResponseSchema(response_code=1, response_reason=error_reason)
|
|
276
|
+
|
|
277
|
+
def fetch_message(
|
|
278
|
+
self, job_id: str, timeout: Optional[Union[float, Tuple[float, float]]] = None
|
|
279
|
+
) -> "ResponseSchema":
|
|
180
280
|
"""
|
|
181
|
-
|
|
182
|
-
# Add the default `http://` if it's not already present in the URL
|
|
183
|
-
user_provided_url = f"http://{user_provided_url}:{user_provided_port}"
|
|
184
|
-
else:
|
|
185
|
-
user_provided_url = f"{user_provided_url}:{user_provided_port}"
|
|
186
|
-
return user_provided_url
|
|
281
|
+
Fetches a job result message from the server's fetch endpoint.
|
|
187
282
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
Fetches a message from the specified queue with retries on failure, handling streaming HTTP responses.
|
|
283
|
+
Handles retries for connection errors and non-terminal HTTP errors based on the max_retries configuration.
|
|
284
|
+
Specific HTTP statuses are treated as immediate failures (terminal) or as job not ready (HTTP 202).
|
|
191
285
|
|
|
192
286
|
Parameters
|
|
193
287
|
----------
|
|
194
288
|
job_id : str
|
|
195
|
-
The server-
|
|
196
|
-
timeout : float
|
|
197
|
-
|
|
289
|
+
The server-assigned identifier of the job to fetch.
|
|
290
|
+
timeout : float or tuple of float, optional
|
|
291
|
+
Specific timeout override for this request.
|
|
198
292
|
|
|
199
293
|
Returns
|
|
200
294
|
-------
|
|
201
295
|
ResponseSchema
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
url = f"{self._base_url}{self._fetch_endpoint}/{job_id}"
|
|
296
|
+
- response_code = 0: Success (HTTP 200) with the job result.
|
|
297
|
+
- response_code = 1: Terminal failure (e.g., 404, 400, 5xx, or max retries exceeded).
|
|
298
|
+
- response_code = 2: Job not ready (HTTP 202).
|
|
206
299
|
|
|
300
|
+
Raises
|
|
301
|
+
------
|
|
302
|
+
TypeError
|
|
303
|
+
If the configured client does not support the required HTTP GET method.
|
|
304
|
+
"""
|
|
207
305
|
# Ensure headers are included
|
|
208
306
|
headers = {"Content-Type": "application/json"}
|
|
209
307
|
headers.update(self._headers)
|
|
210
308
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
# Fetch using streaming response
|
|
216
|
-
with requests.get(
|
|
217
|
-
url,
|
|
218
|
-
timeout=(30, 600),
|
|
219
|
-
stream=True,
|
|
220
|
-
headers=headers,
|
|
221
|
-
auth=self._auth,
|
|
222
|
-
) as result:
|
|
223
|
-
response_code = result.status_code
|
|
224
|
-
|
|
225
|
-
if response_code in _TERMINAL_RESPONSE_STATUSES:
|
|
226
|
-
# Terminal response code; return error ResponseSchema
|
|
227
|
-
return ResponseSchema(
|
|
228
|
-
response_code=1,
|
|
229
|
-
response_reason=(
|
|
230
|
-
f"Terminal response code {response_code} received when fetching JobSpec: {job_id}"
|
|
231
|
-
),
|
|
232
|
-
response=result.text,
|
|
233
|
-
)
|
|
234
|
-
|
|
235
|
-
if response_code == 200:
|
|
236
|
-
# Handle streaming response, reconstructing payload incrementally
|
|
237
|
-
response_chunks = []
|
|
238
|
-
for chunk in result.iter_content(chunk_size=1024 * 1024): # 1MB chunks
|
|
239
|
-
if chunk:
|
|
240
|
-
response_chunks.append(chunk)
|
|
241
|
-
full_response = b"".join(response_chunks).decode("utf-8")
|
|
309
|
+
retries: int = 0
|
|
310
|
+
url: str = f"{self._base_url}{self._fetch_endpoint}/{job_id}"
|
|
311
|
+
req_timeout: Tuple[float, Optional[float]] = self._timeout
|
|
242
312
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
elif response_code == 202:
|
|
250
|
-
# Job is not ready yet
|
|
251
|
-
return ResponseSchema(
|
|
252
|
-
response_code=1,
|
|
253
|
-
response_reason="Job is not ready yet. Retry later.",
|
|
254
|
-
)
|
|
313
|
+
while True:
|
|
314
|
+
result: Optional[Any] = None
|
|
315
|
+
trace_id: Optional[str] = None
|
|
316
|
+
response_code: int = -1
|
|
255
317
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
318
|
+
try:
|
|
319
|
+
if isinstance(self._client, requests.Session):
|
|
320
|
+
with self._client.get(
|
|
321
|
+
url, timeout=req_timeout, headers=headers, stream=True, auth=self._auth
|
|
322
|
+
) as result:
|
|
323
|
+
response_code = result.status_code
|
|
324
|
+
response_text = result.text
|
|
325
|
+
|
|
326
|
+
if response_code in _TERMINAL_RESPONSE_STATUSES:
|
|
327
|
+
error_reason: str = f"Terminal response code {response_code} fetching {job_id}."
|
|
328
|
+
logger.error(f"{error_reason} Response: {response_text[:200]}")
|
|
329
|
+
return ResponseSchema(
|
|
330
|
+
response_code=1, response_reason=error_reason, response=response_text, trace_id=trace_id
|
|
331
|
+
)
|
|
332
|
+
elif response_code == 200:
|
|
333
|
+
try:
|
|
334
|
+
full_response: str = b"".join(c for c in result.iter_content(1024 * 1024) if c).decode(
|
|
335
|
+
"utf-8"
|
|
336
|
+
)
|
|
337
|
+
return ResponseSchema(
|
|
338
|
+
response_code=0, response_reason="OK", response=full_response, trace_id=trace_id
|
|
339
|
+
)
|
|
340
|
+
except Exception as e:
|
|
341
|
+
logger.error(f"Stream processing error for {job_id}: {e}")
|
|
342
|
+
return ResponseSchema(
|
|
343
|
+
response_code=1, response_reason=f"Stream processing error: {e}", trace_id=trace_id
|
|
344
|
+
)
|
|
345
|
+
elif response_code == 202:
|
|
346
|
+
logger.debug(f"Job {job_id} not ready (202)")
|
|
347
|
+
return ResponseSchema(
|
|
348
|
+
response_code=2, response_reason="Job not ready yet. Retry later.", trace_id=trace_id
|
|
349
|
+
)
|
|
350
|
+
else:
|
|
351
|
+
logger.warning(f"Unexpected status {response_code} for {job_id}. Retrying if possible.")
|
|
352
|
+
else:
|
|
353
|
+
raise TypeError(
|
|
354
|
+
f"Unsupported client type for fetch_message: {type(self._client)}. "
|
|
355
|
+
f"Requires a requests.Session compatible API."
|
|
269
356
|
)
|
|
270
|
-
|
|
357
|
+
except requests.exceptions.RequestException as err:
|
|
358
|
+
logger.debug(
|
|
359
|
+
f"RequestException fetching {job_id}: {err}. "
|
|
360
|
+
f"Attempting retry ({retries + 1}/{self._max_retries})..."
|
|
361
|
+
)
|
|
271
362
|
try:
|
|
272
363
|
retries = self.perform_retry_backoff(retries)
|
|
364
|
+
continue
|
|
273
365
|
except RuntimeError as rte:
|
|
274
|
-
|
|
366
|
+
logger.error(f"Max retries hit fetching {job_id} after RequestException: {rte}")
|
|
275
367
|
return ResponseSchema(response_code=1, response_reason=str(rte), response=str(err))
|
|
276
|
-
except TimeoutError:
|
|
277
|
-
raise
|
|
278
368
|
except Exception as e:
|
|
279
|
-
|
|
280
|
-
|
|
369
|
+
logger.exception(f"Unexpected error fetching {job_id}: {e}")
|
|
370
|
+
return ResponseSchema(response_code=1, response_reason=f"Unexpected fetch error: {e}")
|
|
371
|
+
|
|
372
|
+
try:
|
|
373
|
+
retries = self.perform_retry_backoff(retries)
|
|
374
|
+
continue
|
|
375
|
+
except RuntimeError as rte:
|
|
376
|
+
logger.error(f"Max retries hit fetching {job_id} after HTTP {response_code}: {rte}")
|
|
377
|
+
resp_text_snippet: Optional[str] = response_text[:500] if "response_text" in locals() else None
|
|
281
378
|
return ResponseSchema(
|
|
282
|
-
response_code=1,
|
|
379
|
+
response_code=1,
|
|
380
|
+
response_reason=f"Max retries after HTTP {response_code}: {rte}",
|
|
381
|
+
response=resp_text_snippet,
|
|
382
|
+
trace_id=trace_id,
|
|
283
383
|
)
|
|
284
384
|
|
|
285
|
-
def submit_message(
|
|
385
|
+
def submit_message(
|
|
386
|
+
self,
|
|
387
|
+
channel_name: str,
|
|
388
|
+
message: str,
|
|
389
|
+
for_nv_ingest: bool = False,
|
|
390
|
+
timeout: Optional[Union[float, Tuple[float, float]]] = None,
|
|
391
|
+
) -> "ResponseSchema":
|
|
286
392
|
"""
|
|
287
|
-
Submits a
|
|
393
|
+
Submits a job message payload to the server's submit endpoint.
|
|
394
|
+
|
|
395
|
+
Handles retries for connection errors and non-terminal HTTP errors based on the max_retries configuration.
|
|
396
|
+
Specific HTTP statuses are treated as immediate failures.
|
|
288
397
|
|
|
289
398
|
Parameters
|
|
290
399
|
----------
|
|
291
400
|
channel_name : str
|
|
292
|
-
Not used
|
|
401
|
+
Not used by RestClient; included for interface compatibility.
|
|
293
402
|
message : str
|
|
294
|
-
The
|
|
295
|
-
for_nv_ingest : bool
|
|
296
|
-
Not used
|
|
403
|
+
The JSON string representing the job specification payload.
|
|
404
|
+
for_nv_ingest : bool, optional
|
|
405
|
+
Not used by RestClient. Default is False.
|
|
406
|
+
timeout : float or tuple of float, optional
|
|
407
|
+
Specific timeout override for this request.
|
|
297
408
|
|
|
298
409
|
Returns
|
|
299
410
|
-------
|
|
300
411
|
ResponseSchema
|
|
301
|
-
|
|
412
|
+
- response_code = 0: Success (HTTP 200) with a successful job submission.
|
|
413
|
+
- response_code = 1: Terminal failure (e.g., 422, 400, 5xx, or max retries exceeded).
|
|
414
|
+
|
|
415
|
+
Raises
|
|
416
|
+
------
|
|
417
|
+
TypeError
|
|
418
|
+
If the configured client does not support the required HTTP POST method.
|
|
302
419
|
"""
|
|
303
|
-
retries = 0
|
|
304
|
-
url = f"{self._base_url}{self._submit_endpoint}"
|
|
420
|
+
retries: int = 0
|
|
421
|
+
url: str = f"{self._base_url}{self._submit_endpoint}"
|
|
422
|
+
headers: Dict[str, str] = {"Content-Type": "application/json"}
|
|
423
|
+
request_payload: Dict[str, str] = {"payload": message}
|
|
424
|
+
req_timeout: Tuple[float, Optional[float]] = self._timeout
|
|
305
425
|
|
|
306
426
|
# Ensure content-type is present
|
|
307
427
|
headers = {"Content-Type": "application/json"}
|
|
308
428
|
headers.update(self._headers)
|
|
309
429
|
|
|
310
430
|
while True:
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
url,
|
|
315
|
-
json={"payload": message},
|
|
316
|
-
headers=headers,
|
|
317
|
-
auth=self._auth,
|
|
318
|
-
timeout=self._connection_timeout,
|
|
319
|
-
)
|
|
431
|
+
result: Optional[Any] = None
|
|
432
|
+
trace_id: Optional[str] = None
|
|
433
|
+
response_code: int = -1
|
|
320
434
|
|
|
321
|
-
|
|
322
|
-
if
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
435
|
+
try:
|
|
436
|
+
if isinstance(self._client, requests.Session):
|
|
437
|
+
result = self._client.post(
|
|
438
|
+
url,
|
|
439
|
+
json=request_payload,
|
|
440
|
+
headers=headers,
|
|
441
|
+
auth=self._auth,
|
|
442
|
+
timeout=req_timeout,
|
|
328
443
|
)
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
444
|
+
response_code = result.status_code
|
|
445
|
+
trace_id = result.headers.get("x-trace-id")
|
|
446
|
+
response_text: str = result.text
|
|
447
|
+
|
|
448
|
+
if response_code in _TERMINAL_RESPONSE_STATUSES:
|
|
449
|
+
error_reason: str = f"Terminal response code {response_code} submitting job."
|
|
450
|
+
logger.error(f"{error_reason} Response: {response_text[:200]}")
|
|
451
|
+
return ResponseSchema(
|
|
452
|
+
response_code=1, response_reason=error_reason, response=response_text, trace_id=trace_id
|
|
453
|
+
)
|
|
454
|
+
elif response_code == 200:
|
|
455
|
+
server_job_id_raw: str = response_text
|
|
456
|
+
cleaned_job_id: str = server_job_id_raw.strip('"')
|
|
457
|
+
logger.debug(f"Submit successful. Server Job ID: {cleaned_job_id}, Trace: {trace_id}")
|
|
335
458
|
return ResponseSchema(
|
|
336
459
|
response_code=0,
|
|
337
460
|
response_reason="OK",
|
|
338
|
-
response=
|
|
339
|
-
transaction_id=
|
|
340
|
-
trace_id=
|
|
461
|
+
response=server_job_id_raw,
|
|
462
|
+
transaction_id=cleaned_job_id,
|
|
463
|
+
trace_id=trace_id,
|
|
341
464
|
)
|
|
342
465
|
else:
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
if "Connection refused" in str(e):
|
|
349
|
-
logger.debug(
|
|
350
|
-
"Connection refused encountered during submission; sleeping for 10 seconds before retrying."
|
|
466
|
+
logger.warning(f"Unexpected status {response_code} on submit. Retrying if possible.")
|
|
467
|
+
else:
|
|
468
|
+
raise TypeError(
|
|
469
|
+
f"Unsupported client type for submit_message: {type(self._client)}. "
|
|
470
|
+
f"Requires a requests.Session compatible API."
|
|
351
471
|
)
|
|
352
|
-
|
|
472
|
+
except requests.exceptions.RequestException as err:
|
|
473
|
+
logger.warning(
|
|
474
|
+
f"RequestException submitting job: {err}. Attempting retry ({retries + 1}/{self._max_retries})..."
|
|
475
|
+
)
|
|
353
476
|
try:
|
|
354
477
|
retries = self.perform_retry_backoff(retries)
|
|
478
|
+
continue
|
|
355
479
|
except RuntimeError as rte:
|
|
356
|
-
|
|
357
|
-
return ResponseSchema(response_code=1, response_reason=str(rte), response=str(
|
|
480
|
+
logger.error(f"Max retries hit submitting job after RequestException: {rte}")
|
|
481
|
+
return ResponseSchema(response_code=1, response_reason=str(rte), response=str(err))
|
|
358
482
|
except Exception as e:
|
|
359
|
-
|
|
360
|
-
|
|
483
|
+
logger.exception(f"Unexpected error submitting job: {e}")
|
|
484
|
+
return ResponseSchema(response_code=1, response_reason=f"Unexpected submit error: {e}")
|
|
485
|
+
|
|
486
|
+
try:
|
|
487
|
+
retries = self.perform_retry_backoff(retries)
|
|
488
|
+
continue
|
|
489
|
+
except RuntimeError as rte:
|
|
490
|
+
logger.error(f"Max retries hit submitting job after HTTP {response_code}: {rte}")
|
|
491
|
+
resp_text_snippet: Optional[str] = response_text[:500] if "response_text" in locals() else None
|
|
361
492
|
return ResponseSchema(
|
|
362
|
-
response_code=1,
|
|
493
|
+
response_code=1,
|
|
494
|
+
response_reason=f"Max retries after HTTP {response_code}: {rte}",
|
|
495
|
+
response=resp_text_snippet,
|
|
496
|
+
trace_id=trace_id,
|
|
363
497
|
)
|
|
364
498
|
|
|
365
|
-
def perform_retry_backoff(self, existing_retries) -> int:
|
|
499
|
+
def perform_retry_backoff(self, existing_retries: int) -> int:
|
|
366
500
|
"""
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
501
|
+
Performs exponential backoff sleep if retries are permitted.
|
|
502
|
+
|
|
503
|
+
Calculates the delay using exponential backoff (2^existing_retries) capped by self._max_backoff.
|
|
504
|
+
Sleeps for the calculated delay if the number of existing_retries is less than max_retries.
|
|
371
505
|
|
|
372
506
|
Parameters
|
|
373
507
|
----------
|
|
374
508
|
existing_retries : int
|
|
375
|
-
The number of retries
|
|
509
|
+
The number of retries already attempted for the current operation.
|
|
376
510
|
|
|
377
511
|
Returns
|
|
378
512
|
-------
|
|
379
513
|
int
|
|
380
|
-
The
|
|
514
|
+
The incremented retry count (existing_retries + 1).
|
|
381
515
|
|
|
382
516
|
Raises
|
|
383
517
|
------
|
|
384
518
|
RuntimeError
|
|
385
|
-
|
|
519
|
+
If existing_retries is greater than or equal to max_retries (when max_retries > 0).
|
|
386
520
|
"""
|
|
387
|
-
|
|
521
|
+
if self._max_retries > 0 and existing_retries >= self._max_retries:
|
|
522
|
+
raise RuntimeError(f"Max retry attempts ({self._max_retries}) reached")
|
|
523
|
+
backoff_delay: int = min(2**existing_retries, self._max_backoff)
|
|
524
|
+
retry_attempt_num: int = existing_retries + 1
|
|
388
525
|
logger.debug(
|
|
389
|
-
f"
|
|
390
|
-
f"
|
|
526
|
+
f"Operation failed. Retrying attempt "
|
|
527
|
+
f"{retry_attempt_num}/{self._max_retries if self._max_retries > 0 else 'infinite'} "
|
|
528
|
+
f"in {backoff_delay:.2f}s..."
|
|
391
529
|
)
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
logger.error(f"Operation failed, retrying in {backoff_delay}s...")
|
|
395
|
-
time.sleep(backoff_delay)
|
|
396
|
-
return existing_retries + 1
|
|
397
|
-
else:
|
|
398
|
-
raise RuntimeError(f"Max retry attempts of {self.max_retries} reached")
|
|
530
|
+
time.sleep(backoff_delay)
|
|
531
|
+
return retry_attempt_num
|