oracle-ads 2.12.10rc0__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. ads/aqua/__init__.py +2 -1
  2. ads/aqua/app.py +46 -19
  3. ads/aqua/client/__init__.py +3 -0
  4. ads/aqua/client/client.py +799 -0
  5. ads/aqua/common/enums.py +19 -14
  6. ads/aqua/common/errors.py +3 -4
  7. ads/aqua/common/utils.py +2 -2
  8. ads/aqua/constants.py +1 -0
  9. ads/aqua/evaluation/constants.py +7 -7
  10. ads/aqua/evaluation/errors.py +3 -4
  11. ads/aqua/evaluation/evaluation.py +20 -12
  12. ads/aqua/extension/aqua_ws_msg_handler.py +14 -7
  13. ads/aqua/extension/base_handler.py +12 -9
  14. ads/aqua/extension/model_handler.py +29 -1
  15. ads/aqua/extension/models/ws_models.py +5 -6
  16. ads/aqua/finetuning/constants.py +3 -3
  17. ads/aqua/finetuning/entities.py +3 -0
  18. ads/aqua/finetuning/finetuning.py +32 -1
  19. ads/aqua/model/constants.py +7 -7
  20. ads/aqua/model/entities.py +2 -1
  21. ads/aqua/model/enums.py +4 -5
  22. ads/aqua/model/model.py +158 -76
  23. ads/aqua/modeldeployment/deployment.py +22 -10
  24. ads/aqua/modeldeployment/entities.py +3 -1
  25. ads/cli.py +16 -8
  26. ads/common/auth.py +33 -20
  27. ads/common/extended_enum.py +52 -44
  28. ads/llm/__init__.py +11 -8
  29. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  30. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  31. ads/model/artifact_downloader.py +3 -4
  32. ads/model/datascience_model.py +84 -64
  33. ads/model/generic_model.py +3 -3
  34. ads/model/model_metadata.py +17 -11
  35. ads/model/service/oci_datascience_model.py +12 -14
  36. ads/opctl/backend/marketplace/helm_helper.py +13 -14
  37. ads/opctl/cli.py +4 -5
  38. ads/opctl/cmds.py +28 -32
  39. ads/opctl/config/merger.py +8 -11
  40. ads/opctl/config/resolver.py +25 -30
  41. ads/opctl/operator/cli.py +9 -9
  42. ads/opctl/operator/common/backend_factory.py +56 -60
  43. ads/opctl/operator/common/const.py +5 -5
  44. ads/opctl/operator/lowcode/anomaly/const.py +8 -9
  45. ads/opctl/operator/lowcode/common/transformations.py +38 -3
  46. ads/opctl/operator/lowcode/common/utils.py +11 -1
  47. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +43 -48
  48. ads/opctl/operator/lowcode/forecast/__main__.py +10 -0
  49. ads/opctl/operator/lowcode/forecast/const.py +6 -6
  50. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +1 -1
  51. ads/opctl/operator/lowcode/forecast/operator_config.py +31 -0
  52. ads/opctl/operator/lowcode/forecast/schema.yaml +63 -0
  53. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  54. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +233 -0
  55. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +238 -0
  56. ads/opctl/operator/lowcode/pii/constant.py +6 -7
  57. ads/opctl/operator/lowcode/recommender/constant.py +12 -7
  58. ads/opctl/operator/runtime/marketplace_runtime.py +4 -10
  59. ads/opctl/operator/runtime/runtime.py +4 -6
  60. ads/pipeline/ads_pipeline_run.py +13 -25
  61. ads/pipeline/visualizer/graph_renderer.py +3 -4
  62. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.13.0.dist-info}/METADATA +4 -2
  63. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.13.0.dist-info}/RECORD +66 -59
  64. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.13.0.dist-info}/LICENSE.txt +0 -0
  65. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.13.0.dist-info}/WHEEL +0 -0
  66. {oracle_ads-2.12.10rc0.dist-info → oracle_ads-2.13.0.dist-info}/entry_points.txt +0 -0
ads/aqua/client/client.py (new file)
@@ -0,0 +1,799 @@
1
+ #!/usr/bin/env python
2
+ # Copyright (c) 2025 Oracle and/or its affiliates.
3
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
+
5
+ import asyncio
6
+ import functools
7
+ import json
8
+ import logging
9
+ import time
10
+ from types import TracebackType
11
+ from typing import (
12
+ Any,
13
+ AnyStr,
14
+ AsyncIterator,
15
+ Callable,
16
+ Dict,
17
+ Iterator,
18
+ List,
19
+ Mapping,
20
+ Optional,
21
+ Sequence,
22
+ Tuple,
23
+ Type,
24
+ TypeVar,
25
+ Union,
26
+ )
27
+
28
+ import httpx
29
+ import oci
30
+ import requests
31
+ from tenacity import (
32
+ before_sleep_log,
33
+ retry,
34
+ retry_if_exception,
35
+ stop_after_attempt,
36
+ stop_after_delay,
37
+ wait_exponential,
38
+ wait_random_exponential,
39
+ )
40
+
41
+ from ads.common import auth as authutil
42
+
43
+ DEFAULT_RETRIES = 3
44
+ DEFAULT_BACKOFF_FACTOR = 3
45
+ TIMEOUT = 600 # Timeout in seconds
46
+ STATUS_FORCE_LIST = [429, 500, 502, 503, 504]
47
+ DEFAULT_ENCODING = "utf-8"
48
+
49
+ _T = TypeVar("_T", bound="BaseClient")
50
+
51
+ logger = logging.getLogger(__name__)
52
+
53
+
54
+ class OCIAuth(httpx.Auth):
55
+ """
56
+ Custom HTTPX authentication class that uses the OCI Signer for request signing.
57
+
58
+ Attributes:
59
+ signer (oci.signer.Signer): The OCI signer used to sign requests.
60
+ """
61
+
62
+ def __init__(self, signer: oci.signer.Signer):
63
+ """
64
+ Initialize the OCIAuth instance.
65
+
66
+ Args:
67
+ signer (oci.signer.Signer): The OCI signer to use for signing requests.
68
+ """
69
+ self.signer = signer
70
+
71
+ def auth_flow(self, request: httpx.Request) -> Iterator[httpx.Request]:
72
+ """
73
+ The authentication flow that signs the HTTPX request using the OCI signer.
74
+
75
+ Args:
76
+ request (httpx.Request): The outgoing HTTPX request to be signed.
77
+
78
+ Yields:
79
+ httpx.Request: The signed HTTPX request.
80
+ """
81
+ # Create a requests.Request object from the HTTPX request
82
+ req = requests.Request(
83
+ method=request.method,
84
+ url=str(request.url),
85
+ headers=dict(request.headers),
86
+ data=request.content,
87
+ )
88
+ prepared_request = req.prepare()
89
+
90
+ # Sign the request using the OCI Signer
91
+ self.signer.do_request_sign(prepared_request)
92
+
93
+ # Update the original HTTPX request with the signed headers
94
+ request.headers.update(prepared_request.headers)
95
+
96
+ # Proceed with the request
97
+ yield request
98
+
99
+
100
+ class ExtendedRequestError(Exception):
101
+ """
102
+ Custom exception for handling request errors with additional context.
103
+
104
+ Attributes:
105
+ original_exception (Exception): The original exception that caused the error.
106
+ response_text (str): The text of the response received from the request, if available.
107
+ """
108
+
109
+ def __init__(self, message: str, original_exception: Exception, response_text: str):
110
+ """
111
+ Initialize the ExtendedRequestError.
112
+
113
+ Args:
114
+ message (str): The error message associated with the exception.
115
+ original_exception (Exception): The original exception that caused the error.
116
+ response_text (str): The text of the response received from the request, if available.
117
+ """
118
+ super().__init__(message)
119
+ self.original_exception = original_exception
120
+ self.response_text = response_text
121
+
122
+
123
+ def _should_retry_exception(e: ExtendedRequestError) -> bool:
124
+ """
125
+ Determine whether the exception should trigger a retry.
126
+
127
+ Args:
128
+ e (ExtendedRequestError): The exception raised.
129
+
130
+ Returns:
131
+ bool: True if the exception should trigger a retry, False otherwise.
132
+ """
133
+ original_exception = e.original_exception if hasattr(e, "original_exception") else e
134
+ if isinstance(original_exception, httpx.HTTPStatusError):
135
+ return original_exception.response.status_code in STATUS_FORCE_LIST
136
+ elif isinstance(original_exception, httpx.RequestError):
137
+ return True
138
+ return False
139
+
140
+
141
+ def _create_retry_decorator(
142
+ max_retries: int,
143
+ backoff_factor: float,
144
+ random_exponential: bool = False,
145
+ stop_after_delay_seconds: Optional[float] = None,
146
+ min_seconds: float = 0,
147
+ max_seconds: float = 60,
148
+ ) -> Callable[[Any], Any]:
149
+ """
150
+ Create a tenacity retry decorator with the specified configuration.
151
+
152
+ Args:
153
+ max_retries (int): The maximum number of retry attempts.
154
+ backoff_factor (float): The backoff factor for calculating retry delays.
155
+ random_exponential (bool): Whether to use random exponential backoff.
156
+ stop_after_delay_seconds (Optional[float]): Maximum total time to retry.
157
+ min_seconds (float): Minimum wait time between retries.
158
+ max_seconds (float): Maximum wait time between retries.
159
+
160
+ Returns:
161
+ Callable[[Any], Any]: A tenacity retry decorator configured with the specified strategy.
162
+ """
163
+ wait_strategy = (
164
+ wait_random_exponential(min=min_seconds, max=max_seconds)
165
+ if random_exponential
166
+ else wait_exponential(
167
+ multiplier=backoff_factor, min=min_seconds, max=max_seconds
168
+ )
169
+ )
170
+
171
+ stop_strategy = stop_after_attempt(max_retries)
172
+ if stop_after_delay_seconds is not None:
173
+ stop_strategy = stop_strategy | stop_after_delay(stop_after_delay_seconds)
174
+
175
+ retry_strategy = retry_if_exception(_should_retry_exception)
176
+ return retry(
177
+ wait=wait_strategy,
178
+ stop=stop_strategy,
179
+ retry=retry_strategy,
180
+ reraise=True,
181
+ before_sleep=before_sleep_log(logger, logging.WARNING),
182
+ )
183
+
184
+
185
+ def _retry_decorator(f: Callable) -> Callable:
186
+ """
187
+ Decorator to apply retry logic to a function using tenacity.
188
+
189
+ Args:
190
+ f (Callable): The function to be decorated.
191
+
192
+ Returns:
193
+ Callable: The decorated function with retry logic applied.
194
+ """
195
+
196
+ @functools.wraps(f)
197
+ def wrapper(self, *args: Any, **kwargs: Any):
198
+ retries = getattr(self, "retries", DEFAULT_RETRIES)
199
+ if retries <= 0:
200
+ return f(self, *args, **kwargs)
201
+ backoff_factor = getattr(self, "backoff_factor", DEFAULT_BACKOFF_FACTOR)
202
+ retry_func = _create_retry_decorator(
203
+ max_retries=retries,
204
+ backoff_factor=backoff_factor,
205
+ random_exponential=False,
206
+ stop_after_delay_seconds=getattr(self, "timeout", TIMEOUT),
207
+ min_seconds=0,
208
+ max_seconds=60,
209
+ )
210
+
211
+ return retry_func(f)(self, *args, **kwargs)
212
+
213
+ return wrapper
214
+
215
+
216
+ class BaseClient:
217
+ """
218
+ Base class for invoking models via HTTP requests with retry logic.
219
+
220
+ Attributes:
221
+ endpoint (str): The URL endpoint to send the request.
222
+ auth (Any): The authentication signer for the requests.
223
+ retries (int): The number of retry attempts for the request.
224
+ backoff_factor (float): The factor to determine the delay between retries.
225
+ timeout (Union[float, Tuple[float, float]]): The timeout setting for the HTTP request.
226
+ kwargs (Dict): Additional keyword arguments.
227
+ """
228
+
229
+ def __init__(
230
+ self,
231
+ endpoint: str,
232
+ auth: Optional[Any] = None,
233
+ retries: Optional[int] = DEFAULT_RETRIES,
234
+ backoff_factor: Optional[float] = DEFAULT_BACKOFF_FACTOR,
235
+ timeout: Optional[Union[float, Tuple[float, float]]] = None,
236
+ **kwargs: Any,
237
+ ) -> None:
238
+ """
239
+ Initialize the BaseClient.
240
+
241
+ Args:
242
+ endpoint (str): The URL endpoint to send the request.
243
+ auth (Optional[Any]): The authentication signer for the requests.
244
+ retries (Optional[int]): The number of retry attempts for the request.
245
+ backoff_factor (Optional[float]): The factor to determine the delay between retries.
246
+ timeout (Optional[Union[float, Tuple[float, float]]]): The timeout setting for the HTTP request.
247
+ **kwargs: Additional keyword arguments.
248
+ """
249
+ self.endpoint = endpoint
250
+ self.retries = retries or DEFAULT_RETRIES
251
+ self.backoff_factor = backoff_factor or DEFAULT_BACKOFF_FACTOR
252
+ self.timeout = timeout or TIMEOUT
253
+ self.kwargs = kwargs
254
+
255
+ # Validate auth object
256
+ auth = auth or authutil.default_signer()
257
+ if not callable(auth.get("signer")):
258
+ raise ValueError("Auth object must have a 'signer' callable attribute.")
259
+ self.auth = OCIAuth(auth["signer"])
260
+
261
+ logger.debug(
262
+ f"Initialized {self.__class__.__name__} with endpoint={self.endpoint}, "
263
+ f"retries={self.retries}, backoff_factor={self.backoff_factor}, timeout={self.timeout}"
264
+ )
265
+
266
+ def _parse_streaming_line(
267
+ self, line: Union[bytes, str]
268
+ ) -> Optional[Dict[str, Any]]:
269
+ """
270
+ Parse a single line from the streaming response.
271
+
272
+ Args:
273
+ line (Union[bytes, str]): A line of the response in bytes or string format.
274
+
275
+ Returns:
276
+ Optional[Dict[str, Any]]: Parsed JSON object, or None if the line is to be ignored.
277
+
278
+ Raises:
279
+ Exception: Raised if the line contains an error object.
280
+ json.JSONDecodeError: Raised if the line cannot be decoded as JSON.
281
+ """
282
+ logger.debug(f"Parsing streaming line: {line}")
283
+
284
+ if isinstance(line, bytes):
285
+ line = line.decode(DEFAULT_ENCODING)
286
+
287
+ line = line.strip()
288
+
289
+ if line.lower().startswith("data:"):
290
+ line = line[5:].lstrip()
291
+
292
+ if not line or line.startswith("[DONE]"):
293
+ logger.debug("Received end of stream signal or empty line.")
294
+ return None
295
+
296
+ try:
297
+ json_line = json.loads(line)
298
+ logger.debug(f"Parsed JSON line: {json_line}")
299
+ except json.JSONDecodeError as e:
300
+ logger.debug(f"Error decoding JSON from line: {line}")
301
+ raise json.JSONDecodeError(
302
+ f"Error decoding JSON from line: {e!s}", e.doc, e.pos
303
+ ) from e
304
+
305
+ if json_line.get("object") == "error":
306
+ # Raise an error for error objects in the stream
307
+ error_message = json_line.get("message", "Unknown error")
308
+ logger.debug(f"Error in streaming response: {error_message}")
309
+ raise Exception(f"Error in streaming response: {error_message}")
310
+
311
+ return json_line
312
+
313
+ def _prepare_headers(
314
+ self,
315
+ stream: bool,
316
+ headers: Optional[Dict[str, str]] = None,
317
+ ) -> Dict[str, str]:
318
+ """
319
+ Construct and return the headers for a request.
320
+
321
+ Args:
322
+ stream (bool): Whether to use streaming for the response.
323
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
324
+
325
+ Returns:
326
+ Dict[str, str]: The prepared headers.
327
+ """
328
+ default_headers = {
329
+ "Content-Type": "application/json",
330
+ "Accept": "text/event-stream" if stream else "application/json",
331
+ }
332
+ if stream:
333
+ default_headers["enable-streaming"] = "true"
334
+ if headers:
335
+ default_headers.update(headers)
336
+
337
+ logger.debug(f"Prepared headers: {default_headers}")
338
+ return default_headers
339
+
340
+
341
+ class Client(BaseClient):
342
+ """
343
+ Synchronous HTTP client for invoking models with support for request and streaming APIs.
344
+ """
345
+
346
+ def __init__(self, *args, **kwargs) -> None:
347
+ """
348
+ Initialize the Client.
349
+
350
+ Args:
351
+ *args: Positional arguments forwarded to BaseClient.
352
+ **kwargs: Keyword arguments forwarded to BaseClient.
353
+ """
354
+ super().__init__(*args, **kwargs)
355
+ self._client = httpx.Client(timeout=self.timeout)
356
+
357
+ def is_closed(self) -> bool:
358
+ return self._client.is_closed
359
+
360
+ def close(self) -> None:
361
+ """Close the underlying HTTPX client."""
362
+ self._client.close()
363
+
364
+ def __enter__(self: _T) -> _T: # noqa: PYI019
365
+ return self
366
+
367
+ def __exit__(
368
+ self,
369
+ exc_type: Optional[Type[BaseException]] = None,
370
+ exc: Optional[BaseException] = None,
371
+ exc_tb: Optional[TracebackType] = None,
372
+ ) -> None:
373
+ self.close()
374
+
375
+ def __del__(self) -> None:
376
+ try:
377
+ self.close()
378
+ except Exception:
379
+ pass
380
+
381
+ @_retry_decorator
382
+ def _request(
383
+ self, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None
384
+ ) -> Dict[str, Any]:
385
+ """
386
+ Send a POST request to the configured endpoint with retry and error handling.
387
+
388
+ Args:
389
+ payload (Dict[str, Any]): Parameters for the request payload.
390
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
391
+
392
+ Returns:
393
+ Dict[str, Any]: Decoded JSON response.
394
+
395
+ Raises:
396
+ ExtendedRequestError: Raised when the request fails.
397
+ """
398
+ logger.debug(f"Starting synchronous request with payload: {payload}")
399
+ try:
400
+ response = self._client.post(
401
+ self.endpoint,
402
+ headers=self._prepare_headers(stream=False, headers=headers),
403
+ auth=self.auth,
404
+ json=payload,
405
+ )
406
+ logger.debug(f"Received response with status code: {response.status_code}")
407
+ response.raise_for_status()
408
+ json_response = response.json()
409
+ logger.debug(f"Response JSON: {json_response}")
410
+ return json_response
411
+ except Exception as e:
412
+ last_exception_text = (
413
+ e.response.text if hasattr(e, "response") and e.response else str(e)
414
+ )
415
+ logger.error(
416
+ f"Request failed. Error: {e!s}. Details: {last_exception_text}"
417
+ )
418
+ raise ExtendedRequestError(
419
+ f"Request failed: {e!s}. Details: {last_exception_text}",
420
+ e,
421
+ last_exception_text,
422
+ ) from e
423
+
424
+ def _stream(
425
+ self, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None
426
+ ) -> Iterator[Mapping[str, Any]]:
427
+ """
428
+ Send a POST request expecting a streaming response.
429
+
430
+ Args:
431
+ payload (Dict[str, Any]): Parameters for the request payload.
432
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
433
+
434
+ Yields:
435
+ Mapping[str, Any]: Decoded JSON response line-by-line.
436
+
437
+ Raises:
438
+ ExtendedRequestError: Raised when the request fails.
439
+ """
440
+ logger.debug(f"Starting synchronous streaming request with payload: {payload}")
441
+ last_exception_text = None
442
+
443
+ for attempt in range(1, self.retries + 2): # retries + initial attempt
444
+ logger.debug(f"Attempt {attempt} for synchronous streaming request.")
445
+ try:
446
+ with self._client.stream(
447
+ "POST",
448
+ self.endpoint,
449
+ headers=self._prepare_headers(stream=True, headers=headers),
450
+ auth=self.auth,
451
+ json={**payload, "stream": True},
452
+ ) as response:
453
+ try:
454
+ logger.debug(
455
+ f"Received streaming response with status code: {response.status_code}"
456
+ )
457
+ response.raise_for_status()
458
+ for line in response.iter_lines():
459
+ if not line: # Skip empty lines
460
+ continue
461
+
462
+ parsed_line = self._parse_streaming_line(line)
463
+ if parsed_line:
464
+ logger.debug(f"Yielding parsed line: {parsed_line}")
465
+ yield parsed_line
466
+ return
467
+ except Exception as e:
468
+ last_exception_text = (
469
+ e.response.read().decode(
470
+ e.response.encoding or DEFAULT_ENCODING
471
+ )
472
+ if hasattr(e, "response") and e.response
473
+ else str(e)
474
+ )
475
+ raise
476
+
477
+ except Exception as e:
478
+ if attempt <= self.retries and _should_retry_exception(e):
479
+ delay = self.backoff_factor * (2 ** (attempt - 1))
480
+ logger.warning(
481
+ f"Streaming attempt {attempt} failed: {e}. Retrying in {delay} seconds..."
482
+ )
483
+ time.sleep(delay)
484
+ else:
485
+ logger.error(
486
+ f"Streaming request failed. Error: {e!s}. Details: {last_exception_text}"
487
+ )
488
+ raise ExtendedRequestError(
489
+ f"Streaming request failed: {e!s}. Details: {last_exception_text}",
490
+ e,
491
+ last_exception_text,
492
+ ) from e
493
+
494
+ def generate(
495
+ self,
496
+ prompt: str,
497
+ payload: Optional[Dict[str, Any]] = None,
498
+ headers: Optional[Dict[str, str]] = None,
499
+ stream: bool = True,
500
+ ) -> Union[Dict[str, Any], Iterator[Mapping[str, Any]]]:
501
+ """
502
+ Generate text completion for the given prompt.
503
+
504
+ Args:
505
+ prompt (str): Input text prompt for the model.
506
+ payload (Optional[Dict[str, Any]]): Additional parameters for the request payload.
507
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
508
+ stream (bool): Whether to use streaming for the response.
509
+
510
+ Returns:
511
+ Union[Dict[str, Any], Iterator[Mapping[str, Any]]]: A full JSON response or an iterator for streaming responses.
512
+ """
513
+ logger.debug(f"Generating text with prompt: {prompt}, stream: {stream}")
514
+ payload = {**(payload or {}), "prompt": prompt}
515
+ headers = {"route": "/v1/completions", **(headers or {})}
516
+ if stream:
517
+ return self._stream(payload=payload, headers=headers)
518
+ return self._request(payload=payload, headers=headers)
519
+
520
+ def chat(
521
+ self,
522
+ messages: List[Dict[str, Any]],
523
+ payload: Optional[Dict[str, Any]] = None,
524
+ headers: Optional[Dict[str, str]] = None,
525
+ stream: bool = True,
526
+ ) -> Union[Dict[str, Any], Iterator[Mapping[str, Any]]]:
527
+ """
528
+ Perform a chat interaction with the model.
529
+
530
+ Args:
531
+ messages (List[Dict[str, Any]]): List of message dictionaries for chat interaction.
532
+ payload (Optional[Dict[str, Any]]): Additional parameters for the request payload.
533
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
534
+ stream (bool): Whether to use streaming for the response.
535
+
536
+ Returns:
537
+ Union[Dict[str, Any], Iterator[Mapping[str, Any]]]: A full JSON response or an iterator for streaming responses.
538
+ """
539
+ logger.debug(f"Starting chat with messages: {messages}, stream: {stream}")
540
+ payload = {**(payload or {}), "messages": messages}
541
+ headers = {"route": "/v1/chat/completions", **(headers or {})}
542
+ if stream:
543
+ return self._stream(payload=payload, headers=headers)
544
+ return self._request(payload=payload, headers=headers)
545
+
546
+ def embeddings(
547
+ self,
548
+ input: Union[str, Sequence[AnyStr]] = "",
549
+ payload: Optional[Dict[str, Any]] = None,
550
+ headers: Optional[Dict[str, str]] = None,
551
+ ) -> Union[Dict[str, Any], Iterator[Mapping[str, Any]]]:
552
+ """Generate embeddings by sending a request to the endpoint.
553
+
554
+ Args:
555
+ input (Union[str, Sequence[AnyStr]], optional): The input text or sequence of texts for which to generate embeddings.
556
+ Defaults to "".
557
+ payload (Optional[Dict[str, Any]], optional): Additional parameters to include in the request payload.
558
+ Defaults to None.
559
+ headers (Optional[Dict[str, str]], optional): HTTP headers to include in the request.
560
+ Defaults to None.
561
+
562
+ Returns:
563
+ Union[Dict[str, Any], Iterator[Mapping[str, Any]]]: The server's response, typically including the generated embeddings.
564
+ """
565
+ logger.debug(f"Generating embeddings with input: {input}, payload: {payload}")
566
+ payload = {**(payload or {}), "input": input}
567
+ return self._request(payload=payload, headers=headers)
568
+
569
+
570
+ class AsyncClient(BaseClient):
571
+ """
572
+ Asynchronous HTTP client for invoking models with support for request and streaming APIs, including retry logic.
573
+ """
574
+
575
+ def __init__(self, *args, **kwargs) -> None:
576
+ """
577
+ Initialize the AsyncClient.
578
+
579
+ Args:
580
+ *args: Positional arguments forwarded to BaseClient.
581
+ **kwargs: Keyword arguments forwarded to BaseClient.
582
+ """
583
+ super().__init__(*args, **kwargs)
584
+ self._client = httpx.AsyncClient(timeout=self.timeout)
585
+
586
+ def is_closed(self) -> bool:
587
+ return self._client.is_closed
588
+
589
+ async def close(self) -> None:
590
+ """Close the underlying HTTPX client.
591
+
592
+ The client will *not* be usable after this.
593
+ """
594
+ await self._client.aclose()
595
+
596
+ async def __aenter__(self: _T) -> _T: # noqa: PYI019
597
+ return self
598
+
599
+ async def __aexit__(
600
+ self,
601
+ exc_type: Optional[Type[BaseException]] = None,
602
+ exc: Optional[BaseException] = None,
603
+ exc_tb: Optional[TracebackType] = None,
604
+ ) -> None:
605
+ await self.close()
606
+
607
+ def __del__(self) -> None:
608
+ try:
609
+ if not self._client.is_closed:
610
+ loop = asyncio.get_event_loop()
611
+ if loop.is_running():
612
+ loop.create_task(self.close())
613
+ else:
614
+ loop.run_until_complete(self.close())
615
+ except Exception:
616
+ pass
617
+
618
+ @_retry_decorator
619
+ async def _request(
620
+ self, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None
621
+ ) -> Dict[str, Any]:
622
+ """
623
+ Send a POST request to the configured endpoint with retry and error handling.
624
+
625
+ Args:
626
+ payload (Dict[str, Any]): Parameters for the request payload.
627
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
628
+
629
+ Returns:
630
+ Dict[str, Any]: Decoded JSON response.
631
+
632
+ Raises:
633
+ ExtendedRequestError: Raised when the request fails.
634
+ """
635
+ logger.debug(f"Starting asynchronous request with payload: {payload}")
636
+ try:
637
+ response = await self._client.post(
638
+ self.endpoint,
639
+ headers=self._prepare_headers(stream=False, headers=headers),
640
+ auth=self.auth,
641
+ json=payload,
642
+ )
643
+ logger.debug(f"Received response with status code: {response.status_code}")
644
+ response.raise_for_status()
645
+ json_response = response.json()
646
+ logger.debug(f"Response JSON: {json_response}")
647
+ return json_response
648
+ except Exception as e:
649
+ last_exception_text = (
650
+ e.response.text if hasattr(e, "response") and e.response else str(e)
651
+ )
652
+ logger.error(
653
+ f"Request failed. Error: {e!s}. Details: {last_exception_text}"
654
+ )
655
+ raise ExtendedRequestError(
656
+ f"Request failed: {e!s}. Details: {last_exception_text}",
657
+ e,
658
+ last_exception_text,
659
+ ) from e
660
+
661
+ async def _stream(
662
+ self, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None
663
+ ) -> AsyncIterator[Mapping[str, Any]]:
664
+ """
665
+ Send a POST request expecting a streaming response with retry logic.
666
+
667
+ Args:
668
+ payload (Dict[str, Any]): Parameters for the request payload.
669
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
670
+
671
+ Yields:
672
+ Mapping[str, Any]: Decoded JSON response line-by-line.
673
+
674
+ Raises:
675
+ ExtendedRequestError: Raised when the request fails.
676
+ """
677
+ logger.debug(f"Starting asynchronous streaming request with payload: {payload}")
678
+ last_exception_text = None
679
+ for attempt in range(1, self.retries + 2): # retries + initial attempt
680
+ logger.debug(f"Attempt {attempt} for asynchronous streaming request.")
681
+ try:
682
+ async with self._client.stream(
683
+ "POST",
684
+ self.endpoint,
685
+ headers=self._prepare_headers(stream=True, headers=headers),
686
+ auth=self.auth,
687
+ json={**payload, "stream": True},
688
+ ) as response:
689
+ try:
690
+ logger.debug(
691
+ f"Received streaming response with status code: {response.status_code}"
692
+ )
693
+ response.raise_for_status()
694
+ async for line in response.aiter_lines():
695
+ if not line: # Skip empty lines
696
+ continue
697
+ parsed_line = self._parse_streaming_line(line)
698
+ if parsed_line:
699
+ logger.debug(f"Yielding parsed line: {parsed_line}")
700
+ yield parsed_line
701
+ return
702
+ except Exception as e:
703
+ if hasattr(e, "response") and e.response:
704
+ content = await e.response.aread()
705
+ last_exception_text = content.decode(
706
+ e.response.encoding or DEFAULT_ENCODING
707
+ )
708
+ raise
709
+ except Exception as e:
710
+ if attempt <= self.retries and _should_retry_exception(e):
711
+ delay = self.backoff_factor * (2 ** (attempt - 1))
712
+ logger.warning(
713
+ f"Streaming attempt {attempt} failed: {e}. Retrying in {delay} seconds..."
714
+ )
715
+ await asyncio.sleep(delay)
716
+ else:
717
+ logger.error(
718
+ f"Streaming request failed. Error: {e!s}. Details: {last_exception_text}"
719
+ )
720
+ raise ExtendedRequestError(
721
+ f"Streaming request failed: {e!s}. Details: {last_exception_text}",
722
+ e,
723
+ last_exception_text,
724
+ ) from e
725
+
726
+ async def generate(
727
+ self,
728
+ prompt: str,
729
+ payload: Optional[Dict[str, Any]] = None,
730
+ headers: Optional[Dict[str, str]] = None,
731
+ stream: bool = False,
732
+ ) -> Union[Dict[str, Any], AsyncIterator[Mapping[str, Any]]]:
733
+ """
734
+ Generate text completion for the given prompt.
735
+
736
+ Args:
737
+ prompt (str): Input text prompt for the model.
738
+ payload (Optional[Dict[str, Any]]): Additional parameters for the request payload.
739
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
740
+ stream (bool): Whether to use streaming for the response.
741
+
742
+ Returns:
743
+ Union[Dict[str, Any], AsyncIterator[Mapping[str, Any]]]: A full JSON response or an async iterator for streaming responses.
744
+ """
745
+ logger.debug(f"Generating text with prompt: {prompt}, stream: {stream}")
746
+ payload = {**(payload or {}), "prompt": prompt}
747
+ headers = {"route": "/v1/completions", **(headers or {})}
748
+ if stream:
749
+ return self._stream(payload=payload, headers=headers)
750
+ return await self._request(payload=payload, headers=headers)
751
+
752
+ async def chat(
753
+ self,
754
+ messages: List[Dict[str, Any]],
755
+ payload: Optional[Dict[str, Any]] = None,
756
+ headers: Optional[Dict[str, str]] = None,
757
+ stream: bool = False,
758
+ ) -> Union[Dict[str, Any], AsyncIterator[Mapping[str, Any]]]:
759
+ """
760
+ Perform a chat interaction with the model.
761
+
762
+ Args:
763
+ messages (List[Dict[str, Any]]): List of message dictionaries for chat interaction.
764
+ payload (Optional[Dict[str, Any]]): Additional parameters for the request payload.
765
+ headers (Optional[Dict[str, str]]): HTTP headers to include in the request.
766
+ stream (bool): Whether to use streaming for the response.
767
+
768
+ Returns:
769
+ Union[Dict[str, Any], AsyncIterator[Mapping[str, Any]]]: A full JSON response or an async iterator for streaming responses.
770
+ """
771
+ logger.debug(f"Starting chat with messages: {messages}, stream: {stream}")
772
+ payload = {**(payload or {}), "messages": messages}
773
+ headers = {"route": "/v1/chat/completions", **(headers or {})}
774
+ if stream:
775
+ return self._stream(payload=payload, headers=headers)
776
+ return await self._request(payload=payload, headers=headers)
777
+
778
+ async def embeddings(
779
+ self,
780
+ input: Union[str, Sequence[AnyStr]] = "",
781
+ payload: Optional[Dict[str, Any]] = None,
782
+ headers: Optional[Dict[str, str]] = None,
783
+ ) -> Union[Dict[str, Any], Iterator[Mapping[str, Any]]]:
784
+ """Generate embeddings asynchronously by sending a request to the endpoint.
785
+
786
+ Args:
787
+ input (Union[str, Sequence[AnyStr]], optional): The input text or sequence of texts for which to generate embeddings.
788
+ Defaults to "".
789
+ payload (Optional[Dict[str, Any]], optional): Additional parameters to include in the request payload.
790
+ Defaults to None.
791
+ headers (Optional[Dict[str, str]], optional): HTTP headers to include in the request.
792
+ Defaults to None.
793
+
794
+ Returns:
795
+ Union[Dict[str, Any], Iterator[Mapping[str, Any]]]: The server's response, typically including the generated embeddings.
796
+ """
797
+ logger.debug(f"Generating embeddings with input: {input}, payload: {payload}")
798
+ payload = {**(payload or {}), "input": input}
799
+ return await self._request(payload=payload, headers=headers)
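
For reference, a minimal usage sketch of the new ads.aqua.client.client module introduced in 2.13.0 (not part of the diff above). Only the import path and the call signatures shown in the diff are taken from the package; the endpoint URL and the payload fields ("model", "max_tokens") are placeholders whose accepted values depend on the deployed inference server.

import asyncio

from ads.aqua.client.client import AsyncClient, Client

# Placeholder model-deployment endpoint; substitute a real predict URL.
ENDPOINT = "https://modeldeployment.<region>.oci.customer-oci.com/<deployment_ocid>/predict"

# Synchronous client. When `auth` is omitted, ads.common.auth.default_signer()
# is used to sign requests (see BaseClient.__init__ in the diff).
client = Client(endpoint=ENDPOINT, retries=3, timeout=600)

# Non-streaming chat completion; the payload fields are illustrative only.
response = client.chat(
    messages=[{"role": "user", "content": "Tell me a joke."}],
    payload={"model": "odsc-llm", "max_tokens": 100},
    stream=False,
)
print(response)

# Streaming text completion (stream=True is the synchronous default);
# each yielded item is one parsed JSON chunk from the event stream.
for chunk in client.generate(prompt="Tell me a joke.", payload={"model": "odsc-llm"}):
    print(chunk)


async def main() -> None:
    # Asynchronous client; `stream` defaults to False for generate/chat.
    async_client = AsyncClient(endpoint=ENDPOINT)
    embeddings = await async_client.embeddings(input=["hello", "world"])
    print(embeddings)
    await async_client.close()


asyncio.run(main())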