llama-stack-client 0.0.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. llama_stack/__init__.py +95 -0
  2. llama_stack/_base_client.py +2019 -0
  3. llama_stack/_client.py +518 -0
  4. llama_stack/_compat.py +217 -0
  5. llama_stack/_constants.py +14 -0
  6. llama_stack/_exceptions.py +108 -0
  7. llama_stack/_files.py +123 -0
  8. llama_stack/_models.py +785 -0
  9. llama_stack/_qs.py +150 -0
  10. llama_stack/_resource.py +43 -0
  11. llama_stack/_response.py +823 -0
  12. llama_stack/_streaming.py +333 -0
  13. llama_stack/_types.py +217 -0
  14. llama_stack/_utils/__init__.py +55 -0
  15. llama_stack/_utils/_logs.py +25 -0
  16. llama_stack/_utils/_proxy.py +62 -0
  17. llama_stack/_utils/_reflection.py +42 -0
  18. llama_stack/_utils/_streams.py +12 -0
  19. llama_stack/_utils/_sync.py +81 -0
  20. llama_stack/_utils/_transform.py +382 -0
  21. llama_stack/_utils/_typing.py +120 -0
  22. llama_stack/_utils/_utils.py +397 -0
  23. llama_stack/_version.py +4 -0
  24. llama_stack/lib/.keep +4 -0
  25. llama_stack/py.typed +0 -0
  26. llama_stack/resources/__init__.py +173 -0
  27. llama_stack/resources/agentic_system/__init__.py +61 -0
  28. llama_stack/resources/agentic_system/agentic_system.py +338 -0
  29. llama_stack/resources/agentic_system/sessions.py +363 -0
  30. llama_stack/resources/agentic_system/steps.py +186 -0
  31. llama_stack/resources/agentic_system/turns.py +443 -0
  32. llama_stack/resources/batch_inference.py +315 -0
  33. llama_stack/resources/datasets.py +331 -0
  34. llama_stack/resources/evaluate/__init__.py +47 -0
  35. llama_stack/resources/evaluate/evaluate.py +135 -0
  36. llama_stack/resources/evaluate/jobs/__init__.py +61 -0
  37. llama_stack/resources/evaluate/jobs/artifacts.py +168 -0
  38. llama_stack/resources/evaluate/jobs/jobs.py +310 -0
  39. llama_stack/resources/evaluate/jobs/logs.py +168 -0
  40. llama_stack/resources/evaluate/jobs/status.py +168 -0
  41. llama_stack/resources/evaluate/question_answering.py +167 -0
  42. llama_stack/resources/evaluations.py +243 -0
  43. llama_stack/resources/inference/__init__.py +33 -0
  44. llama_stack/resources/inference/embeddings.py +178 -0
  45. llama_stack/resources/inference/inference.py +591 -0
  46. llama_stack/resources/memory_banks/__init__.py +33 -0
  47. llama_stack/resources/memory_banks/documents.py +268 -0
  48. llama_stack/resources/memory_banks/memory_banks.py +675 -0
  49. llama_stack/resources/post_training/__init__.py +33 -0
  50. llama_stack/resources/post_training/jobs.py +451 -0
  51. llama_stack/resources/post_training/post_training.py +365 -0
  52. llama_stack/resources/reward_scoring.py +178 -0
  53. llama_stack/resources/safety.py +179 -0
  54. llama_stack/resources/synthetic_data_generation.py +183 -0
  55. llama_stack/resources/telemetry.py +244 -0
  56. llama_stack/types/__init__.py +75 -0
  57. llama_stack/types/agentic_system/__init__.py +16 -0
  58. llama_stack/types/agentic_system/agentic_system_step.py +18 -0
  59. llama_stack/types/agentic_system/agentic_system_turn_stream_chunk.py +12 -0
  60. llama_stack/types/agentic_system/session.py +21 -0
  61. llama_stack/types/agentic_system/session_create_params.py +13 -0
  62. llama_stack/types/agentic_system/session_create_response.py +11 -0
  63. llama_stack/types/agentic_system/session_delete_params.py +13 -0
  64. llama_stack/types/agentic_system/session_retrieve_params.py +16 -0
  65. llama_stack/types/agentic_system/step_retrieve_params.py +15 -0
  66. llama_stack/types/agentic_system/turn.py +39 -0
  67. llama_stack/types/agentic_system/turn_create_params.py +36 -0
  68. llama_stack/types/agentic_system/turn_retrieve_params.py +13 -0
  69. llama_stack/types/agentic_system/turn_stream_event.py +98 -0
  70. llama_stack/types/agentic_system_create_params.py +191 -0
  71. llama_stack/types/agentic_system_create_response.py +11 -0
  72. llama_stack/types/agentic_system_delete_params.py +11 -0
  73. llama_stack/types/batch_chat_completion.py +12 -0
  74. llama_stack/types/batch_inference_chat_completion_params.py +57 -0
  75. llama_stack/types/batch_inference_completion_params.py +24 -0
  76. llama_stack/types/chat_completion_stream_chunk.py +41 -0
  77. llama_stack/types/completion_stream_chunk.py +17 -0
  78. llama_stack/types/custom_query_generator_config_param.py +11 -0
  79. llama_stack/types/dataset_create_params.py +15 -0
  80. llama_stack/types/dataset_delete_params.py +11 -0
  81. llama_stack/types/dataset_get_params.py +11 -0
  82. llama_stack/types/default_query_generator_config_param.py +13 -0
  83. llama_stack/types/evaluate/__init__.py +9 -0
  84. llama_stack/types/evaluate/evaluation_job_artifacts.py +11 -0
  85. llama_stack/types/evaluate/evaluation_job_log_stream.py +11 -0
  86. llama_stack/types/evaluate/evaluation_job_status.py +11 -0
  87. llama_stack/types/evaluate/job_cancel_params.py +11 -0
  88. llama_stack/types/evaluate/jobs/__init__.py +7 -0
  89. llama_stack/types/evaluate/jobs/artifact_list_params.py +11 -0
  90. llama_stack/types/evaluate/jobs/log_list_params.py +11 -0
  91. llama_stack/types/evaluate/jobs/status_list_params.py +11 -0
  92. llama_stack/types/evaluate/question_answering_create_params.py +12 -0
  93. llama_stack/types/evaluation_job.py +11 -0
  94. llama_stack/types/evaluation_summarization_params.py +12 -0
  95. llama_stack/types/evaluation_text_generation_params.py +12 -0
  96. llama_stack/types/inference/__init__.py +6 -0
  97. llama_stack/types/inference/embedding_create_params.py +14 -0
  98. llama_stack/types/inference/embeddings.py +11 -0
  99. llama_stack/types/inference_chat_completion_params.py +75 -0
  100. llama_stack/types/inference_chat_completion_response.py +20 -0
  101. llama_stack/types/inference_completion_params.py +26 -0
  102. llama_stack/types/inference_completion_response.py +20 -0
  103. llama_stack/types/inference_step.py +26 -0
  104. llama_stack/types/llm_query_generator_config_param.py +15 -0
  105. llama_stack/types/memory_bank_create_params.py +11 -0
  106. llama_stack/types/memory_bank_drop_params.py +11 -0
  107. llama_stack/types/memory_bank_drop_response.py +7 -0
  108. llama_stack/types/memory_bank_insert_params.py +26 -0
  109. llama_stack/types/memory_bank_query_params.py +16 -0
  110. llama_stack/types/memory_bank_retrieve_params.py +11 -0
  111. llama_stack/types/memory_bank_update_params.py +24 -0
  112. llama_stack/types/memory_banks/__init__.py +7 -0
  113. llama_stack/types/memory_banks/document_delete_params.py +14 -0
  114. llama_stack/types/memory_banks/document_retrieve_params.py +14 -0
  115. llama_stack/types/memory_banks/document_retrieve_response.py +17 -0
  116. llama_stack/types/memory_retrieval_step.py +25 -0
  117. llama_stack/types/post_training/__init__.py +11 -0
  118. llama_stack/types/post_training/job_artifacts_params.py +11 -0
  119. llama_stack/types/post_training/job_cancel_params.py +11 -0
  120. llama_stack/types/post_training/job_logs_params.py +11 -0
  121. llama_stack/types/post_training/job_status_params.py +11 -0
  122. llama_stack/types/post_training/post_training_job_artifacts.py +13 -0
  123. llama_stack/types/post_training/post_training_job_log_stream.py +13 -0
  124. llama_stack/types/post_training/post_training_job_status.py +25 -0
  125. llama_stack/types/post_training_job.py +11 -0
  126. llama_stack/types/post_training_preference_optimize_params.py +68 -0
  127. llama_stack/types/post_training_supervised_fine_tune_params.py +107 -0
  128. llama_stack/types/query_documents.py +21 -0
  129. llama_stack/types/rest_api_execution_config_param.py +20 -0
  130. llama_stack/types/reward_scoring.py +12 -0
  131. llama_stack/types/reward_scoring_score_params.py +35 -0
  132. llama_stack/types/safety_run_shields_params.py +23 -0
  133. llama_stack/types/safety_run_shields_response.py +12 -0
  134. llama_stack/types/scored_dialog_generations.py +28 -0
  135. llama_stack/types/shared/__init__.py +10 -0
  136. llama_stack/types/shared/attachment.py +13 -0
  137. llama_stack/types/shared/batch_completion.py +12 -0
  138. llama_stack/types/shared/completion_message.py +19 -0
  139. llama_stack/types/shared/sampling_params.py +22 -0
  140. llama_stack/types/shared/system_message.py +14 -0
  141. llama_stack/types/shared/tool_call.py +19 -0
  142. llama_stack/types/shared/tool_response_message.py +18 -0
  143. llama_stack/types/shared/user_message.py +16 -0
  144. llama_stack/types/shared_params/__init__.py +9 -0
  145. llama_stack/types/shared_params/attachment.py +14 -0
  146. llama_stack/types/shared_params/completion_message.py +20 -0
  147. llama_stack/types/shared_params/sampling_params.py +21 -0
  148. llama_stack/types/shared_params/system_message.py +14 -0
  149. llama_stack/types/shared_params/tool_call.py +23 -0
  150. llama_stack/types/shared_params/tool_response_message.py +18 -0
  151. llama_stack/types/shared_params/user_message.py +16 -0
  152. llama_stack/types/sheid_response.py +20 -0
  153. llama_stack/types/shield_call_step.py +24 -0
  154. llama_stack/types/shield_definition_param.py +28 -0
  155. llama_stack/types/synthetic_data_generation.py +14 -0
  156. llama_stack/types/synthetic_data_generation_generate_params.py +24 -0
  157. llama_stack/types/telemetry_get_trace_params.py +11 -0
  158. llama_stack/types/telemetry_get_trace_response.py +18 -0
  159. llama_stack/types/telemetry_log_params.py +94 -0
  160. llama_stack/types/token_log_probs.py +11 -0
  161. llama_stack/types/tool_execution_step.py +34 -0
  162. llama_stack/types/tool_param_definition_param.py +15 -0
  163. llama_stack/types/train_eval_dataset.py +16 -0
  164. llama_stack/types/train_eval_dataset_param.py +16 -0
  165. llama_stack_client-0.0.1a0.dist-info/METADATA +365 -0
  166. llama_stack_client-0.0.1a0.dist-info/RECORD +168 -0
  167. llama_stack_client-0.0.1a0.dist-info/WHEEL +4 -0
  168. llama_stack_client-0.0.1a0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,823 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import inspect
5
+ import logging
6
+ import datetime
7
+ import functools
8
+ from types import TracebackType
9
+ from typing import (
10
+ TYPE_CHECKING,
11
+ Any,
12
+ Union,
13
+ Generic,
14
+ TypeVar,
15
+ Callable,
16
+ Iterator,
17
+ AsyncIterator,
18
+ cast,
19
+ overload,
20
+ )
21
+ from typing_extensions import Awaitable, ParamSpec, override, get_origin
22
+
23
+ import anyio
24
+ import httpx
25
+ import pydantic
26
+
27
+ from ._types import NoneType
28
+ from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base
29
+ from ._models import BaseModel, is_basemodel
30
+ from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
31
+ from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
32
+ from ._exceptions import LlamaStackError, APIResponseValidationError
33
+
34
+ if TYPE_CHECKING:
35
+ from ._models import FinalRequestOptions
36
+ from ._base_client import BaseClient
37
+
38
+
39
+ P = ParamSpec("P")  # parameter spec of the API methods wrapped by the `*_response_wrapper` helpers
40
+ R = TypeVar("R")  # the parsed response type (generic parameter of `BaseAPIResponse`)
41
+ _T = TypeVar("_T")  # the caller-supplied override type passed as `parse(to=...)`
42
+ _APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]")  # any sync response class
43
+ _AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]")  # any async response class
44
+
45
+ log: logging.Logger = logging.getLogger(__name__)  # module-level logger named after this module
46
+
47
+
48
class BaseAPIResponse(Generic[R]):
    """State and parsing logic shared by the sync and async response wrappers.

    Holds the raw `httpx.Response` together with everything `_parse()` needs
    to convert its body into `cast_to`, or into a stream object when the
    response is a server-sent-events stream.
    """

    _cast_to: type[R]
    _client: BaseClient[Any, Any]
    _parsed_by_type: dict[type[Any], Any]
    _is_sse_stream: bool
    _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
    _options: FinalRequestOptions

    http_response: httpx.Response

    retries_taken: int
    """The number of retries made. If no retries happened this will be `0`"""

    def __init__(
        self,
        *,
        raw: httpx.Response,
        cast_to: type[R],
        client: BaseClient[Any, Any],
        stream: bool,
        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
        options: FinalRequestOptions,
        retries_taken: int = 0,
    ) -> None:
        self._cast_to = cast_to
        self._client = client
        # cache of already-parsed results, keyed by the type they were parsed into
        self._parsed_by_type = {}
        self._is_sse_stream = stream
        self._stream_cls = stream_cls
        self._options = options
        self.http_response = raw
        self.retries_taken = retries_taken

    @property
    def headers(self) -> httpx.Headers:
        """Returns the headers of the underlying HTTP response."""
        return self.http_response.headers

    @property
    def http_request(self) -> httpx.Request:
        """Returns the httpx Request instance associated with the current response."""
        return self.http_response.request

    @property
    def status_code(self) -> int:
        """Returns the status code of the underlying HTTP response."""
        return self.http_response.status_code

    @property
    def url(self) -> httpx.URL:
        """Returns the URL for which the request was made."""
        return self.http_response.url

    @property
    def method(self) -> str:
        """Returns the HTTP method of the request that produced this response."""
        return self.http_request.method

    @property
    def http_version(self) -> str:
        """Returns the HTTP version reported by the underlying response."""
        return self.http_response.http_version

    @property
    def elapsed(self) -> datetime.timedelta:
        """The time taken for the complete request/response cycle to complete."""
        return self.http_response.elapsed

    @property
    def is_closed(self) -> bool:
        """Whether or not the response body has been closed.

        If this is False then there is response data that has not been read yet.
        You must either fully consume the response body or call `.close()`
        before discarding the response to prevent resource leaks.
        """
        return self.http_response.is_closed

    @override
    def __repr__(self) -> str:
        return (
            f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
        )

    def _parse(self, *, to: type[_T] | None = None) -> R | _T:
        """Convert the HTTP response into `to` if given, otherwise into `cast_to`.

        For SSE streams a stream object is built instead of reading the body,
        using (in order of preference) `to`, the `stream_cls` given at
        construction, or the client's `_default_stream_cls`.

        Raises:
            TypeError: `to` was given for an SSE stream but is not a stream class,
                or the target is a pydantic model that does not subclass our `BaseModel`.
            MissingStreamClassError: an SSE stream was requested but no stream class is available.
            ValueError: the target is a subclass of `httpx.Response` (only the exact class is allowed).
            RuntimeError: the target type is not one of the supported kinds.
            APIResponseValidationError: strict validation is enabled and the response
                is not declared as `application/json`.
        """
        # unwrap `Annotated[T, ...]` -> `T`
        if to and is_annotated_type(to):
            to = extract_type_arg(to, 0)

        if self._is_sse_stream:
            if to:
                if not is_stream_class_type(to):
                    raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}")

                return cast(
                    _T,
                    to(
                        cast_to=extract_stream_chunk_type(
                            to,
                            failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]",
                        ),
                        response=self.http_response,
                        client=cast(Any, self._client),
                    ),
                )

            if self._stream_cls:
                return cast(
                    R,
                    self._stream_cls(
                        cast_to=extract_stream_chunk_type(self._stream_cls),
                        response=self.http_response,
                        client=cast(Any, self._client),
                    ),
                )

            stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls)
            if stream_cls is None:
                raise MissingStreamClassError()

            return cast(
                R,
                stream_cls(
                    cast_to=self._cast_to,
                    response=self.http_response,
                    client=cast(Any, self._client),
                ),
            )

        cast_to = to if to is not None else self._cast_to

        # unwrap `Annotated[T, ...]` -> `T`
        if is_annotated_type(cast_to):
            cast_to = extract_type_arg(cast_to, 0)

        if cast_to is NoneType:
            return cast(R, None)

        response = self.http_response
        if cast_to == str:
            return cast(R, response.text)

        if cast_to == bytes:
            return cast(R, response.content)

        if cast_to == int:
            return cast(R, int(response.text))

        if cast_to == float:
            return cast(R, float(response.text))

        # for parameterised generics (e.g. `list[Foo]`) compare against the bare origin
        origin = get_origin(cast_to) or cast_to

        if origin == APIResponse:
            raise RuntimeError("Unexpected state - cast_to is `APIResponse`")

        if inspect.isclass(origin) and issubclass(origin, httpx.Response):
            # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
            # and pass that class to our request functions. We cannot change the variance to be either
            # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
            # the response class ourselves but that is something that should be supported directly in httpx
            # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
            if cast_to != httpx.Response:
                raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_to`")
            return cast(R, response)

        if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel):
            raise TypeError(
                "Pydantic models must subclass our base model type, e.g. `from llama_stack import BaseModel`"
            )

        # `origin` may be a non-class typing construct (e.g. `Literal`), for which
        # `issubclass()` itself raises TypeError; guard with `isclass` so that such
        # types get the descriptive RuntimeError below instead.
        if (
            cast_to is not object
            and origin is not list
            and origin is not dict
            and origin is not Union
            and not (inspect.isclass(origin) and issubclass(origin, BaseModel))
        ):
            raise RuntimeError(
                f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}."
            )

        # split is required to handle cases where additional information is included
        # in the response, e.g. application/json; charset=utf-8
        content_type, *_ = response.headers.get("content-type", "*").split(";")
        # media types are case-insensitive and may be padded with whitespace, so
        # normalise before comparing; the raw value is kept for the error message
        if content_type.strip().lower() != "application/json":
            if is_basemodel(cast_to):
                # best effort: the body may still be JSON despite the header
                try:
                    data = response.json()
                except Exception as exc:
                    log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
                else:
                    return self._client._process_response_data(
                        data=data,
                        cast_to=cast_to,  # type: ignore
                        response=response,
                    )

            if self._client._strict_response_validation:
                raise APIResponseValidationError(
                    response=response,
                    message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.",
                    body=response.text,
                )

            # If the API responds with content that isn't JSON then we just return
            # the (decoded) text without performing any parsing so that you can still
            # handle the response however you need to.
            return response.text  # type: ignore

        data = response.json()

        return self._client._process_response_data(
            data=data,
            cast_to=cast_to,  # type: ignore
            response=response,
        )
261
+
262
+
263
class APIResponse(BaseAPIResponse[R]):
    """Synchronous wrapper around an `httpx.Response` with typed parsing helpers."""

    @overload
    def parse(self, *, to: type[_T]) -> _T: ...

    @overload
    def parse(self) -> R: ...

    def parse(self, *, to: type[_T] | None = None) -> R | _T:
        """Returns the rich python representation of this response's data.

        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.

        The target type can be overridden with the `to` argument, e.g.

        ```py
        from llama_stack import BaseModel


        class MyModel(BaseModel):
            foo: str


        obj = response.parse(to=MyModel)
        print(obj.foo)
        ```

        We support parsing:
          - `BaseModel`
          - `dict`
          - `list`
          - `Union`
          - `str`
          - `int`
          - `float`
          - `httpx.Response`
        """
        target = self._cast_to if to is None else to

        # results are memoised per target type; a cached `None` is treated as a miss
        previously_parsed = self._parsed_by_type.get(target)
        if previously_parsed is not None:
            return previously_parsed  # type: ignore[no-any-return]

        # SSE streams are consumed lazily, everything else is read eagerly
        if not self._is_sse_stream:
            self.read()

        result = self._parse(to=to)
        if is_given(self._options.post_parser):
            result = self._options.post_parser(result)

        self._parsed_by_type[target] = result
        return result

    def read(self) -> bytes:
        """Read and return the binary response content."""
        try:
            body = self.http_response.read()
        except httpx.StreamConsumed as exc:
            # httpx's own message is not very helpful here, so surface
            # our more descriptive error instead
            raise StreamAlreadyConsumed() from exc
        return body

    def text(self) -> str:
        """Read and decode the response content into a string."""
        self.read()
        return self.http_response.text

    def json(self) -> object:
        """Read and decode the JSON response content."""
        self.read()
        return self.http_response.json()

    def close(self) -> None:
        """Close the response and release the connection.

        Automatically called if the response body is read to completion.
        """
        self.http_response.close()

    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
        """
        A byte-iterator over the decoded response content.

        This automatically handles gzip, deflate and brotli encoded responses.
        """
        yield from self.http_response.iter_bytes(chunk_size)

    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
        """A str-iterator over the decoded response content
        that handles both gzip, deflate, etc but also detects the content's
        string encoding.
        """
        yield from self.http_response.iter_text(chunk_size)

    def iter_lines(self) -> Iterator[str]:
        """Like `iter_text()` but will only yield chunks for each line"""
        yield from self.http_response.iter_lines()
363
+
364
+
365
class AsyncAPIResponse(BaseAPIResponse[R]):
    """Asynchronous wrapper around an `httpx.Response` with typed parsing helpers."""

    @overload
    async def parse(self, *, to: type[_T]) -> _T: ...

    @overload
    async def parse(self) -> R: ...

    async def parse(self, *, to: type[_T] | None = None) -> R | _T:
        """Returns the rich python representation of this response's data.

        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.

        You can customise the type that the response is parsed into through
        the `to` argument, e.g.

        ```py
        from llama_stack import BaseModel


        class MyModel(BaseModel):
            foo: str


        obj = await response.parse(to=MyModel)
        print(obj.foo)
        ```

        We support parsing:
          - `BaseModel`
          - `dict`
          - `list`
          - `Union`
          - `str`
          - `int`
          - `float`
          - `httpx.Response`
        """
        cache_key = to if to is not None else self._cast_to
        # results are memoised per target type; a cached `None` is treated as a miss
        cached = self._parsed_by_type.get(cache_key)
        if cached is not None:
            return cached  # type: ignore[no-any-return]

        # SSE streams are consumed lazily, everything else is read eagerly
        if not self._is_sse_stream:
            await self.read()

        parsed = self._parse(to=to)
        if is_given(self._options.post_parser):
            parsed = self._options.post_parser(parsed)

        self._parsed_by_type[cache_key] = parsed
        return parsed

    async def read(self) -> bytes:
        """Read and return the binary response content."""
        try:
            return await self.http_response.aread()
        except httpx.StreamConsumed as exc:
            # the default error raised by httpx isn't very
            # helpful in our case so we re-raise it with
            # a different error message
            raise StreamAlreadyConsumed() from exc

    async def text(self) -> str:
        """Read and decode the response content into a string."""
        await self.read()
        return self.http_response.text

    async def json(self) -> object:
        """Read and decode the JSON response content."""
        await self.read()
        return self.http_response.json()

    async def close(self) -> None:
        """Close the response and release the connection.

        Automatically called if the response body is read to completion.
        """
        await self.http_response.aclose()

    async def iter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
        """
        A byte-iterator over the decoded response content.

        This automatically handles gzip, deflate and brotli encoded responses.
        """
        async for chunk in self.http_response.aiter_bytes(chunk_size):
            yield chunk

    async def iter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
        """A str-iterator over the decoded response content
        that handles both gzip, deflate, etc but also detects the content's
        string encoding.
        """
        async for chunk in self.http_response.aiter_text(chunk_size):
            yield chunk

    async def iter_lines(self) -> AsyncIterator[str]:
        """Like `iter_text()` but will only yield chunks for each line"""
        async for chunk in self.http_response.aiter_lines():
            yield chunk
463
+
464
+
465
class BinaryAPIResponse(APIResponse[bytes]):
    """`APIResponse` specialisation with helpers for binary payloads.

    Note: to stream the response data rather than reading it eagerly in one
    go, make the API request through `.with_streaming_response`,
    e.g. `.with_streaming_response.get_binary_response()`
    """

    def write_to_file(
        self,
        file: str | os.PathLike[str],
    ) -> None:
        """Write the output to the given file.

        `file` may be a filename or any path-like object, e.g. pathlib.Path

        Note: to stream the data into the file rather than writing it all at
        once, make the API request through `.with_streaming_response`,
        e.g. `.with_streaming_response.get_binary_response()`
        """
        with open(file, mode="wb") as out:
            # writes each chunk in order, exactly like an explicit loop
            out.writelines(self.iter_bytes())
488
+
489
+
490
class AsyncBinaryAPIResponse(AsyncAPIResponse[bytes]):
    """`AsyncAPIResponse` specialisation with helpers for binary payloads.

    Note: to stream the response data rather than reading it eagerly in one
    go, make the API request through `.with_streaming_response`,
    e.g. `.with_streaming_response.get_binary_response()`
    """

    async def write_to_file(
        self,
        file: str | os.PathLike[str],
    ) -> None:
        """Write the output to the given file.

        `file` may be a filename or any path-like object, e.g. pathlib.Path

        Note: to stream the data into the file rather than writing it all at
        once, make the API request through `.with_streaming_response`,
        e.g. `.with_streaming_response.get_binary_response()`
        """
        target = anyio.Path(file)
        async with await target.open(mode="wb") as out:
            async for piece in self.iter_bytes():
                await out.write(piece)
514
+
515
+
516
class StreamedBinaryAPIResponse(APIResponse[bytes]):
    """`APIResponse` specialisation for binary payloads that are streamed to disk."""

    def stream_to_file(
        self,
        file: str | os.PathLike[str],
        *,
        chunk_size: int | None = None,
    ) -> None:
        """Streams the output to the given file.

        `file` may be a filename or any path-like object, e.g. pathlib.Path;
        `chunk_size` is forwarded to `iter_bytes()`.
        """
        with open(file, mode="wb") as out:
            # writes each chunk in order, exactly like an explicit loop
            out.writelines(self.iter_bytes(chunk_size))
530
+
531
+
532
class AsyncStreamedBinaryAPIResponse(AsyncAPIResponse[bytes]):
    """`AsyncAPIResponse` specialisation for binary payloads that are streamed to disk."""

    async def stream_to_file(
        self,
        file: str | os.PathLike[str],
        *,
        chunk_size: int | None = None,
    ) -> None:
        """Streams the output to the given file.

        `file` may be a filename or any path-like object, e.g. pathlib.Path;
        `chunk_size` is forwarded to `iter_bytes()`.
        """
        target = anyio.Path(file)
        async with await target.open(mode="wb") as out:
            async for piece in self.iter_bytes(chunk_size):
                await out.write(piece)
547
+
548
+
549
class MissingStreamClassError(TypeError):
    """Raised when a streamed response is requested but no stream class is available."""

    def __init__(self) -> None:
        message = "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `llama_stack._streaming` for reference"
        super().__init__(message)
554
+
555
+
556
class StreamAlreadyConsumed(LlamaStackError):
    """
    Raised on an attempt to read or stream content that has already
    been streamed.

    A typical cause is using a method like `.iter_lines()` and then trying
    to read the entire response body afterwards, e.g.

    ```py
    response = await client.post(...)
    async for line in response.iter_lines():
        ...  # do something with `line`

    content = await response.read()
    # ^ error
    ```

    To get this behaviour, either accumulate the response content manually
    or call `await response.read()` before iterating over the stream.
    """

    def __init__(self) -> None:
        explanation = (
            "Attempted to read or stream some content, but the content has already been streamed. "
            "This could be due to attempting to stream the response content more than once."
        )
        remedy = (
            "You can fix this by manually accumulating the response content while streaming "
            "or by calling `.read()` before starting to stream."
        )
        super().__init__(explanation + "\n\n" + remedy)
588
+
589
+
590
class ResponseContextManager(Generic[_APIResponseT]):
    """Context manager that defers the request until it is entered and
    guarantees the resulting response is closed when the block exits.
    """

    def __init__(self, request_func: Callable[[], _APIResponseT]) -> None:
        self._request_func = request_func
        self.__response: _APIResponseT | None = None

    def __enter__(self) -> _APIResponseT:
        # the request is only issued here, not at construction time
        response = self._request_func()
        self.__response = response
        return response

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        response = self.__response
        if response is None:
            # never entered successfully, nothing to release
            return
        response.close()
612
+
613
+
614
class AsyncResponseContextManager(Generic[_AsyncAPIResponseT]):
    """Async context manager that awaits the request only when entered and
    guarantees the resulting response is closed when the block exits.
    """

    def __init__(self, api_request: Awaitable[_AsyncAPIResponseT]) -> None:
        self._api_request = api_request
        self.__response: _AsyncAPIResponseT | None = None

    async def __aenter__(self) -> _AsyncAPIResponseT:
        # the awaitable is only awaited here, not at construction time
        response = await self._api_request
        self.__response = response
        return response

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        response = self.__response
        if response is None:
            # never entered successfully, nothing to release
            return
        await response.close()
636
+
637
+
638
def to_streamed_response_wrapper(func: Callable[P, R]) -> Callable[P, ResponseContextManager[APIResponse[R]]]:
    """Wrap a bound API method so that it streams and hands back the raw
    `APIResponse` object, via a `ResponseContextManager`, instead of parsed data.
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[APIResponse[R]]:
        # copy the caller's headers (if any) and flag the request as a raw stream
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, str] = {**caller_headers, RAW_RESPONSE_HEADER: "stream"}
        kwargs["extra_headers"] = headers

        # the request itself is deferred until the context manager is entered
        deferred = functools.partial(func, *args, **kwargs)
        return ResponseContextManager(cast(Callable[[], APIResponse[R]], deferred))

    return wrapped
655
+
656
+
657
def async_to_streamed_response_wrapper(
    func: Callable[P, Awaitable[R]],
) -> Callable[P, AsyncResponseContextManager[AsyncAPIResponse[R]]]:
    """Wrap a bound async API method so that it streams and hands back the raw
    `AsyncAPIResponse` object, via an `AsyncResponseContextManager`, instead of parsed data.
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[AsyncAPIResponse[R]]:
        # copy the caller's headers (if any) and flag the request as a raw stream
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, str] = {**caller_headers, RAW_RESPONSE_HEADER: "stream"}
        kwargs["extra_headers"] = headers

        # calling `func` only creates the awaitable; it is awaited on `__aenter__`
        pending = func(*args, **kwargs)
        return AsyncResponseContextManager(cast(Awaitable[AsyncAPIResponse[R]], pending))

    return wrapped
676
+
677
+
678
def to_custom_streamed_response_wrapper(
    func: Callable[P, object],
    response_cls: type[_APIResponseT],
) -> Callable[P, ResponseContextManager[_APIResponseT]]:
    """Wrap a bound API method so that calling it returns a `ResponseContextManager`
    typed for the given `APIResponse` class.

    The wrapper sets the `RAW_RESPONSE_HEADER` entry of `extra_headers` to
    `"stream"` and the `OVERRIDE_CAST_TO_HEADER` entry to `response_cls`, then
    hands the pending call to `ResponseContextManager` as a zero-argument
    callable; `func` itself is not invoked by the wrapper.

    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[_APIResponseT]:
        # Build a fresh dict so the caller's own `extra_headers` mapping is left untouched.
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, Any] = {
            **caller_headers,
            RAW_RESPONSE_HEADER: "stream",
            OVERRIDE_CAST_TO_HEADER: response_cls,
        }
        kwargs["extra_headers"] = headers

        pending_call = functools.partial(func, *args, **kwargs)
        return ResponseContextManager(cast(Callable[[], _APIResponseT], pending_call))

    return wrapped
701
+
702
+
703
def async_to_custom_streamed_response_wrapper(
    func: Callable[P, Awaitable[object]],
    response_cls: type[_AsyncAPIResponseT],
) -> Callable[P, AsyncResponseContextManager[_AsyncAPIResponseT]]:
    """Wrap a bound async API method so that calling it returns an
    `AsyncResponseContextManager` typed for the given response class.

    The wrapper sets the `RAW_RESPONSE_HEADER` entry of `extra_headers` to
    `"stream"` and the `OVERRIDE_CAST_TO_HEADER` entry to `response_cls`, calls
    `func` to obtain its awaitable, and passes that awaitable to the context
    manager without awaiting it.

    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[_AsyncAPIResponseT]:
        # Build a fresh dict so the caller's own `extra_headers` mapping is left untouched.
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, Any] = {
            **caller_headers,
            RAW_RESPONSE_HEADER: "stream",
            OVERRIDE_CAST_TO_HEADER: response_cls,
        }
        kwargs["extra_headers"] = headers

        # `func` is called here, but the awaitable it returns is not awaited by this wrapper.
        pending_request = func(*args, **kwargs)
        return AsyncResponseContextManager(cast(Awaitable[_AsyncAPIResponseT], pending_request))

    return wrapped
726
+
727
+
728
def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]:
    """Wrap a bound API method so that its result is typed as the raw `APIResponse`.

    The wrapper sets the `RAW_RESPONSE_HEADER` entry of `extra_headers` to
    `"raw"`, calls `func` immediately, and returns its result cast to
    `APIResponse[R]`.
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]:
        # Build a fresh dict so the caller's own `extra_headers` mapping is left untouched.
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, str] = {**caller_headers, RAW_RESPONSE_HEADER: "raw"}
        kwargs["extra_headers"] = headers

        result = func(*args, **kwargs)
        return cast(APIResponse[R], result)

    return wrapped
743
+
744
+
745
def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[AsyncAPIResponse[R]]]:
    """Wrap a bound async API method so that its result is typed as the raw `AsyncAPIResponse`.

    The returned coroutine function sets the `RAW_RESPONSE_HEADER` entry of
    `extra_headers` to `"raw"`, awaits `func`, and returns the awaited result
    cast to `AsyncAPIResponse[R]`.
    """

    @functools.wraps(func)
    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]:
        # Build a fresh dict so the caller's own `extra_headers` mapping is left untouched.
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, str] = {**caller_headers, RAW_RESPONSE_HEADER: "raw"}
        kwargs["extra_headers"] = headers

        result = await func(*args, **kwargs)
        return cast(AsyncAPIResponse[R], result)

    return wrapped
760
+
761
+
762
def to_custom_raw_response_wrapper(
    func: Callable[P, object],
    response_cls: type[_APIResponseT],
) -> Callable[P, _APIResponseT]:
    """Wrap a bound API method so that its result is typed as the given `APIResponse` class.

    The wrapper sets the `RAW_RESPONSE_HEADER` entry of `extra_headers` to
    `"raw"` and the `OVERRIDE_CAST_TO_HEADER` entry to `response_cls`, calls
    `func` immediately, and returns its result cast to that class.

    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT:
        # Build a fresh dict so the caller's own `extra_headers` mapping is left untouched.
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, Any] = {
            **caller_headers,
            RAW_RESPONSE_HEADER: "raw",
            OVERRIDE_CAST_TO_HEADER: response_cls,
        }
        kwargs["extra_headers"] = headers

        result = func(*args, **kwargs)
        return cast(_APIResponseT, result)

    return wrapped
783
+
784
+
785
def async_to_custom_raw_response_wrapper(
    func: Callable[P, Awaitable[object]],
    response_cls: type[_AsyncAPIResponseT],
) -> Callable[P, Awaitable[_AsyncAPIResponseT]]:
    """Wrap a bound async API method so that its awaitable is typed as yielding
    the given response class.

    The wrapper sets the `RAW_RESPONSE_HEADER` entry of `extra_headers` to
    `"raw"` and the `OVERRIDE_CAST_TO_HEADER` entry to `response_cls`, then
    returns the awaitable produced by `func` as-is; awaiting it is left to the
    caller.

    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
    """

    @functools.wraps(func)
    def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]:
        # Build a fresh dict so the caller's own `extra_headers` mapping is left untouched.
        caller_headers = cast(Any, kwargs.get("extra_headers")) or {}
        headers: dict[str, Any] = {
            **caller_headers,
            RAW_RESPONSE_HEADER: "raw",
            OVERRIDE_CAST_TO_HEADER: response_cls,
        }
        kwargs["extra_headers"] = headers

        # Not a coroutine function: the awaitable from `func` is passed straight through.
        pending_request = func(*args, **kwargs)
        return cast(Awaitable[_AsyncAPIResponseT], pending_request)

    return wrapped
806
+
807
+
808
def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type:
    """Return the type argument `T` of a response type such as `APIResponse[T]`.

    Concrete subclasses are accepted as well, e.g.
    ```py
    class MyResponse(APIResponse[bytes]):
        ...

    extract_response_type(MyResponse) -> bytes
    ```
    """
    # The generic response classes whose first type parameter is looked up.
    response_bases = cast("tuple[type, ...]", (BaseAPIResponse, APIResponse, AsyncAPIResponse))
    return extract_type_var_from_base(typ, generic_bases=response_bases, index=0)