hammad-python 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- hammad/__init__.py +177 -0
- hammad/{performance/imports.py → _internal.py} +7 -1
- hammad/cache/__init__.py +1 -1
- hammad/cli/__init__.py +3 -1
- hammad/cli/_runner.py +265 -0
- hammad/cli/animations.py +1 -1
- hammad/cli/plugins.py +133 -78
- hammad/cli/styles/__init__.py +1 -1
- hammad/cli/styles/utils.py +149 -3
- hammad/data/__init__.py +56 -29
- hammad/data/collections/__init__.py +27 -17
- hammad/data/collections/collection.py +205 -383
- hammad/data/collections/indexes/__init__.py +37 -0
- hammad/data/collections/indexes/qdrant/__init__.py +1 -0
- hammad/data/collections/indexes/qdrant/index.py +735 -0
- hammad/data/collections/indexes/qdrant/settings.py +94 -0
- hammad/data/collections/indexes/qdrant/utils.py +220 -0
- hammad/data/collections/indexes/tantivy/__init__.py +1 -0
- hammad/data/collections/indexes/tantivy/index.py +428 -0
- hammad/data/collections/indexes/tantivy/settings.py +51 -0
- hammad/data/collections/indexes/tantivy/utils.py +200 -0
- hammad/data/configurations/__init__.py +2 -2
- hammad/data/configurations/configuration.py +2 -2
- hammad/data/models/__init__.py +20 -9
- hammad/data/models/extensions/__init__.py +4 -0
- hammad/data/models/{pydantic → extensions/pydantic}/__init__.py +6 -19
- hammad/data/models/{pydantic → extensions/pydantic}/converters.py +143 -16
- hammad/data/models/{base/fields.py → fields.py} +1 -1
- hammad/data/models/{base/model.py → model.py} +1 -1
- hammad/data/models/{base/utils.py → utils.py} +1 -1
- hammad/data/sql/__init__.py +23 -0
- hammad/data/sql/database.py +578 -0
- hammad/data/sql/types.py +141 -0
- hammad/data/types/__init__.py +1 -3
- hammad/data/types/file.py +3 -3
- hammad/data/types/multimodal/__init__.py +2 -2
- hammad/data/types/multimodal/audio.py +2 -2
- hammad/data/types/multimodal/image.py +2 -2
- hammad/formatting/__init__.py +9 -27
- hammad/formatting/json/__init__.py +8 -2
- hammad/formatting/json/converters.py +7 -1
- hammad/formatting/text/__init__.py +1 -1
- hammad/formatting/yaml/__init__.py +1 -1
- hammad/genai/__init__.py +78 -0
- hammad/genai/agents/__init__.py +1 -0
- hammad/genai/agents/types/__init__.py +35 -0
- hammad/genai/agents/types/history.py +277 -0
- hammad/genai/agents/types/tool.py +490 -0
- hammad/genai/embedding_models/__init__.py +41 -0
- hammad/{ai/embeddings/client/litellm_embeddings_client.py → genai/embedding_models/embedding_model.py} +47 -142
- hammad/genai/embedding_models/embedding_model_name.py +77 -0
- hammad/genai/embedding_models/embedding_model_request.py +65 -0
- hammad/{ai/embeddings/types.py → genai/embedding_models/embedding_model_response.py} +3 -3
- hammad/genai/embedding_models/run.py +161 -0
- hammad/genai/language_models/__init__.py +35 -0
- hammad/genai/language_models/_streaming.py +622 -0
- hammad/genai/language_models/_types.py +276 -0
- hammad/genai/language_models/_utils/__init__.py +31 -0
- hammad/genai/language_models/_utils/_completions.py +131 -0
- hammad/genai/language_models/_utils/_messages.py +89 -0
- hammad/genai/language_models/_utils/_requests.py +202 -0
- hammad/genai/language_models/_utils/_structured_outputs.py +124 -0
- hammad/genai/language_models/language_model.py +734 -0
- hammad/genai/language_models/language_model_request.py +135 -0
- hammad/genai/language_models/language_model_response.py +219 -0
- hammad/genai/language_models/language_model_response_chunk.py +53 -0
- hammad/genai/language_models/run.py +530 -0
- hammad/genai/multimodal_models.py +48 -0
- hammad/genai/rerank_models.py +26 -0
- hammad/logging/__init__.py +1 -1
- hammad/logging/decorators.py +1 -1
- hammad/logging/logger.py +2 -2
- hammad/mcp/__init__.py +1 -1
- hammad/mcp/client/__init__.py +35 -0
- hammad/mcp/client/client.py +105 -4
- hammad/mcp/client/client_service.py +10 -3
- hammad/mcp/servers/__init__.py +24 -0
- hammad/{performance/runtime → runtime}/__init__.py +2 -2
- hammad/{performance/runtime → runtime}/decorators.py +1 -1
- hammad/{performance/runtime → runtime}/run.py +1 -1
- hammad/service/__init__.py +1 -1
- hammad/service/create.py +3 -8
- hammad/service/decorators.py +8 -8
- hammad/typing/__init__.py +28 -0
- hammad/web/__init__.py +3 -3
- hammad/web/http/client.py +1 -1
- hammad/web/models.py +53 -21
- hammad/web/search/client.py +99 -52
- hammad/web/utils.py +13 -13
- hammad_python-0.0.16.dist-info/METADATA +191 -0
- hammad_python-0.0.16.dist-info/RECORD +110 -0
- hammad/ai/__init__.py +0 -1
- hammad/ai/_utils.py +0 -142
- hammad/ai/completions/__init__.py +0 -45
- hammad/ai/completions/client.py +0 -684
- hammad/ai/completions/create.py +0 -710
- hammad/ai/completions/settings.py +0 -100
- hammad/ai/completions/types.py +0 -792
- hammad/ai/completions/utils.py +0 -486
- hammad/ai/embeddings/__init__.py +0 -35
- hammad/ai/embeddings/client/__init__.py +0 -1
- hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
- hammad/ai/embeddings/create.py +0 -159
- hammad/data/collections/base_collection.py +0 -58
- hammad/data/collections/searchable_collection.py +0 -556
- hammad/data/collections/vector_collection.py +0 -596
- hammad/data/databases/__init__.py +0 -21
- hammad/data/databases/database.py +0 -902
- hammad/data/models/base/__init__.py +0 -35
- hammad/data/models/pydantic/models/__init__.py +0 -28
- hammad/data/models/pydantic/models/arbitrary_model.py +0 -46
- hammad/data/models/pydantic/models/cacheable_model.py +0 -79
- hammad/data/models/pydantic/models/fast_model.py +0 -318
- hammad/data/models/pydantic/models/function_model.py +0 -176
- hammad/data/models/pydantic/models/subscriptable_model.py +0 -63
- hammad/performance/__init__.py +0 -36
- hammad/py.typed +0 -0
- hammad_python-0.0.14.dist-info/METADATA +0 -70
- hammad_python-0.0.14.dist-info/RECORD +0 -99
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/licenses/LICENSE +0 -0
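The listing shows the main restructure in this release: the `hammad/ai` completion and embedding modules are removed, and their functionality reappears under a new `hammad/genai` package (language models, embedding models, agents), alongside new `hammad/data/sql` and `hammad/data/collections/indexes` modules. Below is a minimal migration sketch; the module paths come from the file listing above, but apart from `LanguageModel` (which the diff below confirms) the exact re-exported names are assumptions, not something this diff verifies:

```python
# Hypothetical migration sketch -- paths are taken from the file listing above.
#
# 0.0.14 shipped completions/embeddings under hammad.ai (removed in 0.0.16):
#   hammad/ai/completions/create.py, hammad/ai/embeddings/create.py, ...
#
# 0.0.16 moves this surface under hammad.genai:
from hammad.genai.language_models.language_model import LanguageModel

llm = LanguageModel(model="openai/gpt-4o-mini")  # default model per the diff below
```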
hammad/genai/language_models/language_model.py (new file)

```diff
@@ -0,0 +1,734 @@
+"""hammad.genai.language_models.language_model"""
+
+from typing import (
+    Any,
+    Callable,
+    List,
+    TypeVar,
+    Generic,
+    Union,
+    Optional,
+    Type,
+    overload,
+    Dict,
+    TYPE_CHECKING,
+)
+from typing_extensions import Literal
+
+if TYPE_CHECKING:
+    from httpx import Timeout
+
+from ._types import LanguageModelName, LanguageModelInstructorMode
+from ._utils import (
+    parse_messages_input,
+    handle_completion_request_params,
+    handle_completion_response,
+    handle_structured_output_request_params,
+    prepare_response_model,
+    handle_structured_output_response,
+    format_tool_calls,
+    LanguageModelRequestBuilder,
+)
+from .language_model_request import LanguageModelRequest, LanguageModelMessagesParam
+from .language_model_response import LanguageModelResponse
+from ._streaming import Stream, AsyncStream
+
+__all__ = [
+    "LanguageModel",
+    "LanguageModelError",
+]
+
+T = TypeVar("T")
+
+
+class LanguageModelError(Exception):
+    """Error raised when an error occurs during a language model operation."""
+
+    def __init__(self, message: str, *args: Any, **kwargs: Any):
+        super().__init__(message, *args, **kwargs)
+        self.message = message
+        self.args = args
+        self.kwargs = kwargs
+
+
+class _AIProvider:
+    """Provider for accessing litellm and instructor instances."""
+
+    _LITELLM = None
+    _INSTRUCTOR = None
+
+    @staticmethod
+    def get_litellm():
+        """Returns the `litellm` module."""
+        if _AIProvider._LITELLM is None:
+            try:
+                import litellm
+                litellm.drop_params = True
+                litellm.modify_params = True
+                _AIProvider._LITELLM = litellm
+
+                # Rebuild LanguageModelResponse model now that litellm is available
+                LanguageModelResponse.model_rebuild()
+            except ImportError as e:
+                raise ImportError(
+                    "Using the `hammad.ai.llms` extension requires the `litellm` package to be installed.\n"
+                    "Please either install the `litellm` package, or install the `hammad.ai` extension with:\n"
+                    "`pip install 'hammad-python[ai]'`"
+                ) from e
+        return _AIProvider._LITELLM
+
+    @staticmethod
+    def get_instructor():
+        """Returns the `instructor` module."""
+        if _AIProvider._INSTRUCTOR is None:
+            try:
+                import instructor
+                _AIProvider._INSTRUCTOR = instructor
+            except ImportError as e:
+                raise ImportError(
+                    "Using the `hammad.ai.llms` extension requires the `instructor` package to be installed.\n"
+                    "Please either install the `instructor` package, or install the `hammad.ai` extension with:\n"
+                    "`pip install 'hammad-python[ai]'`"
+                ) from e
+        return _AIProvider._INSTRUCTOR
+
+
+class LanguageModel(Generic[T]):
+    """A clean language model interface for generating responses with comprehensive
+    parameter handling and type safety."""
+
+    def __init__(
+        self,
+        model: LanguageModelName = "openai/gpt-4o-mini",
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        instructor_mode: LanguageModelInstructorMode = "tool_call",
+    ):
+        """Initialize the language model.
+
+        Args:
+            model: The model to use for requests
+            instructor_mode: Default instructor mode for structured outputs
+        """
+        self.model = model
+        self.base_url = base_url
+        self.api_key = api_key
+        self.instructor_mode = instructor_mode
+        self._instructor_client = None
+
+    def _get_instructor_client(self, mode: Optional[LanguageModelInstructorMode] = None):
+        """Get or create an instructor client with the specified mode."""
+        effective_mode = mode or self.instructor_mode
+
+        # Create a new client if mode changed or client doesn't exist
+        if (self._instructor_client is None or
+            getattr(self._instructor_client, '_mode', None) != effective_mode):
+
+            instructor = _AIProvider.get_instructor()
+            self._instructor_client = instructor.from_litellm(
+                completion=_AIProvider.get_litellm().completion,
+                mode=instructor.Mode(effective_mode)
+            )
+            self._instructor_client._mode = effective_mode
+
+        return self._instructor_client
+
+    def _get_async_instructor_client(self, mode: Optional[LanguageModelInstructorMode] = None):
+        """Get or create an async instructor client with the specified mode."""
+        effective_mode = mode or self.instructor_mode
+
+        instructor = _AIProvider.get_instructor()
+        return instructor.from_litellm(
+            completion=_AIProvider.get_litellm().acompletion,
+            mode=instructor.Mode(effective_mode)
+        )
+
+    # Overloaded run methods for different return types
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[str]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        tools: Optional[List[Any]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[str]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Stream[str]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        tools: Optional[List[Any]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Stream[str]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[T]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        instructor_mode: Optional[LanguageModelInstructorMode] = None,
+        response_field_name: Optional[str] = None,
+        response_field_instruction: Optional[str] = None,
+        response_model_name: Optional[str] = None,
+        max_retries: Optional[int] = None,
+        strict: Optional[bool] = None,
+        validation_context: Optional[Dict[str, Any]] = None,
+        context: Optional[Dict[str, Any]] = None,
+        completion_kwargs_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_response_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_error_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_last_attempt_hooks: Optional[List[Callable[..., None]]] = None,
+        parse_error_hooks: Optional[List[Callable[..., None]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[T]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Stream[T]: ...
+
+    @overload
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        instructor_mode: Optional[LanguageModelInstructorMode] = None,
+        response_field_name: Optional[str] = None,
+        response_field_instruction: Optional[str] = None,
+        response_model_name: Optional[str] = None,
+        max_retries: Optional[int] = None,
+        strict: Optional[bool] = None,
+        validation_context: Optional[Dict[str, Any]] = None,
+        context: Optional[Dict[str, Any]] = None,
+        completion_kwargs_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_response_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_error_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_last_attempt_hooks: Optional[List[Callable[..., None]]] = None,
+        parse_error_hooks: Optional[List[Callable[..., None]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Stream[T]: ...
+
+    def run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Union[LanguageModelResponse[Any], Stream[Any]]:
+        """Run a language model request.
+
+        Args:
+            messages: The input messages/content for the request
+            instructions: Optional system instructions to prepend
+            **kwargs: Additional request parameters
+
+        Returns:
+            LanguageModelResponse or LanguageModelStream depending on parameters
+        """
+        try:
+            # Extract model, base_url, and api_key from kwargs, using instance defaults
+            model = kwargs.pop("model", None) or self.model
+            base_url = kwargs.pop("base_url", None) or self.base_url
+            api_key = kwargs.pop("api_key", None) or self.api_key
+
+            # Add base_url and api_key to kwargs if they are set
+            if base_url is not None:
+                kwargs["base_url"] = base_url
+            if api_key is not None:
+                kwargs["api_key"] = api_key
+
+            # Create the request
+            request = LanguageModelRequestBuilder(
+                messages=messages,
+                instructions=instructions,
+                model=model,
+                **kwargs
+            )
+
+            # Parse messages
+            parsed_messages = parse_messages_input(request.messages, request.instructions)
+            parsed_messages = format_tool_calls(parsed_messages)
+
+            # Handle different request types
+            if request.is_structured_output():
+                return self._handle_structured_output_request(request, parsed_messages)
+            else:
+                return self._handle_completion_request(request, parsed_messages)
+
+        except Exception as e:
+            raise LanguageModelError(f"Error in language model request: {e}") from e
+
+    # Overloaded async_run methods for different return types
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[str]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        tools: Optional[List[Any]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[str]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> AsyncStream[str]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        tools: Optional[List[Any]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> AsyncStream[str]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[T]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[False] = False,
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        instructor_mode: Optional[LanguageModelInstructorMode] = None,
+        response_field_name: Optional[str] = None,
+        response_field_instruction: Optional[str] = None,
+        response_model_name: Optional[str] = None,
+        max_retries: Optional[int] = None,
+        strict: Optional[bool] = None,
+        validation_context: Optional[Dict[str, Any]] = None,
+        context: Optional[Dict[str, Any]] = None,
+        completion_kwargs_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_response_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_error_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_last_attempt_hooks: Optional[List[Callable[..., None]]] = None,
+        parse_error_hooks: Optional[List[Callable[..., None]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LanguageModelResponse[T]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        **kwargs: Any,
+    ) -> AsyncStream[T]: ...
+
+    @overload
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        *,
+        type: Type[T],
+        stream: Literal[True],
+        model: Optional[LanguageModelName | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        instructor_mode: Optional[LanguageModelInstructorMode] = None,
+        response_field_name: Optional[str] = None,
+        response_field_instruction: Optional[str] = None,
+        response_model_name: Optional[str] = None,
+        max_retries: Optional[int] = None,
+        strict: Optional[bool] = None,
+        validation_context: Optional[Dict[str, Any]] = None,
+        context: Optional[Dict[str, Any]] = None,
+        completion_kwargs_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_response_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_error_hooks: Optional[List[Callable[..., None]]] = None,
+        completion_last_attempt_hooks: Optional[List[Callable[..., None]]] = None,
+        parse_error_hooks: Optional[List[Callable[..., None]]] = None,
+        timeout: Optional[Union[float, str, "Timeout"]] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        seed: Optional[int] = None,
+        user: Optional[str] = None,
+        **kwargs: Any,
+    ) -> AsyncStream[T]: ...
+
+    async def async_run(
+        self,
+        messages: LanguageModelMessagesParam,
+        instructions: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Union[LanguageModelResponse[Any], AsyncStream[Any]]:
+        """Run an async language model request.
+
+        Args:
+            messages: The input messages/content for the request
+            instructions: Optional system instructions to prepend
+            **kwargs: Additional request parameters
+
+        Returns:
+            LanguageModelResponse or LanguageModelAsyncStream depending on parameters
+        """
+        try:
+            # Extract model, base_url, and api_key from kwargs, using instance defaults
+            model = kwargs.pop("model", None) or self.model
+            base_url = kwargs.pop("base_url", None) or self.base_url
+            api_key = kwargs.pop("api_key", None) or self.api_key
+
+            # Add base_url and api_key to kwargs if they are set
+            if base_url is not None:
+                kwargs["base_url"] = base_url
+            if api_key is not None:
+                kwargs["api_key"] = api_key
+
+            # Create the request
+            request = LanguageModelRequestBuilder(
+                messages=messages,
+                instructions=instructions,
+                model=model,
+                **kwargs
+            )
+
+            # Parse messages
+            parsed_messages = parse_messages_input(request.messages, request.instructions)
+            parsed_messages = format_tool_calls(parsed_messages)
+
+            # Handle different request types
+            if request.is_structured_output():
+                return await self._handle_async_structured_output_request(request, parsed_messages)
+            else:
+                return await self._handle_async_completion_request(request, parsed_messages)
+
+        except Exception as e:
+            raise LanguageModelError(f"Error in async language model request: {e}") from e
+
+    def _handle_completion_request(
+        self,
+        request: LanguageModelRequestBuilder,
+        parsed_messages: List[Any]
+    ) -> Union[LanguageModelResponse[str], Stream[str]]:
+        """Handle a standard completion request."""
+        # Get filtered parameters
+        params = handle_completion_request_params(request.get_completion_settings())
+        params["messages"] = parsed_messages
+
+        litellm = _AIProvider.get_litellm()
+
+        if request.is_streaming():
+            # Handle streaming - stream parameter is already in params
+            if "stream_options" not in params and "stream_options" in request.settings:
+                params["stream_options"] = request.settings["stream_options"]
+            stream = litellm.completion(**params)
+            return Stream(stream, output_type=str, model=request.model)
+        else:
+            # Handle non-streaming
+            response = litellm.completion(**params)
+            return handle_completion_response(response, request.model)
+
+    async def _handle_async_completion_request(
+        self,
+        request: LanguageModelRequestBuilder,
+        parsed_messages: List[Any]
+    ) -> Union[LanguageModelResponse[str], AsyncStream[str]]:
+        """Handle an async standard completion request."""
+        # Get filtered parameters
+        params = handle_completion_request_params(request.get_completion_settings())
+        params["messages"] = parsed_messages
+
+        litellm = _AIProvider.get_litellm()
+
+        if request.is_streaming():
+            # Handle streaming - stream parameter is already in params
+            if "stream_options" not in params and "stream_options" in request.settings:
+                params["stream_options"] = request.settings["stream_options"]
+            stream = await litellm.acompletion(**params)
+            return AsyncStream(stream, output_type=str, model=request.model)
+        else:
+            # Handle non-streaming
+            response = await litellm.acompletion(**params)
+            return handle_completion_response(response, request.model)
+
+    def _handle_structured_output_request(
+        self,
+        request: LanguageModelRequestBuilder,
+        parsed_messages: List[Any]
+    ) -> Union[LanguageModelResponse[Any], Stream[Any]]:
+        """Handle a structured output request."""
+        # Get filtered parameters
+        params = handle_structured_output_request_params(request.get_structured_output_settings())
+        params["messages"] = parsed_messages
+
+        # Prepare response model
+        response_model = prepare_response_model(
+            request.get_output_type(),
+            request.get_response_field_name(),
+            request.get_response_field_instruction(),
+            request.get_response_model_name(),
+        )
+
+        # Get instructor client
+        client = self._get_instructor_client(request.get_instructor_mode())
+
+        if request.is_streaming():
+            if isinstance(request.get_output_type(), list):
+                # Handle streaming - stream parameter is already in params
+                stream = client.chat.completions.create_iterable(
+                    response_model=response_model,
+                    max_retries=request.get_max_retries(),
+                    strict=request.get_strict_mode(),
+                    **params,
+                )
+            else:
+                # Handle streaming - stream parameter is already in params
+                stream = client.chat.completions.create_partial(
+                    response_model=response_model,
+                    max_retries=request.get_max_retries(),
+                    strict=request.get_strict_mode(),
+                    **params,
+                )
+            return Stream(stream, output_type=request.get_output_type(), model=request.model, response_field_name=request.get_response_field_name())
+        else:
+            # Handle non-streaming
+            response, completion = client.chat.completions.create_with_completion(
+                response_model=response_model,
+                max_retries=request.get_max_retries(),
+                strict=request.get_strict_mode(),
+                **params,
+            )
+            return handle_structured_output_response(
+                response, completion, request.model, request.get_output_type(), request.get_response_field_name()
+            )
+
+    async def _handle_async_structured_output_request(
+        self,
+        request: LanguageModelRequestBuilder,
+        parsed_messages: List[Any]
+    ) -> Union[LanguageModelResponse[Any], AsyncStream[Any]]:
+        """Handle an async structured output request."""
+        # Get filtered parameters
+        params = handle_structured_output_request_params(request.get_structured_output_settings())
+        params["messages"] = parsed_messages
+
+        # Prepare response model
+        response_model = prepare_response_model(
+            request.get_output_type(),
+            request.get_response_field_name(),
+            request.get_response_field_instruction(),
+            request.get_response_model_name(),
+        )
+
+        # Get async instructor client
+        client = self._get_async_instructor_client(request.get_instructor_mode())
+
+        if request.is_streaming():
+            if isinstance(request.get_output_type(), list):
+                # Handle streaming - stream parameter is already in params
+                stream = client.chat.completions.create_iterable(
+                    response_model=response_model,
+                    max_retries=request.get_max_retries(),
+                    strict=request.get_strict_mode(),
+                    **params,
+                )
+            else:
+                # Handle streaming - stream parameter is already in params
+                stream = client.chat.completions.create_partial(
+                    response_model=response_model,
+                    max_retries=request.get_max_retries(),
+                    strict=request.get_strict_mode(),
+                    **params,
+                )
+            return AsyncStream(stream, output_type=request.get_output_type(), model=request.model, response_field_name=request.get_response_field_name())
+        else:
+            # Handle non-streaming
+            response, completion = await client.chat.completions.create_with_completion(
+                response_model=response_model,
+                max_retries=request.get_max_retries(),
+                strict=request.get_strict_mode(),
+                **params,
+            )
+            return handle_structured_output_response(
+                response, completion, request.model, request.get_output_type(), request.get_response_field_name()
+            )
```
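For orientation, the overloads above encode four call shapes: plain completion (`LanguageModelResponse[str]`), streaming completion (`Stream[str]` / `AsyncStream[str]`), structured output selected via the `type=` parameter (`LanguageModelResponse[T]`), and streaming structured output (`Stream[T]` / `AsyncStream[T]`). A short usage sketch follows; it assumes plain strings are accepted by `parse_messages_input` and that `Stream` objects are iterable, neither of which this diff shows, and `Haiku` is a hypothetical schema used only for illustration:

```python
from pydantic import BaseModel

from hammad.genai.language_models.language_model import LanguageModel


class Haiku(BaseModel):
    # Hypothetical structured-output schema, for illustration only.
    lines: list[str]


llm = LanguageModel(model="openai/gpt-4o-mini")

# 1. Plain completion -> LanguageModelResponse[str]
response = llm.run("Write a haiku about package diffs.")

# 2. Streaming completion -> Stream[str] (assumed iterable)
for chunk in llm.run("Write a haiku about package diffs.", stream=True):
    ...

# 3. Structured output via instructor -> LanguageModelResponse[Haiku]
haiku = llm.run("Write a haiku about package diffs.", type=Haiku)

# 4. The async variants mirror the sync ones:
#    await llm.async_run(...)               -> LanguageModelResponse[...]
#    await llm.async_run(..., stream=True)  -> AsyncStream[...]
```

Note that `run` and `async_run` wrap any failure and re-raise it as `LanguageModelError`, so callers can catch a single exception type regardless of whether litellm or instructor raised the original error.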