hammad-python 0.0.11-py3-none-any.whl → 0.0.13-py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- hammad/__init__.py +169 -56
- hammad/_core/__init__.py +1 -0
- hammad/_core/_utils/__init__.py +4 -0
- hammad/_core/_utils/_import_utils.py +182 -0
- hammad/ai/__init__.py +59 -0
- hammad/ai/_utils.py +142 -0
- hammad/ai/completions/__init__.py +44 -0
- hammad/ai/completions/client.py +729 -0
- hammad/ai/completions/create.py +686 -0
- hammad/ai/completions/types.py +711 -0
- hammad/ai/completions/utils.py +374 -0
- hammad/ai/embeddings/__init__.py +35 -0
- hammad/ai/embeddings/client/__init__.py +1 -0
- hammad/ai/embeddings/client/base_embeddings_client.py +26 -0
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +200 -0
- hammad/ai/embeddings/client/litellm_embeddings_client.py +288 -0
- hammad/ai/embeddings/create.py +159 -0
- hammad/ai/embeddings/types.py +69 -0
- hammad/base/__init__.py +35 -0
- hammad/{based → base}/fields.py +23 -23
- hammad/{based → base}/model.py +124 -14
- hammad/base/utils.py +280 -0
- hammad/cache/__init__.py +30 -12
- hammad/cache/base_cache.py +181 -0
- hammad/cache/cache.py +169 -0
- hammad/cache/decorators.py +261 -0
- hammad/cache/file_cache.py +80 -0
- hammad/cache/ttl_cache.py +74 -0
- hammad/cli/__init__.py +10 -2
- hammad/cli/{styles/animations.py → animations.py} +79 -23
- hammad/cli/{plugins/__init__.py → plugins.py} +85 -90
- hammad/cli/styles/__init__.py +50 -0
- hammad/cli/styles/settings.py +4 -0
- hammad/configuration/__init__.py +35 -0
- hammad/{data/types/files → configuration}/configuration.py +96 -7
- hammad/data/__init__.py +14 -26
- hammad/data/collections/__init__.py +4 -2
- hammad/data/collections/collection.py +300 -75
- hammad/data/collections/vector_collection.py +118 -12
- hammad/data/databases/__init__.py +2 -2
- hammad/data/databases/database.py +383 -32
- hammad/json/__init__.py +2 -2
- hammad/logging/__init__.py +13 -5
- hammad/logging/decorators.py +404 -2
- hammad/logging/logger.py +442 -22
- hammad/multimodal/__init__.py +24 -0
- hammad/{data/types/files → multimodal}/audio.py +21 -6
- hammad/{data/types/files → multimodal}/image.py +5 -5
- hammad/multithreading/__init__.py +304 -0
- hammad/pydantic/__init__.py +2 -2
- hammad/pydantic/converters.py +1 -1
- hammad/pydantic/models/__init__.py +2 -2
- hammad/text/__init__.py +59 -14
- hammad/text/converters.py +723 -0
- hammad/text/{utils/markdown/formatting.py → markdown.py} +25 -23
- hammad/text/text.py +12 -14
- hammad/types/__init__.py +11 -0
- hammad/{data/types/files → types}/file.py +18 -18
- hammad/typing/__init__.py +138 -84
- hammad/web/__init__.py +3 -2
- hammad/web/models.py +245 -0
- hammad/web/search/client.py +75 -23
- hammad/web/utils.py +14 -5
- hammad/yaml/__init__.py +2 -2
- hammad/yaml/converters.py +1 -1
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/METADATA +4 -1
- hammad_python-0.0.13.dist-info/RECORD +85 -0
- hammad/based/__init__.py +0 -52
- hammad/based/utils.py +0 -455
- hammad/cache/_cache.py +0 -746
- hammad/data/types/__init__.py +0 -33
- hammad/data/types/files/__init__.py +0 -1
- hammad/data/types/files/document.py +0 -195
- hammad/text/utils/__init__.py +0 -1
- hammad/text/utils/converters.py +0 -229
- hammad/text/utils/markdown/__init__.py +0 -1
- hammad/text/utils/markdown/converters.py +0 -506
- hammad_python-0.0.11.dist-info/RECORD +0 -65
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/licenses/LICENSE +0 -0
hammad/ai/completions/client.py
@@ -0,0 +1,729 @@
+"""hammad.ai.completions.client"""
+
+from httpx import Timeout
+from typing import Any, Dict, List, Generic, Literal, TypeVar, Optional, Union, Type
+import sys
+
+if sys.version_info >= (3, 12):
+    from typing import TypedDict, Required, NotRequired
+else:
+    from typing_extensions import TypedDict, Required, NotRequired
+
+try:
+    from openai.types.chat import (
+        ChatCompletionModality,
+        ChatCompletionPredictionContentParam,
+        ChatCompletionAudioParam,
+    )
+except ImportError:
+    raise ImportError(
+        "Using the `hammad.ai.completions` extension requires the `openai` package to be installed.\n"
+        "Please either install the `openai` package, or install the `hammad.ai` extension with:\n"
+        "`pip install 'hammad-python[ai]'"
+    )
+
+from ...pydantic.converters import convert_to_pydantic_model
+from .._utils import get_litellm, get_instructor
+from ...base.model import Model
+from ...typing import is_pydantic_basemodel
+from .utils import (
+    format_tool_calls,
+    parse_completions_input,
+    convert_response_to_completion,
+    create_async_completion_stream,
+    create_completion_stream,
+)
+from .types import (
+    CompletionsInputParam,
+    CompletionsOutputType,
+    Completion,
+    CompletionChunk,
+    CompletionStream,
+    AsyncCompletionStream,
+)
+
+
+class OpenAIWebSearchUserLocationApproximate(TypedDict):
+    city: str
+    country: str
+    region: str
+    timezone: str
+
+
+class OpenAIWebSearchUserLocation(TypedDict):
+    approximate: OpenAIWebSearchUserLocationApproximate
+    type: Literal["approximate"]
+
+
+class OpenAIWebSearchOptions(TypedDict, total=False):
+    search_context_size: Optional[Literal["low", "medium", "high"]]
+    user_location: Optional[OpenAIWebSearchUserLocation]
+
+
+class AnthropicThinkingParam(TypedDict, total=False):
+    type: Literal["enabled"]
+    budget_tokens: int
+
+
+InstructorModeParam = Literal[
+    "function_call",
+    "parallel_tool_call",
+    "tool_call",
+    "tools_strict",
+    "json_mode",
+    "json_o1",
+    "markdown_json_mode",
+    "json_schema_mode",
+    "anthropic_tools",
+    "anthropic_reasoning_tools",
+    "anthropic_json",
+    "mistral_tools",
+    "mistral_structured_outputs",
+    "vertexai_tools",
+    "vertexai_json",
+    "vertexai_parallel_tools",
+    "gemini_json",
+    "gemini_tools",
+    "genai_tools",
+    "genai_structured_outputs",
+    "cohere_tools",
+    "cohere_json_object",
+    "cerebras_tools",
+    "cerebras_json",
+    "fireworks_tools",
+    "fireworks_json",
+    "writer_tools",
+    "bedrock_tools",
+    "bedrock_json",
+    "perplexity_json",
+    "openrouter_structured_outputs",
+]
+"""Instructor prompt/parsing mode for structured outputs."""
+
+
+class CompletionsSettings(TypedDict):
+    """Accepted settings for the `litellm` completion function."""
+
+    model: str
+    messages: List
+    timeout: Optional[Union[float, str, Timeout]]
+    temperature: Optional[float]
+    top_p: Optional[float]
+    n: Optional[int]
+    stream: Optional[bool]
+    stream_options: Optional[Dict[str, Any]]
+    stop: Optional[str]
+    max_completion_tokens: Optional[int]
+    max_tokens: Optional[int]
+    modalities: Optional[List[ChatCompletionModality]]
+    prediction: Optional[ChatCompletionPredictionContentParam]
+    audio: Optional[ChatCompletionAudioParam]
+    presence_penalty: Optional[float]
+    frequency_penalty: Optional[float]
+    logit_bias: Optional[Dict[str, float]]
+    user: Optional[str]
+    reasoning_effort: Optional[Literal["low", "medium", "high"]]
+    # NOTE: response_format is not used within the `completions` resource
+    # in place of `instructor` and the `type` parameter
+    seed: Optional[int]
+    tools: Optional[List]
+    tool_choice: Optional[Union[str, Dict[str, Any]]]
+    logprobs: Optional[bool]
+    top_logprobs: Optional[int]
+    parallel_tool_calls: Optional[bool]
+    web_search_options: Optional[OpenAIWebSearchOptions]
+    deployment_id: Optional[str]
+    extra_headers: Optional[Dict[str, str]]
+    base_url: Optional[str]
+    functions: Optional[List]
+    function_call: Optional[str]
+    # set api_base, api_version, api_key
+    api_version: Optional[str]
+    api_key: Optional[str]
+    model_list: Optional[list]
+    # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam]
+
+
+class CompletionsError(Exception):
+    """Error raised when an error occurs during a completion."""
+
+    def __init__(
+        self,
+        message: str,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        super().__init__(message, *args, **kwargs)
+        self.message = message
+        self.args = args
+        self.kwargs = kwargs
+
+
+class CompletionsClient(Generic[CompletionsOutputType]):
+    """Client for working with language model completions and structured
+    outputs using the `litellm` and `instructor` libraries."""
+
+    @staticmethod
+    async def async_chat_completion(
+        messages: CompletionsInputParam,
+        instructions: Optional[str] = None,
+        model: str = "openai/gpt-4o-mini",
+        *,
+        timeout: Optional[Union[float, str, Timeout]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        stream: Optional[bool] = None,
+        stream_options: Optional[Dict[str, Any]] = None,
+        stop: Optional[str] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        modalities: Optional[List[ChatCompletionModality]] = None,
+        prediction: Optional[ChatCompletionPredictionContentParam] = None,
+        audio: Optional[ChatCompletionAudioParam] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[str, float]] = None,
+        user: Optional[str] = None,
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        # NOTE: response_format is not used within the `completions` resource
+        # in place of `instructor` and the `type` parameter
+        seed: Optional[int] = None,
+        tools: Optional[List] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        web_search_options: Optional[OpenAIWebSearchOptions] = None,
+        deployment_id: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        base_url: Optional[str] = None,
+        functions: Optional[List] = None,
+        function_call: Optional[str] = None,
+        # set api_base, api_version, api_key
+        api_version: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_list: Optional[list] = None,
+        # Optional liteLLM function params
+        thinking: Optional[AnthropicThinkingParam] = None,
+    ):
+        try:
+            parsed_messages = parse_completions_input(messages, instructions)
+        except Exception as e:
+            raise CompletionsError(
+                f"Error parsing completions input: {e}",
+                input=messages,
+            ) from e
+
+        params: CompletionsSettings = {
+            "model": model,
+            "messages": parsed_messages,
+            "timeout": timeout,
+            "temperature": temperature,
+            "top_p": top_p,
+            "n": n,
+            "stop": stop,
+            "max_completion_tokens": max_completion_tokens,
+            "max_tokens": max_tokens,
+            "modalities": modalities,
+            "prediction": prediction,
+            "audio": audio,
+            "presence_penalty": presence_penalty,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
+            "user": user,
+            "reasoning_effort": reasoning_effort,
+            "seed": seed,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "logprobs": logprobs,
+            "top_logprobs": top_logprobs,
+            "parallel_tool_calls": parallel_tool_calls,
+            "web_search_options": web_search_options,
+            "deployment_id": deployment_id,
+            "extra_headers": extra_headers,
+            "base_url": base_url,
+            "functions": functions,
+            "function_call": function_call,
+            "api_version": api_version,
+            "api_key": api_key,
+            "model_list": model_list,
+            "thinking": thinking,
+        }
+
+        if not stream:
+            response = await get_litellm().acompletion(
+                **{k: v for k, v in params.items() if v is not None}
+            )
+            return convert_response_to_completion(response)
+        else:
+            stream = await get_litellm().acompletion(
+                **{k: v for k, v in params.items() if v is not None},
+                stream=True,
+                stream_options=stream_options if stream_options else None,
+            )
+            return create_async_completion_stream(stream, output_type=str, model=model)
+
+    @staticmethod
+    def chat_completion(
+        messages: CompletionsInputParam,
+        instructions: Optional[str] = None,
+        model: str = "openai/gpt-4o-mini",
+        *,
+        timeout: Optional[Union[float, str, Timeout]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        stream: Optional[bool] = None,
+        stream_options: Optional[Dict[str, Any]] = None,
+        stop: Optional[str] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        modalities: Optional[List[ChatCompletionModality]] = None,
+        prediction: Optional[ChatCompletionPredictionContentParam] = None,
+        audio: Optional[ChatCompletionAudioParam] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[str, float]] = None,
+        user: Optional[str] = None,
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        # NOTE: response_format is not used within the `completions` resource
+        # in place of `instructor` and the `type` parameter
+        seed: Optional[int] = None,
+        tools: Optional[List] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        web_search_options: Optional[OpenAIWebSearchOptions] = None,
+        deployment_id: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        base_url: Optional[str] = None,
+        functions: Optional[List] = None,
+        function_call: Optional[str] = None,
+        # set api_base, api_version, api_key
+        api_version: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_list: Optional[list] = None,
+        # Optional liteLLM function params
+        thinking: Optional[AnthropicThinkingParam] = None,
+    ):
+        try:
+            parsed_messages = parse_completions_input(messages, instructions)
+        except Exception as e:
+            raise CompletionsError(
+                f"Error parsing completions input: {e}",
+                input=messages,
+            ) from e
+
+        params: CompletionsSettings = {
+            "model": model,
+            "messages": parsed_messages,
+            "timeout": timeout,
+            "temperature": temperature,
+            "top_p": top_p,
+            "n": n,
+            "stop": stop,
+            "max_completion_tokens": max_completion_tokens,
+            "max_tokens": max_tokens,
+            "modalities": modalities,
+            "prediction": prediction,
+            "audio": audio,
+            "presence_penalty": presence_penalty,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
+            "user": user,
+            "reasoning_effort": reasoning_effort,
+            "seed": seed,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "logprobs": logprobs,
+            "top_logprobs": top_logprobs,
+            "parallel_tool_calls": parallel_tool_calls,
+            "web_search_options": web_search_options,
+            "deployment_id": deployment_id,
+            "extra_headers": extra_headers,
+            "base_url": base_url,
+            "functions": functions,
+            "function_call": function_call,
+            "api_version": api_version,
+            "api_key": api_key,
+            "model_list": model_list,
+            "thinking": thinking,
+        }
+
+        if not stream:
+            response = get_litellm().completion(
+                **{k: v for k, v in params.items() if v is not None}
+            )
+            return convert_response_to_completion(response)
+        else:
+            stream = get_litellm().completion(
+                **{k: v for k, v in params.items() if v is not None},
+                stream=True,
+                stream_options=stream_options if stream_options else None,
+            )
+            return create_completion_stream(stream, output_type=str, model=model)
+
+    @staticmethod
+    async def async_structured_output(
+        messages: CompletionsInputParam,
+        instructions: Optional[str] = None,
+        model: str = "openai/gpt-4o-mini",
+        type: CompletionsOutputType = str,
+        instructor_mode: InstructorModeParam = "tool_call",
+        max_retries: int = 3,
+        strict: bool = True,
+        *,
+        timeout: Optional[Union[float, str, Timeout]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        stream: Optional[bool] = None,
+        stream_options: Optional[Dict[str, Any]] = None,
+        stop: Optional[str] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        modalities: Optional[List[ChatCompletionModality]] = None,
+        prediction: Optional[ChatCompletionPredictionContentParam] = None,
+        audio: Optional[ChatCompletionAudioParam] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[str, float]] = None,
+        user: Optional[str] = None,
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        # NOTE: response_format is not used within the `completions` resource
+        # in place of `instructor` and the `type` parameter
+        seed: Optional[int] = None,
+        tools: Optional[List] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        web_search_options: Optional[OpenAIWebSearchOptions] = None,
+        deployment_id: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        base_url: Optional[str] = None,
+        functions: Optional[List] = None,
+        function_call: Optional[str] = None,
+        # set api_base, api_version, api_key
+        api_version: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_list: Optional[list] = None,
+        # Optional liteLLM function params
+        thinking: Optional[AnthropicThinkingParam] = None,
+    ):
+        try:
+            parsed_messages = parse_completions_input(messages, instructions)
+        except Exception as e:
+            raise CompletionsError(
+                f"Error parsing completions input: {e}",
+                input=messages,
+            ) from e
+
+        parsed_messages = format_tool_calls(parsed_messages)
+
+        params: CompletionsSettings = {
+            "model": model,
+            "messages": parsed_messages,
+            "timeout": timeout,
+            "temperature": temperature,
+            "top_p": top_p,
+            "n": n,
+            "stop": stop,
+            "max_completion_tokens": max_completion_tokens,
+            "max_tokens": max_tokens,
+            "modalities": modalities,
+            "prediction": prediction,
+            "audio": audio,
+            "presence_penalty": presence_penalty,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
+            "user": user,
+            "reasoning_effort": reasoning_effort,
+            "seed": seed,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "logprobs": logprobs,
+            "top_logprobs": top_logprobs,
+            "parallel_tool_calls": parallel_tool_calls,
+            "web_search_options": web_search_options,
+            "deployment_id": deployment_id,
+            "extra_headers": extra_headers,
+            "base_url": base_url,
+            "functions": functions,
+            "function_call": function_call,
+            "api_version": api_version,
+            "api_key": api_key,
+            "model_list": model_list,
+            "thinking": thinking,
+        }
+
+        if type is str:
+            return await CompletionsClient.async_chat_completion(
+                messages=messages,
+                instructions=instructions,
+                model=model,
+                timeout=timeout,
+                temperature=temperature,
+                top_p=top_p,
+                n=n,
+                stream=stream,
+                stream_options=stream_options,
+                stop=stop,
+                max_completion_tokens=max_completion_tokens,
+                max_tokens=max_tokens,
+                modalities=modalities,
+                prediction=prediction,
+                audio=audio,
+                presence_penalty=presence_penalty,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                user=user,
+                reasoning_effort=reasoning_effort,
+                seed=seed,
+                tools=tools,
+                tool_choice=tool_choice,
+                logprobs=logprobs,
+                top_logprobs=top_logprobs,
+                parallel_tool_calls=parallel_tool_calls,
+                web_search_options=web_search_options,
+                deployment_id=deployment_id,
+                extra_headers=extra_headers,
+                base_url=base_url,
+                functions=functions,
+                function_call=function_call,
+                api_version=api_version,
+                api_key=api_key,
+                model_list=model_list,
+                thinking=thinking,
+            )
+
+        try:
+            client = get_instructor().from_litellm(
+                completion=get_litellm().acompletion,
+                mode=get_instructor().Mode(instructor_mode),
+            )
+        except Exception as e:
+            raise CompletionsError(
+                f"Error creating instructor client: {e}",
+                input=messages,
+            ) from e
+
+        if not is_pydantic_basemodel(type):
+            response_model = convert_to_pydantic_model(
+                target=type,
+                name="Response",
+                field_name="value",
+                description="A single field response in the correct type.",
+            )
+        else:
+            response_model = type
+
+        if stream:
+            stream = await client.chat.completions.create_partial(
+                response_model=response_model,
+                max_retries=max_retries,
+                strict=strict,
+                **{k: v for k, v in params.items() if v is not None},
+            )
+            return create_async_completion_stream(stream, output_type=type, model=model)
+        else:
+            response = await client.chat.completions.create(
+                response_model=response_model,
+                max_retries=max_retries,
+                strict=strict,
+                **{k: v for k, v in params.items() if v is not None},
+            )
+
+            # Extract the actual value if using converted pydantic model
+            if not is_pydantic_basemodel(type) and hasattr(response, "value"):
+                actual_output = response.value
+            else:
+                actual_output = response
+
+            return Completion(
+                output=actual_output, model=model, content=None, completion=None
+            )
+
+    @staticmethod
+    def structured_output(
+        messages: CompletionsInputParam,
+        instructions: Optional[str] = None,
+        model: str = "openai/gpt-4o-mini",
+        type: CompletionsOutputType = str,
+        instructor_mode: InstructorModeParam = "tool_call",
+        max_retries: int = 3,
+        strict: bool = True,
+        *,
+        timeout: Optional[Union[float, str, Timeout]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        stream: Optional[bool] = None,
+        stream_options: Optional[Dict[str, Any]] = None,
+        stop: Optional[str] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        modalities: Optional[List[ChatCompletionModality]] = None,
+        prediction: Optional[ChatCompletionPredictionContentParam] = None,
+        audio: Optional[ChatCompletionAudioParam] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[str, float]] = None,
+        user: Optional[str] = None,
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        # NOTE: response_format is not used within the `completions` resource
+        # in place of `instructor` and the `type` parameter
+        seed: Optional[int] = None,
+        tools: Optional[List] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        web_search_options: Optional[OpenAIWebSearchOptions] = None,
+        deployment_id: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        base_url: Optional[str] = None,
+        functions: Optional[List] = None,
+        function_call: Optional[str] = None,
+        # set api_base, api_version, api_key
+        api_version: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_list: Optional[list] = None,
+        # Optional liteLLM function params
+        thinking: Optional[AnthropicThinkingParam] = None,
+    ):
+        try:
+            parsed_messages = parse_completions_input(messages, instructions)
+        except Exception as e:
+            raise CompletionsError(
+                f"Error parsing completions input: {e}",
+                input=messages,
+            ) from e
+
+        parsed_messages = format_tool_calls(parsed_messages)
+
+        params: CompletionsSettings = {
+            "model": model,
+            "messages": parsed_messages,
+            "timeout": timeout,
+            "temperature": temperature,
+            "top_p": top_p,
+            "n": n,
+            "stop": stop,
+            "max_completion_tokens": max_completion_tokens,
+            "max_tokens": max_tokens,
+            "modalities": modalities,
+            "prediction": prediction,
+            "audio": audio,
+            "presence_penalty": presence_penalty,
+            "frequency_penalty": frequency_penalty,
+            "logit_bias": logit_bias,
+            "user": user,
+            "reasoning_effort": reasoning_effort,
+            "seed": seed,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "logprobs": logprobs,
+            "top_logprobs": top_logprobs,
+            "parallel_tool_calls": parallel_tool_calls,
+            "web_search_options": web_search_options,
+            "deployment_id": deployment_id,
+            "extra_headers": extra_headers,
+            "base_url": base_url,
+            "functions": functions,
+            "function_call": function_call,
+            "api_version": api_version,
+            "api_key": api_key,
+            "model_list": model_list,
+            "thinking": thinking,
+        }
+
+        if type is str:
+            return CompletionsClient.chat_completion(
+                messages=messages,
+                instructions=instructions,
+                model=model,
+                timeout=timeout,
+                temperature=temperature,
+                top_p=top_p,
+                n=n,
+                stream=stream,
+                stream_options=stream_options,
+                stop=stop,
+                max_completion_tokens=max_completion_tokens,
+                max_tokens=max_tokens,
+                modalities=modalities,
+                prediction=prediction,
+                audio=audio,
+                presence_penalty=presence_penalty,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                user=user,
+                reasoning_effort=reasoning_effort,
+                seed=seed,
+                tools=tools,
+                tool_choice=tool_choice,
+                logprobs=logprobs,
+                top_logprobs=top_logprobs,
+                parallel_tool_calls=parallel_tool_calls,
+                web_search_options=web_search_options,
+                deployment_id=deployment_id,
+                extra_headers=extra_headers,
+                base_url=base_url,
+                functions=functions,
+                function_call=function_call,
+                api_version=api_version,
+                api_key=api_key,
+                model_list=model_list,
+                thinking=thinking,
+            )
+
+        try:
+            client = get_instructor().from_litellm(
+                completion=get_litellm().completion,
+                mode=get_instructor().Mode(instructor_mode),
+            )
+        except Exception as e:
+            raise CompletionsError(
+                f"Error creating instructor client: {e}",
+                input=messages,
+            ) from e
+
+        if not is_pydantic_basemodel(type):
+            response_model = convert_to_pydantic_model(
+                target=type,
+                name="Response",
+                field_name="value",
+                description="A single field response in the correct type.",
+            )
+        else:
+            response_model = type
+
+        if stream:
+            stream = client.chat.completions.create_partial(
+                response_model=response_model,
+                max_retries=max_retries,
+                strict=strict,
+                **{k: v for k, v in params.items() if v is not None},
+            )
+            return create_completion_stream(stream, output_type=type, model=model)
+        else:
+            response = client.chat.completions.create(
+                response_model=response_model,
+                max_retries=max_retries,
+                strict=strict,
+                **{k: v for k, v in params.items() if v is not None},
+            )
+
+            # Extract the actual value if using converted pydantic model
+            if not is_pydantic_basemodel(type) and hasattr(response, "value"):
+                actual_output = response.value
+            else:
+                actual_output = response
+
+            return Completion(
+                output=actual_output, model=model, content=None, completion=None
+            )
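
For orientation, a minimal usage sketch of the new client follows. It is not part of the diff: the import path, method names, and defaults are taken from `client.py` above, while the prompt, the `Answer` model, and the assumption that `hammad-python[ai]` and provider credentials are installed and configured are illustrative only.

from pydantic import BaseModel

from hammad.ai.completions.client import CompletionsClient


class Answer(BaseModel):
    value: int


# Plain chat completion: parse_completions_input normalizes the input,
# litellm performs the request, and the raw response is wrapped in a
# Completion object.
completion = CompletionsClient.chat_completion(
    messages="What is 2 + 2?",
    model="openai/gpt-4o-mini",
)

# Structured output: the request is routed through instructor. A pydantic
# model is used directly as the response_model; any other `type` is wrapped
# in a one-field "Response" model and unwrapped before returning.
result = CompletionsClient.structured_output(
    messages="What is 2 + 2?",
    type=Answer,
    instructor_mode="tool_call",  # the default mode, shown explicitly
)
answer = result.output  # an Answer instance

Passing `stream=True` to either method returns a `CompletionStream` (or an `AsyncCompletionStream` from the `async_` variants) instead of a finished `Completion`.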