donkit-llm 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- donkit/llm/factory.py +1 -1
- donkit/llm/openai_model.py +107 -59
- donkit/llm/vertex_model.py +2 -2
- {donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/METADATA +2 -2
- {donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/RECORD +6 -6
- {donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/WHEEL +0 -0
donkit/llm/factory.py
CHANGED
@@ -153,7 +153,7 @@ class ModelFactory:
     def create_donkit_model(
         model_name: str | None,
         api_key: str,
-        base_url: str = "
+        base_url: str = "https://api.donkit.ai",
         provider: str = "default",
     ) -> DonkitModel:
         """Create a Donkit model that proxies through RagOps API Gateway.
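The only change here fills in the previously truncated default for base_url, so callers can omit it and proxy through the public gateway. A minimal usage sketch, assuming create_donkit_model is exposed as a static method on ModelFactory (as the hunk header suggests); the API key is a placeholder:

# Hypothetical usage; credentials and model name are placeholders.
from donkit.llm.factory import ModelFactory

model = ModelFactory.create_donkit_model(
    model_name="gpt-4o",       # or None for the provider default
    api_key="dk-example-key",  # placeholder credential
    # base_url now defaults to "https://api.donkit.ai"
)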
donkit/llm/openai_model.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import AsyncIterator
+from typing import Any, AsyncIterator
 
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 
@@ -43,6 +43,41 @@ class OpenAIModel(LLMModelAbstract):
         self._init_client(api_key, base_url, organization)
         self._capabilities = self._determine_capabilities()
 
+    def _get_base_model_name(self) -> str:
+        """Get base model name for capability/parameter detection.
+
+        For Azure models, use _base_model_name; for OpenAI, use _model_name.
+        """
+        return getattr(self, "_base_model_name", self._model_name)
+
+    def _is_reasoning_model(self) -> bool:
+        """Check if model is a reasoning model (GPT-5, o1, o3, o4 series).
+
+        Reasoning models don't support temperature, top_p, presence_penalty,
+        or frequency_penalty. They only support max_completion_tokens (not max_tokens).
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Check for reasoning model prefixes
+        reasoning_prefixes = ("gpt-5", "o1", "o3", "o4")
+        return any(model_lower.startswith(prefix) for prefix in reasoning_prefixes)
+
+    def _supports_max_completion_tokens(self) -> bool:
+        """Check if model uses max_completion_tokens instead of max_tokens.
+
+        GPT-4.1+, GPT-5, and reasoning models (o1, o3, o4) use max_completion_tokens.
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Reasoning models always use max_completion_tokens
+        if self._is_reasoning_model():
+            return True
+        # GPT-4.1+ series use max_completion_tokens
+        if "gpt-4.1" in model_lower or "gpt-5" in model_lower:
+            return True
+        # GPT-4o and newer also use max_completion_tokens
+        if "gpt-4o" in model_lower:
+            return True
+        return False
+
     def _init_client(
         self,
         api_key: str,
@@ -165,34 +200,54 @@ class OpenAIModel(LLMModelAbstract):
             for tool in tools
         ]
 
-
-
-
+    def _build_request_kwargs(
+        self,
+        request: GenerateRequest,
+        messages: list[dict],
+        stream: bool = False,
+    ) -> dict:
+        """Build kwargs for OpenAI API request with parameter filtering.
 
-
+        Args:
+            request: Generate request with parameters
+            messages: Converted messages in OpenAI format
+            stream: Whether this is a streaming request
 
-
+        Returns:
+            Dictionary of kwargs for OpenAI API call
+        """
+        kwargs: dict[str, Any] = {
             "model": self._model_name,
             "messages": messages,
         }
 
-        if
-        kwargs["
+        if stream:
+            kwargs["stream"] = True
+
+        is_reasoning = self._is_reasoning_model()
+
+        # Reasoning models (GPT-5, o1, o3, o4) don't support temperature/top_p
+        # They use fixed temperature=1 and top_p=1 internally
+        if not is_reasoning:
+            if request.temperature is not None:
+                kwargs["temperature"] = request.temperature
+            if request.top_p is not None:
+                kwargs["top_p"] = request.top_p
+
+        # Handle max_tokens vs max_completion_tokens
        if request.max_tokens is not None:
-            #
-
-            if
-
-
-            )
+            # Clamp value between 8192 and 16384
+            clamped_tokens = max(8192, min(request.max_tokens, 16384))
+            if self._supports_max_completion_tokens():
+                # GPT-4.1+, GPT-5, reasoning models use max_completion_tokens
+                kwargs["max_completion_tokens"] = clamped_tokens
             else:
-
-
-
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
+                # Older models use max_tokens
+                kwargs["max_tokens"] = clamped_tokens
+
         if request.stop:
             kwargs["stop"] = request.stop
+
         if request.tools:
             kwargs["tools"] = self._convert_tools(request.tools)
             # Only add tool_choice if tools are present
@@ -206,9 +261,19 @@ class OpenAIModel(LLMModelAbstract):
                 kwargs["tool_choice"] = "auto"
             elif isinstance(request.tool_choice, dict):
                 kwargs["tool_choice"] = request.tool_choice
+
         if request.response_format:
             kwargs["response_format"] = request.response_format
 
+        return kwargs
+
+    async def generate(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate a response using OpenAI API."""
+        await self.validate_request(request)
+
+        messages = [self._convert_message(msg) for msg in request.messages]
+        kwargs = self._build_request_kwargs(request, messages, stream=False)
+
         try:
             response = await self.client.chat.completions.create(**kwargs)
 
@@ -258,45 +323,7 @@ class OpenAIModel(LLMModelAbstract):
         await self.validate_request(request)
 
         messages = [self._convert_message(msg) for msg in request.messages]
-
-        kwargs = {
-            "model": self._model_name,
-            "messages": messages,
-            "stream": True,
-        }
-
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
-        if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-            else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
-        if request.stop:
-            kwargs["stop"] = request.stop
-        if request.tools:
-            kwargs["tools"] = self._convert_tools(request.tools)
-            # Only add tool_choice if tools are present
-            if request.tool_choice:
-                # Validate tool_choice - OpenAI only supports 'none', 'auto', 'required', or dict
-                if isinstance(request.tool_choice, str):
-                    if request.tool_choice in ("none", "auto", "required"):
-                        kwargs["tool_choice"] = request.tool_choice
-                    else:
-                        # Invalid string value - default to 'auto'
-                        kwargs["tool_choice"] = "auto"
-                elif isinstance(request.tool_choice, dict):
-                    kwargs["tool_choice"] = request.tool_choice
-        if request.response_format:
-            kwargs["response_format"] = request.response_format
+        kwargs = self._build_request_kwargs(request, messages, stream=True)
 
         try:
             stream = await self.client.chat.completions.create(**kwargs)
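The net effect of the hunks above is that generate and generate_stream now share one kwargs builder, so OpenAI parameter quirks (reasoning models rejecting sampling parameters, newer models requiring max_completion_tokens) live in a single place. A standalone sketch of the same decision logic; the function name and arguments here are illustrative, not part of the package:

# Illustrative re-implementation of the parameter filtering added above.
def build_kwargs_sketch(
    model: str,
    temperature: float | None = None,
    max_tokens: int | None = None,
) -> dict:
    model_lower = model.lower()
    # Reasoning models (GPT-5, o1, o3, o4) reject sampling parameters.
    is_reasoning = model_lower.startswith(("gpt-5", "o1", "o3", "o4"))
    # Newer models take max_completion_tokens instead of max_tokens.
    uses_completion_tokens = is_reasoning or any(
        tag in model_lower for tag in ("gpt-4.1", "gpt-5", "gpt-4o")
    )
    kwargs: dict = {"model": model}
    if not is_reasoning and temperature is not None:
        kwargs["temperature"] = temperature
    if max_tokens is not None:
        clamped = max(8192, min(max_tokens, 16384))  # same clamp as the diff
        key = "max_completion_tokens" if uses_completion_tokens else "max_tokens"
        kwargs[key] = clamped
    return kwargs

# Example: o1 drops temperature and gets the clamped max_completion_tokens.
assert build_kwargs_sketch("o1-mini", temperature=0.2, max_tokens=4000) == {
    "model": "o1-mini",
    "max_completion_tokens": 8192,
}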
@@ -389,7 +416,7 @@ class AzureOpenAIModel(OpenAIModel):
         self._api_key = api_key
         self._azure_endpoint = azure_endpoint
         self._api_version = api_version
-        self.
+        self._model_name = model_name
         self._deployment_name = deployment_name
 
         # Call parent constructor (will call our overridden _init_client)
@@ -438,6 +465,27 @@ class AzureOpenAIModel(OpenAIModel):
         # Reinitialize client with new deployment name
         self._init_client(self._api_key)
 
+    @property
+    def model_name(self) -> str:
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, value: str):
+        """
+        Set new model name and recalculate capabilities.
+
+        Args:
+            value: New model name
+        """
+        self._model_name = value
+        self._deployment_name = value
+        self.client = AsyncAzureOpenAI(
+            api_key=self._api_key,
+            azure_endpoint=self._azure_endpoint,
+            api_version=self._api_version,
+            azure_deployment=value,
+        )
+
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
         """Generate a response using Azure OpenAI API with parameter adaptation."""
         # Azure OpenAI uses deployment name instead of model name
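The new model_name property/setter pair means switching deployments no longer requires constructing a fresh AzureOpenAIModel: assigning a name keeps _deployment_name in sync and rebuilds the AsyncAzureOpenAI client against the new deployment. A hedged usage sketch, assuming the constructor keywords mirror the attributes assigned in the hunk above (the real signature is not shown in this diff):

# Hypothetical usage; endpoint, key, and version are placeholders.
model = AzureOpenAIModel(
    api_key="azure-key-example",
    azure_endpoint="https://example.openai.azure.com",
    api_version="2024-06-01",
    model_name="gpt-4o",
    deployment_name="gpt-4o",
)

# Setter updates _deployment_name and reinitializes the client.
model.model_name = "gpt-4.1"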
donkit/llm/vertex_model.py
CHANGED
@@ -643,9 +643,9 @@ class VertexAIModel(LLMModelAbstract):
                 yield StreamChunk(content=None, tool_calls=tool_calls)
 
         except Exception as e:
-            error_msg = str(e)
+            # error_msg = str(e)
             # Yield error message instead of empty response
-
+            raise e
 
 
 class VertexEmbeddingModel(LLMModelAbstract):
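Behaviorally, Vertex streaming errors are now re-raised to the caller rather than converted into a message, so consumers should catch them around the iteration. A minimal consumer sketch; the streaming method name is an assumption, since this diff does not show the method signature:

# Hypothetical consumer of the Vertex stream (method name assumed).
async def consume_stream(model, request) -> None:
    try:
        async for chunk in model.generate_stream(request):
            if chunk.content:
                print(chunk.content, end="")
    except Exception as exc:
        # As of 0.1.4 the Vertex stream re-raises instead of yielding an
        # error chunk, so failures must be handled here.
        print(f"stream failed: {exc}")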
{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: donkit-llm
-Version: 0.1.3
+Version: 0.1.4
 Summary: Unified LLM model implementations for Donkit (OpenAI, Azure OpenAI, Claude, Vertex AI, Ollama)
 License: MIT
 Author: Donkit AI
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: anthropic[vertex] (>=0.42.0,<0.43.0)
-Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.
+Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.5,<0.2.0)
 Requires-Dist: google-auth (>=2.0.0,<3.0.0)
 Requires-Dist: google-genai (>=1.38.0,<2.0.0)
 Requires-Dist: openai (>=2.1.0,<3.0.0)
{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 donkit/llm/__init__.py,sha256=0w5hPdaJDpzL1EpnBZm-7WV1Rz1OUdcCMcnxNRpXdiM,1357
 donkit/llm/claude_model.py,sha256=9UjNkACc6wHFus2gOXLKOi9yjX2FkP3cpQ9zFZEcXWU,16650
 donkit/llm/donkit_model.py,sha256=rEPxBW6k_BhIBF4XkgLzibVfwW6OJHiX89yMadcJkY4,8497
-donkit/llm/factory.py,sha256=
+donkit/llm/factory.py,sha256=KoZ9bD6FsZjU3ldKL7szznDSB8gI1slnI1jGGwKIuVY,9195
 donkit/llm/gemini_model.py,sha256=2uLoZr9HjUf1wxiZRGLQFcURCutsB2SV9f-1VaR6kGI,14413
 donkit/llm/model_abstract.py,sha256=aOgYh3I96PsxSxnkIJ1ETx5UFeRxozCD1c44wiKoBSs,8191
 donkit/llm/ollama_integration.py,sha256=WXeV2xNxP7gd1JyMsHMKaQOjvH7QYkLIPs7pmTPWFrg,13236
-donkit/llm/openai_model.py,sha256=
-donkit/llm/vertex_model.py,sha256=
-donkit_llm-0.1.
-donkit_llm-0.1.
-donkit_llm-0.1.
+donkit/llm/openai_model.py,sha256=66ioYAaoOS9Fo0C0w2LYdSnAKwXIt6qXvJsKXTuajm0,24609
+donkit/llm/vertex_model.py,sha256=XOo_uwJOa0wgArkD3pac7SulUYWkCc7lTRjyrBSpHPM,29284
+donkit_llm-0.1.4.dist-info/METADATA,sha256=tR6fRwBE36XEf_X4AjGlNg1e4_3xLLPLeVuk2denyho,742
+donkit_llm-0.1.4.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+donkit_llm-0.1.4.dist-info/RECORD,,

{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/WHEEL
File without changes