donkit-llm 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- donkit/llm/factory.py +1 -1
- donkit/llm/openai_model.py +125 -60
- donkit/llm/vertex_model.py +37 -41
- {donkit_llm-0.1.3.dist-info → donkit_llm-0.1.5.dist-info}/METADATA +2 -2
- {donkit_llm-0.1.3.dist-info → donkit_llm-0.1.5.dist-info}/RECORD +6 -6
- {donkit_llm-0.1.3.dist-info → donkit_llm-0.1.5.dist-info}/WHEEL +0 -0
donkit/llm/factory.py
CHANGED
@@ -153,7 +153,7 @@ class ModelFactory:
     def create_donkit_model(
         model_name: str | None,
         api_key: str,
-        base_url: str = "
+        base_url: str = "https://api.donkit.ai",
         provider: str = "default",
     ) -> DonkitModel:
         """Create a Donkit model that proxies through RagOps API Gateway.
donkit/llm/openai_model.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import AsyncIterator
+from typing import Any, AsyncIterator
 
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 
@@ -43,6 +43,41 @@ class OpenAIModel(LLMModelAbstract):
         self._init_client(api_key, base_url, organization)
         self._capabilities = self._determine_capabilities()
 
+    def _get_base_model_name(self) -> str:
+        """Get base model name for capability/parameter detection.
+
+        For Azure models, use _base_model_name; for OpenAI, use _model_name.
+        """
+        return getattr(self, "_base_model_name", self._model_name)
+
+    def _is_reasoning_model(self) -> bool:
+        """Check if model is a reasoning model (GPT-5, o1, o3, o4 series).
+
+        Reasoning models don't support temperature, top_p, presence_penalty, frequency_penalty.
+        They only support max_completion_tokens (not max_tokens).
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Check for reasoning model prefixes
+        reasoning_prefixes = ("gpt-5", "o1", "o3", "o4")
+        return any(model_lower.startswith(prefix) for prefix in reasoning_prefixes)
+
+    def _supports_max_completion_tokens(self) -> bool:
+        """Check if model uses max_completion_tokens instead of max_tokens.
+
+        GPT-4.1+, GPT-5, and reasoning models (o1, o3, o4) use max_completion_tokens.
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Reasoning models always use max_completion_tokens
+        if self._is_reasoning_model():
+            return True
+        # GPT-4.1+ series use max_completion_tokens
+        if "gpt-4.1" in model_lower or "gpt-5" in model_lower:
+            return True
+        # GPT-4o and newer also use max_completion_tokens
+        if "gpt-4o" in model_lower:
+            return True
+        return False
+
     def _init_client(
         self,
         api_key: str,
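
The detection is purely lexical, driven by model-name prefixes. A standalone sketch of the same check, for illustration only (not the package's API):

# Standalone re-implementation of the prefix test used by _is_reasoning_model.
def is_reasoning(model_name: str) -> bool:
    return model_name.lower().startswith(("gpt-5", "o1", "o3", "o4"))

print(is_reasoning("o3-mini"))  # True  - reasoning model
print(is_reasoning("GPT-5"))    # True  - check is case-insensitive
print(is_reasoning("gpt-4o"))   # False - chat model, sampling params still sent

Note that a prefix test like this also matches any future model whose name happens to start with "o1", "o3", or "o4".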
@@ -165,34 +200,54 @@ class OpenAIModel(LLMModelAbstract):
             for tool in tools
         ]
 
-    async def generate(self, request: GenerateRequest) -> GenerateResponse:
-        """Generate a response using OpenAI API."""
-        await self.validate_request(request)
+    def _build_request_kwargs(
+        self,
+        request: GenerateRequest,
+        messages: list[dict],
+        stream: bool = False,
+    ) -> dict:
+        """Build kwargs for OpenAI API request with parameter filtering.
 
-        messages = [self._convert_message(msg) for msg in request.messages]
+        Args:
+            request: Generate request with parameters
+            messages: Converted messages in OpenAI format
+            stream: Whether this is a streaming request
 
-        kwargs = {
+        Returns:
+            Dictionary of kwargs for OpenAI API call
+        """
+        kwargs: dict[str, Any] = {
             "model": self._model_name,
             "messages": messages,
         }
 
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
+        if stream:
+            kwargs["stream"] = True
+
+        is_reasoning = self._is_reasoning_model()
+
+        # Reasoning models (GPT-5, o1, o3, o4) don't support temperature/top_p
+        # They use fixed temperature=1 and top_p=1 internally
+        if not is_reasoning:
+            if request.temperature is not None:
+                kwargs["temperature"] = request.temperature
+            if request.top_p is not None:
+                kwargs["top_p"] = request.top_p
+
+        # Handle max_tokens vs max_completion_tokens
         if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
+            # Clamp value between 8192 and 16384
+            clamped_tokens = max(8192, min(request.max_tokens, 16384))
+            if self._supports_max_completion_tokens():
+                # GPT-4.1+, GPT-5, reasoning models use max_completion_tokens
+                kwargs["max_completion_tokens"] = clamped_tokens
             else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
+                # Older models use max_tokens
+                kwargs["max_tokens"] = clamped_tokens
+
         if request.stop:
             kwargs["stop"] = request.stop
+
         if request.tools:
             kwargs["tools"] = self._convert_tools(request.tools)
             # Only add tool_choice if tools are present
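
Note that the new clamp is two-sided: max(8192, min(x, 16384)) raises small requests to an 8192 floor as well as capping at 16384, whereas the old code only capped. A worked sketch:

# The clamp expression from _build_request_kwargs, applied to sample values.
def clamp(requested: int) -> int:
    return max(8192, min(requested, 16384))

print(clamp(256))    # 8192  - raised to the floor (old code passed 256 through)
print(clamp(12000))  # 12000 - already in range, unchanged
print(clamp(50000))  # 16384 - capped at the ceiling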
@@ -206,8 +261,35 @@ class OpenAIModel(LLMModelAbstract):
                         kwargs["tool_choice"] = "auto"
                 elif isinstance(request.tool_choice, dict):
                     kwargs["tool_choice"] = request.tool_choice
+
         if request.response_format:
-            kwargs["response_format"] = request.response_format
+            # OpenAI requires specific format for structured output
+            # If response_format is a JSON Schema dict with "type": "object", wrap it
+            if isinstance(request.response_format, dict):
+                if request.response_format.get("type") == "object":
+                    # This is a JSON Schema - wrap it in json_schema format
+                    kwargs["response_format"] = {
+                        "type": "json_schema",
+                        "json_schema": {
+                            "name": "response",
+                            "strict": True,
+                            "schema": request.response_format,
+                        },
+                    }
+                else:
+                    # Already in correct format or simple type
+                    kwargs["response_format"] = request.response_format
+            else:
+                kwargs["response_format"] = request.response_format
+
+        return kwargs
+
+    async def generate(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate a response using OpenAI API."""
+        await self.validate_request(request)
+
+        messages = [self._convert_message(msg) for msg in request.messages]
+        kwargs = self._build_request_kwargs(request, messages, stream=False)
 
         try:
             response = await self.client.chat.completions.create(**kwargs)
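
For a plain JSON Schema dict (one with "type": "object"), the new branch wraps it in OpenAI's structured-output envelope. An illustrative before/after (the schema contents are made up):

# Hypothetical input schema and the envelope the wrapping branch produces.
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
}

wrapped = {
    "type": "json_schema",
    "json_schema": {
        "name": "response",  # fixed name used by the wrapper
        "strict": True,
        "schema": schema,
    },
}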
@@ -258,45 +340,7 @@ class OpenAIModel(LLMModelAbstract):
         await self.validate_request(request)
 
         messages = [self._convert_message(msg) for msg in request.messages]
-
-        kwargs = {
-            "model": self._model_name,
-            "messages": messages,
-            "stream": True,
-        }
-
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
-        if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-            else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
-        if request.stop:
-            kwargs["stop"] = request.stop
-        if request.tools:
-            kwargs["tools"] = self._convert_tools(request.tools)
-            # Only add tool_choice if tools are present
-            if request.tool_choice:
-                # Validate tool_choice - OpenAI only supports 'none', 'auto', 'required', or dict
-                if isinstance(request.tool_choice, str):
-                    if request.tool_choice in ("none", "auto", "required"):
-                        kwargs["tool_choice"] = request.tool_choice
-                    else:
-                        # Invalid string value - default to 'auto'
-                        kwargs["tool_choice"] = "auto"
-                elif isinstance(request.tool_choice, dict):
-                    kwargs["tool_choice"] = request.tool_choice
-        if request.response_format:
-            kwargs["response_format"] = request.response_format
+        kwargs = self._build_request_kwargs(request, messages, stream=True)
 
         try:
             stream = await self.client.chat.completions.create(**kwargs)
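
With both paths sharing _build_request_kwargs, streaming now differs only by the stream flag, so the reasoning-model parameter filtering applies to streams too. A hedged call-site sketch (the method name and chunk attribute are inferred from StreamChunk usage elsewhere in this diff, not from documented API):

# Hypothetical consumer; generate_stream and chunk.content are assumptions.
async def collect_text(model, request) -> str:
    parts: list[str] = []
    async for chunk in model.generate_stream(request):
        if chunk.content:
            parts.append(chunk.content)
    return "".join(parts)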
@@ -389,7 +433,7 @@ class AzureOpenAIModel(OpenAIModel):
         self._api_key = api_key
         self._azure_endpoint = azure_endpoint
         self._api_version = api_version
-        self.
+        self._model_name = model_name
         self._deployment_name = deployment_name
 
         # Call parent constructor (will call our overridden _init_client)
@@ -438,6 +482,27 @@ class AzureOpenAIModel(OpenAIModel):
         # Reinitialize client with new deployment name
         self._init_client(self._api_key)
 
+    @property
+    def model_name(self) -> str:
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, value: str):
+        """
+        Set new model name and recalculate capabilities.
+
+        Args:
+            value: New model name
+        """
+        self._model_name = value
+        self._deployment_name = value
+        self.client = AsyncAzureOpenAI(
+            api_key=self._api_key,
+            azure_endpoint=self._azure_endpoint,
+            api_version=self._api_version,
+            azure_deployment=value,
+        )
+
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
         """Generate a response using Azure OpenAI API with parameter adaptation."""
         # Azure OpenAI uses deployment name instead of model name
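
Because Azure routes requests by deployment rather than model name, the setter updates both and rebuilds the client so the change takes effect immediately; note it assumes the deployment is named after the model. A hedged usage sketch (the constructor signature is an assumption inferred from the attributes visible in this diff):

# Hypothetical usage; constructor parameters are inferred from _api_key,
# _azure_endpoint, _api_version, _model_name and _deployment_name above.
model = AzureOpenAIModel(
    api_key="...",
    azure_endpoint="https://my-resource.openai.azure.com",
    api_version="2024-06-01",
    model_name="gpt-4o",
    deployment_name="gpt-4o",
)
model.model_name = "gpt-4.1"  # also swaps the deployment and recreates the client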
donkit/llm/vertex_model.py
CHANGED
@@ -330,6 +330,39 @@ class VertexAIModel(LLMModelAbstract):
 
         return convert(schema)
 
+    def _build_config_kwargs(
+        self, request: GenerateRequest, system_instruction: str | None = None
+    ) -> dict[str, Any]:
+        """Build configuration kwargs for Vertex AI generate/generate_stream."""
+        config_kwargs: dict[str, Any] = {
+            "temperature": request.temperature
+            if request.temperature is not None
+            else 0.2,
+            "top_p": request.top_p if request.top_p is not None else 0.95,
+            "max_output_tokens": request.max_tokens
+            if request.max_tokens is not None
+            else 8192,
+        }
+        if system_instruction:
+            config_kwargs["system_instruction"] = system_instruction
+        if request.stop:
+            config_kwargs["stop_sequences"] = request.stop
+        if request.response_format:
+            config_kwargs["response_mime_type"] = "application/json"
+            # If response_format is a JSON Schema dict with "type": "object", use it directly
+            if isinstance(request.response_format, dict):
+                if request.response_format.get("type") == "object":
+                    # This is a JSON Schema - use it directly
+                    config_kwargs["response_schema"] = self._clean_json_schema(
+                        request.response_format
+                    )
+                elif "schema" in request.response_format:
+                    # Already wrapped in schema key
+                    config_kwargs["response_schema"] = self._clean_json_schema(
+                        request.response_format["schema"]
+                    )
+        return config_kwargs
+
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
         """Generate a response using Vertex AI."""
         await self.validate_request(request)
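
When the request leaves sampling parameters unset, the helper falls back to fixed defaults. Illustrative output for a bare request (values taken directly from the hunk above):

# What _build_config_kwargs returns for a request with no temperature, top_p,
# max_tokens, stop sequences, system instruction, or response_format.
config_kwargs = {
    "temperature": 0.2,
    "top_p": 0.95,
    "max_output_tokens": 8192,
}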
@@ -410,26 +443,7 @@ class VertexAIModel(LLMModelAbstract):
             contents.append(user_content)
             i += 1
 
-        config_kwargs = {
-            "temperature": request.temperature
-            if request.temperature is not None
-            else 0.2,
-            "top_p": request.top_p if request.top_p is not None else 0.95,
-            "max_output_tokens": request.max_tokens
-            if request.max_tokens is not None
-            else 8192,
-        }
-        if system_instruction:
-            config_kwargs["system_instruction"] = system_instruction
-        if request.stop:
-            config_kwargs["stop_sequences"] = request.stop
-        if request.response_format:
-            config_kwargs["response_mime_type"] = "application/json"
-            if "schema" in request.response_format:
-                config_kwargs["response_schema"] = self._clean_json_schema(
-                    request.response_format["schema"]
-                )
-
+        config_kwargs = self._build_config_kwargs(request, system_instruction)
         config = genai.types.GenerateContentConfig(**config_kwargs)
 
         if request.tools:
@@ -584,25 +598,7 @@ class VertexAIModel(LLMModelAbstract):
             contents.append(user_content)
             i += 1
 
-        config_kwargs = {
-            "temperature": request.temperature
-            if request.temperature is not None
-            else 0.2,
-            "top_p": request.top_p if request.top_p is not None else 0.95,
-            "max_output_tokens": request.max_tokens
-            if request.max_tokens is not None
-            else 8192,
-        }
-        if system_instruction:
-            config_kwargs["system_instruction"] = system_instruction
-        if request.stop:
-            config_kwargs["stop_sequences"] = request.stop
-        if request.response_format:
-            config_kwargs["response_mime_type"] = "application/json"
-            if "schema" in request.response_format:
-                config_kwargs["response_schema"] = self._clean_json_schema(
-                    request.response_format["schema"]
-                )
+        config_kwargs = self._build_config_kwargs(request, system_instruction)
         config_kwargs["automatic_function_calling"] = (
             genai.types.AutomaticFunctionCallingConfig(maximum_remote_calls=100)
         )
@@ -643,9 +639,9 @@ class VertexAIModel(LLMModelAbstract):
                 yield StreamChunk(content=None, tool_calls=tool_calls)
 
         except Exception as e:
-            error_msg = str(e)
+            # error_msg = str(e)
             # Yield error message instead of empty response
-
+            raise e
 
 
 class VertexEmbeddingModel(LLMModelAbstract):
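
Stream errors are no longer swallowed into an error-text chunk; they propagate to the caller, so iteration sites need their own handling. A minimal sketch with placeholder names:

import logging

logger = logging.getLogger(__name__)

# Hypothetical call site: the streaming method now re-raises instead of
# yielding an error message, so wrap the iteration.
async def run_stream(model, request) -> None:
    try:
        async for chunk in model.generate_stream(request):
            if chunk.content:
                print(chunk.content, end="")
    except Exception:
        logger.exception("Vertex stream failed")
        raise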
{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: donkit-llm
-Version: 0.1.3
+Version: 0.1.5
 Summary: Unified LLM model implementations for Donkit (OpenAI, Azure OpenAI, Claude, Vertex AI, Ollama)
 License: MIT
 Author: Donkit AI
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: anthropic[vertex] (>=0.42.0,<0.43.0)
-Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.
+Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.5,<0.2.0)
 Requires-Dist: google-auth (>=2.0.0,<3.0.0)
 Requires-Dist: google-genai (>=1.38.0,<2.0.0)
 Requires-Dist: openai (>=2.1.0,<3.0.0)
{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.5.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 donkit/llm/__init__.py,sha256=0w5hPdaJDpzL1EpnBZm-7WV1Rz1OUdcCMcnxNRpXdiM,1357
 donkit/llm/claude_model.py,sha256=9UjNkACc6wHFus2gOXLKOi9yjX2FkP3cpQ9zFZEcXWU,16650
 donkit/llm/donkit_model.py,sha256=rEPxBW6k_BhIBF4XkgLzibVfwW6OJHiX89yMadcJkY4,8497
-donkit/llm/factory.py,sha256=
+donkit/llm/factory.py,sha256=KoZ9bD6FsZjU3ldKL7szznDSB8gI1slnI1jGGwKIuVY,9195
 donkit/llm/gemini_model.py,sha256=2uLoZr9HjUf1wxiZRGLQFcURCutsB2SV9f-1VaR6kGI,14413
 donkit/llm/model_abstract.py,sha256=aOgYh3I96PsxSxnkIJ1ETx5UFeRxozCD1c44wiKoBSs,8191
 donkit/llm/ollama_integration.py,sha256=WXeV2xNxP7gd1JyMsHMKaQOjvH7QYkLIPs7pmTPWFrg,13236
-donkit/llm/openai_model.py,sha256=
-donkit/llm/vertex_model.py,sha256=
-donkit_llm-0.1.3.dist-info/METADATA,sha256=
-donkit_llm-0.1.3.dist-info/WHEEL,sha256=
-donkit_llm-0.1.3.dist-info/RECORD,,
+donkit/llm/openai_model.py,sha256=xi3rRh5cJ8NcAvodwCNaU3_9UYHZwfUjdxnLJZs-qxg,25486
+donkit/llm/vertex_model.py,sha256=LcdWBdx4JYzom2IsXxhNGEsrYf0N6JmwuRc3sqfKIos,29350
+donkit_llm-0.1.5.dist-info/METADATA,sha256=O20eq1h0Kr0fBbDvD1WA0RkZBy9ex_SNk_CLDKXi1mI,742
+donkit_llm-0.1.5.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+donkit_llm-0.1.5.dist-info/RECORD,,

{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.5.dist-info}/WHEEL
File without changes