donkit-llm 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
donkit/llm/factory.py CHANGED
@@ -153,7 +153,7 @@ class ModelFactory:
     def create_donkit_model(
         model_name: str | None,
         api_key: str,
-        base_url: str = "http://localhost:9017",
+        base_url: str = "https://api.donkit.ai",
         provider: str = "default",
     ) -> DonkitModel:
         """Create a Donkit model that proxies through RagOps API Gateway.
donkit/llm/openai_model.py CHANGED
@@ -1,4 +1,4 @@
-from typing import AsyncIterator
+from typing import Any, AsyncIterator
 
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 
@@ -43,6 +43,41 @@ class OpenAIModel(LLMModelAbstract):
         self._init_client(api_key, base_url, organization)
         self._capabilities = self._determine_capabilities()
 
+    def _get_base_model_name(self) -> str:
+        """Get base model name for capability/parameter detection.
+
+        For Azure models, use _base_model_name; for OpenAI, use _model_name.
+        """
+        return getattr(self, "_base_model_name", self._model_name)
+
+    def _is_reasoning_model(self) -> bool:
+        """Check if model is a reasoning model (GPT-5, o1, o3, o4 series).
+
+        Reasoning models don't support temperature, top_p, presence_penalty, frequency_penalty.
+        They only support max_completion_tokens (not max_tokens).
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Check for reasoning model prefixes
+        reasoning_prefixes = ("gpt-5", "o1", "o3", "o4")
+        return any(model_lower.startswith(prefix) for prefix in reasoning_prefixes)
+
+    def _supports_max_completion_tokens(self) -> bool:
+        """Check if model uses max_completion_tokens instead of max_tokens.
+
+        GPT-4.1+, GPT-5, and reasoning models (o1, o3, o4) use max_completion_tokens.
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Reasoning models always use max_completion_tokens
+        if self._is_reasoning_model():
+            return True
+        # GPT-4.1+ series use max_completion_tokens
+        if "gpt-4.1" in model_lower or "gpt-5" in model_lower:
+            return True
+        # GPT-4o and newer also use max_completion_tokens
+        if "gpt-4o" in model_lower:
+            return True
+        return False
+
     def _init_client(
         self,
         api_key: str,
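To make the detection rules concrete, a self-contained sketch of the same prefix logic outside the class (the constant and function names here are invented for illustration):

REASONING_PREFIXES = ("gpt-5", "o1", "o3", "o4")

def is_reasoning(model_name: str) -> bool:
    # Mirrors _is_reasoning_model: case-insensitive prefix match;
    # str.startswith accepts a tuple of prefixes.
    return model_name.lower().startswith(REASONING_PREFIXES)

assert is_reasoning("o3-mini")          # reasoning: temperature/top_p dropped
assert is_reasoning("GPT-5")            # lowering makes the match case-insensitive
assert not is_reasoning("gpt-4o-mini")  # not reasoning, though it still gets
                                        # max_completion_tokens via the "gpt-4o" check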
@@ -165,34 +200,54 @@ class OpenAIModel(LLMModelAbstract):
             for tool in tools
         ]
 
-    async def generate(self, request: GenerateRequest) -> GenerateResponse:
-        """Generate a response using OpenAI API."""
-        await self.validate_request(request)
-
-        messages = [self._convert_message(msg) for msg in request.messages]
-
-        kwargs = {
+    def _build_request_kwargs(
+        self,
+        request: GenerateRequest,
+        messages: list[dict],
+        stream: bool = False,
+    ) -> dict:
+        """Build kwargs for OpenAI API request with parameter filtering.
+
+        Args:
+            request: Generate request with parameters
+            messages: Converted messages in OpenAI format
+            stream: Whether this is a streaming request
+
+        Returns:
+            Dictionary of kwargs for OpenAI API call
+        """
+        kwargs: dict[str, Any] = {
             "model": self._model_name,
             "messages": messages,
         }
 
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
+        if stream:
+            kwargs["stream"] = True
+
+        is_reasoning = self._is_reasoning_model()
+
+        # Reasoning models (GPT-5, o1, o3, o4) don't support temperature/top_p
+        # They use fixed temperature=1 and top_p=1 internally
+        if not is_reasoning:
+            if request.temperature is not None:
+                kwargs["temperature"] = request.temperature
+            if request.top_p is not None:
+                kwargs["top_p"] = request.top_p
+
+        # Handle max_tokens vs max_completion_tokens
         if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
+            # Clamp value between 8192 and 16384
+            clamped_tokens = max(8192, min(request.max_tokens, 16384))
+            if self._supports_max_completion_tokens():
+                # GPT-4.1+, GPT-5, reasoning models use max_completion_tokens
+                kwargs["max_completion_tokens"] = clamped_tokens
             else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
+                # Older models use max_tokens
+                kwargs["max_tokens"] = clamped_tokens
+
         if request.stop:
             kwargs["stop"] = request.stop
+
         if request.tools:
             kwargs["tools"] = self._convert_tools(request.tools)
             # Only add tool_choice if tools are present
@@ -206,9 +261,19 @@ class OpenAIModel(LLMModelAbstract):
                         kwargs["tool_choice"] = "auto"
                 elif isinstance(request.tool_choice, dict):
                     kwargs["tool_choice"] = request.tool_choice
+
         if request.response_format:
             kwargs["response_format"] = request.response_format
 
+        return kwargs
+
+    async def generate(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate a response using OpenAI API."""
+        await self.validate_request(request)
+
+        messages = [self._convert_message(msg) for msg in request.messages]
+        kwargs = self._build_request_kwargs(request, messages, stream=False)
+
         try:
             response = await self.client.chat.completions.create(**kwargs)
 
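One behavioral note on the clamp above: 0.1.3 only capped max_tokens at 16384, while 0.1.4 also raises small budgets to a floor of 8192. A quick sketch of the arithmetic:

def clamp_tokens(requested: int) -> int:
    # Same expression as in _build_request_kwargs
    return max(8192, min(requested, 16384))

assert clamp_tokens(1000) == 8192    # raised to the floor (0.1.3 passed 1000 through)
assert clamp_tokens(12000) == 12000  # in-range values are unchanged
assert clamp_tokens(32000) == 16384  # capped at the ceiling, as before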
@@ -258,45 +323,7 @@ class OpenAIModel(LLMModelAbstract):
         await self.validate_request(request)
 
         messages = [self._convert_message(msg) for msg in request.messages]
-
-        kwargs = {
-            "model": self._model_name,
-            "messages": messages,
-            "stream": True,
-        }
-
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
-        if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-            else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
-        if request.stop:
-            kwargs["stop"] = request.stop
-        if request.tools:
-            kwargs["tools"] = self._convert_tools(request.tools)
-            # Only add tool_choice if tools are present
-            if request.tool_choice:
-                # Validate tool_choice - OpenAI only supports 'none', 'auto', 'required', or dict
-                if isinstance(request.tool_choice, str):
-                    if request.tool_choice in ("none", "auto", "required"):
-                        kwargs["tool_choice"] = request.tool_choice
-                    else:
-                        # Invalid string value - default to 'auto'
-                        kwargs["tool_choice"] = "auto"
-                elif isinstance(request.tool_choice, dict):
-                    kwargs["tool_choice"] = request.tool_choice
-        if request.response_format:
-            kwargs["response_format"] = request.response_format
+        kwargs = self._build_request_kwargs(request, messages, stream=True)
 
         try:
             stream = await self.client.chat.completions.create(**kwargs)
@@ -389,7 +416,7 @@ class AzureOpenAIModel(OpenAIModel):
         self._api_key = api_key
         self._azure_endpoint = azure_endpoint
         self._api_version = api_version
-        self._base_model_name = model_name
+        self._model_name = model_name
         self._deployment_name = deployment_name
 
         # Call parent constructor (will call our overridden _init_client)
@@ -438,6 +465,27 @@ class AzureOpenAIModel(OpenAIModel):
         # Reinitialize client with new deployment name
         self._init_client(self._api_key)
 
+    @property
+    def model_name(self) -> str:
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, value: str):
+        """
+        Set new model name and recalculate capabilities.
+
+        Args:
+            value: New model name
+        """
+        self._model_name = value
+        self._deployment_name = value
+        self.client = AsyncAzureOpenAI(
+            api_key=self._api_key,
+            azure_endpoint=self._azure_endpoint,
+            api_version=self._api_version,
+            azure_deployment=value,
+        )
+
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
         """Generate a response using Azure OpenAI API with parameter adaptation."""
         # Azure OpenAI uses deployment name instead of model name
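A usage sketch for the new property: assigning model_name repoints the wrapper at another deployment and rebuilds the client. The constructor call below is an assumption inferred from the attributes this diff touches; the real signature may differ:

from donkit.llm.openai_model import AzureOpenAIModel

model = AzureOpenAIModel(              # inferred signature, placeholder values
    api_key="...",
    azure_endpoint="https://example.openai.azure.com",
    api_version="2024-06-01",
    model_name="gpt-4o",
    deployment_name="gpt-4o",
)

# New in 0.1.4: the setter updates _model_name and _deployment_name and
# recreates the AsyncAzureOpenAI client bound to the new deployment.
model.model_name = "gpt-4.1"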
donkit/llm/vertex_model.py CHANGED
@@ -643,9 +643,9 @@ class VertexAIModel(LLMModelAbstract):
                 yield StreamChunk(content=None, tool_calls=tool_calls)
 
         except Exception as e:
-            error_msg = str(e)
+            # error_msg = str(e)
             # Yield error message instead of empty response
-            yield StreamChunk(content=f"Error: {error_msg}")
+            raise e
 
 
 class VertexEmbeddingModel(LLMModelAbstract):
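This is a breaking change for stream consumers: 0.1.3 swallowed exceptions and yielded a final "Error: ..." chunk, while 0.1.4 re-raises them. A caller-side sketch, assuming the streaming entry point is named generate_stream (the method name lies outside this hunk):

import logging

logger = logging.getLogger(__name__)

async def consume(model, request) -> str:
    parts: list[str] = []
    try:
        async for chunk in model.generate_stream(request):
            if chunk.content:
                parts.append(chunk.content)
    except Exception:
        # 0.1.3 delivered failures as an in-band "Error: ..." chunk;
        # since 0.1.4 they surface here instead.
        logger.exception("Vertex stream failed")
        raise
    return "".join(parts)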
{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: donkit-llm
-Version: 0.1.3
+Version: 0.1.4
 Summary: Unified LLM model implementations for Donkit (OpenAI, Azure OpenAI, Claude, Vertex AI, Ollama)
 License: MIT
 Author: Donkit AI
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: anthropic[vertex] (>=0.42.0,<0.43.0)
-Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.4,<0.2.0)
+Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.5,<0.2.0)
 Requires-Dist: google-auth (>=2.0.0,<3.0.0)
 Requires-Dist: google-genai (>=1.38.0,<2.0.0)
 Requires-Dist: openai (>=2.1.0,<3.0.0)
{donkit_llm-0.1.3.dist-info → donkit_llm-0.1.4.dist-info}/RECORD RENAMED
@@ -1,12 +1,12 @@
 donkit/llm/__init__.py,sha256=0w5hPdaJDpzL1EpnBZm-7WV1Rz1OUdcCMcnxNRpXdiM,1357
 donkit/llm/claude_model.py,sha256=9UjNkACc6wHFus2gOXLKOi9yjX2FkP3cpQ9zFZEcXWU,16650
 donkit/llm/donkit_model.py,sha256=rEPxBW6k_BhIBF4XkgLzibVfwW6OJHiX89yMadcJkY4,8497
-donkit/llm/factory.py,sha256=IVz9fY_XMDRTgLCCUhBOxKtOcGdzuwFIrUlG2QLD1PE,9195
+donkit/llm/factory.py,sha256=KoZ9bD6FsZjU3ldKL7szznDSB8gI1slnI1jGGwKIuVY,9195
 donkit/llm/gemini_model.py,sha256=2uLoZr9HjUf1wxiZRGLQFcURCutsB2SV9f-1VaR6kGI,14413
 donkit/llm/model_abstract.py,sha256=aOgYh3I96PsxSxnkIJ1ETx5UFeRxozCD1c44wiKoBSs,8191
 donkit/llm/ollama_integration.py,sha256=WXeV2xNxP7gd1JyMsHMKaQOjvH7QYkLIPs7pmTPWFrg,13236
-donkit/llm/openai_model.py,sha256=P8gBw_WqZiclAt6QvZLZ8Q1-HCMCgS6O_lKGI1YDHOI,23372
-donkit/llm/vertex_model.py,sha256=HavW0iam3EmnlznB9KXADxo5SgaNCdwMjzgp5AOdoOU,29323
-donkit_llm-0.1.3.dist-info/METADATA,sha256=H65OHtU9OK0lrsV9dmbMKA16SR0CzPIFTaDX6GJVn_E,742
-donkit_llm-0.1.3.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-donkit_llm-0.1.3.dist-info/RECORD,,
+donkit/llm/openai_model.py,sha256=66ioYAaoOS9Fo0C0w2LYdSnAKwXIt6qXvJsKXTuajm0,24609
+donkit/llm/vertex_model.py,sha256=XOo_uwJOa0wgArkD3pac7SulUYWkCc7lTRjyrBSpHPM,29284
+donkit_llm-0.1.4.dist-info/METADATA,sha256=tR6fRwBE36XEf_X4AjGlNg1e4_3xLLPLeVuk2denyho,742
+donkit_llm-0.1.4.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+donkit_llm-0.1.4.dist-info/RECORD,,