donkit-llm 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes and reflects the changes between the two versions as they appear in the public registry.
donkit/llm/factory.py CHANGED
@@ -153,7 +153,7 @@ class ModelFactory:
     def create_donkit_model(
         model_name: str | None,
         api_key: str,
-        base_url: str = "http://localhost:9017",
+        base_url: str = "https://api.donkit.ai",
         provider: str = "default",
     ) -> DonkitModel:
         """Create a Donkit model that proxies through RagOps API Gateway.
donkit/llm/openai_model.py CHANGED
@@ -1,4 +1,4 @@
-from typing import AsyncIterator
+from typing import Any, AsyncIterator
 
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 
@@ -43,6 +43,41 @@ class OpenAIModel(LLMModelAbstract):
         self._init_client(api_key, base_url, organization)
         self._capabilities = self._determine_capabilities()
 
+    def _get_base_model_name(self) -> str:
+        """Get base model name for capability/parameter detection.
+
+        For Azure models, use _base_model_name; for OpenAI, use _model_name.
+        """
+        return getattr(self, "_base_model_name", self._model_name)
+
+    def _is_reasoning_model(self) -> bool:
+        """Check if model is a reasoning model (GPT-5, o1, o3, o4 series).
+
+        Reasoning models don't support temperature, top_p, presence_penalty, frequency_penalty.
+        They only support max_completion_tokens (not max_tokens).
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Check for reasoning model prefixes
+        reasoning_prefixes = ("gpt-5", "o1", "o3", "o4")
+        return any(model_lower.startswith(prefix) for prefix in reasoning_prefixes)
+
+    def _supports_max_completion_tokens(self) -> bool:
+        """Check if model uses max_completion_tokens instead of max_tokens.
+
+        GPT-4.1+, GPT-5, and reasoning models (o1, o3, o4) use max_completion_tokens.
+        """
+        model_lower = self._get_base_model_name().lower()
+        # Reasoning models always use max_completion_tokens
+        if self._is_reasoning_model():
+            return True
+        # GPT-4.1+ series use max_completion_tokens
+        if "gpt-4.1" in model_lower or "gpt-5" in model_lower:
+            return True
+        # GPT-4o and newer also use max_completion_tokens
+        if "gpt-4o" in model_lower:
+            return True
+        return False
+
     def _init_client(
         self,
         api_key: str,
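Taken together, the two new predicates classify models by prefix and substring checks on the lowercased name. This standalone sketch mirrors their logic outside the class so the behavior can be seen on concrete names (the names are illustrative):

    def is_reasoning(model: str) -> bool:
        # Mirrors _is_reasoning_model: prefix match on the lowercased name
        return model.lower().startswith(("gpt-5", "o1", "o3", "o4"))

    def uses_max_completion_tokens(model: str) -> bool:
        # Mirrors _supports_max_completion_tokens: reasoning models plus
        # the GPT-4.1 / GPT-5 / GPT-4o families
        m = model.lower()
        return is_reasoning(model) or any(s in m for s in ("gpt-4.1", "gpt-5", "gpt-4o"))

    for name in ("o3-mini", "gpt-5", "gpt-4o-mini", "gpt-4.1", "gpt-3.5-turbo"):
        print(name, is_reasoning(name), uses_max_completion_tokens(name))
    # o3-mini       True  True
    # gpt-5         True  True
    # gpt-4o-mini   False True
    # gpt-4.1       False True
    # gpt-3.5-turbo False False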
@@ -165,34 +200,54 @@ class OpenAIModel(LLMModelAbstract):
             for tool in tools
         ]
 
-    async def generate(self, request: GenerateRequest) -> GenerateResponse:
-        """Generate a response using OpenAI API."""
-        await self.validate_request(request)
+    def _build_request_kwargs(
+        self,
+        request: GenerateRequest,
+        messages: list[dict],
+        stream: bool = False,
+    ) -> dict:
+        """Build kwargs for OpenAI API request with parameter filtering.
 
-        messages = [self._convert_message(msg) for msg in request.messages]
+        Args:
+            request: Generate request with parameters
+            messages: Converted messages in OpenAI format
+            stream: Whether this is a streaming request
 
-        kwargs = {
+        Returns:
+            Dictionary of kwargs for OpenAI API call
+        """
+        kwargs: dict[str, Any] = {
             "model": self._model_name,
             "messages": messages,
         }
 
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
+        if stream:
+            kwargs["stream"] = True
+
+        is_reasoning = self._is_reasoning_model()
+
+        # Reasoning models (GPT-5, o1, o3, o4) don't support temperature/top_p
+        # They use fixed temperature=1 and top_p=1 internally
+        if not is_reasoning:
+            if request.temperature is not None:
+                kwargs["temperature"] = request.temperature
+            if request.top_p is not None:
+                kwargs["top_p"] = request.top_p
+
+        # Handle max_tokens vs max_completion_tokens
         if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
+            # Clamp value between 8192 and 16384
+            clamped_tokens = max(8192, min(request.max_tokens, 16384))
+            if self._supports_max_completion_tokens():
+                # GPT-4.1+, GPT-5, reasoning models use max_completion_tokens
+                kwargs["max_completion_tokens"] = clamped_tokens
             else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
+                # Older models use max_tokens
+                kwargs["max_tokens"] = clamped_tokens
+
         if request.stop:
             kwargs["stop"] = request.stop
+
         if request.tools:
             kwargs["tools"] = self._convert_tools(request.tools)
             # Only add tool_choice if tools are present
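One behavioral subtlety: "clamp between 8192 and 16384" means small requests are raised as well as large ones capped, whereas the 0.1.3 code only capped at 16384. A worked illustration of the clamp:

    def clamp_tokens(requested: int) -> int:
        # max(8192, min(x, 16384)) as in _build_request_kwargs
        return max(8192, min(requested, 16384))

    print(clamp_tokens(1024))   # 8192  (raised; 0.1.3 would have sent 1024)
    print(clamp_tokens(12000))  # 12000 (passed through)
    print(clamp_tokens(32768))  # 16384 (capped, same as 0.1.3)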
@@ -206,8 +261,35 @@ class OpenAIModel(LLMModelAbstract):
                         kwargs["tool_choice"] = "auto"
                 elif isinstance(request.tool_choice, dict):
                     kwargs["tool_choice"] = request.tool_choice
+
         if request.response_format:
-            kwargs["response_format"] = request.response_format
+            # OpenAI requires specific format for structured output
+            # If response_format is a JSON Schema dict with "type": "object", wrap it
+            if isinstance(request.response_format, dict):
+                if request.response_format.get("type") == "object":
+                    # This is a JSON Schema - wrap it in json_schema format
+                    kwargs["response_format"] = {
+                        "type": "json_schema",
+                        "json_schema": {
+                            "name": "response",
+                            "strict": True,
+                            "schema": request.response_format,
+                        },
+                    }
+                else:
+                    # Already in correct format or simple type
+                    kwargs["response_format"] = request.response_format
+            else:
+                kwargs["response_format"] = request.response_format
+
+        return kwargs
+
+    async def generate(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate a response using OpenAI API."""
+        await self.validate_request(request)
+
+        messages = [self._convert_message(msg) for msg in request.messages]
+        kwargs = self._build_request_kwargs(request, messages, stream=False)
 
         try:
             response = await self.client.chat.completions.create(**kwargs)
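For structured output, the new branch turns a bare JSON Schema into OpenAI's json_schema envelope. A sketch of the before/after shape (the schema itself is a made-up example):

    schema = {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }

    # request.response_format = schema now reaches the API as:
    wrapped = {
        "type": "json_schema",
        "json_schema": {"name": "response", "strict": True, "schema": schema},
    }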
@@ -258,45 +340,7 @@ class OpenAIModel(LLMModelAbstract):
         await self.validate_request(request)
 
         messages = [self._convert_message(msg) for msg in request.messages]
-
-        kwargs = {
-            "model": self._model_name,
-            "messages": messages,
-            "stream": True,
-        }
-
-        if request.temperature is not None:
-            kwargs["temperature"] = request.temperature
-        if request.max_tokens is not None:
-            # Use max_completion_tokens for GPT models, max_tokens for others
-            model_lower = self._model_name.lower()
-            if "gpt" in model_lower and "oss" not in model_lower:
-                kwargs["max_completion_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-            else:
-                kwargs["max_tokens"] = (
-                    request.max_tokens if request.max_tokens <= 16384 else 16384
-                )
-        if request.top_p is not None:
-            kwargs["top_p"] = request.top_p
-        if request.stop:
-            kwargs["stop"] = request.stop
-        if request.tools:
-            kwargs["tools"] = self._convert_tools(request.tools)
-            # Only add tool_choice if tools are present
-            if request.tool_choice:
-                # Validate tool_choice - OpenAI only supports 'none', 'auto', 'required', or dict
-                if isinstance(request.tool_choice, str):
-                    if request.tool_choice in ("none", "auto", "required"):
-                        kwargs["tool_choice"] = request.tool_choice
-                    else:
-                        # Invalid string value - default to 'auto'
-                        kwargs["tool_choice"] = "auto"
-                elif isinstance(request.tool_choice, dict):
-                    kwargs["tool_choice"] = request.tool_choice
-        if request.response_format:
-            kwargs["response_format"] = request.response_format
+        kwargs = self._build_request_kwargs(request, messages, stream=True)
 
         try:
             stream = await self.client.chat.completions.create(**kwargs)
@@ -389,7 +433,7 @@ class AzureOpenAIModel(OpenAIModel):
         self._api_key = api_key
         self._azure_endpoint = azure_endpoint
         self._api_version = api_version
-        self._base_model_name = model_name
+        self._model_name = model_name
         self._deployment_name = deployment_name
 
         # Call parent constructor (will call our overridden _init_client)
@@ -438,6 +482,27 @@ class AzureOpenAIModel(OpenAIModel):
         # Reinitialize client with new deployment name
         self._init_client(self._api_key)
 
+    @property
+    def model_name(self) -> str:
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, value: str):
+        """
+        Set new model name and recalculate capabilities.
+
+        Args:
+            value: New model name
+        """
+        self._model_name = value
+        self._deployment_name = value
+        self.client = AsyncAzureOpenAI(
+            api_key=self._api_key,
+            azure_endpoint=self._azure_endpoint,
+            api_version=self._api_version,
+            azure_deployment=value,
+        )
+
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
         """Generate a response using Azure OpenAI API with parameter adaptation."""
         # Azure OpenAI uses deployment name instead of model name
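The new setter makes deployment switching a one-liner: assigning model_name retargets the deployment and rebuilds the AsyncAzureOpenAI client against it. A rough usage sketch; the constructor arguments are assumptions pieced together from the attributes visible in this diff:

    model = AzureOpenAIModel(
        api_key="key",
        azure_endpoint="https://example.openai.azure.com",  # placeholder endpoint
        api_version="2024-08-01-preview",                   # placeholder version
        model_name="gpt-4o",
        deployment_name="gpt-4o",
    )

    # Updates _deployment_name and reinstantiates the client against it
    model.model_name = "gpt-4o-mini"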
donkit/llm/vertex_model.py CHANGED
@@ -330,6 +330,39 @@ class VertexAIModel(LLMModelAbstract):
 
         return convert(schema)
 
+    def _build_config_kwargs(
+        self, request: GenerateRequest, system_instruction: str | None = None
+    ) -> dict[str, Any]:
+        """Build configuration kwargs for Vertex AI generate/generate_stream."""
+        config_kwargs: dict[str, Any] = {
+            "temperature": request.temperature
+            if request.temperature is not None
+            else 0.2,
+            "top_p": request.top_p if request.top_p is not None else 0.95,
+            "max_output_tokens": request.max_tokens
+            if request.max_tokens is not None
+            else 8192,
+        }
+        if system_instruction:
+            config_kwargs["system_instruction"] = system_instruction
+        if request.stop:
+            config_kwargs["stop_sequences"] = request.stop
+        if request.response_format:
+            config_kwargs["response_mime_type"] = "application/json"
+            # If response_format is a JSON Schema dict with "type": "object", use it directly
+            if isinstance(request.response_format, dict):
+                if request.response_format.get("type") == "object":
+                    # This is a JSON Schema - use it directly
+                    config_kwargs["response_schema"] = self._clean_json_schema(
+                        request.response_format
+                    )
+                elif "schema" in request.response_format:
+                    # Already wrapped in schema key
+                    config_kwargs["response_schema"] = self._clean_json_schema(
+                        request.response_format["schema"]
+                    )
+        return config_kwargs
+
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
         """Generate a response using Vertex AI."""
         await self.validate_request(request)
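The helper fixes the Gemini defaults in one place: temperature 0.2, top_p 0.95, and max_output_tokens 8192 whenever the request leaves them unset. A sketch of the dict it returns for a minimal request with a system instruction (the request values here are hypothetical):

    config_kwargs = {
        "temperature": 0.2,         # request.temperature was None
        "top_p": 0.95,              # request.top_p was None
        "max_output_tokens": 8192,  # request.max_tokens was None
        "system_instruction": "You are a helpful assistant.",
    }
    # Downstream: genai.types.GenerateContentConfig(**config_kwargs)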
@@ -410,26 +443,7 @@ class VertexAIModel(LLMModelAbstract):
             contents.append(user_content)
             i += 1
 
-        config_kwargs = {
-            "temperature": request.temperature
-            if request.temperature is not None
-            else 0.2,
-            "top_p": request.top_p if request.top_p is not None else 0.95,
-            "max_output_tokens": request.max_tokens
-            if request.max_tokens is not None
-            else 8192,
-        }
-        if system_instruction:
-            config_kwargs["system_instruction"] = system_instruction
-        if request.stop:
-            config_kwargs["stop_sequences"] = request.stop
-        if request.response_format:
-            config_kwargs["response_mime_type"] = "application/json"
-            if "schema" in request.response_format:
-                config_kwargs["response_schema"] = self._clean_json_schema(
-                    request.response_format["schema"]
-                )
-
+        config_kwargs = self._build_config_kwargs(request, system_instruction)
         config = genai.types.GenerateContentConfig(**config_kwargs)
 
         if request.tools:
@@ -584,25 +598,7 @@ class VertexAIModel(LLMModelAbstract):
             contents.append(user_content)
             i += 1
 
-        config_kwargs: dict[str, Any] = {
-            "temperature": request.temperature
-            if request.temperature is not None
-            else 0.2,
-            "top_p": request.top_p if request.top_p is not None else 0.95,
-            "max_output_tokens": request.max_tokens
-            if request.max_tokens is not None
-            else 8192,
-        }
-        if system_instruction:
-            config_kwargs["system_instruction"] = system_instruction
-        if request.stop:
-            config_kwargs["stop_sequences"] = request.stop
-        if request.response_format:
-            config_kwargs["response_mime_type"] = "application/json"
-            if "schema" in request.response_format:
-                config_kwargs["response_schema"] = self._clean_json_schema(
-                    request.response_format["schema"]
-                )
+        config_kwargs = self._build_config_kwargs(request, system_instruction)
         config_kwargs["automatic_function_calling"] = (
             genai.types.AutomaticFunctionCallingConfig(maximum_remote_calls=100)
         )
@@ -643,9 +639,9 @@ class VertexAIModel(LLMModelAbstract):
                     yield StreamChunk(content=None, tool_calls=tool_calls)
 
         except Exception as e:
-            error_msg = str(e)
+            # error_msg = str(e)
             # Yield error message instead of empty response
-            yield StreamChunk(content=f"Error: {error_msg}")
+            raise e
 
 
 class VertexEmbeddingModel(LLMModelAbstract):
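This is a breaking change for stream consumers: errors now propagate as exceptions instead of being yielded as an "Error: ..." chunk. Callers that parsed that sentinel text must switch to exception handling, roughly (the recovery hook below is hypothetical):

    try:
        async for chunk in model.generate_stream(request):
            if chunk.content:
                print(chunk.content, end="")
    except Exception as exc:
        # 0.1.3 yielded StreamChunk(content=f"Error: {exc}") here instead
        handle_stream_failure(exc)  # hypothetical recovery hook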
donkit_llm-0.1.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: donkit-llm
-Version: 0.1.3
+Version: 0.1.5
 Summary: Unified LLM model implementations for Donkit (OpenAI, Azure OpenAI, Claude, Vertex AI, Ollama)
 License: MIT
 Author: Donkit AI
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: anthropic[vertex] (>=0.42.0,<0.43.0)
-Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.4,<0.2.0)
+Requires-Dist: donkit-ragops-api-gateway-client (>=0.1.5,<0.2.0)
 Requires-Dist: google-auth (>=2.0.0,<3.0.0)
 Requires-Dist: google-genai (>=1.38.0,<2.0.0)
 Requires-Dist: openai (>=2.1.0,<3.0.0)
donkit_llm-0.1.5.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
 donkit/llm/__init__.py,sha256=0w5hPdaJDpzL1EpnBZm-7WV1Rz1OUdcCMcnxNRpXdiM,1357
 donkit/llm/claude_model.py,sha256=9UjNkACc6wHFus2gOXLKOi9yjX2FkP3cpQ9zFZEcXWU,16650
 donkit/llm/donkit_model.py,sha256=rEPxBW6k_BhIBF4XkgLzibVfwW6OJHiX89yMadcJkY4,8497
-donkit/llm/factory.py,sha256=IVz9fY_XMDRTgLCCUhBOxKtOcGdzuwFIrUlG2QLD1PE,9195
+donkit/llm/factory.py,sha256=KoZ9bD6FsZjU3ldKL7szznDSB8gI1slnI1jGGwKIuVY,9195
 donkit/llm/gemini_model.py,sha256=2uLoZr9HjUf1wxiZRGLQFcURCutsB2SV9f-1VaR6kGI,14413
 donkit/llm/model_abstract.py,sha256=aOgYh3I96PsxSxnkIJ1ETx5UFeRxozCD1c44wiKoBSs,8191
 donkit/llm/ollama_integration.py,sha256=WXeV2xNxP7gd1JyMsHMKaQOjvH7QYkLIPs7pmTPWFrg,13236
-donkit/llm/openai_model.py,sha256=P8gBw_WqZiclAt6QvZLZ8Q1-HCMCgS6O_lKGI1YDHOI,23372
-donkit/llm/vertex_model.py,sha256=HavW0iam3EmnlznB9KXADxo5SgaNCdwMjzgp5AOdoOU,29323
-donkit_llm-0.1.3.dist-info/METADATA,sha256=H65OHtU9OK0lrsV9dmbMKA16SR0CzPIFTaDX6GJVn_E,742
-donkit_llm-0.1.3.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-donkit_llm-0.1.3.dist-info/RECORD,,
+donkit/llm/openai_model.py,sha256=xi3rRh5cJ8NcAvodwCNaU3_9UYHZwfUjdxnLJZs-qxg,25486
+donkit/llm/vertex_model.py,sha256=LcdWBdx4JYzom2IsXxhNGEsrYf0N6JmwuRc3sqfKIos,29350
+donkit_llm-0.1.5.dist-info/METADATA,sha256=O20eq1h0Kr0fBbDvD1WA0RkZBy9ex_SNk_CLDKXi1mI,742
+donkit_llm-0.1.5.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+donkit_llm-0.1.5.dist-info/RECORD,,