donkit-llm 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: donkit-llm
- Version: 0.1.8
+ Version: 0.1.10
  Summary: Unified LLM model implementations for Donkit (OpenAI, Azure OpenAI, Claude, Vertex AI, Ollama)
  License: MIT
  Author: Donkit AI
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "donkit-llm"
- version = "0.1.8"
+ version = "0.1.10"
  description = "Unified LLM model implementations for Donkit (OpenAI, Azure OpenAI, Claude, Vertex AI, Ollama)"
  authors = ["Donkit AI <opensource@donkit.ai>"]
  license = "MIT"
@@ -19,6 +19,7 @@ from .openai_model import (
      AzureOpenAIModel,
      OpenAIEmbeddingModel,
      OpenAIModel,
+     is_codex_model,
  )
  from .claude_model import ClaudeModel, ClaudeVertexModel
  from .vertex_model import VertexAIModel, VertexEmbeddingModel
@@ -26,11 +27,10 @@ from .factory import ModelFactory
  from .gemini_model import GeminiModel, GeminiEmbeddingModel
  from .donkit_model import DonkitModel

- import importlib.util

- if importlib.util.find_spec("donkit.llm_gate.client") is not None:
+ try:
      from .llm_gate_model import LLMGateModel
- else:
+ except ModuleNotFoundError:
      LLMGateModel = None

  __all__ = [
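
The switch from `importlib.util.find_spec` to `try`/`except ModuleNotFoundError` ties the fallback to the import that actually matters, instead of probing a neighbouring module's spec. A self-contained sketch of the pattern (the module name `optional_extra` is a throwaway stand-in so the snippet runs on its own; the package itself uses the relative import shown above):

    # Optional dependency: bind the name to None when the module is absent,
    # so callers can feature-check it before use.
    try:
        from optional_extra import LLMGateModel  # stand-in for .llm_gate_model
    except ModuleNotFoundError:
        LLMGateModel = None

    print("LLM gate support available:", LLMGateModel is not None)
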
@@ -57,6 +57,7 @@ __all__ = [
      "AzureOpenAIModel",
      "OpenAIEmbeddingModel",
      "AzureOpenAIEmbeddingModel",
+     "is_codex_model",
      "ClaudeModel",
      "ClaudeVertexModel",
      "VertexAIModel",
@@ -5,11 +5,9 @@ from .claude_model import ClaudeVertexModel
  from .donkit_model import DonkitModel
  from .gemini_model import GeminiModel

- import importlib.util
-
- if importlib.util.find_spec("donkit.llm_gate.client") is not None:
+ try:
      from .llm_gate_model import LLMGateModel
- else:
+ except ModuleNotFoundError:
      LLMGateModel = None
  from .model_abstract import LLMModelAbstract
  from .openai_model import AzureOpenAIEmbeddingModel
@@ -45,8 +43,9 @@ class ModelFactory:
          api_version: str = "2024-08-01-preview",
          deployment_name: str | None = None,
      ) -> AzureOpenAIModel:
+         effective_model = deployment_name or model_name
          return AzureOpenAIModel(
-             model_name=deployment_name or model_name,
+             model_name=effective_model,
              api_key=api_key,
              azure_endpoint=azure_endpoint,
              api_version=api_version,
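
The factory change only names the existing fallback, but it is worth spelling out: the deployment name, when given, takes precedence over the model name. A minimal sketch with made-up values (`pick_azure_model` and the deployment name are hypothetical):

    def pick_azure_model(model_name: str, deployment_name: str | None) -> str:
        # Mirrors `effective_model = deployment_name or model_name` above:
        # an explicit deployment wins, otherwise the plain model name is used.
        return deployment_name or model_name

    assert pick_azure_model("gpt-4.1-mini", None) == "gpt-4.1-mini"
    assert pick_azure_model("gpt-4.1-mini", "my-gpt41-deployment") == "my-gpt41-deployment"
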
@@ -210,6 +209,7 @@ class ModelFactory:
          provider: Literal[
              "openai",
              "azure_openai",
+             "azure_openai_codex",
              "claude",
              "claude_vertex",
              "vertex",
@@ -224,6 +224,7 @@ class ModelFactory:
          default_models = {
              "openai": "gpt-5-mini",
              "azure_openai": "gpt-4.1-mini",
+             "azure_openai_codex": "gpt-5.1-codex",
              "claude": "claude-4-5-sonnet",
              "claude_vertex": "claude-4-5-sonnet",
              "gemini": "gemini-2.5-flash",
@@ -1,5 +1,6 @@
  from typing import Any, AsyncIterator

+ from loguru import logger
  from openai import AsyncAzureOpenAI, AsyncOpenAI

  from .model_abstract import (
@@ -268,10 +269,12 @@ class OpenAIModel(LLMModelAbstract):
          # Reasoning models (GPT-5, o1, o3, o4) don't support temperature/top_p
          # They use fixed temperature=1 and top_p=1 internally
          if not is_reasoning:
-             if request.temperature is not None:
-                 kwargs["temperature"] = request.temperature
              if request.top_p is not None:
                  kwargs["top_p"] = request.top_p
+             if request.temperature is not None and request.top_p is None:
+                 kwargs["temperature"] = request.temperature
+             else:
+                 kwargs["top_p"] = 0.0

          # Handle max_tokens vs max_completion_tokens
          if request.max_tokens is not None:
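
The rewritten sampling block now forwards `temperature` only when `top_p` was left unset, and otherwise pins `top_p`. Since the published diff has lost its indentation, the exact nesting is uncertain; the sketch below takes the added lines at the same level as the surviving `top_p` check, which is one plausible reading:

    def sampling_kwargs(
        is_reasoning: bool, temperature: float | None, top_p: float | None
    ) -> dict:
        # One reading of the new branch; the nesting is an assumption because
        # the published diff dropped the original indentation.
        kwargs: dict[str, float] = {}
        if not is_reasoning:
            if top_p is not None:
                kwargs["top_p"] = top_p
            if temperature is not None and top_p is None:
                kwargs["temperature"] = temperature
            else:
                kwargs["top_p"] = 0.0  # under this reading a caller-supplied top_p is overridden
        return kwargs

    print(sampling_kwargs(True, 0.2, 0.9))    # {} -> reasoning models get neither knob
    print(sampling_kwargs(False, 0.2, None))  # {'temperature': 0.2}
    print(sampling_kwargs(False, None, 0.9))  # {'top_p': 0.0}
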
@@ -449,6 +452,7 @@ class OpenAIModel(LLMModelAbstract):

          async for chunk in stream:
              if not chunk.choices:
+                 logger.info("No choices in chunk, continue")
                  continue

              choice = chunk.choices[0]
@@ -461,7 +465,7 @@ class OpenAIModel(LLMModelAbstract):
                  # logger.info(
                  #     f"LLM Stream Chunk: {json.dumps(chunk_log, ensure_ascii=False)}"
                  # )
-                 yield StreamChunk(content=delta.content, tool_calls=None)
+                 yield StreamChunk(content=delta.content)

              # Accumulate tool calls
              if delta.tool_calls:
@@ -528,7 +532,7 @@ class OpenAIModel(LLMModelAbstract):


  class AzureOpenAIModel(OpenAIModel):
-     """Azure OpenAI model implementation."""
+     """Azure OpenAI model implementation with dynamic Codex support."""

      def __init__(
          self,
@@ -554,6 +558,7 @@ class AzureOpenAIModel(OpenAIModel):
          self._api_version = api_version
          self._model_name = model_name
          self._deployment_name = deployment_name
+         self._is_codex = is_codex_model(deployment_name or model_name)

          # Call parent constructor (will call our overridden _init_client)
          super().__init__(model_name, api_key)
@@ -564,13 +569,21 @@ class AzureOpenAIModel(OpenAIModel):
          base_url: str | None = None,
          organization: str | None = None,
      ) -> None:
-         """Initialize Azure OpenAI client."""
-         self.client = AsyncAzureOpenAI(
-             api_key=self._api_key,
-             azure_endpoint=self._azure_endpoint,
-             api_version=self._api_version,
-             azure_deployment=self._deployment_name,
-         )
+         """Initialize Azure OpenAI client (or Responses API client for Codex)."""
+         if self._is_codex:
+             # Codex models use Responses API with /openai/v1/ path
+             responses_base_url = f"{self._azure_endpoint.rstrip('/')}/openai/v1/"
+             self.client = AsyncOpenAI(
+                 api_key=self._api_key,
+                 base_url=responses_base_url,
+             )
+         else:
+             self.client = AsyncAzureOpenAI(
+                 api_key=self._api_key,
+                 azure_endpoint=self._azure_endpoint,
+                 api_version=self._api_version,
+                 azure_deployment=self._deployment_name,
+             )

      def _determine_capabilities(self) -> ModelCapability:
          """Determine capabilities based on base model name."""
@@ -598,6 +611,7 @@ class AzureOpenAIModel(OpenAIModel):
              value: New deployment name
          """
          self._deployment_name = value
+         self._is_codex = is_codex_model(value)
          # Reinitialize client with new deployment name
          self._init_client(self._api_key)

@@ -615,17 +629,276 @@ class AzureOpenAIModel(OpenAIModel):
          """
          self._model_name = value
          self._deployment_name = value
-         self.client = AsyncAzureOpenAI(
-             api_key=self._api_key,
-             azure_endpoint=self._azure_endpoint,
-             api_version=self._api_version,
-             azure_deployment=value,
-         )
+         self._is_codex = is_codex_model(value)
+         self._init_client(self._api_key)
+
+     # ---- Codex (Responses API) helper methods ----
+
+     def _convert_message_for_responses(self, msg: Message) -> dict | list[dict]:
+         """Convert internal Message to Responses API format."""
+         role = msg.role
+         if role == "system":
+             role = "developer"
+
+         if msg.role == "tool" and msg.tool_call_id:
+             return {
+                 "type": "function_call_output",
+                 "call_id": msg.tool_call_id,
+                 "output": msg.content
+                 if isinstance(msg.content, str)
+                 else str(msg.content),
+             }
+
+         if msg.role == "assistant" and msg.tool_calls:
+             items = []
+             if msg.content:
+                 items.append(
+                     {
+                         "type": "message",
+                         "role": "assistant",
+                         "content": [{"type": "output_text", "text": msg.content}],
+                     }
+                 )
+             for tc in msg.tool_calls:
+                 items.append(
+                     {
+                         "type": "function_call",
+                         "call_id": tc.id,
+                         "name": tc.function.name,
+                         "arguments": tc.function.arguments,
+                     }
+                 )
+             return items
+
+         if isinstance(msg.content, str):
+             return {"role": role, "content": msg.content}
+
+         content_parts = []
+         for part in msg.content:
+             if part.content_type == ContentType.TEXT:
+                 content_parts.append({"type": "input_text", "text": part.content})
+             elif part.content_type == ContentType.IMAGE_URL:
+                 content_parts.append({"type": "input_image", "image_url": part.content})
+             elif part.content_type == ContentType.IMAGE_BASE64:
+                 content_parts.append(
+                     {
+                         "type": "input_image",
+                         "image_url": f"data:{part.mime_type or 'image/png'};base64,{part.content}",
+                     }
+                 )
+         return {"role": role, "content": content_parts}
+
+     def _convert_tools_for_responses(self, tools: list[Tool]) -> list[dict]:
+         """Convert tools to Responses API format."""
+         return [
+             {
+                 "type": "function",
+                 "name": tool.function.name,
+                 "description": tool.function.description or "",
+                 "parameters": tool.function.parameters
+                 or {"type": "object", "properties": {}},
+             }
+             for tool in tools
+         ]
+
+     def _extract_system_instruction(
+         self, messages: list[Message]
+     ) -> tuple[str | None, list[Message]]:
+         """Extract system message as instructions."""
+         instructions = None
+         remaining = []
+         for msg in messages:
+             if msg.role == "system":
+                 content = msg.content if isinstance(msg.content, str) else ""
+                 instructions = (
+                     content if instructions is None else instructions + "\n" + content
+                 )
+             else:
+                 remaining.append(msg)
+         return instructions, remaining
+
+     async def _generate_codex(self, request: GenerateRequest) -> GenerateResponse:
+         """Generate using Responses API for Codex models."""
+         await self.validate_request(request)
+
+         instructions, messages = self._extract_system_instruction(request.messages)
+
+         input_items = []
+         for msg in messages:
+             converted = self._convert_message_for_responses(msg)
+             if isinstance(converted, list):
+                 input_items.extend(converted)
+             else:
+                 input_items.append(converted)
+
+         kwargs: dict[str, Any] = {"model": self._deployment_name, "input": input_items}
+
+         if instructions:
+             kwargs["instructions"] = instructions
+         if request.max_tokens:
+             kwargs["max_output_tokens"] = max(8192, min(request.max_tokens, 16384))
+         if request.tools:
+             kwargs["tools"] = self._convert_tools_for_responses(request.tools)
+         if request.tool_choice and isinstance(request.tool_choice, str):
+             if request.tool_choice in ("none", "auto", "required"):
+                 kwargs["tool_choice"] = request.tool_choice
+
+         try:
+             response = await self.client.responses.create(**kwargs)
+
+             content = getattr(response, "output_text", None)
+             tool_calls = None
+
+             if hasattr(response, "output") and response.output:
+                 parsed_tool_calls = []
+                 for item in response.output:
+                     if getattr(item, "type", None) == "function_call":
+                         parsed_tool_calls.append(
+                             ToolCall(
+                                 id=getattr(item, "call_id", ""),
+                                 type="function",
+                                 function=FunctionCall(
+                                     name=getattr(item, "name", ""),
+                                     arguments=getattr(item, "arguments", "{}"),
+                                 ),
+                             )
+                         )
+                     elif getattr(item, "type", None) == "message" and not content:
+                         for part in getattr(item, "content", []):
+                             if getattr(part, "type", None) == "output_text":
+                                 content = getattr(part, "text", "")
+                                 break
+                 if parsed_tool_calls:
+                     tool_calls = parsed_tool_calls
+
+             usage = None
+             if hasattr(response, "usage") and response.usage:
+                 usage = {
+                     "prompt_tokens": getattr(response.usage, "input_tokens", 0),
+                     "completion_tokens": getattr(response.usage, "output_tokens", 0),
+                     "total_tokens": getattr(response.usage, "total_tokens", 0),
+                 }
+
+             return GenerateResponse(
+                 content=content,
+                 tool_calls=tool_calls,
+                 finish_reason=getattr(response, "status", None),
+                 usage=usage,
+             )
+         except Exception as e:
+             logger.error(f"Codex API error: {e}")
+             return GenerateResponse(content=f"Error: {e}")
+
+     async def _generate_stream_codex(
+         self, request: GenerateRequest
+     ) -> AsyncIterator[StreamChunk]:
+         """Generate streaming using Responses API for Codex models."""
+         await self.validate_request(request)
+
+         instructions, messages = self._extract_system_instruction(request.messages)
+
+         input_items = []
+         for msg in messages:
+             converted = self._convert_message_for_responses(msg)
+             if isinstance(converted, list):
+                 input_items.extend(converted)
+             else:
+                 input_items.append(converted)
+         logger.info(input_items)
+
+         kwargs: dict[str, Any] = {
+             "model": self._deployment_name,
+             "input": input_items,
+             "stream": True,
+         }
+
+         if instructions:
+             kwargs["instructions"] = instructions
+         if request.max_tokens:
+             kwargs["max_output_tokens"] = max(8192, min(request.max_tokens, 16384))
+         if request.tools:
+             kwargs["tools"] = self._convert_tools_for_responses(request.tools)
+         if request.tool_choice and isinstance(request.tool_choice, str):
+             if request.tool_choice in ("none", "auto", "required"):
+                 kwargs["tool_choice"] = request.tool_choice
+
+         try:
+             stream = await self.client.responses.create(**kwargs)
+             # Buffer for accumulating function calls by output_index
+             tool_call_buffers: dict[int, dict] = {}
+
+             async for event in stream:
+                 logger.info(event)
+                 event_type = getattr(event, "type", None)
+
+                 if event_type == "response.output_text.delta":
+                     delta = getattr(event, "delta", "")
+                     if delta:
+                         yield StreamChunk(content=delta)
+
+                 # Capture function call name when output item is added
+                 elif event_type == "response.output_item.added":
+                     item = getattr(event, "item", None)
+                     output_index = getattr(event, "output_index", 0)
+                     if item and getattr(item, "type", None) == "function_call":
+                         tool_call_buffers[output_index] = {
+                             "call_id": getattr(item, "call_id", ""),
+                             "name": getattr(item, "name", ""),
+                             "arguments": "",
+                         }
+
+                 elif event_type == "response.function_call_arguments.delta":
+                     output_index = getattr(event, "output_index", 0)
+                     delta = getattr(event, "delta", "")
+                     if output_index in tool_call_buffers:
+                         tool_call_buffers[output_index]["arguments"] += delta
+
+                 elif event_type == "response.function_call_arguments.done":
+                     output_index = getattr(event, "output_index", 0)
+                     if output_index in tool_call_buffers:
+                         tc_data = tool_call_buffers[output_index]
+                         # Get final arguments from event, fallback to accumulated
+                         final_args = getattr(event, "arguments", None)
+                         if final_args is None:
+                             final_args = tc_data.get("arguments", "{}")
+                         yield StreamChunk(
+                             tool_calls=[
+                                 ToolCall(
+                                     id=tc_data["call_id"],
+                                     type="function",
+                                     function=FunctionCall(
+                                         name=tc_data["name"],
+                                         arguments=final_args,
+                                     ),
+                                 )
+                             ],
+                         )
+                         del tool_call_buffers[output_index]
+
+                 elif event_type == "response.completed":
+                     response_obj = getattr(event, "response", None)
+                     finish_reason = (
+                         getattr(response_obj, "status", None) if response_obj else None
+                     )
+                     yield StreamChunk(finish_reason=finish_reason)
+
+                 elif event_type == "error":
+                     yield StreamChunk(
+                         content=f"Error: {getattr(event, 'message', 'Unknown')}"
+                     )
+
+         except Exception as e:
+             logger.error(f"Codex streaming error: {e}")
+             yield StreamChunk(content=f"Error: {e}")
+
+     # ---- Main generate methods ----

      async def generate(self, request: GenerateRequest) -> GenerateResponse:
-         """Generate a response using Azure OpenAI API with parameter adaptation."""
+         """Generate a response using Azure OpenAI API (or Responses API for Codex)."""
+         if self._is_codex:
+             return await self._generate_codex(request)
+
          # Azure OpenAI uses deployment name instead of model name
-         # Temporarily override model_name with deployment_name
          original_model = self._model_name
          self._model_name = self._deployment_name
          try:
@@ -636,7 +909,12 @@ class AzureOpenAIModel(OpenAIModel):
      async def generate_stream(
          self, request: GenerateRequest
      ) -> AsyncIterator[StreamChunk]:
-         """Generate a streaming response using Azure OpenAI API."""
+         """Generate a streaming response using Azure OpenAI API (or Responses API for Codex)."""
+         if self._is_codex:
+             async for chunk in self._generate_stream_codex(request):
+                 yield chunk
+             return
+
          # Azure OpenAI uses deployment name instead of model name
          original_model = self._model_name
          self._model_name = self._deployment_name
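
The streaming branch buffers `function_call` items by `output_index` and only emits a tool call once the `arguments.done` event arrives. A self-contained sketch of that buffering, driven by fake events (the dicts below stand in for the SDK's typed event objects):

    # Fake Responses API stream events; the real ones are typed objects with these fields.
    events = [
        {"type": "response.output_item.added", "output_index": 0,
         "item": {"type": "function_call", "call_id": "call_1", "name": "search"}},
        {"type": "response.function_call_arguments.delta", "output_index": 0, "delta": '{"query":'},
        {"type": "response.function_call_arguments.delta", "output_index": 0, "delta": ' "cats"}'},
        {"type": "response.function_call_arguments.done", "output_index": 0},
    ]

    buffers: dict[int, dict] = {}
    for event in events:
        idx = event.get("output_index", 0)
        if event["type"] == "response.output_item.added":
            item = event["item"]
            buffers[idx] = {"call_id": item["call_id"], "name": item["name"], "arguments": ""}
        elif event["type"] == "response.function_call_arguments.delta":
            buffers[idx]["arguments"] += event["delta"]
        elif event["type"] == "response.function_call_arguments.done":
            completed = buffers.pop(idx)
            print(completed)  # {'call_id': 'call_1', 'name': 'search', 'arguments': '{"query": "cats"}'}
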
@@ -850,3 +1128,10 @@ class AzureOpenAIEmbeddingModel(LLMModelAbstract):
              )
          except Exception as e:
              raise Exception(f"Failed to generate embeddings: {e}")
+
+
+ def is_codex_model(model_name: str) -> bool:
+     """Check if the model requires Responses API (Codex models)."""
+     codex_patterns = ["codex", "gpt-5.1-codex", "gpt-5-codex"]
+     model_lower = model_name.lower()
+     return any(pattern in model_lower for pattern in codex_patterns)
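
The new helper is a plain substring check, so the routing behaviour is easy to verify; the sketch copies its body and exercises it with sample names (the mixed-case deployment name is made up). Note that the first pattern, "codex", already covers the two more specific ones.

    def is_codex_model(model_name: str) -> bool:
        """Check if the model requires Responses API (Codex models)."""
        codex_patterns = ["codex", "gpt-5.1-codex", "gpt-5-codex"]
        model_lower = model_name.lower()
        return any(pattern in model_lower for pattern in codex_patterns)

    assert is_codex_model("gpt-5.1-codex")
    assert is_codex_model("my-CODEX-deployment")   # case-insensitive; hypothetical name
    assert not is_codex_model("gpt-4.1-mini")
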
@@ -6,6 +6,7 @@ import google.genai as genai
  from google.genai.types import Blob, Content, FunctionDeclaration, Part
  from google.genai.types import Tool as GeminiTool
  from google.oauth2 import service_account
+ from loguru import logger

  from .model_abstract import (
      ContentType,
@@ -628,15 +629,33 @@ class VertexAIModel(LLMModelAbstract):
              )

              async for chunk in stream:
+                 logger.info(chunk)
                  text, tool_calls = self._parse_response(chunk)

+                 # Extract finish_reason from chunk
+                 finish_reason = None
+                 if hasattr(chunk, "candidates") and chunk.candidates:
+                     cand = chunk.candidates[0]
+                     if hasattr(cand, "finish_reason") and cand.finish_reason:
+                         finish_reason = str(cand.finish_reason)
+
                  # Yield text chunks as they come
                  if text:
-                     yield StreamChunk(content=text, tool_calls=None)
+                     yield StreamChunk(
+                         content=text, tool_calls=None, finish_reason=finish_reason
+                     )

                  # Tool calls come in final chunk - yield them separately
                  if tool_calls:
-                     yield StreamChunk(content=None, tool_calls=tool_calls)
+                     yield StreamChunk(
+                         content=None, tool_calls=tool_calls, finish_reason=finish_reason
+                     )
+
+                 # If no text and no tool_calls but we have finish_reason, yield it
+                 if not text and not tool_calls and finish_reason:
+                     yield StreamChunk(
+                         content=None, tool_calls=None, finish_reason=finish_reason
+                     )

          except Exception as e:
              # error_msg = str(e)
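
The Vertex streaming change threads `finish_reason` from the chunk's first candidate onto every yielded `StreamChunk`. A compact sketch of that extraction, using a dummy object in place of the google-genai chunk type:

    from types import SimpleNamespace

    # Dummy stand-in for a google-genai streaming chunk.
    chunk = SimpleNamespace(candidates=[SimpleNamespace(finish_reason="STOP")])

    finish_reason = None
    if hasattr(chunk, "candidates") and chunk.candidates:
        cand = chunk.candidates[0]
        if hasattr(cand, "finish_reason") and cand.finish_reason:
            finish_reason = str(cand.finish_reason)

    print(finish_reason)  # "STOP"; attached to every StreamChunk yielded for this chunk
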