donkit-llm 0.1.8__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/PKG-INFO +1 -1
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/pyproject.toml +1 -1
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/__init__.py +4 -3
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/factory.py +6 -5
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/openai_model.py +305 -20
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/vertex_model.py +21 -2
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/claude_model.py +0 -0
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/donkit_model.py +0 -0
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/gemini_model.py +0 -0
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/llm_gate_model.py +0 -0
- {donkit_llm-0.1.8 → donkit_llm-0.1.10}/src/donkit/llm/model_abstract.py +0 -0
src/donkit/llm/__init__.py

@@ -19,6 +19,7 @@ from .openai_model import (
     AzureOpenAIModel,
     OpenAIEmbeddingModel,
     OpenAIModel,
+    is_codex_model,
 )
 from .claude_model import ClaudeModel, ClaudeVertexModel
 from .vertex_model import VertexAIModel, VertexEmbeddingModel
@@ -26,11 +27,10 @@ from .factory import ModelFactory
 from .gemini_model import GeminiModel, GeminiEmbeddingModel
 from .donkit_model import DonkitModel
 
-import importlib.util
 
-
+try:
     from .llm_gate_model import LLMGateModel
-
+except ModuleNotFoundError:
     LLMGateModel = None
 
 __all__ = [
@@ -57,6 +57,7 @@ __all__ = [
     "AzureOpenAIModel",
     "OpenAIEmbeddingModel",
     "AzureOpenAIEmbeddingModel",
+    "is_codex_model",
     "ClaudeModel",
     "ClaudeVertexModel",
     "VertexAIModel",
src/donkit/llm/factory.py

@@ -5,11 +5,9 @@ from .claude_model import ClaudeVertexModel
 from .donkit_model import DonkitModel
 from .gemini_model import GeminiModel
 
-
-
-if importlib.util.find_spec("donkit.llm_gate.client") is not None:
+try:
     from .llm_gate_model import LLMGateModel
-
+except ModuleNotFoundError:
     LLMGateModel = None
 from .model_abstract import LLMModelAbstract
 from .openai_model import AzureOpenAIEmbeddingModel
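Both `__init__.py` and `factory.py` now guard the optional `LLMGateModel` import with `try/except ModuleNotFoundError` instead of probing `importlib.util.find_spec` first. A minimal sketch of the two patterns, using a made-up module name:

```python
import importlib.util

# Old approach (removed): probe for the optional module before importing it.
if importlib.util.find_spec("some_optional_extra") is not None:  # hypothetical module name
    import some_optional_extra as extra
else:
    extra = None

# New approach in 0.1.10: just attempt the import and fall back to None.
try:
    import some_optional_extra as extra
except ModuleNotFoundError:
    extra = None
```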
@@ -45,8 +43,9 @@ class ModelFactory:
         api_version: str = "2024-08-01-preview",
         deployment_name: str | None = None,
     ) -> AzureOpenAIModel:
+        effective_model = deployment_name or model_name
         return AzureOpenAIModel(
-            model_name=
+            model_name=effective_model,
             api_key=api_key,
             azure_endpoint=azure_endpoint,
             api_version=api_version,
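With `effective_model = deployment_name or model_name`, an explicit Azure deployment name now wins over the base model name. Illustration with placeholder values:

```python
# Placeholder values; an explicit deployment name takes precedence,
# the base model name is the fallback.
model_name = "gpt-4.1-mini"
deployment_name = "my-gpt41-deployment"  # hypothetical deployment name

effective_model = deployment_name or model_name
assert effective_model == "my-gpt41-deployment"

effective_model = None or model_name
assert effective_model == "gpt-4.1-mini"
```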
@@ -210,6 +209,7 @@ class ModelFactory:
         provider: Literal[
             "openai",
             "azure_openai",
+            "azure_openai_codex",
             "claude",
             "claude_vertex",
             "vertex",
@@ -224,6 +224,7 @@ class ModelFactory:
         default_models = {
             "openai": "gpt-5-mini",
             "azure_openai": "gpt-4.1-mini",
+            "azure_openai_codex": "gpt-5.1-codex",
             "claude": "claude-4-5-sonnet",
             "claude_vertex": "claude-4-5-sonnet",
             "gemini": "gemini-2.5-flash",
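The new `azure_openai_codex` provider also gets its own default model. A sketch of the lookup, reproducing only the entries visible in the hunk above (the real mapping in factory.py may contain more providers):

```python
# Abridged copy of the default-model mapping shown in the diff.
default_models = {
    "openai": "gpt-5-mini",
    "azure_openai": "gpt-4.1-mini",
    "azure_openai_codex": "gpt-5.1-codex",
    "claude": "claude-4-5-sonnet",
    "claude_vertex": "claude-4-5-sonnet",
    "gemini": "gemini-2.5-flash",
}

assert default_models["azure_openai_codex"] == "gpt-5.1-codex"
```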
src/donkit/llm/openai_model.py

@@ -1,5 +1,6 @@
 from typing import Any, AsyncIterator
 
+from loguru import logger
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 
 from .model_abstract import (
@@ -268,10 +269,12 @@ class OpenAIModel(LLMModelAbstract):
         # Reasoning models (GPT-5, o1, o3, o4) don't support temperature/top_p
         # They use fixed temperature=1 and top_p=1 internally
         if not is_reasoning:
-            if request.temperature is not None:
-                kwargs["temperature"] = request.temperature
             if request.top_p is not None:
                 kwargs["top_p"] = request.top_p
+            if request.temperature is not None and request.top_p is None:
+                kwargs["temperature"] = request.temperature
+            else:
+                kwargs["top_p"] = 0.0
 
         # Handle max_tokens vs max_completion_tokens
         if request.max_tokens is not None:
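For non-reasoning models the sampling parameters are now chosen differently: `temperature` is only forwarded when no `top_p` was supplied, and in every other case `top_p` ends up pinned to 0.0, even when an explicit `top_p` was given, because the `else` branch overwrites it. A standalone restatement of the branch (not the package's own function) that makes those cases visible:

```python
from typing import Any


def sampling_kwargs(
    is_reasoning: bool,
    temperature: float | None,
    top_p: float | None,
) -> dict[str, Any]:
    """Restates the parameter-selection logic from the hunk above."""
    kwargs: dict[str, Any] = {}
    if not is_reasoning:
        if top_p is not None:
            kwargs["top_p"] = top_p
        if temperature is not None and top_p is None:
            kwargs["temperature"] = temperature
        else:
            # This branch also runs when an explicit top_p was given, overwriting it.
            kwargs["top_p"] = 0.0
    return kwargs


assert sampling_kwargs(True, 0.7, 0.9) == {}                      # reasoning models get neither
assert sampling_kwargs(False, 0.7, None) == {"temperature": 0.7}  # temperature only
assert sampling_kwargs(False, None, 0.9) == {"top_p": 0.0}        # explicit top_p is overridden
assert sampling_kwargs(False, None, None) == {"top_p": 0.0}       # default when nothing is set
```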
@@ -449,6 +452,7 @@ class OpenAIModel(LLMModelAbstract):
 
         async for chunk in stream:
             if not chunk.choices:
+                logger.info("No choices in chunk, continue")
                 continue
 
             choice = chunk.choices[0]
@@ -461,7 +465,7 @@ class OpenAIModel(LLMModelAbstract):
                 # logger.info(
                 #     f"LLM Stream Chunk: {json.dumps(chunk_log, ensure_ascii=False)}"
                 # )
-                yield StreamChunk(content=delta.content
+                yield StreamChunk(content=delta.content)
 
             # Accumulate tool calls
             if delta.tool_calls:
@@ -528,7 +532,7 @@ class OpenAIModel(LLMModelAbstract):
 
 
 class AzureOpenAIModel(OpenAIModel):
-    """Azure OpenAI model implementation."""
+    """Azure OpenAI model implementation with dynamic Codex support."""
 
     def __init__(
         self,
@@ -554,6 +558,7 @@ class AzureOpenAIModel(OpenAIModel):
         self._api_version = api_version
         self._model_name = model_name
         self._deployment_name = deployment_name
+        self._is_codex = is_codex_model(deployment_name or model_name)
 
         # Call parent constructor (will call our overridden _init_client)
         super().__init__(model_name, api_key)
@@ -564,13 +569,21 @@ class AzureOpenAIModel(OpenAIModel):
         base_url: str | None = None,
         organization: str | None = None,
     ) -> None:
-        """Initialize Azure OpenAI client."""
-        self.
-
-
-
-
-
+        """Initialize Azure OpenAI client (or Responses API client for Codex)."""
+        if self._is_codex:
+            # Codex models use Responses API with /openai/v1/ path
+            responses_base_url = f"{self._azure_endpoint.rstrip('/')}/openai/v1/"
+            self.client = AsyncOpenAI(
+                api_key=self._api_key,
+                base_url=responses_base_url,
+            )
+        else:
+            self.client = AsyncAzureOpenAI(
+                api_key=self._api_key,
+                azure_endpoint=self._azure_endpoint,
+                api_version=self._api_version,
+                azure_deployment=self._deployment_name,
+            )
 
     def _determine_capabilities(self) -> ModelCapability:
         """Determine capabilities based on base model name."""
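`_init_client` now picks between two client shapes: a plain `AsyncOpenAI` client pointed at the Azure Responses API path for Codex deployments, and the regular `AsyncAzureOpenAI` client otherwise. A sketch with placeholder endpoint, key, and deployment values:

```python
from openai import AsyncAzureOpenAI, AsyncOpenAI

endpoint = "https://my-resource.openai.azure.com"  # placeholder
api_key = "placeholder-key"                        # placeholder

# Codex deployments: plain AsyncOpenAI client against the Azure Responses API path.
codex_client = AsyncOpenAI(
    api_key=api_key,
    base_url=f"{endpoint.rstrip('/')}/openai/v1/",
)

# Everything else: the usual Azure client with api_version and deployment.
azure_client = AsyncAzureOpenAI(
    api_key=api_key,
    azure_endpoint=endpoint,
    api_version="2024-08-01-preview",
    azure_deployment="my-gpt41-deployment",        # placeholder
)
```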
@@ -598,6 +611,7 @@ class AzureOpenAIModel(OpenAIModel):
             value: New deployment name
         """
         self._deployment_name = value
+        self._is_codex = is_codex_model(value)
         # Reinitialize client with new deployment name
         self._init_client(self._api_key)
 
@@ -615,17 +629,276 @@ class AzureOpenAIModel(OpenAIModel):
         """
         self._model_name = value
         self._deployment_name = value
-        self.
-
-
-
-
-
+        self._is_codex = is_codex_model(value)
+        self._init_client(self._api_key)
+
+    # ---- Codex (Responses API) helper methods ----
+
+    def _convert_message_for_responses(self, msg: Message) -> dict | list[dict]:
+        """Convert internal Message to Responses API format."""
+        role = msg.role
+        if role == "system":
+            role = "developer"
+
+        if msg.role == "tool" and msg.tool_call_id:
+            return {
+                "type": "function_call_output",
+                "call_id": msg.tool_call_id,
+                "output": msg.content
+                if isinstance(msg.content, str)
+                else str(msg.content),
+            }
+
+        if msg.role == "assistant" and msg.tool_calls:
+            items = []
+            if msg.content:
+                items.append(
+                    {
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [{"type": "output_text", "text": msg.content}],
+                    }
+                )
+            for tc in msg.tool_calls:
+                items.append(
+                    {
+                        "type": "function_call",
+                        "call_id": tc.id,
+                        "name": tc.function.name,
+                        "arguments": tc.function.arguments,
+                    }
+                )
+            return items
+
+        if isinstance(msg.content, str):
+            return {"role": role, "content": msg.content}
+
+        content_parts = []
+        for part in msg.content:
+            if part.content_type == ContentType.TEXT:
+                content_parts.append({"type": "input_text", "text": part.content})
+            elif part.content_type == ContentType.IMAGE_URL:
+                content_parts.append({"type": "input_image", "image_url": part.content})
+            elif part.content_type == ContentType.IMAGE_BASE64:
+                content_parts.append(
+                    {
+                        "type": "input_image",
+                        "image_url": f"data:{part.mime_type or 'image/png'};base64,{part.content}",
+                    }
+                )
+        return {"role": role, "content": content_parts}
+
+    def _convert_tools_for_responses(self, tools: list[Tool]) -> list[dict]:
+        """Convert tools to Responses API format."""
+        return [
+            {
+                "type": "function",
+                "name": tool.function.name,
+                "description": tool.function.description or "",
+                "parameters": tool.function.parameters
+                or {"type": "object", "properties": {}},
+            }
+            for tool in tools
+        ]
+
+    def _extract_system_instruction(
+        self, messages: list[Message]
+    ) -> tuple[str | None, list[Message]]:
+        """Extract system message as instructions."""
+        instructions = None
+        remaining = []
+        for msg in messages:
+            if msg.role == "system":
+                content = msg.content if isinstance(msg.content, str) else ""
+                instructions = (
+                    content if instructions is None else instructions + "\n" + content
+                )
+            else:
+                remaining.append(msg)
+        return instructions, remaining
+
+    async def _generate_codex(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate using Responses API for Codex models."""
+        await self.validate_request(request)
+
+        instructions, messages = self._extract_system_instruction(request.messages)
+
+        input_items = []
+        for msg in messages:
+            converted = self._convert_message_for_responses(msg)
+            if isinstance(converted, list):
+                input_items.extend(converted)
+            else:
+                input_items.append(converted)
+
+        kwargs: dict[str, Any] = {"model": self._deployment_name, "input": input_items}
+
+        if instructions:
+            kwargs["instructions"] = instructions
+        if request.max_tokens:
+            kwargs["max_output_tokens"] = max(8192, min(request.max_tokens, 16384))
+        if request.tools:
+            kwargs["tools"] = self._convert_tools_for_responses(request.tools)
+        if request.tool_choice and isinstance(request.tool_choice, str):
+            if request.tool_choice in ("none", "auto", "required"):
+                kwargs["tool_choice"] = request.tool_choice
+
+        try:
+            response = await self.client.responses.create(**kwargs)
+
+            content = getattr(response, "output_text", None)
+            tool_calls = None
+
+            if hasattr(response, "output") and response.output:
+                parsed_tool_calls = []
+                for item in response.output:
+                    if getattr(item, "type", None) == "function_call":
+                        parsed_tool_calls.append(
+                            ToolCall(
+                                id=getattr(item, "call_id", ""),
+                                type="function",
+                                function=FunctionCall(
+                                    name=getattr(item, "name", ""),
+                                    arguments=getattr(item, "arguments", "{}"),
+                                ),
+                            )
+                        )
+                    elif getattr(item, "type", None) == "message" and not content:
+                        for part in getattr(item, "content", []):
+                            if getattr(part, "type", None) == "output_text":
+                                content = getattr(part, "text", "")
+                                break
+                if parsed_tool_calls:
+                    tool_calls = parsed_tool_calls
+
+            usage = None
+            if hasattr(response, "usage") and response.usage:
+                usage = {
+                    "prompt_tokens": getattr(response.usage, "input_tokens", 0),
+                    "completion_tokens": getattr(response.usage, "output_tokens", 0),
+                    "total_tokens": getattr(response.usage, "total_tokens", 0),
+                }
+
+            return GenerateResponse(
+                content=content,
+                tool_calls=tool_calls,
+                finish_reason=getattr(response, "status", None),
+                usage=usage,
+            )
+        except Exception as e:
+            logger.error(f"Codex API error: {e}")
+            return GenerateResponse(content=f"Error: {e}")
+
+    async def _generate_stream_codex(
+        self, request: GenerateRequest
+    ) -> AsyncIterator[StreamChunk]:
+        """Generate streaming using Responses API for Codex models."""
+        await self.validate_request(request)
+
+        instructions, messages = self._extract_system_instruction(request.messages)
+
+        input_items = []
+        for msg in messages:
+            converted = self._convert_message_for_responses(msg)
+            if isinstance(converted, list):
+                input_items.extend(converted)
+            else:
+                input_items.append(converted)
+        logger.info(input_items)
+
+        kwargs: dict[str, Any] = {
+            "model": self._deployment_name,
+            "input": input_items,
+            "stream": True,
+        }
+
+        if instructions:
+            kwargs["instructions"] = instructions
+        if request.max_tokens:
+            kwargs["max_output_tokens"] = max(8192, min(request.max_tokens, 16384))
+        if request.tools:
+            kwargs["tools"] = self._convert_tools_for_responses(request.tools)
+        if request.tool_choice and isinstance(request.tool_choice, str):
+            if request.tool_choice in ("none", "auto", "required"):
+                kwargs["tool_choice"] = request.tool_choice
+
+        try:
+            stream = await self.client.responses.create(**kwargs)
+            # Buffer for accumulating function calls by output_index
+            tool_call_buffers: dict[int, dict] = {}
+
+            async for event in stream:
+                logger.info(event)
+                event_type = getattr(event, "type", None)
+
+                if event_type == "response.output_text.delta":
+                    delta = getattr(event, "delta", "")
+                    if delta:
+                        yield StreamChunk(content=delta)
+
+                # Capture function call name when output item is added
+                elif event_type == "response.output_item.added":
+                    item = getattr(event, "item", None)
+                    output_index = getattr(event, "output_index", 0)
+                    if item and getattr(item, "type", None) == "function_call":
+                        tool_call_buffers[output_index] = {
+                            "call_id": getattr(item, "call_id", ""),
+                            "name": getattr(item, "name", ""),
+                            "arguments": "",
+                        }
+
+                elif event_type == "response.function_call_arguments.delta":
+                    output_index = getattr(event, "output_index", 0)
+                    delta = getattr(event, "delta", "")
+                    if output_index in tool_call_buffers:
+                        tool_call_buffers[output_index]["arguments"] += delta
+
+                elif event_type == "response.function_call_arguments.done":
+                    output_index = getattr(event, "output_index", 0)
+                    if output_index in tool_call_buffers:
+                        tc_data = tool_call_buffers[output_index]
+                        # Get final arguments from event, fallback to accumulated
+                        final_args = getattr(event, "arguments", None)
+                        if final_args is None:
+                            final_args = tc_data.get("arguments", "{}")
+                        yield StreamChunk(
+                            tool_calls=[
+                                ToolCall(
+                                    id=tc_data["call_id"],
+                                    type="function",
+                                    function=FunctionCall(
+                                        name=tc_data["name"],
+                                        arguments=final_args,
+                                    ),
+                                )
+                            ],
+                        )
+                        del tool_call_buffers[output_index]
+
+                elif event_type == "response.completed":
+                    response_obj = getattr(event, "response", None)
+                    finish_reason = (
+                        getattr(response_obj, "status", None) if response_obj else None
+                    )
+                    yield StreamChunk(finish_reason=finish_reason)
+
+                elif event_type == "error":
+                    yield StreamChunk(
+                        content=f"Error: {getattr(event, 'message', 'Unknown')}"
+                    )
+
+        except Exception as e:
+            logger.error(f"Codex streaming error: {e}")
+            yield StreamChunk(content=f"Error: {e}")
+
+    # ---- Main generate methods ----
 
     async def generate(self, request: GenerateRequest) -> GenerateResponse:
-        """Generate a response using Azure OpenAI API
+        """Generate a response using Azure OpenAI API (or Responses API for Codex)."""
+        if self._is_codex:
+            return await self._generate_codex(request)
+
         # Azure OpenAI uses deployment name instead of model name
-        # Temporarily override model_name with deployment_name
         original_model = self._model_name
         self._model_name = self._deployment_name
         try:
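For orientation, this is roughly the Responses API `input` shape the new helpers build for a short tool-calling exchange. The conversation values are invented; only the item types and field names follow the conversion code above:

```python
# System messages are lifted out into `instructions`; the rest become input items.
instructions = "You are a helpful assistant."  # taken from the system message

input_items = [
    # Plain user text stays a role/content pair.
    {"role": "user", "content": "What's the weather in Paris?"},
    # An assistant turn that called a tool becomes a function_call item.
    {
        "type": "function_call",
        "call_id": "call_123",            # made-up id
        "name": "get_weather",            # made-up tool name
        "arguments": '{"city": "Paris"}',
    },
    # The tool result is fed back as a function_call_output item.
    {
        "type": "function_call_output",
        "call_id": "call_123",
        "output": "18°C, light rain",
    },
]
```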
@@ -636,7 +909,12 @@ class AzureOpenAIModel(OpenAIModel):
     async def generate_stream(
         self, request: GenerateRequest
     ) -> AsyncIterator[StreamChunk]:
-        """Generate a streaming response using Azure OpenAI API."""
+        """Generate a streaming response using Azure OpenAI API (or Responses API for Codex)."""
+        if self._is_codex:
+            async for chunk in self._generate_stream_codex(request):
+                yield chunk
+            return
+
         # Azure OpenAI uses deployment name instead of model name
         original_model = self._model_name
         self._model_name = self._deployment_name
@@ -850,3 +1128,10 @@ class AzureOpenAIEmbeddingModel(LLMModelAbstract):
             )
         except Exception as e:
             raise Exception(f"Failed to generate embeddings: {e}")
+
+
+def is_codex_model(model_name: str) -> bool:
+    """Check if the model requires Responses API (Codex models)."""
+    codex_patterns = ["codex", "gpt-5.1-codex", "gpt-5-codex"]
+    model_lower = model_name.lower()
+    return any(pattern in model_lower for pattern in codex_patterns)
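`is_codex_model` is a plain substring check, so it also matches custom deployment names that merely contain "codex". A usage sketch with made-up names:

```python
from donkit.llm import is_codex_model

assert is_codex_model("gpt-5.1-codex") is True
assert is_codex_model("my-codex-deployment") is True   # made-up name; matches the "codex" substring
assert is_codex_model("gpt-4.1-mini") is False
```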
src/donkit/llm/vertex_model.py

@@ -6,6 +6,7 @@ import google.genai as genai
 from google.genai.types import Blob, Content, FunctionDeclaration, Part
 from google.genai.types import Tool as GeminiTool
 from google.oauth2 import service_account
+from loguru import logger
 
 from .model_abstract import (
     ContentType,
@@ -628,15 +629,33 @@ class VertexAIModel(LLMModelAbstract):
             )
 
             async for chunk in stream:
+                logger.info(chunk)
                 text, tool_calls = self._parse_response(chunk)
 
+                # Extract finish_reason from chunk
+                finish_reason = None
+                if hasattr(chunk, "candidates") and chunk.candidates:
+                    cand = chunk.candidates[0]
+                    if hasattr(cand, "finish_reason") and cand.finish_reason:
+                        finish_reason = str(cand.finish_reason)
+
                 # Yield text chunks as they come
                 if text:
-                    yield StreamChunk(
+                    yield StreamChunk(
+                        content=text, tool_calls=None, finish_reason=finish_reason
+                    )
 
                 # Tool calls come in final chunk - yield them separately
                 if tool_calls:
-                    yield StreamChunk(
+                    yield StreamChunk(
+                        content=None, tool_calls=tool_calls, finish_reason=finish_reason
+                    )
+
+                # If no text and no tool_calls but we have finish_reason, yield it
+                if not text and not tool_calls and finish_reason:
+                    yield StreamChunk(
+                        content=None, tool_calls=None, finish_reason=finish_reason
+                    )
 
         except Exception as e:
             # error_msg = str(e)
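The Vertex streaming loop now extracts `finish_reason` from each chunk before deciding what to yield. A standalone restatement of that extraction, with `chunk` standing in for a google-genai streaming response chunk:

```python
def extract_finish_reason(chunk) -> str | None:
    """Mirror of the defensive finish_reason lookup added in the hunk above."""
    finish_reason = None
    if hasattr(chunk, "candidates") and chunk.candidates:
        cand = chunk.candidates[0]
        if hasattr(cand, "finish_reason") and cand.finish_reason:
            finish_reason = str(cand.finish_reason)
    return finish_reason
```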
The remaining files are unchanged between 0.1.8 and 0.1.10: claude_model.py, donkit_model.py, gemini_model.py, llm_gate_model.py, model_abstract.py.