iac-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iac_code/__init__.py +2 -0
- iac_code/acp/__init__.py +97 -0
- iac_code/acp/convert.py +423 -0
- iac_code/acp/http_sse.py +448 -0
- iac_code/acp/mcp.py +54 -0
- iac_code/acp/metrics.py +71 -0
- iac_code/acp/server.py +662 -0
- iac_code/acp/session.py +446 -0
- iac_code/acp/slash_registry.py +125 -0
- iac_code/acp/state.py +99 -0
- iac_code/acp/tools.py +112 -0
- iac_code/acp/types.py +13 -0
- iac_code/acp/version.py +26 -0
- iac_code/agent/__init__.py +19 -0
- iac_code/agent/agent_loop.py +640 -0
- iac_code/agent/agent_tool.py +269 -0
- iac_code/agent/agent_types.py +87 -0
- iac_code/agent/message.py +153 -0
- iac_code/agent/system_prompt.py +313 -0
- iac_code/cli/__init__.py +3 -0
- iac_code/cli/headless.py +114 -0
- iac_code/cli/main.py +246 -0
- iac_code/cli/output_formats.py +125 -0
- iac_code/commands/__init__.py +93 -0
- iac_code/commands/auth.py +1055 -0
- iac_code/commands/clear.py +34 -0
- iac_code/commands/compact.py +43 -0
- iac_code/commands/debug.py +45 -0
- iac_code/commands/effort.py +116 -0
- iac_code/commands/exit.py +10 -0
- iac_code/commands/help.py +49 -0
- iac_code/commands/model.py +130 -0
- iac_code/commands/registry.py +245 -0
- iac_code/commands/resume.py +49 -0
- iac_code/commands/tasks.py +41 -0
- iac_code/config.py +304 -0
- iac_code/i18n/__init__.py +141 -0
- iac_code/i18n/locales/zh/LC_MESSAGES/messages.po +1355 -0
- iac_code/memory/__init__.py +1 -0
- iac_code/memory/memory_manager.py +92 -0
- iac_code/memory/memory_tools.py +88 -0
- iac_code/providers/__init__.py +1 -0
- iac_code/providers/anthropic_provider.py +284 -0
- iac_code/providers/base.py +128 -0
- iac_code/providers/dashscope_provider.py +47 -0
- iac_code/providers/deepseek_provider.py +36 -0
- iac_code/providers/manager.py +399 -0
- iac_code/providers/openai_provider.py +344 -0
- iac_code/providers/retry.py +58 -0
- iac_code/providers/stream_watchdog.py +47 -0
- iac_code/providers/thinking.py +164 -0
- iac_code/services/__init__.py +1 -0
- iac_code/services/agent_factory.py +127 -0
- iac_code/services/cloud_credentials.py +22 -0
- iac_code/services/context_manager.py +221 -0
- iac_code/services/providers/__init__.py +1 -0
- iac_code/services/providers/aliyun.py +232 -0
- iac_code/services/session_index.py +281 -0
- iac_code/services/session_storage.py +245 -0
- iac_code/services/telemetry/__init__.py +66 -0
- iac_code/services/telemetry/attributes.py +84 -0
- iac_code/services/telemetry/client.py +330 -0
- iac_code/services/telemetry/config.py +76 -0
- iac_code/services/telemetry/constants.py +75 -0
- iac_code/services/telemetry/content_serializer.py +124 -0
- iac_code/services/telemetry/events.py +42 -0
- iac_code/services/telemetry/fallback.py +59 -0
- iac_code/services/telemetry/identity.py +73 -0
- iac_code/services/telemetry/metrics.py +62 -0
- iac_code/services/telemetry/names.py +199 -0
- iac_code/services/telemetry/sanitize.py +88 -0
- iac_code/services/telemetry/sink.py +67 -0
- iac_code/services/telemetry/tracing.py +38 -0
- iac_code/services/telemetry/types.py +13 -0
- iac_code/services/token_budget.py +54 -0
- iac_code/services/token_counter.py +76 -0
- iac_code/skills/__init__.py +1 -0
- iac_code/skills/bundled/__init__.py +94 -0
- iac_code/skills/bundled/iac_aliyun/SKILL.md +192 -0
- iac_code/skills/bundled/iac_aliyun/__init__.py +16 -0
- iac_code/skills/bundled/iac_aliyun/references/cloud-products/ecs.md +167 -0
- iac_code/skills/bundled/iac_aliyun/references/cloud-products/oss.md +69 -0
- iac_code/skills/bundled/iac_aliyun/references/cloud-products/rds.md +95 -0
- iac_code/skills/bundled/iac_aliyun/references/cloud-products/redis.md +100 -0
- iac_code/skills/bundled/iac_aliyun/references/cloud-products/slb.md +60 -0
- iac_code/skills/bundled/iac_aliyun/references/cloud-products/vpc.md +54 -0
- iac_code/skills/bundled/iac_aliyun/references/ros-template.md +155 -0
- iac_code/skills/bundled/iac_aliyun/references/template-parameters.md +206 -0
- iac_code/skills/bundled/iac_aliyun/references/terraform-template.md +101 -0
- iac_code/skills/bundled/iac_aliyun/scripts/tf2ros.py +77 -0
- iac_code/skills/bundled/simplify.py +28 -0
- iac_code/skills/discovery.py +136 -0
- iac_code/skills/frontmatter.py +119 -0
- iac_code/skills/listing.py +92 -0
- iac_code/skills/loader.py +42 -0
- iac_code/skills/processor.py +81 -0
- iac_code/skills/renderer.py +157 -0
- iac_code/skills/skill_definition.py +82 -0
- iac_code/skills/skill_tool.py +261 -0
- iac_code/state/__init__.py +5 -0
- iac_code/state/app_state.py +122 -0
- iac_code/tasks/__init__.py +1 -0
- iac_code/tasks/notification_queue.py +28 -0
- iac_code/tasks/task_state.py +66 -0
- iac_code/tasks/task_tools.py +114 -0
- iac_code/tools/__init__.py +8 -0
- iac_code/tools/base.py +226 -0
- iac_code/tools/bash.py +133 -0
- iac_code/tools/cloud/__init__.py +0 -0
- iac_code/tools/cloud/aliyun/__init__.py +0 -0
- iac_code/tools/cloud/aliyun/aliyun_api.py +510 -0
- iac_code/tools/cloud/aliyun/aliyun_doc_search.py +145 -0
- iac_code/tools/cloud/aliyun/endpoints.yml +343 -0
- iac_code/tools/cloud/aliyun/ros_client.py +56 -0
- iac_code/tools/cloud/aliyun/ros_stack.py +633 -0
- iac_code/tools/cloud/aliyun/ros_stack_instances.py +247 -0
- iac_code/tools/cloud/base_api.py +162 -0
- iac_code/tools/cloud/base_stack.py +242 -0
- iac_code/tools/cloud/registry.py +20 -0
- iac_code/tools/cloud/types.py +105 -0
- iac_code/tools/edit_file.py +121 -0
- iac_code/tools/glob.py +103 -0
- iac_code/tools/grep.py +254 -0
- iac_code/tools/list_files.py +104 -0
- iac_code/tools/read_file.py +127 -0
- iac_code/tools/result_storage.py +39 -0
- iac_code/tools/tool_executor.py +165 -0
- iac_code/tools/web_fetch.py +177 -0
- iac_code/tools/write_file.py +88 -0
- iac_code/types/__init__.py +40 -0
- iac_code/types/permissions.py +26 -0
- iac_code/types/skill_source.py +11 -0
- iac_code/types/stream_events.py +227 -0
- iac_code/ui/__init__.py +5 -0
- iac_code/ui/banner.py +110 -0
- iac_code/ui/components/__init__.py +0 -0
- iac_code/ui/components/dialog.py +142 -0
- iac_code/ui/components/divider.py +20 -0
- iac_code/ui/components/fuzzy_picker.py +308 -0
- iac_code/ui/components/progress_bar.py +54 -0
- iac_code/ui/components/search_box.py +165 -0
- iac_code/ui/components/select.py +319 -0
- iac_code/ui/components/status_icon.py +42 -0
- iac_code/ui/components/tabs.py +128 -0
- iac_code/ui/core/__init__.py +0 -0
- iac_code/ui/core/in_place_render.py +129 -0
- iac_code/ui/core/input_history.py +118 -0
- iac_code/ui/core/key_event.py +41 -0
- iac_code/ui/core/prompt_input.py +507 -0
- iac_code/ui/core/raw_input.py +302 -0
- iac_code/ui/core/screen.py +80 -0
- iac_code/ui/dialogs/__init__.py +0 -0
- iac_code/ui/dialogs/global_search.py +178 -0
- iac_code/ui/dialogs/history_search.py +100 -0
- iac_code/ui/dialogs/model_picker.py +280 -0
- iac_code/ui/dialogs/quick_open.py +108 -0
- iac_code/ui/dialogs/resume_picker.py +749 -0
- iac_code/ui/keybindings/__init__.py +0 -0
- iac_code/ui/keybindings/manager.py +124 -0
- iac_code/ui/renderer.py +1535 -0
- iac_code/ui/repl.py +772 -0
- iac_code/ui/spinner.py +112 -0
- iac_code/ui/suggestions/__init__.py +0 -0
- iac_code/ui/suggestions/aggregator.py +171 -0
- iac_code/ui/suggestions/command_provider.py +43 -0
- iac_code/ui/suggestions/directory_provider.py +95 -0
- iac_code/ui/suggestions/file_provider.py +121 -0
- iac_code/ui/suggestions/shell_history_provider.py +108 -0
- iac_code/ui/suggestions/token_extractor.py +77 -0
- iac_code/ui/suggestions/types.py +45 -0
- iac_code/ui/transcript_view.py +199 -0
- iac_code/utils/__init__.py +0 -0
- iac_code/utils/background_housekeeping.py +53 -0
- iac_code/utils/cleanup.py +68 -0
- iac_code/utils/json_utils.py +60 -0
- iac_code/utils/log.py +150 -0
- iac_code/utils/project_paths.py +74 -0
- iac_code/utils/tool_input_parser.py +62 -0
- iac_code-0.1.0.dist-info/LICENSE +201 -0
- iac_code-0.1.0.dist-info/METADATA +64 -0
- iac_code-0.1.0.dist-info/RECORD +184 -0
- iac_code-0.1.0.dist-info/WHEEL +5 -0
- iac_code-0.1.0.dist-info/entry_points.txt +2 -0
- iac_code-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""OpenAI Provider implementation with streaming and tool call support."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import uuid
|
|
7
|
+
from collections.abc import AsyncGenerator
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from openai import AsyncOpenAI
|
|
11
|
+
|
|
12
|
+
from iac_code.i18n import _
|
|
13
|
+
from iac_code.providers.base import (
|
|
14
|
+
ContentBlock,
|
|
15
|
+
Message,
|
|
16
|
+
NonStreamingResponse,
|
|
17
|
+
Provider,
|
|
18
|
+
ToolDefinition,
|
|
19
|
+
)
|
|
20
|
+
from iac_code.providers.thinking import ThinkingFamily, get_thinking_spec, normalize_effort
|
|
21
|
+
from iac_code.types.stream_events import (
|
|
22
|
+
MessageEndEvent,
|
|
23
|
+
MessageStartEvent,
|
|
24
|
+
StreamEvent,
|
|
25
|
+
TextDeltaEvent,
|
|
26
|
+
ThinkingDeltaEvent,
|
|
27
|
+
ToolInputDeltaEvent,
|
|
28
|
+
ToolUseEndEvent,
|
|
29
|
+
ToolUseStartEvent,
|
|
30
|
+
Usage,
|
|
31
|
+
)
|
|
32
|
+
from iac_code.utils.tool_input_parser import parse_tool_input_events
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class OpenAIProvider(Provider):
    """Provider implementation for OpenAI API (GPT-4, etc.).

    Translates the unified ``Message`` / ``ToolDefinition`` objects into the
    chat-completions wire format and translates responses back into unified
    stream events (``stream``) or a ``NonStreamingResponse`` (``complete``).
    """

    _PROVIDER_KEY = "openai"

    # Subclasses can set this to True for endpoints known to support stream_options
    supports_stream_options: bool = False

    # finish_reason -> unified stop_reason; anything not listed maps to "end_turn".
    _STOP_REASON_BY_FINISH_REASON = {
        "tool_calls": "tool_use",
        "length": "max_tokens",
    }

    def __init__(
        self,
        model: str,
        api_key: str | None = None,
        base_url: str | None = None,
        client: Any = None,
        effort: str | None = None,
    ):
        """Store the configuration and create the API client.

        Args:
            model: Model name sent with every request.
            api_key: API key forwarded to ``AsyncOpenAI`` when no client is given.
            base_url: Base URL forwarded to ``AsyncOpenAI``; also quoted in the
                "check your base URL" error messages.
            client: Pre-built client used instead of creating an ``AsyncOpenAI``.
            effort: Requested thinking effort; normalized when a request is built.
        """
        self._model = model
        self._base_url = base_url
        self._effort = effort
        # Subclasses may set this before calling super().stream/complete to
        # inject provider-specific kwargs (e.g. DeepSeek thinking mode).
        self._extra_request_kwargs: dict[str, Any] = {}
        if client is not None:
            self._client = client
        else:
            self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)

    def _build_thinking_kwargs(self) -> dict[str, Any]:
        """Return the request kwargs that enable thinking, or ``{}`` if none apply.

        Nothing is emitted when the (provider, model) spec is not in the
        ``OPENAI`` family, when no effort is configured, or when the effort is
        ``"auto"``. An effort outside the spec's allowed set falls back to the
        spec's default effort (or to no kwargs when there is no default).
        """
        spec = get_thinking_spec(self._PROVIDER_KEY, self._model)
        if spec.family is not ThinkingFamily.OPENAI:
            return {}
        effort = normalize_effort(self._effort)
        if effort is None or effort == "auto":
            return {}
        allowed = {e.value for e in spec.allowed_efforts}
        if effort not in allowed:
            if spec.default_effort is None:
                return {}
            effort = spec.default_effort.value
        return {
            "reasoning_effort": effort,
            "extra_body": {"thinking": {"type": "enabled"}},
        }

    def _effort_request_kwargs(self) -> dict[str, Any]:
        # Backwards-compatible alias used by the streaming/non-streaming paths.
        return self._build_thinking_kwargs()

    def get_model_name(self) -> str:
        """Return the model name this provider was constructed with."""
        return self._model

    # -- Message conversion ----------------------------------------------------

    def _convert_messages(self, messages: list[Message]) -> list[dict[str, Any]]:
        """Convert unified Message objects to OpenAI API format."""
        result: list[dict[str, Any]] = []
        for msg in messages:
            if isinstance(msg.content, str):
                result.append({"role": msg.role, "content": msg.content})
            elif isinstance(msg.content, list):
                result.extend(self._convert_content_blocks(msg.role, msg.content))
        return result

    def _convert_content_blocks(self, role: str, blocks: list[ContentBlock]) -> list[dict[str, Any]]:
        """Convert a list of ContentBlocks into one or more OpenAI messages.

        Output order: the assistant message (text, reasoning and tool calls
        merged into one), then one ``role="tool"`` message per tool result,
        then - for non-assistant roles - a single message carrying the text
        blocks.
        """
        messages: list[dict[str, Any]] = []

        # Group tool_use blocks into a single assistant message with tool_calls
        tool_uses = [b for b in blocks if b.type == "tool_use"]
        text_blocks = [b for b in blocks if b.type == "text"]
        thinking_blocks = [b for b in blocks if b.type == "thinking"]
        tool_results = [b for b in blocks if b.type == "tool_result"]

        # Assistant message with text and/or tool_calls
        if role == "assistant" and (text_blocks or tool_uses or thinking_blocks):
            msg: dict[str, Any] = {"role": "assistant"}
            if text_blocks:
                msg["content"] = "".join(b.text or "" for b in text_blocks)
            else:
                msg["content"] = None
            if thinking_blocks:
                # DeepSeek / Qwen thinking-mode models require the prior-turn
                # reasoning_content to be echoed back in assistant messages.
                msg["reasoning_content"] = "".join(b.text or "" for b in thinking_blocks)
            if tool_uses:
                msg["tool_calls"] = [
                    {
                        "id": b.tool_use_id or "",
                        "type": "function",
                        "function": {
                            "name": b.name or "",
                            "arguments": json.dumps(b.input or {}),
                        },
                    }
                    for b in tool_uses
                ]
            messages.append(msg)

        # Tool result messages (role="tool")
        for b in tool_results:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": b.tool_use_id or "",
                    "content": b.content or "",
                }
            )

        # Text blocks of non-assistant messages (e.g. a user turn that mixes
        # tool results with free text) used to be dropped. They are emitted
        # after the tool results so the tool messages stay directly behind the
        # assistant message that requested them.
        if role != "assistant" and text_blocks:
            messages.append(
                {
                    "role": role,
                    "content": "\n".join(b.text or "" for b in text_blocks),
                }
            )

        return messages

    # -- Tool conversion -------------------------------------------------------

    def _convert_tools(self, tools: list[ToolDefinition]) -> list[dict[str, Any]]:
        """Convert unified ToolDefinition objects to OpenAI function-calling format."""
        return [
            {
                "type": "function",
                "function": {
                    "name": t.name,
                    "description": t.description,
                    "parameters": t.input_schema,
                },
            }
            for t in tools
        ]

    # -- Request assembly --------------------------------------------------------

    def _assemble_chat_request(
        self,
        messages: list[Message],
        system: str,
        tools: list[ToolDefinition] | None,
        max_tokens: int,
        *,
        stream: bool,
    ) -> dict[str, Any]:
        """Build the kwargs for ``chat.completions.create``.

        Shared by ``stream`` and ``complete``. Effort kwargs are applied before
        ``_extra_request_kwargs`` so that subclass-provided values win on
        conflicting keys.
        """
        api_messages: list[dict[str, Any]] = []
        if system:
            api_messages.append({"role": "system", "content": system})
        api_messages.extend(self._convert_messages(messages))

        kwargs: dict[str, Any] = {
            "model": self._model,
            "messages": api_messages,
            "max_tokens": max_tokens,
        }
        if stream:
            kwargs["stream"] = True
            if self.supports_stream_options:
                kwargs["stream_options"] = {"include_usage": True}
        if tools:
            kwargs["tools"] = self._convert_tools(tools)
        kwargs.update(self._effort_request_kwargs())
        kwargs.update(self._extra_request_kwargs)
        return kwargs

    def _map_finish_reason(self, finish_reason: str | None) -> str:
        """Translate an API ``finish_reason`` into the unified stop reason."""
        return self._STOP_REASON_BY_FINISH_REASON.get(finish_reason, "end_turn")

    # -- Streaming -------------------------------------------------------------

    async def stream(
        self,
        messages: list[Message],
        system: str,
        tools: list[ToolDefinition] | None = None,
        max_tokens: int = 8192,
    ) -> AsyncGenerator[StreamEvent, None]:
        """Stream a completion as unified events.

        Yields a ``MessageStartEvent``, then thinking/text/tool deltas as they
        arrive, then the events produced by ``parse_tool_input_events`` for
        every accumulated tool call, and finally a ``MessageEndEvent``.

        Raises:
            RuntimeError: If the stream ends without delivering a single chunk.
        """
        kwargs = self._assemble_chat_request(messages, system, tools, max_tokens, stream=True)

        message_id = f"msg_{uuid.uuid4().hex[:24]}"
        yield MessageStartEvent(message_id=message_id)

        # Accumulators for tool calls (index-based)
        tool_calls_acc: dict[int, dict[str, Any]] = {}
        stop_reason = "end_turn"
        usage = Usage()
        has_content = False

        response = await self._client.chat.completions.create(**kwargs)
        async for chunk in response:
            has_content = True
            # Usage info (final chunk)
            if chunk.usage is not None:
                usage = Usage(
                    input_tokens=chunk.usage.prompt_tokens or 0,
                    output_tokens=chunk.usage.completion_tokens or 0,
                )

            if not chunk.choices:
                continue

            choice = chunk.choices[0]

            # Finish reason
            if choice.finish_reason:
                stop_reason = self._map_finish_reason(choice.finish_reason)

            delta = choice.delta
            if delta is None:
                continue

            # Reasoning content (DeepSeek V4, Qwen thinking mode via OpenAI-compat)
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                yield ThinkingDeltaEvent(text=reasoning)

            # Text content
            if delta.content:
                yield TextDeltaEvent(text=delta.content)

            # Tool calls (streamed with index-based accumulation)
            if delta.tool_calls:
                for tc_delta in delta.tool_calls:
                    idx = tc_delta.index
                    if idx not in tool_calls_acc:
                        tool_calls_acc[idx] = {
                            "id": tc_delta.id or "",
                            "name": "",
                            "arguments": "",
                        }
                    if tc_delta.function and tc_delta.function.name:
                        tool_calls_acc[idx]["name"] = tc_delta.function.name
                        yield ToolUseStartEvent(
                            tool_use_id=tool_calls_acc[idx]["id"],
                            name=tc_delta.function.name,
                        )
                    if tc_delta.function and tc_delta.function.arguments:
                        tool_calls_acc[idx]["arguments"] += tc_delta.function.arguments
                        yield ToolInputDeltaEvent(
                            tool_use_id=tool_calls_acc[idx]["id"],
                            partial_json=tc_delta.function.arguments,
                        )

        if not has_content:
            base_url = str(self._base_url or self._client.base_url).rstrip("/")
            raise RuntimeError(
                _(
                    "API returned no data. Please check that your API Base URL is correct (current: {base_url}). "
                    "Many OpenAI-compatible endpoints require a /v1 suffix (e.g. {base_url}/v1)."
                ).format(base_url=base_url)
            )

        # Emit ToolUseEndEvent for each accumulated tool call
        for idx in sorted(tool_calls_acc):
            tc = tool_calls_acc[idx]
            for ev in parse_tool_input_events(tc["id"], tc["name"], tc["arguments"]):
                yield ev

        yield MessageEndEvent(stop_reason=stop_reason, usage=usage)

    # -- Non-streaming ---------------------------------------------------------

    async def complete(
        self,
        messages: list[Message],
        system: str,
        tools: list[ToolDefinition] | None = None,
        max_tokens: int = 8192,
    ) -> NonStreamingResponse:
        """Request a completion in one shot and return it as a unified response.

        Raises:
            RuntimeError: If the response carries no choices (missing, ``None``
                or empty), which usually indicates a wrong base URL.
        """
        kwargs = self._assemble_chat_request(messages, system, tools, max_tokens, stream=False)

        response = await self._client.chat.completions.create(**kwargs)
        # Treat a missing, None or empty ``choices`` the same way: indexing it
        # below would otherwise fail with an unhelpful IndexError/TypeError.
        choices = getattr(response, "choices", None)
        if not choices:
            base_url = str(self._base_url or self._client.base_url).rstrip("/")
            raise RuntimeError(
                _(
                    "API returned an invalid response. Please check that your "
                    "API Base URL is correct (current: {base_url}). "
                    "Many OpenAI-compatible endpoints require a /v1 suffix "
                    "(e.g. {base_url}/v1)."
                ).format(base_url=base_url)
            )
        choice = choices[0]
        message = choice.message

        text = message.content or ""
        thinking = getattr(message, "reasoning_content", None) or ""
        tool_uses: list[dict[str, Any]] = []
        if message.tool_calls:
            for tc in message.tool_calls:
                raw_args = tc.function.arguments or ""
                for ev in parse_tool_input_events(tc.id, tc.function.name, raw_args):
                    if isinstance(ev, ToolUseEndEvent):
                        tool_uses.append({"id": ev.tool_use_id, "name": tc.function.name, "input": ev.input})

        stop_reason = self._map_finish_reason(choice.finish_reason)

        # ``or 0`` mirrors the streaming path: token counts may be reported as None.
        reported = response.usage
        usage = Usage(
            input_tokens=(reported.prompt_tokens or 0) if reported else 0,
            output_tokens=(reported.completion_tokens or 0) if reported else 0,
        )

        return NonStreamingResponse(
            message_id=response.id,
            text=text,
            tool_uses=tool_uses,
            stop_reason=stop_reason,
            usage=usage,
            thinking=thinking,
        )
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Retry strategy with exponential backoff and jitter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import math
|
|
7
|
+
import random
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Any, Awaitable, Callable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RetryableError(Exception):
    """Failure that ``with_retry`` is allowed to retry.

    Attributes:
        status_code: Optional status code supplied by the raiser; ``None``
            when not given.
    """

    def __init__(self, message: str, status_code: int | None = None):
        # Record the code first; the message goes to Exception as the only arg.
        self.status_code = status_code
        super().__init__(message)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class NonRetryableError(Exception):
    """Failure that ``with_retry`` re-raises immediately without retrying."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class RetryConfig:
    """Exponential-backoff settings consumed by ``with_retry``."""

    # Number of retries allowed after the first attempt.
    max_retries: int = 5
    # Delay used for attempt 0; doubled for every further attempt.
    base_delay: float = 0.5
    # Upper bound applied to the backoff before jitter is added.
    max_delay: float = 32.0
    # Jitter adds up to this fraction of the capped backoff on top of it.
    jitter_factor: float = 0.25

    def calculate_delay(self, attempt: int) -> float:
        """Return the delay before the next try, for a zero-based ``attempt``.

        The backoff is ``base_delay * 2**attempt`` capped at ``max_delay``,
        plus a random jitter in ``[0, jitter_factor * backoff)``.
        """
        # math.pow(2, n) raises OverflowError for n >= 1024. Capping the
        # exponent keeps very large attempt numbers saturating at max_delay
        # (float multiplication overflows to inf, which min() then clamps).
        exponent = min(attempt, 1023)
        base = min(self.base_delay * math.pow(2, exponent), self.max_delay)
        jitter = random.random() * self.jitter_factor * base
        return base + jitter
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Awaited before each retry sleep as (retry_number, error, delay_seconds).
OnRetryCallback = Callable[[int, Exception, float], Awaitable[None]]


async def with_retry(
    operation: Callable[[], Awaitable[Any]],
    config: RetryConfig,
    on_retry: OnRetryCallback | None = None,
) -> Any:
    """Await ``operation`` and retry it on ``RetryableError`` with backoff.

    Args:
        operation: Zero-argument callable returning an awaitable; called anew
            for every attempt.
        config: Supplies ``max_retries`` and ``calculate_delay(attempt)``.
        on_retry: Optional callback awaited before each sleep with the 1-based
            retry number, the error and the delay about to be slept.

    Returns:
        Whatever ``operation`` returns on its first successful attempt.

    Raises:
        NonRetryableError: Re-raised immediately.
        RetryableError: Re-raised once the retry budget is used up.
        Any other exception raised by ``operation`` propagates unchanged.
    """
    # A negative max_retries used to skip the loop entirely, so the operation
    # never ran and the function ended in an assert. Treat it as "no retries"
    # so the operation is always attempted at least once.
    retries_allowed = max(config.max_retries, 0)
    attempt = 0
    while True:
        try:
            return await operation()
        except NonRetryableError:
            raise
        except RetryableError as e:
            if attempt >= retries_allowed:
                raise
            delay = config.calculate_delay(attempt)
            if on_retry:
                await on_retry(attempt + 1, e, delay)
            await asyncio.sleep(delay)
        attempt += 1
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Streaming idle timeout watchdog."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from types import TracebackType
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StreamIdleTimeoutError(Exception):
    """Raised by ``StreamWatchdog.ping`` when the idle timeout is exceeded.

    Attributes:
        idle_timeout: The threshold, in seconds, that was exceeded.
    """

    def __init__(self, idle_timeout: float):
        message = f"Stream idle for more than {idle_timeout}s"
        super().__init__(message)
        self.idle_timeout = idle_timeout
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StreamWatchdog:
    """Idle-timeout check driven by explicit ``ping()`` calls.

    The watchdog has no timer of its own: the gap between consecutive pings
    is only examined when ``ping()`` is called. It can be used as an async
    context manager, which calls ``start()`` on entry and ``stop()`` on exit.
    """

    def __init__(self, idle_timeout: float = 90.0):
        self._idle_timeout = idle_timeout
        self._last_ping: float = 0.0
        self._running = False

    def start(self) -> None:
        """Arm the watchdog and take the current time as the first ping."""
        self._running = True
        self._last_ping = time.monotonic()

    def stop(self) -> None:
        """Disarm the watchdog; later pings no longer raise."""
        self._running = False

    def ping(self) -> None:
        """Record activity and check for idle timeout.

        Raises StreamIdleTimeoutError if the time since the last ping
        exceeds the idle timeout threshold.
        """
        current = time.monotonic()
        # Only an armed watchdog that has a previous ping on record can time out.
        armed = self._running and self._last_ping > 0
        if armed and current - self._last_ping > self._idle_timeout:
            raise StreamIdleTimeoutError(self._idle_timeout)
        self._last_ping = current

    async def __aenter__(self) -> StreamWatchdog:
        self.start()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        self.stop()
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Centralized thinking-mode registry keyed by (provider_key, model_name).
|
|
2
|
+
|
|
3
|
+
Two-layer registry: outer key is the provider key (matches ``auth.py``
|
|
4
|
+
``key_name`` and ``settings.yml`` ``providers.<key>``); inner key is the model
|
|
5
|
+
name. The same model name can appear under multiple providers with different
|
|
6
|
+
specs — e.g. ``deepseek-v4-pro`` is ``OPENAI`` family on the official DeepSeek
|
|
7
|
+
endpoint but ``DASHSCOPE`` family when proxied through Aliyun's compatible-mode
|
|
8
|
+
service.
|
|
9
|
+
|
|
10
|
+
Wire-format assembly lives in each provider subclass's
|
|
11
|
+
``_build_thinking_kwargs()``. This module only declares capabilities.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from enum import Enum
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class EffortLevel(Enum):
    """Named thinking-effort levels; each value is the lowercase level name."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    XHIGH = "xhigh"
    MAX = "max"
    AUTO = "auto"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# All effort levels as an ordered list: LOW through MAX, with AUTO last.
EFFORT_ORDER: list[EffortLevel] = [
    EffortLevel.LOW,
    EffortLevel.MEDIUM,
    EffortLevel.HIGH,
    EffortLevel.XHIGH,
    EffortLevel.MAX,
    EffortLevel.AUTO,
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Symbol string per effort level: one diamond for LOW up to five for MAX.
# AUTO uses a single diamond, the same string as LOW.
EFFORT_SYMBOLS: dict[EffortLevel, str] = {
    EffortLevel.LOW: "◆",
    EffortLevel.MEDIUM: "◆◆",
    EffortLevel.HIGH: "◆◆◆",
    EffortLevel.XHIGH: "◆◆◆◆",
    EffortLevel.MAX: "◆◆◆◆◆",
    EffortLevel.AUTO: "◆",
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ThinkingFamily(Enum):
    """The model's thinking protocol family. Wire format depends on provider."""

    NONE = "none"  # family of the fallback spec used for unknown (provider, model) combos
    ANTHROPIC = "anthropic"
    OPENAI = "openai"  # reasoning_effort + extra_body.thinking.type=enabled
    DASHSCOPE = "dashscope"  # extra_body.enable_thinking [+ thinking_budget]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True)
class ThinkingSpec:
    """Immutable thinking capabilities declared for one (provider, model) pair."""

    family: ThinkingFamily
    allowed_efforts: tuple[EffortLevel, ...] = ()
    default_effort: EffortLevel | None = None
    default_thinking_budget: int | None = None  # reserved; not yet emitted

    @property
    def supports_effort(self) -> bool:
        """True when at least one effort level is allowed."""
        return True if self.allowed_efforts else False

    @property
    def effort_range(self) -> tuple[EffortLevel, EffortLevel] | None:
        """First and last allowed effort, or ``None`` when none are allowed."""
        efforts = self.allowed_efforts
        if efforts:
            return efforts[0], efforts[-1]
        return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# Per-(provider, model) registry
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Effort levels used for the "openai" provider entries in MODEL_THINKING.
_OPENAI_EFFORTS: tuple[EffortLevel, ...] = (
    EffortLevel.LOW,
    EffortLevel.MEDIUM,
    EffortLevel.HIGH,
    EffortLevel.XHIGH,
)

# Effort levels used for the "anthropic" provider entries in MODEL_THINKING.
_ANTHROPIC_EFFORTS: tuple[EffortLevel, ...] = (
    EffortLevel.LOW,
    EffortLevel.MEDIUM,
    EffortLevel.HIGH,
    EffortLevel.XHIGH,
    EffortLevel.MAX,
    EffortLevel.AUTO,
)

# DeepSeek V4 accepts only high/max — XHIGH is intentionally skipped.
_DEEPSEEK_EFFORTS: tuple[EffortLevel, ...] = (EffortLevel.HIGH, EffortLevel.MAX)


# Fallback returned by get_thinking_spec() for unknown (provider, model) combos.
_NONE_SPEC = ThinkingSpec(family=ThinkingFamily.NONE)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Two-level registry: outer key is the provider key, inner key is the model name.
# The same model name may appear under several providers with different specs.
MODEL_THINKING: dict[str, dict[str, ThinkingSpec]] = {
    "anthropic": {
        "claude-opus-4-7": ThinkingSpec(ThinkingFamily.ANTHROPIC, _ANTHROPIC_EFFORTS, EffortLevel.HIGH),
        "claude-opus-4-6": ThinkingSpec(ThinkingFamily.ANTHROPIC, _ANTHROPIC_EFFORTS, EffortLevel.HIGH),
        "claude-sonnet-4-6": ThinkingSpec(ThinkingFamily.ANTHROPIC, _ANTHROPIC_EFFORTS, EffortLevel.HIGH),
        "claude-sonnet-4-6-1m": ThinkingSpec(ThinkingFamily.ANTHROPIC, _ANTHROPIC_EFFORTS, EffortLevel.HIGH),
        "claude-haiku-4-5-20251001": ThinkingSpec(ThinkingFamily.ANTHROPIC, _ANTHROPIC_EFFORTS, EffortLevel.HIGH),
    },
    "openai": {
        "gpt-5.5": ThinkingSpec(ThinkingFamily.OPENAI, _OPENAI_EFFORTS, EffortLevel.HIGH),
        "gpt-5.4": ThinkingSpec(ThinkingFamily.OPENAI, _OPENAI_EFFORTS, EffortLevel.HIGH),
        "gpt-5.4-mini": ThinkingSpec(ThinkingFamily.OPENAI, _OPENAI_EFFORTS, EffortLevel.HIGH),
        "gpt-5.3-codex": ThinkingSpec(ThinkingFamily.OPENAI, _OPENAI_EFFORTS, EffortLevel.HIGH),
        "gpt-5.2": ThinkingSpec(ThinkingFamily.OPENAI, _OPENAI_EFFORTS, EffortLevel.HIGH),
    },
    # Same model names as under "dashscope" below, but OPENAI family here.
    "deepseek": {
        "deepseek-v4-pro": ThinkingSpec(ThinkingFamily.OPENAI, _DEEPSEEK_EFFORTS, EffortLevel.HIGH),
        "deepseek-v4-flash": ThinkingSpec(ThinkingFamily.OPENAI, _DEEPSEEK_EFFORTS, EffortLevel.HIGH),
    },
    "dashscope": {
        # Entries without an efforts tuple declare no selectable effort levels.
        "qwen3.6-max-preview": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "qwen3.6-plus": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "qwen3.5-plus": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "qwen3.5-flash": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "qwq-plus": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "kimi-k2.6": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "glm-5.1": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "deepseek-v4-pro": ThinkingSpec(ThinkingFamily.DASHSCOPE, _DEEPSEEK_EFFORTS, EffortLevel.HIGH),
        "deepseek-v4-flash": ThinkingSpec(ThinkingFamily.DASHSCOPE, _DEEPSEEK_EFFORTS, EffortLevel.HIGH),
    },
    "dashscope_token_plan": {
        "qwen3.6-plus": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "deepseek-v3.2": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "glm-5": ThinkingSpec(ThinkingFamily.DASHSCOPE),
        "MiniMax-M2.5": ThinkingSpec(ThinkingFamily.DASHSCOPE),
    },
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def get_thinking_spec(provider_key: str, model: str) -> ThinkingSpec:
    """Look up the thinking spec registered for ``(provider_key, model)``.

    An unknown provider or an unknown model under a known provider both
    yield the shared ``NONE`` spec.
    """
    provider_models = MODEL_THINKING.get(provider_key)
    if provider_models is None:
        return _NONE_SPEC
    return provider_models.get(model, _NONE_SPEC)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def normalize_effort(effort: str | None) -> str | None:
    """Return ``effort`` stripped and lowercased, or ``None`` if nothing remains.

    ``None``, the empty string and whitespace-only strings all give ``None``.
    """
    if effort is not None:
        cleaned = effort.strip().lower()
        if cleaned:
            return cleaned
    return None
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# Anthropic extended-thinking budget tokens per effort level.
# Used by ``AnthropicProvider._build_thinking_kwargs``.
# Keys are effort strings; there is no entry for "auto".
ANTHROPIC_BUDGET: dict[str, int] = {
    "low": 1024,
    "medium": 4096,
    "high": 16384,
    "xhigh": 32000,
    "max": 64000,
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__all__: list[str] = []
|