klaude-code 1.9.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/base.py +2 -6
- klaude_code/cli/auth_cmd.py +4 -4
- klaude_code/cli/cost_cmd.py +1 -1
- klaude_code/cli/list_model.py +1 -1
- klaude_code/cli/main.py +1 -1
- klaude_code/cli/runtime.py +7 -5
- klaude_code/cli/self_update.py +1 -1
- klaude_code/cli/session_cmd.py +1 -1
- klaude_code/command/clear_cmd.py +6 -2
- klaude_code/command/command_abc.py +2 -2
- klaude_code/command/debug_cmd.py +4 -4
- klaude_code/command/export_cmd.py +2 -2
- klaude_code/command/export_online_cmd.py +12 -12
- klaude_code/command/fork_session_cmd.py +29 -23
- klaude_code/command/help_cmd.py +4 -4
- klaude_code/command/model_cmd.py +4 -4
- klaude_code/command/model_select.py +1 -1
- klaude_code/command/prompt-commit.md +11 -2
- klaude_code/command/prompt_command.py +3 -3
- klaude_code/command/refresh_cmd.py +2 -2
- klaude_code/command/registry.py +7 -5
- klaude_code/command/release_notes_cmd.py +4 -4
- klaude_code/command/resume_cmd.py +15 -11
- klaude_code/command/status_cmd.py +4 -4
- klaude_code/command/terminal_setup_cmd.py +8 -8
- klaude_code/command/thinking_cmd.py +4 -4
- klaude_code/config/assets/builtin_config.yaml +20 -0
- klaude_code/config/builtin_config.py +16 -5
- klaude_code/config/config.py +7 -2
- klaude_code/const.py +147 -91
- klaude_code/core/agent.py +3 -12
- klaude_code/core/executor.py +18 -39
- klaude_code/core/manager/sub_agent_manager.py +71 -7
- klaude_code/core/prompts/prompt-sub-agent-image-gen.md +1 -0
- klaude_code/core/prompts/prompt-sub-agent-web.md +27 -1
- klaude_code/core/reminders.py +88 -69
- klaude_code/core/task.py +44 -45
- klaude_code/core/tool/file/apply_patch_tool.py +9 -9
- klaude_code/core/tool/file/diff_builder.py +3 -5
- klaude_code/core/tool/file/edit_tool.py +23 -23
- klaude_code/core/tool/file/move_tool.py +43 -43
- klaude_code/core/tool/file/read_tool.py +44 -39
- klaude_code/core/tool/file/write_tool.py +14 -14
- klaude_code/core/tool/report_back_tool.py +4 -4
- klaude_code/core/tool/shell/bash_tool.py +23 -23
- klaude_code/core/tool/skill/skill_tool.py +7 -7
- klaude_code/core/tool/sub_agent_tool.py +38 -9
- klaude_code/core/tool/todo/todo_write_tool.py +9 -10
- klaude_code/core/tool/todo/update_plan_tool.py +6 -6
- klaude_code/core/tool/tool_abc.py +2 -2
- klaude_code/core/tool/tool_context.py +27 -0
- klaude_code/core/tool/tool_runner.py +88 -42
- klaude_code/core/tool/truncation.py +38 -20
- klaude_code/core/tool/web/mermaid_tool.py +6 -7
- klaude_code/core/tool/web/web_fetch_tool.py +68 -30
- klaude_code/core/tool/web/web_search_tool.py +15 -17
- klaude_code/core/turn.py +120 -73
- klaude_code/llm/anthropic/client.py +79 -44
- klaude_code/llm/anthropic/input.py +116 -108
- klaude_code/llm/bedrock/client.py +8 -5
- klaude_code/llm/claude/client.py +18 -8
- klaude_code/llm/client.py +4 -3
- klaude_code/llm/codex/client.py +15 -9
- klaude_code/llm/google/client.py +122 -60
- klaude_code/llm/google/input.py +94 -108
- klaude_code/llm/image.py +123 -0
- klaude_code/llm/input_common.py +136 -189
- klaude_code/llm/openai_compatible/client.py +17 -7
- klaude_code/llm/openai_compatible/input.py +36 -66
- klaude_code/llm/openai_compatible/stream.py +119 -67
- klaude_code/llm/openai_compatible/tool_call_accumulator.py +23 -11
- klaude_code/llm/openrouter/client.py +34 -9
- klaude_code/llm/openrouter/input.py +63 -64
- klaude_code/llm/openrouter/reasoning.py +22 -24
- klaude_code/llm/registry.py +20 -17
- klaude_code/llm/responses/client.py +107 -45
- klaude_code/llm/responses/input.py +115 -98
- klaude_code/llm/usage.py +52 -25
- klaude_code/protocol/__init__.py +1 -0
- klaude_code/protocol/events.py +16 -12
- klaude_code/protocol/llm_param.py +20 -2
- klaude_code/protocol/message.py +250 -0
- klaude_code/protocol/model.py +95 -285
- klaude_code/protocol/op.py +2 -15
- klaude_code/protocol/op_handler.py +0 -5
- klaude_code/protocol/sub_agent/__init__.py +1 -0
- klaude_code/protocol/sub_agent/explore.py +10 -0
- klaude_code/protocol/sub_agent/image_gen.py +119 -0
- klaude_code/protocol/sub_agent/task.py +10 -0
- klaude_code/protocol/sub_agent/web.py +10 -0
- klaude_code/session/codec.py +6 -6
- klaude_code/session/export.py +261 -62
- klaude_code/session/selector.py +7 -24
- klaude_code/session/session.py +126 -54
- klaude_code/session/store.py +5 -32
- klaude_code/session/templates/export_session.html +1 -1
- klaude_code/session/templates/mermaid_viewer.html +1 -1
- klaude_code/trace/log.py +11 -6
- klaude_code/ui/core/input.py +1 -1
- klaude_code/ui/core/stage_manager.py +1 -8
- klaude_code/ui/modes/debug/display.py +2 -2
- klaude_code/ui/modes/repl/clipboard.py +2 -2
- klaude_code/ui/modes/repl/completers.py +18 -10
- klaude_code/ui/modes/repl/event_handler.py +138 -132
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +1 -1
- klaude_code/ui/modes/repl/key_bindings.py +136 -2
- klaude_code/ui/modes/repl/renderer.py +107 -15
- klaude_code/ui/renderers/assistant.py +2 -2
- klaude_code/ui/renderers/bash_syntax.py +36 -4
- klaude_code/ui/renderers/common.py +70 -10
- klaude_code/ui/renderers/developer.py +7 -6
- klaude_code/ui/renderers/diffs.py +11 -11
- klaude_code/ui/renderers/mermaid_viewer.py +49 -2
- klaude_code/ui/renderers/metadata.py +33 -5
- klaude_code/ui/renderers/sub_agent.py +57 -16
- klaude_code/ui/renderers/thinking.py +37 -2
- klaude_code/ui/renderers/tools.py +188 -178
- klaude_code/ui/rich/live.py +3 -1
- klaude_code/ui/rich/markdown.py +39 -7
- klaude_code/ui/rich/quote.py +76 -1
- klaude_code/ui/rich/status.py +14 -8
- klaude_code/ui/rich/theme.py +20 -14
- klaude_code/ui/terminal/image.py +34 -0
- klaude_code/ui/terminal/notifier.py +2 -1
- klaude_code/ui/terminal/progress_bar.py +4 -4
- klaude_code/ui/terminal/selector.py +22 -4
- klaude_code/ui/utils/common.py +11 -2
- {klaude_code-1.9.0.dist-info → klaude_code-2.0.1.dist-info}/METADATA +4 -2
- klaude_code-2.0.1.dist-info/RECORD +229 -0
- klaude_code-1.9.0.dist-info/RECORD +0 -224
- {klaude_code-1.9.0.dist-info → klaude_code-2.0.1.dist-info}/WHEEL +0 -0
- {klaude_code-1.9.0.dist-info → klaude_code-2.0.1.dist-info}/entry_points.txt +0 -0
|
@@ -4,7 +4,7 @@ This module provides reusable primitives for OpenAI-compatible providers:
|
|
|
4
4
|
|
|
5
5
|
- ``StreamStateManager``: accumulates assistant content and tool calls.
|
|
6
6
|
- ``ReasoningHandlerABC``: provider-specific reasoning extraction + buffering.
|
|
7
|
-
- ``parse_chat_completions_stream``: shared stream loop that emits
|
|
7
|
+
- ``parse_chat_completions_stream``: shared stream loop that emits stream/history items.
|
|
8
8
|
|
|
9
9
|
OpenRouter uses the same OpenAI Chat Completions API surface but differs in
|
|
10
10
|
how reasoning is represented (``reasoning_details`` vs ``reasoning_content``).
|
|
@@ -24,9 +24,10 @@ import pydantic
|
|
|
24
24
|
from openai import AsyncStream
|
|
25
25
|
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
|
26
26
|
|
|
27
|
+
from klaude_code.llm.image import save_assistant_image
|
|
27
28
|
from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
|
|
28
29
|
from klaude_code.llm.usage import MetadataTracker, convert_usage
|
|
29
|
-
from klaude_code.protocol import llm_param, model
|
|
30
|
+
from klaude_code.protocol import llm_param, message, model
|
|
30
31
|
|
|
31
32
|
StreamStage = Literal["waiting", "reasoning", "assistant", "tool"]
|
|
32
33
|
|
|
@@ -42,62 +43,55 @@ class StreamStateManager:
|
|
|
42
43
|
self,
|
|
43
44
|
param_model: str,
|
|
44
45
|
response_id: str | None = None,
|
|
45
|
-
reasoning_flusher: Callable[[], list[
|
|
46
|
+
reasoning_flusher: Callable[[], list[message.Part]] | None = None,
|
|
46
47
|
):
|
|
47
48
|
self.param_model = param_model
|
|
48
49
|
self.response_id = response_id
|
|
49
50
|
self.stage: StreamStage = "waiting"
|
|
50
|
-
self.accumulated_reasoning: list[str] = []
|
|
51
51
|
self.accumulated_content: list[str] = []
|
|
52
|
+
self.accumulated_images: list[message.ImageFilePart] = []
|
|
52
53
|
self.accumulated_tool_calls: ToolCallAccumulatorABC = BasicToolCallAccumulator()
|
|
53
54
|
self.emitted_tool_start_indices: set[int] = set()
|
|
54
55
|
self._reasoning_flusher = reasoning_flusher
|
|
56
|
+
self.parts: list[message.Part] = []
|
|
57
|
+
self.stop_reason: model.StopReason | None = None
|
|
55
58
|
|
|
56
59
|
def set_response_id(self, response_id: str) -> None:
|
|
57
60
|
"""Set the response ID once received from the stream."""
|
|
58
61
|
self.response_id = response_id
|
|
59
|
-
self.accumulated_tool_calls.response_id
|
|
62
|
+
self.accumulated_tool_calls.set_response_id(response_id)
|
|
60
63
|
|
|
61
|
-
def flush_reasoning(self) ->
|
|
62
|
-
"""Flush accumulated reasoning content
|
|
64
|
+
def flush_reasoning(self) -> None:
|
|
65
|
+
"""Flush accumulated reasoning content into parts."""
|
|
63
66
|
if self._reasoning_flusher is not None:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
self.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def flush_assistant(self) -> list[model.ConversationItem]:
|
|
76
|
-
"""Flush accumulated assistant content and return items."""
|
|
77
|
-
if not self.accumulated_content:
|
|
78
|
-
return []
|
|
79
|
-
item = model.AssistantMessageItem(
|
|
80
|
-
content="".join(self.accumulated_content),
|
|
81
|
-
response_id=self.response_id,
|
|
82
|
-
)
|
|
67
|
+
self.parts.extend(self._reasoning_flusher())
|
|
68
|
+
|
|
69
|
+
def flush_assistant(self) -> None:
|
|
70
|
+
"""Flush accumulated assistant content into parts."""
|
|
71
|
+
if not self.accumulated_content and not self.accumulated_images:
|
|
72
|
+
return
|
|
73
|
+
if self.accumulated_content:
|
|
74
|
+
self.parts.append(message.TextPart(text="".join(self.accumulated_content)))
|
|
75
|
+
if self.accumulated_images:
|
|
76
|
+
self.parts.extend(self.accumulated_images)
|
|
83
77
|
self.accumulated_content = []
|
|
84
|
-
|
|
78
|
+
self.accumulated_images = []
|
|
79
|
+
return
|
|
85
80
|
|
|
86
|
-
def flush_tool_calls(self) ->
|
|
87
|
-
"""Flush accumulated tool calls
|
|
88
|
-
items
|
|
81
|
+
def flush_tool_calls(self) -> None:
|
|
82
|
+
"""Flush accumulated tool calls into parts."""
|
|
83
|
+
items = self.accumulated_tool_calls.get()
|
|
89
84
|
if items:
|
|
90
|
-
self.
|
|
91
|
-
|
|
85
|
+
self.parts.extend(items)
|
|
86
|
+
self.accumulated_tool_calls.reset()
|
|
92
87
|
|
|
93
|
-
def flush_all(self) -> list[
|
|
88
|
+
def flush_all(self) -> list[message.Part]:
|
|
94
89
|
"""Flush all accumulated content in order: reasoning, assistant, tool calls."""
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
items.extend(self.flush_assistant())
|
|
90
|
+
self.flush_reasoning()
|
|
91
|
+
self.flush_assistant()
|
|
98
92
|
if self.stage == "tool":
|
|
99
|
-
|
|
100
|
-
return
|
|
93
|
+
self.flush_tool_calls()
|
|
94
|
+
return list(self.parts)
|
|
101
95
|
|
|
102
96
|
|
|
103
97
|
@dataclass(slots=True)
|
|
@@ -105,7 +99,7 @@ class ReasoningDeltaResult:
|
|
|
105
99
|
"""Result of processing a single provider delta for reasoning signals."""
|
|
106
100
|
|
|
107
101
|
handled: bool
|
|
108
|
-
outputs: list[str |
|
|
102
|
+
outputs: list[str | message.Part]
|
|
109
103
|
|
|
110
104
|
|
|
111
105
|
class ReasoningHandlerABC(ABC):
|
|
@@ -120,7 +114,7 @@ class ReasoningHandlerABC(ABC):
|
|
|
120
114
|
"""Process a single delta and return ordered reasoning outputs."""
|
|
121
115
|
|
|
122
116
|
@abstractmethod
|
|
123
|
-
def flush(self) -> list[
|
|
117
|
+
def flush(self) -> list[message.Part]:
|
|
124
118
|
"""Flush buffered reasoning content (usually at stage transition/finalize)."""
|
|
125
119
|
|
|
126
120
|
|
|
@@ -148,18 +142,29 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
|
|
|
148
142
|
self._accumulated.append(text)
|
|
149
143
|
return ReasoningDeltaResult(handled=True, outputs=[text])
|
|
150
144
|
|
|
151
|
-
def flush(self) -> list[
|
|
145
|
+
def flush(self) -> list[message.Part]:
|
|
152
146
|
if not self._accumulated:
|
|
153
147
|
return []
|
|
154
|
-
item =
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
model=self._param_model,
|
|
148
|
+
item = message.ThinkingTextPart(
|
|
149
|
+
text="".join(self._accumulated),
|
|
150
|
+
model_id=self._param_model,
|
|
158
151
|
)
|
|
159
152
|
self._accumulated = []
|
|
160
153
|
return [item]
|
|
161
154
|
|
|
162
155
|
|
|
156
|
+
def _map_finish_reason(reason: str) -> model.StopReason | None:
|
|
157
|
+
mapping: dict[str, model.StopReason] = {
|
|
158
|
+
"stop": "stop",
|
|
159
|
+
"length": "length",
|
|
160
|
+
"tool_calls": "tool_use",
|
|
161
|
+
"content_filter": "error",
|
|
162
|
+
"error": "error",
|
|
163
|
+
"cancelled": "aborted",
|
|
164
|
+
}
|
|
165
|
+
return mapping.get(reason)
|
|
166
|
+
|
|
167
|
+
|
|
163
168
|
async def parse_chat_completions_stream(
|
|
164
169
|
stream: AsyncStream[ChatCompletionChunk],
|
|
165
170
|
*,
|
|
@@ -167,8 +172,8 @@ async def parse_chat_completions_stream(
|
|
|
167
172
|
metadata_tracker: MetadataTracker,
|
|
168
173
|
reasoning_handler: ReasoningHandlerABC,
|
|
169
174
|
on_event: Callable[[object], None] | None = None,
|
|
170
|
-
) -> AsyncGenerator[
|
|
171
|
-
"""Parse OpenAI Chat Completions stream into
|
|
175
|
+
) -> AsyncGenerator[message.LLMStreamItem]:
|
|
176
|
+
"""Parse OpenAI Chat Completions stream into stream items.
|
|
172
177
|
|
|
173
178
|
This is shared by OpenAI-compatible and OpenRouter clients.
|
|
174
179
|
"""
|
|
@@ -178,6 +183,20 @@ async def parse_chat_completions_stream(
|
|
|
178
183
|
reasoning_flusher=reasoning_handler.flush,
|
|
179
184
|
)
|
|
180
185
|
|
|
186
|
+
def _extract_image_url(image_obj: object) -> str | None:
|
|
187
|
+
image_url = getattr(image_obj, "image_url", None)
|
|
188
|
+
if image_url is not None:
|
|
189
|
+
url = getattr(image_url, "url", None)
|
|
190
|
+
return str(url) if isinstance(url, str) else None
|
|
191
|
+
if isinstance(image_obj, dict):
|
|
192
|
+
image_dict = cast(dict[str, Any], image_obj)
|
|
193
|
+
url_dict_raw = image_dict.get("image_url")
|
|
194
|
+
if isinstance(url_dict_raw, dict):
|
|
195
|
+
url_dict = cast(dict[str, Any], url_dict_raw)
|
|
196
|
+
url_raw = url_dict.get("url")
|
|
197
|
+
return url_raw if isinstance(url_raw, str) else None
|
|
198
|
+
return None
|
|
199
|
+
|
|
181
200
|
try:
|
|
182
201
|
async for event in stream:
|
|
183
202
|
if on_event is not None:
|
|
@@ -186,7 +205,6 @@ async def parse_chat_completions_stream(
|
|
|
186
205
|
if not state.response_id and (event_id := getattr(event, "id", None)):
|
|
187
206
|
state.set_response_id(str(event_id))
|
|
188
207
|
reasoning_handler.set_response_id(str(event_id))
|
|
189
|
-
yield model.StartItem(response_id=str(event_id))
|
|
190
208
|
|
|
191
209
|
if (event_usage := getattr(event, "usage", None)) is not None:
|
|
192
210
|
metadata_tracker.set_usage(convert_usage(event_usage, param.context_limit, param.max_tokens))
|
|
@@ -212,6 +230,10 @@ async def parse_chat_completions_stream(
|
|
|
212
230
|
if delta is None:
|
|
213
231
|
continue
|
|
214
232
|
|
|
233
|
+
finish_reason = getattr(choice0, "finish_reason", None)
|
|
234
|
+
if isinstance(finish_reason, str):
|
|
235
|
+
state.stop_reason = _map_finish_reason(finish_reason)
|
|
236
|
+
|
|
215
237
|
# Reasoning
|
|
216
238
|
reasoning_result = reasoning_handler.on_delta(delta)
|
|
217
239
|
if reasoning_result.handled:
|
|
@@ -221,22 +243,51 @@ async def parse_chat_completions_stream(
|
|
|
221
243
|
if not output:
|
|
222
244
|
continue
|
|
223
245
|
metadata_tracker.record_token()
|
|
224
|
-
yield
|
|
246
|
+
yield message.ThinkingTextDelta(content=output, response_id=state.response_id)
|
|
225
247
|
else:
|
|
226
|
-
|
|
248
|
+
state.parts.append(output)
|
|
227
249
|
|
|
228
250
|
# Assistant
|
|
251
|
+
images = getattr(delta, "images", None)
|
|
252
|
+
if isinstance(images, list) and images:
|
|
253
|
+
images_list = cast(list[object], images)
|
|
254
|
+
metadata_tracker.record_token()
|
|
255
|
+
if state.stage == "reasoning":
|
|
256
|
+
state.flush_reasoning()
|
|
257
|
+
elif state.stage == "tool":
|
|
258
|
+
state.flush_tool_calls()
|
|
259
|
+
state.stage = "assistant"
|
|
260
|
+
for image_obj in images_list:
|
|
261
|
+
url = _extract_image_url(image_obj)
|
|
262
|
+
if not url:
|
|
263
|
+
continue
|
|
264
|
+
if not url.startswith("data:"):
|
|
265
|
+
# Only data URLs are supported for now.
|
|
266
|
+
continue
|
|
267
|
+
try:
|
|
268
|
+
assistant_image = save_assistant_image(
|
|
269
|
+
data_url=url,
|
|
270
|
+
session_id=param.session_id,
|
|
271
|
+
response_id=state.response_id,
|
|
272
|
+
image_index=len(state.accumulated_images),
|
|
273
|
+
)
|
|
274
|
+
except ValueError as exc:
|
|
275
|
+
yield message.StreamErrorItem(error=str(exc))
|
|
276
|
+
return
|
|
277
|
+
state.accumulated_images.append(assistant_image)
|
|
278
|
+
yield message.AssistantImageDelta(
|
|
279
|
+
response_id=state.response_id, file_path=assistant_image.file_path
|
|
280
|
+
)
|
|
281
|
+
|
|
229
282
|
if (content := getattr(delta, "content", None)) and (state.stage == "assistant" or str(content).strip()):
|
|
230
283
|
metadata_tracker.record_token()
|
|
231
284
|
if state.stage == "reasoning":
|
|
232
|
-
|
|
233
|
-
yield item
|
|
285
|
+
state.flush_reasoning()
|
|
234
286
|
elif state.stage == "tool":
|
|
235
|
-
|
|
236
|
-
yield item
|
|
287
|
+
state.flush_tool_calls()
|
|
237
288
|
state.stage = "assistant"
|
|
238
289
|
state.accumulated_content.append(str(content))
|
|
239
|
-
yield
|
|
290
|
+
yield message.AssistantTextDelta(
|
|
240
291
|
content=str(content),
|
|
241
292
|
response_id=state.response_id,
|
|
242
293
|
)
|
|
@@ -245,29 +296,30 @@ async def parse_chat_completions_stream(
|
|
|
245
296
|
if (tool_calls := getattr(delta, "tool_calls", None)) and len(tool_calls) > 0:
|
|
246
297
|
metadata_tracker.record_token()
|
|
247
298
|
if state.stage == "reasoning":
|
|
248
|
-
|
|
249
|
-
yield item
|
|
299
|
+
state.flush_reasoning()
|
|
250
300
|
elif state.stage == "assistant":
|
|
251
|
-
|
|
252
|
-
yield item
|
|
301
|
+
state.flush_assistant()
|
|
253
302
|
state.stage = "tool"
|
|
254
303
|
for tc in tool_calls:
|
|
255
304
|
if tc.index not in state.emitted_tool_start_indices and tc.function and tc.function.name:
|
|
256
305
|
state.emitted_tool_start_indices.add(tc.index)
|
|
257
|
-
yield
|
|
306
|
+
yield message.ToolCallStartItem(
|
|
258
307
|
response_id=state.response_id,
|
|
259
308
|
call_id=tc.id or "",
|
|
260
309
|
name=tc.function.name,
|
|
261
310
|
)
|
|
262
311
|
state.accumulated_tool_calls.add(tool_calls)
|
|
263
312
|
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
264
|
-
yield
|
|
313
|
+
yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
265
314
|
|
|
266
|
-
|
|
267
|
-
if
|
|
315
|
+
parts = state.flush_all()
|
|
316
|
+
if parts:
|
|
268
317
|
metadata_tracker.record_token()
|
|
269
|
-
for item in flushed_items:
|
|
270
|
-
yield item
|
|
271
|
-
|
|
272
318
|
metadata_tracker.set_response_id(state.response_id)
|
|
273
|
-
|
|
319
|
+
metadata = metadata_tracker.finalize()
|
|
320
|
+
yield message.AssistantMessage(
|
|
321
|
+
parts=parts,
|
|
322
|
+
response_id=state.response_id,
|
|
323
|
+
usage=metadata,
|
|
324
|
+
stop_reason=state.stop_reason,
|
|
325
|
+
)
|
|
@@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
|
|
5
5
|
from pydantic import BaseModel, Field
|
|
6
6
|
|
|
7
|
-
from klaude_code.protocol import
|
|
7
|
+
from klaude_code.protocol import message
|
|
8
8
|
from klaude_code.trace.log import log_debug
|
|
9
9
|
|
|
10
10
|
|
|
@@ -28,7 +28,15 @@ class ToolCallAccumulatorABC(ABC):
|
|
|
28
28
|
pass
|
|
29
29
|
|
|
30
30
|
@abstractmethod
|
|
31
|
-
def get(self) -> list[
|
|
31
|
+
def get(self) -> list[message.ToolCallPart]:
|
|
32
|
+
pass
|
|
33
|
+
|
|
34
|
+
@abstractmethod
|
|
35
|
+
def set_response_id(self, response_id: str | None) -> None:
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
@abstractmethod
|
|
39
|
+
def reset(self) -> None:
|
|
32
40
|
pass
|
|
33
41
|
|
|
34
42
|
|
|
@@ -69,8 +77,8 @@ class BasicToolCallAccumulator(ToolCallAccumulatorABC, BaseModel):
|
|
|
69
77
|
def add(self, chunks: list[ChoiceDeltaToolCall]) -> None:
|
|
70
78
|
self.chunks_by_step.append(chunks)
|
|
71
79
|
|
|
72
|
-
def get(self) -> list[
|
|
73
|
-
result: list[
|
|
80
|
+
def get(self) -> list[message.ToolCallPart]:
|
|
81
|
+
result: list[message.ToolCallPart] = []
|
|
74
82
|
current_index = -1
|
|
75
83
|
for current_step in self.chunks_by_step:
|
|
76
84
|
if len(current_step) == 0:
|
|
@@ -79,18 +87,22 @@ class BasicToolCallAccumulator(ToolCallAccumulatorABC, BaseModel):
|
|
|
79
87
|
if first_chunk.index != current_index:
|
|
80
88
|
current_index = first_chunk.index
|
|
81
89
|
result.append(
|
|
82
|
-
|
|
83
|
-
id=first_chunk.id,
|
|
84
|
-
name="",
|
|
85
|
-
arguments="",
|
|
90
|
+
message.ToolCallPart(
|
|
86
91
|
call_id=first_chunk.id or "",
|
|
87
|
-
|
|
92
|
+
tool_name="",
|
|
93
|
+
arguments_json="",
|
|
88
94
|
)
|
|
89
95
|
)
|
|
90
96
|
if first_chunk.function is None:
|
|
91
97
|
continue
|
|
92
98
|
if first_chunk.function.name:
|
|
93
|
-
result[-1].
|
|
99
|
+
result[-1].tool_name = normalize_tool_name(first_chunk.function.name)
|
|
94
100
|
if first_chunk.function.arguments:
|
|
95
|
-
result[-1].
|
|
101
|
+
result[-1].arguments_json += first_chunk.function.arguments
|
|
96
102
|
return result
|
|
103
|
+
|
|
104
|
+
def set_response_id(self, response_id: str | None) -> None:
|
|
105
|
+
self.response_id = response_id
|
|
106
|
+
|
|
107
|
+
def reset(self) -> None:
|
|
108
|
+
self.chunks_by_step = []
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from collections.abc import AsyncGenerator
|
|
3
|
-
from typing import Any, override
|
|
3
|
+
from typing import Any, cast, override
|
|
4
4
|
|
|
5
5
|
import httpx
|
|
6
6
|
import openai
|
|
7
7
|
from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
|
|
8
8
|
|
|
9
|
+
from klaude_code.const import (
|
|
10
|
+
ANTHROPIC_BETA_FINE_GRAINED_TOOL_STREAMING,
|
|
11
|
+
ANTHROPIC_BETA_INTERLEAVED_THINKING,
|
|
12
|
+
LLM_HTTP_TIMEOUT_CONNECT,
|
|
13
|
+
LLM_HTTP_TIMEOUT_READ,
|
|
14
|
+
LLM_HTTP_TIMEOUT_TOTAL,
|
|
15
|
+
OPENROUTER_BASE_URL,
|
|
16
|
+
)
|
|
9
17
|
from klaude_code.llm.client import LLMClientABC
|
|
10
18
|
from klaude_code.llm.input_common import apply_config_defaults
|
|
11
19
|
from klaude_code.llm.openai_compatible.input import convert_tool_schema
|
|
@@ -14,7 +22,7 @@ from klaude_code.llm.openrouter.input import convert_history_to_input, is_claude
|
|
|
14
22
|
from klaude_code.llm.openrouter.reasoning import ReasoningStreamHandler
|
|
15
23
|
from klaude_code.llm.registry import register
|
|
16
24
|
from klaude_code.llm.usage import MetadataTracker
|
|
17
|
-
from klaude_code.protocol import llm_param,
|
|
25
|
+
from klaude_code.protocol import llm_param, message
|
|
18
26
|
from klaude_code.trace import DebugType, is_debug_enabled, log_debug
|
|
19
27
|
|
|
20
28
|
|
|
@@ -28,6 +36,16 @@ def build_payload(
|
|
|
28
36
|
extra_body: dict[str, object] = {
|
|
29
37
|
"usage": {"include": True}, # To get the cache tokens at the end of the response
|
|
30
38
|
}
|
|
39
|
+
|
|
40
|
+
if param.modalities:
|
|
41
|
+
extra_body["modalities"] = list(param.modalities)
|
|
42
|
+
if param.image_config is not None:
|
|
43
|
+
image_config = param.image_config.model_dump(exclude_none=True)
|
|
44
|
+
extra_raw = image_config.pop("extra", None)
|
|
45
|
+
extra_dict: dict[str, Any] | None = cast(dict[str, Any], extra_raw) if isinstance(extra_raw, dict) else None
|
|
46
|
+
if extra_dict is not None and extra_dict:
|
|
47
|
+
image_config.update(extra_dict)
|
|
48
|
+
extra_body["image_config"] = image_config
|
|
31
49
|
if is_debug_enabled():
|
|
32
50
|
extra_body["debug"] = {
|
|
33
51
|
"echo_upstream_body": True
|
|
@@ -49,7 +67,9 @@ def build_payload(
|
|
|
49
67
|
extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
|
|
50
68
|
|
|
51
69
|
if is_claude_model(param.model):
|
|
52
|
-
extra_headers["x-anthropic-beta"] =
|
|
70
|
+
extra_headers["x-anthropic-beta"] = (
|
|
71
|
+
f"{ANTHROPIC_BETA_FINE_GRAINED_TOOL_STREAMING},{ANTHROPIC_BETA_INTERLEAVED_THINKING}"
|
|
72
|
+
)
|
|
53
73
|
|
|
54
74
|
payload: CompletionCreateParamsStreaming = {
|
|
55
75
|
"model": str(param.model),
|
|
@@ -72,8 +92,8 @@ class OpenRouterClient(LLMClientABC):
|
|
|
72
92
|
super().__init__(config)
|
|
73
93
|
client = openai.AsyncOpenAI(
|
|
74
94
|
api_key=config.api_key,
|
|
75
|
-
base_url=
|
|
76
|
-
timeout=httpx.Timeout(
|
|
95
|
+
base_url=OPENROUTER_BASE_URL,
|
|
96
|
+
timeout=httpx.Timeout(LLM_HTTP_TIMEOUT_TOTAL, connect=LLM_HTTP_TIMEOUT_CONNECT, read=LLM_HTTP_TIMEOUT_READ),
|
|
77
97
|
)
|
|
78
98
|
self.client: openai.AsyncOpenAI = client
|
|
79
99
|
|
|
@@ -83,12 +103,17 @@ class OpenRouterClient(LLMClientABC):
|
|
|
83
103
|
return cls(config)
|
|
84
104
|
|
|
85
105
|
@override
|
|
86
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[
|
|
106
|
+
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
|
|
87
107
|
param = apply_config_defaults(param, self.get_llm_config())
|
|
88
108
|
|
|
89
109
|
metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
|
|
90
110
|
|
|
91
|
-
|
|
111
|
+
try:
|
|
112
|
+
payload, extra_body, extra_headers = build_payload(param)
|
|
113
|
+
except (ValueError, OSError) as e:
|
|
114
|
+
yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
115
|
+
yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
|
|
116
|
+
return
|
|
92
117
|
|
|
93
118
|
log_debug(
|
|
94
119
|
json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
|
|
@@ -103,8 +128,8 @@ class OpenRouterClient(LLMClientABC):
|
|
|
103
128
|
extra_headers=extra_headers,
|
|
104
129
|
)
|
|
105
130
|
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
106
|
-
yield
|
|
107
|
-
yield metadata_tracker.finalize()
|
|
131
|
+
yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
132
|
+
yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
|
|
108
133
|
return
|
|
109
134
|
|
|
110
135
|
reasoning_handler = ReasoningStreamHandler(
|
|
@@ -8,9 +8,16 @@
|
|
|
8
8
|
|
|
9
9
|
from openai.types import chat
|
|
10
10
|
|
|
11
|
-
from klaude_code.llm.
|
|
12
|
-
from klaude_code.llm.
|
|
13
|
-
|
|
11
|
+
from klaude_code.llm.image import assistant_image_to_data_url
|
|
12
|
+
from klaude_code.llm.input_common import (
|
|
13
|
+
attach_developer_messages,
|
|
14
|
+
build_assistant_common_fields,
|
|
15
|
+
build_chat_content_parts,
|
|
16
|
+
build_tool_message,
|
|
17
|
+
collect_text_content,
|
|
18
|
+
split_thinking_parts,
|
|
19
|
+
)
|
|
20
|
+
from klaude_code.protocol import message
|
|
14
21
|
|
|
15
22
|
|
|
16
23
|
def is_claude_model(model_name: str | None) -> bool:
|
|
@@ -25,64 +32,42 @@ def is_gemini_model(model_name: str | None) -> bool:
|
|
|
25
32
|
return model_name is not None and model_name.startswith("google/gemini")
|
|
26
33
|
|
|
27
34
|
|
|
28
|
-
def
|
|
35
|
+
def _assistant_message_to_openrouter(
|
|
36
|
+
msg: message.AssistantMessage, model_name: str | None
|
|
37
|
+
) -> chat.ChatCompletionMessageParam:
|
|
29
38
|
assistant_message: dict[str, object] = {"role": "assistant"}
|
|
30
|
-
|
|
31
|
-
if group.tool_calls:
|
|
32
|
-
assistant_message["tool_calls"] = [
|
|
33
|
-
{
|
|
34
|
-
"id": tc.call_id,
|
|
35
|
-
"type": "function",
|
|
36
|
-
"function": {
|
|
37
|
-
"name": tc.name,
|
|
38
|
-
"arguments": tc.arguments,
|
|
39
|
-
},
|
|
40
|
-
}
|
|
41
|
-
for tc in group.tool_calls
|
|
42
|
-
]
|
|
43
|
-
|
|
44
|
-
# Handle reasoning for OpenRouter (reasoning_details array).
|
|
45
|
-
# The order of items in reasoning_details must match the original
|
|
46
|
-
# stream order from the provider, so we iterate reasoning_items
|
|
47
|
-
# instead of the separated reasoning_text / reasoning_encrypted lists.
|
|
48
|
-
# For cross-model scenarios, degrade thinking to plain text.
|
|
39
|
+
assistant_message.update(build_assistant_common_fields(msg, image_to_data_url=assistant_image_to_data_url))
|
|
49
40
|
reasoning_details: list[dict[str, object]] = []
|
|
50
|
-
degraded_thinking_texts
|
|
51
|
-
for
|
|
52
|
-
if
|
|
53
|
-
# Cross-model: collect thinking text for degradation
|
|
54
|
-
if isinstance(item, model.ReasoningTextItem) and item.content:
|
|
55
|
-
degraded_thinking_texts.append(item.content)
|
|
56
|
-
continue
|
|
57
|
-
if isinstance(item, model.ReasoningEncryptedItem):
|
|
58
|
-
if item.encrypted_content and len(item.encrypted_content) > 0:
|
|
59
|
-
reasoning_details.append(
|
|
60
|
-
{
|
|
61
|
-
"id": item.id,
|
|
62
|
-
"type": "reasoning.encrypted",
|
|
63
|
-
"data": item.encrypted_content,
|
|
64
|
-
"format": item.format,
|
|
65
|
-
"index": len(reasoning_details),
|
|
66
|
-
}
|
|
67
|
-
)
|
|
68
|
-
elif isinstance(item, model.ReasoningTextItem):
|
|
41
|
+
native_thinking_parts, degraded_thinking_texts = split_thinking_parts(msg, model_name)
|
|
42
|
+
for part in native_thinking_parts:
|
|
43
|
+
if isinstance(part, message.ThinkingTextPart):
|
|
69
44
|
reasoning_details.append(
|
|
70
45
|
{
|
|
71
|
-
"id":
|
|
46
|
+
"id": part.id,
|
|
72
47
|
"type": "reasoning.text",
|
|
73
|
-
"text":
|
|
48
|
+
"text": part.text,
|
|
49
|
+
"index": len(reasoning_details),
|
|
50
|
+
}
|
|
51
|
+
)
|
|
52
|
+
elif isinstance(part, message.ThinkingSignaturePart) and part.signature:
|
|
53
|
+
reasoning_details.append(
|
|
54
|
+
{
|
|
55
|
+
"id": part.id,
|
|
56
|
+
"type": "reasoning.encrypted",
|
|
57
|
+
"data": part.signature,
|
|
58
|
+
"format": part.format,
|
|
74
59
|
"index": len(reasoning_details),
|
|
75
60
|
}
|
|
76
61
|
)
|
|
77
62
|
if reasoning_details:
|
|
78
63
|
assistant_message["reasoning_details"] = reasoning_details
|
|
79
64
|
|
|
80
|
-
# Build content with optional degraded thinking prefix
|
|
81
65
|
content_parts: list[str] = []
|
|
82
66
|
if degraded_thinking_texts:
|
|
83
67
|
content_parts.append("<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>")
|
|
84
|
-
|
|
85
|
-
|
|
68
|
+
text_content = collect_text_content(msg.parts)
|
|
69
|
+
if text_content:
|
|
70
|
+
content_parts.append(text_content)
|
|
86
71
|
if content_parts:
|
|
87
72
|
assistant_message["content"] = "\n".join(content_parts)
|
|
88
73
|
|
|
@@ -104,18 +89,11 @@ def _add_cache_control(messages: list[chat.ChatCompletionMessageParam], use_cach
|
|
|
104
89
|
|
|
105
90
|
|
|
106
91
|
def convert_history_to_input(
|
|
107
|
-
history: list[
|
|
92
|
+
history: list[message.Message],
|
|
108
93
|
system: str | None = None,
|
|
109
94
|
model_name: str | None = None,
|
|
110
95
|
) -> list[chat.ChatCompletionMessageParam]:
|
|
111
|
-
"""
|
|
112
|
-
Convert a list of conversation items to a list of chat completion message params.
|
|
113
|
-
|
|
114
|
-
Args:
|
|
115
|
-
history: List of conversation items.
|
|
116
|
-
system: System message.
|
|
117
|
-
model_name: Model name. Used to verify that signatures are valid for the same model.
|
|
118
|
-
"""
|
|
96
|
+
"""Convert a list of messages to chat completion params."""
|
|
119
97
|
use_cache_control = is_claude_model(model_name) or is_gemini_model(model_name)
|
|
120
98
|
|
|
121
99
|
messages: list[chat.ChatCompletionMessageParam] = (
|
|
@@ -135,14 +113,35 @@ def convert_history_to_input(
|
|
|
135
113
|
else ([{"role": "system", "content": system}] if system else [])
|
|
136
114
|
)
|
|
137
115
|
|
|
138
|
-
for
|
|
139
|
-
match
|
|
140
|
-
case
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
116
|
+
for msg, attachment in attach_developer_messages(history):
|
|
117
|
+
match msg:
|
|
118
|
+
case message.SystemMessage():
|
|
119
|
+
system_text = "\n".join(part.text for part in msg.parts)
|
|
120
|
+
if system_text:
|
|
121
|
+
if use_cache_control:
|
|
122
|
+
messages.append(
|
|
123
|
+
{
|
|
124
|
+
"role": "system",
|
|
125
|
+
"content": [
|
|
126
|
+
{
|
|
127
|
+
"type": "text",
|
|
128
|
+
"text": system_text,
|
|
129
|
+
"cache_control": {"type": "ephemeral"},
|
|
130
|
+
}
|
|
131
|
+
],
|
|
132
|
+
}
|
|
133
|
+
)
|
|
134
|
+
else:
|
|
135
|
+
messages.append({"role": "system", "content": system_text})
|
|
136
|
+
case message.UserMessage():
|
|
137
|
+
parts = build_chat_content_parts(msg, attachment)
|
|
138
|
+
messages.append({"role": "user", "content": parts})
|
|
139
|
+
case message.ToolResultMessage():
|
|
140
|
+
messages.append(build_tool_message(msg, attachment))
|
|
141
|
+
case message.AssistantMessage():
|
|
142
|
+
messages.append(_assistant_message_to_openrouter(msg, model_name))
|
|
143
|
+
case _:
|
|
144
|
+
continue
|
|
146
145
|
|
|
147
146
|
_add_cache_control(messages, use_cache_control)
|
|
148
147
|
return messages
|