klaude-code 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/cli/auth_cmd.py +2 -13
- klaude_code/cli/cost_cmd.py +10 -10
- klaude_code/cli/main.py +40 -7
- klaude_code/cli/session_cmd.py +2 -11
- klaude_code/config/assets/builtin_config.yaml +45 -24
- klaude_code/config/model_matcher.py +1 -1
- klaude_code/const.py +2 -1
- klaude_code/core/tool/file/edit_tool.py +1 -1
- klaude_code/core/tool/file/read_tool.py +2 -2
- klaude_code/core/tool/file/write_tool.py +1 -1
- klaude_code/core/turn.py +19 -1
- klaude_code/llm/anthropic/client.py +75 -50
- klaude_code/llm/anthropic/input.py +20 -9
- klaude_code/llm/google/client.py +223 -148
- klaude_code/llm/google/input.py +44 -36
- klaude_code/llm/openai_compatible/stream.py +109 -99
- klaude_code/llm/openrouter/reasoning.py +4 -29
- klaude_code/llm/partial_message.py +2 -32
- klaude_code/llm/responses/client.py +99 -81
- klaude_code/llm/responses/input.py +11 -25
- klaude_code/llm/stream_parts.py +94 -0
- klaude_code/log.py +57 -0
- klaude_code/tui/command/fork_session_cmd.py +14 -23
- klaude_code/tui/command/model_picker.py +2 -17
- klaude_code/tui/command/resume_cmd.py +2 -18
- klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
- klaude_code/tui/command/thinking_cmd.py +2 -14
- klaude_code/tui/components/common.py +1 -1
- klaude_code/tui/components/metadata.py +17 -16
- klaude_code/tui/components/rich/quote.py +36 -8
- klaude_code/tui/components/rich/theme.py +2 -0
- klaude_code/tui/input/prompt_toolkit.py +3 -1
- klaude_code/tui/machine.py +19 -1
- klaude_code/tui/renderer.py +3 -3
- klaude_code/tui/terminal/selector.py +174 -31
- {klaude_code-2.5.2.dist-info → klaude_code-2.5.3.dist-info}/METADATA +1 -1
- {klaude_code-2.5.2.dist-info → klaude_code-2.5.3.dist-info}/RECORD +39 -38
- {klaude_code-2.5.2.dist-info → klaude_code-2.5.3.dist-info}/WHEEL +0 -0
- {klaude_code-2.5.2.dist-info → klaude_code-2.5.3.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/input.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing import Any
|
|
|
11
11
|
from google.genai import types
|
|
12
12
|
|
|
13
13
|
from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
|
|
14
|
-
from klaude_code.llm.image import parse_data_url
|
|
14
|
+
from klaude_code.llm.image import assistant_image_to_data_url, parse_data_url
|
|
15
15
|
from klaude_code.llm.input_common import (
|
|
16
16
|
DeveloperAttachment,
|
|
17
17
|
attach_developer_messages,
|
|
@@ -108,51 +108,49 @@ def _tool_messages_to_contents(
|
|
|
108
108
|
return contents
|
|
109
109
|
|
|
110
110
|
|
|
111
|
+
def _decode_thought_signature(sig: str | None) -> bytes | None:
|
|
112
|
+
"""Decode base64 thought signature to bytes."""
|
|
113
|
+
if not sig:
|
|
114
|
+
return None
|
|
115
|
+
try:
|
|
116
|
+
return b64decode(sig)
|
|
117
|
+
except (BinasciiError, ValueError):
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
|
|
111
121
|
def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str | None) -> types.Content | None:
|
|
112
122
|
parts: list[types.Part] = []
|
|
113
123
|
native_thinking_parts, degraded_thinking_texts = split_thinking_parts(msg, model_name)
|
|
114
124
|
native_thinking_ids = {id(part) for part in native_thinking_parts}
|
|
115
|
-
pending_thought_text: str | None = None
|
|
116
|
-
pending_thought_signature: str | None = None
|
|
117
|
-
|
|
118
|
-
def flush_thought() -> None:
|
|
119
|
-
nonlocal pending_thought_text, pending_thought_signature
|
|
120
|
-
if pending_thought_text is None and pending_thought_signature is None:
|
|
121
|
-
return
|
|
122
|
-
|
|
123
|
-
signature_bytes: bytes | None = None
|
|
124
|
-
if pending_thought_signature:
|
|
125
|
-
try:
|
|
126
|
-
signature_bytes = b64decode(pending_thought_signature)
|
|
127
|
-
except (BinasciiError, ValueError):
|
|
128
|
-
signature_bytes = None
|
|
129
|
-
|
|
130
|
-
parts.append(
|
|
131
|
-
types.Part(
|
|
132
|
-
text=pending_thought_text or "",
|
|
133
|
-
thought=True,
|
|
134
|
-
thought_signature=signature_bytes,
|
|
135
|
-
)
|
|
136
|
-
)
|
|
137
|
-
pending_thought_text = None
|
|
138
|
-
pending_thought_signature = None
|
|
139
125
|
|
|
140
126
|
for part in msg.parts:
|
|
141
127
|
if isinstance(part, message.ThinkingTextPart):
|
|
142
128
|
if id(part) not in native_thinking_ids:
|
|
143
129
|
continue
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
130
|
+
parts.append(types.Part(text=part.text, thought=True))
|
|
131
|
+
|
|
132
|
+
elif isinstance(part, message.ThinkingSignaturePart):
|
|
147
133
|
if id(part) not in native_thinking_ids:
|
|
148
134
|
continue
|
|
149
|
-
if part.signature
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
135
|
+
if not part.signature or part.format != "google":
|
|
136
|
+
continue
|
|
137
|
+
# Attach signature to the previous part
|
|
138
|
+
if parts:
|
|
139
|
+
sig_bytes = _decode_thought_signature(part.signature)
|
|
140
|
+
if sig_bytes:
|
|
141
|
+
last_part = parts[-1]
|
|
142
|
+
parts[-1] = types.Part(
|
|
143
|
+
text=last_part.text,
|
|
144
|
+
thought=last_part.thought,
|
|
145
|
+
function_call=last_part.function_call,
|
|
146
|
+
inline_data=last_part.inline_data,
|
|
147
|
+
file_data=last_part.file_data,
|
|
148
|
+
thought_signature=sig_bytes,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
elif isinstance(part, message.TextPart):
|
|
155
152
|
parts.append(types.Part(text=part.text))
|
|
153
|
+
|
|
156
154
|
elif isinstance(part, message.ToolCallPart):
|
|
157
155
|
args: dict[str, Any]
|
|
158
156
|
if part.arguments_json:
|
|
@@ -162,9 +160,19 @@ def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str
|
|
|
162
160
|
args = {"_raw": part.arguments_json}
|
|
163
161
|
else:
|
|
164
162
|
args = {}
|
|
165
|
-
parts.append(
|
|
163
|
+
parts.append(
|
|
164
|
+
types.Part(
|
|
165
|
+
function_call=types.FunctionCall(id=part.call_id, name=part.tool_name, args=args),
|
|
166
|
+
)
|
|
167
|
+
)
|
|
166
168
|
|
|
167
|
-
|
|
169
|
+
elif isinstance(part, message.ImageFilePart):
|
|
170
|
+
# Convert saved image back to inline_data for multi-turn
|
|
171
|
+
try:
|
|
172
|
+
data_url = assistant_image_to_data_url(part)
|
|
173
|
+
parts.append(_image_part_to_part(message.ImageURLPart(url=data_url)))
|
|
174
|
+
except (ValueError, FileNotFoundError):
|
|
175
|
+
pass # Skip if image cannot be loaded
|
|
168
176
|
|
|
169
177
|
if degraded_thinking_texts:
|
|
170
178
|
parts.insert(0, types.Part(text="<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>"))
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
This module provides reusable primitives for OpenAI-compatible providers:
|
|
4
4
|
|
|
5
|
-
- ``StreamStateManager``: accumulates assistant
|
|
6
|
-
- ``ReasoningHandlerABC``: provider-specific reasoning extraction
|
|
5
|
+
- ``StreamStateManager``: accumulates assistant parts in stream order.
|
|
6
|
+
- ``ReasoningHandlerABC``: provider-specific reasoning extraction.
|
|
7
7
|
- ``OpenAILLMStream``: LLMStream implementation for OpenAI-compatible clients.
|
|
8
8
|
|
|
9
9
|
OpenRouter uses the same OpenAI Chat Completions API surface but differs in
|
|
@@ -15,7 +15,7 @@ from __future__ import annotations
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
16
|
from collections.abc import AsyncGenerator, Callable
|
|
17
17
|
from dataclasses import dataclass
|
|
18
|
-
from typing import Any,
|
|
18
|
+
from typing import Any, cast
|
|
19
19
|
|
|
20
20
|
import httpx
|
|
21
21
|
import openai
|
|
@@ -26,91 +26,107 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
|
|
26
26
|
|
|
27
27
|
from klaude_code.llm.client import LLMStreamABC
|
|
28
28
|
from klaude_code.llm.image import save_assistant_image
|
|
29
|
-
from klaude_code.llm.openai_compatible.tool_call_accumulator import
|
|
30
|
-
from klaude_code.llm.
|
|
29
|
+
from klaude_code.llm.openai_compatible.tool_call_accumulator import normalize_tool_name
|
|
30
|
+
from klaude_code.llm.stream_parts import (
|
|
31
|
+
append_text_part,
|
|
32
|
+
append_thinking_text_part,
|
|
33
|
+
build_partial_message,
|
|
34
|
+
build_partial_parts,
|
|
35
|
+
)
|
|
31
36
|
from klaude_code.llm.usage import MetadataTracker, convert_usage
|
|
32
37
|
from klaude_code.protocol import llm_param, message, model
|
|
33
38
|
|
|
34
|
-
StreamStage = Literal["waiting", "reasoning", "assistant", "tool"]
|
|
35
|
-
|
|
36
39
|
|
|
37
40
|
class StreamStateManager:
|
|
38
|
-
"""Manages streaming state and
|
|
41
|
+
"""Manages streaming state and accumulates parts in stream order.
|
|
39
42
|
|
|
40
|
-
|
|
41
|
-
|
|
43
|
+
The persisted AssistantMessage is built directly from ``assistant_parts``.
|
|
44
|
+
``get_partial_message()`` returns a best-effort message on cancellation.
|
|
42
45
|
"""
|
|
43
46
|
|
|
44
47
|
def __init__(
|
|
45
48
|
self,
|
|
46
49
|
param_model: str,
|
|
47
50
|
response_id: str | None = None,
|
|
48
|
-
reasoning_flusher: Callable[[], list[message.Part]] | None = None,
|
|
49
51
|
):
|
|
50
52
|
self.param_model = param_model
|
|
51
53
|
self.response_id = response_id
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
54
|
-
self.
|
|
55
|
-
self.
|
|
56
|
-
self.emitted_tool_start_indices: set[int] = set()
|
|
57
|
-
self._reasoning_flusher = reasoning_flusher
|
|
58
|
-
self.parts: list[message.Part] = []
|
|
54
|
+
self.assistant_parts: list[message.Part] = []
|
|
55
|
+
self._image_index: int = 0
|
|
56
|
+
self._tool_part_index_by_tc_index: dict[int, int] = {}
|
|
57
|
+
self._emitted_tool_start_indices: set[int] = set()
|
|
59
58
|
self.stop_reason: model.StopReason | None = None
|
|
60
59
|
|
|
61
60
|
def set_response_id(self, response_id: str) -> None:
|
|
62
61
|
"""Set the response ID once received from the stream."""
|
|
63
62
|
self.response_id = response_id
|
|
64
|
-
self.accumulated_tool_calls.set_response_id(response_id)
|
|
65
63
|
|
|
66
|
-
def
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
|
|
64
|
+
def append_thinking_text(self, text: str) -> None:
|
|
65
|
+
"""Append thinking text, merging with the previous ThinkingTextPart when possible."""
|
|
66
|
+
append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
|
|
67
|
+
|
|
68
|
+
def append_text(self, text: str) -> None:
|
|
69
|
+
"""Append assistant text, merging with the previous TextPart when possible."""
|
|
70
|
+
append_text_part(self.assistant_parts, text)
|
|
71
|
+
|
|
72
|
+
def append_image(self, image_part: message.ImageFilePart) -> None:
|
|
73
|
+
self.assistant_parts.append(image_part)
|
|
74
|
+
self._image_index += 1
|
|
75
|
+
|
|
76
|
+
def upsert_tool_call(self, *, tc_index: int, call_id: str | None, name: str | None, arguments: str | None) -> None:
|
|
77
|
+
"""Insert a ToolCallPart at first sight and keep updating its fields.
|
|
78
|
+
|
|
79
|
+
Chat Completions streams tool call fields incrementally (name/id first,
|
|
80
|
+
then argument fragments). We keep the ToolCallPart in-place to preserve
|
|
81
|
+
stream order in the persisted AssistantMessage.
|
|
82
|
+
"""
|
|
70
83
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
84
|
+
part_index = self._tool_part_index_by_tc_index.get(tc_index)
|
|
85
|
+
if part_index is None:
|
|
86
|
+
tool_part = message.ToolCallPart(
|
|
87
|
+
call_id=call_id or "",
|
|
88
|
+
tool_name=normalize_tool_name(name or ""),
|
|
89
|
+
arguments_json=arguments or "",
|
|
90
|
+
)
|
|
91
|
+
self.assistant_parts.append(tool_part)
|
|
92
|
+
self._tool_part_index_by_tc_index[tc_index] = len(self.assistant_parts) - 1
|
|
74
93
|
return
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
if
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
|
|
95
|
+
existing = self.assistant_parts[part_index]
|
|
96
|
+
if not isinstance(existing, message.ToolCallPart):
|
|
97
|
+
return
|
|
98
|
+
|
|
99
|
+
if call_id and not existing.call_id:
|
|
100
|
+
existing.call_id = call_id
|
|
101
|
+
if name and not existing.tool_name:
|
|
102
|
+
existing.tool_name = normalize_tool_name(name)
|
|
103
|
+
if arguments:
|
|
104
|
+
existing.arguments_json += arguments
|
|
105
|
+
|
|
106
|
+
def mark_tool_start_emitted(self, tc_index: int) -> bool:
|
|
107
|
+
"""Return True if this is the first time we emit ToolCallStartDelta for this index."""
|
|
108
|
+
if tc_index in self._emitted_tool_start_indices:
|
|
109
|
+
return False
|
|
110
|
+
self._emitted_tool_start_indices.add(tc_index)
|
|
111
|
+
return True
|
|
112
|
+
|
|
113
|
+
def next_image_index(self) -> int:
|
|
114
|
+
return self._image_index
|
|
115
|
+
|
|
116
|
+
def get_partial_parts(self) -> list[message.Part]:
|
|
117
|
+
"""Get accumulated parts excluding tool calls, with thinking degraded.
|
|
118
|
+
|
|
119
|
+
Filters out ToolCallPart and applies degrade_thinking_to_text.
|
|
120
|
+
"""
|
|
121
|
+
return build_partial_parts(self.assistant_parts)
|
|
97
122
|
|
|
98
123
|
def get_partial_message(self) -> message.AssistantMessage | None:
|
|
99
124
|
"""Build a partial AssistantMessage from accumulated state.
|
|
100
125
|
|
|
101
|
-
|
|
102
|
-
|
|
126
|
+
Filters out tool calls and degrades thinking content for safety.
|
|
127
|
+
Returns None if no content has been accumulated.
|
|
103
128
|
"""
|
|
104
|
-
self.
|
|
105
|
-
self.flush_assistant()
|
|
106
|
-
parts = degrade_thinking_to_text(list(self.parts))
|
|
107
|
-
if not parts:
|
|
108
|
-
return None
|
|
109
|
-
return message.AssistantMessage(
|
|
110
|
-
parts=parts,
|
|
111
|
-
response_id=self.response_id,
|
|
112
|
-
stop_reason="aborted",
|
|
113
|
-
)
|
|
129
|
+
return build_partial_message(self.assistant_parts, response_id=self.response_id)
|
|
114
130
|
|
|
115
131
|
|
|
116
132
|
@dataclass(slots=True)
|
|
@@ -148,7 +164,6 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
|
|
|
148
164
|
) -> None:
|
|
149
165
|
self._param_model = param_model
|
|
150
166
|
self._response_id = response_id
|
|
151
|
-
self._accumulated: list[str] = []
|
|
152
167
|
|
|
153
168
|
def set_response_id(self, response_id: str | None) -> None:
|
|
154
169
|
self._response_id = response_id
|
|
@@ -158,18 +173,10 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
|
|
|
158
173
|
if not reasoning_content:
|
|
159
174
|
return ReasoningDeltaResult(handled=False, outputs=[])
|
|
160
175
|
text = str(reasoning_content)
|
|
161
|
-
self._accumulated.append(text)
|
|
162
176
|
return ReasoningDeltaResult(handled=True, outputs=[text])
|
|
163
177
|
|
|
164
178
|
def flush(self) -> list[message.Part]:
|
|
165
|
-
|
|
166
|
-
return []
|
|
167
|
-
item = message.ThinkingTextPart(
|
|
168
|
-
text="".join(self._accumulated),
|
|
169
|
-
model_id=self._param_model,
|
|
170
|
-
)
|
|
171
|
-
self._accumulated = []
|
|
172
|
-
return [item]
|
|
179
|
+
return []
|
|
173
180
|
|
|
174
181
|
|
|
175
182
|
def _map_finish_reason(reason: str) -> model.StopReason | None:
|
|
@@ -254,26 +261,21 @@ async def parse_chat_completions_stream(
|
|
|
254
261
|
# Reasoning
|
|
255
262
|
reasoning_result = reasoning_handler.on_delta(delta)
|
|
256
263
|
if reasoning_result.handled:
|
|
257
|
-
state.stage = "reasoning"
|
|
258
264
|
for output in reasoning_result.outputs:
|
|
259
265
|
if isinstance(output, str):
|
|
260
266
|
if not output:
|
|
261
267
|
continue
|
|
262
268
|
metadata_tracker.record_token()
|
|
269
|
+
state.append_thinking_text(output)
|
|
263
270
|
yield message.ThinkingTextDelta(content=output, response_id=state.response_id)
|
|
264
271
|
else:
|
|
265
|
-
state.
|
|
272
|
+
state.assistant_parts.append(output)
|
|
266
273
|
|
|
267
274
|
# Assistant
|
|
268
275
|
images = getattr(delta, "images", None)
|
|
269
276
|
if isinstance(images, list) and images:
|
|
270
277
|
images_list = cast(list[object], images)
|
|
271
278
|
metadata_tracker.record_token()
|
|
272
|
-
if state.stage == "reasoning":
|
|
273
|
-
state.flush_reasoning()
|
|
274
|
-
elif state.stage == "tool":
|
|
275
|
-
state.flush_tool_calls()
|
|
276
|
-
state.stage = "assistant"
|
|
277
279
|
for image_obj in images_list:
|
|
278
280
|
url = _extract_image_url(image_obj)
|
|
279
281
|
if not url:
|
|
@@ -286,50 +288,59 @@ async def parse_chat_completions_stream(
|
|
|
286
288
|
data_url=url,
|
|
287
289
|
session_id=param.session_id,
|
|
288
290
|
response_id=state.response_id,
|
|
289
|
-
image_index=
|
|
291
|
+
image_index=state.next_image_index(),
|
|
290
292
|
)
|
|
291
293
|
except ValueError as exc:
|
|
292
294
|
yield message.StreamErrorItem(error=str(exc))
|
|
293
295
|
return
|
|
294
|
-
state.
|
|
296
|
+
state.append_image(assistant_image)
|
|
295
297
|
yield message.AssistantImageDelta(
|
|
296
298
|
response_id=state.response_id, file_path=assistant_image.file_path
|
|
297
299
|
)
|
|
298
300
|
|
|
299
|
-
if (content := getattr(delta, "content", None))
|
|
301
|
+
content_str = str(content) if (content := getattr(delta, "content", None)) is not None else ""
|
|
302
|
+
|
|
303
|
+
if content_str and (
|
|
304
|
+
(state.assistant_parts and isinstance(state.assistant_parts[-1], message.TextPart))
|
|
305
|
+
or content_str.strip()
|
|
306
|
+
):
|
|
300
307
|
metadata_tracker.record_token()
|
|
301
|
-
|
|
302
|
-
state.flush_reasoning()
|
|
303
|
-
elif state.stage == "tool":
|
|
304
|
-
state.flush_tool_calls()
|
|
305
|
-
state.stage = "assistant"
|
|
306
|
-
state.accumulated_content.append(str(content))
|
|
308
|
+
state.append_text(content_str)
|
|
307
309
|
yield message.AssistantTextDelta(
|
|
308
|
-
content=
|
|
310
|
+
content=content_str,
|
|
309
311
|
response_id=state.response_id,
|
|
310
312
|
)
|
|
311
313
|
|
|
312
314
|
# Tool
|
|
313
315
|
if (tool_calls := getattr(delta, "tool_calls", None)) and len(tool_calls) > 0:
|
|
314
316
|
metadata_tracker.record_token()
|
|
315
|
-
if state.stage == "reasoning":
|
|
316
|
-
state.flush_reasoning()
|
|
317
|
-
elif state.stage == "assistant":
|
|
318
|
-
state.flush_assistant()
|
|
319
|
-
state.stage = "tool"
|
|
320
317
|
for tc in tool_calls:
|
|
321
|
-
|
|
322
|
-
|
|
318
|
+
tc_index = getattr(tc, "index", None)
|
|
319
|
+
if not isinstance(tc_index, int):
|
|
320
|
+
continue
|
|
321
|
+
fn = getattr(tc, "function", None)
|
|
322
|
+
fn_name = getattr(fn, "name", None) if fn is not None else None
|
|
323
|
+
fn_args = getattr(fn, "arguments", None) if fn is not None else None
|
|
324
|
+
tc_id = getattr(tc, "id", None)
|
|
325
|
+
|
|
326
|
+
if fn_name and state.mark_tool_start_emitted(tc_index):
|
|
323
327
|
yield message.ToolCallStartDelta(
|
|
324
328
|
response_id=state.response_id,
|
|
325
|
-
call_id=
|
|
326
|
-
name=
|
|
329
|
+
call_id=str(tc_id or ""),
|
|
330
|
+
name=str(fn_name),
|
|
327
331
|
)
|
|
328
|
-
|
|
332
|
+
state.upsert_tool_call(
|
|
333
|
+
tc_index=tc_index,
|
|
334
|
+
call_id=str(tc_id) if isinstance(tc_id, str) else None,
|
|
335
|
+
name=str(fn_name) if isinstance(fn_name, str) else None,
|
|
336
|
+
arguments=str(fn_args) if isinstance(fn_args, str) else None,
|
|
337
|
+
)
|
|
329
338
|
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
330
339
|
yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
340
|
+
state.stop_reason = "error"
|
|
331
341
|
|
|
332
|
-
parts
|
|
342
|
+
# On error, use partial parts (excluding incomplete tool calls) for potential prefill on retry
|
|
343
|
+
parts = state.get_partial_parts() if state.stop_reason == "error" else list(state.assistant_parts)
|
|
333
344
|
if parts:
|
|
334
345
|
metadata_tracker.record_token()
|
|
335
346
|
metadata_tracker.set_response_id(state.response_id)
|
|
@@ -361,7 +372,6 @@ class OpenAILLMStream(LLMStreamABC):
|
|
|
361
372
|
self._on_event = on_event
|
|
362
373
|
self._state = StreamStateManager(
|
|
363
374
|
param_model=str(param.model_id),
|
|
364
|
-
reasoning_flusher=reasoning_handler.flush,
|
|
365
375
|
)
|
|
366
376
|
self._completed = False
|
|
367
377
|
|
|
@@ -30,7 +30,6 @@ class ReasoningStreamHandler(ReasoningHandlerABC):
|
|
|
30
30
|
self._response_id = response_id
|
|
31
31
|
|
|
32
32
|
self._reasoning_id: str | None = None
|
|
33
|
-
self._accumulated_reasoning: list[str] = []
|
|
34
33
|
|
|
35
34
|
def set_response_id(self, response_id: str | None) -> None:
|
|
36
35
|
"""Update the response identifier used for emitted items."""
|
|
@@ -62,44 +61,20 @@ class ReasoningStreamHandler(ReasoningHandlerABC):
|
|
|
62
61
|
|
|
63
62
|
if detail.type == "reasoning.encrypted":
|
|
64
63
|
self._reasoning_id = detail.id
|
|
65
|
-
# Flush accumulated text before encrypted content
|
|
66
|
-
items.extend(self._flush_text())
|
|
67
64
|
if signature_part := self._build_signature_part(detail.data, detail):
|
|
68
65
|
items.append(signature_part)
|
|
69
66
|
return items
|
|
70
67
|
|
|
71
68
|
if detail.type in ("reasoning.text", "reasoning.summary"):
|
|
72
69
|
self._reasoning_id = detail.id
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
self._accumulated_reasoning.append(text)
|
|
77
|
-
# Flush on signature (encrypted content)
|
|
78
|
-
if detail.signature:
|
|
79
|
-
items.extend(self._flush_text())
|
|
80
|
-
if signature_part := self._build_signature_part(detail.signature, detail):
|
|
81
|
-
items.append(signature_part)
|
|
70
|
+
# Signature (Anthropic-style) can arrive alongside text/summary.
|
|
71
|
+
if detail.signature and (signature_part := self._build_signature_part(detail.signature, detail)):
|
|
72
|
+
items.append(signature_part)
|
|
82
73
|
|
|
83
74
|
return items
|
|
84
75
|
|
|
85
76
|
def flush(self) -> list[message.Part]:
|
|
86
|
-
|
|
87
|
-
return self._flush_text()
|
|
88
|
-
|
|
89
|
-
def _flush_text(self) -> list[message.Part]:
|
|
90
|
-
"""Flush accumulated reasoning text as a single part."""
|
|
91
|
-
if not self._accumulated_reasoning:
|
|
92
|
-
return []
|
|
93
|
-
item = self._build_text_part("".join(self._accumulated_reasoning))
|
|
94
|
-
self._accumulated_reasoning = []
|
|
95
|
-
return [item]
|
|
96
|
-
|
|
97
|
-
def _build_text_part(self, content: str) -> message.ThinkingTextPart:
|
|
98
|
-
return message.ThinkingTextPart(
|
|
99
|
-
id=self._reasoning_id,
|
|
100
|
-
text=content,
|
|
101
|
-
model_id=self._param_model,
|
|
102
|
-
)
|
|
77
|
+
return []
|
|
103
78
|
|
|
104
79
|
def _build_signature_part(
|
|
105
80
|
self,
|
|
@@ -1,35 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from klaude_code.
|
|
3
|
+
from klaude_code.llm.stream_parts import degrade_thinking_to_text
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
def degrade_thinking_to_text(parts: list[message.Part]) -> list[message.Part]:
|
|
7
|
-
"""Degrade thinking parts into a regular TextPart.
|
|
8
|
-
|
|
9
|
-
Some providers require thinking signatures/encrypted content to be echoed back
|
|
10
|
-
for subsequent calls. During interruption we cannot reliably determine whether
|
|
11
|
-
we have a complete signature, so we persist thinking as plain text instead.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
thinking_texts: list[str] = []
|
|
15
|
-
non_thinking_parts: list[message.Part] = []
|
|
16
|
-
|
|
17
|
-
for part in parts:
|
|
18
|
-
if isinstance(part, message.ThinkingTextPart):
|
|
19
|
-
text = part.text
|
|
20
|
-
if text and text.strip():
|
|
21
|
-
thinking_texts.append(text)
|
|
22
|
-
continue
|
|
23
|
-
if isinstance(part, message.ThinkingSignaturePart):
|
|
24
|
-
continue
|
|
25
|
-
non_thinking_parts.append(part)
|
|
26
|
-
|
|
27
|
-
if not thinking_texts:
|
|
28
|
-
return non_thinking_parts
|
|
29
|
-
|
|
30
|
-
joined = "\n".join(thinking_texts).strip()
|
|
31
|
-
thinking_block = f"<thinking>\n{joined}\n</thinking>"
|
|
32
|
-
if non_thinking_parts:
|
|
33
|
-
thinking_block += "\n\n"
|
|
34
|
-
|
|
35
|
-
return [message.TextPart(text=thinking_block), *non_thinking_parts]
|
|
5
|
+
__all__ = ["degrade_thinking_to_text"]
|