klaude-code 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/__init__.py +10 -0
- klaude_code/auth/env.py +77 -0
- klaude_code/cli/auth_cmd.py +89 -21
- klaude_code/cli/config_cmd.py +5 -5
- klaude_code/cli/cost_cmd.py +167 -68
- klaude_code/cli/main.py +51 -27
- klaude_code/cli/self_update.py +7 -7
- klaude_code/config/assets/builtin_config.yaml +45 -24
- klaude_code/config/builtin_config.py +23 -9
- klaude_code/config/config.py +19 -9
- klaude_code/config/model_matcher.py +1 -1
- klaude_code/const.py +2 -1
- klaude_code/core/tool/file/edit_tool.py +1 -1
- klaude_code/core/tool/file/read_tool.py +2 -2
- klaude_code/core/tool/file/write_tool.py +1 -1
- klaude_code/core/turn.py +21 -4
- klaude_code/llm/anthropic/client.py +75 -50
- klaude_code/llm/anthropic/input.py +20 -9
- klaude_code/llm/google/client.py +235 -148
- klaude_code/llm/google/input.py +44 -36
- klaude_code/llm/openai_compatible/stream.py +114 -100
- klaude_code/llm/openrouter/client.py +1 -0
- klaude_code/llm/openrouter/reasoning.py +4 -29
- klaude_code/llm/partial_message.py +2 -32
- klaude_code/llm/responses/client.py +99 -81
- klaude_code/llm/responses/input.py +11 -25
- klaude_code/llm/stream_parts.py +94 -0
- klaude_code/log.py +57 -0
- klaude_code/protocol/events.py +214 -0
- klaude_code/protocol/sub_agent/image_gen.py +0 -4
- klaude_code/session/session.py +51 -18
- klaude_code/tui/command/fork_session_cmd.py +14 -23
- klaude_code/tui/command/model_picker.py +2 -17
- klaude_code/tui/command/resume_cmd.py +2 -18
- klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
- klaude_code/tui/command/thinking_cmd.py +2 -14
- klaude_code/tui/commands.py +0 -5
- klaude_code/tui/components/common.py +1 -1
- klaude_code/tui/components/metadata.py +21 -21
- klaude_code/tui/components/rich/quote.py +36 -8
- klaude_code/tui/components/rich/theme.py +2 -0
- klaude_code/tui/components/sub_agent.py +6 -0
- klaude_code/tui/display.py +11 -1
- klaude_code/tui/input/completers.py +11 -7
- klaude_code/tui/input/prompt_toolkit.py +3 -1
- klaude_code/tui/machine.py +108 -56
- klaude_code/tui/renderer.py +4 -65
- klaude_code/tui/terminal/selector.py +174 -31
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/METADATA +23 -31
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/RECORD +52 -58
- klaude_code/cli/session_cmd.py +0 -96
- klaude_code/protocol/events/__init__.py +0 -63
- klaude_code/protocol/events/base.py +0 -18
- klaude_code/protocol/events/chat.py +0 -30
- klaude_code/protocol/events/lifecycle.py +0 -23
- klaude_code/protocol/events/metadata.py +0 -16
- klaude_code/protocol/events/streaming.py +0 -43
- klaude_code/protocol/events/system.py +0 -56
- klaude_code/protocol/events/tools.py +0 -27
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/WHEEL +0 -0
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/input.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing import Any
|
|
|
11
11
|
from google.genai import types
|
|
12
12
|
|
|
13
13
|
from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
|
|
14
|
-
from klaude_code.llm.image import parse_data_url
|
|
14
|
+
from klaude_code.llm.image import assistant_image_to_data_url, parse_data_url
|
|
15
15
|
from klaude_code.llm.input_common import (
|
|
16
16
|
DeveloperAttachment,
|
|
17
17
|
attach_developer_messages,
|
|
@@ -108,51 +108,49 @@ def _tool_messages_to_contents(
|
|
|
108
108
|
return contents
|
|
109
109
|
|
|
110
110
|
|
|
111
|
+
def _decode_thought_signature(sig: str | None) -> bytes | None:
|
|
112
|
+
"""Decode base64 thought signature to bytes."""
|
|
113
|
+
if not sig:
|
|
114
|
+
return None
|
|
115
|
+
try:
|
|
116
|
+
return b64decode(sig)
|
|
117
|
+
except (BinasciiError, ValueError):
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
|
|
111
121
|
def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str | None) -> types.Content | None:
|
|
112
122
|
parts: list[types.Part] = []
|
|
113
123
|
native_thinking_parts, degraded_thinking_texts = split_thinking_parts(msg, model_name)
|
|
114
124
|
native_thinking_ids = {id(part) for part in native_thinking_parts}
|
|
115
|
-
pending_thought_text: str | None = None
|
|
116
|
-
pending_thought_signature: str | None = None
|
|
117
|
-
|
|
118
|
-
def flush_thought() -> None:
|
|
119
|
-
nonlocal pending_thought_text, pending_thought_signature
|
|
120
|
-
if pending_thought_text is None and pending_thought_signature is None:
|
|
121
|
-
return
|
|
122
|
-
|
|
123
|
-
signature_bytes: bytes | None = None
|
|
124
|
-
if pending_thought_signature:
|
|
125
|
-
try:
|
|
126
|
-
signature_bytes = b64decode(pending_thought_signature)
|
|
127
|
-
except (BinasciiError, ValueError):
|
|
128
|
-
signature_bytes = None
|
|
129
|
-
|
|
130
|
-
parts.append(
|
|
131
|
-
types.Part(
|
|
132
|
-
text=pending_thought_text or "",
|
|
133
|
-
thought=True,
|
|
134
|
-
thought_signature=signature_bytes,
|
|
135
|
-
)
|
|
136
|
-
)
|
|
137
|
-
pending_thought_text = None
|
|
138
|
-
pending_thought_signature = None
|
|
139
125
|
|
|
140
126
|
for part in msg.parts:
|
|
141
127
|
if isinstance(part, message.ThinkingTextPart):
|
|
142
128
|
if id(part) not in native_thinking_ids:
|
|
143
129
|
continue
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
130
|
+
parts.append(types.Part(text=part.text, thought=True))
|
|
131
|
+
|
|
132
|
+
elif isinstance(part, message.ThinkingSignaturePart):
|
|
147
133
|
if id(part) not in native_thinking_ids:
|
|
148
134
|
continue
|
|
149
|
-
if part.signature
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
135
|
+
if not part.signature or part.format != "google":
|
|
136
|
+
continue
|
|
137
|
+
# Attach signature to the previous part
|
|
138
|
+
if parts:
|
|
139
|
+
sig_bytes = _decode_thought_signature(part.signature)
|
|
140
|
+
if sig_bytes:
|
|
141
|
+
last_part = parts[-1]
|
|
142
|
+
parts[-1] = types.Part(
|
|
143
|
+
text=last_part.text,
|
|
144
|
+
thought=last_part.thought,
|
|
145
|
+
function_call=last_part.function_call,
|
|
146
|
+
inline_data=last_part.inline_data,
|
|
147
|
+
file_data=last_part.file_data,
|
|
148
|
+
thought_signature=sig_bytes,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
elif isinstance(part, message.TextPart):
|
|
155
152
|
parts.append(types.Part(text=part.text))
|
|
153
|
+
|
|
156
154
|
elif isinstance(part, message.ToolCallPart):
|
|
157
155
|
args: dict[str, Any]
|
|
158
156
|
if part.arguments_json:
|
|
@@ -162,9 +160,19 @@ def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str
|
|
|
162
160
|
args = {"_raw": part.arguments_json}
|
|
163
161
|
else:
|
|
164
162
|
args = {}
|
|
165
|
-
parts.append(
|
|
163
|
+
parts.append(
|
|
164
|
+
types.Part(
|
|
165
|
+
function_call=types.FunctionCall(id=part.call_id, name=part.tool_name, args=args),
|
|
166
|
+
)
|
|
167
|
+
)
|
|
166
168
|
|
|
167
|
-
|
|
169
|
+
elif isinstance(part, message.ImageFilePart):
|
|
170
|
+
# Convert saved image back to inline_data for multi-turn
|
|
171
|
+
try:
|
|
172
|
+
data_url = assistant_image_to_data_url(part)
|
|
173
|
+
parts.append(_image_part_to_part(message.ImageURLPart(url=data_url)))
|
|
174
|
+
except (ValueError, FileNotFoundError):
|
|
175
|
+
pass # Skip if image cannot be loaded
|
|
168
176
|
|
|
169
177
|
if degraded_thinking_texts:
|
|
170
178
|
parts.insert(0, types.Part(text="<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>"))
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
This module provides reusable primitives for OpenAI-compatible providers:
|
|
4
4
|
|
|
5
|
-
- ``StreamStateManager``: accumulates assistant
|
|
6
|
-
- ``ReasoningHandlerABC``: provider-specific reasoning extraction
|
|
5
|
+
- ``StreamStateManager``: accumulates assistant parts in stream order.
|
|
6
|
+
- ``ReasoningHandlerABC``: provider-specific reasoning extraction.
|
|
7
7
|
- ``OpenAILLMStream``: LLMStream implementation for OpenAI-compatible clients.
|
|
8
8
|
|
|
9
9
|
OpenRouter uses the same OpenAI Chat Completions API surface but differs in
|
|
@@ -15,7 +15,7 @@ from __future__ import annotations
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
16
|
from collections.abc import AsyncGenerator, Callable
|
|
17
17
|
from dataclasses import dataclass
|
|
18
|
-
from typing import Any,
|
|
18
|
+
from typing import Any, cast
|
|
19
19
|
|
|
20
20
|
import httpx
|
|
21
21
|
import openai
|
|
@@ -26,91 +26,107 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
|
|
26
26
|
|
|
27
27
|
from klaude_code.llm.client import LLMStreamABC
|
|
28
28
|
from klaude_code.llm.image import save_assistant_image
|
|
29
|
-
from klaude_code.llm.openai_compatible.tool_call_accumulator import
|
|
30
|
-
from klaude_code.llm.
|
|
29
|
+
from klaude_code.llm.openai_compatible.tool_call_accumulator import normalize_tool_name
|
|
30
|
+
from klaude_code.llm.stream_parts import (
|
|
31
|
+
append_text_part,
|
|
32
|
+
append_thinking_text_part,
|
|
33
|
+
build_partial_message,
|
|
34
|
+
build_partial_parts,
|
|
35
|
+
)
|
|
31
36
|
from klaude_code.llm.usage import MetadataTracker, convert_usage
|
|
32
37
|
from klaude_code.protocol import llm_param, message, model
|
|
33
38
|
|
|
34
|
-
StreamStage = Literal["waiting", "reasoning", "assistant", "tool"]
|
|
35
|
-
|
|
36
39
|
|
|
37
40
|
class StreamStateManager:
|
|
38
|
-
"""Manages streaming state and
|
|
41
|
+
"""Manages streaming state and accumulates parts in stream order.
|
|
39
42
|
|
|
40
|
-
|
|
41
|
-
|
|
43
|
+
The persisted AssistantMessage is built directly from ``assistant_parts``.
|
|
44
|
+
``get_partial_message()`` returns a best-effort message on cancellation.
|
|
42
45
|
"""
|
|
43
46
|
|
|
44
47
|
def __init__(
|
|
45
48
|
self,
|
|
46
49
|
param_model: str,
|
|
47
50
|
response_id: str | None = None,
|
|
48
|
-
reasoning_flusher: Callable[[], list[message.Part]] | None = None,
|
|
49
51
|
):
|
|
50
52
|
self.param_model = param_model
|
|
51
53
|
self.response_id = response_id
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
54
|
-
self.
|
|
55
|
-
self.
|
|
56
|
-
self.emitted_tool_start_indices: set[int] = set()
|
|
57
|
-
self._reasoning_flusher = reasoning_flusher
|
|
58
|
-
self.parts: list[message.Part] = []
|
|
54
|
+
self.assistant_parts: list[message.Part] = []
|
|
55
|
+
self._image_index: int = 0
|
|
56
|
+
self._tool_part_index_by_tc_index: dict[int, int] = {}
|
|
57
|
+
self._emitted_tool_start_indices: set[int] = set()
|
|
59
58
|
self.stop_reason: model.StopReason | None = None
|
|
60
59
|
|
|
61
60
|
def set_response_id(self, response_id: str) -> None:
|
|
62
61
|
"""Set the response ID once received from the stream."""
|
|
63
62
|
self.response_id = response_id
|
|
64
|
-
self.accumulated_tool_calls.set_response_id(response_id)
|
|
65
63
|
|
|
66
|
-
def
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
|
|
64
|
+
def append_thinking_text(self, text: str) -> None:
|
|
65
|
+
"""Append thinking text, merging with the previous ThinkingTextPart when possible."""
|
|
66
|
+
append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
|
|
67
|
+
|
|
68
|
+
def append_text(self, text: str) -> None:
|
|
69
|
+
"""Append assistant text, merging with the previous TextPart when possible."""
|
|
70
|
+
append_text_part(self.assistant_parts, text)
|
|
71
|
+
|
|
72
|
+
def append_image(self, image_part: message.ImageFilePart) -> None:
|
|
73
|
+
self.assistant_parts.append(image_part)
|
|
74
|
+
self._image_index += 1
|
|
75
|
+
|
|
76
|
+
def upsert_tool_call(self, *, tc_index: int, call_id: str | None, name: str | None, arguments: str | None) -> None:
|
|
77
|
+
"""Insert a ToolCallPart at first sight and keep updating its fields.
|
|
78
|
+
|
|
79
|
+
Chat Completions streams tool call fields incrementally (name/id first,
|
|
80
|
+
then argument fragments). We keep the ToolCallPart in-place to preserve
|
|
81
|
+
stream order in the persisted AssistantMessage.
|
|
82
|
+
"""
|
|
70
83
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
84
|
+
part_index = self._tool_part_index_by_tc_index.get(tc_index)
|
|
85
|
+
if part_index is None:
|
|
86
|
+
tool_part = message.ToolCallPart(
|
|
87
|
+
call_id=call_id or "",
|
|
88
|
+
tool_name=normalize_tool_name(name or ""),
|
|
89
|
+
arguments_json=arguments or "",
|
|
90
|
+
)
|
|
91
|
+
self.assistant_parts.append(tool_part)
|
|
92
|
+
self._tool_part_index_by_tc_index[tc_index] = len(self.assistant_parts) - 1
|
|
74
93
|
return
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
if
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
|
|
95
|
+
existing = self.assistant_parts[part_index]
|
|
96
|
+
if not isinstance(existing, message.ToolCallPart):
|
|
97
|
+
return
|
|
98
|
+
|
|
99
|
+
if call_id and not existing.call_id:
|
|
100
|
+
existing.call_id = call_id
|
|
101
|
+
if name and not existing.tool_name:
|
|
102
|
+
existing.tool_name = normalize_tool_name(name)
|
|
103
|
+
if arguments:
|
|
104
|
+
existing.arguments_json += arguments
|
|
105
|
+
|
|
106
|
+
def mark_tool_start_emitted(self, tc_index: int) -> bool:
|
|
107
|
+
"""Return True if this is the first time we emit ToolCallStartDelta for this index."""
|
|
108
|
+
if tc_index in self._emitted_tool_start_indices:
|
|
109
|
+
return False
|
|
110
|
+
self._emitted_tool_start_indices.add(tc_index)
|
|
111
|
+
return True
|
|
112
|
+
|
|
113
|
+
def next_image_index(self) -> int:
|
|
114
|
+
return self._image_index
|
|
115
|
+
|
|
116
|
+
def get_partial_parts(self) -> list[message.Part]:
|
|
117
|
+
"""Get accumulated parts excluding tool calls, with thinking degraded.
|
|
118
|
+
|
|
119
|
+
Filters out ToolCallPart and applies degrade_thinking_to_text.
|
|
120
|
+
"""
|
|
121
|
+
return build_partial_parts(self.assistant_parts)
|
|
97
122
|
|
|
98
123
|
def get_partial_message(self) -> message.AssistantMessage | None:
|
|
99
124
|
"""Build a partial AssistantMessage from accumulated state.
|
|
100
125
|
|
|
101
|
-
|
|
102
|
-
|
|
126
|
+
Filters out tool calls and degrades thinking content for safety.
|
|
127
|
+
Returns None if no content has been accumulated.
|
|
103
128
|
"""
|
|
104
|
-
self.
|
|
105
|
-
self.flush_assistant()
|
|
106
|
-
parts = degrade_thinking_to_text(list(self.parts))
|
|
107
|
-
if not parts:
|
|
108
|
-
return None
|
|
109
|
-
return message.AssistantMessage(
|
|
110
|
-
parts=parts,
|
|
111
|
-
response_id=self.response_id,
|
|
112
|
-
stop_reason="aborted",
|
|
113
|
-
)
|
|
129
|
+
return build_partial_message(self.assistant_parts, response_id=self.response_id)
|
|
114
130
|
|
|
115
131
|
|
|
116
132
|
@dataclass(slots=True)
|
|
@@ -148,7 +164,6 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
|
|
|
148
164
|
) -> None:
|
|
149
165
|
self._param_model = param_model
|
|
150
166
|
self._response_id = response_id
|
|
151
|
-
self._accumulated: list[str] = []
|
|
152
167
|
|
|
153
168
|
def set_response_id(self, response_id: str | None) -> None:
|
|
154
169
|
self._response_id = response_id
|
|
@@ -158,18 +173,10 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
|
|
|
158
173
|
if not reasoning_content:
|
|
159
174
|
return ReasoningDeltaResult(handled=False, outputs=[])
|
|
160
175
|
text = str(reasoning_content)
|
|
161
|
-
self._accumulated.append(text)
|
|
162
176
|
return ReasoningDeltaResult(handled=True, outputs=[text])
|
|
163
177
|
|
|
164
178
|
def flush(self) -> list[message.Part]:
|
|
165
|
-
|
|
166
|
-
return []
|
|
167
|
-
item = message.ThinkingTextPart(
|
|
168
|
-
text="".join(self._accumulated),
|
|
169
|
-
model_id=self._param_model,
|
|
170
|
-
)
|
|
171
|
-
self._accumulated = []
|
|
172
|
-
return [item]
|
|
179
|
+
return []
|
|
173
180
|
|
|
174
181
|
|
|
175
182
|
def _map_finish_reason(reason: str) -> model.StopReason | None:
|
|
@@ -192,6 +199,7 @@ async def parse_chat_completions_stream(
|
|
|
192
199
|
metadata_tracker: MetadataTracker,
|
|
193
200
|
reasoning_handler: ReasoningHandlerABC,
|
|
194
201
|
on_event: Callable[[object], None] | None = None,
|
|
202
|
+
provider_prefix: str = "",
|
|
195
203
|
) -> AsyncGenerator[message.LLMStreamItem]:
|
|
196
204
|
"""Parse OpenAI Chat Completions stream into stream items.
|
|
197
205
|
|
|
@@ -228,7 +236,7 @@ async def parse_chat_completions_stream(
|
|
|
228
236
|
if event_model := getattr(event, "model", None):
|
|
229
237
|
metadata_tracker.set_model_name(str(event_model))
|
|
230
238
|
if provider := getattr(event, "provider", None):
|
|
231
|
-
metadata_tracker.set_provider(
|
|
239
|
+
metadata_tracker.set_provider(f"{provider_prefix}{provider}")
|
|
232
240
|
|
|
233
241
|
choices = cast(Any, getattr(event, "choices", None))
|
|
234
242
|
if not choices:
|
|
@@ -254,26 +262,21 @@ async def parse_chat_completions_stream(
|
|
|
254
262
|
# Reasoning
|
|
255
263
|
reasoning_result = reasoning_handler.on_delta(delta)
|
|
256
264
|
if reasoning_result.handled:
|
|
257
|
-
state.stage = "reasoning"
|
|
258
265
|
for output in reasoning_result.outputs:
|
|
259
266
|
if isinstance(output, str):
|
|
260
267
|
if not output:
|
|
261
268
|
continue
|
|
262
269
|
metadata_tracker.record_token()
|
|
270
|
+
state.append_thinking_text(output)
|
|
263
271
|
yield message.ThinkingTextDelta(content=output, response_id=state.response_id)
|
|
264
272
|
else:
|
|
265
|
-
state.
|
|
273
|
+
state.assistant_parts.append(output)
|
|
266
274
|
|
|
267
275
|
# Assistant
|
|
268
276
|
images = getattr(delta, "images", None)
|
|
269
277
|
if isinstance(images, list) and images:
|
|
270
278
|
images_list = cast(list[object], images)
|
|
271
279
|
metadata_tracker.record_token()
|
|
272
|
-
if state.stage == "reasoning":
|
|
273
|
-
state.flush_reasoning()
|
|
274
|
-
elif state.stage == "tool":
|
|
275
|
-
state.flush_tool_calls()
|
|
276
|
-
state.stage = "assistant"
|
|
277
280
|
for image_obj in images_list:
|
|
278
281
|
url = _extract_image_url(image_obj)
|
|
279
282
|
if not url:
|
|
@@ -286,50 +289,59 @@ async def parse_chat_completions_stream(
|
|
|
286
289
|
data_url=url,
|
|
287
290
|
session_id=param.session_id,
|
|
288
291
|
response_id=state.response_id,
|
|
289
|
-
image_index=
|
|
292
|
+
image_index=state.next_image_index(),
|
|
290
293
|
)
|
|
291
294
|
except ValueError as exc:
|
|
292
295
|
yield message.StreamErrorItem(error=str(exc))
|
|
293
296
|
return
|
|
294
|
-
state.
|
|
297
|
+
state.append_image(assistant_image)
|
|
295
298
|
yield message.AssistantImageDelta(
|
|
296
299
|
response_id=state.response_id, file_path=assistant_image.file_path
|
|
297
300
|
)
|
|
298
301
|
|
|
299
|
-
if (content := getattr(delta, "content", None))
|
|
302
|
+
content_str = str(content) if (content := getattr(delta, "content", None)) is not None else ""
|
|
303
|
+
|
|
304
|
+
if content_str and (
|
|
305
|
+
(state.assistant_parts and isinstance(state.assistant_parts[-1], message.TextPart))
|
|
306
|
+
or content_str.strip()
|
|
307
|
+
):
|
|
300
308
|
metadata_tracker.record_token()
|
|
301
|
-
|
|
302
|
-
state.flush_reasoning()
|
|
303
|
-
elif state.stage == "tool":
|
|
304
|
-
state.flush_tool_calls()
|
|
305
|
-
state.stage = "assistant"
|
|
306
|
-
state.accumulated_content.append(str(content))
|
|
309
|
+
state.append_text(content_str)
|
|
307
310
|
yield message.AssistantTextDelta(
|
|
308
|
-
content=
|
|
311
|
+
content=content_str,
|
|
309
312
|
response_id=state.response_id,
|
|
310
313
|
)
|
|
311
314
|
|
|
312
315
|
# Tool
|
|
313
316
|
if (tool_calls := getattr(delta, "tool_calls", None)) and len(tool_calls) > 0:
|
|
314
317
|
metadata_tracker.record_token()
|
|
315
|
-
if state.stage == "reasoning":
|
|
316
|
-
state.flush_reasoning()
|
|
317
|
-
elif state.stage == "assistant":
|
|
318
|
-
state.flush_assistant()
|
|
319
|
-
state.stage = "tool"
|
|
320
318
|
for tc in tool_calls:
|
|
321
|
-
|
|
322
|
-
|
|
319
|
+
tc_index = getattr(tc, "index", None)
|
|
320
|
+
if not isinstance(tc_index, int):
|
|
321
|
+
continue
|
|
322
|
+
fn = getattr(tc, "function", None)
|
|
323
|
+
fn_name = getattr(fn, "name", None) if fn is not None else None
|
|
324
|
+
fn_args = getattr(fn, "arguments", None) if fn is not None else None
|
|
325
|
+
tc_id = getattr(tc, "id", None)
|
|
326
|
+
|
|
327
|
+
if fn_name and state.mark_tool_start_emitted(tc_index):
|
|
323
328
|
yield message.ToolCallStartDelta(
|
|
324
329
|
response_id=state.response_id,
|
|
325
|
-
call_id=
|
|
326
|
-
name=
|
|
330
|
+
call_id=str(tc_id or ""),
|
|
331
|
+
name=str(fn_name),
|
|
327
332
|
)
|
|
328
|
-
|
|
333
|
+
state.upsert_tool_call(
|
|
334
|
+
tc_index=tc_index,
|
|
335
|
+
call_id=str(tc_id) if isinstance(tc_id, str) else None,
|
|
336
|
+
name=str(fn_name) if isinstance(fn_name, str) else None,
|
|
337
|
+
arguments=str(fn_args) if isinstance(fn_args, str) else None,
|
|
338
|
+
)
|
|
329
339
|
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
330
340
|
yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
341
|
+
state.stop_reason = "error"
|
|
331
342
|
|
|
332
|
-
parts
|
|
343
|
+
# On error, use partial parts (excluding incomplete tool calls) for potential prefill on retry
|
|
344
|
+
parts = state.get_partial_parts() if state.stop_reason == "error" else list(state.assistant_parts)
|
|
333
345
|
if parts:
|
|
334
346
|
metadata_tracker.record_token()
|
|
335
347
|
metadata_tracker.set_response_id(state.response_id)
|
|
@@ -353,15 +365,16 @@ class OpenAILLMStream(LLMStreamABC):
|
|
|
353
365
|
metadata_tracker: MetadataTracker,
|
|
354
366
|
reasoning_handler: ReasoningHandlerABC,
|
|
355
367
|
on_event: Callable[[object], None] | None = None,
|
|
368
|
+
provider_prefix: str = "",
|
|
356
369
|
) -> None:
|
|
357
370
|
self._stream = stream
|
|
358
371
|
self._param = param
|
|
359
372
|
self._metadata_tracker = metadata_tracker
|
|
360
373
|
self._reasoning_handler = reasoning_handler
|
|
361
374
|
self._on_event = on_event
|
|
375
|
+
self._provider_prefix = provider_prefix
|
|
362
376
|
self._state = StreamStateManager(
|
|
363
377
|
param_model=str(param.model_id),
|
|
364
|
-
reasoning_flusher=reasoning_handler.flush,
|
|
365
378
|
)
|
|
366
379
|
self._completed = False
|
|
367
380
|
|
|
@@ -376,6 +389,7 @@ class OpenAILLMStream(LLMStreamABC):
|
|
|
376
389
|
metadata_tracker=self._metadata_tracker,
|
|
377
390
|
reasoning_handler=self._reasoning_handler,
|
|
378
391
|
on_event=self._on_event,
|
|
392
|
+
provider_prefix=self._provider_prefix,
|
|
379
393
|
):
|
|
380
394
|
if isinstance(item, message.AssistantMessage):
|
|
381
395
|
self._completed = True
|
|
@@ -30,7 +30,6 @@ class ReasoningStreamHandler(ReasoningHandlerABC):
|
|
|
30
30
|
self._response_id = response_id
|
|
31
31
|
|
|
32
32
|
self._reasoning_id: str | None = None
|
|
33
|
-
self._accumulated_reasoning: list[str] = []
|
|
34
33
|
|
|
35
34
|
def set_response_id(self, response_id: str | None) -> None:
|
|
36
35
|
"""Update the response identifier used for emitted items."""
|
|
@@ -62,44 +61,20 @@ class ReasoningStreamHandler(ReasoningHandlerABC):
|
|
|
62
61
|
|
|
63
62
|
if detail.type == "reasoning.encrypted":
|
|
64
63
|
self._reasoning_id = detail.id
|
|
65
|
-
# Flush accumulated text before encrypted content
|
|
66
|
-
items.extend(self._flush_text())
|
|
67
64
|
if signature_part := self._build_signature_part(detail.data, detail):
|
|
68
65
|
items.append(signature_part)
|
|
69
66
|
return items
|
|
70
67
|
|
|
71
68
|
if detail.type in ("reasoning.text", "reasoning.summary"):
|
|
72
69
|
self._reasoning_id = detail.id
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
self._accumulated_reasoning.append(text)
|
|
77
|
-
# Flush on signature (encrypted content)
|
|
78
|
-
if detail.signature:
|
|
79
|
-
items.extend(self._flush_text())
|
|
80
|
-
if signature_part := self._build_signature_part(detail.signature, detail):
|
|
81
|
-
items.append(signature_part)
|
|
70
|
+
# Signature (Anthropic-style) can arrive alongside text/summary.
|
|
71
|
+
if detail.signature and (signature_part := self._build_signature_part(detail.signature, detail)):
|
|
72
|
+
items.append(signature_part)
|
|
82
73
|
|
|
83
74
|
return items
|
|
84
75
|
|
|
85
76
|
def flush(self) -> list[message.Part]:
|
|
86
|
-
|
|
87
|
-
return self._flush_text()
|
|
88
|
-
|
|
89
|
-
def _flush_text(self) -> list[message.Part]:
|
|
90
|
-
"""Flush accumulated reasoning text as a single part."""
|
|
91
|
-
if not self._accumulated_reasoning:
|
|
92
|
-
return []
|
|
93
|
-
item = self._build_text_part("".join(self._accumulated_reasoning))
|
|
94
|
-
self._accumulated_reasoning = []
|
|
95
|
-
return [item]
|
|
96
|
-
|
|
97
|
-
def _build_text_part(self, content: str) -> message.ThinkingTextPart:
|
|
98
|
-
return message.ThinkingTextPart(
|
|
99
|
-
id=self._reasoning_id,
|
|
100
|
-
text=content,
|
|
101
|
-
model_id=self._param_model,
|
|
102
|
-
)
|
|
77
|
+
return []
|
|
103
78
|
|
|
104
79
|
def _build_signature_part(
|
|
105
80
|
self,
|
|
@@ -1,35 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from klaude_code.
|
|
3
|
+
from klaude_code.llm.stream_parts import degrade_thinking_to_text
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
def degrade_thinking_to_text(parts: list[message.Part]) -> list[message.Part]:
|
|
7
|
-
"""Degrade thinking parts into a regular TextPart.
|
|
8
|
-
|
|
9
|
-
Some providers require thinking signatures/encrypted content to be echoed back
|
|
10
|
-
for subsequent calls. During interruption we cannot reliably determine whether
|
|
11
|
-
we have a complete signature, so we persist thinking as plain text instead.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
thinking_texts: list[str] = []
|
|
15
|
-
non_thinking_parts: list[message.Part] = []
|
|
16
|
-
|
|
17
|
-
for part in parts:
|
|
18
|
-
if isinstance(part, message.ThinkingTextPart):
|
|
19
|
-
text = part.text
|
|
20
|
-
if text and text.strip():
|
|
21
|
-
thinking_texts.append(text)
|
|
22
|
-
continue
|
|
23
|
-
if isinstance(part, message.ThinkingSignaturePart):
|
|
24
|
-
continue
|
|
25
|
-
non_thinking_parts.append(part)
|
|
26
|
-
|
|
27
|
-
if not thinking_texts:
|
|
28
|
-
return non_thinking_parts
|
|
29
|
-
|
|
30
|
-
joined = "\n".join(thinking_texts).strip()
|
|
31
|
-
thinking_block = f"<thinking>\n{joined}\n</thinking>"
|
|
32
|
-
if non_thinking_parts:
|
|
33
|
-
thinking_block += "\n\n"
|
|
34
|
-
|
|
35
|
-
return [message.TextPart(text=thinking_block), *non_thinking_parts]
|
|
5
|
+
__all__ = ["degrade_thinking_to_text"]
|