klaude-code 2.5.2-py3-none-any.whl → 2.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/__init__.py +10 -0
- klaude_code/auth/env.py +77 -0
- klaude_code/cli/auth_cmd.py +89 -21
- klaude_code/cli/config_cmd.py +5 -5
- klaude_code/cli/cost_cmd.py +167 -68
- klaude_code/cli/main.py +51 -27
- klaude_code/cli/self_update.py +7 -7
- klaude_code/config/assets/builtin_config.yaml +45 -24
- klaude_code/config/builtin_config.py +23 -9
- klaude_code/config/config.py +19 -9
- klaude_code/config/model_matcher.py +1 -1
- klaude_code/const.py +2 -1
- klaude_code/core/tool/file/edit_tool.py +1 -1
- klaude_code/core/tool/file/read_tool.py +2 -2
- klaude_code/core/tool/file/write_tool.py +1 -1
- klaude_code/core/turn.py +21 -4
- klaude_code/llm/anthropic/client.py +75 -50
- klaude_code/llm/anthropic/input.py +20 -9
- klaude_code/llm/google/client.py +235 -148
- klaude_code/llm/google/input.py +44 -36
- klaude_code/llm/openai_compatible/stream.py +114 -100
- klaude_code/llm/openrouter/client.py +1 -0
- klaude_code/llm/openrouter/reasoning.py +4 -29
- klaude_code/llm/partial_message.py +2 -32
- klaude_code/llm/responses/client.py +99 -81
- klaude_code/llm/responses/input.py +11 -25
- klaude_code/llm/stream_parts.py +94 -0
- klaude_code/log.py +57 -0
- klaude_code/protocol/events.py +214 -0
- klaude_code/protocol/sub_agent/image_gen.py +0 -4
- klaude_code/session/session.py +51 -18
- klaude_code/tui/command/fork_session_cmd.py +14 -23
- klaude_code/tui/command/model_picker.py +2 -17
- klaude_code/tui/command/resume_cmd.py +2 -18
- klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
- klaude_code/tui/command/thinking_cmd.py +2 -14
- klaude_code/tui/commands.py +0 -5
- klaude_code/tui/components/common.py +1 -1
- klaude_code/tui/components/metadata.py +21 -21
- klaude_code/tui/components/rich/quote.py +36 -8
- klaude_code/tui/components/rich/theme.py +2 -0
- klaude_code/tui/components/sub_agent.py +6 -0
- klaude_code/tui/display.py +11 -1
- klaude_code/tui/input/completers.py +11 -7
- klaude_code/tui/input/prompt_toolkit.py +3 -1
- klaude_code/tui/machine.py +108 -56
- klaude_code/tui/renderer.py +4 -65
- klaude_code/tui/terminal/selector.py +174 -31
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/METADATA +23 -31
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/RECORD +52 -58
- klaude_code/cli/session_cmd.py +0 -96
- klaude_code/protocol/events/__init__.py +0 -63
- klaude_code/protocol/events/base.py +0 -18
- klaude_code/protocol/events/chat.py +0 -30
- klaude_code/protocol/events/lifecycle.py +0 -23
- klaude_code/protocol/events/metadata.py +0 -16
- klaude_code/protocol/events/streaming.py +0 -43
- klaude_code/protocol/events/system.py +0 -56
- klaude_code/protocol/events/tools.py +0 -27
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/WHEEL +0 -0
- {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/client.py
CHANGED
@@ -4,32 +4,72 @@
 # pyright: reportAttributeAccessIssue=false

 import json
+from base64 import b64encode
 from collections.abc import AsyncGenerator, AsyncIterator
-from typing import Any,
+from typing import Any, cast, override
 from uuid import uuid4

 import httpx
 from google.genai import Client
 from google.genai.errors import APIError, ClientError, ServerError
 from google.genai.types import (
+    ContentListUnion,
     FunctionCallingConfig,
     FunctionCallingConfigMode,
     GenerateContentConfig,
+    GenerateContentResponse,
+    GenerateContentResponseUsageMetadata,
     HttpOptions,
+    PartialArg,
     ThinkingConfig,
+    ThinkingLevel,
     ToolConfig,
-
+)
+from google.genai.types import (
+    ImageConfig as GoogleImageConfig,
 )

 from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
+from klaude_code.llm.image import save_assistant_image
 from klaude_code.llm.input_common import apply_config_defaults
-from klaude_code.llm.partial_message import degrade_thinking_to_text
 from klaude_code.llm.registry import register
+from klaude_code.llm.stream_parts import (
+    append_text_part,
+    append_thinking_text_part,
+    build_partial_message,
+    build_partial_parts,
+)
 from klaude_code.llm.usage import MetadataTracker, error_llm_stream
-from klaude_code.log import DebugType, log_debug
+from klaude_code.log import DebugType, debug_json, log_debug
 from klaude_code.protocol import llm_param, message, model

+# Unified format for Google thought signatures
+GOOGLE_THOUGHT_SIGNATURE_FORMAT = "google"
+
+# Synthetic signature for image parts that need one but don't have it.
+# See: https://ai.google.dev/gemini-api/docs/thought-signatures
+SYNTHETIC_THOUGHT_SIGNATURE = b"skip_thought_signature_validator"
+
+
+def support_thinking(model_id: str | None) -> bool:
+    return bool(model_id) and ("gemini-3" in model_id or "gemini-2.5-pro" in model_id)
+
+
+def convert_gemini_thinking_level(reasoning_effort: str | None) -> ThinkingLevel | None:
+    """Convert reasoning_effort to Gemini ThinkingLevel."""
+    if reasoning_effort is None:
+        return None
+    mapping: dict[str, ThinkingLevel] = {
+        "xhigh": ThinkingLevel.HIGH,
+        "high": ThinkingLevel.HIGH,
+        "medium": ThinkingLevel.MEDIUM,
+        "low": ThinkingLevel.LOW,
+        "minimal": ThinkingLevel.MINIMAL,
+        "none": ThinkingLevel.MINIMAL,
+    }
+    return mapping.get(reasoning_effort)
+

 def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
     tool_list = convert_tool_schema(param.tools)
@@ -39,16 +79,27 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
     tool_config = ToolConfig(
         function_calling_config=FunctionCallingConfig(
             mode=FunctionCallingConfigMode.AUTO,
-            # Gemini streams tool args; keep this enabled to maximize fidelity.
-            stream_function_call_arguments=True,
         )
     )

     thinking_config: ThinkingConfig | None = None
-    if param.
-        thinking_config = ThinkingConfig(
+    if support_thinking(param.model_id):
+        thinking_config: ThinkingConfig | None = ThinkingConfig(
             include_thoughts=True,
-
+        )
+
+        if param.thinking:
+            if param.thinking.budget_tokens:
+                thinking_config.thinking_budget = param.thinking.budget_tokens
+            if param.thinking.reasoning_effort:
+                thinking_config.thinking_level = convert_gemini_thinking_level(param.thinking.reasoning_effort)
+
+    # ImageGen per-call overrides
+    image_config: GoogleImageConfig | None = None
+    if param.image_config is not None:
+        image_config = GoogleImageConfig(
+            aspect_ratio=param.image_config.aspect_ratio,
+            image_size=param.image_config.image_size,
         )

     return GenerateContentConfig(
@@ -58,11 +109,12 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
         tools=cast(Any, tool_list) if tool_list else None,
         tool_config=tool_config,
         thinking_config=thinking_config,
+        image_config=image_config,
     )


 def _usage_from_metadata(
-    usage:
+    usage: GenerateContentResponseUsageMetadata | None,
     *,
     context_limit: int | None,
     max_tokens: int | None,
@@ -72,9 +124,16 @@ def _usage_from_metadata(

     cached = usage.cached_content_token_count or 0
     prompt = usage.prompt_token_count or 0
-    response = usage.
+    response = usage.candidates_token_count or 0
     thoughts = usage.thoughts_token_count or 0

+    # Extract image tokens from candidates_tokens_details
+    image_tokens = 0
+    if usage.candidates_tokens_details:
+        for detail in usage.candidates_tokens_details:
+            if detail.modality and detail.modality.name == "IMAGE" and detail.token_count:
+                image_tokens += detail.token_count
+
     total = usage.total_token_count
     if total is None:
         total = prompt + cached + response + thoughts
@@ -84,30 +143,29 @@
         cached_tokens=cached,
         output_tokens=response + thoughts,
         reasoning_tokens=thoughts,
+        image_tokens=image_tokens,
         context_size=total,
         context_limit=context_limit,
         max_tokens=max_tokens,
     )


-def _partial_arg_value(partial:
-    if
+def _partial_arg_value(partial: PartialArg) -> str | float | bool | None:
+    if partial.string_value is not None:
         return partial.string_value
-    if
+    if partial.number_value is not None:
         return partial.number_value
-    if
+    if partial.bool_value is not None:
         return partial.bool_value
-    if getattr(partial, "null_value", None) is not None:
-        return None
     return None


-def _merge_partial_args(dst: dict[str, Any], partial_args: list[
+def _merge_partial_args(dst: dict[str, Any], partial_args: list[PartialArg] | None) -> None:
     if not partial_args:
         return
     for partial in partial_args:
-        json_path =
-        if not
+        json_path = partial.json_path
+        if not json_path or not json_path.startswith("$."):
             continue
         key = json_path[2:]
         if not key or any(ch in key for ch in "[]"):
@@ -115,6 +173,15 @@ def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) ->
         dst[key] = _partial_arg_value(partial)


+def _encode_thought_signature(sig: bytes | str | None) -> str | None:
+    """Encode thought signature bytes to base64 string."""
+    if sig is None:
+        return None
+    if isinstance(sig, bytes):
+        return b64encode(sig).decode("ascii")
+    return sig
+
+
 def _map_finish_reason(reason: str) -> model.StopReason | None:
     normalized = reason.strip().lower()
     mapping: dict[str, model.StopReason] = {
@@ -139,202 +206,216 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
 class GoogleStreamStateManager:
     """Manages streaming state for Google LLM responses.

-    Accumulates
-
+    Accumulates parts directly during streaming to support get_partial_message()
+    for cancellation scenarios. Merges consecutive text parts of the same type.
     """

     def __init__(self, param_model: str) -> None:
         self.param_model = param_model
-        self.accumulated_thoughts: list[str] = []
-        self.accumulated_text: list[str] = []
-        self.thought_signature: str | None = None
         self.assistant_parts: list[message.Part] = []
         self.response_id: str | None = None
         self.stop_reason: model.StopReason | None = None

-    def
-        """
-
-
-
-
-
-
+    def append_thinking_text(self, text: str) -> None:
+        """Append thinking text, merging with previous ThinkingTextPart if possible."""
+        append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
+
+    def append_text(self, text: str) -> None:
+        """Append text, merging with previous TextPart if possible."""
+        append_text_part(self.assistant_parts, text)
+
+    def append_thinking_signature(self, signature: str) -> None:
+        """Append a ThinkingSignaturePart after the current part."""
+        self.assistant_parts.append(
+            message.ThinkingSignaturePart(
+                signature=signature,
+                model_id=self.param_model,
+                format=GOOGLE_THOUGHT_SIGNATURE_FORMAT,
             )
-
-
-
-
-
-
-
-
+        )
+
+    def append_image(self, image_part: message.ImageFilePart) -> None:
+        """Append an ImageFilePart."""
+        self.assistant_parts.append(image_part)
+
+    def append_tool_call(self, call_id: str, name: str, arguments_json: str) -> None:
+        """Append a ToolCallPart."""
+        self.assistant_parts.append(
+            message.ToolCallPart(
+                call_id=call_id,
+                tool_name=name,
+                arguments_json=arguments_json,
             )
-
+        )
+
+    def get_partial_parts(self) -> list[message.Part]:
+        """Get accumulated parts excluding tool calls, with thinking degraded.

-
-        """
-
-        return
-        self.assistant_parts.append(message.TextPart(text="".join(self.accumulated_text)))
-        self.accumulated_text.clear()
+        Filters out ToolCallPart and applies degrade_thinking_to_text.
+        """
+        return build_partial_parts(self.assistant_parts)

     def get_partial_message(self) -> message.AssistantMessage | None:
         """Build a partial AssistantMessage from accumulated state.

-        Flushes all accumulated content and returns the message.
         Returns None if no content has been accumulated yet.
         """
-        self.
-        self.flush_text()
-
-        filtered_parts: list[message.Part] = []
-        for part in self.assistant_parts:
-            if isinstance(part, message.ToolCallPart):
-                continue
-            filtered_parts.append(part)
-
-        filtered_parts = degrade_thinking_to_text(filtered_parts)
-
-        if not filtered_parts:
-            return None
-        return message.AssistantMessage(
-            parts=filtered_parts,
-            response_id=self.response_id,
-            stop_reason="aborted",
-        )
+        return build_partial_message(self.assistant_parts, response_id=self.response_id)


 async def parse_google_stream(
-    stream: AsyncIterator[
+    stream: AsyncIterator[GenerateContentResponse],
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
     state: GoogleStreamStateManager,
 ) -> AsyncGenerator[message.LLMStreamItem]:
-    stage: Literal["waiting", "thinking", "assistant", "tool"] = "waiting"
-
     # Track tool calls where args arrive as partial updates.
     partial_args_by_call: dict[str, dict[str, Any]] = {}
-    started_tool_calls: dict[str, str] = {}  # call_id -> name
+    started_tool_calls: dict[str, tuple[str, bytes | None]] = {}  # call_id -> (name, thought_signature)
     started_tool_items: set[str] = set()
     completed_tool_items: set[str] = set()

-
+    # Track image index for unique filenames
+    image_index = 0
+
+    last_usage_metadata: GenerateContentResponseUsageMetadata | None = None

     async for chunk in stream:
-        log_debug(
-            chunk.model_dump_json(exclude_none=True),
-            style="blue",
-            debug_type=DebugType.LLM_STREAM,
-        )
+        log_debug(debug_json(chunk.model_dump(exclude_none=True)), style="blue", debug_type=DebugType.LLM_STREAM)

         if state.response_id is None:
-            state.response_id =
+            state.response_id = chunk.response_id or uuid4().hex

-        if
+        if chunk.usage_metadata is not None:
             last_usage_metadata = chunk.usage_metadata

-        candidates =
+        candidates = chunk.candidates or []
         candidate0 = candidates[0] if candidates else None
-        finish_reason =
+        finish_reason = candidate0.finish_reason if candidate0 else None
         if finish_reason is not None:
-
-
-
-            reason_value = getattr(finish_reason, "name", None) or str(finish_reason)
-            state.stop_reason = _map_finish_reason(reason_value)
-        content = getattr(candidate0, "content", None) if candidate0 else None
-        content_parts = getattr(content, "parts", None) if content else None
+            state.stop_reason = _map_finish_reason(finish_reason.name)
+        content = candidate0.content if candidate0 else None
+        content_parts = content.parts if content else None
         if not content_parts:
             continue

         for part in content_parts:
-
+            # Handle text parts (both thought and regular text)
+            if part.text is not None:
                 text = part.text
                 if not text:
                     continue
                 metadata_tracker.record_token()
-
-
-
-
-
-                if
-
+
+                if part.thought is True:
+                    # Thinking text - append and merge with previous ThinkingTextPart
+                    state.append_thinking_text(text)
+                    # Add ThinkingSignaturePart after thinking text if present
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
                    yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
                else:
-
-
-
-
+                    # Regular text - append and merge with previous TextPart
+                    state.append_text(text)
+                    # Regular text parts can also have thought_signature
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
                    yield message.AssistantTextDelta(content=text, response_id=state.response_id)

-
+            # Handle inline_data (image generation responses)
+            inline_data = part.inline_data
+            if inline_data is not None and inline_data.data:
+                # Thought images (interim images produced during thinking) do not
+                # carry thought signatures and must not be treated as response
+                # images for multi-turn history.
+                if part.thought is True:
+                    continue
+                mime_type = inline_data.mime_type or "image/png"
+                encoded_data = b64encode(inline_data.data).decode("ascii")
+                data_url = f"data:{mime_type};base64,{encoded_data}"
+                try:
+                    image_part = save_assistant_image(
+                        data_url=data_url,
+                        session_id=param.session_id,
+                        response_id=state.response_id,
+                        image_index=image_index,
+                    )
+                    image_index += 1
+                    state.append_image(image_part)
+                    # Add ThinkingSignaturePart after image if present, or synthetic signature for thinking models
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
+                    elif support_thinking(param.model_id):
+                        encoded_sig = _encode_thought_signature(SYNTHETIC_THOUGHT_SIGNATURE)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
+                    yield message.AssistantImageDelta(
+                        response_id=state.response_id,
+                        file_path=image_part.file_path,
+                    )
+                except ValueError:
+                    pass  # Skip invalid images
+
+            # Handle function calls
+            function_call = part.function_call
             if function_call is None:
                 continue

             metadata_tracker.record_token()
-            call_id =
-            name =
-
+            call_id = function_call.id or uuid4().hex
+            name = function_call.name or ""
+
+            # Capture thought_signature from the part (required for tools in thinking models)
+            thought_signature = part.thought_signature
+
+            # Store name and thought_signature for later use (partial args / flush)
+            if call_id not in started_tool_calls or (thought_signature and started_tool_calls[call_id][1] is None):
+                started_tool_calls[call_id] = (name, thought_signature)

             if call_id not in started_tool_items:
                 started_tool_items.add(call_id)
                 yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)

-            args_obj =
+            args_obj = function_call.args
             if args_obj is not None:
-
-
-
-
-
-                state.assistant_parts.append(
-                    message.ToolCallPart(
-                        call_id=call_id,
-                        tool_name=name,
-                        arguments_json=json.dumps(args_obj, ensure_ascii=False),
-                    )
-                )
+                # Add ToolCallPart, then ThinkingSignaturePart after it
+                state.append_tool_call(call_id, name, json.dumps(args_obj, ensure_ascii=False))
+                encoded_sig = _encode_thought_signature(thought_signature)
+                if encoded_sig:
+                    state.append_thinking_signature(encoded_sig)
                 completed_tool_items.add(call_id)
                 continue

-            partial_args =
+            partial_args = function_call.partial_args
             if partial_args is not None:
                 acc = partial_args_by_call.setdefault(call_id, {})
                 _merge_partial_args(acc, partial_args)

-            will_continue =
+            will_continue = function_call.will_continue
             if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
-
-
-
-
-
-
-                    message.ToolCallPart(
-                        call_id=call_id,
-                        tool_name=name,
-                        arguments_json=json.dumps(partial_args_by_call[call_id], ensure_ascii=False),
-                    )
-                )
+                # Add ToolCallPart, then ThinkingSignaturePart after it
+                state.append_tool_call(call_id, name, json.dumps(partial_args_by_call[call_id], ensure_ascii=False))
+                stored_sig = started_tool_calls.get(call_id, (name, None))[1]
+                encoded_stored_sig = _encode_thought_signature(stored_sig)
+                if encoded_stored_sig:
+                    state.append_thinking_signature(encoded_stored_sig)
                 completed_tool_items.add(call_id)

     # Flush any pending tool calls that never produced args.
-    for call_id, name in started_tool_calls.items():
+    for call_id, (name, stored_sig) in started_tool_calls.items():
         if call_id in completed_tool_items:
             continue
         args = partial_args_by_call.get(call_id, {})
-        state.
-
-
-
-                arguments_json=json.dumps(args, ensure_ascii=False),
-            )
-        )
-
-    state.flush_thinking()
-    state.flush_text()
+        state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
+        encoded_stored_sig = _encode_thought_signature(stored_sig)
+        if encoded_stored_sig:
+            state.append_thinking_signature(encoded_stored_sig)

     usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
     if usage is not None:
@@ -355,7 +436,7 @@ class GoogleLLMStream(LLMStreamABC):

     def __init__(
         self,
-        stream: AsyncIterator[
+        stream: AsyncIterator[GenerateContentResponse],
         *,
         param: llm_param.LLMCallParameter,
         metadata_tracker: MetadataTracker,
@@ -383,7 +464,14 @@ class GoogleLLMStream(LLMStreamABC):
                 yield item
         except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
             yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
-
+            # Use accumulated parts for potential prefill on retry
+            self._metadata_tracker.set_response_id(self._state.response_id)
+            yield message.AssistantMessage(
+                parts=self._state.get_partial_parts(),
+                response_id=self._state.response_id,
+                usage=self._metadata_tracker.finalize(),
+                stop_reason="error",
+            )

     def get_partial_message(self) -> message.AssistantMessage | None:
         if self._completed:
@@ -419,13 +507,12 @@ class GoogleClient(LLMClientABC):
         config = _build_config(param)

         log_debug(
-
+            debug_json(
                 {
                     "model": str(param.model_id),
                     "contents": [c.model_dump(exclude_none=True) for c in contents],
                    "config": config.model_dump(exclude_none=True),
-                }
-                ensure_ascii=False,
+                }
             ),
             style="yellow",
             debug_type=DebugType.LLM_PAYLOAD,
@@ -434,7 +521,7 @@ class GoogleClient(LLMClientABC):
         try:
             stream = await self.client.aio.models.generate_content_stream(
                 model=str(param.model_id),
-                contents=cast(
+                contents=cast(ContentListUnion, contents),
                 config=config,
             )
         except (APIError, ClientError, ServerError, httpx.HTTPError) as e: