klaude-code 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl
- klaude_code/.DS_Store +0 -0
- klaude_code/cli/auth_cmd.py +2 -13
- klaude_code/cli/cost_cmd.py +10 -10
- klaude_code/cli/list_model.py +8 -0
- klaude_code/cli/main.py +41 -8
- klaude_code/cli/session_cmd.py +2 -11
- klaude_code/config/assets/builtin_config.yaml +45 -26
- klaude_code/config/config.py +30 -7
- klaude_code/config/model_matcher.py +3 -3
- klaude_code/config/sub_agent_model_helper.py +1 -1
- klaude_code/const.py +2 -1
- klaude_code/core/agent_profile.py +1 -0
- klaude_code/core/executor.py +4 -0
- klaude_code/core/loaded_skills.py +36 -0
- klaude_code/core/tool/context.py +1 -3
- klaude_code/core/tool/file/edit_tool.py +1 -1
- klaude_code/core/tool/file/read_tool.py +2 -2
- klaude_code/core/tool/file/write_tool.py +1 -1
- klaude_code/core/turn.py +19 -7
- klaude_code/llm/anthropic/client.py +97 -60
- klaude_code/llm/anthropic/input.py +20 -9
- klaude_code/llm/google/client.py +223 -148
- klaude_code/llm/google/input.py +44 -36
- klaude_code/llm/openai_compatible/stream.py +109 -99
- klaude_code/llm/openrouter/reasoning.py +4 -29
- klaude_code/llm/partial_message.py +2 -32
- klaude_code/llm/responses/client.py +99 -81
- klaude_code/llm/responses/input.py +11 -25
- klaude_code/llm/stream_parts.py +94 -0
- klaude_code/log.py +57 -0
- klaude_code/protocol/events/system.py +3 -0
- klaude_code/protocol/llm_param.py +1 -0
- klaude_code/session/export.py +259 -91
- klaude_code/session/templates/export_session.html +141 -59
- klaude_code/skill/.DS_Store +0 -0
- klaude_code/skill/assets/.DS_Store +0 -0
- klaude_code/skill/loader.py +1 -0
- klaude_code/tui/command/fork_session_cmd.py +14 -23
- klaude_code/tui/command/model_picker.py +2 -17
- klaude_code/tui/command/refresh_cmd.py +2 -0
- klaude_code/tui/command/resume_cmd.py +2 -18
- klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
- klaude_code/tui/command/thinking_cmd.py +2 -14
- klaude_code/tui/components/common.py +1 -1
- klaude_code/tui/components/metadata.py +22 -21
- klaude_code/tui/components/rich/markdown.py +8 -0
- klaude_code/tui/components/rich/quote.py +36 -8
- klaude_code/tui/components/rich/theme.py +2 -0
- klaude_code/tui/components/welcome.py +32 -0
- klaude_code/tui/input/prompt_toolkit.py +3 -1
- klaude_code/tui/machine.py +19 -1
- klaude_code/tui/renderer.py +3 -4
- klaude_code/tui/terminal/selector.py +174 -31
- {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/METADATA +1 -1
- {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/RECORD +57 -53
- klaude_code/skill/assets/jj-workspace/SKILL.md +0 -20
- {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/WHEEL +0 -0
- {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/client.py
CHANGED
@@ -4,32 +4,69 @@
 # pyright: reportAttributeAccessIssue=false
 
 import json
+from base64 import b64encode
 from collections.abc import AsyncGenerator, AsyncIterator
-from typing import Any, Literal, cast, override
+from typing import Any, cast, override
 from uuid import uuid4
 
 import httpx
 from google.genai import Client
 from google.genai.errors import APIError, ClientError, ServerError
 from google.genai.types import (
+    ContentListUnion,
     FunctionCallingConfig,
     FunctionCallingConfigMode,
     GenerateContentConfig,
+    GenerateContentResponse,
+    GenerateContentResponseUsageMetadata,
     HttpOptions,
+    PartialArg,
     ThinkingConfig,
+    ThinkingLevel,
     ToolConfig,
-    UsageMetadata,
 )
 
 from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
+from klaude_code.llm.image import save_assistant_image
 from klaude_code.llm.input_common import apply_config_defaults
-from klaude_code.llm.partial_message import degrade_thinking_to_text
 from klaude_code.llm.registry import register
+from klaude_code.llm.stream_parts import (
+    append_text_part,
+    append_thinking_text_part,
+    build_partial_message,
+    build_partial_parts,
+)
 from klaude_code.llm.usage import MetadataTracker, error_llm_stream
-from klaude_code.log import DebugType, log_debug
+from klaude_code.log import DebugType, debug_json, log_debug
 from klaude_code.protocol import llm_param, message, model
 
+# Unified format for Google thought signatures
+GOOGLE_THOUGHT_SIGNATURE_FORMAT = "google"
+
+# Synthetic signature for image parts that need one but don't have it.
+# See: https://ai.google.dev/gemini-api/docs/thought-signatures
+SYNTHETIC_THOUGHT_SIGNATURE = b"skip_thought_signature_validator"
+
+
+def support_thinking(model_id: str | None) -> bool:
+    return bool(model_id) and ("gemini-3" in model_id or "gemini-2.5-pro" in model_id)
+
+
+def convert_gemini_thinking_level(reasoning_effort: str | None) -> ThinkingLevel | None:
+    """Convert reasoning_effort to Gemini ThinkingLevel."""
+    if reasoning_effort is None:
+        return None
+    mapping: dict[str, ThinkingLevel] = {
+        "xhigh": ThinkingLevel.HIGH,
+        "high": ThinkingLevel.HIGH,
+        "medium": ThinkingLevel.MEDIUM,
+        "low": ThinkingLevel.LOW,
+        "minimal": ThinkingLevel.MINIMAL,
+        "none": ThinkingLevel.MINIMAL,
+    }
+    return mapping.get(reasoning_effort)
+
 
 def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
     tool_list = convert_tool_schema(param.tools)
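
The two helpers added above gate the rest of this diff. A standalone sketch of their behavior; the bodies are repeated verbatim from the hunk so it runs on its own, and the model IDs in the asserts are illustrative:

    from google.genai.types import ThinkingLevel

    def support_thinking(model_id: str | None) -> bool:
        return bool(model_id) and ("gemini-3" in model_id or "gemini-2.5-pro" in model_id)

    def convert_gemini_thinking_level(reasoning_effort: str | None) -> ThinkingLevel | None:
        if reasoning_effort is None:
            return None
        mapping: dict[str, ThinkingLevel] = {
            "xhigh": ThinkingLevel.HIGH,
            "high": ThinkingLevel.HIGH,
            "medium": ThinkingLevel.MEDIUM,
            "low": ThinkingLevel.LOW,
            "minimal": ThinkingLevel.MINIMAL,
            "none": ThinkingLevel.MINIMAL,
        }
        return mapping.get(reasoning_effort)

    assert support_thinking("gemini-3-pro-preview")           # gets a ThinkingConfig
    assert not support_thinking("gemini-2.0-flash")           # no ThinkingConfig built
    assert convert_gemini_thinking_level("xhigh") is ThinkingLevel.HIGH  # clamped; no XHIGH level
    assert convert_gemini_thinking_level("unknown") is None   # unmapped efforts fall through
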
@@ -39,18 +76,21 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
     tool_config = ToolConfig(
         function_calling_config=FunctionCallingConfig(
             mode=FunctionCallingConfigMode.AUTO,
-            # Gemini streams tool args; keep this enabled to maximize fidelity.
-            stream_function_call_arguments=True,
         )
     )
 
     thinking_config: ThinkingConfig | None = None
-    if param.thinking:
-        thinking_config = ThinkingConfig(
+    if support_thinking(param.model_id):
+        thinking_config: ThinkingConfig | None = ThinkingConfig(
             include_thoughts=True,
-            thinking_budget=param.thinking.budget_tokens,
         )
 
+        if param.thinking:
+            if param.thinking.budget_tokens:
+                thinking_config.thinking_budget = param.thinking.budget_tokens
+            if param.thinking.reasoning_effort:
+                thinking_config.thinking_level = convert_gemini_thinking_level(param.thinking.reasoning_effort)
+
     return GenerateContentConfig(
         system_instruction=param.system,
         temperature=param.temperature,
@@ -62,7 +102,7 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
 
 
 def _usage_from_metadata(
-    usage: UsageMetadata | None,
+    usage: GenerateContentResponseUsageMetadata | None,
     *,
     context_limit: int | None,
     max_tokens: int | None,
@@ -72,9 +112,16 @@ def _usage_from_metadata(
 
     cached = usage.cached_content_token_count or 0
     prompt = usage.prompt_token_count or 0
-    response = usage.response_token_count or 0
+    response = usage.candidates_token_count or 0
     thoughts = usage.thoughts_token_count or 0
 
+    # Extract image tokens from candidates_tokens_details
+    image_tokens = 0
+    if usage.candidates_tokens_details:
+        for detail in usage.candidates_tokens_details:
+            if detail.modality and detail.modality.name == "IMAGE" and detail.token_count:
+                image_tokens += detail.token_count
+
     total = usage.total_token_count
     if total is None:
         total = prompt + cached + response + thoughts
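
The image-token accounting added above walks candidates_tokens_details and sums entries whose modality is IMAGE. A small sketch of the same loop over hand-built SDK values (assumption: ModalityTokenCount and MediaModality from google.genai.types are the element types, which the attribute access above implies):

    from google.genai.types import MediaModality, ModalityTokenCount

    details = [
        ModalityTokenCount(modality=MediaModality.TEXT, token_count=120),
        ModalityTokenCount(modality=MediaModality.IMAGE, token_count=1290),
    ]

    image_tokens = 0
    for detail in details:
        if detail.modality and detail.modality.name == "IMAGE" and detail.token_count:
            image_tokens += detail.token_count
    assert image_tokens == 1290
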
@@ -84,30 +131,29 @@ def _usage_from_metadata(
         cached_tokens=cached,
         output_tokens=response + thoughts,
         reasoning_tokens=thoughts,
+        image_tokens=image_tokens,
         context_size=total,
         context_limit=context_limit,
         max_tokens=max_tokens,
     )
 
 
-def _partial_arg_value(partial: Any) -> str | float | bool | None:
-    if getattr(partial, "string_value", None) is not None:
+def _partial_arg_value(partial: PartialArg) -> str | float | bool | None:
+    if partial.string_value is not None:
         return partial.string_value
-    if getattr(partial, "number_value", None) is not None:
+    if partial.number_value is not None:
         return partial.number_value
-    if getattr(partial, "bool_value", None) is not None:
+    if partial.bool_value is not None:
         return partial.bool_value
-    if getattr(partial, "null_value", None) is not None:
-        return None
     return None
 
 
-def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) -> None:
+def _merge_partial_args(dst: dict[str, Any], partial_args: list[PartialArg] | None) -> None:
     if not partial_args:
         return
     for partial in partial_args:
-        json_path = getattr(partial, "json_path", None)
-        if not isinstance(json_path, str) or not json_path.startswith("$."):
+        json_path = partial.json_path
+        if not json_path or not json_path.startswith("$."):
             continue
         key = json_path[2:]
         if not key or any(ch in key for ch in "[]"):
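
For reference, the partial-args helpers above exercised end to end. The function bodies are repeated from the hunk so the sketch is self-contained; the PartialArg values are invented:

    from typing import Any

    from google.genai.types import PartialArg

    def _partial_arg_value(partial: PartialArg) -> str | float | bool | None:
        if partial.string_value is not None:
            return partial.string_value
        if partial.number_value is not None:
            return partial.number_value
        if partial.bool_value is not None:
            return partial.bool_value
        return None

    def _merge_partial_args(dst: dict[str, Any], partial_args: list[PartialArg] | None) -> None:
        if not partial_args:
            return
        for partial in partial_args:
            json_path = partial.json_path
            if not json_path or not json_path.startswith("$."):
                continue
            key = json_path[2:]
            if not key or any(ch in key for ch in "[]"):
                continue
            dst[key] = _partial_arg_value(partial)

    # Only flat top-level paths ("$.key") merge; nested paths are skipped.
    acc: dict[str, Any] = {}
    _merge_partial_args(acc, [
        PartialArg(json_path="$.path", string_value="src/main.py"),
        PartialArg(json_path="$.limit", number_value=100),
        PartialArg(json_path="$.items[0]", string_value="nested, skipped"),
    ])
    assert acc == {"path": "src/main.py", "limit": 100}
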
@@ -115,6 +161,15 @@ def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) ->
         dst[key] = _partial_arg_value(partial)
 
 
+def _encode_thought_signature(sig: bytes | str | None) -> str | None:
+    """Encode thought signature bytes to base64 string."""
+    if sig is None:
+        return None
+    if isinstance(sig, bytes):
+        return b64encode(sig).decode("ascii")
+    return sig
+
+
 def _map_finish_reason(reason: str) -> model.StopReason | None:
     normalized = reason.strip().lower()
     mapping: dict[str, model.StopReason] = {
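
The new encoder normalizes the SDK's bytes signatures to base64 text and passes strings through unchanged. A quick self-contained check (body repeated verbatim from the hunk above):

    from base64 import b64encode

    def _encode_thought_signature(sig: bytes | str | None) -> str | None:
        if sig is None:
            return None
        if isinstance(sig, bytes):
            return b64encode(sig).decode("ascii")
        return sig

    assert _encode_thought_signature(None) is None
    assert _encode_thought_signature("already-a-string") == "already-a-string"
    assert (
        _encode_thought_signature(b"skip_thought_signature_validator")
        == "c2tpcF90aG91Z2h0X3NpZ25hdHVyZV92YWxpZGF0b3I="
    )
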
@@ -139,202 +194,216 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
 class GoogleStreamStateManager:
     """Manages streaming state for Google LLM responses.
 
-    Accumulates
-
+    Accumulates parts directly during streaming to support get_partial_message()
+    for cancellation scenarios. Merges consecutive text parts of the same type.
     """
 
     def __init__(self, param_model: str) -> None:
         self.param_model = param_model
-        self.accumulated_thoughts: list[str] = []
-        self.accumulated_text: list[str] = []
-        self.thought_signature: str | None = None
         self.assistant_parts: list[message.Part] = []
         self.response_id: str | None = None
         self.stop_reason: model.StopReason | None = None
 
-    def
-        """
-
-
-
-
-
-
+    def append_thinking_text(self, text: str) -> None:
+        """Append thinking text, merging with previous ThinkingTextPart if possible."""
+        append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
+
+    def append_text(self, text: str) -> None:
+        """Append text, merging with previous TextPart if possible."""
+        append_text_part(self.assistant_parts, text)
+
+    def append_thinking_signature(self, signature: str) -> None:
+        """Append a ThinkingSignaturePart after the current part."""
+        self.assistant_parts.append(
+            message.ThinkingSignaturePart(
+                signature=signature,
+                model_id=self.param_model,
+                format=GOOGLE_THOUGHT_SIGNATURE_FORMAT,
             )
-
-
-
-
-
-
-
-
+        )
+
+    def append_image(self, image_part: message.ImageFilePart) -> None:
+        """Append an ImageFilePart."""
+        self.assistant_parts.append(image_part)
+
+    def append_tool_call(self, call_id: str, name: str, arguments_json: str) -> None:
+        """Append a ToolCallPart."""
+        self.assistant_parts.append(
+            message.ToolCallPart(
+                call_id=call_id,
+                tool_name=name,
+                arguments_json=arguments_json,
             )
-
+        )
 
-    def
-        """
-
-
-
-        self.
+    def get_partial_parts(self) -> list[message.Part]:
+        """Get accumulated parts excluding tool calls, with thinking degraded.
+
+        Filters out ToolCallPart and applies degrade_thinking_to_text.
+        """
+        return build_partial_parts(self.assistant_parts)
 
     def get_partial_message(self) -> message.AssistantMessage | None:
         """Build a partial AssistantMessage from accumulated state.
 
-        Flushes all accumulated content and returns the message.
         Returns None if no content has been accumulated yet.
         """
-        self.flush_thinking()
-        self.flush_text()
-
-        filtered_parts: list[message.Part] = []
-        for part in self.assistant_parts:
-            if isinstance(part, message.ToolCallPart):
-                continue
-            filtered_parts.append(part)
-
-        filtered_parts = degrade_thinking_to_text(filtered_parts)
-
-        if not filtered_parts:
-            return None
-        return message.AssistantMessage(
-            parts=filtered_parts,
-            response_id=self.response_id,
-            stop_reason="aborted",
-        )
+        return build_partial_message(self.assistant_parts, response_id=self.response_id)
 
 
 async def parse_google_stream(
-    stream: AsyncIterator[Any],
+    stream: AsyncIterator[GenerateContentResponse],
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
     state: GoogleStreamStateManager,
 ) -> AsyncGenerator[message.LLMStreamItem]:
-    stage: Literal["waiting", "thinking", "assistant", "tool"] = "waiting"
-
     # Track tool calls where args arrive as partial updates.
     partial_args_by_call: dict[str, dict[str, Any]] = {}
-    started_tool_calls: dict[str, str] = {}  # call_id -> name
+    started_tool_calls: dict[str, tuple[str, bytes | None]] = {}  # call_id -> (name, thought_signature)
     started_tool_items: set[str] = set()
     completed_tool_items: set[str] = set()
 
-    last_usage_metadata: UsageMetadata | None = None
+    # Track image index for unique filenames
+    image_index = 0
+
+    last_usage_metadata: GenerateContentResponseUsageMetadata | None = None
 
     async for chunk in stream:
-        log_debug(
-            chunk.model_dump_json(exclude_none=True),
-            style="blue",
-            debug_type=DebugType.LLM_STREAM,
-        )
+        log_debug(debug_json(chunk.model_dump(exclude_none=True)), style="blue", debug_type=DebugType.LLM_STREAM)
 
         if state.response_id is None:
-            state.response_id = getattr(chunk, "response_id", None) or uuid4().hex
+            state.response_id = chunk.response_id or uuid4().hex
 
-        if getattr(chunk, "usage_metadata", None) is not None:
+        if chunk.usage_metadata is not None:
             last_usage_metadata = chunk.usage_metadata
 
-        candidates = getattr(chunk, "candidates", None) or []
+        candidates = chunk.candidates or []
         candidate0 = candidates[0] if candidates else None
-        finish_reason = getattr(candidate0, "finish_reason", None) if candidate0 else None
+        finish_reason = candidate0.finish_reason if candidate0 else None
         if finish_reason is not None:
-
-
-
-            reason_value = getattr(finish_reason, "name", None) or str(finish_reason)
-            state.stop_reason = _map_finish_reason(reason_value)
-        content = getattr(candidate0, "content", None) if candidate0 else None
-        content_parts = getattr(content, "parts", None) if content else None
+            state.stop_reason = _map_finish_reason(finish_reason.name)
+        content = candidate0.content if candidate0 else None
+        content_parts = content.parts if content else None
         if not content_parts:
             continue
 
         for part in content_parts:
-            if getattr(part, "text", None) is not None:
+            # Handle text parts (both thought and regular text)
+            if part.text is not None:
                 text = part.text
                 if not text:
                     continue
                 metadata_tracker.record_token()
-
-
-
-
-
-                if
-
+
+                if part.thought is True:
+                    # Thinking text - append and merge with previous ThinkingTextPart
+                    state.append_thinking_text(text)
+                    # Add ThinkingSignaturePart after thinking text if present
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
                     yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
                 else:
-
-
-
-
+                    # Regular text - append and merge with previous TextPart
+                    state.append_text(text)
+                    # Regular text parts can also have thought_signature
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
                     yield message.AssistantTextDelta(content=text, response_id=state.response_id)
 
-            function_call = getattr(part, "function_call", None)
+            # Handle inline_data (image generation responses)
+            inline_data = part.inline_data
+            if inline_data is not None and inline_data.data:
+                # Thought images (interim images produced during thinking) do not
+                # carry thought signatures and must not be treated as response
+                # images for multi-turn history.
+                if part.thought is True:
+                    continue
+                mime_type = inline_data.mime_type or "image/png"
+                encoded_data = b64encode(inline_data.data).decode("ascii")
+                data_url = f"data:{mime_type};base64,{encoded_data}"
+                try:
+                    image_part = save_assistant_image(
+                        data_url=data_url,
+                        session_id=param.session_id,
+                        response_id=state.response_id,
+                        image_index=image_index,
+                    )
+                    image_index += 1
+                    state.append_image(image_part)
+                    # Add ThinkingSignaturePart after image if present, or synthetic signature for thinking models
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
+                    elif support_thinking(param.model_id):
+                        encoded_sig = _encode_thought_signature(SYNTHETIC_THOUGHT_SIGNATURE)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
+                    yield message.AssistantImageDelta(
+                        response_id=state.response_id,
+                        file_path=image_part.file_path,
+                    )
+                except ValueError:
+                    pass  # Skip invalid images
+
+            # Handle function calls
+            function_call = part.function_call
             if function_call is None:
                 continue
 
             metadata_tracker.record_token()
-            call_id = getattr(function_call, "id", None) or uuid4().hex
-            name = getattr(function_call, "name", None) or ""
-            started_tool_calls[call_id] = name
+            call_id = function_call.id or uuid4().hex
+            name = function_call.name or ""
+
+            # Capture thought_signature from the part (required for tools in thinking models)
+            thought_signature = part.thought_signature
+
+            # Store name and thought_signature for later use (partial args / flush)
+            if call_id not in started_tool_calls or (thought_signature and started_tool_calls[call_id][1] is None):
+                started_tool_calls[call_id] = (name, thought_signature)
 
             if call_id not in started_tool_items:
                 started_tool_items.add(call_id)
                 yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
 
-            args_obj = getattr(function_call, "args", None)
+            args_obj = function_call.args
             if args_obj is not None:
-
-
-
-
-
-                state.assistant_parts.append(
-                    message.ToolCallPart(
-                        call_id=call_id,
-                        tool_name=name,
-                        arguments_json=json.dumps(args_obj, ensure_ascii=False),
-                    )
-                )
+                # Add ToolCallPart, then ThinkingSignaturePart after it
+                state.append_tool_call(call_id, name, json.dumps(args_obj, ensure_ascii=False))
+                encoded_sig = _encode_thought_signature(thought_signature)
+                if encoded_sig:
+                    state.append_thinking_signature(encoded_sig)
                 completed_tool_items.add(call_id)
                 continue
 
-            partial_args = getattr(function_call, "partial_args", None)
+            partial_args = function_call.partial_args
             if partial_args is not None:
                 acc = partial_args_by_call.setdefault(call_id, {})
                 _merge_partial_args(acc, partial_args)
 
-            will_continue = getattr(function_call, "will_continue", None)
+            will_continue = function_call.will_continue
             if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
-
-
-
-
-
-
-                    message.ToolCallPart(
-                        call_id=call_id,
-                        tool_name=name,
-                        arguments_json=json.dumps(partial_args_by_call[call_id], ensure_ascii=False),
-                    )
-                )
+                # Add ToolCallPart, then ThinkingSignaturePart after it
+                state.append_tool_call(call_id, name, json.dumps(partial_args_by_call[call_id], ensure_ascii=False))
+                stored_sig = started_tool_calls.get(call_id, (name, None))[1]
+                encoded_stored_sig = _encode_thought_signature(stored_sig)
+                if encoded_stored_sig:
+                    state.append_thinking_signature(encoded_stored_sig)
                 completed_tool_items.add(call_id)
 
     # Flush any pending tool calls that never produced args.
-    for call_id, name in started_tool_calls.items():
+    for call_id, (name, stored_sig) in started_tool_calls.items():
         if call_id in completed_tool_items:
             continue
         args = partial_args_by_call.get(call_id, {})
-        state.assistant_parts.append(
-            message.ToolCallPart(
-                call_id=call_id,
-                tool_name=name,
-                arguments_json=json.dumps(args, ensure_ascii=False),
-            )
-        )
-
-    state.flush_thinking()
-    state.flush_text()
+        state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
+        encoded_stored_sig = _encode_thought_signature(stored_sig)
+        if encoded_stored_sig:
+            state.append_thinking_signature(encoded_stored_sig)
 
     usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
     if usage is not None:
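
The invariant the rewritten stream loop maintains: every signed part (thinking text, response image, tool call) is immediately followed by its ThinkingSignaturePart, so later turns can pair parts with signatures when rebuilding history. A toy sketch with klaude_code's Part classes stubbed as plain tuples (illustrative only, not the real types):

    # One thinking turn that ends in a signed tool call.
    parts: list[tuple[str, str]] = []
    parts.append(("thinking_text", "pick a file to read..."))
    parts.append(("thinking_signature", "sig-A"))  # signature directly follows its part
    parts.append(("tool_call", 'read_file {"path": "a.py"}'))
    parts.append(("thinking_signature", "sig-B"))

    assert [kind for kind, _ in parts] == [
        "thinking_text",
        "thinking_signature",
        "tool_call",
        "thinking_signature",
    ]
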
@@ -355,7 +424,7 @@ class GoogleLLMStream(LLMStreamABC):
 
     def __init__(
         self,
-        stream: AsyncIterator[Any],
+        stream: AsyncIterator[GenerateContentResponse],
         *,
         param: llm_param.LLMCallParameter,
         metadata_tracker: MetadataTracker,
@@ -383,7 +452,14 @@ class GoogleLLMStream(LLMStreamABC):
                 yield item
         except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
             yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
-
+            # Use accumulated parts for potential prefill on retry
+            self._metadata_tracker.set_response_id(self._state.response_id)
+            yield message.AssistantMessage(
+                parts=self._state.get_partial_parts(),
+                response_id=self._state.response_id,
+                usage=self._metadata_tracker.finalize(),
+                stop_reason="error",
+            )
 
     def get_partial_message(self) -> message.AssistantMessage | None:
         if self._completed:
@@ -419,13 +495,12 @@ class GoogleClient(LLMClientABC):
         config = _build_config(param)
 
         log_debug(
-            json.dumps(
+            debug_json(
                 {
                     "model": str(param.model_id),
                     "contents": [c.model_dump(exclude_none=True) for c in contents],
                     "config": config.model_dump(exclude_none=True),
-                },
-                ensure_ascii=False,
+                }
             ),
             style="yellow",
             debug_type=DebugType.LLM_PAYLOAD,
@@ -434,7 +509,7 @@ class GoogleClient(LLMClientABC):
         try:
             stream = await self.client.aio.models.generate_content_stream(
                 model=str(param.model_id),
-                contents=cast(Any, contents),
+                contents=cast(ContentListUnion, contents),
                 config=config,
             )
         except (APIError, ClientError, ServerError, httpx.HTTPError) as e: