klaude-code 2.5.1-py3-none-any.whl → 2.5.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. klaude_code/.DS_Store +0 -0
  2. klaude_code/cli/auth_cmd.py +2 -13
  3. klaude_code/cli/cost_cmd.py +10 -10
  4. klaude_code/cli/list_model.py +8 -0
  5. klaude_code/cli/main.py +41 -8
  6. klaude_code/cli/session_cmd.py +2 -11
  7. klaude_code/config/assets/builtin_config.yaml +45 -26
  8. klaude_code/config/config.py +30 -7
  9. klaude_code/config/model_matcher.py +3 -3
  10. klaude_code/config/sub_agent_model_helper.py +1 -1
  11. klaude_code/const.py +2 -1
  12. klaude_code/core/agent_profile.py +1 -0
  13. klaude_code/core/executor.py +4 -0
  14. klaude_code/core/loaded_skills.py +36 -0
  15. klaude_code/core/tool/context.py +1 -3
  16. klaude_code/core/tool/file/edit_tool.py +1 -1
  17. klaude_code/core/tool/file/read_tool.py +2 -2
  18. klaude_code/core/tool/file/write_tool.py +1 -1
  19. klaude_code/core/turn.py +19 -7
  20. klaude_code/llm/anthropic/client.py +97 -60
  21. klaude_code/llm/anthropic/input.py +20 -9
  22. klaude_code/llm/google/client.py +223 -148
  23. klaude_code/llm/google/input.py +44 -36
  24. klaude_code/llm/openai_compatible/stream.py +109 -99
  25. klaude_code/llm/openrouter/reasoning.py +4 -29
  26. klaude_code/llm/partial_message.py +2 -32
  27. klaude_code/llm/responses/client.py +99 -81
  28. klaude_code/llm/responses/input.py +11 -25
  29. klaude_code/llm/stream_parts.py +94 -0
  30. klaude_code/log.py +57 -0
  31. klaude_code/protocol/events/system.py +3 -0
  32. klaude_code/protocol/llm_param.py +1 -0
  33. klaude_code/session/export.py +259 -91
  34. klaude_code/session/templates/export_session.html +141 -59
  35. klaude_code/skill/.DS_Store +0 -0
  36. klaude_code/skill/assets/.DS_Store +0 -0
  37. klaude_code/skill/loader.py +1 -0
  38. klaude_code/tui/command/fork_session_cmd.py +14 -23
  39. klaude_code/tui/command/model_picker.py +2 -17
  40. klaude_code/tui/command/refresh_cmd.py +2 -0
  41. klaude_code/tui/command/resume_cmd.py +2 -18
  42. klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
  43. klaude_code/tui/command/thinking_cmd.py +2 -14
  44. klaude_code/tui/components/common.py +1 -1
  45. klaude_code/tui/components/metadata.py +22 -21
  46. klaude_code/tui/components/rich/markdown.py +8 -0
  47. klaude_code/tui/components/rich/quote.py +36 -8
  48. klaude_code/tui/components/rich/theme.py +2 -0
  49. klaude_code/tui/components/welcome.py +32 -0
  50. klaude_code/tui/input/prompt_toolkit.py +3 -1
  51. klaude_code/tui/machine.py +19 -1
  52. klaude_code/tui/renderer.py +3 -4
  53. klaude_code/tui/terminal/selector.py +174 -31
  54. {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/METADATA +1 -1
  55. {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/RECORD +57 -53
  56. klaude_code/skill/assets/jj-workspace/SKILL.md +0 -20
  57. {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/WHEEL +0 -0
  58. {klaude_code-2.5.1.dist-info → klaude_code-2.5.3.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/client.py
@@ -4,32 +4,69 @@
  # pyright: reportAttributeAccessIssue=false
 
  import json
+ from base64 import b64encode
  from collections.abc import AsyncGenerator, AsyncIterator
- from typing import Any, Literal, cast, override
+ from typing import Any, cast, override
  from uuid import uuid4
 
  import httpx
  from google.genai import Client
  from google.genai.errors import APIError, ClientError, ServerError
  from google.genai.types import (
+     ContentListUnion,
      FunctionCallingConfig,
      FunctionCallingConfigMode,
      GenerateContentConfig,
+     GenerateContentResponse,
+     GenerateContentResponseUsageMetadata,
      HttpOptions,
+     PartialArg,
      ThinkingConfig,
+     ThinkingLevel,
      ToolConfig,
-     UsageMetadata,
  )
 
  from klaude_code.llm.client import LLMClientABC, LLMStreamABC
  from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
+ from klaude_code.llm.image import save_assistant_image
  from klaude_code.llm.input_common import apply_config_defaults
- from klaude_code.llm.partial_message import degrade_thinking_to_text
  from klaude_code.llm.registry import register
+ from klaude_code.llm.stream_parts import (
+     append_text_part,
+     append_thinking_text_part,
+     build_partial_message,
+     build_partial_parts,
+ )
  from klaude_code.llm.usage import MetadataTracker, error_llm_stream
- from klaude_code.log import DebugType, log_debug
+ from klaude_code.log import DebugType, debug_json, log_debug
  from klaude_code.protocol import llm_param, message, model
 
+ # Unified format for Google thought signatures
+ GOOGLE_THOUGHT_SIGNATURE_FORMAT = "google"
+
+ # Synthetic signature for image parts that need one but don't have it.
+ # See: https://ai.google.dev/gemini-api/docs/thought-signatures
+ SYNTHETIC_THOUGHT_SIGNATURE = b"skip_thought_signature_validator"
+
+
+ def support_thinking(model_id: str | None) -> bool:
+     return bool(model_id) and ("gemini-3" in model_id or "gemini-2.5-pro" in model_id)
+
+
+ def convert_gemini_thinking_level(reasoning_effort: str | None) -> ThinkingLevel | None:
+     """Convert reasoning_effort to Gemini ThinkingLevel."""
+     if reasoning_effort is None:
+         return None
+     mapping: dict[str, ThinkingLevel] = {
+         "xhigh": ThinkingLevel.HIGH,
+         "high": ThinkingLevel.HIGH,
+         "medium": ThinkingLevel.MEDIUM,
+         "low": ThinkingLevel.LOW,
+         "minimal": ThinkingLevel.MINIMAL,
+         "none": ThinkingLevel.MINIMAL,
+     }
+     return mapping.get(reasoning_effort)
+
 
  def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
      tool_list = convert_tool_schema(param.tools)
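The hunk above replaces the old `param.thinking.type == "enabled"` gate with model-based detection (`support_thinking`) and maps klaude-code's `reasoning_effort` strings onto Gemini's coarser `ThinkingLevel` enum. A minimal sketch of the edge cases this mapping implies (illustrative only; enum members taken from the imports above):

    from google.genai.types import ThinkingLevel
    from klaude_code.llm.google.client import convert_gemini_thinking_level, support_thinking

    # "xhigh" collapses to HIGH: the enum exposes no level above HIGH.
    assert convert_gemini_thinking_level("xhigh") is ThinkingLevel.HIGH
    # "none" clamps to MINIMAL rather than disabling thought entirely.
    assert convert_gemini_thinking_level("none") is ThinkingLevel.MINIMAL
    # Unset or unrecognized efforts leave the ThinkingConfig untouched.
    assert convert_gemini_thinking_level(None) is None
    assert convert_gemini_thinking_level("frobnicate") is None

    # Thinking is now keyed off the model id, not the request parameters:
    assert support_thinking("gemini-3-pro-preview")
    assert not support_thinking("gemini-2.0-flash")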
@@ -39,18 +76,21 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
      tool_config = ToolConfig(
          function_calling_config=FunctionCallingConfig(
              mode=FunctionCallingConfigMode.AUTO,
-             # Gemini streams tool args; keep this enabled to maximize fidelity.
-             stream_function_call_arguments=True,
          )
      )
 
      thinking_config: ThinkingConfig | None = None
-     if param.thinking and param.thinking.type == "enabled":
-         thinking_config = ThinkingConfig(
+     if support_thinking(param.model_id):
+         thinking_config: ThinkingConfig | None = ThinkingConfig(
              include_thoughts=True,
-             thinking_budget=param.thinking.budget_tokens,
          )
 
+         if param.thinking:
+             if param.thinking.budget_tokens:
+                 thinking_config.thinking_budget = param.thinking.budget_tokens
+             if param.thinking.reasoning_effort:
+                 thinking_config.thinking_level = convert_gemini_thinking_level(param.thinking.reasoning_effort)
+
      return GenerateContentConfig(
          system_instruction=param.system,
          temperature=param.temperature,
@@ -62,7 +102,7 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
 
 
  def _usage_from_metadata(
-     usage: UsageMetadata | None,
+     usage: GenerateContentResponseUsageMetadata | None,
      *,
      context_limit: int | None,
      max_tokens: int | None,
@@ -72,9 +112,16 @@ def _usage_from_metadata(
 
      cached = usage.cached_content_token_count or 0
      prompt = usage.prompt_token_count or 0
-     response = usage.response_token_count or 0
+     response = usage.candidates_token_count or 0
      thoughts = usage.thoughts_token_count or 0
 
+     # Extract image tokens from candidates_tokens_details
+     image_tokens = 0
+     if usage.candidates_tokens_details:
+         for detail in usage.candidates_tokens_details:
+             if detail.modality and detail.modality.name == "IMAGE" and detail.token_count:
+                 image_tokens += detail.token_count
+
      total = usage.total_token_count
      if total is None:
          total = prompt + cached + response + thoughts
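`_usage_from_metadata` now also splits out image tokens, which google-genai reports per modality in `candidates_tokens_details`. A hedged sketch of the shape being consumed (`ModalityTokenCount` and `MediaModality` are the google-genai types these fields presumably carry; values are made up):

    from google.genai.types import (
        GenerateContentResponseUsageMetadata,
        MediaModality,
        ModalityTokenCount,
    )

    usage = GenerateContentResponseUsageMetadata(
        prompt_token_count=120,
        candidates_token_count=512,
        candidates_tokens_details=[
            ModalityTokenCount(modality=MediaModality.TEXT, token_count=252),
            ModalityTokenCount(modality=MediaModality.IMAGE, token_count=260),
        ],
    )

    # Same filter as the loop above: only IMAGE entries with a token_count contribute.
    image_tokens = sum(
        d.token_count
        for d in usage.candidates_tokens_details or []
        if d.modality and d.modality.name == "IMAGE" and d.token_count
    )
    assert image_tokens == 260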
@@ -84,30 +131,29 @@ def _usage_from_metadata(
          cached_tokens=cached,
          output_tokens=response + thoughts,
          reasoning_tokens=thoughts,
+         image_tokens=image_tokens,
          context_size=total,
          context_limit=context_limit,
          max_tokens=max_tokens,
      )
 
 
- def _partial_arg_value(partial: Any) -> Any:
-     if getattr(partial, "string_value", None) is not None:
+ def _partial_arg_value(partial: PartialArg) -> str | float | bool | None:
+     if partial.string_value is not None:
          return partial.string_value
-     if getattr(partial, "number_value", None) is not None:
+     if partial.number_value is not None:
          return partial.number_value
-     if getattr(partial, "bool_value", None) is not None:
+     if partial.bool_value is not None:
          return partial.bool_value
-     if getattr(partial, "null_value", None) is not None:
-         return None
      return None
 
 
- def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) -> None:
+ def _merge_partial_args(dst: dict[str, Any], partial_args: list[PartialArg] | None) -> None:
      if not partial_args:
          return
      for partial in partial_args:
-         json_path = getattr(partial, "json_path", None)
-         if not isinstance(json_path, str) or not json_path.startswith("$."):
+         json_path = partial.json_path
+         if not json_path or not json_path.startswith("$."):
              continue
          key = json_path[2:]
          if not key or any(ch in key for ch in "[]"):
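`_merge_partial_args` now reads the typed `PartialArg` fields directly instead of probing with `getattr`. Note its semantics: only flat top-level paths (`$.key`, no `[]` indexing) are accepted, and each update overwrites the key, so the last value streamed for a path wins. An illustrative call with hypothetical partials (pydantic-style kwargs assumed for `PartialArg`):

    from google.genai.types import PartialArg
    from klaude_code.llm.google.client import _merge_partial_args

    acc: dict[str, object] = {}
    _merge_partial_args(acc, [
        PartialArg(json_path="$.city", string_value="Par"),
        PartialArg(json_path="$.city", string_value="Paris"),  # overwrites the earlier chunk
        PartialArg(json_path="$.limit", number_value=10),
        PartialArg(json_path="$.items[0]", string_value="x"),  # indexed paths are skipped
    ])
    assert acc == {"city": "Paris", "limit": 10}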
@@ -115,6 +161,15 @@ def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) ->
          dst[key] = _partial_arg_value(partial)
 
 
+ def _encode_thought_signature(sig: bytes | str | None) -> str | None:
+     """Encode thought signature bytes to base64 string."""
+     if sig is None:
+         return None
+     if isinstance(sig, bytes):
+         return b64encode(sig).decode("ascii")
+     return sig
+
+
  def _map_finish_reason(reason: str) -> model.StopReason | None:
      normalized = reason.strip().lower()
      mapping: dict[str, model.StopReason] = {
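`_encode_thought_signature` exists because google-genai surfaces thought signatures as raw `bytes`, while `ThinkingSignaturePart.signature` is stored as a string. A round-trip sketch:

    from base64 import b64decode
    from klaude_code.llm.google.client import (
        SYNTHETIC_THOUGHT_SIGNATURE,
        _encode_thought_signature,
    )

    encoded = _encode_thought_signature(SYNTHETIC_THOUGHT_SIGNATURE)
    assert encoded is not None
    assert b64decode(encoded) == b"skip_thought_signature_validator"
    # Already-encoded strings pass through unchanged; None stays None.
    assert _encode_thought_signature(encoded) == encoded
    assert _encode_thought_signature(None) is None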
@@ -139,202 +194,216 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
  class GoogleStreamStateManager:
      """Manages streaming state for Google LLM responses.
 
-     Accumulates thinking content, assistant text, and tool calls during streaming
-     to support get_partial_message() for cancellation scenarios.
+     Accumulates parts directly during streaming to support get_partial_message()
+     for cancellation scenarios. Merges consecutive text parts of the same type.
      """
 
      def __init__(self, param_model: str) -> None:
          self.param_model = param_model
-         self.accumulated_thoughts: list[str] = []
-         self.accumulated_text: list[str] = []
-         self.thought_signature: str | None = None
          self.assistant_parts: list[message.Part] = []
          self.response_id: str | None = None
          self.stop_reason: model.StopReason | None = None
 
-     def flush_thinking(self) -> None:
-         """Flush accumulated thinking content into assistant_parts."""
-         if self.accumulated_thoughts:
-             self.assistant_parts.append(
-                 message.ThinkingTextPart(
-                     text="".join(self.accumulated_thoughts),
-                     model_id=self.param_model,
-                 )
-             )
-         self.accumulated_thoughts.clear()
-         if self.thought_signature:
-             self.assistant_parts.append(
-                 message.ThinkingSignaturePart(
-                     signature=self.thought_signature,
-                     model_id=self.param_model,
-                     format="google_thought_signature",
-                 )
-             )
-         self.thought_signature = None
-
-     def flush_text(self) -> None:
-         """Flush accumulated text content into assistant_parts."""
-         if not self.accumulated_text:
-             return
-         self.assistant_parts.append(message.TextPart(text="".join(self.accumulated_text)))
-         self.accumulated_text.clear()
+     def append_thinking_text(self, text: str) -> None:
+         """Append thinking text, merging with previous ThinkingTextPart if possible."""
+         append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
+
+     def append_text(self, text: str) -> None:
+         """Append text, merging with previous TextPart if possible."""
+         append_text_part(self.assistant_parts, text)
+
+     def append_thinking_signature(self, signature: str) -> None:
+         """Append a ThinkingSignaturePart after the current part."""
+         self.assistant_parts.append(
+             message.ThinkingSignaturePart(
+                 signature=signature,
+                 model_id=self.param_model,
+                 format=GOOGLE_THOUGHT_SIGNATURE_FORMAT,
+             )
+         )
+
+     def append_image(self, image_part: message.ImageFilePart) -> None:
+         """Append an ImageFilePart."""
+         self.assistant_parts.append(image_part)
+
+     def append_tool_call(self, call_id: str, name: str, arguments_json: str) -> None:
+         """Append a ToolCallPart."""
+         self.assistant_parts.append(
+             message.ToolCallPart(
+                 call_id=call_id,
+                 tool_name=name,
+                 arguments_json=arguments_json,
+             )
+         )
+
+     def get_partial_parts(self) -> list[message.Part]:
+         """Get accumulated parts excluding tool calls, with thinking degraded.
+
+         Filters out ToolCallPart and applies degrade_thinking_to_text.
+         """
+         return build_partial_parts(self.assistant_parts)
 
      def get_partial_message(self) -> message.AssistantMessage | None:
          """Build a partial AssistantMessage from accumulated state.
 
-         Flushes all accumulated content and returns the message.
          Returns None if no content has been accumulated yet.
          """
-         self.flush_thinking()
-         self.flush_text()
-
-         filtered_parts: list[message.Part] = []
-         for part in self.assistant_parts:
-             if isinstance(part, message.ToolCallPart):
-                 continue
-             filtered_parts.append(part)
-
-         filtered_parts = degrade_thinking_to_text(filtered_parts)
-
-         if not filtered_parts:
-             return None
-         return message.AssistantMessage(
-             parts=filtered_parts,
-             response_id=self.response_id,
-             stop_reason="aborted",
-         )
+         return build_partial_message(self.assistant_parts, response_id=self.response_id)
 
 
  async def parse_google_stream(
-     stream: AsyncIterator[Any],
+     stream: AsyncIterator[GenerateContentResponse],
      param: llm_param.LLMCallParameter,
      metadata_tracker: MetadataTracker,
      state: GoogleStreamStateManager,
  ) -> AsyncGenerator[message.LLMStreamItem]:
-     stage: Literal["waiting", "thinking", "assistant", "tool"] = "waiting"
-
      # Track tool calls where args arrive as partial updates.
      partial_args_by_call: dict[str, dict[str, Any]] = {}
-     started_tool_calls: dict[str, str] = {}  # call_id -> name
+     started_tool_calls: dict[str, tuple[str, bytes | None]] = {}  # call_id -> (name, thought_signature)
      started_tool_items: set[str] = set()
      completed_tool_items: set[str] = set()
 
-     last_usage_metadata: UsageMetadata | None = None
+     # Track image index for unique filenames
+     image_index = 0
+
+     last_usage_metadata: GenerateContentResponseUsageMetadata | None = None
 
      async for chunk in stream:
-         log_debug(
-             chunk.model_dump_json(exclude_none=True),
-             style="blue",
-             debug_type=DebugType.LLM_STREAM,
-         )
+         log_debug(debug_json(chunk.model_dump(exclude_none=True)), style="blue", debug_type=DebugType.LLM_STREAM)
 
          if state.response_id is None:
-             state.response_id = getattr(chunk, "response_id", None) or uuid4().hex
+             state.response_id = chunk.response_id or uuid4().hex
 
-         if getattr(chunk, "usage_metadata", None) is not None:
+         if chunk.usage_metadata is not None:
              last_usage_metadata = chunk.usage_metadata
 
-         candidates = getattr(chunk, "candidates", None) or []
+         candidates = chunk.candidates or []
          candidate0 = candidates[0] if candidates else None
-         finish_reason = getattr(candidate0, "finish_reason", None) if candidate0 else None
+         finish_reason = candidate0.finish_reason if candidate0 else None
          if finish_reason is not None:
-             if isinstance(finish_reason, str):
-                 reason_value = finish_reason
-             else:
-                 reason_value = getattr(finish_reason, "name", None) or str(finish_reason)
-             state.stop_reason = _map_finish_reason(reason_value)
-         content = getattr(candidate0, "content", None) if candidate0 else None
-         content_parts = getattr(content, "parts", None) if content else None
+             state.stop_reason = _map_finish_reason(finish_reason.name)
+         content = candidate0.content if candidate0 else None
+         content_parts = content.parts if content else None
          if not content_parts:
              continue
 
          for part in content_parts:
-             if getattr(part, "text", None) is not None:
+             # Handle text parts (both thought and regular text)
+             if part.text is not None:
                  text = part.text
                  if not text:
                      continue
                  metadata_tracker.record_token()
-                 if getattr(part, "thought", False) is True:
-                     if stage == "assistant":
-                         state.flush_text()
-                     stage = "thinking"
-                     state.accumulated_thoughts.append(text)
-                     if getattr(part, "thought_signature", None):
-                         state.thought_signature = part.thought_signature
+
+                 if part.thought is True:
+                     # Thinking text - append and merge with previous ThinkingTextPart
+                     state.append_thinking_text(text)
+                     # Add ThinkingSignaturePart after thinking text if present
+                     if part.thought_signature:
+                         encoded_sig = _encode_thought_signature(part.thought_signature)
+                         if encoded_sig:
+                             state.append_thinking_signature(encoded_sig)
                      yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
                  else:
-                     if stage == "thinking":
-                         state.flush_thinking()
-                     stage = "assistant"
-                     state.accumulated_text.append(text)
+                     # Regular text - append and merge with previous TextPart
+                     state.append_text(text)
+                     # Regular text parts can also have thought_signature
+                     if part.thought_signature:
+                         encoded_sig = _encode_thought_signature(part.thought_signature)
+                         if encoded_sig:
+                             state.append_thinking_signature(encoded_sig)
                      yield message.AssistantTextDelta(content=text, response_id=state.response_id)
 
-             function_call = getattr(part, "function_call", None)
+             # Handle inline_data (image generation responses)
+             inline_data = part.inline_data
+             if inline_data is not None and inline_data.data:
+                 # Thought images (interim images produced during thinking) do not
+                 # carry thought signatures and must not be treated as response
+                 # images for multi-turn history.
+                 if part.thought is True:
+                     continue
+                 mime_type = inline_data.mime_type or "image/png"
+                 encoded_data = b64encode(inline_data.data).decode("ascii")
+                 data_url = f"data:{mime_type};base64,{encoded_data}"
+                 try:
+                     image_part = save_assistant_image(
+                         data_url=data_url,
+                         session_id=param.session_id,
+                         response_id=state.response_id,
+                         image_index=image_index,
+                     )
+                     image_index += 1
+                     state.append_image(image_part)
+                     # Add ThinkingSignaturePart after image if present, or synthetic signature for thinking models
+                     if part.thought_signature:
+                         encoded_sig = _encode_thought_signature(part.thought_signature)
+                         if encoded_sig:
+                             state.append_thinking_signature(encoded_sig)
+                     elif support_thinking(param.model_id):
+                         encoded_sig = _encode_thought_signature(SYNTHETIC_THOUGHT_SIGNATURE)
+                         if encoded_sig:
+                             state.append_thinking_signature(encoded_sig)
+                     yield message.AssistantImageDelta(
+                         response_id=state.response_id,
+                         file_path=image_part.file_path,
+                     )
+                 except ValueError:
+                     pass  # Skip invalid images
+
+             # Handle function calls
+             function_call = part.function_call
              if function_call is None:
                  continue
 
              metadata_tracker.record_token()
-             call_id = getattr(function_call, "id", None) or uuid4().hex
-             name = getattr(function_call, "name", None) or ""
-             started_tool_calls.setdefault(call_id, name)
+             call_id = function_call.id or uuid4().hex
+             name = function_call.name or ""
+
+             # Capture thought_signature from the part (required for tools in thinking models)
+             thought_signature = part.thought_signature
+
+             # Store name and thought_signature for later use (partial args / flush)
+             if call_id not in started_tool_calls or (thought_signature and started_tool_calls[call_id][1] is None):
+                 started_tool_calls[call_id] = (name, thought_signature)
 
              if call_id not in started_tool_items:
                  started_tool_items.add(call_id)
                  yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
 
-             args_obj = getattr(function_call, "args", None)
+             args_obj = function_call.args
              if args_obj is not None:
-                 if stage == "thinking":
-                     state.flush_thinking()
-                 if stage == "assistant":
-                     state.flush_text()
-                 stage = "tool"
-                 state.assistant_parts.append(
-                     message.ToolCallPart(
-                         call_id=call_id,
-                         tool_name=name,
-                         arguments_json=json.dumps(args_obj, ensure_ascii=False),
-                     )
-                 )
+                 # Add ToolCallPart, then ThinkingSignaturePart after it
+                 state.append_tool_call(call_id, name, json.dumps(args_obj, ensure_ascii=False))
+                 encoded_sig = _encode_thought_signature(thought_signature)
+                 if encoded_sig:
+                     state.append_thinking_signature(encoded_sig)
                  completed_tool_items.add(call_id)
                  continue
 
-             partial_args = getattr(function_call, "partial_args", None)
+             partial_args = function_call.partial_args
              if partial_args is not None:
                  acc = partial_args_by_call.setdefault(call_id, {})
                  _merge_partial_args(acc, partial_args)
 
-             will_continue = getattr(function_call, "will_continue", None)
+             will_continue = function_call.will_continue
              if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
-                 if stage == "thinking":
-                     state.flush_thinking()
-                 if stage == "assistant":
-                     state.flush_text()
-                 stage = "tool"
-                 state.assistant_parts.append(
-                     message.ToolCallPart(
-                         call_id=call_id,
-                         tool_name=name,
-                         arguments_json=json.dumps(partial_args_by_call[call_id], ensure_ascii=False),
-                     )
-                 )
+                 # Add ToolCallPart, then ThinkingSignaturePart after it
+                 state.append_tool_call(call_id, name, json.dumps(partial_args_by_call[call_id], ensure_ascii=False))
+                 stored_sig = started_tool_calls.get(call_id, (name, None))[1]
+                 encoded_stored_sig = _encode_thought_signature(stored_sig)
+                 if encoded_stored_sig:
+                     state.append_thinking_signature(encoded_stored_sig)
                  completed_tool_items.add(call_id)
 
      # Flush any pending tool calls that never produced args.
-     for call_id, name in started_tool_calls.items():
+     for call_id, (name, stored_sig) in started_tool_calls.items():
          if call_id in completed_tool_items:
              continue
          args = partial_args_by_call.get(call_id, {})
-         state.assistant_parts.append(
-             message.ToolCallPart(
-                 call_id=call_id,
-                 tool_name=name,
-                 arguments_json=json.dumps(args, ensure_ascii=False),
-             )
-         )
-
-     state.flush_thinking()
-     state.flush_text()
+         state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
+         encoded_stored_sig = _encode_thought_signature(stored_sig)
+         if encoded_stored_sig:
+             state.append_thinking_signature(encoded_stored_sig)
 
      usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
      if usage is not None:
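The per-stage flush state machine (`waiting`/`thinking`/`assistant`/`tool`) is gone: parts are now appended in arrival order, and the new `klaude_code.llm.stream_parts` helpers (added in this release, +94 lines, diff not shown here) merge consecutive text parts in place. From the call sites, the merge presumably behaves like this simplified sketch (`TextPart` here is a stand-in dataclass, not the real `message.TextPart`):

    from dataclasses import dataclass

    @dataclass
    class TextPart:
        text: str

    def append_text_part(parts: list, text: str) -> None:
        # Assumed semantics: extend a trailing TextPart rather than appending a
        # new one, so repeated stream deltas collapse into a single part.
        if parts and isinstance(parts[-1], TextPart):
            parts[-1].text += text
        else:
            parts.append(TextPart(text=text))

    parts: list = []
    for delta in ["Hel", "lo ", "world"]:
        append_text_part(parts, delta)
    assert parts == [TextPart(text="Hello world")]

Appending each `ThinkingSignaturePart` immediately after the part it signs matches Gemini's requirement that thought signatures be sent back alongside the content they were produced for.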
@@ -355,7 +424,7 @@ class GoogleLLMStream(LLMStreamABC):
 
      def __init__(
          self,
-         stream: AsyncIterator[Any],
+         stream: AsyncIterator[GenerateContentResponse],
          *,
          param: llm_param.LLMCallParameter,
          metadata_tracker: MetadataTracker,
@@ -383,7 +452,14 @@ class GoogleLLMStream(LLMStreamABC):
                  yield item
          except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
              yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
-             yield message.AssistantMessage(parts=[], response_id=None, usage=self._metadata_tracker.finalize())
+             # Use accumulated parts for potential prefill on retry
+             self._metadata_tracker.set_response_id(self._state.response_id)
+             yield message.AssistantMessage(
+                 parts=self._state.get_partial_parts(),
+                 response_id=self._state.response_id,
+                 usage=self._metadata_tracker.finalize(),
+                 stop_reason="error",
+             )
 
      def get_partial_message(self) -> message.AssistantMessage | None:
          if self._completed:
@@ -419,13 +495,12 @@ class GoogleClient(LLMClientABC):
          config = _build_config(param)
 
          log_debug(
-             json.dumps(
+             debug_json(
                  {
                      "model": str(param.model_id),
                      "contents": [c.model_dump(exclude_none=True) for c in contents],
                      "config": config.model_dump(exclude_none=True),
-                 },
-                 ensure_ascii=False,
+                 }
              ),
              style="yellow",
              debug_type=DebugType.LLM_PAYLOAD,
@@ -434,7 +509,7 @@
          try:
              stream = await self.client.aio.models.generate_content_stream(
                  model=str(param.model_id),
-                 contents=cast(Any, contents),
+                 contents=cast(ContentListUnion, contents),
                  config=config,
              )
          except (APIError, ClientError, ServerError, httpx.HTTPError) as e: