klaude-code 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. klaude_code/auth/__init__.py +10 -0
  2. klaude_code/auth/env.py +77 -0
  3. klaude_code/cli/auth_cmd.py +89 -21
  4. klaude_code/cli/config_cmd.py +5 -5
  5. klaude_code/cli/cost_cmd.py +167 -68
  6. klaude_code/cli/main.py +51 -27
  7. klaude_code/cli/self_update.py +7 -7
  8. klaude_code/config/assets/builtin_config.yaml +45 -24
  9. klaude_code/config/builtin_config.py +23 -9
  10. klaude_code/config/config.py +19 -9
  11. klaude_code/config/model_matcher.py +1 -1
  12. klaude_code/const.py +2 -1
  13. klaude_code/core/tool/file/edit_tool.py +1 -1
  14. klaude_code/core/tool/file/read_tool.py +2 -2
  15. klaude_code/core/tool/file/write_tool.py +1 -1
  16. klaude_code/core/turn.py +21 -4
  17. klaude_code/llm/anthropic/client.py +75 -50
  18. klaude_code/llm/anthropic/input.py +20 -9
  19. klaude_code/llm/google/client.py +235 -148
  20. klaude_code/llm/google/input.py +44 -36
  21. klaude_code/llm/openai_compatible/stream.py +114 -100
  22. klaude_code/llm/openrouter/client.py +1 -0
  23. klaude_code/llm/openrouter/reasoning.py +4 -29
  24. klaude_code/llm/partial_message.py +2 -32
  25. klaude_code/llm/responses/client.py +99 -81
  26. klaude_code/llm/responses/input.py +11 -25
  27. klaude_code/llm/stream_parts.py +94 -0
  28. klaude_code/log.py +57 -0
  29. klaude_code/protocol/events.py +214 -0
  30. klaude_code/protocol/sub_agent/image_gen.py +0 -4
  31. klaude_code/session/session.py +51 -18
  32. klaude_code/tui/command/fork_session_cmd.py +14 -23
  33. klaude_code/tui/command/model_picker.py +2 -17
  34. klaude_code/tui/command/resume_cmd.py +2 -18
  35. klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
  36. klaude_code/tui/command/thinking_cmd.py +2 -14
  37. klaude_code/tui/commands.py +0 -5
  38. klaude_code/tui/components/common.py +1 -1
  39. klaude_code/tui/components/metadata.py +21 -21
  40. klaude_code/tui/components/rich/quote.py +36 -8
  41. klaude_code/tui/components/rich/theme.py +2 -0
  42. klaude_code/tui/components/sub_agent.py +6 -0
  43. klaude_code/tui/display.py +11 -1
  44. klaude_code/tui/input/completers.py +11 -7
  45. klaude_code/tui/input/prompt_toolkit.py +3 -1
  46. klaude_code/tui/machine.py +108 -56
  47. klaude_code/tui/renderer.py +4 -65
  48. klaude_code/tui/terminal/selector.py +174 -31
  49. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/METADATA +23 -31
  50. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/RECORD +52 -58
  51. klaude_code/cli/session_cmd.py +0 -96
  52. klaude_code/protocol/events/__init__.py +0 -63
  53. klaude_code/protocol/events/base.py +0 -18
  54. klaude_code/protocol/events/chat.py +0 -30
  55. klaude_code/protocol/events/lifecycle.py +0 -23
  56. klaude_code/protocol/events/metadata.py +0 -16
  57. klaude_code/protocol/events/streaming.py +0 -43
  58. klaude_code/protocol/events/system.py +0 -56
  59. klaude_code/protocol/events/tools.py +0 -27
  60. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/WHEEL +0 -0
  61. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/client.py

@@ -4,32 +4,72 @@
 # pyright: reportAttributeAccessIssue=false
 
 import json
+from base64 import b64encode
 from collections.abc import AsyncGenerator, AsyncIterator
-from typing import Any, Literal, cast, override
+from typing import Any, cast, override
 from uuid import uuid4
 
 import httpx
 from google.genai import Client
 from google.genai.errors import APIError, ClientError, ServerError
 from google.genai.types import (
+    ContentListUnion,
     FunctionCallingConfig,
     FunctionCallingConfigMode,
     GenerateContentConfig,
+    GenerateContentResponse,
+    GenerateContentResponseUsageMetadata,
     HttpOptions,
+    PartialArg,
     ThinkingConfig,
+    ThinkingLevel,
     ToolConfig,
-    UsageMetadata,
+)
+from google.genai.types import (
+    ImageConfig as GoogleImageConfig,
 )
 
 from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
+from klaude_code.llm.image import save_assistant_image
 from klaude_code.llm.input_common import apply_config_defaults
-from klaude_code.llm.partial_message import degrade_thinking_to_text
 from klaude_code.llm.registry import register
+from klaude_code.llm.stream_parts import (
+    append_text_part,
+    append_thinking_text_part,
+    build_partial_message,
+    build_partial_parts,
+)
 from klaude_code.llm.usage import MetadataTracker, error_llm_stream
-from klaude_code.log import DebugType, log_debug
+from klaude_code.log import DebugType, debug_json, log_debug
 from klaude_code.protocol import llm_param, message, model
 
+# Unified format for Google thought signatures
+GOOGLE_THOUGHT_SIGNATURE_FORMAT = "google"
+
+# Synthetic signature for image parts that need one but don't have it.
+# See: https://ai.google.dev/gemini-api/docs/thought-signatures
+SYNTHETIC_THOUGHT_SIGNATURE = b"skip_thought_signature_validator"
+
+
+def support_thinking(model_id: str | None) -> bool:
+    return bool(model_id) and ("gemini-3" in model_id or "gemini-2.5-pro" in model_id)
+
+
+def convert_gemini_thinking_level(reasoning_effort: str | None) -> ThinkingLevel | None:
+    """Convert reasoning_effort to Gemini ThinkingLevel."""
+    if reasoning_effort is None:
+        return None
+    mapping: dict[str, ThinkingLevel] = {
+        "xhigh": ThinkingLevel.HIGH,
+        "high": ThinkingLevel.HIGH,
+        "medium": ThinkingLevel.MEDIUM,
+        "low": ThinkingLevel.LOW,
+        "minimal": ThinkingLevel.MINIMAL,
+        "none": ThinkingLevel.MINIMAL,
+    }
+    return mapping.get(reasoning_effort)
+
 
 def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
     tool_list = convert_tool_schema(param.tools)
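
Note: convert_gemini_thinking_level collapses klaude-code's finer effort scale onto Gemini's ThinkingLevel enum. A minimal behavior sketch in Python, using only the names added above:

    assert convert_gemini_thinking_level("xhigh") is ThinkingLevel.HIGH    # "xhigh" and "high" both map to HIGH
    assert convert_gemini_thinking_level("none") is ThinkingLevel.MINIMAL  # clamped up to MINIMAL, not disabled
    assert convert_gemini_thinking_level(None) is None
    assert convert_gemini_thinking_level("turbo") is None                  # unmapped efforts fall through to None
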
@@ -39,16 +79,27 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
     tool_config = ToolConfig(
         function_calling_config=FunctionCallingConfig(
             mode=FunctionCallingConfigMode.AUTO,
-            # Gemini streams tool args; keep this enabled to maximize fidelity.
-            stream_function_call_arguments=True,
         )
     )
 
     thinking_config: ThinkingConfig | None = None
-    if param.thinking and param.thinking.type == "enabled":
-        thinking_config = ThinkingConfig(
+    if support_thinking(param.model_id):
+        thinking_config: ThinkingConfig | None = ThinkingConfig(
             include_thoughts=True,
-            thinking_budget=param.thinking.budget_tokens,
+        )
+
+        if param.thinking:
+            if param.thinking.budget_tokens:
+                thinking_config.thinking_budget = param.thinking.budget_tokens
+            if param.thinking.reasoning_effort:
+                thinking_config.thinking_level = convert_gemini_thinking_level(param.thinking.reasoning_effort)
+
+    # ImageGen per-call overrides
+    image_config: GoogleImageConfig | None = None
+    if param.image_config is not None:
+        image_config = GoogleImageConfig(
+            aspect_ratio=param.image_config.aspect_ratio,
+            image_size=param.image_config.image_size,
         )
 
     return GenerateContentConfig(
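
Note: the gate for thinking changed from param.thinking.type to the model id via support_thinking. A quick sketch of the predicate; the first id below is illustrative, not a claim about published model names:

    assert support_thinking("gemini-3-foo") is True       # any id containing "gemini-3"
    assert support_thinking("gemini-2.5-pro") is True
    assert support_thinking("gemini-2.5-flash") is False  # no ThinkingConfig is built at all
    assert support_thinking(None) is False
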
@@ -58,11 +109,12 @@ def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
         tools=cast(Any, tool_list) if tool_list else None,
         tool_config=tool_config,
         thinking_config=thinking_config,
+        image_config=image_config,
     )
 
 
 def _usage_from_metadata(
-    usage: UsageMetadata | None,
+    usage: GenerateContentResponseUsageMetadata | None,
     *,
     context_limit: int | None,
     max_tokens: int | None,
@@ -72,9 +124,16 @@ def _usage_from_metadata(
 
     cached = usage.cached_content_token_count or 0
     prompt = usage.prompt_token_count or 0
-    response = usage.response_token_count or 0
+    response = usage.candidates_token_count or 0
     thoughts = usage.thoughts_token_count or 0
 
+    # Extract image tokens from candidates_tokens_details
+    image_tokens = 0
+    if usage.candidates_tokens_details:
+        for detail in usage.candidates_tokens_details:
+            if detail.modality and detail.modality.name == "IMAGE" and detail.token_count:
+                image_tokens += detail.token_count
+
     total = usage.total_token_count
     if total is None:
         total = prompt + cached + response + thoughts
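
Note: candidates_tokens_details is google-genai's per-modality token breakdown. A sketch of the shape the loop above consumes; constructing ModalityTokenCount/MediaModality by keyword is an assumption about the SDK's pydantic types:

    from google.genai.types import MediaModality, ModalityTokenCount

    details = [
        ModalityTokenCount(modality=MediaModality.TEXT, token_count=40),
        ModalityTokenCount(modality=MediaModality.IMAGE, token_count=1290),
    ]
    # Only IMAGE entries are summed, so image_tokens == 1290 here.
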
@@ -84,30 +143,29 @@
         cached_tokens=cached,
         output_tokens=response + thoughts,
         reasoning_tokens=thoughts,
+        image_tokens=image_tokens,
         context_size=total,
         context_limit=context_limit,
         max_tokens=max_tokens,
     )
 
 
-def _partial_arg_value(partial: Any) -> Any:
-    if getattr(partial, "string_value", None) is not None:
+def _partial_arg_value(partial: PartialArg) -> str | float | bool | None:
+    if partial.string_value is not None:
         return partial.string_value
-    if getattr(partial, "number_value", None) is not None:
+    if partial.number_value is not None:
         return partial.number_value
-    if getattr(partial, "bool_value", None) is not None:
+    if partial.bool_value is not None:
         return partial.bool_value
-    if getattr(partial, "null_value", None) is not None:
-        return None
     return None
 
 
-def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) -> None:
+def _merge_partial_args(dst: dict[str, Any], partial_args: list[PartialArg] | None) -> None:
     if not partial_args:
         return
     for partial in partial_args:
-        json_path = getattr(partial, "json_path", None)
-        if not isinstance(json_path, str) or not json_path.startswith("$."):
+        json_path = partial.json_path
+        if not json_path or not json_path.startswith("$."):
             continue
         key = json_path[2:]
         if not key or any(ch in key for ch in "[]"):
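
Note: partial tool-call arguments are merged key-by-key from JSONPath-addressed fragments, and only flat top-level paths survive. A sketch, assuming PartialArg is keyword-constructible like the other google-genai pydantic types:

    acc: dict[str, Any] = {}
    _merge_partial_args(
        acc,
        [
            PartialArg(json_path="$.city", string_value="Paris"),
            PartialArg(json_path="$.limit", number_value=5.0),
            PartialArg(json_path="$.tags[0]", string_value="dropped"),  # indexed paths are skipped
        ],
    )
    # acc == {"city": "Paris", "limit": 5.0}
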
@@ -115,6 +173,15 @@ def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) ->
         dst[key] = _partial_arg_value(partial)
 
 
+def _encode_thought_signature(sig: bytes | str | None) -> str | None:
+    """Encode thought signature bytes to base64 string."""
+    if sig is None:
+        return None
+    if isinstance(sig, bytes):
+        return b64encode(sig).decode("ascii")
+    return sig
+
+
 def _map_finish_reason(reason: str) -> model.StopReason | None:
     normalized = reason.strip().lower()
     mapping: dict[str, model.StopReason] = {
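
Note: the SDK delivers thought signatures as raw bytes, while ThinkingSignaturePart stores a string, hence the base64 shim. Behavior sketch:

    from base64 import b64decode

    encoded = _encode_thought_signature(SYNTHETIC_THOUGHT_SIGNATURE)
    assert encoded == "c2tpcF90aG91Z2h0X3NpZ25hdHVyZV92YWxpZGF0b3I="
    assert b64decode(encoded) == SYNTHETIC_THOUGHT_SIGNATURE
    assert _encode_thought_signature("already-text") == "already-text"  # strings pass through
    assert _encode_thought_signature(None) is None
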
@@ -139,202 +206,216 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
 class GoogleStreamStateManager:
     """Manages streaming state for Google LLM responses.
 
-    Accumulates thinking content, assistant text, and tool calls during streaming
-    to support get_partial_message() for cancellation scenarios.
+    Accumulates parts directly during streaming to support get_partial_message()
+    for cancellation scenarios. Merges consecutive text parts of the same type.
     """
 
     def __init__(self, param_model: str) -> None:
         self.param_model = param_model
-        self.accumulated_thoughts: list[str] = []
-        self.accumulated_text: list[str] = []
-        self.thought_signature: str | None = None
         self.assistant_parts: list[message.Part] = []
         self.response_id: str | None = None
         self.stop_reason: model.StopReason | None = None
 
-    def flush_thinking(self) -> None:
-        """Flush accumulated thinking content into assistant_parts."""
-        if self.accumulated_thoughts:
-            self.assistant_parts.append(
-                message.ThinkingTextPart(
-                    text="".join(self.accumulated_thoughts),
-                    model_id=self.param_model,
-                )
+    def append_thinking_text(self, text: str) -> None:
+        """Append thinking text, merging with previous ThinkingTextPart if possible."""
+        append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
+
+    def append_text(self, text: str) -> None:
+        """Append text, merging with previous TextPart if possible."""
+        append_text_part(self.assistant_parts, text)
+
+    def append_thinking_signature(self, signature: str) -> None:
+        """Append a ThinkingSignaturePart after the current part."""
+        self.assistant_parts.append(
+            message.ThinkingSignaturePart(
+                signature=signature,
+                model_id=self.param_model,
+                format=GOOGLE_THOUGHT_SIGNATURE_FORMAT,
             )
-            self.accumulated_thoughts.clear()
-        if self.thought_signature:
-            self.assistant_parts.append(
-                message.ThinkingSignaturePart(
-                    signature=self.thought_signature,
-                    model_id=self.param_model,
-                    format="google_thought_signature",
-                )
+        )
+
+    def append_image(self, image_part: message.ImageFilePart) -> None:
+        """Append an ImageFilePart."""
+        self.assistant_parts.append(image_part)
+
+    def append_tool_call(self, call_id: str, name: str, arguments_json: str) -> None:
+        """Append a ToolCallPart."""
+        self.assistant_parts.append(
+            message.ToolCallPart(
+                call_id=call_id,
+                tool_name=name,
+                arguments_json=arguments_json,
             )
-            self.thought_signature = None
+        )
+
+    def get_partial_parts(self) -> list[message.Part]:
+        """Get accumulated parts excluding tool calls, with thinking degraded.
 
-    def flush_text(self) -> None:
-        """Flush accumulated text content into assistant_parts."""
-        if not self.accumulated_text:
-            return
-        self.assistant_parts.append(message.TextPart(text="".join(self.accumulated_text)))
-        self.accumulated_text.clear()
+        Filters out ToolCallPart and applies degrade_thinking_to_text.
+        """
+        return build_partial_parts(self.assistant_parts)
 
     def get_partial_message(self) -> message.AssistantMessage | None:
         """Build a partial AssistantMessage from accumulated state.
 
-        Flushes all accumulated content and returns the message.
         Returns None if no content has been accumulated yet.
         """
-        self.flush_thinking()
-        self.flush_text()
-
-        filtered_parts: list[message.Part] = []
-        for part in self.assistant_parts:
-            if isinstance(part, message.ToolCallPart):
-                continue
-            filtered_parts.append(part)
-
-        filtered_parts = degrade_thinking_to_text(filtered_parts)
-
-        if not filtered_parts:
-            return None
-        return message.AssistantMessage(
-            parts=filtered_parts,
-            response_id=self.response_id,
-            stop_reason="aborted",
-        )
+        return build_partial_message(self.assistant_parts, response_id=self.response_id)
 
 
 async def parse_google_stream(
-    stream: AsyncIterator[Any],
+    stream: AsyncIterator[GenerateContentResponse],
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
     state: GoogleStreamStateManager,
 ) -> AsyncGenerator[message.LLMStreamItem]:
-    stage: Literal["waiting", "thinking", "assistant", "tool"] = "waiting"
-
     # Track tool calls where args arrive as partial updates.
     partial_args_by_call: dict[str, dict[str, Any]] = {}
-    started_tool_calls: dict[str, str] = {}  # call_id -> name
+    started_tool_calls: dict[str, tuple[str, bytes | None]] = {}  # call_id -> (name, thought_signature)
     started_tool_items: set[str] = set()
     completed_tool_items: set[str] = set()
 
-    last_usage_metadata: UsageMetadata | None = None
+    # Track image index for unique filenames
+    image_index = 0
+
+    last_usage_metadata: GenerateContentResponseUsageMetadata | None = None
 
     async for chunk in stream:
-        log_debug(
-            chunk.model_dump_json(exclude_none=True),
-            style="blue",
-            debug_type=DebugType.LLM_STREAM,
-        )
+        log_debug(debug_json(chunk.model_dump(exclude_none=True)), style="blue", debug_type=DebugType.LLM_STREAM)
 
         if state.response_id is None:
-            state.response_id = getattr(chunk, "response_id", None) or uuid4().hex
+            state.response_id = chunk.response_id or uuid4().hex
 
-        if getattr(chunk, "usage_metadata", None) is not None:
+        if chunk.usage_metadata is not None:
             last_usage_metadata = chunk.usage_metadata
 
-        candidates = getattr(chunk, "candidates", None) or []
+        candidates = chunk.candidates or []
         candidate0 = candidates[0] if candidates else None
-        finish_reason = getattr(candidate0, "finish_reason", None) if candidate0 else None
+        finish_reason = candidate0.finish_reason if candidate0 else None
         if finish_reason is not None:
-            if isinstance(finish_reason, str):
-                reason_value = finish_reason
-            else:
-                reason_value = getattr(finish_reason, "name", None) or str(finish_reason)
-            state.stop_reason = _map_finish_reason(reason_value)
-        content = getattr(candidate0, "content", None) if candidate0 else None
-        content_parts = getattr(content, "parts", None) if content else None
+            state.stop_reason = _map_finish_reason(finish_reason.name)
+        content = candidate0.content if candidate0 else None
+        content_parts = content.parts if content else None
         if not content_parts:
             continue
 
         for part in content_parts:
-            if getattr(part, "text", None) is not None:
+            # Handle text parts (both thought and regular text)
+            if part.text is not None:
                 text = part.text
                 if not text:
                     continue
                 metadata_tracker.record_token()
-                if getattr(part, "thought", False) is True:
-                    if stage == "assistant":
-                        state.flush_text()
-                    stage = "thinking"
-                    state.accumulated_thoughts.append(text)
-                    if getattr(part, "thought_signature", None):
-                        state.thought_signature = part.thought_signature
+
+                if part.thought is True:
+                    # Thinking text - append and merge with previous ThinkingTextPart
+                    state.append_thinking_text(text)
+                    # Add ThinkingSignaturePart after thinking text if present
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
                     yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
                 else:
-                    if stage == "thinking":
-                        state.flush_thinking()
-                    stage = "assistant"
-                    state.accumulated_text.append(text)
+                    # Regular text - append and merge with previous TextPart
+                    state.append_text(text)
+                    # Regular text parts can also have thought_signature
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
                     yield message.AssistantTextDelta(content=text, response_id=state.response_id)
 
-            function_call = getattr(part, "function_call", None)
+            # Handle inline_data (image generation responses)
+            inline_data = part.inline_data
+            if inline_data is not None and inline_data.data:
+                # Thought images (interim images produced during thinking) do not
+                # carry thought signatures and must not be treated as response
+                # images for multi-turn history.
+                if part.thought is True:
+                    continue
+                mime_type = inline_data.mime_type or "image/png"
+                encoded_data = b64encode(inline_data.data).decode("ascii")
+                data_url = f"data:{mime_type};base64,{encoded_data}"
+                try:
+                    image_part = save_assistant_image(
+                        data_url=data_url,
+                        session_id=param.session_id,
+                        response_id=state.response_id,
+                        image_index=image_index,
+                    )
+                    image_index += 1
+                    state.append_image(image_part)
+                    # Add ThinkingSignaturePart after image if present, or synthetic signature for thinking models
+                    if part.thought_signature:
+                        encoded_sig = _encode_thought_signature(part.thought_signature)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
+                    elif support_thinking(param.model_id):
+                        encoded_sig = _encode_thought_signature(SYNTHETIC_THOUGHT_SIGNATURE)
+                        if encoded_sig:
+                            state.append_thinking_signature(encoded_sig)
+                    yield message.AssistantImageDelta(
+                        response_id=state.response_id,
+                        file_path=image_part.file_path,
+                    )
+                except ValueError:
+                    pass  # Skip invalid images
+
+            # Handle function calls
+            function_call = part.function_call
             if function_call is None:
                 continue
 
             metadata_tracker.record_token()
-            call_id = getattr(function_call, "id", None) or uuid4().hex
-            name = getattr(function_call, "name", None) or ""
-            started_tool_calls.setdefault(call_id, name)
+            call_id = function_call.id or uuid4().hex
+            name = function_call.name or ""
+
+            # Capture thought_signature from the part (required for tools in thinking models)
+            thought_signature = part.thought_signature
+
+            # Store name and thought_signature for later use (partial args / flush)
+            if call_id not in started_tool_calls or (thought_signature and started_tool_calls[call_id][1] is None):
+                started_tool_calls[call_id] = (name, thought_signature)
 
             if call_id not in started_tool_items:
                 started_tool_items.add(call_id)
                 yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
 
-            args_obj = getattr(function_call, "args", None)
+            args_obj = function_call.args
             if args_obj is not None:
-                if stage == "thinking":
-                    state.flush_thinking()
-                if stage == "assistant":
-                    state.flush_text()
-                stage = "tool"
-                state.assistant_parts.append(
-                    message.ToolCallPart(
-                        call_id=call_id,
-                        tool_name=name,
-                        arguments_json=json.dumps(args_obj, ensure_ascii=False),
-                    )
-                )
+                # Add ToolCallPart, then ThinkingSignaturePart after it
+                state.append_tool_call(call_id, name, json.dumps(args_obj, ensure_ascii=False))
+                encoded_sig = _encode_thought_signature(thought_signature)
+                if encoded_sig:
+                    state.append_thinking_signature(encoded_sig)
                 completed_tool_items.add(call_id)
                 continue
 
-            partial_args = getattr(function_call, "partial_args", None)
+            partial_args = function_call.partial_args
             if partial_args is not None:
                 acc = partial_args_by_call.setdefault(call_id, {})
                 _merge_partial_args(acc, partial_args)
 
-            will_continue = getattr(function_call, "will_continue", None)
+            will_continue = function_call.will_continue
             if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
-                if stage == "thinking":
-                    state.flush_thinking()
-                if stage == "assistant":
-                    state.flush_text()
-                stage = "tool"
-                state.assistant_parts.append(
-                    message.ToolCallPart(
-                        call_id=call_id,
-                        tool_name=name,
-                        arguments_json=json.dumps(partial_args_by_call[call_id], ensure_ascii=False),
-                    )
-                )
+                # Add ToolCallPart, then ThinkingSignaturePart after it
+                state.append_tool_call(call_id, name, json.dumps(partial_args_by_call[call_id], ensure_ascii=False))
+                stored_sig = started_tool_calls.get(call_id, (name, None))[1]
+                encoded_stored_sig = _encode_thought_signature(stored_sig)
+                if encoded_stored_sig:
+                    state.append_thinking_signature(encoded_stored_sig)
                 completed_tool_items.add(call_id)
 
     # Flush any pending tool calls that never produced args.
-    for call_id, name in started_tool_calls.items():
+    for call_id, (name, stored_sig) in started_tool_calls.items():
         if call_id in completed_tool_items:
             continue
         args = partial_args_by_call.get(call_id, {})
-        state.assistant_parts.append(
-            message.ToolCallPart(
-                call_id=call_id,
-                tool_name=name,
-                arguments_json=json.dumps(args, ensure_ascii=False),
-            )
-        )
-
-    state.flush_thinking()
-    state.flush_text()
+        state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
+        encoded_stored_sig = _encode_thought_signature(stored_sig)
+        if encoded_stored_sig:
+            state.append_thinking_signature(encoded_stored_sig)
 
     usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
     if usage is not None:
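
Note: the invariant the rewritten state manager maintains is ordering: each ThinkingSignaturePart immediately follows the part it signs. A sketch of the accumulated parts for a turn that thinks, answers, then calls one tool (model id, call id, and signature values are illustrative):

    state = GoogleStreamStateManager(param_model="gemini-3-foo")
    state.append_thinking_text("Weighing options...")
    state.append_thinking_signature("c2lnLTE=")  # signs the thinking text
    state.append_text("Checking the weather now.")
    state.append_tool_call("call_1", "get_weather", '{"city": "Paris"}')
    state.append_thinking_signature("c2lnLTI=")  # signs the tool call
    # state.get_partial_parts() drops the ToolCallPart and degrades thinking
    # to plain text, yielding a prefix that is safe to prefill on retry.
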
@@ -355,7 +436,7 @@ class GoogleLLMStream(LLMStreamABC):
 
     def __init__(
         self,
-        stream: AsyncIterator[Any],
+        stream: AsyncIterator[GenerateContentResponse],
        *,
         param: llm_param.LLMCallParameter,
         metadata_tracker: MetadataTracker,
@@ -383,7 +464,14 @@ class GoogleLLMStream(LLMStreamABC):
                 yield item
         except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
             yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
-            yield message.AssistantMessage(parts=[], response_id=None, usage=self._metadata_tracker.finalize())
+            # Use accumulated parts for potential prefill on retry
+            self._metadata_tracker.set_response_id(self._state.response_id)
+            yield message.AssistantMessage(
+                parts=self._state.get_partial_parts(),
+                response_id=self._state.response_id,
+                usage=self._metadata_tracker.finalize(),
+                stop_reason="error",
+            )
 
     def get_partial_message(self) -> message.AssistantMessage | None:
         if self._completed:
@@ -419,13 +507,12 @@ class GoogleClient(LLMClientABC):
         config = _build_config(param)
 
         log_debug(
-            json.dumps(
+            debug_json(
                 {
                     "model": str(param.model_id),
                     "contents": [c.model_dump(exclude_none=True) for c in contents],
                     "config": config.model_dump(exclude_none=True),
-                },
-                ensure_ascii=False,
+                }
             ),
             style="yellow",
             debug_type=DebugType.LLM_PAYLOAD,
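
Note: debug_json comes from the reworked klaude_code/log.py (+57 lines, not shown in this excerpt) and replaces ad-hoc json.dumps(..., ensure_ascii=False) at log sites. Its exact behavior is not visible here; a hypothetical stand-in consistent with these call sites might look like:

    # Hypothetical stand-in only; the real implementation lives in klaude_code/log.py.
    def debug_json(obj: Any) -> str:
        return json.dumps(obj, ensure_ascii=False, default=str)
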
@@ -434,7 +521,7 @@ class GoogleClient(LLMClientABC):
         try:
             stream = await self.client.aio.models.generate_content_stream(
                 model=str(param.model_id),
-                contents=cast(Any, contents),
+                contents=cast(ContentListUnion, contents),
                 config=config,
             )
         except (APIError, ClientError, ServerError, httpx.HTTPError) as e: