klaude-code 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. klaude_code/auth/__init__.py +10 -0
  2. klaude_code/auth/env.py +77 -0
  3. klaude_code/cli/auth_cmd.py +89 -21
  4. klaude_code/cli/config_cmd.py +5 -5
  5. klaude_code/cli/cost_cmd.py +167 -68
  6. klaude_code/cli/main.py +51 -27
  7. klaude_code/cli/self_update.py +7 -7
  8. klaude_code/config/assets/builtin_config.yaml +45 -24
  9. klaude_code/config/builtin_config.py +23 -9
  10. klaude_code/config/config.py +19 -9
  11. klaude_code/config/model_matcher.py +1 -1
  12. klaude_code/const.py +2 -1
  13. klaude_code/core/tool/file/edit_tool.py +1 -1
  14. klaude_code/core/tool/file/read_tool.py +2 -2
  15. klaude_code/core/tool/file/write_tool.py +1 -1
  16. klaude_code/core/turn.py +21 -4
  17. klaude_code/llm/anthropic/client.py +75 -50
  18. klaude_code/llm/anthropic/input.py +20 -9
  19. klaude_code/llm/google/client.py +235 -148
  20. klaude_code/llm/google/input.py +44 -36
  21. klaude_code/llm/openai_compatible/stream.py +114 -100
  22. klaude_code/llm/openrouter/client.py +1 -0
  23. klaude_code/llm/openrouter/reasoning.py +4 -29
  24. klaude_code/llm/partial_message.py +2 -32
  25. klaude_code/llm/responses/client.py +99 -81
  26. klaude_code/llm/responses/input.py +11 -25
  27. klaude_code/llm/stream_parts.py +94 -0
  28. klaude_code/log.py +57 -0
  29. klaude_code/protocol/events.py +214 -0
  30. klaude_code/protocol/sub_agent/image_gen.py +0 -4
  31. klaude_code/session/session.py +51 -18
  32. klaude_code/tui/command/fork_session_cmd.py +14 -23
  33. klaude_code/tui/command/model_picker.py +2 -17
  34. klaude_code/tui/command/resume_cmd.py +2 -18
  35. klaude_code/tui/command/sub_agent_model_cmd.py +5 -19
  36. klaude_code/tui/command/thinking_cmd.py +2 -14
  37. klaude_code/tui/commands.py +0 -5
  38. klaude_code/tui/components/common.py +1 -1
  39. klaude_code/tui/components/metadata.py +21 -21
  40. klaude_code/tui/components/rich/quote.py +36 -8
  41. klaude_code/tui/components/rich/theme.py +2 -0
  42. klaude_code/tui/components/sub_agent.py +6 -0
  43. klaude_code/tui/display.py +11 -1
  44. klaude_code/tui/input/completers.py +11 -7
  45. klaude_code/tui/input/prompt_toolkit.py +3 -1
  46. klaude_code/tui/machine.py +108 -56
  47. klaude_code/tui/renderer.py +4 -65
  48. klaude_code/tui/terminal/selector.py +174 -31
  49. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/METADATA +23 -31
  50. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/RECORD +52 -58
  51. klaude_code/cli/session_cmd.py +0 -96
  52. klaude_code/protocol/events/__init__.py +0 -63
  53. klaude_code/protocol/events/base.py +0 -18
  54. klaude_code/protocol/events/chat.py +0 -30
  55. klaude_code/protocol/events/lifecycle.py +0 -23
  56. klaude_code/protocol/events/metadata.py +0 -16
  57. klaude_code/protocol/events/streaming.py +0 -43
  58. klaude_code/protocol/events/system.py +0 -56
  59. klaude_code/protocol/events/tools.py +0 -27
  60. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/WHEEL +0 -0
  61. {klaude_code-2.5.2.dist-info → klaude_code-2.6.0.dist-info}/entry_points.txt +0 -0
@@ -11,7 +11,7 @@ from typing import Any
11
11
  from google.genai import types
12
12
 
13
13
  from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
14
- from klaude_code.llm.image import parse_data_url
14
+ from klaude_code.llm.image import assistant_image_to_data_url, parse_data_url
15
15
  from klaude_code.llm.input_common import (
16
16
  DeveloperAttachment,
17
17
  attach_developer_messages,
@@ -108,51 +108,49 @@ def _tool_messages_to_contents(
108
108
  return contents
109
109
 
110
110
 
111
+ def _decode_thought_signature(sig: str | None) -> bytes | None:
112
+ """Decode base64 thought signature to bytes."""
113
+ if not sig:
114
+ return None
115
+ try:
116
+ return b64decode(sig)
117
+ except (BinasciiError, ValueError):
118
+ return None
119
+
120
+
111
121
  def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str | None) -> types.Content | None:
112
122
  parts: list[types.Part] = []
113
123
  native_thinking_parts, degraded_thinking_texts = split_thinking_parts(msg, model_name)
114
124
  native_thinking_ids = {id(part) for part in native_thinking_parts}
115
- pending_thought_text: str | None = None
116
- pending_thought_signature: str | None = None
117
-
118
- def flush_thought() -> None:
119
- nonlocal pending_thought_text, pending_thought_signature
120
- if pending_thought_text is None and pending_thought_signature is None:
121
- return
122
-
123
- signature_bytes: bytes | None = None
124
- if pending_thought_signature:
125
- try:
126
- signature_bytes = b64decode(pending_thought_signature)
127
- except (BinasciiError, ValueError):
128
- signature_bytes = None
129
-
130
- parts.append(
131
- types.Part(
132
- text=pending_thought_text or "",
133
- thought=True,
134
- thought_signature=signature_bytes,
135
- )
136
- )
137
- pending_thought_text = None
138
- pending_thought_signature = None
139
125
 
140
126
  for part in msg.parts:
141
127
  if isinstance(part, message.ThinkingTextPart):
142
128
  if id(part) not in native_thinking_ids:
143
129
  continue
144
- pending_thought_text = part.text
145
- continue
146
- if isinstance(part, message.ThinkingSignaturePart):
130
+ parts.append(types.Part(text=part.text, thought=True))
131
+
132
+ elif isinstance(part, message.ThinkingSignaturePart):
147
133
  if id(part) not in native_thinking_ids:
148
134
  continue
149
- if part.signature and (part.format or "").startswith("google"):
150
- pending_thought_signature = part.signature
151
- continue
152
-
153
- flush_thought()
154
- if isinstance(part, message.TextPart):
135
+ if not part.signature or part.format != "google":
136
+ continue
137
+ # Attach signature to the previous part
138
+ if parts:
139
+ sig_bytes = _decode_thought_signature(part.signature)
140
+ if sig_bytes:
141
+ last_part = parts[-1]
142
+ parts[-1] = types.Part(
143
+ text=last_part.text,
144
+ thought=last_part.thought,
145
+ function_call=last_part.function_call,
146
+ inline_data=last_part.inline_data,
147
+ file_data=last_part.file_data,
148
+ thought_signature=sig_bytes,
149
+ )
150
+
151
+ elif isinstance(part, message.TextPart):
155
152
  parts.append(types.Part(text=part.text))
153
+
156
154
  elif isinstance(part, message.ToolCallPart):
157
155
  args: dict[str, Any]
158
156
  if part.arguments_json:
@@ -162,9 +160,19 @@ def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str
162
160
  args = {"_raw": part.arguments_json}
163
161
  else:
164
162
  args = {}
165
- parts.append(types.Part(function_call=types.FunctionCall(id=part.call_id, name=part.tool_name, args=args)))
163
+ parts.append(
164
+ types.Part(
165
+ function_call=types.FunctionCall(id=part.call_id, name=part.tool_name, args=args),
166
+ )
167
+ )
166
168
 
167
- flush_thought()
169
+ elif isinstance(part, message.ImageFilePart):
170
+ # Convert saved image back to inline_data for multi-turn
171
+ try:
172
+ data_url = assistant_image_to_data_url(part)
173
+ parts.append(_image_part_to_part(message.ImageURLPart(url=data_url)))
174
+ except (ValueError, FileNotFoundError):
175
+ pass # Skip if image cannot be loaded
168
176
 
169
177
  if degraded_thinking_texts:
170
178
  parts.insert(0, types.Part(text="<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>"))
@@ -2,8 +2,8 @@
2
2
 
3
3
  This module provides reusable primitives for OpenAI-compatible providers:
4
4
 
5
- - ``StreamStateManager``: accumulates assistant content and tool calls.
6
- - ``ReasoningHandlerABC``: provider-specific reasoning extraction + buffering.
5
+ - ``StreamStateManager``: accumulates assistant parts in stream order.
6
+ - ``ReasoningHandlerABC``: provider-specific reasoning extraction.
7
7
  - ``OpenAILLMStream``: LLMStream implementation for OpenAI-compatible clients.
8
8
 
9
9
  OpenRouter uses the same OpenAI Chat Completions API surface but differs in
@@ -15,7 +15,7 @@ from __future__ import annotations
15
15
  from abc import ABC, abstractmethod
16
16
  from collections.abc import AsyncGenerator, Callable
17
17
  from dataclasses import dataclass
18
- from typing import Any, Literal, cast
18
+ from typing import Any, cast
19
19
 
20
20
  import httpx
21
21
  import openai
@@ -26,91 +26,107 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
26
26
 
27
27
  from klaude_code.llm.client import LLMStreamABC
28
28
  from klaude_code.llm.image import save_assistant_image
29
- from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
30
- from klaude_code.llm.partial_message import degrade_thinking_to_text
29
+ from klaude_code.llm.openai_compatible.tool_call_accumulator import normalize_tool_name
30
+ from klaude_code.llm.stream_parts import (
31
+ append_text_part,
32
+ append_thinking_text_part,
33
+ build_partial_message,
34
+ build_partial_parts,
35
+ )
31
36
  from klaude_code.llm.usage import MetadataTracker, convert_usage
32
37
  from klaude_code.protocol import llm_param, message, model
33
38
 
34
- StreamStage = Literal["waiting", "reasoning", "assistant", "tool"]
35
-
36
39
 
37
40
  class StreamStateManager:
38
- """Manages streaming state and provides flush operations for accumulated content.
41
+ """Manages streaming state and accumulates parts in stream order.
39
42
 
40
- This class encapsulates the common state management logic used by both
41
- OpenAI-compatible and OpenRouter clients, reducing code duplication.
43
+ The persisted AssistantMessage is built directly from ``assistant_parts``.
44
+ ``get_partial_message()`` returns a best-effort message on cancellation.
42
45
  """
43
46
 
44
47
  def __init__(
45
48
  self,
46
49
  param_model: str,
47
50
  response_id: str | None = None,
48
- reasoning_flusher: Callable[[], list[message.Part]] | None = None,
49
51
  ):
50
52
  self.param_model = param_model
51
53
  self.response_id = response_id
52
- self.stage: StreamStage = "waiting"
53
- self.accumulated_content: list[str] = []
54
- self.accumulated_images: list[message.ImageFilePart] = []
55
- self.accumulated_tool_calls: ToolCallAccumulatorABC = BasicToolCallAccumulator()
56
- self.emitted_tool_start_indices: set[int] = set()
57
- self._reasoning_flusher = reasoning_flusher
58
- self.parts: list[message.Part] = []
54
+ self.assistant_parts: list[message.Part] = []
55
+ self._image_index: int = 0
56
+ self._tool_part_index_by_tc_index: dict[int, int] = {}
57
+ self._emitted_tool_start_indices: set[int] = set()
59
58
  self.stop_reason: model.StopReason | None = None
60
59
 
61
60
  def set_response_id(self, response_id: str) -> None:
62
61
  """Set the response ID once received from the stream."""
63
62
  self.response_id = response_id
64
- self.accumulated_tool_calls.set_response_id(response_id)
65
63
 
66
- def flush_reasoning(self) -> None:
67
- """Flush accumulated reasoning content into parts."""
68
- if self._reasoning_flusher is not None:
69
- self.parts.extend(self._reasoning_flusher())
64
+ def append_thinking_text(self, text: str) -> None:
65
+ """Append thinking text, merging with the previous ThinkingTextPart when possible."""
66
+ append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
67
+
68
+ def append_text(self, text: str) -> None:
69
+ """Append assistant text, merging with the previous TextPart when possible."""
70
+ append_text_part(self.assistant_parts, text)
71
+
72
+ def append_image(self, image_part: message.ImageFilePart) -> None:
73
+ self.assistant_parts.append(image_part)
74
+ self._image_index += 1
75
+
76
+ def upsert_tool_call(self, *, tc_index: int, call_id: str | None, name: str | None, arguments: str | None) -> None:
77
+ """Insert a ToolCallPart at first sight and keep updating its fields.
78
+
79
+ Chat Completions streams tool call fields incrementally (name/id first,
80
+ then argument fragments). We keep the ToolCallPart in-place to preserve
81
+ stream order in the persisted AssistantMessage.
82
+ """
70
83
 
71
- def flush_assistant(self) -> None:
72
- """Flush accumulated assistant content into parts."""
73
- if not self.accumulated_content and not self.accumulated_images:
84
+ part_index = self._tool_part_index_by_tc_index.get(tc_index)
85
+ if part_index is None:
86
+ tool_part = message.ToolCallPart(
87
+ call_id=call_id or "",
88
+ tool_name=normalize_tool_name(name or ""),
89
+ arguments_json=arguments or "",
90
+ )
91
+ self.assistant_parts.append(tool_part)
92
+ self._tool_part_index_by_tc_index[tc_index] = len(self.assistant_parts) - 1
74
93
  return
75
- if self.accumulated_content:
76
- self.parts.append(message.TextPart(text="".join(self.accumulated_content)))
77
- if self.accumulated_images:
78
- self.parts.extend(self.accumulated_images)
79
- self.accumulated_content = []
80
- self.accumulated_images = []
81
- return
82
-
83
- def flush_tool_calls(self) -> None:
84
- """Flush accumulated tool calls into parts."""
85
- items = self.accumulated_tool_calls.get()
86
- if items:
87
- self.parts.extend(items)
88
- self.accumulated_tool_calls.reset()
89
-
90
- def flush_all(self) -> list[message.Part]:
91
- """Flush all accumulated content in order: reasoning, assistant, tool calls."""
92
- self.flush_reasoning()
93
- self.flush_assistant()
94
- if self.stage == "tool":
95
- self.flush_tool_calls()
96
- return list(self.parts)
94
+
95
+ existing = self.assistant_parts[part_index]
96
+ if not isinstance(existing, message.ToolCallPart):
97
+ return
98
+
99
+ if call_id and not existing.call_id:
100
+ existing.call_id = call_id
101
+ if name and not existing.tool_name:
102
+ existing.tool_name = normalize_tool_name(name)
103
+ if arguments:
104
+ existing.arguments_json += arguments
105
+
106
+ def mark_tool_start_emitted(self, tc_index: int) -> bool:
107
+ """Return True if this is the first time we emit ToolCallStartDelta for this index."""
108
+ if tc_index in self._emitted_tool_start_indices:
109
+ return False
110
+ self._emitted_tool_start_indices.add(tc_index)
111
+ return True
112
+
113
+ def next_image_index(self) -> int:
114
+ return self._image_index
115
+
116
+ def get_partial_parts(self) -> list[message.Part]:
117
+ """Get accumulated parts excluding tool calls, with thinking degraded.
118
+
119
+ Filters out ToolCallPart and applies degrade_thinking_to_text.
120
+ """
121
+ return build_partial_parts(self.assistant_parts)
97
122
 
98
123
  def get_partial_message(self) -> message.AssistantMessage | None:
99
124
  """Build a partial AssistantMessage from accumulated state.
100
125
 
101
- Flushes all accumulated content (reasoning, assistant text, tool calls)
102
- and returns the message. Returns None if no content has been accumulated.
126
+ Filters out tool calls and degrades thinking content for safety.
127
+ Returns None if no content has been accumulated.
103
128
  """
104
- self.flush_reasoning()
105
- self.flush_assistant()
106
- parts = degrade_thinking_to_text(list(self.parts))
107
- if not parts:
108
- return None
109
- return message.AssistantMessage(
110
- parts=parts,
111
- response_id=self.response_id,
112
- stop_reason="aborted",
113
- )
129
+ return build_partial_message(self.assistant_parts, response_id=self.response_id)
114
130
 
115
131
 
116
132
  @dataclass(slots=True)
@@ -148,7 +164,6 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
148
164
  ) -> None:
149
165
  self._param_model = param_model
150
166
  self._response_id = response_id
151
- self._accumulated: list[str] = []
152
167
 
153
168
  def set_response_id(self, response_id: str | None) -> None:
154
169
  self._response_id = response_id
@@ -158,18 +173,10 @@ class DefaultReasoningHandler(ReasoningHandlerABC):
158
173
  if not reasoning_content:
159
174
  return ReasoningDeltaResult(handled=False, outputs=[])
160
175
  text = str(reasoning_content)
161
- self._accumulated.append(text)
162
176
  return ReasoningDeltaResult(handled=True, outputs=[text])
163
177
 
164
178
  def flush(self) -> list[message.Part]:
165
- if not self._accumulated:
166
- return []
167
- item = message.ThinkingTextPart(
168
- text="".join(self._accumulated),
169
- model_id=self._param_model,
170
- )
171
- self._accumulated = []
172
- return [item]
179
+ return []
173
180
 
174
181
 
175
182
  def _map_finish_reason(reason: str) -> model.StopReason | None:
@@ -192,6 +199,7 @@ async def parse_chat_completions_stream(
192
199
  metadata_tracker: MetadataTracker,
193
200
  reasoning_handler: ReasoningHandlerABC,
194
201
  on_event: Callable[[object], None] | None = None,
202
+ provider_prefix: str = "",
195
203
  ) -> AsyncGenerator[message.LLMStreamItem]:
196
204
  """Parse OpenAI Chat Completions stream into stream items.
197
205
 
@@ -228,7 +236,7 @@ async def parse_chat_completions_stream(
228
236
  if event_model := getattr(event, "model", None):
229
237
  metadata_tracker.set_model_name(str(event_model))
230
238
  if provider := getattr(event, "provider", None):
231
- metadata_tracker.set_provider(str(provider))
239
+ metadata_tracker.set_provider(f"{provider_prefix}{provider}")
232
240
 
233
241
  choices = cast(Any, getattr(event, "choices", None))
234
242
  if not choices:
@@ -254,26 +262,21 @@ async def parse_chat_completions_stream(
254
262
  # Reasoning
255
263
  reasoning_result = reasoning_handler.on_delta(delta)
256
264
  if reasoning_result.handled:
257
- state.stage = "reasoning"
258
265
  for output in reasoning_result.outputs:
259
266
  if isinstance(output, str):
260
267
  if not output:
261
268
  continue
262
269
  metadata_tracker.record_token()
270
+ state.append_thinking_text(output)
263
271
  yield message.ThinkingTextDelta(content=output, response_id=state.response_id)
264
272
  else:
265
- state.parts.append(output)
273
+ state.assistant_parts.append(output)
266
274
 
267
275
  # Assistant
268
276
  images = getattr(delta, "images", None)
269
277
  if isinstance(images, list) and images:
270
278
  images_list = cast(list[object], images)
271
279
  metadata_tracker.record_token()
272
- if state.stage == "reasoning":
273
- state.flush_reasoning()
274
- elif state.stage == "tool":
275
- state.flush_tool_calls()
276
- state.stage = "assistant"
277
280
  for image_obj in images_list:
278
281
  url = _extract_image_url(image_obj)
279
282
  if not url:
@@ -286,50 +289,59 @@ async def parse_chat_completions_stream(
286
289
  data_url=url,
287
290
  session_id=param.session_id,
288
291
  response_id=state.response_id,
289
- image_index=len(state.accumulated_images),
292
+ image_index=state.next_image_index(),
290
293
  )
291
294
  except ValueError as exc:
292
295
  yield message.StreamErrorItem(error=str(exc))
293
296
  return
294
- state.accumulated_images.append(assistant_image)
297
+ state.append_image(assistant_image)
295
298
  yield message.AssistantImageDelta(
296
299
  response_id=state.response_id, file_path=assistant_image.file_path
297
300
  )
298
301
 
299
- if (content := getattr(delta, "content", None)) and (state.stage == "assistant" or str(content).strip()):
302
+ content_str = str(content) if (content := getattr(delta, "content", None)) is not None else ""
303
+
304
+ if content_str and (
305
+ (state.assistant_parts and isinstance(state.assistant_parts[-1], message.TextPart))
306
+ or content_str.strip()
307
+ ):
300
308
  metadata_tracker.record_token()
301
- if state.stage == "reasoning":
302
- state.flush_reasoning()
303
- elif state.stage == "tool":
304
- state.flush_tool_calls()
305
- state.stage = "assistant"
306
- state.accumulated_content.append(str(content))
309
+ state.append_text(content_str)
307
310
  yield message.AssistantTextDelta(
308
- content=str(content),
311
+ content=content_str,
309
312
  response_id=state.response_id,
310
313
  )
311
314
 
312
315
  # Tool
313
316
  if (tool_calls := getattr(delta, "tool_calls", None)) and len(tool_calls) > 0:
314
317
  metadata_tracker.record_token()
315
- if state.stage == "reasoning":
316
- state.flush_reasoning()
317
- elif state.stage == "assistant":
318
- state.flush_assistant()
319
- state.stage = "tool"
320
318
  for tc in tool_calls:
321
- if tc.index not in state.emitted_tool_start_indices and tc.function and tc.function.name:
322
- state.emitted_tool_start_indices.add(tc.index)
319
+ tc_index = getattr(tc, "index", None)
320
+ if not isinstance(tc_index, int):
321
+ continue
322
+ fn = getattr(tc, "function", None)
323
+ fn_name = getattr(fn, "name", None) if fn is not None else None
324
+ fn_args = getattr(fn, "arguments", None) if fn is not None else None
325
+ tc_id = getattr(tc, "id", None)
326
+
327
+ if fn_name and state.mark_tool_start_emitted(tc_index):
323
328
  yield message.ToolCallStartDelta(
324
329
  response_id=state.response_id,
325
- call_id=tc.id or "",
326
- name=tc.function.name,
330
+ call_id=str(tc_id or ""),
331
+ name=str(fn_name),
327
332
  )
328
- state.accumulated_tool_calls.add(tool_calls)
333
+ state.upsert_tool_call(
334
+ tc_index=tc_index,
335
+ call_id=str(tc_id) if isinstance(tc_id, str) else None,
336
+ name=str(fn_name) if isinstance(fn_name, str) else None,
337
+ arguments=str(fn_args) if isinstance(fn_args, str) else None,
338
+ )
329
339
  except (openai.OpenAIError, httpx.HTTPError) as e:
330
340
  yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
341
+ state.stop_reason = "error"
331
342
 
332
- parts = state.flush_all()
343
+ # On error, use partial parts (excluding incomplete tool calls) for potential prefill on retry
344
+ parts = state.get_partial_parts() if state.stop_reason == "error" else list(state.assistant_parts)
333
345
  if parts:
334
346
  metadata_tracker.record_token()
335
347
  metadata_tracker.set_response_id(state.response_id)
@@ -353,15 +365,16 @@ class OpenAILLMStream(LLMStreamABC):
353
365
  metadata_tracker: MetadataTracker,
354
366
  reasoning_handler: ReasoningHandlerABC,
355
367
  on_event: Callable[[object], None] | None = None,
368
+ provider_prefix: str = "",
356
369
  ) -> None:
357
370
  self._stream = stream
358
371
  self._param = param
359
372
  self._metadata_tracker = metadata_tracker
360
373
  self._reasoning_handler = reasoning_handler
361
374
  self._on_event = on_event
375
+ self._provider_prefix = provider_prefix
362
376
  self._state = StreamStateManager(
363
377
  param_model=str(param.model_id),
364
- reasoning_flusher=reasoning_handler.flush,
365
378
  )
366
379
  self._completed = False
367
380
 
@@ -376,6 +389,7 @@ class OpenAILLMStream(LLMStreamABC):
376
389
  metadata_tracker=self._metadata_tracker,
377
390
  reasoning_handler=self._reasoning_handler,
378
391
  on_event=self._on_event,
392
+ provider_prefix=self._provider_prefix,
379
393
  ):
380
394
  if isinstance(item, message.AssistantMessage):
381
395
  self._completed = True
@@ -145,4 +145,5 @@ class OpenRouterClient(LLMClientABC):
145
145
  metadata_tracker=metadata_tracker,
146
146
  reasoning_handler=reasoning_handler,
147
147
  on_event=on_event,
148
+ provider_prefix="openrouter/",
148
149
  )
@@ -30,7 +30,6 @@ class ReasoningStreamHandler(ReasoningHandlerABC):
30
30
  self._response_id = response_id
31
31
 
32
32
  self._reasoning_id: str | None = None
33
- self._accumulated_reasoning: list[str] = []
34
33
 
35
34
  def set_response_id(self, response_id: str | None) -> None:
36
35
  """Update the response identifier used for emitted items."""
@@ -62,44 +61,20 @@ class ReasoningStreamHandler(ReasoningHandlerABC):
62
61
 
63
62
  if detail.type == "reasoning.encrypted":
64
63
  self._reasoning_id = detail.id
65
- # Flush accumulated text before encrypted content
66
- items.extend(self._flush_text())
67
64
  if signature_part := self._build_signature_part(detail.data, detail):
68
65
  items.append(signature_part)
69
66
  return items
70
67
 
71
68
  if detail.type in ("reasoning.text", "reasoning.summary"):
72
69
  self._reasoning_id = detail.id
73
- # Accumulate text
74
- text = detail.text if detail.type == "reasoning.text" else detail.summary
75
- if text:
76
- self._accumulated_reasoning.append(text)
77
- # Flush on signature (encrypted content)
78
- if detail.signature:
79
- items.extend(self._flush_text())
80
- if signature_part := self._build_signature_part(detail.signature, detail):
81
- items.append(signature_part)
70
+ # Signature (Anthropic-style) can arrive alongside text/summary.
71
+ if detail.signature and (signature_part := self._build_signature_part(detail.signature, detail)):
72
+ items.append(signature_part)
82
73
 
83
74
  return items
84
75
 
85
76
  def flush(self) -> list[message.Part]:
86
- """Flush buffered reasoning text on finalize."""
87
- return self._flush_text()
88
-
89
- def _flush_text(self) -> list[message.Part]:
90
- """Flush accumulated reasoning text as a single part."""
91
- if not self._accumulated_reasoning:
92
- return []
93
- item = self._build_text_part("".join(self._accumulated_reasoning))
94
- self._accumulated_reasoning = []
95
- return [item]
96
-
97
- def _build_text_part(self, content: str) -> message.ThinkingTextPart:
98
- return message.ThinkingTextPart(
99
- id=self._reasoning_id,
100
- text=content,
101
- model_id=self._param_model,
102
- )
77
+ return []
103
78
 
104
79
  def _build_signature_part(
105
80
  self,
@@ -1,35 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from klaude_code.protocol import message
3
+ from klaude_code.llm.stream_parts import degrade_thinking_to_text
4
4
 
5
-
6
- def degrade_thinking_to_text(parts: list[message.Part]) -> list[message.Part]:
7
- """Degrade thinking parts into a regular TextPart.
8
-
9
- Some providers require thinking signatures/encrypted content to be echoed back
10
- for subsequent calls. During interruption we cannot reliably determine whether
11
- we have a complete signature, so we persist thinking as plain text instead.
12
- """
13
-
14
- thinking_texts: list[str] = []
15
- non_thinking_parts: list[message.Part] = []
16
-
17
- for part in parts:
18
- if isinstance(part, message.ThinkingTextPart):
19
- text = part.text
20
- if text and text.strip():
21
- thinking_texts.append(text)
22
- continue
23
- if isinstance(part, message.ThinkingSignaturePart):
24
- continue
25
- non_thinking_parts.append(part)
26
-
27
- if not thinking_texts:
28
- return non_thinking_parts
29
-
30
- joined = "\n".join(thinking_texts).strip()
31
- thinking_block = f"<thinking>\n{joined}\n</thinking>"
32
- if non_thinking_parts:
33
- thinking_block += "\n\n"
34
-
35
- return [message.TextPart(text=thinking_block), *non_thinking_parts]
5
+ __all__ = ["degrade_thinking_to_text"]