klaude-code 2.4.2__py3-none-any.whl → 2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. klaude_code/app/runtime.py +2 -6
  2. klaude_code/cli/main.py +0 -1
  3. klaude_code/config/assets/builtin_config.yaml +7 -0
  4. klaude_code/const.py +7 -4
  5. klaude_code/core/agent.py +10 -1
  6. klaude_code/core/agent_profile.py +47 -35
  7. klaude_code/core/executor.py +6 -21
  8. klaude_code/core/manager/sub_agent_manager.py +17 -1
  9. klaude_code/core/prompts/prompt-sub-agent-web.md +4 -4
  10. klaude_code/core/task.py +66 -4
  11. klaude_code/core/tool/__init__.py +0 -5
  12. klaude_code/core/tool/context.py +12 -1
  13. klaude_code/core/tool/offload.py +311 -0
  14. klaude_code/core/tool/shell/bash_tool.md +1 -43
  15. klaude_code/core/tool/sub_agent_tool.py +1 -0
  16. klaude_code/core/tool/todo/todo_write_tool.md +0 -23
  17. klaude_code/core/tool/tool_runner.py +14 -9
  18. klaude_code/core/tool/web/web_fetch_tool.md +1 -1
  19. klaude_code/core/tool/web/web_fetch_tool.py +14 -39
  20. klaude_code/core/turn.py +127 -139
  21. klaude_code/llm/anthropic/client.py +176 -82
  22. klaude_code/llm/bedrock/client.py +8 -12
  23. klaude_code/llm/claude/client.py +11 -15
  24. klaude_code/llm/client.py +31 -4
  25. klaude_code/llm/codex/client.py +7 -11
  26. klaude_code/llm/google/client.py +150 -69
  27. klaude_code/llm/openai_compatible/client.py +10 -15
  28. klaude_code/llm/openai_compatible/stream.py +68 -6
  29. klaude_code/llm/openrouter/client.py +9 -15
  30. klaude_code/llm/partial_message.py +35 -0
  31. klaude_code/llm/responses/client.py +134 -68
  32. klaude_code/llm/usage.py +30 -0
  33. klaude_code/protocol/commands.py +0 -4
  34. klaude_code/protocol/events/lifecycle.py +1 -0
  35. klaude_code/protocol/events/metadata.py +1 -0
  36. klaude_code/protocol/events/streaming.py +0 -1
  37. klaude_code/protocol/events/system.py +0 -4
  38. klaude_code/protocol/model.py +2 -15
  39. klaude_code/protocol/sub_agent/explore.py +0 -10
  40. klaude_code/protocol/sub_agent/image_gen.py +0 -7
  41. klaude_code/protocol/sub_agent/task.py +0 -10
  42. klaude_code/protocol/sub_agent/web.py +4 -12
  43. klaude_code/session/templates/export_session.html +4 -4
  44. klaude_code/skill/manager.py +2 -1
  45. klaude_code/tui/components/metadata.py +41 -49
  46. klaude_code/tui/components/rich/markdown.py +1 -3
  47. klaude_code/tui/components/rich/theme.py +2 -2
  48. klaude_code/tui/components/tools.py +0 -31
  49. klaude_code/tui/components/welcome.py +1 -32
  50. klaude_code/tui/input/prompt_toolkit.py +25 -9
  51. klaude_code/tui/machine.py +31 -19
  52. {klaude_code-2.4.2.dist-info → klaude_code-2.5.1.dist-info}/METADATA +1 -1
  53. {klaude_code-2.4.2.dist-info → klaude_code-2.5.1.dist-info}/RECORD +55 -55
  54. klaude_code/core/prompts/prompt-nano-banana.md +0 -1
  55. klaude_code/core/tool/truncation.py +0 -203
  56. {klaude_code-2.4.2.dist-info → klaude_code-2.5.1.dist-info}/WHEEL +0 -0
  57. {klaude_code-2.4.2.dist-info → klaude_code-2.5.1.dist-info}/entry_points.txt +0 -0
@@ -21,11 +21,12 @@ from google.genai.types import (
21
21
  UsageMetadata,
22
22
  )
23
23
 
24
- from klaude_code.llm.client import LLMClientABC
24
+ from klaude_code.llm.client import LLMClientABC, LLMStreamABC
25
25
  from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
26
26
  from klaude_code.llm.input_common import apply_config_defaults
27
+ from klaude_code.llm.partial_message import degrade_thinking_to_text
27
28
  from klaude_code.llm.registry import register
28
- from klaude_code.llm.usage import MetadataTracker
29
+ from klaude_code.llm.usage import MetadataTracker, error_llm_stream
29
30
  from klaude_code.log import DebugType, log_debug
30
31
  from klaude_code.protocol import llm_param, message, model
31
32
 
@@ -135,19 +136,83 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
135
136
  return mapping.get(normalized)
136
137
 
137
138
 
139
+ class GoogleStreamStateManager:
140
+ """Manages streaming state for Google LLM responses.
141
+
142
+ Accumulates thinking content, assistant text, and tool calls during streaming
143
+ to support get_partial_message() for cancellation scenarios.
144
+ """
145
+
146
+ def __init__(self, param_model: str) -> None:
147
+ self.param_model = param_model
148
+ self.accumulated_thoughts: list[str] = []
149
+ self.accumulated_text: list[str] = []
150
+ self.thought_signature: str | None = None
151
+ self.assistant_parts: list[message.Part] = []
152
+ self.response_id: str | None = None
153
+ self.stop_reason: model.StopReason | None = None
154
+
155
+ def flush_thinking(self) -> None:
156
+ """Flush accumulated thinking content into assistant_parts."""
157
+ if self.accumulated_thoughts:
158
+ self.assistant_parts.append(
159
+ message.ThinkingTextPart(
160
+ text="".join(self.accumulated_thoughts),
161
+ model_id=self.param_model,
162
+ )
163
+ )
164
+ self.accumulated_thoughts.clear()
165
+ if self.thought_signature:
166
+ self.assistant_parts.append(
167
+ message.ThinkingSignaturePart(
168
+ signature=self.thought_signature,
169
+ model_id=self.param_model,
170
+ format="google_thought_signature",
171
+ )
172
+ )
173
+ self.thought_signature = None
174
+
175
+ def flush_text(self) -> None:
176
+ """Flush accumulated text content into assistant_parts."""
177
+ if not self.accumulated_text:
178
+ return
179
+ self.assistant_parts.append(message.TextPart(text="".join(self.accumulated_text)))
180
+ self.accumulated_text.clear()
181
+
182
+ def get_partial_message(self) -> message.AssistantMessage | None:
183
+ """Build a partial AssistantMessage from accumulated state.
184
+
185
+ Flushes all accumulated content and returns the message.
186
+ Returns None if no content has been accumulated yet.
187
+ """
188
+ self.flush_thinking()
189
+ self.flush_text()
190
+
191
+ filtered_parts: list[message.Part] = []
192
+ for part in self.assistant_parts:
193
+ if isinstance(part, message.ToolCallPart):
194
+ continue
195
+ filtered_parts.append(part)
196
+
197
+ filtered_parts = degrade_thinking_to_text(filtered_parts)
198
+
199
+ if not filtered_parts:
200
+ return None
201
+ return message.AssistantMessage(
202
+ parts=filtered_parts,
203
+ response_id=self.response_id,
204
+ stop_reason="aborted",
205
+ )
206
+
207
+
138
208
  async def parse_google_stream(
139
209
  stream: AsyncIterator[Any],
140
210
  param: llm_param.LLMCallParameter,
141
211
  metadata_tracker: MetadataTracker,
212
+ state: GoogleStreamStateManager,
142
213
  ) -> AsyncGenerator[message.LLMStreamItem]:
143
- response_id: str | None = None
144
214
  stage: Literal["waiting", "thinking", "assistant", "tool"] = "waiting"
145
215
 
146
- accumulated_text: list[str] = []
147
- accumulated_thoughts: list[str] = []
148
- thought_signature: str | None = None
149
- assistant_parts: list[message.Part] = []
150
-
151
216
  # Track tool calls where args arrive as partial updates.
152
217
  partial_args_by_call: dict[str, dict[str, Any]] = {}
153
218
  started_tool_calls: dict[str, str] = {} # call_id -> name
@@ -155,33 +220,6 @@ async def parse_google_stream(
155
220
  completed_tool_items: set[str] = set()
156
221
 
157
222
  last_usage_metadata: UsageMetadata | None = None
158
- stop_reason: model.StopReason | None = None
159
-
160
- def flush_thinking() -> None:
161
- nonlocal thought_signature
162
- if accumulated_thoughts:
163
- assistant_parts.append(
164
- message.ThinkingTextPart(
165
- text="".join(accumulated_thoughts),
166
- model_id=str(param.model_id),
167
- )
168
- )
169
- accumulated_thoughts.clear()
170
- if thought_signature:
171
- assistant_parts.append(
172
- message.ThinkingSignaturePart(
173
- signature=thought_signature,
174
- model_id=str(param.model_id),
175
- format="google_thought_signature",
176
- )
177
- )
178
- thought_signature = None
179
-
180
- def flush_text() -> None:
181
- if not accumulated_text:
182
- return
183
- assistant_parts.append(message.TextPart(text="".join(accumulated_text)))
184
- accumulated_text.clear()
185
223
 
186
224
  async for chunk in stream:
187
225
  log_debug(
@@ -190,8 +228,8 @@ async def parse_google_stream(
190
228
  debug_type=DebugType.LLM_STREAM,
191
229
  )
192
230
 
193
- if response_id is None:
194
- response_id = getattr(chunk, "response_id", None) or uuid4().hex
231
+ if state.response_id is None:
232
+ state.response_id = getattr(chunk, "response_id", None) or uuid4().hex
195
233
 
196
234
  if getattr(chunk, "usage_metadata", None) is not None:
197
235
  last_usage_metadata = chunk.usage_metadata
@@ -204,7 +242,7 @@ async def parse_google_stream(
204
242
  reason_value = finish_reason
205
243
  else:
206
244
  reason_value = getattr(finish_reason, "name", None) or str(finish_reason)
207
- stop_reason = _map_finish_reason(reason_value)
245
+ state.stop_reason = _map_finish_reason(reason_value)
208
246
  content = getattr(candidate0, "content", None) if candidate0 else None
209
247
  content_parts = getattr(content, "parts", None) if content else None
210
248
  if not content_parts:
@@ -218,18 +256,18 @@ async def parse_google_stream(
218
256
  metadata_tracker.record_token()
219
257
  if getattr(part, "thought", False) is True:
220
258
  if stage == "assistant":
221
- flush_text()
259
+ state.flush_text()
222
260
  stage = "thinking"
223
- accumulated_thoughts.append(text)
261
+ state.accumulated_thoughts.append(text)
224
262
  if getattr(part, "thought_signature", None):
225
- thought_signature = part.thought_signature
226
- yield message.ThinkingTextDelta(content=text, response_id=response_id)
263
+ state.thought_signature = part.thought_signature
264
+ yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
227
265
  else:
228
266
  if stage == "thinking":
229
- flush_thinking()
267
+ state.flush_thinking()
230
268
  stage = "assistant"
231
- accumulated_text.append(text)
232
- yield message.AssistantTextDelta(content=text, response_id=response_id)
269
+ state.accumulated_text.append(text)
270
+ yield message.AssistantTextDelta(content=text, response_id=state.response_id)
233
271
 
234
272
  function_call = getattr(part, "function_call", None)
235
273
  if function_call is None:
@@ -242,16 +280,16 @@ async def parse_google_stream(
242
280
 
243
281
  if call_id not in started_tool_items:
244
282
  started_tool_items.add(call_id)
245
- yield message.ToolCallStartDelta(response_id=response_id, call_id=call_id, name=name)
283
+ yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
246
284
 
247
285
  args_obj = getattr(function_call, "args", None)
248
286
  if args_obj is not None:
249
287
  if stage == "thinking":
250
- flush_thinking()
288
+ state.flush_thinking()
251
289
  if stage == "assistant":
252
- flush_text()
290
+ state.flush_text()
253
291
  stage = "tool"
254
- assistant_parts.append(
292
+ state.assistant_parts.append(
255
293
  message.ToolCallPart(
256
294
  call_id=call_id,
257
295
  tool_name=name,
@@ -269,11 +307,11 @@ async def parse_google_stream(
269
307
  will_continue = getattr(function_call, "will_continue", None)
270
308
  if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
271
309
  if stage == "thinking":
272
- flush_thinking()
310
+ state.flush_thinking()
273
311
  if stage == "assistant":
274
- flush_text()
312
+ state.flush_text()
275
313
  stage = "tool"
276
- assistant_parts.append(
314
+ state.assistant_parts.append(
277
315
  message.ToolCallPart(
278
316
  call_id=call_id,
279
317
  tool_name=name,
@@ -287,7 +325,7 @@ async def parse_google_stream(
287
325
  if call_id in completed_tool_items:
288
326
  continue
289
327
  args = partial_args_by_call.get(call_id, {})
290
- assistant_parts.append(
328
+ state.assistant_parts.append(
291
329
  message.ToolCallPart(
292
330
  call_id=call_id,
293
331
  tool_name=name,
@@ -295,23 +333,64 @@ async def parse_google_stream(
295
333
  )
296
334
  )
297
335
 
298
- flush_thinking()
299
- flush_text()
336
+ state.flush_thinking()
337
+ state.flush_text()
300
338
 
301
339
  usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
302
340
  if usage is not None:
303
341
  metadata_tracker.set_usage(usage)
304
342
  metadata_tracker.set_model_name(str(param.model_id))
305
- metadata_tracker.set_response_id(response_id)
343
+ metadata_tracker.set_response_id(state.response_id)
306
344
  metadata = metadata_tracker.finalize()
307
345
  yield message.AssistantMessage(
308
- parts=assistant_parts,
309
- response_id=response_id,
346
+ parts=state.assistant_parts,
347
+ response_id=state.response_id,
310
348
  usage=metadata,
311
- stop_reason=stop_reason,
349
+ stop_reason=state.stop_reason,
312
350
  )
313
351
 
314
352
 
353
+ class GoogleLLMStream(LLMStreamABC):
354
+ """LLMStream implementation for Google LLM clients."""
355
+
356
+ def __init__(
357
+ self,
358
+ stream: AsyncIterator[Any],
359
+ *,
360
+ param: llm_param.LLMCallParameter,
361
+ metadata_tracker: MetadataTracker,
362
+ state: GoogleStreamStateManager,
363
+ ) -> None:
364
+ self._stream = stream
365
+ self._param = param
366
+ self._metadata_tracker = metadata_tracker
367
+ self._state = state
368
+ self._completed = False
369
+
370
+ def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
371
+ return self._iterate()
372
+
373
+ async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
374
+ try:
375
+ async for item in parse_google_stream(
376
+ self._stream,
377
+ param=self._param,
378
+ metadata_tracker=self._metadata_tracker,
379
+ state=self._state,
380
+ ):
381
+ if isinstance(item, message.AssistantMessage):
382
+ self._completed = True
383
+ yield item
384
+ except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
385
+ yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
386
+ yield message.AssistantMessage(parts=[], response_id=None, usage=self._metadata_tracker.finalize())
387
+
388
+ def get_partial_message(self) -> message.AssistantMessage | None:
389
+ if self._completed:
390
+ return None
391
+ return self._state.get_partial_message()
392
+
393
+
315
394
  @register(llm_param.LLMClientProtocol.GOOGLE)
316
395
  class GoogleClient(LLMClientABC):
317
396
  def __init__(self, config: llm_param.LLMConfigParameter):
@@ -332,7 +411,7 @@ class GoogleClient(LLMClientABC):
332
411
  return cls(config)
333
412
 
334
413
  @override
335
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
414
+ async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
336
415
  param = apply_config_defaults(param, self.get_llm_config())
337
416
  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
338
417
 
@@ -359,13 +438,15 @@ class GoogleClient(LLMClientABC):
359
438
  config=config,
360
439
  )
361
440
  except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
362
- yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
363
- yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
364
- return
441
+ return error_llm_stream(
442
+ metadata_tracker,
443
+ error=f"{e.__class__.__name__} {e!s}",
444
+ )
365
445
 
366
- try:
367
- async for item in parse_google_stream(stream, param=param, metadata_tracker=metadata_tracker):
368
- yield item
369
- except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
370
- yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
371
- yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
446
+ state = GoogleStreamStateManager(param_model=str(param.model_id))
447
+ return GoogleLLMStream(
448
+ stream,
449
+ param=param,
450
+ metadata_tracker=metadata_tracker,
451
+ state=state,
452
+ )
@@ -1,5 +1,4 @@
1
1
  import json
2
- from collections.abc import AsyncGenerator
3
2
  from typing import Any, override
4
3
 
5
4
  import httpx
@@ -7,14 +6,14 @@ import openai
7
6
  from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
8
7
 
9
8
  from klaude_code.const import LLM_HTTP_TIMEOUT_CONNECT, LLM_HTTP_TIMEOUT_READ, LLM_HTTP_TIMEOUT_TOTAL
10
- from klaude_code.llm.client import LLMClientABC
9
+ from klaude_code.llm.client import LLMClientABC, LLMStreamABC
11
10
  from klaude_code.llm.input_common import apply_config_defaults
12
11
  from klaude_code.llm.openai_compatible.input import convert_history_to_input, convert_tool_schema
13
- from klaude_code.llm.openai_compatible.stream import DefaultReasoningHandler, parse_chat_completions_stream
12
+ from klaude_code.llm.openai_compatible.stream import DefaultReasoningHandler, OpenAILLMStream
14
13
  from klaude_code.llm.registry import register
15
- from klaude_code.llm.usage import MetadataTracker
14
+ from klaude_code.llm.usage import MetadataTracker, error_llm_stream
16
15
  from klaude_code.log import DebugType, log_debug
17
- from klaude_code.protocol import llm_param, message
16
+ from klaude_code.protocol import llm_param
18
17
 
19
18
 
20
19
  def build_payload(param: llm_param.LLMCallParameter) -> tuple[CompletionCreateParamsStreaming, dict[str, object]]:
@@ -77,7 +76,7 @@ class OpenAICompatibleClient(LLMClientABC):
77
76
  return cls(config)
78
77
 
79
78
  @override
80
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
79
+ async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
81
80
  param = apply_config_defaults(param, self.get_llm_config())
82
81
 
83
82
  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -85,9 +84,8 @@ class OpenAICompatibleClient(LLMClientABC):
85
84
  try:
86
85
  payload, extra_body = build_payload(param)
87
86
  except (ValueError, OSError) as e:
88
- yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
89
- yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
90
- return
87
+ return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
88
+
91
89
  extra_headers: dict[str, str] = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
92
90
 
93
91
  log_debug(
@@ -103,9 +101,7 @@ class OpenAICompatibleClient(LLMClientABC):
103
101
  extra_headers=extra_headers,
104
102
  )
105
103
  except (openai.OpenAIError, httpx.HTTPError) as e:
106
- yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
107
- yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
108
- return
104
+ return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
109
105
 
110
106
  reasoning_handler = DefaultReasoningHandler(
111
107
  param_model=str(param.model_id),
@@ -119,11 +115,10 @@ class OpenAICompatibleClient(LLMClientABC):
119
115
  debug_type=DebugType.LLM_STREAM,
120
116
  )
121
117
 
122
- async for item in parse_chat_completions_stream(
118
+ return OpenAILLMStream(
123
119
  stream,
124
120
  param=param,
125
121
  metadata_tracker=metadata_tracker,
126
122
  reasoning_handler=reasoning_handler,
127
123
  on_event=on_event,
128
- ):
129
- yield item
124
+ )
@@ -4,7 +4,7 @@ This module provides reusable primitives for OpenAI-compatible providers:
4
4
 
5
5
  - ``StreamStateManager``: accumulates assistant content and tool calls.
6
6
  - ``ReasoningHandlerABC``: provider-specific reasoning extraction + buffering.
7
- - ``parse_chat_completions_stream``: shared stream loop that emits stream/history items.
7
+ - ``OpenAILLMStream``: LLMStream implementation for OpenAI-compatible clients.
8
8
 
9
9
  OpenRouter uses the same OpenAI Chat Completions API surface but differs in
10
10
  how reasoning is represented (``reasoning_details`` vs ``reasoning_content``).
@@ -24,8 +24,10 @@ import pydantic
24
24
  from openai import AsyncStream
25
25
  from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
26
26
 
27
+ from klaude_code.llm.client import LLMStreamABC
27
28
  from klaude_code.llm.image import save_assistant_image
28
29
  from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
30
+ from klaude_code.llm.partial_message import degrade_thinking_to_text
29
31
  from klaude_code.llm.usage import MetadataTracker, convert_usage
30
32
  from klaude_code.protocol import llm_param, message, model
31
33
 
@@ -93,6 +95,23 @@ class StreamStateManager:
93
95
  self.flush_tool_calls()
94
96
  return list(self.parts)
95
97
 
98
+ def get_partial_message(self) -> message.AssistantMessage | None:
99
+ """Build a partial AssistantMessage from accumulated state.
100
+
101
+ Flushes all accumulated content (reasoning, assistant text, tool calls)
102
+ and returns the message. Returns None if no content has been accumulated.
103
+ """
104
+ self.flush_reasoning()
105
+ self.flush_assistant()
106
+ parts = degrade_thinking_to_text(list(self.parts))
107
+ if not parts:
108
+ return None
109
+ return message.AssistantMessage(
110
+ parts=parts,
111
+ response_id=self.response_id,
112
+ stop_reason="aborted",
113
+ )
114
+
96
115
 
97
116
  @dataclass(slots=True)
98
117
  class ReasoningDeltaResult:
@@ -168,6 +187,7 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
168
187
  async def parse_chat_completions_stream(
169
188
  stream: AsyncStream[ChatCompletionChunk],
170
189
  *,
190
+ state: StreamStateManager,
171
191
  param: llm_param.LLMCallParameter,
172
192
  metadata_tracker: MetadataTracker,
173
193
  reasoning_handler: ReasoningHandlerABC,
@@ -176,13 +196,10 @@ async def parse_chat_completions_stream(
176
196
  """Parse OpenAI Chat Completions stream into stream items.
177
197
 
178
198
  This is shared by OpenAI-compatible and OpenRouter clients.
199
+ The state parameter allows external access to accumulated content
200
+ for cancellation scenarios.
179
201
  """
180
202
 
181
- state = StreamStateManager(
182
- param_model=str(param.model_id),
183
- reasoning_flusher=reasoning_handler.flush,
184
- )
185
-
186
203
  def _extract_image_url(image_obj: object) -> str | None:
187
204
  image_url = getattr(image_obj, "image_url", None)
188
205
  if image_url is not None:
@@ -323,3 +340,48 @@ async def parse_chat_completions_stream(
323
340
  usage=metadata,
324
341
  stop_reason=state.stop_reason,
325
342
  )
343
+
344
+
345
+ class OpenAILLMStream(LLMStreamABC):
346
+ """LLMStream implementation for OpenAI-compatible clients."""
347
+
348
+ def __init__(
349
+ self,
350
+ stream: AsyncStream[ChatCompletionChunk],
351
+ *,
352
+ param: llm_param.LLMCallParameter,
353
+ metadata_tracker: MetadataTracker,
354
+ reasoning_handler: ReasoningHandlerABC,
355
+ on_event: Callable[[object], None] | None = None,
356
+ ) -> None:
357
+ self._stream = stream
358
+ self._param = param
359
+ self._metadata_tracker = metadata_tracker
360
+ self._reasoning_handler = reasoning_handler
361
+ self._on_event = on_event
362
+ self._state = StreamStateManager(
363
+ param_model=str(param.model_id),
364
+ reasoning_flusher=reasoning_handler.flush,
365
+ )
366
+ self._completed = False
367
+
368
+ def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
369
+ return self._iterate()
370
+
371
+ async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
372
+ async for item in parse_chat_completions_stream(
373
+ self._stream,
374
+ state=self._state,
375
+ param=self._param,
376
+ metadata_tracker=self._metadata_tracker,
377
+ reasoning_handler=self._reasoning_handler,
378
+ on_event=self._on_event,
379
+ ):
380
+ if isinstance(item, message.AssistantMessage):
381
+ self._completed = True
382
+ yield item
383
+
384
+ def get_partial_message(self) -> message.AssistantMessage | None:
385
+ if self._completed:
386
+ return None
387
+ return self._state.get_partial_message()
@@ -1,5 +1,4 @@
1
1
  import json
2
- from collections.abc import AsyncGenerator
3
2
  from typing import Any, cast, override
4
3
 
5
4
  import httpx
@@ -14,16 +13,16 @@ from klaude_code.const import (
14
13
  LLM_HTTP_TIMEOUT_TOTAL,
15
14
  OPENROUTER_BASE_URL,
16
15
  )
17
- from klaude_code.llm.client import LLMClientABC
16
+ from klaude_code.llm.client import LLMClientABC, LLMStreamABC
18
17
  from klaude_code.llm.input_common import apply_config_defaults
19
18
  from klaude_code.llm.openai_compatible.input import convert_tool_schema
20
- from klaude_code.llm.openai_compatible.stream import parse_chat_completions_stream
19
+ from klaude_code.llm.openai_compatible.stream import OpenAILLMStream
21
20
  from klaude_code.llm.openrouter.input import convert_history_to_input, is_claude_model
22
21
  from klaude_code.llm.openrouter.reasoning import ReasoningStreamHandler
23
22
  from klaude_code.llm.registry import register
24
- from klaude_code.llm.usage import MetadataTracker
23
+ from klaude_code.llm.usage import MetadataTracker, error_llm_stream
25
24
  from klaude_code.log import DebugType, is_debug_enabled, log_debug
26
- from klaude_code.protocol import llm_param, message
25
+ from klaude_code.protocol import llm_param
27
26
 
28
27
 
29
28
  def build_payload(
@@ -103,7 +102,7 @@ class OpenRouterClient(LLMClientABC):
103
102
  return cls(config)
104
103
 
105
104
  @override
106
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
105
+ async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
107
106
  param = apply_config_defaults(param, self.get_llm_config())
108
107
 
109
108
  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -111,9 +110,7 @@ class OpenRouterClient(LLMClientABC):
111
110
  try:
112
111
  payload, extra_body, extra_headers = build_payload(param)
113
112
  except (ValueError, OSError) as e:
114
- yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
115
- yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
116
- return
113
+ return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
117
114
 
118
115
  log_debug(
119
116
  json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
@@ -128,9 +125,7 @@ class OpenRouterClient(LLMClientABC):
128
125
  extra_headers=extra_headers,
129
126
  )
130
127
  except (openai.OpenAIError, httpx.HTTPError) as e:
131
- yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
132
- yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
133
- return
128
+ return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
134
129
 
135
130
  reasoning_handler = ReasoningStreamHandler(
136
131
  param_model=str(param.model_id),
@@ -144,11 +139,10 @@ class OpenRouterClient(LLMClientABC):
144
139
  debug_type=DebugType.LLM_STREAM,
145
140
  )
146
141
 
147
- async for item in parse_chat_completions_stream(
142
+ return OpenAILLMStream(
148
143
  stream,
149
144
  param=param,
150
145
  metadata_tracker=metadata_tracker,
151
146
  reasoning_handler=reasoning_handler,
152
147
  on_event=on_event,
153
- ):
154
- yield item
148
+ )
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ from klaude_code.protocol import message
4
+
5
+
6
+ def degrade_thinking_to_text(parts: list[message.Part]) -> list[message.Part]:
7
+ """Degrade thinking parts into a regular TextPart.
8
+
9
+ Some providers require thinking signatures/encrypted content to be echoed back
10
+ for subsequent calls. During interruption we cannot reliably determine whether
11
+ we have a complete signature, so we persist thinking as plain text instead.
12
+ """
13
+
14
+ thinking_texts: list[str] = []
15
+ non_thinking_parts: list[message.Part] = []
16
+
17
+ for part in parts:
18
+ if isinstance(part, message.ThinkingTextPart):
19
+ text = part.text
20
+ if text and text.strip():
21
+ thinking_texts.append(text)
22
+ continue
23
+ if isinstance(part, message.ThinkingSignaturePart):
24
+ continue
25
+ non_thinking_parts.append(part)
26
+
27
+ if not thinking_texts:
28
+ return non_thinking_parts
29
+
30
+ joined = "\n".join(thinking_texts).strip()
31
+ thinking_block = f"<thinking>\n{joined}\n</thinking>"
32
+ if non_thinking_parts:
33
+ thinking_block += "\n\n"
34
+
35
+ return [message.TextPart(text=thinking_block), *non_thinking_parts]