klaude-code 2.4.2__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to their respective registries. It is provided for informational purposes only.
Files changed (55)
  1. klaude_code/app/runtime.py +2 -6
  2. klaude_code/cli/main.py +0 -1
  3. klaude_code/config/assets/builtin_config.yaml +7 -0
  4. klaude_code/const.py +7 -4
  5. klaude_code/core/agent.py +10 -1
  6. klaude_code/core/agent_profile.py +47 -35
  7. klaude_code/core/executor.py +6 -21
  8. klaude_code/core/manager/sub_agent_manager.py +17 -1
  9. klaude_code/core/prompts/prompt-sub-agent-web.md +4 -4
  10. klaude_code/core/task.py +65 -4
  11. klaude_code/core/tool/__init__.py +0 -5
  12. klaude_code/core/tool/context.py +12 -1
  13. klaude_code/core/tool/offload.py +311 -0
  14. klaude_code/core/tool/shell/bash_tool.md +1 -43
  15. klaude_code/core/tool/sub_agent_tool.py +1 -0
  16. klaude_code/core/tool/todo/todo_write_tool.md +0 -23
  17. klaude_code/core/tool/tool_runner.py +14 -9
  18. klaude_code/core/tool/web/web_fetch_tool.md +1 -1
  19. klaude_code/core/tool/web/web_fetch_tool.py +14 -39
  20. klaude_code/core/turn.py +128 -139
  21. klaude_code/llm/anthropic/client.py +176 -82
  22. klaude_code/llm/bedrock/client.py +8 -12
  23. klaude_code/llm/claude/client.py +11 -15
  24. klaude_code/llm/client.py +31 -4
  25. klaude_code/llm/codex/client.py +7 -11
  26. klaude_code/llm/google/client.py +150 -69
  27. klaude_code/llm/openai_compatible/client.py +10 -15
  28. klaude_code/llm/openai_compatible/stream.py +68 -6
  29. klaude_code/llm/openrouter/client.py +9 -15
  30. klaude_code/llm/partial_message.py +35 -0
  31. klaude_code/llm/responses/client.py +134 -68
  32. klaude_code/llm/usage.py +30 -0
  33. klaude_code/protocol/commands.py +0 -4
  34. klaude_code/protocol/events/metadata.py +1 -0
  35. klaude_code/protocol/events/system.py +0 -4
  36. klaude_code/protocol/model.py +2 -15
  37. klaude_code/protocol/sub_agent/explore.py +0 -10
  38. klaude_code/protocol/sub_agent/image_gen.py +0 -7
  39. klaude_code/protocol/sub_agent/task.py +0 -10
  40. klaude_code/protocol/sub_agent/web.py +4 -12
  41. klaude_code/session/templates/export_session.html +4 -4
  42. klaude_code/skill/manager.py +2 -1
  43. klaude_code/tui/components/metadata.py +41 -49
  44. klaude_code/tui/components/rich/markdown.py +1 -3
  45. klaude_code/tui/components/rich/theme.py +2 -2
  46. klaude_code/tui/components/tools.py +0 -31
  47. klaude_code/tui/components/welcome.py +1 -32
  48. klaude_code/tui/input/prompt_toolkit.py +25 -9
  49. klaude_code/tui/machine.py +2 -1
  50. {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/METADATA +1 -1
  51. {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/RECORD +53 -53
  52. klaude_code/core/prompts/prompt-nano-banana.md +0 -1
  53. klaude_code/core/tool/truncation.py +0 -203
  54. {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/WHEEL +0 -0
  55. {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/entry_points.txt +0 -0
klaude_code/llm/anthropic/client.py CHANGED
@@ -31,10 +31,11 @@ from klaude_code.const import (
     LLM_HTTP_TIMEOUT_TOTAL,
 )
 from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
+from klaude_code.llm.partial_message import degrade_thinking_to_text
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker, error_stream_items
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
 from klaude_code.protocol import llm_param, message, model
 
@@ -54,6 +55,94 @@ def _map_anthropic_stop_reason(reason: str) -> model.StopReason | None:
     return mapping.get(reason)
 
 
+class AnthropicStreamStateManager:
+    """Manages streaming state for Anthropic API responses.
+
+    Accumulates thinking, content, and tool call parts during streaming
+    to support partial message retrieval on cancellation.
+    """
+
+    def __init__(self, model_id: str) -> None:
+        self.model_id = model_id
+        self.accumulated_thinking: list[str] = []
+        self.accumulated_content: list[str] = []
+        self.parts: list[message.Part] = []
+        self.response_id: str | None = None
+        self.pending_signature: str | None = None
+        self.stop_reason: model.StopReason | None = None
+
+        # Tool call state
+        self.current_tool_name: str | None = None
+        self.current_tool_call_id: str | None = None
+        self.current_tool_inputs: list[str] | None = None
+
+        # Token tracking
+        self.input_token: int = 0
+        self.cached_token: int = 0
+
+    def flush_thinking(self) -> None:
+        """Flush accumulated thinking content into parts."""
+        if not self.accumulated_thinking:
+            return
+        full_thinking = "".join(self.accumulated_thinking)
+        self.parts.append(message.ThinkingTextPart(text=full_thinking, model_id=self.model_id))
+        if self.pending_signature:
+            self.parts.append(
+                message.ThinkingSignaturePart(
+                    signature=self.pending_signature,
+                    model_id=self.model_id,
+                    format="anthropic",
+                )
+            )
+        self.accumulated_thinking.clear()
+        self.pending_signature = None
+
+    def flush_content(self) -> None:
+        """Flush accumulated content into parts."""
+        if not self.accumulated_content:
+            return
+        self.parts.append(message.TextPart(text="".join(self.accumulated_content)))
+        self.accumulated_content.clear()
+
+    def flush_tool_call(self) -> None:
+        """Flush current tool call into parts."""
+        if self.current_tool_name and self.current_tool_call_id:
+            self.parts.append(
+                message.ToolCallPart(
+                    call_id=self.current_tool_call_id,
+                    tool_name=self.current_tool_name,
+                    arguments_json="".join(self.current_tool_inputs) if self.current_tool_inputs else "",
+                )
+            )
+        self.current_tool_name = None
+        self.current_tool_call_id = None
+        self.current_tool_inputs = None
+
+    def flush_all(self) -> list[message.Part]:
+        """Flush all accumulated content in order and return parts."""
+        self.flush_thinking()
+        self.flush_content()
+        self.flush_tool_call()
+        return list(self.parts)
+
+    def get_partial_message(self) -> message.AssistantMessage | None:
+        """Build a partial AssistantMessage from accumulated state.
+
+        Flushes all accumulated content and returns the message with
+        stop_reason="aborted". Returns None if no content has been accumulated.
+        """
+        self.flush_thinking()
+        self.flush_content()
+        parts = degrade_thinking_to_text(list(self.parts))
+        if not parts:
+            return None
+        return message.AssistantMessage(
+            parts=parts,
+            response_id=self.response_id,
+            stop_reason="aborted",
+        )
+
+
 def build_payload(
     param: llm_param.LLMCallParameter,
     *,
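The flush order in this class matters: thinking parts are emitted before text, and an in-flight tool call is only flushed at the end of a completed turn. The sketch below is illustrative only (it is not in the diff); the model id, delta strings, and tool-call values are made up, and it exercises the class exactly as defined above.

```python
# Hypothetical usage sketch of AnthropicStreamStateManager; all literal
# values here are invented for illustration.
from klaude_code.llm.anthropic.client import AnthropicStreamStateManager

state = AnthropicStreamStateManager(model_id="claude-example")  # made-up id

# Deltas arrive from the wire and are buffered as raw strings.
state.accumulated_thinking.append("Checking the failing test...")
state.accumulated_content.append("The fix is a one-line change.")

# A tool call whose JSON arguments never finished streaming.
state.current_tool_name = "Edit"
state.current_tool_call_id = "toolu_123"
state.current_tool_inputs = ['{"file_path": "/tmp/x.py"']  # truncated JSON

partial = state.get_partial_message()
# get_partial_message() flushes thinking and text but NOT the tool call,
# so the truncated arguments_json is dropped rather than persisted.
assert partial is not None and partial.stop_reason == "aborted"
```

Note also that `get_partial_message()` runs the flushed parts through `degrade_thinking_to_text` (from the new `klaude_code/llm/partial_message.py` module, +35 lines in the list above), presumably because an aborted turn cannot carry a verifiable thinking signature back to the API.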
@@ -113,22 +202,13 @@ async def parse_anthropic_stream(
     stream: Any,
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
+    state: AnthropicStreamStateManager,
 ) -> AsyncGenerator[message.LLMStreamItem]:
-    """Parse Anthropic beta messages stream and yield stream items."""
-    accumulated_thinking: list[str] = []
-    accumulated_content: list[str] = []
-    parts: list[message.Part] = []
-    response_id: str | None = None
-    stop_reason: model.StopReason | None = None
-    pending_signature: str | None = None
-
-    current_tool_name: str | None = None
-    current_tool_call_id: str | None = None
-    current_tool_inputs: list[str] | None = None
-
-    input_token = 0
-    cached_token = 0
+    """Parse Anthropic beta messages stream and yield stream items.
 
+    The state parameter allows external access to accumulated content
+    for cancellation scenarios.
+    """
     async for event in await stream:
         log_debug(
             f"[{event.type}]",
@@ -138,34 +218,33 @@
         )
         match event:
             case BetaRawMessageStartEvent() as event:
-                response_id = event.message.id
-                cached_token = event.message.usage.cache_read_input_tokens or 0
-                input_token = event.message.usage.input_tokens
+                state.response_id = event.message.id
+                state.cached_token = event.message.usage.cache_read_input_tokens or 0
+                state.input_token = event.message.usage.input_tokens
             case BetaRawContentBlockDeltaEvent() as event:
                 match event.delta:
                     case BetaThinkingDelta() as delta:
                         if delta.thinking:
                             metadata_tracker.record_token()
-                            accumulated_thinking.append(delta.thinking)
-                            yield message.ThinkingTextDelta(
-                                content=delta.thinking,
-                                response_id=response_id,
-                            )
+                            state.accumulated_thinking.append(delta.thinking)
+                            yield message.ThinkingTextDelta(
+                                content=delta.thinking,
+                                response_id=state.response_id,
+                            )
                     case BetaSignatureDelta() as delta:
-                        pending_signature = delta.signature
+                        state.pending_signature = delta.signature
                     case BetaTextDelta() as delta:
                         if delta.text:
                             metadata_tracker.record_token()
-                            accumulated_content.append(delta.text)
-                            yield message.AssistantTextDelta(
-                                content=delta.text,
-                                response_id=response_id,
-                            )
+                            state.accumulated_content.append(delta.text)
+                            yield message.AssistantTextDelta(
+                                content=delta.text,
+                                response_id=state.response_id,
+                            )
                     case BetaInputJSONDelta() as delta:
-                        if current_tool_inputs is not None:
-                            if delta.partial_json:
-                                metadata_tracker.record_token()
-                                current_tool_inputs.append(delta.partial_json)
+                        if state.current_tool_inputs is not None and delta.partial_json:
+                            metadata_tracker.record_token()
+                            state.current_tool_inputs.append(delta.partial_json)
                     case _:
                         pass
             case BetaRawContentBlockStartEvent() as event:
@@ -173,74 +252,92 @@
                 case BetaToolUseBlock() as block:
                     metadata_tracker.record_token()
                     yield message.ToolCallStartDelta(
-                        response_id=response_id,
+                        response_id=state.response_id,
                         call_id=block.id,
                         name=block.name,
                     )
-                    current_tool_name = block.name
-                    current_tool_call_id = block.id
-                    current_tool_inputs = []
+                    state.current_tool_name = block.name
+                    state.current_tool_call_id = block.id
+                    state.current_tool_inputs = []
                 case _:
                     pass
             case BetaRawContentBlockStopEvent():
-                if accumulated_thinking:
+                if state.accumulated_thinking:
                     metadata_tracker.record_token()
-                    full_thinking = "".join(accumulated_thinking)
-                    parts.append(message.ThinkingTextPart(text=full_thinking, model_id=str(param.model_id)))
-                    if pending_signature:
-                        parts.append(
-                            message.ThinkingSignaturePart(
-                                signature=pending_signature,
-                                model_id=str(param.model_id),
-                                format="anthropic",
-                            )
-                        )
-                    accumulated_thinking.clear()
-                    pending_signature = None
-                if accumulated_content:
+                    state.flush_thinking()
+                if state.accumulated_content:
                     metadata_tracker.record_token()
-                    parts.append(message.TextPart(text="".join(accumulated_content)))
-                    accumulated_content.clear()
-                if current_tool_name and current_tool_call_id:
+                    state.flush_content()
+                if state.current_tool_name and state.current_tool_call_id:
                     metadata_tracker.record_token()
-                    parts.append(
-                        message.ToolCallPart(
-                            call_id=current_tool_call_id,
-                            tool_name=current_tool_name,
-                            arguments_json="".join(current_tool_inputs) if current_tool_inputs else "",
-                        )
-                    )
-                    current_tool_name = None
-                    current_tool_call_id = None
-                    current_tool_inputs = None
+                    state.flush_tool_call()
             case BetaRawMessageDeltaEvent() as event:
                 metadata_tracker.set_usage(
                     model.Usage(
-                        input_tokens=input_token + cached_token,
+                        input_tokens=state.input_token + state.cached_token,
                         output_tokens=event.usage.output_tokens,
-                        cached_tokens=cached_token,
-                        context_size=input_token + cached_token + event.usage.output_tokens,
+                        cached_tokens=state.cached_token,
+                        context_size=state.input_token + state.cached_token + event.usage.output_tokens,
                         context_limit=param.context_limit,
                         max_tokens=param.max_tokens,
                     )
                 )
                 metadata_tracker.set_model_name(str(param.model_id))
-                metadata_tracker.set_response_id(response_id)
+                metadata_tracker.set_response_id(state.response_id)
                 raw_stop_reason = getattr(event, "stop_reason", None)
                 if isinstance(raw_stop_reason, str):
-                    stop_reason = _map_anthropic_stop_reason(raw_stop_reason)
+                    state.stop_reason = _map_anthropic_stop_reason(raw_stop_reason)
             case _:
                 pass
 
+    parts = state.flush_all()
+    if parts:
+        metadata_tracker.record_token()
     metadata = metadata_tracker.finalize()
     yield message.AssistantMessage(
         parts=parts,
-        response_id=response_id,
+        response_id=state.response_id,
         usage=metadata,
-        stop_reason=stop_reason,
+        stop_reason=state.stop_reason,
     )
 
 
+class AnthropicLLMStream(LLMStreamABC):
+    """LLMStream implementation for Anthropic-compatible clients."""
+
+    def __init__(
+        self,
+        stream: Any,
+        *,
+        param: llm_param.LLMCallParameter,
+        metadata_tracker: MetadataTracker,
+    ) -> None:
+        self._stream = stream
+        self._param = param
+        self._metadata_tracker = metadata_tracker
+        self._state = AnthropicStreamStateManager(model_id=str(param.model_id))
+        self._completed = False
+
+    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
+        return self._iterate()
+
+    async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
+        async for item in parse_anthropic_stream(
+            self._stream,
+            self._param,
+            self._metadata_tracker,
+            self._state,
+        ):
+            if isinstance(item, message.AssistantMessage):
+                self._completed = True
+            yield item
+
+    def get_partial_message(self) -> message.AssistantMessage | None:
+        if self._completed:
+            return None
+        return self._state.get_partial_message()
+
+
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
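Taken together, `parse_anthropic_stream` plus the wrapper above let a caller recover a partial turn after a timeout or user cancel. A minimal consumer sketch, assuming an already-constructed client; `run_with_deadline` and `handle` are stand-ins, not names from this diff (requires Python 3.11+ for `asyncio.timeout`):

```python
# Sketch only: `client`, `param`, and `handle` are illustrative stand-ins.
import asyncio

from klaude_code.protocol import message


async def run_with_deadline(client, param, deadline: float) -> message.AssistantMessage | None:
    stream = await client.call(param)  # LLMStreamABC, no longer an async generator
    try:
        async with asyncio.timeout(deadline):
            async for item in stream:
                if isinstance(item, message.AssistantMessage):
                    return item  # complete turn
                handle(item)  # deltas: thinking / text / tool-call starts
    except TimeoutError:
        # The stream object retains its accumulated state, so the partial
        # turn (stop_reason="aborted") survives the cancellation.
        return stream.get_partial_message()
    return None
```

The `_completed` flag guards the other direction: once the final `AssistantMessage` has been yielded, `get_partial_message()` returns `None`, so a turn can never be recorded both as finished and as aborted.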
@@ -269,7 +366,7 @@ class AnthropicClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -282,15 +379,12 @@ class AnthropicClient(LLMClientABC):
             debug_type=DebugType.LLM_PAYLOAD,
         )
 
-        stream = self.client.beta.messages.create(
-            **payload,
-            extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
-        )
-
         try:
-            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
-                yield item
+            stream = self.client.beta.messages.create(
+                **payload,
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+            )
+            return AnthropicLLMStream(stream, param=param, metadata_tracker=metadata_tracker)
         except (APIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-            for item in error_stream_items(metadata_tracker, error=error_message):
-                yield item
+            return error_llm_stream(metadata_tracker, error=error_message)
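For call sites, the signature change from `AsyncGenerator[message.LLMStreamItem]` to `LLMStreamABC` is mechanical but not silent: acquisition and iteration are now separate steps. Roughly (caller names illustrative):

```python
# klaude-code 2.4.2: call() was an async generator, consumed directly.
#     async for item in client.call(param):
#         ...

# klaude-code 2.5.0: call() is a coroutine returning a stream object.
stream = await client.call(param)
async for item in stream:
    ...
```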
klaude_code/llm/bedrock/client.py CHANGED
@@ -1,7 +1,6 @@
 """AWS Bedrock LLM client using Anthropic SDK."""
 
 import json
-from collections.abc import AsyncGenerator
 from typing import override
 
 import anthropic
@@ -9,13 +8,13 @@ import httpx
 from anthropic import APIError
 
 from klaude_code.const import LLM_HTTP_TIMEOUT_CONNECT, LLM_HTTP_TIMEOUT_READ, LLM_HTTP_TIMEOUT_TOTAL
-from klaude_code.llm.anthropic.client import build_payload, parse_anthropic_stream
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.anthropic.client import AnthropicLLMStream, build_payload
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker, error_stream_items
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
-from klaude_code.protocol import llm_param, message
+from klaude_code.protocol import llm_param
 
 
 @register(llm_param.LLMClientProtocol.BEDROCK)
@@ -39,7 +38,7 @@ class BedrockClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -52,12 +51,9 @@ class BedrockClient(LLMClientABC):
             debug_type=DebugType.LLM_PAYLOAD,
         )
 
-        stream = self.client.beta.messages.create(**payload)
-
         try:
-            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
-                yield item
+            stream = self.client.beta.messages.create(**payload)
+            return AnthropicLLMStream(stream, param=param, metadata_tracker=metadata_tracker)
         except (APIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-            for item in error_stream_items(metadata_tracker, error=error_message):
-                yield item
+            return error_llm_stream(metadata_tracker, error=error_message)
klaude_code/llm/claude/client.py CHANGED
@@ -1,5 +1,4 @@
 import json
-from collections.abc import AsyncGenerator
 from typing import override
 
 import anthropic
@@ -17,13 +16,13 @@ from klaude_code.const import (
     LLM_HTTP_TIMEOUT_READ,
     LLM_HTTP_TIMEOUT_TOTAL,
 )
-from klaude_code.llm.anthropic.client import build_payload, parse_anthropic_stream
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.anthropic.client import AnthropicLLMStream, build_payload
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker, error_stream_items
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
-from klaude_code.protocol import llm_param, message
+from klaude_code.protocol import llm_param
 
 _CLAUDE_OAUTH_REQUIRED_BETAS: tuple[str, ...] = (
     ANTHROPIC_BETA_OAUTH,
@@ -71,7 +70,7 @@ class ClaudeClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         self._ensure_valid_token()
         param = apply_config_defaults(param, self.get_llm_config())
 
@@ -91,15 +90,12 @@ class ClaudeClient(LLMClientABC):
             debug_type=DebugType.LLM_PAYLOAD,
         )
 
-        stream = self.client.beta.messages.create(
-            **payload,
-            extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
-        )
-
         try:
-            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
-                yield item
+            stream = self.client.beta.messages.create(
+                **payload,
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+            )
+            return AnthropicLLMStream(stream, param=param, metadata_tracker=metadata_tracker)
         except (APIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-            for item in error_stream_items(metadata_tracker, error=error_message):
-                yield item
+            return error_llm_stream(metadata_tracker, error=error_message)
klaude_code/llm/client.py CHANGED
@@ -1,10 +1,34 @@
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
-from typing import ParamSpec, TypeVar, cast
+from typing import ParamSpec, TypeVar
 
 from klaude_code.protocol import llm_param, message
 
 
+class LLMStreamABC(ABC):
+    """Abstract base class for LLM streaming response with state access.
+
+    Provides both async iteration over stream items and access to accumulated
+    message state for cancellation scenarios.
+    """
+
+    @abstractmethod
+    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
+        """Iterate over stream items."""
+        ...
+
+    @abstractmethod
+    def get_partial_message(self) -> message.AssistantMessage | None:
+        """Get accumulated message for cancel scenarios.
+
+        Returns the message constructed from accumulated parts so far,
+        including thinking and assistant text. Returns None if no content
+        has been accumulated yet.
+
+        """
+        ...
+
+
 class LLMClientABC(ABC):
     def __init__(self, config: llm_param.LLMConfigParameter) -> None:
         self._config = config
@@ -15,9 +39,12 @@ class LLMClientABC(ABC):
         pass
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
-        if False:  # pragma: no cover
-            yield cast(message.LLMStreamItem, None)
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
+        """Start an LLM call and return a stream object.
+
+        The returned stream can be iterated to receive stream items,
+        and provides get_partial_message() for cancellation scenarios.
+        """
        raise NotImplementedError
 
     def get_llm_config(self) -> llm_param.LLMConfigParameter:
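`error_llm_stream` (in `klaude_code/llm/usage.py`, +30 lines in the list above) is not shown in this diff; whatever the real implementation does, any replacement for the old `error_stream_items` generator now has to satisfy this ABC. A minimal conforming stand-in, purely hypothetical, might look like:

```python
# Hypothetical stand-in, NOT the package's actual error_llm_stream.
from collections.abc import AsyncGenerator

from klaude_code.llm.client import LLMStreamABC
from klaude_code.protocol import message


class ErrorStream(LLMStreamABC):
    """Wraps a fixed list of pre-built error items as an LLM stream."""

    def __init__(self, items: list[message.LLMStreamItem]) -> None:
        self._items = items

    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
        return self._iterate()

    async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
        for item in self._items:
            yield item

    def get_partial_message(self) -> message.AssistantMessage | None:
        return None  # an error stream accumulates nothing recoverable
```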
klaude_code/llm/codex/client.py CHANGED
@@ -1,7 +1,6 @@
 """Codex LLM client using ChatGPT subscription via OAuth."""
 
 import json
-from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
@@ -19,14 +18,14 @@ from klaude_code.const import (
     LLM_HTTP_TIMEOUT_READ,
     LLM_HTTP_TIMEOUT_TOTAL,
 )
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.responses.client import parse_responses_stream
+from klaude_code.llm.responses.client import ResponsesLLMStream
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.llm.usage import MetadataTracker, error_stream_items
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
-from klaude_code.protocol import llm_param, message
+from klaude_code.protocol import llm_param
 
 
 def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
@@ -118,7 +117,7 @@ class CodexClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         # Ensure token is valid before API call
         self._ensure_valid_token()
 
@@ -147,9 +146,6 @@ class CodexClient(LLMClientABC):
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-            for item in error_stream_items(metadata_tracker, error=error_message):
-                yield item
-            return
+            return error_llm_stream(metadata_tracker, error=error_message)
 
-        async for item in parse_responses_stream(stream, param, metadata_tracker):
-            yield item
+        return ResponsesLLMStream(stream, param=param, metadata_tracker=metadata_tracker)