klaude-code 2.4.1__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. klaude_code/app/runtime.py +2 -6
  2. klaude_code/cli/main.py +0 -1
  3. klaude_code/config/assets/builtin_config.yaml +7 -0
  4. klaude_code/const.py +7 -4
  5. klaude_code/core/agent.py +10 -1
  6. klaude_code/core/agent_profile.py +47 -35
  7. klaude_code/core/executor.py +6 -21
  8. klaude_code/core/manager/sub_agent_manager.py +17 -1
  9. klaude_code/core/prompts/prompt-sub-agent-web.md +4 -4
  10. klaude_code/core/task.py +65 -4
  11. klaude_code/core/tool/__init__.py +0 -5
  12. klaude_code/core/tool/context.py +12 -1
  13. klaude_code/core/tool/offload.py +311 -0
  14. klaude_code/core/tool/shell/bash_tool.md +1 -43
  15. klaude_code/core/tool/sub_agent_tool.py +1 -0
  16. klaude_code/core/tool/todo/todo_write_tool.md +0 -23
  17. klaude_code/core/tool/tool_runner.py +14 -9
  18. klaude_code/core/tool/web/web_fetch_tool.md +1 -1
  19. klaude_code/core/tool/web/web_fetch_tool.py +14 -39
  20. klaude_code/core/turn.py +128 -138
  21. klaude_code/llm/anthropic/client.py +176 -82
  22. klaude_code/llm/bedrock/client.py +8 -12
  23. klaude_code/llm/claude/client.py +11 -15
  24. klaude_code/llm/client.py +31 -4
  25. klaude_code/llm/codex/client.py +7 -11
  26. klaude_code/llm/google/client.py +150 -69
  27. klaude_code/llm/openai_compatible/client.py +10 -15
  28. klaude_code/llm/openai_compatible/stream.py +68 -6
  29. klaude_code/llm/openrouter/client.py +9 -15
  30. klaude_code/llm/partial_message.py +35 -0
  31. klaude_code/llm/responses/client.py +134 -68
  32. klaude_code/llm/usage.py +30 -0
  33. klaude_code/protocol/commands.py +0 -4
  34. klaude_code/protocol/events/metadata.py +1 -0
  35. klaude_code/protocol/events/streaming.py +1 -0
  36. klaude_code/protocol/events/system.py +0 -4
  37. klaude_code/protocol/model.py +2 -15
  38. klaude_code/protocol/sub_agent/explore.py +0 -10
  39. klaude_code/protocol/sub_agent/image_gen.py +0 -7
  40. klaude_code/protocol/sub_agent/task.py +0 -10
  41. klaude_code/protocol/sub_agent/web.py +4 -12
  42. klaude_code/session/templates/export_session.html +4 -4
  43. klaude_code/skill/manager.py +2 -1
  44. klaude_code/tui/components/metadata.py +41 -49
  45. klaude_code/tui/components/rich/markdown.py +1 -3
  46. klaude_code/tui/components/rich/theme.py +2 -2
  47. klaude_code/tui/components/sub_agent.py +9 -1
  48. klaude_code/tui/components/tools.py +0 -31
  49. klaude_code/tui/components/welcome.py +1 -32
  50. klaude_code/tui/input/prompt_toolkit.py +25 -9
  51. klaude_code/tui/machine.py +40 -8
  52. klaude_code/tui/renderer.py +1 -0
  53. {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/METADATA +2 -2
  54. {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/RECORD +56 -56
  55. klaude_code/core/prompts/prompt-nano-banana.md +0 -1
  56. klaude_code/core/tool/truncation.py +0 -203
  57. {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/WHEEL +0 -0
  58. {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/entry_points.txt +0 -0
klaude_code/core/turn.py CHANGED
@@ -4,7 +4,7 @@ from collections.abc import AsyncGenerator
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
-from klaude_code.const import INTERRUPT_MARKER, SUPPORTED_IMAGE_SIZES
+from klaude_code.const import SUPPORTED_IMAGE_SIZES
 from klaude_code.core.tool import ToolABC
 from klaude_code.core.tool.context import SubAgentResumeClaims, ToolContext
 
@@ -20,6 +20,7 @@ from klaude_code.core.tool.tool_runner import (
     ToolExecutorEvent,
 )
 from klaude_code.llm import LLMClientABC
+from klaude_code.llm.client import LLMStreamABC
 from klaude_code.log import DebugType, log_debug
 from klaude_code.protocol import events, llm_param, message, model, tools
 
@@ -104,8 +105,7 @@ class TurnExecutor:
         self._context = context
         self._tool_executor: ToolExecutor | None = None
         self._turn_result: TurnResult | None = None
-        self._assistant_delta_buffer: list[str] = []
-        self._assistant_response_id: str | None = None
+        self._llm_stream: LLMStreamABC | None = None
 
     @property
     def report_back_result(self) -> str | None:
@@ -147,7 +147,7 @@
     def cancel(self) -> list[events.Event]:
         """Cancel running tools and return any resulting events."""
         ui_events: list[events.Event] = []
-        self._persist_partial_assistant_on_cancel()
+        self._persist_partial_message_on_cancel()
         if self._tool_executor is not None:
             for exec_event in self._tool_executor.cancel():
                 for ui_event in build_events_from_tool_executor_event(self._context.session_ctx.session_id, exec_event):
@@ -237,146 +237,144 @@ class TurnExecutor:
         if image_config.model_dump(exclude_none=True):
             call_param.image_config = image_config
 
-        async for delta in ctx.llm_client.call(call_param):
-            log_debug(
-                f"[{delta.__class__.__name__}]",
-                delta.model_dump_json(exclude_none=True),
-                style="green",
-                debug_type=DebugType.RESPONSE,
-            )
-            match delta:
-                case message.ThinkingTextDelta() as delta:
-                    if not thinking_active:
-                        thinking_active = True
-                        yield events.ThinkingStartEvent(
-                            response_id=delta.response_id,
-                            session_id=session_ctx.session_id,
-                        )
-                    yield events.ThinkingDeltaEvent(
-                        content=delta.content,
-                        response_id=delta.response_id,
-                        session_id=session_ctx.session_id,
-                    )
-                case message.AssistantTextDelta() as delta:
-                    if thinking_active:
-                        thinking_active = False
-                        yield events.ThinkingEndEvent(
+        self._llm_stream = await ctx.llm_client.call(call_param)
+        try:
+            async for delta in self._llm_stream:
+                log_debug(
+                    f"[{delta.__class__.__name__}]",
+                    delta.model_dump_json(exclude_none=True),
+                    style="green",
+                    debug_type=DebugType.RESPONSE,
+                )
+                match delta:
+                    case message.ThinkingTextDelta() as delta:
+                        if not thinking_active:
+                            thinking_active = True
+                            yield events.ThinkingStartEvent(
+                                response_id=delta.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        yield events.ThinkingDeltaEvent(
+                            content=delta.content,
                             response_id=delta.response_id,
                             session_id=session_ctx.session_id,
                         )
-                    if not assistant_text_active:
-                        assistant_text_active = True
-                        yield events.AssistantTextStartEvent(
+                    case message.AssistantTextDelta() as delta:
+                        if thinking_active:
+                            thinking_active = False
+                            yield events.ThinkingEndEvent(
+                                response_id=delta.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        if not assistant_text_active:
+                            assistant_text_active = True
+                            yield events.AssistantTextStartEvent(
+                                response_id=delta.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        yield events.AssistantTextDeltaEvent(
+                            content=delta.content,
                             response_id=delta.response_id,
                             session_id=session_ctx.session_id,
                         )
-                    if delta.response_id:
-                        self._assistant_response_id = delta.response_id
-                    self._assistant_delta_buffer.append(delta.content)
-                    yield events.AssistantTextDeltaEvent(
-                        content=delta.content,
-                        response_id=delta.response_id,
-                        session_id=session_ctx.session_id,
-                    )
-                case message.AssistantImageDelta() as delta:
-                    if thinking_active:
-                        thinking_active = False
-                        yield events.ThinkingEndEvent(
+                    case message.AssistantImageDelta() as delta:
+                        if thinking_active:
+                            thinking_active = False
+                            yield events.ThinkingEndEvent(
+                                response_id=delta.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        yield events.AssistantImageDeltaEvent(
+                            file_path=delta.file_path,
                             response_id=delta.response_id,
                             session_id=session_ctx.session_id,
                         )
-                    yield events.AssistantImageDeltaEvent(
-                        file_path=delta.file_path,
-                        response_id=delta.response_id,
-                        session_id=session_ctx.session_id,
-                    )
-                case message.AssistantMessage() as msg:
-                    if msg.response_id is None and self._assistant_response_id:
-                        msg.response_id = self._assistant_response_id
-                    if thinking_active:
-                        thinking_active = False
-                        yield events.ThinkingEndEvent(
-                            response_id=msg.response_id,
-                            session_id=session_ctx.session_id,
-                        )
-                    if assistant_text_active:
-                        assistant_text_active = False
-                        yield events.AssistantTextEndEvent(
-                            response_id=msg.response_id,
-                            session_id=session_ctx.session_id,
-                        )
-                    turn_result.assistant_message = msg
-                    for part in msg.parts:
-                        if isinstance(part, message.ToolCallPart):
-                            turn_result.tool_calls.append(
-                                ToolCallRequest(
-                                    response_id=msg.response_id,
-                                    call_id=part.call_id,
-                                    tool_name=part.tool_name,
-                                    arguments_json=part.arguments_json,
+                    case message.AssistantMessage() as msg:
+                        if thinking_active:
+                            thinking_active = False
+                            yield events.ThinkingEndEvent(
+                                response_id=msg.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        if assistant_text_active:
+                            assistant_text_active = False
+                            yield events.AssistantTextEndEvent(
+                                response_id=msg.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        turn_result.assistant_message = msg
+                        for part in msg.parts:
+                            if isinstance(part, message.ToolCallPart):
+                                turn_result.tool_calls.append(
+                                    ToolCallRequest(
+                                        response_id=msg.response_id,
+                                        call_id=part.call_id,
+                                        tool_name=part.tool_name,
+                                        arguments_json=part.arguments_json,
+                                    )
                                 )
+                        if msg.stop_reason != "aborted":
+                            thinking_text = "".join(
+                                part.text for part in msg.parts if isinstance(part, message.ThinkingTextPart)
                             )
-                    if msg.stop_reason != "aborted":
-                        thinking_text = "".join(
-                            part.text for part in msg.parts if isinstance(part, message.ThinkingTextPart)
-                        )
-                        yield events.ResponseCompleteEvent(
-                            content=message.join_text_parts(msg.parts),
-                            response_id=msg.response_id,
-                            session_id=session_ctx.session_id,
-                            thinking_text=thinking_text or None,
-                        )
-                    if msg.stop_reason == "aborted":
-                        yield events.InterruptEvent(session_id=session_ctx.session_id)
-                    if msg.usage:
-                        metadata = msg.usage
-                        if metadata.response_id is None:
-                            metadata.response_id = msg.response_id
-                        if not metadata.model_name:
-                            metadata.model_name = ctx.llm_client.model_name
-                        if metadata.provider is None:
-                            metadata.provider = ctx.llm_client.get_llm_config().provider_name or None
-                        yield events.UsageEvent(
-                            session_id=session_ctx.session_id,
-                            usage=metadata,
+                            yield events.ResponseCompleteEvent(
+                                content=message.join_text_parts(msg.parts),
+                                response_id=msg.response_id,
+                                session_id=session_ctx.session_id,
+                                thinking_text=thinking_text or None,
+                            )
+                        if msg.stop_reason == "aborted":
+                            yield events.InterruptEvent(session_id=session_ctx.session_id)
+                        if msg.usage:
+                            metadata = msg.usage
+                            if metadata.response_id is None:
+                                metadata.response_id = msg.response_id
+                            if not metadata.model_name:
+                                metadata.model_name = ctx.llm_client.model_name
+                            if metadata.provider is None:
+                                metadata.provider = ctx.llm_client.get_llm_config().provider_name or None
+                            yield events.UsageEvent(
+                                session_id=session_ctx.session_id,
+                                usage=metadata,
+                            )
+                    case message.StreamErrorItem() as msg:
+                        turn_result.stream_error = msg
+                        log_debug(
+                            "[StreamError]",
+                            msg.error,
+                            style="red",
+                            debug_type=DebugType.RESPONSE,
                         )
-                case message.StreamErrorItem() as msg:
-                    turn_result.stream_error = msg
-                    log_debug(
-                        "[StreamError]",
-                        msg.error,
-                        style="red",
-                        debug_type=DebugType.RESPONSE,
-                    )
-                case message.ToolCallStartDelta() as msg:
-                    if thinking_active:
-                        thinking_active = False
-                        yield events.ThinkingEndEvent(
-                            response_id=msg.response_id,
+                    case message.ToolCallStartDelta() as msg:
+                        if thinking_active:
+                            thinking_active = False
+                            yield events.ThinkingEndEvent(
+                                response_id=msg.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        if assistant_text_active:
+                            assistant_text_active = False
+                            yield events.AssistantTextEndEvent(
+                                response_id=msg.response_id,
+                                session_id=session_ctx.session_id,
+                            )
+                        yield events.ToolCallStartEvent(
                             session_id=session_ctx.session_id,
-                        )
-                    if assistant_text_active:
-                        assistant_text_active = False
-                        yield events.AssistantTextEndEvent(
                             response_id=msg.response_id,
-                            session_id=session_ctx.session_id,
+                            tool_call_id=msg.call_id,
+                            tool_name=msg.name,
+                            model_id=ctx.llm_client.model_name,
                         )
-                    yield events.ToolCallStartEvent(
-                        session_id=session_ctx.session_id,
-                        response_id=msg.response_id,
-                        tool_call_id=msg.call_id,
-                        tool_name=msg.name,
-                    )
-                case _:
-                    continue
+                    case _:
+                        continue
+        finally:
+            self._llm_stream = None
 
     def _append_success_history(self, turn_result: TurnResult) -> None:
         """Persist successful turn artifacts to conversation history."""
         session_ctx = self._context.session_ctx
         if turn_result.assistant_message:
             session_ctx.append_history([turn_result.assistant_message])
-        self._assistant_delta_buffer.clear()
-        self._assistant_response_id = None
 
     async def _run_tool_executor(self, tool_calls: list[ToolCallRequest]) -> AsyncGenerator[events.Event]:
         """Run tools for the turn and translate executor events to UI events."""
@@ -404,23 +402,15 @@
         finally:
             self._tool_executor = None
 
-    def _persist_partial_assistant_on_cancel(self) -> None:
-        """Persist streamed assistant text when a turn is interrupted.
+    def _persist_partial_message_on_cancel(self) -> None:
+        """Persist accumulated message when a turn is interrupted.
 
-        Reasoning and tool calls are intentionally discarded on interrupt; only
-        the assistant message text collected so far is saved so it appears in
-        subsequent history/context.
+        Retrieves the partial message from the LLM stream, including both
+        thinking and assistant text accumulated so far.
         """
-
-        if not self._assistant_delta_buffer:
+        if self._llm_stream is None:
             return
-        partial_text = "".join(self._assistant_delta_buffer) + INTERRUPT_MARKER
-        if not partial_text:
+        partial_message = self._llm_stream.get_partial_message()
+        if partial_message is None:
             return
-        partial_message = message.AssistantMessage(
-            parts=message.text_parts_from_str(partial_text),
-            response_id=self._assistant_response_id,
-            stop_reason="aborted",
-        )
         self._context.session_ctx.append_history([partial_message])
-        self._assistant_delta_buffer.clear()
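With buffering moved into the stream, the rewritten cancel path reduces to asking for the partial message and appending it to history; the old manual reconstruction (join the buffer, append `INTERRUPT_MARKER`, build an `AssistantMessage` with `stop_reason="aborted"`) is gone. A runnable sketch of that flow, reusing the hypothetical `BufferedLLMStream` and `PartialMessage` types above; `provider`, `consume`, and `history` are stand-ins for the real LLM client, event loop, and `session_ctx.append_history`:

```python
import asyncio
import contextlib
from collections.abc import AsyncIterator


async def demo_interrupt() -> list[PartialMessage]:
    """Stream a few deltas, cancel mid-response, persist the partial message."""

    async def provider() -> AsyncIterator[tuple[str, str]]:
        yield "thinking", "Checking the config... "
        yield "text", "The default is"
        await asyncio.sleep(3600)  # provider stalls; the user interrupts here

    history: list[PartialMessage] = []
    stream = BufferedLLMStream(provider())

    async def consume() -> None:
        async for _kind, _content in stream:
            pass  # the real loop yields a UI event per delta

    task = asyncio.create_task(consume())
    await asyncio.sleep(0.05)  # let the first deltas flow
    task.cancel()              # the moral equivalent of TurnExecutor.cancel()
    with contextlib.suppress(asyncio.CancelledError):
        await task

    # Mirrors _persist_partial_message_on_cancel: no stream or empty stream -> no-op.
    partial = stream.get_partial_message()
    if partial is not None:
        history.append(partial)
    return history


print(asyncio.run(demo_interrupt()))
# [PartialMessage(thinking='Checking the config... ', text='The default is', stop_reason='aborted')]
```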