klaude-code 1.2.12__py3-none-any.whl → 1.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. klaude_code/auth/codex/oauth.py +3 -3
  2. klaude_code/cli/auth_cmd.py +73 -0
  3. klaude_code/cli/config_cmd.py +88 -0
  4. klaude_code/cli/debug.py +72 -0
  5. klaude_code/cli/main.py +31 -142
  6. klaude_code/cli/runtime.py +19 -58
  7. klaude_code/cli/session_cmd.py +9 -9
  8. klaude_code/command/__init__.py +6 -6
  9. klaude_code/command/export_cmd.py +3 -3
  10. klaude_code/command/model_cmd.py +1 -1
  11. klaude_code/command/registry.py +1 -1
  12. klaude_code/command/terminal_setup_cmd.py +2 -2
  13. klaude_code/command/thinking_cmd.py +8 -6
  14. klaude_code/config/__init__.py +1 -5
  15. klaude_code/config/config.py +31 -4
  16. klaude_code/config/list_model.py +1 -1
  17. klaude_code/const/__init__.py +8 -3
  18. klaude_code/core/agent.py +14 -62
  19. klaude_code/core/executor.py +11 -10
  20. klaude_code/core/manager/agent_manager.py +4 -4
  21. klaude_code/core/manager/llm_clients.py +10 -49
  22. klaude_code/core/manager/llm_clients_builder.py +8 -21
  23. klaude_code/core/manager/sub_agent_manager.py +3 -3
  24. klaude_code/core/prompt.py +12 -7
  25. klaude_code/core/reminders.py +1 -1
  26. klaude_code/core/task.py +2 -2
  27. klaude_code/core/tool/__init__.py +16 -25
  28. klaude_code/core/tool/file/_utils.py +1 -1
  29. klaude_code/core/tool/file/apply_patch.py +17 -25
  30. klaude_code/core/tool/file/apply_patch_tool.py +4 -7
  31. klaude_code/core/tool/file/edit_tool.py +4 -11
  32. klaude_code/core/tool/file/multi_edit_tool.py +2 -3
  33. klaude_code/core/tool/file/read_tool.py +3 -4
  34. klaude_code/core/tool/file/write_tool.py +2 -3
  35. klaude_code/core/tool/memory/memory_tool.py +2 -8
  36. klaude_code/core/tool/memory/skill_loader.py +3 -2
  37. klaude_code/core/tool/shell/command_safety.py +0 -1
  38. klaude_code/core/tool/tool_context.py +1 -3
  39. klaude_code/core/tool/tool_registry.py +2 -1
  40. klaude_code/core/tool/tool_runner.py +1 -1
  41. klaude_code/core/tool/truncation.py +2 -5
  42. klaude_code/core/turn.py +9 -3
  43. klaude_code/llm/anthropic/client.py +6 -2
  44. klaude_code/llm/client.py +5 -1
  45. klaude_code/llm/codex/client.py +2 -2
  46. klaude_code/llm/input_common.py +2 -2
  47. klaude_code/llm/openai_compatible/client.py +11 -8
  48. klaude_code/llm/openai_compatible/stream_processor.py +2 -1
  49. klaude_code/llm/openrouter/client.py +22 -9
  50. klaude_code/llm/openrouter/reasoning_handler.py +19 -132
  51. klaude_code/llm/registry.py +6 -5
  52. klaude_code/llm/responses/client.py +10 -5
  53. klaude_code/protocol/events.py +9 -2
  54. klaude_code/protocol/model.py +7 -1
  55. klaude_code/protocol/sub_agent.py +2 -2
  56. klaude_code/session/export.py +58 -0
  57. klaude_code/session/selector.py +2 -2
  58. klaude_code/session/session.py +37 -7
  59. klaude_code/session/templates/export_session.html +46 -0
  60. klaude_code/trace/__init__.py +2 -2
  61. klaude_code/trace/log.py +144 -5
  62. klaude_code/ui/__init__.py +4 -9
  63. klaude_code/ui/core/stage_manager.py +7 -4
  64. klaude_code/ui/modes/debug/display.py +2 -1
  65. klaude_code/ui/modes/repl/__init__.py +1 -1
  66. klaude_code/ui/modes/repl/completers.py +6 -7
  67. klaude_code/ui/modes/repl/display.py +3 -4
  68. klaude_code/ui/modes/repl/event_handler.py +63 -5
  69. klaude_code/ui/modes/repl/key_bindings.py +2 -3
  70. klaude_code/ui/modes/repl/renderer.py +52 -62
  71. klaude_code/ui/renderers/diffs.py +1 -4
  72. klaude_code/ui/renderers/tools.py +4 -0
  73. klaude_code/ui/rich/markdown.py +3 -3
  74. klaude_code/ui/rich/searchable_text.py +6 -6
  75. klaude_code/ui/rich/status.py +3 -4
  76. klaude_code/ui/rich/theme.py +2 -5
  77. klaude_code/ui/terminal/control.py +7 -16
  78. klaude_code/ui/terminal/notifier.py +2 -4
  79. klaude_code/ui/utils/common.py +1 -1
  80. klaude_code/ui/utils/debouncer.py +2 -2
  81. {klaude_code-1.2.12.dist-info → klaude_code-1.2.14.dist-info}/METADATA +1 -1
  82. {klaude_code-1.2.12.dist-info → klaude_code-1.2.14.dist-info}/RECORD +84 -81
  83. {klaude_code-1.2.12.dist-info → klaude_code-1.2.14.dist-info}/WHEEL +0 -0
  84. {klaude_code-1.2.12.dist-info → klaude_code-1.2.14.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openai_compatible/client.py

@@ -72,7 +72,7 @@ class OpenAICompatibleClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -115,10 +115,10 @@ class OpenAICompatibleClient(LLMClientABC):
                     continue
 
                 # Support Moonshot Kimi K2's usage field in choice
-                if hasattr(event.choices[0], "usage") and getattr(event.choices[0], "usage"):
+                if usage := getattr(event.choices[0], "usage", None):
                     metadata_tracker.set_usage(
                         convert_usage(
-                            openai.types.CompletionUsage.model_validate(getattr(event.choices[0], "usage")),
+                            openai.types.CompletionUsage.model_validate(usage),
                             param.context_limit,
                             param.max_tokens,
                         )
@@ -127,15 +127,18 @@ class OpenAICompatibleClient(LLMClientABC):
                 delta = event.choices[0].delta
 
                 # Reasoning
-                reasoning_content = (
-                    getattr(delta, "reasoning_content", None)
+                if (
+                    reasoning_content := getattr(delta, "reasoning_content", None)
                     or getattr(delta, "reasoning", None)
                     or ""
-                )
-                if reasoning_content:
+                ):
                     metadata_tracker.record_token()
                     state.stage = "reasoning"
                     state.accumulated_reasoning.append(reasoning_content)
+                    yield model.ReasoningTextDelta(
+                        content=reasoning_content,
+                        response_id=state.response_id,
+                    )
 
                 # Assistant
                 if delta.content and (
@@ -176,7 +179,7 @@ class OpenAICompatibleClient(LLMClientABC):
                     )
                     state.accumulated_tool_calls.add(delta.tool_calls)
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
         # Finalize
         for item in state.flush_all():
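Note: the `AsyncGenerator[model.ConversationItem]` signature change repeats across every client in this diff. Since Python 3.13 (PEP 696), the SendType parameter of collections.abc.AsyncGenerator defaults to None, so the one-argument form means the same thing as the old two-argument spelling. A minimal sketch with illustrative names:

    from __future__ import annotations  # keeps the one-arg form harmless on pre-3.13 runtimes

    from collections.abc import AsyncGenerator


    # Equivalent annotations: on 3.13+ the SendType defaults to None,
    # so the second type argument can simply be dropped.
    async def stream_old() -> AsyncGenerator[str, None]:
        yield "chunk"


    async def stream_new() -> AsyncGenerator[str]:
        yield "chunk"

The `str(e)` → `e!s` rewrites are part of the same cleanup pass: the `!s` conversion flag applies `str()` inside the f-string without the explicit call.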
klaude_code/llm/openai_compatible/stream_processor.py

@@ -5,7 +5,8 @@ logic for accumulating and flushing reasoning, assistant content, and tool calls
 across different LLM providers (OpenAI-compatible, OpenRouter).
 """
 
-from typing import Callable, Literal
+from collections.abc import Callable
+from typing import Literal
 
 from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
 from klaude_code.protocol import model
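Note: this `typing.Callable` → `collections.abc.Callable` move (repeated in registry.py and sub_agent.py below) follows PEP 585: since Python 3.9 the `typing` aliases for the container ABCs are deprecated and `collections.abc` is canonical. A tiny sketch with an illustrative alias, not one from the package:

    from collections.abc import Callable

    # PEP 585: collections.abc.Callable is the canonical generic since 3.9;
    # typing.Callable is a deprecated alias kept for compatibility.
    FlushHook = Callable[[str], None]


    def run(hook: FlushHook) -> None:
        hook("flushed")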
klaude_code/llm/openrouter/client.py

@@ -15,7 +15,7 @@ from klaude_code.llm.openrouter.reasoning_handler import ReasoningDetail, Reason
 from klaude_code.llm.registry import register
 from klaude_code.llm.usage import MetadataTracker, convert_usage
 from klaude_code.protocol import llm_param, model
-from klaude_code.trace import DebugType, log, log_debug
+from klaude_code.trace import DebugType, is_debug_enabled, log, log_debug
 
 
 def build_payload(
@@ -26,8 +26,12 @@ def build_payload(
     tools = convert_tool_schema(param.tools)
 
     extra_body: dict[str, object] = {
-        "usage": {"include": True}  # To get the cache tokens at the end of the response
+        "usage": {"include": True},  # To get the cache tokens at the end of the response
     }
+    if is_debug_enabled():
+        extra_body["debug"] = {
+            "echo_upstream_body": True
+        }  # https://openrouter.ai/docs/api/reference/errors-and-debugging#debug-option-shape
     extra_headers: dict[str, str] = {}
 
     if param.thinking:
@@ -45,9 +49,7 @@ def build_payload(
         extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
 
     if is_claude_model(param.model):
-        extra_headers["anthropic-beta"] = (
-            "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
-        )
+        extra_headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14"
 
     payload: CompletionCreateParamsStreaming = {
         "model": str(param.model),
@@ -81,7 +83,7 @@ class OpenRouterClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -133,13 +135,24 @@ class OpenRouterClient(LLMClientABC):
                 delta = event.choices[0].delta
 
                 # Reasoning
-                if hasattr(delta, "reasoning_details") and getattr(delta, "reasoning_details"):
-                    reasoning_details = getattr(delta, "reasoning_details")
+                if reasoning_details := getattr(delta, "reasoning_details", None):
                     for item in reasoning_details:
                         try:
                             reasoning_detail = ReasoningDetail.model_validate(item)
                             metadata_tracker.record_token()
                             state.stage = "reasoning"
+                            # Yield delta immediately for streaming
+                            if reasoning_detail.text:
+                                yield model.ReasoningTextDelta(
+                                    content=reasoning_detail.text,
+                                    response_id=state.response_id,
+                                )
+                            if reasoning_detail.summary:
+                                yield model.ReasoningTextDelta(
+                                    content=reasoning_detail.summary,
+                                    response_id=state.response_id,
+                                )
+                            # Keep existing handler logic for final items
                             for conversation_item in reasoning_handler.on_detail(reasoning_detail):
                                 yield conversation_item
                         except Exception as e:
@@ -182,7 +195,7 @@ class OpenRouterClient(LLMClientABC):
                     state.accumulated_tool_calls.add(delta.tool_calls)
 
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
         # Finalize
        for item in state.flush_all():
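Note: two patterns in the hunks above are worth naming. First, `hasattr(x, name) and getattr(x, name)` collapses into one walrus expression, because `getattr` with a `None` default already covers the missing-attribute case. Second, reasoning text is now surfaced twice on purpose: immediately as a `ReasoningTextDelta` for live rendering, and again through the handler as a complete item for history. A runnable miniature of the walrus rewrite (dummy class, not the package's):

    class Delta:  # stand-in for the SDK's streaming delta object
        reasoning_details = [{"type": "reasoning.text", "text": "step 1"}]


    delta = Delta()

    # Before: if hasattr(delta, "reasoning_details") and getattr(delta, "reasoning_details"):
    # After: one expression; the None default makes the hasattr check redundant.
    if reasoning_details := getattr(delta, "reasoning_details", None):
        for item in reasoning_details:
            print(item["text"])  # prints: step 1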
klaude_code/llm/openrouter/reasoning_handler.py

@@ -1,5 +1,3 @@
-from enum import Enum
-
 from pydantic import BaseModel
 
 from klaude_code.protocol import model
@@ -18,14 +16,8 @@ class ReasoningDetail(BaseModel):
     signature: str | None = None  # Claude's signature
 
 
-class ReasoningMode(str, Enum):
-    COMPLETE_CHUNK = "complete_chunk"
-    GPT5_SECTIONS = "gpt5_sections"
-    ACCUMULATE = "accumulate"
-
-
 class ReasoningStreamHandler:
-    """Encapsulates reasoning stream handling across different model behaviors."""
+    """Accumulates reasoning text and flushes on encrypted content or finalize."""
 
     def __init__(
         self,
@@ -37,59 +29,48 @@ class ReasoningStreamHandler:
 
         self._reasoning_id: str | None = None
         self._accumulated_reasoning: list[str] = []
-        self._gpt5_line_buffer: str = ""
-        self._gpt5_section_lines: list[str] = []
 
     def set_response_id(self, response_id: str | None) -> None:
         """Update the response identifier used for emitted items."""
-
         self._response_id = response_id
 
     def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
         """Process a single reasoning detail and return streamable items."""
-
         items: list[model.ConversationItem] = []
 
         if detail.type == "reasoning.encrypted":
             self._reasoning_id = detail.id
+            # Flush accumulated text before encrypted content
+            items.extend(self._flush_text())
             if encrypted_item := self._build_encrypted_item(detail.data, detail):
                 items.append(encrypted_item)
             return items
 
         if detail.type in ("reasoning.text", "reasoning.summary"):
             self._reasoning_id = detail.id
-            if encrypted_item := self._build_encrypted_item(detail.signature, detail):
-                items.append(encrypted_item)
+            # Accumulate text
             text = detail.text if detail.type == "reasoning.text" else detail.summary
             if text:
-                items.extend(self._handle_text(text))
+                self._accumulated_reasoning.append(text)
+            # Flush on signature (encrypted content)
+            if detail.signature:
+                items.extend(self._flush_text())
+                if encrypted_item := self._build_encrypted_item(detail.signature, detail):
+                    items.append(encrypted_item)
 
         return items
 
     def flush(self) -> list[model.ConversationItem]:
-        """Flush buffered reasoning text and encrypted payloads."""
+        """Flush buffered reasoning text on finalize."""
+        return self._flush_text()
 
-        items: list[model.ConversationItem] = []
-        mode = self._resolve_mode()
-
-        if mode is ReasoningMode.GPT5_SECTIONS:
-            for section in self._drain_gpt5_sections():
-                items.append(self._build_text_item(section))
-        elif self._accumulated_reasoning and mode is ReasoningMode.ACCUMULATE:
-            items.append(self._build_text_item("".join(self._accumulated_reasoning)))
-            self._accumulated_reasoning = []
-
-        return items
-
-    def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
-        mode = self._resolve_mode()
-        if mode is ReasoningMode.COMPLETE_CHUNK:
-            return [self._build_text_item(text)]
-        if mode is ReasoningMode.GPT5_SECTIONS:
-            sections = self._process_gpt5_text(text)
-            return [self._build_text_item(section) for section in sections]
-        self._accumulated_reasoning.append(text)
-        return []
+    def _flush_text(self) -> list[model.ConversationItem]:
+        """Flush accumulated reasoning text as a single item."""
+        if not self._accumulated_reasoning:
+            return []
+        item = self._build_text_item("".join(self._accumulated_reasoning))
+        self._accumulated_reasoning = []
+        return [item]
 
     def _build_text_item(self, content: str) -> model.ReasoningTextItem:
         return model.ReasoningTextItem(
@@ -113,97 +94,3 @@ class ReasoningStreamHandler:
             response_id=self._response_id,
             model=self._param_model,
         )
-
-    def _process_gpt5_text(self, text: str) -> list[str]:
-        emitted_sections: list[str] = []
-        self._gpt5_line_buffer += text
-        while True:
-            newline_index = self._gpt5_line_buffer.find("\n")
-            if newline_index == -1:
-                break
-            line = self._gpt5_line_buffer[:newline_index]
-            self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
-            remainder = line
-            while True:
-                split_result = self._split_gpt5_title_line(remainder)
-                if split_result is None:
-                    break
-                prefix_segment, title_segment, remainder = split_result
-                if prefix_segment:
-                    if not self._gpt5_section_lines:
-                        self._gpt5_section_lines = []
-                    self._gpt5_section_lines.append(f"{prefix_segment}\n")
-                if self._gpt5_section_lines:
-                    emitted_sections.append("".join(self._gpt5_section_lines))
-                self._gpt5_section_lines = [f"{title_segment}  \n"]  # Add two spaces for markdown line break
-            if remainder:
-                if not self._gpt5_section_lines:
-                    self._gpt5_section_lines = []
-                self._gpt5_section_lines.append(f"{remainder}\n")
-        return emitted_sections
-
-    def _drain_gpt5_sections(self) -> list[str]:
-        sections: list[str] = []
-        if self._gpt5_line_buffer:
-            if not self._gpt5_section_lines:
-                self._gpt5_section_lines = [self._gpt5_line_buffer]
-            else:
-                self._gpt5_section_lines.append(self._gpt5_line_buffer)
-            self._gpt5_line_buffer = ""
-        if self._gpt5_section_lines:
-            sections.append("".join(self._gpt5_section_lines))
-            self._gpt5_section_lines = []
-        return sections
-
-    def _is_gpt5(self) -> bool:
-        return "gpt-5" in self._param_model.lower()
-
-    def _is_complete_chunk_reasoning_model(self) -> bool:
-        """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
-
-        return self._param_model.startswith("google/gemini")
-
-    def _resolve_mode(self) -> ReasoningMode:
-        if self._is_complete_chunk_reasoning_model():
-            return ReasoningMode.COMPLETE_CHUNK
-        if self._is_gpt5():
-            return ReasoningMode.GPT5_SECTIONS
-        return ReasoningMode.ACCUMULATE
-
-    def _is_gpt5_title_line(self, line: str) -> bool:
-        stripped = line.strip()
-        if not stripped:
-            return False
-        return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
-
-    def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
-        if not line:
-            return None
-        search_start = 0
-        while True:
-            opening_index = line.find("**", search_start)
-            if opening_index == -1:
-                return None
-            closing_index = line.find("**", opening_index + 2)
-            if closing_index == -1:
-                return None
-            title_candidate = line[opening_index : closing_index + 2]
-            stripped_title = title_candidate.strip()
-            if self._is_gpt5_title_line(stripped_title):
-                # Treat as a GPT-5 title only when everything after the
-                # bold segment is either whitespace or starts a new bold
-                # title. This prevents inline bold like `**xxx**yyyy`
-                # from being misclassified as a section title while
-                # preserving support for consecutive titles in one line.
-                after = line[closing_index + 2 :]
-                if after.strip() and not after.lstrip().startswith("**"):
-                    search_start = closing_index + 2
-                    continue
-                prefix_segment = line[:opening_index]
-                remainder_segment = after
-                return (
-                    prefix_segment if prefix_segment else None,
-                    stripped_title,
-                    remainder_segment,
-                )
-            search_start = closing_index + 2
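Note: the rewrite drops the per-model `ReasoningMode` machinery (complete-chunk for Gemini, section splitting for GPT-5, accumulate for the rest), which is no longer needed once deltas stream separately: the handler now only accumulates text and flushes one joined item on a signature, before encrypted content, or at finalize. A runnable miniature of that contract (illustrative class, not the package's):

    class TextAccumulator:
        """Collect text chunks; flush returns one joined item or nothing."""

        def __init__(self) -> None:
            self._chunks: list[str] = []

        def on_text(self, text: str) -> None:
            self._chunks.append(text)

        def flush(self) -> list[str]:
            if not self._chunks:
                return []
            joined = "".join(self._chunks)
            self._chunks = []
            return [joined]


    acc = TextAccumulator()
    acc.on_text("First, ")
    acc.on_text("inspect the repo.")
    print(acc.flush())  # ['First, inspect the repo.']
    print(acc.flush())  # []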
klaude_code/llm/registry.py

@@ -1,4 +1,5 @@
-from typing import TYPE_CHECKING, Callable, TypeVar
+from collections.abc import Callable
+from typing import TYPE_CHECKING, TypeVar
 
 from klaude_code.protocol import llm_param
 
@@ -20,13 +21,13 @@ def _load_protocol(protocol: llm_param.LLMClientProtocol) -> None:
 
     # Import only the needed module to trigger @register decorator
     if protocol == llm_param.LLMClientProtocol.ANTHROPIC:
-        from . import anthropic as _  # noqa: F401
+        from . import anthropic as _
     elif protocol == llm_param.LLMClientProtocol.CODEX:
-        from . import codex as _  # noqa: F401
+        from . import codex as _
     elif protocol == llm_param.LLMClientProtocol.OPENAI:
-        from . import openai_compatible as _  # noqa: F401
+        from . import openai_compatible as _
    elif protocol == llm_param.LLMClientProtocol.OPENROUTER:
-        from . import openrouter as _  # noqa: F401
+        from . import openrouter as _
    elif protocol == llm_param.LLMClientProtocol.RESPONSES:
        from . import responses as _  # noqa: F401
 
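Note: for context on why these imports bind to `_`, `_load_protocol` imports a provider package purely for its side effect: running the module-level `@register` decorators that populate the client registry. A minimal sketch of that pattern (simplified; the real `register` lives in `klaude_code.llm.registry`):

    _REGISTRY: dict[str, type] = {}


    def register(name: str):
        """Class decorator that records the client under its protocol name."""

        def decorate(cls: type) -> type:
            _REGISTRY[name] = cls
            return cls

        return decorate


    @register("openrouter")  # runs at import time, so importing the module is enough
    class DummyClient:
        pass


    print(_REGISTRY)  # {'openrouter': <class '...DummyClient'>}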
klaude_code/llm/responses/client.py

@@ -16,7 +16,6 @@ from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
-
 if TYPE_CHECKING:
     from openai import AsyncStream
     from openai.types.responses import ResponseStreamEvent
@@ -60,7 +59,7 @@ async def parse_responses_stream(
     stream: "AsyncStream[ResponseStreamEvent]",
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
-) -> AsyncGenerator[model.ConversationItem, None]:
+) -> AsyncGenerator[model.ConversationItem]:
     """Parse OpenAI Responses API stream events into ConversationItems."""
     response_id: str | None = None
 
@@ -76,6 +75,12 @@ async def parse_responses_stream(
             case responses.ResponseCreatedEvent() as event:
                 response_id = event.response.id
                 yield model.StartItem(response_id=response_id)
+            case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
+                if event.delta:
+                    yield model.ReasoningTextDelta(
+                        content=event.delta,
+                        response_id=response_id,
+                    )
             case responses.ResponseReasoningSummaryTextDoneEvent() as event:
                 if event.text:
                     yield model.ReasoningTextItem(
@@ -164,7 +169,7 @@ async def parse_responses_stream(
                     debug_type=DebugType.LLM_STREAM,
                 )
     except (openai.OpenAIError, httpx.HTTPError) as e:
-        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
 
 @register(llm_param.LLMClientProtocol.RESPONSES)
@@ -194,7 +199,7 @@ class ResponsesClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -212,7 +217,7 @@ class ResponsesClient(LLMClientABC):
                 extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
             return
 
         async for item in parse_responses_stream(stream, param, metadata_tracker):
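Note: the new `ResponseReasoningSummaryTextDeltaEvent` case slots into the existing `match` dispatch: delta events stream partial summary text, while the `...Done` event carries the complete text. A runnable miniature of that dispatch shape (dummy event classes; the real ones come from `openai.types.responses`):

    from dataclasses import dataclass


    @dataclass
    class SummaryTextDelta:  # stand-in for ResponseReasoningSummaryTextDeltaEvent
        delta: str


    @dataclass
    class SummaryTextDone:  # stand-in for ResponseReasoningSummaryTextDoneEvent
        text: str


    def dispatch(event: object) -> str:
        match event:
            case SummaryTextDelta(delta=d) if d:
                return f"stream delta: {d}"
            case SummaryTextDone(text=t) if t:
                return f"final item: {t}"
            case _:
                return "ignored"


    print(dispatch(SummaryTextDelta("think")))   # stream delta: think
    print(dispatch(SummaryTextDone("thought")))  # final item: thought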
klaude_code/protocol/events.py

@@ -54,6 +54,12 @@ class ThinkingEvent(BaseModel):
     content: str
 
 
+class ThinkingDeltaEvent(BaseModel):
+    session_id: str
+    response_id: str | None = None
+    content: str
+
+
 class AssistantMessageDeltaEvent(BaseModel):
     session_id: str
     response_id: str | None = None
@@ -79,7 +85,6 @@ class ToolCallEvent(BaseModel):
     tool_call_id: str
     tool_name: str
     arguments: str
-    is_replay: bool = False
 
 
 class ToolResultEvent(BaseModel):
@@ -90,7 +95,6 @@ class ToolResultEvent(BaseModel):
     result: str
     ui_extra: model.ToolResultUIExtra | None = None
     status: Literal["success", "error"]
-    is_replay: bool = False
     task_metadata: model.TaskMetadata | None = None  # Sub-agent task metadata
 
 
@@ -130,6 +134,8 @@ class TodoChangeEvent(BaseModel):
 
 HistoryItemEvent = (
     ThinkingEvent
+    | TaskStartEvent
+    | TaskFinishEvent
     | TurnStartEvent  # This event is used for UI to print new empty line
     | AssistantMessageEvent
     | ToolCallEvent
@@ -153,6 +159,7 @@ Event = (
     TaskStartEvent
     | TaskFinishEvent
     | ThinkingEvent
+    | ThinkingDeltaEvent
     | AssistantMessageDeltaEvent
     | AssistantMessageEvent
     | ToolCallEvent
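Note: `ThinkingDeltaEvent` mirrors `AssistantMessageDeltaEvent`: deltas drive the live view, while the complete `ThinkingEvent` is what lands in history (it joins `Event` but not `HistoryItemEvent`). Consumers narrow the union by type; a small runnable sketch under that assumption, with the union reduced to two members:

    from pydantic import BaseModel


    class ThinkingEvent(BaseModel):
        session_id: str
        content: str


    class ThinkingDeltaEvent(BaseModel):
        session_id: str
        response_id: str | None = None
        content: str


    Event = ThinkingEvent | ThinkingDeltaEvent  # reduced version of the union above


    def handle(event: Event) -> str:
        # Deltas update the live view; complete events are recorded to history.
        if isinstance(event, ThinkingDeltaEvent):
            return f"append to live view: {event.content}"
        return f"write to history: {event.content}"


    print(handle(ThinkingDeltaEvent(session_id="s1", content="partial…")))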
klaude_code/protocol/model.py

@@ -296,6 +296,12 @@ class AssistantMessageDelta(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
 
 
+class ReasoningTextDelta(BaseModel):
+    response_id: str | None = None
+    content: str
+    created_at: datetime = Field(default_factory=datetime.now)
+
+
 class StreamErrorItem(BaseModel):
     error: str
     created_at: datetime = Field(default_factory=datetime.now)
@@ -392,7 +398,7 @@ MessageItem = (
 )
 
 
-StreamItem = AssistantMessageDelta
+StreamItem = AssistantMessageDelta | ReasoningTextDelta
 
 ConversationItem = (
     StartItem
klaude_code/protocol/sub_agent.py

@@ -1,7 +1,8 @@
 from __future__ import annotations
 
+from collections.abc import Callable
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Callable
+from typing import TYPE_CHECKING, Any
 
 from klaude_code.protocol import tools
 
@@ -290,7 +291,6 @@ register_sub_agent(
         tool_set=(tools.BASH, tools.READ),
         prompt_builder=_explore_prompt_builder,
         active_form="Exploring",
-        target_model_filter=lambda model: ("haiku" not in model) and ("kimi" not in model) and ("grok" not in model),
     )
 )
 
klaude_code/session/export.py

@@ -544,7 +544,13 @@ def _format_tool_call(tool_call: model.ToolCallItem, result: model.ToolResultIte
 def _build_messages_html(
     history: list[model.ConversationItem],
     tool_results: dict[str, model.ToolResultItem],
+    *,
+    seen_session_ids: set[str] | None = None,
+    nesting_level: int = 0,
 ) -> str:
+    if seen_session_ids is None:
+        seen_session_ids = set()
+
     blocks: list[str] = []
     assistant_counter = 0
 
@@ -596,9 +602,61 @@ def _build_messages_html(
             result = tool_results.get(item.call_id)
             blocks.append(_format_tool_call(item, result))
 
+            # Recursively render sub-agent session history
+            if result is not None:
+                sub_agent_html = _render_sub_agent_session(result, seen_session_ids, nesting_level)
+                if sub_agent_html:
+                    blocks.append(sub_agent_html)
+
     return "\n".join(blocks)
 
 
+def _render_sub_agent_session(
+    tool_result: model.ToolResultItem,
+    seen_session_ids: set[str],
+    nesting_level: int,
+) -> str | None:
+    """Render sub-agent session history when a tool result references it."""
+    from klaude_code.session.session import Session
+
+    ui_extra = tool_result.ui_extra
+    if not isinstance(ui_extra, model.SessionIdUIExtra):
+        return None
+
+    session_id = ui_extra.session_id
+    if not session_id or session_id in seen_session_ids:
+        return None
+
+    seen_session_ids.add(session_id)
+
+    try:
+        sub_session = Session.load(session_id)
+    except Exception:
+        return None
+
+    sub_history = sub_session.conversation_history
+    sub_tool_results = {item.call_id: item for item in sub_history if isinstance(item, model.ToolResultItem)}
+
+    sub_html = _build_messages_html(
+        sub_history,
+        sub_tool_results,
+        seen_session_ids=seen_session_ids,
+        nesting_level=nesting_level + 1,
+    )
+
+    if not sub_html:
+        return None
+
+    # Wrap in a collapsible sub-agent container using same style as other collapsible sections
+    indent_style = f' style="margin-left: {nesting_level * 16}px;"' if nesting_level > 0 else ""
+    return (
+        f'<details class="sub-agent-session"{indent_style}>'
+        f"<summary>Sub-agent: {_escape_html(session_id)}</summary>"
+        f'<div class="sub-agent-content">{sub_html}</div>'
+        f"</details>"
+    )
+
+
 def build_export_html(
     session: Session,
     system_prompt: str,
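Note: the recursion above stays safe through a shared `seen_session_ids` set (each session renders at most once, so cycles and duplicates terminate) and indents each level via `nesting_level`. A runnable miniature of the same shape (illustrative names; the real code loads `Session` objects and escapes HTML):

    def render(session_id: str, children: dict[str, list[str]], seen: set[str] | None = None, level: int = 0) -> str:
        if seen is None:
            seen = set()
        if session_id in seen:  # already rendered: break cycles and skip duplicates
            return ""
        seen.add(session_id)
        inner = "".join(render(child, children, seen, level + 1) for child in children.get(session_id, []))
        indent = f' style="margin-left: {level * 16}px;"' if level > 0 else ""
        return f"<details{indent}><summary>{session_id}</summary>{inner}</details>"


    tree = {"root": ["sub-1", "sub-2"], "sub-1": ["root"]}  # sub-1 points back at root
    print(render("root", tree))  # the back-reference is skipped, so no infinite loop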
klaude_code/session/selector.py

@@ -40,7 +40,7 @@ def resume_select_session() -> str | None:
             ("class:b", f"{msg_count_display:>{MSG_COUNT_WIDTH}} "),
             (
                 "class:t",
-                f"{model_display[:MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} ",
+                f"{model_display[: MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} ",
             ),
             (
                 "class:t",
@@ -69,7 +69,7 @@ def resume_select_session() -> str | None:
         model_display = s.model_name or "N/A"
         print(
             f"{i}. {_fmt(s.updated_at)} {msg_count_display:>{MSG_COUNT_WIDTH}} "
-            f"{model_display[:MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} {s.id} {s.work_dir}"
+            f"{model_display[: MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} {s.id} {s.work_dir}"
         )
         try:
             raw = input("Select a session number: ").strip()
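Note: both hunks are formatter-only, adding a space after `[:` inside the f-string's nested expression. Unpacked, the expression truncates the model name to the column width with an ellipsis, then left-pads the result; the same logic in plain statements (MODEL_WIDTH is assumed here, the real constant lives in the module):

    MODEL_WIDTH = 20
    model_display = "anthropic/claude-sonnet-4.5"

    shown = model_display[: MODEL_WIDTH - 1] + "…" if len(model_display) > MODEL_WIDTH else model_display
    print(f"{shown:<{MODEL_WIDTH}} ")  # truncate to width-1, append '…', pad to width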