klaude-code 1.2.12__py3-none-any.whl → 1.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/codex/oauth.py +3 -3
- klaude_code/cli/main.py +5 -5
- klaude_code/cli/runtime.py +19 -27
- klaude_code/cli/session_cmd.py +6 -8
- klaude_code/command/__init__.py +6 -6
- klaude_code/command/export_cmd.py +3 -3
- klaude_code/command/registry.py +1 -1
- klaude_code/command/terminal_setup_cmd.py +2 -2
- klaude_code/command/thinking_cmd.py +8 -6
- klaude_code/config/__init__.py +1 -1
- klaude_code/config/list_model.py +1 -1
- klaude_code/core/agent.py +13 -61
- klaude_code/core/executor.py +11 -10
- klaude_code/core/manager/agent_manager.py +4 -4
- klaude_code/core/manager/llm_clients.py +10 -49
- klaude_code/core/manager/llm_clients_builder.py +8 -21
- klaude_code/core/manager/sub_agent_manager.py +3 -3
- klaude_code/core/prompt.py +2 -2
- klaude_code/core/reminders.py +1 -1
- klaude_code/core/task.py +2 -2
- klaude_code/core/tool/__init__.py +16 -25
- klaude_code/core/tool/file/_utils.py +1 -1
- klaude_code/core/tool/file/apply_patch.py +17 -25
- klaude_code/core/tool/file/apply_patch_tool.py +4 -7
- klaude_code/core/tool/file/edit_tool.py +4 -11
- klaude_code/core/tool/file/multi_edit_tool.py +2 -3
- klaude_code/core/tool/file/read_tool.py +3 -4
- klaude_code/core/tool/file/write_tool.py +2 -3
- klaude_code/core/tool/memory/memory_tool.py +2 -8
- klaude_code/core/tool/memory/skill_loader.py +3 -2
- klaude_code/core/tool/shell/command_safety.py +0 -1
- klaude_code/core/tool/tool_context.py +1 -3
- klaude_code/core/tool/tool_registry.py +2 -1
- klaude_code/core/tool/tool_runner.py +1 -1
- klaude_code/core/tool/truncation.py +2 -5
- klaude_code/core/turn.py +9 -3
- klaude_code/llm/anthropic/client.py +6 -2
- klaude_code/llm/client.py +1 -1
- klaude_code/llm/codex/client.py +2 -2
- klaude_code/llm/input_common.py +2 -2
- klaude_code/llm/openai_compatible/client.py +11 -8
- klaude_code/llm/openai_compatible/stream_processor.py +2 -1
- klaude_code/llm/openrouter/client.py +20 -8
- klaude_code/llm/openrouter/reasoning_handler.py +19 -132
- klaude_code/llm/registry.py +6 -5
- klaude_code/llm/responses/client.py +10 -5
- klaude_code/protocol/events.py +7 -0
- klaude_code/protocol/model.py +7 -1
- klaude_code/protocol/sub_agent.py +2 -1
- klaude_code/session/selector.py +2 -2
- klaude_code/session/session.py +2 -4
- klaude_code/trace/__init__.py +1 -1
- klaude_code/trace/log.py +1 -1
- klaude_code/ui/__init__.py +4 -9
- klaude_code/ui/core/stage_manager.py +7 -4
- klaude_code/ui/modes/repl/__init__.py +1 -1
- klaude_code/ui/modes/repl/completers.py +3 -4
- klaude_code/ui/modes/repl/display.py +3 -4
- klaude_code/ui/modes/repl/event_handler.py +63 -5
- klaude_code/ui/modes/repl/key_bindings.py +2 -3
- klaude_code/ui/modes/repl/renderer.py +2 -1
- klaude_code/ui/renderers/diffs.py +1 -4
- klaude_code/ui/rich/markdown.py +3 -3
- klaude_code/ui/rich/searchable_text.py +6 -6
- klaude_code/ui/rich/status.py +3 -4
- klaude_code/ui/rich/theme.py +1 -4
- klaude_code/ui/terminal/control.py +7 -16
- klaude_code/ui/terminal/notifier.py +2 -4
- klaude_code/ui/utils/common.py +1 -1
- klaude_code/ui/utils/debouncer.py +2 -2
- {klaude_code-1.2.12.dist-info → klaude_code-1.2.13.dist-info}/METADATA +1 -1
- {klaude_code-1.2.12.dist-info → klaude_code-1.2.13.dist-info}/RECORD +74 -74
- {klaude_code-1.2.12.dist-info → klaude_code-1.2.13.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.12.dist-info → klaude_code-1.2.13.dist-info}/entry_points.txt +0 -0
klaude_code/llm/codex/client.py
CHANGED
@@ -114,7 +114,7 @@ class CodexClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         # Ensure token is valid before API call
         self._ensure_valid_token()
 
@@ -142,7 +142,7 @@ class CodexClient(LLMClientABC):
                 extra_headers=extra_headers,
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
             return
 
         async for item in parse_responses_stream(stream, param, metadata_tracker):
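The signature fix above works because `collections.abc.AsyncGenerator` can be parameterized with a single type argument (the send type defaults to `None` under Python 3.13+ typing rules), and the error message now uses the explicit `!s` conversion. A minimal sketch of both idioms, with illustrative names not taken from the package:

```python
import asyncio
from collections.abc import AsyncGenerator


async def numbers(limit: int) -> AsyncGenerator[int]:
    # Single type argument: the send type defaults to None (Python 3.13+ typing rules).
    for value in range(limit):
        yield value


async def main() -> None:
    try:
        async for value in numbers(3):
            print(value)
        raise ValueError("boom")
    except ValueError as exc:
        # !s applies str() inside the f-string, mirroring the StreamErrorItem change.
        print(f"{exc.__class__.__name__} {exc!s}")


asyncio.run(main())
```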
klaude_code/llm/input_common.py
CHANGED
@@ -5,10 +5,10 @@ This module provides shared abstractions for providers that require message grou
 since it uses a flat item list matching our internal protocol.
 """
 
-from collections.abc import Iterator
+from collections.abc import Iterable, Iterator
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 from klaude_code import const
 
klaude_code/llm/openai_compatible/client.py
CHANGED
@@ -72,7 +72,7 @@ class OpenAICompatibleClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -115,10 +115,10 @@ class OpenAICompatibleClient(LLMClientABC):
                     continue
 
                 # Support Moonshot Kimi K2's usage field in choice
-                if
+                if usage := getattr(event.choices[0], "usage", None):
                     metadata_tracker.set_usage(
                         convert_usage(
-                            openai.types.CompletionUsage.model_validate(
+                            openai.types.CompletionUsage.model_validate(usage),
                             param.context_limit,
                             param.max_tokens,
                         )
@@ -127,15 +127,18 @@ class OpenAICompatibleClient(LLMClientABC):
                 delta = event.choices[0].delta
 
                 # Reasoning
-
-                    getattr(delta, "reasoning_content", None)
+                if (
+                    reasoning_content := getattr(delta, "reasoning_content", None)
                     or getattr(delta, "reasoning", None)
                     or ""
-                )
-                if reasoning_content:
+                ):
                     metadata_tracker.record_token()
                     state.stage = "reasoning"
                     state.accumulated_reasoning.append(reasoning_content)
+                    yield model.ReasoningTextDelta(
+                        content=reasoning_content,
+                        response_id=state.response_id,
+                    )
 
                 # Assistant
                 if delta.content and (
@@ -176,7 +179,7 @@ class OpenAICompatibleClient(LLMClientABC):
                         )
                     state.accumulated_tool_calls.add(delta.tool_calls)
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
         # Finalize
         for item in state.flush_all():
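The reasoning branch above pairs a walrus assignment with `getattr` fallbacks so that provider-specific fields (`reasoning_content` or `reasoning`) are read off the streamed delta, accumulated for the final item, and also emitted immediately as a `ReasoningTextDelta`. A self-contained sketch of that pattern; `_State`, `handle_delta`, and the dataclass stand-ins are illustrative, not the package's actual classes:

```python
from dataclasses import dataclass, field
from types import SimpleNamespace


@dataclass
class ReasoningTextDelta:
    # Stand-in mirroring the field names used in the diff.
    content: str
    response_id: str | None = None


@dataclass
class _State:
    response_id: str | None = None
    accumulated_reasoning: list[str] = field(default_factory=list)


def handle_delta(delta: object, state: _State) -> list[ReasoningTextDelta]:
    out: list[ReasoningTextDelta] = []
    if (
        reasoning_content := getattr(delta, "reasoning_content", None)
        or getattr(delta, "reasoning", None)
        or ""
    ):
        # Keep accumulating for the final reasoning item, but emit a delta right away.
        state.accumulated_reasoning.append(reasoning_content)
        out.append(ReasoningTextDelta(content=reasoning_content, response_id=state.response_id))
    return out


state = _State(response_id="resp_1")
print(handle_delta(SimpleNamespace(reasoning="step 1: "), state))
print(handle_delta(SimpleNamespace(content="no reasoning here"), state))  # -> []
```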
klaude_code/llm/openai_compatible/stream_processor.py
CHANGED
@@ -5,7 +5,8 @@ logic for accumulating and flushing reasoning, assistant content, and tool calls
 across different LLM providers (OpenAI-compatible, OpenRouter).
 """
 
-from
+from collections.abc import Callable
+from typing import Literal
 
 from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
 from klaude_code.protocol import model
klaude_code/llm/openrouter/client.py
CHANGED
@@ -26,7 +26,10 @@ def build_payload(
     tools = convert_tool_schema(param.tools)
 
     extra_body: dict[str, object] = {
-        "usage": {"include": True}  # To get the cache tokens at the end of the response
+        "usage": {"include": True},  # To get the cache tokens at the end of the response
+        "debug": {
+            "echo_upstream_body": True
+        },  # https://openrouter.ai/docs/api/reference/errors-and-debugging#debug-option-shape
     }
     extra_headers: dict[str, str] = {}
 
@@ -45,9 +48,7 @@ def build_payload(
         extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
 
     if is_claude_model(param.model):
-        extra_headers["anthropic-beta"] =
-            "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
-        )
+        extra_headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14"
 
     payload: CompletionCreateParamsStreaming = {
         "model": str(param.model),
@@ -81,7 +82,7 @@ class OpenRouterClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -133,13 +134,24 @@ class OpenRouterClient(LLMClientABC):
                 delta = event.choices[0].delta
 
                 # Reasoning
-                if
-                    reasoning_details = getattr(delta, "reasoning_details")
+                if reasoning_details := getattr(delta, "reasoning_details", None):
                     for item in reasoning_details:
                         try:
                             reasoning_detail = ReasoningDetail.model_validate(item)
                             metadata_tracker.record_token()
                             state.stage = "reasoning"
+                            # Yield delta immediately for streaming
+                            if reasoning_detail.text:
+                                yield model.ReasoningTextDelta(
+                                    content=reasoning_detail.text,
+                                    response_id=state.response_id,
+                                )
+                            if reasoning_detail.summary:
+                                yield model.ReasoningTextDelta(
+                                    content=reasoning_detail.summary,
+                                    response_id=state.response_id,
+                                )
+                            # Keep existing handler logic for final items
                             for conversation_item in reasoning_handler.on_detail(reasoning_detail):
                                 yield conversation_item
                         except Exception as e:
@@ -182,7 +194,7 @@ class OpenRouterClient(LLMClientABC):
                     state.accumulated_tool_calls.add(delta.tool_calls)
 
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
         # Finalize
         for item in state.flush_all():
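The `extra_body` and `extra_headers` built above are passed through the OpenAI SDK's escape hatches for non-standard request fields. A hedged sketch of how such a payload is typically sent against OpenRouter with `AsyncOpenAI`; the `debug` option and the `x-anthropic-beta` value come from the diff, while `build_openrouter_extras`, the prefix check standing in for `is_claude_model`, and the rest are illustrative:

```python
from openai import AsyncOpenAI


def build_openrouter_extras(model: str) -> tuple[dict[str, object], dict[str, str]]:
    extra_body: dict[str, object] = {
        "usage": {"include": True},  # ask OpenRouter to include usage on the final chunk
        "debug": {"echo_upstream_body": True},  # echo the upstream request body for debugging
    }
    extra_headers: dict[str, str] = {}
    if model.startswith("anthropic/"):  # simplified stand-in for is_claude_model()
        extra_headers["x-anthropic-beta"] = (
            "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14"
        )
    return extra_body, extra_headers


async def stream_chat(client: AsyncOpenAI, model: str, prompt: str) -> None:
    extra_body, extra_headers = build_openrouter_extras(model)
    stream = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
        extra_body=extra_body,
        extra_headers=extra_headers,
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
```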
klaude_code/llm/openrouter/reasoning_handler.py
CHANGED
@@ -1,5 +1,3 @@
-from enum import Enum
-
 from pydantic import BaseModel
 
 from klaude_code.protocol import model
@@ -18,14 +16,8 @@ class ReasoningDetail(BaseModel):
     signature: str | None = None  # Claude's signature
 
 
-class ReasoningMode(str, Enum):
-    COMPLETE_CHUNK = "complete_chunk"
-    GPT5_SECTIONS = "gpt5_sections"
-    ACCUMULATE = "accumulate"
-
-
 class ReasoningStreamHandler:
-    """
+    """Accumulates reasoning text and flushes on encrypted content or finalize."""
 
     def __init__(
         self,
@@ -37,59 +29,48 @@ class ReasoningStreamHandler:
 
         self._reasoning_id: str | None = None
         self._accumulated_reasoning: list[str] = []
-        self._gpt5_line_buffer: str = ""
-        self._gpt5_section_lines: list[str] = []
 
     def set_response_id(self, response_id: str | None) -> None:
         """Update the response identifier used for emitted items."""
-
         self._response_id = response_id
 
     def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
         """Process a single reasoning detail and return streamable items."""
-
         items: list[model.ConversationItem] = []
 
         if detail.type == "reasoning.encrypted":
             self._reasoning_id = detail.id
+            # Flush accumulated text before encrypted content
+            items.extend(self._flush_text())
             if encrypted_item := self._build_encrypted_item(detail.data, detail):
                 items.append(encrypted_item)
             return items
 
         if detail.type in ("reasoning.text", "reasoning.summary"):
             self._reasoning_id = detail.id
-
-                items.append(encrypted_item)
+            # Accumulate text
             text = detail.text if detail.type == "reasoning.text" else detail.summary
             if text:
-
+                self._accumulated_reasoning.append(text)
+            # Flush on signature (encrypted content)
+            if detail.signature:
+                items.extend(self._flush_text())
+                if encrypted_item := self._build_encrypted_item(detail.signature, detail):
+                    items.append(encrypted_item)
 
         return items
 
     def flush(self) -> list[model.ConversationItem]:
-        """Flush buffered reasoning text
+        """Flush buffered reasoning text on finalize."""
+        return self._flush_text()
 
-
-
-
-
-
-
-
-        items.append(self._build_text_item("".join(self._accumulated_reasoning)))
-        self._accumulated_reasoning = []
-
-        return items
-
-    def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
-        mode = self._resolve_mode()
-        if mode is ReasoningMode.COMPLETE_CHUNK:
-            return [self._build_text_item(text)]
-        if mode is ReasoningMode.GPT5_SECTIONS:
-            sections = self._process_gpt5_text(text)
-            return [self._build_text_item(section) for section in sections]
-        self._accumulated_reasoning.append(text)
-        return []
+    def _flush_text(self) -> list[model.ConversationItem]:
+        """Flush accumulated reasoning text as a single item."""
+        if not self._accumulated_reasoning:
+            return []
+        item = self._build_text_item("".join(self._accumulated_reasoning))
+        self._accumulated_reasoning = []
+        return [item]
 
     def _build_text_item(self, content: str) -> model.ReasoningTextItem:
         return model.ReasoningTextItem(
@@ -113,97 +94,3 @@ class ReasoningStreamHandler:
             response_id=self._response_id,
             model=self._param_model,
         )
-
-    def _process_gpt5_text(self, text: str) -> list[str]:
-        emitted_sections: list[str] = []
-        self._gpt5_line_buffer += text
-        while True:
-            newline_index = self._gpt5_line_buffer.find("\n")
-            if newline_index == -1:
-                break
-            line = self._gpt5_line_buffer[:newline_index]
-            self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
-            remainder = line
-            while True:
-                split_result = self._split_gpt5_title_line(remainder)
-                if split_result is None:
-                    break
-                prefix_segment, title_segment, remainder = split_result
-                if prefix_segment:
-                    if not self._gpt5_section_lines:
-                        self._gpt5_section_lines = []
-                    self._gpt5_section_lines.append(f"{prefix_segment}\n")
-                if self._gpt5_section_lines:
-                    emitted_sections.append("".join(self._gpt5_section_lines))
-                self._gpt5_section_lines = [f"{title_segment}  \n"]  # Add two spaces for markdown line break
-            if remainder:
-                if not self._gpt5_section_lines:
-                    self._gpt5_section_lines = []
-                self._gpt5_section_lines.append(f"{remainder}\n")
-        return emitted_sections
-
-    def _drain_gpt5_sections(self) -> list[str]:
-        sections: list[str] = []
-        if self._gpt5_line_buffer:
-            if not self._gpt5_section_lines:
-                self._gpt5_section_lines = [self._gpt5_line_buffer]
-            else:
-                self._gpt5_section_lines.append(self._gpt5_line_buffer)
-            self._gpt5_line_buffer = ""
-        if self._gpt5_section_lines:
-            sections.append("".join(self._gpt5_section_lines))
-            self._gpt5_section_lines = []
-        return sections
-
-    def _is_gpt5(self) -> bool:
-        return "gpt-5" in self._param_model.lower()
-
-    def _is_complete_chunk_reasoning_model(self) -> bool:
-        """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
-
-        return self._param_model.startswith("google/gemini")
-
-    def _resolve_mode(self) -> ReasoningMode:
-        if self._is_complete_chunk_reasoning_model():
-            return ReasoningMode.COMPLETE_CHUNK
-        if self._is_gpt5():
-            return ReasoningMode.GPT5_SECTIONS
-        return ReasoningMode.ACCUMULATE
-
-    def _is_gpt5_title_line(self, line: str) -> bool:
-        stripped = line.strip()
-        if not stripped:
-            return False
-        return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
-
-    def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
-        if not line:
-            return None
-        search_start = 0
-        while True:
-            opening_index = line.find("**", search_start)
-            if opening_index == -1:
-                return None
-            closing_index = line.find("**", opening_index + 2)
-            if closing_index == -1:
-                return None
-            title_candidate = line[opening_index : closing_index + 2]
-            stripped_title = title_candidate.strip()
-            if self._is_gpt5_title_line(stripped_title):
-                # Treat as a GPT-5 title only when everything after the
-                # bold segment is either whitespace or starts a new bold
-                # title. This prevents inline bold like `**xxx**yyyy`
-                # from being misclassified as a section title while
-                # preserving support for consecutive titles in one line.
-                after = line[closing_index + 2 :]
-                if after.strip() and not after.lstrip().startswith("**"):
-                    search_start = closing_index + 2
-                    continue
-                prefix_segment = line[:opening_index]
-                remainder_segment = after
-                return (
-                    prefix_segment if prefix_segment else None,
-                    stripped_title,
-                    remainder_segment,
-                )
-            search_start = closing_index + 2
klaude_code/llm/registry.py
CHANGED
@@ -1,4 +1,5 @@
-from
+from collections.abc import Callable
+from typing import TYPE_CHECKING, TypeVar
 
 from klaude_code.protocol import llm_param
 
@@ -20,13 +21,13 @@ def _load_protocol(protocol: llm_param.LLMClientProtocol) -> None:
 
     # Import only the needed module to trigger @register decorator
     if protocol == llm_param.LLMClientProtocol.ANTHROPIC:
-        from . import anthropic as _
+        from . import anthropic as _
     elif protocol == llm_param.LLMClientProtocol.CODEX:
-        from . import codex as _
+        from . import codex as _
    elif protocol == llm_param.LLMClientProtocol.OPENAI:
-        from . import openai_compatible as _
+        from . import openai_compatible as _
    elif protocol == llm_param.LLMClientProtocol.OPENROUTER:
-        from . import openrouter as _
+        from . import openrouter as _
    elif protocol == llm_param.LLMClientProtocol.RESPONSES:
        from . import responses as _  # noqa: F401
 
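The registry change only reorders the lazy imports, but the pattern they implement is worth spelling out: each provider module registers its client class via a decorator, and the module is imported only when its protocol is first requested. A hedged sketch of that registration pattern with illustrative names (`Protocol`, `get_client_class`); the real code keys on `llm_param.LLMClientProtocol`:

```python
from collections.abc import Callable
from enum import Enum
from importlib import import_module


class Protocol(str, Enum):
    # Illustrative stand-in for llm_param.LLMClientProtocol.
    OPENAI = "openai_compatible"
    OPENROUTER = "openrouter"


_REGISTRY: dict[Protocol, type] = {}


def register(protocol: Protocol) -> Callable[[type], type]:
    def decorator(cls: type) -> type:
        _REGISTRY[protocol] = cls
        return cls

    return decorator


def get_client_class(protocol: Protocol, package: str = "klaude_code.llm") -> type:
    if protocol not in _REGISTRY:
        # Import only the needed module so its @register decorator runs.
        import_module(f"{package}.{protocol.value}")
    return _REGISTRY[protocol]
```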
klaude_code/llm/responses/client.py
CHANGED
@@ -16,7 +16,6 @@ from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
-
 if TYPE_CHECKING:
     from openai import AsyncStream
     from openai.types.responses import ResponseStreamEvent
@@ -60,7 +59,7 @@ async def parse_responses_stream(
     stream: "AsyncStream[ResponseStreamEvent]",
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
-) -> AsyncGenerator[model.ConversationItem
+) -> AsyncGenerator[model.ConversationItem]:
     """Parse OpenAI Responses API stream events into ConversationItems."""
     response_id: str | None = None
 
@@ -76,6 +75,12 @@ async def parse_responses_stream(
             case responses.ResponseCreatedEvent() as event:
                 response_id = event.response.id
                 yield model.StartItem(response_id=response_id)
+            case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
+                if event.delta:
+                    yield model.ReasoningTextDelta(
+                        content=event.delta,
+                        response_id=response_id,
+                    )
             case responses.ResponseReasoningSummaryTextDoneEvent() as event:
                 if event.text:
                     yield model.ReasoningTextItem(
@@ -164,7 +169,7 @@ async def parse_responses_stream(
                     debug_type=DebugType.LLM_STREAM,
                 )
     except (openai.OpenAIError, httpx.HTTPError) as e:
-        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
 
 @register(llm_param.LLMClientProtocol.RESPONSES)
@@ -194,7 +199,7 @@ class ResponsesClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -212,7 +217,7 @@ class ResponsesClient(LLMClientABC):
                 extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
             return
 
         async for item in parse_responses_stream(stream, param, metadata_tracker):
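The new `ResponseReasoningSummaryTextDeltaEvent` case above slots into the existing `match` over typed Responses stream events: deltas become `ReasoningTextDelta` items, while the final summary still becomes a `ReasoningTextItem`. A reduced, self-contained sketch of that routing using stand-in dataclasses instead of the OpenAI SDK event types:

```python
from dataclasses import dataclass


@dataclass
class SummaryTextDelta:
    # Stand-in for responses.ResponseReasoningSummaryTextDeltaEvent.
    delta: str


@dataclass
class SummaryTextDone:
    # Stand-in for responses.ResponseReasoningSummaryTextDoneEvent.
    text: str


def route(event: object, response_id: str | None) -> dict[str, str | None] | None:
    # The real parser yields model.ReasoningTextDelta / model.ReasoningTextItem;
    # plain dicts stand in for those protocol models here.
    match event:
        case SummaryTextDelta(delta=delta) if delta:
            return {"kind": "reasoning_delta", "content": delta, "response_id": response_id}
        case SummaryTextDone(text=text) if text:
            return {"kind": "reasoning_item", "content": text, "response_id": response_id}
        case _:
            return None


print(route(SummaryTextDelta(delta="Sketching a plan…"), "resp_1"))
print(route(object(), "resp_1"))  # -> None
```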
klaude_code/protocol/events.py
CHANGED
@@ -54,6 +54,12 @@ class ThinkingEvent(BaseModel):
     content: str
 
 
+class ThinkingDeltaEvent(BaseModel):
+    session_id: str
+    response_id: str | None = None
+    content: str
+
+
 class AssistantMessageDeltaEvent(BaseModel):
     session_id: str
     response_id: str | None = None
@@ -153,6 +159,7 @@ Event = (
     TaskStartEvent
     | TaskFinishEvent
     | ThinkingEvent
+    | ThinkingDeltaEvent
     | AssistantMessageDeltaEvent
     | AssistantMessageEvent
     | ToolCallEvent
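`ThinkingDeltaEvent` mirrors `AssistantMessageDeltaEvent` so the UI can stream reasoning text piece by piece instead of waiting for a complete `ThinkingEvent`. A small sketch of the model and an illustrative consumer (`render_thinking` is not part of the package):

```python
from pydantic import BaseModel


class ThinkingDeltaEvent(BaseModel):
    # Field shape copied from the diff above.
    session_id: str
    response_id: str | None = None
    content: str


def render_thinking(deltas: list[ThinkingDeltaEvent]) -> str:
    # Concatenating the deltas of one response reproduces the text a final
    # ThinkingEvent would carry.
    return "".join(d.content for d in deltas)


chunks = [
    ThinkingDeltaEvent(session_id="s1", response_id="r1", content="Consider "),
    ThinkingDeltaEvent(session_id="s1", response_id="r1", content="the edge cases."),
]
print(render_thinking(chunks))  # Consider the edge cases.
```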
klaude_code/protocol/model.py
CHANGED
@@ -296,6 +296,12 @@ class AssistantMessageDelta(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
 
 
+class ReasoningTextDelta(BaseModel):
+    response_id: str | None = None
+    content: str
+    created_at: datetime = Field(default_factory=datetime.now)
+
+
 class StreamErrorItem(BaseModel):
     error: str
     created_at: datetime = Field(default_factory=datetime.now)
@@ -392,7 +398,7 @@ MessageItem = (
 )
 
 
-StreamItem = AssistantMessageDelta
+StreamItem = AssistantMessageDelta | ReasoningTextDelta
 
 ConversationItem = (
     StartItem
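With `StreamItem` widened to a union, consumers of the conversation stream have to branch on the concrete delta type. A minimal sketch using reduced stand-ins for the two models (only the fields relevant here):

```python
from datetime import datetime

from pydantic import BaseModel, Field


class AssistantMessageDelta(BaseModel):
    # Reduced stand-in for the existing model.
    content: str
    response_id: str | None = None
    created_at: datetime = Field(default_factory=datetime.now)


class ReasoningTextDelta(BaseModel):
    # Field shape copied from the diff above.
    response_id: str | None = None
    content: str
    created_at: datetime = Field(default_factory=datetime.now)


StreamItem = AssistantMessageDelta | ReasoningTextDelta


def render(item: StreamItem) -> str:
    # Reasoning deltas are typically displayed differently from assistant text.
    if isinstance(item, ReasoningTextDelta):
        return f"[thinking] {item.content}"
    return item.content


print(render(ReasoningTextDelta(content="weighing options")))
print(render(AssistantMessageDelta(content="Here is the plan.")))
```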
klaude_code/session/selector.py
CHANGED
@@ -40,7 +40,7 @@ def resume_select_session() -> str | None:
             ("class:b", f"{msg_count_display:>{MSG_COUNT_WIDTH}} "),
             (
                 "class:t",
-                f"{model_display[:MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} ",
+                f"{model_display[: MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} ",
             ),
             (
                 "class:t",
@@ -69,7 +69,7 @@ def resume_select_session() -> str | None:
         model_display = s.model_name or "N/A"
         print(
             f"{i}. {_fmt(s.updated_at)} {msg_count_display:>{MSG_COUNT_WIDTH}} "
-            f"{model_display[:MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} {s.id} {s.work_dir}"
+            f"{model_display[: MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}} {s.id} {s.work_dir}"
         )
         try:
             raw = input("Select a session number: ").strip()
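Only formatter whitespace changes in these two lines, but the nested format spec is dense enough to deserve an isolated example: truncate to `MODEL_WIDTH - 1` characters plus an ellipsis when the name is too long, then left-align and pad to `MODEL_WIDTH`. The constant value below is illustrative:

```python
MODEL_WIDTH = 12  # illustrative; the real constant is defined in selector.py


def fit(model_display: str) -> str:
    # Same conditional expression and format spec as the diff lines above.
    return f"{model_display[: MODEL_WIDTH - 1] + '…' if len(model_display) > MODEL_WIDTH else model_display:<{MODEL_WIDTH}}"


print(repr(fit("gpt-4o")))                     # 'gpt-4o      '
print(repr(fit("anthropic/claude-sonnet-4")))  # 'anthropic/c…'
```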
klaude_code/session/session.py
CHANGED
@@ -261,12 +261,10 @@ class Session(BaseModel):
             return False
         if prev_item is None:
             return True
-
+        return isinstance(
             prev_item,
             model.UserMessageItem | model.ToolResultItem | model.DeveloperMessageItem,
-        )
-            return True
-        return False
+        )
 
     def get_history_item(self) -> Iterable[events.HistoryItemEvent]:
         prev_item: model.ConversationItem | None = None
klaude_code/trace/__init__.py
CHANGED
@@ -1,3 +1,3 @@
 from .log import DebugType, is_debug_enabled, log, log_debug, logger, set_debug_logging
 
-__all__ = ["
+__all__ = ["DebugType", "is_debug_enabled", "log", "log_debug", "logger", "set_debug_logging"]
klaude_code/trace/log.py
CHANGED
klaude_code/ui/__init__.py
CHANGED
@@ -73,19 +73,14 @@ def create_exec_display(debug: bool = False, stream_json: bool = False) -> Displ
 
 
 __all__ = [
-
+    "DebugEventDisplay",
     "DisplayABC",
+    "ExecDisplay",
     "InputProviderABC",
-
+    "PromptToolkitInput",
     "REPLDisplay",
-    "ExecDisplay",
     "StreamJsonDisplay",
-    "
-    # Input implementations
-    "PromptToolkitInput",
-    # Factory functions
+    "TerminalNotifier",
     "create_default_display",
     "create_exec_display",
-    # Supporting types
-    "TerminalNotifier",
 ]
klaude_code/ui/core/stage_manager.py
CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
+from collections.abc import Awaitable, Callable
 from enum import Enum
-from typing import Awaitable, Callable
 
 
 class Stage(Enum):
@@ -19,10 +19,12 @@ class StageManager:
         self,
         *,
         finish_assistant: Callable[[], Awaitable[None]],
+        finish_thinking: Callable[[], Awaitable[None]],
         on_enter_thinking: Callable[[], None],
     ):
         self._stage = Stage.WAITING
         self._finish_assistant = finish_assistant
+        self._finish_thinking = finish_thinking
         self._on_enter_thinking = on_enter_thinking
 
     @property
@@ -49,7 +51,8 @@ class StageManager:
         self._stage = Stage.WAITING
 
     async def _leave_current_stage(self) -> None:
-        if self._stage == Stage.
+        if self._stage == Stage.THINKING:
+            await self._finish_thinking()
+        elif self._stage == Stage.ASSISTANT:
             await self.finish_assistant()
-
-        self._stage = Stage.WAITING
+        self._stage = Stage.WAITING
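The `StageManager` change above distinguishes leaving the thinking stage from leaving the assistant stage, each with its own async finisher. A trimmed sketch of that transition logic (the class below is a stand-in, not the package's full `StageManager`):

```python
import asyncio
from collections.abc import Awaitable, Callable
from enum import Enum


class Stage(Enum):
    WAITING = "waiting"
    THINKING = "thinking"
    ASSISTANT = "assistant"


class StageTracker:
    def __init__(
        self,
        *,
        finish_assistant: Callable[[], Awaitable[None]],
        finish_thinking: Callable[[], Awaitable[None]],
    ) -> None:
        self.stage = Stage.WAITING
        self._finish_assistant = finish_assistant
        self._finish_thinking = finish_thinking

    async def leave_current_stage(self) -> None:
        # Mirrors the branch added in the diff: close whichever block is open.
        if self.stage == Stage.THINKING:
            await self._finish_thinking()
        elif self.stage == Stage.ASSISTANT:
            await self._finish_assistant()
        self.stage = Stage.WAITING


async def demo() -> None:
    async def close_thinking() -> None:
        print("thinking block closed")

    async def close_assistant() -> None:
        print("assistant block closed")

    tracker = StageTracker(finish_assistant=close_assistant, finish_thinking=close_thinking)
    tracker.stage = Stage.THINKING
    await tracker.leave_current_stage()
    print(tracker.stage)  # Stage.WAITING


asyncio.run(demo())
```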
@@ -9,7 +9,7 @@ if TYPE_CHECKING:
     from klaude_code.core.agent import Agent
 
 
-def build_repl_status_snapshot(agent:
+def build_repl_status_snapshot(agent: Agent | None, update_message: str | None) -> REPLStatusSnapshot:
     """Build a status snapshot for the REPL bottom toolbar.
 
     Aggregates model name, context usage, and basic call counts from the
klaude_code/ui/modes/repl/completers.py
CHANGED
@@ -133,10 +133,9 @@ class _ComboCompleter(Completer):
         complete_event,  # type: ignore[override]
     ) -> Iterable[Completion]:
         # Try slash command completion first (only on first line)
-        if document.cursor_position_row == 0:
-
-
-            return
+        if document.cursor_position_row == 0 and self._slash_completer.is_slash_command_context(document):
+            yield from self._slash_completer.get_completions(document, complete_event)
+            return
 
         # Fall back to @ file completion
         yield from self._at_completer.get_completions(document, complete_event)
klaude_code/ui/modes/repl/display.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import contextlib
 from typing import override
 
 from klaude_code.protocol import events
@@ -53,8 +54,6 @@ class REPLDisplay(DisplayABC):
     async def stop(self) -> None:
         await self.event_handler.stop()
         # Ensure any active spinner is stopped so Rich restores the cursor.
-
+        # Spinner may already be stopped or not started; ignore.
+        with contextlib.suppress(Exception):
             self.renderer.spinner_stop()
-        except Exception:
-            # Spinner may already be stopped or not started; ignore.
-            pass