klaude-code 1.2.6__py3-none-any.whl → 1.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/__init__.py +24 -0
- klaude_code/auth/codex/__init__.py +20 -0
- klaude_code/auth/codex/exceptions.py +17 -0
- klaude_code/auth/codex/jwt_utils.py +45 -0
- klaude_code/auth/codex/oauth.py +229 -0
- klaude_code/auth/codex/token_manager.py +84 -0
- klaude_code/cli/main.py +63 -0
- klaude_code/command/status_cmd.py +13 -5
- klaude_code/config/list_model.py +53 -0
- klaude_code/core/prompt.py +10 -14
- klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
- klaude_code/core/prompts/prompt-subagent-explore.md +3 -1
- klaude_code/core/reminders.py +14 -5
- klaude_code/core/task.py +1 -0
- klaude_code/core/tool/truncation.py +4 -0
- klaude_code/llm/__init__.py +2 -0
- klaude_code/llm/anthropic/input.py +25 -10
- klaude_code/llm/codex/__init__.py +5 -0
- klaude_code/llm/codex/client.py +116 -0
- klaude_code/llm/responses/client.py +153 -138
- klaude_code/llm/usage.py +3 -0
- klaude_code/protocol/llm_param.py +3 -1
- klaude_code/protocol/model.py +2 -1
- klaude_code/protocol/sub_agent.py +2 -1
- klaude_code/session/export.py +9 -14
- klaude_code/session/templates/export_session.html +5 -0
- klaude_code/ui/modes/repl/completers.py +41 -8
- klaude_code/ui/modes/repl/event_handler.py +15 -23
- klaude_code/ui/renderers/developer.py +9 -8
- klaude_code/ui/renderers/metadata.py +9 -5
- klaude_code/ui/renderers/user_input.py +23 -10
- klaude_code/ui/rich/theme.py +2 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/METADATA +1 -1
- {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/RECORD +37 -28
- /klaude_code/core/prompts/{prompt-codex.md → prompt-codex-gpt-5-1.md} +0 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/entry_points.txt +0 -0
klaude_code/llm/responses/client.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import json
 import time
 from collections.abc import AsyncGenerator
-from typing import override
+from typing import TYPE_CHECKING, override

 import httpx
 from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError
```
```diff
@@ -15,6 +15,153 @@ from klaude_code.llm.usage import calculate_cost
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug

+if TYPE_CHECKING:
+    from openai import AsyncStream
+    from openai.types.responses import ResponseStreamEvent
+
+
+async def parse_responses_stream(
+    stream: "AsyncStream[ResponseStreamEvent]",
+    param: llm_param.LLMCallParameter,
+    cost_config: llm_param.Cost | None,
+    request_start_time: float,
+) -> AsyncGenerator[model.ConversationItem, None]:
+    """Parse OpenAI Responses API stream events into ConversationItems."""
+    first_token_time: float | None = None
+    last_token_time: float | None = None
+    response_id: str | None = None
+
+    try:
+        async for event in stream:
+            log_debug(
+                f"[{event.type}]",
+                event.model_dump_json(exclude_none=True),
+                style="blue",
+                debug_type=DebugType.LLM_STREAM,
+            )
+            match event:
+                case responses.ResponseCreatedEvent() as event:
+                    response_id = event.response.id
+                    yield model.StartItem(response_id=response_id)
+                case responses.ResponseReasoningSummaryTextDoneEvent() as event:
+                    if event.text:
+                        yield model.ReasoningTextItem(
+                            content=event.text,
+                            response_id=response_id,
+                            model=str(param.model),
+                        )
+                case responses.ResponseTextDeltaEvent() as event:
+                    if first_token_time is None:
+                        first_token_time = time.time()
+                    last_token_time = time.time()
+                    yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
+                case responses.ResponseOutputItemAddedEvent() as event:
+                    if isinstance(event.item, responses.ResponseFunctionToolCall):
+                        yield model.ToolCallStartItem(
+                            response_id=response_id,
+                            call_id=event.item.call_id,
+                            name=event.item.name,
+                        )
+                case responses.ResponseOutputItemDoneEvent() as event:
+                    match event.item:
+                        case responses.ResponseReasoningItem() as item:
+                            if item.encrypted_content:
+                                yield model.ReasoningEncryptedItem(
+                                    id=item.id,
+                                    encrypted_content=item.encrypted_content,
+                                    response_id=response_id,
+                                    model=str(param.model),
+                                )
+                        case responses.ResponseOutputMessage() as item:
+                            yield model.AssistantMessageItem(
+                                content="\n".join(
+                                    [
+                                        part.text
+                                        for part in item.content
+                                        if isinstance(part, responses.ResponseOutputText)
+                                    ]
+                                ),
+                                id=item.id,
+                                response_id=response_id,
+                            )
+                        case responses.ResponseFunctionToolCall() as item:
+                            if first_token_time is None:
+                                first_token_time = time.time()
+                            last_token_time = time.time()
+                            yield model.ToolCallItem(
+                                name=item.name,
+                                arguments=item.arguments.strip(),
+                                call_id=item.call_id,
+                                id=item.id,
+                                response_id=response_id,
+                            )
+                        case _:
+                            pass
+                case responses.ResponseCompletedEvent() as event:
+                    usage: model.Usage | None = None
+                    error_reason: str | None = None
+                    if event.response.incomplete_details is not None:
+                        error_reason = event.response.incomplete_details.reason
+                    if event.response.usage is not None:
+                        total_tokens = event.response.usage.total_tokens
+                        context_usage_percent = (
+                            (total_tokens / param.context_limit) * 100 if param.context_limit else None
+                        )
+
+                        throughput_tps: float | None = None
+                        first_token_latency_ms: float | None = None
+
+                        if first_token_time is not None:
+                            first_token_latency_ms = (first_token_time - request_start_time) * 1000
+
+                        if (
+                            first_token_time is not None
+                            and last_token_time is not None
+                            and event.response.usage.output_tokens > 0
+                        ):
+                            time_duration = last_token_time - first_token_time
+                            if time_duration >= 0.15:
+                                throughput_tps = event.response.usage.output_tokens / time_duration
+
+                        usage = model.Usage(
+                            input_tokens=event.response.usage.input_tokens,
+                            cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
+                            reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
+                            output_tokens=event.response.usage.output_tokens,
+                            total_tokens=total_tokens,
+                            context_usage_percent=context_usage_percent,
+                            throughput_tps=throughput_tps,
+                            first_token_latency_ms=first_token_latency_ms,
+                        )
+                        calculate_cost(usage, cost_config)
+                    yield model.ResponseMetadataItem(
+                        usage=usage,
+                        response_id=response_id,
+                        model_name=str(param.model),
+                        status=event.response.status,
+                        error_reason=error_reason,
+                    )
+                    if event.response.status != "completed":
+                        error_message = f"LLM response finished with status '{event.response.status}'"
+                        if error_reason:
+                            error_message = f"{error_message}: {error_reason}"
+                        log_debug(
+                            "[LLM status warning]",
+                            error_message,
+                            style="red",
+                            debug_type=DebugType.LLM_STREAM,
+                        )
+                        yield model.StreamErrorItem(error=error_message)
+                case _:
+                    log_debug(
+                        "[Unhandled stream event]",
+                        str(event),
+                        style="red",
+                        debug_type=DebugType.LLM_STREAM,
+                    )
+    except RateLimitError as e:
+        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+

 @register(llm_param.LLMClientProtocol.RESPONSES)
 class ResponsesClient(LLMClientABC):
```
```diff
@@ -47,20 +194,15 @@ class ResponsesClient(LLMClientABC):
         param = apply_config_defaults(param, self.get_llm_config())

         request_start_time = time.time()
-        first_token_time: float | None = None
-        last_token_time: float | None = None
-        response_id: str | None = None

         inputs = convert_history_to_input(param.input, param.model)
         tools = convert_tool_schema(param.tools)

-
-
-        stream = call_with_logged_payload(
+        stream = await call_with_logged_payload(
             self.client.responses.create,
             model=str(param.model),
             tool_choice="auto",
-            parallel_tool_calls=
+            parallel_tool_calls=True,
             include=[
                 "reasoning.encrypted_content",
             ],
```
```diff
@@ -75,6 +217,7 @@ class ResponsesClient(LLMClientABC):
             text={
                 "verbosity": param.verbosity,
             },
+            prompt_cache_key=param.session_id or "",
             reasoning={
                 "effort": param.thinking.reasoning_effort,
                 "summary": param.thinking.reasoning_summary,
```
```diff
@@ -84,133 +227,5 @@ class ResponsesClient(LLMClientABC):
             extra_headers={"extra": json.dumps({"session_id": param.session_id})},
         )

-
-
-                log_debug(
-                    f"[{event.type}]",
-                    event.model_dump_json(exclude_none=True),
-                    style="blue",
-                    debug_type=DebugType.LLM_STREAM,
-                )
-                match event:
-                    case responses.ResponseCreatedEvent() as event:
-                        response_id = event.response.id
-                        yield model.StartItem(response_id=response_id)
-                    case responses.ResponseReasoningSummaryTextDoneEvent() as event:
-                        if event.text:
-                            yield model.ReasoningTextItem(
-                                content=event.text,
-                                response_id=response_id,
-                                model=str(param.model),
-                            )
-                    case responses.ResponseTextDeltaEvent() as event:
-                        if first_token_time is None:
-                            first_token_time = time.time()
-                        last_token_time = time.time()
-                        yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
-                    case responses.ResponseOutputItemAddedEvent() as event:
-                        if isinstance(event.item, responses.ResponseFunctionToolCall):
-                            yield model.ToolCallStartItem(
-                                response_id=response_id,
-                                call_id=event.item.call_id,
-                                name=event.item.name,
-                            )
-                    case responses.ResponseOutputItemDoneEvent() as event:
-                        match event.item:
-                            case responses.ResponseReasoningItem() as item:
-                                if item.encrypted_content:
-                                    yield model.ReasoningEncryptedItem(
-                                        id=item.id,
-                                        encrypted_content=item.encrypted_content,
-                                        response_id=response_id,
-                                        model=str(param.model),
-                                    )
-                            case responses.ResponseOutputMessage() as item:
-                                yield model.AssistantMessageItem(
-                                    content="\n".join(
-                                        [
-                                            part.text
-                                            for part in item.content
-                                            if isinstance(part, responses.ResponseOutputText)
-                                        ]
-                                    ),
-                                    id=item.id,
-                                    response_id=response_id,
-                                )
-                            case responses.ResponseFunctionToolCall() as item:
-                                if first_token_time is None:
-                                    first_token_time = time.time()
-                                last_token_time = time.time()
-                                yield model.ToolCallItem(
-                                    name=item.name,
-                                    arguments=item.arguments.strip(),
-                                    call_id=item.call_id,
-                                    id=item.id,
-                                    response_id=response_id,
-                                )
-                            case _:
-                                pass
-                    case responses.ResponseCompletedEvent() as event:
-                        usage: model.Usage | None = None
-                        error_reason: str | None = None
-                        if event.response.incomplete_details is not None:
-                            error_reason = event.response.incomplete_details.reason
-                        if event.response.usage is not None:
-                            total_tokens = event.response.usage.total_tokens
-                            context_usage_percent = (
-                                (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                            )
-
-                            throughput_tps: float | None = None
-                            first_token_latency_ms: float | None = None
-
-                            if first_token_time is not None:
-                                first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                            if (
-                                first_token_time is not None
-                                and last_token_time is not None
-                                and event.response.usage.output_tokens > 0
-                            ):
-                                time_duration = last_token_time - first_token_time
-                                if time_duration >= 0.15:
-                                    throughput_tps = event.response.usage.output_tokens / time_duration
-
-                            usage = model.Usage(
-                                input_tokens=event.response.usage.input_tokens,
-                                cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
-                                reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-                                output_tokens=event.response.usage.output_tokens,
-                                total_tokens=total_tokens,
-                                context_usage_percent=context_usage_percent,
-                                throughput_tps=throughput_tps,
-                                first_token_latency_ms=first_token_latency_ms,
-                            )
-                            calculate_cost(usage, self._config.cost)
-                        yield model.ResponseMetadataItem(
-                            usage=usage,
-                            response_id=response_id,
-                            model_name=str(param.model),
-                            status=event.response.status,
-                            error_reason=error_reason,
-                        )
-                        if event.response.status != "completed":
-                            error_message = f"LLM response finished with status '{event.response.status}'"
-                            if error_reason:
-                                error_message = f"{error_message}: {error_reason}"
-                            log_debug(
-                                "[LLM status warning]",
-                                error_message,
-                                style="red",
-                                debug_type=DebugType.LLM_STREAM,
-                            )
-                            yield model.StreamErrorItem(error=error_message)
-                    case _:
-                        log_debug(
-                            "[Unhandled stream event]",
-                            str(event),
-                            style="red",
-                            debug_type=DebugType.LLM_STREAM,
-                        )
-        except RateLimitError as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+        async for item in parse_responses_stream(stream, param, self._config.cost, request_start_time):
+            yield item
```
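The net effect of the hunks above is that the Responses stream parsing is extracted into a module-level `parse_responses_stream` generator so it can be shared by other clients (presumably the new `klaude_code/llm/codex/client.py` added in this release). A minimal consumption sketch, assuming a `stream`, `param`, and cost config have already been built elsewhere; the `collect_items` helper name is illustrative, not part of the package:

```python
# Hedged sketch: draining the extracted generator. `stream`, `param`, and
# `cost_cfg` are assumed to be constructed by the caller (not shown here).
import time

from klaude_code.llm.responses.client import parse_responses_stream


async def collect_items(stream, param, cost_cfg):
    items = []
    start = time.time()
    # The generator yields ConversationItem objects: StartItem, message deltas,
    # tool calls, usage metadata, and StreamErrorItem on failures.
    async for item in parse_responses_stream(stream, param, cost_cfg, start):
        items.append(item)
    return items
```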
klaude_code/llm/usage.py
CHANGED

```diff
@@ -14,6 +14,9 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
     if cost_config is None:
         return

+    # Set currency
+    usage.currency = cost_config.currency
+
     # Non-cached input tokens cost
     non_cached_input = usage.input_tokens - usage.cached_tokens
     usage.input_cost = (non_cached_input / 1_000_000) * cost_config.input
```
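The cost formula above is plain per-million pricing, now tagged with the configured currency. A worked example with invented numbers only:

```python
# Illustrative arithmetic; prices and token counts are made up.
input_tokens = 1_200_000
cached_tokens = 200_000
price_per_million = 3.0  # cost_config.input, in the configured currency

non_cached_input = input_tokens - cached_tokens              # 1_000_000
input_cost = (non_cached_input / 1_000_000) * price_per_million
print(input_cost)  # 3.0 -- rendered later as "$3.0000" or "¥3.0000" depending on currency
```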
klaude_code/protocol/llm_param.py
CHANGED

```diff
@@ -12,6 +12,7 @@ class LLMClientProtocol(Enum):
     RESPONSES = "responses"
     OPENROUTER = "openrouter"
     ANTHROPIC = "anthropic"
+    CODEX = "codex"


 class ToolSchema(BaseModel):
@@ -36,12 +37,13 @@ class Thinking(BaseModel):


 class Cost(BaseModel):
-    """Cost configuration per million tokens
+    """Cost configuration per million tokens."""

     input: float  # Input token price per million tokens
     output: float  # Output token price per million tokens
     cache_read: float = 0.0  # Cache read price per million tokens
     cache_write: float = 0.0  # Cache write price per million tokens (ignored in calculation for now)
+    currency: Literal["USD", "CNY"] = "USD"  # Currency for cost display

 
 class OpenRouterProviderRouting(BaseModel):
```
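The new `currency` field lets a model's pricing be declared in CNY as well as USD. A sketch of such a configuration object, with invented prices:

```python
from klaude_code.protocol.llm_param import Cost

# Invented per-million-token prices, for illustration only.
cny_pricing = Cost(
    input=4.0,
    output=16.0,
    cache_read=0.8,
    cache_write=0.0,
    currency="CNY",
)
```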
klaude_code/protocol/model.py
CHANGED

```diff
@@ -21,11 +21,12 @@ class Usage(BaseModel):
     throughput_tps: float | None = None
     first_token_latency_ms: float | None = None

-    # Cost
+    # Cost (calculated from token counts and cost config)
     input_cost: float | None = None  # Cost for non-cached input tokens
     output_cost: float | None = None  # Cost for output tokens (including reasoning)
     cache_read_cost: float | None = None  # Cost for cached tokens
     total_cost: float | None = None  # Total cost (input + output + cache_read)
+    currency: str = "USD"  # Currency for cost display (USD or CNY)


 class TodoItem(BaseModel):
```
klaude_code/protocol/sub_agent.py
CHANGED

```diff
@@ -245,7 +245,8 @@ register_sub_agent(
 EXPLORE_DESCRIPTION = """\
 Spin up a fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. "src/components/**/*.tsx"), \
 search code for keywords (eg. "API endpoints"), or answer questions about the codebase (eg. "how do API endpoints work?")\
-When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "very thorough" for comprehensive analysis across multiple locations and naming conventions
+When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "very thorough" for comprehensive analysis across multiple locations and naming conventions.
+Always spawn multiple search agents in parallel to maximise speed.\
 """

 EXPLORE_PARAMETERS = {
```
klaude_code/session/export.py
CHANGED

```diff
@@ -154,8 +154,9 @@ def _format_token_count(count: int) -> str:
     return f"{m}M" if rem == 0 else f"{m}M{rem}k"


-def _format_cost(cost: float) -> str:
-
+def _format_cost(cost: float, currency: str = "USD") -> str:
+    symbol = "¥" if currency == "CNY" else "$"
+    return f"{symbol}{cost:.4f}"


 def _render_metadata_item(item: model.ResponseMetadataItem) -> str:
@@ -175,26 +176,24 @@ def _render_metadata_item(item: model.ResponseMetadataItem) -> str:
     # Input with cost
     input_stat = f"input: {_format_token_count(u.input_tokens)}"
     if u.input_cost is not None:
-        input_stat += f"({_format_cost(u.input_cost)})"
+        input_stat += f"({_format_cost(u.input_cost, u.currency)})"
     parts.append(f'<span class="metadata-stat">{input_stat}</span>')

     # Cached with cost
     if u.cached_tokens > 0:
         cached_stat = f"cached: {_format_token_count(u.cached_tokens)}"
         if u.cache_read_cost is not None:
-            cached_stat += f"({_format_cost(u.cache_read_cost)})"
+            cached_stat += f"({_format_cost(u.cache_read_cost, u.currency)})"
         parts.append(f'<span class="metadata-stat">{cached_stat}</span>')

     # Output with cost
     output_stat = f"output: {_format_token_count(u.output_tokens)}"
     if u.output_cost is not None:
-        output_stat += f"({_format_cost(u.output_cost)})"
+        output_stat += f"({_format_cost(u.output_cost, u.currency)})"
     parts.append(f'<span class="metadata-stat">{output_stat}</span>')

     if u.reasoning_tokens > 0:
-        parts.append(
-            f'<span class="metadata-stat">thinking: {_format_token_count(u.reasoning_tokens)}</span>'
-        )
+        parts.append(f'<span class="metadata-stat">thinking: {_format_token_count(u.reasoning_tokens)}</span>')
     if u.context_usage_percent is not None:
         parts.append(f'<span class="metadata-stat">context: {u.context_usage_percent:.1f}%</span>')
     if u.throughput_tps is not None:
@@ -205,16 +204,12 @@ def _render_metadata_item(item: model.ResponseMetadataItem) -> str:

     # Total cost
     if item.usage is not None and item.usage.total_cost is not None:
-        parts.append(f'<span class="metadata-stat">cost: {_format_cost(item.usage.total_cost)}</span>')
+        parts.append(f'<span class="metadata-stat">cost: {_format_cost(item.usage.total_cost, item.usage.currency)}</span>')

     divider = '<span class="metadata-divider">/</span>'
     joined_html = divider.join(parts)

-    return (
-        f'<div class="response-metadata">'
-        f'<div class="metadata-line">{joined_html}</div>'
-        f"</div>"
-    )
+    return f'<div class="response-metadata"><div class="metadata-line">{joined_html}</div></div>'


 def _render_assistant_message(index: int, content: str, timestamp: datetime) -> str:
```
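With the currency threaded through, the exporter's `_format_cost` helper (as added above) just picks a symbol from the usage's currency:

```python
def _format_cost(cost: float, currency: str = "USD") -> str:
    symbol = "¥" if currency == "CNY" else "$"
    return f"{symbol}{cost:.4f}"

print(_format_cost(0.1234))          # $0.1234
print(_format_cost(0.1234, "CNY"))   # ¥0.1234
```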
klaude_code/session/templates/export_session.html
CHANGED

```diff
@@ -1077,6 +1077,11 @@
       });
     </script>
     <script>
+      // Trim whitespace from pre-wrap content to avoid formatting artifacts
+      document.querySelectorAll(".system-prompt-content").forEach((el) => {
+        el.textContent = el.textContent.trim();
+      });
+
       // Markdown rendering and Syntax Highlighting
       document.querySelectorAll(".markdown-content").forEach((el) => {
         const raw = el.dataset.raw;
```
klaude_code/ui/modes/repl/completers.py
CHANGED

```diff
@@ -27,8 +27,11 @@ from prompt_toolkit.formatted_text import HTML

 from klaude_code.command import get_commands

-# Pattern to match @token for completion refresh (used by key bindings)
-
+# Pattern to match @token for completion refresh (used by key bindings).
+# Supports both plain tokens like `@src/file.py` and quoted tokens like
+# `@"path with spaces/file.py"` so that filenames with spaces remain a
+# single logical token.
+AT_TOKEN_PATTERN = re.compile(r'(^|\s)@(?P<frag>"[^"]*"|[^\s]*)$')


 def create_repl_completer() -> Completer:
@@ -182,31 +185,48 @@ class _AtFilesCompleter(Completer):
         if not m:
             return []  # type: ignore[reportUnknownVariableType]

-        frag = m.group("frag")  # text after '@' and before cursor (
+        frag = m.group("frag")  # raw text after '@' and before cursor (may be quoted)
+        # Normalize fragment for search: support optional quoting syntax @"...".
+        is_quoted = frag.startswith('"')
+        search_frag = frag
+        if is_quoted:
+            # Drop leading quote; if user already closed the quote, drop trailing quote as well.
+            search_frag = search_frag[1:]
+            if search_frag.endswith('"'):
+                search_frag = search_frag[:-1]
+
         token_start_in_input = len(text_before) - len(f"@{frag}")

         cwd = Path.cwd()

         # If no fragment yet, show lightweight suggestions from current directory
-        if
+        if search_frag.strip() == "":
             suggestions = self._suggest_for_empty_fragment(cwd)
             if not suggestions:
                 return []  # type: ignore[reportUnknownVariableType]
             start_position = token_start_in_input - len(text_before)
             for s in suggestions[: self._max_results]:
-                yield Completion(
+                yield Completion(
+                    text=self._format_completion_text(s, is_quoted=is_quoted),
+                    start_position=start_position,
+                    display=s,
+                )
             return []  # type: ignore[reportUnknownVariableType]

         # Gather suggestions with debounce/caching based on search keyword
-        suggestions = self._complete_paths(cwd,
+        suggestions = self._complete_paths(cwd, search_frag)
         if not suggestions:
             return []  # type: ignore[reportUnknownVariableType]

         # Prepare Completion objects. Replace from the '@' character.
         start_position = token_start_in_input - len(text_before)  # negative
         for s in suggestions[: self._max_results]:
-            # Insert
-            yield Completion(
+            # Insert formatted text (with quoting when needed) so that subsequent typing does not keep triggering
+            yield Completion(
+                text=self._format_completion_text(s, is_quoted=is_quoted),
+                start_position=start_position,
+                display=s,
+            )

     # ---- Core logic ----
     def _complete_paths(self, cwd: Path, keyword: str) -> list[str]:
@@ -318,6 +338,19 @@ class _AtFilesCompleter(Completer):
                 uniq.append(s)
         return uniq

+    def _format_completion_text(self, suggestion: str, *, is_quoted: bool) -> str:
+        """Format completion insertion text for a given suggestion.
+
+        Paths that contain whitespace are always wrapped in quotes so that they
+        can be parsed correctly by the @-file reader. If the user explicitly
+        started a quoted token (e.g. @"foo), we preserve quoting even when the
+        suggested path itself does not contain spaces.
+        """
+        needs_quotes = any(ch.isspace() for ch in suggestion)
+        if needs_quotes or is_quoted:
+            return f'@"{suggestion}" '
+        return f"@{suggestion} "
+
     def _same_scope(self, prev_key: str, cur_key: str) -> bool:
         # Consider same scope if they share the same base directory and one prefix startswith the other
         try:
```
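The reworked `@`-file completion hinges on the quoted-token regex added above. A small standalone check of how it splits fragments (the paths are examples only):

```python
import re

# Same pattern as AT_TOKEN_PATTERN in the diff above.
pattern = re.compile(r'(^|\s)@(?P<frag>"[^"]*"|[^\s]*)$')

print(pattern.search('open @src/main.py').group("frag"))    # src/main.py
print(pattern.search('open @"docs/de').group("frag"))       # "docs/de   (quote still open, no space yet)
print(pattern.search('open @"docs/a b.md"').group("frag"))  # "docs/a b.md"  (quoted path with a space)
```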