klaude-code 1.2.6__py3-none-any.whl → 1.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. klaude_code/auth/__init__.py +24 -0
  2. klaude_code/auth/codex/__init__.py +20 -0
  3. klaude_code/auth/codex/exceptions.py +17 -0
  4. klaude_code/auth/codex/jwt_utils.py +45 -0
  5. klaude_code/auth/codex/oauth.py +229 -0
  6. klaude_code/auth/codex/token_manager.py +84 -0
  7. klaude_code/cli/main.py +63 -0
  8. klaude_code/command/status_cmd.py +13 -5
  9. klaude_code/config/list_model.py +53 -0
  10. klaude_code/core/prompt.py +10 -14
  11. klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
  12. klaude_code/core/prompts/prompt-subagent-explore.md +3 -1
  13. klaude_code/core/reminders.py +14 -5
  14. klaude_code/core/task.py +1 -0
  15. klaude_code/core/tool/truncation.py +4 -0
  16. klaude_code/llm/__init__.py +2 -0
  17. klaude_code/llm/anthropic/input.py +25 -10
  18. klaude_code/llm/codex/__init__.py +5 -0
  19. klaude_code/llm/codex/client.py +116 -0
  20. klaude_code/llm/responses/client.py +153 -138
  21. klaude_code/llm/usage.py +3 -0
  22. klaude_code/protocol/llm_param.py +3 -1
  23. klaude_code/protocol/model.py +2 -1
  24. klaude_code/protocol/sub_agent.py +2 -1
  25. klaude_code/session/export.py +9 -14
  26. klaude_code/session/templates/export_session.html +5 -0
  27. klaude_code/ui/modes/repl/completers.py +41 -8
  28. klaude_code/ui/modes/repl/event_handler.py +15 -23
  29. klaude_code/ui/renderers/developer.py +9 -8
  30. klaude_code/ui/renderers/metadata.py +9 -5
  31. klaude_code/ui/renderers/user_input.py +23 -10
  32. klaude_code/ui/rich/theme.py +2 -0
  33. {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/METADATA +1 -1
  34. {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/RECORD +37 -28
  35. /klaude_code/core/prompts/{prompt-codex.md → prompt-codex-gpt-5-1.md} +0 -0
  36. {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/WHEEL +0 -0
  37. {klaude_code-1.2.6.dist-info → klaude_code-1.2.7.dist-info}/entry_points.txt +0 -0
klaude_code/llm/responses/client.py CHANGED
@@ -1,7 +1,7 @@
  import json
  import time
  from collections.abc import AsyncGenerator
- from typing import override
+ from typing import TYPE_CHECKING, override
 
  import httpx
  from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError
@@ -15,6 +15,153 @@ from klaude_code.llm.usage import calculate_cost
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug
 
+ if TYPE_CHECKING:
+     from openai import AsyncStream
+     from openai.types.responses import ResponseStreamEvent
+
+
+ async def parse_responses_stream(
+     stream: "AsyncStream[ResponseStreamEvent]",
+     param: llm_param.LLMCallParameter,
+     cost_config: llm_param.Cost | None,
+     request_start_time: float,
+ ) -> AsyncGenerator[model.ConversationItem, None]:
+     """Parse OpenAI Responses API stream events into ConversationItems."""
+     first_token_time: float | None = None
+     last_token_time: float | None = None
+     response_id: str | None = None
+
+     try:
+         async for event in stream:
+             log_debug(
+                 f"[{event.type}]",
+                 event.model_dump_json(exclude_none=True),
+                 style="blue",
+                 debug_type=DebugType.LLM_STREAM,
+             )
+             match event:
+                 case responses.ResponseCreatedEvent() as event:
+                     response_id = event.response.id
+                     yield model.StartItem(response_id=response_id)
+                 case responses.ResponseReasoningSummaryTextDoneEvent() as event:
+                     if event.text:
+                         yield model.ReasoningTextItem(
+                             content=event.text,
+                             response_id=response_id,
+                             model=str(param.model),
+                         )
+                 case responses.ResponseTextDeltaEvent() as event:
+                     if first_token_time is None:
+                         first_token_time = time.time()
+                     last_token_time = time.time()
+                     yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
+                 case responses.ResponseOutputItemAddedEvent() as event:
+                     if isinstance(event.item, responses.ResponseFunctionToolCall):
+                         yield model.ToolCallStartItem(
+                             response_id=response_id,
+                             call_id=event.item.call_id,
+                             name=event.item.name,
+                         )
+                 case responses.ResponseOutputItemDoneEvent() as event:
+                     match event.item:
+                         case responses.ResponseReasoningItem() as item:
+                             if item.encrypted_content:
+                                 yield model.ReasoningEncryptedItem(
+                                     id=item.id,
+                                     encrypted_content=item.encrypted_content,
+                                     response_id=response_id,
+                                     model=str(param.model),
+                                 )
+                         case responses.ResponseOutputMessage() as item:
+                             yield model.AssistantMessageItem(
+                                 content="\n".join(
+                                     [
+                                         part.text
+                                         for part in item.content
+                                         if isinstance(part, responses.ResponseOutputText)
+                                     ]
+                                 ),
+                                 id=item.id,
+                                 response_id=response_id,
+                             )
+                         case responses.ResponseFunctionToolCall() as item:
+                             if first_token_time is None:
+                                 first_token_time = time.time()
+                             last_token_time = time.time()
+                             yield model.ToolCallItem(
+                                 name=item.name,
+                                 arguments=item.arguments.strip(),
+                                 call_id=item.call_id,
+                                 id=item.id,
+                                 response_id=response_id,
+                             )
+                         case _:
+                             pass
+                 case responses.ResponseCompletedEvent() as event:
+                     usage: model.Usage | None = None
+                     error_reason: str | None = None
+                     if event.response.incomplete_details is not None:
+                         error_reason = event.response.incomplete_details.reason
+                     if event.response.usage is not None:
+                         total_tokens = event.response.usage.total_tokens
+                         context_usage_percent = (
+                             (total_tokens / param.context_limit) * 100 if param.context_limit else None
+                         )
+
+                         throughput_tps: float | None = None
+                         first_token_latency_ms: float | None = None
+
+                         if first_token_time is not None:
+                             first_token_latency_ms = (first_token_time - request_start_time) * 1000
+
+                         if (
+                             first_token_time is not None
+                             and last_token_time is not None
+                             and event.response.usage.output_tokens > 0
+                         ):
+                             time_duration = last_token_time - first_token_time
+                             if time_duration >= 0.15:
+                                 throughput_tps = event.response.usage.output_tokens / time_duration
+
+                         usage = model.Usage(
+                             input_tokens=event.response.usage.input_tokens,
+                             cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
+                             reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
+                             output_tokens=event.response.usage.output_tokens,
+                             total_tokens=total_tokens,
+                             context_usage_percent=context_usage_percent,
+                             throughput_tps=throughput_tps,
+                             first_token_latency_ms=first_token_latency_ms,
+                         )
+                         calculate_cost(usage, cost_config)
+                     yield model.ResponseMetadataItem(
+                         usage=usage,
+                         response_id=response_id,
+                         model_name=str(param.model),
+                         status=event.response.status,
+                         error_reason=error_reason,
+                     )
+                     if event.response.status != "completed":
+                         error_message = f"LLM response finished with status '{event.response.status}'"
+                         if error_reason:
+                             error_message = f"{error_message}: {error_reason}"
+                         log_debug(
+                             "[LLM status warning]",
+                             error_message,
+                             style="red",
+                             debug_type=DebugType.LLM_STREAM,
+                         )
+                         yield model.StreamErrorItem(error=error_message)
+                 case _:
+                     log_debug(
+                         "[Unhandled stream event]",
+                         str(event),
+                         style="red",
+                         debug_type=DebugType.LLM_STREAM,
+                     )
+     except RateLimitError as e:
+         yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+
 
  @register(llm_param.LLMClientProtocol.RESPONSES)
  class ResponsesClient(LLMClientABC):
@@ -47,20 +194,15 @@ class ResponsesClient(LLMClientABC):
          param = apply_config_defaults(param, self.get_llm_config())
 
          request_start_time = time.time()
-         first_token_time: float | None = None
-         last_token_time: float | None = None
-         response_id: str | None = None
 
          inputs = convert_history_to_input(param.input, param.model)
          tools = convert_tool_schema(param.tools)
 
-         parallel_tool_calls = True
-
-         stream = call_with_logged_payload(
+         stream = await call_with_logged_payload(
              self.client.responses.create,
              model=str(param.model),
              tool_choice="auto",
-             parallel_tool_calls=parallel_tool_calls,  # OpenAI's Codex is always False, we try to enable it here. It seems gpt-5-codex has bugs when parallel_tool_calls is True.
+             parallel_tool_calls=True,
              include=[
                  "reasoning.encrypted_content",
              ],
@@ -75,6 +217,7 @@ class ResponsesClient(LLMClientABC):
              text={
                  "verbosity": param.verbosity,
              },
+             prompt_cache_key=param.session_id or "",
              reasoning={
                  "effort": param.thinking.reasoning_effort,
                  "summary": param.thinking.reasoning_summary,
@@ -84,133 +227,5 @@ class ResponsesClient(LLMClientABC):
              extra_headers={"extra": json.dumps({"session_id": param.session_id})},
          )
 
-         try:
-             async for event in await stream:
-                 log_debug(
-                     f"[{event.type}]",
-                     event.model_dump_json(exclude_none=True),
-                     style="blue",
-                     debug_type=DebugType.LLM_STREAM,
-                 )
-                 match event:
-                     case responses.ResponseCreatedEvent() as event:
-                         response_id = event.response.id
-                         yield model.StartItem(response_id=response_id)
-                     case responses.ResponseReasoningSummaryTextDoneEvent() as event:
-                         if event.text:
-                             yield model.ReasoningTextItem(
-                                 content=event.text,
-                                 response_id=response_id,
-                                 model=str(param.model),
-                             )
-                     case responses.ResponseTextDeltaEvent() as event:
-                         if first_token_time is None:
-                             first_token_time = time.time()
-                         last_token_time = time.time()
-                         yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
-                     case responses.ResponseOutputItemAddedEvent() as event:
-                         if isinstance(event.item, responses.ResponseFunctionToolCall):
-                             yield model.ToolCallStartItem(
-                                 response_id=response_id,
-                                 call_id=event.item.call_id,
-                                 name=event.item.name,
-                             )
-                     case responses.ResponseOutputItemDoneEvent() as event:
-                         match event.item:
-                             case responses.ResponseReasoningItem() as item:
-                                 if item.encrypted_content:
-                                     yield model.ReasoningEncryptedItem(
-                                         id=item.id,
-                                         encrypted_content=item.encrypted_content,
-                                         response_id=response_id,
-                                         model=str(param.model),
-                                     )
-                             case responses.ResponseOutputMessage() as item:
-                                 yield model.AssistantMessageItem(
-                                     content="\n".join(
-                                         [
-                                             part.text
-                                             for part in item.content
-                                             if isinstance(part, responses.ResponseOutputText)
-                                         ]
-                                     ),
-                                     id=item.id,
-                                     response_id=response_id,
-                                 )
-                             case responses.ResponseFunctionToolCall() as item:
-                                 if first_token_time is None:
-                                     first_token_time = time.time()
-                                 last_token_time = time.time()
-                                 yield model.ToolCallItem(
-                                     name=item.name,
-                                     arguments=item.arguments.strip(),
-                                     call_id=item.call_id,
-                                     id=item.id,
-                                     response_id=response_id,
-                                 )
-                             case _:
-                                 pass
-                     case responses.ResponseCompletedEvent() as event:
-                         usage: model.Usage | None = None
-                         error_reason: str | None = None
-                         if event.response.incomplete_details is not None:
-                             error_reason = event.response.incomplete_details.reason
-                         if event.response.usage is not None:
-                             total_tokens = event.response.usage.total_tokens
-                             context_usage_percent = (
-                                 (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                             )
-
-                             throughput_tps: float | None = None
-                             first_token_latency_ms: float | None = None
-
-                             if first_token_time is not None:
-                                 first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                             if (
-                                 first_token_time is not None
-                                 and last_token_time is not None
-                                 and event.response.usage.output_tokens > 0
-                             ):
-                                 time_duration = last_token_time - first_token_time
-                                 if time_duration >= 0.15:
-                                     throughput_tps = event.response.usage.output_tokens / time_duration
-
-                             usage = model.Usage(
-                                 input_tokens=event.response.usage.input_tokens,
-                                 cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
-                                 reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-                                 output_tokens=event.response.usage.output_tokens,
-                                 total_tokens=total_tokens,
-                                 context_usage_percent=context_usage_percent,
-                                 throughput_tps=throughput_tps,
-                                 first_token_latency_ms=first_token_latency_ms,
-                             )
-                             calculate_cost(usage, self._config.cost)
-                         yield model.ResponseMetadataItem(
-                             usage=usage,
-                             response_id=response_id,
-                             model_name=str(param.model),
-                             status=event.response.status,
-                             error_reason=error_reason,
-                         )
-                         if event.response.status != "completed":
-                             error_message = f"LLM response finished with status '{event.response.status}'"
-                             if error_reason:
-                                 error_message = f"{error_message}: {error_reason}"
-                             log_debug(
-                                 "[LLM status warning]",
-                                 error_message,
-                                 style="red",
-                                 debug_type=DebugType.LLM_STREAM,
-                             )
-                             yield model.StreamErrorItem(error=error_message)
-                     case _:
-                         log_debug(
-                             "[Unhandled stream event]",
-                             str(event),
-                             style="red",
-                             debug_type=DebugType.LLM_STREAM,
-                         )
-         except RateLimitError as e:
-             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+         async for item in parse_responses_stream(stream, param, self._config.cost, request_start_time):
+             yield item
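
Note: the throughput and first-token-latency figures produced by parse_responses_stream follow directly from the timestamps captured in the stream loop above. A worked example with made-up timings (all numbers hypothetical, not from the package):

request_start_time = 100.00   # time.time() just before the request is sent
first_token_time = 100.80     # first text/tool-call delta observed
last_token_time = 103.30      # last delta observed
output_tokens = 500

first_token_latency_ms = (first_token_time - request_start_time) * 1000  # 800.0 ms
time_duration = last_token_time - first_token_time                       # 2.5 s (>= 0.15, so TPS is reported)
throughput_tps = output_tokens / time_duration                           # 200.0 tokens/s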
klaude_code/llm/usage.py CHANGED
@@ -14,6 +14,9 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
      if cost_config is None:
          return
 
+     # Set currency
+     usage.currency = cost_config.currency
+
      # Non-cached input tokens cost
      non_cached_input = usage.input_tokens - usage.cached_tokens
      usage.input_cost = (non_cached_input / 1_000_000) * cost_config.input
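
Note: calculate_cost keeps the per-million-token formula and now stamps the configured currency onto the usage object. A worked example with hypothetical prices and counts (illustrative only):

input_price = 1.25                                           # cost_config.input, per 1M non-cached input tokens
input_tokens, cached_tokens = 12_000, 8_000
non_cached_input = input_tokens - cached_tokens              # 4_000
input_cost = (non_cached_input / 1_000_000) * input_price    # 0.005, displayed in cost_config.currency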
klaude_code/protocol/llm_param.py CHANGED
@@ -12,6 +12,7 @@ class LLMClientProtocol(Enum):
      RESPONSES = "responses"
      OPENROUTER = "openrouter"
      ANTHROPIC = "anthropic"
+     CODEX = "codex"
 
 
  class ToolSchema(BaseModel):
@@ -36,12 +37,13 @@ class Thinking(BaseModel):
 
 
  class Cost(BaseModel):
-     """Cost configuration per million tokens (USD)."""
+     """Cost configuration per million tokens."""
 
      input: float  # Input token price per million tokens
      output: float  # Output token price per million tokens
      cache_read: float = 0.0  # Cache read price per million tokens
      cache_write: float = 0.0  # Cache write price per million tokens (ignored in calculation for now)
+     currency: Literal["USD", "CNY"] = "USD"  # Currency for cost display
 
 
  class OpenRouterProviderRouting(BaseModel):
klaude_code/protocol/model.py CHANGED
@@ -21,11 +21,12 @@ class Usage(BaseModel):
      throughput_tps: float | None = None
      first_token_latency_ms: float | None = None
 
-     # Cost in USD (calculated from token counts and cost config)
+     # Cost (calculated from token counts and cost config)
      input_cost: float | None = None  # Cost for non-cached input tokens
      output_cost: float | None = None  # Cost for output tokens (including reasoning)
      cache_read_cost: float | None = None  # Cost for cached tokens
      total_cost: float | None = None  # Total cost (input + output + cache_read)
+     currency: str = "USD"  # Currency for cost display (USD or CNY)
 
 
  class TodoItem(BaseModel):
klaude_code/protocol/sub_agent.py CHANGED
@@ -245,7 +245,8 @@ register_sub_agent(
  EXPLORE_DESCRIPTION = """\
  Spin up a fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. "src/components/**/*.tsx"), \
  search code for keywords (eg. "API endpoints"), or answer questions about the codebase (eg. "how do API endpoints work?")\
- When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "very thorough" for comprehensive analysis across multiple locations and naming conventions\
+ When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "very thorough" for comprehensive analysis across multiple locations and naming conventions.
+ Always spawn multiple search agents in parallel to maximise speed.\
  """
 
  EXPLORE_PARAMETERS = {
klaude_code/session/export.py CHANGED
@@ -154,8 +154,9 @@ def _format_token_count(count: int) -> str:
      return f"{m}M" if rem == 0 else f"{m}M{rem}k"
 
 
- def _format_cost(cost: float) -> str:
-     return f"${cost:.4f}"
+ def _format_cost(cost: float, currency: str = "USD") -> str:
+     symbol = "¥" if currency == "CNY" else "$"
+     return f"{symbol}{cost:.4f}"
 
 
  def _render_metadata_item(item: model.ResponseMetadataItem) -> str:
@@ -175,26 +176,24 @@ def _render_metadata_item(item: model.ResponseMetadataItem) -> str:
      # Input with cost
      input_stat = f"input: {_format_token_count(u.input_tokens)}"
      if u.input_cost is not None:
-         input_stat += f"({_format_cost(u.input_cost)})"
+         input_stat += f"({_format_cost(u.input_cost, u.currency)})"
      parts.append(f'<span class="metadata-stat">{input_stat}</span>')
 
      # Cached with cost
      if u.cached_tokens > 0:
          cached_stat = f"cached: {_format_token_count(u.cached_tokens)}"
          if u.cache_read_cost is not None:
-             cached_stat += f"({_format_cost(u.cache_read_cost)})"
+             cached_stat += f"({_format_cost(u.cache_read_cost, u.currency)})"
          parts.append(f'<span class="metadata-stat">{cached_stat}</span>')
 
      # Output with cost
      output_stat = f"output: {_format_token_count(u.output_tokens)}"
      if u.output_cost is not None:
-         output_stat += f"({_format_cost(u.output_cost)})"
+         output_stat += f"({_format_cost(u.output_cost, u.currency)})"
      parts.append(f'<span class="metadata-stat">{output_stat}</span>')
 
      if u.reasoning_tokens > 0:
-         parts.append(
-             f'<span class="metadata-stat">thinking: {_format_token_count(u.reasoning_tokens)}</span>'
-         )
+         parts.append(f'<span class="metadata-stat">thinking: {_format_token_count(u.reasoning_tokens)}</span>')
      if u.context_usage_percent is not None:
          parts.append(f'<span class="metadata-stat">context: {u.context_usage_percent:.1f}%</span>')
      if u.throughput_tps is not None:
@@ -205,16 +204,12 @@ def _render_metadata_item(item: model.ResponseMetadataItem) -> str:
 
      # Total cost
      if item.usage is not None and item.usage.total_cost is not None:
-         parts.append(f'<span class="metadata-stat">cost: {_format_cost(item.usage.total_cost)}</span>')
+         parts.append(f'<span class="metadata-stat">cost: {_format_cost(item.usage.total_cost, item.usage.currency)}</span>')
 
      divider = '<span class="metadata-divider">/</span>'
      joined_html = divider.join(parts)
 
-     return (
-         f'<div class="response-metadata">'
-         f'<div class="metadata-line">{joined_html}</div>'
-         f"</div>"
-     )
+     return f'<div class="response-metadata"><div class="metadata-line">{joined_html}</div></div>'
 
 
  def _render_assistant_message(index: int, content: str, timestamp: datetime) -> str:
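
Note: the currency only changes the symbol emitted by _format_cost; illustrative calls (values hypothetical):

_format_cost(0.0123)           # '$0.0123'
_format_cost(0.0123, "CNY")    # '¥0.0123'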
klaude_code/session/templates/export_session.html CHANGED
@@ -1077,6 +1077,11 @@
      });
    </script>
    <script>
+     // Trim whitespace from pre-wrap content to avoid formatting artifacts
+     document.querySelectorAll(".system-prompt-content").forEach((el) => {
+       el.textContent = el.textContent.trim();
+     });
+
      // Markdown rendering and Syntax Highlighting
      document.querySelectorAll(".markdown-content").forEach((el) => {
        const raw = el.dataset.raw;
klaude_code/ui/modes/repl/completers.py CHANGED
@@ -27,8 +27,11 @@ from prompt_toolkit.formatted_text import HTML
 
  from klaude_code.command import get_commands
 
- # Pattern to match @token for completion refresh (used by key bindings)
- AT_TOKEN_PATTERN = re.compile(r"(^|\s)@(?P<frag>[^\s]*)$")
+ # Pattern to match @token for completion refresh (used by key bindings).
+ # Supports both plain tokens like `@src/file.py` and quoted tokens like
+ # `@"path with spaces/file.py"` so that filenames with spaces remain a
+ # single logical token.
+ AT_TOKEN_PATTERN = re.compile(r'(^|\s)@(?P<frag>"[^"]*"|[^\s]*)$')
 
 
  def create_repl_completer() -> Completer:
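
Note: a quick illustration of what the widened AT_TOKEN_PATTERN captures (the fragment keeps its quotes; the completer strips them afterwards). Inputs are hypothetical:

import re
AT_TOKEN_PATTERN = re.compile(r'(^|\s)@(?P<frag>"[^"]*"|[^\s]*)$')

AT_TOKEN_PATTERN.search('read @src/main.py').group("frag")            # 'src/main.py'
AT_TOKEN_PATTERN.search('read @"my docs/readme.md"').group("frag")    # '"my docs/readme.md"'
AT_TOKEN_PATTERN.search('read @"partial').group("frag")               # '"partial' (quote not yet closed)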
@@ -182,31 +185,48 @@ class _AtFilesCompleter(Completer):
          if not m:
              return []  # type: ignore[reportUnknownVariableType]
 
-         frag = m.group("frag")  # text after '@' and before cursor (no spaces)
+         frag = m.group("frag")  # raw text after '@' and before cursor (may be quoted)
+         # Normalize fragment for search: support optional quoting syntax @"...".
+         is_quoted = frag.startswith('"')
+         search_frag = frag
+         if is_quoted:
+             # Drop leading quote; if user already closed the quote, drop trailing quote as well.
+             search_frag = search_frag[1:]
+             if search_frag.endswith('"'):
+                 search_frag = search_frag[:-1]
+
          token_start_in_input = len(text_before) - len(f"@{frag}")
 
          cwd = Path.cwd()
 
          # If no fragment yet, show lightweight suggestions from current directory
-         if frag.strip() == "":
+         if search_frag.strip() == "":
              suggestions = self._suggest_for_empty_fragment(cwd)
              if not suggestions:
                  return []  # type: ignore[reportUnknownVariableType]
              start_position = token_start_in_input - len(text_before)
              for s in suggestions[: self._max_results]:
-                 yield Completion(text=f"@{s} ", start_position=start_position, display=s)
+                 yield Completion(
+                     text=self._format_completion_text(s, is_quoted=is_quoted),
+                     start_position=start_position,
+                     display=s,
+                 )
              return []  # type: ignore[reportUnknownVariableType]
 
          # Gather suggestions with debounce/caching based on search keyword
-         suggestions = self._complete_paths(cwd, frag)
+         suggestions = self._complete_paths(cwd, search_frag)
          if not suggestions:
              return []  # type: ignore[reportUnknownVariableType]
 
          # Prepare Completion objects. Replace from the '@' character.
          start_position = token_start_in_input - len(text_before)  # negative
          for s in suggestions[: self._max_results]:
-             # Insert '@<path> ' so that subsequent typing does not keep triggering
-             yield Completion(text=f"@{s} ", start_position=start_position, display=s)
+             # Insert formatted text (with quoting when needed) so that subsequent typing does not keep triggering
+             yield Completion(
+                 text=self._format_completion_text(s, is_quoted=is_quoted),
+                 start_position=start_position,
+                 display=s,
+             )
 
      # ---- Core logic ----
      def _complete_paths(self, cwd: Path, keyword: str) -> list[str]:
@@ -318,6 +338,19 @@ class _AtFilesCompleter(Completer):
              uniq.append(s)
          return uniq
 
+     def _format_completion_text(self, suggestion: str, *, is_quoted: bool) -> str:
+         """Format completion insertion text for a given suggestion.
+
+         Paths that contain whitespace are always wrapped in quotes so that they
+         can be parsed correctly by the @-file reader. If the user explicitly
+         started a quoted token (e.g. @"foo), we preserve quoting even when the
+         suggested path itself does not contain spaces.
+         """
+         needs_quotes = any(ch.isspace() for ch in suggestion)
+         if needs_quotes or is_quoted:
+             return f'@"{suggestion}" '
+         return f"@{suggestion} "
+
      def _same_scope(self, prev_key: str, cur_key: str) -> bool:
          # Consider same scope if they share the same base directory and one prefix startswith the other
          try:
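
Note: illustrative inputs and outputs for the new quoting helper, following the function body above (paths are hypothetical):

# suggestion                  is_quoted   inserted text
# 'src/main.py'               False       '@src/main.py '
# 'docs/design notes.md'      False       '@"docs/design notes.md" '
# 'src/main.py'               True        '@"src/main.py" '   (user started with @"..., quoting preserved)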