klaude-code 2.10.3__py3-none-any.whl → 2.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. klaude_code/auth/AGENTS.md +4 -24
  2. klaude_code/auth/__init__.py +1 -17
  3. klaude_code/cli/auth_cmd.py +3 -53
  4. klaude_code/cli/list_model.py +0 -50
  5. klaude_code/config/assets/builtin_config.yaml +0 -28
  6. klaude_code/config/config.py +5 -42
  7. klaude_code/const.py +5 -2
  8. klaude_code/core/agent_profile.py +2 -10
  9. klaude_code/core/backtrack/__init__.py +3 -0
  10. klaude_code/core/backtrack/manager.py +48 -0
  11. klaude_code/core/memory.py +25 -9
  12. klaude_code/core/task.py +53 -7
  13. klaude_code/core/tool/__init__.py +2 -0
  14. klaude_code/core/tool/backtrack/__init__.py +3 -0
  15. klaude_code/core/tool/backtrack/backtrack_tool.md +17 -0
  16. klaude_code/core/tool/backtrack/backtrack_tool.py +65 -0
  17. klaude_code/core/tool/context.py +5 -0
  18. klaude_code/core/turn.py +3 -0
  19. klaude_code/llm/input_common.py +70 -1
  20. klaude_code/llm/openai_compatible/input.py +5 -2
  21. klaude_code/llm/openrouter/input.py +5 -2
  22. klaude_code/llm/registry.py +0 -1
  23. klaude_code/protocol/events.py +10 -0
  24. klaude_code/protocol/llm_param.py +0 -1
  25. klaude_code/protocol/message.py +10 -1
  26. klaude_code/protocol/tools.py +1 -0
  27. klaude_code/session/session.py +111 -2
  28. klaude_code/session/store.py +2 -0
  29. klaude_code/skill/assets/executing-plans/SKILL.md +84 -0
  30. klaude_code/skill/assets/writing-plans/SKILL.md +116 -0
  31. klaude_code/tui/commands.py +15 -0
  32. klaude_code/tui/components/developer.py +1 -1
  33. klaude_code/tui/components/rich/status.py +7 -76
  34. klaude_code/tui/components/rich/theme.py +10 -0
  35. klaude_code/tui/components/tools.py +31 -18
  36. klaude_code/tui/display.py +4 -0
  37. klaude_code/tui/input/prompt_toolkit.py +15 -1
  38. klaude_code/tui/machine.py +26 -8
  39. klaude_code/tui/renderer.py +97 -0
  40. klaude_code/tui/runner.py +7 -2
  41. klaude_code/tui/terminal/image.py +28 -12
  42. klaude_code/ui/terminal/title.py +8 -3
  43. {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/METADATA +1 -1
  44. {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/RECORD +46 -49
  45. klaude_code/auth/antigravity/__init__.py +0 -20
  46. klaude_code/auth/antigravity/exceptions.py +0 -17
  47. klaude_code/auth/antigravity/oauth.py +0 -315
  48. klaude_code/auth/antigravity/pkce.py +0 -25
  49. klaude_code/auth/antigravity/token_manager.py +0 -27
  50. klaude_code/core/prompts/prompt-antigravity.md +0 -80
  51. klaude_code/llm/antigravity/__init__.py +0 -3
  52. klaude_code/llm/antigravity/client.py +0 -558
  53. klaude_code/llm/antigravity/input.py +0 -268
  54. klaude_code/skill/assets/create-plan/SKILL.md +0 -74
  55. {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/WHEEL +0 -0
  56. {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/entry_points.txt +0 -0
@@ -1,558 +0,0 @@
1
- """Antigravity LLM client using Cloud Code Assist API."""
2
-
3
- import asyncio
4
- import json
5
- import re
6
- from base64 import b64encode
7
- from collections.abc import AsyncGenerator
8
- from typing import TypedDict, override
9
- from uuid import uuid4
10
-
11
- import httpx
12
-
13
- from klaude_code.auth.antigravity import AntigravityOAuth, AntigravityTokenManager
14
- from klaude_code.llm.antigravity.input import Content, Tool, convert_history_to_contents, convert_tool_schema
15
- from klaude_code.llm.client import LLMClientABC, LLMStreamABC
16
- from klaude_code.llm.image import save_assistant_image
17
- from klaude_code.llm.input_common import apply_config_defaults
18
- from klaude_code.llm.registry import register
19
- from klaude_code.llm.stream_parts import (
20
- append_text_part,
21
- append_thinking_text_part,
22
- build_partial_message,
23
- build_partial_parts,
24
- )
25
- from klaude_code.llm.usage import MetadataTracker, error_llm_stream
26
- from klaude_code.log import DebugType, debug_json, log_debug
27
- from klaude_code.protocol import llm_param, message, model
28
-
29
- # Unified format for Google thought signatures
30
- GOOGLE_THOUGHT_SIGNATURE_FORMAT = "google"
31
-
32
- # Cloud Code Assist API endpoint
33
- DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com"
34
-
35
- # Antigravity headers
36
- ANTIGRAVITY_HEADERS = {
37
- "User-Agent": "antigravity/1.11.5 darwin/arm64",
38
- "X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
39
- "Client-Metadata": json.dumps(
40
- {
41
- "ideType": "IDE_UNSPECIFIED",
42
- "platform": "PLATFORM_UNSPECIFIED",
43
- "pluginType": "GEMINI",
44
- }
45
- ),
46
- }
47
-
48
- # Retry configuration
49
- MAX_RETRIES = 3
50
- BASE_DELAY_MS = 1000
51
-
52
-
53
- class ThinkingConfig(TypedDict, total=False):
54
- includeThoughts: bool
55
- thinkingBudget: int
56
- thinkingLevel: str
57
-
58
-
59
- class GenerationConfig(TypedDict, total=False):
60
- maxOutputTokens: int
61
- temperature: float
62
- thinkingConfig: ThinkingConfig | None
63
-
64
-
65
- class ToolConfig(TypedDict, total=False):
66
- functionCallingConfig: dict[str, str]
67
-
68
-
69
- class SystemInstruction(TypedDict, total=False):
70
- role: str
71
- parts: list[dict[str, str]]
72
-
73
-
74
- class RequestBody(TypedDict, total=False):
75
- contents: list[Content]
76
- systemInstruction: SystemInstruction
77
- generationConfig: GenerationConfig
78
- tools: list[Tool]
79
- toolConfig: ToolConfig
80
-
81
-
82
- class CloudCodeAssistRequest(TypedDict, total=False):
83
- project: str
84
- model: str
85
- request: RequestBody
86
- requestType: str
87
- userAgent: str
88
- requestId: str
89
-
90
-
91
- def _convert_thinking_level(reasoning_effort: str | None) -> str | None:
92
- """Convert reasoning_effort to Gemini ThinkingLevel."""
93
- if reasoning_effort is None:
94
- return None
95
- mapping: dict[str, str] = {
96
- "xhigh": "HIGH",
97
- "high": "HIGH",
98
- "medium": "MEDIUM",
99
- "low": "LOW",
100
- "minimal": "MINIMAL",
101
- "none": "MINIMAL",
102
- }
103
- return mapping.get(reasoning_effort)
104
-
105
-
106
- def _extract_retry_delay(error_text: str) -> int | None:
107
- """Extract retry delay from error response in milliseconds."""
108
- # Pattern: "Your quota will reset after 39s" or "18h31m10s"
109
- match = re.search(r"reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s", error_text, re.IGNORECASE)
110
- if match:
111
- hours = int(match.group(1)) if match.group(1) else 0
112
- minutes = int(match.group(2)) if match.group(2) else 0
113
- seconds = float(match.group(3))
114
- total_ms = int(((hours * 60 + minutes) * 60 + seconds) * 1000)
115
- if total_ms > 0:
116
- return total_ms + 1000 # Add 1s buffer
117
-
118
- # Pattern: "Please retry in X[ms|s]"
119
- match = re.search(r"Please retry in ([0-9.]+)(ms|s)", error_text, re.IGNORECASE)
120
- if match:
121
- value = float(match.group(1))
122
- if match.group(2).lower() == "ms":
123
- return int(value) + 1000
124
- return int(value * 1000) + 1000
125
-
126
- # Pattern: "retryDelay": "34.074824224s"
127
- match = re.search(r'"retryDelay":\s*"([0-9.]+)(ms|s)"', error_text, re.IGNORECASE)
128
- if match:
129
- value = float(match.group(1))
130
- if match.group(2).lower() == "ms":
131
- return int(value) + 1000
132
- return int(value * 1000) + 1000
133
-
134
- return None
135
-
136
-
137
- def _is_retryable_error(status: int, error_text: str) -> bool:
138
- """Check if an error is retryable.
139
-
140
- Note: 429 is NOT retryable - fail immediately to let caller rotate accounts.
141
- """
142
- if status in (500, 502, 503, 504):
143
- return True
144
- # Exclude rate limit patterns - let caller handle account rotation
145
- if status == 429:
146
- return False
147
- return bool(re.search(r"overloaded|service.?unavailable", error_text, re.IGNORECASE))
148
-
149
-
150
- def _map_finish_reason(reason: str) -> model.StopReason | None:
151
- """Map finish reason string to StopReason."""
152
- mapping: dict[str, model.StopReason] = {
153
- "STOP": "stop",
154
- "MAX_TOKENS": "length",
155
- "SAFETY": "error",
156
- "RECITATION": "error",
157
- "OTHER": "error",
158
- }
159
- return mapping.get(reason.upper())
160
-
161
-
162
- def _encode_thought_signature(sig: bytes | str | None) -> str | None:
163
- """Encode thought signature to base64 string."""
164
- if sig is None:
165
- return None
166
- if isinstance(sig, bytes):
167
- return b64encode(sig).decode("ascii")
168
- return sig
169
-
170
-
171
- def _build_request(
172
- param: llm_param.LLMCallParameter,
173
- contents: list[Content],
174
- project_id: str,
175
- ) -> CloudCodeAssistRequest:
176
- """Build Cloud Code Assist API request."""
177
- request: RequestBody = {"contents": contents}
178
-
179
- # System instruction from param.system
180
- if param.system:
181
- request["systemInstruction"] = {
182
- "role": "user",
183
- "parts": [{"text": param.system}],
184
- }
185
-
186
- # Generation config
187
- generation_config: GenerationConfig = {}
188
- if param.temperature is not None:
189
- generation_config["temperature"] = param.temperature
190
- if param.max_tokens is not None:
191
- generation_config["maxOutputTokens"] = param.max_tokens
192
-
193
- # Thinking config
194
- thinking_config: ThinkingConfig | None = None
195
- if param.thinking:
196
- thinking_config = {"includeThoughts": True}
197
- if param.thinking.budget_tokens:
198
- thinking_config["thinkingBudget"] = param.thinking.budget_tokens
199
- if param.thinking.reasoning_effort:
200
- level = _convert_thinking_level(param.thinking.reasoning_effort)
201
- if level:
202
- thinking_config["thinkingLevel"] = level
203
- generation_config["thinkingConfig"] = thinking_config
204
-
205
- if generation_config:
206
- request["generationConfig"] = generation_config
207
-
208
- # Tools
209
- tools = convert_tool_schema(param.tools)
210
- if tools:
211
- request["tools"] = tools
212
- request["toolConfig"] = {"functionCallingConfig": {"mode": "AUTO"}}
213
-
214
- return CloudCodeAssistRequest(
215
- project=project_id,
216
- model=str(param.model_id),
217
- request=request,
218
- requestType="agent",
219
- userAgent="antigravity",
220
- requestId=f"agent-{uuid4().hex[:16]}",
221
- )
222
-
223
-
224
- class AntigravityStreamStateManager:
225
- """Manages streaming state for Antigravity LLM responses."""
226
-
227
- def __init__(self, param_model: str) -> None:
228
- self.param_model = param_model
229
- self.assistant_parts: list[message.Part] = []
230
- self.response_id: str | None = None
231
- self.stop_reason: model.StopReason | None = None
232
-
233
- def append_thinking_text(self, text: str) -> None:
234
- append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
235
-
236
- def append_text(self, text: str) -> None:
237
- append_text_part(self.assistant_parts, text)
238
-
239
- def append_thinking_signature(self, signature: str) -> None:
240
- self.assistant_parts.append(
241
- message.ThinkingSignaturePart(
242
- signature=signature,
243
- model_id=self.param_model,
244
- format=GOOGLE_THOUGHT_SIGNATURE_FORMAT,
245
- )
246
- )
247
-
248
- def append_image(self, image_part: message.ImageFilePart) -> None:
249
- self.assistant_parts.append(image_part)
250
-
251
- def append_tool_call(self, call_id: str, name: str, arguments_json: str) -> None:
252
- self.assistant_parts.append(
253
- message.ToolCallPart(
254
- call_id=call_id,
255
- tool_name=name,
256
- arguments_json=arguments_json,
257
- )
258
- )
259
-
260
- def get_partial_parts(self) -> list[message.Part]:
261
- return build_partial_parts(self.assistant_parts)
262
-
263
- def get_partial_message(self) -> message.AssistantMessage | None:
264
- return build_partial_message(self.assistant_parts, response_id=self.response_id)
265
-
266
-
267
- async def _parse_sse_stream(
268
- response: httpx.Response,
269
- param: llm_param.LLMCallParameter,
270
- metadata_tracker: MetadataTracker,
271
- state: AntigravityStreamStateManager,
272
- ) -> AsyncGenerator[message.LLMStreamItem]:
273
- """Parse SSE stream from Cloud Code Assist API."""
274
- tool_call_counter = 0
275
- started_tool_calls: dict[str, tuple[str, str | None]] = {} # call_id -> (name, thought_signature)
276
- completed_tool_items: set[str] = set()
277
- image_index = 0
278
-
279
- async for line in response.aiter_lines():
280
- if not line.startswith("data:"):
281
- continue
282
-
283
- json_str = line[5:].strip()
284
- if not json_str:
285
- continue
286
-
287
- try:
288
- chunk = json.loads(json_str)
289
- except json.JSONDecodeError:
290
- continue
291
-
292
- response_data = chunk.get("response")
293
- if not response_data:
294
- continue
295
-
296
- if state.response_id is None:
297
- state.response_id = response_data.get("responseId") or uuid4().hex
298
-
299
- # Process candidates
300
- candidates = response_data.get("candidates", [])
301
- candidate0 = candidates[0] if candidates else None
302
- if not candidate0:
303
- continue
304
-
305
- finish_reason = candidate0.get("finishReason")
306
- if finish_reason:
307
- state.stop_reason = _map_finish_reason(finish_reason)
308
-
309
- content = candidate0.get("content", {})
310
- content_parts = content.get("parts", [])
311
-
312
- for part in content_parts:
313
- log_debug(debug_json(part), style="blue", debug_type=DebugType.LLM_STREAM)
314
- # Handle text parts and thought signatures
315
- text = part.get("text")
316
- thought_signature = part.get("thoughtSignature")
317
- is_thinking = part.get("thought") is True
318
-
319
- if text:
320
- metadata_tracker.record_token()
321
- if is_thinking:
322
- state.append_thinking_text(text)
323
- yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
324
- else:
325
- state.append_text(text)
326
- yield message.AssistantTextDelta(content=text, response_id=state.response_id)
327
-
328
- # Handle thought signature (may come with empty text, but not for function calls)
329
- if thought_signature and not part.get("functionCall"):
330
- encoded_sig = _encode_thought_signature(thought_signature)
331
- if encoded_sig:
332
- state.append_thinking_signature(encoded_sig)
333
-
334
- # Handle inline_data (image generation)
335
- inline_data = part.get("inlineData")
336
- if inline_data and inline_data.get("data"):
337
- if part.get("thought") is True:
338
- continue # Skip thought images
339
- mime_type = inline_data.get("mimeType", "image/png")
340
- data = inline_data["data"]
341
- data_url = f"data:{mime_type};base64,{data}"
342
- try:
343
- image_part = save_assistant_image(
344
- data_url=data_url,
345
- session_id=param.session_id,
346
- response_id=state.response_id,
347
- image_index=image_index,
348
- )
349
- image_index += 1
350
- state.append_image(image_part)
351
- yield message.AssistantImageDelta(
352
- response_id=state.response_id,
353
- file_path=image_part.file_path,
354
- )
355
- except ValueError:
356
- pass
357
-
358
- # Handle function calls
359
- function_call = part.get("functionCall")
360
- if function_call:
361
- metadata_tracker.record_token()
362
- call_id = function_call.get("id") or f"call_{uuid4().hex[:8]}_{tool_call_counter}"
363
- tool_call_counter += 1
364
- name = function_call.get("name", "")
365
- thought_signature = part.get("thoughtSignature")
366
-
367
- if call_id not in started_tool_calls:
368
- started_tool_calls[call_id] = (name, thought_signature)
369
- yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
370
-
371
- args = function_call.get("args")
372
- if args is not None and call_id not in completed_tool_items:
373
- state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
374
- if thought_signature:
375
- encoded_sig = _encode_thought_signature(thought_signature)
376
- if encoded_sig:
377
- state.append_thinking_signature(encoded_sig)
378
- completed_tool_items.add(call_id)
379
-
380
- # Process usage metadata
381
- usage_metadata = response_data.get("usageMetadata")
382
- if usage_metadata:
383
- prompt_tokens = usage_metadata.get("promptTokenCount", 0)
384
- cached_tokens = usage_metadata.get("cachedContentTokenCount", 0)
385
- candidates_tokens = usage_metadata.get("candidatesTokenCount", 0)
386
- thoughts_tokens = usage_metadata.get("thoughtsTokenCount", 0)
387
- total_tokens = usage_metadata.get("totalTokenCount") or (
388
- prompt_tokens + candidates_tokens + thoughts_tokens
389
- )
390
-
391
- usage = model.Usage(
392
- input_tokens=prompt_tokens,
393
- cached_tokens=cached_tokens,
394
- output_tokens=candidates_tokens + thoughts_tokens,
395
- reasoning_tokens=thoughts_tokens,
396
- context_size=total_tokens,
397
- context_limit=param.context_limit,
398
- max_tokens=param.max_tokens,
399
- )
400
- metadata_tracker.set_usage(usage)
401
-
402
- # Finalize
403
- metadata_tracker.set_model_name(str(param.model_id))
404
- metadata_tracker.set_response_id(state.response_id)
405
- metadata = metadata_tracker.finalize()
406
- yield message.AssistantMessage(
407
- parts=state.assistant_parts,
408
- response_id=state.response_id,
409
- usage=metadata,
410
- stop_reason=state.stop_reason,
411
- )
412
-
413
-
414
- class AntigravityLLMStream(LLMStreamABC):
415
- """LLMStream implementation for Antigravity client."""
416
-
417
- def __init__(
418
- self,
419
- response: httpx.Response,
420
- *,
421
- param: llm_param.LLMCallParameter,
422
- metadata_tracker: MetadataTracker,
423
- state: AntigravityStreamStateManager,
424
- ) -> None:
425
- self._response = response
426
- self._param = param
427
- self._metadata_tracker = metadata_tracker
428
- self._state = state
429
- self._completed = False
430
-
431
- def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
432
- return self._iterate()
433
-
434
- async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
435
- try:
436
- async for item in _parse_sse_stream(
437
- self._response,
438
- param=self._param,
439
- metadata_tracker=self._metadata_tracker,
440
- state=self._state,
441
- ):
442
- if isinstance(item, message.AssistantMessage):
443
- self._completed = True
444
- yield item
445
- except httpx.HTTPError as e:
446
- yield message.StreamErrorItem(error=f"HTTPError: {e}")
447
- self._metadata_tracker.set_response_id(self._state.response_id)
448
- yield message.AssistantMessage(
449
- parts=self._state.get_partial_parts(),
450
- response_id=self._state.response_id,
451
- usage=self._metadata_tracker.finalize(),
452
- stop_reason="error",
453
- )
454
-
455
- def get_partial_message(self) -> message.AssistantMessage | None:
456
- if self._completed:
457
- return None
458
- return self._state.get_partial_message()
459
-
460
-
461
- @register(llm_param.LLMClientProtocol.ANTIGRAVITY)
462
- class AntigravityClient(LLMClientABC):
463
- """Antigravity LLM client using Cloud Code Assist API."""
464
-
465
- def __init__(self, config: llm_param.LLMConfigParameter):
466
- super().__init__(config)
467
- self._token_manager = AntigravityTokenManager()
468
- self._oauth = AntigravityOAuth(self._token_manager)
469
- self._endpoint = config.base_url or DEFAULT_ENDPOINT
470
- self._http_client: httpx.AsyncClient | None = None
471
-
472
- async def _get_http_client(self) -> httpx.AsyncClient:
473
- if self._http_client is None:
474
- self._http_client = httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=30.0))
475
- return self._http_client
476
-
477
- def _get_credentials(self) -> tuple[str, str]:
478
- """Get access token and project ID, refreshing if needed."""
479
- return self._oauth.ensure_valid_token()
480
-
481
- @classmethod
482
- @override
483
- def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
484
- return cls(config)
485
-
486
- @override
487
- async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
488
- param = apply_config_defaults(param, self.get_llm_config())
489
- metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
490
-
491
- # Get credentials
492
- try:
493
- access_token, project_id = self._get_credentials()
494
- except Exception as e:
495
- return error_llm_stream(metadata_tracker, error=str(e))
496
-
497
- # Convert messages
498
- contents = convert_history_to_contents(param.input, model_name=str(param.model_id))
499
- request_body = _build_request(param, contents, project_id)
500
-
501
- log_debug(
502
- debug_json(request_body),
503
- style="yellow",
504
- debug_type=DebugType.LLM_PAYLOAD,
505
- )
506
-
507
- # Make request with retry logic
508
- url = f"{self._endpoint}/v1internal:streamGenerateContent?alt=sse"
509
- headers = {
510
- "Authorization": f"Bearer {access_token}",
511
- "Content-Type": "application/json",
512
- "Accept": "text/event-stream",
513
- **ANTIGRAVITY_HEADERS,
514
- }
515
-
516
- client = await self._get_http_client()
517
- last_error: str | None = None
518
-
519
- for attempt in range(MAX_RETRIES + 1):
520
- try:
521
- response = await client.post(
522
- url,
523
- headers=headers,
524
- json=request_body,
525
- )
526
-
527
- if response.status_code == 200:
528
- state = AntigravityStreamStateManager(param_model=str(param.model_id))
529
- return AntigravityLLMStream(
530
- response,
531
- param=param,
532
- metadata_tracker=metadata_tracker,
533
- state=state,
534
- )
535
-
536
- error_text = response.text
537
- last_error = f"Cloud Code Assist API error ({response.status_code}): {error_text}"
538
-
539
- # Check if retryable
540
- if attempt < MAX_RETRIES and _is_retryable_error(response.status_code, error_text):
541
- delay_ms = _extract_retry_delay(error_text) or (BASE_DELAY_MS * (2**attempt))
542
- await asyncio.sleep(delay_ms / 1000)
543
- # Refresh token in case it expired
544
- access_token, project_id = self._get_credentials()
545
- headers["Authorization"] = f"Bearer {access_token}"
546
- continue
547
-
548
- break
549
-
550
- except httpx.HTTPError as e:
551
- last_error = f"HTTPError: {e}"
552
- if attempt < MAX_RETRIES:
553
- delay_ms = BASE_DELAY_MS * (2**attempt)
554
- await asyncio.sleep(delay_ms / 1000)
555
- continue
556
- break
557
-
558
- return error_llm_stream(metadata_tracker, error=last_error or "Request failed")