klaude-code 2.10.3__py3-none-any.whl → 2.10.4__py3-none-any.whl
- klaude_code/auth/AGENTS.md +4 -24
- klaude_code/auth/__init__.py +1 -17
- klaude_code/cli/auth_cmd.py +3 -53
- klaude_code/cli/list_model.py +0 -50
- klaude_code/config/assets/builtin_config.yaml +0 -28
- klaude_code/config/config.py +5 -42
- klaude_code/const.py +5 -2
- klaude_code/core/agent_profile.py +2 -10
- klaude_code/core/backtrack/__init__.py +3 -0
- klaude_code/core/backtrack/manager.py +48 -0
- klaude_code/core/memory.py +25 -9
- klaude_code/core/task.py +53 -7
- klaude_code/core/tool/__init__.py +2 -0
- klaude_code/core/tool/backtrack/__init__.py +3 -0
- klaude_code/core/tool/backtrack/backtrack_tool.md +17 -0
- klaude_code/core/tool/backtrack/backtrack_tool.py +65 -0
- klaude_code/core/tool/context.py +5 -0
- klaude_code/core/turn.py +3 -0
- klaude_code/llm/input_common.py +70 -1
- klaude_code/llm/openai_compatible/input.py +5 -2
- klaude_code/llm/openrouter/input.py +5 -2
- klaude_code/llm/registry.py +0 -1
- klaude_code/protocol/events.py +10 -0
- klaude_code/protocol/llm_param.py +0 -1
- klaude_code/protocol/message.py +10 -1
- klaude_code/protocol/tools.py +1 -0
- klaude_code/session/session.py +111 -2
- klaude_code/session/store.py +2 -0
- klaude_code/skill/assets/executing-plans/SKILL.md +84 -0
- klaude_code/skill/assets/writing-plans/SKILL.md +116 -0
- klaude_code/tui/commands.py +15 -0
- klaude_code/tui/components/developer.py +1 -1
- klaude_code/tui/components/rich/status.py +7 -76
- klaude_code/tui/components/rich/theme.py +10 -0
- klaude_code/tui/components/tools.py +31 -18
- klaude_code/tui/display.py +4 -0
- klaude_code/tui/input/prompt_toolkit.py +15 -1
- klaude_code/tui/machine.py +26 -8
- klaude_code/tui/renderer.py +97 -0
- klaude_code/tui/runner.py +7 -2
- klaude_code/tui/terminal/image.py +28 -12
- klaude_code/ui/terminal/title.py +8 -3
- {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/METADATA +1 -1
- {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/RECORD +46 -49
- klaude_code/auth/antigravity/__init__.py +0 -20
- klaude_code/auth/antigravity/exceptions.py +0 -17
- klaude_code/auth/antigravity/oauth.py +0 -315
- klaude_code/auth/antigravity/pkce.py +0 -25
- klaude_code/auth/antigravity/token_manager.py +0 -27
- klaude_code/core/prompts/prompt-antigravity.md +0 -80
- klaude_code/llm/antigravity/__init__.py +0 -3
- klaude_code/llm/antigravity/client.py +0 -558
- klaude_code/llm/antigravity/input.py +0 -268
- klaude_code/skill/assets/create-plan/SKILL.md +0 -74
- {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/WHEEL +0 -0
- {klaude_code-2.10.3.dist-info → klaude_code-2.10.4.dist-info}/entry_points.txt +0 -0
klaude_code/llm/antigravity/client.py (removed)

@@ -1,558 +0,0 @@
-"""Antigravity LLM client using Cloud Code Assist API."""
-
-import asyncio
-import json
-import re
-from base64 import b64encode
-from collections.abc import AsyncGenerator
-from typing import TypedDict, override
-from uuid import uuid4
-
-import httpx
-
-from klaude_code.auth.antigravity import AntigravityOAuth, AntigravityTokenManager
-from klaude_code.llm.antigravity.input import Content, Tool, convert_history_to_contents, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC, LLMStreamABC
-from klaude_code.llm.image import save_assistant_image
-from klaude_code.llm.input_common import apply_config_defaults
-from klaude_code.llm.registry import register
-from klaude_code.llm.stream_parts import (
-    append_text_part,
-    append_thinking_text_part,
-    build_partial_message,
-    build_partial_parts,
-)
-from klaude_code.llm.usage import MetadataTracker, error_llm_stream
-from klaude_code.log import DebugType, debug_json, log_debug
-from klaude_code.protocol import llm_param, message, model
-
-# Unified format for Google thought signatures
-GOOGLE_THOUGHT_SIGNATURE_FORMAT = "google"
-
-# Cloud Code Assist API endpoint
-DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com"
-
-# Antigravity headers
-ANTIGRAVITY_HEADERS = {
-    "User-Agent": "antigravity/1.11.5 darwin/arm64",
-    "X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
-    "Client-Metadata": json.dumps(
-        {
-            "ideType": "IDE_UNSPECIFIED",
-            "platform": "PLATFORM_UNSPECIFIED",
-            "pluginType": "GEMINI",
-        }
-    ),
-}
-
-# Retry configuration
-MAX_RETRIES = 3
-BASE_DELAY_MS = 1000
-
-
-class ThinkingConfig(TypedDict, total=False):
-    includeThoughts: bool
-    thinkingBudget: int
-    thinkingLevel: str
-
-
-class GenerationConfig(TypedDict, total=False):
-    maxOutputTokens: int
-    temperature: float
-    thinkingConfig: ThinkingConfig | None
-
-
-class ToolConfig(TypedDict, total=False):
-    functionCallingConfig: dict[str, str]
-
-
-class SystemInstruction(TypedDict, total=False):
-    role: str
-    parts: list[dict[str, str]]
-
-
-class RequestBody(TypedDict, total=False):
-    contents: list[Content]
-    systemInstruction: SystemInstruction
-    generationConfig: GenerationConfig
-    tools: list[Tool]
-    toolConfig: ToolConfig
-
-
-class CloudCodeAssistRequest(TypedDict, total=False):
-    project: str
-    model: str
-    request: RequestBody
-    requestType: str
-    userAgent: str
-    requestId: str
-
-
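Taken together, the TypedDicts above describe the wrapper payload the client posts. A minimal sketch of the shape with made-up values (the field values below are illustrative, not taken from the diff):

example_request: CloudCodeAssistRequest = {
    "project": "my-gcp-project",  # hypothetical project ID
    "model": "example-model",     # hypothetical model name
    "request": {
        "systemInstruction": {"role": "user", "parts": [{"text": "Be concise."}]},
        "generationConfig": {"maxOutputTokens": 8192, "temperature": 1.0},
        "contents": [],  # converted chat history goes here
    },
    "requestType": "agent",
    "userAgent": "antigravity",
    "requestId": "agent-0123456789abcdef",
}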
-def _convert_thinking_level(reasoning_effort: str | None) -> str | None:
-    """Convert reasoning_effort to Gemini ThinkingLevel."""
-    if reasoning_effort is None:
-        return None
-    mapping: dict[str, str] = {
-        "xhigh": "HIGH",
-        "high": "HIGH",
-        "medium": "MEDIUM",
-        "low": "LOW",
-        "minimal": "MINIMAL",
-        "none": "MINIMAL",
-    }
-    return mapping.get(reasoning_effort)
-
-
-def _extract_retry_delay(error_text: str) -> int | None:
-    """Extract retry delay from error response in milliseconds."""
-    # Pattern: "Your quota will reset after 39s" or "18h31m10s"
-    match = re.search(r"reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s", error_text, re.IGNORECASE)
-    if match:
-        hours = int(match.group(1)) if match.group(1) else 0
-        minutes = int(match.group(2)) if match.group(2) else 0
-        seconds = float(match.group(3))
-        total_ms = int(((hours * 60 + minutes) * 60 + seconds) * 1000)
-        if total_ms > 0:
-            return total_ms + 1000  # Add 1s buffer
-
-    # Pattern: "Please retry in X[ms|s]"
-    match = re.search(r"Please retry in ([0-9.]+)(ms|s)", error_text, re.IGNORECASE)
-    if match:
-        value = float(match.group(1))
-        if match.group(2).lower() == "ms":
-            return int(value) + 1000
-        return int(value * 1000) + 1000
-
-    # Pattern: "retryDelay": "34.074824224s"
-    match = re.search(r'"retryDelay":\s*"([0-9.]+)(ms|s)"', error_text, re.IGNORECASE)
-    if match:
-        value = float(match.group(1))
-        if match.group(2).lower() == "ms":
-            return int(value) + 1000
-        return int(value * 1000) + 1000
-
-    return None
-
-
-def _is_retryable_error(status: int, error_text: str) -> bool:
-    """Check if an error is retryable.
-
-    Note: 429 is NOT retryable - fail immediately to let caller rotate accounts.
-    """
-    if status in (500, 502, 503, 504):
-        return True
-    # Exclude rate limit patterns - let caller handle account rotation
-    if status == 429:
-        return False
-    return bool(re.search(r"overloaded|service.?unavailable", error_text, re.IGNORECASE))
-
-
-def _map_finish_reason(reason: str) -> model.StopReason | None:
-    """Map finish reason string to StopReason."""
-    mapping: dict[str, model.StopReason] = {
-        "STOP": "stop",
-        "MAX_TOKENS": "length",
-        "SAFETY": "error",
-        "RECITATION": "error",
-        "OTHER": "error",
-    }
-    return mapping.get(reason.upper())
-
-
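How the retry helpers combine in practice — a small sketch with invented error text; the arithmetic follows the code above:

quota_error = "Quota exceeded. Your quota will reset after 18h31m10s."
delay_ms = _extract_retry_delay(quota_error)
# ((18 * 60 + 31) * 60 + 10) * 1000 = 66_670_000 ms, plus the 1 s buffer -> 66_671_000

# When no delay is advertised, callers fall back to exponential backoff:
attempt = 2
fallback_ms = BASE_DELAY_MS * (2**attempt)  # 1000 * 4 = 4000 ms

# 429s are deliberately non-retryable here so the caller can rotate accounts:
assert _is_retryable_error(429, quota_error) is False
assert _is_retryable_error(503, "Service Unavailable") is True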
-def _encode_thought_signature(sig: bytes | str | None) -> str | None:
-    """Encode thought signature to base64 string."""
-    if sig is None:
-        return None
-    if isinstance(sig, bytes):
-        return b64encode(sig).decode("ascii")
-    return sig
-
-
-def _build_request(
-    param: llm_param.LLMCallParameter,
-    contents: list[Content],
-    project_id: str,
-) -> CloudCodeAssistRequest:
-    """Build Cloud Code Assist API request."""
-    request: RequestBody = {"contents": contents}
-
-    # System instruction from param.system
-    if param.system:
-        request["systemInstruction"] = {
-            "role": "user",
-            "parts": [{"text": param.system}],
-        }
-
-    # Generation config
-    generation_config: GenerationConfig = {}
-    if param.temperature is not None:
-        generation_config["temperature"] = param.temperature
-    if param.max_tokens is not None:
-        generation_config["maxOutputTokens"] = param.max_tokens
-
-    # Thinking config
-    thinking_config: ThinkingConfig | None = None
-    if param.thinking:
-        thinking_config = {"includeThoughts": True}
-        if param.thinking.budget_tokens:
-            thinking_config["thinkingBudget"] = param.thinking.budget_tokens
-        if param.thinking.reasoning_effort:
-            level = _convert_thinking_level(param.thinking.reasoning_effort)
-            if level:
-                thinking_config["thinkingLevel"] = level
-        generation_config["thinkingConfig"] = thinking_config
-
-    if generation_config:
-        request["generationConfig"] = generation_config
-
-    # Tools
-    tools = convert_tool_schema(param.tools)
-    if tools:
-        request["tools"] = tools
-        request["toolConfig"] = {"functionCallingConfig": {"mode": "AUTO"}}
-
-    return CloudCodeAssistRequest(
-        project=project_id,
-        model=str(param.model_id),
-        request=request,
-        requestType="agent",
-        userAgent="antigravity",
-        requestId=f"agent-{uuid4().hex[:16]}",
-    )
-
-
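_build_request leans on _convert_thinking_level for the thinkingLevel field; the mapping behaves like this (values straight from the table above):

assert _convert_thinking_level("high") == "HIGH"
assert _convert_thinking_level("xhigh") == "HIGH"   # no XHIGH level; collapses to HIGH
assert _convert_thinking_level("none") == "MINIMAL"
assert _convert_thinking_level("other") is None     # unmapped efforts are dropped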
-class AntigravityStreamStateManager:
-    """Manages streaming state for Antigravity LLM responses."""
-
-    def __init__(self, param_model: str) -> None:
-        self.param_model = param_model
-        self.assistant_parts: list[message.Part] = []
-        self.response_id: str | None = None
-        self.stop_reason: model.StopReason | None = None
-
-    def append_thinking_text(self, text: str) -> None:
-        append_thinking_text_part(self.assistant_parts, text, model_id=self.param_model)
-
-    def append_text(self, text: str) -> None:
-        append_text_part(self.assistant_parts, text)
-
-    def append_thinking_signature(self, signature: str) -> None:
-        self.assistant_parts.append(
-            message.ThinkingSignaturePart(
-                signature=signature,
-                model_id=self.param_model,
-                format=GOOGLE_THOUGHT_SIGNATURE_FORMAT,
-            )
-        )
-
-    def append_image(self, image_part: message.ImageFilePart) -> None:
-        self.assistant_parts.append(image_part)
-
-    def append_tool_call(self, call_id: str, name: str, arguments_json: str) -> None:
-        self.assistant_parts.append(
-            message.ToolCallPart(
-                call_id=call_id,
-                tool_name=name,
-                arguments_json=arguments_json,
-            )
-        )
-
-    def get_partial_parts(self) -> list[message.Part]:
-        return build_partial_parts(self.assistant_parts)
-
-    def get_partial_message(self) -> message.AssistantMessage | None:
-        return build_partial_message(self.assistant_parts, response_id=self.response_id)
-
-
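A short usage sketch of the state manager, with invented inputs, to show how parts accumulate during a stream:

state = AntigravityStreamStateManager(param_model="example-model")  # hypothetical model ID
state.append_thinking_text("Weighing options...")
state.append_text("Here is the plan.")
state.append_tool_call("call_1", "read_file", '{"path": "README.md"}')
# state.assistant_parts now holds thinking, text, and tool-call parts in order,
# and get_partial_message() can rebuild an AssistantMessage from them mid-stream.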
-async def _parse_sse_stream(
-    response: httpx.Response,
-    param: llm_param.LLMCallParameter,
-    metadata_tracker: MetadataTracker,
-    state: AntigravityStreamStateManager,
-) -> AsyncGenerator[message.LLMStreamItem]:
-    """Parse SSE stream from Cloud Code Assist API."""
-    tool_call_counter = 0
-    started_tool_calls: dict[str, tuple[str, str | None]] = {}  # call_id -> (name, thought_signature)
-    completed_tool_items: set[str] = set()
-    image_index = 0
-
-    async for line in response.aiter_lines():
-        if not line.startswith("data:"):
-            continue
-
-        json_str = line[5:].strip()
-        if not json_str:
-            continue
-
-        try:
-            chunk = json.loads(json_str)
-        except json.JSONDecodeError:
-            continue
-
-        response_data = chunk.get("response")
-        if not response_data:
-            continue
-
-        if state.response_id is None:
-            state.response_id = response_data.get("responseId") or uuid4().hex
-
-        # Process candidates
-        candidates = response_data.get("candidates", [])
-        candidate0 = candidates[0] if candidates else None
-        if not candidate0:
-            continue
-
-        finish_reason = candidate0.get("finishReason")
-        if finish_reason:
-            state.stop_reason = _map_finish_reason(finish_reason)
-
-        content = candidate0.get("content", {})
-        content_parts = content.get("parts", [])
-
-        for part in content_parts:
-            log_debug(debug_json(part), style="blue", debug_type=DebugType.LLM_STREAM)
-            # Handle text parts and thought signatures
-            text = part.get("text")
-            thought_signature = part.get("thoughtSignature")
-            is_thinking = part.get("thought") is True
-
-            if text:
-                metadata_tracker.record_token()
-                if is_thinking:
-                    state.append_thinking_text(text)
-                    yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
-                else:
-                    state.append_text(text)
-                    yield message.AssistantTextDelta(content=text, response_id=state.response_id)
-
-            # Handle thought signature (may come with empty text, but not for function calls)
-            if thought_signature and not part.get("functionCall"):
-                encoded_sig = _encode_thought_signature(thought_signature)
-                if encoded_sig:
-                    state.append_thinking_signature(encoded_sig)
-
-            # Handle inline_data (image generation)
-            inline_data = part.get("inlineData")
-            if inline_data and inline_data.get("data"):
-                if part.get("thought") is True:
-                    continue  # Skip thought images
-                mime_type = inline_data.get("mimeType", "image/png")
-                data = inline_data["data"]
-                data_url = f"data:{mime_type};base64,{data}"
-                try:
-                    image_part = save_assistant_image(
-                        data_url=data_url,
-                        session_id=param.session_id,
-                        response_id=state.response_id,
-                        image_index=image_index,
-                    )
-                    image_index += 1
-                    state.append_image(image_part)
-                    yield message.AssistantImageDelta(
-                        response_id=state.response_id,
-                        file_path=image_part.file_path,
-                    )
-                except ValueError:
-                    pass
-
-            # Handle function calls
-            function_call = part.get("functionCall")
-            if function_call:
-                metadata_tracker.record_token()
-                call_id = function_call.get("id") or f"call_{uuid4().hex[:8]}_{tool_call_counter}"
-                tool_call_counter += 1
-                name = function_call.get("name", "")
-                thought_signature = part.get("thoughtSignature")
-
-                if call_id not in started_tool_calls:
-                    started_tool_calls[call_id] = (name, thought_signature)
-                    yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
-
-                args = function_call.get("args")
-                if args is not None and call_id not in completed_tool_items:
-                    state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
-                    if thought_signature:
-                        encoded_sig = _encode_thought_signature(thought_signature)
-                        if encoded_sig:
-                            state.append_thinking_signature(encoded_sig)
-                    completed_tool_items.add(call_id)
-
-        # Process usage metadata
-        usage_metadata = response_data.get("usageMetadata")
-        if usage_metadata:
-            prompt_tokens = usage_metadata.get("promptTokenCount", 0)
-            cached_tokens = usage_metadata.get("cachedContentTokenCount", 0)
-            candidates_tokens = usage_metadata.get("candidatesTokenCount", 0)
-            thoughts_tokens = usage_metadata.get("thoughtsTokenCount", 0)
-            total_tokens = usage_metadata.get("totalTokenCount") or (
-                prompt_tokens + candidates_tokens + thoughts_tokens
-            )
-
-            usage = model.Usage(
-                input_tokens=prompt_tokens,
-                cached_tokens=cached_tokens,
-                output_tokens=candidates_tokens + thoughts_tokens,
-                reasoning_tokens=thoughts_tokens,
-                context_size=total_tokens,
-                context_limit=param.context_limit,
-                max_tokens=param.max_tokens,
-            )
-            metadata_tracker.set_usage(usage)
-
-    # Finalize
-    metadata_tracker.set_model_name(str(param.model_id))
-    metadata_tracker.set_response_id(state.response_id)
-    metadata = metadata_tracker.finalize()
-    yield message.AssistantMessage(
-        parts=state.assistant_parts,
-        response_id=state.response_id,
-        usage=metadata,
-        stop_reason=state.stop_reason,
-    )
-
-
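The parser above consumes Server-Sent Events whose JSON payload nests the Gemini-style response under a "response" key. A minimal, invented chunk, decoded the same way as in the loop:

sse_line = (
    'data: {"response": {"responseId": "abc123", '
    '"candidates": [{"content": {"parts": [{"text": "Hello"}]}, "finishReason": "STOP"}], '
    '"usageMetadata": {"promptTokenCount": 12, "candidatesTokenCount": 3, "totalTokenCount": 15}}}'
)
chunk = json.loads(sse_line[5:].strip())  # same "data:" stripping as above
part = chunk["response"]["candidates"][0]["content"]["parts"][0]
assert part["text"] == "Hello"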
-class AntigravityLLMStream(LLMStreamABC):
-    """LLMStream implementation for Antigravity client."""
-
-    def __init__(
-        self,
-        response: httpx.Response,
-        *,
-        param: llm_param.LLMCallParameter,
-        metadata_tracker: MetadataTracker,
-        state: AntigravityStreamStateManager,
-    ) -> None:
-        self._response = response
-        self._param = param
-        self._metadata_tracker = metadata_tracker
-        self._state = state
-        self._completed = False
-
-    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
-        return self._iterate()
-
-    async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
-        try:
-            async for item in _parse_sse_stream(
-                self._response,
-                param=self._param,
-                metadata_tracker=self._metadata_tracker,
-                state=self._state,
-            ):
-                if isinstance(item, message.AssistantMessage):
-                    self._completed = True
-                yield item
-        except httpx.HTTPError as e:
-            yield message.StreamErrorItem(error=f"HTTPError: {e}")
-            self._metadata_tracker.set_response_id(self._state.response_id)
-            yield message.AssistantMessage(
-                parts=self._state.get_partial_parts(),
-                response_id=self._state.response_id,
-                usage=self._metadata_tracker.finalize(),
-                stop_reason="error",
-            )
-
-    def get_partial_message(self) -> message.AssistantMessage | None:
-        if self._completed:
-            return None
-        return self._state.get_partial_message()
-
-
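The _completed flag drives partial-message recovery: a finished stream has nothing to salvage, while an interrupted one exposes whatever parts accumulated. A hedged sketch of that contract:

def salvage_part_count(stream: AntigravityLLMStream) -> int:
    """Illustrative helper (not in the package): count salvageable parts."""
    partial = stream.get_partial_message()
    return len(partial.parts) if partial is not None else 0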
-@register(llm_param.LLMClientProtocol.ANTIGRAVITY)
-class AntigravityClient(LLMClientABC):
-    """Antigravity LLM client using Cloud Code Assist API."""
-
-    def __init__(self, config: llm_param.LLMConfigParameter):
-        super().__init__(config)
-        self._token_manager = AntigravityTokenManager()
-        self._oauth = AntigravityOAuth(self._token_manager)
-        self._endpoint = config.base_url or DEFAULT_ENDPOINT
-        self._http_client: httpx.AsyncClient | None = None
-
-    async def _get_http_client(self) -> httpx.AsyncClient:
-        if self._http_client is None:
-            self._http_client = httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=30.0))
-        return self._http_client
-
-    def _get_credentials(self) -> tuple[str, str]:
-        """Get access token and project ID, refreshing if needed."""
-        return self._oauth.ensure_valid_token()
-
-    @classmethod
-    @override
-    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
-        return cls(config)
-
-    @override
-    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
-        param = apply_config_defaults(param, self.get_llm_config())
-        metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
-
-        # Get credentials
-        try:
-            access_token, project_id = self._get_credentials()
-        except Exception as e:
-            return error_llm_stream(metadata_tracker, error=str(e))
-
-        # Convert messages
-        contents = convert_history_to_contents(param.input, model_name=str(param.model_id))
-        request_body = _build_request(param, contents, project_id)
-
-        log_debug(
-            debug_json(request_body),
-            style="yellow",
-            debug_type=DebugType.LLM_PAYLOAD,
-        )
-
-        # Make request with retry logic
-        url = f"{self._endpoint}/v1internal:streamGenerateContent?alt=sse"
-        headers = {
-            "Authorization": f"Bearer {access_token}",
-            "Content-Type": "application/json",
-            "Accept": "text/event-stream",
-            **ANTIGRAVITY_HEADERS,
-        }
-
-        client = await self._get_http_client()
-        last_error: str | None = None
-
-        for attempt in range(MAX_RETRIES + 1):
-            try:
-                response = await client.post(
-                    url,
-                    headers=headers,
-                    json=request_body,
-                )
-
-                if response.status_code == 200:
-                    state = AntigravityStreamStateManager(param_model=str(param.model_id))
-                    return AntigravityLLMStream(
-                        response,
-                        param=param,
-                        metadata_tracker=metadata_tracker,
-                        state=state,
-                    )
-
-                error_text = response.text
-                last_error = f"Cloud Code Assist API error ({response.status_code}): {error_text}"
-
-                # Check if retryable
-                if attempt < MAX_RETRIES and _is_retryable_error(response.status_code, error_text):
-                    delay_ms = _extract_retry_delay(error_text) or (BASE_DELAY_MS * (2**attempt))
-                    await asyncio.sleep(delay_ms / 1000)
-                    # Refresh token in case it expired
-                    access_token, project_id = self._get_credentials()
-                    headers["Authorization"] = f"Bearer {access_token}"
-                    continue
-
-                break
-
-            except httpx.HTTPError as e:
-                last_error = f"HTTPError: {e}"
-                if attempt < MAX_RETRIES:
-                    delay_ms = BASE_DELAY_MS * (2**attempt)
-                    await asyncio.sleep(delay_ms / 1000)
-                    continue
-                break
-
-        return error_llm_stream(metadata_tracker, error=last_error or "Request failed")
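
A minimal driving sketch for the removed client, assuming an already-populated config and call parameter; the function name and printing loop are illustrative:

async def demo(config: llm_param.LLMConfigParameter, call: llm_param.LLMCallParameter) -> None:
    client = AntigravityClient.create(config)  # OAuth state is loaded lazily
    stream = await client.call(call)
    async for item in stream:
        if isinstance(item, message.AssistantTextDelta):
            print(item.content, end="", flush=True)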
|