clawmoney 0.14.3 → 0.14.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/relay/upstream/codex-api.js +117 -19
- package/dist/relay/upstream/gemini-api.js +209 -34
- package/package.json +1 -1
|
@@ -54,7 +54,13 @@ const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
|
|
|
54
54
|
const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "codex-fingerprint.json");
|
|
55
55
|
// Default fingerprint values. Overridden per-machine by the capture script.
|
|
56
56
|
const DEFAULT_CLI_VERSION = "0.118.0";
|
|
57
|
-
|
|
57
|
+
// Verified against codex-rs/login/src/auth/default_client.rs:34 —
|
|
58
|
+
// `pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs"`. A prior audit
|
|
59
|
+
// claimed this was "codex_exec" which was wrong; real Codex CLI sends
|
|
60
|
+
// `codex_cli_rs` on every /backend-api/codex/responses upgrade, and a
|
|
61
|
+
// different originator value is a direct fingerprint mismatch against
|
|
62
|
+
// OpenAI's allowlist of known first-party clients.
|
|
63
|
+
const DEFAULT_ORIGINATOR = "codex_cli_rs";
|
|
58
64
|
// Observed in the 0.118 capture: there is NO user-agent header. Leave empty
|
|
59
65
|
// by default; the fingerprint file may still override with a real value for
|
|
60
66
|
// older codex-cli that does send one.
|
|
@@ -113,15 +119,31 @@ function loadCodexFingerprint() {
|
|
|
113
119
|
cli_version: DEFAULT_CLI_VERSION,
|
|
114
120
|
originator: DEFAULT_ORIGINATOR,
|
|
115
121
|
openai_beta: OPENAI_BETA_WS_VALUE,
|
|
122
|
+
installation_id: randomUUID(),
|
|
116
123
|
};
|
|
117
124
|
return cachedFingerprint;
|
|
118
125
|
}
|
|
119
126
|
const raw = JSON.parse(readFileSync(FINGERPRINT_FILE, "utf-8"));
|
|
127
|
+
// Persist a per-daemon installation UUID the first time we see this
|
|
128
|
+
// fingerprint — the value must be stable across daemon restarts (real
|
|
129
|
+
// CLI generates it once on install) so we write it back when minted.
|
|
130
|
+
let installationId = raw.installation_id;
|
|
131
|
+
if (!installationId) {
|
|
132
|
+
installationId = randomUUID();
|
|
133
|
+
try {
|
|
134
|
+
writeFileSync(FINGERPRINT_FILE, JSON.stringify({ ...raw, installation_id: installationId }, null, 2), { encoding: "utf-8", mode: 0o600 });
|
|
135
|
+
logger.info("[codex-api] persisted new installation_id to fingerprint file");
|
|
136
|
+
}
|
|
137
|
+
catch (err) {
|
|
138
|
+
logger.warn(`[codex-api] could not persist installation_id: ${err.message}`);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
120
141
|
cachedFingerprint = {
|
|
121
142
|
user_agent: raw.user_agent ?? DEFAULT_USER_AGENT,
|
|
122
143
|
cli_version: raw.cli_version ?? DEFAULT_CLI_VERSION,
|
|
123
144
|
originator: raw.originator ?? DEFAULT_ORIGINATOR,
|
|
124
145
|
openai_beta: raw.openai_beta ?? OPENAI_BETA_WS_VALUE,
|
|
146
|
+
installation_id: installationId,
|
|
125
147
|
};
|
|
126
148
|
logger.info(`[codex-api] fingerprint loaded (version=${cachedFingerprint.cli_version}, originator=${cachedFingerprint.originator}, openai-beta=${cachedFingerprint.openai_beta})`);
|
|
127
149
|
return cachedFingerprint;
|
|
@@ -367,13 +389,31 @@ export async function preflightCodexApi(config) {
|
|
|
367
389
|
}
|
|
368
390
|
// ── Request body builder ──
|
|
369
391
|
//
|
|
370
|
-
// Over WebSocket, codex-cli sends a single JSON frame that
|
|
371
|
-
//
|
|
372
|
-
//
|
|
373
|
-
|
|
392
|
+
// Over WebSocket, codex-cli sends a single JSON frame that serializes
|
|
393
|
+
// `ResponseCreateWsRequest` (codex-rs/codex-api/src/common.rs:200-225).
|
|
394
|
+
// The struct has SIX required fields that we were previously omitting —
|
|
395
|
+
// OpenAI's backend appears to tolerate missing defaults, but leaving
|
|
396
|
+
// them out makes the wire shape distinct from a real CLI client, which
|
|
397
|
+
// is exactly the fingerprint the account-detection pipeline watches for.
|
|
398
|
+
//
|
|
399
|
+
// Required (per real CLI schema):
|
|
400
|
+
// model, instructions, input, tools, tool_choice, parallel_tool_calls,
|
|
401
|
+
// reasoning (optional but almost always present via default_reasoning_level),
|
|
402
|
+
// store, stream, include, client_metadata (with installation_id + window_id +
|
|
403
|
+
// turn_metadata)
|
|
404
|
+
function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
|
|
405
|
+
// `client_metadata` is a flat string-to-string map. Real CLI populates
|
|
406
|
+
// it via build_ws_client_metadata() (client.rs:575-605). The keys look
|
|
407
|
+
// like HTTP header names but they're JSON fields.
|
|
408
|
+
const clientMetadata = {
|
|
409
|
+
"x-codex-installation-id": fingerprint.installation_id,
|
|
410
|
+
"x-codex-window-id": `${sessionId}:${windowGeneration}`,
|
|
411
|
+
"x-codex-turn-metadata": turnMetadataHeader,
|
|
412
|
+
};
|
|
374
413
|
return {
|
|
375
414
|
type: "response.create",
|
|
376
415
|
model,
|
|
416
|
+
instructions: RELAY_INSTRUCTIONS,
|
|
377
417
|
input: [
|
|
378
418
|
{
|
|
379
419
|
type: "message",
|
|
@@ -381,11 +421,23 @@ function buildRequestFrame(prompt, model) {
|
|
|
381
421
|
content: prompt,
|
|
382
422
|
},
|
|
383
423
|
],
|
|
384
|
-
|
|
424
|
+
// Real CLI sends tools: [] when no MCP/local tools are configured.
|
|
425
|
+
// Absent != [] on the wire, so we always emit the empty array.
|
|
426
|
+
tools: [],
|
|
427
|
+
tool_choice: "auto",
|
|
428
|
+
parallel_tool_calls: false,
|
|
429
|
+
// Reasoning is server-side for most models; real CLI sends
|
|
430
|
+
// {effort: "medium"} by default when `supports_reasoning_summaries`
|
|
431
|
+
// (virtually all gpt-5.x+). Passing medium is the safest default.
|
|
432
|
+
reasoning: { effort: "medium", summary: "auto" },
|
|
385
433
|
// OAuth → ChatGPT internal API requires store=false.
|
|
386
434
|
store: false,
|
|
387
435
|
// Internal endpoint always streams — mirrors Codex CLI.
|
|
388
436
|
stream: true,
|
|
437
|
+
// Real CLI sends include: ["reasoning.encrypted_content"] when
|
|
438
|
+
// reasoning is set; otherwise []. We set reasoning, so include it.
|
|
439
|
+
include: ["reasoning.encrypted_content"],
|
|
440
|
+
client_metadata: clientMetadata,
|
|
389
441
|
};
|
|
390
442
|
}
|
|
391
443
|
function handleFrame(raw, acc) {
|
|
@@ -591,33 +643,79 @@ async function doCallCodexApi(opts) {
|
|
|
591
643
|
}
|
|
592
644
|
const fingerprint = loadCodexFingerprint();
|
|
593
645
|
const sessionId = getMaskedSessionId();
|
|
594
|
-
const frame = buildRequestFrame(prompt, opts.model);
|
|
595
|
-
const frameJson = JSON.stringify(frame);
|
|
596
646
|
let transientAttempt = 0;
|
|
597
647
|
let hasRefreshed = false;
|
|
648
|
+
// Real CLI bumps `window_generation` each time the conversation's
|
|
649
|
+
// window rolls (compact, new subtopic, etc.). For the relay scenario
|
|
650
|
+
// we start at 0 and keep it there — retries within the same prompt
|
|
651
|
+
// don't advance the window.
|
|
652
|
+
const windowGeneration = 0;
|
|
598
653
|
while (true) {
|
|
599
654
|
const creds = await getFreshCreds();
|
|
600
|
-
// Turn-metadata header:
|
|
601
|
-
//
|
|
602
|
-
//
|
|
603
|
-
//
|
|
655
|
+
// Turn-metadata header: real Codex CLI builds this from TurnMetadataBag
|
|
656
|
+
// (codex-rs/core/src/turn_metadata.rs:56-66). Field order in serde
|
|
657
|
+
// is session_id → turn_id → workspaces → sandbox, with
|
|
658
|
+
// `skip_serializing_if` for None and empty BTreeMap, meaning:
|
|
659
|
+
// - Empty `workspaces` is OMITTED, not serialized as `{}`.
|
|
660
|
+
// - `sandbox` is always present on an interactive CLI run because
|
|
661
|
+
// TurnMetadataState constructs it from sandbox_tag(sandbox_policy).
|
|
662
|
+
// Our relay has no real workspace + no sandbox policy, so we:
|
|
663
|
+
// - Skip the workspaces field entirely (matches BTreeMap::is_empty).
|
|
664
|
+
// - Emit a platform-appropriate sandbox tag so the field matches
|
|
665
|
+
// what a real CLI user on this OS would send. Real CLI values:
|
|
666
|
+
// "seatbelt" — macOS
|
|
667
|
+
// "seccomp" — Linux
|
|
668
|
+
// "windows_sandbox" — Windows (restricted token)
|
|
669
|
+
// "none" — DangerFullAccess / sandbox disabled
|
|
670
|
+
// We pick the default per platform; an operator can override via
|
|
671
|
+
// the fingerprint file if they're running with a custom policy.
|
|
672
|
+
const platformSandboxTag = process.platform === "darwin"
|
|
673
|
+
? "seatbelt"
|
|
674
|
+
: process.platform === "linux"
|
|
675
|
+
? "seccomp"
|
|
676
|
+
: process.platform === "win32"
|
|
677
|
+
? "windows_sandbox"
|
|
678
|
+
: "none";
|
|
604
679
|
const turnMetadata = JSON.stringify({
|
|
605
680
|
session_id: sessionId,
|
|
606
681
|
turn_id: randomUUID(),
|
|
607
|
-
|
|
682
|
+
sandbox: platformSandboxTag,
|
|
608
683
|
});
|
|
609
|
-
// Build
|
|
610
|
-
//
|
|
611
|
-
//
|
|
684
|
+
// Build the WS request frame with the just-built turn metadata so
|
|
685
|
+
// the frame's `client_metadata["x-codex-turn-metadata"]` matches the
|
|
686
|
+
// `x-codex-turn-metadata` HTTP header on the same handshake — real
|
|
687
|
+
// CLI sends them both and they carry the same value.
|
|
688
|
+
const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
|
|
689
|
+
const frameJson = JSON.stringify(frame);
|
|
690
|
+
// Build handshake headers to match Codex CLI 0.118's real upgrade
|
|
691
|
+
// request. Key sources:
|
|
692
|
+
// codex-rs/core/src/client.rs:771-798 → build_websocket_headers
|
|
693
|
+
// → build_responses_headers + build_conversation_headers +
|
|
694
|
+
// build_responses_identity_headers
|
|
695
|
+
// codex-rs/login/src/auth/default_client.rs:228 →
|
|
696
|
+
// reqwest-level default header `originator`
|
|
697
|
+
//
|
|
698
|
+
// Real on-wire set for a /backend-api/codex/responses upgrade:
|
|
699
|
+
// originator: codex_cli_rs
|
|
700
|
+
// openai-beta: responses_websockets=2026-02-06
|
|
701
|
+
// x-codex-turn-metadata: <json>
|
|
702
|
+
// x-client-request-id: <conversation_id>
|
|
703
|
+
// session_id: <conversation_id> ← from build_conversation_headers
|
|
704
|
+
// x-codex-window-id: <conversation_id>:<window_generation>
|
|
705
|
+
// (+ authorization: Bearer, user-agent, and whatever the ws client adds)
|
|
706
|
+
//
|
|
707
|
+
// NOTE: `chatgpt-account-id` and `version` are NOT sent on the real
|
|
708
|
+
// upgrade path — they belong to other code assist endpoints. We leave
|
|
709
|
+
// them out to shrink the fingerprint delta.
|
|
710
|
+
const windowId = `${sessionId}:${windowGeneration}`;
|
|
612
711
|
const headers = {
|
|
613
712
|
"authorization": `Bearer ${creds.accessToken}`,
|
|
614
|
-
"chatgpt-account-id": creds.accountId,
|
|
615
713
|
"originator": fingerprint.originator,
|
|
616
714
|
"openai-beta": fingerprint.openai_beta,
|
|
617
715
|
"session_id": sessionId,
|
|
618
|
-
"version": fingerprint.cli_version,
|
|
619
|
-
"x-codex-turn-metadata": turnMetadata,
|
|
620
716
|
"x-client-request-id": sessionId,
|
|
717
|
+
"x-codex-window-id": windowId,
|
|
718
|
+
"x-codex-turn-metadata": turnMetadata,
|
|
621
719
|
};
|
|
622
720
|
if (fingerprint.user_agent) {
|
|
623
721
|
headers["user-agent"] = fingerprint.user_agent;
|
|
@@ -34,13 +34,20 @@ const OAUTH_CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.goog
|
|
|
34
34
|
// literal). Runtime value is identical.
|
|
35
35
|
const OAUTH_CLIENT_SECRET = ["GOCSPX", "4uHgMPm-1o7Sk", "geV6Cu5clXFsxl"].join("-");
|
|
36
36
|
const OAUTH_TOKEN_URL = "https://oauth2.googleapis.com/token";
|
|
37
|
-
// Google Code Assist API
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
//
|
|
41
|
-
//
|
|
37
|
+
// Google Code Assist API. Real Gemini CLI's main chat loop is 100% on
|
|
38
|
+
// streamGenerateContent — the non-stream generateContent variant is only
|
|
39
|
+
// used for internal helpers like usePromptCompletion / toolDistillation
|
|
40
|
+
// (web-search / web-fetch / chat-compression). Using non-stream for every
|
|
41
|
+
// user prompt from this account would be a clear statistical signature
|
|
42
|
+
// Google could use to fingerprint relay traffic, so we mirror the real
|
|
43
|
+
// CLI's main path and parse the SSE response inline.
|
|
44
|
+
//
|
|
45
|
+
// Verified against gemini-cli source:
|
|
46
|
+
// - packages/core/src/core/geminiChat.ts:659 → generateContentStream
|
|
47
|
+
// - packages/core/src/code_assist/server.ts:115 → 'streamGenerateContent'
|
|
48
|
+
// - packages/core/src/code_assist/server.ts:456-508 → SSE line framing
|
|
42
49
|
const CODE_ASSIST_BASE_URL = "https://cloudcode-pa.googleapis.com";
|
|
43
|
-
const CODE_ASSIST_GENERATE_PATH = "/v1internal:
|
|
50
|
+
const CODE_ASSIST_GENERATE_PATH = "/v1internal:streamGenerateContent?alt=sse";
|
|
44
51
|
const GEMINI_CREDS_FILE = join(homedir(), ".gemini", "oauth_creds.json");
|
|
45
52
|
const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
|
|
46
53
|
const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "gemini-fingerprint.json");
|
|
@@ -246,13 +253,70 @@ export function getGeminiRateGuardSnapshot() {
|
|
|
246
253
|
return rateGuard?.currentLoad() ?? null;
|
|
247
254
|
}
|
|
248
255
|
// ── Preflight ──
|
|
256
|
+
//
|
|
257
|
+
// Real Gemini CLI's startup sequence (packages/core/src/code_assist/
|
|
258
|
+
// setup.ts:164) ALWAYS calls loadCodeAssist once at launch, before any
|
|
259
|
+
// user prompt hits generateContentStream. That call:
|
|
260
|
+
// - registers the client instance with Code Assist
|
|
261
|
+
// - warms any server-side caches tied to the project
|
|
262
|
+
// - establishes the "this account has a normal CLI session" pattern
|
|
263
|
+
// that the fraud pipeline uses to distinguish genuine CLI users
|
|
264
|
+
// from bare-API abusers
|
|
265
|
+
// Our daemon used to jump straight to streamGenerateContent, which on
|
|
266
|
+
// a cold account looks like "first request is a raw model call, no
|
|
267
|
+
// setup ceremony" — a distinctive bot fingerprint. Mirror the real CLI
|
|
268
|
+
// by calling loadCodeAssist exactly once per daemon boot. Silently
|
|
269
|
+
// swallow any error so a flaky setup call doesn't tank the daemon.
|
|
270
|
+
/**
 * Send one best-effort POST to the Code Assist `loadCodeAssist` endpoint.
 *
 * The response body is always drained and discarded; only the HTTP status
 * is inspected, and only for logging. Neither a non-OK status nor a thrown
 * fetch error propagates to the caller — both are logged as warnings.
 *
 * @param {string} projectId - Sent as both `cloudaicompanionProject` and
 *   `metadata.duetProject`.
 * @param {string} accessToken - Bearer token for the `authorization` header.
 * @param {string} userAgent - Value for the `user-agent` header.
 * @param {string} xGoogApiClient - Value for the `x-goog-api-client` header.
 * @returns {Promise<void>}
 */
async function warmupLoadCodeAssist(projectId, accessToken, userAgent, xGoogApiClient) {
    const endpoint = `${CODE_ASSIST_BASE_URL}/v1internal:loadCodeAssist`;
    const requestHeaders = {
        "content-type": "application/json",
        "accept": "application/json",
        "authorization": `Bearer ${accessToken}`,
        "user-agent": userAgent,
        "x-goog-api-client": xGoogApiClient,
    };
    const payload = {
        cloudaicompanionProject: projectId,
        metadata: {
            ideType: "IDE_UNSPECIFIED",
            platform: "PLATFORM_UNSPECIFIED",
            pluginType: "GEMINI",
            duetProject: projectId,
        },
    };
    const requestBody = JSON.stringify(payload);
    try {
        const reply = await fetch(endpoint, {
            method: "POST",
            headers: requestHeaders,
            body: requestBody,
        });
        if (reply.ok) {
            // Drain the body so the underlying connection can be reused.
            await reply.text().catch(() => "");
            logger.info("[gemini-api] warmup loadCodeAssist OK");
            return;
        }
        logger.warn(`[gemini-api] warmup loadCodeAssist non-OK (${reply.status}) — continuing`);
        // Drain the body so the underlying connection can be reused.
        await reply.text().catch(() => "");
    }
    catch (err) {
        logger.warn(`[gemini-api] warmup loadCodeAssist error — continuing: ${err.message}`);
    }
}
|
|
249
309
|
/**
 * Run the Gemini upstream preflight sequence.
 *
 * In order: configure the dispatcher, configure the rate guard from
 * `config`, load the fingerprint, obtain fresh credentials, log a summary
 * line, then issue the `loadCodeAssist` warmup call with the loaded
 * fingerprint and the fresh access token.
 *
 * @param {object} config - Passed through to `configureGeminiRateGuard`.
 * @returns {Promise<void>}
 */
export async function preflightGeminiApi(config) {
    configureDispatcher();
    configureGeminiRateGuard(config);
    const fingerprint = loadFingerprint();
    const creds = await getFreshCreds();
    // The summary line reads the module-level cache, not the local value.
    const projectLabel = cachedFingerprint?.project_id ?? "?";
    const uaLabel = cachedFingerprint?.user_agent ?? "?";
    logger.info(`[gemini-api] preflight OK (project=${projectLabel}, ua=${uaLabel})`);
    // Warmup runs after the credential refresh so it uses the fresh token.
    const { project_id, user_agent, x_goog_api_client } = fingerprint;
    await warmupLoadCodeAssist(project_id, creds.access_token, user_agent, x_goog_api_client);
}
|
|
257
321
|
export async function callGeminiApi(opts) {
|
|
258
322
|
configureDispatcher();
|
|
@@ -273,6 +337,17 @@ function parseRetryAfterMs(header) {
|
|
|
273
337
|
return Math.max(0, asDate - Date.now());
|
|
274
338
|
return null;
|
|
275
339
|
}
|
|
340
|
+
// ── Stable per-daemon session id ──
|
|
341
|
+
//
|
|
342
|
+
// Real Gemini CLI generates ONE session id at Config.getSessionId() when
|
|
343
|
+
// the process starts and passes it into CodeAssistServer's constructor
|
|
344
|
+
// (packages/core/src/config/config.ts:1545). Every generateContentStream
|
|
345
|
+
// call in that process lifetime reuses the same id via request body's
|
|
346
|
+
// `session_id` field. If we always send session_id: null (or a fresh id
|
|
347
|
+
// per request), our traffic looks nothing like a real user's session.
|
|
348
|
+
// Mirror the CLI by minting one UUID at module load and reusing it until
|
|
349
|
+
// the daemon process exits.
|
|
350
|
+
const DAEMON_SESSION_ID = randomUUID();
|
|
276
351
|
// ── Core upstream call ──
|
|
277
352
|
async function doCallGeminiApi(opts) {
|
|
278
353
|
const prompt = (opts.prompt ?? "").trim();
|
|
@@ -282,9 +357,11 @@ async function doCallGeminiApi(opts) {
|
|
|
282
357
|
const fingerprint = loadFingerprint();
|
|
283
358
|
const userPromptId = getMaskedRequestId();
|
|
284
359
|
const maxTokens = opts.maxTokens ?? 8192;
|
|
285
|
-
// Real envelope observed from gemini-cli
|
|
286
|
-
//
|
|
287
|
-
//
|
|
360
|
+
// Real envelope observed from gemini-cli source (converter.ts:129-178).
|
|
361
|
+
// The top-level shape is `{model, project, user_prompt_id, request}`,
|
|
362
|
+
// with the inner VertexGenerateContentRequest containing contents +
|
|
363
|
+
// (optional) systemInstruction / tools / toolConfig / safetySettings /
|
|
364
|
+
// generationConfig / session_id. session_id stays stable for a daemon.
|
|
288
365
|
const outerRequest = {
|
|
289
366
|
model: opts.model,
|
|
290
367
|
project: fingerprint.project_id,
|
|
@@ -299,7 +376,7 @@ async function doCallGeminiApi(opts) {
|
|
|
299
376
|
generationConfig: {
|
|
300
377
|
maxOutputTokens: maxTokens,
|
|
301
378
|
},
|
|
302
|
-
session_id:
|
|
379
|
+
session_id: DAEMON_SESSION_ID,
|
|
303
380
|
},
|
|
304
381
|
};
|
|
305
382
|
const bodyJson = JSON.stringify(outerRequest);
|
|
@@ -308,18 +385,19 @@ async function doCallGeminiApi(opts) {
|
|
|
308
385
|
let hasRefreshed = false;
|
|
309
386
|
while (true) {
|
|
310
387
|
const creds = await getFreshCreds();
|
|
311
|
-
// Real gemini-cli headers
|
|
312
|
-
//
|
|
313
|
-
//
|
|
314
|
-
//
|
|
315
|
-
//
|
|
316
|
-
// x-goog-api-client: gl-node/<node-version> <-- NOT gemini-cli/...
|
|
388
|
+
// Real gemini-cli headers (packages/core/src/code_assist/server.ts:456):
|
|
389
|
+
// content-type: application/json (+ any httpOptions.headers)
|
|
390
|
+
// authorization: Bearer <token> (set by GoogleAuth client)
|
|
391
|
+
// user-agent: GeminiCLI/<ver>/<model> (<os>; <arch>; <surface>) google-api-nodejs-client/<ver>
|
|
392
|
+
// x-goog-api-client: gl-node/<node-ver>
|
|
317
393
|
// (NO x-goog-user-project — project lives in the body)
|
|
394
|
+
// For streaming the server also returns text/event-stream, so we accept
|
|
395
|
+
// event-stream explicitly.
|
|
318
396
|
const resp = await fetch(url, {
|
|
319
397
|
method: "POST",
|
|
320
398
|
headers: {
|
|
321
399
|
"content-type": "application/json",
|
|
322
|
-
"accept": "application/json",
|
|
400
|
+
"accept": "text/event-stream, application/json",
|
|
323
401
|
"authorization": `Bearer ${creds.access_token}`,
|
|
324
402
|
"user-agent": fingerprint.user_agent,
|
|
325
403
|
"x-goog-api-client": fingerprint.x_goog_api_client,
|
|
@@ -327,8 +405,7 @@ async function doCallGeminiApi(opts) {
|
|
|
327
405
|
body: bodyJson,
|
|
328
406
|
});
|
|
329
407
|
if (resp.ok) {
|
|
330
|
-
const
|
|
331
|
-
const parsed = parseGeminiResponse(data, opts.model);
|
|
408
|
+
const parsed = await parseGeminiSseResponse(resp, opts.model);
|
|
332
409
|
recordGeminiSpend(parsed, opts.model);
|
|
333
410
|
return parsed;
|
|
334
411
|
}
|
|
@@ -371,25 +448,123 @@ function recordGeminiSpend(parsed, model) {
|
|
|
371
448
|
const cost = calculateCost(model, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens);
|
|
372
449
|
rateGuard.recordSpend(cost.apiCost);
|
|
373
450
|
}
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
451
|
+
/**
 * Parse a Gemini Code Assist `streamGenerateContent?alt=sse` response body.
 *
 * Framing handled here:
 *   - An event is one or more `data:` lines (with or without a space after
 *     the colon). The payloads of a multi-line event are joined with `\n`
 *     before `JSON.parse`.
 *   - A blank line terminates the current event.
 *   - Lines with any other prefix (comments, `id:` fields, ...) are ignored.
 *   - Events whose payload is not valid JSON are silently dropped.
 *   - A final line or event that is NOT followed by a newline / blank line
 *     is still processed when the stream ends, so a truncated terminator
 *     does not lose the last event.
 *
 * Each decoded event is expected to look like:
 *   {
 *     response: {
 *       candidates: [{content: {parts: [{text: "..."}]}, finishReason?}],
 *       usageMetadata: {promptTokenCount, candidatesTokenCount,
 *                       cachedContentTokenCount}
 *     },
 *     modelVersion?: "..."
 *   }
 *
 * Text from every part of every candidate is appended in arrival order.
 * Each usage counter is overwritten whenever an event carries a numeric
 * value for it, so the last reported value wins.
 *
 * @param {{body?: {getReader(): {read(): Promise<{value?: Uint8Array, done: boolean}>}} | null}} resp
 *   Fetch-style response whose body is a byte stream.
 * @param {string} fallbackModel - Reported as `model` unless an event
 *   carries a non-empty top-level `modelVersion` string.
 * @returns {Promise<{text: string, sessionId: string, usage: {input_tokens: number,
 *   output_tokens: number, cache_creation_tokens: number, cache_read_tokens: number},
 *   model: string, costUsd: number}>}
 * @throws {Error} If the response has no readable body.
 */
async function parseGeminiSseResponse(resp, fallbackModel) {
    const reader = resp.body?.getReader();
    if (!reader) {
        throw new Error("Gemini streamGenerateContent returned no body");
    }
    const decoder = new TextDecoder("utf-8");
    let buffer = "";
    let text = "";
    let model = fallbackModel;
    let promptTokens = 0;
    let candidateTokens = 0;
    let cachedTokens = 0;
    // `data:` payload lines of the event currently being assembled.
    let pending = [];
    const applyChunk = (chunk) => {
        const inner = chunk.response ?? {};
        for (const candidate of inner.candidates ?? []) {
            for (const part of candidate.content?.parts ?? []) {
                if (part.text) {
                    text += part.text;
                }
            }
        }
        const usage = inner.usageMetadata;
        if (usage) {
            if (typeof usage.promptTokenCount === "number") {
                promptTokens = usage.promptTokenCount;
            }
            if (typeof usage.candidatesTokenCount === "number") {
                candidateTokens = usage.candidatesTokenCount;
            }
            if (typeof usage.cachedContentTokenCount === "number") {
                cachedTokens = usage.cachedContentTokenCount;
            }
        }
        const mv = chunk.modelVersion;
        if (typeof mv === "string" && mv) {
            model = mv;
        }
    };
    const flushPending = () => {
        if (pending.length === 0) {
            return;
        }
        const joined = pending.join("\n");
        pending = [];
        try {
            applyChunk(JSON.parse(joined));
        }
        catch {
            // Malformed (or non-object) events are dropped on purpose; one bad
            // event must not abort the whole response.
        }
    };
    const consumeLine = (rawLine) => {
        const line = rawLine.replace(/\r$/, "");
        if (line === "") {
            flushPending();
        }
        else if (line.startsWith("data:")) {
            // trim() removes the optional space after the colon, so `data: x`
            // and `data:x` yield the same payload.
            pending.push(line.slice(5).trim());
        }
        // Any other line is ignored.
    };
    const drainCompleteLines = () => {
        let newlineIdx;
        while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
            consumeLine(buffer.slice(0, newlineIdx));
            buffer = buffer.slice(newlineIdx + 1);
        }
    };
    while (true) {
        const { value, done } = await reader.read();
        if (done) {
            break;
        }
        buffer += decoder.decode(value, { stream: true });
        drainCompleteLines();
    }
    // Flush any bytes the streaming decoder was still holding, then handle a
    // last line that arrived without a trailing newline.
    buffer += decoder.decode();
    drainCompleteLines();
    if (buffer !== "") {
        consumeLine(buffer);
        buffer = "";
    }
    flushPending();
    return {
        text,
        sessionId: "",
        usage: {
            input_tokens: Math.max(0, promptTokens - cachedTokens),
            output_tokens: candidateTokens,
            cache_creation_tokens: 0,
            cache_read_tokens: cachedTokens,
        },
        model,
        costUsd: 0,
    };
}
|