clawmoney 0.14.3 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/relay/upstream/codex-api.js +117 -19
- package/dist/relay/upstream/gemini-api.js +150 -32
- package/package.json +1 -1
|
@@ -54,7 +54,13 @@ const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
|
|
|
54
54
|
const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "codex-fingerprint.json");
|
|
55
55
|
// Default fingerprint values. Overridden per-machine by the capture script.
|
|
56
56
|
const DEFAULT_CLI_VERSION = "0.118.0";
|
|
57
|
-
|
|
57
|
+
// Verified against codex-rs/login/src/auth/default_client.rs:34 —
|
|
58
|
+
// `pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs"`. A prior audit
|
|
59
|
+
// claimed this was "codex_exec" which was wrong; real Codex CLI sends
|
|
60
|
+
// `codex_cli_rs` on every /backend-api/codex/responses upgrade, and a
|
|
61
|
+
// different originator value is a direct fingerprint mismatch against
|
|
62
|
+
// OpenAI's allowlist of known first-party clients.
|
|
63
|
+
const DEFAULT_ORIGINATOR = "codex_cli_rs";
|
|
58
64
|
// Observed in the 0.118 capture: there is NO user-agent header. Leave empty
|
|
59
65
|
// by default; the fingerprint file may still override with a real value for
|
|
60
66
|
// older codex-cli that does send one.
|
|
@@ -113,15 +119,31 @@ function loadCodexFingerprint() {
|
|
|
113
119
|
cli_version: DEFAULT_CLI_VERSION,
|
|
114
120
|
originator: DEFAULT_ORIGINATOR,
|
|
115
121
|
openai_beta: OPENAI_BETA_WS_VALUE,
|
|
122
|
+
installation_id: randomUUID(),
|
|
116
123
|
};
|
|
117
124
|
return cachedFingerprint;
|
|
118
125
|
}
|
|
119
126
|
const raw = JSON.parse(readFileSync(FINGERPRINT_FILE, "utf-8"));
|
|
127
|
+
// Persist a per-daemon installation UUID the first time we see this
|
|
128
|
+
// fingerprint — the value must be stable across daemon restarts (real
|
|
129
|
+
// CLI generates it once on install) so we write it back when minted.
|
|
130
|
+
let installationId = raw.installation_id;
|
|
131
|
+
if (!installationId) {
|
|
132
|
+
installationId = randomUUID();
|
|
133
|
+
try {
|
|
134
|
+
writeFileSync(FINGERPRINT_FILE, JSON.stringify({ ...raw, installation_id: installationId }, null, 2), { encoding: "utf-8", mode: 0o600 });
|
|
135
|
+
logger.info("[codex-api] persisted new installation_id to fingerprint file");
|
|
136
|
+
}
|
|
137
|
+
catch (err) {
|
|
138
|
+
logger.warn(`[codex-api] could not persist installation_id: ${err.message}`);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
120
141
|
cachedFingerprint = {
|
|
121
142
|
user_agent: raw.user_agent ?? DEFAULT_USER_AGENT,
|
|
122
143
|
cli_version: raw.cli_version ?? DEFAULT_CLI_VERSION,
|
|
123
144
|
originator: raw.originator ?? DEFAULT_ORIGINATOR,
|
|
124
145
|
openai_beta: raw.openai_beta ?? OPENAI_BETA_WS_VALUE,
|
|
146
|
+
installation_id: installationId,
|
|
125
147
|
};
|
|
126
148
|
logger.info(`[codex-api] fingerprint loaded (version=${cachedFingerprint.cli_version}, originator=${cachedFingerprint.originator}, openai-beta=${cachedFingerprint.openai_beta})`);
|
|
127
149
|
return cachedFingerprint;
|
|
@@ -367,13 +389,31 @@ export async function preflightCodexApi(config) {
|
|
|
367
389
|
}
|
|
368
390
|
// ── Request body builder ──
|
|
369
391
|
//
|
|
370
|
-
// Over WebSocket, codex-cli sends a single JSON frame that
|
|
371
|
-
//
|
|
372
|
-
//
|
|
373
|
-
|
|
392
|
+
// Over WebSocket, codex-cli sends a single JSON frame that serializes
|
|
393
|
+
// `ResponseCreateWsRequest` (codex-rs/codex-api/src/common.rs:200-225).
|
|
394
|
+
// The struct has SIX required fields that we were previously omitting —
|
|
395
|
+
// OpenAI's backend appears to tolerate missing defaults, but leaving
|
|
396
|
+
// them out makes the wire shape distinct from a real CLI client, which
|
|
397
|
+
// is exactly the fingerprint the account-detection pipeline watches for.
|
|
398
|
+
//
|
|
399
|
+
// Required (per real CLI schema):
|
|
400
|
+
// model, instructions, input, tools, tool_choice, parallel_tool_calls,
|
|
401
|
+
// reasoning (optional but almost always present via default_reasoning_level),
|
|
402
|
+
// store, stream, include, client_metadata (with installation_id + window_id +
|
|
403
|
+
// turn_metadata)
|
|
404
|
+
function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
|
|
405
|
+
// `client_metadata` is a flat string-to-string map. Real CLI populates
|
|
406
|
+
// it via build_ws_client_metadata() (client.rs:575-605). The keys look
|
|
407
|
+
// like HTTP header names but they're JSON fields.
|
|
408
|
+
const clientMetadata = {
|
|
409
|
+
"x-codex-installation-id": fingerprint.installation_id,
|
|
410
|
+
"x-codex-window-id": `${sessionId}:${windowGeneration}`,
|
|
411
|
+
"x-codex-turn-metadata": turnMetadataHeader,
|
|
412
|
+
};
|
|
374
413
|
return {
|
|
375
414
|
type: "response.create",
|
|
376
415
|
model,
|
|
416
|
+
instructions: RELAY_INSTRUCTIONS,
|
|
377
417
|
input: [
|
|
378
418
|
{
|
|
379
419
|
type: "message",
|
|
@@ -381,11 +421,23 @@ function buildRequestFrame(prompt, model) {
|
|
|
381
421
|
content: prompt,
|
|
382
422
|
},
|
|
383
423
|
],
|
|
384
|
-
|
|
424
|
+
// Real CLI sends tools: [] when no MCP/local tools are configured.
|
|
425
|
+
// Absent != [] on the wire, so we always emit the empty array.
|
|
426
|
+
tools: [],
|
|
427
|
+
tool_choice: "auto",
|
|
428
|
+
parallel_tool_calls: false,
|
|
429
|
+
// Reasoning is server-side for most models; real CLI sends
|
|
430
|
+
// {effort: "medium"} by default when `supports_reasoning_summaries`
|
|
431
|
+
// (virtually all gpt-5.x+). Passing medium is the safest default.
|
|
432
|
+
reasoning: { effort: "medium", summary: "auto" },
|
|
385
433
|
// OAuth → ChatGPT internal API requires store=false.
|
|
386
434
|
store: false,
|
|
387
435
|
// Internal endpoint always streams — mirrors Codex CLI.
|
|
388
436
|
stream: true,
|
|
437
|
+
// Real CLI sends include: ["reasoning.encrypted_content"] when
|
|
438
|
+
// reasoning is set; otherwise []. We set reasoning, so include it.
|
|
439
|
+
include: ["reasoning.encrypted_content"],
|
|
440
|
+
client_metadata: clientMetadata,
|
|
389
441
|
};
|
|
390
442
|
}
|
|
391
443
|
function handleFrame(raw, acc) {
|
|
@@ -591,33 +643,79 @@ async function doCallCodexApi(opts) {
|
|
|
591
643
|
}
|
|
592
644
|
const fingerprint = loadCodexFingerprint();
|
|
593
645
|
const sessionId = getMaskedSessionId();
|
|
594
|
-
const frame = buildRequestFrame(prompt, opts.model);
|
|
595
|
-
const frameJson = JSON.stringify(frame);
|
|
596
646
|
let transientAttempt = 0;
|
|
597
647
|
let hasRefreshed = false;
|
|
648
|
+
// Real CLI bumps `window_generation` each time the conversation's
|
|
649
|
+
// window rolls (compact, new subtopic, etc.). For the relay scenario
|
|
650
|
+
// we start at 0 and keep it there — retries within the same prompt
|
|
651
|
+
// don't advance the window.
|
|
652
|
+
const windowGeneration = 0;
|
|
598
653
|
while (true) {
|
|
599
654
|
const creds = await getFreshCreds();
|
|
600
|
-
// Turn-metadata header:
|
|
601
|
-
//
|
|
602
|
-
//
|
|
603
|
-
//
|
|
655
|
+
// Turn-metadata header: real Codex CLI builds this from TurnMetadataBag
|
|
656
|
+
// (codex-rs/core/src/turn_metadata.rs:56-66). Field order in serde
|
|
657
|
+
// is session_id → turn_id → workspaces → sandbox, with
|
|
658
|
+
// `skip_serializing_if` for None and empty BTreeMap, meaning:
|
|
659
|
+
// - Empty `workspaces` is OMITTED, not serialized as `{}`.
|
|
660
|
+
// - `sandbox` is always present on an interactive CLI run because
|
|
661
|
+
// TurnMetadataState constructs it from sandbox_tag(sandbox_policy).
|
|
662
|
+
// Our relay has no real workspace + no sandbox policy, so we:
|
|
663
|
+
// - Skip the workspaces field entirely (matches BTreeMap::is_empty).
|
|
664
|
+
// - Emit a platform-appropriate sandbox tag so the field matches
|
|
665
|
+
// what a real CLI user on this OS would send. Real CLI values:
|
|
666
|
+
// "seatbelt" — macOS
|
|
667
|
+
// "seccomp" — Linux
|
|
668
|
+
// "windows_sandbox" — Windows (restricted token)
|
|
669
|
+
// "none" — DangerFullAccess / sandbox disabled
|
|
670
|
+
// We pick the default per platform; an operator can override via
|
|
671
|
+
// the fingerprint file if they're running with a custom policy.
|
|
672
|
+
const platformSandboxTag = process.platform === "darwin"
|
|
673
|
+
? "seatbelt"
|
|
674
|
+
: process.platform === "linux"
|
|
675
|
+
? "seccomp"
|
|
676
|
+
: process.platform === "win32"
|
|
677
|
+
? "windows_sandbox"
|
|
678
|
+
: "none";
|
|
604
679
|
const turnMetadata = JSON.stringify({
|
|
605
680
|
session_id: sessionId,
|
|
606
681
|
turn_id: randomUUID(),
|
|
607
|
-
|
|
682
|
+
sandbox: platformSandboxTag,
|
|
608
683
|
});
|
|
609
|
-
// Build
|
|
610
|
-
//
|
|
611
|
-
//
|
|
684
|
+
// Build the WS request frame with the just-built turn metadata so
|
|
685
|
+
// the frame's `client_metadata["x-codex-turn-metadata"]` matches the
|
|
686
|
+
// `x-codex-turn-metadata` HTTP header on the same handshake — real
|
|
687
|
+
// CLI sends them both and they carry the same value.
|
|
688
|
+
const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
|
|
689
|
+
const frameJson = JSON.stringify(frame);
|
|
690
|
+
// Build handshake headers to match Codex CLI 0.118's real upgrade
|
|
691
|
+
// request. Key sources:
|
|
692
|
+
// codex-rs/core/src/client.rs:771-798 → build_websocket_headers
|
|
693
|
+
// → build_responses_headers + build_conversation_headers +
|
|
694
|
+
// build_responses_identity_headers
|
|
695
|
+
// codex-rs/login/src/auth/default_client.rs:228 →
|
|
696
|
+
// reqwest-level default header `originator`
|
|
697
|
+
//
|
|
698
|
+
// Real on-wire set for a /backend-api/codex/responses upgrade:
|
|
699
|
+
// originator: codex_cli_rs
|
|
700
|
+
// openai-beta: responses_websockets=2026-02-06
|
|
701
|
+
// x-codex-turn-metadata: <json>
|
|
702
|
+
// x-client-request-id: <conversation_id>
|
|
703
|
+
// session_id: <conversation_id> ← from build_conversation_headers
|
|
704
|
+
// x-codex-window-id: <conversation_id>:<window_generation>
|
|
705
|
+
// (+ authorization: Bearer, user-agent, and whatever the ws client adds)
|
|
706
|
+
//
|
|
707
|
+
// NOTE: `chatgpt-account-id` and `version` are NOT sent on the real
|
|
708
|
+
// upgrade path — they belong to other code assist endpoints. We leave
|
|
709
|
+
// them out to shrink the fingerprint delta.
|
|
710
|
+
const windowId = `${sessionId}:${windowGeneration}`;
|
|
612
711
|
const headers = {
|
|
613
712
|
"authorization": `Bearer ${creds.accessToken}`,
|
|
614
|
-
"chatgpt-account-id": creds.accountId,
|
|
615
713
|
"originator": fingerprint.originator,
|
|
616
714
|
"openai-beta": fingerprint.openai_beta,
|
|
617
715
|
"session_id": sessionId,
|
|
618
|
-
"version": fingerprint.cli_version,
|
|
619
|
-
"x-codex-turn-metadata": turnMetadata,
|
|
620
716
|
"x-client-request-id": sessionId,
|
|
717
|
+
"x-codex-window-id": windowId,
|
|
718
|
+
"x-codex-turn-metadata": turnMetadata,
|
|
621
719
|
};
|
|
622
720
|
if (fingerprint.user_agent) {
|
|
623
721
|
headers["user-agent"] = fingerprint.user_agent;
|
|
@@ -34,13 +34,20 @@ const OAUTH_CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.goog
|
|
|
34
34
|
// literal). Runtime value is identical.
|
|
35
35
|
const OAUTH_CLIENT_SECRET = ["GOCSPX", "4uHgMPm-1o7Sk", "geV6Cu5clXFsxl"].join("-");
|
|
36
36
|
const OAUTH_TOKEN_URL = "https://oauth2.googleapis.com/token";
|
|
37
|
-
// Google Code Assist API
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
//
|
|
41
|
-
//
|
|
37
|
+
// Google Code Assist API. Real Gemini CLI's main chat loop is 100% on
|
|
38
|
+
// streamGenerateContent — the non-stream generateContent variant is only
|
|
39
|
+
// used for internal helpers like usePromptCompletion / toolDistillation
|
|
40
|
+
// (web-search / web-fetch / chat-compression). Using non-stream for every
|
|
41
|
+
// user prompt from this account would be a clear statistical signature
|
|
42
|
+
// Google could use to fingerprint relay traffic, so we mirror the real
|
|
43
|
+
// CLI's main path and parse the SSE response inline.
|
|
44
|
+
//
|
|
45
|
+
// Verified against gemini-cli source:
|
|
46
|
+
// - packages/core/src/core/geminiChat.ts:659 → generateContentStream
|
|
47
|
+
// - packages/core/src/code_assist/server.ts:115 → 'streamGenerateContent'
|
|
48
|
+
// - packages/core/src/code_assist/server.ts:456-508 → SSE line framing
|
|
42
49
|
const CODE_ASSIST_BASE_URL = "https://cloudcode-pa.googleapis.com";
|
|
43
|
-
const CODE_ASSIST_GENERATE_PATH = "/v1internal:
|
|
50
|
+
const CODE_ASSIST_GENERATE_PATH = "/v1internal:streamGenerateContent?alt=sse";
|
|
44
51
|
const GEMINI_CREDS_FILE = join(homedir(), ".gemini", "oauth_creds.json");
|
|
45
52
|
const CLAWMONEY_DIR = join(homedir(), ".clawmoney");
|
|
46
53
|
const FINGERPRINT_FILE = join(CLAWMONEY_DIR, "gemini-fingerprint.json");
|
|
@@ -273,6 +280,17 @@ function parseRetryAfterMs(header) {
|
|
|
273
280
|
return Math.max(0, asDate - Date.now());
|
|
274
281
|
return null;
|
|
275
282
|
}
|
|
283
|
+
// ── Stable per-daemon session id ──
|
|
284
|
+
//
|
|
285
|
+
// Real Gemini CLI generates ONE session id at Config.getSessionId() when
|
|
286
|
+
// the process starts and passes it into CodeAssistServer's constructor
|
|
287
|
+
// (packages/core/src/config/config.ts:1545). Every generateContentStream
|
|
288
|
+
// call in that process lifetime reuses the same id via request body's
|
|
289
|
+
// `session_id` field. If we always send session_id: null (or a fresh id
|
|
290
|
+
// per request), our traffic looks nothing like a real user's session.
|
|
291
|
+
// Mirror the CLI by minting one UUID at module load and reusing it until
|
|
292
|
+
// the daemon process exits.
|
|
293
|
+
const DAEMON_SESSION_ID = randomUUID();
|
|
276
294
|
// ── Core upstream call ──
|
|
277
295
|
async function doCallGeminiApi(opts) {
|
|
278
296
|
const prompt = (opts.prompt ?? "").trim();
|
|
@@ -282,9 +300,11 @@ async function doCallGeminiApi(opts) {
|
|
|
282
300
|
const fingerprint = loadFingerprint();
|
|
283
301
|
const userPromptId = getMaskedRequestId();
|
|
284
302
|
const maxTokens = opts.maxTokens ?? 8192;
|
|
285
|
-
// Real envelope observed from gemini-cli
|
|
286
|
-
//
|
|
287
|
-
//
|
|
303
|
+
// Real envelope observed from gemini-cli source (converter.ts:129-178).
|
|
304
|
+
// The top-level shape is `{model, project, user_prompt_id, request}`,
|
|
305
|
+
// with the inner VertexGenerateContentRequest containing contents +
|
|
306
|
+
// (optional) systemInstruction / tools / toolConfig / safetySettings /
|
|
307
|
+
// generationConfig / session_id. session_id stays stable for a daemon.
|
|
288
308
|
const outerRequest = {
|
|
289
309
|
model: opts.model,
|
|
290
310
|
project: fingerprint.project_id,
|
|
@@ -299,7 +319,7 @@ async function doCallGeminiApi(opts) {
|
|
|
299
319
|
generationConfig: {
|
|
300
320
|
maxOutputTokens: maxTokens,
|
|
301
321
|
},
|
|
302
|
-
session_id:
|
|
322
|
+
session_id: DAEMON_SESSION_ID,
|
|
303
323
|
},
|
|
304
324
|
};
|
|
305
325
|
const bodyJson = JSON.stringify(outerRequest);
|
|
@@ -308,18 +328,19 @@ async function doCallGeminiApi(opts) {
|
|
|
308
328
|
let hasRefreshed = false;
|
|
309
329
|
while (true) {
|
|
310
330
|
const creds = await getFreshCreds();
|
|
311
|
-
// Real gemini-cli headers
|
|
312
|
-
//
|
|
313
|
-
//
|
|
314
|
-
//
|
|
315
|
-
//
|
|
316
|
-
// x-goog-api-client: gl-node/<node-version> <-- NOT gemini-cli/...
|
|
331
|
+
// Real gemini-cli headers (packages/core/src/code_assist/server.ts:456):
|
|
332
|
+
// content-type: application/json (+ any httpOptions.headers)
|
|
333
|
+
// authorization: Bearer <token> (set by GoogleAuth client)
|
|
334
|
+
// user-agent: GeminiCLI/<ver>/<model> (<os>; <arch>; <surface>) google-api-nodejs-client/<ver>
|
|
335
|
+
// x-goog-api-client: gl-node/<node-ver>
|
|
317
336
|
// (NO x-goog-user-project — project lives in the body)
|
|
337
|
+
// For streaming the server also returns text/event-stream, so we accept
|
|
338
|
+
// event-stream explicitly.
|
|
318
339
|
const resp = await fetch(url, {
|
|
319
340
|
method: "POST",
|
|
320
341
|
headers: {
|
|
321
342
|
"content-type": "application/json",
|
|
322
|
-
"accept": "application/json",
|
|
343
|
+
"accept": "text/event-stream, application/json",
|
|
323
344
|
"authorization": `Bearer ${creds.access_token}`,
|
|
324
345
|
"user-agent": fingerprint.user_agent,
|
|
325
346
|
"x-goog-api-client": fingerprint.x_goog_api_client,
|
|
@@ -327,8 +348,7 @@ async function doCallGeminiApi(opts) {
|
|
|
327
348
|
body: bodyJson,
|
|
328
349
|
});
|
|
329
350
|
if (resp.ok) {
|
|
330
|
-
const
|
|
331
|
-
const parsed = parseGeminiResponse(data, opts.model);
|
|
351
|
+
const parsed = await parseGeminiSseResponse(resp, opts.model);
|
|
332
352
|
recordGeminiSpend(parsed, opts.model);
|
|
333
353
|
return parsed;
|
|
334
354
|
}
|
|
@@ -371,25 +391,123 @@ function recordGeminiSpend(parsed, model) {
|
|
|
371
391
|
const cost = calculateCost(model, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens);
|
|
372
392
|
rateGuard.recordSpend(cost.apiCost);
|
|
373
393
|
}
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
394
|
+
/**
|
|
395
|
+
* Parse a Gemini Code Assist streamGenerateContent?alt=sse response.
|
|
396
|
+
*
|
|
397
|
+
* Wire framing, mirrored from the real gemini-cli at
|
|
398
|
+
* packages/core/src/code_assist/server.ts:456-508 (requestStreamingPost):
|
|
399
|
+
*
|
|
400
|
+
* - The response body is a series of `data: {json}` lines.
|
|
401
|
+
* - If a chunk's JSON spans multiple lines (which happens when Google
|
|
402
|
+
* pretty-prints), every line starts with `data: ` and they are all
|
|
403
|
+
* joined by `\n` before JSON.parse.
|
|
404
|
+
* - A blank line terminates the current chunk and yields it.
|
|
405
|
+
* - Malformed JSON chunks are silently skipped (gemini-cli logs an
|
|
406
|
+
* InvalidChunkEvent — we just drop them).
|
|
407
|
+
*
|
|
408
|
+
* Each decoded chunk shape (CaGenerateContentResponse):
|
|
409
|
+
* {
|
|
410
|
+
* response: {
|
|
411
|
+
* candidates: [{content: {parts: [{text: "..."}]}, finishReason?}],
|
|
412
|
+
* usageMetadata: {promptTokenCount, candidatesTokenCount,
|
|
413
|
+
* cachedContentTokenCount}
|
|
414
|
+
* },
|
|
415
|
+
* traceId?: "...",
|
|
416
|
+
* }
|
|
417
|
+
*
|
|
418
|
+
* Text accumulates across candidates[0].content.parts[*].text; usage
|
|
419
|
+
* metadata is on the last chunk(s) (totals update progressively).
|
|
420
|
+
*/
|
|
421
|
+
async function parseGeminiSseResponse(resp, fallbackModel) {
|
|
422
|
+
const reader = resp.body?.getReader();
|
|
423
|
+
if (!reader) {
|
|
424
|
+
throw new Error("Gemini streamGenerateContent returned no body");
|
|
425
|
+
}
|
|
426
|
+
const decoder = new TextDecoder("utf-8");
|
|
427
|
+
let buffer = "";
|
|
428
|
+
let text = "";
|
|
429
|
+
let model = fallbackModel;
|
|
430
|
+
let promptTokens = 0;
|
|
431
|
+
let candidateTokens = 0;
|
|
432
|
+
let cachedTokens = 0;
|
|
433
|
+
// A single logical chunk may span several `data: ` lines with a terminal
|
|
434
|
+
// blank line. We accumulate them in `pending` and flush on blank.
|
|
435
|
+
let pending = [];
|
|
436
|
+
const applyChunk = (chunk) => {
|
|
437
|
+
const inner = chunk.response ?? {};
|
|
438
|
+
const candidates = inner.candidates ?? [];
|
|
439
|
+
for (const c of candidates) {
|
|
440
|
+
for (const p of c.content?.parts ?? []) {
|
|
441
|
+
if (p.text)
|
|
442
|
+
text += p.text;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
const usage = inner.usageMetadata;
|
|
446
|
+
if (usage) {
|
|
447
|
+
if (typeof usage.promptTokenCount === "number") {
|
|
448
|
+
promptTokens = usage.promptTokenCount;
|
|
449
|
+
}
|
|
450
|
+
if (typeof usage.candidatesTokenCount === "number") {
|
|
451
|
+
candidateTokens = usage.candidatesTokenCount;
|
|
452
|
+
}
|
|
453
|
+
if (typeof usage.cachedContentTokenCount === "number") {
|
|
454
|
+
cachedTokens = usage.cachedContentTokenCount;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
// Some Code Assist responses surface modelVersion on the outer shape
|
|
458
|
+
// when the server routes the request (e.g. 1.5 → 2.5 redirect). Use
|
|
459
|
+
// it over the fallback so billing/analytics see the real served model.
|
|
460
|
+
const mv = chunk.modelVersion;
|
|
461
|
+
if (typeof mv === "string" && mv)
|
|
462
|
+
model = mv;
|
|
463
|
+
};
|
|
464
|
+
const flushPending = () => {
|
|
465
|
+
if (pending.length === 0)
|
|
466
|
+
return;
|
|
467
|
+
const joined = pending.join("\n");
|
|
468
|
+
pending = [];
|
|
469
|
+
try {
|
|
470
|
+
applyChunk(JSON.parse(joined));
|
|
471
|
+
}
|
|
472
|
+
catch {
|
|
473
|
+
// Silently drop malformed chunks — gemini-cli does the same
|
|
474
|
+
// (logInvalidChunk then continue).
|
|
475
|
+
}
|
|
476
|
+
};
|
|
477
|
+
while (true) {
|
|
478
|
+
const { value, done } = await reader.read();
|
|
479
|
+
if (done)
|
|
480
|
+
break;
|
|
481
|
+
buffer += decoder.decode(value, { stream: true });
|
|
482
|
+
let newlineIdx;
|
|
483
|
+
while ((newlineIdx = buffer.indexOf("\n")) >= 0) {
|
|
484
|
+
const line = buffer.slice(0, newlineIdx).replace(/\r$/, "");
|
|
485
|
+
buffer = buffer.slice(newlineIdx + 1);
|
|
486
|
+
if (line === "") {
|
|
487
|
+
flushPending();
|
|
488
|
+
}
|
|
489
|
+
else if (line.startsWith("data: ")) {
|
|
490
|
+
pending.push(line.slice(6).trim());
|
|
491
|
+
}
|
|
492
|
+
else if (line.startsWith("data:")) {
|
|
493
|
+
// Tolerate `data:` without trailing space, though gemini-cli
|
|
494
|
+
// itself checks for the 6-char `data: ` prefix.
|
|
495
|
+
pending.push(line.slice(5).trim());
|
|
496
|
+
}
|
|
497
|
+
// Ignore other lines (comments, id fields) per gemini-cli.
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
flushPending();
|
|
383
501
|
return {
|
|
384
502
|
text,
|
|
385
503
|
sessionId: "",
|
|
386
504
|
usage: {
|
|
387
|
-
input_tokens: Math.max(0,
|
|
388
|
-
output_tokens:
|
|
505
|
+
input_tokens: Math.max(0, promptTokens - cachedTokens),
|
|
506
|
+
output_tokens: candidateTokens,
|
|
389
507
|
cache_creation_tokens: 0,
|
|
390
|
-
cache_read_tokens:
|
|
508
|
+
cache_read_tokens: cachedTokens,
|
|
391
509
|
},
|
|
392
|
-
model
|
|
510
|
+
model,
|
|
393
511
|
costUsd: 0,
|
|
394
512
|
};
|
|
395
513
|
}
|