clawmoney 0.14.4 → 0.14.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -401,7 +401,7 @@ export async function preflightCodexApi(config) {
|
|
|
401
401
|
// reasoning (optional but almost always present via default_reasoning_level),
|
|
402
402
|
// store, stream, include, client_metadata (with installation_id + window_id +
|
|
403
403
|
// turn_metadata)
|
|
404
|
-
function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
|
|
404
|
+
function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
|
|
405
405
|
// `client_metadata` is a flat string-to-string map. Real CLI populates
|
|
406
406
|
// it via build_ws_client_metadata() (client.rs:575-605). The keys look
|
|
407
407
|
// like HTTP header names but they're JSON fields.
|
|
@@ -410,7 +410,7 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
|
|
|
410
410
|
"x-codex-window-id": `${sessionId}:${windowGeneration}`,
|
|
411
411
|
"x-codex-turn-metadata": turnMetadataHeader,
|
|
412
412
|
};
|
|
413
|
-
|
|
413
|
+
const frame = {
|
|
414
414
|
type: "response.create",
|
|
415
415
|
model,
|
|
416
416
|
instructions: RELAY_INSTRUCTIONS,
|
|
@@ -439,6 +439,15 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
|
|
|
439
439
|
include: ["reasoning.encrypted_content"],
|
|
440
440
|
client_metadata: clientMetadata,
|
|
441
441
|
};
|
|
442
|
+
if (warmup) {
|
|
443
|
+
// Real CLI's prewarm flow sets `generate: false` on the first frame
|
|
444
|
+
// of each turn (codex-rs/core/src/client.rs:1283-1285). The server
|
|
445
|
+
// replies with a response.completed event but does NOT generate
|
|
446
|
+
// tokens, so the warmup is cheap. The real frame then follows on
|
|
447
|
+
// the SAME WebSocket session.
|
|
448
|
+
frame.generate = false;
|
|
449
|
+
}
|
|
450
|
+
return frame;
|
|
442
451
|
}
|
|
443
452
|
function handleFrame(raw, acc) {
|
|
444
453
|
let evt;
|
|
@@ -681,12 +690,28 @@ async function doCallCodexApi(opts) {
|
|
|
681
690
|
turn_id: randomUUID(),
|
|
682
691
|
sandbox: platformSandboxTag,
|
|
683
692
|
});
|
|
684
|
-
// Build the WS
|
|
685
|
-
//
|
|
686
|
-
//
|
|
687
|
-
//
|
|
688
|
-
|
|
689
|
-
|
|
693
|
+
// Build TWO frames for the same WS session — real Codex CLI's turn
|
|
694
|
+
// flow is:
|
|
695
|
+
// 1. open WebSocket
|
|
696
|
+
// 2. send prewarm frame `{...request, generate: false}`
|
|
697
|
+
// 3. wait for response.completed (server returns completed with
|
|
698
|
+
// no generated tokens — warmup is cheap)
|
|
699
|
+
// 4. send the real frame on the SAME connection
|
|
700
|
+
// 5. wait for response.completed with the actual stream output
|
|
701
|
+
// 6. close WebSocket
|
|
702
|
+
// See codex-rs/core/src/client.rs:1377-1425 (prewarm_websocket) and
|
|
703
|
+
// lines 1283-1285 (`if warmup { ws_payload.generate = Some(false); }`).
|
|
704
|
+
//
|
|
705
|
+
// Relay accounts that skip step 2-3 stick out: the account's entire
|
|
706
|
+
// traffic history shows zero prewarm frames, while every real CLI
|
|
707
|
+
// user's account shows exactly one prewarm per turn. We mirror the
|
|
708
|
+
// full two-phase flow to eliminate this signal.
|
|
709
|
+
const warmupFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
|
|
710
|
+
/*warmup*/ true);
|
|
711
|
+
const realFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
|
|
712
|
+
/*warmup*/ false);
|
|
713
|
+
const warmupFrameJson = JSON.stringify(warmupFrame);
|
|
714
|
+
const realFrameJson = JSON.stringify(realFrame);
|
|
690
715
|
// Build handshake headers to match Codex CLI 0.118's real upgrade
|
|
691
716
|
// request. Key sources:
|
|
692
717
|
// codex-rs/core/src/client.rs:771-798 → build_websocket_headers
|
|
@@ -764,8 +789,15 @@ async function doCallCodexApi(opts) {
|
|
|
764
789
|
}
|
|
765
790
|
throw err;
|
|
766
791
|
}
|
|
767
|
-
// Connection is open
|
|
768
|
-
//
|
|
792
|
+
// Connection is open. Run the two-phase prewarm → real flow on the
|
|
793
|
+
// same WebSocket session. Phase state machine:
|
|
794
|
+
// - phase = "warmup": server frames are consumed only to detect
|
|
795
|
+
// response.completed. Text / usage deltas are ignored because
|
|
796
|
+
// generate=false suppresses them (and even if the server sends
|
|
797
|
+
// something, we want the real request's numbers, not the
|
|
798
|
+
// warmup's).
|
|
799
|
+
// - phase = "real": server frames populate the shared accumulator
|
|
800
|
+
// as before; response.completed finishes the promise.
|
|
769
801
|
const { ws } = dialed;
|
|
770
802
|
const acc = {
|
|
771
803
|
text: "",
|
|
@@ -777,6 +809,7 @@ async function doCallCodexApi(opts) {
|
|
|
777
809
|
};
|
|
778
810
|
let resolved = false;
|
|
779
811
|
const result = await new Promise((resolve) => {
|
|
812
|
+
let phase = "warmup";
|
|
780
813
|
const finish = (r) => {
|
|
781
814
|
if (resolved)
|
|
782
815
|
return;
|
|
@@ -797,6 +830,29 @@ async function doCallCodexApi(opts) {
|
|
|
797
830
|
error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
|
|
798
831
|
});
|
|
799
832
|
}, WS_OVERALL_TIMEOUT_MS);
|
|
833
|
+
// Scratch accumulator used for the warmup phase. Real CLI throws
|
|
834
|
+
// warmup output away (client.rs:1408-1417 just reads until
|
|
835
|
+
// Completed and discards everything else).
|
|
836
|
+
const warmupAcc = {
|
|
837
|
+
text: "",
|
|
838
|
+
inputTokens: 0,
|
|
839
|
+
outputTokens: 0,
|
|
840
|
+
cacheReadTokens: 0,
|
|
841
|
+
model: opts.model,
|
|
842
|
+
terminal: false,
|
|
843
|
+
};
|
|
844
|
+
const sendFrame = (frameJson) => {
|
|
845
|
+
try {
|
|
846
|
+
ws.send(frameJson, (sendErr) => {
|
|
847
|
+
if (sendErr) {
|
|
848
|
+
finish({ ok: false, retriable: true, error: sendErr });
|
|
849
|
+
}
|
|
850
|
+
});
|
|
851
|
+
}
|
|
852
|
+
catch (err) {
|
|
853
|
+
finish({ ok: false, retriable: true, error: err });
|
|
854
|
+
}
|
|
855
|
+
};
|
|
800
856
|
ws.on("message", (data, _isBinary) => {
|
|
801
857
|
const text = Buffer.isBuffer(data)
|
|
802
858
|
? data.toString("utf-8")
|
|
@@ -804,7 +860,8 @@ async function doCallCodexApi(opts) {
|
|
|
804
860
|
? Buffer.concat(data).toString("utf-8")
|
|
805
861
|
: Buffer.from(data).toString("utf-8");
|
|
806
862
|
// Frames are individual JSON objects (no newline framing).
|
|
807
|
-
const
|
|
863
|
+
const target = phase === "warmup" ? warmupAcc : acc;
|
|
864
|
+
const outcome = handleFrame(text, target);
|
|
808
865
|
if (outcome.rateLimit && rateGuard) {
|
|
809
866
|
// Soft hint — record but don't kill this request. Next request will
|
|
810
867
|
// hit the cooldown check at the guard level.
|
|
@@ -817,16 +874,25 @@ async function doCallCodexApi(opts) {
|
|
|
817
874
|
retriable: false,
|
|
818
875
|
error: new Error(`Codex upstream error: ${outcome.error}`),
|
|
819
876
|
});
|
|
877
|
+
return;
|
|
820
878
|
}
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
879
|
+
if (phase === "warmup") {
|
|
880
|
+
// Warmup done — advance phase and send the real frame on
|
|
881
|
+
// the same WebSocket. Do NOT close the socket here; real
|
|
882
|
+
// CLI keeps the connection open so the real request can
|
|
883
|
+
// reuse it.
|
|
884
|
+
phase = "real";
|
|
885
|
+
sendFrame(realFrameJson);
|
|
886
|
+
return;
|
|
824
887
|
}
|
|
888
|
+
// Real phase completed.
|
|
889
|
+
acc.terminal = true;
|
|
890
|
+
finish({ ok: true });
|
|
825
891
|
}
|
|
826
892
|
});
|
|
827
893
|
ws.on("close", (code, reason) => {
|
|
828
894
|
if (acc.terminal)
|
|
829
|
-
return; // normal close after terminal event
|
|
895
|
+
return; // normal close after real-phase terminal event
|
|
830
896
|
finish({
|
|
831
897
|
ok: false,
|
|
832
898
|
retriable: true,
|
|
@@ -836,17 +902,11 @@ async function doCallCodexApi(opts) {
|
|
|
836
902
|
ws.on("error", (err) => {
|
|
837
903
|
finish({ ok: false, retriable: true, error: err });
|
|
838
904
|
});
|
|
839
|
-
//
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
}
|
|
845
|
-
});
|
|
846
|
-
}
|
|
847
|
-
catch (err) {
|
|
848
|
-
finish({ ok: false, retriable: true, error: err });
|
|
849
|
-
}
|
|
905
|
+
// Phase 1: send the warmup frame (generate=false). The server
|
|
906
|
+
// responds with response.completed without generating tokens;
|
|
907
|
+
// our message handler then transitions to phase "real" and sends
|
|
908
|
+
// the real frame on this same connection.
|
|
909
|
+
sendFrame(warmupFrameJson);
|
|
850
910
|
});
|
|
851
911
|
if (!result.ok) {
|
|
852
912
|
if (result.retriable && transientAttempt < MAX_TRANSIENT_RETRIES) {
|
|
@@ -253,13 +253,70 @@ export function getGeminiRateGuardSnapshot() {
|
|
|
253
253
|
return rateGuard?.currentLoad() ?? null;
|
|
254
254
|
}
|
|
255
255
|
// ── Preflight ──
|
|
256
|
+
//
|
|
257
|
+
// Real Gemini CLI's startup sequence (packages/core/src/code_assist/
|
|
258
|
+
// setup.ts:164) ALWAYS calls loadCodeAssist once at launch, before any
|
|
259
|
+
// user prompt hits generateContentStream. That call:
|
|
260
|
+
// - registers the client instance with Code Assist
|
|
261
|
+
// - warms any server-side caches tied to the project
|
|
262
|
+
// - establishes the "this account has a normal CLI session" pattern
|
|
263
|
+
// that the fraud pipeline uses to distinguish genuine CLI users
|
|
264
|
+
// from bare-API abusers
|
|
265
|
+
// Our daemon used to jump straight to streamGenerateContent, which on
|
|
266
|
+
// a cold account looks like "first request is a raw model call, no
|
|
267
|
+
// setup ceremony" — a distinctive bot fingerprint. Mirror the real CLI
|
|
268
|
+
// by calling loadCodeAssist exactly once per daemon boot. Silently
|
|
269
|
+
// swallow any error so a flaky setup call doesn't tank the daemon.
|
|
270
|
+
async function warmupLoadCodeAssist(projectId, accessToken, userAgent, xGoogApiClient) {
|
|
271
|
+
const url = `${CODE_ASSIST_BASE_URL}/v1internal:loadCodeAssist`;
|
|
272
|
+
const body = JSON.stringify({
|
|
273
|
+
cloudaicompanionProject: projectId,
|
|
274
|
+
metadata: {
|
|
275
|
+
// Matches real CLI constant set from setup.ts:154-158. Note
|
|
276
|
+
// `ideType: IDE_UNSPECIFIED` — that's the CLI default, Antigravity
|
|
277
|
+
// uses a different value and we must NOT leak the two signals.
|
|
278
|
+
ideType: "IDE_UNSPECIFIED",
|
|
279
|
+
platform: "PLATFORM_UNSPECIFIED",
|
|
280
|
+
pluginType: "GEMINI",
|
|
281
|
+
duetProject: projectId,
|
|
282
|
+
},
|
|
283
|
+
});
|
|
284
|
+
try {
|
|
285
|
+
const resp = await fetch(url, {
|
|
286
|
+
method: "POST",
|
|
287
|
+
headers: {
|
|
288
|
+
"content-type": "application/json",
|
|
289
|
+
"accept": "application/json",
|
|
290
|
+
"authorization": `Bearer ${accessToken}`,
|
|
291
|
+
"user-agent": userAgent,
|
|
292
|
+
"x-goog-api-client": xGoogApiClient,
|
|
293
|
+
},
|
|
294
|
+
body,
|
|
295
|
+
});
|
|
296
|
+
if (!resp.ok) {
|
|
297
|
+
logger.warn(`[gemini-api] warmup loadCodeAssist non-OK (${resp.status}) — continuing`);
|
|
298
|
+
// Drain body to release the connection.
|
|
299
|
+
await resp.text().catch(() => "");
|
|
300
|
+
return;
|
|
301
|
+
}
|
|
302
|
+
await resp.text().catch(() => "");
|
|
303
|
+
logger.info("[gemini-api] warmup loadCodeAssist OK");
|
|
304
|
+
}
|
|
305
|
+
catch (err) {
|
|
306
|
+
logger.warn(`[gemini-api] warmup loadCodeAssist error — continuing: ${err.message}`);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
256
309
|
export async function preflightGeminiApi(config) {
|
|
257
310
|
configureDispatcher();
|
|
258
311
|
configureGeminiRateGuard(config);
|
|
259
|
-
loadFingerprint();
|
|
260
|
-
await getFreshCreds();
|
|
312
|
+
const fingerprint = loadFingerprint();
|
|
313
|
+
const creds = await getFreshCreds();
|
|
261
314
|
logger.info(`[gemini-api] preflight OK (project=${cachedFingerprint?.project_id ?? "?"}, ` +
|
|
262
315
|
`ua=${cachedFingerprint?.user_agent ?? "?"})`);
|
|
316
|
+
// Warmup call — mirror real CLI startup before the first user prompt.
|
|
317
|
+
// Done after token refresh so the request goes out with a fresh access
|
|
318
|
+
// token (expired-token warmups would look like another bot signal).
|
|
319
|
+
await warmupLoadCodeAssist(fingerprint.project_id, creds.access_token, fingerprint.user_agent, fingerprint.x_goog_api_client);
|
|
263
320
|
}
|
|
264
321
|
export async function callGeminiApi(opts) {
|
|
265
322
|
configureDispatcher();
|