npm - clawmoney - Versions diffs - 0.14.4 → 0.14.6 - Mend

clawmoney 0.14.4 → 0.14.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/relay/upstream/codex-api.js +86 -26
package/dist/relay/upstream/gemini-api.js +59 -2
package/package.json +1 -1

package/dist/relay/upstream/codex-api.js CHANGED Viewed

@@ -401,7 +401,7 @@ export async function preflightCodexApi(config) {
 //   reasoning (optional but almost always present via default_reasoning_level),
 //   store, stream, include, client_metadata (with installation_id + window_id +
 //   turn_metadata)
-function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
+function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
     // `client_metadata` is a flat string-to-string map. Real CLI populates
     // it via build_ws_client_metadata() (client.rs:575-605). The keys look
     // like HTTP header names but they're JSON fields.
@@ -410,7 +410,7 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
         "x-codex-window-id": `${sessionId}:${windowGeneration}`,
         "x-codex-turn-metadata": turnMetadataHeader,
     };
-    return {
+    const frame = {
         type: "response.create",
         model,
         instructions: RELAY_INSTRUCTIONS,
@@ -439,6 +439,15 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
         include: ["reasoning.encrypted_content"],
         client_metadata: clientMetadata,
     };
+    if (warmup) {
+        // Real CLI's prewarm flow sets `generate: false` on the first frame
+        // of each turn (codex-rs/core/src/client.rs:1283-1285). The server
+        // replies with a response.completed event but does NOT generate
+        // tokens, so the warmup is cheap. The real frame then follows on
+        // the SAME WebSocket session.
+        frame.generate = false;
+    }
+    return frame;
 }
 function handleFrame(raw, acc) {
     let evt;
@@ -681,12 +690,28 @@ async function doCallCodexApi(opts) {
             turn_id: randomUUID(),
             sandbox: platformSandboxTag,
         });
-        // Build the WS request frame with the just-built turn metadata so
-        // the frame's `client_metadata["x-codex-turn-metadata"]` matches the
-        // `x-codex-turn-metadata` HTTP header on the same handshake — real
-        // CLI sends them both and they carry the same value.
-        const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
-        const frameJson = JSON.stringify(frame);
+        // Build TWO frames for the same WS session — real Codex CLI's turn
+        // flow is:
+        //   1. open WebSocket
+        //   2. send prewarm frame `{...request, generate: false}`
+        //   3. wait for response.completed (server returns completed with
+        //      no generated tokens — warmup is cheap)
+        //   4. send the real frame on the SAME connection
+        //   5. wait for response.completed with the actual stream output
+        //   6. close WebSocket
+        // See codex-rs/core/src/client.rs:1377-1425 (prewarm_websocket) and
+        // lines 1283-1285 (`if warmup { ws_payload.generate = Some(false); }`).
+        //
+        // Relay accounts that skip step 2-3 stick out: the account's entire
+        // traffic history shows zero prewarm frames, while every real CLI
+        // user's account shows exactly one prewarm per turn. We mirror the
+        // full two-phase flow to eliminate this signal.
+        const warmupFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
+        /*warmup*/ true);
+        const realFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
+        /*warmup*/ false);
+        const warmupFrameJson = JSON.stringify(warmupFrame);
+        const realFrameJson = JSON.stringify(realFrame);
         // Build handshake headers to match Codex CLI 0.118's real upgrade
         // request. Key sources:
         //   codex-rs/core/src/client.rs:771-798 → build_websocket_headers
@@ -764,8 +789,15 @@ async function doCallCodexApi(opts) {
             }
             throw err;
         }
-        // Connection is open — send the first (and only) client frame and
-        // accumulate server frames until we see a terminal event.
+        // Connection is open. Run the two-phase prewarm → real flow on the
+        // same WebSocket session. Phase state machine:
+        //   - phase = "warmup": server frames are consumed only to detect
+        //     response.completed. Text / usage deltas are ignored because
+        //     generate=false suppresses them (and even if the server sends
+        //     something, we want the real request's numbers, not the
+        //     warmup's).
+        //   - phase = "real": server frames populate the shared accumulator
+        //     as before; response.completed finishes the promise.
         const { ws } = dialed;
         const acc = {
             text: "",
@@ -777,6 +809,7 @@ async function doCallCodexApi(opts) {
         };
         let resolved = false;
         const result = await new Promise((resolve) => {
+            let phase = "warmup";
             const finish = (r) => {
                 if (resolved)
                     return;
@@ -797,6 +830,29 @@ async function doCallCodexApi(opts) {
                     error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
                 });
             }, WS_OVERALL_TIMEOUT_MS);
+            // Scratch accumulator used for the warmup phase. Real CLI throws
+            // warmup output away (client.rs:1408-1417 just reads until
+            // Completed and discards everything else).
+            const warmupAcc = {
+                text: "",
+                inputTokens: 0,
+                outputTokens: 0,
+                cacheReadTokens: 0,
+                model: opts.model,
+                terminal: false,
+            };
+            const sendFrame = (frameJson) => {
+                try {
+                    ws.send(frameJson, (sendErr) => {
+                        if (sendErr) {
+                            finish({ ok: false, retriable: true, error: sendErr });
+                        }
+                    });
+                }
+                catch (err) {
+                    finish({ ok: false, retriable: true, error: err });
+                }
+            };
             ws.on("message", (data, _isBinary) => {
                 const text = Buffer.isBuffer(data)
                     ? data.toString("utf-8")
@@ -804,7 +860,8 @@ async function doCallCodexApi(opts) {
                         ? Buffer.concat(data).toString("utf-8")
                         : Buffer.from(data).toString("utf-8");
                 // Frames are individual JSON objects (no newline framing).
-                const outcome = handleFrame(text, acc);
+                const target = phase === "warmup" ? warmupAcc : acc;
+                const outcome = handleFrame(text, target);
                 if (outcome.rateLimit && rateGuard) {
                     // Soft hint — record but don't kill this request. Next request will
                     // hit the cooldown check at the guard level.
@@ -817,16 +874,25 @@ async function doCallCodexApi(opts) {
                             retriable: false,
                             error: new Error(`Codex upstream error: ${outcome.error}`),
                         });
+                        return;
                     }
-                    else {
-                        acc.terminal = true;
-                        finish({ ok: true });
+                    if (phase === "warmup") {
+                        // Warmup done — advance phase and send the real frame on
+                        // the same WebSocket. Do NOT close the socket here; real
+                        // CLI keeps the connection open so the real request can
+                        // reuse it.
+                        phase = "real";
+                        sendFrame(realFrameJson);
+                        return;
                     }
+                    // Real phase completed.
+                    acc.terminal = true;
+                    finish({ ok: true });
                 }
             });
             ws.on("close", (code, reason) => {
                 if (acc.terminal)
-                    return; // normal close after terminal event
+                    return; // normal close after real-phase terminal event
                 finish({
                     ok: false,
                     retriable: true,
@@ -836,17 +902,11 @@ async function doCallCodexApi(opts) {
             ws.on("error", (err) => {
                 finish({ ok: false, retriable: true, error: err });
             });
-            // Send the request frame.
-            try {
-                ws.send(frameJson, (sendErr) => {
-                    if (sendErr) {
-                        finish({ ok: false, retriable: true, error: sendErr });
-                    }
-                });
-            }
-            catch (err) {
-                finish({ ok: false, retriable: true, error: err });
-            }
+            // Phase 1: send the warmup frame (generate=false). The server
+            // responds with response.completed without generating tokens;
+            // our message handler then transitions to phase "real" and sends
+            // the real frame on this same connection.
+            sendFrame(warmupFrameJson);
         });
         if (!result.ok) {
             if (result.retriable && transientAttempt < MAX_TRANSIENT_RETRIES) {

package/dist/relay/upstream/gemini-api.js CHANGED Viewed

@@ -253,13 +253,70 @@ export function getGeminiRateGuardSnapshot() {
     return rateGuard?.currentLoad() ?? null;
 }
 // ── Preflight ──
+//
+// Real Gemini CLI's startup sequence (packages/core/src/code_assist/
+// setup.ts:164) ALWAYS calls loadCodeAssist once at launch, before any
+// user prompt hits generateContentStream. That call:
+//   - registers the client instance with Code Assist
+//   - warms any server-side caches tied to the project
+//   - establishes the "this account has a normal CLI session" pattern
+//     that the fraud pipeline uses to distinguish genuine CLI users
+//     from bare-API abusers
+// Our daemon used to jump straight to streamGenerateContent, which on
+// a cold account looks like "first request is a raw model call, no
+// setup ceremony" — a distinctive bot fingerprint. Mirror the real CLI
+// by calling loadCodeAssist exactly once per daemon boot. Silently
+// swallow any error so a flaky setup call doesn't tank the daemon.
+async function warmupLoadCodeAssist(projectId, accessToken, userAgent, xGoogApiClient) {
+    const url = `${CODE_ASSIST_BASE_URL}/v1internal:loadCodeAssist`;
+    const body = JSON.stringify({
+        cloudaicompanionProject: projectId,
+        metadata: {
+            // Matches real CLI constant set from setup.ts:154-158. Note
+            // `ideType: IDE_UNSPECIFIED` — that's the CLI default, Antigravity
+            // uses a different value and we must NOT leak the two signals.
+            ideType: "IDE_UNSPECIFIED",
+            platform: "PLATFORM_UNSPECIFIED",
+            pluginType: "GEMINI",
+            duetProject: projectId,
+        },
+    });
+    try {
+        const resp = await fetch(url, {
+            method: "POST",
+            headers: {
+                "content-type": "application/json",
+                "accept": "application/json",
+                "authorization": `Bearer ${accessToken}`,
+                "user-agent": userAgent,
+                "x-goog-api-client": xGoogApiClient,
+            },
+            body,
+        });
+        if (!resp.ok) {
+            logger.warn(`[gemini-api] warmup loadCodeAssist non-OK (${resp.status}) — continuing`);
+            // Drain body to release the connection.
+            await resp.text().catch(() => "");
+            return;
+        }
+        await resp.text().catch(() => "");
+        logger.info("[gemini-api] warmup loadCodeAssist OK");
+    }
+    catch (err) {
+        logger.warn(`[gemini-api] warmup loadCodeAssist error — continuing: ${err.message}`);
+    }
+}
 export async function preflightGeminiApi(config) {
     configureDispatcher();
     configureGeminiRateGuard(config);
-    loadFingerprint();
-    await getFreshCreds();
+    const fingerprint = loadFingerprint();
+    const creds = await getFreshCreds();
     logger.info(`[gemini-api] preflight OK (project=${cachedFingerprint?.project_id ?? "?"}, ` +
         `ua=${cachedFingerprint?.user_agent ?? "?"})`);
+    // Warmup call — mirror real CLI startup before the first user prompt.
+    // Done after token refresh so the request goes out with a fresh access
+    // token (expired-token warmups would look like another bot signal).
+    await warmupLoadCodeAssist(fingerprint.project_id, creds.access_token, fingerprint.user_agent, fingerprint.x_goog_api_client);
 }
 export async function callGeminiApi(opts) {
     configureDispatcher();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clawmoney",
-  "version": "0.14.4",
+  "version": "0.14.6",
   "description": "ClawMoney CLI -- Earn rewards with your AI agent",
   "type": "module",
   "bin": {