npm - clawmoney - Versions diffs - 0.14.5 → 0.14.7 - Mend

clawmoney 0.14.5 → 0.14.7

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/commands/relay.js +14 -1
package/dist/relay/provider.js +17 -1
package/dist/relay/upstream/codex-api.js +86 -26
package/package.json +1 -1

package/dist/commands/relay.js CHANGED Viewed

@@ -97,7 +97,20 @@ export async function relayRegisterCommand(options) {
         const discountPct = Math.round(RELAY_DISCOUNT * 100);
         console.log(chalk.dim(`  Buyers pay ${discountPct}% of the official API price — a ${100 - discountPct}% discount applied by the Hub.`));
         console.log("");
-        console.log(chalk.dim(`  Next: run "clawmoney relay start" to begin accepting requests.`));
+        console.log(chalk.bold("  Next steps"));
+        console.log(chalk.dim(`    1. Start the daemon:`));
+        console.log(chalk.dim(`         clawmoney relay start`));
+        if (process.platform === "darwin") {
+            console.log(chalk.dim(`    2. (macOS) Install the daemon as a launchd user agent so it`));
+            console.log(chalk.dim(`       survives logouts AND keeps macOS Keychain unlocked for`));
+            console.log(chalk.dim(`       Claude API mode (SSH shells can't read a locked Keychain):`));
+            console.log(chalk.dim(`         ./scripts/install-daemon-launchd.sh`));
+            console.log(chalk.dim(`       (from the clawmoney-cli repo; see scripts/README for details)`));
+        }
+        console.log("");
+        console.log(chalk.dim(`  Tip: the daemon now defaults to direct-API mode (execution_mode: api)`));
+        console.log(chalk.dim(`  for ~10x lower latency per request. To fall back to subprocess-per-`));
+        console.log(chalk.dim(`  request mode, set \`relay.execution_mode: cli\` in ~/.clawmoney/config.yaml.`));
     }
     catch (err) {
         regSpinner.fail(chalk.red("Registration failed"));

package/dist/relay/provider.js CHANGED Viewed

@@ -32,9 +32,25 @@ import { relayLogger as logger } from "./logger.js";
 const CONFIG_DIR = join(homedir(), ".clawmoney");
 const CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
 const PID_FILE = join(CONFIG_DIR, "relay.pid");
+// Default execution mode is `api` as of 0.14.7. The `cli` fallback is still
+// supported — set `relay.execution_mode: cli` in ~/.clawmoney/config.yaml
+// or export CLAWMONEY_RELAY_EXECUTION_MODE=cli at launch — but new
+// providers get the direct-API path by default because:
+//   - Every spawnCli() round-trip burns 2-5 seconds of cold start, which
+//     is far too much for a request/response relay where buyers expect
+//     sub-second handoff.
+//   - Each subprocess consumes its own RAM + file handles; API mode runs
+//     hundreds of concurrent calls out of one Node process.
+//   - The fingerprint gap that used to make CLI mode "safer" is now
+//     closed — 0.14.0–0.14.6 ported the real CLI's attribution hash,
+//     streaming transport, thinking config, dynamic beta header, session
+//     masking, Gemini startup warmup, and Codex per-turn prewarm. API
+//     mode now matches real-CLI wire shape on every upstream.
+// CLI mode will be removed entirely in 0.15.0 once we've observed a
+// week of API-mode-default in production.
 const DEFAULT_RELAY = {
     cli_type: "claude",
-    execution_mode: "cli",
+    execution_mode: "api",
     model: "claude-opus-4-6",
     mode: "chat",
     concurrency: 5,

package/dist/relay/upstream/codex-api.js CHANGED Viewed

@@ -401,7 +401,7 @@ export async function preflightCodexApi(config) {
 //   reasoning (optional but almost always present via default_reasoning_level),
 //   store, stream, include, client_metadata (with installation_id + window_id +
 //   turn_metadata)
-function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
+function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
     // `client_metadata` is a flat string-to-string map. Real CLI populates
     // it via build_ws_client_metadata() (client.rs:575-605). The keys look
     // like HTTP header names but they're JSON fields.
@@ -410,7 +410,7 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
         "x-codex-window-id": `${sessionId}:${windowGeneration}`,
         "x-codex-turn-metadata": turnMetadataHeader,
     };
-    return {
+    const frame = {
         type: "response.create",
         model,
         instructions: RELAY_INSTRUCTIONS,
@@ -439,6 +439,15 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
         include: ["reasoning.encrypted_content"],
         client_metadata: clientMetadata,
     };
+    if (warmup) {
+        // Real CLI's prewarm flow sets `generate: false` on the first frame
+        // of each turn (codex-rs/core/src/client.rs:1283-1285). The server
+        // replies with a response.completed event but does NOT generate
+        // tokens, so the warmup is cheap. The real frame then follows on
+        // the SAME WebSocket session.
+        frame.generate = false;
+    }
+    return frame;
 }
 function handleFrame(raw, acc) {
     let evt;
@@ -681,12 +690,28 @@ async function doCallCodexApi(opts) {
             turn_id: randomUUID(),
             sandbox: platformSandboxTag,
         });
-        // Build the WS request frame with the just-built turn metadata so
-        // the frame's `client_metadata["x-codex-turn-metadata"]` matches the
-        // `x-codex-turn-metadata` HTTP header on the same handshake — real
-        // CLI sends them both and they carry the same value.
-        const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
-        const frameJson = JSON.stringify(frame);
+        // Build TWO frames for the same WS session — real Codex CLI's turn
+        // flow is:
+        //   1. open WebSocket
+        //   2. send prewarm frame `{...request, generate: false}`
+        //   3. wait for response.completed (server returns completed with
+        //      no generated tokens — warmup is cheap)
+        //   4. send the real frame on the SAME connection
+        //   5. wait for response.completed with the actual stream output
+        //   6. close WebSocket
+        // See codex-rs/core/src/client.rs:1377-1425 (prewarm_websocket) and
+        // lines 1283-1285 (`if warmup { ws_payload.generate = Some(false); }`).
+        //
+        // Relay accounts that skip step 2-3 stick out: the account's entire
+        // traffic history shows zero prewarm frames, while every real CLI
+        // user's account shows exactly one prewarm per turn. We mirror the
+        // full two-phase flow to eliminate this signal.
+        const warmupFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
+        /*warmup*/ true);
+        const realFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
+        /*warmup*/ false);
+        const warmupFrameJson = JSON.stringify(warmupFrame);
+        const realFrameJson = JSON.stringify(realFrame);
         // Build handshake headers to match Codex CLI 0.118's real upgrade
         // request. Key sources:
         //   codex-rs/core/src/client.rs:771-798 → build_websocket_headers
@@ -764,8 +789,15 @@ async function doCallCodexApi(opts) {
             }
             throw err;
         }
-        // Connection is open — send the first (and only) client frame and
-        // accumulate server frames until we see a terminal event.
+        // Connection is open. Run the two-phase prewarm → real flow on the
+        // same WebSocket session. Phase state machine:
+        //   - phase = "warmup": server frames are consumed only to detect
+        //     response.completed. Text / usage deltas are ignored because
+        //     generate=false suppresses them (and even if the server sends
+        //     something, we want the real request's numbers, not the
+        //     warmup's).
+        //   - phase = "real": server frames populate the shared accumulator
+        //     as before; response.completed finishes the promise.
         const { ws } = dialed;
         const acc = {
             text: "",
@@ -777,6 +809,7 @@ async function doCallCodexApi(opts) {
         };
         let resolved = false;
         const result = await new Promise((resolve) => {
+            let phase = "warmup";
             const finish = (r) => {
                 if (resolved)
                     return;
@@ -797,6 +830,29 @@ async function doCallCodexApi(opts) {
                     error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
                 });
             }, WS_OVERALL_TIMEOUT_MS);
+            // Scratch accumulator used for the warmup phase. Real CLI throws
+            // warmup output away (client.rs:1408-1417 just reads until
+            // Completed and discards everything else).
+            const warmupAcc = {
+                text: "",
+                inputTokens: 0,
+                outputTokens: 0,
+                cacheReadTokens: 0,
+                model: opts.model,
+                terminal: false,
+            };
+            const sendFrame = (frameJson) => {
+                try {
+                    ws.send(frameJson, (sendErr) => {
+                        if (sendErr) {
+                            finish({ ok: false, retriable: true, error: sendErr });
+                        }
+                    });
+                }
+                catch (err) {
+                    finish({ ok: false, retriable: true, error: err });
+                }
+            };
             ws.on("message", (data, _isBinary) => {
                 const text = Buffer.isBuffer(data)
                     ? data.toString("utf-8")
@@ -804,7 +860,8 @@ async function doCallCodexApi(opts) {
                         ? Buffer.concat(data).toString("utf-8")
                         : Buffer.from(data).toString("utf-8");
                 // Frames are individual JSON objects (no newline framing).
-                const outcome = handleFrame(text, acc);
+                const target = phase === "warmup" ? warmupAcc : acc;
+                const outcome = handleFrame(text, target);
                 if (outcome.rateLimit && rateGuard) {
                     // Soft hint — record but don't kill this request. Next request will
                     // hit the cooldown check at the guard level.
@@ -817,16 +874,25 @@ async function doCallCodexApi(opts) {
                             retriable: false,
                             error: new Error(`Codex upstream error: ${outcome.error}`),
                         });
+                        return;
                     }
-                    else {
-                        acc.terminal = true;
-                        finish({ ok: true });
+                    if (phase === "warmup") {
+                        // Warmup done — advance phase and send the real frame on
+                        // the same WebSocket. Do NOT close the socket here; real
+                        // CLI keeps the connection open so the real request can
+                        // reuse it.
+                        phase = "real";
+                        sendFrame(realFrameJson);
+                        return;
                     }
+                    // Real phase completed.
+                    acc.terminal = true;
+                    finish({ ok: true });
                 }
             });
             ws.on("close", (code, reason) => {
                 if (acc.terminal)
-                    return; // normal close after terminal event
+                    return; // normal close after real-phase terminal event
                 finish({
                     ok: false,
                     retriable: true,
@@ -836,17 +902,11 @@ async function doCallCodexApi(opts) {
             ws.on("error", (err) => {
                 finish({ ok: false, retriable: true, error: err });
             });
-            // Send the request frame.
-            try {
-                ws.send(frameJson, (sendErr) => {
-                    if (sendErr) {
-                        finish({ ok: false, retriable: true, error: sendErr });
-                    }
-                });
-            }
-            catch (err) {
-                finish({ ok: false, retriable: true, error: err });
-            }
+            // Phase 1: send the warmup frame (generate=false). The server
+            // responds with response.completed without generating tokens;
+            // our message handler then transitions to phase "real" and sends
+            // the real frame on this same connection.
+            sendFrame(warmupFrameJson);
         });
         if (!result.ok) {
             if (result.retriable && transientAttempt < MAX_TRANSIENT_RETRIES) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clawmoney",
-  "version": "0.14.5",
+  "version": "0.14.7",
   "description": "ClawMoney CLI -- Earn rewards with your AI agent",
   "type": "module",
   "bin": {