clawmoney 0.14.4 → 0.14.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -401,7 +401,7 @@ export async function preflightCodexApi(config) {
401
401
  // reasoning (optional but almost always present via default_reasoning_level),
402
402
  // store, stream, include, client_metadata (with installation_id + window_id +
403
403
  // turn_metadata)
404
- function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
404
+ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
405
405
  // `client_metadata` is a flat string-to-string map. Real CLI populates
406
406
  // it via build_ws_client_metadata() (client.rs:575-605). The keys look
407
407
  // like HTTP header names but they're JSON fields.
@@ -410,7 +410,7 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
410
410
  "x-codex-window-id": `${sessionId}:${windowGeneration}`,
411
411
  "x-codex-turn-metadata": turnMetadataHeader,
412
412
  };
413
- return {
413
+ const frame = {
414
414
  type: "response.create",
415
415
  model,
416
416
  instructions: RELAY_INSTRUCTIONS,
@@ -439,6 +439,15 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
439
439
  include: ["reasoning.encrypted_content"],
440
440
  client_metadata: clientMetadata,
441
441
  };
442
+ if (warmup) {
443
+ // Real CLI's prewarm flow sets `generate: false` on the first frame
444
+ // of each turn (codex-rs/core/src/client.rs:1283-1285). The server
445
+ // replies with a response.completed event but does NOT generate
446
+ // tokens, so the warmup is cheap. The real frame then follows on
447
+ // the SAME WebSocket session.
448
+ frame.generate = false;
449
+ }
450
+ return frame;
442
451
  }
443
452
  function handleFrame(raw, acc) {
444
453
  let evt;
@@ -681,12 +690,28 @@ async function doCallCodexApi(opts) {
681
690
  turn_id: randomUUID(),
682
691
  sandbox: platformSandboxTag,
683
692
  });
684
- // Build the WS request frame with the just-built turn metadata so
685
- // the frame's `client_metadata["x-codex-turn-metadata"]` matches the
686
- // `x-codex-turn-metadata` HTTP header on the same handshake — real
687
- // CLI sends them both and they carry the same value.
688
- const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
689
- const frameJson = JSON.stringify(frame);
693
+ // Build TWO frames for the same WS session — real Codex CLI's turn
694
+ // flow is:
695
+ // 1. open WebSocket
696
+ // 2. send prewarm frame `{...request, generate: false}`
697
+ // 3. wait for response.completed (server returns completed with
698
+ // no generated tokens — warmup is cheap)
699
+ // 4. send the real frame on the SAME connection
700
+ // 5. wait for response.completed with the actual stream output
701
+ // 6. close WebSocket
702
+ // See codex-rs/core/src/client.rs:1377-1425 (prewarm_websocket) and
703
+ // lines 1283-1285 (`if warmup { ws_payload.generate = Some(false); }`).
704
+ //
705
+ // Relay accounts that skip step 2-3 stick out: the account's entire
706
+ // traffic history shows zero prewarm frames, while every real CLI
707
+ // user's account shows exactly one prewarm per turn. We mirror the
708
+ // full two-phase flow to eliminate this signal.
709
+ const warmupFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
710
+ /*warmup*/ true);
711
+ const realFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
712
+ /*warmup*/ false);
713
+ const warmupFrameJson = JSON.stringify(warmupFrame);
714
+ const realFrameJson = JSON.stringify(realFrame);
690
715
  // Build handshake headers to match Codex CLI 0.118's real upgrade
691
716
  // request. Key sources:
692
717
  // codex-rs/core/src/client.rs:771-798 → build_websocket_headers
@@ -764,8 +789,15 @@ async function doCallCodexApi(opts) {
764
789
  }
765
790
  throw err;
766
791
  }
767
- // Connection is open send the first (and only) client frame and
768
- // accumulate server frames until we see a terminal event.
792
+ // Connection is open. Run the two-phase prewarm → real flow on the
793
+ // same WebSocket session. Phase state machine:
794
+ // - phase = "warmup": server frames are consumed only to detect
795
+ // response.completed. Text / usage deltas are ignored because
796
+ // generate=false suppresses them (and even if the server sends
797
+ // something, we want the real request's numbers, not the
798
+ // warmup's).
799
+ // - phase = "real": server frames populate the shared accumulator
800
+ // as before; response.completed finishes the promise.
769
801
  const { ws } = dialed;
770
802
  const acc = {
771
803
  text: "",
@@ -777,6 +809,7 @@ async function doCallCodexApi(opts) {
777
809
  };
778
810
  let resolved = false;
779
811
  const result = await new Promise((resolve) => {
812
+ let phase = "warmup";
780
813
  const finish = (r) => {
781
814
  if (resolved)
782
815
  return;
@@ -797,6 +830,29 @@ async function doCallCodexApi(opts) {
797
830
  error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
798
831
  });
799
832
  }, WS_OVERALL_TIMEOUT_MS);
833
+ // Scratch accumulator used for the warmup phase. Real CLI throws
834
+ // warmup output away (client.rs:1408-1417 just reads until
835
+ // Completed and discards everything else).
836
+ const warmupAcc = {
837
+ text: "",
838
+ inputTokens: 0,
839
+ outputTokens: 0,
840
+ cacheReadTokens: 0,
841
+ model: opts.model,
842
+ terminal: false,
843
+ };
844
+ const sendFrame = (frameJson) => {
845
+ try {
846
+ ws.send(frameJson, (sendErr) => {
847
+ if (sendErr) {
848
+ finish({ ok: false, retriable: true, error: sendErr });
849
+ }
850
+ });
851
+ }
852
+ catch (err) {
853
+ finish({ ok: false, retriable: true, error: err });
854
+ }
855
+ };
800
856
  ws.on("message", (data, _isBinary) => {
801
857
  const text = Buffer.isBuffer(data)
802
858
  ? data.toString("utf-8")
@@ -804,7 +860,8 @@ async function doCallCodexApi(opts) {
804
860
  ? Buffer.concat(data).toString("utf-8")
805
861
  : Buffer.from(data).toString("utf-8");
806
862
  // Frames are individual JSON objects (no newline framing).
807
- const outcome = handleFrame(text, acc);
863
+ const target = phase === "warmup" ? warmupAcc : acc;
864
+ const outcome = handleFrame(text, target);
808
865
  if (outcome.rateLimit && rateGuard) {
809
866
  // Soft hint — record but don't kill this request. Next request will
810
867
  // hit the cooldown check at the guard level.
@@ -817,16 +874,25 @@ async function doCallCodexApi(opts) {
817
874
  retriable: false,
818
875
  error: new Error(`Codex upstream error: ${outcome.error}`),
819
876
  });
877
+ return;
820
878
  }
821
- else {
822
- acc.terminal = true;
823
- finish({ ok: true });
879
+ if (phase === "warmup") {
880
+ // Warmup done — advance phase and send the real frame on
881
+ // the same WebSocket. Do NOT close the socket here; real
882
+ // CLI keeps the connection open so the real request can
883
+ // reuse it.
884
+ phase = "real";
885
+ sendFrame(realFrameJson);
886
+ return;
824
887
  }
888
+ // Real phase completed.
889
+ acc.terminal = true;
890
+ finish({ ok: true });
825
891
  }
826
892
  });
827
893
  ws.on("close", (code, reason) => {
828
894
  if (acc.terminal)
829
- return; // normal close after terminal event
895
+ return; // normal close after real-phase terminal event
830
896
  finish({
831
897
  ok: false,
832
898
  retriable: true,
@@ -836,17 +902,11 @@ async function doCallCodexApi(opts) {
836
902
  ws.on("error", (err) => {
837
903
  finish({ ok: false, retriable: true, error: err });
838
904
  });
839
- // Send the request frame.
840
- try {
841
- ws.send(frameJson, (sendErr) => {
842
- if (sendErr) {
843
- finish({ ok: false, retriable: true, error: sendErr });
844
- }
845
- });
846
- }
847
- catch (err) {
848
- finish({ ok: false, retriable: true, error: err });
849
- }
905
+ // Phase 1: send the warmup frame (generate=false). The server
906
+ // responds with response.completed without generating tokens;
907
+ // our message handler then transitions to phase "real" and sends
908
+ // the real frame on this same connection.
909
+ sendFrame(warmupFrameJson);
850
910
  });
851
911
  if (!result.ok) {
852
912
  if (result.retriable && transientAttempt < MAX_TRANSIENT_RETRIES) {
@@ -253,13 +253,70 @@ export function getGeminiRateGuardSnapshot() {
253
253
  return rateGuard?.currentLoad() ?? null;
254
254
  }
255
255
  // ── Preflight ──
256
+ //
257
+ // Real Gemini CLI's startup sequence (packages/core/src/code_assist/
258
+ // setup.ts:164) ALWAYS calls loadCodeAssist once at launch, before any
259
+ // user prompt hits generateContentStream. That call:
260
+ // - registers the client instance with Code Assist
261
+ // - warms any server-side caches tied to the project
262
+ // - establishes the "this account has a normal CLI session" pattern
263
+ // that the fraud pipeline uses to distinguish genuine CLI users
264
+ // from bare-API abusers
265
+ // Our daemon used to jump straight to streamGenerateContent, which on
266
+ // a cold account looks like "first request is a raw model call, no
267
+ // setup ceremony" — a distinctive bot fingerprint. Mirror the real CLI
268
+ // by calling loadCodeAssist exactly once per daemon boot. Silently
269
+ // swallow any error so a flaky setup call doesn't tank the daemon.
270
/**
 * Best-effort warmup: POSTs a single `loadCodeAssist` request to
 * `${CODE_ASSIST_BASE_URL}/v1internal:loadCodeAssist` and discards the
 * response body.
 *
 * A rejected fetch, a non-OK status, or a timeout is logged through
 * `logger.warn` and swallowed, so the returned promise resolves either way
 * and a flaky setup call cannot fail the caller.
 *
 * @param projectId Sent as both `cloudaicompanionProject` and `metadata.duetProject`.
 * @param accessToken Placed in the `authorization` header as a Bearer token.
 * @param userAgent Value of the `user-agent` header.
 * @param xGoogApiClient Value of the `x-goog-api-client` header.
 * @param {number} [timeoutMs=10000] Abort the request after this many
 *   milliseconds so a hung connection cannot stall the caller forever.
 * @returns {Promise<void>}
 */
async function warmupLoadCodeAssist(projectId, accessToken, userAgent, xGoogApiClient, timeoutMs = 10000) {
    const url = `${CODE_ASSIST_BASE_URL}/v1internal:loadCodeAssist`;
    const body = JSON.stringify({
        cloudaicompanionProject: projectId,
        metadata: {
            // Matches real CLI constant set from setup.ts:154-158. Note
            // `ideType: IDE_UNSPECIFIED` — that's the CLI default, Antigravity
            // uses a different value and we must NOT leak the two signals.
            ideType: "IDE_UNSPECIFIED",
            platform: "PLATFORM_UNSPECIFIED",
            pluginType: "GEMINI",
            duetProject: projectId,
        },
    });
    // Bound the whole exchange (headers + body drain). Without this a
    // connection that never answers would keep the awaiting caller pending
    // indefinitely, which defeats the "best-effort" contract.
    const controller = new AbortController();
    const timer = setTimeout(() => {
        controller.abort(new Error(`timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    try {
        const resp = await fetch(url, {
            method: "POST",
            headers: {
                "content-type": "application/json",
                "accept": "application/json",
                "authorization": `Bearer ${accessToken}`,
                "user-agent": userAgent,
                "x-goog-api-client": xGoogApiClient,
            },
            body,
            signal: controller.signal,
        });
        // Drain the body in both outcomes to release the connection.
        await resp.text().catch(() => "");
        if (!resp.ok) {
            logger.warn(`[gemini-api] warmup loadCodeAssist non-OK (${resp.status}) — continuing`);
            return;
        }
        logger.info("[gemini-api] warmup loadCodeAssist OK");
    }
    catch (err) {
        // Rejections are not guaranteed to be Error instances; fall back to
        // the stringified value so the log never reads "undefined".
        const detail = err?.message ?? String(err);
        logger.warn(`[gemini-api] warmup loadCodeAssist error — continuing: ${detail}`);
    }
    finally {
        clearTimeout(timer);
    }
}
256
309
  export async function preflightGeminiApi(config) {
257
310
  configureDispatcher();
258
311
  configureGeminiRateGuard(config);
259
- loadFingerprint();
260
- await getFreshCreds();
312
+ const fingerprint = loadFingerprint();
313
+ const creds = await getFreshCreds();
261
314
  logger.info(`[gemini-api] preflight OK (project=${cachedFingerprint?.project_id ?? "?"}, ` +
262
315
  `ua=${cachedFingerprint?.user_agent ?? "?"})`);
316
+ // Warmup call — mirror real CLI startup before the first user prompt.
317
+ // Done after token refresh so the request goes out with a fresh access
318
+ // token (expired-token warmups would look like another bot signal).
319
+ await warmupLoadCodeAssist(fingerprint.project_id, creds.access_token, fingerprint.user_agent, fingerprint.x_goog_api_client);
263
320
  }
264
321
  export async function callGeminiApi(opts) {
265
322
  configureDispatcher();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.14.4",
3
+ "version": "0.14.6",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {