clawmoney 0.14.5 → 0.14.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,7 +97,20 @@ export async function relayRegisterCommand(options) {
97
97
  const discountPct = Math.round(RELAY_DISCOUNT * 100);
98
98
  console.log(chalk.dim(` Buyers pay ${discountPct}% of the official API price — a ${100 - discountPct}% discount applied by the Hub.`));
99
99
  console.log("");
100
- console.log(chalk.dim(` Next: run "clawmoney relay start" to begin accepting requests.`));
100
+ console.log(chalk.bold(" Next steps"));
101
+ console.log(chalk.dim(` 1. Start the daemon:`));
102
+ console.log(chalk.dim(` clawmoney relay start`));
103
+ if (process.platform === "darwin") {
104
+ console.log(chalk.dim(` 2. (macOS) Install the daemon as a launchd user agent so it`));
105
+ console.log(chalk.dim(` survives logouts AND keeps macOS Keychain unlocked for`));
106
+ console.log(chalk.dim(` Claude API mode (SSH shells can't read a locked Keychain):`));
107
+ console.log(chalk.dim(` ./scripts/install-daemon-launchd.sh`));
108
+ console.log(chalk.dim(` (from the clawmoney-cli repo; see scripts/README for details)`));
109
+ }
110
+ console.log("");
111
+ console.log(chalk.dim(` Tip: the daemon now defaults to direct-API mode (execution_mode: api)`));
112
+ console.log(chalk.dim(` for ~10x lower latency per request. To fall back to subprocess-per-`));
113
+ console.log(chalk.dim(` request mode, set \`relay.execution_mode: cli\` in ~/.clawmoney/config.yaml.`));
101
114
  }
102
115
  catch (err) {
103
116
  regSpinner.fail(chalk.red("Registration failed"));
@@ -32,9 +32,25 @@ import { relayLogger as logger } from "./logger.js";
32
32
  const CONFIG_DIR = join(homedir(), ".clawmoney");
33
33
  const CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
34
34
  const PID_FILE = join(CONFIG_DIR, "relay.pid");
35
+ // Default execution mode is `api` as of 0.14.7. The `cli` fallback is still
36
+ // supported — set `relay.execution_mode: cli` in ~/.clawmoney/config.yaml
37
+ // or export CLAWMONEY_RELAY_EXECUTION_MODE=cli at launch — but new
38
+ // providers get the direct-API path by default because:
39
+ // - Every spawnCli() round-trip burns 2-5 seconds of cold start, which
40
+ // is far too much for a request/response relay where buyers expect
41
+ // sub-second handoff.
42
+ // - Each subprocess consumes its own RAM + file handles; API mode runs
43
+ // hundreds of concurrent calls out of one Node process.
44
+ // - The fingerprint gap that used to make CLI mode "safer" is now
45
+ // closed — 0.14.0–0.14.6 ported the real CLI's attribution hash,
46
+ // streaming transport, thinking config, dynamic beta header, session
47
+ // masking, Gemini startup warmup, and Codex per-turn prewarm. API
48
+ // mode now matches real-CLI wire shape on every upstream.
49
+ // CLI mode will be removed entirely in 0.15.0 once we've observed a
50
+ // week of API-mode-default in production.
35
51
  const DEFAULT_RELAY = {
36
52
  cli_type: "claude",
37
- execution_mode: "cli",
53
+ execution_mode: "api",
38
54
  model: "claude-opus-4-6",
39
55
  mode: "chat",
40
56
  concurrency: 5,
@@ -401,7 +401,7 @@ export async function preflightCodexApi(config) {
401
401
  // reasoning (optional but almost always present via default_reasoning_level),
402
402
  // store, stream, include, client_metadata (with installation_id + window_id +
403
403
  // turn_metadata)
404
- function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
404
+ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
405
405
  // `client_metadata` is a flat string-to-string map. Real CLI populates
406
406
  // it via build_ws_client_metadata() (client.rs:575-605). The keys look
407
407
  // like HTTP header names but they're JSON fields.
@@ -410,7 +410,7 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
410
410
  "x-codex-window-id": `${sessionId}:${windowGeneration}`,
411
411
  "x-codex-turn-metadata": turnMetadataHeader,
412
412
  };
413
- return {
413
+ const frame = {
414
414
  type: "response.create",
415
415
  model,
416
416
  instructions: RELAY_INSTRUCTIONS,
@@ -439,6 +439,15 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
439
439
  include: ["reasoning.encrypted_content"],
440
440
  client_metadata: clientMetadata,
441
441
  };
442
+ if (warmup) {
443
+ // Real CLI's prewarm flow sets `generate: false` on the first frame
444
+ // of each turn (codex-rs/core/src/client.rs:1283-1285). The server
445
+ // replies with a response.completed event but does NOT generate
446
+ // tokens, so the warmup is cheap. The real frame then follows on
447
+ // the SAME WebSocket session.
448
+ frame.generate = false;
449
+ }
450
+ return frame;
442
451
  }
443
452
  function handleFrame(raw, acc) {
444
453
  let evt;
@@ -681,12 +690,28 @@ async function doCallCodexApi(opts) {
681
690
  turn_id: randomUUID(),
682
691
  sandbox: platformSandboxTag,
683
692
  });
684
- // Build the WS request frame with the just-built turn metadata so
685
- // the frame's `client_metadata["x-codex-turn-metadata"]` matches the
686
- // `x-codex-turn-metadata` HTTP header on the same handshake — real
687
- // CLI sends them both and they carry the same value.
688
- const frame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration);
689
- const frameJson = JSON.stringify(frame);
693
+ // Build TWO frames for the same WS session. Real Codex CLI's turn
694
+ // flow is:
695
+ // 1. open WebSocket
696
+ // 2. send prewarm frame `{...request, generate: false}`
697
+ // 3. wait for response.completed (server returns completed with
698
+ // no generated tokens — warmup is cheap)
699
+ // 4. send the real frame on the SAME connection
700
+ // 5. wait for response.completed with the actual stream output
701
+ // 6. close WebSocket
702
+ // See codex-rs/core/src/client.rs:1377-1425 (prewarm_websocket) and
703
+ // lines 1283-1285 (`if warmup { ws_payload.generate = Some(false); }`).
704
+ //
705
+ // Relay accounts that skip steps 2-3 stick out: the account's entire
706
+ // traffic history shows zero prewarm frames, while every real CLI
707
+ // user's account shows exactly one prewarm per turn. We mirror the
708
+ // full two-phase flow to eliminate this signal.
709
+ const warmupFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
710
+ /*warmup*/ true);
711
+ const realFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
712
+ /*warmup*/ false);
713
+ const warmupFrameJson = JSON.stringify(warmupFrame);
714
+ const realFrameJson = JSON.stringify(realFrame);
690
715
  // Build handshake headers to match Codex CLI 0.118's real upgrade
691
716
  // request. Key sources:
692
717
  // codex-rs/core/src/client.rs:771-798 → build_websocket_headers
@@ -764,8 +789,15 @@ async function doCallCodexApi(opts) {
764
789
  }
765
790
  throw err;
766
791
  }
767
- // Connection is open — send the first (and only) client frame and
768
- // accumulate server frames until we see a terminal event.
792
+ // Connection is open. Run the two-phase (prewarm, then real) flow on the
793
+ // same WebSocket session. Phase state machine:
794
+ // - phase = "warmup": server frames are consumed only to detect
795
+ // response.completed. Text / usage deltas are ignored because
796
+ // generate=false suppresses them (and even if the server sends
797
+ // something, we want the real request's numbers, not the
798
+ // warmup's).
799
+ // - phase = "real": server frames populate the shared accumulator
800
+ // as before; response.completed finishes the promise.
769
801
  const { ws } = dialed;
770
802
  const acc = {
771
803
  text: "",
@@ -777,6 +809,7 @@ async function doCallCodexApi(opts) {
777
809
  };
778
810
  let resolved = false;
779
811
  const result = await new Promise((resolve) => {
812
+ let phase = "warmup";
780
813
  const finish = (r) => {
781
814
  if (resolved)
782
815
  return;
@@ -797,6 +830,29 @@ async function doCallCodexApi(opts) {
797
830
  error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
798
831
  });
799
832
  }, WS_OVERALL_TIMEOUT_MS);
833
+ // Scratch accumulator used for the warmup phase. Real CLI throws
834
+ // warmup output away (client.rs:1408-1417 just reads until
835
+ // Completed and discards everything else).
836
+ const warmupAcc = {
837
+ text: "",
838
+ inputTokens: 0,
839
+ outputTokens: 0,
840
+ cacheReadTokens: 0,
841
+ model: opts.model,
842
+ terminal: false,
843
+ };
844
+ const sendFrame = (frameJson) => {
845
+ try {
846
+ ws.send(frameJson, (sendErr) => {
847
+ if (sendErr) {
848
+ finish({ ok: false, retriable: true, error: sendErr });
849
+ }
850
+ });
851
+ }
852
+ catch (err) {
853
+ finish({ ok: false, retriable: true, error: err });
854
+ }
855
+ };
800
856
  ws.on("message", (data, _isBinary) => {
801
857
  const text = Buffer.isBuffer(data)
802
858
  ? data.toString("utf-8")
@@ -804,7 +860,8 @@ async function doCallCodexApi(opts) {
804
860
  ? Buffer.concat(data).toString("utf-8")
805
861
  : Buffer.from(data).toString("utf-8");
806
862
  // Frames are individual JSON objects (no newline framing).
807
- const outcome = handleFrame(text, acc);
863
+ const target = phase === "warmup" ? warmupAcc : acc;
864
+ const outcome = handleFrame(text, target);
808
865
  if (outcome.rateLimit && rateGuard) {
809
866
  // Soft hint — record but don't kill this request. Next request will
810
867
  // hit the cooldown check at the guard level.
@@ -817,16 +874,25 @@ async function doCallCodexApi(opts) {
817
874
  retriable: false,
818
875
  error: new Error(`Codex upstream error: ${outcome.error}`),
819
876
  });
877
+ return;
820
878
  }
821
- else {
822
- acc.terminal = true;
823
- finish({ ok: true });
879
+ if (phase === "warmup") {
880
+ // Warmup done — advance phase and send the real frame on
881
+ // the same WebSocket. Do NOT close the socket here; real
882
+ // CLI keeps the connection open so the real request can
883
+ // reuse it.
884
+ phase = "real";
885
+ sendFrame(realFrameJson);
886
+ return;
824
887
  }
888
+ // Real phase completed.
889
+ acc.terminal = true;
890
+ finish({ ok: true });
825
891
  }
826
892
  });
827
893
  ws.on("close", (code, reason) => {
828
894
  if (acc.terminal)
829
- return; // normal close after terminal event
895
+ return; // normal close after real-phase terminal event
830
896
  finish({
831
897
  ok: false,
832
898
  retriable: true,
@@ -836,17 +902,11 @@ async function doCallCodexApi(opts) {
836
902
  ws.on("error", (err) => {
837
903
  finish({ ok: false, retriable: true, error: err });
838
904
  });
839
- // Send the request frame.
840
- try {
841
- ws.send(frameJson, (sendErr) => {
842
- if (sendErr) {
843
- finish({ ok: false, retriable: true, error: sendErr });
844
- }
845
- });
846
- }
847
- catch (err) {
848
- finish({ ok: false, retriable: true, error: err });
849
- }
905
+ // Phase 1: send the warmup frame (generate=false). The server
906
+ // responds with response.completed without generating tokens;
907
+ // our message handler then transitions to phase "real" and sends
908
+ // the real frame on this same connection.
909
+ sendFrame(warmupFrameJson);
850
910
  });
851
911
  if (!result.ok) {
852
912
  if (result.retriable && transientAttempt < MAX_TRANSIENT_RETRIES) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.14.5",
3
+ "version": "0.14.7",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {