clawmoney 0.14.5 → 0.14.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/relay.js +14 -1
- package/dist/relay/provider.js +17 -1
- package/dist/relay/upstream/codex-api.js +86 -26
- package/package.json +1 -1
package/dist/commands/relay.js
CHANGED
|
@@ -97,7 +97,20 @@ export async function relayRegisterCommand(options) {
|
|
|
97
97
|
const discountPct = Math.round(RELAY_DISCOUNT * 100);
|
|
98
98
|
console.log(chalk.dim(` Buyers pay ${discountPct}% of the official API price — a ${100 - discountPct}% discount applied by the Hub.`));
|
|
99
99
|
console.log("");
|
|
100
|
-
console.log(chalk.
|
|
100
|
+
console.log(chalk.bold(" Next steps"));
|
|
101
|
+
console.log(chalk.dim(` 1. Start the daemon:`));
|
|
102
|
+
console.log(chalk.dim(` clawmoney relay start`));
|
|
103
|
+
if (process.platform === "darwin") {
|
|
104
|
+
console.log(chalk.dim(` 2. (macOS) Install the daemon as a launchd user agent so it`));
|
|
105
|
+
console.log(chalk.dim(` survives logouts AND keeps macOS Keychain unlocked for`));
|
|
106
|
+
console.log(chalk.dim(` Claude API mode (SSH shells can't read a locked Keychain):`));
|
|
107
|
+
console.log(chalk.dim(` ./scripts/install-daemon-launchd.sh`));
|
|
108
|
+
console.log(chalk.dim(` (from the clawmoney-cli repo; see scripts/README for details)`));
|
|
109
|
+
}
|
|
110
|
+
console.log("");
|
|
111
|
+
console.log(chalk.dim(` Tip: the daemon now defaults to direct-API mode (execution_mode: api)`));
|
|
112
|
+
console.log(chalk.dim(` for ~10x lower latency per request. To fall back to subprocess-per-`));
|
|
113
|
+
console.log(chalk.dim(` request mode, set \`relay.execution_mode: cli\` in ~/.clawmoney/config.yaml.`));
|
|
101
114
|
}
|
|
102
115
|
catch (err) {
|
|
103
116
|
regSpinner.fail(chalk.red("Registration failed"));
|
package/dist/relay/provider.js
CHANGED
|
@@ -32,9 +32,25 @@ import { relayLogger as logger } from "./logger.js";
|
|
|
32
32
|
const CONFIG_DIR = join(homedir(), ".clawmoney");
|
|
33
33
|
const CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
|
|
34
34
|
const PID_FILE = join(CONFIG_DIR, "relay.pid");
|
|
35
|
+
// Default execution mode is `api` as of 0.14.7. The `cli` fallback is still
|
|
36
|
+
// supported — set `relay.execution_mode: cli` in ~/.clawmoney/config.yaml
|
|
37
|
+
// or export CLAWMONEY_RELAY_EXECUTION_MODE=cli at launch — but new
|
|
38
|
+
// providers get the direct-API path by default because:
|
|
39
|
+
// - Every spawnCli() round-trip burns 2-5 seconds of cold start, which
|
|
40
|
+
// is far too much for a request/response relay where buyers expect
|
|
41
|
+
// sub-second handoff.
|
|
42
|
+
// - Each subprocess consumes its own RAM + file handles; API mode runs
|
|
43
|
+
// hundreds of concurrent calls out of one Node process.
|
|
44
|
+
// - The fingerprint gap that used to make CLI mode "safer" is now
|
|
45
|
+
// closed — 0.14.0–0.14.6 ported the real CLI's attribution hash,
|
|
46
|
+
// streaming transport, thinking config, dynamic beta header, session
|
|
47
|
+
// masking, Gemini startup warmup, and Codex per-turn prewarm. API
|
|
48
|
+
// mode now matches real-CLI wire shape on every upstream.
|
|
49
|
+
// CLI mode will be removed entirely in 0.15.0 once we've observed a
|
|
50
|
+
// week of API-mode-default in production.
|
|
35
51
|
const DEFAULT_RELAY = {
|
|
36
52
|
cli_type: "claude",
|
|
37
|
-
execution_mode: "
|
|
53
|
+
execution_mode: "api",
|
|
38
54
|
model: "claude-opus-4-6",
|
|
39
55
|
mode: "chat",
|
|
40
56
|
concurrency: 5,
|
|
@@ -401,7 +401,7 @@ export async function preflightCodexApi(config) {
|
|
|
401
401
|
// reasoning (optional but almost always present via default_reasoning_level),
|
|
402
402
|
// store, stream, include, client_metadata (with installation_id + window_id +
|
|
403
403
|
// turn_metadata)
|
|
404
|
-
function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration) {
|
|
404
|
+
function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
|
|
405
405
|
// `client_metadata` is a flat string-to-string map. Real CLI populates
|
|
406
406
|
// it via build_ws_client_metadata() (client.rs:575-605). The keys look
|
|
407
407
|
// like HTTP header names but they're JSON fields.
|
|
@@ -410,7 +410,7 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
|
|
|
410
410
|
"x-codex-window-id": `${sessionId}:${windowGeneration}`,
|
|
411
411
|
"x-codex-turn-metadata": turnMetadataHeader,
|
|
412
412
|
};
|
|
413
|
-
|
|
413
|
+
const frame = {
|
|
414
414
|
type: "response.create",
|
|
415
415
|
model,
|
|
416
416
|
instructions: RELAY_INSTRUCTIONS,
|
|
@@ -439,6 +439,15 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
|
|
|
439
439
|
include: ["reasoning.encrypted_content"],
|
|
440
440
|
client_metadata: clientMetadata,
|
|
441
441
|
};
|
|
442
|
+
if (warmup) {
|
|
443
|
+
// Real CLI's prewarm flow sets `generate: false` on the first frame
|
|
444
|
+
// of each turn (codex-rs/core/src/client.rs:1283-1285). The server
|
|
445
|
+
// replies with a response.completed event but does NOT generate
|
|
446
|
+
// tokens, so the warmup is cheap. The real frame then follows on
|
|
447
|
+
// the SAME WebSocket session.
|
|
448
|
+
frame.generate = false;
|
|
449
|
+
}
|
|
450
|
+
return frame;
|
|
442
451
|
}
|
|
443
452
|
function handleFrame(raw, acc) {
|
|
444
453
|
let evt;
|
|
@@ -681,12 +690,28 @@ async function doCallCodexApi(opts) {
|
|
|
681
690
|
turn_id: randomUUID(),
|
|
682
691
|
sandbox: platformSandboxTag,
|
|
683
692
|
});
|
|
684
|
-
// Build the WS
|
|
685
|
-
//
|
|
686
|
-
//
|
|
687
|
-
//
|
|
688
|
-
|
|
689
|
-
|
|
693
|
+
// Build TWO frames for the same WS session — real Codex CLI's turn
|
|
694
|
+
// flow is:
|
|
695
|
+
// 1. open WebSocket
|
|
696
|
+
// 2. send prewarm frame `{...request, generate: false}`
|
|
697
|
+
// 3. wait for response.completed (server returns completed with
|
|
698
|
+
// no generated tokens — warmup is cheap)
|
|
699
|
+
// 4. send the real frame on the SAME connection
|
|
700
|
+
// 5. wait for response.completed with the actual stream output
|
|
701
|
+
// 6. close WebSocket
|
|
702
|
+
// See codex-rs/core/src/client.rs:1377-1425 (prewarm_websocket) and
|
|
703
|
+
// lines 1283-1285 (`if warmup { ws_payload.generate = Some(false); }`).
|
|
704
|
+
//
|
|
705
|
+
// Relay accounts that skip step 2-3 stick out: the account's entire
|
|
706
|
+
// traffic history shows zero prewarm frames, while every real CLI
|
|
707
|
+
// user's account shows exactly one prewarm per turn. We mirror the
|
|
708
|
+
// full two-phase flow to eliminate this signal.
|
|
709
|
+
const warmupFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
|
|
710
|
+
/*warmup*/ true);
|
|
711
|
+
const realFrame = buildCodexRequestFrame(prompt, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
|
|
712
|
+
/*warmup*/ false);
|
|
713
|
+
const warmupFrameJson = JSON.stringify(warmupFrame);
|
|
714
|
+
const realFrameJson = JSON.stringify(realFrame);
|
|
690
715
|
// Build handshake headers to match Codex CLI 0.118's real upgrade
|
|
691
716
|
// request. Key sources:
|
|
692
717
|
// codex-rs/core/src/client.rs:771-798 → build_websocket_headers
|
|
@@ -764,8 +789,15 @@ async function doCallCodexApi(opts) {
|
|
|
764
789
|
}
|
|
765
790
|
throw err;
|
|
766
791
|
}
|
|
767
|
-
// Connection is open
|
|
768
|
-
//
|
|
792
|
+
// Connection is open. Run the two-phase prewarm → real flow on the
|
|
793
|
+
// same WebSocket session. Phase state machine:
|
|
794
|
+
// - phase = "warmup": server frames are consumed only to detect
|
|
795
|
+
// response.completed. Text / usage deltas are ignored because
|
|
796
|
+
// generate=false suppresses them (and even if the server sends
|
|
797
|
+
// something, we want the real request's numbers, not the
|
|
798
|
+
// warmup's).
|
|
799
|
+
// - phase = "real": server frames populate the shared accumulator
|
|
800
|
+
// as before; response.completed finishes the promise.
|
|
769
801
|
const { ws } = dialed;
|
|
770
802
|
const acc = {
|
|
771
803
|
text: "",
|
|
@@ -777,6 +809,7 @@ async function doCallCodexApi(opts) {
|
|
|
777
809
|
};
|
|
778
810
|
let resolved = false;
|
|
779
811
|
const result = await new Promise((resolve) => {
|
|
812
|
+
let phase = "warmup";
|
|
780
813
|
const finish = (r) => {
|
|
781
814
|
if (resolved)
|
|
782
815
|
return;
|
|
@@ -797,6 +830,29 @@ async function doCallCodexApi(opts) {
|
|
|
797
830
|
error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
|
|
798
831
|
});
|
|
799
832
|
}, WS_OVERALL_TIMEOUT_MS);
|
|
833
|
+
// Scratch accumulator used for the warmup phase. Real CLI throws
|
|
834
|
+
// warmup output away (client.rs:1408-1417 just reads until
|
|
835
|
+
// Completed and discards everything else).
|
|
836
|
+
const warmupAcc = {
|
|
837
|
+
text: "",
|
|
838
|
+
inputTokens: 0,
|
|
839
|
+
outputTokens: 0,
|
|
840
|
+
cacheReadTokens: 0,
|
|
841
|
+
model: opts.model,
|
|
842
|
+
terminal: false,
|
|
843
|
+
};
|
|
844
|
+
const sendFrame = (frameJson) => {
|
|
845
|
+
try {
|
|
846
|
+
ws.send(frameJson, (sendErr) => {
|
|
847
|
+
if (sendErr) {
|
|
848
|
+
finish({ ok: false, retriable: true, error: sendErr });
|
|
849
|
+
}
|
|
850
|
+
});
|
|
851
|
+
}
|
|
852
|
+
catch (err) {
|
|
853
|
+
finish({ ok: false, retriable: true, error: err });
|
|
854
|
+
}
|
|
855
|
+
};
|
|
800
856
|
ws.on("message", (data, _isBinary) => {
|
|
801
857
|
const text = Buffer.isBuffer(data)
|
|
802
858
|
? data.toString("utf-8")
|
|
@@ -804,7 +860,8 @@ async function doCallCodexApi(opts) {
|
|
|
804
860
|
? Buffer.concat(data).toString("utf-8")
|
|
805
861
|
: Buffer.from(data).toString("utf-8");
|
|
806
862
|
// Frames are individual JSON objects (no newline framing).
|
|
807
|
-
const
|
|
863
|
+
const target = phase === "warmup" ? warmupAcc : acc;
|
|
864
|
+
const outcome = handleFrame(text, target);
|
|
808
865
|
if (outcome.rateLimit && rateGuard) {
|
|
809
866
|
// Soft hint — record but don't kill this request. Next request will
|
|
810
867
|
// hit the cooldown check at the guard level.
|
|
@@ -817,16 +874,25 @@ async function doCallCodexApi(opts) {
|
|
|
817
874
|
retriable: false,
|
|
818
875
|
error: new Error(`Codex upstream error: ${outcome.error}`),
|
|
819
876
|
});
|
|
877
|
+
return;
|
|
820
878
|
}
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
879
|
+
if (phase === "warmup") {
|
|
880
|
+
// Warmup done — advance phase and send the real frame on
|
|
881
|
+
// the same WebSocket. Do NOT close the socket here; real
|
|
882
|
+
// CLI keeps the connection open so the real request can
|
|
883
|
+
// reuse it.
|
|
884
|
+
phase = "real";
|
|
885
|
+
sendFrame(realFrameJson);
|
|
886
|
+
return;
|
|
824
887
|
}
|
|
888
|
+
// Real phase completed.
|
|
889
|
+
acc.terminal = true;
|
|
890
|
+
finish({ ok: true });
|
|
825
891
|
}
|
|
826
892
|
});
|
|
827
893
|
ws.on("close", (code, reason) => {
|
|
828
894
|
if (acc.terminal)
|
|
829
|
-
return; // normal close after terminal event
|
|
895
|
+
return; // normal close after real-phase terminal event
|
|
830
896
|
finish({
|
|
831
897
|
ok: false,
|
|
832
898
|
retriable: true,
|
|
@@ -836,17 +902,11 @@ async function doCallCodexApi(opts) {
|
|
|
836
902
|
ws.on("error", (err) => {
|
|
837
903
|
finish({ ok: false, retriable: true, error: err });
|
|
838
904
|
});
|
|
839
|
-
//
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
}
|
|
845
|
-
});
|
|
846
|
-
}
|
|
847
|
-
catch (err) {
|
|
848
|
-
finish({ ok: false, retriable: true, error: err });
|
|
849
|
-
}
|
|
905
|
+
// Phase 1: send the warmup frame (generate=false). The server
|
|
906
|
+
// responds with response.completed without generating tokens;
|
|
907
|
+
// our message handler then transitions to phase "real" and sends
|
|
908
|
+
// the real frame on this same connection.
|
|
909
|
+
sendFrame(warmupFrameJson);
|
|
850
910
|
});
|
|
851
911
|
if (!result.ok) {
|
|
852
912
|
if (result.retriable && transientAttempt < MAX_TRANSIENT_RETRIES) {
|