clawmoney 0.15.7 → 0.15.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { homedir } from "node:os";
4
4
  import YAML from "yaml";
5
5
  import { RelayWsClient } from "./ws-client.js";
6
6
  import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
7
- import { callCodexApi, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
7
+ import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
8
8
  import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
9
9
  import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
10
10
  /**
@@ -192,11 +192,25 @@ async function executeRelayRequest(request, config, sendChunk) {
192
192
  // antigravity → daily-cloudcode-pa). Each handler has its own
193
193
  // fingerprint file and rate-guard instance.
194
194
  if (cliType === "codex") {
195
- parsed = await callCodexApi({
196
- prompt,
197
- model,
198
- maxTokens: max_budget_usd ? undefined : 4096,
199
- });
195
+ // Same two-mode pattern as claude: passthrough when the Hub forwards
196
+ // a real Responses API body (used by /v1/responses endpoint for
197
+ // Codex CLI drop-in replacement), template mode otherwise (used by
198
+ // the OpenAI-compat /v1/chat/completions classic endpoint).
199
+ if (request.passthrough_body) {
200
+ parsed = await callCodexApiPassthrough({
201
+ clientBody: request.passthrough_body,
202
+ model,
203
+ onRawEvent: sendChunk,
204
+ });
205
+ }
206
+ else {
207
+ parsed = await callCodexApi({
208
+ prompt,
209
+ model,
210
+ maxTokens: max_budget_usd ? undefined : 4096,
211
+ onRawEvent: sendChunk,
212
+ });
213
+ }
200
214
  }
201
215
  else if (cliType === "gemini") {
202
216
  parsed = await callGeminiApi({
@@ -38,5 +38,12 @@ export interface CallCodexApiOptions {
38
38
  prompt: string;
39
39
  model: string;
40
40
  maxTokens?: number;
41
+ onRawEvent?: (sse: string) => void;
41
42
  }
42
43
  export declare function callCodexApi(opts: CallCodexApiOptions): Promise<ParsedOutput>;
44
/**
 * Options accepted by {@link callCodexApiPassthrough}.
 */
export interface CallCodexApiPassthroughOptions {
    /** Model name; the daemon forces this onto the upstream frame. */
    model: string;
    /**
     * The client's raw Responses API body. The passthrough implementation
     * rejects bodies whose `input` is not a non-empty array.
     */
    clientBody: Record<string, unknown>;
    /**
     * Optional sink for streaming. Receives each forwarded upstream frame as
     * one SSE block of the form `event: <type>\ndata: <json>\n\n`.
     */
    onRawEvent?: (sse: string) => void;
}

/**
 * Passthrough variant of the Codex upstream call: forwards the client's own
 * Responses API body instead of building one from a prompt template.
 */
export declare function callCodexApiPassthrough(opts: CallCodexApiPassthroughOptions): Promise<ParsedOutput>;
@@ -449,6 +449,47 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
449
449
  }
450
450
  return frame;
451
451
  }
452
/**
 * Patch a raw ChatGPT WS frame before it is forwarded to the Hub as SSE.
 *
 * ChatGPT's internal `response.completed` frames come from a backend that
 * does not populate `usage.total_tokens`; the Codex CLI parser is strict
 * about that field ("failed to parse ResponseCompleted: missing field
 * `total_tokens`"), so it is injected here as input_tokens + output_tokens.
 * The nested `input_tokens_details` / `output_tokens_details` objects are
 * also guaranteed to exist, independently of whether `total_tokens` was
 * already present.
 *
 * Only `response.completed` and `response.done` frames that carry a
 * `response.usage` object are touched. Every other input, including
 * non-JSON text and frames that already have all three fields, is returned
 * byte-for-byte unchanged, so a malformed frame never turns into a crash.
 *
 * @param {string} raw - One upstream WS frame as JSON text.
 * @returns {string} The original text, or the re-serialised patched frame.
 */
function patchCodexFrameForForwarding(raw) {
    // Arrays are `typeof "object"` too, but properties set on an array are
    // dropped by JSON.stringify, so they must not count as valid containers.
    const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    // Coerce a token count; anything non-numeric becomes 0 so the sum can
    // never be NaN (which JSON.stringify would emit as `null`).
    const toCount = (value) => {
        const n = Number(value ?? 0);
        return Number.isFinite(n) ? n : 0;
    };
    let evt;
    try {
        evt = JSON.parse(raw);
    }
    catch {
        return raw;
    }
    if (!isPlainObject(evt)) {
        return raw;
    }
    const type = evt["type"];
    if (type !== "response.completed" && type !== "response.done") {
        return raw;
    }
    const resp = evt["response"];
    if (!isPlainObject(resp)) {
        return raw;
    }
    const usage = resp["usage"];
    if (!isPlainObject(usage)) {
        return raw;
    }
    let changed = false;
    if (typeof usage["total_tokens"] !== "number") {
        usage["total_tokens"] = toCount(usage["input_tokens"]) + toCount(usage["output_tokens"]);
        changed = true;
    }
    // Checked even when total_tokens was already present: the strict client
    // schema needs the *_details objects regardless.
    if (!isPlainObject(usage["input_tokens_details"])) {
        usage["input_tokens_details"] = { cached_tokens: toCount(usage["cache_read_input_tokens"]) };
        changed = true;
    }
    if (!isPlainObject(usage["output_tokens_details"])) {
        usage["output_tokens_details"] = { reasoning_tokens: 0 };
        changed = true;
    }
    // Avoid re-serialising frames that needed no patch so they pass through
    // exactly as the upstream sent them.
    return changed ? JSON.stringify(evt) : raw;
}
452
493
  function handleFrame(raw, acc) {
453
494
  let evt;
454
495
  try {
@@ -645,6 +686,12 @@ export async function callCodexApi(opts) {
645
686
  configureRateGuard();
646
687
  return rateGuard.run(() => doCallCodexApi(opts));
647
688
  }
689
/**
 * Public entry point for passthrough mode. Configures the dispatcher, makes
 * sure the rate guard exists, then runs the passthrough call through
 * `rateGuard.run`.
 *
 * @param opts - Passthrough options (`clientBody`, `model`, optional `onRawEvent`).
 * @returns Whatever `rateGuard.run` returns for the wrapped passthrough call.
 */
export async function callCodexApiPassthrough(opts) {
    configureDispatcher();
    if (!rateGuard) {
        configureRateGuard();
    }
    const invokePassthrough = () => doCallCodexApiPassthrough(opts);
    return rateGuard.run(invokePassthrough);
}
648
695
  async function doCallCodexApi(opts) {
649
696
  const prompt = (opts.prompt ?? "").trim();
650
697
  if (!prompt) {
@@ -861,6 +908,26 @@ async function doCallCodexApi(opts) {
861
908
  : Buffer.from(data).toString("utf-8");
862
909
  // Frames are individual JSON objects (no newline framing).
863
910
  const target = phase === "warmup" ? warmupAcc : acc;
911
+ // Forward raw frames to the caller (when streaming is requested)
912
+ // only for the real phase — warmup frames are daemon-internal and
913
+ // never reach the end client. Each frame is re-emitted as an
914
+ // Anthropic-style SSE block where `event:` is the frame type
915
+ // (response.output_text.delta, response.completed, etc.), which
916
+ // matches OpenAI's public Responses API SSE wire format exactly.
917
+ if (phase === "real" && opts.onRawEvent) {
918
+ try {
919
+ const parsedFrame = JSON.parse(text);
920
+ const frameType = typeof parsedFrame.type === "string" ? parsedFrame.type : "message";
921
+ // Inject usage.total_tokens on response.completed frames so
922
+ // the end client's strict parser doesn't abort the stream.
923
+ const patched = patchCodexFrameForForwarding(text);
924
+ opts.onRawEvent(`event: ${frameType}\ndata: ${patched}\n\n`);
925
+ }
926
+ catch {
927
+ // Non-JSON frame — forward as a plain data event.
928
+ opts.onRawEvent(`event: message\ndata: ${text}\n\n`);
929
+ }
930
+ }
864
931
  const outcome = handleFrame(text, target);
865
932
  if (outcome.rateLimit && rateGuard) {
866
933
  // Soft hint — record but don't kill this request. Next request will
@@ -939,3 +1006,298 @@ async function doCallCodexApi(opts) {
939
1006
  return parsed;
940
1007
  }
941
1008
  }
1009
// ── Passthrough frame builder ─────────────────────────────────────────────
/**
 * Build a ChatGPT backend-api/codex/responses WS frame from the buyer's raw
 * Responses API body.
 *
 * Daemon-controlled fields (type, model, store, stream, include,
 * client_metadata, generate) are always overwritten. Every other field —
 * input, instructions, tools, tool_choice, reasoning, parallel_tool_calls,
 * etc. — is preserved when the buyer supplied a usable value and defaulted
 * otherwise, so the end client's agentic loop works end-to-end.
 *
 * The order of the assignments below is kept stable on purpose: it decides
 * the key order of the serialised frame for keys the buyer did not send.
 *
 * @param clientBody - The buyer's Responses API body; never mutated.
 * @param model - Model name forced onto the frame.
 * @param fingerprint - Daemon fingerprint; `installation_id` is read from it.
 * @param sessionId - Session id used in the window id.
 * @param turnMetadataHeader - Value for `x-codex-turn-metadata`.
 * @param windowGeneration - Generation suffix of the window id.
 * @param warmup - When true the frame carries `generate: false` (prewarm).
 * @returns A new frame object ready to be JSON-serialised.
 */
function buildCodexPassthroughFrame(clientBody, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
    // Shallow clone so the buyer's object is not mutated across retries.
    const frame = { ...clientBody };
    // Daemon-controlled envelope fields — always forced.
    frame.type = "response.create";
    frame.model = model;
    frame.store = false;
    frame.stream = true;
    // Real CLI sends include: ["reasoning.encrypted_content"] when reasoning
    // is enabled. Reasoning is always set below (from client or default), so
    // include it for fingerprint parity.
    frame.include = ["reasoning.encrypted_content"];
    // Daemon fingerprint injection — always set from our own fingerprint,
    // overriding anything the buyer may have put in client_metadata.
    frame.client_metadata = {
        "x-codex-installation-id": fingerprint.installation_id,
        "x-codex-window-id": `${sessionId}:${windowGeneration}`,
        "x-codex-turn-metadata": turnMetadataHeader,
    };
    // Reasoning: keep the buyer's config when it is a real object; otherwise
    // inject the real-CLI default. An array is `typeof "object"` but is not a
    // valid reasoning config, so it is replaced too.
    const hasReasoningConfig = Boolean(frame.reasoning) &&
        typeof frame.reasoning === "object" &&
        !Array.isArray(frame.reasoning);
    if (!hasReasoningConfig) {
        frame.reasoning = { effort: "medium", summary: "auto" };
    }
    // Ensure tools is an array (real CLI always sends tools, even if empty).
    if (!Array.isArray(frame.tools)) {
        frame.tools = [];
    }
    // Default tool_choice if not set.
    if (frame.tool_choice === undefined || frame.tool_choice === null) {
        frame.tool_choice = "auto";
    }
    // Default parallel_tool_calls to false (matches current template). An
    // explicit JSON `null` counts as "not set", same as for tool_choice.
    if (frame.parallel_tool_calls === undefined || frame.parallel_tool_calls === null) {
        frame.parallel_tool_calls = false;
    }
    // Instructions: if the buyer didn't send a non-empty string, fall back to
    // the template mode's RELAY_INSTRUCTIONS so the model still has guidance.
    if (typeof frame.instructions !== "string" || !frame.instructions) {
        frame.instructions = RELAY_INSTRUCTIONS;
    }
    if (warmup) {
        // Real CLI's prewarm flow: first frame of each turn has generate:false.
        frame.generate = false;
    }
    else {
        // Drop any generate flag carried in by the buyer's body.
        delete frame.generate;
    }
    return frame;
}
1070
// ── Passthrough entry point ───────────────────────────────────────────────
//
// Mirrors doCallCodexApi with frame-building swapped for
// buildCodexPassthroughFrame. Kept as a separate copy so passthrough-specific
// bugs can be fixed without risking a regression in the template path.
/**
 * Run one passthrough turn against the Codex WebSocket upstream.
 *
 * Flow per attempt: get credentials, build a warmup frame (generate:false)
 * and a real frame from the client body, dial the WebSocket, send the warmup
 * frame, and send the real frame once the warmup reaches its terminal frame.
 * Frames received during the real phase are forwarded to `opts.onRawEvent`
 * as SSE blocks (`event: <type>\ndata: <json>\n\n`).
 *
 * Retry policy:
 *  - 401 on upgrade: credentials cache is cleared and the dial retried once.
 *  - 5xx on upgrade, transport errors and retriable mid-session failures are
 *    retried with exponential backoff up to MAX_TRANSIENT_RETRIES times.
 *  - Once any real-phase frame has been forwarded to `onRawEvent`, the turn
 *    is never replayed: a retry would stream the response a second time to
 *    a client that already consumed part of it.
 *
 * @param opts - `{ clientBody, model, onRawEvent? }`.
 * @returns The parsed output (text, sessionId, usage, model, costUsd).
 * @throws Error when `clientBody.input` is not a non-empty array, on a 429
 *   or other non-retriable upgrade status, on an upstream error frame, on
 *   the overall WS timeout, when `onRawEvent` throws, or when retries are
 *   exhausted.
 */
async function doCallCodexApiPassthrough(opts) {
    // Minimal body validation — only `input` (non-empty array) is required
    // here; everything else is defaulted by the frame builder.
    const input = opts.clientBody.input;
    if (!Array.isArray(input) || input.length === 0) {
        throw new Error("Passthrough body missing `input` array");
    }
    const fingerprint = loadCodexFingerprint();
    const sessionId = getMaskedSessionId();
    const windowGeneration = 0;
    // Loop-invariant values, computed once instead of on every attempt.
    const windowId = `${sessionId}:${windowGeneration}`;
    const platformSandboxTag = process.platform === "darwin"
        ? "seatbelt"
        : process.platform === "linux"
            ? "seccomp"
            : process.platform === "win32"
                ? "windows_sandbox"
                : "none";
    // Exponential backoff with up to 500ms of jitter.
    const nextBackoffMs = (attempt) => 500 * Math.pow(2, attempt) + Math.random() * 500;
    const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
    const newAccumulator = () => ({
        text: "",
        inputTokens: 0,
        outputTokens: 0,
        cacheReadTokens: 0,
        model: opts.model,
        terminal: false,
    });
    let transientAttempt = 0;
    let hasRefreshed = false;
    while (true) {
        const creds = await getFreshCreds();
        // A fresh turn_id per attempt, so both frames are rebuilt each time.
        const turnMetadata = JSON.stringify({
            session_id: sessionId,
            turn_id: randomUUID(),
            sandbox: platformSandboxTag,
        });
        const warmupFrameJson = JSON.stringify(buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration, 
        /*warmup*/ true));
        const realFrameJson = JSON.stringify(buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration, 
        /*warmup*/ false));
        const headers = {
            "authorization": `Bearer ${creds.accessToken}`,
            "originator": fingerprint.originator,
            "openai-beta": fingerprint.openai_beta,
            "session_id": sessionId,
            "x-client-request-id": sessionId,
            "x-codex-window-id": windowId,
            "x-codex-turn-metadata": turnMetadata,
        };
        if (fingerprint.user_agent) {
            headers["user-agent"] = fingerprint.user_agent;
        }
        let dialed;
        try {
            dialed = await dialCodexWebSocket(headers);
        }
        catch (err) {
            if (err instanceof WsDialError) {
                const status = err.statusCode;
                if (status === 429) {
                    const cooldown = cooldownFromHttpHeaders(err.headers);
                    if (cooldown && rateGuard) {
                        rateGuard.triggerCooldown(cooldown.ms, cooldown.reason);
                    }
                    else if (rateGuard) {
                        // NOTE(review): this passes an absolute timestamp while the
                        // branch above passes `cooldown.ms` — confirm both use the
                        // same unit expected by triggerCooldown.
                        rateGuard.triggerCooldown(Date.now() + 5 * 60_000, "fallback 5m (no reset header)");
                    }
                    throw new Error(`Codex 429 rate-limited: ${err.bodySnippet.slice(0, 300)}`);
                }
                if (status === 401 && !hasRefreshed) {
                    logger.warn("[codex-api] 401 from upgrade (passthrough), refreshing token + retry");
                    hasRefreshed = true;
                    cachedCreds = null;
                    continue;
                }
                const isTransient = status >= 500 && status <= 599;
                if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
                    const backoffMs = nextBackoffMs(transientAttempt);
                    logger.warn(`[codex-api] upgrade ${status} (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${err.bodySnippet.slice(0, 200)}`);
                    await sleep(backoffMs);
                    transientAttempt++;
                    continue;
                }
                throw new Error(`Codex upgrade ${status}: ${err.bodySnippet.slice(0, 400)}`);
            }
            if (transientAttempt < MAX_TRANSIENT_RETRIES) {
                const backoffMs = nextBackoffMs(transientAttempt);
                logger.warn(`[codex-api] transport error (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${err.message}`);
                await sleep(backoffMs);
                transientAttempt++;
                continue;
            }
            throw err;
        }
        const { ws } = dialed;
        const acc = newAccumulator();
        let resolved = false;
        // True once at least one real-phase frame was handed to onRawEvent.
        let streamedToClient = false;
        const result = await new Promise((resolve) => {
            let phase = "warmup";
            const finish = (r) => {
                if (resolved)
                    return;
                resolved = true;
                clearTimeout(timer);
                try {
                    ws.close(1000, "done");
                }
                catch {
                    // ignore — the socket may already be closed
                }
                resolve(r);
            };
            const timer = setTimeout(() => {
                finish({
                    ok: false,
                    retriable: false,
                    error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
                });
            }, WS_OVERALL_TIMEOUT_MS);
            const warmupAcc = newAccumulator();
            const sendFrame = (frameJson) => {
                try {
                    ws.send(frameJson, (sendErr) => {
                        if (sendErr) {
                            finish({ ok: false, retriable: true, error: sendErr });
                        }
                    });
                }
                catch (err) {
                    finish({ ok: false, retriable: true, error: err });
                }
            };
            ws.on("message", (data, _isBinary) => {
                const text = Buffer.isBuffer(data)
                    ? data.toString("utf-8")
                    : Array.isArray(data)
                        ? Buffer.concat(data).toString("utf-8")
                        : Buffer.from(data).toString("utf-8");
                const target = phase === "warmup" ? warmupAcc : acc;
                // Forward raw frames to the Hub for real-time SSE streaming to
                // the end client — real phase only; warmup frames stay internal.
                if (phase === "real" && opts.onRawEvent) {
                    // Only the JSON parse is guarded here, so a throwing sink is
                    // not mistaken for a non-JSON frame and called a second time.
                    let frameType = "message";
                    let payload = text;
                    try {
                        const parsedFrame = JSON.parse(text);
                        frameType = typeof parsedFrame.type === "string" ? parsedFrame.type : "message";
                        payload = patchCodexFrameForForwarding(text);
                    }
                    catch {
                        // Non-JSON frame — forwarded as a plain `message` event.
                    }
                    streamedToClient = true;
                    try {
                        opts.onRawEvent(`event: ${frameType}\ndata: ${payload}\n\n`);
                    }
                    catch (sinkErr) {
                        // The sink failed; fail this request instead of letting
                        // the exception escape the WS event handler.
                        finish({
                            ok: false,
                            retriable: false,
                            error: sinkErr instanceof Error ? sinkErr : new Error(String(sinkErr)),
                        });
                        return;
                    }
                }
                const outcome = handleFrame(text, target);
                if (outcome.rateLimit && rateGuard) {
                    // Soft hint — recorded without failing this request.
                    rateGuard.triggerCooldown(outcome.rateLimit.ms, outcome.rateLimit.reason);
                }
                if (outcome.terminal) {
                    if (outcome.error) {
                        finish({
                            ok: false,
                            retriable: false,
                            error: new Error(`Codex upstream error: ${outcome.error}`),
                        });
                        return;
                    }
                    if (phase === "warmup") {
                        // Warmup finished — switch phases and send the real frame.
                        phase = "real";
                        sendFrame(realFrameJson);
                        return;
                    }
                    acc.terminal = true;
                    finish({ ok: true });
                }
            });
            ws.on("close", (code, reason) => {
                if (acc.terminal)
                    return;
                finish({
                    ok: false,
                    retriable: true,
                    error: new Error(`Codex WS closed early (code=${code}, reason=${reason.toString().slice(0, 200)})`),
                });
            });
            ws.on("error", (err) => {
                finish({ ok: false, retriable: true, error: err });
            });
            sendFrame(warmupFrameJson);
        });
        if (!result.ok) {
            // Never replay a turn whose output already reached the client.
            const canRetry = result.retriable &&
                !streamedToClient &&
                transientAttempt < MAX_TRANSIENT_RETRIES;
            if (canRetry) {
                const backoffMs = nextBackoffMs(transientAttempt);
                logger.warn(`[codex-api] mid-session ws error (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${result.error.message}`);
                await sleep(backoffMs);
                transientAttempt++;
                continue;
            }
            throw result.error;
        }
        const parsed = {
            text: acc.text,
            sessionId,
            usage: {
                input_tokens: acc.inputTokens,
                output_tokens: acc.outputTokens,
                cache_creation_tokens: 0,
                cache_read_tokens: acc.cacheReadTokens,
            },
            model: acc.model,
            costUsd: 0,
        };
        if (rateGuard) {
            const cost = calculateCost(opts.model, parsed.usage.input_tokens, parsed.usage.output_tokens, parsed.usage.cache_creation_tokens, parsed.usage.cache_read_tokens);
            rateGuard.recordSpend(cost.apiCost);
            parsed.costUsd = cost.apiCost;
        }
        logger.info(`[codex-api] passthrough OK model=${acc.model} in=${acc.inputTokens} out=${acc.outputTokens} cache_read=${acc.cacheReadTokens}`);
        return parsed;
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.15.7",
3
+ "version": "0.15.9",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {