clawmoney 0.15.7 → 0.15.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { homedir } from "node:os";
4
4
  import YAML from "yaml";
5
5
  import { RelayWsClient } from "./ws-client.js";
6
6
  import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
7
- import { callCodexApi, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
7
+ import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
8
8
  import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
9
9
  import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
10
10
  /**
@@ -192,11 +192,25 @@ async function executeRelayRequest(request, config, sendChunk) {
192
192
  // antigravity → daily-cloudcode-pa). Each handler has its own
193
193
  // fingerprint file and rate-guard instance.
194
194
  if (cliType === "codex") {
195
- parsed = await callCodexApi({
196
- prompt,
197
- model,
198
- maxTokens: max_budget_usd ? undefined : 4096,
199
- });
195
+ // Same two-mode pattern as claude: passthrough when the Hub forwards
196
+ // a real Responses API body (used by /v1/responses endpoint for
197
+ // Codex CLI drop-in replacement), template mode otherwise (used by
198
+ // the OpenAI-compat /v1/chat/completions classic endpoint).
199
+ if (request.passthrough_body) {
200
+ parsed = await callCodexApiPassthrough({
201
+ clientBody: request.passthrough_body,
202
+ model,
203
+ onRawEvent: sendChunk,
204
+ });
205
+ }
206
+ else {
207
+ parsed = await callCodexApi({
208
+ prompt,
209
+ model,
210
+ maxTokens: max_budget_usd ? undefined : 4096,
211
+ onRawEvent: sendChunk,
212
+ });
213
+ }
200
214
  }
201
215
  else if (cliType === "gemini") {
202
216
  parsed = await callGeminiApi({
@@ -38,5 +38,12 @@ export interface CallCodexApiOptions {
38
38
  prompt: string;
39
39
  model: string;
40
40
  maxTokens?: number;
41
+ onRawEvent?: (sse: string) => void;
41
42
  }
42
43
  export declare function callCodexApi(opts: CallCodexApiOptions): Promise<ParsedOutput>;
44
/**
 * Options accepted by {@link callCodexApiPassthrough}.
 */
export interface CallCodexApiPassthroughOptions {
    /**
     * Request body supplied by the caller; the relay passes the incoming
     * request's `passthrough_body` here.
     */
    clientBody: Record<string, unknown>;
    /** Model identifier to send upstream. */
    model: string;
    /**
     * Optional sink for streamed output. Each call receives one text block
     * of the form `event: <type>\ndata: <payload>\n\n`.
     */
    onRawEvent?: (sse: string) => void;
}
/**
 * Runs a Codex request in passthrough mode and resolves with the parsed
 * result once the upstream exchange has finished.
 */
export declare function callCodexApiPassthrough(opts: CallCodexApiPassthroughOptions): Promise<ParsedOutput>;
@@ -645,6 +645,12 @@ export async function callCodexApi(opts) {
645
645
  configureRateGuard();
646
646
  return rateGuard.run(() => doCallCodexApi(opts));
647
647
  }
648
/**
 * Public entry point for Codex passthrough mode.
 *
 * Configures the dispatcher, makes sure the module-level rate guard exists,
 * and then executes the passthrough call through that guard.
 *
 * @param opts Passthrough options (`clientBody`, `model`, optional `onRawEvent`).
 * @returns Whatever `rateGuard.run` resolves with for the passthrough call.
 */
export async function callCodexApiPassthrough(opts) {
    configureDispatcher();
    // The guard is created on first use only; later calls reuse it.
    if (!rateGuard) {
        configureRateGuard();
    }
    const runPassthrough = () => doCallCodexApiPassthrough(opts);
    return rateGuard.run(runPassthrough);
}
648
654
  async function doCallCodexApi(opts) {
649
655
  const prompt = (opts.prompt ?? "").trim();
650
656
  if (!prompt) {
@@ -861,6 +867,23 @@ async function doCallCodexApi(opts) {
861
867
  : Buffer.from(data).toString("utf-8");
862
868
  // Frames are individual JSON objects (no newline framing).
863
869
  const target = phase === "warmup" ? warmupAcc : acc;
870
+ // Forward raw frames to the caller (when streaming is requested)
871
+ // only for the real phase — warmup frames are daemon-internal and
872
+ // never reach the end client. Each frame is re-emitted as an
873
+ // Anthropic-style SSE block where `event:` is the frame type
874
+ // (response.output_text.delta, response.completed, etc.), which
875
+ // matches OpenAI's public Responses API SSE wire format exactly.
876
+ if (phase === "real" && opts.onRawEvent) {
877
+ try {
878
+ const parsedFrame = JSON.parse(text);
879
+ const frameType = typeof parsedFrame.type === "string" ? parsedFrame.type : "message";
880
+ opts.onRawEvent(`event: ${frameType}\ndata: ${text}\n\n`);
881
+ }
882
+ catch {
883
+ // Non-JSON frame — forward as a plain data event.
884
+ opts.onRawEvent(`event: message\ndata: ${text}\n\n`);
885
+ }
886
+ }
864
887
  const outcome = handleFrame(text, target);
865
888
  if (outcome.rateLimit && rateGuard) {
866
889
  // Soft hint — record but don't kill this request. Next request will
@@ -939,3 +962,297 @@ async function doCallCodexApi(opts) {
939
962
  return parsed;
940
963
  }
941
964
  }
965
// ── Passthrough frame builder ─────────────────────────────────────────────
/**
 * Builds a `response.create` WebSocket frame from the buyer's raw Responses
 * API body.
 *
 * Daemon-controlled fields (`type`, `model`, `store`, `stream`, `include`,
 * `client_metadata`, `generate`) are always overwritten. Every other field
 * (`input`, `instructions`, `tools`, `tool_choice`, `reasoning`,
 * `parallel_tool_calls`, ...) is kept as the buyer sent it, with defaults
 * filled in only where the buyer's value is missing or unusable.
 *
 * Property insertion order is preserved: buyer keys first (in their original
 * order), then any daemon keys the buyer did not send.
 *
 * @param clientBody Buyer-supplied request body; never mutated.
 * @param model Model name forced onto the frame.
 * @param fingerprint Object providing `installation_id` for client metadata.
 * @param sessionId Session identifier used in the window id.
 * @param turnMetadataHeader Serialized turn metadata placed in client metadata.
 * @param windowGeneration Window generation counter appended to the window id.
 * @param warmup When true the frame carries `generate: false` (prewarm frame);
 *   otherwise any `generate` key is removed.
 * @returns A new frame object ready to be serialized.
 */
function buildCodexPassthroughFrame(clientBody, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
    // Shallow clone so the buyer's object is not mutated across retries.
    const frame = { ...clientBody };
    // Daemon-controlled envelope fields — always forced.
    frame.type = "response.create";
    frame.model = model;
    frame.store = false;
    frame.stream = true;
    // Reasoning is always present on the frame (buyer's or default, below),
    // so the matching include entry is always sent.
    frame.include = ["reasoning.encrypted_content"];
    // Identity metadata always comes from the daemon, never from the buyer.
    frame.client_metadata = {
        "x-codex-installation-id": fingerprint.installation_id,
        "x-codex-window-id": `${sessionId}:${windowGeneration}`,
        "x-codex-turn-metadata": turnMetadataHeader,
    };
    // Reasoning: keep the buyer's config only when it is a plain object.
    // `typeof [] === "object"`, so arrays must be rejected explicitly.
    const reasoning = frame.reasoning;
    const hasReasoningConfig = reasoning !== null && typeof reasoning === "object" && !Array.isArray(reasoning);
    if (!hasReasoningConfig) {
        frame.reasoning = { effort: "medium", summary: "auto" };
    }
    // Tools must always be an array, even if empty.
    if (!Array.isArray(frame.tools)) {
        frame.tools = [];
    }
    // Default tool_choice when absent (undefined or null).
    if (frame.tool_choice == null) {
        frame.tool_choice = "auto";
    }
    // Default parallel_tool_calls when absent; null is treated like undefined
    // so it is handled the same way as tool_choice.
    if (frame.parallel_tool_calls == null) {
        frame.parallel_tool_calls = false;
    }
    // Instructions: fall back to RELAY_INSTRUCTIONS when the buyer sent none
    // (missing, non-string, or empty).
    if (typeof frame.instructions !== "string" || !frame.instructions) {
        frame.instructions = RELAY_INSTRUCTIONS;
    }
    if (warmup) {
        // Prewarm frame: ask upstream not to generate.
        frame.generate = false;
    }
    else {
        // Drop any `generate` the buyer's body may have carried.
        delete frame.generate;
    }
    return frame;
}
1026
// ── Passthrough entry point ───────────────────────────────────────────────
//
// Deliberately kept as a separate implementation from doCallCodexApi (only
// the frame building differs) so passthrough-specific fixes cannot regress
// the template path. The two can be merged behind a frame-builder parameter
// once passthrough has stabilized.
/**
 * Wraps one upstream WebSocket frame as a server-sent-events block.
 *
 * The SSE event name is the frame's string `type` when the frame is a JSON
 * object carrying one, and `"message"` otherwise (non-JSON frames included).
 * Every line of the payload gets its own `data:` prefix so a payload that
 * contains line breaks cannot terminate the block early.
 *
 * @param frameText Raw frame text as received from upstream.
 * @returns The SSE block, terminated by a blank line.
 */
function formatCodexSseEvent(frameText) {
    let eventName = "message";
    try {
        const frame = JSON.parse(frameText);
        const type = frame !== null && typeof frame === "object" ? frame.type : undefined;
        // A line break inside the event name would corrupt the SSE framing.
        if (typeof type === "string" && !/[\r\n]/.test(type)) {
            eventName = type;
        }
    }
    catch {
        // Non-JSON frame — forwarded as a plain "message" event.
    }
    const dataLines = frameText
        .split(/\r\n|\r|\n/)
        .map((line) => `data: ${line}`)
        .join("\n");
    return `event: ${eventName}\n${dataLines}\n\n`;
}
/**
 * Executes one passthrough request against the Codex WebSocket endpoint.
 *
 * Flow per attempt: obtain credentials, build a warmup frame and a real frame
 * from the buyer's body, dial the socket, send the warmup frame, and send the
 * real frame once `handleFrame` reports the warmup as terminal. Real-phase
 * frames are forwarded to `opts.onRawEvent` (when provided) as SSE blocks.
 *
 * Retry policy:
 * - dial 401: retried once after dropping the cached credentials;
 * - dial 5xx, non-HTTP dial errors, and retriable mid-session socket errors:
 *   retried with exponential backoff up to MAX_TRANSIENT_RETRIES;
 * - a mid-session failure is NOT retried once any real-phase event has been
 *   handed to `opts.onRawEvent`, because a retry would replay the stream to
 *   the end client.
 *
 * @param opts `{ clientBody, model, onRawEvent? }`; `clientBody.input` must be
 *   a non-empty array.
 * @returns Parsed output with text, session id, token usage, model and cost.
 * @throws Error when the body has no usable `input`, on a 429 or other
 *   non-retriable upgrade failure, on an upstream error frame, on timeout, or
 *   when retries are exhausted.
 */
async function doCallCodexApiPassthrough(opts) {
    // Minimal body validation — only `input` (non-empty array) is required.
    const input = opts.clientBody?.input;
    if (!Array.isArray(input) || input.length === 0) {
        throw new Error("Passthrough body missing `input` array");
    }
    const fingerprint = loadCodexFingerprint();
    const sessionId = getMaskedSessionId();
    const windowGeneration = 0;
    const windowId = `${sessionId}:${windowGeneration}`;
    // Loop-invariant: the platform cannot change between attempts.
    const platformSandboxTag = process.platform === "darwin"
        ? "seatbelt"
        : process.platform === "linux"
            ? "seccomp"
            : process.platform === "win32"
                ? "windows_sandbox"
                : "none";
    let transientAttempt = 0;
    let hasRefreshed = false;
    // Logs, sleeps with exponential backoff plus jitter, then counts the attempt.
    const retryAfterBackoff = async (what, detail) => {
        const backoffMs = 500 * Math.pow(2, transientAttempt) + Math.random() * 500;
        logger.warn(`[codex-api] ${what} (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${detail}`);
        await new Promise((r) => setTimeout(r, backoffMs));
        transientAttempt++;
    };
    while (true) {
        const creds = await getFreshCreds();
        // A fresh turn id is generated for every attempt.
        const turnMetadata = JSON.stringify({
            session_id: sessionId,
            turn_id: randomUUID(),
            sandbox: platformSandboxTag,
        });
        const buildFrameJson = (warmup) => JSON.stringify(buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration, warmup));
        const warmupFrameJson = buildFrameJson(true);
        const realFrameJson = buildFrameJson(false);
        const headers = {
            "authorization": `Bearer ${creds.accessToken}`,
            "originator": fingerprint.originator,
            "openai-beta": fingerprint.openai_beta,
            "session_id": sessionId,
            "x-client-request-id": sessionId,
            "x-codex-window-id": windowId,
            "x-codex-turn-metadata": turnMetadata,
        };
        if (fingerprint.user_agent) {
            headers["user-agent"] = fingerprint.user_agent;
        }
        let dialed;
        try {
            dialed = await dialCodexWebSocket(headers);
        }
        catch (err) {
            if (err instanceof WsDialError) {
                const status = err.statusCode;
                if (status === 429) {
                    if (rateGuard) {
                        const cooldown = cooldownFromHttpHeaders(err.headers);
                        if (cooldown) {
                            rateGuard.triggerCooldown(cooldown.ms, cooldown.reason);
                        }
                        else {
                            // NOTE(review): this passes an absolute timestamp; confirm
                            // that `cooldown.ms` above uses the same convention.
                            rateGuard.triggerCooldown(Date.now() + 5 * 60_000, "fallback 5m (no reset header)");
                        }
                    }
                    throw new Error(`Codex 429 rate-limited: ${err.bodySnippet.slice(0, 300)}`);
                }
                if (status === 401 && !hasRefreshed) {
                    logger.warn("[codex-api] 401 from upgrade (passthrough), refreshing token + retry");
                    hasRefreshed = true;
                    // Drop the cached credentials so the next getFreshCreds() call
                    // does not reuse them.
                    cachedCreds = null;
                    continue;
                }
                const isTransient = status >= 500 && status <= 599;
                if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
                    await retryAfterBackoff(`upgrade ${status}`, err.bodySnippet.slice(0, 200));
                    continue;
                }
                throw new Error(`Codex upgrade ${status}: ${err.bodySnippet.slice(0, 400)}`);
            }
            // Anything that is not an HTTP-level dial failure is treated as a
            // transport error and retried while attempts remain.
            if (transientAttempt < MAX_TRANSIENT_RETRIES) {
                await retryAfterBackoff("transport error", err.message);
                continue;
            }
            throw err;
        }
        const { ws } = dialed;
        const newAccumulator = () => ({
            text: "",
            inputTokens: 0,
            outputTokens: 0,
            cacheReadTokens: 0,
            model: opts.model,
            terminal: false,
        });
        const acc = newAccumulator();
        // Set as soon as a real-phase event is handed to the caller; from then
        // on a retry would duplicate output on the client side.
        let streamedToClient = false;
        const result = await new Promise((resolve) => {
            const warmupAcc = newAccumulator();
            let phase = "warmup";
            let settled = false;
            let forwardingBroken = false;
            let timer;
            const finish = (r) => {
                if (settled)
                    return;
                settled = true;
                clearTimeout(timer);
                try {
                    ws.close(1000, "done");
                }
                catch {
                    // ignore — the socket may already be closed
                }
                resolve(r);
            };
            timer = setTimeout(() => {
                finish({
                    ok: false,
                    retriable: false,
                    error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
                });
            }, WS_OVERALL_TIMEOUT_MS);
            const sendFrame = (frameJson) => {
                try {
                    ws.send(frameJson, (sendErr) => {
                        if (sendErr) {
                            finish({ ok: false, retriable: true, error: sendErr });
                        }
                    });
                }
                catch (err) {
                    finish({ ok: false, retriable: true, error: err });
                }
            };
            ws.on("message", (data, _isBinary) => {
                // Frames arriving after this attempt has settled must not reach the
                // caller: they could interleave with a retry's output.
                if (settled)
                    return;
                const text = Buffer.isBuffer(data)
                    ? data.toString("utf-8")
                    : Array.isArray(data)
                        ? Buffer.concat(data).toString("utf-8")
                        : Buffer.from(data).toString("utf-8");
                const target = phase === "warmup" ? warmupAcc : acc;
                // Only real-phase frames are forwarded; warmup frames stay internal.
                if (phase === "real" && opts.onRawEvent && !forwardingBroken) {
                    const sseBlock = formatCodexSseEvent(text);
                    // Marked before the call: a callback that throws may already have
                    // delivered part of the event.
                    streamedToClient = true;
                    try {
                        opts.onRawEvent(sseBlock);
                    }
                    catch (forwardErr) {
                        // Forwarding is best-effort. A failing callback is not called
                        // again and must not escape into the socket's event emitter.
                        forwardingBroken = true;
                        const detail = forwardErr instanceof Error ? forwardErr.message : String(forwardErr);
                        logger.warn(`[codex-api] onRawEvent callback threw (passthrough), disabling stream forwarding — ${detail}`);
                    }
                }
                const outcome = handleFrame(text, target);
                if (outcome.rateLimit && rateGuard) {
                    // Soft hint — recorded without failing this request.
                    rateGuard.triggerCooldown(outcome.rateLimit.ms, outcome.rateLimit.reason);
                }
                if (!outcome.terminal) {
                    return;
                }
                if (outcome.error) {
                    finish({
                        ok: false,
                        retriable: false,
                        error: new Error(`Codex upstream error: ${outcome.error}`),
                    });
                    return;
                }
                if (phase === "warmup") {
                    // Warmup finished; now send the frame that actually generates.
                    phase = "real";
                    sendFrame(realFrameJson);
                    return;
                }
                acc.terminal = true;
                finish({ ok: true });
            });
            ws.on("close", (code, reason) => {
                if (acc.terminal)
                    return;
                finish({
                    ok: false,
                    retriable: true,
                    error: new Error(`Codex WS closed early (code=${code}, reason=${reason.toString().slice(0, 200)})`),
                });
            });
            ws.on("error", (err) => {
                finish({ ok: false, retriable: true, error: err });
            });
            sendFrame(warmupFrameJson);
        });
        if (!result.ok) {
            const canRetry = result.retriable && !streamedToClient && transientAttempt < MAX_TRANSIENT_RETRIES;
            if (canRetry) {
                await retryAfterBackoff("mid-session ws error", result.error.message);
                continue;
            }
            throw result.error;
        }
        const parsed = {
            text: acc.text,
            sessionId,
            usage: {
                input_tokens: acc.inputTokens,
                output_tokens: acc.outputTokens,
                cache_creation_tokens: 0,
                cache_read_tokens: acc.cacheReadTokens,
            },
            model: acc.model,
            costUsd: 0,
        };
        if (rateGuard) {
            const cost = calculateCost(opts.model, parsed.usage.input_tokens, parsed.usage.output_tokens, parsed.usage.cache_creation_tokens, parsed.usage.cache_read_tokens);
            rateGuard.recordSpend(cost.apiCost);
            parsed.costUsd = cost.apiCost;
        }
        logger.info(`[codex-api] passthrough OK model=${acc.model} in=${acc.inputTokens} out=${acc.outputTokens} cache_read=${acc.cacheReadTokens}`);
        return parsed;
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmoney",
3
- "version": "0.15.7",
3
+ "version": "0.15.8",
4
4
  "description": "ClawMoney CLI -- Earn rewards with your AI agent",
5
5
  "type": "module",
6
6
  "bin": {