clawmoney 0.15.7 → 0.15.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/relay/provider.js
CHANGED
|
@@ -4,7 +4,7 @@ import { homedir } from "node:os";
|
|
|
4
4
|
import YAML from "yaml";
|
|
5
5
|
import { RelayWsClient } from "./ws-client.js";
|
|
6
6
|
import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
|
|
7
|
-
import { callCodexApi, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
|
|
7
|
+
import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
|
|
8
8
|
import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
|
|
9
9
|
import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
|
|
10
10
|
/**
|
|
@@ -192,11 +192,25 @@ async function executeRelayRequest(request, config, sendChunk) {
|
|
|
192
192
|
// antigravity → daily-cloudcode-pa). Each handler has its own
|
|
193
193
|
// fingerprint file and rate-guard instance.
|
|
194
194
|
if (cliType === "codex") {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
195
|
+
// Same two-mode pattern as claude: passthrough when the Hub forwards
|
|
196
|
+
// a real Responses API body (used by /v1/responses endpoint for
|
|
197
|
+
// Codex CLI drop-in replacement), template mode otherwise (used by
|
|
198
|
+
// the OpenAI-compat /v1/chat/completions classic endpoint).
|
|
199
|
+
if (request.passthrough_body) {
|
|
200
|
+
parsed = await callCodexApiPassthrough({
|
|
201
|
+
clientBody: request.passthrough_body,
|
|
202
|
+
model,
|
|
203
|
+
onRawEvent: sendChunk,
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
parsed = await callCodexApi({
|
|
208
|
+
prompt,
|
|
209
|
+
model,
|
|
210
|
+
maxTokens: max_budget_usd ? undefined : 4096,
|
|
211
|
+
onRawEvent: sendChunk,
|
|
212
|
+
});
|
|
213
|
+
}
|
|
200
214
|
}
|
|
201
215
|
else if (cliType === "gemini") {
|
|
202
216
|
parsed = await callGeminiApi({
|
|
@@ -38,5 +38,12 @@ export interface CallCodexApiOptions {
|
|
|
38
38
|
prompt: string;
|
|
39
39
|
model: string;
|
|
40
40
|
maxTokens?: number;
|
|
41
|
+
onRawEvent?: (sse: string) => void;
|
|
41
42
|
}
|
|
42
43
|
/**
 * Template-mode Codex call: takes a prompt and model (see
 * `CallCodexApiOptions`) and resolves with the parsed upstream output.
 * When `opts.onRawEvent` is supplied, real-phase upstream frames are also
 * handed to it as SSE-formatted strings while the call is in flight.
 */
export declare function callCodexApi(opts: CallCodexApiOptions): Promise<ParsedOutput>;
|
|
44
|
+
/**
 * Options accepted by `callCodexApiPassthrough`.
 */
export interface CallCodexApiPassthroughOptions {
    /**
     * The caller's Responses API request body. The implementation requires
     * a non-empty `input` array and rejects the call otherwise.
     */
    clientBody: Record<string, unknown>;
    /** Model name written into the upstream frame. */
    model: string;
    /**
     * Optional sink for streamed output. It is invoked once per forwarded
     * upstream frame with a complete SSE block
     * (`event: <type>\ndata: <json>\n\n`).
     */
    onRawEvent?: (sseEvent: string) => void;
}
|
|
49
|
+
/**
 * Passthrough-mode Codex call: sends the caller's Responses API body
 * (`opts.clientBody`) upstream and resolves with the parsed output once
 * the upstream response reaches a terminal frame. When `opts.onRawEvent`
 * is supplied, real-phase upstream frames are handed to it as
 * SSE-formatted strings while the call is in flight.
 */
export declare function callCodexApiPassthrough(opts: CallCodexApiPassthroughOptions): Promise<ParsedOutput>;
|
|
@@ -449,6 +449,47 @@ function buildCodexRequestFrame(prompt, model, fingerprint, sessionId, turnMetad
|
|
|
449
449
|
}
|
|
450
450
|
return frame;
|
|
451
451
|
}
|
|
452
|
+
/**
 * Patch a raw ChatGPT WS frame before it is forwarded to the Hub as SSE.
 *
 * ChatGPT's internal `response.completed` frames do not populate
 * `usage.total_tokens`, and the Codex CLI parser rejects the frame without
 * it ("failed to parse ResponseCompleted: missing field `total_tokens`").
 * For `response.completed` / `response.done` frames whose `response.usage`
 * object lacks a numeric `total_tokens`, this injects
 * `input_tokens + output_tokens` and fills in missing
 * `input_tokens_details` / `output_tokens_details` objects.
 *
 * Every other frame, and any frame that fails to parse or has an
 * unexpected shape, is returned untouched, so malformed input never
 * turns into a crash.
 *
 * @param {string} raw - One upstream WS frame as JSON text.
 * @returns {string} The re-serialized frame when patched, otherwise `raw`.
 */
function patchCodexFrameForForwarding(raw) {
    // Coerce a usage counter to a finite number. A NaN here would be
    // serialized as `null`, which is exactly the kind of value the strict
    // downstream parser rejects, so anything non-numeric counts as 0.
    const toCount = (value) => {
        const n = Number(value ?? 0);
        return Number.isFinite(n) ? n : 0;
    };
    try {
        const evt = JSON.parse(raw);
        const type = evt["type"];
        if (type !== "response.completed" && type !== "response.done") {
            return raw;
        }
        const resp = evt["response"];
        if (!resp || typeof resp !== "object")
            return raw;
        const usage = resp["usage"];
        // Arrays are objects too, but properties set on one are dropped by
        // JSON.stringify; treat that as a shape error and leave it alone.
        if (!usage || typeof usage !== "object" || Array.isArray(usage))
            return raw;
        if (typeof usage["total_tokens"] === "number")
            return raw;
        usage["total_tokens"] = toCount(usage["input_tokens"]) + toCount(usage["output_tokens"]);
        // Also make sure the nested *_details objects exist on the frame.
        if (!usage["input_tokens_details"] || typeof usage["input_tokens_details"] !== "object") {
            usage["input_tokens_details"] = { cached_tokens: toCount(usage.cache_read_input_tokens) };
        }
        if (!usage["output_tokens_details"] || typeof usage["output_tokens_details"] !== "object") {
            usage["output_tokens_details"] = { reasoning_tokens: 0 };
        }
        return JSON.stringify(evt);
    }
    catch {
        // Non-JSON input (or a JSON primitive/null): forward as-is.
        return raw;
    }
}
|
|
452
493
|
function handleFrame(raw, acc) {
|
|
453
494
|
let evt;
|
|
454
495
|
try {
|
|
@@ -645,6 +686,12 @@ export async function callCodexApi(opts) {
|
|
|
645
686
|
configureRateGuard();
|
|
646
687
|
return rateGuard.run(() => doCallCodexApi(opts));
|
|
647
688
|
}
|
|
689
|
+
/**
 * Public entry point for passthrough-mode Codex calls.
 *
 * Configures the dispatcher, makes sure the module-level rate guard has
 * been configured, and then runs the passthrough request under that guard.
 *
 * @param opts - Passthrough options (`clientBody`, `model`, optional `onRawEvent`).
 * @returns Whatever `rateGuard.run` yields for the passthrough call.
 */
export async function callCodexApiPassthrough(opts) {
    configureDispatcher();
    if (!rateGuard) {
        configureRateGuard();
    }
    const runPassthrough = () => doCallCodexApiPassthrough(opts);
    return rateGuard.run(runPassthrough);
}
|
|
648
695
|
async function doCallCodexApi(opts) {
|
|
649
696
|
const prompt = (opts.prompt ?? "").trim();
|
|
650
697
|
if (!prompt) {
|
|
@@ -861,6 +908,26 @@ async function doCallCodexApi(opts) {
|
|
|
861
908
|
: Buffer.from(data).toString("utf-8");
|
|
862
909
|
// Frames are individual JSON objects (no newline framing).
|
|
863
910
|
const target = phase === "warmup" ? warmupAcc : acc;
|
|
911
|
+
// Forward raw frames to the caller (when streaming is requested)
|
|
912
|
+
// only for the real phase — warmup frames are daemon-internal and
|
|
913
|
+
// never reach the end client. Each frame is re-emitted as an
|
|
914
|
+
// Anthropic-style SSE block where `event:` is the frame type
|
|
915
|
+
// (response.output_text.delta, response.completed, etc.), which
|
|
916
|
+
// matches OpenAI's public Responses API SSE wire format exactly.
|
|
917
|
+
if (phase === "real" && opts.onRawEvent) {
|
|
918
|
+
try {
|
|
919
|
+
const parsedFrame = JSON.parse(text);
|
|
920
|
+
const frameType = typeof parsedFrame.type === "string" ? parsedFrame.type : "message";
|
|
921
|
+
// Inject usage.total_tokens on response.completed frames so
|
|
922
|
+
// the end client's strict parser doesn't abort the stream.
|
|
923
|
+
const patched = patchCodexFrameForForwarding(text);
|
|
924
|
+
opts.onRawEvent(`event: ${frameType}\ndata: ${patched}\n\n`);
|
|
925
|
+
}
|
|
926
|
+
catch {
|
|
927
|
+
// Non-JSON frame — forward as a plain data event.
|
|
928
|
+
opts.onRawEvent(`event: message\ndata: ${text}\n\n`);
|
|
929
|
+
}
|
|
930
|
+
}
|
|
864
931
|
const outcome = handleFrame(text, target);
|
|
865
932
|
if (outcome.rateLimit && rateGuard) {
|
|
866
933
|
// Soft hint — record but don't kill this request. Next request will
|
|
@@ -939,3 +1006,298 @@ async function doCallCodexApi(opts) {
|
|
|
939
1006
|
return parsed;
|
|
940
1007
|
}
|
|
941
1008
|
}
|
|
1009
|
+
// ── Passthrough frame builder ─────────────────────────────────────────────
|
|
1010
|
+
// Build a ChatGPT backend-api/codex/responses WS frame from the buyer's
|
|
1011
|
+
// raw Responses API body. Daemon-controlled fields (type, client_metadata,
|
|
1012
|
+
// store, stream, include, generate, model) are always overwritten; every
|
|
1013
|
+
// other field — input, instructions, tools, tool_choice, reasoning,
|
|
1014
|
+
// parallel_tool_calls, etc. — is preserved verbatim so the end client's
|
|
1015
|
+
// agentic loop works end-to-end.
|
|
1016
|
+
function buildCodexPassthroughFrame(clientBody, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
|
|
1017
|
+
// Shallow clone so we don't mutate the buyer's dict across retries.
|
|
1018
|
+
const frame = { ...clientBody };
|
|
1019
|
+
// Daemon-controlled envelope fields — always forced.
|
|
1020
|
+
frame.type = "response.create";
|
|
1021
|
+
frame.model = model;
|
|
1022
|
+
frame.store = false;
|
|
1023
|
+
frame.stream = true;
|
|
1024
|
+
// Real CLI sends include: ["reasoning.encrypted_content"] when reasoning
|
|
1025
|
+
// is enabled. We set reasoning below (from client or default), so include
|
|
1026
|
+
// it for fingerprint parity.
|
|
1027
|
+
frame.include = ["reasoning.encrypted_content"];
|
|
1028
|
+
// Daemon fingerprint injection — client_metadata is how the upstream
|
|
1029
|
+
// ties traffic to a device+window identity. Buyers never see this
|
|
1030
|
+
// field; always set it from our fingerprint.
|
|
1031
|
+
frame.client_metadata = {
|
|
1032
|
+
"x-codex-installation-id": fingerprint.installation_id,
|
|
1033
|
+
"x-codex-window-id": `${sessionId}:${windowGeneration}`,
|
|
1034
|
+
"x-codex-turn-metadata": turnMetadataHeader,
|
|
1035
|
+
};
|
|
1036
|
+
// Reasoning: if buyer sent their own reasoning config, preserve it;
|
|
1037
|
+
// otherwise inject the real-CLI default `{effort: "medium", summary: "auto"}`
|
|
1038
|
+
// so the request shape matches typical CLI traffic.
|
|
1039
|
+
if (!frame.reasoning || typeof frame.reasoning !== "object") {
|
|
1040
|
+
frame.reasoning = { effort: "medium", summary: "auto" };
|
|
1041
|
+
}
|
|
1042
|
+
// Ensure tools is an array (real CLI always sends tools, even if empty).
|
|
1043
|
+
if (!Array.isArray(frame.tools)) {
|
|
1044
|
+
frame.tools = [];
|
|
1045
|
+
}
|
|
1046
|
+
// Default tool_choice if not set.
|
|
1047
|
+
if (frame.tool_choice === undefined || frame.tool_choice === null) {
|
|
1048
|
+
frame.tool_choice = "auto";
|
|
1049
|
+
}
|
|
1050
|
+
// Default parallel_tool_calls to false (matches current template).
|
|
1051
|
+
if (frame.parallel_tool_calls === undefined) {
|
|
1052
|
+
frame.parallel_tool_calls = false;
|
|
1053
|
+
}
|
|
1054
|
+
// Instructions: if buyer didn't send one, fall back to the template
|
|
1055
|
+
// mode's RELAY_INSTRUCTIONS so the model still has guidance.
|
|
1056
|
+
if (typeof frame.instructions !== "string" || !frame.instructions) {
|
|
1057
|
+
frame.instructions = RELAY_INSTRUCTIONS;
|
|
1058
|
+
}
|
|
1059
|
+
if (warmup) {
|
|
1060
|
+
// Real CLI's prewarm flow: first frame of each turn has generate:false.
|
|
1061
|
+
frame.generate = false;
|
|
1062
|
+
}
|
|
1063
|
+
else {
|
|
1064
|
+
// Explicitly unset any leftover generate:false (buyer's body shouldn't
|
|
1065
|
+
// carry it, but defensive).
|
|
1066
|
+
delete frame.generate;
|
|
1067
|
+
}
|
|
1068
|
+
return frame;
|
|
1069
|
+
}
|
|
1070
|
+
// ── Passthrough entry point ───────────────────────────────────────────────
//
// Derived from doCallCodexApi, with frame-building swapped for
// buildCodexPassthroughFrame. Kept as a separate copy so passthrough-specific
// fixes can land without touching the template path; the two can be merged
// via a frame-builder parameter once passthrough stabilizes.
/**
 * Run one passthrough-mode Codex request over the upstream WebSocket.
 *
 * Each attempt fetches credentials, builds a warmup frame and a real frame
 * from `opts.clientBody`, dials the socket, sends the warmup frame, and
 * sends the real frame once the warmup reaches a terminal frame. Real-phase
 * frames are forwarded to `opts.onRawEvent` (when provided) as
 * `event: <type>\ndata: <json>\n\n` blocks.
 *
 * Retry policy:
 *  - upgrade 429: triggers a rate-guard cooldown and throws;
 *  - upgrade 401: clears the cached credentials and retries once;
 *  - upgrade 5xx / transport errors / mid-session socket errors: retried
 *    with exponential backoff, up to MAX_TRANSIENT_RETRIES.
 *  - mid-session failures are NOT retried once any frame has been forwarded
 *    to `opts.onRawEvent`, because a retry replays the turn from the start
 *    and the end client would receive duplicated events.
 *
 * @param opts - `{ clientBody, model, onRawEvent? }`; `clientBody.input`
 *   must be a non-empty array.
 * @returns The parsed output: text, sessionId, usage counters, model, costUsd.
 * @throws Error when the body is invalid, the upstream reports an error,
 *   the socket times out, the event sink throws, or retries are exhausted.
 */
async function doCallCodexApiPassthrough(opts) {
    // Minimal body validation: only a non-empty `input` array is required
    // here; everything else is optional.
    const input = opts.clientBody.input;
    if (!Array.isArray(input) || input.length === 0) {
        throw new Error("Passthrough body missing `input` array");
    }
    const fingerprint = loadCodexFingerprint();
    const sessionId = getMaskedSessionId();
    let transientAttempt = 0;
    let hasRefreshed = false;
    const windowGeneration = 0;
    while (true) {
        const creds = await getFreshCreds();
        const platformSandboxTag = process.platform === "darwin"
            ? "seatbelt"
            : process.platform === "linux"
                ? "seccomp"
                : process.platform === "win32"
                    ? "windows_sandbox"
                    : "none";
        // A fresh turn id is generated for every attempt.
        const turnMetadata = JSON.stringify({
            session_id: sessionId,
            turn_id: randomUUID(),
            sandbox: platformSandboxTag,
        });
        const warmupFrame = buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
        /*warmup*/ true);
        const realFrame = buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
        /*warmup*/ false);
        const warmupFrameJson = JSON.stringify(warmupFrame);
        const realFrameJson = JSON.stringify(realFrame);
        const windowId = `${sessionId}:${windowGeneration}`;
        const headers = {
            "authorization": `Bearer ${creds.accessToken}`,
            "originator": fingerprint.originator,
            "openai-beta": fingerprint.openai_beta,
            "session_id": sessionId,
            "x-client-request-id": sessionId,
            "x-codex-window-id": windowId,
            "x-codex-turn-metadata": turnMetadata,
        };
        if (fingerprint.user_agent) {
            headers["user-agent"] = fingerprint.user_agent;
        }
        let dialed;
        try {
            dialed = await dialCodexWebSocket(headers);
        }
        catch (err) {
            if (err instanceof WsDialError) {
                const status = err.statusCode;
                if (status === 429) {
                    const cooldown = cooldownFromHttpHeaders(err.headers);
                    if (cooldown && rateGuard) {
                        rateGuard.triggerCooldown(cooldown.ms, cooldown.reason);
                    }
                    else if (rateGuard) {
                        // NOTE(review): this passes an absolute timestamp while the
                        // branch above passes `cooldown.ms`; confirm which unit
                        // triggerCooldown expects.
                        rateGuard.triggerCooldown(Date.now() + 5 * 60_000, "fallback 5m (no reset header)");
                    }
                    throw new Error(`Codex 429 rate-limited: ${err.bodySnippet.slice(0, 300)}`);
                }
                if (status === 401 && !hasRefreshed) {
                    logger.warn("[codex-api] 401 from upgrade (passthrough), refreshing token + retry");
                    hasRefreshed = true;
                    // Drop the cached credentials so the next iteration fetches new ones.
                    cachedCreds = null;
                    continue;
                }
                const isTransient = status >= 500 && status <= 599;
                if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
                    const backoffMs = 500 * Math.pow(2, transientAttempt) + Math.random() * 500;
                    logger.warn(`[codex-api] upgrade ${status} (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${err.bodySnippet.slice(0, 200)}`);
                    await new Promise((r) => setTimeout(r, backoffMs));
                    transientAttempt++;
                    continue;
                }
                throw new Error(`Codex upgrade ${status}: ${err.bodySnippet.slice(0, 400)}`);
            }
            // Anything that is not a WsDialError is treated as a transport error.
            if (transientAttempt < MAX_TRANSIENT_RETRIES) {
                const backoffMs = 500 * Math.pow(2, transientAttempt) + Math.random() * 500;
                logger.warn(`[codex-api] transport error (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${err.message}`);
                await new Promise((r) => setTimeout(r, backoffMs));
                transientAttempt++;
                continue;
            }
            throw err;
        }
        const { ws } = dialed;
        const acc = {
            text: "",
            inputTokens: 0,
            outputTokens: 0,
            cacheReadTokens: 0,
            model: opts.model,
            terminal: false,
        };
        let resolved = false;
        // Set once any real-phase frame has been handed to opts.onRawEvent
        // during this attempt; from then on a retry would replay the turn
        // to the end client.
        let forwardedToClient = false;
        const result = await new Promise((resolve) => {
            let phase = "warmup";
            // Settle the attempt exactly once: stop the timer, close the socket.
            const finish = (r) => {
                if (resolved)
                    return;
                resolved = true;
                clearTimeout(timer);
                try {
                    ws.close(1000, "done");
                }
                catch {
                    // ignore
                }
                resolve(r);
            };
            const timer = setTimeout(() => {
                finish({
                    ok: false,
                    retriable: false,
                    error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
                });
            }, WS_OVERALL_TIMEOUT_MS);
            // Warmup frames are accumulated separately so they never leak
            // into the real result.
            const warmupAcc = {
                text: "",
                inputTokens: 0,
                outputTokens: 0,
                cacheReadTokens: 0,
                model: opts.model,
                terminal: false,
            };
            const sendFrame = (frameJson) => {
                try {
                    ws.send(frameJson, (sendErr) => {
                        if (sendErr) {
                            finish({ ok: false, retriable: true, error: sendErr });
                        }
                    });
                }
                catch (err) {
                    finish({ ok: false, retriable: true, error: err });
                }
            };
            ws.on("message", (data, _isBinary) => {
                // Frames arriving after the attempt settled (timeout, error, or
                // completion) belong to a dead socket; never forward or process them.
                if (resolved)
                    return;
                const text = Buffer.isBuffer(data)
                    ? data.toString("utf-8")
                    : Array.isArray(data)
                        ? Buffer.concat(data).toString("utf-8")
                        : Buffer.from(data).toString("utf-8");
                const target = phase === "warmup" ? warmupAcc : acc;
                // Forward raw frames for real-time SSE streaming, real phase only,
                // wrapped as `event: <type>\ndata: <json>\n\n`.
                if (phase === "real" && opts.onRawEvent) {
                    let sse;
                    try {
                        const parsedFrame = JSON.parse(text);
                        const frameType = typeof parsedFrame.type === "string" ? parsedFrame.type : "message";
                        const patched = patchCodexFrameForForwarding(text);
                        sse = `event: ${frameType}\ndata: ${patched}\n\n`;
                    }
                    catch {
                        // Non-JSON frame: forward as a plain data event.
                        sse = `event: message\ndata: ${text}\n\n`;
                    }
                    forwardedToClient = true;
                    // Emit exactly once. A throwing sink is reported through the
                    // normal failure path instead of re-emitting the same frame or
                    // escaping from the socket's event handler.
                    try {
                        opts.onRawEvent(sse);
                    }
                    catch (emitErr) {
                        finish({
                            ok: false,
                            retriable: false,
                            error: emitErr instanceof Error ? emitErr : new Error(String(emitErr)),
                        });
                        return;
                    }
                }
                const outcome = handleFrame(text, target);
                if (outcome.rateLimit && rateGuard) {
                    rateGuard.triggerCooldown(outcome.rateLimit.ms, outcome.rateLimit.reason);
                }
                if (outcome.terminal) {
                    if (outcome.error) {
                        finish({
                            ok: false,
                            retriable: false,
                            error: new Error(`Codex upstream error: ${outcome.error}`),
                        });
                        return;
                    }
                    if (phase === "warmup") {
                        // Warmup finished: switch phases and send the real request.
                        phase = "real";
                        sendFrame(realFrameJson);
                        return;
                    }
                    acc.terminal = true;
                    finish({ ok: true });
                }
            });
            ws.on("close", (code, reason) => {
                if (acc.terminal)
                    return;
                finish({
                    ok: false,
                    retriable: true,
                    error: new Error(`Codex WS closed early (code=${code}, reason=${reason.toString().slice(0, 200)})`),
                });
            });
            ws.on("error", (err) => {
                finish({ ok: false, retriable: true, error: err });
            });
            sendFrame(warmupFrameJson);
        });
        if (!result.ok) {
            // Retrying replays the whole turn, so it is only safe while nothing
            // has been streamed to the end client yet.
            const canRetry = result.retriable && !forwardedToClient && transientAttempt < MAX_TRANSIENT_RETRIES;
            if (canRetry) {
                const backoffMs = 500 * Math.pow(2, transientAttempt) + Math.random() * 500;
                logger.warn(`[codex-api] mid-session ws error (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${result.error.message}`);
                await new Promise((r) => setTimeout(r, backoffMs));
                transientAttempt++;
                continue;
            }
            throw result.error;
        }
        const parsed = {
            text: acc.text,
            sessionId,
            usage: {
                input_tokens: acc.inputTokens,
                output_tokens: acc.outputTokens,
                cache_creation_tokens: 0,
                cache_read_tokens: acc.cacheReadTokens,
            },
            model: acc.model,
            costUsd: 0,
        };
        if (rateGuard) {
            // Record the computed API cost against the rate guard's spend tracking.
            const cost = calculateCost(opts.model, parsed.usage.input_tokens, parsed.usage.output_tokens, parsed.usage.cache_creation_tokens, parsed.usage.cache_read_tokens);
            rateGuard.recordSpend(cost.apiCost);
            parsed.costUsd = cost.apiCost;
        }
        logger.info(`[codex-api] passthrough OK model=${acc.model} in=${acc.inputTokens} out=${acc.outputTokens} cache_read=${acc.cacheReadTokens}`);
        return parsed;
    }
}
|