clawmoney 0.15.6 → 0.15.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/relay/provider.js
CHANGED
|
@@ -3,8 +3,8 @@ import { join } from "node:path";
|
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
4
|
import YAML from "yaml";
|
|
5
5
|
import { RelayWsClient } from "./ws-client.js";
|
|
6
|
-
import { callClaudeApi, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
|
|
7
|
-
import { callCodexApi, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
|
|
6
|
+
import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGuardSnapshot as getClaudeRateGuardSnapshot, } from "./upstream/claude-api.js";
|
|
7
|
+
import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
|
|
8
8
|
import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
|
|
9
9
|
import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
|
|
10
10
|
/**
|
|
@@ -192,11 +192,25 @@ async function executeRelayRequest(request, config, sendChunk) {
|
|
|
192
192
|
// antigravity → daily-cloudcode-pa). Each handler has its own
|
|
193
193
|
// fingerprint file and rate-guard instance.
|
|
194
194
|
if (cliType === "codex") {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
195
|
+
// Same two-mode pattern as claude: passthrough when the Hub forwards
|
|
196
|
+
// a real Responses API body (used by /v1/responses endpoint for
|
|
197
|
+
// Codex CLI drop-in replacement), template mode otherwise (used by
|
|
198
|
+
// the OpenAI-compat /v1/chat/completions classic endpoint).
|
|
199
|
+
if (request.passthrough_body) {
|
|
200
|
+
parsed = await callCodexApiPassthrough({
|
|
201
|
+
clientBody: request.passthrough_body,
|
|
202
|
+
model,
|
|
203
|
+
onRawEvent: sendChunk,
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
parsed = await callCodexApi({
|
|
208
|
+
prompt,
|
|
209
|
+
model,
|
|
210
|
+
maxTokens: max_budget_usd ? undefined : 4096,
|
|
211
|
+
onRawEvent: sendChunk,
|
|
212
|
+
});
|
|
213
|
+
}
|
|
200
214
|
}
|
|
201
215
|
else if (cliType === "gemini") {
|
|
202
216
|
parsed = await callGeminiApi({
|
|
@@ -213,17 +227,38 @@ async function executeRelayRequest(request, config, sendChunk) {
|
|
|
213
227
|
});
|
|
214
228
|
}
|
|
215
229
|
else {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
230
|
+
// Claude: two modes.
|
|
231
|
+
//
|
|
232
|
+
// 1. PASSTHROUGH (preferred when Hub supplies request.passthrough_body):
|
|
233
|
+
// the Hub is acting as a transparent ANTHROPIC_BASE_URL proxy for a
|
|
234
|
+
// real Claude Code (or anthropic-SDK) client. Forward the buyer's
|
|
235
|
+
// actual body — tools, multi-turn messages, thinking, system, etc.
|
|
236
|
+
// all preserved — with surgical fingerprint rewrites so Anthropic
|
|
237
|
+
// sees a stable per-OAuth-account identity instead of a rotating
|
|
238
|
+
// buyer-identity signal.
|
|
239
|
+
//
|
|
240
|
+
// 2. TEMPLATE (fallback when no passthrough_body): the legacy
|
|
241
|
+
// chat-relay path. Daemon constructs a synthetic single-user-message
|
|
242
|
+
// request body that matches the real CC wire fingerprint exactly,
|
|
243
|
+
// dropping everything the buyer sent except the concatenated prompt
|
|
244
|
+
// text. Used for OpenAI-compatible /v1/chat/completions and any
|
|
245
|
+
// other client that doesn't need real agentic support.
|
|
246
|
+
if (request.passthrough_body) {
|
|
247
|
+
parsed = await callClaudeApiPassthrough({
|
|
248
|
+
clientBody: request.passthrough_body,
|
|
249
|
+
model,
|
|
250
|
+
clientBeta: request.anthropic_beta,
|
|
251
|
+
onRawEvent: sendChunk,
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
else {
|
|
255
|
+
parsed = await callClaudeApi({
|
|
256
|
+
prompt,
|
|
257
|
+
model,
|
|
258
|
+
maxTokens: max_budget_usd ? undefined : 4096,
|
|
259
|
+
onRawEvent: sendChunk,
|
|
260
|
+
});
|
|
261
|
+
}
|
|
227
262
|
}
|
|
228
263
|
const elapsedMs = Date.now() - startMs;
|
|
229
264
|
const answer = parsed.text.replace(/\n/g, " ").slice(0, 80);
|
package/dist/relay/types.d.ts
CHANGED
|
@@ -30,3 +30,10 @@ export interface CallClaudeApiOptions {
|
|
|
30
30
|
onRawEvent?: (rawFrame: string) => void;
|
|
31
31
|
}
|
|
32
32
|
export declare function callClaudeApi(opts: CallClaudeApiOptions): Promise<ParsedOutput>;
|
|
33
|
+
/**
 * Options accepted by `callClaudeApiPassthrough`.
 *
 * Passthrough mode forwards a buyer-supplied Anthropic Messages request body
 * upstream instead of building a synthetic single-prompt request.
 */
export interface CallClaudeApiPassthroughOptions {
    /**
     * The buyer's request body. The passthrough worker works on a copy and
     * overwrites `model`, `stream` and `metadata.user_id`, and removes
     * `temperature` and `tool_choice` before sending.
     */
    clientBody: Record<string, unknown>;

    /** Model name requested by the caller; normalized before being written into the body. */
    model: string;

    /**
     * Optional comma-separated `anthropic-beta` value from the buyer. Entries
     * are appended, de-duplicated, after the betas the daemon requires.
     */
    clientBeta?: string;

    /** Optional sink for raw upstream stream frames, handed to the SSE parser. */
    onRawEvent?: (rawFrame: string) => void;
}
|
|
39
|
+
/**
 * Send a buyer-supplied Anthropic Messages body upstream in passthrough mode.
 *
 * @param opts The buyer's body, the requested model, an optional buyer
 *   `anthropic-beta` list and an optional raw-frame callback.
 * @returns A promise for the parsed upstream response.
 */
export declare function callClaudeApiPassthrough(opts: CallClaudeApiPassthroughOptions): Promise<ParsedOutput>;
|
|
@@ -696,6 +696,12 @@ export async function callClaudeApi(opts) {
|
|
|
696
696
|
configureRateGuard();
|
|
697
697
|
return rateGuard.run(() => doCallClaudeApi(opts));
|
|
698
698
|
}
|
|
699
|
+
/**
 * Public entry point for Claude passthrough mode.
 *
 * Calls configureDispatcher(), calls configureRateGuard() when no rate guard
 * is set yet, then runs the passthrough worker through `rateGuard.run`.
 *
 * @param opts Passthrough options, handed unchanged to doCallClaudeApiPassthrough.
 * @returns Whatever `rateGuard.run` yields for the wrapped call.
 */
export async function callClaudeApiPassthrough(opts) {
    configureDispatcher();
    if (!rateGuard) {
        configureRateGuard();
    }
    const forwardUpstream = () => doCallClaudeApiPassthrough(opts);
    return rateGuard.run(forwardUpstream);
}
|
|
699
705
|
// Maximum number of automatic retries on transient upstream errors
|
|
700
706
|
// (429 / 5xx). Matches the Anthropic official SDK default. Does NOT count
|
|
701
707
|
// the initial attempt or the one-shot 401-refresh retry.
|
|
@@ -874,6 +880,230 @@ async function doCallClaudeApi(opts) {
|
|
|
874
880
|
throw new Error(`Anthropic ${resp.status}: ${errText.slice(0, 400)}`);
|
|
875
881
|
}
|
|
876
882
|
}
|
|
883
|
+
// ── Passthrough helpers ──────────────────────────────────────────────────
|
|
884
|
+
/**
 * Return the text of the first message whose role is "user".
 *
 * `content` may be a plain string (returned as-is) or an array of content
 * blocks, in which case the `text` of the first `{ type: "text" }` block with
 * a string `text` is returned. Only the first user message is inspected;
 * later user messages are never consulted. The result feeds the attribution
 * header computation in syncPassthroughBillingHeader.
 *
 * @param messages The request's `messages` value (any shape is tolerated).
 * @returns The extracted text, or "" when `messages` is not an array, has no
 *   user message, or the first user message carries no text.
 */
function extractFirstUserMessageText(messages) {
    if (!Array.isArray(messages)) {
        return "";
    }
    const firstUser = messages.find((entry) => entry && typeof entry === "object" && entry.role === "user");
    if (!firstUser) {
        return "";
    }
    const { content } = firstUser;
    if (typeof content === "string") {
        return content;
    }
    if (!Array.isArray(content)) {
        return "";
    }
    const textBlock = content.find((part) => part &&
        typeof part === "object" &&
        part.type === "text" &&
        typeof part.text === "string");
    return textBlock ? textBlock.text : "";
}
|
|
913
|
+
/**
 * Build the `anthropic-beta` header value from the daemon's required betas
 * plus the buyer's own list.
 *
 * Required entries come first, followed by the buyer's extras. Every entry is
 * trimmed, empty entries are dropped, and duplicates keep only their first
 * occurrence, so the required ordering always wins.
 *
 * @param required Iterable of beta names the daemon must always send.
 * @param clientBeta Optional comma-separated beta list supplied by the buyer.
 * @returns The merged names joined with "," (no spaces).
 */
function mergeBetas(required, clientBeta) {
    const requested = clientBeta ? clientBeta.split(",") : [];
    // A Set iterates in insertion order, which gives order-preserving dedupe.
    const unique = new Set();
    for (const raw of [...required, ...requested]) {
        const name = raw.trim();
        if (name) {
            unique.add(name);
        }
    }
    return [...unique].join(",");
}
|
|
940
|
+
/**
 * Replace the billing header carried in the first system block, if there is one.
 *
 * Only acts when `body.system` is an array whose first element is a text
 * block starting with "x-anthropic-billing-header:". That block's text is
 * then replaced by a header rebuilt from the first user message and this
 * daemon's own `cc_version` / `cc_entrypoint`. Anything else (no system
 * array, empty array, non-text first block, no billing header) is left
 * untouched, so clients that never sent a header keep working.
 *
 * Rationale carried over from the original notes: the header embeds a value
 * derived from the message text and the CLI version, so a header produced by
 * a buyer on a different CLI version would not match the version this daemon
 * presents upstream.
 *
 * @param body Request body; its first system block is modified in place.
 * @param fingerprint Fingerprint providing `cc_version` and `cc_entrypoint`.
 */
function syncPassthroughBillingHeader(body, fingerprint) {
    const head = Array.isArray(body.system) ? body.system[0] : undefined;
    const isTextBlock = Boolean(head) &&
        typeof head === "object" &&
        head.type === "text" &&
        typeof head.text === "string";
    if (!isTextBlock || !head.text.startsWith("x-anthropic-billing-header:")) {
        return;
    }
    head.text = buildClaudeAttributionHeader(extractFirstUserMessageText(body.messages), fingerprint.cc_version, fingerprint.cc_entrypoint);
}
|
|
976
|
+
/**
 * Run every text block of `body.system` through sanitizePrompt, in place.
 *
 * Does nothing when `body.system` is not an array. Elements that are not
 * `{ type: "text", text: <string> }` objects are skipped. Per the original
 * notes, the purpose is to rewrite third-party identity sentences in the
 * system prompt before the request goes upstream.
 *
 * @param body Request body whose system text blocks are rewritten in place.
 */
function sanitizePassthroughSystemArray(body) {
    const blocks = body.system;
    if (!Array.isArray(blocks)) {
        return;
    }
    for (let i = 0; i < blocks.length; i++) {
        const entry = blocks[i];
        if (!entry || typeof entry !== "object") {
            continue;
        }
        if (entry.type !== "text" || typeof entry.text !== "string") {
            continue;
        }
        entry.text = sanitizePrompt(entry.text);
    }
}
|
|
994
|
+
/**
 * Passthrough worker: forward the buyer's Messages body upstream after
 * rewriting the fields the daemon controls, then parse the SSE response.
 *
 * Rewrites applied to a copy of `opts.clientBody`:
 *   - `model` is normalized, `stream` is forced to true;
 *   - `temperature` and `tool_choice` are removed;
 *   - `metadata.user_id` is replaced (other metadata fields are kept);
 *   - system text blocks are sanitized and an existing billing header is rebuilt;
 *   - `thinking.budget_tokens` is raised to CLAUDE_MIN_THINKING_BUDGET when
 *     smaller, and `max_tokens` is raised to `budget_tokens + 1` when it
 *     would not exceed the budget;
 *   - `tools` defaults to an empty array.
 *
 * The buyer's object is never written to: besides the top-level clone, the
 * nested objects that get mutated (`system` blocks, `thinking`) are copied
 * first, so a caller that reuses the same body for another dispatch sees it
 * unchanged.
 *
 * Retry policy: one retry after a 401 (credentials cache cleared first), up
 * to MAX_TRANSIENT_RETRIES retries on 5xx with backoff (honouring
 * `retry-after` when it parses), no retry on 429.
 *
 * @param opts `{ clientBody, model, clientBeta?, onRawEvent? }`.
 * @returns The parsed output from parseClaudeSseResponse.
 * @throws Error on 429 (after arming the rate-guard cooldown) and on any
 *   other non-OK status that is not retried.
 */
async function doCallClaudeApiPassthrough(opts) {
    // The result of this first load is not needed here; the call is kept so
    // the load -> auto-bump -> load sequence stays exactly as before
    // (presumably it populates the cache the auto-bump mutates; confirm).
    loadFingerprint();
    autoBumpFingerprintUaVersion();
    // Fresh read after any auto-bump.
    const fp = loadFingerprint();
    const sessionId = getMaskedSessionId();

    const body = { ...opts.clientBody };
    // The spread above is shallow. Copy the nested objects that are mutated
    // below so the buyer's original blocks are left intact.
    if (Array.isArray(body.system)) {
        body.system = body.system.map((block) => block && typeof block === "object" && !Array.isArray(block)
            ? { ...block }
            : block);
    }
    if (body.thinking && typeof body.thinking === "object" && !Array.isArray(body.thinking)) {
        body.thinking = { ...body.thinking };
    }

    // Canonical model name; the response is always consumed as SSE, so the
    // buyer's own `stream` preference is overridden.
    body.model = normalizeModel(opts.model);
    body.stream = true;
    // Dropped because, per the original notes, the real CLI never sends them.
    delete body.temperature;
    delete body.tool_choice;

    // Replace metadata.user_id with the daemon's identity, keep everything else.
    const metadata = body.metadata && typeof body.metadata === "object"
        ? body.metadata
        : {};
    body.metadata = {
        ...metadata,
        user_id: buildMetadataUserID(fp, sessionId),
    };

    // Order matters: sanitize first, then rebuild the billing header.
    sanitizePassthroughSystemArray(body);
    syncPassthroughBillingHeader(body, fp);

    const thinking = body.thinking;
    if (thinking && typeof thinking === "object") {
        if (thinking.type === "enabled" && typeof thinking.budget_tokens === "number") {
            if (thinking.budget_tokens < CLAUDE_MIN_THINKING_BUDGET) {
                thinking.budget_tokens = CLAUDE_MIN_THINKING_BUDGET;
            }
            // max_tokens must stay strictly above the thinking budget.
            if (typeof body.max_tokens === "number" &&
                body.max_tokens <= thinking.budget_tokens) {
                body.max_tokens = thinking.budget_tokens + 1;
            }
        }
    }

    if (!Array.isArray(body.tools)) {
        body.tools = [];
    }

    // Serialized once; every retry re-sends the identical payload.
    const bodyJson = JSON.stringify(body);
    const requiredBetas = pickClaudeBetasForModel(opts.model);
    const mergedBetas = mergeBetas(requiredBetas, opts.clientBeta);

    let transientAttempt = 0;
    let hasRefreshed = false;
    while (true) {
        const creds = await getFreshCreds();
        const resp = await fetch(ANTHROPIC_MESSAGES_URL, {
            method: "POST",
            headers: {
                ...STATIC_CLAUDE_CODE_HEADERS,
                "accept": "application/json, text/event-stream",
                "anthropic-beta": mergedBetas,
                "user-agent": fp.user_agent,
                "authorization": `Bearer ${creds.accessToken}`,
                "x-claude-code-session-id": sessionId,
            },
            body: bodyJson,
        });

        // Session-window info is recorded whether or not the call succeeded.
        const sessionWin = extractSessionWindowFromHeaders(resp.headers);
        if (sessionWin)
            rateGuard?.setSessionWindow(sessionWin);

        if (resp.ok) {
            const parsed = await parseClaudeSseResponse(resp, opts.model, opts.onRawEvent);
            recordSpendFromUsage(parsed, opts.model);
            return parsed;
        }

        const errText = await resp.text();

        if (resp.status === 429) {
            // Arm the cooldown from the response headers, or fall back to 5 minutes.
            const cooldown = extractCooldownUntilFromHeaders(resp.headers);
            if (cooldown && rateGuard) {
                rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
            }
            else if (rateGuard) {
                rateGuard.triggerCooldown(Date.now() + 5 * 60_000, "fallback 5m (no reset header)");
            }
            throw new Error(`Anthropic 429 rate-limited: ${errText.slice(0, 300)}`);
        }

        if (resp.status === 401 && !hasRefreshed) {
            // One-shot: drop the cached credentials so the next loop fetches new ones.
            logger.warn("[claude-api] 401 from upstream (passthrough), refreshing token + retry");
            hasRefreshed = true;
            cachedCreds = null;
            continue;
        }

        const isTransient = resp.status >= 500 && resp.status <= 599;
        if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
            const retryAfter = parseRetryAfterMs(resp.headers.get("retry-after"));
            // Server hint wins; otherwise exponential backoff with up to 500ms jitter.
            const backoffMs = retryAfter ?? (500 * Math.pow(2, transientAttempt) + Math.random() * 500);
            logger.warn(`[claude-api] ${resp.status} from upstream (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${errText.slice(0, 200)}`);
            await new Promise((r) => setTimeout(r, backoffMs));
            transientAttempt++;
            continue;
        }

        throw new Error(`Anthropic ${resp.status}: ${errText.slice(0, 400)}`);
    }
}
|
|
877
1107
|
function recordSpendFromUsage(parsed, model) {
|
|
878
1108
|
if (!rateGuard)
|
|
879
1109
|
return;
|
|
@@ -38,5 +38,12 @@ export interface CallCodexApiOptions {
|
|
|
38
38
|
prompt: string;
|
|
39
39
|
model: string;
|
|
40
40
|
maxTokens?: number;
|
|
41
|
+
onRawEvent?: (sse: string) => void;
|
|
41
42
|
}
|
|
42
43
|
export declare function callCodexApi(opts: CallCodexApiOptions): Promise<ParsedOutput>;
|
|
44
|
+
/**
 * Options accepted by `callCodexApiPassthrough`.
 *
 * Passthrough mode forwards a buyer-supplied Responses API body instead of
 * building a request from a single prompt string.
 */
export interface CallCodexApiPassthroughOptions {
    /**
     * The buyer's Responses API body. It must carry a non-empty `input`
     * array; the passthrough worker throws otherwise.
     */
    clientBody: Record<string, unknown>;

    /** Model name written into the outgoing frame. */
    model: string;

    /**
     * Optional sink for upstream frames. Each frame of the real (non-warmup)
     * phase is delivered as one SSE-formatted string.
     */
    onRawEvent?: (sse: string) => void;
}
|
|
49
|
+
/**
 * Send a buyer-supplied Responses API body upstream in passthrough mode.
 *
 * @param opts The buyer's body, the requested model and an optional
 *   raw-frame callback.
 * @returns A promise for the parsed upstream response.
 */
export declare function callCodexApiPassthrough(opts: CallCodexApiPassthroughOptions): Promise<ParsedOutput>;
|
|
@@ -645,6 +645,12 @@ export async function callCodexApi(opts) {
|
|
|
645
645
|
configureRateGuard();
|
|
646
646
|
return rateGuard.run(() => doCallCodexApi(opts));
|
|
647
647
|
}
|
|
648
|
+
/**
 * Public entry point for Codex passthrough mode.
 *
 * Calls configureDispatcher(), calls configureRateGuard() when no rate guard
 * is set yet, then runs the passthrough worker through `rateGuard.run`.
 *
 * @param opts Passthrough options, handed unchanged to doCallCodexApiPassthrough.
 * @returns Whatever `rateGuard.run` yields for the wrapped call.
 */
export async function callCodexApiPassthrough(opts) {
    configureDispatcher();
    if (!rateGuard) {
        configureRateGuard();
    }
    const forwardUpstream = () => doCallCodexApiPassthrough(opts);
    return rateGuard.run(forwardUpstream);
}
|
|
648
654
|
async function doCallCodexApi(opts) {
|
|
649
655
|
const prompt = (opts.prompt ?? "").trim();
|
|
650
656
|
if (!prompt) {
|
|
@@ -861,6 +867,23 @@ async function doCallCodexApi(opts) {
|
|
|
861
867
|
: Buffer.from(data).toString("utf-8");
|
|
862
868
|
// Frames are individual JSON objects (no newline framing).
|
|
863
869
|
const target = phase === "warmup" ? warmupAcc : acc;
|
|
870
|
+
// Forward raw frames to the caller (when streaming is requested)
|
|
871
|
+
// only for the real phase — warmup frames are daemon-internal and
|
|
872
|
+
// never reach the end client. Each frame is re-emitted as an
|
|
873
|
+
// Anthropic-style SSE block where `event:` is the frame type
|
|
874
|
+
// (response.output_text.delta, response.completed, etc.), which
|
|
875
|
+
// matches OpenAI's public Responses API SSE wire format exactly.
|
|
876
|
+
if (phase === "real" && opts.onRawEvent) {
|
|
877
|
+
try {
|
|
878
|
+
const parsedFrame = JSON.parse(text);
|
|
879
|
+
const frameType = typeof parsedFrame.type === "string" ? parsedFrame.type : "message";
|
|
880
|
+
opts.onRawEvent(`event: ${frameType}\ndata: ${text}\n\n`);
|
|
881
|
+
}
|
|
882
|
+
catch {
|
|
883
|
+
// Non-JSON frame — forward as a plain data event.
|
|
884
|
+
opts.onRawEvent(`event: message\ndata: ${text}\n\n`);
|
|
885
|
+
}
|
|
886
|
+
}
|
|
864
887
|
const outcome = handleFrame(text, target);
|
|
865
888
|
if (outcome.rateLimit && rateGuard) {
|
|
866
889
|
// Soft hint — record but don't kill this request. Next request will
|
|
@@ -939,3 +962,297 @@ async function doCallCodexApi(opts) {
|
|
|
939
962
|
return parsed;
|
|
940
963
|
}
|
|
941
964
|
}
|
|
965
|
+
// ── Passthrough frame builder ─────────────────────────────────────────────
|
|
966
|
+
/**
 * Build one outgoing `response.create` frame from the buyer's Responses API body.
 *
 * The buyer's body is shallow-copied, then:
 *   - always overwritten: `type`, `model`, `store`, `stream`, `include`,
 *     `client_metadata`;
 *   - defaulted only when absent or unusable: `reasoning`, `tools`,
 *     `tool_choice`, `parallel_tool_calls`, `instructions`;
 *   - `generate` is set to false for the warmup frame and removed otherwise.
 * Every other field of the buyer's body is carried over unchanged.
 *
 * Key insertion order is kept identical to assigning the fields one by one,
 * so the serialized frame does not change.
 *
 * @param clientBody Buyer's request body (not modified).
 * @param model Model name to place in the frame.
 * @param fingerprint Codex fingerprint; `installation_id` is read from it.
 * @param sessionId Session id used to form the window id.
 * @param turnMetadataHeader Serialized turn metadata string.
 * @param windowGeneration Window generation appended to the session id.
 * @param warmup True to build the `generate: false` warmup frame.
 * @returns The new frame object.
 */
function buildCodexPassthroughFrame(clientBody, model, fingerprint, sessionId, turnMetadataHeader, windowGeneration, warmup) {
    // Spread first, forced fields after: keys already present keep their
    // position and take the forced value, new keys are appended in this order.
    const frame = {
        ...clientBody,
        type: "response.create",
        model,
        store: false,
        stream: true,
        include: ["reasoning.encrypted_content"],
        client_metadata: {
            "x-codex-installation-id": fingerprint.installation_id,
            "x-codex-window-id": `${sessionId}:${windowGeneration}`,
            "x-codex-turn-metadata": turnMetadataHeader,
        },
    };

    // Buyer-provided reasoning config wins; otherwise use the default.
    const hasReasoning = Boolean(frame.reasoning) && typeof frame.reasoning === "object";
    if (!hasReasoning) {
        frame.reasoning = { effort: "medium", summary: "auto" };
    }

    if (!Array.isArray(frame.tools)) {
        frame.tools = [];
    }

    // `== null` matches both undefined and null.
    if (frame.tool_choice == null) {
        frame.tool_choice = "auto";
    }

    if (frame.parallel_tool_calls === undefined) {
        frame.parallel_tool_calls = false;
    }

    // Missing, non-string or empty instructions fall back to the relay default.
    const hasInstructions = typeof frame.instructions === "string" && frame.instructions.length > 0;
    if (!hasInstructions) {
        frame.instructions = RELAY_INSTRUCTIONS;
    }

    if (warmup) {
        frame.generate = false;
    }
    else {
        // Strip any `generate` the buyer's body may have carried.
        delete frame.generate;
    }
    return frame;
}
|
|
1026
|
+
// ── Passthrough entry point ───────────────────────────────────────────────
|
|
1027
|
+
//
|
|
1028
|
+
/**
 * Passthrough worker for Codex: dial the upstream WebSocket, send a warmup
 * frame followed by the real frame (both built by buildCodexPassthroughFrame
 * from the buyer's body), accumulate the response and return it.
 *
 * Deliberately kept as a sibling of doCallCodexApi rather than merged with
 * it, so passthrough-specific fixes cannot regress the template path; the
 * two can later be unified via a frame-builder parameter.
 *
 * Streaming: when `opts.onRawEvent` is set, every frame received during the
 * real phase is forwarded as `event: <type>\ndata: <json>\n\n`. Warmup
 * frames are never forwarded.
 *
 * Retry policy:
 *   - upgrade 429: arm the rate-guard cooldown and throw;
 *   - upgrade 401: clear cached credentials and retry once;
 *   - upgrade 5xx and transport errors while dialing: retry with backoff up
 *     to MAX_TRANSIENT_RETRIES;
 *   - mid-session socket errors: retried the same way, but ONLY while
 *     nothing has been forwarded to `onRawEvent`. Once part of the response
 *     has reached the caller's stream, a retry would replay the response
 *     from the start into that same stream, so the error is surfaced instead.
 *
 * @param opts `{ clientBody, model, onRawEvent? }`; `clientBody.input` must
 *   be a non-empty array.
 * @returns Parsed output: text, sessionId, usage, model, costUsd.
 * @throws Error when `input` is missing or empty, on rate limiting, on
 *   upstream errors, on timeout, or when retries are exhausted.
 */
async function doCallCodexApiPassthrough(opts) {
    // Minimal validation: only `input` is required here.
    const input = opts.clientBody.input;
    if (!Array.isArray(input) || input.length === 0) {
        throw new Error("Passthrough body missing `input` array");
    }
    const fingerprint = loadCodexFingerprint();
    const sessionId = getMaskedSessionId();
    const windowGeneration = 0;
    const windowId = `${sessionId}:${windowGeneration}`;
    // Constant for the life of the process, so computed once rather than per attempt.
    const platformSandboxTag = process.platform === "darwin"
        ? "seatbelt"
        : process.platform === "linux"
            ? "seccomp"
            : process.platform === "win32"
                ? "windows_sandbox"
                : "none";
    let transientAttempt = 0;
    let hasRefreshed = false;
    // Exponential backoff with up to 500ms of jitter, based on the current attempt.
    const nextBackoffMs = () => 500 * Math.pow(2, transientAttempt) + Math.random() * 500;

    while (true) {
        const creds = await getFreshCreds();
        // A new turn id is generated for every attempt.
        const turnMetadata = JSON.stringify({
            session_id: sessionId,
            turn_id: randomUUID(),
            sandbox: platformSandboxTag,
        });
        const warmupFrameJson = JSON.stringify(buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
        /*warmup*/ true));
        const realFrameJson = JSON.stringify(buildCodexPassthroughFrame(opts.clientBody, opts.model, fingerprint, sessionId, turnMetadata, windowGeneration,
        /*warmup*/ false));
        const headers = {
            "authorization": `Bearer ${creds.accessToken}`,
            "originator": fingerprint.originator,
            "openai-beta": fingerprint.openai_beta,
            "session_id": sessionId,
            "x-client-request-id": sessionId,
            "x-codex-window-id": windowId,
            "x-codex-turn-metadata": turnMetadata,
        };
        if (fingerprint.user_agent) {
            headers["user-agent"] = fingerprint.user_agent;
        }

        let dialed;
        try {
            dialed = await dialCodexWebSocket(headers);
        }
        catch (err) {
            if (err instanceof WsDialError) {
                const status = err.statusCode;
                if (status === 429) {
                    const cooldown = cooldownFromHttpHeaders(err.headers);
                    // NOTE(review): the fallback passes an absolute timestamp;
                    // confirm `cooldown.ms` uses the same convention.
                    if (cooldown && rateGuard) {
                        rateGuard.triggerCooldown(cooldown.ms, cooldown.reason);
                    }
                    else if (rateGuard) {
                        rateGuard.triggerCooldown(Date.now() + 5 * 60_000, "fallback 5m (no reset header)");
                    }
                    throw new Error(`Codex 429 rate-limited: ${err.bodySnippet.slice(0, 300)}`);
                }
                if (status === 401 && !hasRefreshed) {
                    // One-shot: drop cached credentials so the next loop fetches new ones.
                    logger.warn("[codex-api] 401 from upgrade (passthrough), refreshing token + retry");
                    hasRefreshed = true;
                    cachedCreds = null;
                    continue;
                }
                const isTransient = status >= 500 && status <= 599;
                if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
                    const backoffMs = nextBackoffMs();
                    logger.warn(`[codex-api] upgrade ${status} (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${err.bodySnippet.slice(0, 200)}`);
                    await new Promise((r) => setTimeout(r, backoffMs));
                    transientAttempt++;
                    continue;
                }
                throw new Error(`Codex upgrade ${status}: ${err.bodySnippet.slice(0, 400)}`);
            }
            // Anything else is treated as a transport-level failure.
            if (transientAttempt < MAX_TRANSIENT_RETRIES) {
                const backoffMs = nextBackoffMs();
                // A thrown value is not guaranteed to be an Error.
                const detail = err instanceof Error ? err.message : String(err);
                logger.warn(`[codex-api] transport error (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${detail}`);
                await new Promise((r) => setTimeout(r, backoffMs));
                transientAttempt++;
                continue;
            }
            throw err;
        }

        const { ws } = dialed;
        const acc = {
            text: "",
            inputTokens: 0,
            outputTokens: 0,
            cacheReadTokens: 0,
            model: opts.model,
            terminal: false,
        };
        let resolved = false;
        // Becomes true as soon as one real-phase frame is handed to onRawEvent.
        let forwardedToClient = false;

        const result = await new Promise((resolve) => {
            let phase = "warmup";
            // Settle exactly once: stop the timer, close the socket, resolve.
            const finish = (r) => {
                if (resolved)
                    return;
                resolved = true;
                clearTimeout(timer);
                try {
                    ws.close(1000, "done");
                }
                catch {
                    // Best effort: the socket may already be closed.
                }
                resolve(r);
            };
            const timer = setTimeout(() => {
                finish({
                    ok: false,
                    retriable: false,
                    error: new Error(`Codex WS timed out after ${WS_OVERALL_TIMEOUT_MS}ms waiting for response.completed`),
                });
            }, WS_OVERALL_TIMEOUT_MS);
            // Warmup output is accumulated separately and discarded.
            const warmupAcc = {
                text: "",
                inputTokens: 0,
                outputTokens: 0,
                cacheReadTokens: 0,
                model: opts.model,
                terminal: false,
            };
            const sendFrame = (frameJson) => {
                try {
                    ws.send(frameJson, (sendErr) => {
                        if (sendErr) {
                            finish({ ok: false, retriable: true, error: sendErr });
                        }
                    });
                }
                catch (err) {
                    finish({ ok: false, retriable: true, error: err });
                }
            };
            // Re-emit one upstream frame to the caller as an SSE block. The
            // callback is invoked exactly once per frame; it is kept outside
            // the JSON.parse try so that a throwing callback is not called a
            // second time by the non-JSON fallback.
            const forwardRawFrame = (text) => {
                let frameType = "message";
                try {
                    const parsedFrame = JSON.parse(text);
                    if (parsedFrame && typeof parsedFrame.type === "string") {
                        frameType = parsedFrame.type;
                    }
                }
                catch {
                    // Non-JSON frame: forwarded as a plain "message" event.
                }
                forwardedToClient = true;
                opts.onRawEvent(`event: ${frameType}\ndata: ${text}\n\n`);
            };
            ws.on("message", (data, _isBinary) => {
                const text = Buffer.isBuffer(data)
                    ? data.toString("utf-8")
                    : Array.isArray(data)
                        ? Buffer.concat(data).toString("utf-8")
                        : Buffer.from(data).toString("utf-8");
                const target = phase === "warmup" ? warmupAcc : acc;
                if (phase === "real" && opts.onRawEvent) {
                    forwardRawFrame(text);
                }
                const outcome = handleFrame(text, target);
                if (outcome.rateLimit && rateGuard) {
                    rateGuard.triggerCooldown(outcome.rateLimit.ms, outcome.rateLimit.reason);
                }
                if (outcome.terminal) {
                    if (outcome.error) {
                        finish({
                            ok: false,
                            retriable: false,
                            error: new Error(`Codex upstream error: ${outcome.error}`),
                        });
                        return;
                    }
                    if (phase === "warmup") {
                        // Warmup finished: switch phase and send the real request.
                        phase = "real";
                        sendFrame(realFrameJson);
                        return;
                    }
                    acc.terminal = true;
                    finish({ ok: true });
                }
            });
            ws.on("close", (code, reason) => {
                if (acc.terminal)
                    return;
                finish({
                    ok: false,
                    retriable: true,
                    error: new Error(`Codex WS closed early (code=${code}, reason=${reason.toString().slice(0, 200)})`),
                });
            });
            ws.on("error", (err) => {
                finish({ ok: false, retriable: true, error: err });
            });
            sendFrame(warmupFrameJson);
        });

        if (!result.ok) {
            // Never retry after part of the response has been streamed out:
            // the retry would replay the response into the caller's stream.
            const canRetry = result.retriable &&
                !forwardedToClient &&
                transientAttempt < MAX_TRANSIENT_RETRIES;
            if (canRetry) {
                const backoffMs = nextBackoffMs();
                logger.warn(`[codex-api] mid-session ws error (passthrough attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${result.error.message}`);
                await new Promise((r) => setTimeout(r, backoffMs));
                transientAttempt++;
                continue;
            }
            throw result.error;
        }

        const parsed = {
            text: acc.text,
            sessionId,
            usage: {
                input_tokens: acc.inputTokens,
                output_tokens: acc.outputTokens,
                cache_creation_tokens: 0,
                cache_read_tokens: acc.cacheReadTokens,
            },
            model: acc.model,
            costUsd: 0,
        };
        // Cost is only computed and recorded when a rate guard exists.
        if (rateGuard) {
            const cost = calculateCost(opts.model, parsed.usage.input_tokens, parsed.usage.output_tokens, parsed.usage.cache_creation_tokens, parsed.usage.cache_read_tokens);
            rateGuard.recordSpend(cost.apiCost);
            parsed.costUsd = cost.apiCost;
        }
        logger.info(`[codex-api] passthrough OK model=${acc.model} in=${acc.inputTokens} out=${acc.outputTokens} cache_read=${acc.cacheReadTokens}`);
        return parsed;
    }
}
|