clawmoney 0.15.67 → 0.15.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/relay/provider.js +107 -2
- package/dist/relay/upstream/claude-api.js +106 -21
- package/package.json +1 -1
package/dist/relay/provider.js
CHANGED
|
@@ -167,6 +167,83 @@ function extractMessageText(content) {
|
|
|
167
167
|
/**
 * Flatten a list of chat messages into a single prompt string.
 *
 * Each message's `content` is passed through `extractMessageText`, and the
 * resulting pieces are joined with a newline, in message order.
 *
 * @param {Array<{content: unknown}>} messages - Messages to flatten.
 * @returns {string} Newline-joined text of every message.
 */
function messagesToPrompt(messages) {
    const pieces = messages.map((message) => {
        return extractMessageText(message.content);
    });
    return pieces.join("\n");
}
|
|
170
|
+
// ── OAuth auto-pause (per-cli_type) ────────────────────────────────────
|
|
171
|
+
//
|
|
172
|
+
// When upstream keeps rejecting our OAuth token (Anthropic 403
|
|
173
|
+
// permission_error, ChatGPT auth failures, etc.), continuing to hammer
|
|
174
|
+
// it wastes buyer requests, surfaces errors the Hub has to failover
|
|
175
|
+
// around, and thrashes the Hub's 5xx ban / unban cycle every time the
|
|
176
|
+
// daemon reconnects. Track consecutive auth-broken errors per cli_type
|
|
177
|
+
// — after AUTH_ERROR_THRESHOLD hits in a row, stop accepting new
|
|
178
|
+
// requests for THAT cli_type until daemon restart. Every successful
|
|
179
|
+
// upstream response resets the counter.
|
|
180
|
+
//
|
|
181
|
+
// Key properties:
|
|
182
|
+
// - Per cli_type: a broken Claude OAuth doesn't take down Codex or
|
|
183
|
+
// Gemini on the same daemon, because each has its own counter and
|
|
184
|
+
// its own disable flag.
|
|
185
|
+
// - In-memory only: state resets on daemon restart. If the operator
|
|
186
|
+
// re-authed between restarts, the next request proves the token
|
|
187
|
+
// works and nothing happens; if they didn't, the counter fills
|
|
188
|
+
// back up within AUTH_ERROR_THRESHOLD requests and re-disables.
|
|
189
|
+
// - No WS lifecycle touched: the daemon stays connected to the Hub
|
|
190
|
+
// so other cli_types still serve. We just refuse to call upstream
|
|
191
|
+
// for the disabled one, returning a clean error the Hub can use
|
|
192
|
+
// to ban this provider row (its existing _is_auth_broken_error
|
|
193
|
+
// pattern catches our "OAuth authentication broken" message).
|
|
194
|
+
//
|
|
195
|
+
// Operator recovery: run `clawmoney login <cli>` (or re-auth the
|
|
196
|
+
// relevant CLI directly — `claude login`, `codex login`, etc.), then
|
|
197
|
+
// `clawmoney relay restart` to reset the counter.
|
|
198
|
+
// Number of consecutive auth-broken upstream errors for one cli_type that
// triggers the pause in noteUpstreamAuthError.
const AUTH_ERROR_THRESHOLD = 3;
// cli_type -> count of consecutive auth-broken errors (absent entry means 0).
const consecutiveAuthErrorsByCli = new Map();
// cli_types currently paused; executeRelayRequest refuses them up front.
const cliAuthDisabled = new Set();
// Lower-case substrings that classify an upstream error message as a
// persistent auth failure. Matching is a case-insensitive substring test.
const AUTH_BROKEN_PATTERNS = [
    // Anthropic 403: OAuth authentication is currently not allowed for
    // this organization. The new prod signal from 2026-04-15 incident.
    "permission_error",
    "not allowed for this organization",
    // Legacy Claude / Anthropic auth failures (also matched by Hub's
    // _AUTH_BROKEN_PATTERNS, so the two sides agree on classification).
    "token refresh failed",
    "invalid_grant",
    "request not allowed",
    "oauth refresh",
    // Generic OAuth HTTP signatures. Catches the one-off 401/403
    // responses from codex / gemini / antigravity that carry the same
    // meaning even when the upstream-specific message format differs.
    "unauthorized",
];
/**
 * Decide whether an upstream error message looks like a persistent auth
 * failure, i.e. contains any of AUTH_BROKEN_PATTERNS (case-insensitive).
 *
 * This is called from a `catch` handler, so it must never throw itself:
 * `null` / `undefined` are treated as an empty message and any other
 * non-string value is stringified before matching.
 *
 * @param {unknown} errMsg - Error message text (normally a string).
 * @returns {boolean} True when the message matches an auth-broken pattern.
 */
function isAuthBrokenError(errMsg) {
    const text = typeof errMsg === "string" ? errMsg : String(errMsg ?? "");
    const lower = text.toLowerCase();
    return AUTH_BROKEN_PATTERNS.some((p) => lower.includes(p));
}
|
|
221
|
+
/**
 * Record one more consecutive auth-broken upstream error for `cliType`.
 *
 * Increments the per-cli_type counter. The first time the counter reaches
 * AUTH_ERROR_THRESHOLD, the cli_type is added to `cliAuthDisabled` and a
 * banner explaining how to resume is written through `logger.error`.
 * Once a cli_type is already disabled, further calls only bump the counter.
 *
 * @param {string} cliType - The cli_type whose upstream call failed.
 * @returns {void}
 */
function noteUpstreamAuthError(cliType) {
    const previous = consecutiveAuthErrorsByCli.get(cliType) ?? 0;
    const streak = previous + 1;
    consecutiveAuthErrorsByCli.set(cliType, streak);
    // Below the threshold, or already paused: nothing more to do.
    if (streak < AUTH_ERROR_THRESHOLD || cliAuthDisabled.has(cliType)) {
        return;
    }
    cliAuthDisabled.add(cliType);
    // One logger.error call per banner line, blank line before and after.
    const banner = [
        "",
        ` ╔══════════════════════════════════════════════════════════════`,
        ` ║ OAuth broken for cli_type='${cliType}' — ${streak} consecutive`,
        ` ║ auth-broken responses from upstream. Pausing relay for this`,
        ` ║ cli_type to stop thrashing buyer requests + Hub ban state.`,
        ` ║`,
        ` ║ TO RESUME: re-authenticate your ${cliType} CLI locally, then`,
        ` ║ run 'clawmoney relay restart'.`,
        ` ║`,
        ` ║ Other cli_types on this daemon continue to serve normally.`,
        ` ╚══════════════════════════════════════════════════════════════`,
        "",
    ];
    for (const line of banner) {
        logger.error(line);
    }
}
|
|
240
|
+
/**
 * Clear the consecutive auth-error streak for `cliType` after a successful
 * upstream response.
 *
 * Only the counter entry is removed. This function deliberately leaves
 * `cliAuthDisabled` alone: a single good response might be an upstream
 * glitch rather than a real token refresh, so a pause is never lifted
 * mid-run from here.
 *
 * @param {string} cliType - The cli_type that just completed successfully.
 * @returns {void}
 */
function noteUpstreamSuccess(cliType) {
    const streaks = consecutiveAuthErrorsByCli;
    // A missing entry is read back as 0 by noteUpstreamAuthError (`?? 0`).
    streaks.delete(cliType);
}
|
|
170
247
|
async function executeRelayRequest(request, config, sendChunk) {
|
|
171
248
|
const { request_id, max_budget_usd } = request;
|
|
172
249
|
const cliType = request.cli_type ?? config.relay.cli_type;
|
|
@@ -190,6 +267,21 @@ async function executeRelayRequest(request, config, sendChunk) {
|
|
|
190
267
|
logger.info(` │ CLI: ${cliType} / ${model} (${modeLabel})`);
|
|
191
268
|
logger.info(` │ Turns: ${turns}`);
|
|
192
269
|
logger.info(` │ Prompt: ${String(lastUserMsg).slice(0, 80)}`);
|
|
270
|
+
// Fast-fail if this cli_type was auto-paused by a run of auth-broken
|
|
271
|
+
// responses earlier in the session. Returning the error here instead
|
|
272
|
+
// of calling upstream saves the round-trip and keeps the Hub's ban
|
|
273
|
+
// pattern triggering (it matches "OAuth authentication" / "auth
|
|
274
|
+
// broken" in _is_auth_broken_error) so buyer requests go straight to
|
|
275
|
+
// a healthy provider.
|
|
276
|
+
if (cliAuthDisabled.has(cliType)) {
|
|
277
|
+
logger.warn(` └─ REFUSED: ${cliType} auth paused (restart relay after re-auth)`);
|
|
278
|
+
return {
|
|
279
|
+
event: "relay_response",
|
|
280
|
+
request_id,
|
|
281
|
+
content: "",
|
|
282
|
+
error: `OAuth authentication broken for cli_type='${cliType}'. Provider needs to re-authenticate locally and restart the daemon. (permission_error)`,
|
|
283
|
+
};
|
|
284
|
+
}
|
|
193
285
|
try {
|
|
194
286
|
const startMs = Date.now();
|
|
195
287
|
let parsed;
|
|
@@ -300,6 +392,9 @@ async function executeRelayRequest(request, config, sendChunk) {
|
|
|
300
392
|
if (fakeModelUsed) {
|
|
301
393
|
logger.warn(` ! CLAWMONEY_FAKE_MODEL_USED=${fakeModelUsed} — reporting fake model to Hub (test mode)`);
|
|
302
394
|
}
|
|
395
|
+
// Successful upstream round-trip — reset the auth-error counter for
|
|
396
|
+
// this cli_type. One good response means the token currently works.
|
|
397
|
+
noteUpstreamSuccess(cliType);
|
|
303
398
|
return {
|
|
304
399
|
event: "relay_response",
|
|
305
400
|
request_id,
|
|
@@ -312,12 +407,22 @@ async function executeRelayRequest(request, config, sendChunk) {
|
|
|
312
407
|
};
|
|
313
408
|
}
|
|
314
409
|
catch (err) {
|
|
315
|
-
|
|
410
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
411
|
+
logger.error(` └─ ERROR: ${errMsg}`);
|
|
412
|
+
// If the upstream error looks like a persistent auth failure
|
|
413
|
+
// (OAuth rejected, token broken, permission_error, etc.), bump
|
|
414
|
+
// this cli_type's consecutive-auth-error counter. After
|
|
415
|
+
// AUTH_ERROR_THRESHOLD in a row, future requests for this
|
|
416
|
+
// cli_type short-circuit at the top of executeRelayRequest until
|
|
417
|
+
// daemon restart.
|
|
418
|
+
if (isAuthBrokenError(errMsg)) {
|
|
419
|
+
noteUpstreamAuthError(cliType);
|
|
420
|
+
}
|
|
316
421
|
return {
|
|
317
422
|
event: "relay_response",
|
|
318
423
|
request_id,
|
|
319
424
|
content: "",
|
|
320
|
-
error:
|
|
425
|
+
error: errMsg || "Unknown execution error",
|
|
321
426
|
};
|
|
322
427
|
}
|
|
323
428
|
}
|
|
@@ -994,6 +994,56 @@ function mergeBetas(required, clientBeta) {
|
|
|
994
994
|
}
|
|
995
995
|
return out.join(",");
|
|
996
996
|
}
|
|
997
|
+
// Scan a passthrough body for any `cache_control: {type: "ephemeral",
// ttl: "1h"}` block across tools / system / messages. The presence of
// even one 1h block forces us to upgrade our own injected CC marker in
// system to 1h too, because Anthropic rejects requests where a 1h
// block appears after any 5m block in the global tools→system→messages
// ordering (see long comment in ensureClaudeCodeShell).
//
// Returns true on the first 1h block found — this is a detect-only
// walk, not a rewrite. Safe on malformed bodies: a null / undefined /
// non-object body, non-array sections, and non-object entries all
// yield false instead of throwing.
/**
 * @param {unknown} body - Passthrough request body (may be malformed).
 * @returns {boolean} True when any entry of `body.tools`, `body.system`, or
 *   any content block of `body.messages[*].content` has
 *   `cache_control.ttl === "1h"`.
 */
function bodyHasExtendedCacheBlock(body) {
    // Guard the body itself so the property reads below cannot throw.
    if (!body || typeof body !== "object") {
        return false;
    }
    // Only the ttl is inspected; cache_control.type is not checked.
    const isExtendedBlock = (block) => {
        if (!block || typeof block !== "object") {
            return false;
        }
        const cc = block.cache_control;
        if (!cc || typeof cc !== "object") {
            return false;
        }
        return cc.ttl === "1h";
    };
    const anyExtended = (blocks) => Array.isArray(blocks) && blocks.some((b) => isExtendedBlock(b));
    if (anyExtended(body.tools) || anyExtended(body.system)) {
        return true;
    }
    if (!Array.isArray(body.messages)) {
        return false;
    }
    // Anthropic messages can carry content either as a string (no
    // cache_control possible) or as an array of content blocks (each of
    // which can carry cache_control). Only the array form matters here.
    return body.messages.some((m) => Boolean(m) && typeof m === "object" && anyExtended(m.content));
}
|
|
997
1047
|
// Ensure a passthrough body carries the full Claude Code fingerprint
|
|
998
1048
|
// shell that Anthropic's OAuth-endpoint validator expects. Called from
|
|
999
1049
|
// doCallClaudeApiPassthrough as the last body-munging step before the
|
|
@@ -1057,34 +1107,69 @@ function ensureClaudeCodeShell(body, fingerprint) {
|
|
|
1057
1107
|
const firstUserMsg = extractFirstUserMessageText(body.messages);
|
|
1058
1108
|
const freshHeader = buildClaudeAttributionHeader(firstUserMsg, fingerprint.cc_version, fingerprint.cc_entrypoint);
|
|
1059
1109
|
// ── Inject CC marker if missing ──
|
|
1060
|
-
// Position: right after the billing header slot (idx 1), or right
|
|
1061
|
-
// after any buyer-prefixed system blocks (at head) if we're also
|
|
1062
|
-
// inserting the billing header.
|
|
1063
1110
|
//
|
|
1064
|
-
// Anthropic has a hard ordering rule
|
|
1065
|
-
//
|
|
1066
|
-
// `ttl="
|
|
1067
|
-
//
|
|
1068
|
-
//
|
|
1069
|
-
//
|
|
1070
|
-
//
|
|
1071
|
-
//
|
|
1072
|
-
//
|
|
1073
|
-
//
|
|
1074
|
-
//
|
|
1111
|
+
// Anthropic has a hard GLOBAL ordering rule across the whole request:
|
|
1112
|
+
// within the linear processing order `tools → system → messages`,
|
|
1113
|
+
// any block with `cache_control.ttl="1h"` MUST come before any block
|
|
1114
|
+
// with `ttl="5m"`. Not just within one section — globally. A 5m block
|
|
1115
|
+
// in system comes before any 1h block in messages and that's a 400.
|
|
1116
|
+
//
|
|
1117
|
+
// Our injected CC marker lives in system. Its default TTL is 5m
|
|
1118
|
+
// (what real Claude Code uses). When a buyer request carries any 1h
|
|
1119
|
+
// cache_control block ANYWHERE (their own system, or inside any
|
|
1120
|
+
// message content block, or in tools), naively injecting a 5m marker
|
|
1121
|
+
// in system causes:
|
|
1122
|
+
// system.N.cache_control.ttl — when the 1h is in system below us
|
|
1123
|
+
// messages.N.content.M.cache_control.ttl — when the 1h is in messages
|
|
1124
|
+
// Anthropic 400s with:
|
|
1125
|
+
// a ttl='1h' cache_control block must not come after a ttl='5m'
|
|
1126
|
+
// cache_control block. Note that blocks are processed in the
|
|
1127
|
+
// following order: `tools`, `system`, `messages`.
|
|
1128
|
+
//
|
|
1129
|
+
// Fix: detect whether the buyer's body touches 1h cache anywhere.
|
|
1130
|
+
// If yes, upgrade our marker's TTL to 1h too — then the whole request
|
|
1131
|
+
// is uniformly 1h from our side, no 1h-after-5m violation possible.
|
|
1132
|
+
// If no, keep the default 5m (matches real Claude Code fingerprint).
|
|
1133
|
+
//
|
|
1134
|
+
// The 1h TTL won't actually materialise extra cost for our marker
|
|
1135
|
+
// because our system block is < 1024 tokens and below Anthropic's
|
|
1136
|
+
// minimum cache token threshold, so neither 5m nor 1h actually
|
|
1137
|
+
// produces a cache write or read. The TTL label is purely a shape
|
|
1138
|
+
// marker that unblocks the ordering validator.
|
|
1075
1139
|
if (!hasCcMarker) {
|
|
1140
|
+
const buyerUsesExtendedCache = bodyHasExtendedCacheBlock(body);
|
|
1076
1141
|
const markerBlock = {
|
|
1077
1142
|
type: "text",
|
|
1078
1143
|
text: `${CLAUDE_CODE_SYSTEM_PROMPT_LEAD}\n\n${RELAY_INSTRUCTIONS}`,
|
|
1079
|
-
cache_control:
|
|
1144
|
+
cache_control: buyerUsesExtendedCache
|
|
1145
|
+
? { type: "ephemeral", ttl: "1h" }
|
|
1146
|
+
: { type: "ephemeral" },
|
|
1080
1147
|
};
|
|
1148
|
+
// Insert position inside system:
|
|
1149
|
+
// - If our marker is 5m: put it AFTER any existing 1h block in
|
|
1150
|
+
// system so system-internal ordering holds (1h-before-5m).
|
|
1151
|
+
// - If our marker is 1h: put it BEFORE any existing 5m block in
|
|
1152
|
+
// system for the same reason (1h-before-5m). No 5m block →
|
|
1153
|
+
// default slot.
|
|
1081
1154
|
let insertAt = hasBillingHeaderFirst ? 1 : 0;
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1155
|
+
if (buyerUsesExtendedCache) {
|
|
1156
|
+
for (let i = 0; i < system.length; i++) {
|
|
1157
|
+
const cc = system[i]
|
|
1158
|
+
?.cache_control;
|
|
1159
|
+
if (cc && typeof cc === "object" && (cc.ttl ?? "5m") === "5m") {
|
|
1160
|
+
insertAt = i;
|
|
1161
|
+
break;
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
1165
|
+
else {
|
|
1166
|
+
for (let i = system.length - 1; i >= 0; i--) {
|
|
1167
|
+
const cc = system[i]
|
|
1168
|
+
?.cache_control;
|
|
1169
|
+
if (cc && typeof cc === "object" && cc.ttl === "1h") {
|
|
1170
|
+
insertAt = i + 1;
|
|
1171
|
+
break;
|
|
1172
|
+
}
|
|
1088
1173
|
}
|
|
1089
1174
|
}
|
|
1090
1175
|
system.splice(insertAt, 0, markerBlock);
|