clawmoney 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -367,9 +367,23 @@ export async function callClaudeApi(opts) {
|
|
|
367
367
|
configureRateGuard();
|
|
368
368
|
return rateGuard.run(() => doCallClaudeApi(opts));
|
|
369
369
|
}
|
|
370
|
+
// Maximum number of automatic retries on transient upstream errors
|
|
371
|
+
// (429 / 5xx). Matches the Anthropic official SDK default. Does NOT count
|
|
372
|
+
// the initial attempt or the one-shot 401-refresh retry.
|
|
373
|
+
const MAX_TRANSIENT_RETRIES = 2;
|
|
374
|
+
/**
 * Convert an HTTP `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are recognised:
 *   - delay-seconds: a plain non-negative decimal number ("2", "1.5"),
 *     returned as seconds * 1000;
 *   - a date string that `Date.parse` understands, returned as the time
 *     remaining until that date, clamped to 0 when it is already in the past.
 *
 * @param {string | null | undefined} header - Raw header value.
 * @returns {number | null} Delay in milliseconds, or null when the header is
 *   missing, blank, or not in either recognised form.
 */
function parseRetryAfterMs(header) {
    if (!header)
        return null;
    const value = String(header).trim();
    if (value === "")
        return null;
    const seconds = Number(value);
    if (!Number.isNaN(seconds)) {
        // Numeric-looking input is decided here and never handed to Date.parse:
        // Number() coercion also accepts negative, hex ("0x10"), exponent
        // ("1e3") and "Infinity" spellings, none of which is a valid delay, and
        // Date.parse's treatment of such strings is implementation-defined.
        const isPlainDecimal = /^\d+(?:\.\d+)?$/.test(value);
        return isPlainDecimal && Number.isFinite(seconds) ? seconds * 1000 : null;
    }
    const asDate = Date.parse(value);
    if (Number.isFinite(asDate))
        return Math.max(0, asDate - Date.now());
    return null;
}
|
|
370
385
|
async function doCallClaudeApi(opts) {
|
|
371
386
|
const fingerprint = loadFingerprint();
|
|
372
|
-
const creds = await getFreshCreds();
|
|
373
387
|
const sessionId = randomUUID();
|
|
374
388
|
const maxTokens = opts.maxTokens ?? 4096;
|
|
375
389
|
const body = {
|
|
@@ -383,6 +397,18 @@ async function doCallClaudeApi(opts) {
|
|
|
383
397
|
{
|
|
384
398
|
type: "text",
|
|
385
399
|
text: `${CLAUDE_CODE_SYSTEM_PROMPT_LEAD}\n\n${RELAY_INSTRUCTIONS}`,
|
|
400
|
+
// Mark the last system block for prompt caching. Real Claude Code
|
|
401
|
+
// *always* attaches cache_control: {type: "ephemeral"} to its system
|
|
402
|
+
// blocks — Anthropic uses the presence of this marker as part of its
|
|
403
|
+
// "is this really Claude Code?" fingerprint check, so sending a bare
|
|
404
|
+
// string-typed or unmarked array-typed system is a detectability
|
|
405
|
+
// signal that can trip 403 "Request not allowed". Our system is too
|
|
406
|
+
// short (<1024 tokens) to actually hit the cache, so the marker's
|
|
407
|
+
// immediate effect is zero — it exists purely for fingerprint fidelity.
|
|
408
|
+
// When we later bloat system to >=1024 tokens (e.g. for high-traffic
|
|
409
|
+
// cost savings), this same marker will automatically start
|
|
410
|
+
// materializing real cache reads.
|
|
411
|
+
cache_control: { type: "ephemeral" },
|
|
386
412
|
},
|
|
387
413
|
],
|
|
388
414
|
messages: [
|
|
@@ -394,46 +420,55 @@ async function doCallClaudeApi(opts) {
|
|
|
394
420
|
metadata: { user_id: buildMetadataUserID(fingerprint, sessionId) },
|
|
395
421
|
stream: false,
|
|
396
422
|
};
|
|
397
|
-
const
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
"x-claude-code-session-id": sessionId,
|
|
404
|
-
},
|
|
405
|
-
body: JSON.stringify(body),
|
|
406
|
-
});
|
|
407
|
-
if (resp.status === 401) {
|
|
408
|
-
// Token became invalid mid-flight; force a refresh and retry once.
|
|
409
|
-
logger.warn("[claude-api] 401 from upstream, forcing refresh + retry");
|
|
410
|
-
cachedCreds = null;
|
|
411
|
-
const fresh = await getFreshCreds();
|
|
412
|
-
const retry = await fetch(ANTHROPIC_MESSAGES_URL, {
|
|
423
|
+
const bodyJson = JSON.stringify(body);
|
|
424
|
+
let transientAttempt = 0;
|
|
425
|
+
let hasRefreshed = false;
|
|
426
|
+
while (true) {
|
|
427
|
+
const creds = await getFreshCreds();
|
|
428
|
+
const resp = await fetch(ANTHROPIC_MESSAGES_URL, {
|
|
413
429
|
method: "POST",
|
|
414
430
|
headers: {
|
|
415
431
|
...STATIC_CLAUDE_CODE_HEADERS,
|
|
416
432
|
"user-agent": fingerprint.user_agent,
|
|
417
|
-
"authorization": `Bearer ${
|
|
433
|
+
"authorization": `Bearer ${creds.accessToken}`,
|
|
418
434
|
"x-claude-code-session-id": sessionId,
|
|
419
435
|
},
|
|
420
|
-
body:
|
|
436
|
+
body: bodyJson,
|
|
421
437
|
});
|
|
422
|
-
if (
|
|
423
|
-
const
|
|
424
|
-
|
|
438
|
+
if (resp.ok) {
|
|
439
|
+
const parsed = parseResponse(await resp.json(), opts.model);
|
|
440
|
+
recordSpendFromUsage(parsed, opts.model);
|
|
441
|
+
return parsed;
|
|
425
442
|
}
|
|
426
|
-
const
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
443
|
+
const errText = await resp.text();
|
|
444
|
+
// 401 → one-shot token refresh + retry. If we already refreshed once
|
|
445
|
+
// and still got 401, the credentials are genuinely broken — bubble up.
|
|
446
|
+
if (resp.status === 401 && !hasRefreshed) {
|
|
447
|
+
logger.warn("[claude-api] 401 from upstream, refreshing token + retry");
|
|
448
|
+
hasRefreshed = true;
|
|
449
|
+
cachedCreds = null;
|
|
450
|
+
continue;
|
|
451
|
+
}
|
|
452
|
+
// 429 / 5xx → transient upstream hiccup. Retry with exponential backoff
|
|
453
|
+
// + jitter, honoring Retry-After if present. This is what Anthropic's
|
|
454
|
+
// official SDK does by default; buyers used to see these as hard 502s
|
|
455
|
+
// even when the right move was "wait 1s and try again". We only do this
|
|
456
|
+
// inside the rate-guard slot we're already holding, so retries don't
|
|
457
|
+
// re-queue behind other requests.
|
|
458
|
+
const isTransient = resp.status === 429 ||
|
|
459
|
+
(resp.status >= 500 && resp.status <= 599);
|
|
460
|
+
if (isTransient && transientAttempt < MAX_TRANSIENT_RETRIES) {
|
|
461
|
+
const retryAfter = parseRetryAfterMs(resp.headers.get("retry-after"));
|
|
462
|
+
const backoffMs = retryAfter ?? 500 * Math.pow(2, transientAttempt) + Math.random() * 500;
|
|
463
|
+
logger.warn(`[claude-api] ${resp.status} from upstream (attempt ${transientAttempt + 1}/${MAX_TRANSIENT_RETRIES + 1}), retrying in ${Math.round(backoffMs)}ms — ${errText.slice(0, 200)}`);
|
|
464
|
+
await new Promise((r) => setTimeout(r, backoffMs));
|
|
465
|
+
transientAttempt++;
|
|
466
|
+
continue;
|
|
467
|
+
}
|
|
468
|
+
// Unrecoverable — bubble up with the upstream status + body so Hub can
|
|
469
|
+
// translate it into a sensible HTTP status for the buyer.
|
|
470
|
+
throw new Error(`Anthropic ${resp.status}: ${errText.slice(0, 400)}`);
|
|
433
471
|
}
|
|
434
|
-
const parsed = parseResponse(await resp.json(), opts.model);
|
|
435
|
-
recordSpendFromUsage(parsed, opts.model);
|
|
436
|
-
return parsed;
|
|
437
472
|
}
|
|
438
473
|
function recordSpendFromUsage(parsed, model) {
|
|
439
474
|
if (!rateGuard)
|