@juspay/neurolink 9.54.4 → 9.54.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/auth/tokenStore.d.ts +0 -1
- package/dist/browser/neurolink.min.js +305 -305
- package/dist/cli/commands/proxy.js +333 -31
- package/dist/client/httpClient.d.ts +1 -2
- package/dist/client/sseClient.d.ts +1 -2
- package/dist/client/wsClient.d.ts +1 -2
- package/dist/core/toolEvents.d.ts +0 -1
- package/dist/lib/auth/tokenStore.d.ts +0 -1
- package/dist/lib/client/httpClient.d.ts +1 -2
- package/dist/lib/client/sseClient.d.ts +1 -2
- package/dist/lib/client/wsClient.d.ts +1 -2
- package/dist/lib/core/toolEvents.d.ts +0 -1
- package/dist/lib/memory/hippocampusInitializer.d.ts +1 -2
- package/dist/lib/memory/hippocampusInitializer.js +1 -1
- package/dist/lib/models/anthropicModels.d.ts +0 -1
- package/dist/lib/models/anthropicModels.js +1 -0
- package/dist/lib/neurolink.js +1 -1
- package/dist/lib/observability/exporterRegistry.d.ts +1 -2
- package/dist/lib/observability/retryPolicy.d.ts +0 -1
- package/dist/lib/observability/sampling/samplers.d.ts +0 -1
- package/dist/lib/observability/spanProcessor.d.ts +0 -1
- package/dist/lib/processors/errors/errorHelpers.d.ts +1 -1
- package/dist/lib/providers/googleNativeGemini3.d.ts +1 -2
- package/dist/lib/proxy/proxyHealth.d.ts +0 -1
- package/dist/lib/proxy/routingPolicy.d.ts +4 -30
- package/dist/lib/proxy/routingPolicy.js +14 -46
- package/dist/lib/proxy/usageStats.d.ts +0 -1
- package/dist/lib/proxy/usageStats.js +0 -7
- package/dist/lib/server/routes/claudeProxyRoutes.js +55 -95
- package/dist/lib/types/proxy.d.ts +2 -12
- package/dist/lib/utils/sanitizers/filename.d.ts +0 -1
- package/dist/memory/hippocampusInitializer.d.ts +1 -2
- package/dist/memory/hippocampusInitializer.js +1 -1
- package/dist/models/anthropicModels.d.ts +0 -1
- package/dist/models/anthropicModels.js +1 -0
- package/dist/neurolink.js +1 -1
- package/dist/observability/exporterRegistry.d.ts +1 -2
- package/dist/observability/retryPolicy.d.ts +0 -1
- package/dist/observability/sampling/samplers.d.ts +0 -1
- package/dist/observability/spanProcessor.d.ts +0 -1
- package/dist/processors/errors/errorHelpers.d.ts +1 -1
- package/dist/providers/googleNativeGemini3.d.ts +1 -2
- package/dist/proxy/proxyHealth.d.ts +0 -1
- package/dist/proxy/routingPolicy.d.ts +4 -30
- package/dist/proxy/routingPolicy.js +14 -46
- package/dist/proxy/usageStats.d.ts +0 -1
- package/dist/proxy/usageStats.js +0 -7
- package/dist/server/routes/claudeProxyRoutes.js +55 -95
- package/dist/types/proxy.d.ts +2 -12
- package/dist/utils/sanitizers/filename.d.ts +0 -1
- package/package.json +1 -1
|
@@ -9,8 +9,7 @@
|
|
|
9
9
|
* providers so they can share a single implementation.
|
|
10
10
|
*/
|
|
11
11
|
import { type Tool } from "ai";
|
|
12
|
-
import type { ThinkingConfig, CollectedChunkResult, NativeFunctionCall,
|
|
13
|
-
export type { CollectedChunkResult, NativeFunctionCall, NativeFunctionDeclaration, NativeFunctionResponse, NativeToolDeclarationsResult, NativeToolsConfig, TextChannel, };
|
|
12
|
+
import type { ThinkingConfig, CollectedChunkResult, NativeFunctionCall, NativeFunctionResponse, NativeToolDeclarationsResult, NativeToolsConfig, TextChannel } from "../types/index.js";
|
|
14
13
|
/**
|
|
15
14
|
* Sanitize a JSON Schema for Gemini's proto-based API.
|
|
16
15
|
*
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { ProxyHealthResponse, ProxyReadinessState } from "../types/index.js";
|
|
2
|
-
export type { ProxyHealthResponse, ProxyReadinessState };
|
|
3
2
|
export declare function createProxyReadinessState(startTimeMs?: number): ProxyReadinessState;
|
|
4
3
|
export declare function markProxyReady(state: ProxyReadinessState, readyAtMs?: number): void;
|
|
5
4
|
export declare function buildProxyHealthResponse(state: ProxyReadinessState, options: {
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type { ClaudeProxyModelTier,
|
|
2
|
-
export type { ClaudeProxyModelTier, ProxyTranslationAttempt, ProxyTranslationPlan, };
|
|
1
|
+
import type { ClaudeProxyModelTier, FallbackEntry, ParsedClaudeRequest, ProxyTranslationPlan } from "../types/index.js";
|
|
3
2
|
export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
|
|
4
3
|
/**
|
|
5
4
|
* Build a translation plan for a Claude-compatible proxy request.
|
|
@@ -12,32 +11,7 @@ export declare function buildProxyTranslationPlan(primary: {
|
|
|
12
11
|
model?: string;
|
|
13
12
|
}, fallbackChain: FallbackEntry[], requestedModel: string, _parsed: ParsedClaudeRequest): ProxyTranslationPlan;
|
|
14
13
|
/**
|
|
15
|
-
*
|
|
16
|
-
* Returns
|
|
14
|
+
* Parse the retry-after header from an upstream 429 response.
|
|
15
|
+
* Returns milliseconds to wait, or 0 if no valid header present.
|
|
17
16
|
*/
|
|
18
|
-
export declare function
|
|
19
|
-
/**
|
|
20
|
-
* Partition accounts into eligible (no cooldown) and skipped (cooling down).
|
|
21
|
-
*/
|
|
22
|
-
export declare function partitionAccountsByCooldown<T extends {
|
|
23
|
-
key: string;
|
|
24
|
-
}>(accounts: T[], getState: (account: T) => RuntimeAccountState, now?: number): {
|
|
25
|
-
eligible: T[];
|
|
26
|
-
skipped: CooldownSkippedAccount<T>[];
|
|
27
|
-
};
|
|
28
|
-
/**
|
|
29
|
-
* Apply a rate-limit cooldown to an account.
|
|
30
|
-
* Uses simple exponential backoff with a floor and cap.
|
|
31
|
-
*/
|
|
32
|
-
export declare function applyRateLimitCooldown(args: {
|
|
33
|
-
state: RuntimeAccountState;
|
|
34
|
-
retryAfterMs?: number;
|
|
35
|
-
now?: number;
|
|
36
|
-
capMs: number;
|
|
37
|
-
}): {
|
|
38
|
-
backoffMs: number;
|
|
39
|
-
};
|
|
40
|
-
/**
|
|
41
|
-
* Clear cooldown state for an account after a successful request.
|
|
42
|
-
*/
|
|
43
|
-
export declare function clearAccountCooldown(state: RuntimeAccountState): void;
|
|
17
|
+
export declare function parseRetryAfterMs(retryAfterHeader: string | null): number;
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
|
|
2
1
|
export function inferClaudeProxyModelTier(modelName) {
|
|
3
2
|
const normalized = modelName.toLowerCase();
|
|
4
3
|
if (normalized.includes("opus")) {
|
|
@@ -50,55 +49,24 @@ export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel
|
|
|
50
49
|
};
|
|
51
50
|
}
|
|
52
51
|
// ---------------------------------------------------------------------------
|
|
53
|
-
//
|
|
52
|
+
// Retry-after parsing helper
|
|
54
53
|
// ---------------------------------------------------------------------------
|
|
55
54
|
/**
|
|
56
|
-
*
|
|
57
|
-
* Returns
|
|
55
|
+
* Parse the retry-after header from an upstream 429 response.
|
|
56
|
+
* Returns milliseconds to wait, or 0 if no valid header present.
|
|
58
57
|
*/
|
|
59
|
-
export function
|
|
60
|
-
if (
|
|
61
|
-
return
|
|
58
|
+
export function parseRetryAfterMs(retryAfterHeader) {
|
|
59
|
+
if (!retryAfterHeader) {
|
|
60
|
+
return 0;
|
|
62
61
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
* Partition accounts into eligible (no cooldown) and skipped (cooling down).
|
|
67
|
-
*/
|
|
68
|
-
export function partitionAccountsByCooldown(accounts, getState, now = Date.now()) {
|
|
69
|
-
const eligible = [];
|
|
70
|
-
const skipped = [];
|
|
71
|
-
for (const account of accounts) {
|
|
72
|
-
const state = getState(account);
|
|
73
|
-
const until = getAccountCooldownUntil(state, now);
|
|
74
|
-
if (until !== null) {
|
|
75
|
-
skipped.push({
|
|
76
|
-
account,
|
|
77
|
-
cooldown: { until, backoffLevel: state.backoffLevel },
|
|
78
|
-
});
|
|
79
|
-
continue;
|
|
80
|
-
}
|
|
81
|
-
eligible.push(account);
|
|
62
|
+
const seconds = parseInt(retryAfterHeader, 10);
|
|
63
|
+
if (!Number.isNaN(seconds)) {
|
|
64
|
+
return Math.max(1, seconds) * 1000;
|
|
82
65
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
*/
|
|
89
|
-
export function applyRateLimitCooldown(args) {
|
|
90
|
-
const now = args.now ?? Date.now();
|
|
91
|
-
const baseCooldownMs = Math.max(args.retryAfterMs ?? 0, DEFAULT_COOLDOWN_FLOOR_MS);
|
|
92
|
-
const backoffMs = Math.min(baseCooldownMs * 2 ** args.state.backoffLevel, args.capMs);
|
|
93
|
-
args.state.coolingUntil = now + backoffMs;
|
|
94
|
-
args.state.backoffLevel += 1;
|
|
95
|
-
return { backoffMs };
|
|
96
|
-
}
|
|
97
|
-
/**
|
|
98
|
-
* Clear cooldown state for an account after a successful request.
|
|
99
|
-
*/
|
|
100
|
-
export function clearAccountCooldown(state) {
|
|
101
|
-
state.coolingUntil = undefined;
|
|
102
|
-
state.backoffLevel = 0;
|
|
66
|
+
const date = new Date(retryAfterHeader);
|
|
67
|
+
if (!Number.isNaN(date.getTime())) {
|
|
68
|
+
return Math.max(1000, date.getTime() - Date.now());
|
|
69
|
+
}
|
|
70
|
+
return 0;
|
|
103
71
|
}
|
|
104
72
|
//# sourceMappingURL=routingPolicy.js.map
|
|
@@ -8,7 +8,6 @@ export declare function recordAttempt(accountLabel: string, accountType: string)
|
|
|
8
8
|
export declare function recordFinalSuccess(accountLabel?: string, accountType?: string): void;
|
|
9
9
|
export declare function recordAttemptError(accountLabel: string, accountType: string, status: number): void;
|
|
10
10
|
export declare function recordFinalError(_status: number, accountLabel?: string, accountType?: string): void;
|
|
11
|
-
export declare function recordCooldown(accountLabel: string, accountType: string, cooldownUntil: number, backoffLevel: number): void;
|
|
12
11
|
export declare function getStats(): ProxyStats;
|
|
13
12
|
export declare function getAccountStats(label: string): AccountStats | undefined;
|
|
14
13
|
export declare function resetStats(): void;
|
|
@@ -24,7 +24,6 @@ export function recordFinalSuccess(accountLabel, accountType) {
|
|
|
24
24
|
if (accountLabel && accountType) {
|
|
25
25
|
const acct = ensureAccount(accountLabel, accountType);
|
|
26
26
|
acct.successCount++;
|
|
27
|
-
acct.currentBackoffLevel = 0;
|
|
28
27
|
}
|
|
29
28
|
}
|
|
30
29
|
export function recordAttemptError(accountLabel, accountType, status) {
|
|
@@ -45,11 +44,6 @@ export function recordFinalError(_status, accountLabel, accountType) {
|
|
|
45
44
|
acct.lastErrorAt = Date.now();
|
|
46
45
|
}
|
|
47
46
|
}
|
|
48
|
-
export function recordCooldown(accountLabel, accountType, cooldownUntil, backoffLevel) {
|
|
49
|
-
const acct = ensureAccount(accountLabel, accountType);
|
|
50
|
-
acct.coolingUntil = cooldownUntil;
|
|
51
|
-
acct.currentBackoffLevel = backoffLevel;
|
|
52
|
-
}
|
|
53
47
|
export function getStats() {
|
|
54
48
|
const accounts = {};
|
|
55
49
|
for (const [label, account] of Object.entries(stats.accounts)) {
|
|
@@ -80,7 +74,6 @@ function ensureAccount(label, type) {
|
|
|
80
74
|
errorCount: 0,
|
|
81
75
|
rateLimitCount: 0,
|
|
82
76
|
lastAttemptAt: 0,
|
|
83
|
-
currentBackoffLevel: 0,
|
|
84
77
|
};
|
|
85
78
|
}
|
|
86
79
|
return stats.accounts[label];
|
|
@@ -20,9 +20,9 @@ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
|
|
|
20
20
|
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
|
|
21
21
|
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
|
|
22
22
|
import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
|
|
23
|
-
import {
|
|
23
|
+
import { buildProxyTranslationPlan, parseRetryAfterMs, } from "../../proxy/routingPolicy.js";
|
|
24
24
|
import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
|
|
25
|
-
import { recordAttempt, recordAttemptError,
|
|
25
|
+
import { recordAttempt, recordAttemptError, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
|
|
26
26
|
import { logger } from "../../utils/logger.js";
|
|
27
27
|
import { ProviderHealthChecker } from "../../utils/providerHealth.js";
|
|
28
28
|
// ---------------------------------------------------------------------------
|
|
@@ -41,7 +41,7 @@ const BLOCKED_UPSTREAM_HEADERS = new Set([
|
|
|
41
41
|
// Module-level state
|
|
42
42
|
// ---------------------------------------------------------------------------
|
|
43
43
|
/** Fill-first: index of the current primary account. Only advances when
|
|
44
|
-
* the current account
|
|
44
|
+
* the current account exhausts 429 retries or auth retries fail. */
|
|
45
45
|
let primaryAccountIndex = 0;
|
|
46
46
|
/** Track account count so we can reset primaryAccountIndex when it changes. */
|
|
47
47
|
let lastKnownAccountCount = 0;
|
|
@@ -49,9 +49,12 @@ const MAX_AUTH_RETRIES = 5;
|
|
|
49
49
|
const MAX_CONSECUTIVE_REFRESH_FAILURES = 15;
|
|
50
50
|
const MAX_TRANSIENT_SAME_ACCOUNT_RETRIES = 2;
|
|
51
51
|
const TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS = [250, 1_000];
|
|
52
|
-
/**
|
|
53
|
-
|
|
54
|
-
const
|
|
52
|
+
/** Maximum upstream 429 attempts per account before rotating to the next account.
|
|
53
|
+
* Total attempts per account = this + 1 (the initial call plus this many retries). */
|
|
54
|
+
const MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES = 5;
|
|
55
|
+
/** Max time to sleep between 429 retries. Caps large upstream retry-after values
|
|
56
|
+
* so we don't hold the client connection open for minutes. */
|
|
57
|
+
const MAX_RATE_LIMIT_RETRY_DELAY_MS = 30_000;
|
|
55
58
|
/** Timeout for upstream requests to Anthropic. Must be generous enough
|
|
56
59
|
* to cover the full lifecycle of streaming responses, including extended
|
|
57
60
|
* thinking from Opus models (which can exceed 5 minutes for large contexts). */
|
|
@@ -59,10 +62,10 @@ const UPSTREAM_FETCH_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes
|
|
|
59
62
|
const accountRuntimeState = new Map();
|
|
60
63
|
/** Track whether we've run the one-time startup prune. */
|
|
61
64
|
let startupPruneDone = false;
|
|
62
|
-
/** Advance the primary account index when the current primary is
|
|
63
|
-
* This is what makes fill-first work:
|
|
64
|
-
*
|
|
65
|
-
* it's already a fallback
|
|
65
|
+
/** Advance the primary account index when the current primary is exhausted
|
|
66
|
+
* (429 retries exhausted or auth failure). This is what makes fill-first work:
|
|
67
|
+
* we stick to one account until it's unusable. Only advances when the exhausted
|
|
68
|
+
* account IS the current primary; otherwise it's already a fallback. */
|
|
66
69
|
function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) {
|
|
67
70
|
if (enabledCount <= 1) {
|
|
68
71
|
return;
|
|
@@ -1089,8 +1092,6 @@ async function loadClaudeProxyAccounts(args) {
|
|
|
1089
1092
|
await tokenStore.markEnabled(key);
|
|
1090
1093
|
logger.always(`[proxy] account=${key.split(":")[1] ?? key} re-enabled (credentials changed)`);
|
|
1091
1094
|
existingState.permanentlyDisabled = false;
|
|
1092
|
-
existingState.coolingUntil = undefined;
|
|
1093
|
-
existingState.backoffLevel = 0;
|
|
1094
1095
|
existingState.consecutiveRefreshFailures = 0;
|
|
1095
1096
|
}
|
|
1096
1097
|
else {
|
|
@@ -1187,8 +1188,6 @@ async function loadClaudeProxyAccounts(args) {
|
|
|
1187
1188
|
if (state.permanentlyDisabled) {
|
|
1188
1189
|
logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
|
|
1189
1190
|
}
|
|
1190
|
-
state.coolingUntil = undefined;
|
|
1191
|
-
state.backoffLevel = 0;
|
|
1192
1191
|
state.consecutiveRefreshFailures = 0;
|
|
1193
1192
|
state.permanentlyDisabled = false;
|
|
1194
1193
|
}
|
|
@@ -1513,16 +1512,10 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1513
1512
|
tracer?.end(502, Date.now() - requestStartTime);
|
|
1514
1513
|
return buildLoggedClaudeError(502, msg);
|
|
1515
1514
|
}
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
}
|
|
1520
|
-
// If no active cooldown remains (expired while retries ran), use 1s
|
|
1521
|
-
// instead of fabricating a long retry-after.
|
|
1522
|
-
const retryAfterSec = Number.isFinite(earliestRecovery)
|
|
1523
|
-
? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
|
|
1524
|
-
: 1;
|
|
1525
|
-
const errorMessage = `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`;
|
|
1515
|
+
// All accounts returned 429 after exhausting per-account retries.
|
|
1516
|
+
// Return 1s retry-after — the proxy already waited for each upstream retry-after inline.
|
|
1517
|
+
const retryAfterSec = 1;
|
|
1518
|
+
const errorMessage = `All ${orderedAccounts.length} accounts rate-limited after ${MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES + 1} attempts each (1 initial + ${MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES} retries).`;
|
|
1526
1519
|
logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
|
|
1527
1520
|
const errorBody = buildClaudeError(429, errorMessage, "overloaded_error");
|
|
1528
1521
|
tracer?.setError("rate_limit_error", errorMessage);
|
|
@@ -1552,7 +1545,6 @@ function buildClaudeAnthropicFailureResponse(args) {
|
|
|
1552
1545
|
}
|
|
1553
1546
|
async function handleAnthropicSuccessfulResponse(args) {
|
|
1554
1547
|
const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1555
|
-
clearAccountCooldown(accountState);
|
|
1556
1548
|
accountState.consecutiveRefreshFailures = 0;
|
|
1557
1549
|
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
1558
1550
|
const quota = parseQuotaHeaders(response.headers);
|
|
@@ -1599,7 +1591,7 @@ async function handleAnthropicSuccessfulResponse(args) {
|
|
|
1599
1591
|
});
|
|
1600
1592
|
}
|
|
1601
1593
|
async function handleAnthropicStreamingSuccessResponse(args) {
|
|
1602
|
-
const { ctx, body, account, accountState, response, responseHeaders, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1594
|
+
const { ctx, body, account, accountState: _accountState, response, responseHeaders, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
|
|
1603
1595
|
if (!response.body) {
|
|
1604
1596
|
upstreamSpan?.end();
|
|
1605
1597
|
tracer?.setError("stream_error", "No response body from upstream");
|
|
@@ -1626,8 +1618,6 @@ async function handleAnthropicStreamingSuccessResponse(args) {
|
|
|
1626
1618
|
const firstChunk = await reader.read();
|
|
1627
1619
|
if (firstChunk.done || !firstChunk.value || firstChunk.value.length === 0) {
|
|
1628
1620
|
reader.cancel();
|
|
1629
|
-
accountState.coolingUntil = Date.now() + 10_000;
|
|
1630
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
1631
1621
|
logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
|
|
1632
1622
|
tracer?.recordRetry(account.label, "empty_stream");
|
|
1633
1623
|
upstreamSpan?.end();
|
|
@@ -2186,8 +2176,6 @@ async function handleAnthropicAuthRetry(args) {
|
|
|
2186
2176
|
if (retryResp.ok) {
|
|
2187
2177
|
authRetrySucceeded = true;
|
|
2188
2178
|
accountState.consecutiveRefreshFailures = 0;
|
|
2189
|
-
accountState.backoffLevel = 0;
|
|
2190
|
-
accountState.coolingUntil = undefined;
|
|
2191
2179
|
logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
|
|
2192
2180
|
const successResponse = await handleAnthropicSuccessfulRetryResponse({
|
|
2193
2181
|
ctx,
|
|
@@ -2222,18 +2210,7 @@ async function handleAnthropicAuthRetry(args) {
|
|
|
2222
2210
|
recordAttemptError(account.label, account.type, retryStatus);
|
|
2223
2211
|
if (retryStatus === 429) {
|
|
2224
2212
|
currentSawRateLimit = true;
|
|
2225
|
-
const retryAfter = retryResp.headers.get("retry-after");
|
|
2226
|
-
const parsedRetryAfter = parseInt(retryAfter ?? "", 10);
|
|
2227
|
-
const cooldownMs = Number.isNaN(parsedRetryAfter)
|
|
2228
|
-
? 60_000
|
|
2229
|
-
: Math.max(1, parsedRetryAfter) * 1000;
|
|
2230
|
-
const cooldown = applyRateLimitCooldown({
|
|
2231
|
-
state: accountState,
|
|
2232
|
-
retryAfterMs: cooldownMs,
|
|
2233
|
-
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2234
|
-
});
|
|
2235
2213
|
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2236
|
-
recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
|
|
2237
2214
|
break;
|
|
2238
2215
|
}
|
|
2239
2216
|
if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
|
|
@@ -2288,20 +2265,15 @@ async function handleAnthropicAuthRetry(args) {
|
|
|
2288
2265
|
}
|
|
2289
2266
|
}
|
|
2290
2267
|
if (!authRetrySucceeded) {
|
|
2291
|
-
|
|
2292
|
-
if (!accountState.coolingUntil ||
|
|
2293
|
-
accountState.coolingUntil <= Date.now()) {
|
|
2294
|
-
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
2295
|
-
}
|
|
2296
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
2297
|
-
}
|
|
2268
|
+
// No persistent cooldown — just move to next account for this request.
|
|
2298
2269
|
currentLastError = authRetryError;
|
|
2299
|
-
logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted,
|
|
2270
|
+
logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, rotating to next account`);
|
|
2300
2271
|
logAttempt(401, "authentication_error", authRetryError);
|
|
2301
2272
|
tracer?.setError("authentication_error", authRetryError);
|
|
2302
2273
|
tracer?.recordRetry(account.label, "auth_exhausted");
|
|
2303
2274
|
currentUpstreamSpan?.end();
|
|
2304
2275
|
currentUpstreamSpan = undefined;
|
|
2276
|
+
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2305
2277
|
}
|
|
2306
2278
|
return {
|
|
2307
2279
|
continueLoop: true,
|
|
@@ -2354,7 +2326,7 @@ function buildAnthropicTerminalErrorResponse(args) {
|
|
|
2354
2326
|
}
|
|
2355
2327
|
}
|
|
2356
2328
|
async function handleAnthropicNonOkResponse(args) {
|
|
2357
|
-
const { response, account, accountState, tracer, requestStartTime, fetchStartMs, attemptNumber, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawTransientFailure, invalidRequestFailure, maxConsecutiveRefreshFailures, } = args;
|
|
2329
|
+
const { response, account, accountState, enabledAccounts, orderedAccounts, tracer, requestStartTime, fetchStartMs, attemptNumber, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawTransientFailure, invalidRequestFailure, maxConsecutiveRefreshFailures, } = args;
|
|
2358
2330
|
let currentLastError = lastError;
|
|
2359
2331
|
let currentAuthFailureMessage = authFailureMessage;
|
|
2360
2332
|
let currentSawTransientFailure = sawTransientFailure;
|
|
@@ -2404,17 +2376,16 @@ async function handleAnthropicNonOkResponse(args) {
|
|
|
2404
2376
|
!account.refreshToken) {
|
|
2405
2377
|
recordAttemptError(account.label, account.type, response.status);
|
|
2406
2378
|
accountState.consecutiveRefreshFailures += 1;
|
|
2407
|
-
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
2408
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
2409
2379
|
if (accountState.consecutiveRefreshFailures >= maxConsecutiveRefreshFailures) {
|
|
2410
2380
|
await disableAccountUntilReauth(account, accountState);
|
|
2411
2381
|
}
|
|
2412
2382
|
currentAuthFailureMessage = formatReauthMessage(account.label);
|
|
2413
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label}
|
|
2383
|
+
logger.always(`[proxy] ← ${response.status} account=${account.label} (auth failure, no refresh token)`);
|
|
2414
2384
|
currentLastError = errBody;
|
|
2415
2385
|
logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
|
|
2416
2386
|
tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
|
|
2417
2387
|
tracer?.recordRetry(account.label, "auth_no_refresh");
|
|
2388
|
+
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2418
2389
|
return {
|
|
2419
2390
|
continueLoop: true,
|
|
2420
2391
|
lastError: currentLastError,
|
|
@@ -2431,13 +2402,12 @@ async function handleAnthropicNonOkResponse(args) {
|
|
|
2431
2402
|
recordAttemptError(account.label, account.type, response.status);
|
|
2432
2403
|
currentAuthFailureMessage =
|
|
2433
2404
|
"Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
|
|
2434
|
-
|
|
2435
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
2436
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
|
|
2405
|
+
logger.always(`[proxy] ← ${response.status} account=${account.label} (auth failure, api_key)`);
|
|
2437
2406
|
currentLastError = errBody;
|
|
2438
2407
|
logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
|
|
2439
2408
|
tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
|
|
2440
2409
|
tracer?.recordRetry(account.label, "auth_api_key");
|
|
2410
|
+
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2441
2411
|
return {
|
|
2442
2412
|
continueLoop: true,
|
|
2443
2413
|
lastError: currentLastError,
|
|
@@ -2805,7 +2775,7 @@ async function prepareAnthropicAccountAttempt(args) {
|
|
|
2805
2775
|
};
|
|
2806
2776
|
}
|
|
2807
2777
|
async function fetchAnthropicAccountResponse(args) {
|
|
2808
|
-
const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
|
|
2778
|
+
const { url, headers, finalBodyStr, account, accountState: _accountState2, enabledAccounts: _enabledAccounts, orderedAccounts: _orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
|
|
2809
2779
|
let lastError = currentLastError;
|
|
2810
2780
|
let sawRateLimit = currentSawRateLimit;
|
|
2811
2781
|
let sawNetworkError = currentSawNetworkError;
|
|
@@ -2844,36 +2814,18 @@ async function fetchAnthropicAccountResponse(args) {
|
|
|
2844
2814
|
}
|
|
2845
2815
|
if (response.status === 429) {
|
|
2846
2816
|
sawRateLimit = true;
|
|
2847
|
-
const
|
|
2848
|
-
let cooldownMs = 0;
|
|
2849
|
-
if (retryAfter) {
|
|
2850
|
-
const seconds = parseInt(retryAfter, 10);
|
|
2851
|
-
if (!Number.isNaN(seconds)) {
|
|
2852
|
-
cooldownMs = seconds * 1000;
|
|
2853
|
-
}
|
|
2854
|
-
else {
|
|
2855
|
-
const date = new Date(retryAfter);
|
|
2856
|
-
if (!Number.isNaN(date.getTime())) {
|
|
2857
|
-
cooldownMs = Math.max(date.getTime() - Date.now(), 1000);
|
|
2858
|
-
}
|
|
2859
|
-
}
|
|
2860
|
-
}
|
|
2861
|
-
const cooldown = applyRateLimitCooldown({
|
|
2862
|
-
state: accountState,
|
|
2863
|
-
retryAfterMs: cooldownMs > 0 ? cooldownMs : undefined,
|
|
2864
|
-
capMs: RATE_LIMIT_BACKOFF_CAP_MS,
|
|
2865
|
-
});
|
|
2866
|
-
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2817
|
+
const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
|
|
2867
2818
|
recordAttemptError(account.label, account.type, 429);
|
|
2868
|
-
recordCooldown(account.label, account.type, Date.now() + cooldown.backoffMs, accountState.backoffLevel);
|
|
2869
2819
|
lastError = await response.text();
|
|
2870
|
-
logger.always(`[proxy] ← 429 account=${account.label}
|
|
2820
|
+
logger.always(`[proxy] ← 429 account=${account.label} retry-after=${retryAfterMs}ms (upstream)`);
|
|
2871
2821
|
logAttempt(429, "rate_limit_error", String(lastError));
|
|
2872
2822
|
tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
|
|
2873
2823
|
tracer?.recordRetry(account.label, "rate_limit");
|
|
2874
2824
|
currentUpstreamSpan?.end();
|
|
2875
2825
|
return {
|
|
2876
2826
|
continueLoop: true,
|
|
2827
|
+
retrySameAccount: true,
|
|
2828
|
+
retryAfterMs,
|
|
2877
2829
|
lastError,
|
|
2878
2830
|
sawRateLimit,
|
|
2879
2831
|
sawNetworkError,
|
|
@@ -2914,20 +2866,12 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
2914
2866
|
attemptNumber: 0,
|
|
2915
2867
|
};
|
|
2916
2868
|
const acctSelectionSpan = tracer?.startAccountSelection();
|
|
2917
|
-
|
|
2918
|
-
|
|
2919
|
-
|
|
2920
|
-
}
|
|
2921
|
-
// Only flag rate-limit when ALL accounts are cooling — if some are eligible,
|
|
2922
|
-
// let the actual attempt results determine sawRateLimit via real 429 responses.
|
|
2923
|
-
if (accountPartition.skipped.length > 0 &&
|
|
2924
|
-
accountPartition.eligible.length === 0) {
|
|
2925
|
-
loopState.sawRateLimit = true;
|
|
2926
|
-
loopState.lastError = `All ${accountPartition.skipped.length} accounts are cooling down`;
|
|
2927
|
-
}
|
|
2928
|
-
accountLoop: for (const account of accountPartition.eligible) {
|
|
2869
|
+
// No partition / cooldown gating — every account is always eligible.
|
|
2870
|
+
// Retries are handled inline per-account using upstream retry-after.
|
|
2871
|
+
accountLoop: for (const account of orderedAccounts) {
|
|
2929
2872
|
const accountState = getOrCreateRuntimeState(account.key);
|
|
2930
2873
|
let transientSameAccountRetries = 0;
|
|
2874
|
+
let rateLimitSameAccountRetries = 0;
|
|
2931
2875
|
while (true) {
|
|
2932
2876
|
loopState.attemptNumber += 1;
|
|
2933
2877
|
if (tracer && loopState.attemptNumber === 1 && acctSelectionSpan) {
|
|
@@ -2991,6 +2935,24 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
2991
2935
|
loopState.sawRateLimit = fetchResult.sawRateLimit;
|
|
2992
2936
|
loopState.sawNetworkError = fetchResult.sawNetworkError;
|
|
2993
2937
|
if (fetchResult.continueLoop || !fetchResult.response) {
|
|
2938
|
+
// 429 with retry-after: wait and retry same account up to 5 times
|
|
2939
|
+
if (fetchResult.retrySameAccount &&
|
|
2940
|
+
fetchResult.retryAfterMs !== undefined &&
|
|
2941
|
+
rateLimitSameAccountRetries < MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES) {
|
|
2942
|
+
rateLimitSameAccountRetries += 1;
|
|
2943
|
+
const delayMs = Math.min(fetchResult.retryAfterMs || 1_000, MAX_RATE_LIMIT_RETRY_DELAY_MS);
|
|
2944
|
+
logger.always(`[proxy] retrying same account=${account.label} after upstream 429 (${rateLimitSameAccountRetries}/${MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES}) in ${delayMs}ms`);
|
|
2945
|
+
await sleep(delayMs);
|
|
2946
|
+
continue;
|
|
2947
|
+
}
|
|
2948
|
+
// Rate-limit retries exhausted for this account — rotate
|
|
2949
|
+
if (fetchResult.retrySameAccount &&
|
|
2950
|
+
fetchResult.retryAfterMs !== undefined) {
|
|
2951
|
+
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
2952
|
+
logger.always(`[proxy] exhausted ${MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES} rate-limit retries for account=${account.label}; rotating`);
|
|
2953
|
+
continue accountLoop;
|
|
2954
|
+
}
|
|
2955
|
+
// Transient error retry (network errors, 529 overloaded)
|
|
2994
2956
|
if (fetchResult.retrySameAccount &&
|
|
2995
2957
|
transientSameAccountRetries < MAX_TRANSIENT_SAME_ACCOUNT_RETRIES) {
|
|
2996
2958
|
transientSameAccountRetries += 1;
|
|
@@ -3052,6 +3014,8 @@ async function handleAnthropicRoutedClaudeRequest(args) {
|
|
|
3052
3014
|
response,
|
|
3053
3015
|
account,
|
|
3054
3016
|
accountState,
|
|
3017
|
+
enabledAccounts,
|
|
3018
|
+
orderedAccounts,
|
|
3055
3019
|
tracer,
|
|
3056
3020
|
requestStartTime,
|
|
3057
3021
|
fetchStartMs: preparedAttempt.fetchStartMs,
|
|
@@ -3366,8 +3330,6 @@ function getOrCreateRuntimeState(accountKey) {
|
|
|
3366
3330
|
return existing;
|
|
3367
3331
|
}
|
|
3368
3332
|
const initial = {
|
|
3369
|
-
coolingUntil: undefined,
|
|
3370
|
-
backoffLevel: 0,
|
|
3371
3333
|
consecutiveRefreshFailures: 0,
|
|
3372
3334
|
permanentlyDisabled: false,
|
|
3373
3335
|
};
|
|
@@ -3376,8 +3338,6 @@ function getOrCreateRuntimeState(accountKey) {
|
|
|
3376
3338
|
}
|
|
3377
3339
|
async function disableAccountUntilReauth(account, state) {
|
|
3378
3340
|
state.permanentlyDisabled = true;
|
|
3379
|
-
state.coolingUntil = undefined;
|
|
3380
|
-
state.backoffLevel = 0;
|
|
3381
3341
|
// Decision 7 (usage): Persist disabled state to disk so it survives restarts
|
|
3382
3342
|
try {
|
|
3383
3343
|
const { tokenStore } = await import("../../auth/tokenStore.js");
|
|
@@ -574,6 +574,8 @@ export type PreparedAnthropicAccountAttempt = {
|
|
|
574
574
|
export type AnthropicUpstreamFetchResult = {
|
|
575
575
|
continueLoop: boolean;
|
|
576
576
|
retrySameAccount?: boolean;
|
|
577
|
+
/** When set, the caller should wait this many ms before retrying (from upstream retry-after). */
|
|
578
|
+
retryAfterMs?: number;
|
|
577
579
|
response?: Response;
|
|
578
580
|
lastError: unknown;
|
|
579
581
|
sawRateLimit: boolean;
|
|
@@ -589,8 +591,6 @@ export type AccountStats = {
|
|
|
589
591
|
rateLimitCount: number;
|
|
590
592
|
lastAttemptAt: number;
|
|
591
593
|
lastErrorAt?: number;
|
|
592
|
-
currentBackoffLevel: number;
|
|
593
|
-
coolingUntil?: number;
|
|
594
594
|
};
|
|
595
595
|
export type ProxyStats = {
|
|
596
596
|
startedAt: number;
|
|
@@ -639,8 +639,6 @@ export type AccountQuota = {
|
|
|
639
639
|
};
|
|
640
640
|
/** Runtime state for a proxy account. */
|
|
641
641
|
export type RuntimeAccountState = {
|
|
642
|
-
coolingUntil?: number;
|
|
643
|
-
backoffLevel: number;
|
|
644
642
|
consecutiveRefreshFailures: number;
|
|
645
643
|
permanentlyDisabled: boolean;
|
|
646
644
|
lastToken?: string;
|
|
@@ -708,14 +706,6 @@ export type ProxyTranslationPlan = {
|
|
|
708
706
|
attempts: ProxyTranslationAttempt[];
|
|
709
707
|
skipped: never[];
|
|
710
708
|
};
|
|
711
|
-
/** An account skipped during partitioning, with its cooldown info. */
|
|
712
|
-
export type CooldownSkippedAccount<T> = {
|
|
713
|
-
account: T;
|
|
714
|
-
cooldown: {
|
|
715
|
-
until: number;
|
|
716
|
-
backoffLevel: number;
|
|
717
|
-
};
|
|
718
|
-
};
|
|
719
709
|
/** Mutable readiness state tracked by the proxy process. */
|
|
720
710
|
export type ProxyReadinessState = {
|
|
721
711
|
startTimeMs: number;
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
* @see https://cheatsheetseries.owasp.org/cheatsheets/Input_Validation_Cheat_Sheet.html
|
|
11
11
|
*/
|
|
12
12
|
import type { SanitizeFileNameOptions, SanitizeDisplayNameOptions } from "../../types/index.js";
|
|
13
|
-
export type { SanitizeFileNameOptions, SanitizeDisplayNameOptions };
|
|
14
13
|
/**
|
|
15
14
|
* Sanitize a filename for safe filesystem storage.
|
|
16
15
|
* Removes characters that are invalid on various operating systems.
|
|
@@ -1,3 +1,2 @@
|
|
|
1
|
-
import { Hippocampus, type HippocampusConfig, type StorageConfig } from "@juspay/hippocampus";
|
|
2
|
-
export type { HippocampusConfig, StorageConfig };
|
|
1
|
+
import { Hippocampus, type HippocampusConfig } from "@juspay/hippocampus";
|
|
3
2
|
export declare function initializeHippocampus(config: HippocampusConfig): Hippocampus | null;
|
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import type { ClaudeSubscriptionTier, AnthropicModelMetadata } from "../types/index.js";
|
|
8
8
|
import { ModelAccessError } from "../types/index.js";
|
|
9
|
-
export type { ClaudeSubscriptionTier, AnthropicModelMetadata };
|
|
10
9
|
export { ModelAccessError };
|
|
11
10
|
/**
|
|
12
11
|
* Anthropic Claude model identifiers
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* model capabilities, and provides helper functions for tier-based access control.
|
|
6
6
|
*/
|
|
7
7
|
import { ModelAccessError } from "../types/index.js";
|
|
8
|
+
// Re-export runtime value for convenience
|
|
8
9
|
export { ModelAccessError };
|
|
9
10
|
// ============================================================================
|
|
10
11
|
// ANTHROPIC MODEL ENUM
|
package/dist/neurolink.js
CHANGED
|
@@ -47,7 +47,7 @@ import { ToolRouter } from "./mcp/routing/index.js";
|
|
|
47
47
|
import { directToolsServer } from "./mcp/servers/agent/directToolsServer.js";
|
|
48
48
|
import { inferAnnotations, isSafeToRetry } from "./mcp/toolAnnotations.js";
|
|
49
49
|
import { MCPToolRegistry } from "./mcp/toolRegistry.js";
|
|
50
|
-
import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
|
|
50
|
+
import { initializeHippocampus } from "./memory/hippocampusInitializer.js";
|
|
51
51
|
import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
|
|
52
52
|
import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
|
|
53
53
|
import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/index.js";
|
|
@@ -3,8 +3,7 @@
|
|
|
3
3
|
* Manages multiple observability exporters with circuit breaker protection
|
|
4
4
|
*/
|
|
5
5
|
import type { BaseExporter } from "./exporters/baseExporter.js";
|
|
6
|
-
import type { Sampler } from "
|
|
7
|
-
import type { ExporterHealthStatus, ExportResult, SpanData } from "../types/index.js";
|
|
6
|
+
import type { ExporterHealthStatus, ExportResult, Sampler, SpanData } from "../types/index.js";
|
|
8
7
|
/**
|
|
9
8
|
* Circuit breaker state for an exporter
|
|
10
9
|
*/
|