@juspay/neurolink 9.42.0 → 9.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/auth/anthropicOAuth.js +12 -0
- package/dist/browser/neurolink.min.js +335 -334
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +200 -184
- package/dist/cli/commands/proxy.js +560 -518
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +219 -232
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +140 -190
- package/dist/core/modules/ToolsManager.d.ts +1 -0
- package/dist/core/modules/ToolsManager.js +40 -42
- package/dist/core/toolEvents.d.ts +3 -0
- package/dist/core/toolEvents.js +7 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +356 -284
- package/dist/lib/auth/anthropicOAuth.js +12 -0
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +219 -232
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +140 -190
- package/dist/lib/core/modules/ToolsManager.d.ts +1 -0
- package/dist/lib/core/modules/ToolsManager.js +40 -42
- package/dist/lib/core/toolEvents.d.ts +3 -0
- package/dist/lib/core/toolEvents.js +8 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +356 -284
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +38 -0
- package/dist/lib/neurolink.js +1890 -1707
- package/dist/lib/providers/googleAiStudio.js +0 -5
- package/dist/lib/providers/googleNativeGemini3.d.ts +4 -0
- package/dist/lib/providers/googleNativeGemini3.js +39 -1
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +445 -445
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +73 -64
- package/dist/lib/providers/ollama.js +17 -4
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +139 -140
- package/dist/lib/proxy/claudeFormat.js +14 -5
- package/dist/lib/proxy/oauthFetch.js +298 -318
- package/dist/lib/proxy/proxyConfig.js +3 -1
- package/dist/lib/proxy/proxyFetch.js +250 -222
- package/dist/lib/proxy/proxyHealth.d.ts +17 -0
- package/dist/lib/proxy/proxyHealth.js +55 -0
- package/dist/lib/proxy/requestLogger.js +140 -48
- package/dist/lib/proxy/routingPolicy.d.ts +33 -0
- package/dist/lib/proxy/routingPolicy.js +255 -0
- package/dist/lib/proxy/snapshotPersistence.d.ts +2 -0
- package/dist/lib/proxy/snapshotPersistence.js +41 -0
- package/dist/lib/proxy/sseInterceptor.js +36 -11
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +2 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +2916 -2377
- package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
- package/dist/lib/tasks/store/redisTaskStore.js +42 -17
- package/dist/lib/tasks/taskManager.d.ts +2 -0
- package/dist/lib/tasks/taskManager.js +100 -5
- package/dist/lib/telemetry/telemetryService.js +9 -5
- package/dist/lib/types/cli.d.ts +4 -0
- package/dist/lib/types/proxyTypes.d.ts +211 -1
- package/dist/lib/types/tools.d.ts +18 -0
- package/dist/lib/utils/providerHealth.d.ts +1 -0
- package/dist/lib/utils/providerHealth.js +46 -31
- package/dist/lib/utils/providerUtils.js +11 -22
- package/dist/lib/utils/schemaConversion.d.ts +1 -0
- package/dist/lib/utils/schemaConversion.js +3 -0
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +38 -0
- package/dist/neurolink.js +1890 -1707
- package/dist/providers/googleAiStudio.js +0 -5
- package/dist/providers/googleNativeGemini3.d.ts +4 -0
- package/dist/providers/googleNativeGemini3.js +39 -1
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +445 -445
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +73 -64
- package/dist/providers/ollama.js +17 -4
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +139 -140
- package/dist/proxy/claudeFormat.js +14 -5
- package/dist/proxy/oauthFetch.js +298 -318
- package/dist/proxy/proxyConfig.js +3 -1
- package/dist/proxy/proxyFetch.js +250 -222
- package/dist/proxy/proxyHealth.d.ts +17 -0
- package/dist/proxy/proxyHealth.js +54 -0
- package/dist/proxy/requestLogger.js +140 -48
- package/dist/proxy/routingPolicy.d.ts +33 -0
- package/dist/proxy/routingPolicy.js +254 -0
- package/dist/proxy/snapshotPersistence.d.ts +2 -0
- package/dist/proxy/snapshotPersistence.js +40 -0
- package/dist/proxy/sseInterceptor.js +36 -11
- package/dist/server/routes/claudeProxyRoutes.d.ts +2 -1
- package/dist/server/routes/claudeProxyRoutes.js +2916 -2377
- package/dist/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/tasks/backends/bullmqBackend.js +24 -18
- package/dist/tasks/store/redisTaskStore.js +42 -17
- package/dist/tasks/taskManager.d.ts +2 -0
- package/dist/tasks/taskManager.js +100 -5
- package/dist/telemetry/telemetryService.js +9 -5
- package/dist/types/cli.d.ts +4 -0
- package/dist/types/proxyTypes.d.ts +211 -1
- package/dist/types/tools.d.ts +18 -0
- package/dist/utils/providerHealth.d.ts +1 -0
- package/dist/utils/providerHealth.js +46 -31
- package/dist/utils/providerUtils.js +12 -22
- package/dist/utils/schemaConversion.d.ts +1 -0
- package/dist/utils/schemaConversion.js +3 -0
- package/package.json +3 -2
- package/scripts/observability/check-proxy-telemetry.mjs +1 -1
- package/scripts/observability/manage-local-openobserve.sh +36 -5
|
@@ -28,6 +28,9 @@ let otelResolveAttempts = 0;
|
|
|
28
28
|
const MAX_RESOLVE_ATTEMPTS = 10;
|
|
29
29
|
/** Maximum body chunk size emitted to OTLP logs. */
|
|
30
30
|
const BODY_OTLP_CHUNK_SIZE = 16_000;
|
|
31
|
+
/** Maximum redacted body bytes persisted per capture entry. */
|
|
32
|
+
const MAX_CAPTURED_BODY_BYTES = 1024 * 1024;
|
|
33
|
+
const BODY_TRUNCATION_MARKER = "\n...[TRUNCATED]";
|
|
31
34
|
const gzip = promisify(gzipCallback);
|
|
32
35
|
/** Headers whose values must always be redacted. */
|
|
33
36
|
const SENSITIVE_HEADER_NAMES = new Set([
|
|
@@ -262,7 +265,117 @@ function sanitizePhase(phase) {
|
|
|
262
265
|
/**
 * Computes the SHA-256 digest of `value`.
 *
 * @param {string | Buffer} value - Data to hash.
 * @returns {string} Lowercase hexadecimal digest.
 */
function sha256(value) {
    const hasher = createHash("sha256");
    hasher.update(value);
    return hasher.digest("hex");
}
|
|
265
|
-
|
|
268
|
+
/**
 * Measures how many bytes `value` occupies once encoded as UTF-8.
 *
 * @param {string} value - Text to measure.
 * @returns {number} Encoded size in bytes.
 */
function utf8ByteLength(value) {
    const encodedSize = Buffer.byteLength(value, "utf8");
    return encodedSize;
}
|
|
271
|
+
/**
 * Shortens `input` so that its UTF-8 encoding, including the appended
 * `marker`, never exceeds `maxBytes`. Cuts are made on code-point
 * boundaries, so a multi-byte character is never split.
 *
 * When even the marker does not fit in `maxBytes`, the marker itself is cut
 * down to the budget so the returned value still honours the limit.
 *
 * @param {string} input - Text to bound.
 * @param {number} maxBytes - Maximum UTF-8 size of the returned value.
 * @param {string} [marker=BODY_TRUNCATION_MARKER] - Suffix signalling truncation.
 * @returns {{ value: string, bytes: number, truncated: boolean }} The
 *   (possibly shortened) text, its UTF-8 size, and whether it was cut.
 */
function truncateUtf8String(input, maxBytes, marker = BODY_TRUNCATION_MARKER) {
    const inputBytes = Buffer.byteLength(input, "utf8");
    if (inputBytes <= maxBytes) {
        return { value: input, bytes: inputBytes, truncated: false };
    }
    // Longest prefix of `text`, made of whole code points, fitting in `budget` bytes.
    const takePrefix = (text, budget) => {
        let usedBytes = 0;
        let endIndex = 0;
        for (const char of text) {
            const charBytes = Buffer.byteLength(char, "utf8");
            if (usedBytes + charBytes > budget) {
                break;
            }
            usedBytes += charBytes;
            // `char.length` is 2 for astral code points (surrogate pairs).
            endIndex += char.length;
        }
        return text.slice(0, endIndex);
    };
    const markerBytes = Buffer.byteLength(marker, "utf8");
    const value = maxBytes <= markerBytes
        ? takePrefix(marker, maxBytes)
        : `${takePrefix(input, maxBytes - markerBytes)}${marker}`;
    return {
        value,
        bytes: Buffer.byteLength(value, "utf8"),
        truncated: true,
    };
}
|
|
297
|
+
/**
 * Splits `input` into consecutive pieces whose UTF-8 size is at most
 * `maxBytes`, cutting only between code points. A single code point wider
 * than `maxBytes` still becomes its own piece.
 *
 * @param {string} input - Text to split; an empty/falsy value yields `[""]`.
 * @param {number} maxBytes - Target maximum UTF-8 size per piece.
 * @returns {string[]} Pieces that concatenate back to `input`.
 */
function splitUtf8StringByBytes(input, maxBytes) {
    if (!input) {
        return [""];
    }
    const pieces = [];
    let pieceStart = 0;
    let cursor = 0;
    let pieceBytes = 0;
    for (const codePoint of input) {
        const width = Buffer.byteLength(codePoint, "utf8");
        const pieceHasContent = cursor > pieceStart;
        if (pieceHasContent && pieceBytes + width > maxBytes) {
            // Close the current piece and start a new one at this code point.
            pieces.push(input.slice(pieceStart, cursor));
            pieceStart = cursor;
            pieceBytes = 0;
        }
        cursor += codePoint.length;
        pieceBytes += width;
    }
    if (cursor > pieceStart) {
        pieces.push(input.slice(pieceStart, cursor));
    }
    return pieces;
}
|
|
320
|
+
/**
 * Redacts `body` via `redactBody` and bounds the result to
 * `MAX_CAPTURED_BODY_BYTES` using `truncateUtf8String`.
 *
 * @param {*} body - Raw body handed to `redactBody`.
 * @returns {{ value?: string, bytes?: number, truncated: boolean }}
 *   `{ truncated: false }` when redaction yields `undefined`; otherwise the
 *   result of `truncateUtf8String`.
 */
function prepareRedactedBody(body) {
    const scrubbed = redactBody(body);
    return scrubbed === undefined
        ? { truncated: false }
        : truncateUtf8String(scrubbed, MAX_CAPTURED_BODY_BYTES);
}
|
|
327
|
+
/**
 * Recursively lists the files under `rootDir` that log cleanup is allowed to
 * manage:
 *   - top-level `proxy-*.jsonl`, `proxy-attempts-*.jsonl` and
 *     `proxy-debug-*.jsonl` files, and
 *   - `*.json.gz` files located anywhere inside `<rootDir>/bodies/`.
 *
 * Membership in the `bodies` subtree is tracked while walking instead of by
 * substring-matching the path, so look-alike siblings such as
 * `<rootDir>/bodies-archive/` or `<rootDir>/bodies.json.gz` are never
 * reported (and therefore never deleted by cleanup).
 *
 * @param {string} rootDir - Log directory to scan.
 * @returns {{ path: string, mtime: number, size: number }[]} Managed files
 *   with their modification time (ms) and size in bytes.
 * @throws If a directory cannot be read; per-file `statSync` failures are
 *   skipped.
 */
function collectManagedLogFiles(rootDir) {
    const managedFiles = [];
    const proxyLogPattern = /^proxy(?:-attempts|-debug)?-.*\.jsonl$/;
    const walk = (directory, insideBodies) => {
        const atRoot = directory === rootDir;
        for (const entry of readdirSync(directory, { withFileTypes: true })) {
            const entryPath = join(directory, entry.name);
            if (entry.isDirectory()) {
                // Only the direct child named "bodies" opens the artifact subtree.
                walk(entryPath, insideBodies || (atRoot && entry.name === "bodies"));
                continue;
            }
            const isTopLevelProxyLog = atRoot && proxyLogPattern.test(entry.name);
            const isBodyArtifact = insideBodies && entry.name.endsWith(".json.gz");
            if (!isTopLevelProxyLog && !isBodyArtifact) {
                continue;
            }
            try {
                const stat = statSync(entryPath);
                managedFiles.push({
                    path: entryPath,
                    mtime: stat.mtimeMs,
                    size: stat.size,
                });
            }
            catch {
                // Non-fatal: the file may have vanished between listing and stat.
            }
        }
    };
    walk(rootDir, false);
    return managedFiles;
}
|
|
359
|
+
/**
 * Depth-first removal of empty directories beneath `directory`.
 * Child directories are pruned first, then `directory` itself is removed if
 * it ended up empty, unless it is `stopAt`, which is always kept.
 * Any filesystem error is swallowed: pruning is best-effort.
 *
 * @param {string} directory - Directory to prune.
 * @param {string} stopAt - Directory path that must never be removed.
 * @returns {void}
 */
function pruneEmptyDirectories(directory, stopAt) {
    if (!existsSync(directory)) {
        return;
    }
    try {
        readdirSync(directory, { withFileTypes: true })
            .filter((child) => child.isDirectory())
            .forEach((child) => {
                pruneEmptyDirectories(join(directory, child.name), stopAt);
            });
        if (directory === stopAt) {
            return;
        }
        // Re-list: children may have just been removed above.
        const leftovers = readdirSync(directory);
        if (leftovers.length === 0) {
            rmSync(directory, { recursive: true, force: true });
        }
    }
    catch {
        // Non-fatal
    }
}
|
|
378
|
+
async function writeBodyArtifact(entry, redactedHeaders, redactedBody, bodyTruncated) {
|
|
266
379
|
if (!logDir || redactedBody === undefined) {
|
|
267
380
|
return {};
|
|
268
381
|
}
|
|
@@ -299,9 +412,10 @@ async function writeBodyArtifact(entry, redactedHeaders, redactedBody) {
|
|
|
299
412
|
return {
|
|
300
413
|
bodyPath,
|
|
301
414
|
bodySha256: sha256(redactedBody),
|
|
302
|
-
redactedBodyBytes:
|
|
415
|
+
redactedBodyBytes: utf8ByteLength(redactedBody),
|
|
303
416
|
storedFileBytes: compressed.byteLength,
|
|
304
417
|
redactedBody,
|
|
418
|
+
bodyTruncated,
|
|
305
419
|
};
|
|
306
420
|
}
|
|
307
421
|
function emitOtlpBodyLogRecord(entry, stored) {
|
|
@@ -311,9 +425,10 @@ function emitOtlpBodyLogRecord(entry, stored) {
|
|
|
311
425
|
return;
|
|
312
426
|
}
|
|
313
427
|
const otelLogger = provider.getLogger("neurolink-proxy-bodies", "1.0.0");
|
|
314
|
-
const
|
|
428
|
+
const chunks = splitUtf8StringByBytes(stored.redactedBody, BODY_OTLP_CHUNK_SIZE);
|
|
429
|
+
const totalChunks = Math.max(1, chunks.length);
|
|
315
430
|
for (let chunkIndex = 0; chunkIndex < totalChunks; chunkIndex++) {
|
|
316
|
-
const chunk =
|
|
431
|
+
const chunk = chunks[chunkIndex] ?? "";
|
|
317
432
|
otelLogger.emit({
|
|
318
433
|
severityNumber: (entry.responseStatus ?? 0) >= 400
|
|
319
434
|
? SeverityNumber.WARN
|
|
@@ -347,6 +462,9 @@ function emitOtlpBodyLogRecord(entry, stored) {
|
|
|
347
462
|
...(stored.redactedBodyBytes !== undefined && {
|
|
348
463
|
"body.bytes": stored.redactedBodyBytes,
|
|
349
464
|
}),
|
|
465
|
+
...(stored.bodyTruncated !== undefined && {
|
|
466
|
+
"body.truncated": stored.bodyTruncated,
|
|
467
|
+
}),
|
|
350
468
|
...(entry.traceId && { "trace.id": entry.traceId }),
|
|
351
469
|
...(entry.spanId && { "span.id": entry.spanId }),
|
|
352
470
|
...(entry.metadata && {
|
|
@@ -370,12 +488,18 @@ export async function logBodyCapture(entry) {
|
|
|
370
488
|
? { traceId: entry.traceId, spanId: entry.spanId }
|
|
371
489
|
: bridge.getCurrentTraceContext();
|
|
372
490
|
const redactedHeaders = redactHeaders(entry.headers);
|
|
373
|
-
|
|
491
|
+
const preparedBody = prepareRedactedBody(entry.body);
|
|
492
|
+
let stored;
|
|
374
493
|
try {
|
|
375
|
-
stored = await writeBodyArtifact(entry, redactedHeaders,
|
|
494
|
+
stored = await writeBodyArtifact(entry, redactedHeaders, preparedBody.value, preparedBody.truncated);
|
|
376
495
|
}
|
|
377
|
-
catch {
|
|
378
|
-
|
|
496
|
+
catch (writeError) {
|
|
497
|
+
logger.warn("[RequestLogger] writeBodyArtifact failed, falling back to in-memory body for OTLP", { error: writeError });
|
|
498
|
+
stored = {
|
|
499
|
+
redactedBody: preparedBody.value,
|
|
500
|
+
redactedBodyBytes: preparedBody.bytes,
|
|
501
|
+
bodyTruncated: preparedBody.truncated,
|
|
502
|
+
};
|
|
379
503
|
}
|
|
380
504
|
const dateStr = new Date(entry.timestamp).toISOString().split("T")[0];
|
|
381
505
|
const logFile = join(logDir, `proxy-debug-${dateStr}.jsonl`);
|
|
@@ -396,8 +520,9 @@ export async function logBodyCapture(entry) {
|
|
|
396
520
|
bodyPath: stored.bodyPath,
|
|
397
521
|
bodySha256: stored.bodySha256,
|
|
398
522
|
observedBodyBytes: entry.bodySize,
|
|
399
|
-
redactedBodyBytes: stored.redactedBodyBytes,
|
|
523
|
+
redactedBodyBytes: stored.redactedBodyBytes ?? preparedBody.bytes,
|
|
400
524
|
storedFileBytes: stored.storedFileBytes,
|
|
525
|
+
bodyTruncated: stored.bodyTruncated ?? preparedBody.truncated,
|
|
401
526
|
metadata: entry.metadata,
|
|
402
527
|
};
|
|
403
528
|
if (traceCtx) {
|
|
@@ -497,20 +622,7 @@ export function cleanupLogs(maxAgeDays = 7, maxSizeMb = 500) {
|
|
|
497
622
|
}
|
|
498
623
|
try {
|
|
499
624
|
const activeLogDir = logDir;
|
|
500
|
-
const files =
|
|
501
|
-
.filter((f) => (f.startsWith("proxy-") || f.startsWith("proxy-attempts-")) &&
|
|
502
|
-
f.endsWith(".jsonl"))
|
|
503
|
-
.map((f) => {
|
|
504
|
-
const filePath = join(activeLogDir, f);
|
|
505
|
-
const stat = statSync(filePath);
|
|
506
|
-
return {
|
|
507
|
-
name: f,
|
|
508
|
-
path: filePath,
|
|
509
|
-
mtime: stat.mtimeMs,
|
|
510
|
-
size: stat.size,
|
|
511
|
-
};
|
|
512
|
-
})
|
|
513
|
-
.sort((a, b) => a.mtime - b.mtime); // oldest first
|
|
625
|
+
const files = collectManagedLogFiles(activeLogDir).sort((a, b) => a.mtime - b.mtime); // oldest first
|
|
514
626
|
const cutoff = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000;
|
|
515
627
|
let deletedCount = 0;
|
|
516
628
|
let freedBytes = 0;
|
|
@@ -528,34 +640,11 @@ export function cleanupLogs(maxAgeDays = 7, maxSizeMb = 500) {
|
|
|
528
640
|
}
|
|
529
641
|
const bodiesDir = join(logDir, "bodies");
|
|
530
642
|
if (existsSync(bodiesDir)) {
|
|
531
|
-
|
|
532
|
-
const bodyPath = join(bodiesDir, entry);
|
|
533
|
-
try {
|
|
534
|
-
if (statSync(bodyPath).mtimeMs < cutoff) {
|
|
535
|
-
rmSync(bodyPath, { recursive: true, force: true });
|
|
536
|
-
}
|
|
537
|
-
}
|
|
538
|
-
catch {
|
|
539
|
-
// Non-fatal
|
|
540
|
-
}
|
|
541
|
-
}
|
|
542
|
-
}
|
|
543
|
-
// Include body artifacts in total size calculation
|
|
544
|
-
const bodiesDirForSize = join(logDir, "bodies");
|
|
545
|
-
let bodiesSize = 0;
|
|
546
|
-
if (existsSync(bodiesDirForSize)) {
|
|
547
|
-
for (const entry of readdirSync(bodiesDirForSize)) {
|
|
548
|
-
try {
|
|
549
|
-
bodiesSize += statSync(join(bodiesDirForSize, entry)).size;
|
|
550
|
-
}
|
|
551
|
-
catch {
|
|
552
|
-
// Non-fatal
|
|
553
|
-
}
|
|
554
|
-
}
|
|
643
|
+
pruneEmptyDirectories(bodiesDir, bodiesDir);
|
|
555
644
|
}
|
|
556
645
|
// Pass 2: if total size exceeds maxSizeMb, delete oldest until under limit
|
|
557
646
|
const maxBytes = maxSizeMb * 1024 * 1024;
|
|
558
|
-
let totalSize = remaining.reduce((sum, f) => sum + f.size, 0)
|
|
647
|
+
let totalSize = remaining.reduce((sum, f) => sum + f.size, 0);
|
|
559
648
|
while (totalSize > maxBytes && remaining.length > 0) {
|
|
560
649
|
const oldest = remaining.shift();
|
|
561
650
|
if (!oldest) {
|
|
@@ -566,6 +655,9 @@ export function cleanupLogs(maxAgeDays = 7, maxSizeMb = 500) {
|
|
|
566
655
|
deletedCount++;
|
|
567
656
|
freedBytes += oldest.size;
|
|
568
657
|
}
|
|
658
|
+
if (existsSync(bodiesDir)) {
|
|
659
|
+
pruneEmptyDirectories(bodiesDir, bodiesDir);
|
|
660
|
+
}
|
|
569
661
|
if (deletedCount > 0) {
|
|
570
662
|
logger.info(`[proxy] log cleanup: deleted ${deletedCount} file(s), freed ${(freedBytes / 1024 / 1024).toFixed(1)} MB`);
|
|
571
663
|
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, FallbackEntry, ParsedClaudeRequest, ProxyTranslationAttempt, ProxyTranslationPlan, RuntimeAccountState } from "../types/index.js";
|
|
2
|
+
export type { ClaudeProxyModelTier, ClaudeProxyRequestClass, ClaudeProxyRequestProfile, CooldownScope, CooldownSkippedAccount, FallbackEligibilityDecision, ProxyTranslationAttempt, ProxyTranslationPlan, };
|
|
3
|
+
export declare function inferClaudeProxyModelTier(modelName: string): ClaudeProxyModelTier;
|
|
4
|
+
export declare function classifyClaudeProxyRequest(requestedModel: string, parsed: ParsedClaudeRequest): ClaudeProxyRequestProfile;
|
|
5
|
+
export declare function getRequestClassCooldownKey(profile: ClaudeProxyRequestProfile): string;
|
|
6
|
+
export declare function getModelTierCooldownKey(profile: ClaudeProxyRequestProfile): string;
|
|
7
|
+
export declare function evaluateFallbackEligibility(profile: ClaudeProxyRequestProfile, candidate: {
|
|
8
|
+
provider?: string;
|
|
9
|
+
model?: string;
|
|
10
|
+
}): FallbackEligibilityDecision;
|
|
11
|
+
export declare function buildProxyTranslationPlan(primary: {
|
|
12
|
+
provider: string;
|
|
13
|
+
model?: string;
|
|
14
|
+
}, fallbackChain: FallbackEntry[], requestedModel: string, parsed: ParsedClaudeRequest): ProxyTranslationPlan;
|
|
15
|
+
export declare function summarizeSkippedFallbacks(plan: Pick<ProxyTranslationPlan, "profile" | "skipped">): string | null;
|
|
16
|
+
export declare function getActiveCooldownScope(state: RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): CooldownScope | null;
|
|
17
|
+
export declare function partitionAccountsByCooldown<T extends {
|
|
18
|
+
key: string;
|
|
19
|
+
}>(accounts: T[], getState: (account: T) => RuntimeAccountState, profile: ClaudeProxyRequestProfile, now?: number): {
|
|
20
|
+
eligible: T[];
|
|
21
|
+
skipped: CooldownSkippedAccount<T>[];
|
|
22
|
+
};
|
|
23
|
+
export declare function applyRateLimitCooldownScope(args: {
|
|
24
|
+
state: RuntimeAccountState;
|
|
25
|
+
profile: ClaudeProxyRequestProfile;
|
|
26
|
+
retryAfterMs?: number;
|
|
27
|
+
now?: number;
|
|
28
|
+
capMs: number;
|
|
29
|
+
}): {
|
|
30
|
+
backoffMs: number;
|
|
31
|
+
requestClassKey: string;
|
|
32
|
+
modelTierKey: string;
|
|
33
|
+
};
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
const STREAMING_CONVERSATIONAL_TOOL_THRESHOLD = 4;
|
|
2
|
+
const STRONG_TOOL_FIDELITY_THRESHOLD = 8;
|
|
3
|
+
const HIGH_TOOL_COUNT_THRESHOLD = 24;
|
|
4
|
+
const DEFAULT_COOLDOWN_FLOOR_MS = 1_000;
|
|
5
|
+
const HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS = 120_000;
|
|
6
|
+
const HIGH_FIDELITY_COOLDOWN_FLOOR_MS = 300_000;
|
|
7
|
+
/**
 * Derives the model tier from a model name by case-insensitive substring
 * match. Tiers are checked in the order opus, sonnet, haiku; the first match
 * wins and anything else maps to "other".
 *
 * @param {string} modelName - Requested model name.
 * @returns {"opus" | "sonnet" | "haiku" | "other"} Inferred tier.
 */
export function inferClaudeProxyModelTier(modelName) {
    const lowered = modelName.toLowerCase();
    const knownTiers = ["opus", "sonnet", "haiku"];
    const matched = knownTiers.find((tier) => lowered.includes(tier));
    return matched ?? "other";
}
|
|
20
|
+
/**
 * Reports whether any conversation message contains a "[tool_use:" or
 * "[tool_result:" marker in its `content`.
 *
 * @param {{ conversationMessages: { content: string }[] }} parsed - Parsed request.
 * @returns {boolean} True when at least one message carries a tool marker.
 */
function detectToolHistory(parsed) {
    for (const { content } of parsed.conversationMessages) {
        if (content.includes("[tool_use:") || content.includes("[tool_result:")) {
            return true;
        }
    }
    return false;
}
|
|
26
|
+
/**
 * Builds a request profile describing the traits of a parsed request:
 * tool count, images, thinking, tool history, tool-use requirements and the
 * resulting request classes.
 *
 * Classes are collected in a fixed priority order (multimodal,
 * high-tool-count-non-stream-structured, strong-tool-fidelity,
 * streaming-conversational); "standard" is used when none apply. The first
 * collected class becomes `primaryClass`.
 *
 * @param {string} requestedModel - Model name requested by the client.
 * @param {object} parsed - Parsed request; `tools`, `images`,
 *   `conversationMessages`, `stream`, `toolChoice`, `toolChoiceName` and
 *   `thinkingConfig` are read.
 * @returns {object} The request profile.
 */
export function classifyClaudeProxyRequest(requestedModel, parsed) {
    const toolCount = Object.keys(parsed.tools).length;
    const hasImages = parsed.images.length > 0;
    const hasThinking = Boolean(parsed.thinkingConfig?.enabled);
    const hasToolHistory = detectToolHistory(parsed);
    const requiresSpecificTool = Boolean(parsed.toolChoiceName);
    const requiresToolUse = parsed.toolChoice === "required" || requiresSpecificTool || hasToolHistory;
    const requiresStrongToolFidelity = toolCount >= STRONG_TOOL_FIDELITY_THRESHOLD ||
        requiresSpecificTool ||
        hasToolHistory;
    const { stream } = parsed;
    const isHighToolCountNonStream = !stream && toolCount >= HIGH_TOOL_COUNT_THRESHOLD;
    const isStreamingConversational = stream &&
        !hasImages &&
        toolCount <= STREAMING_CONVERSATIONAL_TOOL_THRESHOLD &&
        !requiresStrongToolFidelity;
    // Order matters: the first applicable class becomes the primary class.
    const classes = [
        hasImages && "multimodal",
        isHighToolCountNonStream && "high-tool-count-non-stream-structured",
        requiresStrongToolFidelity && "strong-tool-fidelity",
        isStreamingConversational && "streaming-conversational",
    ].filter(Boolean);
    if (classes.length === 0) {
        classes.push("standard");
    }
    const [primaryClass] = classes;
    return {
        requestedModel,
        modelTier: inferClaudeProxyModelTier(requestedModel),
        primaryClass,
        classes,
        stream,
        toolCount,
        hasImages,
        hasThinking,
        hasToolHistory,
        requiresToolUse,
        requiresSpecificTool,
        requiresStrongToolFidelity,
        isHighToolCountNonStream,
        isStreamingConversational,
        isMultimodal: hasImages,
    };
}
|
|
75
|
+
/**
 * Builds the cooldown key for a request class: the profile's primary class
 * and its lower-cased requested model, joined by ":".
 *
 * @param {{ primaryClass: string, requestedModel: string }} profile - Request profile.
 * @returns {string} Key of the form `<primaryClass>:<requestedmodel>`.
 */
export function getRequestClassCooldownKey(profile) {
    const { primaryClass, requestedModel } = profile;
    const normalizedModel = requestedModel.toLowerCase();
    return `${primaryClass}:${normalizedModel}`;
}
|
|
78
|
+
/**
 * Returns the cooldown key for a model tier, which is the profile's
 * `modelTier` value itself.
 *
 * @param {{ modelTier: string }} profile - Request profile.
 * @returns {string} The model tier.
 */
export function getModelTierCooldownKey(profile) {
    const { modelTier } = profile;
    return modelTier;
}
|
|
81
|
+
/**
 * Decides whether a fallback candidate must be blocked by policy.
 *
 * Only auto-provider fallback (a candidate without an explicit provider) is
 * gated. Configured fallback-chain entries are always allowed through and
 * left to fail naturally if the provider cannot handle the request.
 *
 * @param {object} profile - Request profile (`modelTier`,
 *   `requiresStrongToolFidelity`, `isHighToolCountNonStream` are read).
 * @param {string | undefined} provider - Candidate provider, if explicit.
 * @param {string | undefined} _model - Candidate model (currently unused).
 * @returns {string | null} Block reason, or null when the candidate is allowed.
 */
function getQualityGuardReason(profile, provider, _model) {
    if (provider) {
        return null;
    }
    const needsContractPreservation = profile.modelTier === "opus" ||
        profile.requiresStrongToolFidelity ||
        profile.isHighToolCountNonStream;
    return needsContractPreservation
        ? "auto-provider fallback is disabled for requests that require contract preservation"
        : null;
}
|
|
96
|
+
/**
 * Evaluates whether a fallback candidate may be attempted for `profile`.
 *
 * @param {object} profile - Request profile.
 * @param {{ provider?: string, model?: string }} candidate - Fallback target.
 * @returns {{ provider?: string, model?: string, eligible: boolean, reason: string }}
 *   The decision; `reason` is "eligible" or the policy block reason.
 */
export function evaluateFallbackEligibility(profile, candidate) {
    const { provider, model } = candidate;
    const blockReason = getQualityGuardReason(profile, provider, model);
    const eligible = !blockReason;
    return {
        provider,
        model,
        eligible,
        reason: eligible ? "eligible" : blockReason,
    };
}
|
|
113
|
+
/**
 * Builds the ordered list of provider attempts for a request.
 *
 * The primary target always comes first. Each fallback-chain entry is then
 * added unless it duplicates the primary (same provider and model) or is
 * rejected by `evaluateFallbackEligibility`, in which case its decision is
 * recorded in `skipped`. When no fallback chain is configured, an
 * "auto-provider" attempt is added if policy allows it.
 *
 * Attempt labels use "unknown" for a missing model, for the primary and for
 * fallbacks alike.
 *
 * @param {{ provider: string, model?: string }} primary - Primary target.
 * @param {{ provider: string, model?: string }[]} fallbackChain - Configured fallbacks.
 * @param {string} requestedModel - Model name requested by the client.
 * @param {object} parsed - Parsed request passed to `classifyClaudeProxyRequest`.
 * @returns {{ profile: object, attempts: object[], skipped: object[] }} The plan.
 */
export function buildProxyTranslationPlan(primary, fallbackChain, requestedModel, parsed) {
    const profile = classifyClaudeProxyRequest(requestedModel, parsed);
    const describeTarget = ({ provider, model }) => `${provider}/${model ?? "unknown"}`;
    const attempts = [
        {
            provider: primary.provider,
            model: primary.model,
            label: describeTarget(primary),
        },
    ];
    const skipped = [];
    for (const fallback of fallbackChain) {
        const duplicatesPrimary = fallback.provider === primary.provider &&
            fallback.model === primary.model;
        if (duplicatesPrimary) {
            continue;
        }
        const decision = evaluateFallbackEligibility(profile, fallback);
        if (!decision.eligible) {
            skipped.push(decision);
            continue;
        }
        attempts.push({
            provider: fallback.provider,
            model: fallback.model,
            label: describeTarget(fallback),
        });
    }
    // Auto-provider is only considered when no explicit chain is configured.
    if (fallbackChain.length === 0) {
        const autoDecision = evaluateFallbackEligibility(profile, {});
        if (autoDecision.eligible) {
            attempts.push({ label: "auto-provider" });
        }
        else {
            skipped.push(autoDecision);
        }
    }
    return {
        profile,
        attempts,
        skipped,
    };
}
|
|
154
|
+
/**
 * Produces a human-readable summary of the fallback targets that policy
 * skipped, or null when nothing was skipped.
 *
 * @param {{ profile: { primaryClass: string }, skipped: { provider?: string, model?: string, reason: string }[] }} plan
 *   Translation plan (only `profile` and `skipped` are read).
 * @returns {string | null} Summary sentence, or null for an empty skip list.
 */
export function summarizeSkippedFallbacks(plan) {
    const { skipped } = plan;
    if (skipped.length === 0) {
        return null;
    }
    const parts = [];
    for (const { provider, model, reason } of skipped) {
        // Decisions without a provider describe the auto-provider fallback.
        const target = provider ? `${provider}/${model ?? "unknown"}` : "auto-provider";
        parts.push(`${target}: ${reason}`);
    }
    const summary = parts.join("; ");
    return `Fallback policy preserved the requested ${plan.profile.primaryClass} contract by skipping ineligible targets. ${summary}`;
}
|
|
168
|
+
/**
 * Finds the active cooldown that expires last for an account.
 *
 * Three scopes are examined in order (request class, model tier, generic
 * `coolingUntil`). A scope counts only if its expiry is set and later than
 * `now`; a later scope replaces the current winner only when it expires
 * strictly later, so ties keep the earlier scope.
 *
 * @param {object} state - Runtime account state (`requestClassCooldowns`,
 *   `modelTierCooldowns`, `coolingUntil` are read).
 * @param {object} profile - Request profile used to derive the scope keys.
 * @param {number} [now=Date.now()] - Current time in epoch milliseconds.
 * @returns {{ scope: string, key: string, until: number } | null} The
 *   longest active cooldown, or null when none is active.
 */
export function getActiveCooldownScope(state, profile, now = Date.now()) {
    const requestClassKey = getRequestClassCooldownKey(profile);
    const modelTierKey = getModelTierCooldownKey(profile);
    const candidates = [
        {
            scope: "request_class",
            key: requestClassKey,
            until: state.requestClassCooldowns?.[requestClassKey] ?? undefined,
        },
        {
            scope: "model_tier",
            key: modelTierKey,
            until: state.modelTierCooldowns?.[modelTierKey] ?? undefined,
        },
        {
            scope: "generic",
            key: "generic",
            until: state.coolingUntil,
        },
    ];
    let longest = null;
    candidates.forEach((candidate, position) => {
        const { until } = candidate;
        if (!until || !(until > now)) {
            return;
        }
        // The first scope needs no comparison; later ones must outlast the winner.
        if (position > 0 && !(until > (longest?.until ?? 0))) {
            return;
        }
        longest = candidate;
    });
    return longest;
}
|
|
201
|
+
/**
 * Splits accounts into those usable now and those currently cooling down
 * for the given request profile.
 *
 * @param {Array<object>} accounts - Accounts to examine, in priority order.
 * @param {(account: object) => object} getState - Returns the runtime state of an account.
 * @param {object} profile - Request profile used to look up cooldown scopes.
 * @param {number} [now=Date.now()] - Current time in epoch milliseconds.
 * @returns {{ eligible: object[], skipped: { account: object, cooldown: object }[] }}
 *   Accounts without an active cooldown, and skipped accounts paired with
 *   the cooldown that excluded them. Input order is preserved in both lists.
 */
export function partitionAccountsByCooldown(accounts, getState, profile, now = Date.now()) {
    const partition = { eligible: [], skipped: [] };
    for (const account of accounts) {
        const accountState = getState(account);
        const cooldown = getActiveCooldownScope(accountState, profile, now);
        if (!cooldown) {
            partition.eligible.push(account);
        }
        else {
            partition.skipped.push({ account, cooldown });
        }
    }
    return partition;
}
|
|
217
|
+
/**
 * Records a rate-limit cooldown on `args.state` for both the request-class
 * scope and the model-tier scope of `args.profile`, with exponential backoff.
 *
 * The base cooldown is the larger of `retryAfterMs` and a floor chosen from
 * the profile (high-fidelity floor for opus / strong tool fidelity, a lower
 * floor for high-tool-count non-stream requests, default otherwise). It is
 * doubled per existing scoped backoff level and capped at `capMs`. Existing
 * cooldowns are only ever extended, never shortened.
 *
 * A `retryAfterMs` that is not a number (e.g. NaN from a failed header
 * parse) is ignored so that it cannot turn the stored expiry into NaN and
 * thereby erase an active cooldown. A missing `state.backoffLevel` is
 * treated as 0.
 *
 * @param {object} args
 * @param {object} args.state - Runtime account state; mutated in place.
 * @param {object} args.profile - Request profile.
 * @param {number} [args.retryAfterMs] - Server-suggested wait in milliseconds.
 * @param {number} [args.now] - Current epoch milliseconds (defaults to Date.now()).
 * @param {number} args.capMs - Upper bound for the computed backoff.
 * @returns {{ backoffMs: number, requestClassKey: string, modelTierKey: string }}
 *   The applied backoff and the scope keys that were updated.
 */
export function applyRateLimitCooldownScope(args) {
    const { state, profile } = args;
    const now = args.now ?? Date.now();
    const requestClassKey = getRequestClassCooldownKey(profile);
    const modelTierKey = getModelTierCooldownKey(profile);
    const rcBackoffLevels = state.requestClassBackoffLevels ?? {};
    const mtBackoffLevels = state.modelTierBackoffLevels ?? {};
    const rcLevel = rcBackoffLevels[requestClassKey] ?? 0;
    const mtLevel = mtBackoffLevels[modelTierKey] ?? 0;
    const scopedBackoffLevel = Math.max(rcLevel, mtLevel);
    let floorMs = DEFAULT_COOLDOWN_FLOOR_MS;
    if (profile.modelTier === "opus" || profile.requiresStrongToolFidelity) {
        floorMs = HIGH_FIDELITY_COOLDOWN_FLOOR_MS;
    }
    else if (profile.isHighToolCountNonStream) {
        floorMs = HIGH_TOOL_COUNT_COOLDOWN_FLOOR_MS;
    }
    // Math.max/Math.min propagate NaN, so an unparsable hint must be dropped here.
    const requestedMs = Number(args.retryAfterMs ?? 0);
    const retryAfterMs = Number.isNaN(requestedMs) ? 0 : requestedMs;
    const baseCooldownMs = Math.max(retryAfterMs, floorMs);
    const backoffMs = Math.min(baseCooldownMs * 2 ** scopedBackoffLevel, args.capMs);
    const until = now + backoffMs;
    state.requestClassCooldowns = {
        ...(state.requestClassCooldowns ?? {}),
        [requestClassKey]: Math.max(state.requestClassCooldowns?.[requestClassKey] ?? 0, until),
    };
    state.modelTierCooldowns = {
        ...(state.modelTierCooldowns ?? {}),
        [modelTierKey]: Math.max(state.modelTierCooldowns?.[modelTierKey] ?? 0, until),
    };
    state.requestClassBackoffLevels = {
        ...rcBackoffLevels,
        [requestClassKey]: rcLevel + 1,
    };
    state.modelTierBackoffLevels = {
        ...mtBackoffLevels,
        [modelTierKey]: mtLevel + 1,
    };
    state.backoffLevel = (state.backoffLevel ?? 0) + 1;
    return {
        backoffMs,
        requestClassKey,
        modelTierKey,
    };
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { mkdir, rename, rm, writeFile } from "node:fs/promises";
|
|
3
|
+
import { basename, dirname, join } from "node:path";
|
|
4
|
+
const writeLocks = new Map();
|
|
5
|
+
/**
 * Writes `payload` to `targetPath` by first writing a uniquely named
 * temporary file in the same directory and then renaming it over the target.
 * The parent directory is created if needed, and the temporary file is
 * removed afterwards on a best-effort basis.
 *
 * @param {string} targetPath - Final file path.
 * @param {string} payload - File contents.
 * @param {number} mode - File mode passed to `writeFile` for the temporary file.
 * @returns {Promise<void>}
 * @throws Propagates errors from `mkdir`, `writeFile` and `rename`.
 */
async function writeSnapshotFile(targetPath, payload, mode) {
    const parentDir = dirname(targetPath);
    const fileName = basename(targetPath);
    await mkdir(parentDir, { recursive: true });
    const stagingName = `.${fileName}.${process.pid}.${randomUUID()}.tmp`;
    const stagingPath = join(parentDir, stagingName);
    try {
        await writeFile(stagingPath, payload, { mode });
        await rename(stagingPath, targetPath);
    }
    finally {
        try {
            await rm(stagingPath, { force: true });
        }
        catch {
            // Best-effort cleanup only.
        }
    }
}
|
|
20
|
+
/**
 * Serializes `data` as pretty-printed JSON and writes it to `targetPath`
 * through `writeSnapshotFile`. Writes to the same path are chained so they
 * run one after another in call order; a failed earlier write does not
 * block later ones.
 *
 * The payload is serialized immediately, before queueing, so later mutations
 * of `data` do not affect what gets written.
 *
 * @param {string} targetPath - Destination file path (also the queue key).
 * @param {*} data - Value to serialize with `JSON.stringify(data, null, 2)`.
 * @param {number} [mode=0o600] - File mode for the written file.
 * @returns {Promise<void>} Settles when this write has finished.
 * @throws Propagates the error of this write (not of earlier queued writes).
 */
export async function writeJsonSnapshotAtomically(targetPath, data, mode = 0o600) {
    const serialized = JSON.stringify(data, null, 2);
    const runWrite = () => writeSnapshotFile(targetPath, serialized, mode);
    const priorWrite = writeLocks.get(targetPath) ?? Promise.resolve();
    const thisWrite = priorWrite
        .catch(() => {
            // Preserve the queue even if a previous write failed.
        })
        .then(runWrite);
    writeLocks.set(targetPath, thisWrite);
    try {
        await thisWrite;
    }
    finally {
        // Drop the entry only if no newer write has queued behind this one.
        const isTail = writeLocks.get(targetPath) === thisWrite;
        if (isTail) {
            writeLocks.delete(targetPath);
        }
    }
}
|
|
38
|
+
/**
 * Empties the per-path write queue map (`writeLocks`).
 * Named for test use; it does not wait for or cancel writes already running.
 *
 * @returns {void}
 */
export function clearSnapshotWriteLocksForTests() {
    writeLocks.clear();
}
|
|
@@ -91,31 +91,52 @@ function createAccumulator(captureRawText) {
|
|
|
91
91
|
eventLogTruncated: false,
|
|
92
92
|
};
|
|
93
93
|
}
|
|
94
|
-
function
|
|
95
|
-
|
|
94
|
+
/**
 * Measures how many bytes `input` occupies once encoded as UTF-8.
 *
 * @param {string} input - Text to measure.
 * @returns {number} Encoded size in bytes.
 */
function utf8ByteLength(input) {
    const encodedSize = Buffer.byteLength(input, "utf8");
    return encodedSize;
}
|
|
97
|
+
function truncateUtf8String(input, maxBytes) {
|
|
98
|
+
if (utf8ByteLength(input) <= maxBytes) {
|
|
96
99
|
return input;
|
|
97
100
|
}
|
|
98
|
-
|
|
101
|
+
const markerBytes = utf8ByteLength(TRUNCATION_MARKER);
|
|
102
|
+
if (maxBytes <= 0 || maxBytes < markerBytes) {
|
|
103
|
+
return "";
|
|
104
|
+
}
|
|
105
|
+
let output = "";
|
|
106
|
+
let usedBytes = 0;
|
|
107
|
+
for (const char of input) {
|
|
108
|
+
const charBytes = utf8ByteLength(char);
|
|
109
|
+
if (usedBytes + charBytes + markerBytes > maxBytes) {
|
|
110
|
+
break;
|
|
111
|
+
}
|
|
112
|
+
output += char;
|
|
113
|
+
usedBytes += charBytes;
|
|
114
|
+
}
|
|
115
|
+
return `${output}${TRUNCATION_MARKER}`;
|
|
116
|
+
}
|
|
117
|
+
function truncateString(input, maxBytes) {
|
|
118
|
+
return truncateUtf8String(input, maxBytes);
|
|
99
119
|
}
|
|
100
120
|
function appendCappedFragment(current, fragment, currentBytes, maxBytes) {
|
|
121
|
+
const fragmentBytes = utf8ByteLength(fragment);
|
|
101
122
|
if (currentBytes >= maxBytes) {
|
|
102
123
|
return {
|
|
103
124
|
value: current && current.endsWith(TRUNCATION_MARKER)
|
|
104
125
|
? current
|
|
105
126
|
: `${current ?? ""}${TRUNCATION_MARKER}`,
|
|
106
|
-
nextBytes: currentBytes +
|
|
127
|
+
nextBytes: currentBytes + fragmentBytes,
|
|
107
128
|
};
|
|
108
129
|
}
|
|
109
130
|
const remainingBytes = maxBytes - currentBytes;
|
|
110
|
-
const nextBytes = currentBytes +
|
|
111
|
-
if (
|
|
131
|
+
const nextBytes = currentBytes + fragmentBytes;
|
|
132
|
+
if (fragmentBytes <= remainingBytes) {
|
|
112
133
|
return {
|
|
113
134
|
value: `${current ?? ""}${fragment}`,
|
|
114
135
|
nextBytes,
|
|
115
136
|
};
|
|
116
137
|
}
|
|
117
138
|
return {
|
|
118
|
-
value: `${current ?? ""}${fragment
|
|
139
|
+
value: `${current ?? ""}${truncateUtf8String(fragment, remainingBytes)}`,
|
|
119
140
|
nextBytes,
|
|
120
141
|
};
|
|
121
142
|
}
|
|
@@ -129,15 +150,19 @@ function appendRawTextChunk(acc, chunk) {
|
|
|
129
150
|
acc.rawTextTruncated = true;
|
|
130
151
|
return;
|
|
131
152
|
}
|
|
132
|
-
|
|
153
|
+
const chunkBytes = utf8ByteLength(chunk);
|
|
154
|
+
if (chunkBytes <= remainingBytes) {
|
|
133
155
|
acc.rawTextChunks.push(chunk);
|
|
134
|
-
acc.rawTextBytes +=
|
|
156
|
+
acc.rawTextBytes += chunkBytes;
|
|
135
157
|
return;
|
|
136
158
|
}
|
|
137
|
-
acc.rawTextChunks.push(chunk
|
|
159
|
+
acc.rawTextChunks.push(truncateUtf8String(chunk, remainingBytes));
|
|
138
160
|
acc.rawTextBytes = MAX_RAW_TEXT_BYTES;
|
|
139
161
|
acc.rawTextTruncated = true;
|
|
140
162
|
}
|
|
163
|
+
/**
 * Returns the UTF-8 size of a content block's payload. The payload is the
 * first of `text`, `thinking`, `toolInput` that is not null/undefined
 * (an empty string counts as present); a block with none of them measures 0.
 *
 * @param {{ text?: string, thinking?: string, toolInput?: string }} block - Content block.
 * @returns {number} Payload size in bytes.
 */
function getBlockContentBytes(block) {
    const payload = block.text ?? block.thinking ?? block.toolInput ?? "";
    return Buffer.byteLength(payload, "utf8");
}
|
|
141
166
|
function finalize(acc) {
|
|
142
167
|
const totalTokens = acc.inputTokens + acc.outputTokens;
|
|
143
168
|
return {
|
|
@@ -199,7 +224,7 @@ function processContentBlockStart(acc, parsed) {
|
|
|
199
224
|
entry.toolInput = "";
|
|
200
225
|
}
|
|
201
226
|
acc.contentBlocks.push(entry);
|
|
202
|
-
acc.blockByteCounts.set(index,
|
|
227
|
+
acc.blockByteCounts.set(index, getBlockContentBytes(entry));
|
|
203
228
|
}
|
|
204
229
|
function processContentBlockDelta(acc, parsed) {
|
|
205
230
|
const index = parsed.index ?? 0;
|