@juspay/neurolink 9.15.0 → 9.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/adapters/video/videoAnalyzer.js +10 -8
- package/dist/cli/commands/setup-anthropic.js +1 -14
- package/dist/cli/commands/setup-azure.js +1 -12
- package/dist/cli/commands/setup-bedrock.js +1 -9
- package/dist/cli/commands/setup-google-ai.js +1 -12
- package/dist/cli/commands/setup-openai.js +1 -14
- package/dist/cli/commands/workflow.d.ts +27 -0
- package/dist/cli/commands/workflow.js +216 -0
- package/dist/cli/factories/commandFactory.js +79 -20
- package/dist/cli/index.js +0 -1
- package/dist/cli/parser.js +4 -1
- package/dist/cli/utils/maskCredential.d.ts +11 -0
- package/dist/cli/utils/maskCredential.js +23 -0
- package/dist/constants/contextWindows.js +107 -16
- package/dist/constants/enums.d.ts +99 -15
- package/dist/constants/enums.js +152 -22
- package/dist/context/budgetChecker.js +1 -1
- package/dist/context/contextCompactor.js +31 -4
- package/dist/context/emergencyTruncation.d.ts +21 -0
- package/dist/context/emergencyTruncation.js +88 -0
- package/dist/context/errorDetection.d.ts +16 -0
- package/dist/context/errorDetection.js +48 -1
- package/dist/context/errors.d.ts +19 -0
- package/dist/context/errors.js +21 -0
- package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/core/baseProvider.js +306 -200
- package/dist/core/conversationMemoryManager.js +104 -61
- package/dist/core/evaluationProviders.js +16 -33
- package/dist/core/factory.js +237 -164
- package/dist/core/modules/GenerationHandler.js +175 -116
- package/dist/core/modules/MessageBuilder.js +222 -170
- package/dist/core/modules/StreamHandler.d.ts +1 -0
- package/dist/core/modules/StreamHandler.js +95 -27
- package/dist/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/core/modules/TelemetryHandler.js +25 -7
- package/dist/core/modules/ToolsManager.js +115 -191
- package/dist/core/redisConversationMemoryManager.js +418 -282
- package/dist/factories/providerRegistry.d.ts +5 -0
- package/dist/factories/providerRegistry.js +20 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +4 -2
- package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
- package/dist/lib/constants/contextWindows.js +107 -16
- package/dist/lib/constants/enums.d.ts +99 -15
- package/dist/lib/constants/enums.js +152 -22
- package/dist/lib/context/budgetChecker.js +1 -1
- package/dist/lib/context/contextCompactor.js +31 -4
- package/dist/lib/context/emergencyTruncation.d.ts +21 -0
- package/dist/lib/context/emergencyTruncation.js +89 -0
- package/dist/lib/context/errorDetection.d.ts +16 -0
- package/dist/lib/context/errorDetection.js +48 -1
- package/dist/lib/context/errors.d.ts +19 -0
- package/dist/lib/context/errors.js +22 -0
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/lib/core/baseProvider.js +306 -200
- package/dist/lib/core/conversationMemoryManager.js +104 -61
- package/dist/lib/core/evaluationProviders.js +16 -33
- package/dist/lib/core/factory.js +237 -164
- package/dist/lib/core/modules/GenerationHandler.js +175 -116
- package/dist/lib/core/modules/MessageBuilder.js +222 -170
- package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
- package/dist/lib/core/modules/StreamHandler.js +95 -27
- package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/lib/core/modules/TelemetryHandler.js +25 -7
- package/dist/lib/core/modules/ToolsManager.js +115 -191
- package/dist/lib/core/redisConversationMemoryManager.js +418 -282
- package/dist/lib/factories/providerRegistry.d.ts +5 -0
- package/dist/lib/factories/providerRegistry.js +20 -2
- package/dist/lib/index.d.ts +2 -2
- package/dist/lib/index.js +4 -2
- package/dist/lib/mcp/externalServerManager.js +66 -0
- package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/lib/mcp/mcpClientFactory.js +16 -0
- package/dist/lib/mcp/toolDiscoveryService.js +32 -6
- package/dist/lib/mcp/toolRegistry.js +193 -123
- package/dist/lib/neurolink.d.ts +6 -0
- package/dist/lib/neurolink.js +1162 -646
- package/dist/lib/providers/amazonBedrock.d.ts +1 -1
- package/dist/lib/providers/amazonBedrock.js +521 -319
- package/dist/lib/providers/anthropic.js +73 -17
- package/dist/lib/providers/anthropicBaseProvider.js +77 -17
- package/dist/lib/providers/googleAiStudio.d.ts +1 -1
- package/dist/lib/providers/googleAiStudio.js +292 -227
- package/dist/lib/providers/googleVertex.d.ts +36 -1
- package/dist/lib/providers/googleVertex.js +553 -260
- package/dist/lib/providers/ollama.js +329 -278
- package/dist/lib/providers/openAI.js +77 -19
- package/dist/lib/providers/sagemaker/parsers.js +3 -3
- package/dist/lib/providers/sagemaker/streaming.js +3 -3
- package/dist/lib/proxy/proxyFetch.js +81 -48
- package/dist/lib/rag/ChunkerFactory.js +1 -1
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/markdownChunker.js +174 -2
- package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
- package/dist/lib/rag/ragIntegration.d.ts +18 -1
- package/dist/lib/rag/ragIntegration.js +94 -14
- package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
- package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/lib/telemetry/attributes.d.ts +52 -0
- package/dist/lib/telemetry/attributes.js +61 -0
- package/dist/lib/telemetry/index.d.ts +3 -0
- package/dist/lib/telemetry/index.js +3 -0
- package/dist/lib/telemetry/telemetryService.d.ts +6 -0
- package/dist/lib/telemetry/telemetryService.js +6 -0
- package/dist/lib/telemetry/tracers.d.ts +15 -0
- package/dist/lib/telemetry/tracers.js +17 -0
- package/dist/lib/telemetry/withSpan.d.ts +9 -0
- package/dist/lib/telemetry/withSpan.js +35 -0
- package/dist/lib/types/contextTypes.d.ts +10 -0
- package/dist/lib/types/streamTypes.d.ts +14 -0
- package/dist/lib/utils/conversationMemory.js +121 -82
- package/dist/lib/utils/logger.d.ts +5 -0
- package/dist/lib/utils/logger.js +50 -2
- package/dist/lib/utils/messageBuilder.js +22 -42
- package/dist/lib/utils/modelDetection.js +3 -3
- package/dist/lib/utils/providerRetry.d.ts +41 -0
- package/dist/lib/utils/providerRetry.js +114 -0
- package/dist/lib/utils/retryability.d.ts +14 -0
- package/dist/lib/utils/retryability.js +23 -0
- package/dist/lib/utils/sanitizers/svg.js +4 -5
- package/dist/lib/utils/tokenEstimation.d.ts +11 -1
- package/dist/lib/utils/tokenEstimation.js +19 -4
- package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
- package/dist/mcp/externalServerManager.js +66 -0
- package/dist/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/mcp/mcpClientFactory.js +16 -0
- package/dist/mcp/toolDiscoveryService.js +32 -6
- package/dist/mcp/toolRegistry.js +193 -123
- package/dist/neurolink.d.ts +6 -0
- package/dist/neurolink.js +1162 -646
- package/dist/providers/amazonBedrock.d.ts +1 -1
- package/dist/providers/amazonBedrock.js +521 -319
- package/dist/providers/anthropic.js +73 -17
- package/dist/providers/anthropicBaseProvider.js +77 -17
- package/dist/providers/googleAiStudio.d.ts +1 -1
- package/dist/providers/googleAiStudio.js +292 -227
- package/dist/providers/googleVertex.d.ts +36 -1
- package/dist/providers/googleVertex.js +553 -260
- package/dist/providers/ollama.js +329 -278
- package/dist/providers/openAI.js +77 -19
- package/dist/providers/sagemaker/parsers.js +3 -3
- package/dist/providers/sagemaker/streaming.js +3 -3
- package/dist/proxy/proxyFetch.js +81 -48
- package/dist/rag/ChunkerFactory.js +1 -1
- package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/rag/chunking/markdownChunker.js +174 -2
- package/dist/rag/pipeline/contextAssembly.js +2 -1
- package/dist/rag/ragIntegration.d.ts +18 -1
- package/dist/rag/ragIntegration.js +94 -14
- package/dist/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/server/abstract/baseServerAdapter.js +4 -1
- package/dist/server/adapters/fastifyAdapter.js +35 -30
- package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/telemetry/attributes.d.ts +52 -0
- package/dist/telemetry/attributes.js +60 -0
- package/dist/telemetry/index.d.ts +3 -0
- package/dist/telemetry/index.js +3 -0
- package/dist/telemetry/telemetryService.d.ts +6 -0
- package/dist/telemetry/telemetryService.js +6 -0
- package/dist/telemetry/tracers.d.ts +15 -0
- package/dist/telemetry/tracers.js +16 -0
- package/dist/telemetry/withSpan.d.ts +9 -0
- package/dist/telemetry/withSpan.js +34 -0
- package/dist/types/contextTypes.d.ts +10 -0
- package/dist/types/streamTypes.d.ts +14 -0
- package/dist/utils/conversationMemory.js +121 -82
- package/dist/utils/logger.d.ts +5 -0
- package/dist/utils/logger.js +50 -2
- package/dist/utils/messageBuilder.js +22 -42
- package/dist/utils/modelDetection.js +3 -3
- package/dist/utils/providerRetry.d.ts +41 -0
- package/dist/utils/providerRetry.js +113 -0
- package/dist/utils/retryability.d.ts +14 -0
- package/dist/utils/retryability.js +22 -0
- package/dist/utils/sanitizers/svg.js +4 -5
- package/dist/utils/tokenEstimation.d.ts +11 -1
- package/dist/utils/tokenEstimation.js +19 -4
- package/dist/utils/videoAnalysisProcessor.js +7 -3
- package/dist/workflow/config.d.ts +26 -26
- package/package.json +1 -1
package/dist/utils/logger.js
CHANGED
|
@@ -13,6 +13,22 @@
|
|
|
13
13
|
* - Structured data support for complex objects
|
|
14
14
|
* - Tabular data display
|
|
15
15
|
*/
|
|
16
|
+
// OTel trace context for log correlation (optional — gracefully no-ops if OTel not initialized)
|
|
17
|
+
let traceApi = null;
|
|
18
|
+
let traceApiPromise = null;
|
|
19
|
+
async function getTraceApi() {
|
|
20
|
+
if (!traceApiPromise) {
|
|
21
|
+
traceApiPromise = import("@opentelemetry/api")
|
|
22
|
+
.then((mod) => {
|
|
23
|
+
traceApi = mod;
|
|
24
|
+
return mod;
|
|
25
|
+
})
|
|
26
|
+
.catch(() => null);
|
|
27
|
+
}
|
|
28
|
+
return traceApiPromise;
|
|
29
|
+
}
|
|
30
|
+
// Eagerly kick off the import so the cached module is available to synchronous callers once it resolves; calls made before then get no trace context
|
|
31
|
+
void getTraceApi();
|
|
16
32
|
// Pre-computed uppercase log levels for performance optimization
|
|
17
33
|
const UPPERCASE_LOG_LEVELS = {
|
|
18
34
|
debug: "DEBUG",
|
|
@@ -95,6 +111,34 @@ class NeuroLinkLogger {
|
|
|
95
111
|
getLogPrefix(timestamp, level) {
|
|
96
112
|
return `[${timestamp}] [NEUROLINK:${UPPERCASE_LOG_LEVELS[level]}]`;
|
|
97
113
|
}
|
|
114
|
+
/**
|
|
115
|
+
* Extracts the current OTel trace context (trace_id, span_id, trace_flags) if available.
|
|
116
|
+
* Returns an empty object if the OTel API has not loaded, no active span exists, the trace ID is all zeros, or the lookup throws.
|
|
117
|
+
*/
|
|
118
|
+
getTraceContext() {
|
|
119
|
+
if (!traceApi) {
|
|
120
|
+
return {};
|
|
121
|
+
}
|
|
122
|
+
try {
|
|
123
|
+
const span = traceApi.trace.getSpan(traceApi.context.active());
|
|
124
|
+
if (!span) {
|
|
125
|
+
return {};
|
|
126
|
+
}
|
|
127
|
+
const spanContext = span.spanContext();
|
|
128
|
+
if (!spanContext ||
|
|
129
|
+
spanContext.traceId === "00000000000000000000000000000000") {
|
|
130
|
+
return {};
|
|
131
|
+
}
|
|
132
|
+
return {
|
|
133
|
+
trace_id: spanContext.traceId,
|
|
134
|
+
span_id: spanContext.spanId,
|
|
135
|
+
trace_flags: String(spanContext.traceFlags),
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
catch {
|
|
139
|
+
return {};
|
|
140
|
+
}
|
|
141
|
+
}
|
|
98
142
|
/**
|
|
99
143
|
* Safely serialize data to fully expanded JSON string.
|
|
100
144
|
* Handles circular references and non-serializable values.
|
|
@@ -163,11 +207,15 @@ class NeuroLinkLogger {
|
|
|
163
207
|
warn: console.warn,
|
|
164
208
|
error: console.error,
|
|
165
209
|
}[level];
|
|
210
|
+
const traceCtx = this.getTraceContext();
|
|
211
|
+
const tracePrefix = traceCtx.trace_id
|
|
212
|
+
? ` [trace_id=${traceCtx.trace_id} span_id=${traceCtx.span_id}]`
|
|
213
|
+
: "";
|
|
166
214
|
if (data !== undefined && data !== null) {
|
|
167
|
-
logMethod(prefix, message, this.serializeData(data));
|
|
215
|
+
logMethod(prefix + tracePrefix, message, this.serializeData(data));
|
|
168
216
|
}
|
|
169
217
|
else {
|
|
170
|
-
logMethod(prefix, message);
|
|
218
|
+
logMethod(prefix + tracePrefix, message);
|
|
171
219
|
}
|
|
172
220
|
}
|
|
173
221
|
/**
|
|
@@ -436,56 +436,28 @@ function shouldUseStructuredOutput(options) {
|
|
|
436
436
|
}
|
|
437
437
|
/**
|
|
438
438
|
* Log structural metadata about a composed message array without logging content.
|
|
439
|
-
*
|
|
439
|
+
* Only logs a compact summary (role counts, total chars, estimated tokens); non-string content contributes 0 chars.
|
|
440
|
+
* Per-message breakdown is intentionally omitted to avoid log noise
|
|
441
|
+
* (~600 lines per retry cascade with many messages).
|
|
440
442
|
*/
|
|
441
443
|
function logMessageComposition(messages, requestId) {
|
|
442
|
-
|
|
443
|
-
if (!logger.shouldLog("info")) {
|
|
444
|
+
if (!logger.shouldLog("debug")) {
|
|
444
445
|
return;
|
|
445
446
|
}
|
|
446
447
|
const roles = {};
|
|
447
448
|
let totalChars = 0;
|
|
448
449
|
for (const msg of messages) {
|
|
449
|
-
// Avoid JSON.stringify on multimodal content for the info-level summary;
|
|
450
|
-
// accurate per-message breakdown (with sizes) is computed only when debug
|
|
451
|
-
// logging is active (see below).
|
|
452
450
|
const chars = typeof msg.content === "string" ? msg.content.length : 0;
|
|
453
451
|
roles[msg.role] = (roles[msg.role] || 0) + 1;
|
|
454
452
|
totalChars += chars;
|
|
455
453
|
}
|
|
456
|
-
logger.
|
|
454
|
+
logger.debug("[MessageBuilder] Composed", {
|
|
457
455
|
requestId,
|
|
458
456
|
totalMessages: messages.length,
|
|
459
457
|
roles,
|
|
460
458
|
totalChars,
|
|
461
459
|
estimatedTokens: Math.ceil(totalChars / 4),
|
|
462
460
|
});
|
|
463
|
-
if (logger.shouldLog("debug")) {
|
|
464
|
-
const breakdown = messages.map((msg, i) => {
|
|
465
|
-
let chars;
|
|
466
|
-
if (typeof msg.content === "string") {
|
|
467
|
-
chars = msg.content.length;
|
|
468
|
-
}
|
|
469
|
-
else {
|
|
470
|
-
try {
|
|
471
|
-
chars = JSON.stringify(msg.content).length;
|
|
472
|
-
}
|
|
473
|
-
catch {
|
|
474
|
-
chars = String(msg.content).length;
|
|
475
|
-
}
|
|
476
|
-
}
|
|
477
|
-
return {
|
|
478
|
-
index: i,
|
|
479
|
-
role: msg.role,
|
|
480
|
-
chars,
|
|
481
|
-
estimatedTokens: Math.ceil(chars / 4),
|
|
482
|
-
};
|
|
483
|
-
});
|
|
484
|
-
logger.debug("[MessageBuilder] Per-message breakdown", {
|
|
485
|
-
requestId,
|
|
486
|
-
breakdown,
|
|
487
|
-
});
|
|
488
|
-
}
|
|
489
461
|
}
|
|
490
462
|
/**
|
|
491
463
|
* Build a properly formatted message array for AI providers
|
|
@@ -663,10 +635,11 @@ function enforceFileBudget(options, provider, model) {
|
|
|
663
635
|
fileType: f.fileType,
|
|
664
636
|
})), availableTokens);
|
|
665
637
|
if (budgetResult.excluded.length > 0) {
|
|
666
|
-
const
|
|
638
|
+
const includedIndices = new Set(budgetResult.included.map((f) => {
|
|
639
|
+
return budgetFiles.findIndex((bf) => bf.name === f.name);
|
|
640
|
+
}));
|
|
667
641
|
options.input.files = options.input.files.filter((_file, idx) => {
|
|
668
|
-
|
|
669
|
-
return includedNames.has(entry.name);
|
|
642
|
+
return includedIndices.has(idx);
|
|
670
643
|
});
|
|
671
644
|
options.input.text =
|
|
672
645
|
(options.input.text || "") + "\n\n" + budgetResult.notices.join("\n");
|
|
@@ -1071,12 +1044,19 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
|
1071
1044
|
// Add conversation history if available
|
|
1072
1045
|
const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0;
|
|
1073
1046
|
if (hasConversationHistory && options.conversationHistory) {
|
|
1074
|
-
options.conversationHistory
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1047
|
+
for (const msg of options.conversationHistory) {
|
|
1048
|
+
// Filter out tool_call and tool_result roles — only user/assistant/system are valid for AI providers
|
|
1049
|
+
if (msg.role === "user" ||
|
|
1050
|
+
msg.role === "assistant" ||
|
|
1051
|
+
msg.role === "system") {
|
|
1052
|
+
const providerOptions = msg.providerOptions;
|
|
1053
|
+
messages.push({
|
|
1054
|
+
role: msg.role,
|
|
1055
|
+
content: msg.content,
|
|
1056
|
+
...(providerOptions && { providerOptions }),
|
|
1057
|
+
});
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1080
1060
|
}
|
|
1081
1061
|
// Handle multimodal content
|
|
1082
1062
|
try {
|
|
@@ -11,7 +11,7 @@ export function isGemini3Model(modelName) {
|
|
|
11
11
|
if (!isValidModelName(modelName)) {
|
|
12
12
|
return false;
|
|
13
13
|
}
|
|
14
|
-
return /^gemini-3(-.*)?$/i.test(modelName);
|
|
14
|
+
return /^gemini-3(\.\d+)?(-.*)?$/i.test(modelName);
|
|
15
15
|
}
|
|
16
16
|
export function isGemini25Model(modelName) {
|
|
17
17
|
if (!isValidModelName(modelName)) {
|
|
@@ -46,10 +46,10 @@ export function getMaxThinkingBudgetTokens(modelName) {
|
|
|
46
46
|
if (!isValidModelName(modelName)) {
|
|
47
47
|
return 10000;
|
|
48
48
|
}
|
|
49
|
-
if (/^gemini-3
|
|
49
|
+
if (/^gemini-3(\.\d+)?-pro/i.test(modelName)) {
|
|
50
50
|
return 100000;
|
|
51
51
|
}
|
|
52
|
-
if (/^gemini-3
|
|
52
|
+
if (/^gemini-3(\.\d+)?-flash/i.test(modelName)) {
|
|
53
53
|
return 50000;
|
|
54
54
|
}
|
|
55
55
|
if (/^gemini-2\.5/i.test(modelName)) {
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-level retry utility for AI SDK calls (NL11)
|
|
3
|
+
*
|
|
4
|
+
* The Vercel AI SDK's `generateText()` and `streamText()` have built-in retry
|
|
5
|
+
* logic (`_retryWithExponentialBackoff()` with default `maxRetries: 2`) that
|
|
6
|
+
* retries on HTTP 429/500/503. These retries are completely invisible to OTel
|
|
7
|
+
* because they happen inside the AI SDK.
|
|
8
|
+
*
|
|
9
|
+
* This module provides an instrumented retry wrapper that:
|
|
10
|
+
* 1. Expects callers to disable the AI SDK's internal retries (by passing `maxRetries: 0`)
|
|
11
|
+
* 2. Implements our own retry loop with full OTel span events
|
|
12
|
+
* 3. Records retry attempts, delays, status codes, and total attempt count
|
|
13
|
+
*
|
|
14
|
+
* @module utils/providerRetry
|
|
15
|
+
*/
|
|
16
|
+
import { type Span } from "@opentelemetry/api";
|
|
17
|
+
/** Maximum number of retry attempts after the initial call (total = 1 + MAX_PROVIDER_RETRIES). */
|
|
18
|
+
export declare const MAX_PROVIDER_RETRIES = 2;
|
|
19
|
+
/** Base delay in ms for exponential backoff between retries. */
|
|
20
|
+
export declare const BASE_RETRY_DELAY_MS = 1000;
|
|
21
|
+
/**
|
|
22
|
+
* Check whether an error thrown by the AI SDK is retryable.
|
|
23
|
+
*
|
|
24
|
+
* Uses `APICallError.isInstance()` for proper type-safe detection (the class
|
|
25
|
+
* uses a branded symbol marker, so `instanceof` doesn't work across package
|
|
26
|
+
* boundaries). Falls back to duck-typing for non-APICallError cases.
|
|
27
|
+
*/
|
|
28
|
+
export declare function isRetryableProviderError(error: unknown): boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Extract the HTTP status code from an AI SDK error, if available.
|
|
31
|
+
*/
|
|
32
|
+
export declare function getErrorStatusCode(error: unknown): number | undefined;
|
|
33
|
+
/**
|
|
34
|
+
* Execute a provider call with instrumented retry logic.
|
|
35
|
+
*
|
|
36
|
+
* @param operation - The async operation to execute (should already use `maxRetries: 0`)
|
|
37
|
+
* @param span - The OTel span to annotate with retry events and attributes
|
|
38
|
+
* @param label - A human-readable label for log messages (e.g. "generateText", "streamText")
|
|
39
|
+
* @returns The result of the operation
|
|
40
|
+
*/
|
|
41
|
+
export declare function withProviderRetry<T>(operation: () => Promise<T>, span: Span, label: string): Promise<T>;
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-level retry utility for AI SDK calls (NL11)
|
|
3
|
+
*
|
|
4
|
+
* The Vercel AI SDK's `generateText()` and `streamText()` have built-in retry
|
|
5
|
+
* logic (`_retryWithExponentialBackoff()` with default `maxRetries: 2`) that
|
|
6
|
+
* retries on HTTP 429/500/503. These retries are completely invisible to OTel
|
|
7
|
+
* because they happen inside the AI SDK.
|
|
8
|
+
*
|
|
9
|
+
* This module provides an instrumented retry wrapper that:
|
|
10
|
+
* 1. Expects callers to disable the AI SDK's internal retries (by passing `maxRetries: 0`)
|
|
11
|
+
* 2. Implements our own retry loop with full OTel span events
|
|
12
|
+
* 3. Records retry attempts, delays, status codes, and total attempt count
|
|
13
|
+
*
|
|
14
|
+
* @module utils/providerRetry
|
|
15
|
+
*/
|
|
16
|
+
import {} from "@opentelemetry/api";
|
|
17
|
+
import { APICallError } from "@ai-sdk/provider";
|
|
18
|
+
import { logger } from "./logger.js";
|
|
19
|
+
/** Maximum number of retry attempts after the initial call (total = 1 + MAX_PROVIDER_RETRIES). */
|
|
20
|
+
export const MAX_PROVIDER_RETRIES = 2;
|
|
21
|
+
/** Base delay in ms for exponential backoff between retries. */
|
|
22
|
+
export const BASE_RETRY_DELAY_MS = 1000;
|
|
23
|
+
/**
|
|
24
|
+
* Check whether an error thrown by the AI SDK is retryable.
|
|
25
|
+
*
|
|
26
|
+
* Uses `APICallError.isInstance()` for proper type-safe detection (the class
|
|
27
|
+
* uses a branded symbol marker, so `instanceof` doesn't work across package
|
|
28
|
+
* boundaries). Falls back to duck-typing for non-APICallError cases.
|
|
29
|
+
*/
|
|
30
|
+
export function isRetryableProviderError(error) {
|
|
31
|
+
// Preferred path: use the AI SDK's own branded type check + isRetryable flag
|
|
32
|
+
if (APICallError.isInstance(error)) {
|
|
33
|
+
return error.isRetryable;
|
|
34
|
+
}
|
|
35
|
+
// Fallback: duck-type for status codes on errors that aren't APICallError
|
|
36
|
+
if (error && typeof error === "object" && "statusCode" in error) {
|
|
37
|
+
const statusCode = error.statusCode;
|
|
38
|
+
return statusCode === 429 || statusCode >= 500;
|
|
39
|
+
}
|
|
40
|
+
return false;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Extract the HTTP status code from an AI SDK error, if available.
|
|
44
|
+
*/
|
|
45
|
+
export function getErrorStatusCode(error) {
|
|
46
|
+
if (APICallError.isInstance(error)) {
|
|
47
|
+
return error.statusCode;
|
|
48
|
+
}
|
|
49
|
+
if (error && typeof error === "object" && "statusCode" in error) {
|
|
50
|
+
return error.statusCode;
|
|
51
|
+
}
|
|
52
|
+
return undefined;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Execute a provider call with instrumented retry logic.
|
|
56
|
+
*
|
|
57
|
+
* @param operation - The async operation to execute (should already use `maxRetries: 0`)
|
|
58
|
+
* @param span - The OTel span to annotate with retry events and attributes
|
|
59
|
+
* @param label - A human-readable label for log messages (e.g. "generateText", "streamText")
|
|
60
|
+
* @returns The result of the operation
|
|
61
|
+
*/
|
|
62
|
+
export async function withProviderRetry(operation, span, label) {
|
|
63
|
+
for (let attempt = 0; attempt <= MAX_PROVIDER_RETRIES; attempt++) {
|
|
64
|
+
try {
|
|
65
|
+
const result = await operation();
|
|
66
|
+
// Record how many attempts it took on the span
|
|
67
|
+
span.setAttribute("gen_ai.provider.total_attempts", attempt + 1);
|
|
68
|
+
if (attempt > 0) {
|
|
69
|
+
logger.info(`[providerRetry] ${label} succeeded after ${attempt + 1} attempts`);
|
|
70
|
+
}
|
|
71
|
+
return result;
|
|
72
|
+
}
|
|
73
|
+
catch (error) {
|
|
74
|
+
const retryable = isRetryableProviderError(error);
|
|
75
|
+
const statusCode = getErrorStatusCode(error);
|
|
76
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
77
|
+
if (!retryable || attempt === MAX_PROVIDER_RETRIES) {
|
|
78
|
+
// Record failure details before re-throwing
|
|
79
|
+
span.setAttribute("gen_ai.provider.total_attempts", attempt + 1);
|
|
80
|
+
if (attempt > 0) {
|
|
81
|
+
span.setAttribute("gen_ai.provider.retries_exhausted", true);
|
|
82
|
+
}
|
|
83
|
+
logger.warn(`[providerRetry] ${label} failed (non-retryable or retries exhausted)`, {
|
|
84
|
+
attempt: attempt + 1,
|
|
85
|
+
retryable,
|
|
86
|
+
statusCode,
|
|
87
|
+
error: errorMessage,
|
|
88
|
+
});
|
|
89
|
+
throw error;
|
|
90
|
+
}
|
|
91
|
+
// Calculate exponential backoff delay
|
|
92
|
+
const delay = BASE_RETRY_DELAY_MS * Math.pow(2, attempt);
|
|
93
|
+
// Record retry event on the OTel span
|
|
94
|
+
span.addEvent("gen_ai.provider.retry", {
|
|
95
|
+
"retry.attempt": attempt + 1,
|
|
96
|
+
"retry.delay_ms": delay,
|
|
97
|
+
...(statusCode !== undefined && { "retry.status_code": statusCode }),
|
|
98
|
+
"retry.error": errorMessage.slice(0, 256),
|
|
99
|
+
});
|
|
100
|
+
logger.warn(`[providerRetry] ${label} retrying after ${statusCode || "unknown"} error`, {
|
|
101
|
+
attempt: attempt + 1,
|
|
102
|
+
maxRetries: MAX_PROVIDER_RETRIES,
|
|
103
|
+
delayMs: delay,
|
|
104
|
+
statusCode,
|
|
105
|
+
error: errorMessage,
|
|
106
|
+
});
|
|
107
|
+
await new Promise((r) => setTimeout(r, delay));
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
// This should never be reached due to the throw inside the loop,
|
|
111
|
+
// but TypeScript requires it for exhaustiveness.
|
|
112
|
+
throw new Error(`[providerRetry] ${label} exhausted all retries`);
|
|
113
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared HTTP retryability constants.
|
|
3
|
+
*
|
|
4
|
+
* Centralises the status-code lists that were duplicated across
|
|
5
|
+
* httpRetryHandler, neurolink.ts, fileDetector.ts, and errorHelpers.
|
|
6
|
+
*/
|
|
7
|
+
/** Timeout, rate-limiting, and server-side codes worth retrying. */
|
|
8
|
+
export declare const RETRYABLE_HTTP_STATUS_CODES: readonly number[];
|
|
9
|
+
/** Client-error codes where retrying is pointless. */
|
|
10
|
+
export declare const NON_RETRYABLE_HTTP_STATUS_CODES: readonly number[];
|
|
11
|
+
/** Check whether an HTTP status code is retryable. */
|
|
12
|
+
export declare function isRetryableStatusCode(code: number): boolean;
|
|
13
|
+
/** Check whether an HTTP status code is non-retryable. */
|
|
14
|
+
export declare function isNonRetryableStatusCode(code: number): boolean;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared HTTP retryability constants.
|
|
3
|
+
*
|
|
4
|
+
* Centralises the status-code lists that were duplicated across
|
|
5
|
+
* httpRetryHandler, neurolink.ts, fileDetector.ts, and errorHelpers.
|
|
6
|
+
*/
|
|
7
|
+
/** Timeout, rate-limiting, and server-side codes worth retrying. */
|
|
8
|
+
export const RETRYABLE_HTTP_STATUS_CODES = [
|
|
9
|
+
408, 429, 500, 502, 503, 504,
|
|
10
|
+
];
|
|
11
|
+
/** Client-error codes where retrying is pointless. */
|
|
12
|
+
export const NON_RETRYABLE_HTTP_STATUS_CODES = [
|
|
13
|
+
400, 401, 403, 404, 405, 409, 422,
|
|
14
|
+
];
|
|
15
|
+
/** Check whether an HTTP status code is retryable. */
|
|
16
|
+
export function isRetryableStatusCode(code) {
|
|
17
|
+
return RETRYABLE_HTTP_STATUS_CODES.includes(code);
|
|
18
|
+
}
|
|
19
|
+
/** Check whether an HTTP status code is non-retryable. */
|
|
20
|
+
export function isNonRetryableStatusCode(code) {
|
|
21
|
+
return NON_RETRYABLE_HTTP_STATUS_CODES.includes(code);
|
|
22
|
+
}
|
|
@@ -339,8 +339,7 @@ function removeDangerousAttributes(content, removedItems) {
|
|
|
339
339
|
// Parse attributes
|
|
340
340
|
const attrRegex = /([a-zA-Z][a-zA-Z0-9:_-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
|
|
341
341
|
const safeAttrs = [];
|
|
342
|
-
let attrMatch = attrRegex.exec(attrs);
|
|
343
|
-
while (attrMatch !== null) {
|
|
342
|
+
for (let attrMatch = attrRegex.exec(attrs); attrMatch !== null; attrMatch = attrRegex.exec(attrs)) {
|
|
344
343
|
const attrName = attrMatch[1];
|
|
345
344
|
const attrValue = attrMatch[2] ?? attrMatch[3] ?? "";
|
|
346
345
|
const lowerAttrName = attrName.toLowerCase();
|
|
@@ -390,13 +389,13 @@ function removeDangerousAttributes(content, removedItems) {
|
|
|
390
389
|
}
|
|
391
390
|
// Attribute is safe, keep it
|
|
392
391
|
safeAttrs.push(`${attrName}="${escapeAttributeValue(attrValue)}"`);
|
|
393
|
-
// Get next match
|
|
394
|
-
attrMatch = attrRegex.exec(attrs);
|
|
395
392
|
}
|
|
396
393
|
// Also keep standalone attributes (like xmlns without value in some cases)
|
|
397
394
|
const standaloneAttrRegex = /\s([a-zA-Z][a-zA-Z0-9:_-]*)(?=\s|>|$|\/)/g;
|
|
398
395
|
let standaloneMatch = standaloneAttrRegex.exec(attrs);
|
|
399
|
-
|
|
396
|
+
let iterations = 0;
|
|
397
|
+
const MAX_ITERATIONS = 1000;
|
|
398
|
+
while (standaloneMatch !== null && iterations++ < MAX_ITERATIONS) {
|
|
400
399
|
const attrName = standaloneMatch[1];
|
|
401
400
|
// Only keep if it looks like a valid attribute and is safe
|
|
402
401
|
if (SAFE_SVG_ATTRIBUTES.has(attrName) ||
|
|
@@ -16,7 +16,17 @@ import type { ChatMessage } from "../types/conversation.js";
|
|
|
16
16
|
export declare const CHARS_PER_TOKEN = 4;
|
|
17
17
|
/** Characters per token for code */
|
|
18
18
|
export declare const CODE_CHARS_PER_TOKEN = 3;
|
|
19
|
-
/**
|
|
19
|
+
/**
|
|
20
|
+
* Safety margin: additive fraction of baseTokens added to the provider-adjusted estimate.
|
|
21
|
+
* Using additive margin prevents compounding with provider multipliers.
|
|
22
|
+
*
|
|
23
|
+
* Old behavior: baseTokens * providerMultiplier * 1.15 (compounding)
|
|
24
|
+
* e.g. Anthropic: baseTokens * 1.23 * 1.15 = baseTokens * 1.4145
|
|
25
|
+
* New behavior: baseTokens * providerMultiplier + baseTokens * 0.05 (additive)
|
|
26
|
+
* e.g. Anthropic: baseTokens * 1.23 + baseTokens * 0.05 = baseTokens * 1.28
|
|
27
|
+
*/
|
|
28
|
+
export declare const TOKEN_SAFETY_MARGIN_ADDITIVE = 0.05;
|
|
29
|
+
/** @deprecated Use TOKEN_SAFETY_MARGIN_ADDITIVE instead. Kept for backward compatibility. */
|
|
20
30
|
export declare const TOKEN_SAFETY_MARGIN = 1.15;
|
|
21
31
|
/** Message framing overhead in tokens (role + delimiters) */
|
|
22
32
|
export declare const TOKENS_PER_MESSAGE = 4;
|
|
@@ -15,7 +15,17 @@
|
|
|
15
15
|
export const CHARS_PER_TOKEN = 4;
|
|
16
16
|
/** Characters per token for code */
|
|
17
17
|
export const CODE_CHARS_PER_TOKEN = 3;
|
|
18
|
-
/**
|
|
18
|
+
/**
|
|
19
|
+
* Safety margin: additive fraction of baseTokens added to the provider-adjusted estimate.
|
|
20
|
+
* Using additive margin prevents compounding with provider multipliers.
|
|
21
|
+
*
|
|
22
|
+
* Old behavior: baseTokens * providerMultiplier * 1.15 (compounding)
|
|
23
|
+
* e.g. Anthropic: baseTokens * 1.23 * 1.15 = baseTokens * 1.4145
|
|
24
|
+
* New behavior: baseTokens * providerMultiplier + baseTokens * 0.05 (additive)
|
|
25
|
+
* e.g. Anthropic: baseTokens * 1.23 + baseTokens * 0.05 = baseTokens * 1.28
|
|
26
|
+
*/
|
|
27
|
+
export const TOKEN_SAFETY_MARGIN_ADDITIVE = 0.05;
|
|
28
|
+
/** @deprecated Use TOKEN_SAFETY_MARGIN_ADDITIVE instead. Kept for backward compatibility. */
|
|
19
29
|
export const TOKEN_SAFETY_MARGIN = 1.15;
|
|
20
30
|
/** Message framing overhead in tokens (role + delimiters) */
|
|
21
31
|
export const TOKENS_PER_MESSAGE = 4;
|
|
@@ -64,7 +74,11 @@ export function estimateTokens(text, provider, isCode) {
|
|
|
64
74
|
const charsPerToken = isCode ? CODE_CHARS_PER_TOKEN : CHARS_PER_TOKEN;
|
|
65
75
|
const baseTokens = Math.ceil(text.length / charsPerToken);
|
|
66
76
|
const multiplier = getProviderMultiplier(provider);
|
|
67
|
-
|
|
77
|
+
// Apply provider multiplier and additive safety margin separately
|
|
78
|
+
// This prevents compounding (e.g. Anthropic: 1.23 * 1.15 = 1.41x was too aggressive)
|
|
79
|
+
const providerAdjusted = baseTokens * multiplier;
|
|
80
|
+
const safetyBuffer = baseTokens * TOKEN_SAFETY_MARGIN_ADDITIVE;
|
|
81
|
+
return Math.ceil(providerAdjusted + safetyBuffer);
|
|
68
82
|
}
|
|
69
83
|
/**
|
|
70
84
|
* Estimate token count for a single ChatMessage.
|
|
@@ -114,8 +128,9 @@ export function truncateToTokenBudget(text, maxTokens, provider) {
|
|
|
114
128
|
return { text, truncated: false };
|
|
115
129
|
}
|
|
116
130
|
const multiplier = getProviderMultiplier(provider);
|
|
117
|
-
|
|
118
|
-
const
|
|
131
|
+
// Use additive safety margin: effective multiplier = multiplier + additive margin
|
|
132
|
+
const effectiveMultiplier = multiplier + TOKEN_SAFETY_MARGIN_ADDITIVE;
|
|
133
|
+
const maxChars = Math.floor((maxTokens / effectiveMultiplier) * CHARS_PER_TOKEN);
|
|
119
134
|
if (maxChars <= 0) {
|
|
120
135
|
return { text: "", truncated: true };
|
|
121
136
|
}
|
|
@@ -21,10 +21,14 @@ export function hasVideoFrames(messages) {
|
|
|
21
21
|
return false;
|
|
22
22
|
}
|
|
23
23
|
if (Array.isArray(msg.content)) {
|
|
24
|
-
|
|
24
|
+
// Count image parts — only route to video analysis pipeline when there are
|
|
25
|
+
// multiple frames (3+), indicating actual video frame extraction.
|
|
26
|
+
// Single images or pairs should use the model's native vision capability.
|
|
27
|
+
const imageCount = msg.content.filter((part) => typeof part === "object" &&
|
|
25
28
|
part !== null &&
|
|
26
29
|
"type" in part &&
|
|
27
|
-
part.type === "image");
|
|
30
|
+
part.type === "image").length;
|
|
31
|
+
return imageCount >= 3;
|
|
28
32
|
}
|
|
29
33
|
return false;
|
|
30
34
|
});
|
|
@@ -53,7 +57,7 @@ export async function executeVideoAnalysis(messages, options) {
|
|
|
53
57
|
? undefined
|
|
54
58
|
: process.env.GOOGLE_VERTEX_PROJECT || process.env.GOOGLE_CLOUD_PROJECT,
|
|
55
59
|
location: options.region || process.env.GOOGLE_VERTEX_LOCATION,
|
|
56
|
-
model: options.model || "gemini-2.
|
|
60
|
+
model: options.model || "gemini-2.5-flash",
|
|
57
61
|
});
|
|
58
62
|
logger.debug("[VideoAnalysisProcessor] Video analysis completed", {
|
|
59
63
|
hasResult: !!videoAnalysisText,
|