@juspay/neurolink 9.70.7 → 9.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +362 -344
- package/dist/core/constants.d.ts +1 -0
- package/dist/core/constants.js +2 -0
- package/dist/core/toolRouting.d.ts +59 -0
- package/dist/core/toolRouting.js +232 -0
- package/dist/lib/core/constants.d.ts +1 -0
- package/dist/lib/core/constants.js +2 -0
- package/dist/lib/core/toolRouting.d.ts +59 -0
- package/dist/lib/core/toolRouting.js +233 -0
- package/dist/lib/neurolink.d.ts +31 -1
- package/dist/lib/neurolink.js +241 -17
- package/dist/lib/providers/googleVertex.js +257 -30
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
- package/dist/lib/telemetry/attributes.d.ts +31 -0
- package/dist/lib/telemetry/attributes.js +48 -0
- package/dist/lib/telemetry/index.d.ts +1 -1
- package/dist/lib/telemetry/index.js +1 -1
- package/dist/lib/types/config.d.ts +8 -0
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/toolRouting.d.ts +91 -0
- package/dist/lib/types/toolRouting.js +19 -0
- package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
- package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
- package/dist/lib/utils/mcpErrorText.d.ts +16 -0
- package/dist/lib/utils/mcpErrorText.js +36 -0
- package/dist/neurolink.d.ts +31 -1
- package/dist/neurolink.js +241 -17
- package/dist/providers/googleVertex.js +257 -30
- package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
- package/dist/services/server/ai/observability/instrumentation.js +36 -1
- package/dist/telemetry/attributes.d.ts +31 -0
- package/dist/telemetry/attributes.js +48 -0
- package/dist/telemetry/index.d.ts +1 -1
- package/dist/telemetry/index.js +1 -1
- package/dist/types/config.d.ts +8 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/toolRouting.d.ts +91 -0
- package/dist/types/toolRouting.js +18 -0
- package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
- package/dist/utils/anthropicTraceSanitizer.js +25 -0
- package/dist/utils/mcpErrorText.d.ts +16 -0
- package/dist/utils/mcpErrorText.js +36 -0
- package/package.json +2 -1
package/dist/lib/neurolink.d.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Enhanced AI provider system with natural MCP tool access.
|
|
6
6
|
* Uses real MCP infrastructure for tool discovery and execution.
|
|
7
7
|
*/
|
|
8
|
-
import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext } from "./types/index.js";
|
|
8
|
+
import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext, ToolRoutingServerDescriptor } from "./types/index.js";
|
|
9
9
|
import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
|
|
10
10
|
import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
|
|
11
11
|
import { ExternalServerManager } from "./mcp/externalServerManager.js";
|
|
@@ -100,6 +100,7 @@ export declare class NeuroLink {
|
|
|
100
100
|
conversationMemory?: ConversationMemoryManager | RedisConversationMemoryManager | null;
|
|
101
101
|
private conversationMemoryNeedsInit;
|
|
102
102
|
private conversationMemoryConfig?;
|
|
103
|
+
private toolRoutingConfig?;
|
|
103
104
|
private enableOrchestration;
|
|
104
105
|
private authProvider?;
|
|
105
106
|
private pendingAuthConfig?;
|
|
@@ -803,6 +804,35 @@ export declare class NeuroLink {
|
|
|
803
804
|
*/
|
|
804
805
|
private streamWithIterationFallback;
|
|
805
806
|
private executeStreamRequest;
|
|
807
|
+
/**
|
|
808
|
+
* Pre-call tool routing for stream(): runs the router LLM once per turn
|
|
809
|
+
* and appends the unpicked servers' registered tool names to
|
|
810
|
+
* `options.excludeTools` — the per-call denylist enforced by
|
|
811
|
+
* `baseProvider.applyToolFiltering`. No-op unless `toolRouting.enabled`
|
|
812
|
+
* is true and a non-empty server catalog has been supplied. Never throws
|
|
813
|
+
* on routing failure (the resolver fails open to an empty exclusion list); abort errors from a cancelled stream still propagate.
|
|
814
|
+
*/
|
|
815
|
+
private applyToolRoutingExclusions;
|
|
816
|
+
/**
|
|
817
|
+
* Loads a bounded window of prior conversation turns for the router so a
|
|
818
|
+
* follow-up turn carries the context it needs to classify intent. Reads this
|
|
819
|
+
* turn's conversation memory (keyed by `context.sessionId`) with
|
|
820
|
+
* summarization disabled to keep the router cheap. Fails open to an empty
|
|
821
|
+
* list — routing then falls back to the current query alone (prior
|
|
822
|
+
* behaviour). On the first turn of a conversation, memory may not be
|
|
823
|
+
* initialised yet; that also yields an empty list, which is fine since the
|
|
824
|
+
* opening message already carries its own context.
|
|
825
|
+
*/
|
|
826
|
+
private fetchRecentRoutingHistory;
|
|
827
|
+
/**
|
|
828
|
+
* Supplies (or replaces) the pre-call tool routing server catalog.
|
|
829
|
+
*
|
|
830
|
+
* For hosts that only know their tool servers after constructing NeuroLink
|
|
831
|
+
* (e.g. tools are registered per session/conversation). Routing must still
|
|
832
|
+
* be enabled via the constructor's `toolRouting.enabled` — setting servers
|
|
833
|
+
* alone does not activate it.
|
|
834
|
+
*/
|
|
835
|
+
setToolRoutingServers(servers: ToolRoutingServerDescriptor[]): void;
|
|
806
836
|
private validateStreamRequestOptions;
|
|
807
837
|
private maybeHandleWorkflowStreamRequest;
|
|
808
838
|
private runStandardStreamRequest;
|
package/dist/lib/neurolink.js
CHANGED
|
@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
|
|
|
28
28
|
import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
|
|
29
29
|
import { ContextBudgetExceededError } from "./context/errors.js";
|
|
30
30
|
import { repairToolPairs } from "./context/toolPairRepair.js";
|
|
31
|
-
import { SYSTEM_LIMITS } from "./core/constants.js";
|
|
31
|
+
import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
|
|
32
32
|
import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
|
|
33
|
+
import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
|
|
33
34
|
import { AIProviderFactory } from "./core/factory.js";
|
|
34
35
|
import { createToolEventPayload } from "./core/toolEvents.js";
|
|
35
36
|
import { ProviderRegistry } from "./factories/providerRegistry.js";
|
|
@@ -55,7 +56,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
|
|
|
55
56
|
import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
|
|
56
57
|
import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
|
|
57
58
|
import { SpanSerializer } from "./observability/utils/spanSerializer.js";
|
|
58
|
-
import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
|
|
59
|
+
import { flushOpenTelemetry, getLangfuseContext, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, stampGuestRescueIdentity, } from "./services/server/ai/observability/instrumentation.js";
|
|
59
60
|
import { TaskManager } from "./tasks/taskManager.js";
|
|
60
61
|
import { createTaskTools } from "./tasks/tools/taskTools.js";
|
|
61
62
|
import { ATTR } from "./telemetry/attributes.js";
|
|
@@ -436,6 +437,10 @@ export class NeuroLink {
|
|
|
436
437
|
conversationMemory;
|
|
437
438
|
conversationMemoryNeedsInit = false;
|
|
438
439
|
conversationMemoryConfig;
|
|
440
|
+
// Pre-call tool routing: instance-level config from the constructor.
|
|
441
|
+
// The server catalog inside it can be supplied/replaced later via
|
|
442
|
+
// setToolRoutingServers() for hosts that register tools after construction.
|
|
443
|
+
toolRoutingConfig;
|
|
439
444
|
// Add orchestration property
|
|
440
445
|
enableOrchestration;
|
|
441
446
|
// Authentication provider for secure access control
|
|
@@ -842,6 +847,12 @@ export class NeuroLink {
|
|
|
842
847
|
if (config?.modelChain) {
|
|
843
848
|
this.fallbackConfig.modelChain = config.modelChain;
|
|
844
849
|
}
|
|
850
|
+
if (config?.toolRouting) {
|
|
851
|
+
// Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
|
|
852
|
+
// can't leak into the caller's config object, which may be shared across
|
|
853
|
+
// multiple NeuroLink instances.
|
|
854
|
+
this.toolRoutingConfig = { ...config.toolRouting };
|
|
855
|
+
}
|
|
845
856
|
logger.setEventEmitter(this.emitter);
|
|
846
857
|
// Read tool cache duration from environment variables, with a default
|
|
847
858
|
const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
|
|
@@ -1378,11 +1389,8 @@ Current user's request: ${currentInput}`;
|
|
|
1378
1389
|
* Calls add(userId, content) which internally condenses old + new via LLM.
|
|
1379
1390
|
* Supports additional users with per-user prompt and maxWords overrides.
|
|
1380
1391
|
*/
|
|
1381
|
-
storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
|
|
1382
|
-
|
|
1383
|
-
// memory writes appear under the originating Langfuse trace instead of
|
|
1384
|
-
// becoming orphan spans.
|
|
1385
|
-
const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
|
|
1392
|
+
storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers, langfuseIdentity) {
|
|
1393
|
+
const memoryWrite = async () => {
|
|
1386
1394
|
try {
|
|
1387
1395
|
const client = this.ensureMemoryReady();
|
|
1388
1396
|
if (!client) {
|
|
@@ -1408,7 +1416,21 @@ Current user's request: ${currentInput}`;
|
|
|
1408
1416
|
catch (error) {
|
|
1409
1417
|
logger.warn("Memory storage failed:", error);
|
|
1410
1418
|
}
|
|
1411
|
-
}
|
|
1419
|
+
};
|
|
1420
|
+
// Carry the turn's identity across the setImmediate boundary so the
|
|
1421
|
+
// condensation generate + redis spans don't orphan to "guest". Keep the
|
|
1422
|
+
// ambient store when it survived (generate path — carries conversationId,
|
|
1423
|
+
// metadata, …); re-establish from the caller only when it was lost (stream
|
|
1424
|
+
// path, which fires after the caller consumed the stream).
|
|
1425
|
+
const ambient = getLangfuseContext();
|
|
1426
|
+
const wrappedMemoryWrite = !(ambient?.traceName || ambient?.userId) &&
|
|
1427
|
+
(langfuseIdentity?.traceName || langfuseIdentity?.sessionId)
|
|
1428
|
+
? () => setLangfuseContext({
|
|
1429
|
+
userId,
|
|
1430
|
+
sessionId: langfuseIdentity.sessionId ?? null,
|
|
1431
|
+
traceName: langfuseIdentity.traceName ?? null,
|
|
1432
|
+
}, memoryWrite)
|
|
1433
|
+
: runWithCurrentLangfuseContext(memoryWrite);
|
|
1412
1434
|
setImmediate(wrappedMemoryWrite);
|
|
1413
1435
|
}
|
|
1414
1436
|
/**
|
|
@@ -2801,7 +2823,15 @@ Current user's request: ${currentInput}`;
|
|
|
2801
2823
|
}
|
|
2802
2824
|
const startedAt = Date.now();
|
|
2803
2825
|
try {
|
|
2804
|
-
return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) =>
|
|
2826
|
+
return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => {
|
|
2827
|
+
// Capture root-ness before startActiveSpan makes generateSpan active.
|
|
2828
|
+
// The actual guest-rescue stamp is deferred to executeGenerateRequest,
|
|
2829
|
+
// AFTER prepareGenerateRequest merges auth/requestContext-derived
|
|
2830
|
+
// identity into options.context — otherwise an auth:{token} caller
|
|
2831
|
+
// with no pre-set context.userId would stamp the root span as guest.
|
|
2832
|
+
const generateIsRoot = !trace.getSpan(context.active());
|
|
2833
|
+
return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan, generateIsRoot));
|
|
2834
|
+
});
|
|
2805
2835
|
}
|
|
2806
2836
|
catch (error) {
|
|
2807
2837
|
// Lifecycle middleware (wrapGenerate.catch in builtin/lifecycle.ts)
|
|
@@ -2973,14 +3003,17 @@ Current user's request: ${currentInput}`;
|
|
|
2973
3003
|
return { error };
|
|
2974
3004
|
}
|
|
2975
3005
|
}
|
|
2976
|
-
async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
|
|
2977
|
-
return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
|
|
3006
|
+
async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan, isRootSpan) {
|
|
3007
|
+
return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan));
|
|
2978
3008
|
}
|
|
2979
|
-
async executeGenerateRequest(optionsOrPrompt, generateSpan) {
|
|
3009
|
+
async executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan) {
|
|
2980
3010
|
let resolvedOptions;
|
|
2981
3011
|
try {
|
|
2982
3012
|
const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
|
|
2983
3013
|
resolvedOptions = options;
|
|
3014
|
+
// Stamp now that prepareGenerateRequest has merged any auth/requestContext
|
|
3015
|
+
// identity into options.context (see capture of isRootSpan in generate()).
|
|
3016
|
+
stampGuestRescueIdentity(generateSpan, options.context, isRootSpan);
|
|
2984
3017
|
const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
|
|
2985
3018
|
if (earlyResult) {
|
|
2986
3019
|
generateSpan.setStatus({ code: SpanStatusCode.OK });
|
|
@@ -3545,7 +3578,7 @@ Current user's request: ${currentInput}`;
|
|
|
3545
3578
|
// Memory storage
|
|
3546
3579
|
if (this.shouldWriteMemory(options.memory, options.context?.userId, generateResult.content) &&
|
|
3547
3580
|
options.context?.userId) {
|
|
3548
|
-
this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers);
|
|
3581
|
+
this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers, options.context);
|
|
3549
3582
|
}
|
|
3550
3583
|
}
|
|
3551
3584
|
/**
|
|
@@ -5531,10 +5564,20 @@ Current user's request: ${currentInput}`;
|
|
|
5531
5564
|
[ATTR.NL_PROVIDER]: options.provider || "default",
|
|
5532
5565
|
[ATTR.GEN_AI_MODEL]: options.model || "default",
|
|
5533
5566
|
[ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
|
|
5534
|
-
|
|
5567
|
+
// Count registered custom tools too — chat hosts put their MCP tools
|
|
5568
|
+
// in the registry, so options.tools alone under-reports.
|
|
5569
|
+
[ATTR.NL_HAS_TOOLS]: !options.disableTools &&
|
|
5570
|
+
(!!(options.tools && Object.keys(options.tools).length > 0) ||
|
|
5571
|
+
this.getCustomTools().size > 0),
|
|
5535
5572
|
[ATTR.NL_STREAM_MODE]: true,
|
|
5536
5573
|
},
|
|
5537
5574
|
});
|
|
5575
|
+
// streamSpan isn't active yet, so context.active() is its parent — empty =
|
|
5576
|
+
// root. Capture root-ness here, but defer the actual guest-rescue stamp to
|
|
5577
|
+
// after validateStreamRequestOptions merges auth/requestContext identity
|
|
5578
|
+
// into options.context (below) — otherwise an auth:{token} caller with no
|
|
5579
|
+
// pre-set context.userId would stamp the root span as guest.
|
|
5580
|
+
const streamIsRoot = !trace.getSpan(context.active());
|
|
5538
5581
|
const spanStartTime = Date.now();
|
|
5539
5582
|
this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
|
|
5540
5583
|
try {
|
|
@@ -5576,6 +5619,8 @@ Current user's request: ${currentInput}`;
|
|
|
5576
5619
|
const originalPrompt = options.input?.text ?? "";
|
|
5577
5620
|
options.fileRegistry = this.fileRegistry;
|
|
5578
5621
|
await this.validateStreamRequestOptions(options, startTime);
|
|
5622
|
+
// options.context now carries any auth/requestContext-derived identity.
|
|
5623
|
+
stampGuestRescueIdentity(streamSpan, options.context, streamIsRoot);
|
|
5579
5624
|
const workflowResult = await this.maybeHandleWorkflowStreamRequest({
|
|
5580
5625
|
options,
|
|
5581
5626
|
startTime,
|
|
@@ -5585,6 +5630,19 @@ Current user's request: ${currentInput}`;
|
|
|
5585
5630
|
if (workflowResult) {
|
|
5586
5631
|
return workflowResult;
|
|
5587
5632
|
}
|
|
5633
|
+
// Make neurolink.stream the active span so every provider span (generations,
|
|
5634
|
+
// tool calls) parents under it — one Langfuse trace per turn, not a forest.
|
|
5635
|
+
const streamSpanContext = trace.setSpan(context.active(), streamSpan);
|
|
5636
|
+
// Pre-call tool routing: run inside the stream-span + Langfuse context so
|
|
5637
|
+
// the router's own generation span nests under this turn's trace instead
|
|
5638
|
+
// of starting a separate one. Asks a cheap router LLM which tool servers
|
|
5639
|
+
// the query needs and appends the unpicked servers' tools to
|
|
5640
|
+
// `excludeTools`. Fails open (no exclusions). Routes on the current
|
|
5641
|
+
// prompt enriched with a bounded window of recent conversation turns
|
|
5642
|
+
// (pulled from conversation memory) so contextless follow-ups still
|
|
5643
|
+
// classify correctly. After the workflow short-circuit, so workflow
|
|
5644
|
+
// streams skip it.
|
|
5645
|
+
await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
|
|
5588
5646
|
// TTS Mode 2 deferred: stream() emits text first, then synthesizes the
|
|
5589
5647
|
// accumulated response into a single audio chunk at end-of-stream and
|
|
5590
5648
|
// resolves `streamResult.audio` with the same TTSResult. The resolver is
|
|
@@ -5599,7 +5657,7 @@ Current user's request: ${currentInput}`;
|
|
|
5599
5657
|
resolveStreamTtsAudio = resolve;
|
|
5600
5658
|
})
|
|
5601
5659
|
: undefined;
|
|
5602
|
-
const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
|
|
5660
|
+
const streamResult = await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
|
|
5603
5661
|
options,
|
|
5604
5662
|
streamSpan,
|
|
5605
5663
|
spanStartTime,
|
|
@@ -5608,7 +5666,7 @@ Current user's request: ${currentInput}`;
|
|
|
5608
5666
|
streamId,
|
|
5609
5667
|
originalPrompt,
|
|
5610
5668
|
ttsResolver: resolveStreamTtsAudio,
|
|
5611
|
-
}));
|
|
5669
|
+
})));
|
|
5612
5670
|
if (streamSttTranscription) {
|
|
5613
5671
|
streamResult.transcription = streamSttTranscription;
|
|
5614
5672
|
}
|
|
@@ -5629,6 +5687,172 @@ Current user's request: ${currentInput}`;
|
|
|
5629
5687
|
throw error;
|
|
5630
5688
|
}
|
|
5631
5689
|
}
|
|
5690
|
+
/**
|
|
5691
|
+
* Pre-call tool routing for stream(): runs the router LLM once per turn
|
|
5692
|
+
* and appends the unpicked servers' registered tool names to
|
|
5693
|
+
* `options.excludeTools` — the per-call denylist enforced by
|
|
5694
|
+
* `baseProvider.applyToolFiltering`. No-op unless `toolRouting.enabled`
|
|
5695
|
+
* is true and a non-empty server catalog has been supplied. Never throws
|
|
5696
|
+
* on routing failure (the resolver fails open to an empty exclusion list); abort errors from a cancelled stream still propagate.
|
|
5697
|
+
*/
|
|
5698
|
+
async applyToolRoutingExclusions(options, userQuery) {
|
|
5699
|
+
const routingConfig = this.toolRoutingConfig;
|
|
5700
|
+
if (!routingConfig?.enabled || options.disableTools) {
|
|
5701
|
+
return;
|
|
5702
|
+
}
|
|
5703
|
+
const servers = routingConfig.servers ?? [];
|
|
5704
|
+
if (servers.length === 0) {
|
|
5705
|
+
return;
|
|
5706
|
+
}
|
|
5707
|
+
// Whole setup is fail-open: catalog building (getCustomTools /
|
|
5708
|
+
// buildToolRoutingCatalog) and the router call degrade to no exclusions
|
|
5709
|
+
// rather than killing the stream, honoring this method's "never throws"
|
|
5710
|
+
// contract. Genuine stream cancellations still propagate.
|
|
5711
|
+
try {
|
|
5712
|
+
const registeredToolNames = Array.from(this.getCustomTools().keys());
|
|
5713
|
+
const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
|
|
5714
|
+
if (catalog.length === 0) {
|
|
5715
|
+
return;
|
|
5716
|
+
}
|
|
5717
|
+
// Fold a bounded window of recent conversation turns into the routing query.
|
|
5718
|
+
// The router runs pre-memory and would otherwise see only this turn's raw
|
|
5719
|
+
// text, so a contextless follow-up ("yes please") gives it nothing to
|
|
5720
|
+
// classify — it fails open and routing narrows nothing. The main model
|
|
5721
|
+
// still receives full history later via conversation memory; this only
|
|
5722
|
+
// enriches the router's view. Fails open to the current query alone.
|
|
5723
|
+
const recentMessages = await this.fetchRecentRoutingHistory(options);
|
|
5724
|
+
const routingQuery = recentMessages.length > 0
|
|
5725
|
+
? buildRoutingQueryFromHistory(recentMessages, userQuery)
|
|
5726
|
+
: userQuery;
|
|
5727
|
+
// The router call below re-enters the public generate(), whose finally
|
|
5728
|
+
// block resets _disableToolCacheForCurrentRequest to false. That flag is
|
|
5729
|
+
// stream-scoped (set at the top of this turn) and read by the main tool
|
|
5730
|
+
// execution path that runs after routing, so save it before the router
|
|
5731
|
+
// call and restore it afterward to keep the turn's cache setting intact.
|
|
5732
|
+
const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
|
|
5733
|
+
let routedExcludeTools;
|
|
5734
|
+
try {
|
|
5735
|
+
routedExcludeTools = await resolveToolRoutingExclusions({
|
|
5736
|
+
catalog,
|
|
5737
|
+
alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
|
|
5738
|
+
userQuery: routingQuery,
|
|
5739
|
+
routerPromptPrefix: routingConfig.routerPromptPrefix,
|
|
5740
|
+
routerModel: {
|
|
5741
|
+
provider: routingConfig.routerModel?.provider ??
|
|
5742
|
+
options.provider,
|
|
5743
|
+
model: routingConfig.routerModel?.model ?? options.model,
|
|
5744
|
+
region: routingConfig.routerModel?.region ?? options.region,
|
|
5745
|
+
temperature: routingConfig.routerModel?.temperature,
|
|
5746
|
+
},
|
|
5747
|
+
timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
|
|
5748
|
+
// Forward the stream's abort signal so a cancelled stream aborts the
|
|
5749
|
+
// router call promptly instead of waiting out the routing timeout.
|
|
5750
|
+
generateFn: (generateOptions) => this.generate({
|
|
5751
|
+
...generateOptions,
|
|
5752
|
+
abortSignal: options.abortSignal,
|
|
5753
|
+
}),
|
|
5754
|
+
});
|
|
5755
|
+
}
|
|
5756
|
+
finally {
|
|
5757
|
+
this._disableToolCacheForCurrentRequest =
|
|
5758
|
+
cacheDisabledForCurrentRequest;
|
|
5759
|
+
}
|
|
5760
|
+
// Aborted during the router call — skip applying now-stale exclusions;
|
|
5761
|
+
// the main generation path enforces the abort itself.
|
|
5762
|
+
if (options.abortSignal?.aborted) {
|
|
5763
|
+
return;
|
|
5764
|
+
}
|
|
5765
|
+
if (routedExcludeTools.length > 0) {
|
|
5766
|
+
options.excludeTools = [
|
|
5767
|
+
...(options.excludeTools ?? []),
|
|
5768
|
+
...routedExcludeTools,
|
|
5769
|
+
];
|
|
5770
|
+
}
|
|
5771
|
+
}
|
|
5772
|
+
catch (error) {
|
|
5773
|
+
if (isAbortError(error)) {
|
|
5774
|
+
throw error;
|
|
5775
|
+
}
|
|
5776
|
+
logger.warn("[ToolRouting] Routing setup failed, failing open", {
|
|
5777
|
+
error: error instanceof Error ? error.message : String(error),
|
|
5778
|
+
});
|
|
5779
|
+
}
|
|
5780
|
+
}
|
|
5781
|
+
/**
|
|
5782
|
+
* Loads a bounded window of prior conversation turns for the router so a
|
|
5783
|
+
* follow-up turn carries the context it needs to classify intent. Reads this
|
|
5784
|
+
* turn's conversation memory (keyed by `context.sessionId`) with
|
|
5785
|
+
* summarization disabled to keep the router cheap. Fails open to an empty
|
|
5786
|
+
* list — routing then falls back to the current query alone (prior
|
|
5787
|
+
* behaviour). On the first turn of a conversation, memory may not be
|
|
5788
|
+
* initialised yet; that also yields an empty list, which is fine since the
|
|
5789
|
+
* opening message already carries its own context.
|
|
5790
|
+
*/
|
|
5791
|
+
async fetchRecentRoutingHistory(options) {
|
|
5792
|
+
try {
|
|
5793
|
+
const requestContext = options.context;
|
|
5794
|
+
// Inline multi-turn callers pass prior turns via options.conversationMessages
|
|
5795
|
+
// (the same field the main model reads) rather than server-side session
|
|
5796
|
+
// memory. Honor it directly so a contextless follow-up still routes with
|
|
5797
|
+
// context even when no sessionId is present.
|
|
5798
|
+
if (options.conversationMessages &&
|
|
5799
|
+
options.conversationMessages.length > 0) {
|
|
5800
|
+
return options.conversationMessages;
|
|
5801
|
+
}
|
|
5802
|
+
const sessionId = requestContext?.sessionId;
|
|
5803
|
+
if (typeof sessionId !== "string" || !sessionId) {
|
|
5804
|
+
return [];
|
|
5805
|
+
}
|
|
5806
|
+
// The pre-call router runs earlier in the stream pipeline than the main
|
|
5807
|
+
// generation path's own memory init (initializeConversationMemoryForGeneration),
|
|
5808
|
+
// so this.conversationMemory is still undefined at router time and the
|
|
5809
|
+
// router would only ever see the current turn. Trigger the same lazy init
|
|
5810
|
+
// the main path uses — it is idempotent, so the later call is a no-op —
|
|
5811
|
+
// so the router can read prior turns. Fails open via the surrounding catch.
|
|
5812
|
+
await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
|
|
5813
|
+
const memory = this.conversationMemory;
|
|
5814
|
+
if (!memory) {
|
|
5815
|
+
return [];
|
|
5816
|
+
}
|
|
5817
|
+
// Reuse the SAME reader the main model uses so the router sees identically
|
|
5818
|
+
// curated history: polluted turns dropped, read instrumented under the
|
|
5819
|
+
// neurolink.conversation.getMessages span. enableSummarization=false keeps
|
|
5820
|
+
// routing cheap and free of any summary-LLM side effect. The remaining
|
|
5821
|
+
// tool_call/tool_result turns are dropped at transcript-render time
|
|
5822
|
+
// (buildRoutingQueryFromHistory) to mirror what the main model is sent.
|
|
5823
|
+
const messages = await getConversationMessages(memory, {
|
|
5824
|
+
...options,
|
|
5825
|
+
enableSummarization: false,
|
|
5826
|
+
});
|
|
5827
|
+
logger.debug("[ToolRouting] Loaded conversation history for router", {
|
|
5828
|
+
sessionId,
|
|
5829
|
+
messageCount: messages.length,
|
|
5830
|
+
});
|
|
5831
|
+
return messages;
|
|
5832
|
+
}
|
|
5833
|
+
catch (error) {
|
|
5834
|
+
logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
|
|
5835
|
+
error: error instanceof Error ? error.message : String(error),
|
|
5836
|
+
});
|
|
5837
|
+
return [];
|
|
5838
|
+
}
|
|
5839
|
+
}
|
|
5840
|
+
/**
|
|
5841
|
+
* Supplies (or replaces) the pre-call tool routing server catalog.
|
|
5842
|
+
*
|
|
5843
|
+
* For hosts that only know their tool servers after constructing NeuroLink
|
|
5844
|
+
* (e.g. tools are registered per session/conversation). Routing must still
|
|
5845
|
+
* be enabled via the constructor's `toolRouting.enabled` — setting servers
|
|
5846
|
+
* alone does not activate it.
|
|
5847
|
+
*/
|
|
5848
|
+
setToolRoutingServers(servers) {
|
|
5849
|
+
if (!this.toolRoutingConfig) {
|
|
5850
|
+
logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
|
|
5851
|
+
this.toolRoutingConfig = { enabled: false, servers };
|
|
5852
|
+
return;
|
|
5853
|
+
}
|
|
5854
|
+
this.toolRoutingConfig.servers = servers;
|
|
5855
|
+
}
|
|
5632
5856
|
async validateStreamRequestOptions(options, startTime) {
|
|
5633
5857
|
await this.validateStreamInput(options);
|
|
5634
5858
|
this.enforceSessionBudget(options.maxBudgetUsd);
|
|
@@ -6512,7 +6736,7 @@ Current user's request: ${currentInput}`;
|
|
|
6512
6736
|
}
|
|
6513
6737
|
}
|
|
6514
6738
|
if (this.shouldWriteMemory(enhancedOptions.memory, enhancedOptions.context?.userId, accumulatedContent)) {
|
|
6515
|
-
this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers);
|
|
6739
|
+
this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers, enhancedOptions.context);
|
|
6516
6740
|
}
|
|
6517
6741
|
}
|
|
6518
6742
|
/**
|