@juspay/neurolink 9.57.0 → 9.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,85 @@ import { getAvailableInputTokens } from "../constants/contextWindows.js";
10
10
  import { buildSummarizationPrompt } from "../context/prompts/summarizationPrompt.js";
11
11
  import { logger } from "./logger.js";
12
12
  const memoryTracer = tracers.memory;
13
+ /**
14
+ * Legacy sentinel string formerly written by the abort branch of
15
+ * handleGenerateTextInternalFailure (Curator SI-069 / SI-071). The producer is
16
+ * removed in this fix, but historical Redis sessions may still contain entries
17
+ * with this content. Filtered at the prompt-builder boundary so they never
18
+ * reach the provider — sessions self-heal on the next read without any
19
+ * migration. Keep in sync with any future renames; do not remove without a
20
+ * cross-repo grep.
21
+ */
22
+ export const ABORT_LEGACY_SENTINEL = "[generation was interrupted]";
23
+ /**
24
+ * Tracks session IDs that have already emitted the
25
+ * "Dropped polluted assistant turns" warn log so we log once per session
26
+ * (not on every retrieval). The span attribute
27
+ * `neurolink.memory.polluted_turns_dropped` is still set every call, so
28
+ * Langfuse traces show the cleanup happening continuously even after the
29
+ * log is suppressed. Bounded to avoid unbounded growth on busy services —
30
+ * when capacity is reached the set is cleared (cheap) and warning resumes
31
+ * as if those sessions are new, which is acceptable behaviour.
32
+ */
33
+ const POLLUTED_WARN_DEDUP_MAX = 1024;
34
+ const pollutedWarnedSessions = new Set();
35
+ /**
36
+ * True if a stored assistant turn looks like it was carrying tool activity
37
+ * (and is therefore safe to keep even with empty text content). storeTurn
38
+ * paths historically populate one of several fields depending on which
39
+ * provider/codepath wrote it, so this checks all of them. Mirrored across
40
+ * read filter + storage guard for symmetry.
41
+ *
42
+ * - `msg.events` — stream-path event sequence (`tool:start`, `tool:end`)
43
+ * - `msg.tool` / `msg.args` — assistant turn that invoked a tool by name
44
+ * - `msg.result` — tool result attached to the assistant turn
45
+ *
46
+ * If none of these are set, the assistant turn is text-only.
47
+ *
48
+ * Named with the `message` prefix to avoid shadowing the local
49
+ * `hasToolActivity` boolean inside `storeConversationTurn` below — the two
50
+ * answer different questions (one inspects a stored message, the other
51
+ * inspects a live result object).
52
+ */
53
+ function messageHasToolActivity(msg) {
54
+ if (msg.tool || msg.args || msg.result) {
55
+ return true;
56
+ }
57
+ const events = msg.events;
58
+ if (!Array.isArray(events)) {
59
+ return false;
60
+ }
61
+ return events.some((e) => {
62
+ const type = e?.type;
63
+ return type === "tool:start" || type === "tool:end";
64
+ });
65
+ }
66
+ /**
67
+ * Decides whether an assistant turn loaded from conversation memory is safe to
68
+ * include in the prompt sent to the provider. Drops:
69
+ * - empty / whitespace-only text content with no tool activity
70
+ * - the legacy abort sentinel — but only when the turn carries no tool
71
+ * activity, mirroring the storeConversationTurn upper-layer guard so a
72
+ * hypothetical tool-call-then-aborted turn doesn't lose its tool half.
73
+ * tool_call and tool_result role messages are always preserved — they
74
+ * legitimately carry empty `content` (see redisConversationMemoryManager.ts:1870
75
+ * "Can be empty for tool calls"). Filtering them would break tool-pair
76
+ * semantics that downstream `repairToolPairs` relies on.
77
+ */
78
+ function isPollutedAssistantTurn(msg) {
79
+ if (msg.role !== "assistant") {
80
+ return false;
81
+ }
82
+ const content = typeof msg.content === "string" ? msg.content : "";
83
+ const trimmed = content.trim();
84
+ if (trimmed === ABORT_LEGACY_SENTINEL) {
85
+ return !messageHasToolActivity(msg);
86
+ }
87
+ if (trimmed === "") {
88
+ return !messageHasToolActivity(msg);
89
+ }
90
+ return false;
91
+ }
13
92
  // Cached NeuroLink instance for summarization to avoid creating a new instance per call
14
93
  let cachedSummarizer = null;
15
94
  /**
@@ -66,12 +145,49 @@ export async function getConversationMessages(conversationMemory, options) {
66
145
  span.setAttribute("user.id", userId);
67
146
  }
68
147
  const enableSummarization = options.enableSummarization ?? undefined;
69
- const messages = await conversationMemory.buildContextMessages(sessionId, userId, enableSummarization);
148
+ const rawMessages = await conversationMemory.buildContextMessages(sessionId, userId, enableSummarization);
149
+ // Read-time filter: drop assistant turns that are empty/whitespace or
150
+ // carry the legacy abort sentinel before they reach the provider.
151
+ // Self-heals historical Redis sessions polluted by the now-removed
152
+ // abort-path memory write (Curator SI-069 / SI-071) and defends
153
+ // against any future "fabricate-on-error" regression. Telemetry
154
+ // attributes record how many turns were dropped so polluted sessions
155
+ // are visible in Langfuse traces.
156
+ const messages = rawMessages.filter((msg) => !isPollutedAssistantTurn(msg));
157
+ const droppedCount = rawMessages.length - messages.length;
158
+ if (droppedCount > 0) {
159
+ // Span attribute is always set so polluted sessions stay visible in
160
+ // Langfuse traces on every read — that's the persistent debugging
161
+ // signal. The warn log is deduped per session so a long-lived
162
+ // polluted conversation only generates one log line, not one per
163
+ // turn (would otherwise be noisy at scale).
164
+ span.setAttribute("neurolink.memory.polluted_turns_dropped", droppedCount);
165
+ const alreadyWarned = pollutedWarnedSessions.has(sessionId);
166
+ if (!alreadyWarned) {
167
+ if (pollutedWarnedSessions.size >= POLLUTED_WARN_DEDUP_MAX) {
168
+ pollutedWarnedSessions.clear();
169
+ }
170
+ pollutedWarnedSessions.add(sessionId);
171
+ logger.warn("[conversationMemoryUtils] Dropped polluted assistant turns from prompt context (logged once per session — span attribute records every read)", {
172
+ sessionId,
173
+ droppedCount,
174
+ remainingCount: messages.length,
175
+ });
176
+ }
177
+ else {
178
+ logger.debug("[conversationMemoryUtils] Dropped polluted assistant turns (warn already logged for this session)", {
179
+ sessionId,
180
+ droppedCount,
181
+ remainingCount: messages.length,
182
+ });
183
+ }
184
+ }
70
185
  span.setAttribute("message.count", messages.length);
71
186
  if (logger.shouldLog("debug")) {
72
187
  logger.debug("[conversationMemoryUtils] Conversation messages retrieved successfully", {
73
188
  sessionId,
74
189
  messageCount: messages.length,
190
+ droppedPollutedCount: droppedCount,
75
191
  messageTypes: messages.map((m) => m.role),
76
192
  });
77
193
  }
@@ -147,6 +263,19 @@ export async function storeConversationTurn(conversationMemory, originalOptions,
147
263
  });
148
264
  return;
149
265
  }
266
+ // Belt-and-braces guard against the abort sentinel (Curator SI-069 / SI-071).
267
+ // The abort path itself was fixed in handleGenerateTextInternalFailure to
268
+ // never call this function, but we reject the legacy sentinel here too so a
269
+ // future regression cannot re-introduce the same pollution. Tool-bearing
270
+ // turns are explicitly preserved (the model may call a tool then abort).
271
+ if (aiResponse.trim() === ABORT_LEGACY_SENTINEL && !hasToolActivity) {
272
+ logger.warn("[conversationMemoryUtils] Refusing to store legacy abort sentinel — see Curator SI-069 / SI-071", {
273
+ sessionId,
274
+ userId,
275
+ userMessageLength: userMessage.length,
276
+ });
277
+ return;
278
+ }
150
279
  let providerDetails;
151
280
  if (result.provider && result.model) {
152
281
  providerDetails = {
@@ -154,6 +283,60 @@ export async function storeConversationTurn(conversationMemory, originalOptions,
154
283
  model: result.model,
155
284
  };
156
285
  }
286
+ // Persist a minimal `events` marker only on tool-bearing assistant turns
287
+ // whose surface text would otherwise trigger the read-time filter (empty /
288
+ // whitespace-only content). Turns that already have substantive text are
289
+ // never dropped by isPollutedAssistantTurn, so attaching synthesised events
290
+ // to them would change the stored shape and token estimation for no
291
+ // benefit. Sentinel-content turns never reach this point — the upper-layer
292
+ // guard at line 340 short-circuits them.
293
+ let toolActivityEvents;
294
+ if (hasToolActivity && !aiResponse.trim()) {
295
+ const now = Date.now();
296
+ const usedNames = new Set();
297
+ if (Array.isArray(result.toolsUsed)) {
298
+ for (const t of result.toolsUsed) {
299
+ if (typeof t === "string" && t) {
300
+ usedNames.add(t);
301
+ }
302
+ }
303
+ }
304
+ if (Array.isArray(result.toolExecutions)) {
305
+ for (const exec of result.toolExecutions) {
306
+ const name = exec?.toolName;
307
+ if (typeof name === "string" && name) {
308
+ usedNames.add(name);
309
+ }
310
+ }
311
+ }
312
+ toolActivityEvents = [];
313
+ let seq = 0;
314
+ for (const name of usedNames) {
315
+ // Match the canonical ToolExecutionEvent shape (src/lib/types/tools.ts):
316
+ // `tool` is the required field, `toolName` is the documented compat
317
+ // alias. Populate both so downstream consumers reading either name
318
+ // work uniformly.
319
+ toolActivityEvents.push({
320
+ type: "tool:start",
321
+ seq: seq++,
322
+ timestamp: now,
323
+ tool: name,
324
+ toolName: name,
325
+ });
326
+ }
327
+ if (toolActivityEvents.length === 0) {
328
+ // Tool activity reported but no names extractable — still leave a
329
+ // marker so retrieval doesn't drop the turn. Both `tool` and
330
+ // `toolName` are populated for the same compat reason.
331
+ toolActivityEvents.push({
332
+ type: "tool:start",
333
+ seq: 0,
334
+ timestamp: now,
335
+ tool: "unknown",
336
+ toolName: "unknown",
337
+ });
338
+ }
339
+ }
157
340
  await memoryTracer.startActiveSpan("neurolink.conversation.storeTurn", {
158
341
  kind: SpanKind.INTERNAL,
159
342
  attributes: {
@@ -174,6 +357,7 @@ export async function storeConversationTurn(conversationMemory, originalOptions,
174
357
  providerDetails,
175
358
  enableSummarization: originalOptions.enableSummarization,
176
359
  requestId,
360
+ events: toolActivityEvents,
177
361
  tokenUsage: result.usage
178
362
  ? {
179
363
  inputTokens: result.usage.input,
@@ -17,6 +17,7 @@ export declare const ERROR_CODES: {
17
17
  readonly PROVIDER_NOT_AVAILABLE: "PROVIDER_NOT_AVAILABLE";
18
18
  readonly PROVIDER_AUTH_FAILED: "PROVIDER_AUTH_FAILED";
19
19
  readonly PROVIDER_QUOTA_EXCEEDED: "PROVIDER_QUOTA_EXCEEDED";
20
+ readonly OPERATION_ABORTED: "OPERATION_ABORTED";
20
21
  readonly INVALID_CONFIGURATION: "INVALID_CONFIGURATION";
21
22
  readonly MISSING_CONFIGURATION: "MISSING_CONFIGURATION";
22
23
  readonly INVALID_VIDEO_RESOLUTION: "INVALID_VIDEO_RESOLUTION";
@@ -106,6 +107,18 @@ export declare class ErrorFactory {
106
107
  * Create a memory exhaustion error
107
108
  */
108
109
  static memoryExhausted(toolName: string, memoryUsageMB: number): NeuroLinkError;
110
+ /**
111
+ * Create a typed abort error preserving the originating exception. Callers
112
+ * can switch on `error.category === ErrorCategory.ABORT` and
113
+ * `error.code === ERROR_CODES.OPERATION_ABORTED` instead of message-string
114
+ * matching DOMException / AI SDK error wrappers.
115
+ *
116
+ * `error.name` is intentionally set to "AbortError" (overriding the default
117
+ * "NeuroLinkError") so existing callers that branch on
118
+ * `err.name === "AbortError"` keep working without code changes — the new
119
+ * structured fields (category, code, retriable) are additive.
120
+ */
121
+ static aborted(originalError?: Error): NeuroLinkError;
109
122
  /**
110
123
  * Create a missing configuration error (e.g., missing API key)
111
124
  */
@@ -23,6 +23,8 @@ export const ERROR_CODES = {
23
23
  PROVIDER_NOT_AVAILABLE: "PROVIDER_NOT_AVAILABLE",
24
24
  PROVIDER_AUTH_FAILED: "PROVIDER_AUTH_FAILED",
25
25
  PROVIDER_QUOTA_EXCEEDED: "PROVIDER_QUOTA_EXCEEDED",
26
+ // Cancellation
27
+ OPERATION_ABORTED: "OPERATION_ABORTED",
26
28
  // Configuration errors
27
29
  INVALID_CONFIGURATION: "INVALID_CONFIGURATION",
28
30
  MISSING_CONFIGURATION: "MISSING_CONFIGURATION",
@@ -201,6 +203,30 @@ export class ErrorFactory {
201
203
  toolName,
202
204
  });
203
205
  }
206
+ /**
207
+ * Create a typed abort error preserving the originating exception. Callers
208
+ * can switch on `error.category === ErrorCategory.ABORT` and
209
+ * `error.code === ERROR_CODES.OPERATION_ABORTED` instead of message-string
210
+ * matching DOMException / AI SDK error wrappers.
211
+ *
212
+ * `error.name` is intentionally set to "AbortError" (overriding the default
213
+ * "NeuroLinkError") so existing callers that branch on
214
+ * `err.name === "AbortError"` keep working without code changes — the new
215
+ * structured fields (category, code, retriable) are additive.
216
+ */
217
+ static aborted(originalError) {
218
+ const err = new NeuroLinkError({
219
+ code: ERROR_CODES.OPERATION_ABORTED,
220
+ message: originalError?.message || "The operation was aborted",
221
+ category: ErrorCategory.ABORT,
222
+ severity: ErrorSeverity.LOW,
223
+ retriable: false,
224
+ context: {},
225
+ originalError,
226
+ });
227
+ err.name = "AbortError";
228
+ return err;
229
+ }
204
230
  // ============================================================================
205
231
  // CONFIGURATION ERRORS
206
232
  // ============================================================================
@@ -904,6 +930,11 @@ export function isAbortError(error) {
904
930
  if (error instanceof Error && error.name === "AbortError") {
905
931
  return true;
906
932
  }
933
+ // Typed NeuroLinkError abort - canonical from-now-on shape.
934
+ if (error instanceof NeuroLinkError &&
935
+ error.category === ErrorCategory.ABORT) {
936
+ return true;
937
+ }
907
938
  if (error instanceof Error &&
908
939
  (error.message?.includes("This operation was aborted") ||
909
940
  error.message?.includes("The operation was aborted") ||
@@ -60,6 +60,7 @@ export declare class NeuroLink {
60
60
  private pendingAuthConfig?;
61
61
  private authInitPromise?;
62
62
  private credentials?;
63
+ private readonly fallbackConfig;
63
64
  /**
64
65
  * Merge instance-level credentials with per-call credentials.
65
66
  *
@@ -541,6 +542,21 @@ export declare class NeuroLink {
541
542
  * @since 1.0.0
542
543
  */
543
544
  generate(optionsOrPrompt: GenerateOptions | DynamicOptions | string): Promise<GenerateResult>;
545
+ /**
546
+ * Curator P2-3: wraps a generate/stream call with the fallback
547
+ * orchestration (`providerFallback` callback + `modelChain` walker).
548
+ *
549
+ * On a model-access-denied error from the inner call:
550
+ * 1. Resolve the effective callback (per-call > instance > synthesised
551
+ * from modelChain) and the effective chain (per-call > instance).
552
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
553
+ * `model.fallback` event → re-call inner with the new {provider,
554
+ * model}.
555
+ * 3. Stop on first success, on a callback returning null, or after
556
+ * exhausting the chain (throw the most recent error).
557
+ */
558
+ private runWithFallbackOrchestration;
559
+ private attemptInner;
544
560
  private executeGenerateWithMetricsContext;
545
561
  private executeGenerateRequest;
546
562
  private prepareGenerateRequest;
@@ -697,6 +713,25 @@ export declare class NeuroLink {
697
713
  * @throws {Error} When conversation memory operations fail (if enabled)
698
714
  */
699
715
  stream(options: StreamOptions | DynamicOptions): Promise<StreamResult>;
716
+ /**
717
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
718
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
719
+ * `createLiteLLMTransformedStream`). The standard
720
+ * `runWithFallbackOrchestration` only catches errors thrown while the
721
+ * `StreamResult` is being created — once we hand the iterator back to
722
+ * the caller, errors raised during consumption used to bypass
723
+ * `providerFallback` / `modelChain`.
724
+ *
725
+ * This wrapper runs the orchestration to get an initial StreamResult,
726
+ * then wraps `result.stream` so that:
727
+ * - chunks are forwarded transparently while consumption succeeds
728
+ * - if iteration throws a model-access-denied error AND no chunks
729
+ * have been yielded yet, we resolve the next fallback target,
730
+ * emit `model.fallback`, and recurse
731
+ * - if chunks were already yielded, the error propagates (mid-stream
732
+ * recovery isn't safe — the consumer has half a response)
733
+ */
734
+ private streamWithIterationFallback;
700
735
  private executeStreamRequest;
701
736
  private validateStreamRequestOptions;
702
737
  private maybeHandleWorkflowStreamRequest;
@@ -881,8 +916,12 @@ export declare class NeuroLink {
881
916
  * **Generation Events:**
882
917
  * - `generation:start` - Fired when text generation begins
883
918
  * - `{ provider: string, timestamp: number }`
884
- * - `generation:end` - Fired when text generation completes
885
- * - `{ provider: string, responseTime: number, toolsUsed?: string[], timestamp: number }`
919
+ * - `generation:end` - Fired when text generation completes (or fails / is aborted)
920
+ * - `{ provider: string, responseTime: number, toolsUsed?: string[], timestamp: number, success?: boolean, aborted?: boolean, error?: string }`
921
+ * - `success` is `false` for both failures and client aborts; `aborted: true`
922
+ * distinguishes the latter so consumers can route cancellations
923
+ * differently from real errors. Pipeline B's metrics span maps
924
+ * `aborted: true` events to `SpanStatus.WARNING` (not ERROR).
886
925
  *
887
926
  * **Streaming Events:**
888
927
  * - `stream:start` - Fired when streaming begins