@juspay/neurolink 9.57.0 → 9.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -146,6 +146,36 @@ function mcpCategoryToErrorCategory(mcpCategory) {
146
146
  * For example, a NOT_FOUND error for a model causes 6 retries of a 418KB
147
147
  * message, wasting ~628,000 tokens and adding 10+ seconds of latency.
148
148
  */
149
/**
 * Curator P2-3: heuristically detect a "model access denied" failure without
 * requiring the typed ModelAccessDeniedError class to be present (Issue #1
 * ships that class separately).
 *
 * Detection order:
 *  1. typed markers: `name === "ModelAccessDeniedError"` or
 *     `code === "MODEL_ACCESS_DENIED"`;
 *  2. message text: LiteLLM-style "team ... not allowed", "team can only
 *     access", or "not allowed to access (this) model" (case-insensitive).
 *
 * @param error - Any thrown value (Error, string, plain payload object, ...).
 * @returns `true` when the value looks like a model-access-denied error.
 */
function looksLikeModelAccessDenied(error) {
    if (!error) {
        return false;
    }
    if (error.name === "ModelAccessDeniedError" ||
        error.code === "MODEL_ACCESS_DENIED") {
        return true;
    }
    let msg;
    if (typeof error.message === "string") {
        msg = error.message;
    }
    else if (typeof error === "object") {
        // String(obj) collapses to "[object Object]", which hides a denial
        // message nested inside a structured payload. Serialise instead, and
        // never let serialisation itself throw (circular references, hostile
        // toJSON, null-prototype objects).
        try {
            msg = JSON.stringify(error) ?? Object.prototype.toString.call(error);
        }
        catch {
            msg = Object.prototype.toString.call(error);
        }
    }
    else {
        msg = String(error);
    }
    if (!msg) {
        return false;
    }
    const lower = msg.toLowerCase();
    if (lower.includes("team can only access")) {
        return true;
    }
    if (lower.includes("team") && lower.includes("not allowed")) {
        return true;
    }
    return /not\s+allowed\s+to\s+access\s+(this\s+)?model/i.test(msg);
}
149
179
  function isNonRetryableProviderError(error) {
150
180
  // Check for typed error classes from providers
151
181
  if (error instanceof InvalidModelError) {
@@ -334,6 +364,9 @@ export class NeuroLink {
334
364
  authInitPromise;
335
365
  // Per-provider credential overrides (instance-level default)
336
366
  credentials;
367
+ // Curator P2-3: instance-level fallback policy. Read by
368
+ // runWithFallbackOrchestration on model-access-denied.
369
+ fallbackConfig = {};
337
370
  /**
338
371
  * Merge instance-level credentials with per-call credentials.
339
372
  *
@@ -721,6 +754,14 @@ export class NeuroLink {
721
754
  if (config?.modelAliasConfig) {
722
755
  this.modelAliasConfig = config.modelAliasConfig;
723
756
  }
757
+ // Curator P2-3: capture fallback policy. Per-call options can still
758
+ // override, but these are the instance-level defaults.
759
+ if (config?.providerFallback) {
760
+ this.fallbackConfig.providerFallback = config.providerFallback;
761
+ }
762
+ if (config?.modelChain) {
763
+ this.fallbackConfig.modelChain = config.modelChain;
764
+ }
724
765
  logger.setEventEmitter(this.emitter);
725
766
  // Read tool cache duration from environment variables, with a default
726
767
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -2324,11 +2365,26 @@ Current user's request: ${currentInput}`;
2324
2365
  if (traceCtx) {
2325
2366
  span.parentSpanId = traceCtx.parentSpanId;
2326
2367
  }
2327
- // Mark failed generations with ERROR status so metrics count them correctly
2328
- const spanStatus = data.success === false || data.error
2329
- ? SpanStatus.ERROR
2330
- : SpanStatus.OK;
2331
- span = SpanSerializer.endSpan(span, spanStatus, data.error ? String(data.error) : undefined);
2368
+ // Mark failed generations with ERROR status so metrics count them
2369
+ // correctly. Client aborts (data.aborted === true) are NOT failures —
2370
+ // they are user-initiated cancellations and must not pollute the
2371
+ // failure rate. Map them to WARNING with the canonical
2372
+ // "Generation aborted by client" message (matches the Langfuse
2373
+ // ContextEnricher mapping for outer/internal generation spans).
2374
+ let spanStatus;
2375
+ let statusMessage;
2376
+ if (data.aborted === true) {
2377
+ spanStatus = SpanStatus.WARNING;
2378
+ statusMessage = "Generation aborted by client";
2379
+ }
2380
+ else if (data.success === false || data.error) {
2381
+ spanStatus = SpanStatus.ERROR;
2382
+ statusMessage = data.error ? String(data.error) : undefined;
2383
+ }
2384
+ else {
2385
+ spanStatus = SpanStatus.OK;
2386
+ }
2387
+ span = SpanSerializer.endSpan(span, spanStatus, statusMessage);
2332
2388
  span.durationMs = responseTime;
2333
2389
  // G2 fix: Check finishReason and escalate to WARNING for partial failures
2334
2390
  const finishReason = result?.finishReason ??
@@ -2654,7 +2710,121 @@ Current user's request: ${currentInput}`;
2654
2710
  * @since 1.0.0
2655
2711
  */
2656
2712
  async generate(optionsOrPrompt) {
2657
- return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan));
2713
+ return this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
2714
+ }
2715
+ /**
2716
+ * Curator P2-3: wraps a generate/stream call with the fallback
2717
+ * orchestration (`providerFallback` callback + `modelChain` walker).
2718
+ *
2719
+ * On a model-access-denied error from the inner call:
2720
+ * 1. Resolve the effective callback (per-call > instance > synthesised
2721
+ * from modelChain) and the effective chain (per-call > instance).
2722
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
2723
+ * `model.fallback` event → re-call inner with the new {provider,
2724
+ * model}.
2725
+ * 3. Stop on first success, on a callback returning null, or after
2726
+ * exhausting the chain (throw the most recent error).
2727
+ */
2728
+ async runWithFallbackOrchestration(optionsOrPrompt, kind, inner) {
2729
+ const initialAttempt = await this.attemptInner(inner, optionsOrPrompt);
2730
+ if ("ok" in initialAttempt) {
2731
+ return initialAttempt.ok;
2732
+ }
2733
+ let lastError = initialAttempt.error;
2734
+ if (!looksLikeModelAccessDenied(lastError)) {
2735
+ throw lastError;
2736
+ }
2737
+ // Build the chain orchestration.
2738
+ const requestedProvider = (typeof optionsOrPrompt === "object"
2739
+ ? optionsOrPrompt.provider
2740
+ : undefined);
2741
+ const requestedModel = (typeof optionsOrPrompt === "object"
2742
+ ? optionsOrPrompt.model
2743
+ : undefined);
2744
+ const callOpts = typeof optionsOrPrompt === "object"
2745
+ ? optionsOrPrompt
2746
+ : {};
2747
+ const perCallCallback = callOpts.providerFallback;
2748
+ const perCallChain = callOpts.modelChain;
2749
+ const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
2750
+ const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
2751
+ if (!effectiveCallback && !effectiveChain) {
2752
+ throw lastError;
2753
+ }
2754
+ // Synthesise a callback from modelChain if no explicit callback exists.
2755
+ const chainCursor = { i: 0, list: effectiveChain ?? [] };
2756
+ const synthesizedFromChain = async () => {
2757
+ while (chainCursor.i < chainCursor.list.length) {
2758
+ const next = chainCursor.list[chainCursor.i++];
2759
+ if (next !== requestedModel) {
2760
+ return { model: next };
2761
+ }
2762
+ }
2763
+ return null;
2764
+ };
2765
+ const callback = effectiveCallback ?? synthesizedFromChain;
2766
+ let attempts = 0;
2767
+ const maxAttempts = (effectiveChain?.length ?? 0) + 5;
2768
+ let attemptedRequestedModel = requestedModel;
2769
+ while (attempts++ < maxAttempts) {
2770
+ let next;
2771
+ try {
2772
+ next = await callback(lastError);
2773
+ }
2774
+ catch (cbErr) {
2775
+ logger.warn("[NeuroLink] providerFallback callback threw", {
2776
+ error: cbErr instanceof Error ? cbErr.message : String(cbErr),
2777
+ });
2778
+ throw lastError;
2779
+ }
2780
+ if (!next) {
2781
+ throw lastError;
2782
+ }
2783
+ // Emit model.fallback event so cost/audit listeners can record it.
2784
+ try {
2785
+ this.emitter.emit("model.fallback", {
2786
+ requestedProvider,
2787
+ requestedModel: attemptedRequestedModel,
2788
+ fallbackProvider: next.provider ?? requestedProvider,
2789
+ fallbackModel: next.model,
2790
+ reason: lastError instanceof Error ? lastError.message : String(lastError),
2791
+ kind,
2792
+ timestamp: Date.now(),
2793
+ });
2794
+ }
2795
+ catch {
2796
+ /* listener errors are non-fatal */
2797
+ }
2798
+ const retriedOptions = typeof optionsOrPrompt === "object"
2799
+ ? {
2800
+ ...optionsOrPrompt,
2801
+ ...(next.provider && { provider: next.provider }),
2802
+ ...(next.model && { model: next.model }),
2803
+ // Strip the fallback hooks so the retry doesn't re-orchestrate.
2804
+ providerFallback: undefined,
2805
+ modelChain: undefined,
2806
+ }
2807
+ : optionsOrPrompt;
2808
+ const retryAttempt = await this.attemptInner(inner, retriedOptions);
2809
+ if ("ok" in retryAttempt) {
2810
+ return retryAttempt.ok;
2811
+ }
2812
+ lastError = retryAttempt.error;
2813
+ attemptedRequestedModel = next.model ?? attemptedRequestedModel;
2814
+ if (!looksLikeModelAccessDenied(lastError)) {
2815
+ throw lastError;
2816
+ }
2817
+ }
2818
+ throw lastError;
2819
+ }
2820
+ async attemptInner(inner, options) {
2821
+ try {
2822
+ const ok = await inner(options);
2823
+ return { ok };
2824
+ }
2825
+ catch (error) {
2826
+ return { error };
2827
+ }
2658
2828
  }
2659
2829
  async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2660
2830
  return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
@@ -2674,10 +2844,22 @@ Current user's request: ${currentInput}`;
2674
2844
  return result;
2675
2845
  }
2676
2846
  catch (error) {
2677
- generateSpan.setStatus({
2678
- code: SpanStatusCode.ERROR,
2679
- message: error instanceof Error ? error.message : String(error),
2680
- });
2847
+ // Match the inner-span discrimination: client aborts are user-initiated
2848
+ // cancellations, not faults. Mark with finishReason=aborted and skip
2849
+ // ERROR status so ContextEnricher routes the outer trace to
2850
+ // langfuse.level=WARNING (matches Curator telemetry-gaps Issue 5a). All
2851
+ // other errors keep the existing ERROR status + recordException pair.
2852
+ if (isAbortError(error)) {
2853
+ generateSpan.setAttribute("ai.finishReason", "aborted");
2854
+ generateSpan.setAttribute("neurolink.aborted", true);
2855
+ }
2856
+ else {
2857
+ generateSpan.recordException(error instanceof Error ? error : new Error(String(error)));
2858
+ generateSpan.setStatus({
2859
+ code: SpanStatusCode.ERROR,
2860
+ message: error instanceof Error ? error.message : String(error),
2861
+ });
2862
+ }
2681
2863
  // G7 fix: Distinguish context overflow errors with dedicated attributes
2682
2864
  if (error instanceof ContextBudgetExceededError) {
2683
2865
  generateSpan.setAttribute("neurolink.error.type", "context_overflow");
@@ -2972,6 +3154,11 @@ Current user's request: ${currentInput}`;
2972
3154
  const errModel = typeof optionsOrPrompt === "object"
2973
3155
  ? optionsOrPrompt.model || "unknown"
2974
3156
  : "unknown";
3157
+ // Distinguish client aborts from real failures so consumers (and Langfuse)
3158
+ // can route them differently. `aborted: true` is additive — `success`
3159
+ // remains false for backwards-compat with existing listeners that only
3160
+ // branch on the boolean.
3161
+ const aborted = isAbortError(error);
2975
3162
  try {
2976
3163
  this.emitter.emit("generation:end", {
2977
3164
  provider: errProvider,
@@ -2979,6 +3166,7 @@ Current user's request: ${currentInput}`;
2979
3166
  responseTime: 0,
2980
3167
  error: error instanceof Error ? error.message : String(error),
2981
3168
  success: false,
3169
+ aborted,
2982
3170
  });
2983
3171
  }
2984
3172
  catch (emitError) {
@@ -3326,10 +3514,23 @@ Current user's request: ${currentInput}`;
3326
3514
  return await this.runGenerateTextInternalFlow(options, internalSpan, context);
3327
3515
  }
3328
3516
  catch (error) {
3329
- internalSpan.setStatus({
3330
- code: SpanStatusCode.ERROR,
3331
- message: error instanceof Error ? error.message : String(error),
3332
- });
3517
+ // Client aborts are user-initiated cancellations, not system faults.
3518
+ // Setting status=ERROR forces Langfuse to level=ERROR (see
3519
+ // ContextEnricher.onEnd instrumentation.ts:691). Instead leave status
3520
+ // unset and stamp ai.finishReason=aborted so applyNonErrorLangfuseLevel
3521
+ // maps it to level=WARNING with the canonical "Generation aborted by
3522
+ // client" status_message. Matches Curator telemetry-gaps Issue 5a.
3523
+ if (isAbortError(error)) {
3524
+ internalSpan.setAttribute("ai.finishReason", "aborted");
3525
+ internalSpan.setAttribute("neurolink.aborted", true);
3526
+ }
3527
+ else {
3528
+ internalSpan.recordException(error instanceof Error ? error : new Error(String(error)));
3529
+ internalSpan.setStatus({
3530
+ code: SpanStatusCode.ERROR,
3531
+ message: error instanceof Error ? error.message : String(error),
3532
+ });
3533
+ }
3333
3534
  throw error;
3334
3535
  }
3335
3536
  finally {
@@ -3385,6 +3586,13 @@ Current user's request: ${currentInput}`;
3385
3586
  if (recoveredResult) {
3386
3587
  return recoveredResult;
3387
3588
  }
3589
+ // Convert raw DOMException AbortErrors (and other untyped abort shapes)
3590
+ // into NeuroLinkError(ABORT) so callers can branch on
3591
+ // `error.category === ErrorCategory.ABORT` instead of message matching.
3592
+ // Skipped if the error is already a typed abort to avoid double-wrap.
3593
+ if (isAbortError(error) && !(error instanceof NeuroLinkError)) {
3594
+ throw ErrorFactory.aborted(error instanceof Error ? error : new Error(String(error)));
3595
+ }
3388
3596
  throw error;
3389
3597
  }
3390
3598
  }
@@ -3442,28 +3650,24 @@ Current user's request: ${currentInput}`;
3442
3650
  return recoveredResult;
3443
3651
  }
3444
3652
  if (isAbortError(error)) {
3445
- logger.info(`[${context.functionTag}] Generation aborted storing conversation turn for title generation`, {
3653
+ // Aborted generations DO NOT write to conversation memory.
3654
+ // Fabricating an assistant turn out of an error condition (the previous
3655
+ // "[generation was interrupted]" sentinel) pollutes the next prompt and
3656
+ // — at the right shape — causes the model to echo the sentinel as its
3657
+ // response. See Curator SI-069 / SI-071. Aborts are signalled to
3658
+ // callers via the thrown error and the "error" emitter event below;
3659
+ // there is nothing to persist, so persisting nothing is correct.
3660
+ //
3661
+ // Title generation continues to work: it reads the user message of the
3662
+ // first *successful* turn (RedisConversationMemoryManager
3663
+ // .generateConversationTitle) and never required a fabricated assistant
3664
+ // turn — the previous comment claiming otherwise was inaccurate.
3665
+ logger.info(`[${context.functionTag}] Generation aborted — skipping memory write (aborts must not pollute conversation history)`, {
3446
3666
  hasMemory: !!this.conversationMemory,
3447
3667
  memoryType: this.conversationMemory?.constructor?.name || "NONE",
3448
3668
  sessionId: options.context?.sessionId ||
3449
3669
  "unknown",
3450
3670
  });
3451
- try {
3452
- const abortedResult = {
3453
- content: "[generation was interrupted]",
3454
- provider: options.provider || "unknown",
3455
- model: options.model || "unknown",
3456
- responseTime: Date.now() - context.generateInternalStartTime,
3457
- };
3458
- await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(context.generateInternalStartTime), context.requestId), 5000);
3459
- }
3460
- catch (storeError) {
3461
- logger.warn(`[${context.functionTag}] Failed to store conversation turn after abort`, {
3462
- error: storeError instanceof Error
3463
- ? storeError.message
3464
- : String(storeError),
3465
- });
3466
- }
3467
3671
  }
3468
3672
  else {
3469
3673
  logger.error(`[${context.functionTag}] All generation methods failed`, {
@@ -3471,7 +3675,14 @@ Current user's request: ${currentInput}`;
3471
3675
  });
3472
3676
  }
3473
3677
  this.emitter.emit("response:end", "");
3474
- this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3678
+ // Node EventEmitter rethrows the original error from emit("error", e) if
3679
+ // there is no listener registered, which would short-circuit the caller's
3680
+ // catch block and prevent the abort-typed-error wrap from running. Only
3681
+ // emit when a consumer is listening; non-listening callers receive the
3682
+ // error via the thrown rejection instead, which is the canonical path.
3683
+ if (this.emitter.listenerCount("error") > 0) {
3684
+ this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3685
+ }
3475
3686
  return null;
3476
3687
  }
3477
3688
  async tryRecoverGenerateTextOverflow(options, functionTag, error) {
@@ -4510,7 +4721,128 @@ Current user's request: ${currentInput}`;
4510
4721
  : [],
4511
4722
  optionKeys: Object.keys(options),
4512
4723
  });
4513
- return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...options }));
4724
+ return this.streamWithIterationFallback(options);
4725
+ }
4726
+ /**
4727
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
4728
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
4729
+ * `createLiteLLMTransformedStream`). The standard
4730
+ * `runWithFallbackOrchestration` only catches errors thrown while the
4731
+ * `StreamResult` is being created — once we hand the iterator back to
4732
+ * the caller, errors raised during consumption used to bypass
4733
+ * `providerFallback` / `modelChain`.
4734
+ *
4735
+ * This wrapper runs the orchestration to get an initial StreamResult,
4736
+ * then wraps `result.stream` so that:
4737
+ * - chunks are forwarded transparently while consumption succeeds
4738
+ * - if iteration throws a model-access-denied error AND no chunks
4739
+ * have been yielded yet, we resolve the next fallback target,
4740
+ * emit `model.fallback`, and recurse
4741
+ * - if chunks were already yielded, the error propagates (mid-stream
4742
+ * recovery isn't safe — the consumer has half a response)
4743
+ */
4744
+ async streamWithIterationFallback(options) {
4745
+ const result = await this.runWithFallbackOrchestration(options, "stream", (opts) => metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...opts })));
4746
+ const callOpts = options;
4747
+ const perCallCallback = callOpts.providerFallback;
4748
+ const perCallChain = callOpts.modelChain;
4749
+ const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
4750
+ const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
4751
+ if (!effectiveCallback && !effectiveChain) {
4752
+ // No fallback configured — nothing to wrap.
4753
+ return result;
4754
+ }
4755
+ // Build a chain cursor scoped to this stream's lifetime; consumers
4756
+ // who set up `modelChain` get sequential progression here too.
4757
+ const chainCursor = {
4758
+ i: 0,
4759
+ list: effectiveChain ?? [],
4760
+ requestedModel: options.model,
4761
+ };
4762
+ const callback = effectiveCallback ??
4763
+ (async () => {
4764
+ while (chainCursor.i < chainCursor.list.length) {
4765
+ const next = chainCursor.list[chainCursor.i++];
4766
+ if (next !== chainCursor.requestedModel) {
4767
+ return { model: next };
4768
+ }
4769
+ }
4770
+ return null;
4771
+ });
4772
+ const self = this;
4773
+ // Yield type is the original stream's element type, threaded through
4774
+ // as unknown — we forward chunks unchanged so structural identity is
4775
+ // preserved without a local type alias (CLAUDE.md rule 2).
4776
+ const wrappedStream = (async function* () {
4777
+ let yielded = 0;
4778
+ let currentResult = result;
4779
+ let attemptedRequestedProvider = options.provider;
4780
+ let attemptedRequestedModel = options.model;
4781
+ const maxAttempts = (effectiveChain?.length ?? 0) + 5;
4782
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
4783
+ try {
4784
+ for await (const chunk of currentResult.stream) {
4785
+ yielded++;
4786
+ yield chunk;
4787
+ }
4788
+ return;
4789
+ }
4790
+ catch (err) {
4791
+ if (yielded > 0 || !looksLikeModelAccessDenied(err)) {
4792
+ throw err;
4793
+ }
4794
+ let next;
4795
+ try {
4796
+ next = await callback(err);
4797
+ }
4798
+ catch (cbErr) {
4799
+ logger.warn("[NeuroLink.stream] providerFallback callback threw during iteration", {
4800
+ error: cbErr instanceof Error ? cbErr.message : String(cbErr),
4801
+ });
4802
+ throw err;
4803
+ }
4804
+ if (!next) {
4805
+ throw err;
4806
+ }
4807
+ try {
4808
+ self.emitter.emit("model.fallback", {
4809
+ requestedProvider: attemptedRequestedProvider,
4810
+ requestedModel: attemptedRequestedModel,
4811
+ fallbackProvider: next.provider ?? attemptedRequestedProvider,
4812
+ fallbackModel: next.model,
4813
+ reason: err instanceof Error ? err.message : String(err),
4814
+ kind: "stream",
4815
+ phase: "iteration",
4816
+ timestamp: Date.now(),
4817
+ });
4818
+ }
4819
+ catch {
4820
+ /* listener errors are non-fatal */
4821
+ }
4822
+ const retriedOptions = {
4823
+ ...options,
4824
+ ...(next.provider && {
4825
+ provider: next.provider,
4826
+ }),
4827
+ ...(next.model && { model: next.model }),
4828
+ // Strip the hooks so the inner orchestration doesn't double-fall-back.
4829
+ providerFallback: undefined,
4830
+ modelChain: undefined,
4831
+ };
4832
+ attemptedRequestedProvider =
4833
+ next.provider ?? attemptedRequestedProvider;
4834
+ attemptedRequestedModel = next.model ?? attemptedRequestedModel;
4835
+ currentResult = await metricsTraceContextStorage.run(self.createMetricsTraceContext(), () => self.executeStreamRequest({ ...retriedOptions }));
4836
+ }
4837
+ }
4838
+ // Exhausted attempts — re-throw the most recent error captured by
4839
+ // the inner loop. We only get here if the loop didn't return.
4840
+ throw new Error(`[NeuroLink.stream] iteration fallback exhausted ${maxAttempts} attempts`);
4841
+ })();
4842
+ return {
4843
+ ...result,
4844
+ stream: wrappedStream,
4845
+ };
4514
4846
  }
4515
4847
  async executeStreamRequest(options) {
4516
4848
  // Dynamic argument resolution — resolve any function-valued options before downstream use
@@ -5701,8 +6033,12 @@ Current user's request: ${currentInput}`;
5701
6033
  * **Generation Events:**
5702
6034
  * - `generation:start` - Fired when text generation begins
5703
6035
  * - `{ provider: string, timestamp: number }`
5704
- * - `generation:end` - Fired when text generation completes
5705
- * - `{ provider: string, responseTime: number, toolsUsed?: string[], timestamp: number }`
6036
+ * - `generation:end` - Fired when text generation completes (or fails / is aborted)
6037
+ * - `{ provider: string, responseTime: number, toolsUsed?: string[], timestamp: number, success?: boolean, aborted?: boolean, error?: string }`
6038
+ * - `success` is `false` for both failures and client aborts; `aborted: true`
6039
+ * distinguishes the latter so consumers can route cancellations
6040
+ * differently from real errors. Pipeline B's metrics span maps
6041
+ * `aborted: true` events to `SpanStatus.WARNING` (not ERROR).
5706
6042
  *
5707
6043
  * **Streaming Events:**
5708
6044
  * - `stream:start` - Fired when streaming begins
@@ -6643,7 +6979,13 @@ Current user's request: ${currentInput}`;
6643
6979
  prepared.metrics.errorCategories[category] =
6644
6980
  (prepared.metrics.errorCategories[category] || 0) + 1;
6645
6981
  this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined, structuredError);
6646
- this.emitter.emit("error", structuredError);
6982
+ // Gate on listenerCount: Node EventEmitter rethrows the original error
6983
+ // from emit("error", e) when no listener is registered, which would
6984
+ // short-circuit the surrounding flow and surface as an unhandled
6985
+ // rejection. Same pattern as handleGenerateTextInternalFailure.
6986
+ if (this.emitter.listenerCount("error") > 0) {
6987
+ this.emitter.emit("error", structuredError);
6988
+ }
6647
6989
  structuredError = new NeuroLinkError({
6648
6990
  ...structuredError,
6649
6991
  context: {
@@ -6806,13 +7148,19 @@ Current user's request: ${currentInput}`;
6806
7148
  result.success === false) {
6807
7149
  const errorMessage = result.error || "Tool execution failed";
6808
7150
  const errorToEmit = new Error(errorMessage);
6809
- this.emitter.emit("error", errorToEmit);
7151
+ // Gate on listenerCount — see handleGenerateTextInternalFailure for
7152
+ // the rationale (Node EventEmitter rethrows on no listener).
7153
+ if (this.emitter.listenerCount("error") > 0) {
7154
+ this.emitter.emit("error", errorToEmit);
7155
+ }
6810
7156
  }
6811
7157
  return result;
6812
7158
  }
6813
7159
  catch (error) {
6814
7160
  const errorToEmit = error instanceof Error ? error : new Error(String(error));
6815
- this.emitter.emit("error", errorToEmit);
7161
+ if (this.emitter.listenerCount("error") > 0) {
7162
+ this.emitter.emit("error", errorToEmit);
7163
+ }
6816
7164
  // Check if tool was not found
6817
7165
  if (error instanceof Error && error.message.includes("not found")) {
6818
7166
  const availableTools = await this.getAllAvailableTools();
@@ -21,6 +21,16 @@ export type NeuroLinkConfig = {
21
21
  configVersion?: string;
22
22
  [key: string]: unknown;
23
23
  };
24
/**
 * Curator P2-3: signature of the centralized fallback-policy hook.
 *
 * Called with the failing error when a generate/stream call fails with what
 * looks like a model-access-denied error. Resolve to the next target to
 * retry with — `provider` and `model` are each optional — or to `null` to
 * let the original error bubble up untouched.
 */
export type ProviderFallbackCallback = (
    error: unknown,
) => Promise<{ provider?: string; model?: string } | null>;
24
34
  /**
25
35
  * Configuration object for NeuroLink constructor.
26
36
  */
@@ -43,6 +53,19 @@ export type NeurolinkConstructorConfig = {
43
53
  * from this NeuroLink instance. Per-call credentials override these.
44
54
  */
45
55
  credentials?: NeurolinkCredentials;
56
+ /**
57
+ * Curator P2-3: callback invoked on model-access-denied. Lets a host (e.g.
58
+ * Curator) centrally drive fallback policy. The callback receives the
59
+ * original error and returns the next `{ provider, model }` to try, or
60
+ * `null` to bubble the error.
61
+ */
62
+ providerFallback?: ProviderFallbackCallback;
63
+ /**
64
+ * Curator P2-3: ordered list of model names to try in sequence on
65
+ * model-access-denied. Sugar over `providerFallback`. The current
66
+ * provider is preserved across the chain; only the model name changes.
67
+ */
68
+ modelChain?: string[];
46
69
  };
47
70
  /**
48
71
  * Configuration for MCP enhancement modules wired into generate()/stream() paths.
@@ -447,6 +447,19 @@ export type GenerateOptions = {
447
447
  * Unset providers fall through to instance credentials, then environment variables.
448
448
  */
449
449
  credentials?: NeurolinkCredentials;
450
+ /**
451
+ * Curator P2-3: per-call fallback callback. Overrides any
452
+ * instance-level `providerFallback` set on `new NeuroLink({...})`.
453
+ */
454
+ providerFallback?: (error: unknown) => Promise<{
455
+ provider?: string;
456
+ model?: string;
457
+ } | null>;
458
+ /**
459
+ * Curator P2-3: per-call ordered model chain. Overrides any
460
+ * instance-level `modelChain`. Tried in order on model-access-denied.
461
+ */
462
+ modelChain?: string[];
450
463
  /**
451
464
  * Per-call memory control.
452
465
  *
@@ -445,6 +445,19 @@ export type StreamOptions = {
445
445
  * Unset providers fall through to instance credentials, then environment variables.
446
446
  */
447
447
  credentials?: NeurolinkCredentials;
448
+ /**
449
+ * Curator P2-3: per-call fallback callback. Overrides any
450
+ * instance-level `providerFallback` set on `new NeuroLink({...})`.
451
+ */
452
+ providerFallback?: (error: unknown) => Promise<{
453
+ provider?: string;
454
+ model?: string;
455
+ } | null>;
456
+ /**
457
+ * Curator P2-3: per-call ordered model chain. Overrides any
458
+ * instance-level `modelChain`. Tried in order on model-access-denied.
459
+ */
460
+ modelChain?: string[];
448
461
  /**
449
462
  * Per-call memory control.
450
463
  *
@@ -5,6 +5,16 @@
5
5
  import type { ConversationMemoryManager } from "../core/conversationMemoryManager.js";
6
6
  import type { RedisConversationMemoryManager } from "../core/redisConversationMemoryManager.js";
7
7
  import type { ChatMessage, ConversationMemoryConfig, SessionMemory, TextGenerationOptions, TextGenerationResult } from "../types/index.js";
8
/**
 * Legacy sentinel string formerly written as a fabricated assistant turn by
 * the abort branch of handleGenerateTextInternalFailure (Curator SI-069 /
 * SI-071).
 *
 * The producer has been removed, but historical Redis sessions may still
 * contain entries with this exact content. Per the original note, such
 * entries are filtered at the prompt-builder boundary so they never reach
 * the provider, letting sessions self-heal on the next read without any
 * migration.
 *
 * Keep in sync with any future renames; do not remove without a cross-repo
 * grep.
 */
export declare const ABORT_LEGACY_SENTINEL = "[generation was interrupted]";
8
18
  /**
9
19
  * Apply conversation memory defaults to user configuration
10
20
  * Merges user config with environment variables and default values