@juspay/neurolink 9.57.1 → 9.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,6 +60,7 @@ export declare class NeuroLink {
60
60
  private pendingAuthConfig?;
61
61
  private authInitPromise?;
62
62
  private credentials?;
63
+ private readonly fallbackConfig;
63
64
  /**
64
65
  * Merge instance-level credentials with per-call credentials.
65
66
  *
@@ -541,6 +542,21 @@ export declare class NeuroLink {
541
542
  * @since 1.0.0
542
543
  */
543
544
  generate(optionsOrPrompt: GenerateOptions | DynamicOptions | string): Promise<GenerateResult>;
545
+ /**
546
+ * Curator P2-3: wraps a generate/stream call with the fallback
547
+ * orchestration (`providerFallback` callback + `modelChain` walker).
548
+ *
549
+ * On a model-access-denied error from the inner call:
550
+ * 1. Resolve the effective callback (per-call > instance > synthesised
551
+ * from modelChain) and the effective chain (per-call > instance).
552
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
553
+ * `model.fallback` event → re-call inner with the new {provider,
554
+ * model}.
555
+ * 3. Stop on first success, on a callback returning null, or after
556
+ * exhausting the chain (throw the most recent error).
557
+ */
558
+ private runWithFallbackOrchestration;
559
+ private attemptInner;
544
560
  private executeGenerateWithMetricsContext;
545
561
  private executeGenerateRequest;
546
562
  private prepareGenerateRequest;
@@ -697,6 +713,25 @@ export declare class NeuroLink {
697
713
  * @throws {Error} When conversation memory operations fail (if enabled)
698
714
  */
699
715
  stream(options: StreamOptions | DynamicOptions): Promise<StreamResult>;
716
+ /**
717
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
718
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
719
+ * `createLiteLLMTransformedStream`). The standard
720
+ * `runWithFallbackOrchestration` only catches errors thrown while the
721
+ * `StreamResult` is being created — once we hand the iterator back to
722
+ * the caller, errors raised during consumption used to bypass
723
+ * `providerFallback` / `modelChain`.
724
+ *
725
+ * This wrapper runs the orchestration to get an initial StreamResult,
726
+ * then wraps `result.stream` so that:
727
+ * - chunks are forwarded transparently while consumption succeeds
728
+ * - if iteration throws a model-access-denied error AND no chunks
729
+ * have been yielded yet, we resolve the next fallback target,
730
+ * emit `model.fallback`, and retry with that target
731
+ * - if chunks were already yielded, the error propagates (mid-stream
732
+ * recovery isn't safe — the consumer has half a response)
733
+ */
734
+ private streamWithIterationFallback;
700
735
  private executeStreamRequest;
701
736
  private validateStreamRequestOptions;
702
737
  private maybeHandleWorkflowStreamRequest;
@@ -146,6 +146,36 @@ function mcpCategoryToErrorCategory(mcpCategory) {
146
146
  * For example, a NOT_FOUND error for a model causes 6 retries of a 418KB
147
147
  * message, wasting ~628,000 tokens and adding 10+ seconds of latency.
148
148
  */
149
/**
 * Curator P2-3: heuristically decide whether `error` is a
 * model-access-denied failure, without requiring the typed
 * ModelAccessDeniedError class to be present (Issue #1 ships that class
 * separately).
 *
 * Checks, in order:
 *  1. typed markers — `name === "ModelAccessDeniedError"` or
 *     `code === "MODEL_ACCESS_DENIED"`;
 *  2. message heuristics for LiteLLM-style text — "team ... not allowed",
 *     "team can only access ...", "not allowed to access (this) model".
 *
 * "team" is matched as a whole word so that unrelated messages which merely
 * contain those letters (e.g. "Steam login not allowed") do not trigger a
 * provider/model fallback.
 *
 * @param error - Whatever was thrown; may be a non-Error value or a string.
 * @returns `true` when the error looks like a model-access denial.
 */
function looksLikeModelAccessDenied(error) {
    if (!error) {
        return false;
    }
    const e = error;
    if (e.name === "ModelAccessDeniedError" || e.code === "MODEL_ACCESS_DENIED") {
        return true;
    }
    // Use `message` only when it really is a string; anything else is
    // stringified so the `.toLowerCase()` below can never throw.
    const msg = typeof e.message === "string" ? e.message : String(error);
    if (!msg) {
        return false;
    }
    const lower = msg.toLowerCase();
    if (/\bteam\b/.test(lower) && lower.includes("not allowed")) {
        return true;
    }
    if (/\bteam can only access/.test(lower)) {
        return true;
    }
    return /not\s+allowed\s+to\s+access\s+(this\s+)?model/i.test(msg);
}
149
179
  function isNonRetryableProviderError(error) {
150
180
  // Check for typed error classes from providers
151
181
  if (error instanceof InvalidModelError) {
@@ -334,6 +364,9 @@ export class NeuroLink {
334
364
  authInitPromise;
335
365
  // Per-provider credential overrides (instance-level default)
336
366
  credentials;
367
+ // Curator P2-3: instance-level fallback policy. Read by
368
+ // runWithFallbackOrchestration on model-access-denied.
369
+ fallbackConfig = {};
337
370
  /**
338
371
  * Merge instance-level credentials with per-call credentials.
339
372
  *
@@ -721,6 +754,14 @@ export class NeuroLink {
721
754
  if (config?.modelAliasConfig) {
722
755
  this.modelAliasConfig = config.modelAliasConfig;
723
756
  }
757
+ // Curator P2-3: capture fallback policy. Per-call options can still
758
+ // override, but these are the instance-level defaults.
759
+ if (config?.providerFallback) {
760
+ this.fallbackConfig.providerFallback = config.providerFallback;
761
+ }
762
+ if (config?.modelChain) {
763
+ this.fallbackConfig.modelChain = config.modelChain;
764
+ }
724
765
  logger.setEventEmitter(this.emitter);
725
766
  // Read tool cache duration from environment variables, with a default
726
767
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -2669,7 +2710,121 @@ Current user's request: ${currentInput}`;
2669
2710
  * @since 1.0.0
2670
2711
  */
2671
2712
  async generate(optionsOrPrompt) {
2672
- return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan));
2713
+ return this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
2714
+ }
2715
+ /**
2716
+ * Curator P2-3: wraps a generate/stream call with the fallback
2717
+ * orchestration (`providerFallback` callback + `modelChain` walker).
2718
+ *
2719
+ * On a model-access-denied error from the inner call:
2720
+ * 1. Resolve the effective callback (per-call > instance > synthesised
2721
+ * from modelChain) and the effective chain (per-call > instance).
2722
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
2723
+ * `model.fallback` event → re-call inner with the new {provider,
2724
+ * model}.
2725
+ * 3. Stop on first success, on a callback returning null, or after
2726
+ * exhausting the chain (throw the most recent error).
2727
+ */
2728
+ async runWithFallbackOrchestration(optionsOrPrompt, kind, inner) {
2729
+ const initialAttempt = await this.attemptInner(inner, optionsOrPrompt);
2730
+ if ("ok" in initialAttempt) {
2731
+ return initialAttempt.ok;
2732
+ }
2733
+ let lastError = initialAttempt.error;
2734
+ if (!looksLikeModelAccessDenied(lastError)) {
2735
+ throw lastError;
2736
+ }
2737
+ // Build the chain orchestration.
2738
+ const requestedProvider = (typeof optionsOrPrompt === "object"
2739
+ ? optionsOrPrompt.provider
2740
+ : undefined);
2741
+ const requestedModel = (typeof optionsOrPrompt === "object"
2742
+ ? optionsOrPrompt.model
2743
+ : undefined);
2744
+ const callOpts = typeof optionsOrPrompt === "object"
2745
+ ? optionsOrPrompt
2746
+ : {};
2747
+ const perCallCallback = callOpts.providerFallback;
2748
+ const perCallChain = callOpts.modelChain;
2749
+ const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
2750
+ const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
2751
+ if (!effectiveCallback && !effectiveChain) {
2752
+ throw lastError;
2753
+ }
2754
+ // Synthesise a callback from modelChain if no explicit callback exists.
2755
+ const chainCursor = { i: 0, list: effectiveChain ?? [] };
2756
+ const synthesizedFromChain = async () => {
2757
+ while (chainCursor.i < chainCursor.list.length) {
2758
+ const next = chainCursor.list[chainCursor.i++];
2759
+ if (next !== requestedModel) {
2760
+ return { model: next };
2761
+ }
2762
+ }
2763
+ return null;
2764
+ };
2765
+ const callback = effectiveCallback ?? synthesizedFromChain;
2766
+ let attempts = 0;
2767
+ const maxAttempts = (effectiveChain?.length ?? 0) + 5;
2768
+ let attemptedRequestedModel = requestedModel;
2769
+ while (attempts++ < maxAttempts) {
2770
+ let next;
2771
+ try {
2772
+ next = await callback(lastError);
2773
+ }
2774
+ catch (cbErr) {
2775
+ logger.warn("[NeuroLink] providerFallback callback threw", {
2776
+ error: cbErr instanceof Error ? cbErr.message : String(cbErr),
2777
+ });
2778
+ throw lastError;
2779
+ }
2780
+ if (!next) {
2781
+ throw lastError;
2782
+ }
2783
+ // Emit model.fallback event so cost/audit listeners can record it.
2784
+ try {
2785
+ this.emitter.emit("model.fallback", {
2786
+ requestedProvider,
2787
+ requestedModel: attemptedRequestedModel,
2788
+ fallbackProvider: next.provider ?? requestedProvider,
2789
+ fallbackModel: next.model,
2790
+ reason: lastError instanceof Error ? lastError.message : String(lastError),
2791
+ kind,
2792
+ timestamp: Date.now(),
2793
+ });
2794
+ }
2795
+ catch {
2796
+ /* listener errors are non-fatal */
2797
+ }
2798
+ const retriedOptions = typeof optionsOrPrompt === "object"
2799
+ ? {
2800
+ ...optionsOrPrompt,
2801
+ ...(next.provider && { provider: next.provider }),
2802
+ ...(next.model && { model: next.model }),
2803
+ // Strip the fallback hooks so the retry doesn't re-orchestrate.
2804
+ providerFallback: undefined,
2805
+ modelChain: undefined,
2806
+ }
2807
+ : optionsOrPrompt;
2808
+ const retryAttempt = await this.attemptInner(inner, retriedOptions);
2809
+ if ("ok" in retryAttempt) {
2810
+ return retryAttempt.ok;
2811
+ }
2812
+ lastError = retryAttempt.error;
2813
+ attemptedRequestedModel = next.model ?? attemptedRequestedModel;
2814
+ if (!looksLikeModelAccessDenied(lastError)) {
2815
+ throw lastError;
2816
+ }
2817
+ }
2818
+ throw lastError;
2819
+ }
2820
+ async attemptInner(inner, options) {
2821
+ try {
2822
+ const ok = await inner(options);
2823
+ return { ok };
2824
+ }
2825
+ catch (error) {
2826
+ return { error };
2827
+ }
2673
2828
  }
2674
2829
  async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2675
2830
  return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
@@ -4566,7 +4721,128 @@ Current user's request: ${currentInput}`;
4566
4721
  : [],
4567
4722
  optionKeys: Object.keys(options),
4568
4723
  });
4569
- return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...options }));
4724
+ return this.streamWithIterationFallback(options);
4725
+ }
4726
+ /**
4727
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
4728
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
4729
+ * `createLiteLLMTransformedStream`). The standard
4730
+ * `runWithFallbackOrchestration` only catches errors thrown while the
4731
+ * `StreamResult` is being created — once we hand the iterator back to
4732
+ * the caller, errors raised during consumption used to bypass
4733
+ * `providerFallback` / `modelChain`.
4734
+ *
4735
+ * This wrapper runs the orchestration to get an initial StreamResult,
4736
+ * then wraps `result.stream` so that:
4737
+ * - chunks are forwarded transparently while consumption succeeds
4738
+ * - if iteration throws a model-access-denied error AND no chunks
4739
+ * have been yielded yet, we resolve the next fallback target,
4740
+ * emit `model.fallback`, and recurse
4741
+ * - if chunks were already yielded, the error propagates (mid-stream
4742
+ * recovery isn't safe — the consumer has half a response)
4743
+ */
4744
+ async streamWithIterationFallback(options) {
4745
+ const result = await this.runWithFallbackOrchestration(options, "stream", (opts) => metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...opts })));
4746
+ const callOpts = options;
4747
+ const perCallCallback = callOpts.providerFallback;
4748
+ const perCallChain = callOpts.modelChain;
4749
+ const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
4750
+ const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
4751
+ if (!effectiveCallback && !effectiveChain) {
4752
+ // No fallback configured — nothing to wrap.
4753
+ return result;
4754
+ }
4755
+ // Build a chain cursor scoped to this stream's lifetime; consumers
4756
+ // who set up `modelChain` get sequential progression here too.
4757
+ const chainCursor = {
4758
+ i: 0,
4759
+ list: effectiveChain ?? [],
4760
+ requestedModel: options.model,
4761
+ };
4762
+ const callback = effectiveCallback ??
4763
+ (async () => {
4764
+ while (chainCursor.i < chainCursor.list.length) {
4765
+ const next = chainCursor.list[chainCursor.i++];
4766
+ if (next !== chainCursor.requestedModel) {
4767
+ return { model: next };
4768
+ }
4769
+ }
4770
+ return null;
4771
+ });
4772
+ const self = this;
4773
+ // Yield type is the original stream's element type, threaded through
4774
+ // as unknown — we forward chunks unchanged so structural identity is
4775
+ // preserved without a local type alias (CLAUDE.md rule 2).
4776
+ const wrappedStream = (async function* () {
4777
+ let yielded = 0;
4778
+ let currentResult = result;
4779
+ let attemptedRequestedProvider = options.provider;
4780
+ let attemptedRequestedModel = options.model;
4781
+ const maxAttempts = (effectiveChain?.length ?? 0) + 5;
4782
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
4783
+ try {
4784
+ for await (const chunk of currentResult.stream) {
4785
+ yielded++;
4786
+ yield chunk;
4787
+ }
4788
+ return;
4789
+ }
4790
+ catch (err) {
4791
+ if (yielded > 0 || !looksLikeModelAccessDenied(err)) {
4792
+ throw err;
4793
+ }
4794
+ let next;
4795
+ try {
4796
+ next = await callback(err);
4797
+ }
4798
+ catch (cbErr) {
4799
+ logger.warn("[NeuroLink.stream] providerFallback callback threw during iteration", {
4800
+ error: cbErr instanceof Error ? cbErr.message : String(cbErr),
4801
+ });
4802
+ throw err;
4803
+ }
4804
+ if (!next) {
4805
+ throw err;
4806
+ }
4807
+ try {
4808
+ self.emitter.emit("model.fallback", {
4809
+ requestedProvider: attemptedRequestedProvider,
4810
+ requestedModel: attemptedRequestedModel,
4811
+ fallbackProvider: next.provider ?? attemptedRequestedProvider,
4812
+ fallbackModel: next.model,
4813
+ reason: err instanceof Error ? err.message : String(err),
4814
+ kind: "stream",
4815
+ phase: "iteration",
4816
+ timestamp: Date.now(),
4817
+ });
4818
+ }
4819
+ catch {
4820
+ /* listener errors are non-fatal */
4821
+ }
4822
+ const retriedOptions = {
4823
+ ...options,
4824
+ ...(next.provider && {
4825
+ provider: next.provider,
4826
+ }),
4827
+ ...(next.model && { model: next.model }),
4828
+ // Strip the hooks so the inner orchestration doesn't double-fall-back.
4829
+ providerFallback: undefined,
4830
+ modelChain: undefined,
4831
+ };
4832
+ attemptedRequestedProvider =
4833
+ next.provider ?? attemptedRequestedProvider;
4834
+ attemptedRequestedModel = next.model ?? attemptedRequestedModel;
4835
+ currentResult = await metricsTraceContextStorage.run(self.createMetricsTraceContext(), () => self.executeStreamRequest({ ...retriedOptions }));
4836
+ }
4837
+ }
4838
+ // Exhausted attempts — re-throw the most recent error captured by
4839
+ // the inner loop. We only get here if the loop didn't return.
4840
+ throw new Error(`[NeuroLink.stream] iteration fallback exhausted ${maxAttempts} attempts`);
4841
+ })();
4842
+ return {
4843
+ ...result,
4844
+ stream: wrappedStream,
4845
+ };
4570
4846
  }
4571
4847
  async executeStreamRequest(options) {
4572
4848
  // Dynamic argument resolution — resolve any function-valued options before downstream use
@@ -21,6 +21,16 @@ export type NeuroLinkConfig = {
21
21
  configVersion?: string;
22
22
  [key: string]: unknown;
23
23
  };
24
/**
 * Curator P2-3: shape of the centralized fallback-policy hook.
 *
 * Called with the error from a generate/stream call that looks like a
 * model-access-denied failure. Resolve to `{ provider, model }` — either
 * or both fields may be omitted — to drive a retry, or resolve to `null`
 * to let the original error bubble untouched.
 */
export type ProviderFallbackCallback = (error: unknown) => Promise<null | {
    model?: string;
    provider?: string;
}>;
24
34
  /**
25
35
  * Configuration object for NeuroLink constructor.
26
36
  */
@@ -43,6 +53,19 @@ export type NeurolinkConstructorConfig = {
43
53
  * from this NeuroLink instance. Per-call credentials override these.
44
54
  */
45
55
  credentials?: NeurolinkCredentials;
56
+ /**
57
+ * Curator P2-3: callback invoked on model-access-denied. Lets a host (e.g.
58
+ * Curator) centrally drive fallback policy. The callback receives the
59
+ * original error and returns the next `{ provider, model }` to try, or
60
+ * `null` to bubble the error.
61
+ */
62
+ providerFallback?: ProviderFallbackCallback;
63
+ /**
64
+ * Curator P2-3: ordered list of model names to try in sequence on
65
+ * model-access-denied. Sugar over `providerFallback`. The current
66
+ * provider is preserved across the chain; only the model name changes.
67
+ */
68
+ modelChain?: string[];
46
69
  };
47
70
  /**
48
71
  * Configuration for MCP enhancement modules wired into generate()/stream() paths.
@@ -447,6 +447,19 @@ export type GenerateOptions = {
447
447
  * Unset providers fall through to instance credentials, then environment variables.
448
448
  */
449
449
  credentials?: NeurolinkCredentials;
450
+ /**
451
+ * Curator P2-3: per-call fallback callback. Overrides any
452
+ * instance-level `providerFallback` set on `new NeuroLink({...})`.
453
+ */
454
+ providerFallback?: (error: unknown) => Promise<{
455
+ provider?: string;
456
+ model?: string;
457
+ } | null>;
458
+ /**
459
+ * Curator P2-3: per-call ordered model chain. Overrides any
460
+ * instance-level `modelChain`. Tried in order on model-access-denied.
461
+ */
462
+ modelChain?: string[];
450
463
  /**
451
464
  * Per-call memory control.
452
465
  *
@@ -445,6 +445,19 @@ export type StreamOptions = {
445
445
  * Unset providers fall through to instance credentials, then environment variables.
446
446
  */
447
447
  credentials?: NeurolinkCredentials;
448
+ /**
449
+ * Curator P2-3: per-call fallback callback. Overrides any
450
+ * instance-level `providerFallback` set on `new NeuroLink({...})`.
451
+ */
452
+ providerFallback?: (error: unknown) => Promise<{
453
+ provider?: string;
454
+ model?: string;
455
+ } | null>;
456
+ /**
457
+ * Curator P2-3: per-call ordered model chain. Overrides any
458
+ * instance-level `modelChain`. Tried in order on model-access-denied.
459
+ */
460
+ modelChain?: string[];
448
461
  /**
449
462
  * Per-call memory control.
450
463
  *
@@ -60,6 +60,7 @@ export declare class NeuroLink {
60
60
  private pendingAuthConfig?;
61
61
  private authInitPromise?;
62
62
  private credentials?;
63
+ private readonly fallbackConfig;
63
64
  /**
64
65
  * Merge instance-level credentials with per-call credentials.
65
66
  *
@@ -541,6 +542,21 @@ export declare class NeuroLink {
541
542
  * @since 1.0.0
542
543
  */
543
544
  generate(optionsOrPrompt: GenerateOptions | DynamicOptions | string): Promise<GenerateResult>;
545
+ /**
546
+ * Curator P2-3: wraps a generate/stream call with the fallback
547
+ * orchestration (`providerFallback` callback + `modelChain` walker).
548
+ *
549
+ * On a model-access-denied error from the inner call:
550
+ * 1. Resolve the effective callback (per-call > instance > synthesised
551
+ * from modelChain) and the effective chain (per-call > instance).
552
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
553
+ * `model.fallback` event → re-call inner with the new {provider,
554
+ * model}.
555
+ * 3. Stop on first success, on a callback returning null, or after
556
+ * exhausting the chain (throw the most recent error).
557
+ */
558
+ private runWithFallbackOrchestration;
559
+ private attemptInner;
544
560
  private executeGenerateWithMetricsContext;
545
561
  private executeGenerateRequest;
546
562
  private prepareGenerateRequest;
@@ -697,6 +713,25 @@ export declare class NeuroLink {
697
713
  * @throws {Error} When conversation memory operations fail (if enabled)
698
714
  */
699
715
  stream(options: StreamOptions | DynamicOptions): Promise<StreamResult>;
716
+ /**
717
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
718
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
719
+ * `createLiteLLMTransformedStream`). The standard
720
+ * `runWithFallbackOrchestration` only catches errors thrown while the
721
+ * `StreamResult` is being created — once we hand the iterator back to
722
+ * the caller, errors raised during consumption used to bypass
723
+ * `providerFallback` / `modelChain`.
724
+ *
725
+ * This wrapper runs the orchestration to get an initial StreamResult,
726
+ * then wraps `result.stream` so that:
727
+ * - chunks are forwarded transparently while consumption succeeds
728
+ * - if iteration throws a model-access-denied error AND no chunks
729
+ * have been yielded yet, we resolve the next fallback target,
730
+ * emit `model.fallback`, and retry with that target
731
+ * - if chunks were already yielded, the error propagates (mid-stream
732
+ * recovery isn't safe — the consumer has half a response)
733
+ */
734
+ private streamWithIterationFallback;
700
735
  private executeStreamRequest;
701
736
  private validateStreamRequestOptions;
702
737
  private maybeHandleWorkflowStreamRequest;