@juspay/neurolink 9.57.1 → 9.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,6 +60,7 @@ export declare class NeuroLink {
60
60
  private pendingAuthConfig?;
61
61
  private authInitPromise?;
62
62
  private credentials?;
63
+ private readonly fallbackConfig;
63
64
  /**
64
65
  * Merge instance-level credentials with per-call credentials.
65
66
  *
@@ -541,6 +542,21 @@ export declare class NeuroLink {
541
542
  * @since 1.0.0
542
543
  */
543
544
  generate(optionsOrPrompt: GenerateOptions | DynamicOptions | string): Promise<GenerateResult>;
545
+ /**
546
+ * Curator P2-3: wraps a generate/stream call with the fallback
547
+ * orchestration (`providerFallback` callback + `modelChain` walker).
548
+ *
549
+ * On a model-access-denied error from the inner call:
550
+ * 1. Resolve the effective callback (per-call > instance > synthesised
551
+ * from modelChain) and the effective chain (per-call > instance).
552
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
553
+ * `model.fallback` event → re-call inner with the new {provider,
554
+ * model}.
555
+ * 3. Stop on first success, on a callback returning null, or after
556
+ * exhausting the chain (throw the most recent error).
557
+ */
558
+ private runWithFallbackOrchestration;
559
+ private attemptInner;
544
560
  private executeGenerateWithMetricsContext;
545
561
  private executeGenerateRequest;
546
562
  private prepareGenerateRequest;
@@ -697,6 +713,25 @@ export declare class NeuroLink {
697
713
  * @throws {Error} When conversation memory operations fail (if enabled)
698
714
  */
699
715
  stream(options: StreamOptions | DynamicOptions): Promise<StreamResult>;
716
+ /**
717
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
718
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
719
+ * `createLiteLLMTransformedStream`). The standard
720
+ * `runWithFallbackOrchestration` only catches errors thrown while the
721
+ * `StreamResult` is being created — once we hand the iterator back to
722
+ * the caller, errors raised during consumption used to bypass
723
+ * `providerFallback` / `modelChain`.
724
+ *
725
+ * This wrapper runs the orchestration to get an initial StreamResult,
726
+ * then wraps `result.stream` so that:
727
+ * - chunks are forwarded transparently while consumption succeeds
728
+ * - if iteration throws a model-access-denied error AND no chunks
729
+ * have been yielded yet, we resolve the next fallback target,
730
+ * emit `model.fallback`, and recurse
731
+ * - if chunks were already yielded, the error propagates (mid-stream
732
+ * recovery isn't safe — the consumer has half a response)
733
+ */
734
+ private streamWithIterationFallback;
700
735
  private executeStreamRequest;
701
736
  private validateStreamRequestOptions;
702
737
  private maybeHandleWorkflowStreamRequest;
@@ -933,6 +968,40 @@ export declare class NeuroLink {
933
968
  * @see {@link NeuroLink.executeTool} for events related to tool execution
934
969
  */
935
970
  getEventEmitter(): TypedEventEmitter<NeuroLinkEvents>;
971
+ /**
972
+ * Curator P1-1: on-demand (async) credential health check for a single provider.
973
+ *
974
+ * Drives a tiny real call against the provider (a short `generate()`
975
+ * completion capped at 16 output tokens) to confirm the configured
976
+ * credentials are valid. Useful at startup so a service can refuse to
977
+ * boot if its primary provider's credentials are broken instead of
978
+ * discovering the problem on first user request.
979
+ *
980
+ * @example
981
+ * ```ts
982
+ * const health = await neurolink.checkCredentials({ provider: "litellm" });
983
+ * if (health.status !== "ok") {
984
+ * throw new Error(`provider not ready: ${health.detail}`);
985
+ * }
986
+ * ```
987
+ *
988
+ * @param input - the provider to check
989
+ * @returns `{ provider, status, detail }`. Possible status values:
990
+ * - `"ok"` — credentials valid and provider reachable
991
+ * - `"missing"` — required env / credentials not configured
992
+ * - `"expired"` — credentials present but rejected (401/403)
993
+ * - `"denied"` — credentials valid but team not whitelisted for any model
994
+ * - `"network"` — provider unreachable (timeout, ECONNREFUSED, DNS)
995
+ * - `"unknown"` — other error; consult `detail`
996
+ */
997
+ checkCredentials(input: {
998
+ provider: string;
999
+ model?: string;
1000
+ }): Promise<{
1001
+ provider: string;
1002
+ status: "ok" | "missing" | "expired" | "denied" | "network" | "unknown";
1003
+ detail: string;
1004
+ }>;
936
1005
  /**
937
1006
  * Emit tool start event with execution tracking
938
1007
  * @param toolName - Name of the tool being executed
@@ -52,7 +52,7 @@ import { resolveDynamicArgument } from "./dynamic/dynamicResolver.js";
52
52
  import { initializeHippocampus } from "./memory/hippocampusInitializer.js";
53
53
  import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
54
54
  import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
55
- import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/index.js";
55
+ import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
56
56
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
57
57
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
58
58
  import { TaskManager } from "./tasks/taskManager.js";
@@ -146,6 +146,36 @@ function mcpCategoryToErrorCategory(mcpCategory) {
146
146
  * For example, a NOT_FOUND error for a model causes 6 retries of a 418KB
147
147
  * message, wasting ~628,000 tokens and adding 10+ seconds of latency.
148
148
  */
149
/**
 * Curator P2-3: heuristic detector for "model access denied" failures.
 *
 * Deliberately duck-typed so it works without the typed
 * ModelAccessDeniedError class being in scope. A value matches when any
 * of the following holds:
 *   - its `name` is "ModelAccessDeniedError"
 *   - its `code` is "MODEL_ACCESS_DENIED"
 *   - its message (case-insensitive) contains both "team" and
 *     "not allowed", contains "team can only access", or matches
 *     "not allowed to access [this] model"
 *
 * This runs inside error-handling paths, so it never throws: a value
 * that cannot be turned into a message is reported as "not a match"
 * instead of masking the original failure with a TypeError.
 *
 * @param error - any thrown value (Error, plain object, string, ...)
 * @returns true when the value looks like a model-access-denied failure
 */
function looksLikeModelAccessDenied(error) {
    if (!error) {
        return false;
    }
    const e = error;
    if (e.name === "ModelAccessDeniedError" ||
        e.code === "MODEL_ACCESS_DENIED") {
        return true;
    }
    let msg;
    if (typeof e.message === "string") {
        msg = e.message;
    }
    else {
        // `message` is absent or not a string (e.g. an Error whose message
        // was overwritten with a number). Always coerce so the
        // `.toLowerCase()` call below is safe.
        try {
            msg = String(error);
        }
        catch {
            // Null-prototype objects and hostile toString() implementations
            // cannot be stringified; treat them as "no match".
            return false;
        }
    }
    if (!msg) {
        return false;
    }
    const lower = msg.toLowerCase();
    return ((lower.includes("team") && lower.includes("not allowed")) ||
        lower.includes("team can only access") ||
        /not\s+allowed\s+to\s+access\s+(this\s+)?model/i.test(msg));
}
149
179
  function isNonRetryableProviderError(error) {
150
180
  // Check for typed error classes from providers
151
181
  if (error instanceof InvalidModelError) {
@@ -157,6 +187,13 @@ function isNonRetryableProviderError(error) {
157
187
  if (error instanceof AuthorizationError) {
158
188
  return true;
159
189
  }
190
+ // Curator P1-1: model-access-denied is permanent for the (provider, model)
191
+ // pair until the team whitelist changes. Retrying with the same config
192
+ // would just waste a second roundtrip. Caller / fallback-orchestrator
193
+ // should pick a different model.
194
+ if (error instanceof ModelAccessDeniedError) {
195
+ return true;
196
+ }
160
197
  // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
161
198
  if (error && typeof error === "object") {
162
199
  const err = error;
@@ -334,6 +371,9 @@ export class NeuroLink {
334
371
  authInitPromise;
335
372
  // Per-provider credential overrides (instance-level default)
336
373
  credentials;
374
+ // Curator P2-3: instance-level fallback policy. Read by
375
+ // runWithFallbackOrchestration on model-access-denied.
376
+ fallbackConfig = {};
337
377
  /**
338
378
  * Merge instance-level credentials with per-call credentials.
339
379
  *
@@ -721,6 +761,14 @@ export class NeuroLink {
721
761
  if (config?.modelAliasConfig) {
722
762
  this.modelAliasConfig = config.modelAliasConfig;
723
763
  }
764
+ // Curator P2-3: capture fallback policy. Per-call options can still
765
+ // override, but these are the instance-level defaults.
766
+ if (config?.providerFallback) {
767
+ this.fallbackConfig.providerFallback = config.providerFallback;
768
+ }
769
+ if (config?.modelChain) {
770
+ this.fallbackConfig.modelChain = config.modelChain;
771
+ }
724
772
  logger.setEventEmitter(this.emitter);
725
773
  // Read tool cache duration from environment variables, with a default
726
774
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -2669,7 +2717,121 @@ Current user's request: ${currentInput}`;
2669
2717
  * @since 1.0.0
2670
2718
  */
2671
2719
  async generate(optionsOrPrompt) {
2672
- return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan));
2720
+ return this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
2721
+ }
2722
+ /**
2723
+ * Curator P2-3: wraps a generate/stream call with the fallback
2724
+ * orchestration (`providerFallback` callback + `modelChain` walker).
2725
+ *
2726
+ * On a model-access-denied error from the inner call:
2727
+ * 1. Resolve the effective callback (per-call > instance > synthesised
2728
+ * from modelChain) and the effective chain (per-call > instance).
2729
+ * 2. Walk attempts: invoke callback (or pop next chain entry) → emit
2730
+ * `model.fallback` event → re-call inner with the new {provider,
2731
+ * model}.
2732
+ * 3. Stop on first success, on a callback returning null, or after
2733
+ * exhausting the chain (throw the most recent error).
2734
+ */
2735
+ async runWithFallbackOrchestration(optionsOrPrompt, kind, inner) {
2736
+ const initialAttempt = await this.attemptInner(inner, optionsOrPrompt);
2737
+ if ("ok" in initialAttempt) {
2738
+ return initialAttempt.ok;
2739
+ }
2740
+ let lastError = initialAttempt.error;
2741
+ if (!looksLikeModelAccessDenied(lastError)) {
2742
+ throw lastError;
2743
+ }
2744
+ // Build the chain orchestration.
2745
+ const requestedProvider = (typeof optionsOrPrompt === "object"
2746
+ ? optionsOrPrompt.provider
2747
+ : undefined);
2748
+ const requestedModel = (typeof optionsOrPrompt === "object"
2749
+ ? optionsOrPrompt.model
2750
+ : undefined);
2751
+ const callOpts = typeof optionsOrPrompt === "object"
2752
+ ? optionsOrPrompt
2753
+ : {};
2754
+ const perCallCallback = callOpts.providerFallback;
2755
+ const perCallChain = callOpts.modelChain;
2756
+ const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
2757
+ const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
2758
+ if (!effectiveCallback && !effectiveChain) {
2759
+ throw lastError;
2760
+ }
2761
+ // Synthesise a callback from modelChain if no explicit callback exists.
2762
+ const chainCursor = { i: 0, list: effectiveChain ?? [] };
2763
+ const synthesizedFromChain = async () => {
2764
+ while (chainCursor.i < chainCursor.list.length) {
2765
+ const next = chainCursor.list[chainCursor.i++];
2766
+ if (next !== requestedModel) {
2767
+ return { model: next };
2768
+ }
2769
+ }
2770
+ return null;
2771
+ };
2772
+ const callback = effectiveCallback ?? synthesizedFromChain;
2773
+ let attempts = 0;
2774
+ const maxAttempts = (effectiveChain?.length ?? 0) + 5;
2775
+ let attemptedRequestedModel = requestedModel;
2776
+ while (attempts++ < maxAttempts) {
2777
+ let next;
2778
+ try {
2779
+ next = await callback(lastError);
2780
+ }
2781
+ catch (cbErr) {
2782
+ logger.warn("[NeuroLink] providerFallback callback threw", {
2783
+ error: cbErr instanceof Error ? cbErr.message : String(cbErr),
2784
+ });
2785
+ throw lastError;
2786
+ }
2787
+ if (!next) {
2788
+ throw lastError;
2789
+ }
2790
+ // Emit model.fallback event so cost/audit listeners can record it.
2791
+ try {
2792
+ this.emitter.emit("model.fallback", {
2793
+ requestedProvider,
2794
+ requestedModel: attemptedRequestedModel,
2795
+ fallbackProvider: next.provider ?? requestedProvider,
2796
+ fallbackModel: next.model,
2797
+ reason: lastError instanceof Error ? lastError.message : String(lastError),
2798
+ kind,
2799
+ timestamp: Date.now(),
2800
+ });
2801
+ }
2802
+ catch {
2803
+ /* listener errors are non-fatal */
2804
+ }
2805
+ const retriedOptions = typeof optionsOrPrompt === "object"
2806
+ ? {
2807
+ ...optionsOrPrompt,
2808
+ ...(next.provider && { provider: next.provider }),
2809
+ ...(next.model && { model: next.model }),
2810
+ // Strip the fallback hooks so the retry doesn't re-orchestrate.
2811
+ providerFallback: undefined,
2812
+ modelChain: undefined,
2813
+ }
2814
+ : optionsOrPrompt;
2815
+ const retryAttempt = await this.attemptInner(inner, retriedOptions);
2816
+ if ("ok" in retryAttempt) {
2817
+ return retryAttempt.ok;
2818
+ }
2819
+ lastError = retryAttempt.error;
2820
+ attemptedRequestedModel = next.model ?? attemptedRequestedModel;
2821
+ if (!looksLikeModelAccessDenied(lastError)) {
2822
+ throw lastError;
2823
+ }
2824
+ }
2825
+ throw lastError;
2826
+ }
2827
+ async attemptInner(inner, options) {
2828
+ try {
2829
+ const ok = await inner(options);
2830
+ return { ok };
2831
+ }
2832
+ catch (error) {
2833
+ return { error };
2834
+ }
2673
2835
  }
2674
2836
  async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2675
2837
  return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
@@ -4566,7 +4728,128 @@ Current user's request: ${currentInput}`;
4566
4728
  : [],
4567
4729
  optionKeys: Object.keys(options),
4568
4730
  });
4569
- return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...options }));
4731
+ return this.streamWithIterationFallback(options);
4732
+ }
4733
+ /**
4734
+ * Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
4735
+ * errors thrown during async iteration (e.g. LiteLLM throwing inside
4736
+ * `createLiteLLMTransformedStream`). The standard
4737
+ * `runWithFallbackOrchestration` only catches errors thrown while the
4738
+ * `StreamResult` is being created — once we hand the iterator back to
4739
+ * the caller, errors raised during consumption used to bypass
4740
+ * `providerFallback` / `modelChain`.
4741
+ *
4742
+ * This wrapper runs the orchestration to get an initial StreamResult,
4743
+ * then wraps `result.stream` so that:
4744
+ * - chunks are forwarded transparently while consumption succeeds
4745
+ * - if iteration throws a model-access-denied error AND no chunks
4746
+ * have been yielded yet, we resolve the next fallback target,
4747
+ * emit `model.fallback`, and recurse
4748
+ * - if chunks were already yielded, the error propagates (mid-stream
4749
+ * recovery isn't safe — the consumer has half a response)
4750
+ */
4751
+ async streamWithIterationFallback(options) {
4752
+ const result = await this.runWithFallbackOrchestration(options, "stream", (opts) => metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...opts })));
4753
+ const callOpts = options;
4754
+ const perCallCallback = callOpts.providerFallback;
4755
+ const perCallChain = callOpts.modelChain;
4756
+ const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
4757
+ const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
4758
+ if (!effectiveCallback && !effectiveChain) {
4759
+ // No fallback configured — nothing to wrap.
4760
+ return result;
4761
+ }
4762
+ // Build a chain cursor scoped to this stream's lifetime; consumers
4763
+ // who set up `modelChain` get sequential progression here too.
4764
+ const chainCursor = {
4765
+ i: 0,
4766
+ list: effectiveChain ?? [],
4767
+ requestedModel: options.model,
4768
+ };
4769
+ const callback = effectiveCallback ??
4770
+ (async () => {
4771
+ while (chainCursor.i < chainCursor.list.length) {
4772
+ const next = chainCursor.list[chainCursor.i++];
4773
+ if (next !== chainCursor.requestedModel) {
4774
+ return { model: next };
4775
+ }
4776
+ }
4777
+ return null;
4778
+ });
4779
+ const self = this;
4780
+ // Yield type is the original stream's element type, threaded through
4781
+ // as unknown — we forward chunks unchanged so structural identity is
4782
+ // preserved without a local type alias (CLAUDE.md rule 2).
4783
+ const wrappedStream = (async function* () {
4784
+ let yielded = 0;
4785
+ let currentResult = result;
4786
+ let attemptedRequestedProvider = options.provider;
4787
+ let attemptedRequestedModel = options.model;
4788
+ const maxAttempts = (effectiveChain?.length ?? 0) + 5;
4789
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
4790
+ try {
4791
+ for await (const chunk of currentResult.stream) {
4792
+ yielded++;
4793
+ yield chunk;
4794
+ }
4795
+ return;
4796
+ }
4797
+ catch (err) {
4798
+ if (yielded > 0 || !looksLikeModelAccessDenied(err)) {
4799
+ throw err;
4800
+ }
4801
+ let next;
4802
+ try {
4803
+ next = await callback(err);
4804
+ }
4805
+ catch (cbErr) {
4806
+ logger.warn("[NeuroLink.stream] providerFallback callback threw during iteration", {
4807
+ error: cbErr instanceof Error ? cbErr.message : String(cbErr),
4808
+ });
4809
+ throw err;
4810
+ }
4811
+ if (!next) {
4812
+ throw err;
4813
+ }
4814
+ try {
4815
+ self.emitter.emit("model.fallback", {
4816
+ requestedProvider: attemptedRequestedProvider,
4817
+ requestedModel: attemptedRequestedModel,
4818
+ fallbackProvider: next.provider ?? attemptedRequestedProvider,
4819
+ fallbackModel: next.model,
4820
+ reason: err instanceof Error ? err.message : String(err),
4821
+ kind: "stream",
4822
+ phase: "iteration",
4823
+ timestamp: Date.now(),
4824
+ });
4825
+ }
4826
+ catch {
4827
+ /* listener errors are non-fatal */
4828
+ }
4829
+ const retriedOptions = {
4830
+ ...options,
4831
+ ...(next.provider && {
4832
+ provider: next.provider,
4833
+ }),
4834
+ ...(next.model && { model: next.model }),
4835
+ // Strip the hooks so the inner orchestration doesn't double-fall-back.
4836
+ providerFallback: undefined,
4837
+ modelChain: undefined,
4838
+ };
4839
+ attemptedRequestedProvider =
4840
+ next.provider ?? attemptedRequestedProvider;
4841
+ attemptedRequestedModel = next.model ?? attemptedRequestedModel;
4842
+ currentResult = await metricsTraceContextStorage.run(self.createMetricsTraceContext(), () => self.executeStreamRequest({ ...retriedOptions }));
4843
+ }
4844
+ }
4845
+ // Exhausted attempts — re-throw the most recent error captured by
4846
+ // the inner loop. We only get here if the loop didn't return.
4847
+ throw new Error(`[NeuroLink.stream] iteration fallback exhausted ${maxAttempts} attempts`);
4848
+ })();
4849
+ return {
4850
+ ...result,
4851
+ stream: wrappedStream,
4852
+ };
4570
4853
  }
4571
4854
  async executeStreamRequest(options) {
4572
4855
  // Dynamic argument resolution — resolve any function-valued options before downstream use
@@ -5811,6 +6094,87 @@ Current user's request: ${currentInput}`;
5811
6094
  getEventEmitter() {
5812
6095
  return this.emitter;
5813
6096
  }
6097
+ /**
6098
+ * Curator P1-1: synchronous credential health check for a single provider.
6099
+ *
6100
+ * Drives a tiny real call against the provider (1-token completion or
6101
+ * `/models` listing depending on provider) to confirm the configured
6102
+ * credentials are valid. Useful at startup so a service can refuse to
6103
+ * boot if its primary provider's credentials are broken instead of
6104
+ * discovering the problem on first user request.
6105
+ *
6106
+ * @example
6107
+ * ```ts
6108
+ * const health = await neurolink.checkCredentials({ provider: "litellm" });
6109
+ * if (health.status !== "ok") {
6110
+ * throw new Error(`provider not ready: ${health.detail}`);
6111
+ * }
6112
+ * ```
6113
+ *
6114
+ * @param input - the provider to check
6115
+ * @returns `{ provider, status, detail }`. Possible status values:
6116
+ * - `"ok"` — credentials valid and provider reachable
6117
+ * - `"missing"` — required env / credentials not configured
6118
+ * - `"expired"` — credentials present but rejected (401/403)
6119
+ * - `"denied"` — credentials valid but team not whitelisted for any model
6120
+ * - `"network"` — provider unreachable (timeout, ECONNREFUSED, DNS)
6121
+ * - `"unknown"` — other error; consult `detail`
6122
+ */
6123
+ async checkCredentials(input) {
6124
+ const { provider, model } = input;
6125
+ const probeText = "ping";
6126
+ try {
6127
+ // 1-token probe is cheap, exercises auth + routing without much cost.
6128
+ await this.generate({
6129
+ provider: provider,
6130
+ ...(model && { model }),
6131
+ input: { text: probeText },
6132
+ maxTokens: 16,
6133
+ disableTools: true,
6134
+ });
6135
+ return { provider, status: "ok", detail: "credentials valid" };
6136
+ }
6137
+ catch (err) {
6138
+ const msg = err instanceof Error ? err.message : String(err);
6139
+ const lower = msg.toLowerCase();
6140
+ if (err instanceof ModelAccessDeniedError) {
6141
+ return {
6142
+ provider,
6143
+ status: "denied",
6144
+ detail: msg,
6145
+ };
6146
+ }
6147
+ if (lower.includes("authentication") ||
6148
+ lower.includes("401") ||
6149
+ lower.includes("invalid api key") ||
6150
+ lower.includes("incorrect api key") ||
6151
+ lower.includes("api_key_invalid") ||
6152
+ lower.includes("token has expired") ||
6153
+ lower.includes("expired credentials")) {
6154
+ return { provider, status: "expired", detail: msg };
6155
+ }
6156
+ if (lower.includes("not configured") ||
6157
+ lower.includes("missing api") ||
6158
+ lower.includes("api key is required") ||
6159
+ lower.includes("no api key") ||
6160
+ lower.includes("application default credentials") ||
6161
+ lower.includes("google_application_credentials") ||
6162
+ lower.includes("project_id") ||
6163
+ lower.includes("default credentials") ||
6164
+ lower.includes("service account")) {
6165
+ return { provider, status: "missing", detail: msg };
6166
+ }
6167
+ if (lower.includes("econnrefused") ||
6168
+ lower.includes("enotfound") ||
6169
+ lower.includes("could not resolve") ||
6170
+ lower.includes("timeout") ||
6171
+ lower.includes("network") ||
6172
+ lower.includes("cannot connect")) {
6173
+ return { provider, status: "network", detail: msg };
6174
+ }
6175
+ return { provider, status: "unknown", detail: msg };
6176
+ }
6177
+ }
5814
6178
  // ========================================
5815
6179
  // ENHANCED: Tool Event Emission API
5816
6180
  // ========================================
@@ -5,7 +5,7 @@ import { BaseProvider } from "../core/baseProvider.js";
5
5
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
6
6
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
7
7
  import { createProxyFetch } from "../proxy/proxyFetch.js";
8
- import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
8
+ import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
9
9
  import { isAbortError } from "../utils/errorHandling.js";
10
10
  import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
11
11
  import { logger } from "../utils/logger.js";
@@ -100,6 +100,17 @@ export class LiteLLMProvider extends BaseProvider {
100
100
  return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
101
101
  `${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
102
102
  }
103
+ // Curator P1-1: detect "team not allowed to access model" responses
104
+ // and surface as ModelAccessDeniedError with the allowed_models array
105
+ // parsed from the body. Must run before the generic "API key" check
106
+ // because LiteLLM phrases this as a 403 distinct from auth.
107
+ if (isModelAccessDeniedMessage(errorRecord.message)) {
108
+ return new ModelAccessDeniedError(errorRecord.message, {
109
+ provider: this.providerName,
110
+ requestedModel: this.modelName,
111
+ allowedModels: parseAllowedModels(errorRecord.message),
112
+ });
113
+ }
103
114
  if (errorRecord.message.includes("API_KEY_INVALID") ||
104
115
  errorRecord.message.includes("Invalid API key")) {
105
116
  return new AuthenticationError("Invalid LiteLLM configuration. Please check your LITELLM_API_KEY environment variable.", this.providerName);
@@ -235,10 +235,27 @@ export class OpenAIProvider extends BaseProvider {
235
235
  const errorType = errorObj?.type && typeof errorObj.type === "string"
236
236
  ? errorObj.type
237
237
  : undefined;
238
+ const statusCode = typeof errorObj?.status === "number"
239
+ ? errorObj.status
240
+ : typeof errorObj?.statusCode === "number"
241
+ ? errorObj.statusCode
242
+ : undefined;
243
+ // Curator P1-1 / Reviewer Finding #4: only the explicit auth markers
244
+ // map to AuthenticationError. Earlier we treated every
245
+ // `invalid_request_error` as an auth failure — that's OpenAI's catch-all
246
+ // for any bad request (unsupported parameter, malformed JSON, etc.) and
247
+ // mislabelled them as "invalid API key". Use credential-specific
248
+ // signals only.
238
249
  if (message.includes("API_KEY_INVALID") ||
239
250
  message.includes("Invalid API key") ||
240
- errorType === "invalid_api_key") {
241
- return new AuthenticationError("Invalid OpenAI API key. Please check your OPENAI_API_KEY environment variable.", this.providerName);
251
+ message.includes("Incorrect API key") ||
252
+ message.includes("invalid_api_key") ||
253
+ errorType === "invalid_api_key" ||
254
+ statusCode === 401) {
255
+ return new AuthenticationError(message.includes("Incorrect API key") ||
256
+ message.includes("Invalid API key")
257
+ ? message
258
+ : "Invalid OpenAI API key. Please check your OPENAI_API_KEY environment variable.", this.providerName);
242
259
  }
243
260
  if (message.includes("rate limit") || errorType === "rate_limit_error") {
244
261
  return new RateLimitError("OpenAI rate limit exceeded. Please try again later.", this.providerName);
@@ -21,6 +21,16 @@ export type NeuroLinkConfig = {
21
21
  configVersion?: string;
22
22
  [key: string]: unknown;
23
23
  };
24
+ /**
25
+ * Curator P2-3: callback signature for centralized fallback policy. Invoked
26
+ * when a generate/stream call fails with what looks like a model-access-denied
27
+ * error. Return `{ provider, model }` (either / both optional) to drive a
28
+ * retry; return `null` to bubble the original error untouched.
29
+ */
30
+ export type ProviderFallbackCallback = (error: unknown) => Promise<{
31
+ provider?: string;
32
+ model?: string;
33
+ } | null>;
24
34
  /**
25
35
  * Configuration object for NeuroLink constructor.
26
36
  */
@@ -43,6 +53,19 @@ export type NeurolinkConstructorConfig = {
43
53
  * from this NeuroLink instance. Per-call credentials override these.
44
54
  */
45
55
  credentials?: NeurolinkCredentials;
56
+ /**
57
+ * Curator P2-3: callback invoked on model-access-denied. Lets a host (e.g.
58
+ * Curator) centrally drive fallback policy. The callback receives the
59
+ * original error and returns the next `{ provider, model }` to try, or
60
+ * `null` to bubble the error.
61
+ */
62
+ providerFallback?: ProviderFallbackCallback;
63
+ /**
64
+ * Curator P2-3: ordered list of model names to try in sequence on
65
+ * model-access-denied. Sugar over `providerFallback`. The current
66
+ * provider is preserved across the chain; only the model name changes.
67
+ */
68
+ modelChain?: string[];
46
69
  };
47
70
  /**
48
71
  * Configuration for MCP enhancement modules wired into generate()/stream() paths.
@@ -104,3 +104,45 @@ export declare class ModelAccessError extends BaseError {
104
104
  readonly requiredTier: string;
105
105
  constructor(model: string, tier: string, requiredTier: string);
106
106
  }
107
+ /**
108
+ * Curator P1-1: thrown when a provider rejects a request because the
109
+ * caller's team / API key is not whitelisted for the requested model.
110
+ *
111
+ * LiteLLM's `team not allowed to access model. This team can only access
112
+ * models=['glm-latest', 'kimi-latest', ...]` is the canonical example —
113
+ * the list is parsed off the error body so callers / fallback orchestrators
114
+ * can choose a whitelisted alternative without scraping strings.
115
+ */
116
+ export declare class ModelAccessDeniedError extends ProviderError {
117
+ readonly requestedModel: string | undefined;
118
+ readonly allowedModels: string[] | undefined;
119
+ readonly code: "MODEL_ACCESS_DENIED";
120
+ constructor(message: string, options?: {
121
+ provider?: string;
122
+ requestedModel?: string;
123
+ allowedModels?: string[];
124
+ });
125
+ }
126
+ /**
127
+ * Parse the `allowed_models` array out of a provider error message body.
128
+ * Currently targets the LiteLLM team-whitelist response shape:
129
+ *
130
+ * "team not allowed to access model. This team can only access
131
+ * models=['glm-latest', 'kimi-latest', 'open-large']"
132
+ *
133
+ * Implementation note: deliberately uses `indexOf`/`slice` instead of a
134
+ * single `/models\s*=\s*\[([^\]]*)\]/` regex. CodeQL flagged the latter
135
+ * as `js/polynomial-redos` because the `[^\]]*` greedy quantifier on
136
+ * library-supplied input can be exploited by a crafted long string. The
137
+ * indexOf/slice path is O(n) with no backtracking and we additionally
138
+ * cap the input length.
139
+ *
140
+ * Returns undefined when no list is found.
141
+ */
142
+ export declare function parseAllowedModels(message: string): string[] | undefined;
143
+ /**
144
+ * Returns true when `message` looks like a model-access-denied response
145
+ * (LiteLLM "team not allowed", generic "not allowed to access model",
146
+ * or "team can only access models=[...]").
147
+ */
148
+ export declare function isModelAccessDeniedMessage(message: string): boolean;