@juspay/neurolink 9.58.0 → 9.59.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,13 +5,41 @@
  * Enhanced AI provider system with natural MCP tool access.
  * Uses real MCP infrastructure for tool discovery and execution.
  */
- import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult } from "./types/index.js";
+ import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext } from "./types/index.js";
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
  import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
  import { ExternalServerManager } from "./mcp/externalServerManager.js";
  import { MCPToolRegistry } from "./mcp/toolRegistry.js";
  import type { DynamicOptions } from "./types/index.js";
  import { TaskManager } from "./tasks/taskManager.js";
+ /**
+ * Curator P2-4 dedup (concurrency-safe): native providers emit
+ * `generation:end` on the shared SDK emitter. We attach a fresh
+ * mutable `dedupContext` object directly to the per-call
+ * `StreamOptions` (under `_streamDedupContext`) so each stream gets
+ * its own instance — concurrent streams have different option objects
+ * and therefore different contexts, so they cannot interfere.
+ *
+ * Native provider emit sites read `options._streamDedupContext` and
+ * flip `.providerEmitted = true` before emitting; the orchestration's
+ * finally block reads the same closed-over reference and skips its
+ * own emit when the flag is set.
+ *
+ * This avoids the AsyncLocalStorage approach which doesn't reliably
+ * propagate through async-generator yield boundaries when iteration
+ * happens from outside the original `run()` scope (e.g. when the
+ * consumer drives `for await of result.stream` after `sdk.stream(...)`
+ * returns).
+ */
+ export declare const STREAM_DEDUP_CONTEXT_KEY: "_streamDedupContext";
+ /**
+ * Native providers call this from their `generation:end` emit sites,
+ * passing the same `options` object they received. Safe no-op when
+ * the field isn't set.
+ */
+ export declare function markStreamProviderEmittedGenerationEnd(options: {
+ _streamDedupContext?: StreamGenerationEndContext;
+ } | undefined): void;
  export declare class NeuroLink {
  private mcpInitialized;
  private mcpSkipped;
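
A minimal sketch of the dedup handshake described above, using the exported helper. The simplified option shape and the in-package import paths are illustrative; the real `StreamOptions` carries many more fields.

```ts
import type { StreamGenerationEndContext } from "./types/index.js";
import { markStreamProviderEmittedGenerationEnd } from "./neurolink.js";

type DedupCarrier = { _streamDedupContext?: StreamGenerationEndContext };

// Orchestration side: one fresh context per stream call, attached to that call's options.
const options: DedupCarrier = { _streamDedupContext: { providerEmitted: false } };

// Native provider emit site: flip the flag first, then emit on the shared emitter.
function nativeProviderStreamEnd(opts: DedupCarrier): void {
  markStreamProviderEmittedGenerationEnd(opts); // safe no-op when the field is absent
  // emitter.emit("generation:end", { ... });
}

// Orchestration finally block: emit only if no native provider already did.
nativeProviderStreamEnd(options);
if (!options._streamDedupContext?.providerEmitted) {
  // emitter.emit("generation:end", { ... });
}
```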
@@ -968,6 +996,40 @@ export declare class NeuroLink {
  * @see {@link NeuroLink.executeTool} for events related to tool execution
  */
  getEventEmitter(): TypedEventEmitter<NeuroLinkEvents>;
+ /**
+ * Curator P1-1: on-demand credential health check for a single provider.
+ *
+ * Drives a tiny real call against the provider (a few-token completion or
+ * `/models` listing, depending on the provider) to confirm the configured
+ * credentials are valid. Useful at startup so a service can refuse to
+ * boot if its primary provider's credentials are broken instead of
+ * discovering the problem on first user request.
+ *
+ * @example
+ * ```ts
+ * const health = await neurolink.checkCredentials({ provider: "litellm" });
+ * if (health.status !== "ok") {
+ * throw new Error(`provider not ready: ${health.detail}`);
+ * }
+ * ```
+ *
+ * @param input - the provider to check
+ * @returns `{ provider, status, detail }`. Possible status values:
+ * - `"ok"` — credentials valid and provider reachable
+ * - `"missing"` — required env / credentials not configured
+ * - `"expired"` — credentials present but rejected (401/403)
+ * - `"denied"` — credentials valid but team not whitelisted for the requested model
+ * - `"network"` — provider unreachable (timeout, ECONNREFUSED, DNS)
+ * - `"unknown"` — other error; consult `detail`
+ */
+ checkCredentials(input: {
+ provider: string;
+ model?: string;
+ }): Promise<{
+ provider: string;
+ status: "ok" | "missing" | "expired" | "denied" | "network" | "unknown";
+ detail: string;
+ }>;
  /**
  * Emit tool start event with execution tracking
  * @param toolName - Name of the tool being executed
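
Extending the `@example` above into a hedged startup gate. The provider name, no-argument constructor call, and error policy below are placeholders, not library defaults.

```ts
import { NeuroLink } from "@juspay/neurolink";

// Illustrative boot-time check; adjust provider/model to your deployment.
async function assertPrimaryProviderReady(): Promise<void> {
  const neurolink = new NeuroLink();
  const health = await neurolink.checkCredentials({ provider: "litellm" });
  switch (health.status) {
    case "ok":
      return;
    case "missing":
    case "expired":
      throw new Error(`Fix ${health.provider} credentials: ${health.detail}`);
    case "denied":
      throw new Error(`Team not whitelisted on ${health.provider}: ${health.detail}`);
    case "network":
      throw new Error(`${health.provider} unreachable: ${health.detail}`);
    default:
      throw new Error(`Unexpected credential state for ${health.provider}: ${health.detail}`);
  }
}
```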
@@ -52,7 +52,7 @@ import { resolveDynamicArgument } from "./dynamic/dynamicResolver.js";
  import { initializeHippocampus } from "./memory/hippocampusInitializer.js";
  import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
  import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
- import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/index.js";
+ import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
  import { TaskManager } from "./tasks/taskManager.js";
@@ -187,6 +187,13 @@ function isNonRetryableProviderError(error) {
  if (error instanceof AuthorizationError) {
  return true;
  }
+ // Curator P1-1: model-access-denied is permanent for the (provider, model)
+ // pair until the team whitelist changes. Retrying with the same config
+ // would just waste a second roundtrip. Caller / fallback-orchestrator
+ // should pick a different model.
+ if (error instanceof ModelAccessDeniedError) {
+ return true;
+ }
  // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
  if (error && typeof error === "object") {
  const err = error;
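
As the comment says, the caller or fallback orchestrator should switch models rather than retry. A hypothetical caller-side sketch; the provider and model names, and the in-package import paths, are placeholders.

```ts
import { ModelAccessDeniedError } from "./types/index.js";
import type { NeuroLink } from "./neurolink.js";

// Hypothetical fallback: when the requested model is not whitelisted, retry once
// with one of the models the provider reports as allowed.
async function generateWithWhitelistFallback(neurolink: NeuroLink, prompt: string) {
  const base = { provider: "litellm", input: { text: prompt } };
  try {
    return await neurolink.generate({ ...base, model: "gpt-4o" });
  } catch (error) {
    if (error instanceof ModelAccessDeniedError && error.allowedModels?.length) {
      // A real orchestrator would rank the allowed models instead of taking the first.
      return await neurolink.generate({ ...base, model: error.allowedModels[0] });
    }
    throw error;
  }
}
```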
@@ -290,6 +297,37 @@ function isNonRetryableProviderError(error) {
  * same NeuroLink instance would clobber each other's trace context.
  */
  const metricsTraceContextStorage = new AsyncLocalStorage();
+ /**
+ * Curator P2-4 dedup (concurrency-safe): native providers emit
+ * `generation:end` on the shared SDK emitter. We attach a fresh
+ * mutable `dedupContext` object directly to the per-call
+ * `StreamOptions` (under `_streamDedupContext`) so each stream gets
+ * its own instance — concurrent streams have different option objects
+ * and therefore different contexts, so they cannot interfere.
+ *
+ * Native provider emit sites read `options._streamDedupContext` and
+ * flip `.providerEmitted = true` before emitting; the orchestration's
+ * finally block reads the same closed-over reference and skips its
+ * own emit when the flag is set.
+ *
+ * This avoids the AsyncLocalStorage approach which doesn't reliably
+ * propagate through async-generator yield boundaries when iteration
+ * happens from outside the original `run()` scope (e.g. when the
+ * consumer drives `for await of result.stream` after `sdk.stream(...)`
+ * returns).
+ */
+ export const STREAM_DEDUP_CONTEXT_KEY = "_streamDedupContext";
+ /**
+ * Native providers call this from their `generation:end` emit sites,
+ * passing the same `options` object they received. Safe no-op when
+ * the field isn't set.
+ */
+ export function markStreamProviderEmittedGenerationEnd(options) {
+ const ctx = options?._streamDedupContext;
+ if (ctx) {
+ ctx.providerEmitted = true;
+ }
+ }
  export class NeuroLink {
  mcpInitialized = false;
  mcpSkipped = false;
@@ -4977,8 +5015,23 @@ Current user's request: ${currentInput}`;
  const streamStartTime = Date.now();
  const sessionId = enhancedOptions.context
  ?.sessionId;
+ // Curator P2-4 dedup (concurrency-safe): native provider stream paths
+ // (Gemini 3 on Vertex / Google AI Studio) emit `generation:end`
+ // themselves. We attach a per-stream mutable flag directly to
+ // `enhancedOptions._streamDedupContext` — native providers receive
+ // these options and flip the flag before their emit; this finally
+ // block reads the same closed-over reference. Concurrent streams
+ // have different option objects so the contexts don't interfere.
+ const dedupContext = {
+ providerEmitted: false,
+ };
+ enhancedOptions._streamDedupContext = dedupContext;
  const processedStream = (async function* () {
  let streamError;
+ // Curator P2-4: hoist `resolvedUsage` so the finally block can emit a
+ // single `generation:end` event with cost data. Cost listeners
+ // subscribe here; previously the stream path never fired it.
+ let resolvedUsage;
  try {
  for await (const chunk of mcpStream) {
  chunkCount++;
@@ -5008,7 +5061,7 @@ Current user's request: ${currentInput}`;
  accumulatedContent += content;
  });
  }
- let resolvedUsage = streamUsage;
+ resolvedUsage = streamUsage;
  if (!resolvedUsage && streamAnalytics) {
  try {
  const resolved = await Promise.resolve(streamAnalytics);
@@ -5083,6 +5136,61 @@ Current user's request: ${currentInput}`;
  guardrailsBlocked: metadata.guardrailsBlocked,
  error: metadata.error,
  });
+ // Curator P2-4: emit `generation:end` exactly once per stream so
+ // cost listeners receive the same contract as for `generate()`.
+ // The previous implementation only fired `stream:complete`, leaving
+ // any subscriber to `generation:end` with zero events.
+ //
+ // Dedup: native provider stream paths (Gemini 3 on Vertex / Google
+ // AI Studio) already emit `generation:end` themselves so Pipeline B
+ // (Langfuse) records a GENERATION observation. Skip our emit when
+ // they already fired — preserves their Pipeline B observation
+ // source and keeps the "exactly once" contract. Per-stream flag
+ // is concurrency-safe because it's scoped to this call's own options object.
+ if (!dedupContext.providerEmitted) {
+ try {
+ const finalProvider = metadata.fallbackProvider ?? providerName ?? "unknown";
+ const finalModel = metadata.fallbackModel ??
+ streamModel ??
+ enhancedOptions.model ??
+ "unknown";
+ const finalFinishReason = streamError
+ ? "error"
+ : (streamState.finishReason ?? "stop");
+ self.emitter.emit("generation:end", {
+ provider: finalProvider,
+ model: finalModel,
+ responseTime: Date.now() - streamStartTime,
+ toolsUsed: streamState.toolCalls?.map((t) => t.toolName),
+ timestamp: Date.now(),
+ result: {
+ content: accumulatedContent,
+ usage: resolvedUsage,
+ model: finalModel,
+ provider: finalProvider,
+ finishReason: finalFinishReason,
+ },
+ prompt: enhancedOptions.input?.text ||
+ enhancedOptions.prompt,
+ temperature: enhancedOptions.temperature,
+ maxTokens: enhancedOptions.maxTokens,
+ success: !streamError,
+ error: streamError
+ ? streamError instanceof Error
+ ? streamError.message
+ : String(streamError)
+ : undefined,
+ pipelineAHandled: true,
+ });
+ }
+ catch (emitError) {
+ logger.debug("[NeuroLink.stream] generation:end listener threw — ignored", {
+ error: emitError instanceof Error
+ ? emitError.message
+ : String(emitError),
+ });
+ }
+ }
  self._disableToolCacheForCurrentRequest = false;
  cleanupListeners();
  streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
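
With this emit in place, a listener registered through `getEventEmitter()` sees one `generation:end` per stream. A rough sketch of a cost/latency listener follows; the exact payload typing comes from `NeuroLinkEvents`, so treat the field access below as an assumption mirroring the emit above.

```ts
import { NeuroLink } from "@juspay/neurolink";

const neurolink = new NeuroLink();
const emitter = neurolink.getEventEmitter();

emitter.on("generation:end", (event) => {
  // Fields mirror the payload emitted above (provider, model, responseTime, result.usage).
  console.log("generation finished", {
    provider: event.provider,
    model: event.model,
    responseTimeMs: event.responseTime,
    success: event.success,
    usage: event.result?.usage,
  });
});
```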
@@ -6087,6 +6195,87 @@ Current user's request: ${currentInput}`;
  getEventEmitter() {
  return this.emitter;
  }
+ /**
+ * Curator P1-1: on-demand credential health check for a single provider.
+ *
+ * Drives a tiny real call against the provider (a few-token completion or
+ * `/models` listing, depending on the provider) to confirm the configured
+ * credentials are valid. Useful at startup so a service can refuse to
+ * boot if its primary provider's credentials are broken instead of
+ * discovering the problem on first user request.
+ *
+ * @example
+ * ```ts
+ * const health = await neurolink.checkCredentials({ provider: "litellm" });
+ * if (health.status !== "ok") {
+ * throw new Error(`provider not ready: ${health.detail}`);
+ * }
+ * ```
+ *
+ * @param input - the provider to check
+ * @returns `{ provider, status, detail }`. Possible status values:
+ * - `"ok"` — credentials valid and provider reachable
+ * - `"missing"` — required env / credentials not configured
+ * - `"expired"` — credentials present but rejected (401/403)
+ * - `"denied"` — credentials valid but team not whitelisted for the requested model
+ * - `"network"` — provider unreachable (timeout, ECONNREFUSED, DNS)
+ * - `"unknown"` — other error; consult `detail`
+ */
+ async checkCredentials(input) {
+ const { provider, model } = input;
+ const probeText = "ping";
+ try {
+ // A small capped probe (16 tokens max) is cheap, exercises auth + routing without much cost.
+ await this.generate({
+ provider: provider,
+ ...(model && { model }),
+ input: { text: probeText },
+ maxTokens: 16,
+ disableTools: true,
+ });
+ return { provider, status: "ok", detail: "credentials valid" };
+ }
+ catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ const lower = msg.toLowerCase();
+ if (err instanceof ModelAccessDeniedError) {
+ return {
+ provider,
+ status: "denied",
+ detail: msg,
+ };
+ }
+ if (lower.includes("authentication") ||
+ lower.includes("401") ||
+ lower.includes("invalid api key") ||
+ lower.includes("incorrect api key") ||
+ lower.includes("api_key_invalid") ||
+ lower.includes("token has expired") ||
+ lower.includes("expired credentials")) {
+ return { provider, status: "expired", detail: msg };
+ }
+ if (lower.includes("not configured") ||
+ lower.includes("missing api") ||
+ lower.includes("api key is required") ||
+ lower.includes("no api key") ||
+ lower.includes("application default credentials") ||
+ lower.includes("google_application_credentials") ||
+ lower.includes("project_id") ||
+ lower.includes("default credentials") ||
+ lower.includes("service account")) {
+ return { provider, status: "missing", detail: msg };
+ }
+ if (lower.includes("econnrefused") ||
+ lower.includes("enotfound") ||
+ lower.includes("could not resolve") ||
+ lower.includes("timeout") ||
+ lower.includes("network") ||
+ lower.includes("cannot connect")) {
+ return { provider, status: "network", detail: msg };
+ }
+ return { provider, status: "unknown", detail: msg };
+ }
+ }
  // ========================================
  // ENHANCED: Tool Event Emission API
  // ========================================
@@ -4,6 +4,7 @@ import { ErrorCategory, ErrorSeverity, GoogleAIModels, } from "../constants/enum
  import { BaseProvider } from "../core/baseProvider.js";
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
+ import { markStreamProviderEmittedGenerationEnd, } from "../neurolink.js";
  import { SpanStatusCode } from "@opentelemetry/api";
  import { ATTR, tracers, withClientSpan } from "../telemetry/index.js";
  import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
@@ -735,6 +736,9 @@ export class GoogleAIStudioProvider extends BaseProvider {
  // AI SDK so experimental_telemetry is never injected; we emit manually.
  const nativeStreamEmitter = this.neurolink?.getEventEmitter();
  if (nativeStreamEmitter) {
+ // Curator P2-4 dedup: flag the per-stream context attached
+ // to options so the orchestration skips its own emit.
+ markStreamProviderEmittedGenerationEnd(options);
  nativeStreamEmitter.emit("generation:end", {
  provider: this.providerName,
  responseTime,
@@ -767,6 +771,9 @@ export class GoogleAIStudioProvider extends BaseProvider {
  // Emit failure generation:end so Pipeline B records the failed stream
  const errorEmitter = this.neurolink?.getEventEmitter();
  if (errorEmitter) {
+ // Curator P2-4 dedup: flag the per-stream context attached
+ // to options so the orchestration skips its own emit.
+ markStreamProviderEmittedGenerationEnd(options);
  errorEmitter.emit("generation:end", {
  provider: this.providerName,
  responseTime: Date.now() - startTime,
@@ -10,6 +10,7 @@ import { ErrorCategory, ErrorSeverity, } from "../constants/enums.js";
  import { BaseProvider } from "../core/baseProvider.js";
  import { DEFAULT_MAX_STEPS, GLOBAL_LOCATION_MODELS, } from "../core/constants.js";
  import { ModelConfigurationManager } from "../core/modelConfiguration.js";
+ import { markStreamProviderEmittedGenerationEnd, } from "../neurolink.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
  import { ATTR, tracers, withClientSpan } from "../telemetry/index.js";
  import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
@@ -1630,8 +1631,12 @@ export class GoogleVertexProvider extends BaseProvider {
  // Emit generation:end so Pipeline B (Langfuse) creates a GENERATION
  // observation. The native @google/genai stream path on Vertex bypasses the
  // Vercel AI SDK so experimental_telemetry is never injected; we emit manually.
+ // Curator P2-4 dedup: flag the per-stream context attached to options
+ // so the orchestration in `runStandardStreamRequest` knows we already
+ // emitted and skips its own emit (preserving exactly-once).
  const vertexStreamEmitter = this.neurolink?.getEventEmitter();
  if (vertexStreamEmitter) {
+ markStreamProviderEmittedGenerationEnd(params.options);
  vertexStreamEmitter.emit("generation:end", {
  provider: this.providerName,
  responseTime,
@@ -5,7 +5,7 @@ import { BaseProvider } from "../core/baseProvider.js";
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
- import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
+ import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
  import { isAbortError } from "../utils/errorHandling.js";
  import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
  import { logger } from "../utils/logger.js";
@@ -100,6 +100,17 @@ export class LiteLLMProvider extends BaseProvider {
  return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
  `${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
  }
+ // Curator P1-1: detect "team not allowed to access model" responses
+ // and surface as ModelAccessDeniedError with the allowed_models array
+ // parsed from the body. Must run before the generic "API key" check
+ // because LiteLLM phrases this as a 403 distinct from auth.
+ if (isModelAccessDeniedMessage(errorRecord.message)) {
+ return new ModelAccessDeniedError(errorRecord.message, {
+ provider: this.providerName,
+ requestedModel: this.modelName,
+ allowedModels: parseAllowedModels(errorRecord.message),
+ });
+ }
  if (errorRecord.message.includes("API_KEY_INVALID") ||
  errorRecord.message.includes("Invalid API key")) {
  return new AuthenticationError("Invalid LiteLLM configuration. Please check your LITELLM_API_KEY environment variable.", this.providerName);
@@ -235,10 +235,27 @@ export class OpenAIProvider extends BaseProvider {
  const errorType = errorObj?.type && typeof errorObj.type === "string"
  ? errorObj.type
  : undefined;
+ const statusCode = typeof errorObj?.status === "number"
+ ? errorObj.status
+ : typeof errorObj?.statusCode === "number"
+ ? errorObj.statusCode
+ : undefined;
+ // Curator P1-1 / Reviewer Finding #4: only the explicit auth markers
+ // map to AuthenticationError. Earlier we treated every
+ // `invalid_request_error` as an auth failure — that's OpenAI's catch-all
+ // for any bad request (unsupported parameter, malformed JSON, etc.), so we
+ // mislabelled those as "invalid API key". Use credential-specific
+ // signals only.
  if (message.includes("API_KEY_INVALID") ||
  message.includes("Invalid API key") ||
- errorType === "invalid_api_key") {
- return new AuthenticationError("Invalid OpenAI API key. Please check your OPENAI_API_KEY environment variable.", this.providerName);
+ message.includes("Incorrect API key") ||
+ message.includes("invalid_api_key") ||
+ errorType === "invalid_api_key" ||
+ statusCode === 401) {
+ return new AuthenticationError(message.includes("Incorrect API key") ||
+ message.includes("Invalid API key")
+ ? message
+ : "Invalid OpenAI API key. Please check your OPENAI_API_KEY environment variable.", this.providerName);
  }
  if (message.includes("rate limit") || errorType === "rate_limit_error") {
  return new RateLimitError("OpenAI rate limit exceeded. Please try again later.", this.providerName);
@@ -104,3 +104,45 @@ export declare class ModelAccessError extends BaseError {
  readonly requiredTier: string;
  constructor(model: string, tier: string, requiredTier: string);
  }
+ /**
+ * Curator P1-1: thrown when a provider rejects a request because the
+ * caller's team / API key is not whitelisted for the requested model.
+ *
+ * LiteLLM's `team not allowed to access model. This team can only access
+ * models=['glm-latest', 'kimi-latest', ...]` is the canonical example —
+ * the list is parsed off the error body so callers / fallback orchestrators
+ * can choose a whitelisted alternative without scraping strings.
+ */
+ export declare class ModelAccessDeniedError extends ProviderError {
+ readonly requestedModel: string | undefined;
+ readonly allowedModels: string[] | undefined;
+ readonly code: "MODEL_ACCESS_DENIED";
+ constructor(message: string, options?: {
+ provider?: string;
+ requestedModel?: string;
+ allowedModels?: string[];
+ });
+ }
+ /**
+ * Parse the `allowed_models` array out of a provider error message body.
+ * Currently targets the LiteLLM team-whitelist response shape:
+ *
+ * "team not allowed to access model. This team can only access
+ * models=['glm-latest', 'kimi-latest', 'open-large']"
+ *
+ * Implementation note: deliberately uses `indexOf`/`slice` instead of a
+ * single `/models\s*=\s*\[([^\]]*)\]/` regex. CodeQL flagged the latter
+ * as `js/polynomial-redos` because the `[^\]]*` greedy quantifier on
+ * library-supplied input can be exploited by a crafted long string. The
+ * indexOf/slice path is O(n) with no backtracking and we additionally
+ * cap the input length.
+ *
+ * Returns undefined when no list is found.
+ */
+ export declare function parseAllowedModels(message: string): string[] | undefined;
+ /**
+ * Returns true when `message` looks like a model-access-denied response
+ * (LiteLLM "team not allowed", generic "not allowed to access model",
+ * or "team can only access models=[...]").
+ */
+ export declare function isModelAccessDeniedMessage(message: string): boolean;
@@ -165,4 +165,98 @@ export class ModelAccessError extends BaseError {
  this.requiredTier = requiredTier;
  }
  }
+ /**
+ * Curator P1-1: thrown when a provider rejects a request because the
+ * caller's team / API key is not whitelisted for the requested model.
+ *
+ * LiteLLM's `team not allowed to access model. This team can only access
+ * models=['glm-latest', 'kimi-latest', ...]` is the canonical example —
+ * the list is parsed off the error body so callers / fallback orchestrators
+ * can choose a whitelisted alternative without scraping strings.
+ */
+ export class ModelAccessDeniedError extends ProviderError {
+ requestedModel;
+ allowedModels;
+ code = "MODEL_ACCESS_DENIED";
+ constructor(message, options = {}) {
+ super(message, options.provider);
+ this.name = "ModelAccessDeniedError";
+ this.requestedModel = options.requestedModel;
+ this.allowedModels = options.allowedModels;
+ }
+ }
+ /** Maximum body length we'll attempt to parse. Real provider error
+ * bodies are well under 10 KB; longer inputs are either truncated
+ * log output or a deliberate ReDoS attempt. */
+ const MAX_ALLOWED_MODELS_INPUT = 10_000;
+ /**
+ * Parse the `allowed_models` array out of a provider error message body.
+ * Currently targets the LiteLLM team-whitelist response shape:
+ *
+ * "team not allowed to access model. This team can only access
+ * models=['glm-latest', 'kimi-latest', 'open-large']"
+ *
+ * Implementation note: deliberately uses `indexOf`/`slice` instead of a
+ * single `/models\s*=\s*\[([^\]]*)\]/` regex. CodeQL flagged the latter
+ * as `js/polynomial-redos` because the `[^\]]*` greedy quantifier on
+ * library-supplied input can be exploited by a crafted long string. The
+ * indexOf/slice path is O(n) with no backtracking and we additionally
+ * cap the input length.
+ *
+ * Returns undefined when no list is found.
+ */
+ export function parseAllowedModels(message) {
+ if (typeof message !== "string" || message.length === 0) {
+ return undefined;
+ }
+ if (message.length > MAX_ALLOWED_MODELS_INPUT) {
+ return undefined;
+ }
+ // Locate `models` keyword case-insensitively, then walk forward to
+ // confirm `=` and `[` markers — no regex backtracking.
+ const lower = message.toLowerCase();
+ let idx = lower.indexOf("models", 0);
+ while (idx !== -1) {
+ let cursor = idx + "models".length;
+ // Skip whitespace
+ while (cursor < message.length && /\s/.test(message[cursor])) {
+ cursor++;
+ }
+ if (message[cursor] !== "=") {
+ idx = lower.indexOf("models", idx + 1);
+ continue;
+ }
+ cursor++;
+ while (cursor < message.length && /\s/.test(message[cursor])) {
+ cursor++;
+ }
+ if (message[cursor] !== "[") {
+ idx = lower.indexOf("models", idx + 1);
+ continue;
+ }
+ const open = cursor;
+ const close = message.indexOf("]", open + 1);
+ if (close === -1) {
+ return undefined;
+ }
+ const inside = message.slice(open + 1, close);
+ const items = inside
+ .split(",")
+ .map((s) => s.trim().replace(/^['"]|['"]$/g, ""))
+ .filter((s) => s.length > 0);
+ return items.length > 0 ? items : undefined;
+ }
+ return undefined;
+ }
+ /**
+ * Returns true when `message` looks like a model-access-denied response
+ * (LiteLLM "team not allowed", generic "not allowed to access model",
+ * or "team can only access models=[...]").
+ */
+ export function isModelAccessDeniedMessage(message) {
+ const lower = message.toLowerCase();
+ return ((lower.includes("team") && lower.includes("not allowed")) ||
+ lower.includes("team can only access") ||
+ /not\s+allowed\s+to\s+access\s+(this\s+)?model/i.test(message));
+ }
  //# sourceMappingURL=errors.js.map
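
A quick check of the two helpers against the sample body quoted in the doc comment above; the relative import path is shown for illustration only.

```ts
import { isModelAccessDeniedMessage, parseAllowedModels } from "./errors.js";

// Sample body taken from the doc comment (LiteLLM team-whitelist rejection).
const body =
  "team not allowed to access model. This team can only access " +
  "models=['glm-latest', 'kimi-latest', 'open-large']";

console.log(isModelAccessDeniedMessage(body)); // true
console.log(parseAllowedModels(body)); // ["glm-latest", "kimi-latest", "open-large"]
```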
@@ -57,3 +57,4 @@ export * from "./span.js";
  export * from "./imageGen.js";
  export * from "./elicitation.js";
  export * from "./dynamic.js";
+ export * from "./streamDedup.js";
@@ -60,4 +60,6 @@ export * from "./imageGen.js";
  export * from "./elicitation.js";
  // Dynamic Arguments types
  export * from "./dynamic.js";
+ // Curator P2-4 dedup: per-stream generation:end dedup context
+ export * from "./streamDedup.js";
  //# sourceMappingURL=index.js.map
@@ -0,0 +1,14 @@
+ /**
+ * Curator P2-4 dedup (concurrency-safe): per-stream context that lets
+ * the orchestration's `runStandardStreamRequest` finally block know
+ * whether a *native provider* path within THIS stream's async chain
+ * already emitted `generation:end`. Native providers (Vertex / Google
+ * AI Studio for Gemini 3, etc.) emit on the shared SDK emitter; without
+ * scoping, a concurrent unrelated stream's emit on the same NeuroLink
+ * instance would suppress the wrong stream's orchestration emit.
+ *
+ * Attaching the context to each call's own `StreamOptions` scopes the
+ * flag to that stream.
+ */
+ export type StreamGenerationEndContext = {
+ providerEmitted: boolean;
+ };
@@ -0,0 +1,2 @@
+ export {};
+ //# sourceMappingURL=streamDedup.js.map