@juspay/neurolink 9.57.1 → 9.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +314 -314
- package/dist/lib/neurolink.d.ts +69 -0
- package/dist/lib/neurolink.js +367 -3
- package/dist/lib/providers/litellm.js +12 -1
- package/dist/lib/providers/openAI.js +19 -2
- package/dist/lib/types/config.d.ts +23 -0
- package/dist/lib/types/errors.d.ts +42 -0
- package/dist/lib/types/errors.js +94 -0
- package/dist/lib/types/generate.d.ts +13 -0
- package/dist/lib/types/stream.d.ts +13 -0
- package/dist/neurolink.d.ts +69 -0
- package/dist/neurolink.js +367 -3
- package/dist/providers/litellm.js +12 -1
- package/dist/providers/openAI.js +19 -2
- package/dist/types/config.d.ts +23 -0
- package/dist/types/errors.d.ts +42 -0
- package/dist/types/errors.js +94 -0
- package/dist/types/generate.d.ts +13 -0
- package/dist/types/stream.d.ts +13 -0
- package/package.json +1 -1
package/dist/lib/types/errors.js
CHANGED
|
@@ -165,4 +165,98 @@ export class ModelAccessError extends BaseError {
|
|
|
165
165
|
this.requiredTier = requiredTier;
|
|
166
166
|
}
|
|
167
167
|
}
|
|
168
|
+
/**
 * Error raised when a provider refuses a request because the caller's
 * team / API key is not whitelisted for the requested model.
 *
 * The canonical trigger is LiteLLM's
 * `team not allowed to access model. This team can only access
 * models=['glm-latest', 'kimi-latest', ...]` response. The whitelist from
 * that body can be carried on `allowedModels` so callers and fallback
 * orchestrators can pick a permitted alternative without scraping the
 * message text themselves.
 *
 * Fields:
 *  - `requestedModel` — taken from `options.requestedModel`
 *  - `allowedModels`  — taken from `options.allowedModels`
 *  - `code`           — always "MODEL_ACCESS_DENIED"
 *
 * `message` and `options.provider` are forwarded to the `ProviderError`
 * constructor unchanged.
 */
export class ModelAccessDeniedError extends ProviderError {
    requestedModel;
    allowedModels;
    code = "MODEL_ACCESS_DENIED";
    constructor(message, options = {}) {
        const { provider } = options;
        super(message, provider);
        // Set after super() so this class's name wins over the base name.
        this.name = "ModelAccessDeniedError";
        const { requestedModel, allowedModels } = options;
        this.requestedModel = requestedModel;
        this.allowedModels = allowedModels;
    }
}
|
|
188
|
+
/**
 * Maximum message length we will attempt to parse. Real provider error
 * bodies are well under 10 KB; longer inputs are either truncated log
 * output or a deliberate attempt to make parsing expensive.
 */
const MAX_ALLOWED_MODELS_INPUT = 10_000;
/**
 * Parse the allowed-models list out of a provider error message body.
 * Currently targets the LiteLLM team-whitelist response shape:
 *
 *   "team not allowed to access model. This team can only access
 *    models=['glm-latest', 'kimi-latest', 'open-large']"
 *
 * The scan looks for the keyword `models` (case-insensitive), optional
 * whitespace, `=`, optional whitespace, `[`, and then the first `]`. The
 * text between the brackets is split on commas; each item is trimmed and
 * stripped of one leading and one trailing quote character.
 *
 * Implementation notes:
 *  - A hand-written scan is used instead of a single
 *    `/models\s*=\s*\[([^\]]*)\]/` regex, which CodeQL flagged as
 *    `js/polynomial-redos`. The scan does no backtracking and the input
 *    length is capped.
 *  - The keyword is matched by lower-casing a fixed-size slice of the
 *    ORIGINAL message at each position. Lower-casing the whole message
 *    and reusing its offsets is unsafe: `toLowerCase()` can change the
 *    string length (e.g. U+0130 "İ" becomes two code units), which shifts
 *    every later offset and makes the `=` / `[` probes miss.
 *
 * @param message - provider error text; non-strings are rejected.
 * @returns the non-empty list of model names, or `undefined` when the
 *   input is not a string, is empty, exceeds the length cap, contains no
 *   `models = [` marker, has no closing `]`, or the list is empty.
 */
export function parseAllowedModels(message) {
    if (typeof message !== "string" || message.length === 0) {
        return undefined;
    }
    if (message.length > MAX_ALLOWED_MODELS_INPUT) {
        return undefined;
    }
    const keyword = "models";
    // Advance past any run of whitespace starting at `from`.
    const skipWhitespace = (from) => {
        let pos = from;
        while (pos < message.length && /\s/.test(message[pos])) {
            pos++;
        }
        return pos;
    };
    const lastStart = message.length - keyword.length;
    for (let idx = 0; idx <= lastStart; idx++) {
        // Compare against the original string so offsets stay valid.
        if (message.slice(idx, idx + keyword.length).toLowerCase() !== keyword) {
            continue;
        }
        let cursor = skipWhitespace(idx + keyword.length);
        if (message[cursor] !== "=") {
            continue;
        }
        cursor = skipWhitespace(cursor + 1);
        if (message[cursor] !== "[") {
            continue;
        }
        const close = message.indexOf("]", cursor + 1);
        if (close === -1) {
            // No closing bracket anywhere after this point, so no later
            // occurrence can succeed either.
            return undefined;
        }
        const items = message
            .slice(cursor + 1, close)
            .split(",")
            .map((s) => s.trim().replace(/^['"]|['"]$/g, ""))
            .filter((s) => s.length > 0);
        return items.length > 0 ? items : undefined;
    }
    return undefined;
}
|
|
251
|
+
/**
 * Returns true when `message` looks like a model-access-denied response.
 *
 * Recognised shapes (all case-insensitive):
 *  - contains both "team" and "not allowed" (LiteLLM "team not allowed")
 *  - contains "team can only access"
 *  - matches "not allowed to access [this] model"
 *
 * @param message - provider error text. Non-string values (undefined,
 *   null, numbers, objects) return false instead of throwing, so the
 *   helper is safe to call on an unvalidated `error.message`.
 * @returns whether the text matches one of the shapes above.
 */
export function isModelAccessDeniedMessage(message) {
    if (typeof message !== "string") {
        return false;
    }
    const lower = message.toLowerCase();
    return ((lower.includes("team") && lower.includes("not allowed")) ||
        lower.includes("team can only access") ||
        /not\s+allowed\s+to\s+access\s+(this\s+)?model/i.test(message));
}
|
|
168
262
|
//# sourceMappingURL=errors.js.map
|
|
@@ -447,6 +447,19 @@ export type GenerateOptions = {
|
|
|
447
447
|
* Unset providers fall through to instance credentials, then environment variables.
|
|
448
448
|
*/
|
|
449
449
|
credentials?: NeurolinkCredentials;
|
|
450
|
+
/**
|
|
451
|
+
* Curator P2-3: per-call fallback callback. Overrides any
|
|
452
|
+
* instance-level `providerFallback` set on `new NeuroLink({...})`.
|
|
453
|
+
*/
|
|
454
|
+
providerFallback?: (error: unknown) => Promise<{
|
|
455
|
+
provider?: string;
|
|
456
|
+
model?: string;
|
|
457
|
+
} | null>;
|
|
458
|
+
/**
|
|
459
|
+
* Curator P2-3: per-call ordered model chain. Overrides any
|
|
460
|
+
* instance-level `modelChain`. Tried in order on model-access-denied.
|
|
461
|
+
*/
|
|
462
|
+
modelChain?: string[];
|
|
450
463
|
/**
|
|
451
464
|
* Per-call memory control.
|
|
452
465
|
*
|
|
@@ -445,6 +445,19 @@ export type StreamOptions = {
|
|
|
445
445
|
* Unset providers fall through to instance credentials, then environment variables.
|
|
446
446
|
*/
|
|
447
447
|
credentials?: NeurolinkCredentials;
|
|
448
|
+
/**
|
|
449
|
+
* Curator P2-3: per-call fallback callback. Overrides any
|
|
450
|
+
* instance-level `providerFallback` set on `new NeuroLink({...})`.
|
|
451
|
+
*/
|
|
452
|
+
providerFallback?: (error: unknown) => Promise<{
|
|
453
|
+
provider?: string;
|
|
454
|
+
model?: string;
|
|
455
|
+
} | null>;
|
|
456
|
+
/**
|
|
457
|
+
* Curator P2-3: per-call ordered model chain. Overrides any
|
|
458
|
+
* instance-level `modelChain`. Tried in order on model-access-denied.
|
|
459
|
+
*/
|
|
460
|
+
modelChain?: string[];
|
|
448
461
|
/**
|
|
449
462
|
* Per-call memory control.
|
|
450
463
|
*
|
package/dist/neurolink.d.ts
CHANGED
|
@@ -60,6 +60,7 @@ export declare class NeuroLink {
|
|
|
60
60
|
private pendingAuthConfig?;
|
|
61
61
|
private authInitPromise?;
|
|
62
62
|
private credentials?;
|
|
63
|
+
private readonly fallbackConfig;
|
|
63
64
|
/**
|
|
64
65
|
* Merge instance-level credentials with per-call credentials.
|
|
65
66
|
*
|
|
@@ -541,6 +542,21 @@ export declare class NeuroLink {
|
|
|
541
542
|
* @since 1.0.0
|
|
542
543
|
*/
|
|
543
544
|
generate(optionsOrPrompt: GenerateOptions | DynamicOptions | string): Promise<GenerateResult>;
|
|
545
|
+
/**
|
|
546
|
+
* Curator P2-3: wraps a generate/stream call with the fallback
|
|
547
|
+
* orchestration (`providerFallback` callback + `modelChain` walker).
|
|
548
|
+
*
|
|
549
|
+
* On a model-access-denied error from the inner call:
|
|
550
|
+
* 1. Resolve the effective callback (per-call > instance > synthesised
|
|
551
|
+
* from modelChain) and the effective chain (per-call > instance).
|
|
552
|
+
* 2. Walk attempts: invoke callback (or pop next chain entry) → emit
|
|
553
|
+
* `model.fallback` event → re-call inner with the new {provider,
|
|
554
|
+
* model}.
|
|
555
|
+
* 3. Stop on first success, on a callback returning null, or after
|
|
556
|
+
* exhausting the chain (throw the most recent error).
|
|
557
|
+
*/
|
|
558
|
+
private runWithFallbackOrchestration;
|
|
559
|
+
private attemptInner;
|
|
544
560
|
private executeGenerateWithMetricsContext;
|
|
545
561
|
private executeGenerateRequest;
|
|
546
562
|
private prepareGenerateRequest;
|
|
@@ -697,6 +713,25 @@ export declare class NeuroLink {
|
|
|
697
713
|
* @throws {Error} When conversation memory operations fail (if enabled)
|
|
698
714
|
*/
|
|
699
715
|
stream(options: StreamOptions | DynamicOptions): Promise<StreamResult>;
|
|
716
|
+
/**
|
|
717
|
+
* Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
|
|
718
|
+
* errors thrown during async iteration (e.g. LiteLLM throwing inside
|
|
719
|
+
* `createLiteLLMTransformedStream`). The standard
|
|
720
|
+
* `runWithFallbackOrchestration` only catches errors thrown while the
|
|
721
|
+
* `StreamResult` is being created — once we hand the iterator back to
|
|
722
|
+
* the caller, errors raised during consumption used to bypass
|
|
723
|
+
* `providerFallback` / `modelChain`.
|
|
724
|
+
*
|
|
725
|
+
* This wrapper runs the orchestration to get an initial StreamResult,
|
|
726
|
+
* then wraps `result.stream` so that:
|
|
727
|
+
* - chunks are forwarded transparently while consumption succeeds
|
|
728
|
+
* - if iteration throws a model-access-denied error AND no chunks
|
|
729
|
+
* have been yielded yet, we resolve the next fallback target,
|
|
730
|
+
* emit `model.fallback`, and recurse
|
|
731
|
+
* - if chunks were already yielded, the error propagates (mid-stream
|
|
732
|
+
* recovery isn't safe — the consumer has half a response)
|
|
733
|
+
*/
|
|
734
|
+
private streamWithIterationFallback;
|
|
700
735
|
private executeStreamRequest;
|
|
701
736
|
private validateStreamRequestOptions;
|
|
702
737
|
private maybeHandleWorkflowStreamRequest;
|
|
@@ -933,6 +968,40 @@ export declare class NeuroLink {
|
|
|
933
968
|
* @see {@link NeuroLink.executeTool} for events related to tool execution
|
|
934
969
|
*/
|
|
935
970
|
getEventEmitter(): TypedEventEmitter<NeuroLinkEvents>;
|
|
971
|
+
/**
|
|
972
|
+
* Curator P1-1: asynchronous credential health check for a single provider.
|
|
973
|
+
*
|
|
974
|
+
* Drives a tiny real call against the provider (1-token completion or
|
|
975
|
+
* `/models` listing depending on provider) to confirm the configured
|
|
976
|
+
* credentials are valid. Useful at startup so a service can refuse to
|
|
977
|
+
* boot if its primary provider's credentials are broken instead of
|
|
978
|
+
* discovering the problem on first user request.
|
|
979
|
+
*
|
|
980
|
+
* @example
|
|
981
|
+
* ```ts
|
|
982
|
+
* const health = await neurolink.checkCredentials({ provider: "litellm" });
|
|
983
|
+
* if (health.status !== "ok") {
|
|
984
|
+
* throw new Error(`provider not ready: ${health.detail}`);
|
|
985
|
+
* }
|
|
986
|
+
* ```
|
|
987
|
+
*
|
|
988
|
+
* @param input - the provider to check
|
|
989
|
+
* @returns `{ provider, status, detail }`. Possible status values:
|
|
990
|
+
* - `"ok"` — credentials valid and provider reachable
|
|
991
|
+
* - `"missing"` — required env / credentials not configured
|
|
992
|
+
* - `"expired"` — credentials present but rejected (401/403)
|
|
993
|
+
* - `"denied"` — credentials valid but team not whitelisted for any model
|
|
994
|
+
* - `"network"` — provider unreachable (timeout, ECONNREFUSED, DNS)
|
|
995
|
+
* - `"unknown"` — other error; consult `detail`
|
|
996
|
+
*/
|
|
997
|
+
checkCredentials(input: {
|
|
998
|
+
provider: string;
|
|
999
|
+
model?: string;
|
|
1000
|
+
}): Promise<{
|
|
1001
|
+
provider: string;
|
|
1002
|
+
status: "ok" | "missing" | "expired" | "denied" | "network" | "unknown";
|
|
1003
|
+
detail: string;
|
|
1004
|
+
}>;
|
|
936
1005
|
/**
|
|
937
1006
|
* Emit tool start event with execution tracking
|
|
938
1007
|
* @param toolName - Name of the tool being executed
|
package/dist/neurolink.js
CHANGED
|
@@ -52,7 +52,7 @@ import { resolveDynamicArgument } from "./dynamic/dynamicResolver.js";
|
|
|
52
52
|
import { initializeHippocampus } from "./memory/hippocampusInitializer.js";
|
|
53
53
|
import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
|
|
54
54
|
import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
|
|
55
|
-
import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/index.js";
|
|
55
|
+
import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
|
|
56
56
|
import { SpanSerializer } from "./observability/utils/spanSerializer.js";
|
|
57
57
|
import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
|
|
58
58
|
import { TaskManager } from "./tasks/taskManager.js";
|
|
@@ -146,6 +146,36 @@ function mcpCategoryToErrorCategory(mcpCategory) {
|
|
|
146
146
|
* For example, a NOT_FOUND error for a model causes 6 retries of a 418KB
|
|
147
147
|
* message, wasting ~628,000 tokens and adding 10+ seconds of latency.
|
|
148
148
|
*/
|
|
149
|
+
/**
 * Curator P2-3: decide whether `error` represents a model-access-denied
 * failure, using duck-typing so that untyped provider errors are
 * recognised as well as typed ones.
 *
 * Checks, in order:
 *  1. `error.name === "ModelAccessDeniedError"`
 *  2. `error.code === "MODEL_ACCESS_DENIED"`
 *  3. the message text: a string `message` property if present, otherwise
 *     the `Error` message, otherwise `String(error)`. It matches when it
 *     contains "team" together with "not allowed", contains
 *     "team can only access", or matches
 *     "not allowed to access [this] model" (all case-insensitive).
 *
 * @param error - any thrown value.
 * @returns true when one of the checks above matches; false for falsy
 *   input or an empty message.
 */
function looksLikeModelAccessDenied(error) {
    if (!error) {
        return false;
    }
    // Typed markers first: cheapest and least ambiguous.
    if (error.name === "ModelAccessDeniedError" || error.code === "MODEL_ACCESS_DENIED") {
        return true;
    }
    let text;
    if (typeof error.message === "string") {
        text = error.message;
    }
    else if (error instanceof Error) {
        text = error.message;
    }
    else {
        text = String(error);
    }
    if (!text) {
        return false;
    }
    const haystack = text.toLowerCase();
    if (haystack.includes("team") && haystack.includes("not allowed")) {
        return true;
    }
    if (haystack.includes("team can only access")) {
        return true;
    }
    return /not\s+allowed\s+to\s+access\s+(this\s+)?model/i.test(text);
}
|
|
149
179
|
function isNonRetryableProviderError(error) {
|
|
150
180
|
// Check for typed error classes from providers
|
|
151
181
|
if (error instanceof InvalidModelError) {
|
|
@@ -157,6 +187,13 @@ function isNonRetryableProviderError(error) {
|
|
|
157
187
|
if (error instanceof AuthorizationError) {
|
|
158
188
|
return true;
|
|
159
189
|
}
|
|
190
|
+
// Curator P1-1: model-access-denied is permanent for the (provider, model)
|
|
191
|
+
// pair until the team whitelist changes. Retrying with the same config
|
|
192
|
+
// would just waste a second roundtrip. Caller / fallback-orchestrator
|
|
193
|
+
// should pick a different model.
|
|
194
|
+
if (error instanceof ModelAccessDeniedError) {
|
|
195
|
+
return true;
|
|
196
|
+
}
|
|
160
197
|
// Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
|
|
161
198
|
if (error && typeof error === "object") {
|
|
162
199
|
const err = error;
|
|
@@ -334,6 +371,9 @@ export class NeuroLink {
|
|
|
334
371
|
authInitPromise;
|
|
335
372
|
// Per-provider credential overrides (instance-level default)
|
|
336
373
|
credentials;
|
|
374
|
+
// Curator P2-3: instance-level fallback policy. Read by
|
|
375
|
+
// runWithFallbackOrchestration on model-access-denied.
|
|
376
|
+
fallbackConfig = {};
|
|
337
377
|
/**
|
|
338
378
|
* Merge instance-level credentials with per-call credentials.
|
|
339
379
|
*
|
|
@@ -721,6 +761,14 @@ export class NeuroLink {
|
|
|
721
761
|
if (config?.modelAliasConfig) {
|
|
722
762
|
this.modelAliasConfig = config.modelAliasConfig;
|
|
723
763
|
}
|
|
764
|
+
// Curator P2-3: capture fallback policy. Per-call options can still
|
|
765
|
+
// override, but these are the instance-level defaults.
|
|
766
|
+
if (config?.providerFallback) {
|
|
767
|
+
this.fallbackConfig.providerFallback = config.providerFallback;
|
|
768
|
+
}
|
|
769
|
+
if (config?.modelChain) {
|
|
770
|
+
this.fallbackConfig.modelChain = config.modelChain;
|
|
771
|
+
}
|
|
724
772
|
logger.setEventEmitter(this.emitter);
|
|
725
773
|
// Read tool cache duration from environment variables, with a default
|
|
726
774
|
const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
|
|
@@ -2669,7 +2717,121 @@ Current user's request: ${currentInput}`;
|
|
|
2669
2717
|
* @since 1.0.0
|
|
2670
2718
|
*/
|
|
2671
2719
|
async generate(optionsOrPrompt) {
|
|
2672
|
-
return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(
|
|
2720
|
+
return this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
|
|
2721
|
+
}
|
|
2722
|
+
/**
|
|
2723
|
+
* Curator P2-3: wraps a generate/stream call with the fallback
|
|
2724
|
+
* orchestration (`providerFallback` callback + `modelChain` walker).
|
|
2725
|
+
*
|
|
2726
|
+
* On a model-access-denied error from the inner call:
|
|
2727
|
+
* 1. Resolve the effective callback (per-call > instance > synthesised
|
|
2728
|
+
* from modelChain) and the effective chain (per-call > instance).
|
|
2729
|
+
* 2. Walk attempts: invoke callback (or pop next chain entry) → emit
|
|
2730
|
+
* `model.fallback` event → re-call inner with the new {provider,
|
|
2731
|
+
* model}.
|
|
2732
|
+
* 3. Stop on first success, on a callback returning null, or after
|
|
2733
|
+
* exhausting the chain (throw the most recent error).
|
|
2734
|
+
*/
|
|
2735
|
+
async runWithFallbackOrchestration(optionsOrPrompt, kind, inner) {
|
|
2736
|
+
const initialAttempt = await this.attemptInner(inner, optionsOrPrompt);
|
|
2737
|
+
if ("ok" in initialAttempt) {
|
|
2738
|
+
return initialAttempt.ok;
|
|
2739
|
+
}
|
|
2740
|
+
let lastError = initialAttempt.error;
|
|
2741
|
+
if (!looksLikeModelAccessDenied(lastError)) {
|
|
2742
|
+
throw lastError;
|
|
2743
|
+
}
|
|
2744
|
+
// Build the chain orchestration.
|
|
2745
|
+
const requestedProvider = (typeof optionsOrPrompt === "object"
|
|
2746
|
+
? optionsOrPrompt.provider
|
|
2747
|
+
: undefined);
|
|
2748
|
+
const requestedModel = (typeof optionsOrPrompt === "object"
|
|
2749
|
+
? optionsOrPrompt.model
|
|
2750
|
+
: undefined);
|
|
2751
|
+
const callOpts = typeof optionsOrPrompt === "object"
|
|
2752
|
+
? optionsOrPrompt
|
|
2753
|
+
: {};
|
|
2754
|
+
const perCallCallback = callOpts.providerFallback;
|
|
2755
|
+
const perCallChain = callOpts.modelChain;
|
|
2756
|
+
const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
|
|
2757
|
+
const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
|
|
2758
|
+
if (!effectiveCallback && !effectiveChain) {
|
|
2759
|
+
throw lastError;
|
|
2760
|
+
}
|
|
2761
|
+
// Synthesise a callback from modelChain if no explicit callback exists.
|
|
2762
|
+
const chainCursor = { i: 0, list: effectiveChain ?? [] };
|
|
2763
|
+
const synthesizedFromChain = async () => {
|
|
2764
|
+
while (chainCursor.i < chainCursor.list.length) {
|
|
2765
|
+
const next = chainCursor.list[chainCursor.i++];
|
|
2766
|
+
if (next !== requestedModel) {
|
|
2767
|
+
return { model: next };
|
|
2768
|
+
}
|
|
2769
|
+
}
|
|
2770
|
+
return null;
|
|
2771
|
+
};
|
|
2772
|
+
const callback = effectiveCallback ?? synthesizedFromChain;
|
|
2773
|
+
let attempts = 0;
|
|
2774
|
+
const maxAttempts = (effectiveChain?.length ?? 0) + 5;
|
|
2775
|
+
let attemptedRequestedModel = requestedModel;
|
|
2776
|
+
while (attempts++ < maxAttempts) {
|
|
2777
|
+
let next;
|
|
2778
|
+
try {
|
|
2779
|
+
next = await callback(lastError);
|
|
2780
|
+
}
|
|
2781
|
+
catch (cbErr) {
|
|
2782
|
+
logger.warn("[NeuroLink] providerFallback callback threw", {
|
|
2783
|
+
error: cbErr instanceof Error ? cbErr.message : String(cbErr),
|
|
2784
|
+
});
|
|
2785
|
+
throw lastError;
|
|
2786
|
+
}
|
|
2787
|
+
if (!next) {
|
|
2788
|
+
throw lastError;
|
|
2789
|
+
}
|
|
2790
|
+
// Emit model.fallback event so cost/audit listeners can record it.
|
|
2791
|
+
try {
|
|
2792
|
+
this.emitter.emit("model.fallback", {
|
|
2793
|
+
requestedProvider,
|
|
2794
|
+
requestedModel: attemptedRequestedModel,
|
|
2795
|
+
fallbackProvider: next.provider ?? requestedProvider,
|
|
2796
|
+
fallbackModel: next.model,
|
|
2797
|
+
reason: lastError instanceof Error ? lastError.message : String(lastError),
|
|
2798
|
+
kind,
|
|
2799
|
+
timestamp: Date.now(),
|
|
2800
|
+
});
|
|
2801
|
+
}
|
|
2802
|
+
catch {
|
|
2803
|
+
/* listener errors are non-fatal */
|
|
2804
|
+
}
|
|
2805
|
+
const retriedOptions = typeof optionsOrPrompt === "object"
|
|
2806
|
+
? {
|
|
2807
|
+
...optionsOrPrompt,
|
|
2808
|
+
...(next.provider && { provider: next.provider }),
|
|
2809
|
+
...(next.model && { model: next.model }),
|
|
2810
|
+
// Strip the fallback hooks so the retry doesn't re-orchestrate.
|
|
2811
|
+
providerFallback: undefined,
|
|
2812
|
+
modelChain: undefined,
|
|
2813
|
+
}
|
|
2814
|
+
: optionsOrPrompt;
|
|
2815
|
+
const retryAttempt = await this.attemptInner(inner, retriedOptions);
|
|
2816
|
+
if ("ok" in retryAttempt) {
|
|
2817
|
+
return retryAttempt.ok;
|
|
2818
|
+
}
|
|
2819
|
+
lastError = retryAttempt.error;
|
|
2820
|
+
attemptedRequestedModel = next.model ?? attemptedRequestedModel;
|
|
2821
|
+
if (!looksLikeModelAccessDenied(lastError)) {
|
|
2822
|
+
throw lastError;
|
|
2823
|
+
}
|
|
2824
|
+
}
|
|
2825
|
+
throw lastError;
|
|
2826
|
+
}
|
|
2827
|
+
async attemptInner(inner, options) {
|
|
2828
|
+
try {
|
|
2829
|
+
const ok = await inner(options);
|
|
2830
|
+
return { ok };
|
|
2831
|
+
}
|
|
2832
|
+
catch (error) {
|
|
2833
|
+
return { error };
|
|
2834
|
+
}
|
|
2673
2835
|
}
|
|
2674
2836
|
async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
|
|
2675
2837
|
return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
|
|
@@ -4566,7 +4728,128 @@ Current user's request: ${currentInput}`;
|
|
|
4566
4728
|
: [],
|
|
4567
4729
|
optionKeys: Object.keys(options),
|
|
4568
4730
|
});
|
|
4569
|
-
return
|
|
4731
|
+
return this.streamWithIterationFallback(options);
|
|
4732
|
+
}
|
|
4733
|
+
/**
|
|
4734
|
+
* Curator P2-3 / Reviewer Finding #2: stream-fallback that also covers
|
|
4735
|
+
* errors thrown during async iteration (e.g. LiteLLM throwing inside
|
|
4736
|
+
* `createLiteLLMTransformedStream`). The standard
|
|
4737
|
+
* `runWithFallbackOrchestration` only catches errors thrown while the
|
|
4738
|
+
* `StreamResult` is being created — once we hand the iterator back to
|
|
4739
|
+
* the caller, errors raised during consumption used to bypass
|
|
4740
|
+
* `providerFallback` / `modelChain`.
|
|
4741
|
+
*
|
|
4742
|
+
* This wrapper runs the orchestration to get an initial StreamResult,
|
|
4743
|
+
* then wraps `result.stream` so that:
|
|
4744
|
+
* - chunks are forwarded transparently while consumption succeeds
|
|
4745
|
+
* - if iteration throws a model-access-denied error AND no chunks
|
|
4746
|
+
* have been yielded yet, we resolve the next fallback target,
|
|
4747
|
+
* emit `model.fallback`, and recurse
|
|
4748
|
+
* - if chunks were already yielded, the error propagates (mid-stream
|
|
4749
|
+
* recovery isn't safe — the consumer has half a response)
|
|
4750
|
+
*/
|
|
4751
|
+
async streamWithIterationFallback(options) {
|
|
4752
|
+
const result = await this.runWithFallbackOrchestration(options, "stream", (opts) => metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...opts })));
|
|
4753
|
+
const callOpts = options;
|
|
4754
|
+
const perCallCallback = callOpts.providerFallback;
|
|
4755
|
+
const perCallChain = callOpts.modelChain;
|
|
4756
|
+
const effectiveCallback = perCallCallback ?? this.fallbackConfig.providerFallback;
|
|
4757
|
+
const effectiveChain = perCallChain ?? this.fallbackConfig.modelChain;
|
|
4758
|
+
if (!effectiveCallback && !effectiveChain) {
|
|
4759
|
+
// No fallback configured — nothing to wrap.
|
|
4760
|
+
return result;
|
|
4761
|
+
}
|
|
4762
|
+
// Build a chain cursor scoped to this stream's lifetime; consumers
|
|
4763
|
+
// who set up `modelChain` get sequential progression here too.
|
|
4764
|
+
const chainCursor = {
|
|
4765
|
+
i: 0,
|
|
4766
|
+
list: effectiveChain ?? [],
|
|
4767
|
+
requestedModel: options.model,
|
|
4768
|
+
};
|
|
4769
|
+
const callback = effectiveCallback ??
|
|
4770
|
+
(async () => {
|
|
4771
|
+
while (chainCursor.i < chainCursor.list.length) {
|
|
4772
|
+
const next = chainCursor.list[chainCursor.i++];
|
|
4773
|
+
if (next !== chainCursor.requestedModel) {
|
|
4774
|
+
return { model: next };
|
|
4775
|
+
}
|
|
4776
|
+
}
|
|
4777
|
+
return null;
|
|
4778
|
+
});
|
|
4779
|
+
const self = this;
|
|
4780
|
+
// Yield type is the original stream's element type, threaded through
|
|
4781
|
+
// as unknown — we forward chunks unchanged so structural identity is
|
|
4782
|
+
// preserved without a local type alias (CLAUDE.md rule 2).
|
|
4783
|
+
const wrappedStream = (async function* () {
|
|
4784
|
+
let yielded = 0;
|
|
4785
|
+
let currentResult = result;
|
|
4786
|
+
let attemptedRequestedProvider = options.provider;
|
|
4787
|
+
let attemptedRequestedModel = options.model;
|
|
4788
|
+
const maxAttempts = (effectiveChain?.length ?? 0) + 5;
|
|
4789
|
+
for (let attempt = 0; attempt <= maxAttempts; attempt++) {
|
|
4790
|
+
try {
|
|
4791
|
+
for await (const chunk of currentResult.stream) {
|
|
4792
|
+
yielded++;
|
|
4793
|
+
yield chunk;
|
|
4794
|
+
}
|
|
4795
|
+
return;
|
|
4796
|
+
}
|
|
4797
|
+
catch (err) {
|
|
4798
|
+
if (yielded > 0 || !looksLikeModelAccessDenied(err)) {
|
|
4799
|
+
throw err;
|
|
4800
|
+
}
|
|
4801
|
+
let next;
|
|
4802
|
+
try {
|
|
4803
|
+
next = await callback(err);
|
|
4804
|
+
}
|
|
4805
|
+
catch (cbErr) {
|
|
4806
|
+
logger.warn("[NeuroLink.stream] providerFallback callback threw during iteration", {
|
|
4807
|
+
error: cbErr instanceof Error ? cbErr.message : String(cbErr),
|
|
4808
|
+
});
|
|
4809
|
+
throw err;
|
|
4810
|
+
}
|
|
4811
|
+
if (!next) {
|
|
4812
|
+
throw err;
|
|
4813
|
+
}
|
|
4814
|
+
try {
|
|
4815
|
+
self.emitter.emit("model.fallback", {
|
|
4816
|
+
requestedProvider: attemptedRequestedProvider,
|
|
4817
|
+
requestedModel: attemptedRequestedModel,
|
|
4818
|
+
fallbackProvider: next.provider ?? attemptedRequestedProvider,
|
|
4819
|
+
fallbackModel: next.model,
|
|
4820
|
+
reason: err instanceof Error ? err.message : String(err),
|
|
4821
|
+
kind: "stream",
|
|
4822
|
+
phase: "iteration",
|
|
4823
|
+
timestamp: Date.now(),
|
|
4824
|
+
});
|
|
4825
|
+
}
|
|
4826
|
+
catch {
|
|
4827
|
+
/* listener errors are non-fatal */
|
|
4828
|
+
}
|
|
4829
|
+
const retriedOptions = {
|
|
4830
|
+
...options,
|
|
4831
|
+
...(next.provider && {
|
|
4832
|
+
provider: next.provider,
|
|
4833
|
+
}),
|
|
4834
|
+
...(next.model && { model: next.model }),
|
|
4835
|
+
// Strip the hooks so the inner orchestration doesn't double-fall-back.
|
|
4836
|
+
providerFallback: undefined,
|
|
4837
|
+
modelChain: undefined,
|
|
4838
|
+
};
|
|
4839
|
+
attemptedRequestedProvider =
|
|
4840
|
+
next.provider ?? attemptedRequestedProvider;
|
|
4841
|
+
attemptedRequestedModel = next.model ?? attemptedRequestedModel;
|
|
4842
|
+
currentResult = await metricsTraceContextStorage.run(self.createMetricsTraceContext(), () => self.executeStreamRequest({ ...retriedOptions }));
|
|
4843
|
+
}
|
|
4844
|
+
}
|
|
4845
|
+
// Exhausted attempts — re-throw the most recent error captured by
|
|
4846
|
+
// the inner loop. We only get here if the loop didn't return.
|
|
4847
|
+
throw new Error(`[NeuroLink.stream] iteration fallback exhausted ${maxAttempts} attempts`);
|
|
4848
|
+
})();
|
|
4849
|
+
return {
|
|
4850
|
+
...result,
|
|
4851
|
+
stream: wrappedStream,
|
|
4852
|
+
};
|
|
4570
4853
|
}
|
|
4571
4854
|
async executeStreamRequest(options) {
|
|
4572
4855
|
// Dynamic argument resolution — resolve any function-valued options before downstream use
|
|
@@ -5811,6 +6094,87 @@ Current user's request: ${currentInput}`;
|
|
|
5811
6094
|
getEventEmitter() {
|
|
5812
6095
|
return this.emitter;
|
|
5813
6096
|
}
|
|
6097
|
+
/**
|
|
6098
|
+
* Curator P1-1: synchronous credential health check for a single provider.
|
|
6099
|
+
*
|
|
6100
|
+
* Drives a tiny real call against the provider (1-token completion or
|
|
6101
|
+
* `/models` listing depending on provider) to confirm the configured
|
|
6102
|
+
* credentials are valid. Useful at startup so a service can refuse to
|
|
6103
|
+
* boot if its primary provider's credentials are broken instead of
|
|
6104
|
+
* discovering the problem on first user request.
|
|
6105
|
+
*
|
|
6106
|
+
* @example
|
|
6107
|
+
* ```ts
|
|
6108
|
+
* const health = await neurolink.checkCredentials({ provider: "litellm" });
|
|
6109
|
+
* if (health.status !== "ok") {
|
|
6110
|
+
* throw new Error(`provider not ready: ${health.detail}`);
|
|
6111
|
+
* }
|
|
6112
|
+
* ```
|
|
6113
|
+
*
|
|
6114
|
+
* @param input - the provider to check
|
|
6115
|
+
* @returns `{ provider, status, detail }`. Possible status values:
|
|
6116
|
+
* - `"ok"` — credentials valid and provider reachable
|
|
6117
|
+
* - `"missing"` — required env / credentials not configured
|
|
6118
|
+
* - `"expired"` — credentials present but rejected (401/403)
|
|
6119
|
+
* - `"denied"` — credentials valid but team not whitelisted for any model
|
|
6120
|
+
* - `"network"` — provider unreachable (timeout, ECONNREFUSED, DNS)
|
|
6121
|
+
* - `"unknown"` — other error; consult `detail`
|
|
6122
|
+
*/
|
|
6123
|
+
async checkCredentials(input) {
|
|
6124
|
+
const { provider, model } = input;
|
|
6125
|
+
const probeText = "ping";
|
|
6126
|
+
try {
|
|
6127
|
+
// 1-token probe is cheap, exercises auth + routing without much cost.
|
|
6128
|
+
await this.generate({
|
|
6129
|
+
provider: provider,
|
|
6130
|
+
...(model && { model }),
|
|
6131
|
+
input: { text: probeText },
|
|
6132
|
+
maxTokens: 16,
|
|
6133
|
+
disableTools: true,
|
|
6134
|
+
});
|
|
6135
|
+
return { provider, status: "ok", detail: "credentials valid" };
|
|
6136
|
+
}
|
|
6137
|
+
catch (err) {
|
|
6138
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
6139
|
+
const lower = msg.toLowerCase();
|
|
6140
|
+
if (err instanceof ModelAccessDeniedError) {
|
|
6141
|
+
return {
|
|
6142
|
+
provider,
|
|
6143
|
+
status: "denied",
|
|
6144
|
+
detail: msg,
|
|
6145
|
+
};
|
|
6146
|
+
}
|
|
6147
|
+
if (lower.includes("authentication") ||
|
|
6148
|
+
lower.includes("401") ||
|
|
6149
|
+
lower.includes("invalid api key") ||
|
|
6150
|
+
lower.includes("incorrect api key") ||
|
|
6151
|
+
lower.includes("api_key_invalid") ||
|
|
6152
|
+
lower.includes("token has expired") ||
|
|
6153
|
+
lower.includes("expired credentials")) {
|
|
6154
|
+
return { provider, status: "expired", detail: msg };
|
|
6155
|
+
}
|
|
6156
|
+
if (lower.includes("not configured") ||
|
|
6157
|
+
lower.includes("missing api") ||
|
|
6158
|
+
lower.includes("api key is required") ||
|
|
6159
|
+
lower.includes("no api key") ||
|
|
6160
|
+
lower.includes("application default credentials") ||
|
|
6161
|
+
lower.includes("google_application_credentials") ||
|
|
6162
|
+
lower.includes("project_id") ||
|
|
6163
|
+
lower.includes("default credentials") ||
|
|
6164
|
+
lower.includes("service account")) {
|
|
6165
|
+
return { provider, status: "missing", detail: msg };
|
|
6166
|
+
}
|
|
6167
|
+
if (lower.includes("econnrefused") ||
|
|
6168
|
+
lower.includes("enotfound") ||
|
|
6169
|
+
lower.includes("could not resolve") ||
|
|
6170
|
+
lower.includes("timeout") ||
|
|
6171
|
+
lower.includes("network") ||
|
|
6172
|
+
lower.includes("cannot connect")) {
|
|
6173
|
+
return { provider, status: "network", detail: msg };
|
|
6174
|
+
}
|
|
6175
|
+
return { provider, status: "unknown", detail: msg };
|
|
6176
|
+
}
|
|
6177
|
+
}
|
|
5814
6178
|
// ========================================
|
|
5815
6179
|
// ENHANCED: Tool Event Emission API
|
|
5816
6180
|
// ========================================
|