@juspay/neurolink 9.59.1 → 9.59.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +355 -355
- package/dist/core/baseProvider.d.ts +10 -3
- package/dist/core/baseProvider.js +8 -3
- package/dist/core/modules/StreamHandler.d.ts +22 -3
- package/dist/core/modules/StreamHandler.js +42 -20
- package/dist/lib/core/baseProvider.d.ts +10 -3
- package/dist/lib/core/baseProvider.js +8 -3
- package/dist/lib/core/modules/StreamHandler.d.ts +22 -3
- package/dist/lib/core/modules/StreamHandler.js +42 -20
- package/dist/lib/neurolink.js +361 -39
- package/dist/lib/providers/anthropic.js +13 -1
- package/dist/lib/providers/anthropicBaseProvider.js +30 -2
- package/dist/lib/providers/azureOpenai.js +12 -1
- package/dist/lib/providers/googleAiStudio.js +12 -1
- package/dist/lib/providers/googleVertex.js +11 -1
- package/dist/lib/providers/huggingFace.js +29 -2
- package/dist/lib/providers/litellm.js +44 -4
- package/dist/lib/providers/mistral.js +12 -1
- package/dist/lib/providers/openAI.js +34 -3
- package/dist/lib/providers/openRouter.js +33 -2
- package/dist/lib/providers/openaiCompatible.js +34 -2
- package/dist/lib/services/server/ai/observability/instrumentation.js +7 -2
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +2 -0
- package/dist/lib/types/noOutputSentinel.d.ts +26 -0
- package/dist/lib/types/noOutputSentinel.js +2 -0
- package/dist/lib/types/stream.d.ts +2 -1
- package/dist/lib/utils/noOutputSentinel.d.ts +80 -0
- package/dist/lib/utils/noOutputSentinel.js +193 -0
- package/dist/neurolink.js +361 -39
- package/dist/providers/anthropic.js +13 -1
- package/dist/providers/anthropicBaseProvider.js +30 -2
- package/dist/providers/azureOpenai.js +12 -1
- package/dist/providers/googleAiStudio.js +12 -1
- package/dist/providers/googleVertex.js +11 -1
- package/dist/providers/huggingFace.js +29 -2
- package/dist/providers/litellm.js +44 -4
- package/dist/providers/mistral.js +12 -1
- package/dist/providers/openAI.js +34 -3
- package/dist/providers/openRouter.js +33 -2
- package/dist/providers/openaiCompatible.js +34 -2
- package/dist/services/server/ai/observability/instrumentation.js +7 -2
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/noOutputSentinel.d.ts +26 -0
- package/dist/types/noOutputSentinel.js +1 -0
- package/dist/types/stream.d.ts +2 -1
- package/dist/utils/noOutputSentinel.d.ts +80 -0
- package/dist/utils/noOutputSentinel.js +192 -0
- package/package.json +1 -1
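The hunks below center on three additions: a NoOutputSentinel utility so empty streams surface the real provider failure instead of yielding nothing, a pre-dispatch context-budget hard cap that throws a typed ContextBudgetExceededError, and a new `compaction.insufficient` event emitted whenever compaction cannot fit the request into the model window. A minimal consumer-side sketch follows; the `NeuroLink` constructor, `on()`, and `generate()` call shapes are assumptions for illustration, while the event name, its payload fields, and the error class come from the hunks in this diff.

```js
// Sketch only — neurolink.on(...) and neurolink.generate(...) are assumed entry
// points for illustration; the event name, payload fields, and error class are
// taken from the hunks below, but how the emitter is exposed is not shown here.
import { NeuroLink } from "@juspay/neurolink";

const neurolink = new NeuroLink();

// New in this range: fired when compaction could not bring the request under budget.
neurolink.on?.("compaction.insufficient", (evt) => {
  console.warn(
    `[compaction] phase=${evt.phase} provider=${evt.provider} ` +
      `tokens=${evt.finalTokens}/${evt.budget} stages=${evt.stagesAttempted?.join(",")}`,
  );
});

const hugePrompt = "lorem ipsum ".repeat(500_000);

try {
  await neurolink.generate({ input: { text: hugePrompt } });
} catch (err) {
  // The typed error now survives the provider fallback loop and the stream path.
  // Whether the budget details are exposed as properties is assumed here.
  if (err?.name === "ContextBudgetExceededError") {
    console.error("Context too large:", err.estimatedTokens, "vs", err.availableTokens);
  } else {
    throw err;
  }
}
```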
package/dist/neurolink.js
CHANGED
@@ -194,6 +194,12 @@ function isNonRetryableProviderError(error) {
     if (error instanceof ModelAccessDeniedError) {
         return true;
     }
+    // Note: ContextBudgetExceededError is intentionally NOT non-retryable.
+    // Each provider has its own context window, so a budget rejection on
+    // one provider doesn't preclude another provider's window fitting the
+    // same payload. The directProviderGeneration loop should continue
+    // trying alternate providers; the after-loop rethrow preserves the
+    // typed error when all providers reject (see `directProviderGeneration`).
     // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
     if (error && typeof error === "object") {
         const err = error;
@@ -3724,7 +3730,16 @@ Current user's request: ${currentInput}`;
         return null;
     }
     async tryRecoverGenerateTextOverflow(options, functionTag, error) {
-
+        // Reviewer Finding #3: drop the `!this.conversationMemory` gate so
+        // inline-conversationMessages callers also benefit from post-provider
+        // recovery when their pre-dispatch estimate happens to undershoot
+        // and the provider rejects at a higher real token count.
+        if (!isContextOverflowError(error)) {
+            return null;
+        }
+        const inlineMessages = options._originalConversationMessages;
+        const callerMessages = options.conversationMessages;
+        if (!this.conversationMemory && !inlineMessages && !callerMessages) {
             return null;
         }
         logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
@@ -3733,8 +3748,11 @@ Current user's request: ${currentInput}`;
         });
         try {
             const actualOverflow = parseProviderOverflowDetails(error);
-            const originalMessages =
-
+            const originalMessages = inlineMessages ??
+                callerMessages ??
+                (this.conversationMemory
+                    ? await getConversationMessages(this.conversationMemory, options)
+                    : []);
             const recoveryBudget = checkContextBudget({
                 provider: options.provider || "openai",
                 model: options.model,
@@ -3748,49 +3766,129 @@ Current user's request: ${currentInput}`;
             const requiredReduction = actualTokens > 0
                 ? (actualTokens - compactionTarget) / actualTokens
                 : 0.5;
-
-
-
-
-
-
-
-            const
-
-
+            // Reviewer Finding #3: escalating truncation across attempts. The
+            // first attempt uses the budget-derived fraction (single-round
+            // compaction). If that still leaves the conversation over budget,
+            // subsequent attempts apply progressively harder truncation
+            // (0.5 → 0.75 → 0.9) before giving up. This replaces the previous
+            // single-pass behaviour where one undersized fraction guaranteed
+            // failure on the next provider call.
+            const escalationFractions = [
+                Math.min(0.9, requiredReduction + 0.15),
+                0.5,
+                0.75,
+                0.9,
+            ];
+            let lastCompactionResult = null;
+            let compactedMessages = originalMessages;
+            let verifiedBudget = null;
+            let recoveredFraction = -1;
+            for (let i = 0; i < escalationFractions.length; i++) {
+                const fraction = escalationFractions[i];
+                const compactor = new ContextCompactor({
+                    enableSummarize: false,
+                    enablePrune: true,
+                    enableDeduplicate: true,
+                    enableTruncate: true,
+                    truncationFraction: fraction,
+                });
+                const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
+                if (!compactionResult.compacted) {
+                    continue;
+                }
+                lastCompactionResult = compactionResult;
+                const repairedResult = repairToolPairs(compactionResult.messages);
+                const verifyBudget = checkContextBudget({
+                    provider: options.provider || "openai",
+                    model: options.model,
+                    maxTokens: options.maxTokens,
+                    systemPrompt: options.systemPrompt,
+                    currentPrompt: options.prompt,
+                    conversationMessages: repairedResult.messages,
+                });
+                if (verifyBudget.withinBudget) {
+                    compactedMessages = repairedResult.messages;
+                    verifiedBudget = verifyBudget;
+                    recoveredFraction = fraction;
+                    break;
+                }
+                verifiedBudget = verifyBudget;
+            }
+            if (!lastCompactionResult) {
+                // Reviewer follow-up: when no escalation fraction managed to
+                // compact the conversation, the request will hit the same
+                // provider 400 again on retry. Surface a typed
+                // ContextBudgetExceededError + `compaction.insufficient` event
+                // instead of returning null (which lets callers propagate the
+                // opaque provider error).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: [],
+                        finalTokens: actualTokens,
+                        budget: budgetTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery-no-compaction",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
+                throw new ContextBudgetExceededError(`Context overflow recovery: no compaction stage was able to ` +
+                    `reduce conversation messages. Provider rejected at ` +
+                    `~${actualTokens} tokens; budget is ${budgetTokens} tokens.`, {
+                    estimatedTokens: actualTokens,
+                    availableTokens: budgetTokens,
+                    stagesUsed: [],
+                    breakdown: {},
+                });
             }
-
-
-
-
-
-
-            currentPrompt: options.prompt,
-            conversationMessages: repairedResult.messages,
-            });
-            if (!verifyBudget.withinBudget) {
-                logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
-                    estimatedTokens: verifyBudget.estimatedInputTokens,
-                    availableTokens: verifyBudget.availableInputTokens,
+            if (!verifiedBudget?.withinBudget) {
+                logger.error(`[${functionTag}] Recovery compaction insufficient after escalation, aborting retry`, {
+                    estimatedTokens: verifiedBudget?.estimatedInputTokens,
+                    availableTokens: verifiedBudget?.availableInputTokens,
+                    stagesAttempted: lastCompactionResult.stagesUsed,
+                    fractionsTried: escalationFractions,
                 });
+                // Reviewer Finding #3: emit `compaction.insufficient` so
+                // cost / audit listeners record the specific failure mode.
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: lastCompactionResult.stagesUsed,
+                        finalTokens: verifiedBudget?.estimatedInputTokens,
+                        budget: verifiedBudget?.availableInputTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                 throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
-                    `recovery compaction achieved ${
-                    `but budget is ${budgetTokens} tokens
-
+                    `recovery compaction achieved ${lastCompactionResult.tokensAfter} tokens ` +
+                    `but budget is ${budgetTokens} tokens (after escalation through ` +
+                    `${escalationFractions.length} fractions).`, {
+                    estimatedTokens: lastCompactionResult.tokensAfter,
                     availableTokens: budgetTokens,
-                    stagesUsed:
-                    breakdown:
+                    stagesUsed: lastCompactionResult.stagesUsed,
+                    breakdown: verifiedBudget?.breakdown ?? {},
                 });
             }
             logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
-                tokensSaved:
+                tokensSaved: lastCompactionResult.tokensSaved,
                 compactionTarget,
-                verifiedTokens:
-                verifiedBudget:
+                verifiedTokens: verifiedBudget.estimatedInputTokens,
+                verifiedBudget: verifiedBudget.availableInputTokens,
+                recoveredFraction,
             });
             return this.directProviderGeneration({
                 ...options,
-                conversationMessages:
+                conversationMessages: compactedMessages,
             });
         }
         catch (retryError) {
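The escalation ladder above derives its first fraction from the provider-reported overflow and only then falls back to the fixed 0.5 / 0.75 / 0.9 steps. A small worked illustration of that arithmetic (not package code, just the formula from the hunk):

```js
// Illustration of the first escalation fraction from the hunk above:
// fraction = min(0.9, requiredReduction + 0.15), where
// requiredReduction = (actualTokens - compactionTarget) / actualTokens.
function firstEscalationFraction(actualTokens, compactionTarget) {
  const requiredReduction =
    actualTokens > 0 ? (actualTokens - compactionTarget) / actualTokens : 0.5;
  return Math.min(0.9, requiredReduction + 0.15);
}

// e.g. a 200k-token conversation that must shrink to 120k tokens:
// requiredReduction = 0.4, so the first attempt truncates at ≈ 0.55,
// and later attempts escalate through 0.5, 0.75, 0.9 if that is not enough.
console.log(firstEscalationFraction(200_000, 120_000)); // ≈ 0.55
```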
@@ -4421,8 +4519,51 @@ Current user's request: ${currentInput}`;
        });
        const dpgMessageCount = conversationMessages?.length || 0;
        const dpgCompactionSessionId = this.getCompactionSessionId(options);
+        // Curator P1-2: pre-dispatch compaction must run for inline
+        // `conversationMessages` too (not just conversationMemory). Without
+        // this, a 1.3M-token caller-supplied conversation against a 128K
+        // window dispatches anyway and the provider returns
+        // "prompt is too long" — the bug Curator's report cited.
+        const dpgHasInlineMessages = !!optionsWithMessages.conversationMessages?.length;
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const dpgHasCompactableMessages = dpgMessageCount > 0;
+        // Reviewer Finding #4: pre-dispatch hard cap for the standalone
+        // oversized case. When the budget check shows the request is
+        // over budget but there's nothing to compact (no memory + no
+        // inline messages — e.g. a huge prompt or huge tool definitions
+        // alone), throw before dispatch instead of wasting a roundtrip.
+        if (!budgetCheck.withinBudget && !dpgHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: budgetCheck.estimatedInputTokens,
+                    budget: budgetCheck.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${budgetCheck.estimatedInputTokens} ` +
+                `tokens, budget: ${budgetCheck.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: budgetCheck.estimatedInputTokens,
+                availableTokens: budgetCheck.availableInputTokens,
+                stagesUsed: [],
+                breakdown: budgetCheck.breakdown,
+            });
+        }
        if (budgetCheck.shouldCompact &&
-            this.conversationMemory &&
+            (this.conversationMemory || dpgHasInlineMessages) &&
            dpgMessageCount >
                (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
            const compactor = new ContextCompactor({
@@ -4456,6 +4597,26 @@ Current user's request: ${currentInput}`;
                availableTokens: postCompactBudget.availableInputTokens,
                overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
            });
+            // Curator P1-2: emit `compaction.insufficient` whenever a
+            // single round of compaction wasn't enough — even when
+            // emergency truncation will save the day. Lets cost / audit
+            // listeners track the "compaction was insufficient" signal
+            // separately from the eventual outcome.
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: postCompactBudget.estimatedInputTokens,
+                    budget: postCompactBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "mid-compaction",
+                    willEmergencyTruncate: true,
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
            conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
            const finalBudget = checkContextBudget({
                provider: providerName,
@@ -4471,6 +4632,23 @@ Current user's request: ${currentInput}`;
            if (!finalBudget.withinBudget) {
                // Clear watermark so handleContextOverflow recovery can re-compact
                this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
+                // Curator P1-2: emit `compaction.insufficient` so cost / audit
+                // listeners can record the specific failure mode (separate
+                // from a generic provider error).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: compactionResult.stagesUsed,
+                        finalTokens: finalBudget.estimatedInputTokens,
+                        budget: finalBudget.availableInputTokens,
+                        provider: providerName,
+                        model: options.model,
+                        phase: "post-emergency-truncation",
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
                    `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                    `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -4577,6 +4755,14 @@ Current user's request: ${currentInput}`;
            lastError: lastError?.message,
            responseTime,
        });
+        // Reviewer follow-up: preserve typed ContextBudgetExceededError after
+        // the per-provider fallback loop. Each provider's hard cap is
+        // per-window; we let the loop try them all, but if every provider
+        // rejected on budget the caller still needs the typed error to
+        // distinguish "context too large" from a generic provider failure.
+        if (lastError instanceof ContextBudgetExceededError) {
+            throw lastError;
+        }
        throw new Error(`Failed to generate text with all providers. Last error: ${lastError?.message || "Unknown error"}`);
    }
    /**
@@ -5032,9 +5218,36 @@ Current user's request: ${currentInput}`;
            // single `generation:end` event with cost data. Cost listeners
            // subscribe here; previously the stream path never fired it.
            let resolvedUsage;
+            // Reviewer follow-up: track *non-sentinel output chunks* (text,
+            // audio, image — anything the SDK considers real output) so the
+            // fallback gate fires only when the stream produced nothing
+            // useful. Counting only text content here would have spuriously
+            // triggered fallback for valid audio-only (Google Live) and
+            // image-only streams. The sentinel is the only thing we exclude
+            // — that path can mask real provider failures (DNS, auth,
+            // retry-exhaustion) that AI SDK rejects with
+            // NoOutputGeneratedError, and we want fallback to fire there.
+            let realOutputChunks = 0;
            try {
                for await (const chunk of mcpStream) {
                    chunkCount++;
+                    const isNoOutputSentinel = chunk !== null &&
+                        typeof chunk === "object" &&
+                        "metadata" in chunk &&
+                        chunk.metadata
+                            ?.noOutput === true;
+                    const hasTextContent = chunk &&
+                        "content" in chunk &&
+                        typeof chunk.content === "string" &&
+                        chunk.content.length > 0;
+                    const hasMediaPayload = chunk !== null &&
+                        typeof chunk === "object" &&
+                        "type" in chunk &&
+                        (chunk.type === "audio" ||
+                            chunk.type === "image");
+                    if (!isNoOutputSentinel && (hasTextContent || hasMediaPayload)) {
+                        realOutputChunks++;
+                    }
                    if (chunk &&
                        "content" in chunk &&
                        typeof chunk.content === "string") {
@@ -5046,13 +5259,17 @@ Current user's request: ${currentInput}`;
                            metadata: {
                                chunkIndex: chunkCount,
                                totalLength: accumulatedContent.length,
+                                ...(isNoOutputSentinel && { noOutput: true }),
                            },
                            timestamp: Date.now(),
                        });
                    }
                    yield chunk;
                }
-
+                // Reviewer follow-up: fire fallback when no *non-sentinel*
+                // output was produced — sentinel-only and truly empty streams
+                // both qualify, but media-only streams (audio/image) do not.
+                if (realOutputChunks === 0 &&
                    !metadata.fallbackAttempted &&
                    !enhancedOptions.disableInternalFallback &&
                    streamState.toolCalls.length === 0 &&
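The chunk checks above, and the mirrored fallback-side copies later in this file, reduce to one question: does a chunk carry real output, or is it the NoOutput sentinel? The same logic expressed as a standalone predicate, purely illustrative rather than an export of the package:

```js
// Same logic as the inline checks in the hunk above, expressed as one predicate.
// A chunk counts as "real output" when it is not the NoOutput sentinel
// (metadata.noOutput === true) and carries either non-empty text content
// or an audio/image payload.
function isRealOutputChunk(chunk) {
  if (chunk === null || typeof chunk !== "object") {
    return false;
  }
  const isNoOutputSentinel = chunk.metadata?.noOutput === true;
  const hasTextContent =
    typeof chunk.content === "string" && chunk.content.length > 0;
  const hasMediaPayload = chunk.type === "audio" || chunk.type === "image";
  return !isNoOutputSentinel && (hasTextContent || hasMediaPayload);
}
```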
@@ -5549,9 +5766,32 @@ Current user's request: ${currentInput}`;
                    streamState.finishReason =
                        fallbackResult.finishReason ?? streamState.finishReason;
                }
+                // Reviewer follow-up: count *real* output chunks for the fallback
+                // success gate, mirroring the primary stream wrapper. A fallback
+                // that yields only the NoOutputSentinel must not be treated as
+                // success — that's the same masked-failure scenario as the primary.
                let fallbackChunkCount = 0;
+                let fallbackRealOutputChunks = 0;
                for await (const fallbackChunk of fallbackResult.stream) {
                    fallbackChunkCount++;
+                    const isFallbackNoOutputSentinel = fallbackChunk !== null &&
+                        typeof fallbackChunk === "object" &&
+                        "metadata" in fallbackChunk &&
+                        fallbackChunk.metadata
+                            ?.noOutput === true;
+                    const fallbackHasTextContent = fallbackChunk &&
+                        "content" in fallbackChunk &&
+                        typeof fallbackChunk.content === "string" &&
+                        fallbackChunk.content.length > 0;
+                    const fallbackHasMediaPayload = fallbackChunk !== null &&
+                        typeof fallbackChunk === "object" &&
+                        "type" in fallbackChunk &&
+                        (fallbackChunk.type === "audio" ||
+                            fallbackChunk.type === "image");
+                    if (!isFallbackNoOutputSentinel &&
+                        (fallbackHasTextContent || fallbackHasMediaPayload)) {
+                        fallbackRealOutputChunks++;
+                    }
                    if (fallbackChunk &&
                        "content" in fallbackChunk &&
                        typeof fallbackChunk.content === "string") {
@@ -5560,10 +5800,10 @@ Current user's request: ${currentInput}`;
                    }
                    yield fallbackChunk;
                }
-                if (
+                if (fallbackRealOutputChunks === 0 &&
                    fallbackToolCalls.length === 0 &&
                    fallbackToolResults.length === 0) {
-                    throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
+                    throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 real output chunks (chunkCount=${fallbackChunkCount}, sentinel-only or empty)`);
                }
                // Fallback succeeded - likely guardrails blocked primary
                metadata.fallbackProvider = fallbackRoute.provider;
@@ -5742,6 +5982,42 @@ Current user's request: ${currentInput}`;
        });
        const streamMessageCount = conversationMessages?.length || 0;
        const streamCompactionSessionId = this.getCompactionSessionId(options);
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const streamHasCompactableMessages = streamMessageCount > 0;
+        // Curator P1-2: pre-dispatch hard cap mirrors directProviderGeneration.
+        // When the budget check fails AND there's nothing to compact (no memory
+        // + no inline messages — only prompt + tools), throw before dispatch
+        // instead of wasting a roundtrip on a payload the provider will reject.
+        if (!streamBudget.withinBudget && !streamHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: streamBudget.estimatedInputTokens,
+                    budget: streamBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Stream context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${streamBudget.estimatedInputTokens} ` +
+                `tokens, budget: ${streamBudget.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: streamBudget.estimatedInputTokens,
+                availableTokens: streamBudget.availableInputTokens,
+                stagesUsed: [],
+                breakdown: streamBudget.breakdown,
+            });
+        }
        if (streamBudget.shouldCompact &&
            (hasCallerConversationHistory || this.conversationMemory) &&
            streamMessageCount >
@@ -5778,6 +6054,26 @@ Current user's request: ${currentInput}`;
                availableTokens: postCompactBudget.availableInputTokens,
                overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
            });
+            // Curator P1-2: emit `compaction.insufficient` whenever a single
+            // round of compaction wasn't enough — even when emergency
+            // truncation will save the day. Lets cost / audit listeners track
+            // the "compaction was insufficient" signal separately from the
+            // eventual outcome.
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: postCompactBudget.estimatedInputTokens,
+                    budget: postCompactBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "mid-compaction",
+                    willEmergencyTruncate: true,
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
            conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
            // Keep options in sync after emergency truncation so fallback paths
            // use the truncated history.
@@ -5794,6 +6090,23 @@ Current user's request: ${currentInput}`;
            if (!finalBudget.withinBudget) {
                // Clear watermark so handleContextOverflow recovery can re-compact
                this.lastCompactionMessageCount.delete(streamCompactionSessionId);
+                // Curator P1-2: emit `compaction.insufficient` on the terminal
+                // failure path so cost / audit listeners can record the specific
+                // failure mode (compaction + emergency truncation both insufficient).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: compactionResult.stagesUsed,
+                        finalTokens: finalBudget.estimatedInputTokens,
+                        budget: finalBudget.availableInputTokens,
+                        provider: providerName,
+                        model: options.model,
+                        phase: "post-emergency-truncation",
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
                    `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                    `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5881,6 +6194,15 @@ Current user's request: ${currentInput}`;
     * Handle stream error with fallback
     */
    async handleStreamError(error, options, startTime, streamId, enhancedOptions, _factoryResult) {
+        // Curator P1-2: when the pre-dispatch hard cap or post-emergency
+        // truncation budget check throws ContextBudgetExceededError, the
+        // payload is too large for the model and a same-payload retry would
+        // just fail again at the provider — wasting the same tokens that
+        // the hard cap was meant to save. Rethrow so the caller sees the
+        // typed error instead of a fallback ProviderError that hides it.
+        if (error instanceof ContextBudgetExceededError) {
+            throw error;
+        }
        logger.error("Stream generation failed, attempting fallback", {
            error: error instanceof Error ? error.message : String(error),
        });
@@ -790,6 +790,10 @@ export class AnthropicProvider extends BaseProvider {
                "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
            },
        });
+        // Reviewer follow-up: capture upstream provider errors via onError
+        // so the post-stream NoOutput sentinel carries the real cause in
+        // providerError / modelResponseRaw.
+        let capturedProviderError;
        let result;
        try {
            result = streamText({
@@ -802,6 +806,14 @@ export class AnthropicProvider extends BaseProvider {
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+                onError: (event) => {
+                    capturedProviderError = event.error;
+                    logger.error("Anthropic: Stream error", {
+                        error: event.error instanceof Error
+                            ? event.error.message
+                            : String(event.error),
+                    });
+                },
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
@@ -868,7 +880,7 @@ export class AnthropicProvider extends BaseProvider {
            streamSpan.end();
        });
        timeoutController?.cleanup();
-        const transformedStream = this.createTextStream(result);
+        const transformedStream = this.createTextStream(result, () => capturedProviderError);
        // ✅ Note: Vercel AI SDK's streamText() method limitations with tools
        // The streamText() function doesn't provide the same tool result access as generateText()
        // Full tool support is now available with real streaming
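One design choice worth calling out in the hunk above: createTextStream now receives a thunk, `() => capturedProviderError`, rather than the error value itself, because onError fires while the stream is being consumed, after createTextStream has already been called. A self-contained sketch of that deferred-getter pattern (names here are illustrative, not the package's API):

```js
// Minimal sketch of the deferred-error-getter pattern used above. Only the idea
// (pass a thunk, read it when the sentinel is built) mirrors the diff; the real
// createTextStream / sentinel shapes live in the package.
let capturedProviderError;

// Simulated provider stream: fails without yielding any text.
async function* emptyProviderStream() {
  capturedProviderError = new Error("upstream DNS failure"); // set during iteration
}

async function* wrapStream(source, getProviderError) {
  let sawOutput = false;
  for await (const chunk of source) {
    sawOutput = true;
    yield { content: chunk };
  }
  if (!sawOutput) {
    // The getter is evaluated now, after iteration, so it sees the error that
    // was captured while the stream ran; a plain value argument would miss it.
    yield { content: "", metadata: { noOutput: true, providerError: getProviderError() } };
  }
}

for await (const chunk of wrapStream(emptyProviderStream(), () => capturedProviderError)) {
  console.log(chunk.metadata?.noOutput, chunk.metadata?.providerError?.message);
}
```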
@@ -5,6 +5,7 @@ import { AnthropicModels } from "../constants/enums.js";
 import { BaseProvider } from "../core/baseProvider.js";
 import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
 import { logger } from "../utils/logger.js";
+import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
 import { calculateCost } from "../utils/pricing.js";
 import { createAnthropicBaseConfig, validateApiKey, } from "../utils/providerConfig.js";
 import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
@@ -81,6 +82,10 @@ export class AnthropicProviderV2 extends BaseProvider {
                "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
            },
        });
+        // Reviewer follow-up: capture upstream provider errors via onError
+        // so the post-stream NoOutput detect can propagate the real cause
+        // into the sentinel's providerError / modelResponseRaw.
+        let capturedProviderError;
        let result;
        try {
            result = streamText({
@@ -95,6 +100,14 @@ export class AnthropicProviderV2 extends BaseProvider {
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
+                onError: (event) => {
+                    capturedProviderError = event.error;
+                    logger.error("AnthropicBaseProvider: Stream error", {
+                        error: event.error instanceof Error
+                            ? event.error.message
+                            : String(event.error),
+                    });
+                },
                onStepFinish: ({ toolCalls, toolResults }) => {
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[AnthropicBaseProvider] Failed to store tool executions", {
@@ -153,19 +166,34 @@ export class AnthropicProviderV2 extends BaseProvider {
        timeoutController?.cleanup();
        // Transform string stream to content object stream (match Google AI pattern)
        const transformedStream = async function* () {
+            let chunkCount = 0;
            try {
                for await (const chunk of result.textStream) {
+                    chunkCount++;
                    yield { content: chunk };
                }
            }
            catch (streamError) {
-                // AI SDK v6 throws NoOutputGeneratedError when the stream produced no output.
                if (NoOutputGeneratedError.isInstance(streamError)) {
-                    logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError)");
+                    logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
+                    const sentinel = await buildNoOutputSentinel(streamError, result, capturedProviderError);
+                    stampNoOutputSpan(sentinel);
+                    yield sentinel;
                    return;
                }
                throw streamError;
            }
+            // Curator P3-6 (round-2 fix): production trigger sets the error
+            // on result.finishReason rejection, not on textStream iteration.
+            // Surface that path here so the sentinel actually fires.
+            if (chunkCount === 0) {
+                const detected = await detectPostStreamNoOutput(result, capturedProviderError);
+                if (detected) {
+                    logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
+                    stampNoOutputSpan(detected.sentinel);
+                    yield detected.sentinel;
+                }
+            }
        };
        return {
            stream: transformedStream(),