@juspay/neurolink 9.59.1 → 9.59.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +355 -355
  3. package/dist/core/baseProvider.d.ts +10 -3
  4. package/dist/core/baseProvider.js +8 -3
  5. package/dist/core/modules/StreamHandler.d.ts +22 -3
  6. package/dist/core/modules/StreamHandler.js +42 -20
  7. package/dist/lib/core/baseProvider.d.ts +10 -3
  8. package/dist/lib/core/baseProvider.js +8 -3
  9. package/dist/lib/core/modules/StreamHandler.d.ts +22 -3
  10. package/dist/lib/core/modules/StreamHandler.js +42 -20
  11. package/dist/lib/neurolink.js +361 -39
  12. package/dist/lib/providers/anthropic.js +13 -1
  13. package/dist/lib/providers/anthropicBaseProvider.js +30 -2
  14. package/dist/lib/providers/azureOpenai.js +12 -1
  15. package/dist/lib/providers/googleAiStudio.js +12 -1
  16. package/dist/lib/providers/googleVertex.js +11 -1
  17. package/dist/lib/providers/huggingFace.js +29 -2
  18. package/dist/lib/providers/litellm.js +44 -4
  19. package/dist/lib/providers/mistral.js +12 -1
  20. package/dist/lib/providers/openAI.js +34 -3
  21. package/dist/lib/providers/openRouter.js +33 -2
  22. package/dist/lib/providers/openaiCompatible.js +34 -2
  23. package/dist/lib/services/server/ai/observability/instrumentation.js +7 -2
  24. package/dist/lib/types/index.d.ts +1 -0
  25. package/dist/lib/types/index.js +2 -0
  26. package/dist/lib/types/noOutputSentinel.d.ts +26 -0
  27. package/dist/lib/types/noOutputSentinel.js +2 -0
  28. package/dist/lib/types/stream.d.ts +2 -1
  29. package/dist/lib/utils/noOutputSentinel.d.ts +80 -0
  30. package/dist/lib/utils/noOutputSentinel.js +193 -0
  31. package/dist/neurolink.js +361 -39
  32. package/dist/providers/anthropic.js +13 -1
  33. package/dist/providers/anthropicBaseProvider.js +30 -2
  34. package/dist/providers/azureOpenai.js +12 -1
  35. package/dist/providers/googleAiStudio.js +12 -1
  36. package/dist/providers/googleVertex.js +11 -1
  37. package/dist/providers/huggingFace.js +29 -2
  38. package/dist/providers/litellm.js +44 -4
  39. package/dist/providers/mistral.js +12 -1
  40. package/dist/providers/openAI.js +34 -3
  41. package/dist/providers/openRouter.js +33 -2
  42. package/dist/providers/openaiCompatible.js +34 -2
  43. package/dist/services/server/ai/observability/instrumentation.js +7 -2
  44. package/dist/types/index.d.ts +1 -0
  45. package/dist/types/index.js +2 -0
  46. package/dist/types/noOutputSentinel.d.ts +26 -0
  47. package/dist/types/noOutputSentinel.js +1 -0
  48. package/dist/types/stream.d.ts +2 -1
  49. package/dist/utils/noOutputSentinel.d.ts +80 -0
  50. package/dist/utils/noOutputSentinel.js +192 -0
  51. package/package.json +1 -1
@@ -194,6 +194,12 @@ function isNonRetryableProviderError(error) {
      if (error instanceof ModelAccessDeniedError) {
          return true;
      }
+     // Note: ContextBudgetExceededError is intentionally NOT non-retryable.
+     // Each provider has its own context window, so a budget rejection on
+     // one provider doesn't preclude another provider's window fitting the
+     // same payload. The directProviderGeneration loop should continue
+     // trying alternate providers; the after-loop rethrow preserves the
+     // typed error when all providers reject (see `directProviderGeneration`).
      // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
      if (error && typeof error === "object") {
          const err = error;
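
For callers, the practical consequence of this note plus the after-loop rethrow is a typed catch. A minimal sketch, assuming the `NeuroLink` class, a `generate` entry point, and a root export of `ContextBudgetExceededError` (this diff shows the error class but not its public export path):

    import { NeuroLink, ContextBudgetExceededError } from "@juspay/neurolink";

    const ai = new NeuroLink();
    try {
        // `oversizedPrompt` stands in for a payload that exceeds every
        // configured provider's context window.
        await ai.generate({ input: { text: oversizedPrompt } });
    }
    catch (err) {
        if (err instanceof ContextBudgetExceededError) {
            // Every provider rejected on budget — retrying the same
            // payload cannot succeed; shrink it first.
        }
        else {
            // Generic provider failure — backoff and retry may help.
        }
    }
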
@@ -3724,7 +3730,16 @@ Current user's request: ${currentInput}`;
          return null;
      }
      async tryRecoverGenerateTextOverflow(options, functionTag, error) {
-         if (!isContextOverflowError(error) || !this.conversationMemory) {
+         // Reviewer Finding #3: drop the `!this.conversationMemory` gate so
+         // inline-conversationMessages callers also benefit from post-provider
+         // recovery when their pre-dispatch estimate happens to undershoot
+         // and the provider rejects at a higher real token count.
+         if (!isContextOverflowError(error)) {
+             return null;
+         }
+         const inlineMessages = options._originalConversationMessages;
+         const callerMessages = options.conversationMessages;
+         if (!this.conversationMemory && !inlineMessages && !callerMessages) {
              return null;
          }
          logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
@@ -3733,8 +3748,11 @@ Current user's request: ${currentInput}`;
          });
          try {
              const actualOverflow = parseProviderOverflowDetails(error);
-             const originalMessages = options._originalConversationMessages ??
-                 (await getConversationMessages(this.conversationMemory, options));
+             const originalMessages = inlineMessages ??
+                 callerMessages ??
+                 (this.conversationMemory
+                     ? await getConversationMessages(this.conversationMemory, options)
+                     : []);
              const recoveryBudget = checkContextBudget({
                  provider: options.provider || "openai",
                  model: options.model,
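
The new fallback chain is easier to read in isolation. A standalone restatement (the helper name `resolveOriginalMessages` is illustrative; `getConversationMessages` is the same function the diff calls): inline messages captured at dispatch win, then caller-supplied messages, then memory-backed history, then an empty list.

    async function resolveOriginalMessages(options, memory) {
        return (options._originalConversationMessages ?? // captured at dispatch
            options.conversationMessages ??              // caller-supplied
            (memory
                ? await getConversationMessages(memory, options) // memory-backed
                : [])); // nothing recoverable
    }
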
@@ -3748,49 +3766,129 @@ Current user's request: ${currentInput}`;
              const requiredReduction = actualTokens > 0
                  ? (actualTokens - compactionTarget) / actualTokens
                  : 0.5;
-             const compactor = new ContextCompactor({
-                 enableSummarize: false,
-                 enablePrune: true,
-                 enableDeduplicate: true,
-                 enableTruncate: true,
-                 truncationFraction: Math.min(0.9, requiredReduction + 0.15),
-             });
-             const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
-             if (!compactionResult.compacted) {
-                 return null;
+             // Reviewer Finding #3: escalating truncation across attempts. The
+             // first attempt uses the budget-derived fraction (single-round
+             // compaction). If that still leaves the conversation over budget,
+             // subsequent attempts apply progressively harder truncation
+             // (0.5 → 0.75 → 0.9) before giving up. This replaces the previous
+             // single-pass behaviour where one undersized fraction guaranteed
+             // failure on the next provider call.
+             const escalationFractions = [
+                 Math.min(0.9, requiredReduction + 0.15),
+                 0.5,
+                 0.75,
+                 0.9,
+             ];
+             let lastCompactionResult = null;
+             let compactedMessages = originalMessages;
+             let verifiedBudget = null;
+             let recoveredFraction = -1;
+             for (let i = 0; i < escalationFractions.length; i++) {
+                 const fraction = escalationFractions[i];
+                 const compactor = new ContextCompactor({
+                     enableSummarize: false,
+                     enablePrune: true,
+                     enableDeduplicate: true,
+                     enableTruncate: true,
+                     truncationFraction: fraction,
+                 });
+                 const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
+                 if (!compactionResult.compacted) {
+                     continue;
+                 }
+                 lastCompactionResult = compactionResult;
+                 const repairedResult = repairToolPairs(compactionResult.messages);
+                 const verifyBudget = checkContextBudget({
+                     provider: options.provider || "openai",
+                     model: options.model,
+                     maxTokens: options.maxTokens,
+                     systemPrompt: options.systemPrompt,
+                     currentPrompt: options.prompt,
+                     conversationMessages: repairedResult.messages,
+                 });
+                 if (verifyBudget.withinBudget) {
+                     compactedMessages = repairedResult.messages;
+                     verifiedBudget = verifyBudget;
+                     recoveredFraction = fraction;
+                     break;
+                 }
+                 verifiedBudget = verifyBudget;
+             }
+             if (!lastCompactionResult) {
+                 // Reviewer follow-up: when no escalation fraction managed to
+                 // compact the conversation, the request will hit the same
+                 // provider 400 again on retry. Surface a typed
+                 // ContextBudgetExceededError + `compaction.insufficient` event
+                 // instead of returning null (which lets callers propagate the
+                 // opaque provider error).
+                 try {
+                     this.emitter.emit("compaction.insufficient", {
+                         stagesAttempted: [],
+                         finalTokens: actualTokens,
+                         budget: budgetTokens,
+                         provider: options.provider || "openai",
+                         model: options.model,
+                         phase: "post-provider-recovery-no-compaction",
+                         fractionsTried: escalationFractions,
+                         timestamp: Date.now(),
+                     });
+                 }
+                 catch {
+                     /* listener errors are non-fatal */
+                 }
+                 throw new ContextBudgetExceededError(`Context overflow recovery: no compaction stage was able to ` +
+                     `reduce conversation messages. Provider rejected at ` +
+                     `~${actualTokens} tokens; budget is ${budgetTokens} tokens.`, {
+                     estimatedTokens: actualTokens,
+                     availableTokens: budgetTokens,
+                     stagesUsed: [],
+                     breakdown: {},
+                 });
              }
-             const repairedResult = repairToolPairs(compactionResult.messages);
-             const verifyBudget = checkContextBudget({
-                 provider: options.provider || "openai",
-                 model: options.model,
-                 maxTokens: options.maxTokens,
-                 systemPrompt: options.systemPrompt,
-                 currentPrompt: options.prompt,
-                 conversationMessages: repairedResult.messages,
-             });
-             if (!verifyBudget.withinBudget) {
-                 logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
-                     estimatedTokens: verifyBudget.estimatedInputTokens,
-                     availableTokens: verifyBudget.availableInputTokens,
+             if (!verifiedBudget?.withinBudget) {
+                 logger.error(`[${functionTag}] Recovery compaction insufficient after escalation, aborting retry`, {
+                     estimatedTokens: verifiedBudget?.estimatedInputTokens,
+                     availableTokens: verifiedBudget?.availableInputTokens,
+                     stagesAttempted: lastCompactionResult.stagesUsed,
+                     fractionsTried: escalationFractions,
                  });
+                 // Reviewer Finding #3: emit `compaction.insufficient` so
+                 // cost / audit listeners record the specific failure mode.
+                 try {
+                     this.emitter.emit("compaction.insufficient", {
+                         stagesAttempted: lastCompactionResult.stagesUsed,
+                         finalTokens: verifiedBudget?.estimatedInputTokens,
+                         budget: verifiedBudget?.availableInputTokens,
+                         provider: options.provider || "openai",
+                         model: options.model,
+                         phase: "post-provider-recovery",
+                         fractionsTried: escalationFractions,
+                         timestamp: Date.now(),
+                     });
+                 }
+                 catch {
+                     /* listener errors are non-fatal */
+                 }
                  throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
-                     `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
-                     `but budget is ${budgetTokens} tokens.`, {
-                     estimatedTokens: compactionResult.tokensAfter,
+                     `recovery compaction achieved ${lastCompactionResult.tokensAfter} tokens ` +
+                     `but budget is ${budgetTokens} tokens (after escalation through ` +
+                     `${escalationFractions.length} fractions).`, {
+                     estimatedTokens: lastCompactionResult.tokensAfter,
                      availableTokens: budgetTokens,
-                     stagesUsed: compactionResult.stagesUsed,
-                     breakdown: verifyBudget.breakdown,
+                     stagesUsed: lastCompactionResult.stagesUsed,
+                     breakdown: verifiedBudget?.breakdown ?? {},
                  });
              }
              logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
-                 tokensSaved: compactionResult.tokensSaved,
+                 tokensSaved: lastCompactionResult.tokensSaved,
                  compactionTarget,
-                 verifiedTokens: verifyBudget.estimatedInputTokens,
-                 verifiedBudget: verifyBudget.availableInputTokens,
+                 verifiedTokens: verifiedBudget.estimatedInputTokens,
+                 verifiedBudget: verifiedBudget.availableInputTokens,
+                 recoveredFraction,
              });
              return this.directProviderGeneration({
                  ...options,
-                 conversationMessages: repairedResult.messages,
+                 conversationMessages: compactedMessages,
              });
          }
          catch (retryError) {
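
Stripped of logging and event emission, the escalation strategy reduces to a small loop. A minimal sketch under stated assumptions — `compact` and `fits` stand in for `ContextCompactor.compact` and `checkContextBudget`, whose real signatures carry more parameters:

    async function compactWithEscalation(messages, fractions, compact, fits) {
        for (const fraction of fractions) {
            const result = await compact(messages, fraction);
            if (!result.compacted) {
                continue; // this fraction changed nothing; try a harder one
            }
            if (await fits(result.messages)) {
                return { messages: result.messages, fraction }; // verified under budget
            }
        }
        return null; // caller raises the typed ContextBudgetExceededError
    }

The first fraction is budget-derived, so the common case still resolves in one pass; the fixed 0.5/0.75/0.9 tail only runs when the estimate undershot.
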
@@ -4421,8 +4519,51 @@ Current user's request: ${currentInput}`;
      });
      const dpgMessageCount = conversationMessages?.length || 0;
      const dpgCompactionSessionId = this.getCompactionSessionId(options);
+     // Curator P1-2: pre-dispatch compaction must run for inline
+     // `conversationMessages` too (not just conversationMemory). Without
+     // this, a 1.3M-token caller-supplied conversation against a 128K
+     // window dispatches anyway and the provider returns
+     // "prompt is too long" — the bug Curator's report cited.
+     const dpgHasInlineMessages = !!optionsWithMessages.conversationMessages?.length;
+     // Reviewer follow-up: gate the hard cap on the *actual compactable
+     // history* rather than `this.conversationMemory`. A configured-but-
+     // empty memory store leaves nothing to compact yet still satisfies
+     // `!this.conversationMemory === false`, so the previous check
+     // skipped the hard cap and dispatched the oversized payload.
+     const dpgHasCompactableMessages = dpgMessageCount > 0;
+     // Reviewer Finding #4: pre-dispatch hard cap for the standalone
+     // oversized case. When the budget check shows the request is
+     // over budget but there's nothing to compact (no memory + no
+     // inline messages — e.g. a huge prompt or huge tool definitions
+     // alone), throw before dispatch instead of wasting a roundtrip.
+     if (!budgetCheck.withinBudget && !dpgHasCompactableMessages) {
+         try {
+             this.emitter.emit("compaction.insufficient", {
+                 stagesAttempted: ["pre-dispatch hard cap"],
+                 finalTokens: budgetCheck.estimatedInputTokens,
+                 budget: budgetCheck.availableInputTokens,
+                 provider: providerName,
+                 model: options.model,
+                 phase: "pre-dispatch-no-recovery",
+                 timestamp: Date.now(),
+             });
+         }
+         catch {
+             /* listener errors are non-fatal */
+         }
+         throw new ContextBudgetExceededError(`Context exceeds model budget and no compaction is possible ` +
+             `(no conversationMemory, no inline conversationMessages — only ` +
+             `prompt + tools). Estimated: ${budgetCheck.estimatedInputTokens} ` +
+             `tokens, budget: ${budgetCheck.availableInputTokens} tokens. ` +
+             `Reduce prompt or tool-definition size, or trim the request.`, {
+             estimatedTokens: budgetCheck.estimatedInputTokens,
+             availableTokens: budgetCheck.availableInputTokens,
+             stagesUsed: [],
+             breakdown: budgetCheck.breakdown,
+         });
+     }
      if (budgetCheck.shouldCompact &&
-         this.conversationMemory &&
+         (this.conversationMemory || dpgHasInlineMessages) &&
          dpgMessageCount >
              (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
          const compactor = new ContextCompactor({
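
The `compaction.insufficient` emissions above are the observable surface of these changes. A hedged listener sketch — the `emitter` accessor on a NeuroLink instance is an assumption inferred from the `this.emitter.emit` calls in this diff, and `auditLog` is a hypothetical sink; the payload fields match the emit sites:

    ai.emitter.on("compaction.insufficient", (event) => {
        auditLog.record({
            phase: event.phase,         // e.g. "pre-dispatch-no-recovery"
            provider: event.provider,
            model: event.model,
            finalTokens: event.finalTokens,
            budget: event.budget,
            stages: event.stagesAttempted,
            at: event.timestamp,
        });
    });
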
@@ -4456,6 +4597,26 @@ Current user's request: ${currentInput}`;
          availableTokens: postCompactBudget.availableInputTokens,
          overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
      });
+     // Curator P1-2: emit `compaction.insufficient` whenever a
+     // single round of compaction wasn't enough — even when
+     // emergency truncation will save the day. Lets cost / audit
+     // listeners track the "compaction was insufficient" signal
+     // separately from the eventual outcome.
+     try {
+         this.emitter.emit("compaction.insufficient", {
+             stagesAttempted: compactionResult.stagesUsed,
+             finalTokens: postCompactBudget.estimatedInputTokens,
+             budget: postCompactBudget.availableInputTokens,
+             provider: providerName,
+             model: options.model,
+             phase: "mid-compaction",
+             willEmergencyTruncate: true,
+             timestamp: Date.now(),
+         });
+     }
+     catch {
+         /* listener errors are non-fatal */
+     }
      conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
      const finalBudget = checkContextBudget({
          provider: providerName,
@@ -4471,6 +4632,23 @@ Current user's request: ${currentInput}`;
      if (!finalBudget.withinBudget) {
          // Clear watermark so handleContextOverflow recovery can re-compact
          this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
+         // Curator P1-2: emit `compaction.insufficient` so cost / audit
+         // listeners can record the specific failure mode (separate
+         // from a generic provider error).
+         try {
+             this.emitter.emit("compaction.insufficient", {
+                 stagesAttempted: compactionResult.stagesUsed,
+                 finalTokens: finalBudget.estimatedInputTokens,
+                 budget: finalBudget.availableInputTokens,
+                 provider: providerName,
+                 model: options.model,
+                 phase: "post-emergency-truncation",
+                 timestamp: Date.now(),
+             });
+         }
+         catch {
+             /* listener errors are non-fatal */
+         }
          throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
              `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
              `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -4577,6 +4755,14 @@ Current user's request: ${currentInput}`;
          lastError: lastError?.message,
          responseTime,
      });
+     // Reviewer follow-up: preserve typed ContextBudgetExceededError after
+     // the per-provider fallback loop. Each provider's hard cap is
+     // per-window; we let the loop try them all, but if every provider
+     // rejected on budget the caller still needs the typed error to
+     // distinguish "context too large" from a generic provider failure.
+     if (lastError instanceof ContextBudgetExceededError) {
+         throw lastError;
+     }
      throw new Error(`Failed to generate text with all providers. Last error: ${lastError?.message || "Unknown error"}`);
  }
  /**
@@ -5032,9 +5218,36 @@ Current user's request: ${currentInput}`;
      // single `generation:end` event with cost data. Cost listeners
      // subscribe here; previously the stream path never fired it.
      let resolvedUsage;
+     // Reviewer follow-up: track *non-sentinel output chunks* (text,
+     // audio, image — anything the SDK considers real output) so the
+     // fallback gate fires only when the stream produced nothing
+     // useful. Counting only text content here would have spuriously
+     // triggered fallback for valid audio-only (Google Live) and
+     // image-only streams. The sentinel is the only thing we exclude
+     // — that path can mask real provider failures (DNS, auth,
+     // retry-exhaustion) that AI SDK rejects with
+     // NoOutputGeneratedError, and we want fallback to fire there.
+     let realOutputChunks = 0;
      try {
          for await (const chunk of mcpStream) {
              chunkCount++;
+             const isNoOutputSentinel = chunk !== null &&
+                 typeof chunk === "object" &&
+                 "metadata" in chunk &&
+                 chunk.metadata?.noOutput === true;
+             const hasTextContent = chunk &&
+                 "content" in chunk &&
+                 typeof chunk.content === "string" &&
+                 chunk.content.length > 0;
+             const hasMediaPayload = chunk !== null &&
+                 typeof chunk === "object" &&
+                 "type" in chunk &&
+                 (chunk.type === "audio" ||
+                     chunk.type === "image");
+             if (!isNoOutputSentinel && (hasTextContent || hasMediaPayload)) {
+                 realOutputChunks++;
+             }
              if (chunk &&
                  "content" in chunk &&
                  typeof chunk.content === "string") {
@@ -5046,13 +5259,17 @@ Current user's request: ${currentInput}`;
                  metadata: {
                      chunkIndex: chunkCount,
                      totalLength: accumulatedContent.length,
+                     ...(isNoOutputSentinel && { noOutput: true }),
                  },
                  timestamp: Date.now(),
              });
          }
          yield chunk;
      }
-     if (chunkCount === 0 &&
+     // Reviewer follow-up: fire fallback when no *non-sentinel*
+     // output was produced — sentinel-only and truly empty streams
+     // both qualify, but media-only streams (audio/image) do not.
+     if (realOutputChunks === 0 &&
          !metadata.fallbackAttempted &&
          !enhancedOptions.disableInternalFallback &&
          streamState.toolCalls.length === 0 &&
@@ -5549,9 +5766,32 @@ Current user's request: ${currentInput}`;
          streamState.finishReason =
              fallbackResult.finishReason ?? streamState.finishReason;
      }
+     // Reviewer follow-up: count *real* output chunks for the fallback
+     // success gate, mirroring the primary stream wrapper. A fallback
+     // that yields only the NoOutputSentinel must not be treated as
+     // success — that's the same masked-failure scenario as the primary.
      let fallbackChunkCount = 0;
+     let fallbackRealOutputChunks = 0;
      for await (const fallbackChunk of fallbackResult.stream) {
          fallbackChunkCount++;
+         const isFallbackNoOutputSentinel = fallbackChunk !== null &&
+             typeof fallbackChunk === "object" &&
+             "metadata" in fallbackChunk &&
+             fallbackChunk.metadata?.noOutput === true;
+         const fallbackHasTextContent = fallbackChunk &&
+             "content" in fallbackChunk &&
+             typeof fallbackChunk.content === "string" &&
+             fallbackChunk.content.length > 0;
+         const fallbackHasMediaPayload = fallbackChunk !== null &&
+             typeof fallbackChunk === "object" &&
+             "type" in fallbackChunk &&
+             (fallbackChunk.type === "audio" ||
+                 fallbackChunk.type === "image");
+         if (!isFallbackNoOutputSentinel &&
+             (fallbackHasTextContent || fallbackHasMediaPayload)) {
+             fallbackRealOutputChunks++;
+         }
          if (fallbackChunk &&
              "content" in fallbackChunk &&
              typeof fallbackChunk.content === "string") {
@@ -5560,10 +5800,10 @@ Current user's request: ${currentInput}`;
          }
          yield fallbackChunk;
      }
-     if (fallbackChunkCount === 0 &&
+     if (fallbackRealOutputChunks === 0 &&
          fallbackToolCalls.length === 0 &&
          fallbackToolResults.length === 0) {
-         throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
+         throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 real output chunks (chunkCount=${fallbackChunkCount}, sentinel-only or empty)`);
      }
      // Fallback succeeded - likely guardrails blocked primary
      metadata.fallbackProvider = fallbackRoute.provider;
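
The chunk classification repeated in both wrappers factors into one predicate. A standalone restatement of the diff's logic (the function name is illustrative; the chunk shape matches the wrapper code above):

    function isRealOutputChunk(chunk) {
        if (chunk === null || typeof chunk !== "object") {
            return false;
        }
        const isSentinel = "metadata" in chunk && chunk.metadata?.noOutput === true;
        const hasText = "content" in chunk &&
            typeof chunk.content === "string" &&
            chunk.content.length > 0;
        const hasMedia = "type" in chunk &&
            (chunk.type === "audio" || chunk.type === "image");
        // Sentinel chunks never count; non-empty text or audio/image does.
        return !isSentinel && (hasText || hasMedia);
    }
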
@@ -5742,6 +5982,42 @@ Current user's request: ${currentInput}`;
      });
      const streamMessageCount = conversationMessages?.length || 0;
      const streamCompactionSessionId = this.getCompactionSessionId(options);
+     // Reviewer follow-up: gate the hard cap on the *actual compactable
+     // history* rather than `this.conversationMemory`. A configured-but-
+     // empty memory store leaves nothing to compact yet still satisfies
+     // `!this.conversationMemory === false`, so the previous check
+     // skipped the hard cap and dispatched the oversized payload.
+     const streamHasCompactableMessages = streamMessageCount > 0;
+     // Curator P1-2: pre-dispatch hard cap mirrors directProviderGeneration.
+     // When the budget check fails AND there's nothing to compact (no memory
+     // + no inline messages — only prompt + tools), throw before dispatch
+     // instead of wasting a roundtrip on a payload the provider will reject.
+     if (!streamBudget.withinBudget && !streamHasCompactableMessages) {
+         try {
+             this.emitter.emit("compaction.insufficient", {
+                 stagesAttempted: ["pre-dispatch hard cap"],
+                 finalTokens: streamBudget.estimatedInputTokens,
+                 budget: streamBudget.availableInputTokens,
+                 provider: providerName,
+                 model: options.model,
+                 phase: "pre-dispatch-no-recovery",
+                 timestamp: Date.now(),
+             });
+         }
+         catch {
+             /* listener errors are non-fatal */
+         }
+         throw new ContextBudgetExceededError(`Stream context exceeds model budget and no compaction is possible ` +
+             `(no conversationMemory, no inline conversationMessages — only ` +
+             `prompt + tools). Estimated: ${streamBudget.estimatedInputTokens} ` +
+             `tokens, budget: ${streamBudget.availableInputTokens} tokens. ` +
+             `Reduce prompt or tool-definition size, or trim the request.`, {
+             estimatedTokens: streamBudget.estimatedInputTokens,
+             availableTokens: streamBudget.availableInputTokens,
+             stagesUsed: [],
+             breakdown: streamBudget.breakdown,
+         });
+     }
      if (streamBudget.shouldCompact &&
          (hasCallerConversationHistory || this.conversationMemory) &&
          streamMessageCount >
@@ -5778,6 +6054,26 @@ Current user's request: ${currentInput}`;
          availableTokens: postCompactBudget.availableInputTokens,
          overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
      });
+     // Curator P1-2: emit `compaction.insufficient` whenever a single
+     // round of compaction wasn't enough — even when emergency
+     // truncation will save the day. Lets cost / audit listeners track
+     // the "compaction was insufficient" signal separately from the
+     // eventual outcome.
+     try {
+         this.emitter.emit("compaction.insufficient", {
+             stagesAttempted: compactionResult.stagesUsed,
+             finalTokens: postCompactBudget.estimatedInputTokens,
+             budget: postCompactBudget.availableInputTokens,
+             provider: providerName,
+             model: options.model,
+             phase: "mid-compaction",
+             willEmergencyTruncate: true,
+             timestamp: Date.now(),
+         });
+     }
+     catch {
+         /* listener errors are non-fatal */
+     }
      conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
      // Keep options in sync after emergency truncation so fallback paths
      // use the truncated history.
@@ -5794,6 +6090,23 @@ Current user's request: ${currentInput}`;
      if (!finalBudget.withinBudget) {
          // Clear watermark so handleContextOverflow recovery can re-compact
          this.lastCompactionMessageCount.delete(streamCompactionSessionId);
+         // Curator P1-2: emit `compaction.insufficient` on the terminal
+         // failure path so cost / audit listeners can record the specific
+         // failure mode (compaction + emergency truncation both insufficient).
+         try {
+             this.emitter.emit("compaction.insufficient", {
+                 stagesAttempted: compactionResult.stagesUsed,
+                 finalTokens: finalBudget.estimatedInputTokens,
+                 budget: finalBudget.availableInputTokens,
+                 provider: providerName,
+                 model: options.model,
+                 phase: "post-emergency-truncation",
+                 timestamp: Date.now(),
+             });
+         }
+         catch {
+             /* listener errors are non-fatal */
+         }
          throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
              `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
              `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5881,6 +6194,15 @@ Current user's request: ${currentInput}`;
   * Handle stream error with fallback
   */
  async handleStreamError(error, options, startTime, streamId, enhancedOptions, _factoryResult) {
+     // Curator P1-2: when the pre-dispatch hard cap or post-emergency
+     // truncation budget check throws ContextBudgetExceededError, the
+     // payload is too large for the model and a same-payload retry would
+     // just fail again at the provider — wasting the same tokens that
+     // the hard cap was meant to save. Rethrow so the caller sees the
+     // typed error instead of a fallback ProviderError that hides it.
+     if (error instanceof ContextBudgetExceededError) {
+         throw error;
+     }
      logger.error("Stream generation failed, attempting fallback", {
          error: error instanceof Error ? error.message : String(error),
      });
@@ -790,6 +790,10 @@ export class AnthropicProvider extends BaseProvider {
          "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
      },
  });
+ // Reviewer follow-up: capture upstream provider errors via onError
+ // so the post-stream NoOutput sentinel carries the real cause in
+ // providerError / modelResponseRaw.
+ let capturedProviderError;
  let result;
  try {
      result = streamText({
@@ -802,6 +806,14 @@ export class AnthropicProvider extends BaseProvider {
          stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
          toolChoice: resolveToolChoice(options, tools, shouldUseTools),
          abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+         onError: (event) => {
+             capturedProviderError = event.error;
+             logger.error("Anthropic: Stream error", {
+                 error: event.error instanceof Error
+                     ? event.error.message
+                     : String(event.error),
+             });
+         },
          experimental_repairToolCall: this.getToolCallRepairFn(options),
          experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
          onStepFinish: ({ toolCalls, toolResults }) => {
@@ -868,7 +880,7 @@ export class AnthropicProvider extends BaseProvider {
      streamSpan.end();
  });
  timeoutController?.cleanup();
- const transformedStream = this.createTextStream(result);
+ const transformedStream = this.createTextStream(result, () => capturedProviderError);
  // ✅ Note: Vercel AI SDK's streamText() method limitations with tools
  // The streamText() function doesn't provide the same tool result access as generateText()
  // Full tool support is now available with real streaming
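
The capture pattern is worth seeing in isolation: `onError` is a real streamText option in the Vercel AI SDK, and recording the error in a closure variable lets the wrapper observe the underlying failure even when stream iteration ends without a throw. A minimal sketch, with `model` and `messages` as placeholders:

    let capturedProviderError;
    const result = streamText({
        model,
        messages,
        onError: (event) => {
            // Fires on upstream failures (DNS, auth, retry-exhaustion)
            // that would otherwise surface only as an empty stream.
            capturedProviderError = event.error;
        },
    });
    // If the stream later ends with zero chunks, capturedProviderError
    // becomes the sentinel's providerError instead of an unknown cause.
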
@@ -5,6 +5,7 @@ import { AnthropicModels } from "../constants/enums.js";
  import { BaseProvider } from "../core/baseProvider.js";
  import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
  import { logger } from "../utils/logger.js";
+ import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
  import { calculateCost } from "../utils/pricing.js";
  import { createAnthropicBaseConfig, validateApiKey, } from "../utils/providerConfig.js";
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
@@ -81,6 +82,10 @@ export class AnthropicProviderV2 extends BaseProvider {
          "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
      },
  });
+ // Reviewer follow-up: capture upstream provider errors via onError
+ // so the post-stream NoOutput detect can propagate the real cause
+ // into the sentinel's providerError / modelResponseRaw.
+ let capturedProviderError;
  let result;
  try {
      result = streamText({
@@ -95,6 +100,14 @@ export class AnthropicProviderV2 extends BaseProvider {
          abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
          experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
          experimental_repairToolCall: this.getToolCallRepairFn(options),
+         onError: (event) => {
+             capturedProviderError = event.error;
+             logger.error("AnthropicBaseProvider: Stream error", {
+                 error: event.error instanceof Error
+                     ? event.error.message
+                     : String(event.error),
+             });
+         },
          onStepFinish: ({ toolCalls, toolResults }) => {
              this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                  logger.warn("[AnthropicBaseProvider] Failed to store tool executions", {
@@ -153,19 +166,34 @@ export class AnthropicProviderV2 extends BaseProvider {
  timeoutController?.cleanup();
  // Transform string stream to content object stream (match Google AI pattern)
  const transformedStream = async function* () {
+     let chunkCount = 0;
      try {
          for await (const chunk of result.textStream) {
+             chunkCount++;
              yield { content: chunk };
          }
      }
      catch (streamError) {
-         // AI SDK v6 throws NoOutputGeneratedError when the stream produced no output.
          if (NoOutputGeneratedError.isInstance(streamError)) {
-             logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError)");
+             logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
+             const sentinel = await buildNoOutputSentinel(streamError, result, capturedProviderError);
+             stampNoOutputSpan(sentinel);
+             yield sentinel;
              return;
          }
          throw streamError;
      }
+     // Curator P3-6 (round-2 fix): production trigger sets the error
+     // on result.finishReason rejection, not on textStream iteration.
+     // Surface that path here so the sentinel actually fires.
+     if (chunkCount === 0) {
+         const detected = await detectPostStreamNoOutput(result, capturedProviderError);
+         if (detected) {
+             logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
+             stampNoOutputSpan(detected.sentinel);
+             yield detected.sentinel;
+         }
+     }
  };
  return {
      stream: transformedStream(),
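
Downstream, the sentinel shows up as an ordinary chunk with `metadata.noOutput === true`. A hedged consumer sketch — `provider.stream(options)` and `handleNoOutput` are illustrative names, but the chunk shape (`{ content }` objects, `metadata.noOutput`) matches the wrapper above:

    const { stream } = await provider.stream(options);
    let text = "";
    for await (const chunk of stream) {
        if (chunk?.metadata?.noOutput === true) {
            // Stream produced no output; the sentinel carries the real
            // cause (providerError) instead of ending silently empty.
            handleNoOutput(chunk);
            continue;
        }
        if (typeof chunk.content === "string") {
            text += chunk.content;
        }
    }
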