@juspay/neurolink 9.59.1 → 9.59.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +355 -355
- package/dist/core/baseProvider.d.ts +10 -3
- package/dist/core/baseProvider.js +8 -3
- package/dist/core/modules/StreamHandler.d.ts +22 -3
- package/dist/core/modules/StreamHandler.js +42 -20
- package/dist/lib/core/baseProvider.d.ts +10 -3
- package/dist/lib/core/baseProvider.js +8 -3
- package/dist/lib/core/modules/StreamHandler.d.ts +22 -3
- package/dist/lib/core/modules/StreamHandler.js +42 -20
- package/dist/lib/neurolink.js +361 -39
- package/dist/lib/providers/anthropic.js +13 -1
- package/dist/lib/providers/anthropicBaseProvider.js +30 -2
- package/dist/lib/providers/azureOpenai.js +12 -1
- package/dist/lib/providers/googleAiStudio.js +12 -1
- package/dist/lib/providers/googleVertex.js +11 -1
- package/dist/lib/providers/huggingFace.js +29 -2
- package/dist/lib/providers/litellm.js +44 -4
- package/dist/lib/providers/mistral.js +12 -1
- package/dist/lib/providers/openAI.js +34 -3
- package/dist/lib/providers/openRouter.js +33 -2
- package/dist/lib/providers/openaiCompatible.js +34 -2
- package/dist/lib/services/server/ai/observability/instrumentation.js +7 -2
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +2 -0
- package/dist/lib/types/noOutputSentinel.d.ts +26 -0
- package/dist/lib/types/noOutputSentinel.js +2 -0
- package/dist/lib/types/stream.d.ts +2 -1
- package/dist/lib/utils/noOutputSentinel.d.ts +80 -0
- package/dist/lib/utils/noOutputSentinel.js +193 -0
- package/dist/neurolink.js +361 -39
- package/dist/providers/anthropic.js +13 -1
- package/dist/providers/anthropicBaseProvider.js +30 -2
- package/dist/providers/azureOpenai.js +12 -1
- package/dist/providers/googleAiStudio.js +12 -1
- package/dist/providers/googleVertex.js +11 -1
- package/dist/providers/huggingFace.js +29 -2
- package/dist/providers/litellm.js +44 -4
- package/dist/providers/mistral.js +12 -1
- package/dist/providers/openAI.js +34 -3
- package/dist/providers/openRouter.js +33 -2
- package/dist/providers/openaiCompatible.js +34 -2
- package/dist/services/server/ai/observability/instrumentation.js +7 -2
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/noOutputSentinel.d.ts +26 -0
- package/dist/types/noOutputSentinel.js +1 -0
- package/dist/types/stream.d.ts +2 -1
- package/dist/utils/noOutputSentinel.d.ts +80 -0
- package/dist/utils/noOutputSentinel.js +192 -0
- package/package.json +1 -1
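The hunks below center on three additions: a NoOutputSentinel utility so empty streams surface the real provider failure instead of yielding nothing, a pre-dispatch context-budget hard cap that throws a typed ContextBudgetExceededError, and a new `compaction.insufficient` event emitted whenever compaction cannot fit the request into the model window. A minimal consumer-side sketch follows; the `NeuroLink` constructor, `on()`, and `generate()` call shapes are assumptions for illustration, while the event name, its payload fields, and the error class come from the hunks in this diff.

```js
// Sketch only — neurolink.on(...) and neurolink.generate(...) are assumed entry
// points for illustration; the event name, payload fields, and error class are
// taken from the hunks below, but how the emitter is exposed is not shown here.
import { NeuroLink } from "@juspay/neurolink";

const neurolink = new NeuroLink();

// New in this range: fired when compaction could not bring the request under budget.
neurolink.on?.("compaction.insufficient", (evt) => {
  console.warn(
    `[compaction] phase=${evt.phase} provider=${evt.provider} ` +
      `tokens=${evt.finalTokens}/${evt.budget} stages=${evt.stagesAttempted?.join(",")}`,
  );
});

const hugePrompt = "lorem ipsum ".repeat(500_000);

try {
  await neurolink.generate({ input: { text: hugePrompt } });
} catch (err) {
  // The typed error now survives the provider fallback loop and the stream path.
  // Whether the budget details are exposed as properties is assumed here.
  if (err?.name === "ContextBudgetExceededError") {
    console.error("Context too large:", err.estimatedTokens, "vs", err.availableTokens);
  } else {
    throw err;
  }
}
```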
package/dist/neurolink.js
CHANGED
@@ -194,6 +194,12 @@ function isNonRetryableProviderError(error) {
     if (error instanceof ModelAccessDeniedError) {
         return true;
     }
+    // Note: ContextBudgetExceededError is intentionally NOT non-retryable.
+    // Each provider has its own context window, so a budget rejection on
+    // one provider doesn't preclude another provider's window fitting the
+    // same payload. The directProviderGeneration loop should continue
+    // trying alternate providers; the after-loop rethrow preserves the
+    // typed error when all providers reject (see `directProviderGeneration`).
     // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
     if (error && typeof error === "object") {
         const err = error;
@@ -3724,7 +3730,16 @@ Current user's request: ${currentInput}`;
         return null;
     }
     async tryRecoverGenerateTextOverflow(options, functionTag, error) {
-
+        // Reviewer Finding #3: drop the `!this.conversationMemory` gate so
+        // inline-conversationMessages callers also benefit from post-provider
+        // recovery when their pre-dispatch estimate happens to undershoot
+        // and the provider rejects at a higher real token count.
+        if (!isContextOverflowError(error)) {
+            return null;
+        }
+        const inlineMessages = options._originalConversationMessages;
+        const callerMessages = options.conversationMessages;
+        if (!this.conversationMemory && !inlineMessages && !callerMessages) {
             return null;
         }
         logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
@@ -3733,8 +3748,11 @@ Current user's request: ${currentInput}`;
         });
         try {
             const actualOverflow = parseProviderOverflowDetails(error);
-            const originalMessages =
-
+            const originalMessages = inlineMessages ??
+                callerMessages ??
+                (this.conversationMemory
+                    ? await getConversationMessages(this.conversationMemory, options)
+                    : []);
             const recoveryBudget = checkContextBudget({
                 provider: options.provider || "openai",
                 model: options.model,
@@ -3748,49 +3766,129 @@ Current user's request: ${currentInput}`;
             const requiredReduction = actualTokens > 0
                 ? (actualTokens - compactionTarget) / actualTokens
                 : 0.5;
-
-
-
-
-
-
-
-            const
-
-
+            // Reviewer Finding #3: escalating truncation across attempts. The
+            // first attempt uses the budget-derived fraction (single-round
+            // compaction). If that still leaves the conversation over budget,
+            // subsequent attempts apply progressively harder truncation
+            // (0.5 → 0.75 → 0.9) before giving up. This replaces the previous
+            // single-pass behaviour where one undersized fraction guaranteed
+            // failure on the next provider call.
+            const escalationFractions = [
+                Math.min(0.9, requiredReduction + 0.15),
+                0.5,
+                0.75,
+                0.9,
+            ];
+            let lastCompactionResult = null;
+            let compactedMessages = originalMessages;
+            let verifiedBudget = null;
+            let recoveredFraction = -1;
+            for (let i = 0; i < escalationFractions.length; i++) {
+                const fraction = escalationFractions[i];
+                const compactor = new ContextCompactor({
+                    enableSummarize: false,
+                    enablePrune: true,
+                    enableDeduplicate: true,
+                    enableTruncate: true,
+                    truncationFraction: fraction,
+                });
+                const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
+                if (!compactionResult.compacted) {
+                    continue;
+                }
+                lastCompactionResult = compactionResult;
+                const repairedResult = repairToolPairs(compactionResult.messages);
+                const verifyBudget = checkContextBudget({
+                    provider: options.provider || "openai",
+                    model: options.model,
+                    maxTokens: options.maxTokens,
+                    systemPrompt: options.systemPrompt,
+                    currentPrompt: options.prompt,
+                    conversationMessages: repairedResult.messages,
+                });
+                if (verifyBudget.withinBudget) {
+                    compactedMessages = repairedResult.messages;
+                    verifiedBudget = verifyBudget;
+                    recoveredFraction = fraction;
+                    break;
+                }
+                verifiedBudget = verifyBudget;
+            }
+            if (!lastCompactionResult) {
+                // Reviewer follow-up: when no escalation fraction managed to
+                // compact the conversation, the request will hit the same
+                // provider 400 again on retry. Surface a typed
+                // ContextBudgetExceededError + `compaction.insufficient` event
+                // instead of returning null (which lets callers propagate the
+                // opaque provider error).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: [],
+                        finalTokens: actualTokens,
+                        budget: budgetTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery-no-compaction",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
+                throw new ContextBudgetExceededError(`Context overflow recovery: no compaction stage was able to ` +
+                    `reduce conversation messages. Provider rejected at ` +
+                    `~${actualTokens} tokens; budget is ${budgetTokens} tokens.`, {
+                    estimatedTokens: actualTokens,
+                    availableTokens: budgetTokens,
+                    stagesUsed: [],
+                    breakdown: {},
+                });
             }
-
-
-
-
-
-
-            currentPrompt: options.prompt,
-            conversationMessages: repairedResult.messages,
-            });
-            if (!verifyBudget.withinBudget) {
-                logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
-                    estimatedTokens: verifyBudget.estimatedInputTokens,
-                    availableTokens: verifyBudget.availableInputTokens,
+            if (!verifiedBudget?.withinBudget) {
+                logger.error(`[${functionTag}] Recovery compaction insufficient after escalation, aborting retry`, {
+                    estimatedTokens: verifiedBudget?.estimatedInputTokens,
+                    availableTokens: verifiedBudget?.availableInputTokens,
+                    stagesAttempted: lastCompactionResult.stagesUsed,
+                    fractionsTried: escalationFractions,
                 });
+                // Reviewer Finding #3: emit `compaction.insufficient` so
+                // cost / audit listeners record the specific failure mode.
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: lastCompactionResult.stagesUsed,
+                        finalTokens: verifiedBudget?.estimatedInputTokens,
+                        budget: verifiedBudget?.availableInputTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                 throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
-                    `recovery compaction achieved ${
-                    `but budget is ${budgetTokens} tokens
-
+                    `recovery compaction achieved ${lastCompactionResult.tokensAfter} tokens ` +
+                    `but budget is ${budgetTokens} tokens (after escalation through ` +
+                    `${escalationFractions.length} fractions).`, {
+                    estimatedTokens: lastCompactionResult.tokensAfter,
                     availableTokens: budgetTokens,
-                    stagesUsed:
-                    breakdown:
+                    stagesUsed: lastCompactionResult.stagesUsed,
+                    breakdown: verifiedBudget?.breakdown ?? {},
                 });
             }
             logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
-                tokensSaved:
+                tokensSaved: lastCompactionResult.tokensSaved,
                 compactionTarget,
-                verifiedTokens:
-                verifiedBudget:
+                verifiedTokens: verifiedBudget.estimatedInputTokens,
+                verifiedBudget: verifiedBudget.availableInputTokens,
+                recoveredFraction,
             });
             return this.directProviderGeneration({
                 ...options,
-                conversationMessages:
+                conversationMessages: compactedMessages,
             });
         }
         catch (retryError) {
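The escalation ladder above derives its first fraction from the provider-reported overflow and only then falls back to the fixed 0.5 / 0.75 / 0.9 steps. A small worked illustration of that arithmetic (not package code, just the formula from the hunk):

```js
// Illustration of the first escalation fraction from the hunk above:
// fraction = min(0.9, requiredReduction + 0.15), where
// requiredReduction = (actualTokens - compactionTarget) / actualTokens.
function firstEscalationFraction(actualTokens, compactionTarget) {
  const requiredReduction =
    actualTokens > 0 ? (actualTokens - compactionTarget) / actualTokens : 0.5;
  return Math.min(0.9, requiredReduction + 0.15);
}

// e.g. a 200k-token conversation that must shrink to 120k tokens:
// requiredReduction = 0.4, so the first attempt truncates at ≈ 0.55,
// and later attempts escalate through 0.5, 0.75, 0.9 if that is not enough.
console.log(firstEscalationFraction(200_000, 120_000)); // ≈ 0.55
```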
@@ -4421,8 +4519,51 @@ Current user's request: ${currentInput}`;
        });
        const dpgMessageCount = conversationMessages?.length || 0;
        const dpgCompactionSessionId = this.getCompactionSessionId(options);
+        // Curator P1-2: pre-dispatch compaction must run for inline
+        // `conversationMessages` too (not just conversationMemory). Without
+        // this, a 1.3M-token caller-supplied conversation against a 128K
+        // window dispatches anyway and the provider returns
+        // "prompt is too long" — the bug Curator's report cited.
+        const dpgHasInlineMessages = !!optionsWithMessages.conversationMessages?.length;
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const dpgHasCompactableMessages = dpgMessageCount > 0;
+        // Reviewer Finding #4: pre-dispatch hard cap for the standalone
+        // oversized case. When the budget check shows the request is
+        // over budget but there's nothing to compact (no memory + no
+        // inline messages — e.g. a huge prompt or huge tool definitions
+        // alone), throw before dispatch instead of wasting a roundtrip.
+        if (!budgetCheck.withinBudget && !dpgHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: budgetCheck.estimatedInputTokens,
+                    budget: budgetCheck.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${budgetCheck.estimatedInputTokens} ` +
+                `tokens, budget: ${budgetCheck.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: budgetCheck.estimatedInputTokens,
+                availableTokens: budgetCheck.availableInputTokens,
+                stagesUsed: [],
+                breakdown: budgetCheck.breakdown,
+            });
+        }
        if (budgetCheck.shouldCompact &&
-            this.conversationMemory &&
+            (this.conversationMemory || dpgHasInlineMessages) &&
            dpgMessageCount >
                (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
            const compactor = new ContextCompactor({
@@ -4456,6 +4597,26 @@ Current user's request: ${currentInput}`;
                availableTokens: postCompactBudget.availableInputTokens,
                overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
            });
+            // Curator P1-2: emit `compaction.insufficient` whenever a
+            // single round of compaction wasn't enough — even when
+            // emergency truncation will save the day. Lets cost / audit
+            // listeners track the "compaction was insufficient" signal
+            // separately from the eventual outcome.
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: postCompactBudget.estimatedInputTokens,
+                    budget: postCompactBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "mid-compaction",
+                    willEmergencyTruncate: true,
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
            conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
            const finalBudget = checkContextBudget({
                provider: providerName,
@@ -4471,6 +4632,23 @@ Current user's request: ${currentInput}`;
            if (!finalBudget.withinBudget) {
                // Clear watermark so handleContextOverflow recovery can re-compact
                this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
+                // Curator P1-2: emit `compaction.insufficient` so cost / audit
+                // listeners can record the specific failure mode (separate
+                // from a generic provider error).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: compactionResult.stagesUsed,
+                        finalTokens: finalBudget.estimatedInputTokens,
+                        budget: finalBudget.availableInputTokens,
+                        provider: providerName,
+                        model: options.model,
+                        phase: "post-emergency-truncation",
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
                    `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                    `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -4577,6 +4755,14 @@ Current user's request: ${currentInput}`;
            lastError: lastError?.message,
            responseTime,
        });
+        // Reviewer follow-up: preserve typed ContextBudgetExceededError after
+        // the per-provider fallback loop. Each provider's hard cap is
+        // per-window; we let the loop try them all, but if every provider
+        // rejected on budget the caller still needs the typed error to
+        // distinguish "context too large" from a generic provider failure.
+        if (lastError instanceof ContextBudgetExceededError) {
+            throw lastError;
+        }
        throw new Error(`Failed to generate text with all providers. Last error: ${lastError?.message || "Unknown error"}`);
    }
    /**
@@ -5032,9 +5218,36 @@ Current user's request: ${currentInput}`;
            // single `generation:end` event with cost data. Cost listeners
            // subscribe here; previously the stream path never fired it.
            let resolvedUsage;
+            // Reviewer follow-up: track *non-sentinel output chunks* (text,
+            // audio, image — anything the SDK considers real output) so the
+            // fallback gate fires only when the stream produced nothing
+            // useful. Counting only text content here would have spuriously
+            // triggered fallback for valid audio-only (Google Live) and
+            // image-only streams. The sentinel is the only thing we exclude
+            // — that path can mask real provider failures (DNS, auth,
+            // retry-exhaustion) that AI SDK rejects with
+            // NoOutputGeneratedError, and we want fallback to fire there.
+            let realOutputChunks = 0;
            try {
                for await (const chunk of mcpStream) {
                    chunkCount++;
+                    const isNoOutputSentinel = chunk !== null &&
+                        typeof chunk === "object" &&
+                        "metadata" in chunk &&
+                        chunk.metadata
+                            ?.noOutput === true;
+                    const hasTextContent = chunk &&
+                        "content" in chunk &&
+                        typeof chunk.content === "string" &&
+                        chunk.content.length > 0;
+                    const hasMediaPayload = chunk !== null &&
+                        typeof chunk === "object" &&
+                        "type" in chunk &&
+                        (chunk.type === "audio" ||
+                            chunk.type === "image");
+                    if (!isNoOutputSentinel && (hasTextContent || hasMediaPayload)) {
+                        realOutputChunks++;
+                    }
                    if (chunk &&
                        "content" in chunk &&
                        typeof chunk.content === "string") {
@@ -5046,13 +5259,17 @@ Current user's request: ${currentInput}`;
                            metadata: {
                                chunkIndex: chunkCount,
                                totalLength: accumulatedContent.length,
+                                ...(isNoOutputSentinel && { noOutput: true }),
                            },
                            timestamp: Date.now(),
                        });
                    }
                    yield chunk;
                }
-
+                // Reviewer follow-up: fire fallback when no *non-sentinel*
+                // output was produced — sentinel-only and truly empty streams
+                // both qualify, but media-only streams (audio/image) do not.
+                if (realOutputChunks === 0 &&
                    !metadata.fallbackAttempted &&
                    !enhancedOptions.disableInternalFallback &&
                    streamState.toolCalls.length === 0 &&
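The chunk checks above, and the mirrored fallback-side copies later in this file, reduce to one question: does a chunk carry real output, or is it the NoOutput sentinel? The same logic expressed as a standalone predicate, purely illustrative rather than an export of the package:

```js
// Same logic as the inline checks in the hunk above, expressed as one predicate.
// A chunk counts as "real output" when it is not the NoOutput sentinel
// (metadata.noOutput === true) and carries either non-empty text content
// or an audio/image payload.
function isRealOutputChunk(chunk) {
  if (chunk === null || typeof chunk !== "object") {
    return false;
  }
  const isNoOutputSentinel = chunk.metadata?.noOutput === true;
  const hasTextContent =
    typeof chunk.content === "string" && chunk.content.length > 0;
  const hasMediaPayload = chunk.type === "audio" || chunk.type === "image";
  return !isNoOutputSentinel && (hasTextContent || hasMediaPayload);
}
```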
@@ -5549,9 +5766,32 @@ Current user's request: ${currentInput}`;
                    streamState.finishReason =
                        fallbackResult.finishReason ?? streamState.finishReason;
                }
+                // Reviewer follow-up: count *real* output chunks for the fallback
+                // success gate, mirroring the primary stream wrapper. A fallback
+                // that yields only the NoOutputSentinel must not be treated as
+                // success — that's the same masked-failure scenario as the primary.
                let fallbackChunkCount = 0;
+                let fallbackRealOutputChunks = 0;
                for await (const fallbackChunk of fallbackResult.stream) {
                    fallbackChunkCount++;
+                    const isFallbackNoOutputSentinel = fallbackChunk !== null &&
+                        typeof fallbackChunk === "object" &&
+                        "metadata" in fallbackChunk &&
+                        fallbackChunk.metadata
+                            ?.noOutput === true;
+                    const fallbackHasTextContent = fallbackChunk &&
+                        "content" in fallbackChunk &&
+                        typeof fallbackChunk.content === "string" &&
+                        fallbackChunk.content.length > 0;
+                    const fallbackHasMediaPayload = fallbackChunk !== null &&
+                        typeof fallbackChunk === "object" &&
+                        "type" in fallbackChunk &&
+                        (fallbackChunk.type === "audio" ||
+                            fallbackChunk.type === "image");
+                    if (!isFallbackNoOutputSentinel &&
+                        (fallbackHasTextContent || fallbackHasMediaPayload)) {
+                        fallbackRealOutputChunks++;
+                    }
                    if (fallbackChunk &&
                        "content" in fallbackChunk &&
                        typeof fallbackChunk.content === "string") {
@@ -5560,10 +5800,10 @@ Current user's request: ${currentInput}`;
                    }
                    yield fallbackChunk;
                }
-                if (
+                if (fallbackRealOutputChunks === 0 &&
                    fallbackToolCalls.length === 0 &&
                    fallbackToolResults.length === 0) {
-                    throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
+                    throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 real output chunks (chunkCount=${fallbackChunkCount}, sentinel-only or empty)`);
                }
                // Fallback succeeded - likely guardrails blocked primary
                metadata.fallbackProvider = fallbackRoute.provider;
@@ -5742,6 +5982,42 @@ Current user's request: ${currentInput}`;
        });
        const streamMessageCount = conversationMessages?.length || 0;
        const streamCompactionSessionId = this.getCompactionSessionId(options);
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const streamHasCompactableMessages = streamMessageCount > 0;
+        // Curator P1-2: pre-dispatch hard cap mirrors directProviderGeneration.
+        // When the budget check fails AND there's nothing to compact (no memory
+        // + no inline messages — only prompt + tools), throw before dispatch
+        // instead of wasting a roundtrip on a payload the provider will reject.
+        if (!streamBudget.withinBudget && !streamHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: streamBudget.estimatedInputTokens,
+                    budget: streamBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Stream context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${streamBudget.estimatedInputTokens} ` +
+                `tokens, budget: ${streamBudget.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: streamBudget.estimatedInputTokens,
+                availableTokens: streamBudget.availableInputTokens,
+                stagesUsed: [],
+                breakdown: streamBudget.breakdown,
+            });
+        }
        if (streamBudget.shouldCompact &&
            (hasCallerConversationHistory || this.conversationMemory) &&
            streamMessageCount >
@@ -5778,6 +6054,26 @@ Current user's request: ${currentInput}`;
                availableTokens: postCompactBudget.availableInputTokens,
                overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
            });
+            // Curator P1-2: emit `compaction.insufficient` whenever a single
+            // round of compaction wasn't enough — even when emergency
+            // truncation will save the day. Lets cost / audit listeners track
+            // the "compaction was insufficient" signal separately from the
+            // eventual outcome.
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: postCompactBudget.estimatedInputTokens,
+                    budget: postCompactBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "mid-compaction",
+                    willEmergencyTruncate: true,
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
            conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
            // Keep options in sync after emergency truncation so fallback paths
            // use the truncated history.
@@ -5794,6 +6090,23 @@ Current user's request: ${currentInput}`;
            if (!finalBudget.withinBudget) {
                // Clear watermark so handleContextOverflow recovery can re-compact
                this.lastCompactionMessageCount.delete(streamCompactionSessionId);
+                // Curator P1-2: emit `compaction.insufficient` on the terminal
+                // failure path so cost / audit listeners can record the specific
+                // failure mode (compaction + emergency truncation both insufficient).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: compactionResult.stagesUsed,
+                        finalTokens: finalBudget.estimatedInputTokens,
+                        budget: finalBudget.availableInputTokens,
+                        provider: providerName,
+                        model: options.model,
+                        phase: "post-emergency-truncation",
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
                    `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                    `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5881,6 +6194,15 @@ Current user's request: ${currentInput}`;
     * Handle stream error with fallback
     */
    async handleStreamError(error, options, startTime, streamId, enhancedOptions, _factoryResult) {
+        // Curator P1-2: when the pre-dispatch hard cap or post-emergency
+        // truncation budget check throws ContextBudgetExceededError, the
+        // payload is too large for the model and a same-payload retry would
+        // just fail again at the provider — wasting the same tokens that
+        // the hard cap was meant to save. Rethrow so the caller sees the
+        // typed error instead of a fallback ProviderError that hides it.
+        if (error instanceof ContextBudgetExceededError) {
+            throw error;
+        }
        logger.error("Stream generation failed, attempting fallback", {
            error: error instanceof Error ? error.message : String(error),
        });
@@ -790,6 +790,10 @@ export class AnthropicProvider extends BaseProvider {
                "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
            },
        });
+        // Reviewer follow-up: capture upstream provider errors via onError
+        // so the post-stream NoOutput sentinel carries the real cause in
+        // providerError / modelResponseRaw.
+        let capturedProviderError;
        let result;
        try {
            result = streamText({
@@ -802,6 +806,14 @@ export class AnthropicProvider extends BaseProvider {
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+                onError: (event) => {
+                    capturedProviderError = event.error;
+                    logger.error("Anthropic: Stream error", {
+                        error: event.error instanceof Error
+                            ? event.error.message
+                            : String(event.error),
+                    });
+                },
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
@@ -868,7 +880,7 @@ export class AnthropicProvider extends BaseProvider {
            streamSpan.end();
        });
        timeoutController?.cleanup();
-        const transformedStream = this.createTextStream(result);
+        const transformedStream = this.createTextStream(result, () => capturedProviderError);
        // ✅ Note: Vercel AI SDK's streamText() method limitations with tools
        // The streamText() function doesn't provide the same tool result access as generateText()
        // Full tool support is now available with real streaming
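One design choice worth calling out in the hunk above: createTextStream now receives a thunk, `() => capturedProviderError`, rather than the error value itself, because onError fires while the stream is being consumed, after createTextStream has already been called. A self-contained sketch of that deferred-getter pattern (names here are illustrative, not the package's API):

```js
// Minimal sketch of the deferred-error-getter pattern used above. Only the idea
// (pass a thunk, read it when the sentinel is built) mirrors the diff; the real
// createTextStream / sentinel shapes live in the package.
let capturedProviderError;

// Simulated provider stream: fails without yielding any text.
async function* emptyProviderStream() {
  capturedProviderError = new Error("upstream DNS failure"); // set during iteration
}

async function* wrapStream(source, getProviderError) {
  let sawOutput = false;
  for await (const chunk of source) {
    sawOutput = true;
    yield { content: chunk };
  }
  if (!sawOutput) {
    // The getter is evaluated now, after iteration, so it sees the error that
    // was captured while the stream ran; a plain value argument would miss it.
    yield { content: "", metadata: { noOutput: true, providerError: getProviderError() } };
  }
}

for await (const chunk of wrapStream(emptyProviderStream(), () => capturedProviderError)) {
  console.log(chunk.metadata?.noOutput, chunk.metadata?.providerError?.message);
}
```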
@@ -5,6 +5,7 @@ import { AnthropicModels } from "../constants/enums.js";
 import { BaseProvider } from "../core/baseProvider.js";
 import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
 import { logger } from "../utils/logger.js";
+import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
 import { calculateCost } from "../utils/pricing.js";
 import { createAnthropicBaseConfig, validateApiKey, } from "../utils/providerConfig.js";
 import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
@@ -81,6 +82,10 @@ export class AnthropicProviderV2 extends BaseProvider {
                "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
            },
        });
+        // Reviewer follow-up: capture upstream provider errors via onError
+        // so the post-stream NoOutput detect can propagate the real cause
+        // into the sentinel's providerError / modelResponseRaw.
+        let capturedProviderError;
        let result;
        try {
            result = streamText({
@@ -95,6 +100,14 @@ export class AnthropicProviderV2 extends BaseProvider {
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
+                onError: (event) => {
+                    capturedProviderError = event.error;
+                    logger.error("AnthropicBaseProvider: Stream error", {
+                        error: event.error instanceof Error
+                            ? event.error.message
+                            : String(event.error),
+                    });
+                },
                onStepFinish: ({ toolCalls, toolResults }) => {
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[AnthropicBaseProvider] Failed to store tool executions", {
@@ -153,19 +166,34 @@ export class AnthropicProviderV2 extends BaseProvider {
        timeoutController?.cleanup();
        // Transform string stream to content object stream (match Google AI pattern)
        const transformedStream = async function* () {
+            let chunkCount = 0;
            try {
                for await (const chunk of result.textStream) {
+                    chunkCount++;
                    yield { content: chunk };
                }
            }
            catch (streamError) {
-                // AI SDK v6 throws NoOutputGeneratedError when the stream produced no output.
                if (NoOutputGeneratedError.isInstance(streamError)) {
-                    logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError)");
+                    logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
+                    const sentinel = await buildNoOutputSentinel(streamError, result, capturedProviderError);
+                    stampNoOutputSpan(sentinel);
+                    yield sentinel;
                    return;
                }
                throw streamError;
            }
+            // Curator P3-6 (round-2 fix): production trigger sets the error
+            // on result.finishReason rejection, not on textStream iteration.
+            // Surface that path here so the sentinel actually fires.
+            if (chunkCount === 0) {
+                const detected = await detectPostStreamNoOutput(result, capturedProviderError);
+                if (detected) {
+                    logger.warn("AnthropicBaseProvider: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
+                    stampNoOutputSpan(detected.sentinel);
+                    yield detected.sentinel;
+                }
+            }
        };
        return {
            stream: transformedStream(),