npm - @juspay/neurolink - Version diff - 9.59.1 → 9.59.2 - Mend

@juspay/neurolink 9.59.1 → 9.59.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHANGELOG.md +6 -0
package/dist/browser/neurolink.min.js +180 -180
package/dist/lib/neurolink.js +304 -36
package/dist/neurolink.js +304 -36
package/package.json +1 -1

package/dist/neurolink.js CHANGED

@@ -194,6 +194,12 @@ function isNonRetryableProviderError(error) {
     if (error instanceof ModelAccessDeniedError) {
         return true;
     }
+    // Note: ContextBudgetExceededError is intentionally NOT non-retryable.
+    // Each provider has its own context window, so a budget rejection on
+    // one provider doesn't preclude another provider's window fitting the
+    // same payload. The directProviderGeneration loop should continue
+    // trying alternate providers; the after-loop rethrow preserves the
+    // typed error when all providers reject (see `directProviderGeneration`).
     // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
     if (error && typeof error === "object") {
         const err = error;
@@ -3724,7 +3730,16 @@ Current user's request: ${currentInput}`;
         return null;
     }
     async tryRecoverGenerateTextOverflow(options, functionTag, error) {
-        if (!isContextOverflowError(error) || !this.conversationMemory) {
+        // Reviewer Finding #3: drop the `!this.conversationMemory` gate so
+        // inline-conversationMessages callers also benefit from post-provider
+        // recovery when their pre-dispatch estimate happens to undershoot
+        // and the provider rejects at a higher real token count.
+        if (!isContextOverflowError(error)) {
+            return null;
+        }
+        const inlineMessages = options._originalConversationMessages;
+        const callerMessages = options.conversationMessages;
+        if (!this.conversationMemory && !inlineMessages && !callerMessages) {
             return null;
         }
         logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
@@ -3733,8 +3748,11 @@ Current user's request: ${currentInput}`;
         });
         try {
             const actualOverflow = parseProviderOverflowDetails(error);
-            const originalMessages = options._originalConversationMessages ??
-                (await getConversationMessages(this.conversationMemory, options));
+            const originalMessages = inlineMessages ??
+                callerMessages ??
+                (this.conversationMemory
+                    ? await getConversationMessages(this.conversationMemory, options)
+                    : []);
             const recoveryBudget = checkContextBudget({
                 provider: options.provider || "openai",
                 model: options.model,
@@ -3748,49 +3766,129 @@ Current user's request: ${currentInput}`;
             const requiredReduction = actualTokens > 0
                 ? (actualTokens - compactionTarget) / actualTokens
                 : 0.5;
-            const compactor = new ContextCompactor({
-                enableSummarize: false,
-                enablePrune: true,
-                enableDeduplicate: true,
-                enableTruncate: true,
-                truncationFraction: Math.min(0.9, requiredReduction + 0.15),
-            });
-            const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
-            if (!compactionResult.compacted) {
-                return null;
+            // Reviewer Finding #3: escalating truncation across attempts. The
+            // first attempt uses the budget-derived fraction (single-round
+            // compaction). If that still leaves the conversation over budget,
+            // subsequent attempts apply progressively harder truncation
+            // (0.5 → 0.75 → 0.9) before giving up. This replaces the previous
+            // single-pass behaviour where one undersized fraction guaranteed
+            // failure on the next provider call.
+            const escalationFractions = [
+                Math.min(0.9, requiredReduction + 0.15),
+                0.5,
+                0.75,
+                0.9,
+            ];
+            let lastCompactionResult = null;
+            let compactedMessages = originalMessages;
+            let verifiedBudget = null;
+            let recoveredFraction = -1;
+            for (let i = 0; i < escalationFractions.length; i++) {
+                const fraction = escalationFractions[i];
+                const compactor = new ContextCompactor({
+                    enableSummarize: false,
+                    enablePrune: true,
+                    enableDeduplicate: true,
+                    enableTruncate: true,
+                    truncationFraction: fraction,
+                });
+                const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
+                if (!compactionResult.compacted) {
+                    continue;
+                }
+                lastCompactionResult = compactionResult;
+                const repairedResult = repairToolPairs(compactionResult.messages);
+                const verifyBudget = checkContextBudget({
+                    provider: options.provider || "openai",
+                    model: options.model,
+                    maxTokens: options.maxTokens,
+                    systemPrompt: options.systemPrompt,
+                    currentPrompt: options.prompt,
+                    conversationMessages: repairedResult.messages,
+                });
+                if (verifyBudget.withinBudget) {
+                    compactedMessages = repairedResult.messages;
+                    verifiedBudget = verifyBudget;
+                    recoveredFraction = fraction;
+                    break;
+                }
+                verifiedBudget = verifyBudget;
+            }
+            if (!lastCompactionResult) {
+                // Reviewer follow-up: when no escalation fraction managed to
+                // compact the conversation, the request will hit the same
+                // provider 400 again on retry. Surface a typed
+                // ContextBudgetExceededError + `compaction.insufficient` event
+                // instead of returning null (which lets callers propagate the
+                // opaque provider error).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: [],
+                        finalTokens: actualTokens,
+                        budget: budgetTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery-no-compaction",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
+                throw new ContextBudgetExceededError(`Context overflow recovery: no compaction stage was able to ` +
+                    `reduce conversation messages. Provider rejected at ` +
+                    `~${actualTokens} tokens; budget is ${budgetTokens} tokens.`, {
+                    estimatedTokens: actualTokens,
+                    availableTokens: budgetTokens,
+                    stagesUsed: [],
+                    breakdown: {},
+                });
             }
-            const repairedResult = repairToolPairs(compactionResult.messages);
-            const verifyBudget = checkContextBudget({
-                provider: options.provider || "openai",
-                model: options.model,
-                maxTokens: options.maxTokens,
-                systemPrompt: options.systemPrompt,
-                currentPrompt: options.prompt,
-                conversationMessages: repairedResult.messages,
-            });
-            if (!verifyBudget.withinBudget) {
-                logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
-                    estimatedTokens: verifyBudget.estimatedInputTokens,
-                    availableTokens: verifyBudget.availableInputTokens,
+            if (!verifiedBudget?.withinBudget) {
+                logger.error(`[${functionTag}] Recovery compaction insufficient after escalation, aborting retry`, {
+                    estimatedTokens: verifiedBudget?.estimatedInputTokens,
+                    availableTokens: verifiedBudget?.availableInputTokens,
+                    stagesAttempted: lastCompactionResult.stagesUsed,
+                    fractionsTried: escalationFractions,
                 });
+                // Reviewer Finding #3: emit `compaction.insufficient` so
+                // cost / audit listeners record the specific failure mode.
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: lastCompactionResult.stagesUsed,
+                        finalTokens: verifiedBudget?.estimatedInputTokens,
+                        budget: verifiedBudget?.availableInputTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                 throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
-                    `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
-                    `but budget is ${budgetTokens} tokens.`, {
-                    estimatedTokens: compactionResult.tokensAfter,
+                    `recovery compaction achieved ${lastCompactionResult.tokensAfter} tokens ` +
+                    `but budget is ${budgetTokens} tokens (after escalation through ` +
+                    `${escalationFractions.length} fractions).`, {
+                    estimatedTokens: lastCompactionResult.tokensAfter,
                     availableTokens: budgetTokens,
-                    stagesUsed: compactionResult.stagesUsed,
-                    breakdown: verifyBudget.breakdown,
+                    stagesUsed: lastCompactionResult.stagesUsed,
+                    breakdown: verifiedBudget?.breakdown ?? {},
                 });
             }
             logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
-                tokensSaved: compactionResult.tokensSaved,
+                tokensSaved: lastCompactionResult.tokensSaved,
                 compactionTarget,
-                verifiedTokens: verifyBudget.estimatedInputTokens,
-                verifiedBudget: verifyBudget.availableInputTokens,
+                verifiedTokens: verifiedBudget.estimatedInputTokens,
+                verifiedBudget: verifiedBudget.availableInputTokens,
+                recoveredFraction,
             });
             return this.directProviderGeneration({
                 ...options,
-                conversationMessages: repairedResult.messages,
+                conversationMessages: compactedMessages,
             });
         }
         catch (retryError) {
@@ -4421,8 +4519,51 @@ Current user's request: ${currentInput}`;
                 });
                 const dpgMessageCount = conversationMessages?.length || 0;
                 const dpgCompactionSessionId = this.getCompactionSessionId(options);
+                // Curator P1-2: pre-dispatch compaction must run for inline
+                // `conversationMessages` too (not just conversationMemory). Without
+                // this, a 1.3M-token caller-supplied conversation against a 128K
+                // window dispatches anyway and the provider returns
+                // "prompt is too long" — the bug Curator's report cited.
+                const dpgHasInlineMessages = !!optionsWithMessages.conversationMessages?.length;
+                // Reviewer follow-up: gate the hard cap on the *actual compactable
+                // history* rather than `this.conversationMemory`. A configured-but-
+                // empty memory store leaves nothing to compact yet still satisfies
+                // `!this.conversationMemory === false`, so the previous check
+                // skipped the hard cap and dispatched the oversized payload.
+                const dpgHasCompactableMessages = dpgMessageCount > 0;
+                // Reviewer Finding #4: pre-dispatch hard cap for the standalone
+                // oversized case. When the budget check shows the request is
+                // over budget but there's nothing to compact (no memory + no
+                // inline messages — e.g. a huge prompt or huge tool definitions
+                // alone), throw before dispatch instead of wasting a roundtrip.
+                if (!budgetCheck.withinBudget && !dpgHasCompactableMessages) {
+                    try {
+                        this.emitter.emit("compaction.insufficient", {
+                            stagesAttempted: ["pre-dispatch hard cap"],
+                            finalTokens: budgetCheck.estimatedInputTokens,
+                            budget: budgetCheck.availableInputTokens,
+                            provider: providerName,
+                            model: options.model,
+                            phase: "pre-dispatch-no-recovery",
+                            timestamp: Date.now(),
+                        });
+                    }
+                    catch {
+                        /* listener errors are non-fatal */
+                    }
+                    throw new ContextBudgetExceededError(`Context exceeds model budget and no compaction is possible ` +
+                        `(no conversationMemory, no inline conversationMessages — only ` +
+                        `prompt + tools). Estimated: ${budgetCheck.estimatedInputTokens} ` +
+                        `tokens, budget: ${budgetCheck.availableInputTokens} tokens. ` +
+                        `Reduce prompt or tool-definition size, or trim the request.`, {
+                        estimatedTokens: budgetCheck.estimatedInputTokens,
+                        availableTokens: budgetCheck.availableInputTokens,
+                        stagesUsed: [],
+                        breakdown: budgetCheck.breakdown,
+                    });
+                }
                 if (budgetCheck.shouldCompact &&
-                    this.conversationMemory &&
+                    (this.conversationMemory || dpgHasInlineMessages) &&
                     dpgMessageCount >
                         (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
                     const compactor = new ContextCompactor({
@@ -4456,6 +4597,26 @@ Current user's request: ${currentInput}`;
                             availableTokens: postCompactBudget.availableInputTokens,
                             overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
                         });
+                        // Curator P1-2: emit `compaction.insufficient` whenever a
+                        // single round of compaction wasn't enough — even when
+                        // emergency truncation will save the day. Lets cost / audit
+                        // listeners track the "compaction was insufficient" signal
+                        // separately from the eventual outcome.
+                        try {
+                            this.emitter.emit("compaction.insufficient", {
+                                stagesAttempted: compactionResult.stagesUsed,
+                                finalTokens: postCompactBudget.estimatedInputTokens,
+                                budget: postCompactBudget.availableInputTokens,
+                                provider: providerName,
+                                model: options.model,
+                                phase: "mid-compaction",
+                                willEmergencyTruncate: true,
+                                timestamp: Date.now(),
+                            });
+                        }
+                        catch {
+                            /* listener errors are non-fatal */
+                        }
                         conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
                         const finalBudget = checkContextBudget({
                             provider: providerName,
@@ -4471,6 +4632,23 @@ Current user's request: ${currentInput}`;
                         if (!finalBudget.withinBudget) {
                             // Clear watermark so handleContextOverflow recovery can re-compact
                             this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
+                            // Curator P1-2: emit `compaction.insufficient` so cost / audit
+                            // listeners can record the specific failure mode (separate
+                            // from a generic provider error).
+                            try {
+                                this.emitter.emit("compaction.insufficient", {
+                                    stagesAttempted: compactionResult.stagesUsed,
+                                    finalTokens: finalBudget.estimatedInputTokens,
+                                    budget: finalBudget.availableInputTokens,
+                                    provider: providerName,
+                                    model: options.model,
+                                    phase: "post-emergency-truncation",
+                                    timestamp: Date.now(),
+                                });
+                            }
+                            catch {
+                                /* listener errors are non-fatal */
+                            }
                             throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
                                 `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                                 `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -4577,6 +4755,14 @@ Current user's request: ${currentInput}`;
             lastError: lastError?.message,
             responseTime,
         });
+        // Reviewer follow-up: preserve typed ContextBudgetExceededError after
+        // the per-provider fallback loop. Each provider's hard cap is
+        // per-window; we let the loop try them all, but if every provider
+        // rejected on budget the caller still needs the typed error to
+        // distinguish "context too large" from a generic provider failure.
+        if (lastError instanceof ContextBudgetExceededError) {
+            throw lastError;
+        }
         throw new Error(`Failed to generate text with all providers. Last error: ${lastError?.message || "Unknown error"}`);
     }
     /**
@@ -5742,6 +5928,42 @@ Current user's request: ${currentInput}`;
         });
         const streamMessageCount = conversationMessages?.length || 0;
         const streamCompactionSessionId = this.getCompactionSessionId(options);
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const streamHasCompactableMessages = streamMessageCount > 0;
+        // Curator P1-2: pre-dispatch hard cap mirrors directProviderGeneration.
+        // When the budget check fails AND there's nothing to compact (no memory
+        // + no inline messages — only prompt + tools), throw before dispatch
+        // instead of wasting a roundtrip on a payload the provider will reject.
+        if (!streamBudget.withinBudget && !streamHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: streamBudget.estimatedInputTokens,
+                    budget: streamBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Stream context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${streamBudget.estimatedInputTokens} ` +
+                `tokens, budget: ${streamBudget.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: streamBudget.estimatedInputTokens,
+                availableTokens: streamBudget.availableInputTokens,
+                stagesUsed: [],
+                breakdown: streamBudget.breakdown,
+            });
+        }
         if (streamBudget.shouldCompact &&
             (hasCallerConversationHistory || this.conversationMemory) &&
             streamMessageCount >
@@ -5778,6 +6000,26 @@ Current user's request: ${currentInput}`;
                     availableTokens: postCompactBudget.availableInputTokens,
                     overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
                 });
+                // Curator P1-2: emit `compaction.insufficient` whenever a single
+                // round of compaction wasn't enough — even when emergency
+                // truncation will save the day. Lets cost / audit listeners track
+                // the "compaction was insufficient" signal separately from the
+                // eventual outcome.
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: compactionResult.stagesUsed,
+                        finalTokens: postCompactBudget.estimatedInputTokens,
+                        budget: postCompactBudget.availableInputTokens,
+                        provider: providerName,
+                        model: options.model,
+                        phase: "mid-compaction",
+                        willEmergencyTruncate: true,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                 conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
                 // Keep options in sync after emergency truncation so fallback paths
                 // use the truncated history.
@@ -5794,6 +6036,23 @@ Current user's request: ${currentInput}`;
                 if (!finalBudget.withinBudget) {
                     // Clear watermark so handleContextOverflow recovery can re-compact
                     this.lastCompactionMessageCount.delete(streamCompactionSessionId);
+                    // Curator P1-2: emit `compaction.insufficient` on the terminal
+                    // failure path so cost / audit listeners can record the specific
+                    // failure mode (compaction + emergency truncation both insufficient).
+                    try {
+                        this.emitter.emit("compaction.insufficient", {
+                            stagesAttempted: compactionResult.stagesUsed,
+                            finalTokens: finalBudget.estimatedInputTokens,
+                            budget: finalBudget.availableInputTokens,
+                            provider: providerName,
+                            model: options.model,
+                            phase: "post-emergency-truncation",
+                            timestamp: Date.now(),
+                        });
+                    }
+                    catch {
+                        /* listener errors are non-fatal */
+                    }
                     throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
                         `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                         `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5881,6 +6140,15 @@ Current user's request: ${currentInput}`;
      * Handle stream error with fallback
      */
     async handleStreamError(error, options, startTime, streamId, enhancedOptions, _factoryResult) {
+        // Curator P1-2: when the pre-dispatch hard cap or post-emergency
+        // truncation budget check throws ContextBudgetExceededError, the
+        // payload is too large for the model and a same-payload retry would
+        // just fail again at the provider — wasting the same tokens that
+        // the hard cap was meant to save. Rethrow so the caller sees the
+        // typed error instead of a fallback ProviderError that hides it.
+        if (error instanceof ContextBudgetExceededError) {
+            throw error;
+        }
         logger.error("Stream generation failed, attempting fallback", {
             error: error instanceof Error ? error.message : String(error),
         });

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@juspay/neurolink",
-  "version": "9.59.1",
+  "version": "9.59.2",
   "packageManager": "pnpm@10.15.1",
   "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
   "author": {