@juspay/neurolink 9.59.1 → 9.59.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/neurolink.js CHANGED
@@ -194,6 +194,12 @@ function isNonRetryableProviderError(error) {
194
194
  if (error instanceof ModelAccessDeniedError) {
195
195
  return true;
196
196
  }
197
+ // Note: ContextBudgetExceededError is intentionally treated as retryable here.
198
+ // Each provider has its own context window, so a budget rejection on
199
+ // one provider doesn't preclude another provider's window fitting the
200
+ // same payload. The directProviderGeneration loop should continue
201
+ // trying alternate providers; the after-loop rethrow preserves the
202
+ // typed error when all providers reject (see `directProviderGeneration`).
197
203
  // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
198
204
  if (error && typeof error === "object") {
199
205
  const err = error;
@@ -3724,7 +3730,16 @@ Current user's request: ${currentInput}`;
3724
3730
  return null;
3725
3731
  }
3726
3732
  async tryRecoverGenerateTextOverflow(options, functionTag, error) {
3727
- if (!isContextOverflowError(error) || !this.conversationMemory) {
3733
+ // Reviewer Finding #3: drop the `!this.conversationMemory` gate so
3734
+ // inline-conversationMessages callers also benefit from post-provider
3735
+ // recovery when their pre-dispatch estimate happens to undershoot
3736
+ // and the provider rejects at a higher real token count.
3737
+ if (!isContextOverflowError(error)) {
3738
+ return null;
3739
+ }
3740
+ const inlineMessages = options._originalConversationMessages;
3741
+ const callerMessages = options.conversationMessages;
3742
+ if (!this.conversationMemory && !inlineMessages && !callerMessages) {
3728
3743
  return null;
3729
3744
  }
3730
3745
  logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
@@ -3733,8 +3748,11 @@ Current user's request: ${currentInput}`;
3733
3748
  });
3734
3749
  try {
3735
3750
  const actualOverflow = parseProviderOverflowDetails(error);
3736
- const originalMessages = options._originalConversationMessages ??
3737
- (await getConversationMessages(this.conversationMemory, options));
3751
+ const originalMessages = inlineMessages ??
3752
+ callerMessages ??
3753
+ (this.conversationMemory
3754
+ ? await getConversationMessages(this.conversationMemory, options)
3755
+ : []);
3738
3756
  const recoveryBudget = checkContextBudget({
3739
3757
  provider: options.provider || "openai",
3740
3758
  model: options.model,
@@ -3748,49 +3766,129 @@ Current user's request: ${currentInput}`;
3748
3766
  const requiredReduction = actualTokens > 0
3749
3767
  ? (actualTokens - compactionTarget) / actualTokens
3750
3768
  : 0.5;
3751
- const compactor = new ContextCompactor({
3752
- enableSummarize: false,
3753
- enablePrune: true,
3754
- enableDeduplicate: true,
3755
- enableTruncate: true,
3756
- truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3757
- });
3758
- const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3759
- if (!compactionResult.compacted) {
3760
- return null;
3769
+ // Reviewer Finding #3: escalating truncation across attempts. The
3770
+ // first attempt uses the budget-derived fraction (single-round
3771
+ // compaction). If that still leaves the conversation over budget,
3772
+ // subsequent attempts apply progressively harder truncation
3773
+ // (0.5 → 0.75 → 0.9) before giving up. This replaces the previous
3774
+ // single-pass behaviour where one undersized fraction guaranteed
3775
+ // failure on the next provider call.
3776
+ const escalationFractions = [
3777
+ Math.min(0.9, requiredReduction + 0.15),
3778
+ 0.5,
3779
+ 0.75,
3780
+ 0.9,
3781
+ ];
3782
+ let lastCompactionResult = null;
3783
+ let compactedMessages = originalMessages;
3784
+ let verifiedBudget = null;
3785
+ let recoveredFraction = -1;
3786
+ for (let i = 0; i < escalationFractions.length; i++) {
3787
+ const fraction = escalationFractions[i];
3788
+ const compactor = new ContextCompactor({
3789
+ enableSummarize: false,
3790
+ enablePrune: true,
3791
+ enableDeduplicate: true,
3792
+ enableTruncate: true,
3793
+ truncationFraction: fraction,
3794
+ });
3795
+ const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3796
+ if (!compactionResult.compacted) {
3797
+ continue;
3798
+ }
3799
+ lastCompactionResult = compactionResult;
3800
+ const repairedResult = repairToolPairs(compactionResult.messages);
3801
+ const verifyBudget = checkContextBudget({
3802
+ provider: options.provider || "openai",
3803
+ model: options.model,
3804
+ maxTokens: options.maxTokens,
3805
+ systemPrompt: options.systemPrompt,
3806
+ currentPrompt: options.prompt,
3807
+ conversationMessages: repairedResult.messages,
3808
+ });
3809
+ if (verifyBudget.withinBudget) {
3810
+ compactedMessages = repairedResult.messages;
3811
+ verifiedBudget = verifyBudget;
3812
+ recoveredFraction = fraction;
3813
+ break;
3814
+ }
3815
+ verifiedBudget = verifyBudget;
3816
+ }
3817
+ if (!lastCompactionResult) {
3818
+ // Reviewer follow-up: when no escalation fraction managed to
3819
+ // compact the conversation, the request will hit the same
3820
+ // provider 400 again on retry. Surface a typed
3821
+ // ContextBudgetExceededError + `compaction.insufficient` event
3822
+ // instead of returning null (which lets callers propagate the
3823
+ // opaque provider error).
3824
+ try {
3825
+ this.emitter.emit("compaction.insufficient", {
3826
+ stagesAttempted: [],
3827
+ finalTokens: actualTokens,
3828
+ budget: budgetTokens,
3829
+ provider: options.provider || "openai",
3830
+ model: options.model,
3831
+ phase: "post-provider-recovery-no-compaction",
3832
+ fractionsTried: escalationFractions,
3833
+ timestamp: Date.now(),
3834
+ });
3835
+ }
3836
+ catch {
3837
+ /* listener errors are non-fatal */
3838
+ }
3839
+ throw new ContextBudgetExceededError(`Context overflow recovery: no compaction stage was able to ` +
3840
+ `reduce conversation messages. Provider rejected at ` +
3841
+ `~${actualTokens} tokens; budget is ${budgetTokens} tokens.`, {
3842
+ estimatedTokens: actualTokens,
3843
+ availableTokens: budgetTokens,
3844
+ stagesUsed: [],
3845
+ breakdown: {},
3846
+ });
3761
3847
  }
3762
- const repairedResult = repairToolPairs(compactionResult.messages);
3763
- const verifyBudget = checkContextBudget({
3764
- provider: options.provider || "openai",
3765
- model: options.model,
3766
- maxTokens: options.maxTokens,
3767
- systemPrompt: options.systemPrompt,
3768
- currentPrompt: options.prompt,
3769
- conversationMessages: repairedResult.messages,
3770
- });
3771
- if (!verifyBudget.withinBudget) {
3772
- logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3773
- estimatedTokens: verifyBudget.estimatedInputTokens,
3774
- availableTokens: verifyBudget.availableInputTokens,
3848
+ if (!verifiedBudget?.withinBudget) {
3849
+ logger.error(`[${functionTag}] Recovery compaction insufficient after escalation, aborting retry`, {
3850
+ estimatedTokens: verifiedBudget?.estimatedInputTokens,
3851
+ availableTokens: verifiedBudget?.availableInputTokens,
3852
+ stagesAttempted: lastCompactionResult.stagesUsed,
3853
+ fractionsTried: escalationFractions,
3775
3854
  });
3855
+ // Reviewer Finding #3: emit `compaction.insufficient` so
3856
+ // cost / audit listeners record the specific failure mode.
3857
+ try {
3858
+ this.emitter.emit("compaction.insufficient", {
3859
+ stagesAttempted: lastCompactionResult.stagesUsed,
3860
+ finalTokens: verifiedBudget?.estimatedInputTokens,
3861
+ budget: verifiedBudget?.availableInputTokens,
3862
+ provider: options.provider || "openai",
3863
+ model: options.model,
3864
+ phase: "post-provider-recovery",
3865
+ fractionsTried: escalationFractions,
3866
+ timestamp: Date.now(),
3867
+ });
3868
+ }
3869
+ catch {
3870
+ /* listener errors are non-fatal */
3871
+ }
3776
3872
  throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3777
- `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3778
- `but budget is ${budgetTokens} tokens.`, {
3779
- estimatedTokens: compactionResult.tokensAfter,
3873
+ `recovery compaction achieved ${lastCompactionResult.tokensAfter} tokens ` +
3874
+ `but budget is ${budgetTokens} tokens (after escalation through ` +
3875
+ `${escalationFractions.length} fractions).`, {
3876
+ estimatedTokens: lastCompactionResult.tokensAfter,
3780
3877
  availableTokens: budgetTokens,
3781
- stagesUsed: compactionResult.stagesUsed,
3782
- breakdown: verifyBudget.breakdown,
3878
+ stagesUsed: lastCompactionResult.stagesUsed,
3879
+ breakdown: verifiedBudget?.breakdown ?? {},
3783
3880
  });
3784
3881
  }
3785
3882
  logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3786
- tokensSaved: compactionResult.tokensSaved,
3883
+ tokensSaved: lastCompactionResult.tokensSaved,
3787
3884
  compactionTarget,
3788
- verifiedTokens: verifyBudget.estimatedInputTokens,
3789
- verifiedBudget: verifyBudget.availableInputTokens,
3885
+ verifiedTokens: verifiedBudget.estimatedInputTokens,
3886
+ verifiedBudget: verifiedBudget.availableInputTokens,
3887
+ recoveredFraction,
3790
3888
  });
3791
3889
  return this.directProviderGeneration({
3792
3890
  ...options,
3793
- conversationMessages: repairedResult.messages,
3891
+ conversationMessages: compactedMessages,
3794
3892
  });
3795
3893
  }
3796
3894
  catch (retryError) {
@@ -4421,8 +4519,51 @@ Current user's request: ${currentInput}`;
4421
4519
  });
4422
4520
  const dpgMessageCount = conversationMessages?.length || 0;
4423
4521
  const dpgCompactionSessionId = this.getCompactionSessionId(options);
4522
+ // Curator P1-2: pre-dispatch compaction must run for inline
4523
+ // `conversationMessages` too (not just conversationMemory). Without
4524
+ // this, a 1.3M-token caller-supplied conversation against a 128K
4525
+ // window dispatches anyway and the provider returns
4526
+ // "prompt is too long" — the bug Curator's report cited.
4527
+ const dpgHasInlineMessages = !!optionsWithMessages.conversationMessages?.length;
4528
+ // Reviewer follow-up: gate the hard cap on the *actual compactable
4529
+ // history* rather than `this.conversationMemory`. A configured-but-
4530
+ // empty memory store leaves nothing to compact yet still satisfies
4531
+ // `!this.conversationMemory === false`, so the previous check
4532
+ // skipped the hard cap and dispatched the oversized payload.
4533
+ const dpgHasCompactableMessages = dpgMessageCount > 0;
4534
+ // Reviewer Finding #4: pre-dispatch hard cap for the standalone
4535
+ // oversized case. When the budget check shows the request is
4536
+ // over budget but there's nothing to compact (no stored history + no
4537
+ // inline messages — e.g. a huge prompt or huge tool definitions
4538
+ // alone), throw before dispatch instead of wasting a roundtrip.
4539
+ if (!budgetCheck.withinBudget && !dpgHasCompactableMessages) {
4540
+ try {
4541
+ this.emitter.emit("compaction.insufficient", {
4542
+ stagesAttempted: ["pre-dispatch hard cap"],
4543
+ finalTokens: budgetCheck.estimatedInputTokens,
4544
+ budget: budgetCheck.availableInputTokens,
4545
+ provider: providerName,
4546
+ model: options.model,
4547
+ phase: "pre-dispatch-no-recovery",
4548
+ timestamp: Date.now(),
4549
+ });
4550
+ }
4551
+ catch {
4552
+ /* listener errors are non-fatal */
4553
+ }
4554
+ throw new ContextBudgetExceededError(`Context exceeds model budget and no compaction is possible ` +
4555
+ `(no conversationMemory, no inline conversationMessages — only ` +
4556
+ `prompt + tools). Estimated: ${budgetCheck.estimatedInputTokens} ` +
4557
+ `tokens, budget: ${budgetCheck.availableInputTokens} tokens. ` +
4558
+ `Reduce prompt or tool-definition size, or trim the request.`, {
4559
+ estimatedTokens: budgetCheck.estimatedInputTokens,
4560
+ availableTokens: budgetCheck.availableInputTokens,
4561
+ stagesUsed: [],
4562
+ breakdown: budgetCheck.breakdown,
4563
+ });
4564
+ }
4424
4565
  if (budgetCheck.shouldCompact &&
4425
- this.conversationMemory &&
4566
+ (this.conversationMemory || dpgHasInlineMessages) &&
4426
4567
  dpgMessageCount >
4427
4568
  (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
4428
4569
  const compactor = new ContextCompactor({
@@ -4456,6 +4597,26 @@ Current user's request: ${currentInput}`;
4456
4597
  availableTokens: postCompactBudget.availableInputTokens,
4457
4598
  overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
4458
4599
  });
4600
+ // Curator P1-2: emit `compaction.insufficient` whenever a
4601
+ // single round of compaction wasn't enough — even when
4602
+ // emergency truncation will save the day. Lets cost / audit
4603
+ // listeners track the "compaction was insufficient" signal
4604
+ // separately from the eventual outcome.
4605
+ try {
4606
+ this.emitter.emit("compaction.insufficient", {
4607
+ stagesAttempted: compactionResult.stagesUsed,
4608
+ finalTokens: postCompactBudget.estimatedInputTokens,
4609
+ budget: postCompactBudget.availableInputTokens,
4610
+ provider: providerName,
4611
+ model: options.model,
4612
+ phase: "mid-compaction",
4613
+ willEmergencyTruncate: true,
4614
+ timestamp: Date.now(),
4615
+ });
4616
+ }
4617
+ catch {
4618
+ /* listener errors are non-fatal */
4619
+ }
4459
4620
  conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
4460
4621
  const finalBudget = checkContextBudget({
4461
4622
  provider: providerName,
@@ -4471,6 +4632,23 @@ Current user's request: ${currentInput}`;
4471
4632
  if (!finalBudget.withinBudget) {
4472
4633
  // Clear watermark so handleContextOverflow recovery can re-compact
4473
4634
  this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
4635
+ // Curator P1-2: emit `compaction.insufficient` so cost / audit
4636
+ // listeners can record the specific failure mode (separate
4637
+ // from a generic provider error).
4638
+ try {
4639
+ this.emitter.emit("compaction.insufficient", {
4640
+ stagesAttempted: compactionResult.stagesUsed,
4641
+ finalTokens: finalBudget.estimatedInputTokens,
4642
+ budget: finalBudget.availableInputTokens,
4643
+ provider: providerName,
4644
+ model: options.model,
4645
+ phase: "post-emergency-truncation",
4646
+ timestamp: Date.now(),
4647
+ });
4648
+ }
4649
+ catch {
4650
+ /* listener errors are non-fatal */
4651
+ }
4474
4652
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
4475
4653
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
4476
4654
  `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -4577,6 +4755,14 @@ Current user's request: ${currentInput}`;
4577
4755
  lastError: lastError?.message,
4578
4756
  responseTime,
4579
4757
  });
4758
+ // Reviewer follow-up: preserve typed ContextBudgetExceededError after
4759
+ // the per-provider fallback loop. Each provider's hard cap is
4760
+ // per-window; we let the loop try them all, but if every provider
4761
+ // rejected on budget the caller still needs the typed error to
4762
+ // distinguish "context too large" from a generic provider failure.
4763
+ if (lastError instanceof ContextBudgetExceededError) {
4764
+ throw lastError;
4765
+ }
4580
4766
  throw new Error(`Failed to generate text with all providers. Last error: ${lastError?.message || "Unknown error"}`);
4581
4767
  }
4582
4768
  /**
@@ -5742,6 +5928,42 @@ Current user's request: ${currentInput}`;
5742
5928
  });
5743
5929
  const streamMessageCount = conversationMessages?.length || 0;
5744
5930
  const streamCompactionSessionId = this.getCompactionSessionId(options);
5931
+ // Reviewer follow-up: gate the hard cap on the *actual compactable
5932
+ // history* rather than `this.conversationMemory`. A configured-but-
5933
+ // empty memory store leaves nothing to compact yet still satisfies
5934
+ // `!this.conversationMemory === false`, so the previous check
5935
+ // skipped the hard cap and dispatched the oversized payload.
5936
+ const streamHasCompactableMessages = streamMessageCount > 0;
5937
+ // Curator P1-2: pre-dispatch hard cap mirrors directProviderGeneration.
5938
+ // When the budget check fails AND there's nothing to compact (no stored history
5939
+ // + no inline messages — only prompt + tools), throw before dispatch
5940
+ // instead of wasting a roundtrip on a payload the provider will reject.
5941
+ if (!streamBudget.withinBudget && !streamHasCompactableMessages) {
5942
+ try {
5943
+ this.emitter.emit("compaction.insufficient", {
5944
+ stagesAttempted: ["pre-dispatch hard cap"],
5945
+ finalTokens: streamBudget.estimatedInputTokens,
5946
+ budget: streamBudget.availableInputTokens,
5947
+ provider: providerName,
5948
+ model: options.model,
5949
+ phase: "pre-dispatch-no-recovery",
5950
+ timestamp: Date.now(),
5951
+ });
5952
+ }
5953
+ catch {
5954
+ /* listener errors are non-fatal */
5955
+ }
5956
+ throw new ContextBudgetExceededError(`Stream context exceeds model budget and no compaction is possible ` +
5957
+ `(no conversationMemory, no inline conversationMessages — only ` +
5958
+ `prompt + tools). Estimated: ${streamBudget.estimatedInputTokens} ` +
5959
+ `tokens, budget: ${streamBudget.availableInputTokens} tokens. ` +
5960
+ `Reduce prompt or tool-definition size, or trim the request.`, {
5961
+ estimatedTokens: streamBudget.estimatedInputTokens,
5962
+ availableTokens: streamBudget.availableInputTokens,
5963
+ stagesUsed: [],
5964
+ breakdown: streamBudget.breakdown,
5965
+ });
5966
+ }
5745
5967
  if (streamBudget.shouldCompact &&
5746
5968
  (hasCallerConversationHistory || this.conversationMemory) &&
5747
5969
  streamMessageCount >
@@ -5778,6 +6000,26 @@ Current user's request: ${currentInput}`;
5778
6000
  availableTokens: postCompactBudget.availableInputTokens,
5779
6001
  overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
5780
6002
  });
6003
+ // Curator P1-2: emit `compaction.insufficient` whenever a single
6004
+ // round of compaction wasn't enough — even when emergency
6005
+ // truncation will save the day. Lets cost / audit listeners track
6006
+ // the "compaction was insufficient" signal separately from the
6007
+ // eventual outcome.
6008
+ try {
6009
+ this.emitter.emit("compaction.insufficient", {
6010
+ stagesAttempted: compactionResult.stagesUsed,
6011
+ finalTokens: postCompactBudget.estimatedInputTokens,
6012
+ budget: postCompactBudget.availableInputTokens,
6013
+ provider: providerName,
6014
+ model: options.model,
6015
+ phase: "mid-compaction",
6016
+ willEmergencyTruncate: true,
6017
+ timestamp: Date.now(),
6018
+ });
6019
+ }
6020
+ catch {
6021
+ /* listener errors are non-fatal */
6022
+ }
5781
6023
  conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
5782
6024
  // Keep options in sync after emergency truncation so fallback paths
5783
6025
  // use the truncated history.
@@ -5794,6 +6036,23 @@ Current user's request: ${currentInput}`;
5794
6036
  if (!finalBudget.withinBudget) {
5795
6037
  // Clear watermark so handleContextOverflow recovery can re-compact
5796
6038
  this.lastCompactionMessageCount.delete(streamCompactionSessionId);
6039
+ // Curator P1-2: emit `compaction.insufficient` on the terminal
6040
+ // failure path so cost / audit listeners can record the specific
6041
+ // failure mode (compaction + emergency truncation both insufficient).
6042
+ try {
6043
+ this.emitter.emit("compaction.insufficient", {
6044
+ stagesAttempted: compactionResult.stagesUsed,
6045
+ finalTokens: finalBudget.estimatedInputTokens,
6046
+ budget: finalBudget.availableInputTokens,
6047
+ provider: providerName,
6048
+ model: options.model,
6049
+ phase: "post-emergency-truncation",
6050
+ timestamp: Date.now(),
6051
+ });
6052
+ }
6053
+ catch {
6054
+ /* listener errors are non-fatal */
6055
+ }
5797
6056
  throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
5798
6057
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
5799
6058
  `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5881,6 +6140,15 @@ Current user's request: ${currentInput}`;
5881
6140
  * Handle stream error with fallback
5882
6141
  */
5883
6142
  async handleStreamError(error, options, startTime, streamId, enhancedOptions, _factoryResult) {
6143
+ // Curator P1-2: when the pre-dispatch hard cap or post-emergency
6144
+ // truncation budget check throws ContextBudgetExceededError, the
6145
+ // payload is too large for the model and a same-payload retry would
6146
+ // just fail again at the provider — wasting the same tokens that
6147
+ // the hard cap was meant to save. Rethrow so the caller sees the
6148
+ // typed error instead of a fallback ProviderError that hides it.
6149
+ if (error instanceof ContextBudgetExceededError) {
6150
+ throw error;
6151
+ }
5884
6152
  logger.error("Stream generation failed, attempting fallback", {
5885
6153
  error: error instanceof Error ? error.message : String(error),
5886
6154
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.59.1",
3
+ "version": "9.59.2",
4
4
  "packageManager": "pnpm@10.15.1",
5
5
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
6
6
  "author": {