@juspay/neurolink 9.59.0 → 9.59.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +1025 -1025
- package/dist/lib/neurolink.d.ts +29 -1
- package/dist/lib/neurolink.js +406 -37
- package/dist/lib/providers/googleAiStudio.js +7 -0
- package/dist/lib/providers/googleVertex.js +5 -0
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +2 -0
- package/dist/lib/types/streamDedup.d.ts +14 -0
- package/dist/lib/types/streamDedup.js +2 -0
- package/dist/neurolink.d.ts +29 -1
- package/dist/neurolink.js +406 -37
- package/dist/providers/googleAiStudio.js +7 -0
- package/dist/providers/googleVertex.js +5 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/streamDedup.d.ts +14 -0
- package/dist/types/streamDedup.js +1 -0
- package/package.json +1 -1
package/dist/neurolink.js
CHANGED
@@ -194,6 +194,12 @@ function isNonRetryableProviderError(error) {
     if (error instanceof ModelAccessDeniedError) {
         return true;
     }
+    // Note: ContextBudgetExceededError is intentionally NOT non-retryable.
+    // Each provider has its own context window, so a budget rejection on
+    // one provider doesn't preclude another provider's window fitting the
+    // same payload. The directProviderGeneration loop should continue
+    // trying alternate providers; the after-loop rethrow preserves the
+    // typed error when all providers reject (see `directProviderGeneration`).
     // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
     if (error && typeof error === "object") {
         const err = error;
@@ -297,6 +303,37 @@ function isNonRetryableProviderError(error) {
  * same NeuroLink instance would clobber each other's trace context.
  */
 const metricsTraceContextStorage = new AsyncLocalStorage();
+/**
+ * Curator P2-4 dedup (concurrency-safe): native providers emit
+ * `generation:end` on the shared SDK emitter. We attach a fresh
+ * mutable `dedupContext` object directly to the per-call
+ * `StreamOptions` (under `_streamDedupContext`) so each stream gets
+ * its own instance — concurrent streams have different option objects
+ * and therefore different contexts, so they cannot interfere.
+ *
+ * Native provider emit sites read `options._streamDedupContext` and
+ * flip `.providerEmitted = true` before emitting; the orchestration's
+ * finally block reads the same closed-over reference and skips its
+ * own emit when the flag is set.
+ *
+ * This avoids the AsyncLocalStorage approach which doesn't reliably
+ * propagate through async-generator yield boundaries when iteration
+ * happens from outside the original `run()` scope (e.g. when the
+ * consumer drives `for await of result.stream` after `sdk.stream(...)`
+ * returns).
+ */
+export const STREAM_DEDUP_CONTEXT_KEY = "_streamDedupContext";
+/**
+ * Native providers call this from their `generation:end` emit sites,
+ * passing the same `options` object they received. Safe no-op when
+ * the field isn't set.
+ */
+export function markStreamProviderEmittedGenerationEnd(options) {
+    const ctx = options?._streamDedupContext;
+    if (ctx) {
+        ctx.providerEmitted = true;
+    }
+}
 export class NeuroLink {
     mcpInitialized = false;
     mcpSkipped = false;
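The dedup contract above is small enough to demonstrate standalone. Below is a minimal TypeScript sketch of the per-options flag pattern; all names are local to the sketch, and only the `_streamDedupContext` field and the flag semantics come from the diff:

type StreamDedupContext = { providerEmitted: boolean };
type StreamOptions = { _streamDedupContext?: StreamDedupContext };

// Mirrors markStreamProviderEmittedGenerationEnd: safe no-op when unset.
function markProviderEmitted(options?: StreamOptions): void {
    const ctx = options?._streamDedupContext;
    if (ctx) {
        ctx.providerEmitted = true;
    }
}

async function runStream(nativeProviderEmits: boolean): Promise<string> {
    // Fresh context per call, exactly like the orchestration attaches.
    const options: StreamOptions = { _streamDedupContext: { providerEmitted: false } };
    if (nativeProviderEmits) {
        markProviderEmitted(options); // the provider emit site fires first
    }
    // The finally-block logic: emit only when the provider did not.
    return options._streamDedupContext?.providerEmitted
        ? "skipped (provider already emitted)"
        : "orchestration emitted generation:end";
}

// Two concurrent streams stay independent: each has its own options object.
Promise.all([runStream(true), runStream(false)]).then((results) => {
    console.log(results);
    // [ 'skipped (provider already emitted)', 'orchestration emitted generation:end' ]
});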
@@ -3693,7 +3730,16 @@ Current user's request: ${currentInput}`;
         return null;
     }
     async tryRecoverGenerateTextOverflow(options, functionTag, error) {
-
+        // Reviewer Finding #3: drop the `!this.conversationMemory` gate so
+        // inline-conversationMessages callers also benefit from post-provider
+        // recovery when their pre-dispatch estimate happens to undershoot
+        // and the provider rejects at a higher real token count.
+        if (!isContextOverflowError(error)) {
+            return null;
+        }
+        const inlineMessages = options._originalConversationMessages;
+        const callerMessages = options.conversationMessages;
+        if (!this.conversationMemory && !inlineMessages && !callerMessages) {
             return null;
         }
         logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
@@ -3702,8 +3748,11 @@ Current user's request: ${currentInput}`;
         });
         try {
             const actualOverflow = parseProviderOverflowDetails(error);
-            const originalMessages =
-
+            const originalMessages = inlineMessages ??
+                callerMessages ??
+                (this.conversationMemory
+                    ? await getConversationMessages(this.conversationMemory, options)
+                    : []);
             const recoveryBudget = checkContextBudget({
                 provider: options.provider || "openai",
                 model: options.model,
@@ -3717,49 +3766,129 @@ Current user's request: ${currentInput}`;
             const requiredReduction = actualTokens > 0
                 ? (actualTokens - compactionTarget) / actualTokens
                 : 0.5;
-
-
-
-
-
-
-
-            const
-
-
+            // Reviewer Finding #3: escalating truncation across attempts. The
+            // first attempt uses the budget-derived fraction (single-round
+            // compaction). If that still leaves the conversation over budget,
+            // subsequent attempts apply progressively harder truncation
+            // (0.5 → 0.75 → 0.9) before giving up. This replaces the previous
+            // single-pass behaviour where one undersized fraction guaranteed
+            // failure on the next provider call.
+            const escalationFractions = [
+                Math.min(0.9, requiredReduction + 0.15),
+                0.5,
+                0.75,
+                0.9,
+            ];
+            let lastCompactionResult = null;
+            let compactedMessages = originalMessages;
+            let verifiedBudget = null;
+            let recoveredFraction = -1;
+            for (let i = 0; i < escalationFractions.length; i++) {
+                const fraction = escalationFractions[i];
+                const compactor = new ContextCompactor({
+                    enableSummarize: false,
+                    enablePrune: true,
+                    enableDeduplicate: true,
+                    enableTruncate: true,
+                    truncationFraction: fraction,
+                });
+                const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
+                if (!compactionResult.compacted) {
+                    continue;
+                }
+                lastCompactionResult = compactionResult;
+                const repairedResult = repairToolPairs(compactionResult.messages);
+                const verifyBudget = checkContextBudget({
+                    provider: options.provider || "openai",
+                    model: options.model,
+                    maxTokens: options.maxTokens,
+                    systemPrompt: options.systemPrompt,
+                    currentPrompt: options.prompt,
+                    conversationMessages: repairedResult.messages,
+                });
+                if (verifyBudget.withinBudget) {
+                    compactedMessages = repairedResult.messages;
+                    verifiedBudget = verifyBudget;
+                    recoveredFraction = fraction;
+                    break;
+                }
+                verifiedBudget = verifyBudget;
+            }
+            if (!lastCompactionResult) {
+                // Reviewer follow-up: when no escalation fraction managed to
+                // compact the conversation, the request will hit the same
+                // provider 400 again on retry. Surface a typed
+                // ContextBudgetExceededError + `compaction.insufficient` event
+                // instead of returning null (which lets callers propagate the
+                // opaque provider error).
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: [],
+                        finalTokens: actualTokens,
+                        budget: budgetTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery-no-compaction",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
+                throw new ContextBudgetExceededError(`Context overflow recovery: no compaction stage was able to ` +
+                    `reduce conversation messages. Provider rejected at ` +
+                    `~${actualTokens} tokens; budget is ${budgetTokens} tokens.`, {
+                    estimatedTokens: actualTokens,
+                    availableTokens: budgetTokens,
+                    stagesUsed: [],
+                    breakdown: {},
+                });
             }
-
-
-
-
-
-
-                currentPrompt: options.prompt,
-                conversationMessages: repairedResult.messages,
-            });
-            if (!verifyBudget.withinBudget) {
-                logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
-                    estimatedTokens: verifyBudget.estimatedInputTokens,
-                    availableTokens: verifyBudget.availableInputTokens,
+            if (!verifiedBudget?.withinBudget) {
+                logger.error(`[${functionTag}] Recovery compaction insufficient after escalation, aborting retry`, {
+                    estimatedTokens: verifiedBudget?.estimatedInputTokens,
+                    availableTokens: verifiedBudget?.availableInputTokens,
+                    stagesAttempted: lastCompactionResult.stagesUsed,
+                    fractionsTried: escalationFractions,
                 });
+                // Reviewer Finding #3: emit `compaction.insufficient` so
+                // cost / audit listeners record the specific failure mode.
+                try {
+                    this.emitter.emit("compaction.insufficient", {
+                        stagesAttempted: lastCompactionResult.stagesUsed,
+                        finalTokens: verifiedBudget?.estimatedInputTokens,
+                        budget: verifiedBudget?.availableInputTokens,
+                        provider: options.provider || "openai",
+                        model: options.model,
+                        phase: "post-provider-recovery",
+                        fractionsTried: escalationFractions,
+                        timestamp: Date.now(),
+                    });
+                }
+                catch {
+                    /* listener errors are non-fatal */
+                }
                 throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
-                    `recovery compaction achieved ${
-                    `but budget is ${budgetTokens} tokens
-
+                    `recovery compaction achieved ${lastCompactionResult.tokensAfter} tokens ` +
+                    `but budget is ${budgetTokens} tokens (after escalation through ` +
+                    `${escalationFractions.length} fractions).`, {
+                    estimatedTokens: lastCompactionResult.tokensAfter,
                     availableTokens: budgetTokens,
-                    stagesUsed:
-                    breakdown:
+                    stagesUsed: lastCompactionResult.stagesUsed,
+                    breakdown: verifiedBudget?.breakdown ?? {},
                 });
             }
             logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
-                tokensSaved:
+                tokensSaved: lastCompactionResult.tokensSaved,
                 compactionTarget,
-                verifiedTokens:
-                verifiedBudget:
+                verifiedTokens: verifiedBudget.estimatedInputTokens,
+                verifiedBudget: verifiedBudget.availableInputTokens,
+                recoveredFraction,
             });
             return this.directProviderGeneration({
                 ...options,
-                conversationMessages:
+                conversationMessages: compactedMessages,
             });
         }
         catch (retryError) {
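As a worked example of the escalation schedule above (numbers are illustrative, not from the package): a provider rejection at ~150K actual tokens against a 100K compaction target gives requiredReduction = (150000 − 100000) / 150000 ≈ 0.33, so the first attempt truncates min(0.9, 0.33 + 0.15) ≈ 0.48 of the history, and escalates to 0.5, 0.75, then 0.9 only if verification still fails:

// Illustrative numbers only; mirrors the fraction arithmetic in the hunk above.
const actualTokens = 150_000;      // what the provider rejected at
const compactionTarget = 100_000;  // what the budget check wants to reach

const requiredReduction =
    actualTokens > 0 ? (actualTokens - compactionTarget) / actualTokens : 0.5;

const escalationFractions = [
    Math.min(0.9, requiredReduction + 0.15), // ≈ 0.483: budget-derived + safety margin
    0.5,
    0.75,
    0.9,
];
console.log(escalationFractions.map((f) => f.toFixed(3)));
// ["0.483", "0.500", "0.750", "0.900"]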
@@ -4390,8 +4519,51 @@ Current user's request: ${currentInput}`;
         });
         const dpgMessageCount = conversationMessages?.length || 0;
         const dpgCompactionSessionId = this.getCompactionSessionId(options);
+        // Curator P1-2: pre-dispatch compaction must run for inline
+        // `conversationMessages` too (not just conversationMemory). Without
+        // this, a 1.3M-token caller-supplied conversation against a 128K
+        // window dispatches anyway and the provider returns
+        // "prompt is too long" — the bug Curator's report cited.
+        const dpgHasInlineMessages = !!optionsWithMessages.conversationMessages?.length;
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const dpgHasCompactableMessages = dpgMessageCount > 0;
+        // Reviewer Finding #4: pre-dispatch hard cap for the standalone
+        // oversized case. When the budget check shows the request is
+        // over budget but there's nothing to compact (no memory + no
+        // inline messages — e.g. a huge prompt or huge tool definitions
+        // alone), throw before dispatch instead of wasting a roundtrip.
+        if (!budgetCheck.withinBudget && !dpgHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: budgetCheck.estimatedInputTokens,
+                    budget: budgetCheck.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${budgetCheck.estimatedInputTokens} ` +
+                `tokens, budget: ${budgetCheck.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: budgetCheck.estimatedInputTokens,
+                availableTokens: budgetCheck.availableInputTokens,
+                stagesUsed: [],
+                breakdown: budgetCheck.breakdown,
+            });
+        }
         if (budgetCheck.shouldCompact &&
-            this.conversationMemory &&
+            (this.conversationMemory || dpgHasInlineMessages) &&
             dpgMessageCount >
                 (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
             const compactor = new ContextCompactor({
@@ -4425,6 +4597,26 @@ Current user's request: ${currentInput}`;
                 availableTokens: postCompactBudget.availableInputTokens,
                 overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
             });
+            // Curator P1-2: emit `compaction.insufficient` whenever a
+            // single round of compaction wasn't enough — even when
+            // emergency truncation will save the day. Lets cost / audit
+            // listeners track the "compaction was insufficient" signal
+            // separately from the eventual outcome.
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: postCompactBudget.estimatedInputTokens,
+                    budget: postCompactBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "mid-compaction",
+                    willEmergencyTruncate: true,
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
             conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
             const finalBudget = checkContextBudget({
                 provider: providerName,
@@ -4440,6 +4632,23 @@ Current user's request: ${currentInput}`;
         if (!finalBudget.withinBudget) {
             // Clear watermark so handleContextOverflow recovery can re-compact
             this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
+            // Curator P1-2: emit `compaction.insufficient` so cost / audit
+            // listeners can record the specific failure mode (separate
+            // from a generic provider error).
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: finalBudget.estimatedInputTokens,
+                    budget: finalBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "post-emergency-truncation",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
             throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
                 `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                 `Budget: ${finalBudget.availableInputTokens} tokens.`, {
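Taken together, the emit sites above define a single event contract: `compaction.insufficient` with a `phase` field distinguishing where compaction fell short. A sketch of a subscriber follows; the event name and payload fields come from this diff, obtaining the emitter via getEventEmitter() matches the provider code later in the diff, but the no-argument constructor and the exact listener typing are assumptions:

import { NeuroLink } from "@juspay/neurolink";

const sdk = new NeuroLink();

// Field names mirror the emit sites above; payload typing is assumed here.
sdk.getEventEmitter().on("compaction.insufficient", (event: {
    stagesAttempted: string[];
    finalTokens?: number;
    budget?: number;
    provider: string;
    model?: string;
    phase: string;
    willEmergencyTruncate?: boolean;
    timestamp: number;
}) => {
    console.warn(`[audit] compaction insufficient during ${event.phase}: ` +
        `${event.finalTokens} tokens vs ${event.budget} budget on ${event.provider}`);
});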
@@ -4546,6 +4755,14 @@ Current user's request: ${currentInput}`;
             lastError: lastError?.message,
             responseTime,
         });
+        // Reviewer follow-up: preserve typed ContextBudgetExceededError after
+        // the per-provider fallback loop. Each provider's hard cap is
+        // per-window; we let the loop try them all, but if every provider
+        // rejected on budget the caller still needs the typed error to
+        // distinguish "context too large" from a generic provider failure.
+        if (lastError instanceof ContextBudgetExceededError) {
+            throw lastError;
+        }
        throw new Error(`Failed to generate text with all providers. Last error: ${lastError?.message || "Unknown error"}`);
     }
     /**
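For callers, this rethrow means a context-size failure stays distinguishable even after the whole fallback loop has run. A hedged sketch (the import names at this path and the generate() call shape are assumptions for illustration):

import { NeuroLink, ContextBudgetExceededError } from "@juspay/neurolink"; // export names assumed

async function askWithBudgetHandling(sdk: NeuroLink, text: string) {
    try {
        return await sdk.generate({ input: { text } });
    }
    catch (error) {
        if (error instanceof ContextBudgetExceededError) {
            // Typed error survives the provider loop (see the rethrow above);
            // its message embeds the estimated vs. budget token counts.
            console.error("Context too large, trim history or prompt:", error.message);
            return null;
        }
        throw error; // a genuine provider failure propagates unchanged
    }
}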
@@ -4984,8 +5201,23 @@ Current user's request: ${currentInput}`;
         const streamStartTime = Date.now();
         const sessionId = enhancedOptions.context
             ?.sessionId;
+        // Curator P2-4 dedup (concurrency-safe): native provider stream paths
+        // (Gemini 3 on Vertex / Google AI Studio) emit `generation:end`
+        // themselves. We attach a per-stream mutable flag directly to
+        // `enhancedOptions._streamDedupContext` — native providers receive
+        // these options and flip the flag before their emit; this finally
+        // block reads the same closed-over reference. Concurrent streams
+        // have different option objects so the contexts don't interfere.
+        const dedupContext = {
+            providerEmitted: false,
+        };
+        enhancedOptions._streamDedupContext = dedupContext;
         const processedStream = (async function* () {
             let streamError;
+            // Curator P2-4: hoist `resolvedUsage` so the finally block can emit a
+            // single `generation:end` event with cost data. Cost listeners
+            // subscribe here; previously the stream path never fired it.
+            let resolvedUsage;
             try {
                 for await (const chunk of mcpStream) {
                     chunkCount++;
@@ -5015,7 +5247,7 @@ Current user's request: ${currentInput}`;
                         accumulatedContent += content;
                     });
                 }
-
+                resolvedUsage = streamUsage;
                 if (!resolvedUsage && streamAnalytics) {
                     try {
                         const resolved = await Promise.resolve(streamAnalytics);
@@ -5090,6 +5322,61 @@ Current user's request: ${currentInput}`;
                     guardrailsBlocked: metadata.guardrailsBlocked,
                     error: metadata.error,
                 });
+                // Curator P2-4: emit `generation:end` exactly once per stream so
+                // cost listeners receive the same contract as for `generate()`.
+                // The previous implementation only fired `stream:complete`, leaving
+                // any subscriber to `generation:end` with zero events.
+                //
+                // Dedup: native provider stream paths (Gemini 3 on Vertex / Google
+                // AI Studio) already emit `generation:end` themselves so Pipeline B
+                // (Langfuse) records a GENERATION observation. Skip our emit when
+                // they already fired — preserves their Pipeline B observation
+                // source and keeps the "exactly once" contract. Per-stream flag
+                // is concurrency-safe because each stream attaches its own
+                // context to its own options object.
+                if (!dedupContext.providerEmitted) {
+                    try {
+                        const finalProvider = metadata.fallbackProvider ?? providerName ?? "unknown";
+                        const finalModel = metadata.fallbackModel ??
+                            streamModel ??
+                            enhancedOptions.model ??
+                            "unknown";
+                        const finalFinishReason = streamError
+                            ? "error"
+                            : (streamState.finishReason ?? "stop");
+                        self.emitter.emit("generation:end", {
+                            provider: finalProvider,
+                            model: finalModel,
+                            responseTime: Date.now() - streamStartTime,
+                            toolsUsed: streamState.toolCalls?.map((t) => t.toolName),
+                            timestamp: Date.now(),
+                            result: {
+                                content: accumulatedContent,
+                                usage: resolvedUsage,
+                                model: finalModel,
+                                provider: finalProvider,
+                                finishReason: finalFinishReason,
+                            },
+                            prompt: enhancedOptions.input?.text ||
+                                enhancedOptions.prompt,
+                            temperature: enhancedOptions.temperature,
+                            maxTokens: enhancedOptions.maxTokens,
+                            success: !streamError,
+                            error: streamError
+                                ? streamError instanceof Error
+                                    ? streamError.message
+                                    : String(streamError)
+                                : undefined,
+                            pipelineAHandled: true,
+                        });
+                    }
+                    catch (emitError) {
+                        logger.debug("[NeuroLink.stream] generation:end listener threw — ignored", {
+                            error: emitError instanceof Error
+                                ? emitError.message
+                                : String(emitError),
+                        });
+                    }
+                }
                 self._disableToolCacheForCurrentRequest = false;
                 cleanupListeners();
                 streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
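With this emit in place, a single listener covers cost accounting for both generate() and stream() calls. A sketch follows; the field names mirror the emit above, while the usage object's inner shape and the no-argument constructor are assumptions:

import { NeuroLink } from "@juspay/neurolink";

const sdk = new NeuroLink();

sdk.getEventEmitter().on("generation:end", (event: {
    provider: string;
    model: string;
    responseTime: number;
    success: boolean;
    result?: { usage?: Record<string, number> }; // inner shape assumed
}) => {
    // Fires exactly once per stream: either from the native provider path
    // or from the orchestration's finally block, never both.
    console.log(`[cost] ${event.provider}/${event.model} ` +
        `(${event.responseTime}ms, success=${event.success})`, event.result?.usage);
});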
@@ -5641,6 +5928,42 @@ Current user's request: ${currentInput}`;
         });
         const streamMessageCount = conversationMessages?.length || 0;
         const streamCompactionSessionId = this.getCompactionSessionId(options);
+        // Reviewer follow-up: gate the hard cap on the *actual compactable
+        // history* rather than `this.conversationMemory`. A configured-but-
+        // empty memory store leaves nothing to compact yet still satisfies
+        // `!this.conversationMemory === false`, so the previous check
+        // skipped the hard cap and dispatched the oversized payload.
+        const streamHasCompactableMessages = streamMessageCount > 0;
+        // Curator P1-2: pre-dispatch hard cap mirrors directProviderGeneration.
+        // When the budget check fails AND there's nothing to compact (no memory
+        // + no inline messages — only prompt + tools), throw before dispatch
+        // instead of wasting a roundtrip on a payload the provider will reject.
+        if (!streamBudget.withinBudget && !streamHasCompactableMessages) {
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: ["pre-dispatch hard cap"],
+                    finalTokens: streamBudget.estimatedInputTokens,
+                    budget: streamBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "pre-dispatch-no-recovery",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
+            throw new ContextBudgetExceededError(`Stream context exceeds model budget and no compaction is possible ` +
+                `(no conversationMemory, no inline conversationMessages — only ` +
+                `prompt + tools). Estimated: ${streamBudget.estimatedInputTokens} ` +
+                `tokens, budget: ${streamBudget.availableInputTokens} tokens. ` +
+                `Reduce prompt or tool-definition size, or trim the request.`, {
+                estimatedTokens: streamBudget.estimatedInputTokens,
+                availableTokens: streamBudget.availableInputTokens,
+                stagesUsed: [],
+                breakdown: streamBudget.breakdown,
+            });
+        }
         if (streamBudget.shouldCompact &&
             (hasCallerConversationHistory || this.conversationMemory) &&
             streamMessageCount >
@@ -5677,6 +6000,26 @@ Current user's request: ${currentInput}`;
                 availableTokens: postCompactBudget.availableInputTokens,
                 overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
             });
+            // Curator P1-2: emit `compaction.insufficient` whenever a single
+            // round of compaction wasn't enough — even when emergency
+            // truncation will save the day. Lets cost / audit listeners track
+            // the "compaction was insufficient" signal separately from the
+            // eventual outcome.
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: postCompactBudget.estimatedInputTokens,
+                    budget: postCompactBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "mid-compaction",
+                    willEmergencyTruncate: true,
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
             conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
             // Keep options in sync after emergency truncation so fallback paths
             // use the truncated history.
@@ -5693,6 +6036,23 @@ Current user's request: ${currentInput}`;
         if (!finalBudget.withinBudget) {
             // Clear watermark so handleContextOverflow recovery can re-compact
             this.lastCompactionMessageCount.delete(streamCompactionSessionId);
+            // Curator P1-2: emit `compaction.insufficient` on the terminal
+            // failure path so cost / audit listeners can record the specific
+            // failure mode (compaction + emergency truncation both insufficient).
+            try {
+                this.emitter.emit("compaction.insufficient", {
+                    stagesAttempted: compactionResult.stagesUsed,
+                    finalTokens: finalBudget.estimatedInputTokens,
+                    budget: finalBudget.availableInputTokens,
+                    provider: providerName,
+                    model: options.model,
+                    phase: "post-emergency-truncation",
+                    timestamp: Date.now(),
+                });
+            }
+            catch {
+                /* listener errors are non-fatal */
+            }
             throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
                 `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
                 `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5780,6 +6140,15 @@ Current user's request: ${currentInput}`;
      * Handle stream error with fallback
      */
     async handleStreamError(error, options, startTime, streamId, enhancedOptions, _factoryResult) {
+        // Curator P1-2: when the pre-dispatch hard cap or post-emergency
+        // truncation budget check throws ContextBudgetExceededError, the
+        // payload is too large for the model and a same-payload retry would
+        // just fail again at the provider — wasting the same tokens that
+        // the hard cap was meant to save. Rethrow so the caller sees the
+        // typed error instead of a fallback ProviderError that hides it.
+        if (error instanceof ContextBudgetExceededError) {
+            throw error;
+        }
         logger.error("Stream generation failed, attempting fallback", {
             error: error instanceof Error ? error.message : String(error),
         });
package/dist/providers/googleAiStudio.js
CHANGED

@@ -4,6 +4,7 @@ import { ErrorCategory, ErrorSeverity, GoogleAIModels, } from "../constants/enum
 import { BaseProvider } from "../core/baseProvider.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
 import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
+import { markStreamProviderEmittedGenerationEnd, } from "../neurolink.js";
 import { SpanStatusCode } from "@opentelemetry/api";
 import { ATTR, tracers, withClientSpan } from "../telemetry/index.js";
 import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
@@ -735,6 +736,9 @@ export class GoogleAIStudioProvider extends BaseProvider {
                 // AI SDK so experimental_telemetry is never injected; we emit manually.
                 const nativeStreamEmitter = this.neurolink?.getEventEmitter();
                 if (nativeStreamEmitter) {
+                    // Curator P2-4 dedup: flag the per-stream context attached
+                    // to options so the orchestration skips its own emit.
+                    markStreamProviderEmittedGenerationEnd(options);
                     nativeStreamEmitter.emit("generation:end", {
                         provider: this.providerName,
                         responseTime,
@@ -767,6 +771,9 @@ export class GoogleAIStudioProvider extends BaseProvider {
             // Emit failure generation:end so Pipeline B records the failed stream
             const errorEmitter = this.neurolink?.getEventEmitter();
             if (errorEmitter) {
+                // Curator P2-4 dedup: flag the per-stream context attached
+                // to options so the orchestration skips its own emit.
+                markStreamProviderEmittedGenerationEnd(options);
                 errorEmitter.emit("generation:end", {
                     provider: this.providerName,
                     responseTime: Date.now() - startTime,
package/dist/providers/googleVertex.js
CHANGED

@@ -10,6 +10,7 @@ import { ErrorCategory, ErrorSeverity, } from "../constants/enums.js";
 import { BaseProvider } from "../core/baseProvider.js";
 import { DEFAULT_MAX_STEPS, GLOBAL_LOCATION_MODELS, } from "../core/constants.js";
 import { ModelConfigurationManager } from "../core/modelConfiguration.js";
+import { markStreamProviderEmittedGenerationEnd, } from "../neurolink.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { ATTR, tracers, withClientSpan } from "../telemetry/index.js";
 import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
@@ -1630,8 +1631,12 @@ export class GoogleVertexProvider extends BaseProvider {
         // Emit generation:end so Pipeline B (Langfuse) creates a GENERATION
         // observation. The native @google/genai stream path on Vertex bypasses the
         // Vercel AI SDK so experimental_telemetry is never injected; we emit manually.
+        // Curator P2-4 dedup: flag the per-stream context attached to options
+        // so the orchestration in `runStandardStreamRequest` knows we already
+        // emitted and skips its own emit (preserving exactly-once).
         const vertexStreamEmitter = this.neurolink?.getEventEmitter();
         if (vertexStreamEmitter) {
+            markStreamProviderEmittedGenerationEnd(params.options);
             vertexStreamEmitter.emit("generation:end", {
                 provider: this.providerName,
                 responseTime,
package/dist/types/index.d.ts
CHANGED
package/dist/types/index.js
CHANGED
package/dist/types/streamDedup.d.ts
ADDED

@@ -0,0 +1,14 @@
+/**
+ * Curator P2-4 dedup (concurrency-safe): per-stream context that lets
+ * the orchestration's `runStandardStreamRequest` finally block know
+ * whether a *native provider* path within THIS stream's async chain
+ * already emitted `generation:end`. Native providers (Vertex / Google
+ * AI Studio for Gemini 3, etc.) emit on the shared SDK emitter; without
+ * scoping, a concurrent unrelated stream's emit on the same NeuroLink
+ * instance would suppress the wrong stream's orchestration emit.
+ *
+ * Attaching the context to each call's options scopes the flag to its stream.
+ */
+export type StreamGenerationEndContext = {
+    providerEmitted: boolean;
+};
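The new declaration file just names the shape the runtime contract above relies on. A sketch of the pairing (the deep-import specifiers are assumptions; consumers would normally never touch this field directly):

import type { StreamGenerationEndContext } from "@juspay/neurolink/dist/types/streamDedup.js"; // path assumed
import { STREAM_DEDUP_CONTEXT_KEY, markStreamProviderEmittedGenerationEnd } from "@juspay/neurolink/dist/neurolink.js"; // path assumed

// The orchestration attaches a fresh context per call:
const ctx: StreamGenerationEndContext = { providerEmitted: false };
const options = { [STREAM_DEDUP_CONTEXT_KEY]: ctx };

// A native provider emit site flips the flag via the helper:
markStreamProviderEmittedGenerationEnd(options);
console.log(ctx.providerEmitted); // true → the orchestration skips its own emit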
package/dist/types/streamDedup.js
ADDED

@@ -0,0 +1 @@
+export {};