@librechat/agents 3.1.85 → 3.1.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/dist/cjs/agents/AgentContext.cjs +7 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/events.cjs +23 -0
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +133 -18
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/graphs/MultiAgentGraph.cjs +1 -1
- package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/index.cjs +251 -53
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/init.cjs +1 -5
- package/dist/cjs/llm/init.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +113 -24
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +3 -1
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +18 -5
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/openai/index.cjs +253 -0
- package/dist/cjs/openai/index.cjs.map +1 -0
- package/dist/cjs/responses/index.cjs +448 -0
- package/dist/cjs/responses/index.cjs.map +1 -0
- package/dist/cjs/run.cjs +108 -7
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/session/AgentSession.cjs +1057 -0
- package/dist/cjs/session/AgentSession.cjs.map +1 -0
- package/dist/cjs/session/JsonlSessionStore.cjs +425 -0
- package/dist/cjs/session/JsonlSessionStore.cjs.map +1 -0
- package/dist/cjs/session/handlers.cjs +221 -0
- package/dist/cjs/session/handlers.cjs.map +1 -0
- package/dist/cjs/session/ids.cjs +22 -0
- package/dist/cjs/session/ids.cjs.map +1 -0
- package/dist/cjs/session/messageSerialization.cjs +179 -0
- package/dist/cjs/session/messageSerialization.cjs.map +1 -0
- package/dist/cjs/stream.cjs +472 -11
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +1 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +177 -59
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/eagerEventExecution.cjs +113 -0
- package/dist/cjs/tools/eagerEventExecution.cjs.map +1 -0
- package/dist/cjs/tools/handlers.cjs +1 -1
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs +42 -0
- package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +7 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/events.mjs +23 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +133 -18
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/graphs/MultiAgentGraph.mjs +1 -1
- package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/index.mjs +251 -53
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/init.mjs +1 -5
- package/dist/esm/llm/init.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +113 -25
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +4 -2
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/main.mjs +5 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/openai/index.mjs +246 -0
- package/dist/esm/openai/index.mjs.map +1 -0
- package/dist/esm/responses/index.mjs +440 -0
- package/dist/esm/responses/index.mjs.map +1 -0
- package/dist/esm/run.mjs +108 -7
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/session/AgentSession.mjs +1054 -0
- package/dist/esm/session/AgentSession.mjs.map +1 -0
- package/dist/esm/session/JsonlSessionStore.mjs +422 -0
- package/dist/esm/session/JsonlSessionStore.mjs.map +1 -0
- package/dist/esm/session/handlers.mjs +219 -0
- package/dist/esm/session/handlers.mjs.map +1 -0
- package/dist/esm/session/ids.mjs +17 -0
- package/dist/esm/session/ids.mjs.map +1 -0
- package/dist/esm/session/messageSerialization.mjs +173 -0
- package/dist/esm/session/messageSerialization.mjs.map +1 -0
- package/dist/esm/stream.mjs +473 -12
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +1 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +177 -59
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/eagerEventExecution.mjs +107 -0
- package/dist/esm/tools/eagerEventExecution.mjs.map +1 -0
- package/dist/esm/tools/handlers.mjs +1 -1
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/streamedToolCallSeals.mjs +36 -0
- package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -0
- package/dist/types/events.d.ts +1 -0
- package/dist/types/graphs/Graph.d.ts +24 -9
- package/dist/types/index.d.ts +1 -0
- package/dist/types/llm/openai/index.d.ts +1 -0
- package/dist/types/openai/index.d.ts +75 -0
- package/dist/types/responses/index.d.ts +97 -0
- package/dist/types/run.d.ts +2 -0
- package/dist/types/session/AgentSession.d.ts +32 -0
- package/dist/types/session/JsonlSessionStore.d.ts +67 -0
- package/dist/types/session/handlers.d.ts +8 -0
- package/dist/types/session/ids.d.ts +4 -0
- package/dist/types/session/index.d.ts +5 -0
- package/dist/types/session/messageSerialization.d.ts +7 -0
- package/dist/types/session/types.d.ts +191 -0
- package/dist/types/tools/ToolNode.d.ts +12 -1
- package/dist/types/tools/eagerEventExecution.d.ts +23 -0
- package/dist/types/tools/streamedToolCallSeals.d.ts +13 -0
- package/dist/types/types/hitl.d.ts +4 -0
- package/dist/types/types/run.d.ts +11 -1
- package/dist/types/types/tools.d.ts +36 -0
- package/package.json +19 -2
- package/src/__tests__/stream.eagerEventExecution.test.ts +2458 -0
- package/src/agents/AgentContext.ts +7 -2
- package/src/agents/__tests__/AgentContext.test.ts +254 -5
- package/src/events.ts +29 -0
- package/src/graphs/Graph.ts +224 -50
- package/src/graphs/MultiAgentGraph.ts +1 -1
- package/src/graphs/__tests__/composition.smoke.test.ts +30 -0
- package/src/index.ts +3 -0
- package/src/llm/anthropic/index.ts +356 -84
- package/src/llm/anthropic/llm.spec.ts +64 -0
- package/src/llm/custom-chat-models.smoke.test.ts +175 -4
- package/src/llm/openai/contentBlocks.test.ts +35 -0
- package/src/llm/openai/deepseek.test.ts +201 -2
- package/src/llm/openai/index.ts +171 -26
- package/src/llm/openai/utils/index.ts +22 -0
- package/src/llm/openrouter/index.ts +4 -2
- package/src/openai/__tests__/openai.test.ts +337 -0
- package/src/openai/index.ts +404 -0
- package/src/responses/__tests__/responses.test.ts +652 -0
- package/src/responses/index.ts +677 -0
- package/src/run.ts +158 -8
- package/src/scripts/compare_pi_vs_ours.ts +592 -173
- package/src/scripts/session_live.ts +548 -0
- package/src/session/AgentSession.ts +1432 -0
- package/src/session/JsonlSessionStore.ts +572 -0
- package/src/session/__tests__/JsonlSessionStore.test.ts +1410 -0
- package/src/session/__tests__/handlers.test.ts +161 -0
- package/src/session/handlers.ts +272 -0
- package/src/session/ids.ts +17 -0
- package/src/session/index.ts +44 -0
- package/src/session/messageSerialization.ts +207 -0
- package/src/session/types.ts +275 -0
- package/src/specs/custom-event-await.test.ts +89 -0
- package/src/specs/summarization.test.ts +1 -1
- package/src/stream.ts +755 -48
- package/src/summarization/node.ts +1 -1
- package/src/tools/ToolNode.ts +299 -126
- package/src/tools/__tests__/ToolNode.eagerEventExecution.test.ts +373 -0
- package/src/tools/__tests__/handlers.test.ts +2 -1
- package/src/tools/__tests__/hitl.test.ts +206 -110
- package/src/tools/eagerEventExecution.ts +153 -0
- package/src/tools/handlers.ts +8 -4
- package/src/tools/streamedToolCallSeals.ts +57 -0
- package/src/types/hitl.ts +4 -0
- package/src/types/run.ts +11 -0
- package/src/types/tools.ts +36 -0
- package/dist/cjs/llm/text.cjs +0 -69
- package/dist/cjs/llm/text.cjs.map +0 -1
- package/dist/esm/llm/text.mjs +0 -67
- package/dist/esm/llm/text.mjs.map +0 -1
|
@@ -21,7 +21,14 @@ import type {
|
|
|
21
21
|
import { _makeMessageChunkFromAnthropicEvent } from './utils/message_outputs';
|
|
22
22
|
import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
|
|
23
23
|
import { handleToolChoice } from './utils/tools';
|
|
24
|
-
|
|
24
|
+
|
|
25
|
+
/** Pacing interval in milliseconds used when no `_lc_stream_delay` is configured. */
const DEFAULT_STREAM_DELAY = 25;
/** Number of undelivered queued chunks at which the stream queue counts as full. */
const MAX_STREAM_QUEUE_CHUNKS = 256;
/** Number of buffered smoothed-text characters at which the stream queue counts as full. */
const MAX_STREAM_QUEUE_TEXT_CHARS = 8192;
/** Index from which a streamed token is scanned for a split boundary. */
const STREAM_CHUNK_MIN_SIZE = 4;
/** Characters after which a streamed token may be split; read-only because it is only ever queried. */
const STREAM_BOUNDARIES: ReadonlySet<string> = new Set([
  ' ',
  '.',
  ',',
  '!',
  '?',
  ';',
  ':',
]);

/** Kind tag paired with an extracted token (second tuple element of `extractToken`). */
type StreamTokenType = 'string' | 'input' | 'content';
|
|
25
32
|
|
|
26
33
|
const ANTHROPIC_TOOL_BETAS: Partial<Record<string, AnthropicBeta>> = {
|
|
27
34
|
tool_search_tool_regex_20251119: 'advanced-tool-use-2025-11-20',
|
|
@@ -236,9 +243,86 @@ function getSamplingParams({
|
|
|
236
243
|
};
|
|
237
244
|
}
|
|
238
245
|
|
|
246
|
+
/**
 * Computes the length of the next chunk to cut from the front of `text`.
 *
 * Scanning starts at index `minSize`; the chunk ends immediately after the
 * first boundary character found from there on. When `text` is no longer than
 * `minSize`, or no boundary character is found, the whole text is one chunk.
 *
 * @param text - Text to cut a chunk from.
 * @param minSize - Index at which the boundary scan starts.
 * @param boundaries - Characters that may end a chunk; defaults to the
 *   module-level `STREAM_BOUNDARIES`.
 * @returns Number of leading characters of `text` that make up the chunk.
 */
function findStreamChunkBoundary(
  text: string,
  minSize: number,
  boundaries: ReadonlySet<string> = STREAM_BOUNDARIES
): number {
  if (minSize >= text.length) {
    return text.length;
  }

  for (let position = minSize; position < text.length; position++) {
    // charAt always returns a string, so the set lookup stays well-typed even
    // when indexed access is checked as possibly undefined.
    if (boundaries.has(text.charAt(position))) {
      return position + 1;
    }
  }

  return text.length;
}
|
|
259
|
+
|
|
260
|
+
/**
 * Splits a streamed token into consecutive chunks.
 *
 * Each chunk length is chosen by `findStreamChunkBoundary` applied to the text
 * that has not been consumed yet, with `STREAM_CHUNK_MIN_SIZE` as the scan
 * start. Concatenating the returned chunks reproduces `text`.
 *
 * @param text - Token text to split.
 * @returns The chunks in order; an empty array for an empty string.
 */
function splitStreamToken(text: string): string[] {
  const chunks: string[] = [];
  let currentIndex = 0;

  while (currentIndex < text.length) {
    const remainingText = text.slice(currentIndex);
    const chunkSize = findStreamChunkBoundary(
      remainingText,
      STREAM_CHUNK_MIN_SIZE
    );
    chunks.push(remainingText.slice(0, chunkSize));
    currentIndex += chunkSize;
  }

  return chunks;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Computes how long to wait before emitting the next piece of visible text so
 * that consecutive emissions are spaced roughly `targetDelay` apart.
 *
 * @param targetDelay - Desired spacing between emissions, in the same unit as
 *   `now` and `lastVisibleTextAt`.
 * @param lastVisibleTextAt - Timestamp of the previous emission, if any.
 * @param now - Current timestamp.
 * @returns The remaining wait, always within `[0, targetDelay]`. Returns 0
 *   when pacing is disabled (`targetDelay` is not a positive number) or when
 *   nothing has been emitted yet.
 */
function getCadencedStreamDelay({
  targetDelay,
  lastVisibleTextAt,
  now,
}: {
  targetDelay: number;
  lastVisibleTextAt?: number;
  now: number;
}): number {
  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` test would let through.
  if (!(targetDelay > 0) || lastVisibleTextAt == null) {
    return 0;
  }

  const remaining = targetDelay - (now - lastVisibleTextAt);
  if (!(remaining > 0)) {
    return 0;
  }

  // If the clock moved backwards (now < lastVisibleTextAt), the raw remainder
  // exceeds targetDelay; never wait longer than one full interval.
  return Math.min(remaining, targetDelay);
}
|
|
291
|
+
|
|
292
|
+
/**
 * Waits for `delay` milliseconds, ending early if `signal` aborts.
 *
 * The returned promise always resolves (it never rejects on abort); callers
 * are expected to check the signal themselves afterwards.
 *
 * @param delay - Time to wait in milliseconds. Values that are not positive
 *   numbers (including NaN) cause an immediate return.
 * @param signal - Optional abort signal that cuts the wait short.
 */
async function waitForStreamDelay(
  delay: number,
  signal?: AbortSignal
): Promise<void> {
  if (!(delay > 0) || isSignalAborted(signal)) {
    return;
  }
  await new Promise<void>((resolve) => {
    const timeoutRef: { current?: ReturnType<typeof setTimeout> } = {};
    // Single completion path for both timer expiry and abort: stop the timer,
    // detach the listener, and release the waiter.
    const settle = (): void => {
      if (timeoutRef.current !== undefined) {
        clearTimeout(timeoutRef.current);
      }
      signal?.removeEventListener('abort', settle);
      resolve();
    };
    timeoutRef.current = setTimeout(settle, delay);
    signal?.addEventListener('abort', settle, { once: true });
    // Re-check after subscribing so an already-aborted signal is not missed.
    if (isSignalAborted(signal)) {
      settle();
    }
  });
}
|
|
318
|
+
|
|
319
|
+
/**
 * Reports whether an optional abort signal has been aborted.
 *
 * @param signal - Signal to inspect; may be omitted.
 * @returns `true` only when a signal is provided and its `aborted` flag is set.
 */
function isSignalAborted(signal?: AbortSignal): boolean {
  return signal?.aborted === true;
}
|
|
322
|
+
|
|
239
323
|
function extractToken(
|
|
240
324
|
chunk: AIMessageChunk
|
|
241
|
-
): [string,
|
|
325
|
+
): [string, StreamTokenType] | [undefined] {
|
|
242
326
|
if (typeof chunk.content === 'string') {
|
|
243
327
|
return [chunk.content, 'string'];
|
|
244
328
|
} else if (
|
|
@@ -269,7 +353,7 @@ function extractToken(
|
|
|
269
353
|
|
|
270
354
|
function cloneChunk(
|
|
271
355
|
text: string,
|
|
272
|
-
tokenType:
|
|
356
|
+
tokenType: StreamTokenType,
|
|
273
357
|
chunk: AIMessageChunk
|
|
274
358
|
): AIMessageChunk {
|
|
275
359
|
if (tokenType === 'string') {
|
|
@@ -278,20 +362,19 @@ function cloneChunk(
|
|
|
278
362
|
return chunk;
|
|
279
363
|
}
|
|
280
364
|
const content = chunk.content[0] as MessageContentComplex;
|
|
281
|
-
if (
|
|
365
|
+
if (content.type === 'text') {
|
|
282
366
|
return new AIMessageChunk(
|
|
283
367
|
Object.assign({}, chunk, {
|
|
284
368
|
content: [Object.assign({}, content, { text })],
|
|
285
369
|
})
|
|
286
370
|
);
|
|
287
|
-
} else if (
|
|
371
|
+
} else if (content.type === 'text_delta') {
|
|
288
372
|
return new AIMessageChunk(
|
|
289
373
|
Object.assign({}, chunk, {
|
|
290
374
|
content: [Object.assign({}, content, { text })],
|
|
291
375
|
})
|
|
292
376
|
);
|
|
293
377
|
} else if (
|
|
294
|
-
tokenType === 'content' &&
|
|
295
378
|
typeof content.type === 'string' &&
|
|
296
379
|
content.type.startsWith('thinking')
|
|
297
380
|
) {
|
|
@@ -354,6 +437,13 @@ type CustomAnthropicInvocationParams = {
|
|
|
354
437
|
output_config?: AnthropicOutputConfig;
|
|
355
438
|
};
|
|
356
439
|
|
|
440
|
+
/** One entry in the queue between the stream producer and the yielding consumer. */
type QueuedGenerationChunk = {
  /** Generation chunk that is yielded to the caller. */
  chunk: ChatGenerationChunk;
  /** Token text reported to the run manager's new-token callback. */
  token: string;
  /** Whether the entry is paced text: it is delayed on output and counted in the buffered text length. */
  smooth: boolean;
  /** Characters this entry adds to the buffered text length (`token.length` when `smooth`, otherwise 0). */
  textLength: number;
};
|
|
446
|
+
|
|
357
447
|
export class CustomAnthropic extends ChatAnthropicMessages {
|
|
358
448
|
_lc_stream_delay: number;
|
|
359
449
|
private tools_in_params?: boolean;
|
|
@@ -365,7 +455,10 @@ export class CustomAnthropic extends ChatAnthropicMessages {
|
|
|
365
455
|
super(fields);
|
|
366
456
|
this.resetTokenEvents();
|
|
367
457
|
this.setDirectFields(fields);
|
|
368
|
-
this._lc_stream_delay =
|
|
458
|
+
this._lc_stream_delay = Math.max(
|
|
459
|
+
0,
|
|
460
|
+
fields?._lc_stream_delay ?? DEFAULT_STREAM_DELAY
|
|
461
|
+
);
|
|
369
462
|
this.outputConfig = fields?.outputConfig;
|
|
370
463
|
this.inferenceGeo = fields?.inferenceGeo;
|
|
371
464
|
this.contextManagement = fields?.contextManagement;
|
|
@@ -524,102 +617,281 @@ export class CustomAnthropic extends ChatAnthropicMessages {
|
|
|
524
617
|
|
|
525
618
|
const shouldStreamUsage = options.streamUsage ?? this.streamUsage;
|
|
526
619
|
let messageDeltaOutputTokens = 0;
|
|
620
|
+
const queuedChunks: QueuedGenerationChunk[] = [];
|
|
621
|
+
const producerState: {
|
|
622
|
+
done: boolean;
|
|
623
|
+
error?: unknown;
|
|
624
|
+
} = { done: false };
|
|
625
|
+
let queuedChunkIndex = 0;
|
|
626
|
+
let bufferedTextLength = 0;
|
|
627
|
+
let consumerClosed = false;
|
|
628
|
+
let notifyConsumer: (() => void) | undefined;
|
|
629
|
+
let notifyProducer: (() => void) | undefined;
|
|
630
|
+
|
|
631
|
+
const notifyConsumerForChunk = (): void => {
|
|
632
|
+
notifyConsumer?.();
|
|
633
|
+
notifyConsumer = undefined;
|
|
634
|
+
};
|
|
527
635
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
}
|
|
636
|
+
const notifyProducerForSpace = (): void => {
|
|
637
|
+
notifyProducer?.();
|
|
638
|
+
notifyProducer = undefined;
|
|
639
|
+
};
|
|
533
640
|
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
{
|
|
537
|
-
streamUsage: shouldStreamUsage,
|
|
538
|
-
coerceContentToString,
|
|
539
|
-
}
|
|
540
|
-
);
|
|
541
|
-
if (!result) continue;
|
|
641
|
+
const hasQueuedChunks = (): boolean =>
|
|
642
|
+
queuedChunkIndex < queuedChunks.length;
|
|
542
643
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
644
|
+
const getQueuedChunkCount = (): number =>
|
|
645
|
+
queuedChunks.length - queuedChunkIndex;
|
|
646
|
+
|
|
647
|
+
const isQueueAtCapacity = (): boolean =>
|
|
648
|
+
getQueuedChunkCount() >= MAX_STREAM_QUEUE_CHUNKS ||
|
|
649
|
+
bufferedTextLength >= MAX_STREAM_QUEUE_TEXT_CHARS;
|
|
650
|
+
|
|
651
|
+
const waitForNextChunk = async (): Promise<void> => {
|
|
652
|
+
if (
|
|
653
|
+
hasQueuedChunks() ||
|
|
654
|
+
producerState.done ||
|
|
655
|
+
producerState.error != null
|
|
656
|
+
) {
|
|
657
|
+
return;
|
|
658
|
+
}
|
|
659
|
+
await new Promise<void>((resolve) => {
|
|
660
|
+
notifyConsumer = resolve;
|
|
661
|
+
});
|
|
662
|
+
};
|
|
663
|
+
|
|
664
|
+
const waitForQueueSpace = async (): Promise<void> => {
|
|
665
|
+
while (
|
|
666
|
+
isQueueAtCapacity() &&
|
|
667
|
+
!consumerClosed &&
|
|
668
|
+
!isSignalAborted(options.signal)
|
|
669
|
+
) {
|
|
670
|
+
await new Promise<void>((resolve) => {
|
|
671
|
+
const signal = options.signal;
|
|
672
|
+
const onAbort = (): void => {
|
|
673
|
+
signal?.removeEventListener('abort', onAbort);
|
|
674
|
+
resolve();
|
|
675
|
+
};
|
|
676
|
+
const onSpace = (): void => {
|
|
677
|
+
signal?.removeEventListener('abort', onAbort);
|
|
678
|
+
resolve();
|
|
679
|
+
};
|
|
680
|
+
notifyProducer = onSpace;
|
|
681
|
+
signal?.addEventListener('abort', onAbort, { once: true });
|
|
682
|
+
if (isSignalAborted(signal)) {
|
|
683
|
+
onAbort();
|
|
684
|
+
}
|
|
685
|
+
});
|
|
551
686
|
}
|
|
552
|
-
|
|
687
|
+
};
|
|
553
688
|
|
|
689
|
+
const dequeue = (): QueuedGenerationChunk | undefined => {
|
|
690
|
+
if (!hasQueuedChunks()) {
|
|
691
|
+
return undefined;
|
|
692
|
+
}
|
|
693
|
+
const queuedChunk = queuedChunks[queuedChunkIndex];
|
|
694
|
+
queuedChunkIndex++;
|
|
554
695
|
if (
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
(token === '' && (chunk.usage_metadata != null || chunk.id != null))
|
|
696
|
+
queuedChunkIndex > 128 &&
|
|
697
|
+
queuedChunkIndex * 2 >= queuedChunks.length
|
|
558
698
|
) {
|
|
559
|
-
|
|
699
|
+
queuedChunks.splice(0, queuedChunkIndex);
|
|
700
|
+
queuedChunkIndex = 0;
|
|
701
|
+
}
|
|
702
|
+
return queuedChunk;
|
|
703
|
+
};
|
|
704
|
+
|
|
705
|
+
const enqueue = async (
|
|
706
|
+
queuedChunk: QueuedGenerationChunk
|
|
707
|
+
): Promise<void> => {
|
|
708
|
+
await waitForQueueSpace();
|
|
709
|
+
if (consumerClosed || isSignalAborted(options.signal)) {
|
|
710
|
+
stream.controller.abort();
|
|
711
|
+
throw new Error('AbortError: User aborted the request.');
|
|
712
|
+
}
|
|
713
|
+
queuedChunks.push(queuedChunk);
|
|
714
|
+
if (queuedChunk.smooth) {
|
|
715
|
+
bufferedTextLength += queuedChunk.textLength;
|
|
716
|
+
}
|
|
717
|
+
notifyConsumerForChunk();
|
|
718
|
+
};
|
|
719
|
+
|
|
720
|
+
const enqueueChunk = async ({
|
|
721
|
+
token,
|
|
722
|
+
chunk,
|
|
723
|
+
smooth,
|
|
724
|
+
}: {
|
|
725
|
+
token: string;
|
|
726
|
+
chunk: AIMessageChunk;
|
|
727
|
+
smooth: boolean;
|
|
728
|
+
}): Promise<void> => {
|
|
729
|
+
await enqueue({
|
|
730
|
+
token,
|
|
731
|
+
smooth,
|
|
732
|
+
textLength: smooth ? token.length : 0,
|
|
733
|
+
chunk: this.createGenerationChunk({
|
|
560
734
|
token,
|
|
561
735
|
chunk,
|
|
562
736
|
shouldStreamUsage,
|
|
563
|
-
})
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
737
|
+
}),
|
|
738
|
+
});
|
|
739
|
+
};
|
|
740
|
+
|
|
741
|
+
const enqueueTextChunks = (
|
|
742
|
+
token: string,
|
|
743
|
+
tokenType: StreamTokenType,
|
|
744
|
+
chunk: AIMessageChunk
|
|
745
|
+
): Promise<void> => {
|
|
746
|
+
if (token === '') {
|
|
747
|
+
return Promise.resolve();
|
|
748
|
+
}
|
|
749
|
+
if (this._lc_stream_delay <= 0) {
|
|
750
|
+
return enqueueChunk({ token, chunk, smooth: false });
|
|
574
751
|
}
|
|
575
752
|
|
|
576
|
-
const
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
753
|
+
const tokenChunks = splitStreamToken(token);
|
|
754
|
+
if (tokenChunks.length <= 1) {
|
|
755
|
+
return enqueueChunk({ token, chunk, smooth: true });
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
let emittedUsage = false;
|
|
759
|
+
return tokenChunks.reduce(async (previous, currentToken) => {
|
|
760
|
+
await previous;
|
|
761
|
+
const newChunk = cloneChunk(currentToken, tokenType, chunk);
|
|
762
|
+
const chunkForToken =
|
|
763
|
+
emittedUsage && newChunk.usage_metadata != null
|
|
764
|
+
? new AIMessageChunk(
|
|
765
|
+
Object.assign({}, newChunk, { usage_metadata: undefined })
|
|
766
|
+
)
|
|
767
|
+
: newChunk;
|
|
768
|
+
|
|
769
|
+
await enqueueChunk({
|
|
770
|
+
token: currentToken,
|
|
771
|
+
chunk: chunkForToken,
|
|
772
|
+
smooth: true,
|
|
773
|
+
});
|
|
774
|
+
|
|
775
|
+
if (newChunk.usage_metadata != null && !emittedUsage) {
|
|
776
|
+
emittedUsage = true;
|
|
777
|
+
}
|
|
778
|
+
}, Promise.resolve());
|
|
779
|
+
};
|
|
582
780
|
|
|
583
|
-
|
|
781
|
+
const producer = (async (): Promise<void> => {
|
|
584
782
|
try {
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
783
|
+
for await (const data of stream) {
|
|
784
|
+
if (isSignalAborted(options.signal)) {
|
|
785
|
+
stream.controller.abort();
|
|
786
|
+
throw new Error('AbortError: User aborted the request.');
|
|
589
787
|
}
|
|
590
|
-
|
|
591
|
-
const
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
const generationChunk = this.createGenerationChunk({
|
|
599
|
-
token: currentToken,
|
|
600
|
-
chunk: chunkForToken,
|
|
601
|
-
shouldStreamUsage,
|
|
602
|
-
});
|
|
603
|
-
|
|
604
|
-
if (newChunk.usage_metadata != null && !emittedUsage) {
|
|
605
|
-
emittedUsage = true;
|
|
606
|
-
}
|
|
607
|
-
yield generationChunk;
|
|
608
|
-
|
|
609
|
-
await runManager?.handleLLMNewToken(
|
|
610
|
-
currentToken,
|
|
611
|
-
undefined,
|
|
612
|
-
undefined,
|
|
613
|
-
undefined,
|
|
614
|
-
undefined,
|
|
615
|
-
{ chunk: generationChunk }
|
|
788
|
+
|
|
789
|
+
const result = _makeMessageChunkFromAnthropicEvent(
|
|
790
|
+
data as Anthropic.Beta.Messages.BetaRawMessageStreamEvent,
|
|
791
|
+
{
|
|
792
|
+
streamUsage: shouldStreamUsage,
|
|
793
|
+
coerceContentToString,
|
|
794
|
+
}
|
|
616
795
|
);
|
|
796
|
+
if (!result) {
|
|
797
|
+
continue;
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
let { chunk } = result;
|
|
801
|
+
if (data.type === 'message_delta') {
|
|
802
|
+
const incremental = withIncrementalMessageDeltaUsage(
|
|
803
|
+
chunk,
|
|
804
|
+
messageDeltaOutputTokens
|
|
805
|
+
);
|
|
806
|
+
chunk = incremental.chunk;
|
|
807
|
+
messageDeltaOutputTokens = incremental.outputTokens;
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
const [token = '', tokenType] = extractToken(chunk);
|
|
811
|
+
if (
|
|
812
|
+
!tokenType ||
|
|
813
|
+
tokenType === 'input' ||
|
|
814
|
+
(token === '' && (chunk.usage_metadata != null || chunk.id != null))
|
|
815
|
+
) {
|
|
816
|
+
await enqueueChunk({ token, chunk, smooth: false });
|
|
817
|
+
continue;
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
await enqueueTextChunks(token, tokenType, chunk);
|
|
617
821
|
}
|
|
822
|
+
} catch (error) {
|
|
823
|
+
producerState.error = error;
|
|
618
824
|
} finally {
|
|
619
|
-
|
|
825
|
+
producerState.done = true;
|
|
826
|
+
notifyConsumerForChunk();
|
|
620
827
|
}
|
|
621
|
-
}
|
|
828
|
+
})();
|
|
622
829
|
|
|
623
|
-
|
|
830
|
+
let hasEmittedText = false;
|
|
831
|
+
let lastVisibleTextAt: number | undefined;
|
|
832
|
+
let keepStreaming = true;
|
|
833
|
+
try {
|
|
834
|
+
while (keepStreaming) {
|
|
835
|
+
if (isSignalAborted(options.signal)) {
|
|
836
|
+
stream.controller.abort();
|
|
837
|
+
throw new Error('AbortError: User aborted the request.');
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
await waitForNextChunk();
|
|
841
|
+
const queuedChunk = dequeue();
|
|
842
|
+
|
|
843
|
+
if (!queuedChunk) {
|
|
844
|
+
if (producerState.error != null) {
|
|
845
|
+
throw producerState.error;
|
|
846
|
+
}
|
|
847
|
+
if (producerState.done) {
|
|
848
|
+
keepStreaming = false;
|
|
849
|
+
}
|
|
850
|
+
continue;
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
if (queuedChunk.smooth) {
|
|
854
|
+
bufferedTextLength = Math.max(
|
|
855
|
+
0,
|
|
856
|
+
bufferedTextLength - queuedChunk.textLength
|
|
857
|
+
);
|
|
858
|
+
notifyProducerForSpace();
|
|
859
|
+
await waitForStreamDelay(
|
|
860
|
+
getCadencedStreamDelay({
|
|
861
|
+
targetDelay: hasEmittedText ? this._lc_stream_delay : 0,
|
|
862
|
+
lastVisibleTextAt,
|
|
863
|
+
now: Date.now(),
|
|
864
|
+
}),
|
|
865
|
+
options.signal
|
|
866
|
+
);
|
|
867
|
+
if (isSignalAborted(options.signal)) {
|
|
868
|
+
stream.controller.abort();
|
|
869
|
+
throw new Error('AbortError: User aborted the request.');
|
|
870
|
+
}
|
|
871
|
+
hasEmittedText = true;
|
|
872
|
+
lastVisibleTextAt = Date.now();
|
|
873
|
+
} else {
|
|
874
|
+
notifyProducerForSpace();
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
yield queuedChunk.chunk;
|
|
878
|
+
await runManager?.handleLLMNewToken(
|
|
879
|
+
queuedChunk.token,
|
|
880
|
+
undefined,
|
|
881
|
+
undefined,
|
|
882
|
+
undefined,
|
|
883
|
+
undefined,
|
|
884
|
+
{ chunk: queuedChunk.chunk }
|
|
885
|
+
);
|
|
886
|
+
}
|
|
887
|
+
} finally {
|
|
888
|
+
consumerClosed = true;
|
|
889
|
+
if (!producerState.done) {
|
|
890
|
+
stream.controller.abort();
|
|
891
|
+
notifyProducerForSpace();
|
|
892
|
+
}
|
|
893
|
+
await producer;
|
|
894
|
+
this.resetTokenEvents();
|
|
895
|
+
}
|
|
624
896
|
}
|
|
625
897
|
}
|
|
@@ -1143,6 +1143,70 @@ test('Anthropic stream usage handles multiple cumulative message_delta events',
|
|
|
1143
1143
|
});
|
|
1144
1144
|
});
|
|
1145
1145
|
|
|
1146
|
+
// An empty `content_block_start` text block followed by a single text delta
// must surface exactly one string chunk: the delta's text.
test('Anthropic stream smoothing skips empty text block starts', async () => {
  const events: AnthropicStreamEvent[] = [
    {
      type: 'content_block_start',
      index: 0,
      content_block: {
        type: 'text',
        text: '',
        citations: null,
      },
    },
    {
      type: 'content_block_delta',
      index: 0,
      delta: {
        type: 'text_delta',
        text: 'hello',
      },
    },
    { type: 'message_stop' },
  ];
  const model = new MockStreamingAnthropic(events);
  const contents: string[] = [];

  // Collect only chunks whose content is a plain string.
  for await (const chunk of await model.stream('hello')) {
    if (typeof chunk.content === 'string') {
      contents.push(chunk.content);
    }
  }

  expect(contents).toEqual(['hello']);
});
|
|
1178
|
+
|
|
1179
|
+
// Breaking out of the stream after the first non-empty chunk must let the
// iteration finish promptly even though a large delta (15,000 characters) is
// still being produced; a 1 s guard fails the test if closing hangs.
test('Anthropic stream smoothing closes a queue-full producer after early break', async () => {
  const events: AnthropicStreamEvent[] = [
    {
      type: 'content_block_delta',
      index: 0,
      delta: {
        type: 'text_delta',
        text: 'word '.repeat(3000),
      },
    },
    { type: 'message_stop' },
  ];
  const model = new MockStreamingAnthropic(events);
  const readOneChunk = async (): Promise<void> => {
    for await (const chunk of await model.stream('hello')) {
      if (typeof chunk.content === 'string' && chunk.content.length > 0) {
        break;
      }
    }
  };
  let guardTimer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    guardTimer = setTimeout(() => {
      reject(new Error('stream close timed out'));
    }, 1000);
  });

  try {
    await expect(
      Promise.race([readOneChunk(), timeout])
    ).resolves.toBeUndefined();
  } finally {
    // Cancel the guard so it does not stay pending after the test settles.
    clearTimeout(guardTimer);
  }
});
|
|
1209
|
+
|
|
1146
1210
|
test('Anthropic live stream usage matches raw cumulative output snapshots', async () => {
|
|
1147
1211
|
const model = new RecordingStreamingAnthropic({
|
|
1148
1212
|
modelName,
|