@librechat/agents 3.1.85 → 3.1.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/README.md +69 -0
  2. package/dist/cjs/agents/AgentContext.cjs +7 -2
  3. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  4. package/dist/cjs/events.cjs +23 -0
  5. package/dist/cjs/events.cjs.map +1 -1
  6. package/dist/cjs/graphs/Graph.cjs +133 -18
  7. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  8. package/dist/cjs/graphs/MultiAgentGraph.cjs +1 -1
  9. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  10. package/dist/cjs/llm/anthropic/index.cjs +251 -53
  11. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  12. package/dist/cjs/llm/init.cjs +1 -5
  13. package/dist/cjs/llm/init.cjs.map +1 -1
  14. package/dist/cjs/llm/openai/index.cjs +113 -24
  15. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  16. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  17. package/dist/cjs/llm/openrouter/index.cjs +3 -1
  18. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  19. package/dist/cjs/main.cjs +18 -5
  20. package/dist/cjs/main.cjs.map +1 -1
  21. package/dist/cjs/openai/index.cjs +253 -0
  22. package/dist/cjs/openai/index.cjs.map +1 -0
  23. package/dist/cjs/responses/index.cjs +448 -0
  24. package/dist/cjs/responses/index.cjs.map +1 -0
  25. package/dist/cjs/run.cjs +108 -7
  26. package/dist/cjs/run.cjs.map +1 -1
  27. package/dist/cjs/session/AgentSession.cjs +1057 -0
  28. package/dist/cjs/session/AgentSession.cjs.map +1 -0
  29. package/dist/cjs/session/JsonlSessionStore.cjs +425 -0
  30. package/dist/cjs/session/JsonlSessionStore.cjs.map +1 -0
  31. package/dist/cjs/session/handlers.cjs +221 -0
  32. package/dist/cjs/session/handlers.cjs.map +1 -0
  33. package/dist/cjs/session/ids.cjs +22 -0
  34. package/dist/cjs/session/ids.cjs.map +1 -0
  35. package/dist/cjs/session/messageSerialization.cjs +179 -0
  36. package/dist/cjs/session/messageSerialization.cjs.map +1 -0
  37. package/dist/cjs/stream.cjs +472 -11
  38. package/dist/cjs/stream.cjs.map +1 -1
  39. package/dist/cjs/summarization/node.cjs +1 -1
  40. package/dist/cjs/summarization/node.cjs.map +1 -1
  41. package/dist/cjs/tools/ToolNode.cjs +177 -59
  42. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  43. package/dist/cjs/tools/eagerEventExecution.cjs +113 -0
  44. package/dist/cjs/tools/eagerEventExecution.cjs.map +1 -0
  45. package/dist/cjs/tools/handlers.cjs +1 -1
  46. package/dist/cjs/tools/handlers.cjs.map +1 -1
  47. package/dist/cjs/tools/streamedToolCallSeals.cjs +42 -0
  48. package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -0
  49. package/dist/esm/agents/AgentContext.mjs +7 -2
  50. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  51. package/dist/esm/events.mjs +23 -1
  52. package/dist/esm/events.mjs.map +1 -1
  53. package/dist/esm/graphs/Graph.mjs +133 -18
  54. package/dist/esm/graphs/Graph.mjs.map +1 -1
  55. package/dist/esm/graphs/MultiAgentGraph.mjs +1 -1
  56. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  57. package/dist/esm/llm/anthropic/index.mjs +251 -53
  58. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  59. package/dist/esm/llm/init.mjs +1 -5
  60. package/dist/esm/llm/init.mjs.map +1 -1
  61. package/dist/esm/llm/openai/index.mjs +113 -25
  62. package/dist/esm/llm/openai/index.mjs.map +1 -1
  63. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  64. package/dist/esm/llm/openrouter/index.mjs +4 -2
  65. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  66. package/dist/esm/main.mjs +5 -1
  67. package/dist/esm/main.mjs.map +1 -1
  68. package/dist/esm/openai/index.mjs +246 -0
  69. package/dist/esm/openai/index.mjs.map +1 -0
  70. package/dist/esm/responses/index.mjs +440 -0
  71. package/dist/esm/responses/index.mjs.map +1 -0
  72. package/dist/esm/run.mjs +108 -7
  73. package/dist/esm/run.mjs.map +1 -1
  74. package/dist/esm/session/AgentSession.mjs +1054 -0
  75. package/dist/esm/session/AgentSession.mjs.map +1 -0
  76. package/dist/esm/session/JsonlSessionStore.mjs +422 -0
  77. package/dist/esm/session/JsonlSessionStore.mjs.map +1 -0
  78. package/dist/esm/session/handlers.mjs +219 -0
  79. package/dist/esm/session/handlers.mjs.map +1 -0
  80. package/dist/esm/session/ids.mjs +17 -0
  81. package/dist/esm/session/ids.mjs.map +1 -0
  82. package/dist/esm/session/messageSerialization.mjs +173 -0
  83. package/dist/esm/session/messageSerialization.mjs.map +1 -0
  84. package/dist/esm/stream.mjs +473 -12
  85. package/dist/esm/stream.mjs.map +1 -1
  86. package/dist/esm/summarization/node.mjs +1 -1
  87. package/dist/esm/summarization/node.mjs.map +1 -1
  88. package/dist/esm/tools/ToolNode.mjs +177 -59
  89. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  90. package/dist/esm/tools/eagerEventExecution.mjs +107 -0
  91. package/dist/esm/tools/eagerEventExecution.mjs.map +1 -0
  92. package/dist/esm/tools/handlers.mjs +1 -1
  93. package/dist/esm/tools/handlers.mjs.map +1 -1
  94. package/dist/esm/tools/streamedToolCallSeals.mjs +36 -0
  95. package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -0
  96. package/dist/types/events.d.ts +1 -0
  97. package/dist/types/graphs/Graph.d.ts +24 -9
  98. package/dist/types/index.d.ts +1 -0
  99. package/dist/types/llm/openai/index.d.ts +1 -0
  100. package/dist/types/openai/index.d.ts +75 -0
  101. package/dist/types/responses/index.d.ts +97 -0
  102. package/dist/types/run.d.ts +2 -0
  103. package/dist/types/session/AgentSession.d.ts +32 -0
  104. package/dist/types/session/JsonlSessionStore.d.ts +67 -0
  105. package/dist/types/session/handlers.d.ts +8 -0
  106. package/dist/types/session/ids.d.ts +4 -0
  107. package/dist/types/session/index.d.ts +5 -0
  108. package/dist/types/session/messageSerialization.d.ts +7 -0
  109. package/dist/types/session/types.d.ts +191 -0
  110. package/dist/types/tools/ToolNode.d.ts +12 -1
  111. package/dist/types/tools/eagerEventExecution.d.ts +23 -0
  112. package/dist/types/tools/streamedToolCallSeals.d.ts +13 -0
  113. package/dist/types/types/hitl.d.ts +4 -0
  114. package/dist/types/types/run.d.ts +11 -1
  115. package/dist/types/types/tools.d.ts +36 -0
  116. package/package.json +19 -2
  117. package/src/__tests__/stream.eagerEventExecution.test.ts +2458 -0
  118. package/src/agents/AgentContext.ts +7 -2
  119. package/src/agents/__tests__/AgentContext.test.ts +254 -5
  120. package/src/events.ts +29 -0
  121. package/src/graphs/Graph.ts +224 -50
  122. package/src/graphs/MultiAgentGraph.ts +1 -1
  123. package/src/graphs/__tests__/composition.smoke.test.ts +30 -0
  124. package/src/index.ts +3 -0
  125. package/src/llm/anthropic/index.ts +356 -84
  126. package/src/llm/anthropic/llm.spec.ts +64 -0
  127. package/src/llm/custom-chat-models.smoke.test.ts +175 -4
  128. package/src/llm/openai/contentBlocks.test.ts +35 -0
  129. package/src/llm/openai/deepseek.test.ts +201 -2
  130. package/src/llm/openai/index.ts +171 -26
  131. package/src/llm/openai/utils/index.ts +22 -0
  132. package/src/llm/openrouter/index.ts +4 -2
  133. package/src/openai/__tests__/openai.test.ts +337 -0
  134. package/src/openai/index.ts +404 -0
  135. package/src/responses/__tests__/responses.test.ts +652 -0
  136. package/src/responses/index.ts +677 -0
  137. package/src/run.ts +158 -8
  138. package/src/scripts/compare_pi_vs_ours.ts +592 -173
  139. package/src/scripts/session_live.ts +548 -0
  140. package/src/session/AgentSession.ts +1432 -0
  141. package/src/session/JsonlSessionStore.ts +572 -0
  142. package/src/session/__tests__/JsonlSessionStore.test.ts +1410 -0
  143. package/src/session/__tests__/handlers.test.ts +161 -0
  144. package/src/session/handlers.ts +272 -0
  145. package/src/session/ids.ts +17 -0
  146. package/src/session/index.ts +44 -0
  147. package/src/session/messageSerialization.ts +207 -0
  148. package/src/session/types.ts +275 -0
  149. package/src/specs/custom-event-await.test.ts +89 -0
  150. package/src/specs/summarization.test.ts +1 -1
  151. package/src/stream.ts +755 -48
  152. package/src/summarization/node.ts +1 -1
  153. package/src/tools/ToolNode.ts +299 -126
  154. package/src/tools/__tests__/ToolNode.eagerEventExecution.test.ts +373 -0
  155. package/src/tools/__tests__/handlers.test.ts +2 -1
  156. package/src/tools/__tests__/hitl.test.ts +206 -110
  157. package/src/tools/eagerEventExecution.ts +153 -0
  158. package/src/tools/handlers.ts +8 -4
  159. package/src/tools/streamedToolCallSeals.ts +57 -0
  160. package/src/types/hitl.ts +4 -0
  161. package/src/types/run.ts +11 -0
  162. package/src/types/tools.ts +36 -0
  163. package/dist/cjs/llm/text.cjs +0 -69
  164. package/dist/cjs/llm/text.cjs.map +0 -1
  165. package/dist/esm/llm/text.mjs +0 -67
  166. package/dist/esm/llm/text.mjs.map +0 -1
@@ -21,7 +21,14 @@ import type {
21
21
  import { _makeMessageChunkFromAnthropicEvent } from './utils/message_outputs';
22
22
  import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
23
23
  import { handleToolChoice } from './utils/tools';
24
- import { TextStream } from '@/llm/text';
24
+
25
+ const DEFAULT_STREAM_DELAY = 25;
26
+ const MAX_STREAM_QUEUE_CHUNKS = 256;
27
+ const MAX_STREAM_QUEUE_TEXT_CHARS = 8192;
28
+ const STREAM_CHUNK_MIN_SIZE = 4;
29
+ const STREAM_BOUNDARIES = new Set([' ', '.', ',', '!', '?', ';', ':']);
30
+
31
+ type StreamTokenType = 'string' | 'input' | 'content';
25
32
 
26
33
  const ANTHROPIC_TOOL_BETAS: Partial<Record<string, AnthropicBeta>> = {
27
34
  tool_search_tool_regex_20251119: 'advanced-tool-use-2025-11-20',
@@ -236,9 +243,86 @@ function getSamplingParams({
236
243
  };
237
244
  }
238
245
 
246
+ function findStreamChunkBoundary(text: string, minSize: number): number {
247
+ if (minSize >= text.length) {
248
+ return text.length;
249
+ }
250
+
251
+ for (let position = minSize; position < text.length; position++) {
252
+ if (STREAM_BOUNDARIES.has(text[position])) {
253
+ return position + 1;
254
+ }
255
+ }
256
+
257
+ return text.length;
258
+ }
259
+
260
+ function splitStreamToken(text: string): string[] {
261
+ const chunks: string[] = [];
262
+ let currentIndex = 0;
263
+
264
+ while (currentIndex < text.length) {
265
+ const remainingText = text.slice(currentIndex);
266
+ const chunkSize = findStreamChunkBoundary(
267
+ remainingText,
268
+ STREAM_CHUNK_MIN_SIZE
269
+ );
270
+ chunks.push(text.slice(currentIndex, currentIndex + chunkSize));
271
+ currentIndex += chunkSize;
272
+ }
273
+
274
+ return chunks;
275
+ }
276
+
277
+ function getCadencedStreamDelay({
278
+ targetDelay,
279
+ lastVisibleTextAt,
280
+ now,
281
+ }: {
282
+ targetDelay: number;
283
+ lastVisibleTextAt?: number;
284
+ now: number;
285
+ }): number {
286
+ if (targetDelay <= 0 || lastVisibleTextAt == null) {
287
+ return 0;
288
+ }
289
+ return Math.max(0, targetDelay - (now - lastVisibleTextAt));
290
+ }
291
+
292
+ async function waitForStreamDelay(
293
+ delay: number,
294
+ signal?: AbortSignal
295
+ ): Promise<void> {
296
+ if (delay <= 0 || isSignalAborted(signal)) {
297
+ return;
298
+ }
299
+ await new Promise<void>((resolve) => {
300
+ const timeoutRef: { current?: ReturnType<typeof setTimeout> } = {};
301
+ const onAbort = (): void => {
302
+ if (timeoutRef.current) {
303
+ clearTimeout(timeoutRef.current);
304
+ }
305
+ signal?.removeEventListener('abort', onAbort);
306
+ resolve();
307
+ };
308
+ timeoutRef.current = setTimeout(() => {
309
+ signal?.removeEventListener('abort', onAbort);
310
+ resolve();
311
+ }, delay);
312
+ signal?.addEventListener('abort', onAbort, { once: true });
313
+ if (isSignalAborted(signal)) {
314
+ onAbort();
315
+ }
316
+ });
317
+ }
318
+
319
+ function isSignalAborted(signal?: AbortSignal): boolean {
320
+ return signal?.aborted === true;
321
+ }
322
+
239
323
  function extractToken(
240
324
  chunk: AIMessageChunk
241
- ): [string, 'string' | 'input' | 'content'] | [undefined] {
325
+ ): [string, StreamTokenType] | [undefined] {
242
326
  if (typeof chunk.content === 'string') {
243
327
  return [chunk.content, 'string'];
244
328
  } else if (
@@ -269,7 +353,7 @@ function extractToken(
269
353
 
270
354
  function cloneChunk(
271
355
  text: string,
272
- tokenType: string,
356
+ tokenType: StreamTokenType,
273
357
  chunk: AIMessageChunk
274
358
  ): AIMessageChunk {
275
359
  if (tokenType === 'string') {
@@ -278,20 +362,19 @@ function cloneChunk(
278
362
  return chunk;
279
363
  }
280
364
  const content = chunk.content[0] as MessageContentComplex;
281
- if (tokenType === 'content' && content.type === 'text') {
365
+ if (content.type === 'text') {
282
366
  return new AIMessageChunk(
283
367
  Object.assign({}, chunk, {
284
368
  content: [Object.assign({}, content, { text })],
285
369
  })
286
370
  );
287
- } else if (tokenType === 'content' && content.type === 'text_delta') {
371
+ } else if (content.type === 'text_delta') {
288
372
  return new AIMessageChunk(
289
373
  Object.assign({}, chunk, {
290
374
  content: [Object.assign({}, content, { text })],
291
375
  })
292
376
  );
293
377
  } else if (
294
- tokenType === 'content' &&
295
378
  typeof content.type === 'string' &&
296
379
  content.type.startsWith('thinking')
297
380
  ) {
@@ -354,6 +437,13 @@ type CustomAnthropicInvocationParams = {
354
437
  output_config?: AnthropicOutputConfig;
355
438
  };
356
439
 
440
+ type QueuedGenerationChunk = {
441
+ chunk: ChatGenerationChunk;
442
+ token: string;
443
+ smooth: boolean;
444
+ textLength: number;
445
+ };
446
+
357
447
  export class CustomAnthropic extends ChatAnthropicMessages {
358
448
  _lc_stream_delay: number;
359
449
  private tools_in_params?: boolean;
@@ -365,7 +455,10 @@ export class CustomAnthropic extends ChatAnthropicMessages {
365
455
  super(fields);
366
456
  this.resetTokenEvents();
367
457
  this.setDirectFields(fields);
368
- this._lc_stream_delay = fields?._lc_stream_delay ?? 25;
458
+ this._lc_stream_delay = Math.max(
459
+ 0,
460
+ fields?._lc_stream_delay ?? DEFAULT_STREAM_DELAY
461
+ );
369
462
  this.outputConfig = fields?.outputConfig;
370
463
  this.inferenceGeo = fields?.inferenceGeo;
371
464
  this.contextManagement = fields?.contextManagement;
@@ -524,102 +617,281 @@ export class CustomAnthropic extends ChatAnthropicMessages {
524
617
 
525
618
  const shouldStreamUsage = options.streamUsage ?? this.streamUsage;
526
619
  let messageDeltaOutputTokens = 0;
620
+ const queuedChunks: QueuedGenerationChunk[] = [];
621
+ const producerState: {
622
+ done: boolean;
623
+ error?: unknown;
624
+ } = { done: false };
625
+ let queuedChunkIndex = 0;
626
+ let bufferedTextLength = 0;
627
+ let consumerClosed = false;
628
+ let notifyConsumer: (() => void) | undefined;
629
+ let notifyProducer: (() => void) | undefined;
630
+
631
+ const notifyConsumerForChunk = (): void => {
632
+ notifyConsumer?.();
633
+ notifyConsumer = undefined;
634
+ };
527
635
 
528
- for await (const data of stream) {
529
- if (options.signal?.aborted === true) {
530
- stream.controller.abort();
531
- throw new Error('AbortError: User aborted the request.');
532
- }
636
+ const notifyProducerForSpace = (): void => {
637
+ notifyProducer?.();
638
+ notifyProducer = undefined;
639
+ };
533
640
 
534
- const result = _makeMessageChunkFromAnthropicEvent(
535
- data as Anthropic.Beta.Messages.BetaRawMessageStreamEvent,
536
- {
537
- streamUsage: shouldStreamUsage,
538
- coerceContentToString,
539
- }
540
- );
541
- if (!result) continue;
641
+ const hasQueuedChunks = (): boolean =>
642
+ queuedChunkIndex < queuedChunks.length;
542
643
 
543
- let { chunk } = result;
544
- if (data.type === 'message_delta') {
545
- const incremental = withIncrementalMessageDeltaUsage(
546
- chunk,
547
- messageDeltaOutputTokens
548
- );
549
- chunk = incremental.chunk;
550
- messageDeltaOutputTokens = incremental.outputTokens;
644
+ const getQueuedChunkCount = (): number =>
645
+ queuedChunks.length - queuedChunkIndex;
646
+
647
+ const isQueueAtCapacity = (): boolean =>
648
+ getQueuedChunkCount() >= MAX_STREAM_QUEUE_CHUNKS ||
649
+ bufferedTextLength >= MAX_STREAM_QUEUE_TEXT_CHARS;
650
+
651
+ const waitForNextChunk = async (): Promise<void> => {
652
+ if (
653
+ hasQueuedChunks() ||
654
+ producerState.done ||
655
+ producerState.error != null
656
+ ) {
657
+ return;
658
+ }
659
+ await new Promise<void>((resolve) => {
660
+ notifyConsumer = resolve;
661
+ });
662
+ };
663
+
664
+ const waitForQueueSpace = async (): Promise<void> => {
665
+ while (
666
+ isQueueAtCapacity() &&
667
+ !consumerClosed &&
668
+ !isSignalAborted(options.signal)
669
+ ) {
670
+ await new Promise<void>((resolve) => {
671
+ const signal = options.signal;
672
+ const onAbort = (): void => {
673
+ signal?.removeEventListener('abort', onAbort);
674
+ resolve();
675
+ };
676
+ const onSpace = (): void => {
677
+ signal?.removeEventListener('abort', onAbort);
678
+ resolve();
679
+ };
680
+ notifyProducer = onSpace;
681
+ signal?.addEventListener('abort', onAbort, { once: true });
682
+ if (isSignalAborted(signal)) {
683
+ onAbort();
684
+ }
685
+ });
551
686
  }
552
- const [token = '', tokenType] = extractToken(chunk);
687
+ };
553
688
 
689
+ const dequeue = (): QueuedGenerationChunk | undefined => {
690
+ if (!hasQueuedChunks()) {
691
+ return undefined;
692
+ }
693
+ const queuedChunk = queuedChunks[queuedChunkIndex];
694
+ queuedChunkIndex++;
554
695
  if (
555
- !tokenType ||
556
- tokenType === 'input' ||
557
- (token === '' && (chunk.usage_metadata != null || chunk.id != null))
696
+ queuedChunkIndex > 128 &&
697
+ queuedChunkIndex * 2 >= queuedChunks.length
558
698
  ) {
559
- const generationChunk = this.createGenerationChunk({
699
+ queuedChunks.splice(0, queuedChunkIndex);
700
+ queuedChunkIndex = 0;
701
+ }
702
+ return queuedChunk;
703
+ };
704
+
705
+ const enqueue = async (
706
+ queuedChunk: QueuedGenerationChunk
707
+ ): Promise<void> => {
708
+ await waitForQueueSpace();
709
+ if (consumerClosed || isSignalAborted(options.signal)) {
710
+ stream.controller.abort();
711
+ throw new Error('AbortError: User aborted the request.');
712
+ }
713
+ queuedChunks.push(queuedChunk);
714
+ if (queuedChunk.smooth) {
715
+ bufferedTextLength += queuedChunk.textLength;
716
+ }
717
+ notifyConsumerForChunk();
718
+ };
719
+
720
+ const enqueueChunk = async ({
721
+ token,
722
+ chunk,
723
+ smooth,
724
+ }: {
725
+ token: string;
726
+ chunk: AIMessageChunk;
727
+ smooth: boolean;
728
+ }): Promise<void> => {
729
+ await enqueue({
730
+ token,
731
+ smooth,
732
+ textLength: smooth ? token.length : 0,
733
+ chunk: this.createGenerationChunk({
560
734
  token,
561
735
  chunk,
562
736
  shouldStreamUsage,
563
- });
564
- yield generationChunk;
565
- await runManager?.handleLLMNewToken(
566
- token,
567
- undefined,
568
- undefined,
569
- undefined,
570
- undefined,
571
- { chunk: generationChunk }
572
- );
573
- continue;
737
+ }),
738
+ });
739
+ };
740
+
741
+ const enqueueTextChunks = (
742
+ token: string,
743
+ tokenType: StreamTokenType,
744
+ chunk: AIMessageChunk
745
+ ): Promise<void> => {
746
+ if (token === '') {
747
+ return Promise.resolve();
748
+ }
749
+ if (this._lc_stream_delay <= 0) {
750
+ return enqueueChunk({ token, chunk, smooth: false });
574
751
  }
575
752
 
576
- const textStream = new TextStream(token, {
577
- delay: this._lc_stream_delay,
578
- firstWordChunk: true,
579
- minChunkSize: 4,
580
- maxChunkSize: 8,
581
- });
753
+ const tokenChunks = splitStreamToken(token);
754
+ if (tokenChunks.length <= 1) {
755
+ return enqueueChunk({ token, chunk, smooth: true });
756
+ }
757
+
758
+ let emittedUsage = false;
759
+ return tokenChunks.reduce(async (previous, currentToken) => {
760
+ await previous;
761
+ const newChunk = cloneChunk(currentToken, tokenType, chunk);
762
+ const chunkForToken =
763
+ emittedUsage && newChunk.usage_metadata != null
764
+ ? new AIMessageChunk(
765
+ Object.assign({}, newChunk, { usage_metadata: undefined })
766
+ )
767
+ : newChunk;
768
+
769
+ await enqueueChunk({
770
+ token: currentToken,
771
+ chunk: chunkForToken,
772
+ smooth: true,
773
+ });
774
+
775
+ if (newChunk.usage_metadata != null && !emittedUsage) {
776
+ emittedUsage = true;
777
+ }
778
+ }, Promise.resolve());
779
+ };
582
780
 
583
- const generator = textStream.generateText(options.signal);
781
+ const producer = (async (): Promise<void> => {
584
782
  try {
585
- let emittedUsage = false;
586
- for await (const currentToken of generator) {
587
- if ((options.signal as AbortSignal | undefined)?.aborted === true) {
588
- break;
783
+ for await (const data of stream) {
784
+ if (isSignalAborted(options.signal)) {
785
+ stream.controller.abort();
786
+ throw new Error('AbortError: User aborted the request.');
589
787
  }
590
- const newChunk = cloneChunk(currentToken, tokenType, chunk);
591
- const chunkForToken =
592
- emittedUsage && newChunk.usage_metadata != null
593
- ? new AIMessageChunk(
594
- Object.assign({}, newChunk, { usage_metadata: undefined })
595
- )
596
- : newChunk;
597
-
598
- const generationChunk = this.createGenerationChunk({
599
- token: currentToken,
600
- chunk: chunkForToken,
601
- shouldStreamUsage,
602
- });
603
-
604
- if (newChunk.usage_metadata != null && !emittedUsage) {
605
- emittedUsage = true;
606
- }
607
- yield generationChunk;
608
-
609
- await runManager?.handleLLMNewToken(
610
- currentToken,
611
- undefined,
612
- undefined,
613
- undefined,
614
- undefined,
615
- { chunk: generationChunk }
788
+
789
+ const result = _makeMessageChunkFromAnthropicEvent(
790
+ data as Anthropic.Beta.Messages.BetaRawMessageStreamEvent,
791
+ {
792
+ streamUsage: shouldStreamUsage,
793
+ coerceContentToString,
794
+ }
616
795
  );
796
+ if (!result) {
797
+ continue;
798
+ }
799
+
800
+ let { chunk } = result;
801
+ if (data.type === 'message_delta') {
802
+ const incremental = withIncrementalMessageDeltaUsage(
803
+ chunk,
804
+ messageDeltaOutputTokens
805
+ );
806
+ chunk = incremental.chunk;
807
+ messageDeltaOutputTokens = incremental.outputTokens;
808
+ }
809
+
810
+ const [token = '', tokenType] = extractToken(chunk);
811
+ if (
812
+ !tokenType ||
813
+ tokenType === 'input' ||
814
+ (token === '' && (chunk.usage_metadata != null || chunk.id != null))
815
+ ) {
816
+ await enqueueChunk({ token, chunk, smooth: false });
817
+ continue;
818
+ }
819
+
820
+ await enqueueTextChunks(token, tokenType, chunk);
617
821
  }
822
+ } catch (error) {
823
+ producerState.error = error;
618
824
  } finally {
619
- await generator.return();
825
+ producerState.done = true;
826
+ notifyConsumerForChunk();
620
827
  }
621
- }
828
+ })();
622
829
 
623
- this.resetTokenEvents();
830
+ let hasEmittedText = false;
831
+ let lastVisibleTextAt: number | undefined;
832
+ let keepStreaming = true;
833
+ try {
834
+ while (keepStreaming) {
835
+ if (isSignalAborted(options.signal)) {
836
+ stream.controller.abort();
837
+ throw new Error('AbortError: User aborted the request.');
838
+ }
839
+
840
+ await waitForNextChunk();
841
+ const queuedChunk = dequeue();
842
+
843
+ if (!queuedChunk) {
844
+ if (producerState.error != null) {
845
+ throw producerState.error;
846
+ }
847
+ if (producerState.done) {
848
+ keepStreaming = false;
849
+ }
850
+ continue;
851
+ }
852
+
853
+ if (queuedChunk.smooth) {
854
+ bufferedTextLength = Math.max(
855
+ 0,
856
+ bufferedTextLength - queuedChunk.textLength
857
+ );
858
+ notifyProducerForSpace();
859
+ await waitForStreamDelay(
860
+ getCadencedStreamDelay({
861
+ targetDelay: hasEmittedText ? this._lc_stream_delay : 0,
862
+ lastVisibleTextAt,
863
+ now: Date.now(),
864
+ }),
865
+ options.signal
866
+ );
867
+ if (isSignalAborted(options.signal)) {
868
+ stream.controller.abort();
869
+ throw new Error('AbortError: User aborted the request.');
870
+ }
871
+ hasEmittedText = true;
872
+ lastVisibleTextAt = Date.now();
873
+ } else {
874
+ notifyProducerForSpace();
875
+ }
876
+
877
+ yield queuedChunk.chunk;
878
+ await runManager?.handleLLMNewToken(
879
+ queuedChunk.token,
880
+ undefined,
881
+ undefined,
882
+ undefined,
883
+ undefined,
884
+ { chunk: queuedChunk.chunk }
885
+ );
886
+ }
887
+ } finally {
888
+ consumerClosed = true;
889
+ if (!producerState.done) {
890
+ stream.controller.abort();
891
+ notifyProducerForSpace();
892
+ }
893
+ await producer;
894
+ this.resetTokenEvents();
895
+ }
624
896
  }
625
897
  }
@@ -1143,6 +1143,70 @@ test('Anthropic stream usage handles multiple cumulative message_delta events',
1143
1143
  });
1144
1144
  });
1145
1145
 
1146
+ test('Anthropic stream smoothing skips empty text block starts', async () => {
1147
+ const events: AnthropicStreamEvent[] = [
1148
+ {
1149
+ type: 'content_block_start',
1150
+ index: 0,
1151
+ content_block: {
1152
+ type: 'text',
1153
+ text: '',
1154
+ citations: null,
1155
+ },
1156
+ },
1157
+ {
1158
+ type: 'content_block_delta',
1159
+ index: 0,
1160
+ delta: {
1161
+ type: 'text_delta',
1162
+ text: 'hello',
1163
+ },
1164
+ },
1165
+ { type: 'message_stop' },
1166
+ ];
1167
+ const model = new MockStreamingAnthropic(events);
1168
+ const contents: string[] = [];
1169
+
1170
+ for await (const chunk of await model.stream('hello')) {
1171
+ if (typeof chunk.content === 'string') {
1172
+ contents.push(chunk.content);
1173
+ }
1174
+ }
1175
+
1176
+ expect(contents).toEqual(['hello']);
1177
+ });
1178
+
1179
+ test('Anthropic stream smoothing closes a queue-full producer after early break', async () => {
1180
+ const events: AnthropicStreamEvent[] = [
1181
+ {
1182
+ type: 'content_block_delta',
1183
+ index: 0,
1184
+ delta: {
1185
+ type: 'text_delta',
1186
+ text: 'word '.repeat(3000),
1187
+ },
1188
+ },
1189
+ { type: 'message_stop' },
1190
+ ];
1191
+ const model = new MockStreamingAnthropic(events);
1192
+ const readOneChunk = async (): Promise<void> => {
1193
+ for await (const chunk of await model.stream('hello')) {
1194
+ if (typeof chunk.content === 'string' && chunk.content.length > 0) {
1195
+ break;
1196
+ }
1197
+ }
1198
+ };
1199
+ const timeout = new Promise<never>((_, reject) => {
1200
+ setTimeout(() => {
1201
+ reject(new Error('stream close timed out'));
1202
+ }, 1000);
1203
+ });
1204
+
1205
+ await expect(
1206
+ Promise.race([readOneChunk(), timeout])
1207
+ ).resolves.toBeUndefined();
1208
+ });
1209
+
1146
1210
  test('Anthropic live stream usage matches raw cumulative output snapshots', async () => {
1147
1211
  const model = new RecordingStreamingAnthropic({
1148
1212
  modelName,