@librechat/agents 2.4.84 → 2.4.86

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -17,28 +17,28 @@ import type {
17
17
  } from '@/types';
18
18
  import { Providers, ContentTypes } from '@/common';
19
19
 
20
- interface VisionMessageParams {
20
+ interface MediaMessageParams {
21
21
  message: {
22
22
  role: string;
23
23
  content: string;
24
24
  name?: string;
25
25
  [key: string]: any;
26
26
  };
27
- image_urls: MessageContentImageUrl[];
27
+ mediaParts: MessageContentComplex[];
28
28
  endpoint?: Providers;
29
29
  }
30
30
 
31
31
  /**
32
- * Formats a message to OpenAI Vision API payload format.
32
+ * Formats a message with media content (images, documents, videos, audios) to API payload format.
33
33
  *
34
- * @param {VisionMessageParams} params - The parameters for formatting.
35
- * @returns {Object} - The formatted message.
34
+ * @param params - The parameters for formatting.
35
+ * @returns - The formatted message.
36
36
  */
37
- export const formatVisionMessage = ({
37
+ export const formatMediaMessage = ({
38
38
  message,
39
- image_urls,
40
39
  endpoint,
41
- }: VisionMessageParams): {
40
+ mediaParts,
41
+ }: MediaMessageParams): {
42
42
  role: string;
43
43
  content: MessageContentComplex[];
44
44
  name?: string;
@@ -57,7 +57,7 @@ export const formatVisionMessage = ({
57
57
 
58
58
  if (endpoint === Providers.ANTHROPIC) {
59
59
  result.content = [
60
- ...image_urls,
60
+ ...mediaParts,
61
61
  { type: ContentTypes.TEXT, text: message.content },
62
62
  ] as MessageContentComplex[];
63
63
  return result;
@@ -65,7 +65,7 @@ export const formatVisionMessage = ({
65
65
 
66
66
  result.content = [
67
67
  { type: ContentTypes.TEXT, text: message.content },
68
- ...image_urls,
68
+ ...mediaParts,
69
69
  ] as MessageContentComplex[];
70
70
 
71
71
  return result;
@@ -78,6 +78,9 @@ interface MessageInput {
78
78
  text?: string;
79
79
  content?: string | MessageContentComplex[];
80
80
  image_urls?: MessageContentImageUrl[];
81
+ documents?: MessageContentComplex[];
82
+ videos?: MessageContentComplex[];
83
+ audios?: MessageContentComplex[];
81
84
  lc_id?: string[];
82
85
  [key: string]: any;
83
86
  }
@@ -100,14 +103,14 @@ interface FormattedMessage {
100
103
  /**
101
104
  * Formats a message to OpenAI payload format based on the provided options.
102
105
  *
103
- * @param {FormatMessageParams} params - The parameters for formatting.
104
- * @returns {FormattedMessage | HumanMessage | AIMessage | SystemMessage} - The formatted message.
106
+ * @param params - The parameters for formatting.
107
+ * @returns - The formatted message.
105
108
  */
106
109
  export const formatMessage = ({
107
110
  message,
108
111
  userName,
109
- assistantName,
110
112
  endpoint,
113
+ assistantName,
111
114
  langChain = false,
112
115
  }: FormatMessageParams):
113
116
  | FormattedMessage
@@ -135,21 +138,7 @@ export const formatMessage = ({
135
138
  content,
136
139
  };
137
140
 
138
- const { image_urls } = message;
139
- if (Array.isArray(image_urls) && image_urls.length > 0 && role === 'user') {
140
- return formatVisionMessage({
141
- message: {
142
- ...formattedMessage,
143
- content:
144
- typeof formattedMessage.content === 'string'
145
- ? formattedMessage.content
146
- : '',
147
- },
148
- image_urls,
149
- endpoint,
150
- });
151
- }
152
-
141
+ // Set name fields first
153
142
  if (_name != null && _name) {
154
143
  formattedMessage.name = _name;
155
144
  }
@@ -179,6 +168,45 @@ export const formatMessage = ({
179
168
  }
180
169
  }
181
170
 
171
+ const { image_urls, documents, videos, audios } = message;
172
+ const mediaParts: MessageContentComplex[] = [];
173
+
174
+ if (Array.isArray(documents) && documents.length > 0) {
175
+ mediaParts.push(...documents);
176
+ }
177
+
178
+ if (Array.isArray(videos) && videos.length > 0) {
179
+ mediaParts.push(...videos);
180
+ }
181
+
182
+ if (Array.isArray(audios) && audios.length > 0) {
183
+ mediaParts.push(...audios);
184
+ }
185
+
186
+ if (Array.isArray(image_urls) && image_urls.length > 0) {
187
+ mediaParts.push(...image_urls);
188
+ }
189
+
190
+ if (mediaParts.length > 0 && role === 'user') {
191
+ const mediaMessage = formatMediaMessage({
192
+ message: {
193
+ ...formattedMessage,
194
+ content:
195
+ typeof formattedMessage.content === 'string'
196
+ ? formattedMessage.content
197
+ : '',
198
+ },
199
+ mediaParts,
200
+ endpoint,
201
+ });
202
+
203
+ if (!langChain) {
204
+ return mediaMessage;
205
+ }
206
+
207
+ return new HumanMessage(mediaMessage);
208
+ }
209
+
182
210
  if (!langChain) {
183
211
  return formattedMessage;
184
212
  }
@@ -195,9 +223,9 @@ export const formatMessage = ({
195
223
  /**
196
224
  * Formats an array of messages for LangChain.
197
225
  *
198
- * @param {Array<MessageInput>} messages - The array of messages to format.
199
- * @param {Omit<FormatMessageParams, 'message' | 'langChain'>} formatOptions - The options for formatting each message.
200
- * @returns {Array<HumanMessage | AIMessage | SystemMessage>} - The array of formatted LangChain messages.
226
+ * @param messages - The array of messages to format.
227
+ * @param formatOptions - The options for formatting each message.
228
+ * @returns - The array of formatted LangChain messages.
201
229
  */
202
230
  export const formatLangChainMessages = (
203
231
  messages: Array<MessageInput>,
@@ -228,8 +256,8 @@ interface LangChainMessage {
228
256
  /**
229
257
  * Formats a LangChain message object by merging properties from `lc_kwargs` or `kwargs` and `additional_kwargs`.
230
258
  *
231
- * @param {LangChainMessage} message - The message object to format.
232
- * @returns {Record<string, any>} The formatted LangChain message.
259
+ * @param message - The message object to format.
260
+ * @returns - The formatted LangChain message.
233
261
  */
234
262
  export const formatFromLangChain = (
235
263
  message: LangChainMessage
@@ -357,10 +385,10 @@ function formatAssistantMessage(
357
385
  /**
358
386
  * Formats an array of messages for LangChain, handling tool calls and creating ToolMessage instances.
359
387
  *
360
- * @param {TPayload} payload - The array of messages to format.
361
- * @param {Record<number, number>} [indexTokenCountMap] - Optional map of message indices to token counts.
362
- * @param {Set<string>} [tools] - Optional set of tool names that are allowed in the request.
363
- * @returns {Object} - Object containing formatted messages and updated indexTokenCountMap if provided.
388
+ * @param payload - The array of messages to format.
389
+ * @param indexTokenCountMap - Optional map of message indices to token counts.
390
+ * @param tools - Optional set of tool names that are allowed in the request.
391
+ * @returns - Object containing formatted messages and updated indexTokenCountMap if provided.
364
392
  */
365
393
  export const formatAgentMessages = (
366
394
  payload: TPayload,
@@ -539,8 +567,8 @@ export const formatAgentMessages = (
539
567
 
540
568
  /**
541
569
  * Formats an array of messages for LangChain, making sure all content fields are strings
542
- * @param {Array<HumanMessage | AIMessage | SystemMessage | ToolMessage>} payload - The array of messages to format.
543
- * @returns {Array<HumanMessage | AIMessage | SystemMessage | ToolMessage>} - The array of formatted LangChain messages, including ToolMessages for tool calls.
570
+ * @param payload - The array of messages to format.
571
+ * @returns - The array of formatted LangChain messages, including ToolMessages for tool calls.
544
572
  */
545
573
  export const formatContentStrings = (
546
574
  payload: Array<BaseMessage>
@@ -1,8 +1,43 @@
1
- import { HumanMessage, AIMessage, SystemMessage } from '@langchain/core/messages';
2
- import { formatMessage, formatLangChainMessages, formatFromLangChain } from './format';
1
+ import {
2
+ HumanMessage,
3
+ AIMessage,
4
+ SystemMessage,
5
+ } from '@langchain/core/messages';
6
+ import type { MessageContentComplex } from '@/types';
7
+ import {
8
+ formatMessage,
9
+ formatLangChainMessages,
10
+ formatFromLangChain,
11
+ formatMediaMessage,
12
+ } from './format';
13
+ import { Providers } from '@/common';
3
14
 
4
15
  const NO_PARENT = '00000000-0000-0000-0000-000000000000';
5
16
 
17
+ /**
18
+ * Type for formatted message results with media content
19
+ */
20
+ interface FormattedMediaMessage {
21
+ role: string;
22
+ content: MessageContentComplex[];
23
+ name?: string;
24
+ }
25
+
26
+ /**
27
+ * Type guard to check if result is a FormattedMediaMessage
28
+ */
29
+ function isFormattedMediaMessage(
30
+ result: unknown
31
+ ): result is FormattedMediaMessage {
32
+ return (
33
+ typeof result === 'object' &&
34
+ result !== null &&
35
+ 'role' in result &&
36
+ 'content' in result &&
37
+ Array.isArray((result as FormattedMediaMessage).content)
38
+ );
39
+ }
40
+
6
41
  describe('formatMessage', () => {
7
42
  it('formats user message', () => {
8
43
  const input = {
@@ -187,6 +222,387 @@ describe('formatMessage', () => {
187
222
  });
188
223
  });
189
224
 
225
+ describe('formatMediaMessage', () => {
226
+ it('formats message with images for default provider', () => {
227
+ const message = {
228
+ role: 'user',
229
+ content: 'Check out this image',
230
+ name: 'John',
231
+ };
232
+ const mediaParts = [
233
+ {
234
+ type: 'image_url',
235
+ image_url: { url: 'https://example.com/image1.jpg' },
236
+ },
237
+ {
238
+ type: 'image_url',
239
+ image_url: { url: 'https://example.com/image2.jpg' },
240
+ },
241
+ ];
242
+
243
+ const result = formatMediaMessage({ message, mediaParts });
244
+
245
+ expect(result.role).toBe('user');
246
+ expect(result.name).toBe('John');
247
+ expect(Array.isArray(result.content)).toBe(true);
248
+ expect(result.content).toHaveLength(3);
249
+ expect(result.content[0]).toEqual({
250
+ type: 'text',
251
+ text: 'Check out this image',
252
+ });
253
+ expect(result.content[1]).toEqual(mediaParts[0]);
254
+ expect(result.content[2]).toEqual(mediaParts[1]);
255
+ });
256
+
257
+ it('formats message with images for Anthropic (media first)', () => {
258
+ const message = {
259
+ role: 'user',
260
+ content: 'Check out this image',
261
+ };
262
+ const mediaParts = [
263
+ {
264
+ type: 'image_url',
265
+ image_url: { url: 'https://example.com/image.jpg' },
266
+ },
267
+ ];
268
+
269
+ const result = formatMediaMessage({
270
+ message,
271
+ mediaParts,
272
+ endpoint: Providers.ANTHROPIC,
273
+ });
274
+
275
+ expect(result.content).toHaveLength(2);
276
+ expect(result.content[0]).toEqual(mediaParts[0]);
277
+ expect(result.content[1]).toEqual({
278
+ type: 'text',
279
+ text: 'Check out this image',
280
+ });
281
+ });
282
+
283
+ it('formats message with multiple media types', () => {
284
+ const message = {
285
+ role: 'user',
286
+ content: 'Check out these files',
287
+ };
288
+ const mediaParts = [
289
+ { type: 'document', document: { url: 'https://example.com/doc.pdf' } },
290
+ { type: 'video', video: { url: 'https://example.com/video.mp4' } },
291
+ { type: 'audio', audio: { url: 'https://example.com/audio.mp3' } },
292
+ {
293
+ type: 'image_url',
294
+ image_url: { url: 'https://example.com/image.jpg' },
295
+ },
296
+ ];
297
+
298
+ const result = formatMediaMessage({ message, mediaParts });
299
+
300
+ expect(result.content).toHaveLength(5);
301
+ expect(result.content[0]).toEqual({
302
+ type: 'text',
303
+ text: 'Check out these files',
304
+ });
305
+ expect(result.content[1]).toEqual(mediaParts[0]);
306
+ expect(result.content[2]).toEqual(mediaParts[1]);
307
+ expect(result.content[3]).toEqual(mediaParts[2]);
308
+ expect(result.content[4]).toEqual(mediaParts[3]);
309
+ });
310
+ });
311
+
312
+ describe('formatMessage with media', () => {
313
+ it('formats user message with image_urls (backward compatibility)', () => {
314
+ const input = {
315
+ message: {
316
+ sender: 'user',
317
+ text: 'Check out this image',
318
+ image_urls: [
319
+ {
320
+ type: 'image_url' as const,
321
+ image_url: { url: 'https://example.com/image.jpg' },
322
+ },
323
+ ],
324
+ },
325
+ userName: 'John',
326
+ };
327
+
328
+ const result = formatMessage(input);
329
+
330
+ expect(isFormattedMediaMessage(result)).toBe(true);
331
+ if (isFormattedMediaMessage(result)) {
332
+ expect(result.role).toBe('user');
333
+ expect(result.name).toBe('John');
334
+ expect(Array.isArray(result.content)).toBe(true);
335
+ expect(result.content).toHaveLength(2);
336
+ expect(result.content[0]).toEqual({
337
+ type: 'text',
338
+ text: 'Check out this image',
339
+ });
340
+ expect(result.content[1]).toEqual(input.message.image_urls[0]);
341
+ }
342
+ });
343
+
344
+ it('formats user message with documents', () => {
345
+ const input = {
346
+ message: {
347
+ role: 'user',
348
+ content: 'Review this document',
349
+ documents: [
350
+ {
351
+ type: 'document',
352
+ document: { url: 'https://example.com/report.pdf' },
353
+ },
354
+ ],
355
+ },
356
+ };
357
+
358
+ const result = formatMessage(input);
359
+
360
+ expect(isFormattedMediaMessage(result)).toBe(true);
361
+ if (isFormattedMediaMessage(result)) {
362
+ expect(result.role).toBe('user');
363
+ expect(Array.isArray(result.content)).toBe(true);
364
+ expect(result.content).toHaveLength(2);
365
+ expect(result.content[0]).toEqual({
366
+ type: 'text',
367
+ text: 'Review this document',
368
+ });
369
+ expect(result.content[1]).toEqual(input.message.documents[0]);
370
+ }
371
+ });
372
+
373
+ it('formats user message with videos', () => {
374
+ const input = {
375
+ message: {
376
+ role: 'user',
377
+ content: 'Watch this video',
378
+ videos: [
379
+ { type: 'video', video: { url: 'https://example.com/demo.mp4' } },
380
+ ],
381
+ },
382
+ };
383
+
384
+ const result = formatMessage(input);
385
+
386
+ expect(isFormattedMediaMessage(result)).toBe(true);
387
+ if (isFormattedMediaMessage(result)) {
388
+ expect(result.role).toBe('user');
389
+ expect(Array.isArray(result.content)).toBe(true);
390
+ expect(result.content).toHaveLength(2);
391
+ expect(result.content[0]).toEqual({
392
+ type: 'text',
393
+ text: 'Watch this video',
394
+ });
395
+ expect(result.content[1]).toEqual(input.message.videos[0]);
396
+ }
397
+ });
398
+
399
+ it('formats user message with audios', () => {
400
+ const input = {
401
+ message: {
402
+ role: 'user',
403
+ content: 'Listen to this',
404
+ audios: [
405
+ { type: 'audio', audio: { url: 'https://example.com/podcast.mp3' } },
406
+ ],
407
+ },
408
+ };
409
+
410
+ const result = formatMessage(input);
411
+
412
+ expect(isFormattedMediaMessage(result)).toBe(true);
413
+ if (isFormattedMediaMessage(result)) {
414
+ expect(result.role).toBe('user');
415
+ expect(Array.isArray(result.content)).toBe(true);
416
+ expect(result.content).toHaveLength(2);
417
+ expect(result.content[0]).toEqual({
418
+ type: 'text',
419
+ text: 'Listen to this',
420
+ });
421
+ expect(result.content[1]).toEqual(input.message.audios[0]);
422
+ }
423
+ });
424
+
425
+ it('formats user message with all media types in correct order', () => {
426
+ const input = {
427
+ message: {
428
+ role: 'user',
429
+ content: 'Check out all these files',
430
+ documents: [
431
+ {
432
+ type: 'document',
433
+ document: { url: 'https://example.com/doc.pdf' },
434
+ },
435
+ ],
436
+ videos: [
437
+ { type: 'video', video: { url: 'https://example.com/video.mp4' } },
438
+ ],
439
+ audios: [
440
+ { type: 'audio', audio: { url: 'https://example.com/audio.mp3' } },
441
+ ],
442
+ image_urls: [
443
+ {
444
+ type: 'image_url' as const,
445
+ image_url: { url: 'https://example.com/image.jpg' },
446
+ },
447
+ ],
448
+ },
449
+ };
450
+
451
+ const result = formatMessage(input);
452
+
453
+ expect(isFormattedMediaMessage(result)).toBe(true);
454
+ if (isFormattedMediaMessage(result)) {
455
+ expect(result.role).toBe('user');
456
+ expect(Array.isArray(result.content)).toBe(true);
457
+ expect(result.content).toHaveLength(5);
458
+ // Text first
459
+ expect(result.content[0]).toEqual({
460
+ type: 'text',
461
+ text: 'Check out all these files',
462
+ });
463
+ // Then documents, videos, audios, images
464
+ expect(result.content[1]).toEqual(input.message.documents[0]);
465
+ expect(result.content[2]).toEqual(input.message.videos[0]);
466
+ expect(result.content[3]).toEqual(input.message.audios[0]);
467
+ expect(result.content[4]).toEqual(input.message.image_urls[0]);
468
+ }
469
+ });
470
+
471
+ it('formats user message with multiple files of the same type', () => {
472
+ const input = {
473
+ message: {
474
+ role: 'user',
475
+ content: 'Review these documents',
476
+ documents: [
477
+ {
478
+ type: 'document',
479
+ document: { url: 'https://example.com/doc1.pdf' },
480
+ },
481
+ {
482
+ type: 'document',
483
+ document: { url: 'https://example.com/doc2.pdf' },
484
+ },
485
+ {
486
+ type: 'document',
487
+ document: { url: 'https://example.com/doc3.pdf' },
488
+ },
489
+ ],
490
+ },
491
+ };
492
+
493
+ const result = formatMessage(input);
494
+
495
+ expect(isFormattedMediaMessage(result)).toBe(true);
496
+ if (isFormattedMediaMessage(result)) {
497
+ expect(result.content).toHaveLength(4);
498
+ expect(result.content[0].type).toBe('text');
499
+ expect(result.content[1]).toEqual(input.message.documents[0]);
500
+ expect(result.content[2]).toEqual(input.message.documents[1]);
501
+ expect(result.content[3]).toEqual(input.message.documents[2]);
502
+ }
503
+ });
504
+
505
+ it('respects Anthropic provider ordering (media before text)', () => {
506
+ const input = {
507
+ message: {
508
+ role: 'user',
509
+ content: 'Check this out',
510
+ documents: [
511
+ {
512
+ type: 'document',
513
+ document: { url: 'https://example.com/doc.pdf' },
514
+ },
515
+ ],
516
+ image_urls: [
517
+ {
518
+ type: 'image_url' as const,
519
+ image_url: { url: 'https://example.com/image.jpg' },
520
+ },
521
+ ],
522
+ },
523
+ endpoint: Providers.ANTHROPIC,
524
+ };
525
+
526
+ const result = formatMessage(input);
527
+
528
+ expect(isFormattedMediaMessage(result)).toBe(true);
529
+ if (isFormattedMediaMessage(result)) {
530
+ expect(result.content).toHaveLength(3);
531
+ // Media first for Anthropic
532
+ expect(result.content[0]).toEqual(input.message.documents[0]);
533
+ expect(result.content[1]).toEqual(input.message.image_urls[0]);
534
+ expect(result.content[2]).toEqual({
535
+ type: 'text',
536
+ text: 'Check this out',
537
+ });
538
+ }
539
+ });
540
+
541
+ it('does not format media for assistant messages', () => {
542
+ const input = {
543
+ message: {
544
+ role: 'assistant',
545
+ content: 'Here is a response',
546
+ documents: [
547
+ {
548
+ type: 'document',
549
+ document: { url: 'https://example.com/doc.pdf' },
550
+ },
551
+ ],
552
+ },
553
+ };
554
+
555
+ const result = formatMessage(input);
556
+
557
+ expect(result).toMatchObject({
558
+ role: 'assistant',
559
+ content: 'Here is a response',
560
+ });
561
+ });
562
+
563
+ it('handles empty media arrays gracefully', () => {
564
+ const input = {
565
+ message: {
566
+ role: 'user',
567
+ content: 'Just text',
568
+ documents: [],
569
+ videos: [],
570
+ audios: [],
571
+ image_urls: [],
572
+ },
573
+ };
574
+
575
+ const result = formatMessage(input);
576
+
577
+ expect(result).toMatchObject({
578
+ role: 'user',
579
+ content: 'Just text',
580
+ });
581
+ });
582
+
583
+ it('formats media with langChain flag', () => {
584
+ const input = {
585
+ message: {
586
+ role: 'user',
587
+ content: 'Check this image',
588
+ image_urls: [
589
+ {
590
+ type: 'image_url' as const,
591
+ image_url: { url: 'https://example.com/image.jpg' },
592
+ },
593
+ ],
594
+ },
595
+ langChain: true,
596
+ };
597
+
598
+ const result = formatMessage(input);
599
+
600
+ expect(result).toBeInstanceOf(HumanMessage);
601
+ expect(Array.isArray(result.lc_kwargs.content)).toBe(true);
602
+ expect(result.lc_kwargs.content).toHaveLength(2);
603
+ });
604
+ });
605
+
190
606
  describe('formatLangChainMessages', () => {
191
607
  it('formats an array of messages for LangChain', () => {
192
608
  const messages = [
package/src/stream.ts CHANGED
@@ -394,9 +394,13 @@ export function createContentAggregator(): t.ContentAggregatorResult {
394
394
 
395
395
  const updateContent = (
396
396
  index: number,
397
- contentPart: t.MessageContentComplex,
397
+ contentPart?: t.MessageContentComplex,
398
398
  finalUpdate = false
399
399
  ): void => {
400
+ if (!contentPart) {
401
+ console.warn('No content part found in \'updateContent\'');
402
+ return;
403
+ }
400
404
  const partType = contentPart.type ?? '';
401
405
  if (!partType) {
402
406
  console.warn('No content type found in content part');
@@ -574,7 +578,10 @@ export function createContentAggregator(): t.ContentAggregatorResult {
574
578
  event === GraphEvents.ON_AGENT_UPDATE &&
575
579
  (data as t.AgentUpdate | undefined)?.agent_update
576
580
  ) {
577
- const contentPart = data as t.AgentUpdate;
581
+ const contentPart = data as t.AgentUpdate | undefined;
582
+ if (!contentPart) {
583
+ return;
584
+ }
578
585
  updateContent(contentPart.agent_update.index, contentPart);
579
586
  } else if (event === GraphEvents.ON_REASONING_DELTA) {
580
587
  const reasoningDelta = data as t.ReasoningDeltaEvent;