@lobehub/chat 1.131.4 → 1.132.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/changelog/v1.json +9 -0
- package/package.json +1 -1
- package/packages/context-engine/src/processors/MessageContent.ts +45 -10
- package/packages/context-engine/src/processors/__tests__/MessageContent.test.ts +179 -1
- package/packages/database/src/models/message.ts +9 -1
- package/packages/model-bank/src/aiModels/google.ts +7 -0
- package/packages/model-runtime/src/providers/google/index.ts +31 -8
- package/packages/model-runtime/src/types/chat.ts +6 -0
- package/packages/prompts/src/prompts/files/index.test.ts +148 -3
- package/packages/prompts/src/prompts/files/index.ts +17 -5
- package/packages/prompts/src/prompts/files/video.ts +17 -0
- package/packages/types/src/agent/index.ts +1 -1
- package/packages/types/src/message/chat.ts +2 -4
- package/packages/types/src/message/index.ts +1 -0
- package/packages/types/src/message/video.ts +5 -0
- package/packages/utils/src/client/index.ts +1 -0
- package/packages/utils/src/client/videoValidation.test.ts +53 -0
- package/packages/utils/src/client/videoValidation.ts +21 -0
- package/packages/utils/src/parseModels.ts +4 -0
- package/src/app/[variants]/(main)/chat/(workspace)/@conversation/features/ChatInput/useSend.ts +9 -4
- package/src/components/ModelSelect/index.tsx +14 -2
- package/src/features/ChatInput/ActionBar/Upload/ClientMode.tsx +7 -0
- package/src/features/ChatInput/ActionBar/Upload/ServerMode.tsx +29 -3
- package/src/features/ChatInput/components/UploadDetail/UploadStatus.tsx +1 -1
- package/src/features/Conversation/Messages/Assistant/index.tsx +4 -1
- package/src/features/Conversation/Messages/User/VideoFileListViewer.tsx +31 -0
- package/src/features/Conversation/Messages/User/index.tsx +3 -1
- package/src/hooks/useModelSupportVideo.ts +10 -0
- package/src/locales/default/chat.ts +4 -0
- package/src/locales/default/components.ts +1 -0
- package/src/services/chat/contextEngineering.test.ts +0 -1
- package/src/services/chat/contextEngineering.ts +3 -1
- package/src/services/chat/helper.ts +4 -0
- package/src/services/upload.ts +1 -1
- package/src/store/aiInfra/slices/aiModel/selectors.ts +7 -0
- package/src/store/chat/slices/aiChat/actions/generateAIChatV2.ts +22 -0
- package/src/store/chat/slices/message/action.ts +15 -14
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
# Changelog
|
|
4
4
|
|
|
5
|
+
## [Version 1.132.0](https://github.com/lobehub/lobe-chat/compare/v1.131.4...v1.132.0)
|
|
6
|
+
|
|
7
|
+
<sup>Released on **2025-09-21**</sup>
|
|
8
|
+
|
|
9
|
+
#### ✨ Features
|
|
10
|
+
|
|
11
|
+
- **misc**: Support google video understanding.
|
|
12
|
+
|
|
13
|
+
<br/>
|
|
14
|
+
|
|
15
|
+
<details>
|
|
16
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
|
17
|
+
|
|
18
|
+
#### What's improved
|
|
19
|
+
|
|
20
|
+
- **misc**: Support google video understanding, closes [#8761](https://github.com/lobehub/lobe-chat/issues/8761) ([f02d43b](https://github.com/lobehub/lobe-chat/commit/f02d43b))
|
|
21
|
+
|
|
22
|
+
</details>
|
|
23
|
+
|
|
24
|
+
<div align="right">
|
|
25
|
+
|
|
26
|
+
[](#readme-top)
|
|
27
|
+
|
|
28
|
+
</div>
|
|
29
|
+
|
|
5
30
|
### [Version 1.131.4](https://github.com/lobehub/lobe-chat/compare/v1.131.3...v1.131.4)
|
|
6
31
|
|
|
7
32
|
<sup>Released on **2025-09-21**</sup>
|
package/changelog/v1.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lobehub/chat",
|
|
3
|
-
"version": "1.131.4",
|
|
3
|
+
"version": "1.132.0",
|
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"framework",
|
|
@@ -17,6 +17,8 @@ export interface FileContextConfig {
|
|
|
17
17
|
export interface MessageContentConfig {
|
|
18
18
|
/** File context configuration */
|
|
19
19
|
fileContext?: FileContextConfig;
|
|
20
|
+
/** Function to check if video is supported */
|
|
21
|
+
isCanUseVideo?: (model: string, provider: string) => boolean | undefined;
|
|
20
22
|
/** Function to check if vision is supported */
|
|
21
23
|
isCanUseVision?: (model: string, provider: string) => boolean | undefined;
|
|
22
24
|
/** Model name */
|
|
@@ -33,7 +35,10 @@ export interface UserMessageContentPart {
|
|
|
33
35
|
signature?: string;
|
|
34
36
|
text?: string;
|
|
35
37
|
thinking?: string;
|
|
36
|
-
type: 'text' | 'image_url' | 'thinking';
|
|
38
|
+
type: 'text' | 'image_url' | 'thinking' | 'video_url';
|
|
39
|
+
video_url?: {
|
|
40
|
+
url: string;
|
|
41
|
+
};
|
|
37
42
|
}
|
|
38
43
|
|
|
39
44
|
/**
|
|
@@ -104,12 +109,13 @@ export class MessageContentProcessor extends BaseProcessor {
|
|
|
104
109
|
* Process user message content
|
|
105
110
|
*/
|
|
106
111
|
private async processUserMessage(message: any): Promise<any> {
|
|
107
|
-
// Check if images or files need processing
|
|
112
|
+
// Check if images, videos or files need processing
|
|
108
113
|
const hasImages = message.imageList && message.imageList.length > 0;
|
|
114
|
+
const hasVideos = message.videoList && message.videoList.length > 0;
|
|
109
115
|
const hasFiles = message.fileList && message.fileList.length > 0;
|
|
110
116
|
|
|
111
|
-
// If no images and files, return plain text content directly
|
|
112
|
-
if (!hasImages && !hasFiles) {
|
|
117
|
+
// If no images, videos and files, return plain text content directly
|
|
118
|
+
if (!hasImages && !hasVideos && !hasFiles) {
|
|
113
119
|
return {
|
|
114
120
|
...message,
|
|
115
121
|
content: message.content,
|
|
@@ -121,12 +127,13 @@ export class MessageContentProcessor extends BaseProcessor {
|
|
|
121
127
|
// Add text content
|
|
122
128
|
let textContent = message.content || '';
|
|
123
129
|
|
|
124
|
-
// Add file context (if file context is enabled and has files or images)
|
|
125
|
-
if ((hasFiles || hasImages) && this.config.fileContext?.enabled) {
|
|
130
|
+
// Add file context (if file context is enabled and has files, images or videos)
|
|
131
|
+
if ((hasFiles || hasImages || hasVideos) && this.config.fileContext?.enabled) {
|
|
126
132
|
const filesContext = filesPrompts({
|
|
127
133
|
addUrl: this.config.fileContext.includeFileUrl ?? true,
|
|
128
134
|
fileList: message.fileList,
|
|
129
|
-
imageList: message.imageList,
|
|
135
|
+
imageList: message.imageList || [],
|
|
136
|
+
videoList: message.videoList || [],
|
|
130
137
|
});
|
|
131
138
|
|
|
132
139
|
if (filesContext) {
|
|
@@ -148,17 +155,26 @@ export class MessageContentProcessor extends BaseProcessor {
|
|
|
148
155
|
contentParts.push(...imageContentParts);
|
|
149
156
|
}
|
|
150
157
|
|
|
158
|
+
// Process video content
|
|
159
|
+
if (hasVideos && this.config.isCanUseVideo?.(this.config.model, this.config.provider)) {
|
|
160
|
+
const videoContentParts = await this.processVideoList(message.videoList || []);
|
|
161
|
+
contentParts.push(...videoContentParts);
|
|
162
|
+
}
|
|
163
|
+
|
|
151
164
|
// 明确返回的字段,只保留必要的消息字段
|
|
152
|
-
const hasFileContext = (hasFiles || hasImages) && this.config.fileContext?.enabled;
|
|
165
|
+
const hasFileContext = (hasFiles || hasImages || hasVideos) && this.config.fileContext?.enabled;
|
|
153
166
|
const hasVisionContent =
|
|
154
167
|
hasImages && this.config.isCanUseVision?.(this.config.model, this.config.provider);
|
|
168
|
+
const hasVideoContent =
|
|
169
|
+
hasVideos && this.config.isCanUseVideo?.(this.config.model, this.config.provider);
|
|
155
170
|
|
|
156
|
-
// 如果只有文本内容且没有添加文件上下文也没有视觉内容,返回纯文本
|
|
171
|
+
// 如果只有文本内容且没有添加文件上下文也没有视觉/视频内容,返回纯文本
|
|
157
172
|
if (
|
|
158
173
|
contentParts.length === 1 &&
|
|
159
174
|
contentParts[0].type === 'text' &&
|
|
160
175
|
!hasFileContext &&
|
|
161
|
-
!hasVisionContent
|
|
176
|
+
!hasVisionContent &&
|
|
177
|
+
!hasVideoContent
|
|
162
178
|
) {
|
|
163
179
|
return {
|
|
164
180
|
content: contentParts[0].text,
|
|
@@ -274,6 +290,22 @@ export class MessageContentProcessor extends BaseProcessor {
|
|
|
274
290
|
);
|
|
275
291
|
}
|
|
276
292
|
|
|
293
|
+
/**
|
|
294
|
+
* 处理视频列表
|
|
295
|
+
*/
|
|
296
|
+
private async processVideoList(videoList: any[]): Promise<UserMessageContentPart[]> {
|
|
297
|
+
if (!videoList || videoList.length === 0) {
|
|
298
|
+
return [];
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return videoList.map((video) => {
|
|
302
|
+
return {
|
|
303
|
+
type: 'video_url',
|
|
304
|
+
video_url: { url: video.url },
|
|
305
|
+
} as UserMessageContentPart;
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
|
|
277
309
|
/**
|
|
278
310
|
* 验证内容部分格式
|
|
279
311
|
*/
|
|
@@ -290,6 +322,9 @@ export class MessageContentProcessor extends BaseProcessor {
|
|
|
290
322
|
case 'thinking': {
|
|
291
323
|
return !!(part.thinking && part.signature);
|
|
292
324
|
}
|
|
325
|
+
case 'video_url': {
|
|
326
|
+
return !!(part.video_url && part.video_url.url);
|
|
327
|
+
}
|
|
293
328
|
default: {
|
|
294
329
|
return false;
|
|
295
330
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ChatImageItem, ChatMessage } from '@lobechat/types';
|
|
1
|
+
import { ChatImageItem, ChatMessage, ChatVideoItem } from '@lobechat/types';
|
|
2
2
|
import { describe, expect, it, vi } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import type { PipelineContext } from '../../types';
|
|
@@ -26,6 +26,7 @@ const createContext = (messages: ChatMessage[]): PipelineContext => ({
|
|
|
26
26
|
});
|
|
27
27
|
|
|
28
28
|
const mockIsCanUseVision = vi.fn();
|
|
29
|
+
const mockIsCanUseVideo = vi.fn();
|
|
29
30
|
|
|
30
31
|
describe('MessageContentProcessor', () => {
|
|
31
32
|
describe('Image processing functionality', () => {
|
|
@@ -391,4 +392,181 @@ describe('MessageContentProcessor', () => {
|
|
|
391
392
|
expect(result.metadata.assistantMessagesProcessed).toBe(1);
|
|
392
393
|
});
|
|
393
394
|
});
|
|
395
|
+
|
|
396
|
+
describe('Video processing functionality', () => {
|
|
397
|
+
it('should return empty video content parts if model cannot use video', async () => {
|
|
398
|
+
mockIsCanUseVideo.mockReturnValue(false);
|
|
399
|
+
|
|
400
|
+
const processor = new MessageContentProcessor({
|
|
401
|
+
model: 'any-model',
|
|
402
|
+
provider: 'any-provider',
|
|
403
|
+
isCanUseVideo: mockIsCanUseVideo,
|
|
404
|
+
fileContext: { enabled: false },
|
|
405
|
+
});
|
|
406
|
+
|
|
407
|
+
const messages: ChatMessage[] = [
|
|
408
|
+
{
|
|
409
|
+
id: 'test',
|
|
410
|
+
role: 'user',
|
|
411
|
+
content: 'Hello',
|
|
412
|
+
videoList: [{ url: 'video_url', alt: 'test video', id: 'test' } as ChatVideoItem],
|
|
413
|
+
createdAt: Date.now(),
|
|
414
|
+
updatedAt: Date.now(),
|
|
415
|
+
meta: {},
|
|
416
|
+
},
|
|
417
|
+
];
|
|
418
|
+
|
|
419
|
+
const result = await processor.process(createContext(messages));
|
|
420
|
+
|
|
421
|
+
// Should return plain text when video is not supported
|
|
422
|
+
expect(result.messages[0].content).toBe('Hello');
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
it('should process videos if model can use video', async () => {
|
|
426
|
+
mockIsCanUseVideo.mockReturnValue(true);
|
|
427
|
+
|
|
428
|
+
const processor = new MessageContentProcessor({
|
|
429
|
+
model: 'gpt-4-vision',
|
|
430
|
+
provider: 'openai',
|
|
431
|
+
isCanUseVideo: mockIsCanUseVideo,
|
|
432
|
+
fileContext: { enabled: false },
|
|
433
|
+
});
|
|
434
|
+
|
|
435
|
+
const messages: ChatMessage[] = [
|
|
436
|
+
{
|
|
437
|
+
id: 'test',
|
|
438
|
+
role: 'user',
|
|
439
|
+
content: 'Hello',
|
|
440
|
+
videoList: [
|
|
441
|
+
{ url: 'http://example.com/video.mp4', alt: 'test video', id: 'test1' },
|
|
442
|
+
{ url: 'http://example.com/video2.mp4', alt: 'test video 2', id: 'test2' },
|
|
443
|
+
] as ChatVideoItem[],
|
|
444
|
+
createdAt: Date.now(),
|
|
445
|
+
updatedAt: Date.now(),
|
|
446
|
+
meta: {},
|
|
447
|
+
},
|
|
448
|
+
];
|
|
449
|
+
|
|
450
|
+
const result = await processor.process(createContext(messages));
|
|
451
|
+
|
|
452
|
+
const content = result.messages[0].content as any[];
|
|
453
|
+
expect(content).toHaveLength(3); // text + 2 videos
|
|
454
|
+
expect(content[0].type).toBe('text');
|
|
455
|
+
expect(content[0].text).toBe('Hello');
|
|
456
|
+
expect(content[1].type).toBe('video_url');
|
|
457
|
+
expect(content[1].video_url.url).toBe('http://example.com/video.mp4');
|
|
458
|
+
expect(content[2].type).toBe('video_url');
|
|
459
|
+
expect(content[2].video_url.url).toBe('http://example.com/video2.mp4');
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
it('should handle video disabled scenario correctly', async () => {
|
|
463
|
+
mockIsCanUseVideo.mockReturnValue(false);
|
|
464
|
+
|
|
465
|
+
const processor = new MessageContentProcessor({
|
|
466
|
+
model: 'text-model',
|
|
467
|
+
provider: 'openai',
|
|
468
|
+
isCanUseVideo: mockIsCanUseVideo,
|
|
469
|
+
fileContext: { enabled: false },
|
|
470
|
+
});
|
|
471
|
+
|
|
472
|
+
const messages: ChatMessage[] = [
|
|
473
|
+
{
|
|
474
|
+
id: 'test',
|
|
475
|
+
role: 'user',
|
|
476
|
+
content: 'Analyze this video',
|
|
477
|
+
videoList: [
|
|
478
|
+
{ url: 'http://example.com/video.mp4', alt: 'test video', id: 'test' },
|
|
479
|
+
] as ChatVideoItem[],
|
|
480
|
+
createdAt: Date.now(),
|
|
481
|
+
updatedAt: Date.now(),
|
|
482
|
+
meta: {},
|
|
483
|
+
},
|
|
484
|
+
];
|
|
485
|
+
|
|
486
|
+
const result = await processor.process(createContext(messages));
|
|
487
|
+
|
|
488
|
+
// Should return plain text only when video not supported
|
|
489
|
+
expect(result.messages[0].content).toBe('Analyze this video');
|
|
490
|
+
});
|
|
491
|
+
|
|
492
|
+
it('should include videos in file context when enabled', async () => {
|
|
493
|
+
mockIsCanUseVideo.mockReturnValue(false); // Video processing disabled but file context enabled
|
|
494
|
+
|
|
495
|
+
const processor = new MessageContentProcessor({
|
|
496
|
+
model: 'gpt-4',
|
|
497
|
+
provider: 'openai',
|
|
498
|
+
isCanUseVideo: mockIsCanUseVideo,
|
|
499
|
+
fileContext: { enabled: true, includeFileUrl: true },
|
|
500
|
+
});
|
|
501
|
+
|
|
502
|
+
const messages: ChatMessage[] = [
|
|
503
|
+
{
|
|
504
|
+
id: 'test',
|
|
505
|
+
role: 'user',
|
|
506
|
+
content: 'Hello',
|
|
507
|
+
videoList: [
|
|
508
|
+
{
|
|
509
|
+
id: 'video1',
|
|
510
|
+
url: 'http://example.com/video.mp4',
|
|
511
|
+
alt: 'Test video',
|
|
512
|
+
},
|
|
513
|
+
] as ChatVideoItem[],
|
|
514
|
+
createdAt: Date.now(),
|
|
515
|
+
updatedAt: Date.now(),
|
|
516
|
+
meta: {},
|
|
517
|
+
},
|
|
518
|
+
];
|
|
519
|
+
|
|
520
|
+
const result = await processor.process(createContext(messages));
|
|
521
|
+
|
|
522
|
+
// Should return structured content when has videos and file context enabled
|
|
523
|
+
expect(Array.isArray(result.messages[0].content)).toBe(true);
|
|
524
|
+
const content = result.messages[0].content as any[];
|
|
525
|
+
expect(content).toHaveLength(1);
|
|
526
|
+
expect(content[0].type).toBe('text');
|
|
527
|
+
expect(content[0].text).toContain('SYSTEM CONTEXT');
|
|
528
|
+
expect(content[0].text).toContain('Hello');
|
|
529
|
+
});
|
|
530
|
+
|
|
531
|
+
it('should handle mixed images and videos correctly', async () => {
|
|
532
|
+
mockIsCanUseVision.mockReturnValue(true);
|
|
533
|
+
mockIsCanUseVideo.mockReturnValue(true);
|
|
534
|
+
|
|
535
|
+
const processor = new MessageContentProcessor({
|
|
536
|
+
model: 'gpt-4-vision',
|
|
537
|
+
provider: 'openai',
|
|
538
|
+
isCanUseVideo: mockIsCanUseVideo,
|
|
539
|
+
isCanUseVision: mockIsCanUseVision,
|
|
540
|
+
fileContext: { enabled: false },
|
|
541
|
+
});
|
|
542
|
+
|
|
543
|
+
const messages: ChatMessage[] = [
|
|
544
|
+
{
|
|
545
|
+
id: 'test',
|
|
546
|
+
role: 'user',
|
|
547
|
+
content: 'Analyze these media files',
|
|
548
|
+
imageList: [
|
|
549
|
+
{ url: 'http://example.com/image.jpg', alt: 'test image', id: 'img1' },
|
|
550
|
+
] as ChatImageItem[],
|
|
551
|
+
videoList: [
|
|
552
|
+
{ url: 'http://example.com/video.mp4', alt: 'test video', id: 'vid1' },
|
|
553
|
+
] as ChatVideoItem[],
|
|
554
|
+
createdAt: Date.now(),
|
|
555
|
+
updatedAt: Date.now(),
|
|
556
|
+
meta: {},
|
|
557
|
+
},
|
|
558
|
+
];
|
|
559
|
+
|
|
560
|
+
const result = await processor.process(createContext(messages));
|
|
561
|
+
|
|
562
|
+
const content = result.messages[0].content as any[];
|
|
563
|
+
expect(content).toHaveLength(3); // text + image + video
|
|
564
|
+
expect(content[0].type).toBe('text');
|
|
565
|
+
expect(content[0].text).toBe('Analyze these media files');
|
|
566
|
+
expect(content[1].type).toBe('image_url');
|
|
567
|
+
expect(content[1].image_url.url).toBe('http://example.com/image.jpg');
|
|
568
|
+
expect(content[2].type).toBe('video_url');
|
|
569
|
+
expect(content[2].video_url.url).toBe('http://example.com/video.mp4');
|
|
570
|
+
});
|
|
571
|
+
});
|
|
394
572
|
});
|
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
ChatTTS,
|
|
6
6
|
ChatToolPayload,
|
|
7
7
|
ChatTranslate,
|
|
8
|
+
ChatVideoItem,
|
|
8
9
|
CreateMessageParams,
|
|
9
10
|
MessageItem,
|
|
10
11
|
ModelRankItem,
|
|
@@ -175,7 +176,10 @@ export class MessageModel {
|
|
|
175
176
|
}
|
|
176
177
|
|
|
177
178
|
const imageList = relatedFileList.filter((i) => (i.fileType || '').startsWith('image'));
|
|
178
|
-
const fileList = relatedFileList.filter((i) => !(i.fileType || '').startsWith('image'));
|
|
179
|
+
const videoList = relatedFileList.filter((i) => (i.fileType || '').startsWith('video'));
|
|
180
|
+
const fileList = relatedFileList.filter(
|
|
181
|
+
(i) => !(i.fileType || '').startsWith('image') && !(i.fileType || '').startsWith('video'),
|
|
182
|
+
);
|
|
179
183
|
|
|
180
184
|
// 3. get relative file chunks
|
|
181
185
|
const chunksList = await this.db
|
|
@@ -251,6 +255,10 @@ export class MessageModel {
|
|
|
251
255
|
ragQuery: messageQuery?.rewriteQuery,
|
|
252
256
|
ragQueryId: messageQuery?.id,
|
|
253
257
|
ragRawQuery: messageQuery?.userQuery,
|
|
258
|
+
videoList: videoList
|
|
259
|
+
.filter((relation) => relation.messageId === item.id)
|
|
260
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
261
|
+
.map<ChatVideoItem>(({ id, url, name }) => ({ alt: name!, id, url })),
|
|
254
262
|
} as unknown as ChatMessage;
|
|
255
263
|
},
|
|
256
264
|
);
|
|
@@ -7,6 +7,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
7
7
|
functionCall: true,
|
|
8
8
|
reasoning: true,
|
|
9
9
|
search: true,
|
|
10
|
+
video: true,
|
|
10
11
|
vision: true,
|
|
11
12
|
},
|
|
12
13
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -60,6 +61,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
60
61
|
functionCall: true,
|
|
61
62
|
reasoning: true,
|
|
62
63
|
search: true,
|
|
64
|
+
video: true,
|
|
63
65
|
vision: true,
|
|
64
66
|
},
|
|
65
67
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -112,6 +114,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
112
114
|
functionCall: true,
|
|
113
115
|
reasoning: true,
|
|
114
116
|
search: true,
|
|
117
|
+
video: true,
|
|
115
118
|
vision: true,
|
|
116
119
|
},
|
|
117
120
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -163,6 +166,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
163
166
|
functionCall: true,
|
|
164
167
|
reasoning: true,
|
|
165
168
|
search: true,
|
|
169
|
+
video: true,
|
|
166
170
|
vision: true,
|
|
167
171
|
},
|
|
168
172
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -191,6 +195,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
191
195
|
functionCall: true,
|
|
192
196
|
reasoning: true,
|
|
193
197
|
search: true,
|
|
198
|
+
video: true,
|
|
194
199
|
vision: true,
|
|
195
200
|
},
|
|
196
201
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -240,6 +245,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
240
245
|
functionCall: true,
|
|
241
246
|
reasoning: true,
|
|
242
247
|
search: true,
|
|
248
|
+
video: true,
|
|
243
249
|
vision: true,
|
|
244
250
|
},
|
|
245
251
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -267,6 +273,7 @@ const googleChatModels: AIChatModelCard[] = [
|
|
|
267
273
|
functionCall: true,
|
|
268
274
|
reasoning: true,
|
|
269
275
|
search: true,
|
|
276
|
+
video: true,
|
|
270
277
|
vision: true,
|
|
271
278
|
},
|
|
272
279
|
contextWindowTokens: 1_048_576 + 65_536,
|
|
@@ -439,10 +439,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
|
|
|
439
439
|
}
|
|
440
440
|
|
|
441
441
|
return {
|
|
442
|
-
inlineData: {
|
|
443
|
-
data: base64,
|
|
444
|
-
mimeType: mimeType || 'image/png',
|
|
445
|
-
},
|
|
442
|
+
inlineData: { data: base64, mimeType: mimeType || 'image/png' },
|
|
446
443
|
};
|
|
447
444
|
}
|
|
448
445
|
|
|
@@ -450,15 +447,41 @@ export class LobeGoogleAI implements LobeRuntimeAI {
|
|
|
450
447
|
const { base64, mimeType } = await imageUrlToBase64(content.image_url.url);
|
|
451
448
|
|
|
452
449
|
return {
|
|
453
|
-
inlineData: {
|
|
454
|
-
data: base64,
|
|
455
|
-
mimeType,
|
|
456
|
-
},
|
|
450
|
+
inlineData: { data: base64, mimeType },
|
|
457
451
|
};
|
|
458
452
|
}
|
|
459
453
|
|
|
460
454
|
throw new TypeError(`currently we don't support image url: ${content.image_url.url}`);
|
|
461
455
|
}
|
|
456
|
+
|
|
457
|
+
case 'video_url': {
|
|
458
|
+
const { mimeType, base64, type } = parseDataUri(content.video_url.url);
|
|
459
|
+
|
|
460
|
+
if (type === 'base64') {
|
|
461
|
+
if (!base64) {
|
|
462
|
+
throw new TypeError("Video URL doesn't contain base64 data");
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
return {
|
|
466
|
+
inlineData: { data: base64, mimeType: mimeType || 'video/mp4' },
|
|
467
|
+
};
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
if (type === 'url') {
|
|
471
|
+
// For video URLs, we need to fetch and convert to base64
|
|
472
|
+
// Note: This might need size/duration limits for practical use
|
|
473
|
+
const response = await fetch(content.video_url.url);
|
|
474
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
475
|
+
const base64 = Buffer.from(arrayBuffer).toString('base64');
|
|
476
|
+
const mimeType = response.headers.get('content-type') || 'video/mp4';
|
|
477
|
+
|
|
478
|
+
return {
|
|
479
|
+
inlineData: { data: base64, mimeType },
|
|
480
|
+
};
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
throw new TypeError(`currently we don't support video url: ${content.video_url.url}`);
|
|
484
|
+
}
|
|
462
485
|
}
|
|
463
486
|
};
|
|
464
487
|
|
|
@@ -21,9 +21,15 @@ interface UserMessageContentPartImage {
|
|
|
21
21
|
type: 'image_url';
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
interface UserMessageContentPartVideo {
|
|
25
|
+
type: 'video_url';
|
|
26
|
+
video_url: { url: string };
|
|
27
|
+
}
|
|
28
|
+
|
|
24
29
|
export type UserMessageContentPart =
|
|
25
30
|
| UserMessageContentPartText
|
|
26
31
|
| UserMessageContentPartImage
|
|
32
|
+
| UserMessageContentPartVideo
|
|
27
33
|
| UserMessageContentPartThinking;
|
|
28
34
|
|
|
29
35
|
export interface OpenAIChatMessage {
|