@lobehub/chat 1.71.0 → 1.71.2

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (57)
  1. package/CHANGELOG.md +58 -0
  2. package/changelog/v1.json +21 -0
  3. package/locales/ar/components.json +1 -0
  4. package/locales/ar/models.json +3 -0
  5. package/locales/bg-BG/components.json +1 -0
  6. package/locales/bg-BG/models.json +3 -0
  7. package/locales/de-DE/components.json +1 -0
  8. package/locales/de-DE/models.json +3 -0
  9. package/locales/en-US/components.json +1 -0
  10. package/locales/en-US/models.json +3 -0
  11. package/locales/es-ES/components.json +1 -0
  12. package/locales/es-ES/models.json +3 -0
  13. package/locales/fa-IR/components.json +1 -0
  14. package/locales/fa-IR/models.json +3 -0
  15. package/locales/fr-FR/components.json +1 -0
  16. package/locales/fr-FR/models.json +3 -0
  17. package/locales/it-IT/components.json +1 -0
  18. package/locales/it-IT/models.json +3 -0
  19. package/locales/ja-JP/components.json +1 -0
  20. package/locales/ja-JP/models.json +3 -0
  21. package/locales/ko-KR/components.json +1 -0
  22. package/locales/ko-KR/models.json +3 -0
  23. package/locales/nl-NL/components.json +1 -0
  24. package/locales/nl-NL/models.json +3 -0
  25. package/locales/pl-PL/components.json +1 -0
  26. package/locales/pl-PL/models.json +3 -0
  27. package/locales/pt-BR/components.json +1 -0
  28. package/locales/pt-BR/models.json +3 -0
  29. package/locales/ru-RU/components.json +1 -0
  30. package/locales/ru-RU/models.json +3 -0
  31. package/locales/tr-TR/components.json +1 -0
  32. package/locales/tr-TR/models.json +3 -0
  33. package/locales/vi-VN/components.json +1 -0
  34. package/locales/vi-VN/models.json +3 -0
  35. package/locales/zh-CN/components.json +1 -0
  36. package/locales/zh-CN/models.json +3 -0
  37. package/locales/zh-TW/components.json +1 -0
  38. package/locales/zh-TW/models.json +3 -0
  39. package/package.json +1 -1
  40. package/src/components/ModelSelect/index.tsx +16 -0
  41. package/src/config/aiModels/google.ts +19 -0
  42. package/src/config/aiModels/vertexai.ts +24 -6
  43. package/src/config/modelProviders/vertexai.ts +1 -1
  44. package/src/const/settings/agent.ts +3 -4
  45. package/src/database/server/models/__tests__/chunk.test.ts +4 -4
  46. package/src/database/server/models/chunk.ts +2 -1
  47. package/src/libs/agent-runtime/anthropic/index.ts +10 -1
  48. package/src/libs/agent-runtime/google/index.ts +25 -10
  49. package/src/libs/agent-runtime/runtimeMap.ts +47 -48
  50. package/src/libs/agent-runtime/utils/streams/google-ai.test.ts +90 -0
  51. package/src/libs/agent-runtime/utils/streams/google-ai.ts +2 -1
  52. package/src/libs/agent-runtime/utils/streams/index.ts +1 -0
  53. package/src/locales/default/components.ts +1 -0
  54. package/src/server/routers/lambda/agent.ts +10 -7
  55. package/src/server/routers/lambda/chunk.ts +65 -49
  56. package/src/store/chat/slices/aiChat/actions/rag.ts +18 -12
  57. package/src/types/aiModel.ts +4 -1
package/src/database/server/models/__tests__/chunk.test.ts

@@ -495,13 +495,13 @@ content in Table html is below:
     });
 
     // Test the result limit
-    it('should limit results to 5 items', async () => {
+    it('should limit results to 15 items', async () => {
       const fileId = '1';
-      // Create 6 chunks
+      // Create 24 chunks
       const chunkResult = await serverDB
         .insert(chunks)
         .values(
-          Array(6)
+          Array(24)
            .fill(0)
            .map((_, i) => ({ text: `Test Chunk ${i}`, userId })),
         )
@@ -528,7 +528,7 @@ content in Table html is below:
         query: 'test',
       });
 
-      expect(result).toHaveLength(5);
+      expect(result).toHaveLength(15);
     });
   });
 });
package/src/database/server/models/chunk.ts

@@ -207,7 +207,8 @@ export class ChunkModel {
       .leftJoin(files, eq(files.id, fileChunks.fileId))
       .where(inArray(fileChunks.fileId, fileIds))
       .orderBy((t) => desc(t.similarity))
-      .limit(5);
+      // loosen to 15 for now
+      .limit(15);
 
     return result.map((item) => {
       return {
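The widened limit hands more candidate chunks to the prompt builder (the test above now seeds 24 chunks and expects 15 back). For orientation, a minimal sketch of the kind of drizzle-orm query this caps — `db`, `chunks`, and `queryEmbedding` are stand-ins for the app's real handles, and the pgvector `<=>` cosine-distance operator is an assumption, not taken from this diff:

import { desc, sql } from 'drizzle-orm';

// similarity = 1 - cosine distance between the stored vector and the query vector
const similarity = sql<number>`1 - (${chunks.embedding} <=> ${JSON.stringify(queryEmbedding)}::vector)`;

const topChunks = await db
  .select({ id: chunks.id, similarity, text: chunks.text })
  .from(chunks)
  .orderBy(desc(similarity))
  .limit(15); // was 5; loosened so a later rerank step has more material to work with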
package/src/libs/agent-runtime/anthropic/index.ts

@@ -38,6 +38,10 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
   apiKey?: string;
   private id: string;
 
+  private isDebug() {
+    return process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1';
+  }
+
   constructor({ apiKey, baseURL = DEFAULT_BASE_URL, id, ...res }: AnthropicAIParams = {}) {
     if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);
 
@@ -51,6 +55,11 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
     try {
       const anthropicPayload = await this.buildAnthropicPayload(payload);
 
+      if (this.isDebug()) {
+        console.log('[requestPayload]');
+        console.log(JSON.stringify(anthropicPayload), '\n');
+      }
+
       const response = await this.client.messages.create(
         { ...anthropicPayload, stream: true },
         {
@@ -60,7 +69,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
 
       const [prod, debug] = response.tee();
 
-      if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
+      if (this.isDebug()) {
        debugStream(debug.toReadableStream()).catch(console.error);
       }
 
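Both debug paths now key off the single isDebug() helper, so the new payload dump and the existing stream dump toggle together. The stream side relies on tee(); a sketch of the general pattern with a plain web ReadableStream (the SDK's stream wrapper exposes an analogous tee()/toReadableStream() pair):

// Tee the upstream so the caller's branch is untouched while the other branch
// is drained purely for logging; debug failures never affect the response.
const logStream = async (stream: ReadableStream<Uint8Array>) => {
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    console.log(decoder.decode(value, { stream: true }));
  }
};

const respond = (upstream: ReadableStream<Uint8Array>, debug: boolean) => {
  const [prod, dump] = upstream.tee();
  if (debug) logStream(dump).catch(console.error);
  return prod; // consumers only ever see this branch
};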
package/src/libs/agent-runtime/google/index.ts

@@ -10,7 +10,6 @@ import {
   SchemaType,
 } from '@google/generative-ai';
 
-import { VertexAIStream } from '@/libs/agent-runtime/utils/streams/vertex-ai';
 import type { ChatModelCard } from '@/types/llm';
 import { imageUrlToBase64 } from '@/utils/imageToBase64';
 import { safeParseJSON } from '@/utils/safeParseJSON';
@@ -24,15 +23,27 @@ import {
   OpenAIChatMessage,
   UserMessageContentPart,
 } from '../types';
-import { ModelProvider } from '../types/type';
 import { AgentRuntimeError } from '../utils/createError';
 import { debugStream } from '../utils/debugStream';
 import { StreamingResponse } from '../utils/response';
-import { GoogleGenerativeAIStream, convertIterableToStream } from '../utils/streams';
+import {
+  GoogleGenerativeAIStream,
+  VertexAIStream,
+  convertIterableToStream,
+} from '../utils/streams';
 import { parseDataUri } from '../utils/uriParser';
 
 const modelsOffSafetySettings = new Set(['gemini-2.0-flash-exp']);
-const modelsWithModalities = new Set(['gemini-2.0-flash-exp']);
+
+const modelsWithModalities = new Set([
+  'gemini-2.0-flash-exp',
+  'gemini-2.0-flash-exp-image-generation',
+]);
+
+const modelsDisableInstuction = new Set([
+  'gemini-2.0-flash-exp',
+  'gemini-2.0-flash-exp-image-generation',
+]);
 
 export interface GoogleModelCard {
   displayName: string;
@@ -65,6 +76,7 @@ interface LobeGoogleAIParams {
   apiKey?: string;
   baseURL?: string;
   client?: GoogleGenerativeAI | VertexAI;
+  id?: string;
   isVertexAi?: boolean;
 }
 
@@ -73,8 +85,9 @@ export class LobeGoogleAI implements LobeRuntimeAI {
   private isVertexAi: boolean;
   baseURL?: string;
   apiKey?: string;
+  provider: string;
 
-  constructor({ apiKey, baseURL, client, isVertexAi }: LobeGoogleAIParams = {}) {
+  constructor({ apiKey, baseURL, client, isVertexAi, id }: LobeGoogleAIParams = {}) {
     if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);
 
     this.client = new GoogleGenerativeAI(apiKey);
@@ -82,6 +95,8 @@ export class LobeGoogleAI implements LobeRuntimeAI {
     this.client = client ? (client as GoogleGenerativeAI) : new GoogleGenerativeAI(apiKey);
     this.baseURL = client ? undefined : baseURL || DEFAULT_BASE_URL;
     this.isVertexAi = isVertexAi || false;
+
+    this.provider = id || (isVertexAi ? 'vertexai' : 'google');
   }
 
   async chat(rawPayload: ChatStreamPayload, options?: ChatCompetitionOptions) {
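A short usage sketch of the new id parameter; the string ids come straight from the fallback expression above:

// Explicit id wins; otherwise the Vertex flag decides.
const vertex = new LobeGoogleAI({ apiKey: 'key', id: 'vertexai', isVertexAi: true }); // provider === 'vertexai'
const google = new LobeGoogleAI({ apiKey: 'key' }); // provider === 'google'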
@@ -97,9 +112,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
       generationConfig: {
         maxOutputTokens: payload.max_tokens,
         // @ts-expect-error - Google SDK 0.24.0 doesn't have this property for now with
-        response_modalities: modelsWithModalities.has(model)
-          ? ['Text', 'Image']
-          : undefined,
+        response_modalities: modelsWithModalities.has(model) ? ['Text', 'Image'] : undefined,
         temperature: payload.temperature,
         topP: payload.top_p,
       },
@@ -129,7 +142,9 @@ export class LobeGoogleAI implements LobeRuntimeAI {
       )
       .generateContentStream({
         contents,
-        systemInstruction: payload.system as string,
+        systemInstruction: modelsDisableInstuction.has(model)
+          ? undefined
+          : (payload.system as string),
         tools: this.buildGoogleTools(payload.tools, payload),
       });
 
@@ -156,7 +171,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
       console.log(err);
       const { errorType, error } = this.parseErrorMessage(err.message);
 
-      throw AgentRuntimeError.chat({ error, errorType, provider: ModelProvider.Google });
+      throw AgentRuntimeError.chat({ error, errorType, provider: this.provider });
     }
   }
 
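Combined with the provider field set in the constructor, failures are now attributed to the actual runtime instance instead of always ModelProvider.Google (whose import is dropped above). Continuing the construction sketch from earlier:

// vertex.chat(payload) failures now throw
// AgentRuntimeError.chat({ ..., provider: 'vertexai' }),
// so the client can surface the error against the Vertex AI provider settings.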
package/src/libs/agent-runtime/runtimeMap.ts

@@ -37,7 +37,6 @@ import { LobeStepfunAI } from './stepfun';
 import { LobeTaichuAI } from './taichu';
 import { LobeTencentCloudAI } from './tencentcloud';
 import { LobeTogetherAI } from './togetherai';
-import { ModelProvider } from './types';
 import { LobeUpstageAI } from './upstage';
 import { LobeVLLMAI } from './vllm';
 import { LobeVolcengineAI } from './volcengine';
@@ -47,51 +46,51 @@ import { LobeZeroOneAI } from './zeroone';
 import { LobeZhipuAI } from './zhipu';
 
 export const providerRuntimeMap = {
-  [ModelProvider.OpenAI]: LobeOpenAI,
-  [ModelProvider.Azure]: LobeAzureOpenAI,
-  [ModelProvider.AzureAI]: LobeAzureAI,
-  [ModelProvider.ZhiPu]: LobeZhipuAI,
-  [ModelProvider.Google]: LobeGoogleAI,
-  [ModelProvider.Moonshot]: LobeMoonshotAI,
-  [ModelProvider.Bedrock]: LobeBedrockAI,
-  [ModelProvider.LMStudio]: LobeLMStudioAI,
-  [ModelProvider.Ollama]: LobeOllamaAI,
-  [ModelProvider.VLLM]: LobeVLLMAI,
-  [ModelProvider.Perplexity]: LobePerplexityAI,
-  [ModelProvider.Anthropic]: LobeAnthropicAI,
-  [ModelProvider.DeepSeek]: LobeDeepSeekAI,
-  [ModelProvider.HuggingFace]: LobeHuggingFaceAI,
-  [ModelProvider.Minimax]: LobeMinimaxAI,
-  [ModelProvider.Mistral]: LobeMistralAI,
-  [ModelProvider.Groq]: LobeGroq,
-  [ModelProvider.Github]: LobeGithubAI,
-  [ModelProvider.OpenRouter]: LobeOpenRouterAI,
-  [ModelProvider.TogetherAI]: LobeTogetherAI,
-  [ModelProvider.FireworksAI]: LobeFireworksAI,
-  [ModelProvider.ZeroOne]: LobeZeroOneAI,
-  [ModelProvider.Stepfun]: LobeStepfunAI,
-  [ModelProvider.Qwen]: LobeQwenAI,
-  [ModelProvider.Novita]: LobeNovitaAI,
-  [ModelProvider.Nvidia]: LobeNvidiaAI,
-  [ModelProvider.Taichu]: LobeTaichuAI,
-  [ModelProvider.Baichuan]: LobeBaichuanAI,
-  [ModelProvider.Ai360]: LobeAi360AI,
-  [ModelProvider.SiliconCloud]: LobeSiliconCloudAI,
-  [ModelProvider.GiteeAI]: LobeGiteeAI,
-  [ModelProvider.Upstage]: LobeUpstageAI,
-  [ModelProvider.Spark]: LobeSparkAI,
-  [ModelProvider.Ai21]: LobeAi21AI,
-  [ModelProvider.Hunyuan]: LobeHunyuanAI,
-  [ModelProvider.SenseNova]: LobeSenseNovaAI,
-  [ModelProvider.XAI]: LobeXAI,
-  [ModelProvider.Jina]: LobeJinaAI,
-  [ModelProvider.SambaNova]: LobeSambaNovaAI,
-  [ModelProvider.Cloudflare]: LobeCloudflareAI,
-  [ModelProvider.InternLM]: LobeInternLMAI,
-  [ModelProvider.Higress]: LobeHigressAI,
-  [ModelProvider.TencentCloud]: LobeTencentCloudAI,
-  [ModelProvider.Volcengine]: LobeVolcengineAI,
-  [ModelProvider.PPIO]: LobePPIOAI,
-  [ModelProvider.Doubao]: LobeVolcengineAI,
-  [ModelProvider.Wenxin]: LobeWenxinAI,
+  ai21: LobeAi21AI,
+  ai360: LobeAi360AI,
+  anthropic: LobeAnthropicAI,
+  azure: LobeAzureOpenAI,
+  azureai: LobeAzureAI,
+  baichuan: LobeBaichuanAI,
+  bedrock: LobeBedrockAI,
+  cloudflare: LobeCloudflareAI,
+  deepseek: LobeDeepSeekAI,
+  doubao: LobeVolcengineAI,
+  fireworksai: LobeFireworksAI,
+  giteeai: LobeGiteeAI,
+  github: LobeGithubAI,
+  google: LobeGoogleAI,
+  groq: LobeGroq,
+  higress: LobeHigressAI,
+  huggingface: LobeHuggingFaceAI,
+  hunyuan: LobeHunyuanAI,
+  internlm: LobeInternLMAI,
+  jina: LobeJinaAI,
+  lmstudio: LobeLMStudioAI,
+  minimax: LobeMinimaxAI,
+  mistral: LobeMistralAI,
+  moonshot: LobeMoonshotAI,
+  novita: LobeNovitaAI,
+  nvidia: LobeNvidiaAI,
+  ollama: LobeOllamaAI,
+  openai: LobeOpenAI,
+  openrouter: LobeOpenRouterAI,
+  perplexity: LobePerplexityAI,
+  ppio: LobePPIOAI,
+  qwen: LobeQwenAI,
+  sambanova: LobeSambaNovaAI,
+  sensenova: LobeSenseNovaAI,
+  siliconcloud: LobeSiliconCloudAI,
+  spark: LobeSparkAI,
+  stepfun: LobeStepfunAI,
+  taichu: LobeTaichuAI,
+  tencentcloud: LobeTencentCloudAI,
+  togetherai: LobeTogetherAI,
+  upstage: LobeUpstageAI,
+  vllm: LobeVLLMAI,
+  volcengine: LobeVolcengineAI,
+  wenxin: LobeWenxinAI,
+  xai: LobeXAI,
+  zeroone: LobeZeroOneAI,
+  zhipu: LobeZhipuAI,
 };
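Keying the map by plain strings (now alphabetically sorted) removes the runtime layer's dependency on the ModelProvider enum, which is why the enum import disappears here and in google/index.ts, and lets ad-hoc provider ids like 'vertexai' resolve without enum churn. If lockstep with the enum is still wanted, a hypothetical compile-time guard could re-assert coverage — this is not in the diff, and it assumes every enum member still has an entry:

import { ModelProvider } from './types';

// Re-assert (at compile time, TS 4.9+ `satisfies`) that the plain-string map
// still covers every ModelProvider enum value; extra ad-hoc keys stay allowed.
export const _checkedRuntimeMap = providerRuntimeMap satisfies Record<`${ModelProvider}`, unknown>;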
package/src/libs/agent-runtime/utils/streams/google-ai.test.ts

@@ -193,4 +193,94 @@ describe('GoogleGenerativeAIStream', () => {
       `data: {"inputImageTokens":258,"inputTextTokens":8,"totalInputTokens":266,"totalTokens":266}\n\n`,
     ]);
   });
+
+  it('should handle stop with content', async () => {
+    vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+    const data = [
+      {
+        candidates: [
+          {
+            content: { parts: [{ text: '234' }], role: 'model' },
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        text: () => '234',
+        usageMetadata: {
+          promptTokenCount: 20,
+          totalTokenCount: 20,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 20 }],
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+      {
+        text: () => '567890\n',
+        candidates: [
+          {
+            content: { parts: [{ text: '567890\n' }], role: 'model' },
+            finishReason: 'STOP',
+            safetyRatings: [
+              { category: 'HARM_CATEGORY_HATE_SPEECH', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
+              { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', probability: 'NEGLIGIBLE' },
+            ],
+          },
+        ],
+        usageMetadata: {
+          promptTokenCount: 19,
+          candidatesTokenCount: 11,
+          totalTokenCount: 30,
+          promptTokensDetails: [{ modality: 'TEXT', tokenCount: 19 }],
+          candidatesTokensDetails: [{ modality: 'TEXT', tokenCount: 11 }],
+        },
+        modelVersion: 'gemini-2.0-flash-exp-image-generation',
+      },
+    ];
+
+    const mockGoogleStream = new ReadableStream({
+      start(controller) {
+        data.forEach((item) => {
+          controller.enqueue(item);
+        });
+
+        controller.close();
+      },
+    });
+
+    const protocolStream = GoogleGenerativeAIStream(mockGoogleStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual(
+      [
+        'id: chat_1',
+        'event: text',
+        'data: "234"\n',
+
+        'id: chat_1',
+        'event: text',
+        `data: "567890\\n"\n`,
+        // stop
+        'id: chat_1',
+        'event: stop',
+        `data: "STOP"\n`,
+        // usage
+        'id: chat_1',
+        'event: usage',
+        `data: {"inputTextTokens":19,"totalInputTokens":19,"totalOutputTokens":11,"totalTokens":30}\n`,
+      ].map((i) => i + '\n'),
+    );
+  });
 });
package/src/libs/agent-runtime/utils/streams/google-ai.ts

@@ -71,6 +71,7 @@ const transformGoogleGenerativeAIStream = (
   if (chunk.usageMetadata) {
     const usage = chunk.usageMetadata;
     return [
+      !!text ? { data: text, id: context?.id, type: 'text' } : undefined,
       { data: candidate.finishReason, id: context?.id, type: 'stop' },
       {
         data: {
@@ -88,7 +89,7 @@ const transformGoogleGenerativeAIStream = (
         id: context?.id,
         type: 'usage',
       },
-    ];
+    ].filter(Boolean) as StreamProtocolChunk[];
   }
   return { data: candidate.finishReason, id: context?.id, type: 'stop' };
 }
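This fixes the case the new test pins down: a final Gemini chunk can carry text alongside finishReason and usageMetadata, and that trailing text used to be dropped. The shape of the fix in isolation, as a self-contained sketch:

interface ProtocolChunk {
  data: unknown;
  id?: string;
  type: 'text' | 'stop' | 'usage';
}

// Build the final chunk list conditionally, then drop the empty slot — the
// same !!text / filter(Boolean) shape the transformer now uses.
const emitFinal = (text: string | undefined, reason: string, usage: object, id?: string) =>
  [
    !!text ? ({ data: text, id, type: 'text' } as ProtocolChunk) : undefined,
    { data: reason, id, type: 'stop' } as ProtocolChunk,
    { data: usage, id, type: 'usage' } as ProtocolChunk,
  ].filter(Boolean) as ProtocolChunk[];

emitFinal('567890\n', 'STOP', { totalTokens: 30 }, 'chat_1'); // text, stop, usage
emitFinal(undefined, 'STOP', { totalTokens: 30 }, 'chat_1'); // stop and usage only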
package/src/libs/agent-runtime/utils/streams/index.ts

@@ -6,3 +6,4 @@ export * from './openai';
 export * from './protocol';
 export * from './qwen';
 export * from './spark';
+export * from './vertex-ai';
package/src/locales/default/components.ts

@@ -78,6 +78,7 @@ export default {
   custom: '自定义模型,默认设定同时支持函数调用与视觉识别,请根据实际情况验证上述能力的可用性',
   file: '该模型支持上传文件读取与识别',
   functionCall: '该模型支持函数调用(Function Call)',
+  imageOutput: '该模型支持生成图片',
   reasoning: '该模型支持深度思考',
   search: '该模型支持联网搜索',
   tokens: '该模型单个会话最多支持 {{tokens}} Tokens',
package/src/server/routers/lambda/agent.ts

@@ -122,13 +122,16 @@ export const agentRouter = router({
     const knowledge = await ctx.agentModel.getAgentAssignedKnowledge(input.agentId);
 
     return [
-      ...files.map((file) => ({
-        enabled: knowledge.files.some((item) => item.id === file.id),
-        fileType: file.fileType,
-        id: file.id,
-        name: file.name,
-        type: KnowledgeType.File,
-      })),
+      ...files
+        // filter out all images
+        .filter((file) => !file.fileType.startsWith('image'))
+        .map((file) => ({
+          enabled: knowledge.files.some((item) => item.id === file.id),
+          fileType: file.fileType,
+          id: file.id,
+          name: file.name,
+          type: KnowledgeType.File,
+        })),
       ...knowledgeBases.map((knowledgeBase) => ({
         avatar: knowledgeBase.avatar,
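fileType here appears to be a MIME type, so a single prefix test excludes every image variant from the agent's assignable knowledge, presumably because images contribute no text chunks to retrieve. A quick sketch with assumed sample values:

const files = [
  { fileType: 'image/png', id: '1', name: 'logo.png' },
  { fileType: 'application/pdf', id: '2', name: 'spec.pdf' },
];

files.filter((f) => !f.fileType.startsWith('image')); // only spec.pdf survives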
package/src/server/routers/lambda/chunk.ts

@@ -1,3 +1,4 @@
+import { TRPCError } from '@trpc/server';
 import { inArray } from 'drizzle-orm/expressions';
 import { z } from 'zod';
 
@@ -126,60 +127,75 @@ export const chunkRouter = router({
   semanticSearchForChat: chunkProcedure
     .input(SemanticSearchSchema)
     .mutation(async ({ ctx, input }) => {
-      const item = await ctx.messageModel.findMessageQueriesById(input.messageId);
-      const { model, provider } =
-        getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
-      let embedding: number[];
-      let ragQueryId: string;
-      // if there is no message rag or it's embeddings, then we need to create one
-      if (!item || !item.embeddings) {
-        // TODO: need to support customize
-        const agentRuntime = await initAgentRuntimeWithUserPayload(provider, ctx.jwtPayload);
-
-        const embeddings = await agentRuntime.embeddings({
-          dimensions: 1024,
-          input: input.rewriteQuery,
-          model,
-        });
-
-        embedding = embeddings![0];
-        const embeddingsId = await ctx.embeddingModel.create({
-          embeddings: embedding,
-          model,
-        });
-
-        const result = await ctx.messageModel.createMessageQuery({
-          embeddingsId,
-          messageId: input.messageId,
-          rewriteQuery: input.rewriteQuery,
-          userQuery: input.userQuery,
-        });
-
-        ragQueryId = result.id;
-      } else {
-        embedding = item.embeddings;
-        ragQueryId = item.id;
-      }
-
-      console.time('semanticSearch');
-      let finalFileIds = input.fileIds ?? [];
-
-      if (input.knowledgeIds && input.knowledgeIds.length > 0) {
-        const knowledgeFiles = await serverDB.query.knowledgeBaseFiles.findMany({
-          where: inArray(knowledgeBaseFiles.knowledgeBaseId, input.knowledgeIds),
-        });
-
-        finalFileIds = knowledgeFiles.map((f) => f.fileId).concat(finalFileIds);
-      }
-
-      const chunks = await ctx.chunkModel.semanticSearchForChat({
-        embedding,
-        fileIds: finalFileIds,
-        query: input.rewriteQuery,
-      });
-      // TODO: need to rerank the chunks
-      console.timeEnd('semanticSearch');
-
-      return { chunks, queryId: ragQueryId };
+      try {
+        const item = await ctx.messageModel.findMessageQueriesById(input.messageId);
+        const { model, provider } =
+          getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
+        let embedding: number[];
+        let ragQueryId: string;
+
+        // if there is no message rag or it's embeddings, then we need to create one
+        if (!item || !item.embeddings) {
+          // TODO: need to support customize
+          const agentRuntime = await initAgentRuntimeWithUserPayload(provider, ctx.jwtPayload);
+
+          // slice content to make sure in the context window limit
+          const query =
+            input.rewriteQuery.length > 8000
+              ? input.rewriteQuery.slice(0, 8000)
+              : input.rewriteQuery;
+
+          const embeddings = await agentRuntime.embeddings({
+            dimensions: 1024,
+            input: query,
+            model,
+          });
+
+          embedding = embeddings![0];
+          const embeddingsId = await ctx.embeddingModel.create({
+            embeddings: embedding,
+            model,
+          });
+
+          const result = await ctx.messageModel.createMessageQuery({
+            embeddingsId,
+            messageId: input.messageId,
+            rewriteQuery: input.rewriteQuery,
+            userQuery: input.userQuery,
+          });
+
+          ragQueryId = result.id;
+        } else {
+          embedding = item.embeddings;
+          ragQueryId = item.id;
+        }
+
+        let finalFileIds = input.fileIds ?? [];
+
+        if (input.knowledgeIds && input.knowledgeIds.length > 0) {
+          const knowledgeFiles = await serverDB.query.knowledgeBaseFiles.findMany({
+            where: inArray(knowledgeBaseFiles.knowledgeBaseId, input.knowledgeIds),
+          });
+
+          finalFileIds = knowledgeFiles.map((f) => f.fileId).concat(finalFileIds);
+        }
+
+        const chunks = await ctx.chunkModel.semanticSearchForChat({
+          embedding,
+          fileIds: finalFileIds,
+          query: input.rewriteQuery,
+        });
+
+        // TODO: need to rerank the chunks
+
+        return { chunks, queryId: ragQueryId };
+      } catch (e) {
+        console.error(e);
+
+        throw new TRPCError({
+          code: 'INTERNAL_SERVER_ERROR',
+          message: (e as any).errorType || JSON.stringify(e),
+        });
+      }
     }),
 });
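Beyond the try/catch, two substantive changes hide in the re-indentation: the embedding input is now capped at 8,000 characters before calling the provider, and failures are rethrown as a typed TRPCError instead of leaking an unlabeled 500 (the console.time profiling is gone as well). The cap in isolation, as a sketch:

// Hard limit on embedding input length — the 8000-character guard the router
// now applies, presumably to stay inside embedding-model context windows.
const capForEmbedding = (query: string, max = 8000): string =>
  query.length > max ? query.slice(0, max) : query;

capForEmbedding('a'.repeat(10_000)).length; // 8000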
package/src/store/chat/slices/aiChat/actions/rag.ts

@@ -21,7 +21,7 @@ export interface ChatRAGAction {
     id: string,
     userQuery: string,
     messages: string[],
-  ) => Promise<{ chunks: ChatSemanticSearchChunk[]; queryId: string; rewriteQuery?: string }>;
+  ) => Promise<{ chunks: ChatSemanticSearchChunk[]; queryId?: string; rewriteQuery?: string }>;
   /**
    * Rewrite user content to better RAG query
    */
@@ -74,17 +74,23 @@ export const chatRag: StateCreator<ChatStore, [['zustand/devtools', never]], [],
 
     // 2. retrieve chunks from semantic search
     const files = chatSelectors.currentUserFiles(get()).map((f) => f.id);
-    const { chunks, queryId } = await ragService.semanticSearchForChat({
-      fileIds: knowledgeIds().fileIds.concat(files),
-      knowledgeIds: knowledgeIds().knowledgeBaseIds,
-      messageId: id,
-      rewriteQuery: rewriteQuery || userQuery,
-      userQuery,
-    });
-
-    get().internal_toggleMessageRAGLoading(false, id);
-
-    return { chunks, queryId, rewriteQuery };
+    try {
+      const { chunks, queryId } = await ragService.semanticSearchForChat({
+        fileIds: knowledgeIds().fileIds.concat(files),
+        knowledgeIds: knowledgeIds().knowledgeBaseIds,
+        messageId: id,
+        rewriteQuery: rewriteQuery || userQuery,
+        userQuery,
+      });
+
+      get().internal_toggleMessageRAGLoading(false, id);
+
+      return { chunks, queryId, rewriteQuery };
+    } catch {
+      get().internal_toggleMessageRAGLoading(false, id);
+
+      return { chunks: [] };
+    }
   },
   internal_rewriteQuery: async (id, content, messages) => {
     let rewriteQuery = content;
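The queryId? loosening in the interface above exists precisely for this fallback, which returns no queryId when retrieval fails. The loading toggle is duplicated across both branches; the same behavior could be written with finally — a sketch assuming the surrounding bindings (params, rewriteQuery, get, id) from the store action, not what the store does today:

try {
  const { chunks, queryId } = await ragService.semanticSearchForChat(params);
  return { chunks, queryId, rewriteQuery };
} catch {
  // degrade to "no RAG context" rather than failing the whole send
  return { chunks: [] };
} finally {
  get().internal_toggleMessageRAGLoading(false, id);
}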
package/src/types/aiModel.ts

@@ -28,6 +28,10 @@ export interface ModelAbilities {
    * whether model supports function call
    */
   functionCall?: boolean;
+  /**
+   * whether model supports image output
+   */
+  imageOutput?: boolean;
   /**
    * whether model supports reasoning
    */
@@ -36,7 +40,6 @@ export interface ModelAbilities {
    * whether model supports search web
    */
   search?: boolean;
-
   /**
    * whether model supports vision
    */
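With the ability declared here, its locale string added in components.ts, and the badge wired through ModelSelect, a model definition can now advertise image output. A hypothetical entry of the kind the google/vertexai configs in this release add — field names assumed from the existing ModelAbilities shape, values illustrative:

const geminiImageGen = {
  abilities: { imageOutput: true, vision: true },
  displayName: 'Gemini 2.0 Flash (Image Generation) Experimental',
  id: 'gemini-2.0-flash-exp-image-generation',
  type: 'chat',
};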