@lobehub/chat 1.71.1 → 1.71.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,56 @@

  # Changelog

+ ### [Version 1.71.3](https://github.com/lobehub/lobe-chat/compare/v1.71.2...v1.71.3)
+
+ <sup>Released on **2025-03-15**</sup>
+
+ #### 🐛 Bug Fixes
+
+ - **misc**: Fix claude 3.5+ models context max output.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's fixed
+
+ - **misc**: Fix claude 3.5+ models context max output, closes [#6984](https://github.com/lobehub/lobe-chat/issues/6984) ([da342dd](https://github.com/lobehub/lobe-chat/commit/da342dd))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
+ ### [Version 1.71.2](https://github.com/lobehub/lobe-chat/compare/v1.71.1...v1.71.2)
+
+ <sup>Released on **2025-03-15**</sup>
+
+ #### 🐛 Bug Fixes
+
+ - **misc**: Fix knowledge base issue.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's fixed
+
+ - **misc**: Fix knowledge base issue, closes [#6973](https://github.com/lobehub/lobe-chat/issues/6973) ([15f39ef](https://github.com/lobehub/lobe-chat/commit/15f39ef))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ### [Version 1.71.1](https://github.com/lobehub/lobe-chat/compare/v1.71.0...v1.71.1)

  <sup>Released on **2025-03-15**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,22 @@
  [
+ {
+ "children": {
+ "fixes": [
+ "Fix claude 3.5+ models context max output."
+ ]
+ },
+ "date": "2025-03-15",
+ "version": "1.71.3"
+ },
+ {
+ "children": {
+ "fixes": [
+ "Fix knowledge base issue."
+ ]
+ },
+ "date": "2025-03-15",
+ "version": "1.71.2"
+ },
  {
  "children": {
  "fixes": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@lobehub/chat",
- "version": "1.71.1",
+ "version": "1.71.3",
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
  "keywords": [
  "framework",
@@ -1,6 +1,5 @@
  import { DEFAULT_AGENT_META } from '@/const/meta';
- import { DEFAULT_MODEL } from '@/const/settings/llm';
- import { ModelProvider } from '@/libs/agent-runtime';
+ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from '@/const/settings/llm';
  import { LobeAgentChatConfig, LobeAgentConfig, LobeAgentTTSConfig } from '@/types/agent';
  import { UserDefaultAgent } from '@/types/user/settings';

@@ -15,7 +14,7 @@ export const DEFAUTT_AGENT_TTS_CONFIG: LobeAgentTTSConfig = {

  export const DEFAULT_AGENT_SEARCH_FC_MODEL = {
  model: DEFAULT_MODEL,
- provider: ModelProvider.OpenAI,
+ provider: DEFAULT_PROVIDER,
  };

  export const DEFAULT_AGENT_CHAT_CONFIG: LobeAgentChatConfig = {
@@ -41,7 +40,7 @@ export const DEFAULT_AGENT_CONFIG: LobeAgentConfig = {
  top_p: 1,
  },
  plugins: [],
- provider: ModelProvider.OpenAI,
+ provider: DEFAULT_PROVIDER,
  systemRole: '',
  tts: DEFAUTT_AGENT_TTS_CONFIG,
  };
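These hunks swap the hard-coded `ModelProvider.OpenAI` for a `DEFAULT_PROVIDER` constant imported from the same settings module as `DEFAULT_MODEL`, so the default agent and the search function-call model follow a single configurable source. A minimal sketch of the assumed shape of that module; the concrete values below are illustrative, not taken from the package:

```ts
// Hypothetical sketch of '@/const/settings/llm'; the real values ship with
// the package and may differ.
export const DEFAULT_MODEL = 'gpt-4o-mini';
export const DEFAULT_PROVIDER = 'openai';

// Consumers no longer need the ModelProvider enum just to set a default:
export const DEFAULT_AGENT_SEARCH_FC_MODEL = {
  model: DEFAULT_MODEL,
  provider: DEFAULT_PROVIDER,
};
```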
@@ -495,13 +495,13 @@ content in Table html is below:
  });

  // test the result limit
- it('should limit results to 5 items', async () => {
+ it('should limit results to 15 items', async () => {
  const fileId = '1';
- // Create 6 chunks
+ // Create 24 chunks
  const chunkResult = await serverDB
  .insert(chunks)
  .values(
- Array(6)
+ Array(24)
  .fill(0)
  .map((_, i) => ({ text: `Test Chunk ${i}`, userId })),
  )
@@ -528,7 +528,7 @@ content in Table html is below:
  query: 'test',
  });

- expect(result).toHaveLength(5);
+ expect(result).toHaveLength(15);
  });
  });
  });
@@ -207,7 +207,8 @@ export class ChunkModel {
  .leftJoin(files, eq(files.id, fileChunks.fileId))
  .where(inArray(fileChunks.fileId, fileIds))
  .orderBy((t) => desc(t.similarity))
- .limit(5);
+ // loosen to 15 for now
+ .limit(15);

  return result.map((item) => {
  return {
@@ -123,21 +123,21 @@ describe('LobeAnthropicAI', () => {
  { content: 'You are an awesome greeter', role: 'system' },
  { content: 'Hello', role: 'user' },
  ],
- model: 'claude-3-haiku-20240307',
+ model: 'claude-3-7-sonnet-20250219',
  temperature: 0,
  });

  // Assert
  expect(instance['client'].messages.create).toHaveBeenCalledWith(
  {
- max_tokens: 4096,
+ max_tokens: 8192,
  messages: [
  {
  content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
  role: 'user',
  },
  ],
- model: 'claude-3-haiku-20240307',
+ model: 'claude-3-7-sonnet-20250219',
  stream: true,
  system: [
  {
@@ -25,6 +25,8 @@ export interface AnthropicModelCard {
  id: string;
  }

+ const modelsWithSmallContextWindow = new Set(['claude-3-opus-20240229', 'claude-3-haiku-20240307']);
+
  const DEFAULT_BASE_URL = 'https://api.anthropic.com';

  interface AnthropicAIParams extends ClientOptions {
@@ -38,6 +40,10 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
  apiKey?: string;
  private id: string;

+ private isDebug() {
+ return process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1';
+ }
+
  constructor({ apiKey, baseURL = DEFAULT_BASE_URL, id, ...res }: AnthropicAIParams = {}) {
  if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);

@@ -51,6 +57,11 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
  try {
  const anthropicPayload = await this.buildAnthropicPayload(payload);

+ if (this.isDebug()) {
+ console.log('[requestPayload]');
+ console.log(JSON.stringify(anthropicPayload), '\n');
+ }
+
  const response = await this.client.messages.create(
  { ...anthropicPayload, stream: true },
  {
@@ -60,7 +71,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {

  const [prod, debug] = response.tee();

- if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
+ if (this.isDebug()) {
  debugStream(debug.toReadableStream()).catch(console.error);
  }

@@ -123,7 +134,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
  return {
  // claude 3 series models have a max output of 4096 tokens; 3.5+ series has 8192
  // https://docs.anthropic.com/en/docs/about-claude/models/all-models#:~:text=200K-,Max%20output,-Normal%3A
- max_tokens: max_tokens ?? (model.startsWith('claude-3-') ? 4096 : 8192),
+ max_tokens: max_tokens ?? (modelsWithSmallContextWindow.has(model) ? 4096 : 8192),
  messages: postMessages,
  model,
  system: systemPrompts,
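The `max_tokens` change above is the core of the #6984 fix: the old prefix test wrongly capped 3.5+ models at 4096 because ids like `claude-3-5-sonnet` and `claude-3-7-sonnet` also start with `claude-3-`. A minimal standalone sketch of the new selection logic (`resolveMaxTokens` is a hypothetical name; the real code inlines this in `buildAnthropicPayload`):

```ts
// Only these two legacy ids keep the small 4096-token output ceiling.
const modelsWithSmallContextWindow = new Set([
  'claude-3-opus-20240229',
  'claude-3-haiku-20240307',
]);

const resolveMaxTokens = (model: string, maxTokens?: number): number =>
  maxTokens ?? (modelsWithSmallContextWindow.has(model) ? 4096 : 8192);

// Old logic: 'claude-3-7-sonnet-20250219'.startsWith('claude-3-') === true -> 4096 (the bug)
console.log(resolveMaxTokens('claude-3-7-sonnet-20250219')); // 8192
console.log(resolveMaxTokens('claude-3-haiku-20240307')); // 4096
console.log(resolveMaxTokens('claude-3-haiku-20240307', 1024)); // 1024: an explicit value wins
```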
@@ -340,7 +340,9 @@ describe('LobeGoogleAI', () => {
  expect(e).toEqual({
  errorType: bizErrorType,
  error: {
- message: `[GoogleGenerativeAI Error]: Error fetching from https://generativelanguage.googleapis.com/v1/models/gemini-pro:streamGenerateContent?alt=sse: [400 Bad Request] API key not valid. Please pass a valid API key. [{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"Error","domain":"googleapis.com","metadata":{"service":"generativelanguage.googleapis.com}}]`,
+ message: `API key not valid. Please pass a valid API key. [{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"Error","domain":"googleapis.com","metadata":{"service":"generativelanguage.googleapis.com}}]`,
+ statusCode: 400,
+ statusCodeText: '[400 Bad Request]',
  },
  provider,
  });
@@ -23,7 +23,6 @@ import {
  OpenAIChatMessage,
  UserMessageContentPart,
  } from '../types';
- import { ModelProvider } from '../types/type';
  import { AgentRuntimeError } from '../utils/createError';
  import { debugStream } from '../utils/debugStream';
  import { StreamingResponse } from '../utils/response';
@@ -77,6 +76,7 @@ interface LobeGoogleAIParams {
  apiKey?: string;
  baseURL?: string;
  client?: GoogleGenerativeAI | VertexAI;
+ id?: string;
  isVertexAi?: boolean;
  }

@@ -85,8 +85,9 @@ export class LobeGoogleAI implements LobeRuntimeAI {
  private isVertexAi: boolean;
  baseURL?: string;
  apiKey?: string;
+ provider: string;

- constructor({ apiKey, baseURL, client, isVertexAi }: LobeGoogleAIParams = {}) {
+ constructor({ apiKey, baseURL, client, isVertexAi, id }: LobeGoogleAIParams = {}) {
  if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);

  this.client = new GoogleGenerativeAI(apiKey);
@@ -94,6 +95,8 @@ export class LobeGoogleAI implements LobeRuntimeAI {
  this.client = client ? (client as GoogleGenerativeAI) : new GoogleGenerativeAI(apiKey);
  this.baseURL = client ? undefined : baseURL || DEFAULT_BASE_URL;
  this.isVertexAi = isVertexAi || false;
+
+ this.provider = id || (isVertexAi ? 'vertexai' : 'google');
  }

  async chat(rawPayload: ChatStreamPayload, options?: ChatCompetitionOptions) {
@@ -168,7 +171,7 @@ export class LobeGoogleAI implements LobeRuntimeAI {
  console.log(err);
  const { errorType, error } = this.parseErrorMessage(err.message);

- throw AgentRuntimeError.chat({ error, errorType, provider: ModelProvider.Google });
+ throw AgentRuntimeError.chat({ error, errorType, provider: this.provider });
  }
  }

@@ -322,12 +325,12 @@ export class LobeGoogleAI implements LobeRuntimeAI {
  if (message.includes('location is not supported'))
  return { error: { message }, errorType: AgentRuntimeErrorType.LocationNotSupportError };

- try {
- const startIndex = message.lastIndexOf('[');
- if (startIndex === -1) {
- return defaultError;
- }
+ const startIndex = message.lastIndexOf('[');
+ if (startIndex === -1) {
+ return defaultError;
+ }

+ try {
  // slice the string from the start index to the end
  const jsonString = message.slice(startIndex);

@@ -346,9 +349,18 @@ export class LobeGoogleAI implements LobeRuntimeAI {
  }
  }
  } catch {
- // if parsing fails, return the original error message
- return defaultError;
+ //
+ }
+
+ const errorObj = this.extractErrorObjectFromError(message);
+
+ const { errorDetails } = errorObj;
+
+ if (errorDetails) {
+ return { error: errorDetails, errorType: AgentRuntimeErrorType.ProviderBizError };
  }
+
+ return defaultError;
  }

  private buildGoogleTools(
@@ -389,6 +401,40 @@ export class LobeGoogleAI implements LobeRuntimeAI {
  },
  };
  };
+
+ private extractErrorObjectFromError(message: string) {
+ // use a regex to match the status code segment: [number description]
+ const regex = /^(.*?)(\[\d+ [^\]]+])(.*)$/;
+ const match = message.match(regex);
+
+ if (match) {
+ const prefix = match[1].trim();
+ const statusCodeWithBrackets = match[2].trim();
+ const message = match[3].trim();
+
+ // extract the numeric status code
+ const statusCodeMatch = statusCodeWithBrackets.match(/\[(\d+)/);
+ const statusCode = statusCodeMatch ? parseInt(statusCodeMatch[1]) : null;
+
+ // build a JSON object containing the status code and message
+ const resultJson = {
+ message: message,
+ statusCode: statusCode,
+ statusCodeText: statusCodeWithBrackets,
+ };
+
+ return {
+ errorDetails: resultJson,
+ prefix: prefix,
+ };
+ }
+
+ // if nothing matches, return the original message
+ return {
+ errorDetails: null,
+ prefix: message,
+ };
+ }
  }

  export default LobeGoogleAI;
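The new `extractErrorObjectFromError` splits a Google SDK error message into a status code and a clean message, which is exactly the shape asserted in the updated test above. A minimal runnable sketch of the same extraction (`extract` is a hypothetical name; the real code lives on the class):

```ts
// Matches "<prefix> [<status> <text>] <message>" and splits out the parts.
const extract = (message: string) => {
  const match = message.match(/^(.*?)(\[\d+ [^\]]+])(.*)$/);
  if (!match) return null;

  const statusCodeMatch = match[2].match(/\[(\d+)/);
  return {
    message: match[3].trim(),
    statusCode: statusCodeMatch ? parseInt(statusCodeMatch[1], 10) : null,
    statusCodeText: match[2].trim(),
  };
};

// A shortened example in the SDK's format (the real message also carries a
// trailing JSON blob):
const raw =
  '[GoogleGenerativeAI Error]: Error fetching from https://generativelanguage.googleapis.com/v1/models/gemini-pro:streamGenerateContent?alt=sse: [400 Bad Request] API key not valid.';
console.log(extract(raw));
// { message: 'API key not valid.', statusCode: 400, statusCodeText: '[400 Bad Request]' }
```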
@@ -37,7 -37,6 @@ import { LobeStepfunAI } from './stepfun';
  import { LobeTaichuAI } from './taichu';
  import { LobeTencentCloudAI } from './tencentcloud';
  import { LobeTogetherAI } from './togetherai';
- import { ModelProvider } from './types';
  import { LobeUpstageAI } from './upstage';
  import { LobeVLLMAI } from './vllm';
  import { LobeVolcengineAI } from './volcengine';
@@ -47,51 +46,51 @@ import { LobeZeroOneAI } from './zeroone';
  import { LobeZhipuAI } from './zhipu';

  export const providerRuntimeMap = {
- [ModelProvider.OpenAI]: LobeOpenAI,
- [ModelProvider.Azure]: LobeAzureOpenAI,
- [ModelProvider.AzureAI]: LobeAzureAI,
- [ModelProvider.ZhiPu]: LobeZhipuAI,
- [ModelProvider.Google]: LobeGoogleAI,
- [ModelProvider.Moonshot]: LobeMoonshotAI,
- [ModelProvider.Bedrock]: LobeBedrockAI,
- [ModelProvider.LMStudio]: LobeLMStudioAI,
- [ModelProvider.Ollama]: LobeOllamaAI,
- [ModelProvider.VLLM]: LobeVLLMAI,
- [ModelProvider.Perplexity]: LobePerplexityAI,
- [ModelProvider.Anthropic]: LobeAnthropicAI,
- [ModelProvider.DeepSeek]: LobeDeepSeekAI,
- [ModelProvider.HuggingFace]: LobeHuggingFaceAI,
- [ModelProvider.Minimax]: LobeMinimaxAI,
- [ModelProvider.Mistral]: LobeMistralAI,
- [ModelProvider.Groq]: LobeGroq,
- [ModelProvider.Github]: LobeGithubAI,
- [ModelProvider.OpenRouter]: LobeOpenRouterAI,
- [ModelProvider.TogetherAI]: LobeTogetherAI,
- [ModelProvider.FireworksAI]: LobeFireworksAI,
- [ModelProvider.ZeroOne]: LobeZeroOneAI,
- [ModelProvider.Stepfun]: LobeStepfunAI,
- [ModelProvider.Qwen]: LobeQwenAI,
- [ModelProvider.Novita]: LobeNovitaAI,
- [ModelProvider.Nvidia]: LobeNvidiaAI,
- [ModelProvider.Taichu]: LobeTaichuAI,
- [ModelProvider.Baichuan]: LobeBaichuanAI,
- [ModelProvider.Ai360]: LobeAi360AI,
- [ModelProvider.SiliconCloud]: LobeSiliconCloudAI,
- [ModelProvider.GiteeAI]: LobeGiteeAI,
- [ModelProvider.Upstage]: LobeUpstageAI,
- [ModelProvider.Spark]: LobeSparkAI,
- [ModelProvider.Ai21]: LobeAi21AI,
- [ModelProvider.Hunyuan]: LobeHunyuanAI,
- [ModelProvider.SenseNova]: LobeSenseNovaAI,
- [ModelProvider.XAI]: LobeXAI,
- [ModelProvider.Jina]: LobeJinaAI,
- [ModelProvider.SambaNova]: LobeSambaNovaAI,
- [ModelProvider.Cloudflare]: LobeCloudflareAI,
- [ModelProvider.InternLM]: LobeInternLMAI,
- [ModelProvider.Higress]: LobeHigressAI,
- [ModelProvider.TencentCloud]: LobeTencentCloudAI,
- [ModelProvider.Volcengine]: LobeVolcengineAI,
- [ModelProvider.PPIO]: LobePPIOAI,
- [ModelProvider.Doubao]: LobeVolcengineAI,
- [ModelProvider.Wenxin]: LobeWenxinAI,
+ ai21: LobeAi21AI,
+ ai360: LobeAi360AI,
+ anthropic: LobeAnthropicAI,
+ azure: LobeAzureOpenAI,
+ azureai: LobeAzureAI,
+ baichuan: LobeBaichuanAI,
+ bedrock: LobeBedrockAI,
+ cloudflare: LobeCloudflareAI,
+ deepseek: LobeDeepSeekAI,
+ doubao: LobeVolcengineAI,
+ fireworksai: LobeFireworksAI,
+ giteeai: LobeGiteeAI,
+ github: LobeGithubAI,
+ google: LobeGoogleAI,
+ groq: LobeGroq,
+ higress: LobeHigressAI,
+ huggingface: LobeHuggingFaceAI,
+ hunyuan: LobeHunyuanAI,
+ internlm: LobeInternLMAI,
+ jina: LobeJinaAI,
+ lmstudio: LobeLMStudioAI,
+ minimax: LobeMinimaxAI,
+ mistral: LobeMistralAI,
+ moonshot: LobeMoonshotAI,
+ novita: LobeNovitaAI,
+ nvidia: LobeNvidiaAI,
+ ollama: LobeOllamaAI,
+ openai: LobeOpenAI,
+ openrouter: LobeOpenRouterAI,
+ perplexity: LobePerplexityAI,
+ ppio: LobePPIOAI,
+ qwen: LobeQwenAI,
+ sambanova: LobeSambaNovaAI,
+ sensenova: LobeSenseNovaAI,
+ siliconcloud: LobeSiliconCloudAI,
+ spark: LobeSparkAI,
+ stepfun: LobeStepfunAI,
+ taichu: LobeTaichuAI,
+ tencentcloud: LobeTencentCloudAI,
+ togetherai: LobeTogetherAI,
+ upstage: LobeUpstageAI,
+ vllm: LobeVLLMAI,
+ volcengine: LobeVolcengineAI,
+ wenxin: LobeWenxinAI,
+ xai: LobeXAI,
+ zeroone: LobeZeroOneAI,
+ zhipu: LobeZhipuAI,
  };
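The runtime map's keys change from `ModelProvider` enum members to plain string literals, sorted alphabetically, and the enum import is dropped. This only stays behavior-preserving if each enum member's value is the same lowercase id string; a minimal sketch of that assumption (`getProviderRuntime` is hypothetical):

```ts
// Sketch only: assumes e.g. ModelProvider.Anthropic === 'anthropic', so the
// string-keyed map resolves exactly the same constructors as before.
const getProviderRuntime = (provider: keyof typeof providerRuntimeMap) =>
  providerRuntimeMap[provider];

const Runtime = getProviderRuntime('anthropic'); // -> LobeAnthropicAI
```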
@@ -122,13 +122,16 @@ export const agentRouter = router({
  const knowledge = await ctx.agentModel.getAgentAssignedKnowledge(input.agentId);

  return [
- ...files.map((file) => ({
- enabled: knowledge.files.some((item) => item.id === file.id),
- fileType: file.fileType,
- id: file.id,
- name: file.name,
- type: KnowledgeType.File,
- })),
+ ...files
+ // filter out all images
+ .filter((file) => !file.fileType.startsWith('image'))
+ .map((file) => ({
+ enabled: knowledge.files.some((item) => item.id === file.id),
+ fileType: file.fileType,
+ id: file.id,
+ name: file.name,
+ type: KnowledgeType.File,
+ })),
  ...knowledgeBases.map((knowledgeBase) => ({
  avatar: knowledgeBase.avatar,
  description: knowledgeBase.description,
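The assigned-knowledge list now drops image files by MIME-type prefix before mapping them into knowledge items, which appears to be part of the knowledge-base fix shipped in 1.71.2. A small illustration of the predicate with hypothetical rows:

```ts
// Hypothetical file rows; only fileType matters to the new filter.
const files = [
  { fileType: 'image/png', id: 'f1', name: 'diagram.png' },
  { fileType: 'application/pdf', id: 'f2', name: 'spec.pdf' },
];

const knowledgeFiles = files.filter((file) => !file.fileType.startsWith('image'));
console.log(knowledgeFiles.map((f) => f.name)); // ['spec.pdf']
```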
@@ -1,3 +1,4 @@
+ import { TRPCError } from '@trpc/server';
  import { inArray } from 'drizzle-orm/expressions';
  import { z } from 'zod';

@@ -126,60 +127,75 @@ export const chunkRouter = router({
  semanticSearchForChat: chunkProcedure
  .input(SemanticSearchSchema)
  .mutation(async ({ ctx, input }) => {
- const item = await ctx.messageModel.findMessageQueriesById(input.messageId);
- const { model, provider } =
- getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
- let embedding: number[];
- let ragQueryId: string;
- // if there is no message rag or its embeddings, then we need to create one
- if (!item || !item.embeddings) {
- // TODO: need to support customize
- const agentRuntime = await initAgentRuntimeWithUserPayload(provider, ctx.jwtPayload);
-
- const embeddings = await agentRuntime.embeddings({
- dimensions: 1024,
- input: input.rewriteQuery,
- model,
- });
-
- embedding = embeddings![0];
- const embeddingsId = await ctx.embeddingModel.create({
- embeddings: embedding,
- model,
+ try {
+ const item = await ctx.messageModel.findMessageQueriesById(input.messageId);
+ const { model, provider } =
+ getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
+ let embedding: number[];
+ let ragQueryId: string;
+
+ // if there is no message rag or its embeddings, then we need to create one
+ if (!item || !item.embeddings) {
+ // TODO: need to support customize
+ const agentRuntime = await initAgentRuntimeWithUserPayload(provider, ctx.jwtPayload);
+
+ // slice content to make sure it fits within the context window limit
+ const query =
+ input.rewriteQuery.length > 8000
+ ? input.rewriteQuery.slice(0, 8000)
+ : input.rewriteQuery;
+
+ const embeddings = await agentRuntime.embeddings({
+ dimensions: 1024,
+ input: query,
+ model,
+ });
+
+ embedding = embeddings![0];
+ const embeddingsId = await ctx.embeddingModel.create({
+ embeddings: embedding,
+ model,
+ });
+
+ const result = await ctx.messageModel.createMessageQuery({
+ embeddingsId,
+ messageId: input.messageId,
+ rewriteQuery: input.rewriteQuery,
+ userQuery: input.userQuery,
+ });
+
+ ragQueryId = result.id;
+ } else {
+ embedding = item.embeddings;
+ ragQueryId = item.id;
+ }
+
+ let finalFileIds = input.fileIds ?? [];
+
+ if (input.knowledgeIds && input.knowledgeIds.length > 0) {
+ const knowledgeFiles = await serverDB.query.knowledgeBaseFiles.findMany({
+ where: inArray(knowledgeBaseFiles.knowledgeBaseId, input.knowledgeIds),
+ });
+
+ finalFileIds = knowledgeFiles.map((f) => f.fileId).concat(finalFileIds);
+ }
+
+ const chunks = await ctx.chunkModel.semanticSearchForChat({
+ embedding,
+ fileIds: finalFileIds,
+ query: input.rewriteQuery,
  });

- const result = await ctx.messageModel.createMessageQuery({
- embeddingsId,
- messageId: input.messageId,
- rewriteQuery: input.rewriteQuery,
- userQuery: input.userQuery,
- });
+ // TODO: need to rerank the chunks

- ragQueryId = result.id;
- } else {
- embedding = item.embeddings;
- ragQueryId = item.id;
- }
+ return { chunks, queryId: ragQueryId };
+ } catch (e) {
+ console.error(e);

- console.time('semanticSearch');
- let finalFileIds = input.fileIds ?? [];
-
- if (input.knowledgeIds && input.knowledgeIds.length > 0) {
- const knowledgeFiles = await serverDB.query.knowledgeBaseFiles.findMany({
- where: inArray(knowledgeBaseFiles.knowledgeBaseId, input.knowledgeIds),
+ throw new TRPCError({
+ code: 'INTERNAL_SERVER_ERROR',
+ message: (e as any).errorType || JSON.stringify(e),
  });
-
- finalFileIds = knowledgeFiles.map((f) => f.fileId).concat(finalFileIds);
  }
-
- const chunks = await ctx.chunkModel.semanticSearchForChat({
- embedding,
- fileIds: finalFileIds,
- query: input.rewriteQuery,
- });
- // TODO: need to rerank the chunks
- console.timeEnd('semanticSearch');
-
- return { chunks, queryId: ragQueryId };
  }),
  });
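Two behaviours are added here: the embedding input is capped at 8000 characters before being sent to the embedding model, and any failure along the search path is logged and re-thrown as a `TRPCError` rather than escaping as an unhandled rejection. A condensed sketch of just those two pieces; `safeEmbed` and `embed` are hypothetical stand-ins for the router body and `agentRuntime.embeddings`:

```ts
import { TRPCError } from '@trpc/server';

// Assumed from the hunk above: queries longer than 8000 chars are truncated
// so the embedding request stays inside the model's context window.
const MAX_EMBEDDING_INPUT = 8000;

const safeEmbed = async (
  rewriteQuery: string,
  embed: (input: string) => Promise<number[]>,
): Promise<number[]> => {
  try {
    const query =
      rewriteQuery.length > MAX_EMBEDDING_INPUT
        ? rewriteQuery.slice(0, MAX_EMBEDDING_INPUT)
        : rewriteQuery;

    return await embed(query);
  } catch (e) {
    console.error(e);
    // Same error shape as the router: surface errorType when present.
    throw new TRPCError({
      code: 'INTERNAL_SERVER_ERROR',
      message: (e as any).errorType || JSON.stringify(e),
    });
  }
};
```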
@@ -21,7 +21,7 @@ export interface ChatRAGAction {
  id: string,
  userQuery: string,
  messages: string[],
- ) => Promise<{ chunks: ChatSemanticSearchChunk[]; queryId: string; rewriteQuery?: string }>;
+ ) => Promise<{ chunks: ChatSemanticSearchChunk[]; queryId?: string; rewriteQuery?: string }>;
  /**
  * Rewrite user content to better RAG query
  */
@@ -74,17 +74,23 @@ export const chatRag: StateCreator<ChatStore, [['zustand/devtools', never]], [],

  // 2. retrieve chunks from semantic search
  const files = chatSelectors.currentUserFiles(get()).map((f) => f.id);
- const { chunks, queryId } = await ragService.semanticSearchForChat({
- fileIds: knowledgeIds().fileIds.concat(files),
- knowledgeIds: knowledgeIds().knowledgeBaseIds,
- messageId: id,
- rewriteQuery: rewriteQuery || userQuery,
- userQuery,
- });
-
- get().internal_toggleMessageRAGLoading(false, id);
-
- return { chunks, queryId, rewriteQuery };
+ try {
+ const { chunks, queryId } = await ragService.semanticSearchForChat({
+ fileIds: knowledgeIds().fileIds.concat(files),
+ knowledgeIds: knowledgeIds().knowledgeBaseIds,
+ messageId: id,
+ rewriteQuery: rewriteQuery || userQuery,
+ userQuery,
+ });
+
+ get().internal_toggleMessageRAGLoading(false, id);
+
+ return { chunks, queryId, rewriteQuery };
+ } catch {
+ get().internal_toggleMessageRAGLoading(false, id);
+
+ return { chunks: [] };
+ }
  },
  internal_rewriteQuery: async (id, content, messages) => {
  let rewriteQuery = content;
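With the router now able to throw, the store wraps the semantic-search call and degrades gracefully: on failure it clears the loading flag and returns an empty chunk list, which is why `queryId` became optional in the `ChatRAGAction` signature above. A minimal sketch of the pattern, with hypothetical stand-ins for the store's dependencies:

```ts
type SearchResult = { chunks: unknown[]; queryId?: string };

const retrieveChunksSafely = async (
  search: () => Promise<SearchResult>,
  setRagLoading: (loading: boolean) => void,
): Promise<SearchResult> => {
  try {
    return await search();
  } catch {
    // Fall back to "no retrieved context" so the chat request still proceeds.
    return { chunks: [] };
  } finally {
    // Mirrors internal_toggleMessageRAGLoading(false, id) on both paths.
    setRagLoading(false);
  }
};
```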