@lobehub/chat 1.45.17 → 1.46.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (47)
  1. package/.env.example +4 -1
  2. package/CHANGELOG.md +26 -0
  3. package/changelog/v1.json +9 -0
  4. package/docs/self-hosting/advanced/knowledge-base.mdx +9 -0
  5. package/docs/self-hosting/advanced/knowledge-base.zh-CN.mdx +9 -0
  6. package/locales/ar/providers.json +3 -0
  7. package/locales/bg-BG/providers.json +3 -0
  8. package/locales/de-DE/providers.json +3 -0
  9. package/locales/en-US/providers.json +3 -0
  10. package/locales/es-ES/providers.json +3 -0
  11. package/locales/fa-IR/providers.json +3 -0
  12. package/locales/fr-FR/providers.json +3 -0
  13. package/locales/it-IT/providers.json +3 -0
  14. package/locales/ja-JP/providers.json +3 -0
  15. package/locales/ko-KR/providers.json +3 -0
  16. package/locales/nl-NL/providers.json +3 -0
  17. package/locales/pl-PL/providers.json +3 -0
  18. package/locales/pt-BR/providers.json +3 -0
  19. package/locales/ru-RU/providers.json +3 -0
  20. package/locales/tr-TR/providers.json +3 -0
  21. package/locales/vi-VN/providers.json +3 -0
  22. package/locales/zh-CN/providers.json +3 -0
  23. package/locales/zh-TW/providers.json +3 -0
  24. package/package.json +1 -1
  25. package/src/app/(main)/settings/provider/(detail)/[id]/index.tsx +0 -1
  26. package/src/config/aiModels/index.ts +3 -0
  27. package/src/config/aiModels/lmstudio.ts +27 -0
  28. package/src/config/knowledge.ts +2 -0
  29. package/src/config/modelProviders/index.ts +6 -3
  30. package/src/config/modelProviders/lmstudio.ts +25 -0
  31. package/src/const/settings/knowledge.ts +25 -0
  32. package/src/const/settings/llm.ts +9 -0
  33. package/src/database/schemas/ragEvals.ts +2 -2
  34. package/src/libs/agent-runtime/AgentRuntime.ts +7 -0
  35. package/src/libs/agent-runtime/bedrock/index.ts +64 -3
  36. package/src/libs/agent-runtime/lmstudio/index.test.ts +255 -0
  37. package/src/libs/agent-runtime/lmstudio/index.ts +11 -0
  38. package/src/libs/agent-runtime/ollama/index.ts +37 -1
  39. package/src/libs/agent-runtime/types/type.ts +1 -0
  40. package/src/server/globalConfig/index.ts +6 -0
  41. package/src/server/globalConfig/parseFilesConfig.test.ts +17 -0
  42. package/src/server/globalConfig/parseFilesConfig.ts +57 -0
  43. package/src/server/routers/async/file.ts +8 -8
  44. package/src/server/routers/lambda/chunk.ts +12 -16
  45. package/src/types/knowledgeBase/index.ts +8 -0
  46. package/src/types/user/settings/filesConfig.ts +9 -0
  47. package/src/types/user/settings/keyVaults.ts +1 -0
package/src/libs/agent-runtime/lmstudio/index.test.ts
@@ -0,0 +1,255 @@
+ // @vitest-environment node
+ import OpenAI from 'openai';
+ import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+ import {
+   ChatStreamCallbacks,
+   LobeOpenAICompatibleRuntime,
+   ModelProvider,
+ } from '@/libs/agent-runtime';
+
+ import * as debugStreamModule from '../utils/debugStream';
+ import { LobeLMStudioAI } from './index';
+
+ const provider = ModelProvider.LMStudio;
+ const defaultBaseURL = 'http://localhost:1234/v1';
+
+ const bizErrorType = 'ProviderBizError';
+ const invalidErrorType = 'InvalidProviderAPIKey';
+
+ // Mock the console.error to avoid polluting test output
+ vi.spyOn(console, 'error').mockImplementation(() => {});
+
+ let instance: LobeOpenAICompatibleRuntime;
+
+ beforeEach(() => {
+   instance = new LobeLMStudioAI({ apiKey: 'test' });
+
+   // Use vi.spyOn to mock the chat.completions.create method
+   vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+     new ReadableStream() as any,
+   );
+ });
+
+ afterEach(() => {
+   vi.clearAllMocks();
+ });
+
+ describe('LobeLMStudioAI', () => {
+   describe('init', () => {
+     it('should correctly initialize with an API key', async () => {
+       const instance = new LobeLMStudioAI({ apiKey: 'test_api_key' });
+       expect(instance).toBeInstanceOf(LobeLMStudioAI);
+       expect(instance.baseURL).toEqual(defaultBaseURL);
+     });
+   });
+
+   describe('chat', () => {
+     describe('Error', () => {
+       it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => {
+         // Arrange
+         const apiError = new OpenAI.APIError(
+           400,
+           {
+             status: 400,
+             error: {
+               message: 'Bad Request',
+             },
+           },
+           'Error message',
+           {},
+         );
+
+         vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+         // Act
+         try {
+           await instance.chat({
+             messages: [{ content: 'Hello', role: 'user' }],
+             model: 'deepseek-chat',
+             temperature: 0,
+           });
+         } catch (e) {
+           expect(e).toEqual({
+             endpoint: defaultBaseURL,
+             error: {
+               error: { message: 'Bad Request' },
+               status: 400,
+             },
+             errorType: bizErrorType,
+             provider,
+           });
+         }
+       });
+
+       it('should throw AgentRuntimeError with NoOpenAIAPIKey if no apiKey is provided', async () => {
+         try {
+           new LobeLMStudioAI({});
+         } catch (e) {
+           expect(e).toEqual({ errorType: invalidErrorType });
+         }
+       });
+
+       it('should return OpenAIBizError with the cause when OpenAI.APIError is thrown with cause', async () => {
+         // Arrange
+         const errorInfo = {
+           stack: 'abc',
+           cause: {
+             message: 'api is undefined',
+           },
+         };
+         const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
+
+         vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+         // Act
+         try {
+           await instance.chat({
+             messages: [{ content: 'Hello', role: 'user' }],
+             model: 'deepseek-chat',
+             temperature: 0,
+           });
+         } catch (e) {
+           expect(e).toEqual({
+             endpoint: defaultBaseURL,
+             error: {
+               cause: { message: 'api is undefined' },
+               stack: 'abc',
+             },
+             errorType: bizErrorType,
+             provider,
+           });
+         }
+       });
+
+       it('should return OpenAIBizError with an cause response with desensitize Url', async () => {
+         // Arrange
+         const errorInfo = {
+           stack: 'abc',
+           cause: { message: 'api is undefined' },
+         };
+         const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {});
+
+         instance = new LobeLMStudioAI({
+           apiKey: 'test',
+
+           baseURL: 'https://api.abc.com/v1',
+         });
+
+         vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
+
+         // Act
+         try {
+           await instance.chat({
+             messages: [{ content: 'Hello', role: 'user' }],
+             model: 'deepseek-chat',
+             temperature: 0,
+           });
+         } catch (e) {
+           expect(e).toEqual({
+             endpoint: 'https://api.***.com/v1',
+             error: {
+               cause: { message: 'api is undefined' },
+               stack: 'abc',
+             },
+             errorType: bizErrorType,
+             provider,
+           });
+         }
+       });
+
+       it('should throw an InvalidDeepSeekAPIKey error type on 401 status code', async () => {
+         // Mock the API call to simulate a 401 error
+         const error = new Error('Unauthorized') as any;
+         error.status = 401;
+         vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error);
+
+         try {
+           await instance.chat({
+             messages: [{ content: 'Hello', role: 'user' }],
+             model: 'deepseek-chat',
+             temperature: 0,
+           });
+         } catch (e) {
+           // Expect the chat method to throw an error with InvalidDeepSeekAPIKey
+           expect(e).toEqual({
+             endpoint: defaultBaseURL,
+             error: new Error('Unauthorized'),
+             errorType: invalidErrorType,
+             provider,
+           });
+         }
+       });
+
+       it('should return AgentRuntimeError for non-OpenAI errors', async () => {
+         // Arrange
+         const genericError = new Error('Generic Error');
+
+         vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError);
+
+         // Act
+         try {
+           await instance.chat({
+             messages: [{ content: 'Hello', role: 'user' }],
+             model: 'deepseek-chat',
+             temperature: 0,
+           });
+         } catch (e) {
+           expect(e).toEqual({
+             endpoint: defaultBaseURL,
+             errorType: 'AgentRuntimeError',
+             provider,
+             error: {
+               name: genericError.name,
+               cause: genericError.cause,
+               message: genericError.message,
+               stack: genericError.stack,
+             },
+           });
+         }
+       });
+     });
+
+     describe('DEBUG', () => {
+       it('should call debugStream and return StreamingTextResponse when DEBUG_LMSTUDIO_CHAT_COMPLETION is 1', async () => {
+         // Arrange
+         const mockProdStream = new ReadableStream() as any; // mocked prod stream
+         const mockDebugStream = new ReadableStream({
+           start(controller) {
+             controller.enqueue('Debug stream content');
+             controller.close();
+           },
+         }) as any;
+         mockDebugStream.toReadableStream = () => mockDebugStream; // attach a toReadableStream method
+
+         // Mock the chat.completions.create return value, including a mocked tee method
+         (instance['client'].chat.completions.create as Mock).mockResolvedValue({
+           tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }],
+         });
+
+         // Save the original environment variable value
+         const originalDebugValue = process.env.DEBUG_LMSTUDIO_CHAT_COMPLETION;
+
+         // Mock the environment variable
+         process.env.DEBUG_LMSTUDIO_CHAT_COMPLETION = '1';
+         vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve());
+
+         // Run the test
+         // Run your test function and make sure it calls debugStream when the condition is met
+         // This is a hypothetical test call; adjust it to your actual setup
+         await instance.chat({
+           messages: [{ content: 'Hello', role: 'user' }],
+           model: 'deepseek-chat',
+           stream: true,
+           temperature: 0,
+         });
+
+         // Verify that debugStream was called
+         expect(debugStreamModule.debugStream).toHaveBeenCalled();
+
+         // Restore the original environment variable value
+         process.env.DEBUG_LMSTUDIO_CHAT_COMPLETION = originalDebugValue;
+       });
+     });
+   });
+ });
package/src/libs/agent-runtime/lmstudio/index.ts
@@ -0,0 +1,11 @@
+ import { ModelProvider } from '../types';
+ import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
+
+ export const LobeLMStudioAI = LobeOpenAICompatibleFactory({
+   apiKey: 'placeholder-to-avoid-error',
+   baseURL: 'http://localhost:1234/v1',
+   debug: {
+     chatCompletion: () => process.env.DEBUG_LMSTUDIO_CHAT_COMPLETION === '1',
+   },
+   provider: ModelProvider.LMStudio,
+ });
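The LM Studio runtime above is produced by the same OpenAI-compatible factory used by the other providers, so it exposes the usual chat interface and defaults to LM Studio's local server at http://localhost:1234/v1. A minimal usage sketch (the import path follows the package layout above; the model name is a placeholder for whatever model is loaded in LM Studio, and the call is assumed to run inside an async context):

import { LobeLMStudioAI } from '@/libs/agent-runtime/lmstudio';

// LM Studio itself requires no API key; the value only satisfies the underlying OpenAI SDK client.
const runtime = new LobeLMStudioAI({ apiKey: 'placeholder' });

const response = await runtime.chat({
  messages: [{ content: 'Hello', role: 'user' }],
  model: 'my-local-model', // placeholder: use the identifier of the model loaded in LM Studio
  temperature: 0,
});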
package/src/libs/agent-runtime/ollama/index.ts
@@ -6,7 +6,13 @@ import { ChatModelCard } from '@/types/llm';
 
  import { LobeRuntimeAI } from '../BaseAI';
  import { AgentRuntimeErrorType } from '../error';
- import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types';
+ import {
+   ChatCompetitionOptions,
+   ChatStreamPayload,
+   Embeddings,
+   EmbeddingsPayload,
+   ModelProvider,
+ } from '../types';
  import { AgentRuntimeError } from '../utils/createError';
  import { debugStream } from '../utils/debugStream';
  import { StreamingResponse } from '../utils/response';
@@ -84,6 +90,18 @@ export class LobeOllamaAI implements LobeRuntimeAI {
      }
    }
 
+   async embeddings(payload: EmbeddingsPayload): Promise<Embeddings[]> {
+     const input = Array.isArray(payload.input) ? payload.input : [payload.input];
+     const promises = input.map((inputText: string) =>
+       this.invokeEmbeddingModel({
+         dimensions: payload.dimensions,
+         input: inputText,
+         model: payload.model,
+       }),
+     );
+     return await Promise.all(promises);
+   }
+
    async models(): Promise<ChatModelCard[]> {
      const list = await this.client.list();
      return list.models.map((model) => ({
@@ -91,6 +109,24 @@ export class LobeOllamaAI implements LobeRuntimeAI {
      }));
    }
 
+   private invokeEmbeddingModel = async (payload: EmbeddingsPayload): Promise<Embeddings> => {
+     try {
+       const responseBody = await this.client.embeddings({
+         model: payload.model,
+         prompt: payload.input as string,
+       });
+       return responseBody.embedding;
+     } catch (error) {
+       const e = error as { message: string; name: string; status_code: number };
+
+       throw AgentRuntimeError.chat({
+         error: { message: e.message, name: e.name, status_code: e.status_code },
+         errorType: AgentRuntimeErrorType.OllamaBizError,
+         provider: ModelProvider.Ollama,
+       });
+     }
+   };
+
    private buildOllamaMessages(messages: OpenAIChatMessage[]) {
      return messages.map((message) => this.convertContentToOllamaMessage(message));
    }
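The new embeddings support in the Ollama runtime fans out one request per input string and gathers the results with Promise.all, wrapping any failure in an OllamaBizError. A rough sketch of how it could be exercised, assuming a local Ollama server with an embedding model already pulled (the constructor option and the model name are illustrative assumptions, not taken from this diff):

import { LobeOllamaAI } from '@/libs/agent-runtime/ollama';

// Assumption: the runtime points at the default local Ollama endpoint.
const ollama = new LobeOllamaAI({ baseURL: 'http://127.0.0.1:11434' });

// Each input string becomes a separate call to the Ollama embeddings endpoint.
const vectors = await ollama.embeddings({
  dimensions: 1024,
  input: ['first chunk of text', 'second chunk of text'],
  model: 'nomic-embed-text', // hypothetical: any embedding model available locally
});
// vectors contains one embedding array per input string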
package/src/libs/agent-runtime/types/type.ts
@@ -39,6 +39,7 @@ export enum ModelProvider {
    HuggingFace = 'huggingface',
    Hunyuan = 'hunyuan',
    InternLM = 'internlm',
+   LMStudio = 'lmstudio',
    Minimax = 'minimax',
    Mistral = 'mistral',
    Moonshot = 'moonshot',
package/src/server/globalConfig/index.ts
@@ -1,6 +1,7 @@
  import { appEnv, getAppConfig } from '@/config/app';
  import { authEnv } from '@/config/auth';
  import { fileEnv } from '@/config/file';
+ import { knowledgeEnv } from '@/config/knowledge';
  import { langfuseEnv } from '@/config/langfuse';
  import { enableNextAuth } from '@/const/auth';
  import { parseSystemAgent } from '@/server/globalConfig/parseSystemAgent';
@@ -9,6 +10,7 @@ import { GlobalServerConfig } from '@/types/serverConfig';
  import { genServerLLMConfig } from './_deprecated';
  import { genServerAiProvidersConfig } from './genServerAiProviderConfig';
  import { parseAgentConfig } from './parseDefaultAgent';
+ import { parseFilesConfig } from './parseFilesConfig';
 
  export const getServerGlobalConfig = () => {
    const { ACCESS_CODES, DEFAULT_AGENT_CONFIG } = getAppConfig();
@@ -73,3 +75,7 @@ export const getServerDefaultAgentConfig = () => {
 
    return parseAgentConfig(DEFAULT_AGENT_CONFIG) || {};
  };
+
+ export const getServerDefaultFilesConfig = () => {
+   return parseFilesConfig(knowledgeEnv.DEFAULT_FILES_CONFIG);
+ };
package/src/server/globalConfig/parseFilesConfig.test.ts
@@ -0,0 +1,17 @@
+ import { describe, expect, it } from 'vitest';
+
+ import { parseFilesConfig } from './parseFilesConfig';
+
+ describe('parseFilesConfig', () => {
+   // Verify that the embeddings configuration is parsed correctly
+   it('parses embeddings configuration correctly', () => {
+     const envStr =
+       'embedding_model=openai/embedding-text-3-large,reranker_model=cohere/rerank-english-v3.0,query_model=full_text';
+     const expected = {
+       embeddingModel: { provider: 'openai', model: 'embedding-text-3-large' },
+       rerankerModel: { provider: 'cohere', model: 'rerank-english-v3.0' },
+       queryModel: 'full_text',
+     };
+     expect(parseFilesConfig(envStr)).toEqual(expected);
+   });
+ });
package/src/server/globalConfig/parseFilesConfig.ts
@@ -0,0 +1,57 @@
+ import { DEFAULT_FILES_CONFIG } from '@/const/settings/knowledge';
+ import { SystemEmbeddingConfig } from '@/types/knowledgeBase';
+ import { FilesConfig } from '@/types/user/settings/filesConfig';
+
+ const protectedKeys = Object.keys({
+   embedding_model: null,
+   query_model: null,
+   reranker_model: null,
+ });
+
+ export const parseFilesConfig = (envString: string = ''): SystemEmbeddingConfig => {
+   if (!envString) return DEFAULT_FILES_CONFIG;
+   const config: FilesConfig = {} as any;
+
+   // Handle full-width commas and extra whitespace
+   let envValue = envString.replaceAll('，', ',').trim();
+
+   const pairs = envValue.split(',');
+
+   for (const pair of pairs) {
+     const [key, value] = pair.split('=').map((s) => s.trim());
+
+     if (key && value) {
+       const [provider, ...modelParts] = value.split('/');
+       const model = modelParts.join('/');
+
+       if ((!provider || !model) && key !== 'query_model') {
+         throw new Error('Missing model or provider value');
+       }
+
+       if (key === 'query_model' && value === '') {
+         throw new Error('Missing query mode value');
+       }
+
+       if (protectedKeys.includes(key)) {
+         switch (key) {
+           case 'embedding_model': {
+             config.embeddingModel = { model: model.trim(), provider: provider.trim() };
+             break;
+           }
+           case 'reranker_model': {
+             config.rerankerModel = { model: model.trim(), provider: provider.trim() };
+             break;
+           }
+           case 'query_model': {
+             config.queryModel = value;
+             break;
+           }
+         }
+       }
+     } else {
+       throw new Error('Invalid environment variable format');
+     }
+   }
+
+   return config;
+ };
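Read together with the test above, parseFilesConfig expects the DEFAULT_FILES_CONFIG environment variable (exposed through knowledgeEnv in src/config/knowledge.ts) to be a comma-separated list of key=value pairs, with embedding_model and reranker_model values in provider/model form; full-width commas are normalized before splitting, and a pair missing its key, provider, or model throws. An illustrative value, mirroring the test fixture:

DEFAULT_FILES_CONFIG="embedding_model=openai/embedding-text-3-large,reranker_model=cohere/rerank-english-v3.0,query_model=full_text"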
package/src/server/routers/async/file.ts
@@ -5,15 +5,15 @@ import { z } from 'zod';
 
  import { serverDBEnv } from '@/config/db';
  import { fileEnv } from '@/config/file';
- import { DEFAULT_EMBEDDING_MODEL } from '@/const/settings';
+ import { DEFAULT_FILE_EMBEDDING_MODEL_ITEM } from '@/const/settings/knowledge';
  import { NewChunkItem, NewEmbeddingsItem } from '@/database/schemas';
  import { serverDB } from '@/database/server';
  import { ASYNC_TASK_TIMEOUT, AsyncTaskModel } from '@/database/server/models/asyncTask';
  import { ChunkModel } from '@/database/server/models/chunk';
  import { EmbeddingModel } from '@/database/server/models/embedding';
  import { FileModel } from '@/database/server/models/file';
- import { ModelProvider } from '@/libs/agent-runtime';
  import { asyncAuthedProcedure, asyncRouter as router } from '@/libs/trpc/async';
+ import { getServerDefaultFilesConfig } from '@/server/globalConfig';
  import { initAgentRuntimeWithUserPayload } from '@/server/modules/AgentRuntime';
  import { S3 } from '@/server/modules/S3';
  import { ChunkService } from '@/server/services/chunk';
@@ -44,7 +44,6 @@ export const fileRouter = router({
      .input(
        z.object({
          fileId: z.string(),
-         model: z.string().default(DEFAULT_EMBEDDING_MODEL),
          taskId: z.string(),
        }),
      )
@@ -57,6 +56,9 @@ export const fileRouter = router({
 
        const asyncTask = await ctx.asyncTaskModel.findById(input.taskId);
 
+       const { model, provider } =
+         getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
+
        if (!asyncTask) throw new TRPCError({ code: 'BAD_REQUEST', message: 'Async Task not found' });
 
        try {
@@ -84,13 +86,12 @@ export const fileRouter = router({
 
        const chunks = await ctx.chunkModel.getChunksTextByFileId(input.fileId);
        const requestArray = chunk(chunks, CHUNK_SIZE);
-
        try {
          await pMap(
            requestArray,
            async (chunks, index) => {
              const agentRuntime = await initAgentRuntimeWithUserPayload(
-               ModelProvider.OpenAI,
+               provider,
                ctx.jwtPayload,
              );
 
@@ -98,11 +99,10 @@ export const fileRouter = router({
              console.log(`执行第 ${number} 个任务`);
 
              console.time(`任务[${number}]: embeddings`);
-
              const embeddings = await agentRuntime.embeddings({
                dimensions: 1024,
                input: chunks.map((c) => c.text),
-               model: input.model,
+               model,
              });
              console.timeEnd(`任务[${number}]: embeddings`);
 
@@ -111,7 +111,7 @@ export const fileRouter = router({
                chunkId: chunks[idx].id,
                embeddings: e,
                fileId: input.fileId,
-               model: input.model,
+               model,
              })) || [];
 
              console.time(`任务[${number}]: insert db`);
package/src/server/routers/lambda/chunk.ts
@@ -1,7 +1,7 @@
  import { inArray } from 'drizzle-orm/expressions';
  import { z } from 'zod';
 
- import { DEFAULT_EMBEDDING_MODEL } from '@/const/settings';
+ import { DEFAULT_FILE_EMBEDDING_MODEL_ITEM } from '@/const/settings/knowledge';
  import { knowledgeBaseFiles } from '@/database/schemas';
  import { serverDB } from '@/database/server';
  import { AsyncTaskModel } from '@/database/server/models/asyncTask';
@@ -9,9 +9,9 @@ import { ChunkModel } from '@/database/server/models/chunk';
  import { EmbeddingModel } from '@/database/server/models/embedding';
  import { FileModel } from '@/database/server/models/file';
  import { MessageModel } from '@/database/server/models/message';
- import { ModelProvider } from '@/libs/agent-runtime';
  import { authedProcedure, router } from '@/libs/trpc';
  import { keyVaults } from '@/libs/trpc/middleware/keyVaults';
+ import { getServerDefaultFilesConfig } from '@/server/globalConfig';
  import { initAgentRuntimeWithUserPayload } from '@/server/modules/AgentRuntime';
  import { ChunkService } from '@/server/services/chunk';
  import { SemanticSearchSchema } from '@/types/rag';
@@ -101,21 +101,18 @@ export const chunkRouter = router({
      .input(
        z.object({
          fileIds: z.array(z.string()).optional(),
-         model: z.string().default(DEFAULT_EMBEDDING_MODEL),
          query: z.string(),
        }),
      )
      .mutation(async ({ ctx, input }) => {
-       console.time('embedding');
-       const agentRuntime = await initAgentRuntimeWithUserPayload(
-         ModelProvider.OpenAI,
-         ctx.jwtPayload,
-       );
+       const { model, provider } =
+         getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
+       const agentRuntime = await initAgentRuntimeWithUserPayload(provider, ctx.jwtPayload);
 
        const embeddings = await agentRuntime.embeddings({
          dimensions: 1024,
          input: input.query,
-         model: input.model,
+         model,
        });
        console.timeEnd('embedding');
 
@@ -130,27 +127,25 @@ export const chunkRouter = router({
      .input(SemanticSearchSchema)
      .mutation(async ({ ctx, input }) => {
        const item = await ctx.messageModel.findMessageQueriesById(input.messageId);
+       const { model, provider } =
+         getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
        let embedding: number[];
        let ragQueryId: string;
-
        // if there is no message rag or it's embeddings, then we need to create one
        if (!item || !item.embeddings) {
          // TODO: need to support customize
-         const agentRuntime = await initAgentRuntimeWithUserPayload(
-           ModelProvider.OpenAI,
-           ctx.jwtPayload,
-         );
+         const agentRuntime = await initAgentRuntimeWithUserPayload(provider, ctx.jwtPayload);
 
          const embeddings = await agentRuntime.embeddings({
            dimensions: 1024,
            input: input.rewriteQuery,
-           model: input.model || DEFAULT_EMBEDDING_MODEL,
+           model,
          });
 
          embedding = embeddings![0];
          const embeddingsId = await ctx.embeddingModel.create({
            embeddings: embedding,
-           model: input.model,
+           model,
          });
 
          const result = await ctx.messageModel.createMessageQuery({
@@ -182,6 +177,7 @@ export const chunkRouter = router({
          fileIds: finalFileIds,
          query: input.rewriteQuery,
        });
+       // TODO: need to rerank the chunks
        console.timeEnd('semanticSearch');
 
        return { chunks, queryId: ragQueryId };
package/src/types/knowledgeBase/index.ts
@@ -1,3 +1,5 @@
+ import { FilesConfigItem } from '../user/settings/filesConfig';
+
  export enum KnowledgeBaseTabs {
    Files = 'files',
    Settings = 'Settings',
@@ -43,3 +45,9 @@ export interface KnowledgeItem {
    name: string;
    type: KnowledgeType;
  }
+
+ export interface SystemEmbeddingConfig {
+   embeddingModel: FilesConfigItem;
+   queryModel: string;
+   rerankerModel: FilesConfigItem;
+ }
package/src/types/user/settings/filesConfig.ts
@@ -0,0 +1,9 @@
+ export interface FilesConfigItem {
+   model: string;
+   provider: string;
+ }
+ export interface FilesConfig {
+   embeddingModel: FilesConfigItem;
+   queryModel: string;
+   rerankerModel: FilesConfigItem;
+ }
package/src/types/user/settings/keyVaults.ts
@@ -44,6 +44,7 @@ export interface UserKeyVaults {
    huggingface?: OpenAICompatibleKeyVault;
    hunyuan?: OpenAICompatibleKeyVault;
    internlm?: OpenAICompatibleKeyVault;
+   lmstudio?: OpenAICompatibleKeyVault;
    lobehub?: any;
    minimax?: OpenAICompatibleKeyVault;
    mistral?: OpenAICompatibleKeyVault;