@lobehub/chat 1.15.7 → 1.15.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.i18nrc.js +0 -1
- package/CHANGELOG.md +50 -0
- package/README.md +6 -6
- package/README.zh-CN.md +6 -6
- package/locales/ar/knowledgeBase.json +1 -0
- package/locales/ar/ragEval.json +91 -0
- package/locales/bg-BG/knowledgeBase.json +1 -0
- package/locales/bg-BG/ragEval.json +91 -0
- package/locales/de-DE/knowledgeBase.json +1 -0
- package/locales/de-DE/ragEval.json +91 -0
- package/locales/en-US/knowledgeBase.json +1 -0
- package/locales/en-US/ragEval.json +91 -0
- package/locales/es-ES/knowledgeBase.json +1 -0
- package/locales/es-ES/ragEval.json +91 -0
- package/locales/fr-FR/knowledgeBase.json +1 -0
- package/locales/fr-FR/ragEval.json +91 -0
- package/locales/it-IT/knowledgeBase.json +1 -0
- package/locales/it-IT/ragEval.json +91 -0
- package/locales/ja-JP/knowledgeBase.json +1 -0
- package/locales/ja-JP/ragEval.json +91 -0
- package/locales/ko-KR/knowledgeBase.json +1 -0
- package/locales/ko-KR/ragEval.json +91 -0
- package/locales/nl-NL/knowledgeBase.json +1 -0
- package/locales/nl-NL/ragEval.json +91 -0
- package/locales/pl-PL/knowledgeBase.json +1 -0
- package/locales/pl-PL/ragEval.json +91 -0
- package/locales/pt-BR/knowledgeBase.json +1 -0
- package/locales/pt-BR/ragEval.json +91 -0
- package/locales/ru-RU/knowledgeBase.json +1 -0
- package/locales/ru-RU/ragEval.json +91 -0
- package/locales/tr-TR/knowledgeBase.json +1 -0
- package/locales/tr-TR/ragEval.json +91 -0
- package/locales/vi-VN/knowledgeBase.json +1 -0
- package/locales/vi-VN/ragEval.json +91 -0
- package/locales/zh-CN/knowledgeBase.json +1 -0
- package/locales/zh-CN/ragEval.json +91 -0
- package/locales/zh-TW/knowledgeBase.json +1 -0
- package/locales/zh-TW/ragEval.json +91 -0
- package/package.json +2 -1
- package/src/app/(main)/repos/[id]/@menu/Head/index.tsx +4 -13
- package/src/app/(main)/repos/[id]/@menu/Menu/index.tsx +30 -21
- package/src/app/(main)/repos/[id]/@menu/default.tsx +8 -2
- package/src/app/(main)/repos/[id]/evals/components/Container.tsx +25 -0
- package/src/app/(main)/repos/[id]/evals/components/Tabs.tsx +35 -0
- package/src/app/(main)/repos/[id]/evals/dataset/CreateDataset/CreateForm.tsx +72 -0
- package/src/app/(main)/repos/[id]/evals/dataset/CreateDataset/index.tsx +37 -0
- package/src/app/(main)/repos/[id]/evals/dataset/DatasetDetail/index.tsx +126 -0
- package/src/app/(main)/repos/[id]/evals/dataset/DatasetList/Item.tsx +59 -0
- package/src/app/(main)/repos/[id]/evals/dataset/DatasetList/index.tsx +32 -0
- package/src/app/(main)/repos/[id]/evals/dataset/EmptyGuide/index.tsx +33 -0
- package/src/app/(main)/repos/[id]/evals/dataset/page.tsx +47 -0
- package/src/app/(main)/repos/[id]/evals/evaluation/CreateEvaluation/CreateForm.tsx +93 -0
- package/src/app/(main)/repos/[id]/evals/evaluation/CreateEvaluation/index.tsx +28 -0
- package/src/app/(main)/repos/[id]/evals/evaluation/CreateEvaluation/useModal.tsx +39 -0
- package/src/app/(main)/repos/[id]/evals/evaluation/EmptyGuide/index.tsx +25 -0
- package/src/app/(main)/repos/[id]/evals/evaluation/EvaluationList/index.tsx +209 -0
- package/src/app/(main)/repos/[id]/evals/evaluation/page.tsx +32 -0
- package/src/app/(main)/repos/[id]/evals/layout.tsx +22 -0
- package/src/app/(main)/repos/[id]/evals/page.tsx +9 -0
- package/src/app/(main)/repos/[id]/evals/type.ts +5 -0
- package/src/app/(main)/repos/[id]/not-found.tsx +3 -0
- package/src/chains/answerWithContext.ts +6 -7
- package/src/components/FileIcon/index.tsx +2 -2
- package/src/config/featureFlags/schema.ts +3 -1
- package/src/database/server/migrations/0008_add_rag_evals.sql +120 -0
- package/src/database/server/migrations/meta/0008_snapshot.json +3463 -0
- package/src/database/server/migrations/meta/_journal.json +7 -0
- package/src/database/server/models/file.ts +11 -2
- package/src/database/server/models/ragEval/dataset.ts +59 -0
- package/src/database/server/models/ragEval/datasetRecord.ts +87 -0
- package/src/database/server/models/ragEval/evaluation.ts +96 -0
- package/src/database/server/models/ragEval/evaluationRecord.ts +64 -0
- package/src/database/server/models/ragEval/index.ts +4 -0
- package/src/database/server/schemas/lobechat/asyncTask.ts +24 -0
- package/src/database/server/schemas/lobechat/file.ts +2 -18
- package/src/database/server/schemas/lobechat/index.ts +2 -0
- package/src/database/server/schemas/lobechat/ragEvals.ts +105 -0
- package/src/database/server/schemas/lobechat/relations.ts +2 -1
- package/src/libs/agent-runtime/types/chat.ts +3 -0
- package/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +3 -1
- package/src/libs/langchain/loaders/index.ts +1 -1
- package/src/locales/default/index.ts +2 -0
- package/src/locales/default/knowledgeBase.ts +1 -0
- package/src/locales/default/ragEval.ts +93 -0
- package/src/server/modules/S3/index.ts +11 -0
- package/src/server/routers/async/index.ts +2 -0
- package/src/server/routers/async/ragEval.ts +138 -0
- package/src/server/routers/lambda/index.ts +2 -1
- package/src/server/routers/lambda/ragEval.ts +296 -0
- package/src/services/ragEval.ts +67 -0
- package/src/services/upload.ts +12 -5
- package/src/store/file/slices/upload/action.ts +8 -6
- package/src/store/knowledgeBase/initialState.ts +3 -1
- package/src/store/knowledgeBase/slices/ragEval/actions/dataset.ts +88 -0
- package/src/store/knowledgeBase/slices/ragEval/actions/evaluation.ts +62 -0
- package/src/store/knowledgeBase/slices/ragEval/actions/index.ts +20 -0
- package/src/store/knowledgeBase/slices/ragEval/index.ts +2 -0
- package/src/store/knowledgeBase/slices/ragEval/initialState.ts +7 -0
- package/src/store/knowledgeBase/store.ts +9 -3
- package/src/store/serverConfig/selectors.test.ts +1 -0
- package/src/types/eval/dataset.ts +47 -0
- package/src/types/eval/evaluation.ts +53 -0
- package/src/types/eval/index.ts +3 -0
- package/src/types/eval/ragas.ts +9 -0
- package/src/types/files/upload.ts +1 -1
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
 * Chinese source strings for the RAG evaluation UI.
 *
 * Top-level groups:
 * - `addDataset`: labels and validation messages of the dataset creation form
 * - `dataset`: dataset list and dataset detail table
 * - `evaluation`: evaluation task creation form and evaluation task table
 */
export default {
  addDataset: {
    confirm: '新建',
    description: {
      placeholder: '数据集简介(选填)',
    },
    name: {
      placeholder: '数据集名称',
      required: '请填写数据集名称',
    },
    title: '添加数据集',
  },
  dataset: {
    addNewButton: '创建数据集',
    emptyGuide: '当前数据集为空,请创建一个数据集。',
    list: {
      table: {
        actions: {
          importData: '导入数据',
        },
        columns: {
          actions: '操作',
          ideal: {
            title: '期望回答',
          },
          question: {
            title: '问题',
          },
          referenceFiles: {
            title: '参考文件',
          },
        },
        notSelected: '请在左侧选择数据集',
        title: '数据集详情',
      },
      title: '数据集',
    },
  },
  evaluation: {
    addEvaluation: {
      confirm: '新建',
      datasetId: {
        placeholder: '请选择你的评测数据集',
        required: '请选择评测数据集',
      },
      description: {
        placeholder: '评测任务简介(选填)',
      },
      name: {
        placeholder: '评测任务名称',
        required: '请填写评测任务名称',
      },
      title: '添加评测任务',
    },
    addNewButton: '创建评测',
    emptyGuide: '当前评测任务为空,开始创建评测。',

    table: {
      columns: {
        // Row actions, including the confirmation prompts shown before delete / run
        actions: {
          checkStatus: '检查状态',
          confirmDelete: '是否删除本条评测',
          confirmRun:
            '是否开始运行?开始运行后将在后台异步执行评测任务,关闭页面不影响异步任务的执行',
          downloadRecords: '下载评测',
          retry: '重试',
          run: '运行',
          title: '操作',
        },
        datasetId: {
          title: '数据集',
        },
        name: {
          title: '评测任务名称',
        },
        records: {
          title: '评测记录数',
        },
        referenceFiles: {
          title: '参考文件',
        },
        // Labels for the error / pending / processing / success states
        status: {
          error: '执行出错',
          pending: '待运行',
          processing: '运行中',
          success: '执行成功',
          title: '状态',
        },
      },
      title: '评测任务列表',
    },
  },
};
|
|
@@ -104,4 +104,15 @@ export class S3 {
|
|
|
104
104
|
|
|
105
105
|
return getSignedUrl(this.client, command, { expiresIn: 3600 });
|
|
106
106
|
}
|
|
107
|
+
|
|
108
|
+
/**
 * Write a string to the bucket under the given key.
 *
 * @param path - object key to write to
 * @param content - string used as the object body
 * @returns the result of sending the `PutObjectCommand` through the client
 */
public async uploadContent(path: string, content: string) {
  // The object is only made publicly readable when ACLs are enabled on this instance
  const acl = this.setAcl ? 'public-read' : undefined;
  const putObject = new PutObjectCommand({
    ACL: acl,
    Body: content,
    Bucket: this.bucket,
    Key: path,
  });

  return this.client.send(putObject);
}
|
|
107
118
|
}
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { publicProcedure, asyncRouter as router } from '@/libs/trpc/async';
|
|
2
2
|
|
|
3
3
|
import { fileRouter } from './file';
|
|
4
|
+
import { ragEvalRouter } from './ragEval';
|
|
4
5
|
|
|
5
6
|
/**
 * Root router of the async tRPC endpoint: mounts the file and RAG evaluation
 * sub-routers next to a liveness check.
 */
export const asyncRouter = router({
  file: fileRouter,
  // Registered on `publicProcedure`; always answers with a fixed string
  healthcheck: publicProcedure.query(() => "i'm live!"),
  ragEval: ragEvalRouter,
});
|
|
9
11
|
|
|
10
12
|
export type AsyncRouter = typeof asyncRouter;
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { TRPCError } from '@trpc/server';
|
|
2
|
+
import OpenAI from 'openai';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
|
|
5
|
+
import { initAgentRuntimeWithUserPayload } from '@/app/api/chat/agentRuntime';
|
|
6
|
+
import { chainAnswerWithContext } from '@/chains/answerWithContext';
|
|
7
|
+
import { DEFAULT_EMBEDDING_MODEL, DEFAULT_MODEL } from '@/const/settings';
|
|
8
|
+
import { ChunkModel } from '@/database/server/models/chunk';
|
|
9
|
+
import { EmbeddingModel } from '@/database/server/models/embedding';
|
|
10
|
+
import { FileModel } from '@/database/server/models/file';
|
|
11
|
+
import {
|
|
12
|
+
EvalDatasetRecordModel,
|
|
13
|
+
EvalEvaluationModel,
|
|
14
|
+
EvaluationRecordModel,
|
|
15
|
+
} from '@/database/server/models/ragEval';
|
|
16
|
+
import { ModelProvider } from '@/libs/agent-runtime';
|
|
17
|
+
import { asyncAuthedProcedure, asyncRouter as router } from '@/libs/trpc/async';
|
|
18
|
+
import { ChunkService } from '@/server/services/chunk';
|
|
19
|
+
import { AsyncTaskError } from '@/types/asyncTask';
|
|
20
|
+
import { EvalEvaluationStatus } from '@/types/eval';
|
|
21
|
+
|
|
22
|
+
/**
 * Authenticated async procedure whose context is extended with the models and
 * the chunk service used by the RAG evaluation task, each constructed with the
 * current user's id.
 */
const ragEvalProcedure = asyncAuthedProcedure.use(async ({ ctx, next }) => {
  const { userId } = ctx;

  const evalContext = {
    chunkModel: new ChunkModel(userId),
    chunkService: new ChunkService(userId),
    datasetRecordModel: new EvalDatasetRecordModel(userId),
    embeddingModel: new EmbeddingModel(userId),
    evalRecordModel: new EvaluationRecordModel(userId),
    evaluationModel: new EvalEvaluationModel(userId),
    fileModel: new FileModel(userId),
  };

  return next({ ctx: evalContext });
});
|
|
37
|
+
|
|
38
|
+
export const ragEvalRouter = router({
  /**
   * Process one evaluation record end to end:
   *   1. embed the question, unless the record already references an embedding;
   *   2. retrieve context chunks, unless the record already stores a context;
   *   3. ask the language model for an answer and persist it on the record.
   *
   * Input: `evalRecordId` - id of the evaluation record to process.
   *
   * Returns `{ success: true }` once the answer is stored. When any step inside
   * the pipeline fails, the error is written to the record, the parent
   * evaluation is flagged as errored and `{ success: false }` is returned.
   *
   * Throws `TRPCError` (`BAD_REQUEST`) when the record does not exist.
   */
  runRecordEvaluation: ragEvalProcedure
    .input(
      z.object({
        evalRecordId: z.number(),
      }),
    )
    .mutation(async ({ ctx, input }) => {
      const evalRecord = await ctx.evalRecordModel.findById(input.evalRecordId);

      if (!evalRecord) {
        throw new TRPCError({ code: 'BAD_REQUEST', message: 'Evaluation record not found' });
      }

      // Start of the measured duration that is stored with the answer
      const startedAt = Date.now();
      try {
        const agentRuntime = await initAgentRuntimeWithUserPayload(
          ModelProvider.OpenAI,
          ctx.jwtPayload,
        );

        const { question } = evalRecord;

        // Resolve both models once, so that what gets persisted is always the
        // model that was really called (and never an empty value).
        const embeddingModel = evalRecord.embeddingModel || DEFAULT_EMBEDDING_MODEL;
        const languageModel = evalRecord.languageModel || DEFAULT_MODEL;

        let questionEmbeddingId = evalRecord.questionEmbeddingId;
        let context = evalRecord.context;

        // No question embedding referenced yet: compute one and persist it
        if (!questionEmbeddingId) {
          const embeddings = await agentRuntime.embeddings({
            dimensions: 1024,
            input: question,
            model: embeddingModel,
          });

          // Fail loudly instead of storing an embedding row without a vector
          const vector = embeddings?.[0]?.embedding;
          if (!vector) {
            throw new Error('Failed to embed the question: no embedding was returned');
          }

          const embeddingId = await ctx.embeddingModel.create({
            embeddings: vector,
            model: embeddingModel,
          });

          await ctx.evalRecordModel.update(evalRecord.id, {
            questionEmbeddingId: embeddingId,
          });

          questionEmbeddingId = embeddingId;
        }

        // No context stored yet: run the retrieval step and persist its result
        if (!context || context.length === 0) {
          const datasetRecord = await ctx.datasetRecordModel.findById(evalRecord.datasetRecordId);
          if (!datasetRecord) {
            throw new Error(`Dataset record ${evalRecord.datasetRecordId} not found`);
          }

          const embeddingItem = await ctx.embeddingModel.findById(questionEmbeddingId);
          if (!embeddingItem?.embeddings) {
            throw new Error(`Question embedding ${questionEmbeddingId} not found`);
          }

          const chunks = await ctx.chunkModel.semanticSearchForChat({
            embedding: embeddingItem.embeddings,
            // NOTE(review): referenceFiles is optional when dataset records are
            // created; confirm how semanticSearchForChat treats a missing list.
            fileIds: datasetRecord.referenceFiles!,
            query: evalRecord.question,
          });

          // Keep only chunks that actually carry text
          context = chunks.map((item) => item.text).filter(Boolean) as string[];
          await ctx.evalRecordModel.update(evalRecord.id, { context });
        }

        // Generate the answer with the language model
        const { messages } = chainAnswerWithContext({ context, knowledge: [], question });

        const response = await agentRuntime.chat({
          messages: messages!,
          model: languageModel,
          responseMode: 'json',
          stream: false,
          temperature: 1,
        });

        const data = (await response.json()) as OpenAI.ChatCompletion;

        const answer = data.choices[0].message.content;

        await ctx.evalRecordModel.update(input.evalRecordId, {
          answer,
          duration: Date.now() - startedAt,
          languageModel,
          status: EvalEvaluationStatus.Success,
        });

        return { success: true };
      } catch (e) {
        // Record the failure on the record itself ...
        await ctx.evalRecordModel.update(input.evalRecordId, {
          error: new AsyncTaskError((e as Error).name, (e as Error).message),
          status: EvalEvaluationStatus.Error,
        });

        // ... and flag the parent evaluation as errored as well
        await ctx.evaluationModel.update(evalRecord.evaluationId, {
          status: EvalEvaluationStatus.Error,
        });

        console.error('[RAGEvaluation] error', e);

        return { success: false };
      }
    }),
});
|
|
@@ -5,12 +5,12 @@ import { publicProcedure, router } from '@/libs/trpc';
|
|
|
5
5
|
|
|
6
6
|
import { agentRouter } from './agent';
|
|
7
7
|
import { chunkRouter } from './chunk';
|
|
8
|
-
// router that connect to db
|
|
9
8
|
import { fileRouter } from './file';
|
|
10
9
|
import { importerRouter } from './importer';
|
|
11
10
|
import { knowledgeBaseRouter } from './knowledgeBase';
|
|
12
11
|
import { messageRouter } from './message';
|
|
13
12
|
import { pluginRouter } from './plugin';
|
|
13
|
+
import { ragEvalRouter } from './ragEval';
|
|
14
14
|
import { sessionRouter } from './session';
|
|
15
15
|
import { sessionGroupRouter } from './sessionGroup';
|
|
16
16
|
import { topicRouter } from './topic';
|
|
@@ -25,6 +25,7 @@ export const lambdaRouter = router({
|
|
|
25
25
|
knowledgeBase: knowledgeBaseRouter,
|
|
26
26
|
message: messageRouter,
|
|
27
27
|
plugin: pluginRouter,
|
|
28
|
+
ragEval: ragEvalRouter,
|
|
28
29
|
session: sessionRouter,
|
|
29
30
|
sessionGroup: sessionGroupRouter,
|
|
30
31
|
topic: topicRouter,
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/* eslint-disable sort-keys-fix/sort-keys-fix */
|
|
2
|
+
import { TRPCError } from '@trpc/server';
|
|
3
|
+
import dayjs from 'dayjs';
|
|
4
|
+
import JSONL from 'jsonl-parse-stringify';
|
|
5
|
+
import pMap from 'p-map';
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
|
|
8
|
+
import { DEFAULT_EMBEDDING_MODEL, DEFAULT_MODEL } from '@/const/settings';
|
|
9
|
+
import { FileModel } from '@/database/server/models/file';
|
|
10
|
+
import {
|
|
11
|
+
EvalDatasetModel,
|
|
12
|
+
EvalDatasetRecordModel,
|
|
13
|
+
EvalEvaluationModel,
|
|
14
|
+
EvaluationRecordModel,
|
|
15
|
+
} from '@/database/server/models/ragEval';
|
|
16
|
+
import { authedProcedure, router } from '@/libs/trpc';
|
|
17
|
+
import { keyVaults } from '@/libs/trpc/middleware/keyVaults';
|
|
18
|
+
import { S3 } from '@/server/modules/S3';
|
|
19
|
+
import { createAsyncServerClient } from '@/server/routers/async';
|
|
20
|
+
import { getFullFileUrl } from '@/server/utils/files';
|
|
21
|
+
import {
|
|
22
|
+
EvalDatasetRecord,
|
|
23
|
+
EvalEvaluationStatus,
|
|
24
|
+
InsertEvalDatasetRecord,
|
|
25
|
+
RAGEvalDataSetItem,
|
|
26
|
+
insertEvalDatasetRecordSchema,
|
|
27
|
+
insertEvalDatasetsSchema,
|
|
28
|
+
insertEvalEvaluationSchema,
|
|
29
|
+
} from '@/types/eval';
|
|
30
|
+
|
|
31
|
+
/**
 * Authenticated procedure (with the key vaults middleware applied) whose
 * context is extended with the RAG evaluation models, the file model and an
 * S3 client. The models are constructed with the current user's id.
 */
const ragEvalProcedure = authedProcedure.use(keyVaults).use(async ({ ctx, next }) => {
  const { userId } = ctx;

  const evalContext = {
    datasetModel: new EvalDatasetModel(userId),
    fileModel: new FileModel(userId),
    datasetRecordModel: new EvalDatasetRecordModel(userId),
    evaluationModel: new EvalEvaluationModel(userId),
    evaluationRecordModel: new EvaluationRecordModel(userId),
    s3: new S3(),
  };

  return next({ ctx: evalContext });
});
|
|
45
|
+
|
|
46
|
+
/**
 * tRPC router for RAG evaluation: CRUD for datasets, dataset records and
 * evaluations, plus starting an evaluation run and checking/exporting its
 * result.
 */
export const ragEvalRouter = router({
  // Dataset operations

  /** Create a dataset inside a knowledge base; resolves to the new dataset id. */
  createDataset: ragEvalProcedure
    .input(
      z.object({
        description: z.string().optional(),
        knowledgeBaseId: z.string(),
        name: z.string(),
      }),
    )
    .mutation(async ({ input, ctx }) => {
      const data = await ctx.datasetModel.create({
        description: input.description,
        knowledgeBaseId: input.knowledgeBaseId,
        name: input.name,
      });

      return data?.id;
    }),

  /** List the datasets of a knowledge base. */
  getDatasets: ragEvalProcedure
    .input(z.object({ knowledgeBaseId: z.string() }))

    .query(async ({ ctx, input }): Promise<RAGEvalDataSetItem[]> => {
      return ctx.datasetModel.query(input.knowledgeBaseId);
    }),

  /** Delete a dataset by id. */
  removeDataset: ragEvalProcedure
    .input(z.object({ id: z.number() }))
    .mutation(async ({ input, ctx }) => {
      return ctx.datasetModel.delete(input.id);
    }),

  /** Patch a dataset with any subset of the insert schema's fields. */
  updateDataset: ragEvalProcedure
    .input(
      z.object({
        id: z.number(),
        value: insertEvalDatasetsSchema.partial(),
      }),
    )
    .mutation(async ({ input, ctx }) => {
      return ctx.datasetModel.update(input.id, input.value);
    }),

  // Dataset Item operations

  /** Create a single dataset record; resolves to the new record id. */
  createDatasetRecords: ragEvalProcedure
    .input(
      z.object({
        datasetId: z.number(),
        question: z.string(),
        ideal: z.string().optional(),
        referenceFiles: z.array(z.string()).optional(),
        metadata: z.record(z.unknown()).optional(),
      }),
    )
    .mutation(async ({ input, ctx }) => {
      const data = await ctx.datasetRecordModel.create(input);
      return data?.id;
    }),

  /** List the records of a dataset. */
  getDatasetRecords: ragEvalProcedure
    .input(z.object({ datasetId: z.number() }))
    .query(async ({ ctx, input }): Promise<EvalDatasetRecord[]> => {
      return ctx.datasetRecordModel.query(input.datasetId);
    }),

  /** Delete a dataset record by id. */
  removeDatasetRecords: ragEvalProcedure
    .input(z.object({ id: z.number() }))
    .mutation(async ({ input, ctx }) => {
      return ctx.datasetRecordModel.delete(input.id);
    }),

  /** Patch a dataset record; every field of `value` is optional. */
  updateDatasetRecords: ragEvalProcedure
    .input(
      z.object({
        id: z.number(),
        value: z
          .object({
            question: z.string(),
            ideal: z.string(),
            referenceFiles: z.array(z.string()),
            metadata: z.record(z.unknown()),
          })
          .partial(),
      }),
    )
    .mutation(async ({ input, ctx }) => {
      return ctx.datasetRecordModel.update(input.id, input.value);
    }),

  /**
   * Import dataset records from a JSONL file that was already uploaded to S3.
   *
   * Input: `datasetId` - target dataset; `pathname` - S3 key of the JSONL file.
   * The parsed rows are validated against `insertEvalDatasetRecordSchema`
   * (an invalid file makes `parse` throw), file names in `referenceFiles` are
   * resolved to file ids, and the rows are inserted in one batch.
   */
  importDatasetRecords: ragEvalProcedure
    .input(
      z.object({
        datasetId: z.number(),
        pathname: z.string(),
      }),
    )
    .mutation(async ({ input, ctx }) => {
      const dataStr = await ctx.s3.getFileContent(input.pathname);
      const items = JSONL.parse<InsertEvalDatasetRecord>(dataStr);

      // Validation only: throws on malformed rows, the parsed result is not used
      insertEvalDatasetRecordSchema.array().parse(items);

      const data = await Promise.all(
        items.map(async ({ referenceFiles, question, ideal }) => {
          // A row may reference a single file name or a list of them
          const files = typeof referenceFiles === 'string' ? [referenceFiles] : referenceFiles;

          let fileIds: string[] | undefined = undefined;

          if (files) {
            // Names without a matching file simply yield no id
            const matchedFiles = await ctx.fileModel.findByNames(files);

            fileIds = matchedFiles.map((item) => item.id);
          }

          return {
            question,
            ideal,
            referenceFiles: fileIds,
            datasetId: input.datasetId,
          };
        }),
      );

      return ctx.datasetRecordModel.batchCreate(data);
    }),

  // Evaluation operations

  /**
   * Start an evaluation run: create one pending evaluation record per dataset
   * record, mark the evaluation as processing and trigger the async worker for
   * every record.
   *
   * Returns `{ success: true }` once all workers have been triggered (not when
   * they have finished), `{ success: false }` when triggering itself threw.
   * Throws `TRPCError` (`BAD_REQUEST`) when the evaluation does not exist or
   * its dataset has no records.
   */
  startEvaluationTask: ragEvalProcedure
    .input(z.object({ id: z.number() }))
    .mutation(async ({ input, ctx }) => {
      // Start evaluation task
      const evaluation = await ctx.evaluationModel.findById(input.id);

      if (!evaluation) {
        throw new TRPCError({ code: 'BAD_REQUEST', message: 'Evaluation not found' });
      }

      // create evaluation records by dataset records
      const datasetRecords = await ctx.datasetRecordModel.findByDatasetId(evaluation.datasetId);

      if (datasetRecords.length === 0) {
        throw new TRPCError({ code: 'BAD_REQUEST', message: 'Dataset record is empty' });
      }

      const evalRecords = await ctx.evaluationRecordModel.batchCreate(
        datasetRecords.map((record) => ({
          evaluationId: input.id,
          datasetRecordId: record.id,
          question: record.question!,
          ideal: record.ideal,
          status: EvalEvaluationStatus.Pending,
          embeddingModel: DEFAULT_EMBEDDING_MODEL,
          languageModel: DEFAULT_MODEL,
        })),
      );

      const asyncCaller = await createAsyncServerClient(ctx.userId, ctx.jwtPayload);

      await ctx.evaluationModel.update(input.id, { status: EvalEvaluationStatus.Processing });
      try {
        await pMap(
          evalRecords,
          async (record) => {
            // The worker call is deliberately not awaited: the request only
            // triggers the background task. Because of that the mapper settles
            // immediately and `concurrency` does not bound the in-flight calls.
            asyncCaller.ragEval.runRecordEvaluation
              .mutate({ evalRecordId: record.id })
              .catch(async (e) => {
                // Nothing awaits this promise, so a throw here could never be
                // caught and would surface as an unhandled rejection. Log and
                // flag the evaluation instead.
                console.error('[ASYNC_TASK] Failed to start evaluation task:', e);

                try {
                  await ctx.evaluationModel.update(input.id, {
                    status: EvalEvaluationStatus.Error,
                  });
                } catch (updateError) {
                  console.error(
                    '[startEvaluationTask] failed to flag the evaluation as errored:',
                    updateError,
                  );
                }
              });
          },
          {
            concurrency: 30,
          },
        );

        return { success: true };
      } catch (e) {
        console.error('[startEvaluationTask]:', e);

        await ctx.evaluationModel.update(input.id, { status: EvalEvaluationStatus.Error });

        return { success: false };
      }
    }),

  /**
   * Check whether every record of an evaluation succeeded. When they all did,
   * the records are exported as a JSONL file to S3 and the evaluation is marked
   * as successful with the URL of that file.
   *
   * Returns `{ success }`; an evaluation without any record is not successful.
   * Throws `TRPCError` (`BAD_REQUEST`) when the evaluation does not exist.
   */
  checkEvaluationStatus: ragEvalProcedure
    .input(z.object({ id: z.number() }))
    .query(async ({ input, ctx }) => {
      const evaluation = await ctx.evaluationModel.findById(input.id);

      if (!evaluation) {
        throw new TRPCError({ code: 'BAD_REQUEST', message: 'Evaluation not found' });
      }

      const records = await ctx.evaluationRecordModel.findByEvaluationId(input.id);

      // `every` is vacuously true for an empty list; an evaluation that has not
      // produced any record yet must not be exported or marked as successful.
      const isSuccess =
        records.length > 0 &&
        records.every((record) => record.status === EvalEvaluationStatus.Success);

      if (isSuccess) {
        // Upload the results to S3

        const evalRecords = records.map((record) => ({
          question: record.question,
          context: record.context,
          answer: record.answer,
          ground_truth: record.ideal,
        }));
        const date = dayjs().format('YYYY-MM-DD-HH-mm');
        // NOTE(review): the evaluation name is used verbatim in the object key;
        // confirm that names cannot contain path or URL-reserved characters.
        const filename = `${date}-eval_${evaluation.id}-${evaluation.name}.jsonl`;
        const path = `rag_eval_records/${filename}`;

        await ctx.s3.uploadContent(path, JSONL.stringify(evalRecords));

        // Persist the final status together with the export location
        await ctx.evaluationModel.update(input.id, {
          status: EvalEvaluationStatus.Success,
          evalRecordsUrl: getFullFileUrl(path),
        });
      }

      return { success: isSuccess };
    }),

  /** Create an evaluation for a dataset; resolves to the new evaluation id. */
  createEvaluation: ragEvalProcedure
    .input(insertEvalEvaluationSchema)
    .mutation(async ({ input, ctx }) => {
      const data = await ctx.evaluationModel.create({
        description: input.description,
        knowledgeBaseId: input.knowledgeBaseId,
        datasetId: input.datasetId,
        name: input.name,
      });

      return data?.id;
    }),

  /** Delete an evaluation by id. */
  removeEvaluation: ragEvalProcedure
    .input(z.object({ id: z.number() }))
    .mutation(async ({ input, ctx }) => {
      return ctx.evaluationModel.delete(input.id);
    }),

  /** List the evaluations of a knowledge base. */
  getEvaluationList: ragEvalProcedure
    .input(z.object({ knowledgeBaseId: z.string() }))
    .query(async ({ ctx, input }) => {
      return ctx.evaluationModel.queryByKnowledgeBaseId(input.knowledgeBaseId);
    }),
});
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { lambdaClient } from '@/libs/trpc/client';
|
|
2
|
+
import { uploadService } from '@/services/upload';
|
|
3
|
+
import {
|
|
4
|
+
CreateNewEvalDatasets,
|
|
5
|
+
CreateNewEvalEvaluation,
|
|
6
|
+
EvalDatasetRecord,
|
|
7
|
+
RAGEvalDataSetItem,
|
|
8
|
+
RAGEvalEvaluationItem,
|
|
9
|
+
insertEvalDatasetsSchema,
|
|
10
|
+
} from '@/types/eval';
|
|
11
|
+
|
|
12
|
+
/**
 * Client-side facade over the `ragEval` lambda router: every method forwards
 * to the tRPC procedure of the same purpose.
 */
class RAGEvalService {
  // Dataset

  /** Create a dataset; resolves to the id returned by the server, if any. */
  async createDataset(params: CreateNewEvalDatasets): Promise<number | undefined> {
    return await lambdaClient.ragEval.createDataset.mutate(params);
  }

  /** List the datasets of a knowledge base. */
  async getDatasets(knowledgeBaseId: string): Promise<RAGEvalDataSetItem[]> {
    return lambdaClient.ragEval.getDatasets.query({ knowledgeBaseId });
  }

  /** Delete a dataset by id. */
  async removeDataset(id: number): Promise<void> {
    await lambdaClient.ragEval.removeDataset.mutate({ id });
  }

  /**
   * Patch a dataset.
   *
   * @param id - id of the dataset to update
   * @param value - any subset of the fields described by `insertEvalDatasetsSchema`.
   *   The type is derived from what the schema parses to; `typeof` of the schema
   *   itself would describe the schema object, not the dataset fields.
   */
  async updateDataset(
    id: number,
    value: Partial<ReturnType<typeof insertEvalDatasetsSchema.parse>>,
  ): Promise<void> {
    await lambdaClient.ragEval.updateDataset.mutate({ id, value });
  }

  // Dataset Records

  /** List the records of a dataset. */
  async getDatasetRecords(datasetId: number): Promise<EvalDatasetRecord[]> {
    return lambdaClient.ragEval.getDatasetRecords.query({ datasetId });
  }

  /** Delete a dataset record by id. */
  async removeDatasetRecord(id: number): Promise<void> {
    await lambdaClient.ragEval.removeDatasetRecords.mutate({ id });
  }

  /**
   * Upload a file into the `ragEval` directory and ask the server to import
   * its content as records of the given dataset.
   */
  async importDatasetRecords(datasetId: number, file: File): Promise<void> {
    const { path } = await uploadService.uploadWithProgress(file, { directory: 'ragEval' });

    await lambdaClient.ragEval.importDatasetRecords.mutate({ datasetId, pathname: path });
  }

  // Evaluation

  /** Create an evaluation; resolves to the id returned by the server, if any. */
  async createEvaluation(params: CreateNewEvalEvaluation): Promise<number | undefined> {
    return await lambdaClient.ragEval.createEvaluation.mutate(params);
  }

  /** List the evaluations of a knowledge base. */
  async getEvaluationList(knowledgeBaseId: string): Promise<RAGEvalEvaluationItem[]> {
    return lambdaClient.ragEval.getEvaluationList.query({ knowledgeBaseId });
  }

  /** Start running an evaluation; resolves to the server's response. */
  async startEvaluationTask(id: number) {
    return lambdaClient.ragEval.startEvaluationTask.mutate({ id });
  }

  /** Delete an evaluation by id. */
  async removeEvaluation(id: number): Promise<void> {
    await lambdaClient.ragEval.removeEvaluation.mutate({ id });
  }

  /** Ask the server whether the evaluation has succeeded. */
  async checkEvaluationStatus(id: number): Promise<{ success: boolean }> {
    return lambdaClient.ragEval.checkEvaluationStatus.query({ id });
  }
}
|
|
66
|
+
|
|
67
|
+
export const ragEvalService = new RAGEvalService();
|
package/src/services/upload.ts
CHANGED
|
@@ -11,11 +11,17 @@ export const UPLOAD_NETWORK_ERROR = 'NetWorkError';
|
|
|
11
11
|
class UploadService {
|
|
12
12
|
uploadWithProgress = async (
|
|
13
13
|
file: File,
|
|
14
|
-
|
|
14
|
+
{
|
|
15
|
+
onProgress,
|
|
16
|
+
directory,
|
|
17
|
+
}: {
|
|
18
|
+
directory?: string;
|
|
19
|
+
onProgress?: (status: FileUploadStatus, state: FileUploadState) => void;
|
|
20
|
+
},
|
|
15
21
|
): Promise<FileMetadata> => {
|
|
16
22
|
const xhr = new XMLHttpRequest();
|
|
17
23
|
|
|
18
|
-
const { preSignUrl, ...result } = await this.getSignedUploadUrl(file);
|
|
24
|
+
const { preSignUrl, ...result } = await this.getSignedUploadUrl(file, directory);
|
|
19
25
|
let startTime = Date.now();
|
|
20
26
|
xhr.upload.addEventListener('progress', (event) => {
|
|
21
27
|
if (event.lengthComputable) {
|
|
@@ -29,7 +35,7 @@ class UploadService {
|
|
|
29
35
|
// so make it as 99.9 and let users think it's still uploading
|
|
30
36
|
progress: progress === 100 ? 99.9 : progress,
|
|
31
37
|
restTime: (event.total - event.loaded) / speedInByte,
|
|
32
|
-
speed: speedInByte
|
|
38
|
+
speed: speedInByte,
|
|
33
39
|
});
|
|
34
40
|
}
|
|
35
41
|
});
|
|
@@ -41,7 +47,7 @@ class UploadService {
|
|
|
41
47
|
await new Promise((resolve, reject) => {
|
|
42
48
|
xhr.addEventListener('load', () => {
|
|
43
49
|
if (xhr.status >= 200 && xhr.status < 300) {
|
|
44
|
-
onProgress('success', {
|
|
50
|
+
onProgress?.('success', {
|
|
45
51
|
progress: 100,
|
|
46
52
|
restTime: 0,
|
|
47
53
|
speed: file.size / ((Date.now() - startTime) / 1000),
|
|
@@ -95,6 +101,7 @@ class UploadService {
|
|
|
95
101
|
|
|
96
102
|
private getSignedUploadUrl = async (
|
|
97
103
|
file: File,
|
|
104
|
+
directory?: string,
|
|
98
105
|
): Promise<
|
|
99
106
|
FileMetadata & {
|
|
100
107
|
preSignUrl: string;
|
|
@@ -104,7 +111,7 @@ class UploadService {
|
|
|
104
111
|
|
|
105
112
|
// 精确到以 h 为单位的 path
|
|
106
113
|
const date = (Date.now() / 1000 / 60 / 60).toFixed(0);
|
|
107
|
-
const dirname = `${fileEnv.NEXT_PUBLIC_S3_FILE_PATH}/${date}`;
|
|
114
|
+
const dirname = `${directory || fileEnv.NEXT_PUBLIC_S3_FILE_PATH}/${date}`;
|
|
108
115
|
const pathname = `${dirname}/${filename}`;
|
|
109
116
|
|
|
110
117
|
const preSignUrl = await edgeClient.upload.createS3PreSignedUrl.mutate({ pathname });
|