@lobehub/chat 1.127.1 → 1.127.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 1.127.2](https://github.com/lobehub/lobe-chat/compare/v1.127.1...v1.127.2)
6
+
7
+ <sup>Released on **2025-09-11**</sup>
8
+
9
+ #### 🐛 Bug Fixes
10
+
11
+ - **misc**: Deleting files should also delete their chunks, embeddings, and fileChunks.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### What's fixed
19
+
20
+ - **misc**: Deleting files should also delete their chunks, embeddings, and fileChunks, closes [#9196](https://github.com/lobehub/lobe-chat/issues/9196) ([4ee1d29](https://github.com/lobehub/lobe-chat/commit/4ee1d29))
21
+
22
+ </details>
23
+
24
+ <div align="right">
25
+
26
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
27
+
28
+ </div>
29
+
5
30
  ### [Version 1.127.1](https://github.com/lobehub/lobe-chat/compare/v1.127.0...v1.127.1)
6
31
 
7
32
  <sup>Released on **2025-09-11**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
1
1
  [
2
+ {
3
+ "children": {
4
+ "fixes": [
5
+ "Deleting files should also delete their chunks, embeddings, and fileChunks."
6
+ ]
7
+ },
8
+ "date": "2025-09-11",
9
+ "version": "1.127.2"
10
+ },
2
11
  {
3
12
  "children": {
4
13
  "fixes": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.127.1",
3
+ "version": "1.127.2",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -4,7 +4,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
4
4
 
5
5
  import { FilesTabs, SortType } from '@/types/files';
6
6
 
7
- import { files, globalFiles, knowledgeBaseFiles, knowledgeBases, users } from '../../schemas';
7
+ import { chunks, embeddings, fileChunks, files, globalFiles, knowledgeBaseFiles, knowledgeBases, users } from '../../schemas';
8
8
  import { LobeChatDatabase } from '../../type';
9
9
  import { FileModel } from '../file';
10
10
  import { getTestDB } from './_util';
@@ -1058,15 +1058,8 @@ describe('FileModel', () => {
1058
1058
  // Create many chunks for this file to trigger batch processing
1059
1059
  // Note: This is a simplified test since we can't easily create 3000+ chunks
1060
1060
  // But it will still exercise the batch deletion code path
1061
- const chunkData = Array.from({ length: 10 }, (_, i) => ({
1062
- id: `chunk-${i}`,
1063
- text: `chunk content ${i}`,
1064
- index: i,
1065
- type: 'text' as const,
1066
- userId,
1067
- }));
1068
1061
 
1069
- // Insert chunks (this might need to be done through proper API)
1062
+ // Note: no chunks are actually inserted here (that would need to be done through the proper API)
1070
1063
  // For testing purposes, we'll delete the file which should trigger the batch deletion
1071
1064
  await fileModel.delete(fileId, true);
1072
1065
 
@@ -1077,4 +1070,205 @@ describe('FileModel', () => {
1077
1070
  expect(deletedFile).toBeUndefined();
1078
1071
  });
1079
1072
  });
1073
+
1074
+ describe('deleteFileChunks error handling', () => {
1075
+ let consoleWarnSpy: any;
1076
+
1077
+ beforeEach(() => {
1078
+ consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
1079
+ });
1080
+
1081
+ afterEach(() => {
1082
+ consoleWarnSpy.mockRestore();
1083
+ });
1084
+
1085
+ it('should delete file even when chunks deletion fails', async () => {
1086
+ // 创建测试文件
1087
+ const testFile = {
1088
+ name: 'error-test-file.txt',
1089
+ url: 'https://example.com/error-test-file.txt',
1090
+ size: 100,
1091
+ fileType: 'text/plain',
1092
+ fileHash: 'error-test-hash',
1093
+ };
1094
+
1095
+ const { id: fileId } = await fileModel.create(testFile, true);
1096
+
1097
+ // 创建一些测试数据来模拟chunks关联
1098
+ const chunkId1 = '550e8400-e29b-41d4-a716-446655440001';
1099
+ const chunkId2 = '550e8400-e29b-41d4-a716-446655440002';
1100
+
1101
+ // 插入chunks
1102
+ await serverDB.insert(chunks).values([
1103
+ { id: chunkId1, text: 'chunk 1', userId, type: 'text' },
1104
+ { id: chunkId2, text: 'chunk 2', userId, type: 'text' },
1105
+ ]);
1106
+
1107
+ // 插入fileChunks关联
1108
+ await serverDB.insert(fileChunks).values([
1109
+ { fileId, chunkId: chunkId1, userId },
1110
+ { fileId, chunkId: chunkId2, userId },
1111
+ ]);
1112
+
1113
+ // 插入embeddings (1024维向量)
1114
+ const testEmbedding = new Array(1024).fill(0.1);
1115
+ await serverDB.insert(embeddings).values([
1116
+ { chunkId: chunkId1, embeddings: testEmbedding, model: 'test-model', userId },
1117
+ ]);
1118
+
1119
+ // 跳过 documentChunks 测试,因为需要先创建 documents 记录
1120
+
1121
+ // 删除文件,应该会清理所有相关数据
1122
+ const result = await fileModel.delete(fileId, true);
1123
+
1124
+ // 验证文件被删除
1125
+ const deletedFile = await serverDB.query.files.findFirst({
1126
+ where: eq(files.id, fileId),
1127
+ });
1128
+ expect(deletedFile).toBeUndefined();
1129
+
1130
+ // 验证chunks被删除
1131
+ const remainingChunks = await serverDB.query.chunks.findMany({
1132
+ where: inArray(chunks.id, [chunkId1, chunkId2]),
1133
+ });
1134
+ expect(remainingChunks).toHaveLength(0);
1135
+
1136
+ // 验证embeddings被删除
1137
+ const remainingEmbeddings = await serverDB.query.embeddings.findMany({
1138
+ where: inArray(embeddings.chunkId, [chunkId1, chunkId2]),
1139
+ });
1140
+ expect(remainingEmbeddings).toHaveLength(0);
1141
+
1142
+ // 验证fileChunks被删除
1143
+ const remainingFileChunks = await serverDB.query.fileChunks.findMany({
1144
+ where: eq(fileChunks.fileId, fileId),
1145
+ });
1146
+ expect(remainingFileChunks).toHaveLength(0);
1147
+
1148
+ expect(result).toBeDefined();
1149
+ });
1150
+
1151
+ it('should successfully delete file with all related chunks and embeddings', async () => {
1152
+ // 简化测试:只验证正常的完整删除流程(移除知识库保护后)
1153
+ const testFile = {
1154
+ name: 'complete-deletion-test.txt',
1155
+ url: 'https://example.com/complete-deletion-test.txt',
1156
+ size: 100,
1157
+ fileType: 'text/plain',
1158
+ fileHash: 'complete-deletion-hash',
1159
+ };
1160
+
1161
+ const { id: fileId } = await fileModel.create(testFile, true);
1162
+
1163
+ const chunkId = '550e8400-e29b-41d4-a716-446655440003';
1164
+
1165
+ // 插入chunk
1166
+ await serverDB.insert(chunks).values([
1167
+ { id: chunkId, text: 'complete test chunk', userId, type: 'text' },
1168
+ ]);
1169
+
1170
+ // 插入fileChunks关联
1171
+ await serverDB.insert(fileChunks).values([
1172
+ { fileId, chunkId, userId },
1173
+ ]);
1174
+
1175
+ // 插入embeddings
1176
+ const testEmbedding = new Array(1024).fill(0.1);
1177
+ await serverDB.insert(embeddings).values([
1178
+ { chunkId, embeddings: testEmbedding, model: 'test-model', userId },
1179
+ ]);
1180
+
1181
+ // 删除文件
1182
+ await fileModel.delete(fileId, true);
1183
+
1184
+ // 验证文件被删除
1185
+ const deletedFile = await serverDB.query.files.findFirst({
1186
+ where: eq(files.id, fileId),
1187
+ });
1188
+ expect(deletedFile).toBeUndefined();
1189
+
1190
+ // 验证chunks被删除
1191
+ const remainingChunks = await serverDB.query.chunks.findMany({
1192
+ where: eq(chunks.id, chunkId),
1193
+ });
1194
+ expect(remainingChunks).toHaveLength(0);
1195
+
1196
+ // 验证embeddings被删除
1197
+ const remainingEmbeddings = await serverDB.query.embeddings.findMany({
1198
+ where: eq(embeddings.chunkId, chunkId),
1199
+ });
1200
+ expect(remainingEmbeddings).toHaveLength(0);
1201
+
1202
+ // 验证fileChunks被删除
1203
+ const remainingFileChunks = await serverDB.query.fileChunks.findMany({
1204
+ where: eq(fileChunks.fileId, fileId),
1205
+ });
1206
+ expect(remainingFileChunks).toHaveLength(0);
1207
+ });
1208
+
1209
+
1210
+ it('should delete files that are in knowledge bases (removed protection)', async () => {
1211
+ // 测试修复后的逻辑:知识库中的文件也应该被删除
1212
+ const testFile = {
1213
+ name: 'knowledge-base-file.txt',
1214
+ url: 'https://example.com/knowledge-base-file.txt',
1215
+ size: 100,
1216
+ fileType: 'text/plain',
1217
+ fileHash: 'kb-file-hash',
1218
+ knowledgeBaseId: 'kb1',
1219
+ };
1220
+
1221
+ const { id: fileId } = await fileModel.create(testFile, true);
1222
+
1223
+ const chunkId = '550e8400-e29b-41d4-a716-446655440007';
1224
+
1225
+ // 插入chunk和关联数据
1226
+ await serverDB.insert(chunks).values([
1227
+ { id: chunkId, text: 'knowledge base chunk', userId, type: 'text' },
1228
+ ]);
1229
+
1230
+ await serverDB.insert(fileChunks).values([
1231
+ { fileId, chunkId, userId },
1232
+ ]);
1233
+
1234
+ // 插入embeddings (1024维向量)
1235
+ const testEmbedding = new Array(1024).fill(0.1);
1236
+ await serverDB.insert(embeddings).values([
1237
+ { chunkId, embeddings: testEmbedding, model: 'test-model', userId },
1238
+ ]);
1239
+
1240
+ // 验证文件确实在知识库中
1241
+ const kbFile = await serverDB.query.knowledgeBaseFiles.findFirst({
1242
+ where: eq(knowledgeBaseFiles.fileId, fileId),
1243
+ });
1244
+ expect(kbFile).toBeDefined();
1245
+
1246
+ // 删除文件
1247
+ await fileModel.delete(fileId, true);
1248
+
1249
+ // 验证知识库中的文件也被完全删除
1250
+ const deletedFile = await serverDB.query.files.findFirst({
1251
+ where: eq(files.id, fileId),
1252
+ });
1253
+ expect(deletedFile).toBeUndefined();
1254
+
1255
+ // 验证chunks被删除(这是修复的核心:之前知识库文件的chunks不会被删除)
1256
+ const remainingChunks = await serverDB.query.chunks.findMany({
1257
+ where: eq(chunks.id, chunkId),
1258
+ });
1259
+ expect(remainingChunks).toHaveLength(0);
1260
+
1261
+ // 验证embeddings被删除
1262
+ const remainingEmbeddings = await serverDB.query.embeddings.findMany({
1263
+ where: eq(embeddings.chunkId, chunkId),
1264
+ });
1265
+ expect(remainingEmbeddings).toHaveLength(0);
1266
+
1267
+ // 验证fileChunks被删除
1268
+ const remainingFileChunks = await serverDB.query.fileChunks.findMany({
1269
+ where: eq(fileChunks.fileId, fileId),
1270
+ });
1271
+ expect(remainingFileChunks).toHaveLength(0);
1272
+ });
1273
+ });
1080
1274
  });
@@ -1,7 +1,6 @@
1
1
  import { and, asc, count, desc, eq, ilike, inArray, like, notExists, or, sum } from 'drizzle-orm';
2
2
  import type { PgTransaction } from 'drizzle-orm/pg-core';
3
3
 
4
- import { LobeChatDatabase, Transaction } from '../type';
5
4
  import { FilesTabs, QueryFileListParams, SortType } from '@/types/files';
6
5
 
7
6
  import {
@@ -9,12 +8,14 @@ import {
9
8
  NewFile,
10
9
  NewGlobalFile,
11
10
  chunks,
11
+ documentChunks,
12
12
  embeddings,
13
13
  fileChunks,
14
14
  files,
15
15
  globalFiles,
16
16
  knowledgeBaseFiles,
17
17
  } from '../schemas';
18
+ import { LobeChatDatabase, Transaction } from '../type';
18
19
 
19
20
  export class FileModel {
20
21
  private readonly userId: string;
@@ -326,30 +327,19 @@ export class FileModel {
326
327
  private deleteFileChunks = async (trx: PgTransaction<any>, fileIds: string[]) => {
327
328
  if (fileIds.length === 0) return;
328
329
 
329
- // 直接使用 JOIN 优化查询,减少数据传输量
330
+ // 获取要删除的文件相关的所有 chunk IDs(移除知识库保护逻辑)
330
331
  const relatedChunks = await trx
331
332
  .select({ chunkId: fileChunks.chunkId })
332
333
  .from(fileChunks)
333
- .where(
334
- and(
335
- inArray(fileChunks.fileId, fileIds),
336
- // 确保只查询有效的 chunkId
337
- notExists(
338
- trx
339
- .select()
340
- .from(knowledgeBaseFiles)
341
- .where(eq(knowledgeBaseFiles.fileId, fileChunks.fileId)),
342
- ),
343
- ),
344
- );
334
+ .where(inArray(fileChunks.fileId, fileIds));
345
335
 
346
336
  const chunkIds = relatedChunks.map((c) => c.chunkId).filter(Boolean) as string[];
347
337
 
348
338
  if (chunkIds.length === 0) return;
349
339
 
350
340
  // 批量处理配置
351
- const BATCH_SIZE = 1000; // 增加批处理量
352
- const MAX_CONCURRENT_BATCHES = 3; // 最大并行批次数
341
+ const BATCH_SIZE = 1000;
342
+ const MAX_CONCURRENT_BATCHES = 3;
353
343
 
354
344
  // 分批并行处理
355
345
  for (let i = 0; i < chunkIds.length; i += BATCH_SIZE * MAX_CONCURRENT_BATCHES) {
@@ -363,12 +353,31 @@ export class FileModel {
363
353
  const batchChunkIds = chunkIds.slice(startIdx, startIdx + BATCH_SIZE);
364
354
  if (batchChunkIds.length === 0) continue;
365
355
 
366
- // 为每个批次创建一个删除任务
356
+ // 按正确的删除顺序处理每个批次,失败不阻止流程
367
357
  const batchPromise = (async () => {
368
- // 先删除嵌入向量
369
- await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds));
370
- // 再删除块
371
- await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds));
358
+ // 1. 删除 embeddings (最顶层,有外键依赖)
359
+ try {
360
+ await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds));
361
+ } catch (e) {
362
+ // 静默处理,不阻止删除流程
363
+ console.warn('Failed to delete embeddings:', e);
364
+ }
365
+
366
+ // 2. 删除 documentChunks 关联 (如果存在)
367
+ try {
368
+ await trx.delete(documentChunks).where(inArray(documentChunks.chunkId, batchChunkIds));
369
+ } catch (e) {
370
+ // 静默处理,不阻止删除流程
371
+ console.warn('Failed to delete documentChunks:', e);
372
+ }
373
+
374
+ // 3. 删除 chunks (核心数据)
375
+ try {
376
+ await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds));
377
+ } catch (e) {
378
+ // 静默处理,不阻止删除流程
379
+ console.warn('Failed to delete chunks:', e);
380
+ }
372
381
  })();
373
382
 
374
383
  batchPromises.push(batchPromise);
@@ -378,6 +387,14 @@ export class FileModel {
378
387
  await Promise.all(batchPromises);
379
388
  }
380
389
 
390
+ // 4. 最后删除 fileChunks 关联表记录
391
+ try {
392
+ await trx.delete(fileChunks).where(inArray(fileChunks.fileId, fileIds));
393
+ } catch (e) {
394
+ // 静默处理,不阻止删除流程
395
+ console.warn('Failed to delete fileChunks:', e);
396
+ }
397
+
381
398
  return chunkIds;
382
399
  };
383
400
  }