@shenghuabi/knowledge 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/embedding/embedding.service.d.ts +4 -0
  2. package/embedding/index.d.ts +2 -0
  3. package/embedding/type.d.ts +12 -0
  4. package/embedding.mjs +61 -0
  5. package/embedding.mjs.map +7 -0
  6. package/file-parser/const.d.ts +20 -0
  7. package/file-parser/dict/dict-format/dsl/dsl-parse.service.d.ts +6 -0
  8. package/file-parser/dict/dict-format/dsl/dsl.format.d.ts +1 -0
  9. package/file-parser/dict/dict-format/mdict-parse.service.d.ts +20 -0
  10. package/file-parser/dict/dict-format/stardict-parse.service.d.ts +12 -0
  11. package/file-parser/dict/dict-format/yaml-parse.service.d.ts +25 -0
  12. package/file-parser/dict/dict.service.d.ts +10 -0
  13. package/file-parser/dict/index.d.ts +2 -0
  14. package/file-parser/dict/type.d.ts +24 -0
  15. package/file-parser/document-file-parser.service.d.ts +15 -0
  16. package/file-parser/document-loader/pdf-img.loader.d.ts +8 -0
  17. package/file-parser/document-loader/xlsx.loader.d.ts +6 -0
  18. package/file-parser/file-parser.service.d.ts +13 -0
  19. package/file-parser/index.d.ts +6 -0
  20. package/file-parser/text-analyse.d.ts +1 -0
  21. package/file-parser/text-parser.d.ts +3 -0
  22. package/file-parser/vl-parser/markdown.parser.d.ts +8 -0
  23. package/file-parser.mjs +850 -0
  24. package/file-parser.mjs.map +7 -0
  25. package/image/convert.d.ts +25 -0
  26. package/image/extract.d.ts +2 -0
  27. package/image/image-metadata.d.ts +2 -0
  28. package/image/index.d.ts +3 -0
  29. package/image.mjs +134 -0
  30. package/image.mjs.map +7 -0
  31. package/knowledge/article/article.knowledge.service.d.ts +53 -0
  32. package/knowledge/article/define/config.d.ts +60 -0
  33. package/knowledge/article/define/index.d.ts +2 -0
  34. package/knowledge/article/define/payload.d.ts +16 -0
  35. package/knowledge/article/index.d.ts +2 -0
  36. package/knowledge/common/common.knowledge.service.d.ts +240 -0
  37. package/knowledge/common/define/base.d.ts +7 -0
  38. package/knowledge/common/define/chunk.d.ts +14 -0
  39. package/knowledge/common/define/embedding.d.ts +5 -0
  40. package/knowledge/common/define/index.d.ts +3 -0
  41. package/knowledge/common/index.d.ts +1 -0
  42. package/knowledge/common/query.d.ts +7 -0
  43. package/knowledge/const.d.ts +95 -0
  44. package/knowledge/define/index.d.ts +245 -0
  45. package/knowledge/dict/define/config.d.ts +68 -0
  46. package/knowledge/dict/define/index.d.ts +1 -0
  47. package/knowledge/dict/dict.knowledge.service.d.ts +67 -0
  48. package/knowledge/graph/const.d.ts +20 -0
  49. package/knowledge/graph/define/config.d.ts +169 -0
  50. package/knowledge/graph/define/define.d.ts +402 -0
  51. package/knowledge/graph/define/index.d.ts +2 -0
  52. package/knowledge/graph/define/query.d.ts +14 -0
  53. package/knowledge/graph/graph.handle.service.d.ts +28 -0
  54. package/knowledge/graph/graph.knowledge.service.d.ts +40 -0
  55. package/knowledge/graph/graph.local.service.d.ts +85 -0
  56. package/knowledge/graph/graph.query.service.d.ts +160 -0
  57. package/knowledge/graph/graph.service.d.ts +24 -0
  58. package/knowledge/graph/graph.util.service.d.ts +31 -0
  59. package/knowledge/graph/type.d.ts +11 -0
  60. package/knowledge/graph/util/format-attr.d.ts +48 -0
  61. package/knowledge/graph/util/graph-util.d.ts +5 -0
  62. package/knowledge/graph/util.d.ts +1 -0
  63. package/knowledge/graph/vecotr-format.d.ts +11 -0
  64. package/knowledge/index.d.ts +17 -0
  65. package/knowledge/knowledge.manager.service.d.ts +42 -0
  66. package/knowledge/knowledge.util.service.d.ts +21 -0
  67. package/knowledge/normal/define/config.d.ts +60 -0
  68. package/knowledge/normal/define/index.d.ts +1 -0
  69. package/knowledge/normal/normal.knowledge.service.d.ts +49 -0
  70. package/knowledge/template.format.d.ts +6 -0
  71. package/knowledge/type.d.ts +28 -0
  72. package/knowledge.mjs +2856 -0
  73. package/knowledge.mjs.map +7 -0
  74. package/ocr/FileUtils.d.ts +4 -0
  75. package/ocr/ImageRaw.d.ts +11 -0
  76. package/ocr/index.d.ts +2 -0
  77. package/ocr/model-config.d.ts +8 -0
  78. package/ocr/ocr.d.ts +29 -0
  79. package/ocr.mjs +351 -0
  80. package/ocr.mjs.map +7 -0
  81. package/package.json +105 -0
  82. package/qdrant/index.d.ts +3 -0
  83. package/qdrant/qdrant-client.service.d.ts +396 -0
  84. package/qdrant/qdrant-server.service.d.ts +21 -0
  85. package/qdrant/type.d.ts +18 -0
  86. package/qdrant/util.d.ts +1 -0
  87. package/qdrant.mjs +274 -0
  88. package/qdrant.mjs.map +7 -0
  89. package/util/batch-queue.d.ts +6 -0
  90. package/util/cache-queue.d.ts +10 -0
  91. package/util/clone.d.ts +1 -0
  92. package/util/embedding-queue.d.ts +3 -0
  93. package/util/get-hash.d.ts +2 -0
  94. package/util/html-to-text/index.d.ts +5 -0
  95. package/util/index.d.ts +10 -0
  96. package/util/is-truthy.d.ts +1 -0
  97. package/util/log.service.d.ts +6 -0
  98. package/util/promise.d.ts +5 -0
  99. package/util/type.d.ts +1 -0
  100. package/util/uniq-object-key.d.ts +1 -0
  101. package/util.mjs +219 -0
  102. package/util.mjs.map +7 -0
  103. package/worker/custom-cache.d.ts +28 -0
  104. package/worker/ocr/index.d.ts +17 -0
  105. package/worker/ocr.mjs +75 -0
  106. package/worker/ocr.mjs.map +7 -0
  107. package/worker/reranker.mjs +180 -0
  108. package/worker/reranker.mjs.map +7 -0
  109. package/worker/set-transformers-config.d.ts +19 -0
  110. package/worker/text2vec/index.d.ts +9 -0
  111. package/worker/text2vec.mjs +194 -0
  112. package/worker/text2vec.mjs.map +7 -0
package/knowledge.mjs ADDED
@@ -0,0 +1,2856 @@
1
+ // packages/knowledge/knowledge.manager.service.ts
2
+ import {
3
+ createInjector as createInjector2,
4
+ inject as inject12,
5
+ Injector as Injector4,
6
+ RootStaticInjectOptions as RootStaticInjectOptions2
7
+ } from "static-injector";
8
+
9
+ // packages/knowledge/const.ts
10
+ import { InjectionToken } from "static-injector";
11
/**
 * Builds the name of the graph-definition collection derived from `name`.
 *
 * @param name Base collection (or knowledge base) name.
 * @returns `name` wrapped as "[图谱]<name>-定义".
 */
function getGraphCollectionName(name) {
  const prefix = "[图谱]";
  const suffix = "-定义";
  return prefix.concat(name, suffix);
}
14
/**
 * Builds the name under which the currently activated collection of a
 * knowledge base is addressed.
 *
 * @param name Knowledge base (or collection) name.
 * @returns `name` followed by the "[激活]" marker.
 */
function getActivateCollectionName(name) {
  const activeMarker = "[激活]";
  return "".concat(name, activeMarker);
}
17
// String constant "[字典]-" ("[dictionary]-"); its consumers are not visible in this part of the file.
var DICT_PREFIX = "[字典]-";

// Embedding function; the services in this file call it as (text | text[], collectionName).
var Text2VecToken = new InjectionToken("text2vec");

// Text splitter; called as (content, metadata, collectionName) and expected to
// yield items carrying `pageContent` and `metadata`.
var TextSplitterToken = new InjectionToken("TextSplitter");

// Knowledge base configuration; the injected value is invoked as a function to read the config.
var ConfigToken = new InjectionToken("config");

// NOTE(review): no consumer of this token is visible in this part of the file.
var GetConfigToken = new InjectionToken("getConfig");

// OCR function; injected optionally and forwarded to html2Text as `ocrFn`.
var OCRToken = new InjectionToken("OCR");

// Storage directory; the injected value is invoked as a function to read the path.
var DirToken = new InjectionToken("dir");

// Reranker; the services in this file use its `getQueryRatio()` and `run()` members.
var ReRankerToken = new InjectionToken("reranker");
29
+
30
+ // packages/knowledge/dict/dict.knowledge.service.ts
31
+ import { computed, inject as inject3 } from "static-injector";
32
+ import { QdrantClientService as QdrantClientService3 } from "@shenghuabi/knowledge/qdrant";
33
+ import { DictService } from "@shenghuabi/knowledge/file-parser";
34
+ import { v4 as v42 } from "uuid";
35
+ import { promise as fastq2 } from "fastq";
36
+ import {
37
+ BatchQueue,
38
+ CacheQueue as CacheQueue2,
39
+ html2Text,
40
+ isTruthy,
41
+ LogToken
42
+ } from "@shenghuabi/knowledge/util";
43
+
44
+ // packages/knowledge/template.format.ts
45
+ import hbs from "handlebars";
46
/**
 * Compiled Handlebars templates keyed by their source text. Embedding
 * templates come from configuration and are rendered once per chunk, so
 * compiling each distinct template only once avoids re-parsing it for every
 * rendered entry.
 */
var compiledTemplateCache = /* @__PURE__ */ new Map();

/**
 * Renders the Handlebars template `input` against `value`.
 *
 * HTML escaping and partial indentation are disabled, and prototype
 * properties of the context are readable by default.
 *
 * @param input Handlebars template source.
 * @param value Context object exposed to the template.
 * @returns The rendered string.
 */
function interpolate(input, value) {
  let render = compiledTemplateCache.get(input);
  if (!render) {
    render = hbs.compile(input, { noEscape: true, preventIndent: true });
    compiledTemplateCache.set(input, render);
  }
  return render(value, {
    allowProtoPropertiesByDefault: true
  });
}
51
/**
 * Produces the text that gets embedded for one entry.
 *
 * When the embedding template is enabled and has a non-empty value, the
 * template is rendered with `ENTRY` set to the payload plus the knowledge
 * base name, and the result is trimmed. Otherwise `defaultContent` is
 * returned untouched.
 *
 * @param payload Entry payload exposed to the template as `ENTRY`.
 * @param knowledge Knowledge base name, exposed as `ENTRY.knowledge`.
 * @param defaultContent Fallback text when no template applies.
 * @param embedingTemplate Optional `{ enable, value }` template definition.
 * @returns The text to embed (may be an empty string after trimming).
 */
function entryFormat(payload, knowledge, defaultContent, embedingTemplate) {
  const templateActive = embedingTemplate?.enable && embedingTemplate?.value;
  if (!templateActive) {
    return defaultContent;
  }
  const rendered = interpolate(embedingTemplate.value, {
    ENTRY: { ...payload, knowledge }
  });
  return rendered.trim();
}
56
+
57
+ // packages/knowledge/dict/dict.knowledge.service.ts
58
+ import { createNormalizeVfs, path } from "@cyia/vfs2";
59
+
60
+ // packages/knowledge/knowledge.util.service.ts
61
+ import { inject, RootStaticInjectOptions } from "static-injector";
62
+ import { CacheQueue } from "@shenghuabi/knowledge/util";
63
+ import { promise as fastq } from "fastq";
64
+ import { QdrantClientService } from "@shenghuabi/knowledge/qdrant";
65
/**
 * Qdrant helpers shared by the knowledge services: bulk delete, batched
 * upsert queues, teardown and snapshot export.
 */
var KnowledgeUtilService = class extends RootStaticInjectOptions {
  #qdClient = inject(QdrantClientService);

  /**
   * Runs the same delete request against every collection in `list`.
   * @returns A promise resolving to the per-collection delete results.
   */
  multiDelete(list, filter) {
    const pending = list.map((collectionName) => {
      return this.#qdClient.delete(collectionName, filter);
    });
    return Promise.all(pending);
  }

  /**
   * Creates a CacheQueue whose worker upserts each received list of points
   * into `collectionName` and waits for the write to be applied.
   * NOTE(review): 8 is the fastq concurrency; 20 is passed to CacheQueue and
   * is presumably its batch size — confirm against CacheQueue.
   */
  updatePointsQueue(collectionName) {
    const upsertPoints = (list) => {
      return this.#qdClient.upsert(collectionName, {
        wait: true,
        points: list
      });
    };
    const worker = fastq(upsertPoints, 8);
    return new CacheQueue(worker, 20);
  }

  /**
   * Deletes every collection in `list`, then removes the whole content of
   * `vfs` when one is given and its root exists.
   */
  async destroyKnowledge(list, vfs) {
    const removals = list.map((collectionName) => {
      return this.#qdClient.deleteCollection(collectionName);
    });
    await Promise.all(removals);
    if (!vfs) {
      return;
    }
    const rootExists = await vfs.exists("");
    if (rootExists) {
      await vfs.rm("", { recursive: true, force: true });
    }
  }

  /**
   * Creates a snapshot of every collection in `list`.
   * @returns The snapshot results, each tagged with its `collection` name.
   */
  export(list) {
    const snapshots = list.map((collectionName) => {
      const tagWithCollection = (result) => ({
        ...result,
        collection: collectionName
      });
      return this.#qdClient.createSnapshot(collectionName).then(tagWithCollection);
    });
    return Promise.all(snapshots);
  }
};
103
+
104
+ // packages/knowledge/common/define/base.ts
105
+ import * as v from "valibot";
106
// Fields shared by every knowledge base configuration schema in this file.
var BaseKnowledgeConfig = v.object({
  // Knowledge base name.
  name: v.string(),
  // Name of the collection currently marked as active; the services compare
  // it against collection names and pass it to the embedding function.
  activateCollection: v.string()
});
111
+
112
+ // packages/knowledge/normal/define/config.ts
113
+ import * as v3 from "valibot";
114
+
115
+ // packages/knowledge/common/define/embedding.ts
116
+ import * as v2 from "valibot";
117
// Schema of one embedding template: an on/off switch plus the optional
// template source (rendered by entryFormat only when both are set).
var EmbeddingTemplateDefine = v2.object({
  enable: v2.boolean(),
  value: v2.optional(v2.string())
});
121
+
122
+ // packages/knowledge/normal/define/config.ts
123
// Schema of one collection of a normal knowledge base.
var NormalCollectionDefine = v3.object({
  // Qdrant collection name.
  collectionName: v3.string(),
  // Optional template used to build the text that is embedded per entry.
  embeddingTemplate: v3.optional(
    v3.object({ entry: v3.optional(EmbeddingTemplateDefine) })
  ),
  // Vector size used when the collection is created.
  size: v3.number()
});
132
// Configuration schema of a normal knowledge base. After validation the
// config is extended with `activateName`, derived from the knowledge name.
var NormalKnowledgeConfigDefine = v3.pipe(
  v3.object({
    ...BaseKnowledgeConfig.entries,
    type: v3.optional(v3.literal("normal"), "normal"),
    collectionList: v3.array(NormalCollectionDefine)
  }),
  v3.transform((item) => {
    // Activated normal knowledge base (file chunks).
    const activateName = getActivateCollectionName(item.name);
    return { ...item, activateName };
  })
);
144
+
145
+ // packages/knowledge/dict/define/config.ts
146
+ import * as v4 from "valibot";
147
// Dictionary collections use exactly the same schema as normal collections.
var DictCollectionDefine = NormalCollectionDefine;

// Configuration schema of a dictionary knowledge base. After validation the
// config is extended with `activateName`, derived from the knowledge name.
var DictKnowledgeConfigDefine = v4.pipe(
  v4.object({
    ...BaseKnowledgeConfig.entries,
    type: v4.optional(v4.literal("dict"), "dict"),
    collectionList: v4.array(DictCollectionDefine),
    // Entry (headword) extraction.
    extractorWord: v4.optional(v4.boolean()),
    // Image recognition (OCR).
    useOcr: v4.optional(v4.boolean())
  }),
  v4.transform((item) => {
    // Activated knowledge base collection (file chunks).
    const activateName = getActivateCollectionName(item.name);
    return { ...item, activateName };
  })
);
164
+
165
+ // packages/knowledge/dict/dict.knowledge.service.ts
166
+ import * as v5 from "valibot";
167
+
168
+ // packages/knowledge/common/common.knowledge.service.ts
169
+ import { inject as inject2 } from "static-injector";
170
+ import { QdrantClientService as QdrantClientService2 } from "@shenghuabi/knowledge/qdrant";
171
/**
 * Query operations shared by the knowledge services: chunk search with
 * reranking, and lookup of the active collection.
 */
var CommonKnowledgeService = class {
  #qdClient = inject2(QdrantClientService2);
  #config = inject2(ConfigToken);
  #text2vec = inject2(Text2VecToken);
  #reranker = inject2(ReRankerToken);

  /**
   * Vector-searches the active collection on the "chunk" vector and reranks
   * the hits against `text`.
   *
   * @param text Query text.
   * @param filter Qdrant filter forwarded unchanged.
   * @param options Optional `{ limit, score, offset }`.
   * @returns Search hits in reranker order, cut to `options.limit` when set.
   */
  async searchChunk(text, filter, options) {
    const targetName = this.#config().activateName;
    // Over-fetch by the reranker's ratio so it has candidates to choose from.
    const fetchLimit = options?.limit ? options.limit * this.#reranker.getQueryRatio() : void 0;
    const scoreThreshold = options?.score;
    const offset = options?.offset;
    const queryVector = await this.#text2vec(text, this.#config().activateCollection);
    const queryResult = await this.#qdClient.search(targetName, {
      limit: fetchLimit,
      filter,
      with_payload: true,
      with_vector: false,
      score_threshold: scoreThreshold,
      offset,
      vector: {
        name: "chunk",
        vector: queryVector
      }
    });
    const docs = queryResult.map((item) => item.payload?.["embeddingChunk"]);
    const ranked = await this.#reranker.run({ value: text, docs });
    const hits = [];
    for (const { index } of ranked.slice(0, options?.limit)) {
      hits.push(queryResult[index]);
    }
    return hits;
  }

  /** Fetches the collection info of the active collection. */
  getCollection() {
    const targetName = this.#config().activateName;
    return this.#qdClient.getCollection(targetName);
  }
};
206
+
207
+ // packages/knowledge/dict/dict.knowledge.service.ts
208
/**
 * Dictionary knowledge base. Every point stored in Qdrant carries two named
 * vectors: "word" (the headword) and "chunk" (one slice of the entry text).
 */
var DictKnowledgeService = class extends CommonKnowledgeService {
  #text2vec = inject3(Text2VecToken);
  #reranker = inject3(ReRankerToken);
  #textSplitter = inject3(TextSplitterToken);
  #ocr = inject3(OCRToken, { optional: true }) || void 0;
  #config = inject3(ConfigToken);
  #vfs = computed(() => createNormalizeVfs({ dir: this.#dir() }));
  #qdClient = inject3(QdrantClientService3);
  #dict = inject3(DictService);
  #util = inject3(KnowledgeUtilService);
  #dir = inject3(DirToken);
  #log = inject3(LogToken);

  /** Validates `input` against DictCollectionDefine (valibot `parse` throws on mismatch). */
  formatCollection(input) {
    return v5.parse(DictCollectionDefine, input);
  }

  /**
   * Creates the Qdrant collection with its "chunk" and "word" vectors and the
   * keyword payload indexes.
   * @throws Error when a collection with that name already exists.
   */
  async #createCollection(collection) {
    const { collectionName, size } = collection;
    const { exists } = await this.#qdClient.collectionExists(collectionName);
    if (exists) {
      throw new Error(`集合${collectionName}已存在`);
    }
    await this.#qdClient.createCollection(collectionName, {
      vectors: {
        chunk: { size, distance: "Cosine", on_disk: true },
        word: { size, distance: "Cosine", on_disk: true }
      }
    });
    // Indexes are created one after another, in the original order.
    for (const field of ["word", "chunk", "content"]) {
      await this.#qdClient.createPayloadKeywordIndex(collectionName, field);
    }
  }

  /**
   * Created only once; everything afterwards is an insert or an update.
   *
   * The active alias is pointed at the collection that was just created,
   * matching NormalKnowledgeService.create and addCollection below, instead
   * of assuming the collection is named after the knowledge base.
   */
  async create(collection) {
    await this.#createCollection(collection);
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  /**
   * Turns one dictionary entry into `{ word, contentList }`, where every
   * content item holds the text to embed (`chunk`) and the point payload
   * (`metadata`). Returns undefined when the entry yields no non-blank chunk.
   */
  async #prepareEntry(entryItem, collection, assetHandle, assetFolder) {
    if (assetHandle && this.#config().extractorWord) {
      // Best effort: the import does not wait for this write, but a failure is
      // logged instead of surfacing as an unhandled promise rejection.
      Promise.resolve(this.#vfs().write(entryItem.word, entryItem.content)).catch(
        (error) => this.#log.warn(`词条文件写入失败:${entryItem.word}`, error)
      );
    }
    const formatedContent = assetHandle ? await this.#formatContent(entryItem, assetFolder, {
      useOcr: this.#config().useOcr,
      refReplace: true
    }) : entryItem.formatedContent || entryItem.content;
    const splitContentList = (await this.#textSplitter(
      formatedContent,
      {
        ...entryItem,
        formatedContent
      },
      collection.collectionName
    )).filter((item) => !!item.pageContent.trim());
    if (!splitContentList.length) {
      return void 0;
    }
    const contentList = splitContentList.map((item) => {
      const pageContent = item.pageContent.trim();
      const metadata = {
        ...item.metadata,
        chunk: pageContent
      };
      const embeddingChunk = entryFormat(
        metadata,
        this.#config().name,
        `${metadata["word"]}\n${pageContent}`,
        collection.embeddingTemplate?.entry
      );
      if (!embeddingChunk) {
        this.#log.warn(
          `内容格式化后内容为空,跳过\n${JSON.stringify({ payload: metadata, knowledge: this.#config().name, pageContent })}`
        );
        return;
      }
      metadata["embeddingChunk"] = embeddingChunk;
      return {
        chunk: embeddingChunk,
        metadata
      };
    }).filter(isTruthy);
    return { word: entryItem.word, contentList };
  }

  /**
   * Builds the two-stage import pipeline for `collection`:
   * entries -> (format, split, embed) -> point upserts.
   *
   * @param collection Target collection definition.
   * @param assetHandle When truthy, entries are raw dictionary items whose
   *   HTML is converted to text; otherwise the stored text is reused.
   * @returns The import queue, the upsert queue, a counter getter and a getter
   *   for the last error reported by either queue.
   */
  async #getImportQueue(collection, assetHandle) {
    const assetFolder = path.join(this.#dir(), "assets");
    const contentBatchQueue = new BatchQueue(
      (str) => this.#text2vec(str, collection.collectionName)
    );
    let sum = 0;
    let queueError;
    // On the first worker failure remember the error and stop the queue.
    const stopOnError = (cacheQueue) => {
      cacheQueue.queue.error((error) => {
        if (error) {
          queueError = error;
          cacheQueue.queue.killAndDrain();
        }
      });
    };
    const updateCacheQueue = this.#util.updatePointsQueue(collection.collectionName);
    stopOnError(updateCacheQueue);
    const importCacheQueue = new CacheQueue2(
      fastq2(async (wordList) => {
        const preMergeList = (await Promise.all(
          wordList.map(
            (entryItem) => this.#prepareEntry(entryItem, collection, assetHandle, assetFolder)
          )
        )).filter(isTruthy).filter((item) => !!item.contentList.length);
        if (!preMergeList.length) {
          return;
        }
        const [wordVecResult, contentVecResult] = await Promise.all([
          this.#text2vec(
            preMergeList.map((item) => item.word),
            collection.collectionName
          ),
          contentBatchQueue.then(
            Promise.all(
              preMergeList.flatMap(
                (item) => item.contentList.map(
                  (item2) => contentBatchQueue.push(item2.chunk)
                )
              )
            )
          )
        ]);
        // contentVecResult is flat; walk it in the same order it was built.
        let startIndex = 0;
        for (let i = 0; i < preMergeList.length; i++) {
          const preData = preMergeList[i];
          const wordVector = wordVecResult[i];
          const endIndex = startIndex + preData.contentList.length;
          const extResult = contentVecResult.slice(startIndex, endIndex);
          startIndex = endIndex;
          extResult.forEach((chunkVector, j) => {
            updateCacheQueue.push({
              id: v42(),
              vector: {
                word: wordVector,
                chunk: chunkVector
              },
              payload: preData.contentList[j].metadata
            });
          });
        }
        sum += wordList.length;
        this.#log.info(`已导入 ${sum} 条`);
      }, 2),
      256
    );
    stopOnError(importCacheQueue);
    return {
      importQueue: importCacheQueue,
      getSum: () => sum,
      updateQueue: updateCacheQueue,
      getError() {
        return queueError;
      }
    };
  }

  /**
   * Imports a dictionary into the first configured collection.
   *
   * @returns The number of imported entries.
   * @throws The import error; before rethrowing, the knowledge base is
   *   destroyed. A failure of that cleanup is logged and does not replace the
   *   original error.
   */
  async importDict(input) {
    const collection = this.#config().collectionList[0];
    try {
      const generator = await this.#dict.importDict(this.#config().name, this.#dir(), input);
      const { importQueue, getSum, updateQueue, getError } = await this.#getImportQueue(collection, true);
      for await (const item of generator) {
        importQueue.push(item);
      }
      importQueue.complete();
      await importQueue.queue.drained();
      updateQueue.complete();
      await updateQueue.queue.drained();
      const error = getError();
      if (error) {
        throw error;
      }
      return getSum();
    } catch (rej) {
      this.#log.error(`导入失败`, rej);
      try {
        await this.destroy();
      } catch (cleanupError) {
        this.#log.error(`导入失败后清理出错`, cleanupError);
      }
      throw rej;
    }
  }

  /**
   * Converts an entry's HTML (or plain content) to text, optionally running
   * OCR, and optionally replaces every "~" with the headword.
   */
  async #formatContent(wordItem, assetFolder, options) {
    const result = await html2Text(wordItem.htmlContent ?? wordItem.content, {
      useOcr: options.useOcr,
      ocrFn: this.#ocr,
      assetFolder
    });
    return options.refReplace ? result.replaceAll(`~`, wordItem.word) : result;
  }

  /**
   * Creates a new collection, fills it by re-embedding the entries of the
   * currently active collection, then makes it the active one. The new
   * collection is deleted again when filling it fails.
   */
  async addCollection(collection) {
    await this.#createCollection(collection);
    const { importQueue, updateQueue, getError } = await this.#getImportQueue(
      collection,
      false
    );
    try {
      let offset;
      const activateCollectionName = this.#config().activateName;
      // One entry is stored as several points (one per chunk); re-import it once.
      const wordSet = /* @__PURE__ */ new Set();
      do {
        const { points, next_page_offset } = await this.#qdClient.scroll(
          activateCollectionName,
          {
            limit: 5e3,
            with_payload: true,
            offset
          }
        );
        for (const point of points) {
          const payload = point.payload;
          if (!payload) {
            // Nothing to re-embed for a point without payload.
            continue;
          }
          const id = `${payload["word"]}|${payload["formatedContent"] || payload["content"]}`;
          if (wordSet.has(id)) {
            continue;
          }
          wordSet.add(id);
          importQueue.push(payload);
        }
        offset = next_page_offset;
        // The last page is signalled by a null/undefined offset; a falsy but
        // valid point id must not end the scan early.
      } while (offset !== null && offset !== void 0);
      importQueue.complete();
      await importQueue.queue.drained();
      updateQueue.complete();
      await updateQueue.queue.drained();
      const error = getError();
      if (error) {
        throw error;
      }
    } catch (error) {
      await this.#qdClient.deleteCollection(collection.collectionName);
      throw error;
    }
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  /**
   * Deletes a non-active collection that is listed in the config.
   * @returns false when the collection is active or unknown, true once deleted.
   */
  async deleteCollection(collectionName) {
    const isActivate = this.#config().activateCollection === collectionName;
    if (isActivate) {
      return false;
    }
    const collection = this.#config().collectionList.find(
      (item) => item.collectionName === collectionName
    );
    if (!collection) {
      return false;
    }
    await this.#qdClient.deleteCollection(collectionName);
    return true;
  }

  /** Points the active alias of this knowledge base at `collectionName`. */
  async changeActivateCollection(collectionName) {
    await this.#qdClient.setActivateCollection(
      collectionName,
      this.#config().activateName
    );
  }

  /** Creates a snapshot of every configured collection. */
  export() {
    return this.#util.export(
      this.#config().collectionList.map(({ collectionName }) => collectionName)
    );
  }

  /** Deletes every configured collection and the files stored for this knowledge base. */
  async destroy() {
    return this.#util.destroyKnowledge(
      this.#config().collectionList.map(({ collectionName }) => collectionName),
      this.#vfs()
    );
  }

  /**
   * Vector-searches the active collection on the "word" vector and reranks
   * the hits against `text`.
   *
   * @param text Query text.
   * @param options Optional `{ limit, score, offset }`.
   * @returns Search hits in reranker order, cut to `options.limit` when set.
   */
  async searchWord(text, options) {
    const queryResult = await this.#qdClient.search(
      this.#config().activateName,
      {
        limit: options?.limit ? options.limit * this.#reranker.getQueryRatio() : void 0,
        with_payload: true,
        with_vector: false,
        score_threshold: options?.score,
        offset: options?.offset,
        vector: {
          name: "word",
          vector: await this.#text2vec(text, this.#config().activateCollection)
        }
      }
    );
    const resultList = await this.#reranker.run({
      value: text,
      docs: queryResult.map((item) => item.payload?.["word"])
    });
    return resultList.slice(0, options?.limit).map(({ index }) => queryResult[index]);
  }

  /** Use it as an ordinary database: exact match on the "word" payload field. */
  matchWord(text, options) {
    return this.#qdClient.scroll(this.#config().activateName, {
      limit: options.limit,
      filter: {
        must: {
          key: "word",
          match: {
            value: text
          }
        }
      },
      with_payload: true,
      with_vector: false
    });
  }
};
535
+
536
+ // packages/knowledge/graph/graph.knowledge.service.ts
537
+ import { inject as inject7, Injector } from "static-injector";
538
+ import { QdrantClientService as QdrantClientService7 } from "@shenghuabi/knowledge/qdrant";
539
+ import { LogToken as LogToken4 } from "@shenghuabi/knowledge/util";
540
+ import { promise as fastq4 } from "fastq";
541
+
542
+ // packages/knowledge/graph/const.ts
543
+ import { InjectionToken as InjectionToken2 } from "static-injector";
544
// Injection tokens of the graph knowledge module. Their consumers are not
// visible in this part of the file.
var ContentParserToken = new InjectionToken2("ContentParser");
var QueryParamsToken = new InjectionToken2("QueryParams");
var RagChatToken = new InjectionToken2("RagChat");

// Literal marker string "$$INPUT$$".
// NOTE(review): presumably the placeholder replaced by the user's chat input — confirm at the call sites.
var CHAT_INPUT = "$$INPUT$$";
550
+
551
+ // packages/knowledge/graph/vecotr-format.ts
552
/**
 * Builds the text that represents a graph edge for embedding:
 * "<keywords joined by ','>,<source>,<target>,<description>".
 * Missing keywords contribute an empty first field.
 */
function edgeVectorString(options) {
  const keywordText = options.keywords?.join(",") ?? "";
  const { source, target, description } = options;
  return `${keywordText},${source},${target},${description}`;
}
555
/**
 * Builds the text that represents a graph node for embedding:
 * "<name>,<description>".
 */
function nodeVectorString(options) {
  const { name, description } = options;
  return "".concat(name, ",", description);
}
558
+
559
+ // packages/knowledge/graph/graph.knowledge.service.ts
560
+ import { v4 as v44 } from "uuid";
561
+
562
+ // packages/knowledge/normal/normal.knowledge.service.ts
563
+ import { QdrantClientService as QdrantClientService4 } from "@shenghuabi/knowledge/qdrant";
564
+ import { computed as computed2, inject as inject4 } from "static-injector";
565
+ import { createNormalizeVfs as createNormalizeVfs2 } from "@cyia/vfs2";
566
+ import { promise as fastq3 } from "fastq";
567
+ import { getHash, isTruthy as isTruthy2, runInEmbeddingContext } from "@shenghuabi/knowledge/util";
568
+ import { LogToken as LogToken2 } from "@shenghuabi/knowledge/util";
569
+ import * as v6 from "valibot";
570
/**
 * Normal (file based) knowledge base: every file is stored in the virtual
 * file system, split into chunks, embedded and upserted into each configured
 * Qdrant collection under the "chunk" vector.
 */
var NormalKnowledgeService = class extends CommonKnowledgeService {
  #text2vec = inject4(Text2VecToken);
  #textSplitter = inject4(TextSplitterToken);
  #config = inject4(ConfigToken);
  #util = inject4(KnowledgeUtilService);
  #qdClient = inject4(QdrantClientService4);
  #channel = inject4(LogToken2);
  #vfs = computed2(() => createNormalizeVfs2({ dir: this.#dir() }));
  #dir = inject4(DirToken);
  /** Payload fields that get a keyword index when a collection is created. */
  KeyWordIndex = ["fileName"];

  /** Base payload attached to every chunk of `fileName`; `content` is unused here. */
  getPayload(fileName, content) {
    return { fileName };
  }

  /** Validates `input` against NormalCollectionDefine (valibot `parse` throws on mismatch). */
  formatCollection(input) {
    return v6.parse(NormalCollectionDefine, input);
  }

  /**
   * Creates the Qdrant collection with its "chunk" vector and the keyword
   * indexes listed in KeyWordIndex.
   * @throws Error when a collection with that name already exists.
   */
  async createCollection(collection) {
    this.#channel.info(
      `创建集合:${this.#config().name};嵌入长度:${collection.size}`
    );
    const { exists } = await this.#qdClient.collectionExists(
      collection.collectionName
    );
    if (exists) {
      throw new Error(`集合${collection.collectionName}已存在`);
    }
    await this.#qdClient.createCollection(collection.collectionName, {
      vectors: {
        chunk: {
          size: collection.size,
          distance: "Cosine",
          on_disk: true
        }
      }
    });
    this.#channel.info(`创建索引`);
    for (const keyword of this.KeyWordIndex) {
      await this.#qdClient.createPayloadKeywordIndex(
        collection.collectionName,
        keyword
      );
    }
  }

  /** Creates the knowledge base: first collection plus the active alias. */
  async create(collection) {
    this.#channel.info(`准备创建知识库:${this.#config().name}`);
    await this.createCollection(collection);
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
    this.#channel.info(`创建完成:${this.#config().name}`);
  }

  /**
   * Splits, embeds and upserts `content` into every collection of
   * `collectionList` without touching the file store.
   *
   * Point ids are the hash of the trimmed chunk text. Chunks whose embedding
   * text ends up empty are skipped with a warning.
   *
   * @returns One array of upserted points per collection that received any.
   * NOTE(review): runInEmbeddingContext hands the callback an embedding
   * function derived from #text2vec; its exact semantics are defined elsewhere.
   */
  async insertItemOnly(fileName, content, collectionList) {
    return await runInEmbeddingContext(
      (t2v) => Promise.all(
        collectionList.map(async (collectionItem) => {
          const chunkList = (await this.#textSplitter(
            content,
            this.getPayload(fileName, content),
            collectionItem.collectionName
          )).filter((item) => !!item.pageContent.trim());
          if (!chunkList.length) {
            return;
          }
          const points = await Promise.all(
            chunkList.map(async (item) => {
              const pageContent = item.pageContent.trim();
              const id = getHash(pageContent);
              const payload = {
                ...item.metadata,
                chunk: pageContent,
                hash: id
              };
              const embeddingChunk = entryFormat(
                payload,
                this.#config().name,
                pageContent,
                collectionItem.embeddingTemplate?.entry
              );
              if (!embeddingChunk) {
                this.#channel.warn(
                  `内容格式化后内容为空,跳过\n${JSON.stringify({ payload, knowledge: this.#config().name, pageContent })}`
                );
                return;
              }
              payload["embeddingChunk"] = embeddingChunk;
              return {
                id,
                vector: {
                  chunk: await t2v(
                    embeddingChunk,
                    collectionItem.collectionName
                  )
                },
                payload
              };
            })
          ).then((list) => list.filter(isTruthy2));
          if (points.length) {
            await this.#qdClient.upsert(collectionItem.collectionName, {
              wait: true,
              points
            });
            return points;
          }
          return;
        })
      ).then((list) => list.filter(isTruthy2)),
      this.#text2vec
    );
  }

  /** Removes the points of `fileName` from every configured collection, then the stored file. */
  async #removeFileData(fileName) {
    await this.#util.multiDelete(
      this.#config().collectionList.map((item) => item.collectionName),
      {
        filter: {
          must: [{ key: "fileName", match: { value: fileName } }],
          should: null
        }
      }
    );
    await this.#vfs().delete(fileName, { force: true });
  }

  /**
   * Stores `content` under `fileName` and indexes it in every configured
   * collection.
   *
   * The file is written before indexing, and its existence is what makes a
   * later insert a no-op. When indexing fails, the file and any points that
   * were already written are therefore removed again, so the insert can be
   * retried instead of being skipped forever.
   *
   * @returns false when the file already exists, otherwise the upserted points.
   * @throws The indexing error; a failing rollback is only logged.
   */
  async _insertItem(fileName, content) {
    if (await this.#vfs().exists(fileName)) {
      return false;
    }
    await this.#vfs().writeFile(fileName, content);
    try {
      return await this.insertItemOnly(
        fileName,
        content,
        this.#config().collectionList
      );
    } catch (error) {
      try {
        await this.#removeFileData(fileName);
      } catch (cleanupError) {
        this.#channel.warn(`插入失败后回滚出错:${fileName}`, cleanupError);
      }
      throw error;
    }
  }

  /** Inserts a file; resolves without a value. `signal2` is currently unused. */
  async insertItem(fileName, content, signal2) {
    await this._insertItem(fileName, content);
  }

  /** Deletes the points of `fileName` from every configured collection and the stored file. */
  async deleteItem(fileName) {
    await this.#removeFileData(fileName);
  }

  /** Replaces a file: delete followed by insert. */
  async updateItem(fileName, content) {
    await this.deleteItem(fileName);
    await this.insertItem(fileName, content);
  }

  /**
   * Creates a new collection, indexes every stored file into it, then makes
   * it the active one. The new collection is deleted again when indexing fails.
   */
  async addCollection(collection) {
    const list = await this.#vfs().exists("") ? await this.#vfs().readdir("") : [];
    await this.createCollection(collection);
    const queue = fastq3(async (fileName) => {
      const content = await this.#vfs().readContent(fileName);
      // Skip anything that is not a non-empty text file.
      if (typeof content !== "string" || !content) {
        return;
      }
      await this.insertItemOnly(fileName, content, [collection]);
    }, 20);
    let queueError;
    // On the first failure remember the error and stop the queue.
    queue.error((error) => {
      if (error) {
        queueError = error;
        queue.killAndDrain();
      }
    });
    try {
      for (const item of list) {
        queue.push(item);
      }
      await queue.drained();
      if (queueError) {
        throw queueError;
      }
    } catch (error) {
      await this.#qdClient.deleteCollection(collection.collectionName);
      throw error;
    }
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  // The active collection cannot be deleted, so deleting never has to switch
  // the active collection.
  /**
   * Deletes a non-active collection that is listed in the config.
   * @returns false when the collection is active or unknown, true once deleted.
   */
  async deleteCollection(collectionName) {
    const isActivate = this.#config().activateCollection === collectionName;
    if (isActivate) {
      return false;
    }
    const collection = this.#config().collectionList.find(
      (item) => item.collectionName === collectionName
    );
    if (!collection) {
      return false;
    }
    await this.#qdClient.deleteCollection(collectionName);
    return true;
  }

  /** Points the active alias of this knowledge base at `collectionName`. */
  async changeActivateCollection(collectionName) {
    await this.#qdClient.setActivateCollection(
      collectionName,
      this.#config().activateName
    );
  }

  /** Creates a snapshot of every configured collection. */
  export() {
    return this.#util.export(
      this.#config().collectionList.map(({ collectionName }) => collectionName)
    );
  }

  /** Deletes every configured collection and the files stored for this knowledge base. */
  async destroy() {
    return this.#util.destroyKnowledge(
      this.#config().collectionList.map(({ collectionName }) => collectionName),
      this.#vfs()
    );
  }
};
780
+
781
+ // packages/knowledge/graph/define/config.ts
782
+ import * as v7 from "valibot";
783
// Schema of one collection of a graph knowledge base. After validation the
// collection is extended with `graphCollectionName`, derived from its name.
var GraphCollectionDefine = v7.pipe(
  v7.object({
    collectionName: v7.string(),
    size: v7.number(),
    // Optional embedding templates for entries, graph nodes and graph edges.
    embeddingTemplate: v7.optional(
      v7.object({
        entry: v7.optional(EmbeddingTemplateDefine),
        node: v7.optional(EmbeddingTemplateDefine),
        edge: v7.optional(EmbeddingTemplateDefine)
      })
    )
  }),
  v7.transform((collection) => {
    const graphCollectionName = getGraphCollectionName(collection.collectionName);
    return { ...collection, graphCollectionName };
  })
);
800
// Configuration schema of a graph knowledge base. After validation the config
// is extended with the two active-collection names derived from its name.
var GraphKnowledgeConfigDefine = v7.pipe(
  v7.object({
    ...BaseKnowledgeConfig.entries,
    type: v7.optional(v7.literal("normal-graph"), "normal-graph"),
    maxChunkAsync: v7.number(),
    collectionList: v7.array(GraphCollectionDefine)
  }),
  v7.transform((input) => {
    // Activated normal knowledge base (file chunks).
    const activateName = getActivateCollectionName(input.name);
    // Activated graph database.
    const activateGraphName = getActivateCollectionName(
      getGraphCollectionName(input.name)
    );
    return { ...input, activateName, activateGraphName };
  })
);
817
+
818
+ // packages/knowledge/graph/graph.knowledge.service.ts
819
+ import * as v11 from "valibot";
820
+
821
+ // packages/knowledge/graph/graph.util.service.ts
822
+ import { runInEmbeddingContext as runInEmbeddingContext2 } from "@shenghuabi/knowledge/util";
823
+ import { inject as inject5 } from "static-injector";
824
+ import { QdrantClientService as QdrantClientService5 } from "@shenghuabi/knowledge/qdrant";
825
+
826
+ // packages/knowledge/graph/define/define.ts
827
+ import { uniqBy } from "lodash-es";
828
+ import * as v8 from "valibot";
829
+
830
+ // packages/knowledge/graph/util.ts
831
/**
 * Builds the key used for an edge: the source name and the target name
 * joined by a single comma.
 *
 * @param source name of the edge's source node
 * @param target name of the edge's target node
 * @returns the string `source,target`
 */
function getEdgeName(source, target) {
  return "".concat(source, ",", target);
}
834
+
835
+ // packages/knowledge/graph/define/define.ts
836
+ import { v4 as v43 } from "uuid";
837
/** An entity: a name, a type and a description that defaults to "". */
var ENTITY_DEFINE = v8.object({
  name: v8.string(),
  description: v8.optional(v8.string(), ""),
  /** Entity type. */
  type: v8.string()
});
/**
 * A relation between two entities, addressed by `source` and `target`.
 * `strength` defaults to 5. `keywords` may be given as a comma-separated
 * string (split on "," and trimmed) or as an array of strings.
 */
var ENTITY_RELATION_DEFINE = v8.object({
  source: v8.string(),
  target: v8.string(),
  description: v8.optional(v8.string(), ""),
  strength: v8.optional(v8.number(), 5),
  keywords: v8.optional(
    v8.union([
      v8.pipe(
        v8.string(),
        v8.transform((text) => text.split(",").map((part) => part.trim()))
      ),
      v8.array(v8.string())
    ])
  )
});
/**
 * Extraction result: entities, relations and keywords.
 *
 * Each list may be null/undefined (treated as empty). Entities without a name
 * or description and relations without source, target or description are
 * dropped, and the remainder is de-duplicated on the joined key fields.
 * Keywords are trimmed.
 */
var EntityExtraDefine = v8.object({
  entity: v8.pipe(
    v8.nullish(v8.array(ENTITY_DEFINE), []),
    v8.transform((list) => {
      const usable = (list || []).filter(
        (item) => Boolean(item.name) && Boolean(item.description)
      );
      return uniqBy(
        usable,
        (item) => `${item.name}|${item.type}|${item.description}`
      );
    })
  ),
  entity_relation: v8.pipe(
    v8.nullish(v8.array(ENTITY_RELATION_DEFINE), []),
    v8.transform((list) => {
      const usable = (list || []).filter(
        (item) => Boolean(item.source) && Boolean(item.target) && Boolean(item.description)
      );
      return uniqBy(
        usable,
        (item) => `${item.source}|${item.target}|${item.description}`
      );
    })
  ),
  keyword: v8.pipe(
    v8.nullish(v8.array(v8.string()), []),
    v8.transform((words) => words.map((word) => word.trim()))
  )
});
884
/** Stored node record: the entity fields plus kind, id, chunkId and fileName. */
var NodePayloadDefine = v8.object({
  ...ENTITY_DEFINE.entries,
  kind: v8.optional(v8.literal("node"), "node"),
  id: v8.string(),
  chunkId: v8.string(),
  fileName: v8.string()
});
/** Node record with a required id; `kind` is not read from the input and is always set to "node". */
var NodeItemDefine = v8.pipe(
  v8.omit(NodePayloadDefine, ["kind"]),
  v8.transform((node) => {
    return { ...node, kind: "node" };
  })
);
/** Node payload without an id, carrying the text that was embedded (`embeddingChunk`). */
var NodePayloadNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(NodePayloadDefine, ["id"]).entries,
    embeddingChunk: v8.string()
  })
);
/** Node record whose id is optional (a fresh UUID v4 is generated when missing); `kind` is always "node". */
var NodeItemNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(NodePayloadDefine, ["kind", "id"]).entries,
    id: v8.optional(NodePayloadDefine.entries.id, () => v43())
  }),
  v8.transform((node) => {
    return { ...node, kind: "node" };
  })
);
908
/** Stored edge record: the relation fields plus chunkId, kind, name, fileName and id. */
var EdgePayloadDefine = v8.object({
  ...ENTITY_RELATION_DEFINE.entries,
  chunkId: v8.string(),
  kind: v8.optional(v8.literal("edge"), "edge"),
  name: v8.string(),
  fileName: v8.string(),
  id: v8.string()
});
/**
 * Edge record with a required id. `kind` and `name` are not read from the
 * input: `name` is recomputed from source/target and `kind` is set to "edge".
 */
var EdgeItemDefine = v8.pipe(
  v8.omit(EdgePayloadDefine, ["kind", "name"]),
  v8.transform((data) => ({
    ...data,
    name: getEdgeName(data.source, data.target),
    kind: "edge"
  }))
);
/**
 * Edge payload without an id, carrying the text that was embedded
 * (`embeddingChunk`); `name` is recomputed from source/target.
 */
var EdgePayloadNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(EdgePayloadDefine, ["name", "id"]).entries,
    embeddingChunk: v8.string()
  }),
  v8.transform((data) => ({
    ...data,
    name: getEdgeName(data.source, data.target)
  }))
);
/**
 * Edge record whose id is optional (a fresh UUID v4 is generated when
 * missing). `name` is recomputed from source/target and `kind` is "edge".
 */
var EdgeItemNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(EdgePayloadDefine, ["kind", "name", "id"]).entries,
    // Use the edge's own id schema rather than borrowing the node's, so the
    // two record types can evolve independently.
    id: v8.optional(EdgePayloadDefine.entries.id, () => v43())
  }),
  v8.transform((data) => ({
    ...data,
    name: getEdgeName(data.source, data.target),
    kind: "edge"
  }))
);
/** Input for creating graph data for one chunk of one file; both lists default to empty. */
var KnowledgeGraphCreateDefine = v8.object({
  fileName: v8.string(),
  chunkId: v8.string(),
  nodeList: v8.optional(v8.array(ENTITY_DEFINE), []),
  edgeList: v8.optional(v8.array(ENTITY_RELATION_DEFINE), [])
});
/** Stored keyword record; `kind` defaults to "keyword". */
var KeywordPayloadNewDefine = v8.pipe(
  v8.object({
    kind: v8.optional(v8.literal("keyword"), "keyword"),
    keyword: v8.string(),
    chunkId: v8.string(),
    fileName: v8.string()
  })
);
959
+
960
+ // packages/knowledge/graph/graph.util.service.ts
961
+ import * as v9 from "valibot";
962
/**
 * Applies graph change sets (upserts and deletes) to every graph collection
 * listed in the configuration.
 */
var GraphKnowledgeUtilService = class {
  #qdClient = inject5(QdrantClientService5);
  #config = inject5(ConfigToken);
  #text2vec = inject5(Text2VecToken);
  /**
   * Runs `fn` inside an embedding context and applies the change set it
   * resolves to.
   *
   * The change set has the shape `{ upsert?, delete? }`:
   * - `upsert.nodes` / `upsert.edges` / `upsert.keywords` are arrays of
   *   `{ id, payload }`. For each configured collection every item is
   *   validated, embedded with that collection's embedder and upserted into
   *   the collection's graph collection.
   * - `delete.nodes` / `delete.edges` are either an array of items with an
   *   `id` (deleted by point id; an empty array is a no-op) or an object with
   *   a `filter` (deleted by filter).
   *
   * @param fn async callback producing the change set
   * @returns whatever `runInEmbeddingContext` returns for the combined work
   */
  updateContext(fn) {
    return runInEmbeddingContext2(async (t2v) => {
      const result = await fn();
      const pending = [];
      if (result.upsert) {
        pending.push(
          ...this.#config().collectionList.map(
            async ({ graphCollectionName, collectionName, embeddingTemplate }) => {
              // Builds every point, then writes them in one upsert call.
              const store = (items, buildPoint) => Promise.all(items.map(buildPoint)).then(
                (points) => this.#qdClient.upsert(graphCollectionName, { points })
              );
              // Point builder shared by nodes and edges: format the text,
              // validate the payload, then embed the formatted text.
              const embedded = (schema, toVectorString, template) => async (item) => {
                const embeddingChunk = entryFormat(
                  item.payload,
                  this.#config().name,
                  toVectorString(item.payload),
                  template
                );
                return {
                  id: item.id,
                  payload: v9.parse(schema, { ...item.payload, embeddingChunk }),
                  vector: {
                    chunk: await t2v(embeddingChunk, collectionName)
                  }
                };
              };
              const jobs = [];
              if (result.upsert?.nodes?.length) {
                jobs.push(
                  store(
                    result.upsert.nodes,
                    embedded(NodePayloadNewDefine, nodeVectorString, embeddingTemplate?.node)
                  )
                );
              }
              if (result.upsert?.edges?.length) {
                jobs.push(
                  store(
                    result.upsert.edges,
                    embedded(EdgePayloadNewDefine, edgeVectorString, embeddingTemplate?.edge)
                  )
                );
              }
              if (result.upsert?.keywords?.length) {
                // Keywords are embedded as-is, without a template.
                jobs.push(
                  store(result.upsert.keywords, async (item) => ({
                    id: item.id,
                    payload: v9.parse(KeywordPayloadNewDefine, item.payload),
                    vector: {
                      chunk: await t2v(item.payload.keyword, collectionName)
                    }
                  }))
                );
              }
              return Promise.all(jobs);
            }
          )
        );
      }
      if (result.delete) {
        pending.push(
          ...this.#config().collectionList.map(async ({ graphCollectionName }) => {
            const targets = [result.delete?.nodes, result.delete?.edges];
            await Promise.all(
              targets.map((target) => {
                if (Array.isArray(target)) {
                  return target.length ? this.#qdClient.delete(graphCollectionName, {
                    points: target.map((item) => item.id)
                  }) : void 0;
                }
                if (target) {
                  return this.#qdClient.delete(graphCollectionName, {
                    filter: target.filter
                  });
                }
                return void 0;
              })
            );
          })
        );
      }
      return Promise.all(pending);
    }, this.#text2vec);
  }
};
1080
+
1081
+ // packages/knowledge/graph/graph.local.service.ts
1082
+ import { QdrantClientService as QdrantClientService6 } from "@shenghuabi/knowledge/qdrant";
1083
+ import { computed as computed3, inject as inject6, signal, untracked } from "static-injector";
1084
+ import Graph from "graphology";
1085
+ import * as v10 from "valibot";
1086
+ import { LogToken as LogToken3 } from "@shenghuabi/knowledge/util";
1087
/**
 * In-memory copy of the knowledge graph, held in a graphology `Graph`.
 *
 * Node attributes have the shape `{ list, name }` and edge attributes
 * `{ list, name, source, target }`, where `list` holds the individual records
 * that share that node name / edge key. Edges are keyed by
 * `getEdgeName(source, target)`.
 */
var GraphLocalService = class {
  #qdClient = inject6(QdrantClientService6);
  #config = inject6(ConfigToken);
  /** Page size used when scrolling records in `loadingData`. */
  #limit = 5e3;
  #graph;
  #log = inject6(LogToken3);
  /** Change counter, incremented after each local mutation. */
  update$ = signal(0);
  /** Exported graph; reads `update$` so it is recomputed after each mutation. */
  graphExport$$ = computed3(() => {
    this.update$();
    return this.#graph.export();
  });
  /** Returns the current graph instance (undefined until `initGraph` has run). */
  getGraph() {
    return this.#graph;
  }
  /** Creates a fresh graph and starts loading its data; yields the loading promise. */
  loadDataInitGraph$$ = computed3(() => {
    this.initGraph();
    return untracked(() => this.loadingData());
  });
  /** Only initializes the (empty) graph; no data is loaded. */
  initGraph() {
    this.#graph = new Graph();
    return this.#graph;
  }
  /** Outgoing edge keys of `name`; logs a warning and returns [] if the lookup throws. */
  #getEdgesFromSource(name) {
    try {
      return this.#graph.outEdges(name);
    } catch (error) {
      this.#log.warn(error);
      return [];
    }
  }
  /** Incoming edge keys of `name`; logs a warning and returns [] if the lookup throws. */
  #getEdgesFromTarget(name) {
    try {
      return this.#graph.inEdges(name);
    } catch (error) {
      this.#log.warn(error);
      return [];
    }
  }
  /** Records of all nodes that `name` points to, or [] when `name` is not in the graph. */
  getTargetListFromSource(name) {
    if (!this.#graph.hasNode(name)) {
      return [];
    }
    return this.#getEdgesFromSource(name).flatMap(
      (edgeKey) => this.#graph.getTargetAttributes(edgeKey).list
    );
  }
  /**
   * Derives a placeholder node record from an edge record.
   *
   * @param item edge record
   * @param type which endpoint to use as the node name: "source" or "target"
   */
  edgeToNode(item, type) {
    return {
      name: item[type],
      description: item.description,
      type: "未知",
      chunkId: item.chunkId,
      fileName: item.fileName
    };
  }
  /**
   * Inserts a node record, or replaces the record with the same id under the
   * same node name. Creates the node when it does not exist yet.
   */
  createOrUpdateNodeAttr(item) {
    const payload = v10.parse(NodeItemNewDefine, item);
    if (!this.#graph.hasNode(payload.name)) {
      this.#graph.addNode(payload.name, {
        list: [payload],
        name: payload.name
      });
      return;
    }
    const attr = this.#graph.getNodeAttributes(payload.name);
    const index = attr.list.findIndex((entry) => entry.id === payload.id);
    if (index === -1) {
      attr.list.push(payload);
    } else {
      attr.list[index] = payload;
    }
    this.#graph.replaceNodeAttributes(payload.name, attr);
  }
  /**
   * Inserts an edge record, or replaces the record with the same id under the
   * same edge key. The source/target nodes may not exist yet; in that case
   * placeholder nodes derived from the edge are created first.
   */
  createOrUpdateEdgeAttr(item) {
    const payload = v10.parse(EdgeItemNewDefine, item);
    if (this.#graph.hasEdge(payload.name)) {
      const attr = this.#graph.getEdgeAttributes(payload.name);
      const index = attr.list.findIndex((entry) => entry.id === payload.id);
      if (index === -1) {
        attr.list.push(payload);
      } else {
        attr.list[index] = payload;
      }
      // Always address the edge by the validated key.
      this.#graph.replaceEdgeAttributes(payload.name, attr);
      return;
    }
    for (const endpoint of ["source", "target"]) {
      const nodeName = payload[endpoint];
      if (!this.#graph.hasNode(nodeName)) {
        this.#graph.addNode(nodeName, {
          list: [v10.parse(NodeItemNewDefine, this.edgeToNode(payload, endpoint))],
          name: nodeName
        });
      }
    }
    this.#graph.addEdgeWithKey(payload.name, payload.source, payload.target, {
      list: [payload],
      name: payload.name,
      source: payload.source,
      target: payload.target
    });
  }
  /** Removes one node record by id; drops the node once its record list is empty. */
  #deleteNodeAttr(payload) {
    if (!this.#graph.hasNode(payload.name)) {
      return;
    }
    const attr = this.#graph.getNodeAttributes(payload.name);
    const index = attr.list.findIndex((item) => item.id === payload.id);
    if (index === -1) {
      return;
    }
    attr.list.splice(index, 1);
    if (attr.list.length === 0) {
      this.#graph.dropNode(payload.name);
    } else {
      this.#graph.replaceNodeAttributes(payload.name, attr);
    }
  }
  /** Removes one edge record by id; drops the edge once its record list is empty. */
  #deleteEdgeAttr(item) {
    const name = getEdgeName(item.source, item.target);
    if (!this.#graph.hasEdge(name)) {
      return;
    }
    const attr = this.#graph.getEdgeAttributes(name);
    const index = attr.list.findIndex((edgeItem) => edgeItem.id === item.id);
    if (index === -1) {
      return;
    }
    attr.list.splice(index, 1);
    if (attr.list.length === 0) {
      this.#graph.dropEdge(name);
    } else {
      this.#graph.replaceEdgeAttributes(name, attr);
    }
  }
  /** Routes a stored record to the node or edge upsert based on its `kind`. */
  #updateGraph(payload) {
    if (payload.kind === "node") {
      this.createOrUpdateNodeAttr(payload);
    } else if (payload.kind === "edge") {
      this.createOrUpdateEdgeAttr(payload);
    }
  }
  /**
   * Loads all node and edge records from the active graph collection into the
   * local graph. Both kinds are fetched concurrently; the fetched pages are
   * applied afterwards, nodes first and then edges.
   */
  async loadingData() {
    const [nodeList, edgeList] = await Promise.all(
      ["node", "edge"].map(async (kind) => {
        const appliers = [];
        // The page cursor must be local to each kind: the two scroll loops run
        // concurrently, and a shared cursor would let one loop continue from
        // the other's page offset.
        let offset;
        do {
          const { points, next_page_offset } = await this.#qdClient.scroll(
            this.#config().activateGraphName,
            {
              limit: this.#limit,
              filter: {
                must: {
                  key: "kind",
                  match: {
                    value: kind
                  }
                }
              },
              with_payload: true,
              offset
            }
          );
          appliers.push(() => {
            for (const item of points) {
              this.#updateGraph({ ...item.payload, id: item.id });
            }
          });
          offset = next_page_offset;
        } while (offset);
        return appliers;
      })
    );
    nodeList.forEach((fn) => fn());
    edgeList.forEach((fn) => fn());
  }
  /**
   * Splits one node into several: the node's records and its edges are copied
   * onto every replacement name, then the original node is dropped.
   * The replacement nodes may already exist.
   *
   * @param options `{ node, list }` - the node to split and the replacement names
   */
  async splitNode(options) {
    const nodeAttr = this.#graph.getNodeAttributes(options.node);
    options.list.forEach((replaceNodeName) => {
      nodeAttr.list.forEach((item) => {
        this.createOrUpdateNodeAttr({ ...item, name: replaceNodeName });
      });
    });
    const sourceEdges = this.#getEdgesFromSource(options.node);
    const targetEdges = this.#getEdgesFromTarget(options.node);
    for (const edge of sourceEdges) {
      const attr = this.#graph.getEdgeAttributes(edge);
      for (const replaceNodeName of options.list) {
        attr.list.forEach((attrItem) => {
          this.createOrUpdateEdgeAttr({ ...attrItem, source: replaceNodeName });
        });
      }
    }
    for (const edge of targetEdges) {
      const attr = this.#graph.getEdgeAttributes(edge);
      for (const replaceNodeName of options.list) {
        attr.list.forEach((attrItem) => {
          this.createOrUpdateEdgeAttr({ ...attrItem, target: replaceNodeName });
        });
      }
    }
    this.#graph.dropNode(options.node);
    this.update$.update((a) => a + 1);
  }
  /**
   * Merges several nodes into one: records and edges of every node in
   * `options.list` are copied onto `options.node`, then the listed nodes are
   * dropped.
   *
   * @param options `{ node, list }` - the merge target and the nodes to merge
   */
  async mergeNode(options) {
    const sourceEdges = options.list.flatMap(
      (item) => this.#getEdgesFromSource(item)
    );
    const targetEdges = options.list.flatMap(
      (item) => this.#getEdgesFromTarget(item)
    );
    options.list.forEach((node) => {
      const attr = this.#graph.getNodeAttributes(node);
      attr.list.forEach((item) => {
        this.createOrUpdateNodeAttr({ ...item, name: options.node });
      });
    });
    for (const edge of sourceEdges) {
      const data = this.#graph.getEdgeAttributes(edge);
      data.list.forEach((item) => {
        this.createOrUpdateEdgeAttr({ ...item, source: options.node });
      });
    }
    for (const edge of targetEdges) {
      const data = this.#graph.getEdgeAttributes(edge);
      data.list.forEach((item) => {
        this.createOrUpdateEdgeAttr({ ...item, target: options.node });
      });
    }
    options.list.forEach((item) => {
      this.#graph.dropNode(item);
    });
    this.update$.update((a) => a + 1);
  }
  /** Replaces (or inserts) a node record and signals the change. */
  async changeNodeDescription(payload) {
    this.createOrUpdateNodeAttr(payload);
    this.update$.update((a) => a + 1);
  }
  /**
   * Stores the new edge record; when an endpoint changed, the old record is
   * removed from the edge it used to live on.
   */
  async changeEdge(item, oldItem) {
    this.createOrUpdateEdgeAttr(item);
    if (item.source !== oldItem.source || item.target !== oldItem.target) {
      this.#deleteEdgeAttr(oldItem);
    }
    this.update$.update((a) => a + 1);
  }
  /** Adds node and edge records (`input.nodes` / `input.edges`, both optional). */
  async add(input) {
    input.nodes?.forEach((item) => {
      this.createOrUpdateNodeAttr(item);
    });
    input.edges?.forEach((item) => {
      this.createOrUpdateEdgeAttr(item);
    });
    this.update$.update((a) => a + 1);
  }
  /** Removes a single node record and signals the change. */
  async deleteNodeItem(item) {
    this.#deleteNodeAttr(item);
    this.update$.update((a) => a + 1);
  }
  /** Removes a single edge record and signals the change. */
  deleteEdgeItem(item) {
    this.#deleteEdgeAttr(item);
    this.update$.update((a) => a + 1);
  }
  /** Drops a whole node by name and signals the change. */
  deleteNode(name) {
    this.#graph.dropNode(name);
    this.update$.update((a) => a + 1);
  }
  /**
   * node => edge.
   * Collects, for every given node record, the records of all edges attached
   * to that node that share the node record's chunkId. Each
   * (edge, chunkId) pair is visited once.
   */
  getEdgeByNode(nodes) {
    const repeatList = /* @__PURE__ */ new Set();
    const list = [];
    for (const node of nodes) {
      const edgeNames = [
        ...this.#graph.outEdges(node.name),
        ...this.#graph.inEdges(node.name)
      ];
      for (const edgeName of edgeNames) {
        const key = `${edgeName}|${node.chunkId}`;
        if (repeatList.has(key)) {
          continue;
        }
        repeatList.add(key);
        list.push(
          ...this.#graph.getEdgeAttributes(edgeName).list.filter((item) => item.chunkId === node.chunkId)
        );
      }
    }
    return list;
  }
  /**
   * Looks up nodes by edge.
   * For every edge record, collects the source/target node records that share
   * the edge record's chunkId. Each (node, chunkId) pair is visited once.
   */
  getNodeByEdge(edges) {
    const repeatList = /* @__PURE__ */ new Set();
    const list = [];
    for (const edge of edges) {
      for (const nodeName of [edge.source, edge.target]) {
        const key = `${nodeName}|${edge.chunkId}`;
        if (repeatList.has(key)) {
          continue;
        }
        repeatList.add(key);
        list.push(
          ...this.#graph.getNodeAttributes(nodeName).list.filter((item) => item.chunkId === edge.chunkId)
        );
      }
    }
    return list;
  }
  /** Records of all edges attached to `node.name` that share `node.chunkId`. */
  getChunkEdgeByNode(node) {
    const list = [];
    const edgeNames = [
      ...this.#graph.outEdges(node.name),
      ...this.#graph.inEdges(node.name)
    ];
    for (const edgeName of edgeNames) {
      const edgeAttr = this.#graph.getEdgeAttributes(edgeName);
      list.push(
        ...edgeAttr.list.filter((item) => node.chunkId === item.chunkId)
      );
    }
    return list;
  }
};
1425
+
1426
+ // packages/knowledge/graph/graph.knowledge.service.ts
1427
+ import { BatchQueue as BatchQueue2 } from "@shenghuabi/knowledge/util";
1428
/**
 * Knowledge base that, next to each plain collection handled by the parent
 * class, maintains a graph collection holding node, edge and keyword records.
 */
var GraphKnolwdgeService = class extends NormalKnowledgeService {
  #text2vec = inject7(Text2VecToken);
  #config = inject7(ConfigToken);
  #util = inject7(KnowledgeUtilService);
  #graphUtil = inject7(GraphKnowledgeUtilService);
  #qdClient = inject7(QdrantClientService7);
  #channel = inject7(LogToken4);
  #injector = inject7(Injector);
  #contentParser = inject7(ContentParserToken);
  #graphLocal = inject7(GraphLocalService);
  /** Validates and normalizes a collection definition. */
  formatCollection(input) {
    return v11.parse(GraphCollectionDefine, input);
  }
  /**
   * Creates the graph collection for `collection` together with its payload
   * keyword indexes.
   *
   * @throws Error when a collection with the derived name already exists
   */
  async #createCollection(collection) {
    const collectionName = getGraphCollectionName(collection.collectionName);
    this.#channel.info(`创建图集合:${collectionName}`);
    const { exists } = await this.#qdClient.collectionExists(collectionName);
    if (exists) {
      throw new Error(`集合${collectionName}已存在`);
    }
    await this.#qdClient.createCollection(collectionName, {
      vectors: {
        chunk: {
          size: collection.size,
          distance: "Cosine",
          on_disk: true
        }
      }
    });
    this.#channel.info(`创建图索引`);
    // Indexes are created one after another, in this order.
    for (const field of ["kind", "fileName", "name", "source", "target"]) {
      await this.#qdClient.createPayloadKeywordIndex(collectionName, field);
    }
  }
  /** Creates the plain collection, then the graph collection, and activates the latter. */
  async create(collection) {
    await super.create(collection);
    await this.#createCollection(collection);
    await this.#qdClient.setActivateCollection(
      collection.graphCollectionName,
      this.#config().activateGraphName
    );
  }
  /**
   * Inserts a file: chunks it through the parent implementation, then
   * extracts graph data from the chunks.
   *
   * Graph knowledge bases do not allow changing the chunk size: a different
   * size yields different chunks, and therefore different relations.
   *
   * @param fileName name of the file being inserted
   * @param content file content, forwarded to `_insertItem`
   * @param signal2 optional abort signal
   */
  async insertItem(fileName, content, signal2) {
    const list = await this._insertItem(fileName, content);
    if (!list || !list.length) {
      return;
    }
    const result = list[0].map(({ payload }) => payload);
    await this.#insert(result, fileName, signal2);
  }
  /**
   * Extracts entities, relations and keywords from every chunk with bounded
   * concurrency (`maxChunkAsync`) and stores them. When the local graph is
   * already initialized, the new nodes and edges are added to it as well.
   */
  async #insert(chunkList, fileName, signal2) {
    const countObj = {
      success: 0,
      error: 0
    };
    const hasGraph = !!this.#graphLocal.getGraph();
    const llmAsyncQueue = fastq4(async (document) => {
      if (signal2?.aborted) {
        return;
      }
      const extractData = await this.#contentParser.parse(document, signal2);
      countObj.success++;
      const nodes = extractData.entity.map((item) => ({
        id: v44(),
        payload: {
          ...item,
          fileName,
          chunkId: document.hash
        }
      }));
      const edges = extractData.entity_relation.map((item) => ({
        id: v44(),
        payload: {
          ...item,
          fileName,
          chunkId: document.hash
        }
      }));
      const keywords = extractData.keyword.map((item) => ({
        id: v44(),
        payload: {
          keyword: item,
          chunkId: document.hash,
          fileName
        }
      }));
      await this.#graphUtil.updateContext(async () => ({
        upsert: {
          nodes,
          edges,
          keywords
        }
      }));
      if (hasGraph) {
        this.#graphLocal.add({
          nodes: nodes.map((item) => ({ ...item.payload, id: item.id })),
          edges: edges.map((item) => ({ ...item.payload, id: item.id }))
        });
      }
    }, this.#config().maxChunkAsync);
    llmAsyncQueue.error((error, task) => {
      if (error) {
        countObj.error++;
        this.#channel.warn(`[${fileName}]解析失败:\n${task.chunk}\n`, error);
      }
    });
    for (const item of chunkList) {
      llmAsyncQueue.push(item);
    }
    await llmAsyncQueue.drained();
    if (signal2?.aborted) {
      // Aborted: remove whatever was already written for this file.
      return this.deleteItem(fileName);
    }
    if (chunkList.length === countObj.error) {
      // Every chunk failed: remove the file through the parent implementation.
      await super.deleteItem(fileName);
    }
  }
  /** Deletes a file from the plain collections and from every graph collection. */
  async deleteItem(fileName) {
    await super.deleteItem(fileName);
    await this.#util.multiDelete(
      this.#config().collectionList.map((item) => item.graphCollectionName),
      {
        filter: {
          must: [{ key: "fileName", match: { value: fileName } }],
          should: null
        }
      }
    );
  }
  /** Replaces a file: delete, then insert again. */
  async updateItem(fileName, content) {
    await this.deleteItem(fileName);
    await this.insertItem(fileName, content);
  }
  /**
   * Adds a collection: creates it, re-embeds every record of the currently
   * active graph collection with the new collection's embedder, writes the
   * records into the new graph collection and activates it. On a write error
   * both new collections are deleted and the error is rethrown.
   */
  async addCollection(collection) {
    const activateCollectionName = this.#config().activateGraphName;
    await super.addCollection(collection);
    await this.#createCollection(collection);
    const newCollectionName = collection.graphCollectionName;
    const batchQueue = new BatchQueue2(
      (item) => this.#text2vec(item, collection.collectionName)
    );
    const queue = this.#util.updatePointsQueue(newCollectionName);
    let queueError = void 0;
    queue.queue.error((err) => {
      if (err) {
        queueError = err;
        queue.queue.killAndDrain();
      }
    });
    let offset;
    do {
      const { points, next_page_offset } = await this.#qdClient.scroll(
        activateCollectionName,
        {
          limit: 5e3,
          with_payload: true,
          with_vector: false,
          offset
        }
      );
      await batchQueue.then(
        Promise.all([
          Promise.all(
            points.map(async (point) => {
              let embeddingChunk;
              if (point.payload["kind"] === "node") {
                embeddingChunk = entryFormat(
                  point.payload,
                  this.#config().name,
                  nodeVectorString(point.payload),
                  collection.embeddingTemplate?.node
                );
              } else if (point.payload["kind"] === "edge") {
                embeddingChunk = entryFormat(
                  point.payload,
                  this.#config().name,
                  edgeVectorString(point.payload),
                  collection.embeddingTemplate?.edge
                );
              } else {
                // Any other kind is embedded by its keyword text.
                embeddingChunk = point.payload["keyword"];
              }
              const vector = await batchQueue.push(embeddingChunk);
              queue.push({
                id: point.id,
                payload: {
                  ...point.payload,
                  embeddingChunk
                },
                vector: {
                  chunk: vector
                }
              });
            })
          )
        ])
      );
      offset = next_page_offset;
    } while (offset);
    queue.complete();
    await queue.queue.drained();
    if (queueError) {
      await this.#qdClient.deleteCollection(collection.collectionName);
      await this.#qdClient.deleteCollection(collection.graphCollectionName);
      throw queueError;
    }
    await this.#qdClient.setActivateCollection(
      newCollectionName,
      this.#config().activateGraphName
    );
  }
  /**
   * Deletes a collection and its graph collection.
   *
   * @returns the parent's falsy result when it did not delete, otherwise true
   */
  async deleteCollection(collectionName) {
    const result = await super.deleteCollection(collectionName);
    if (!result) {
      return result;
    }
    const collection = this.#config().collectionList.find(
      (item) => item.collectionName === collectionName
    );
    // The entry may no longer be listed once the parent has removed the
    // collection; the graph name is derived from the collection name, so fall
    // back to deriving it instead of dereferencing undefined.
    const graphCollectionName = collection?.graphCollectionName ?? getGraphCollectionName(collectionName);
    await this.#qdClient.deleteCollection(graphCollectionName);
    return true;
  }
  /** Switches the active plain collection and the matching active graph collection. */
  async changeActivateCollection(collectionName) {
    await super.changeActivateCollection(collectionName);
    await this.#qdClient.setActivateCollection(
      getGraphCollectionName(collectionName),
      this.#config().activateGraphName
    );
  }
  /** Exports every plain collection together with its graph collection. */
  export() {
    return this.#util.export(
      this.#config().collectionList.flatMap(
        ({ collectionName, graphCollectionName }) => [
          collectionName,
          graphCollectionName
        ]
      )
    );
  }
  /** Destroys the plain knowledge base, then every graph collection. */
  async destroy() {
    await super.destroy();
    return this.#util.destroyKnowledge(
      this.#config().collectionList.map(
        ({ graphCollectionName }) => graphCollectionName
      )
    );
  }
};
1681
+
1682
+ // packages/knowledge/knowledge.manager.service.ts
1683
+ import { FileParserService } from "@shenghuabi/knowledge/file-parser";
1684
+
1685
+ // packages/knowledge/graph/graph.handle.service.ts
1686
+ import { QdrantClientService as QdrantClientService8 } from "@shenghuabi/knowledge/qdrant";
1687
+ import { inject as inject8, Injector as Injector2 } from "static-injector";
1688
+ import { v4 as v45 } from "uuid";
1689
+ import * as v12 from "valibot";
1690
+ import { isTruthy as isTruthy3, LogToken as LogToken5 } from "@shenghuabi/knowledge/util";
1691
/** Upper bound on the number of records requested by a single scroll call. */
var MAX_LIMIT = 99999;
/** Filter condition matching records whose `kind` is "edge". */
var FilterEdge = { key: "kind", match: { value: "edge" } };
/** Filter condition matching records whose `kind` is "node". */
var FilterNode = { key: "kind", match: { value: "node" } };
1704
/**
 * Editing operations on the stored graph (split, merge, change, add, delete).
 * Every operation builds a change set and hands it to
 * `GraphKnowledgeUtilService.updateContext`.
 */
var GraphHandleService = class {
  #text2vec = inject8(Text2VecToken);
  #config = inject8(ConfigToken);
  #util = inject8(KnowledgeUtilService);
  #qdClient = inject8(QdrantClientService8);
  #channel = inject8(LogToken5);
  #injector = inject8(Injector2);
  #graphUtil = inject8(GraphKnowledgeUtilService);
  /**
   * Splits a node: every record of `options.node` and every edge touching it
   * is re-created (with a new id) once per name in `options.list`, and the
   * original records are deleted.
   *
   * @param options `{ node, list }` - the node to split and the replacement names
   */
  async splitNode(options) {
    await this.#graphUtil.updateContext(async () => {
      const [{ points: nodes }, { points: edges }] = await Promise.all([
        this.#qdClient.scroll(this.#config().activateGraphName, {
          limit: MAX_LIMIT,
          filter: {
            must: [FilterNode, { key: "name", match: { value: options.node } }]
          },
          with_payload: true,
          with_vector: false
        }),
        this.#qdClient.scroll(this.#config().activateGraphName, {
          limit: MAX_LIMIT,
          filter: {
            must: [FilterEdge],
            should: [
              { key: "source", match: { value: options.node } },
              { key: "target", match: { value: options.node } }
            ]
          },
          with_payload: true,
          with_vector: false
        })
      ]);
      const updateNodes = nodes.flatMap(
        (node) => options.list.map((nodeName) => ({
          payload: { ...node.payload, name: nodeName },
          id: v45()
        }))
      );
      const updateEdges = edges.flatMap(
        (edge) => options.list.map((nodeName) => {
          // Replace whichever endpoint refers to the split node (source wins
          // when both do).
          const payload = edge.payload["source"] === options.node ? { ...edge.payload, source: nodeName } : { ...edge.payload, target: nodeName };
          payload["name"] = getEdgeName(payload["source"], payload["target"]);
          return {
            payload,
            id: v45()
          };
        })
      );
      return {
        upsert: {
          nodes: updateNodes,
          edges: updateEdges
        },
        delete: {
          nodes,
          edges
        }
      };
    });
  }
  /**
   * Merges nodes: every record of the nodes in `options.list` is re-created
   * under `options.node`, edges touching them are re-pointed to it, and the
   * original records are deleted. Edges whose two endpoints are both being
   * merged are deleted without a replacement.
   *
   * @param options `{ node, list }` - the merge target and the nodes to merge
   */
  async mergeNode(options) {
    // A Set instead of a plain object: names such as "constructor" or
    // "toString" would otherwise match inherited object properties.
    const mergedNames = new Set(options.list);
    await this.#graphUtil.updateContext(async () => {
      const [{ points: nodes }, { points: edges }] = await Promise.all([
        this.#qdClient.scroll(this.#config().activateGraphName, {
          limit: MAX_LIMIT,
          filter: {
            must: [FilterNode, { key: "name", match: { any: options.list } }]
          },
          with_payload: true,
          with_vector: false
        }),
        this.#qdClient.scroll(this.#config().activateGraphName, {
          limit: MAX_LIMIT,
          filter: {
            must: [FilterEdge],
            should: [
              { key: "source", match: { any: options.list } },
              { key: "target", match: { any: options.list } }
            ]
          },
          with_payload: true,
          with_vector: false
        })
      ]);
      const updateNodes = nodes.map((node) => ({
        payload: { ...node.payload, name: options.node },
        id: v45()
      }));
      const updateEdges = edges.map((edge) => {
        const hasSource = mergedNames.has(edge.payload["source"]);
        const hasTarget = mergedNames.has(edge.payload["target"]);
        if (hasSource && hasTarget) {
          return void 0;
        }
        const payload = hasSource ? { ...edge.payload, source: options.node } : { ...edge.payload, target: options.node };
        payload["name"] = getEdgeName(payload["source"], payload["target"]);
        return {
          payload,
          id: v45()
        };
      }).filter(isTruthy3);
      return {
        upsert: {
          nodes: updateNodes,
          edges: updateEdges
        },
        delete: {
          nodes,
          edges
        }
      };
    });
  }
  /** Re-stores a node record. Other fields could be changed too, but only the description is meant to be. */
  async changeNodeDescription(item) {
    const { id, ...payload } = v12.parse(NodeItemDefine, item);
    await this.#graphUtil.updateContext(async () => ({
      upsert: {
        nodes: [
          {
            id,
            payload
          }
        ]
      }
    }));
  }
  /**
   * Re-stores an edge record under its existing id.
   * The caller's `item` is left untouched; the id is split off the validated
   * copy instead.
   */
  async changeEdge(item) {
    const { id, ...payload } = v12.parse(EdgeItemDefine, item);
    await this.#graphUtil.updateContext(async () => ({
      upsert: {
        edges: [
          {
            id,
            payload
          }
        ]
      }
    }));
  }
  /** Adds node and/or edge records (`input.nodes` / `input.edges`), each under a new id. */
  async addNodeItem(input) {
    await this.#graphUtil.updateContext(async () => {
      const nodes = (input.nodes ?? []).map((node) => {
        const payload = v12.parse(NodeItemNewDefine, node);
        return {
          id: v45(),
          payload
        };
      });
      const edges = (input.edges ?? []).map((edge) => {
        const payload = v12.parse(EdgeItemNewDefine, edge);
        return {
          id: v45(),
          payload
        };
      });
      return {
        upsert: { nodes, edges }
      };
    });
  }
  /**
   * Deletes one record of a node. The node's edges are deleted as well only
   * when this is the last record stored under that node name.
   */
  async deleteNodeItem(item) {
    await this.#graphUtil.updateContext(async () => {
      const { points } = await this.#qdClient.scroll(
        this.#config().activateGraphName,
        {
          // Two are requested so that "exactly one record left" can be told
          // apart from "several records left".
          limit: 2,
          filter: {
            must: [FilterNode, { key: "name", match: { value: item.name } }]
          }
        }
      );
      return {
        delete: {
          nodes: [item],
          edges: points.length === 1 ? {
            filter: {
              must: [FilterEdge],
              should: [
                { key: "source", match: { value: item.name } },
                { key: "target", match: { value: item.name } }
              ]
            }
          } : []
        }
      };
    });
  }
  /** Deletes an edge record by id (nodes are not affected). */
  async deleteEdge(item) {
    await this.#graphUtil.updateContext(async () => ({
      delete: {
        edges: [item]
      }
    }));
  }
  /** Deletes a whole node by name, together with the edges that start or end at it. */
  async deleteNodeByName(name) {
    await this.#graphUtil.updateContext(async () => ({
      delete: {
        nodes: {
          filter: {
            must: [FilterNode, { key: "name", match: { value: name } }],
            should: null
          }
        },
        edges: {
          filter: {
            must: [FilterEdge],
            should: [
              { key: "source", match: { value: name } },
              { key: "target", match: { value: name } }
            ]
          }
        }
      }
    }));
  }
};
1962
+
1963
+ // packages/knowledge/graph/graph.service.ts
1964
+ import { createInjector, inject as inject10, Injector as Injector3 } from "static-injector";
1965
+
1966
+ // packages/knowledge/graph/graph.query.service.ts
1967
+ import { QdrantClientService as QdrantClientService9 } from "@shenghuabi/knowledge/qdrant";
1968
+ import { inject as inject9 } from "static-injector";
1969
+ import { differenceBy, uniqBy as uniqBy2 } from "lodash-es";
1970
+
1971
+ // packages/knowledge/graph/util/graph-util.ts
1972
/**
 * Builds the display row for a node.
 *
 * @param item Node with `name`, `type`, `list` (entries with `description`) and `degree`.
 * @param index Zero-based position; the row shows it one-based.
 * @returns `[position, name, type, descriptions joined by ";", degree as a whole-number percentage string]`.
 */
function getNodeStrList(item, index) {
  const position = index + 1;
  const descriptions = item.list.map(({ description }) => description).join(";");
  const degreePercent = (item.degree * 100).toFixed(0);
  return [position, item.name, item.type, descriptions, degreePercent];
}
1981
/**
 * Builds the display row for an edge.
 *
 * @param item Edge with `source`, `target`, `list` (entries with `description`) and `degree`.
 * @param i Zero-based position; the row shows it one-based.
 * @returns `[position, source, target, descriptions joined by ";", degree as a whole-number percentage string]`.
 */
function getEdgeStrList(item, i) {
  const position = i + 1;
  const descriptions = item.list.map(({ description }) => description).join(";");
  const degreePercent = (item.degree * 100).toFixed(0);
  return [position, item.source, item.target, descriptions, degreePercent];
}
1990
+
1991
+ // packages/knowledge/graph/util/format-attr.ts
1992
+ import * as v13 from "valibot";
1993
+ import { countBy, maxBy } from "lodash-es";
1994
/**
 * Picks the most frequent `type` among the given entries.
 * Entries typed "未知" ("unknown") are not counted; "未知" is returned only
 * when no other type is present.
 *
 * @param list Entries that each carry a `type`.
 * @returns The winning type name.
 */
function getNodeType(list) {
  const { ["未知"]: _unknownCount, ...knownCounts } = countBy(list, (entry) => entry.type);
  const [winner] = maxBy(Object.entries(knownCounts), ([, count]) => count) ?? [];
  return winner ?? "未知";
}
1999
/**
 * Merges the scored records of one node into a single node description.
 *
 * Every record is validated against `NodePayloadDefine`, the dominant type
 * (see `getNodeType`) is written back onto each validated record, and the
 * mean score becomes the node's `degree`.
 *
 * @param list Non-empty array of `{ data, score }`; an empty array fails
 *   because the name is read from the first record.
 * @returns `{ list, name, type, degree }`.
 */
function formatNodeAttr2(list) {
  let scoreSum = 0;
  const attrList = list.map(({ data, score }) => {
    scoreSum += score;
    return v13.parse(NodePayloadDefine, data);
  });
  // Computed once: after every record has been overwritten with this type,
  // a recount could only yield the same value again.
  const type = getNodeType(attrList);
  for (const attr of attrList) {
    attr.type = type;
  }
  return {
    list: attrList,
    name: attrList[0].name,
    type,
    degree: scoreSum / list.length
  };
}
2019
/**
 * Merges the scored records of one edge into a single edge description.
 *
 * Every record is validated against `EdgePayloadDefine`; name, source and
 * target are taken from the first record and the mean score becomes `degree`.
 *
 * @param list Array of `{ data, score }`; expected to be non-empty.
 * @returns `{ list, name, source, target, degree }`.
 */
function formatEdgeAttr2(list) {
  const attrList = [];
  let scoreSum = 0;
  for (const { data, score } of list) {
    scoreSum += score;
    attrList.push(v13.parse(EdgePayloadDefine, data));
  }
  const first = attrList[0];
  return {
    list: attrList,
    name: first.name,
    source: first.source,
    target: first.target,
    degree: scoreSum / list.length
  };
}
2034
+
2035
+ // packages/knowledge/graph/define/query.ts
2036
+ import * as v14 from "valibot";
2037
/**
 * Query input for a graph relation search: an optional `node` text and an
 * optional `edge` text, of which at least one has to be a string. A violation
 * is reported on the `node` field.
 */
var GraphRelationQueryDefine = v14.pipe(
  v14.object({
    node: v14.optional(v14.string()),
    edge: v14.optional(v14.string())
  }),
  v14.forward(
    v14.partialCheck(
      [["node"], ["edge"]],
      ({ node, edge }) => [node, edge].some((value) => typeof value === "string"),
      "节点或边必须存在一个"
    ),
    ["node"]
  )
);
2051
+
2052
+ // packages/knowledge/graph/graph.query.service.ts
2053
+ import * as v15 from "valibot";
2054
/**
 * Read-side queries against the graph collection and the chunk collection of
 * the active knowledge base.
 */
var GraphQueryService = class {
  #qdClient = inject9(QdrantClientService9);
  #config = inject9(ConfigToken);
  #graphChange = inject9(GraphLocalService);
  #queryParams = inject9(QueryParamsToken);
  #text2vec = inject9(Text2VecToken);
  #reranker = inject9(ReRankerToken);

  /**
   * Returns the leading part of `list` whose rendered strings stay within
   * `limit` characters in total.
   *
   * @param list Items to limit.
   * @param strListFn Renders an item (and its index) to the string that is measured.
   * @param limit Maximum accumulated string length.
   */
  #listLimit(list, strListFn, limit) {
    let count = 0;
    const strList = list.map(strListFn);
    for (let i = 0; i < strList.length; i++) {
      count += strList[i].length;
      if (count > limit) {
        return list.slice(0, i);
      }
    }
    return list;
  }

  /** Applies the configured length limits to nodes, edges and chunks. */
  #contextLimit(context) {
    const { lengthLimit } = this.#queryParams;
    return {
      nodes: this.#listLimit(
        context.nodes,
        (item, i) => getNodeStrList(item, i).join("|"),
        lengthLimit.node
      ),
      edges: this.#listLimit(
        context.edges,
        (item, i) => getEdgeStrList(item, i).join("|"),
        lengthLimit.edge
      ),
      chunks: this.#listLimit(
        context.chunks,
        (item) => item.chunk,
        lengthLimit.chunk
      )
    };
  }

  /**
   * Vector search in the active graph collection restricted to one record kind.
   *
   * @param content Text that is embedded and used as the query vector.
   * @param kind Value the `kind` payload field has to match ("node" or "edge" in this class).
   * @returns The payloads of the hits, each extended with the point `id`.
   */
  async #queryGraphCollection(content, kind) {
    const vector = await this.#text2vec(content, this.#config().activateCollection);
    const hits = await this.#qdClient.search(this.#config().activateGraphName, {
      limit: this.#queryParams.topK * this.#reranker.getQueryRatio(),
      vector: {
        name: "chunk",
        vector
      },
      filter: {
        must: {
          key: "kind",
          match: {
            value: kind
          }
        }
      },
      with_payload: true
      // with_lookup: true,
    });
    return hits.map((hit) => ({ ...hit.payload, id: hit.id }));
  }

  /** Loads chunk payloads by id and tags each with the knowledge base name. */
  async #findChunkDataById(ids) {
    const points = await this.#qdClient.retrieve(this.#config().activateName, {
      ids,
      with_payload: true
    });
    return points.map((point) => ({
      ...point.payload,
      knowledge: this.#config().name
    }));
  }

  /** Retrieves graph records for the given references (objects with an `id`). */
  async #retrieveGraphItems(refs) {
    const points = await this.#qdClient.retrieve(this.#config().activateGraphName, {
      ids: refs.map((ref) => ref.id)
    });
    return points.map((point) => ({ ...point.payload, id: point.id }));
  }

  /** Edge records that the local graph associates with the given nodes. */
  #getEdgeByNode(nodes) {
    return this.#retrieveGraphItems(this.#graphChange.getEdgeByNode(nodes));
  }

  /** Node records that the local graph associates with the given edges. */
  #getNodeByEdge(edges) {
    return this.#retrieveGraphItems(this.#graphChange.getNodeByEdge(edges));
  }

  /** Node-first lookup: matching nodes, their chunks and their edges. */
  async #queryLocal(keywords) {
    const nodeGroupResult = await this.#queryGraphCollection(keywords, "node");
    const chunkResult = await this.#findChunkDataById(
      nodeGroupResult.map((item) => item.chunkId)
    );
    return {
      nodes: nodeGroupResult,
      edges: await this.#getEdgeByNode(nodeGroupResult),
      chunks: chunkResult
    };
  }

  /** Edge-first lookup: matching edges, their chunks and their nodes. */
  async #queryGlobal(keywords) {
    const edgeGroupResult = await this.#queryGraphCollection(keywords, "edge");
    const chunkResult = await this.#findChunkDataById(
      edgeGroupResult.map((item) => item["chunkId"])
    );
    return {
      nodes: await this.#getNodeByEdge(edgeGroupResult),
      edges: edgeGroupResult,
      chunks: chunkResult
    };
  }

  /**
   * Keeps the best 30% (rounded up) of a reranked list and groups the
   * corresponding items by their `name`.
   *
   * @param ranked Reranker output; entries carry `index` and `score`.
   * @param items The list the reranker indices refer to.
   * @returns Object mapping a name to its `{ data, score }` entries.
   */
  #groupTopRanked(ranked, items) {
    const keep = Math.ceil(ranked.length * 0.3);
    const groups = {};
    for (const { index, score } of ranked.slice(0, keep)) {
      const data = items[index];
      groups[data.name] ??= [];
      groups[data.name].push({ data, score });
    }
    return groups;
  }

  /**
   * Cuts a group where the accumulated description length exceeds the
   * configured limit. At least the first entry is always kept, because the
   * formatters read the name from the first entry and fail on an empty list.
   *
   * NOTE(review): `lengthLimit.nodeDescription` is applied to edge groups as
   * well, as before — confirm whether a dedicated edge limit is intended.
   */
  #trimGroup(list) {
    let length = 0;
    const cut = list.findIndex((entry) => {
      length += entry.data.description.length;
      return length > this.#queryParams.lengthLimit.nodeDescription;
    });
    return list.slice(0, cut === -1 ? list.length : Math.max(cut, 1));
  }

  /**
   * Collects graph context for a question.
   *
   * @param params `{ node?, edge? }`, validated against `GraphRelationQueryDefine`.
   *   With both present, the node-first and edge-first results are merged
   *   (nodes/edges de-duplicated by id, chunks by chunk text).
   * @param question Text the reranker scores nodes, edges and chunks against.
   * @returns `{ nodes, edges, chunks }` after reranking, grouping by name and
   *   applying the configured length limits; nodes and edges are sorted by
   *   descending degree.
   * @throws Error when neither `node` nor `edge` carries a usable value.
   */
  async query(params, question) {
    params = v15.parse(GraphRelationQueryDefine, params);
    await this.#graphChange.loadDataInitGraph$$();
    let context;
    if (params.node && params.edge) {
      const context1 = await this.#queryLocal(params.node);
      const context2 = await this.#queryGlobal(params.edge);
      const ctx2NodeExtra = differenceBy(context2.nodes, context1.nodes, (item) => item.id);
      const ctx1EdgeExtra = differenceBy(context1.edges, context2.edges, (item) => item.id);
      context = {
        nodes: [...context1.nodes, ...ctx2NodeExtra],
        edges: [...context2.edges, ...ctx1EdgeExtra],
        chunks: uniqBy2([...context1.chunks, ...context2.chunks], (a) => a.chunk)
      };
    } else if (params.node) {
      context = await this.#queryLocal(params.node);
    } else if (params.edge) {
      context = await this.#queryGlobal(params.edge);
    } else {
      // Reachable when both values are empty strings: they pass the schema
      // check (they are strings) but select no branch above.
      throw new Error("节点或边必须存在一个");
    }
    const byDegreeDesc = (a, b) => b.degree - a.degree;

    const nodeSortedList = await this.#reranker.run({
      value: question,
      docs: context.nodes.map((item) => item.embeddingChunk)
    });
    const nodeSorted = this.#groupTopRanked(nodeSortedList, context.nodes);

    const edgeSortList = await this.#reranker.run({
      value: question,
      docs: context.edges.map((item) => item.embeddingChunk)
    });
    // The cut is based on the edge list's own length (it used the node list's before).
    const edgeSorted = this.#groupTopRanked(edgeSortList, context.edges);

    const nodes = Object.values(nodeSorted).map((group) => formatNodeAttr2(this.#trimGroup(group))).sort(byDegreeDesc);
    const edges = Object.values(edgeSorted).map((group) => formatEdgeAttr2(this.#trimGroup(group))).sort(byDegreeDesc);
    const chunkSortedList = await this.#reranker.run({
      value: question,
      docs: context.chunks.map((item) => item.embeddingChunk)
    });
    return this.#contextLimit({
      nodes,
      edges,
      chunks: chunkSortedList.map((item) => context.chunks[item.index])
    });
  }

  /**
   * Finds node names containing `str`, skipping names already selected.
   *
   * @param str Substring to look for.
   * @param selectedList Names to exclude.
   * @returns At most 20 matching names.
   */
  async searchNode(str, selectedList) {
    await this.#graphChange.loadDataInitGraph$$();
    const limit = 20;
    const list = [];
    for (const nodeName of this.#graphChange.getGraph().nodes()) {
      if (selectedList.includes(nodeName) || !nodeName.includes(str)) {
        continue;
      }
      list.push(nodeName);
      if (list.length === limit) {
        break;
      }
    }
    return list;
  }

  /** Returns the groups of a group-by-`fileName` query on the active chunk collection. */
  async getFileNameList() {
    const result = await this.#qdClient.queryGroups(
      this.#config().activateName,
      {
        limit: 9999,
        group_by: "fileName",
        group_size: 1,
        with_payload: []
      }
    );
    return result.groups;
  }

  /**
   * Returns the points stored for one file, each payload tagged with the
   * knowledge base name.
   *
   * @param fileName Value the `fileName` payload field has to match.
   */
  async getChunkContent(fileName) {
    const { points } = await this.#qdClient.query(this.#config().activateName, {
      limit: 9999,
      filter: {
        must: {
          key: "fileName",
          match: { value: fileName }
        }
      },
      with_payload: true
    });
    return points.map((item) => ({
      ...item,
      payload: {
        ...item.payload,
        knowledge: this.#config().name
      }
    }));
  }

  /** Loads the local graph if necessary and returns its exported form. */
  async getGraphData() {
    await this.#graphChange.loadDataInitGraph$$();
    return this.#graphChange.getGraph().export();
  }

  /**
   * Fetches node records whose name is one of `list`.
   *
   * @param list Node names.
   * @param options `nodeSizeLimit` caps the number of returned points.
   */
  async queryNode(list, options) {
    const result = await this.#qdClient.scroll(this.#config().activateGraphName, {
      limit: options.nodeSizeLimit,
      filter: {
        must: [
          {
            key: "kind",
            match: {
              value: "node"
            }
          },
          {
            key: "name",
            match: {
              any: list
            }
          }
        ]
      },
      with_payload: true
      // with_lookup: true,
    });
    return result.points;
  }

  /**
   * Vector search over node records, grouped by node name.
   *
   * @param content Text that is embedded and used as the query vector.
   * @param options `nodeLimit` (number of groups), `nodeSizeLimit` (hits per
   *   group) and `score` (score threshold).
   */
  async fuzzyQueryNode(content, options) {
    const vector = await this.#text2vec(content, this.#config().activateCollection);
    const result = await this.#qdClient.searchPointGroups(this.#config().activateGraphName, {
      limit: options.nodeLimit,
      score_threshold: options.score,
      vector: {
        name: "chunk",
        vector
      },
      group_by: "name",
      group_size: options.nodeSizeLimit,
      filter: {
        must: {
          key: "kind",
          match: {
            value: "node"
          }
        }
      },
      with_payload: true
      // with_lookup: true,
    });
    return result.groups;
  }
};
2360
+
2361
+ // packages/knowledge/graph/graph.service.ts
2362
/**
 * Facade over the graph services: every mutation is applied to the local
 * graph service first and to the persisted graph (handle service) afterwards.
 */
var GraphService = class {
  local = inject10(GraphLocalService);
  #knowledge = inject10(GraphHandleService);
  knowledge = inject10(GraphKnolwdgeService);
  #injector = inject10(Injector3);
  graphExport$$ = this.local.graphExport$$;
  loadDataInit$$ = this.local.loadDataInitGraph$$;

  /**
   * Creates a `GraphQueryService` in a child injector that provides the
   * given query parameters.
   */
  createQuery(queryParams) {
    const providers = [
      GraphQueryService,
      { provide: QueryParamsToken, useValue: queryParams }
    ];
    const queryInjector = createInjector({ providers, parent: this.#injector });
    return queryInjector.get(GraphQueryService);
  }

  /** Delegates to the local graph service. */
  initGraph() {
    return this.local.initGraph();
  }

  /** Returns the local graph instance. */
  getGraph() {
    return this.local.getGraph();
  }

  /** Splits a node locally, then in the persisted graph. */
  async splitNode(options) {
    await this.local.splitNode(options);
    await this.#knowledge.splitNode(options);
  }

  /** Merges nodes locally, then in the persisted graph. */
  async mergeNode(options) {
    await this.local.mergeNode(options);
    await this.#knowledge.mergeNode(options);
  }

  /** Updates a node item locally, then in the persisted graph. */
  async changeNodeDescription(item) {
    await this.local.changeNodeDescription(item);
    await this.#knowledge.changeNodeDescription(item);
  }

  /** Updates an edge; only the local service receives the previous value. */
  async changeEdge(item, oldItem) {
    await this.local.changeEdge(item, oldItem);
    await this.#knowledge.changeEdge(item);
  }

  /** Adds nodes and/or edges locally, then in the persisted graph. */
  async add(input) {
    await this.local.add(input);
    await this.#knowledge.addNodeItem(input);
  }

  /** Deletes one node item locally, then in the persisted graph. */
  async deleteNodeItem(item) {
    await this.local.deleteNodeItem(item);
    await this.#knowledge.deleteNodeItem(item);
  }

  /** Deletes an edge locally, then in the persisted graph. */
  async deleteEdge(item) {
    await this.local.deleteEdgeItem(item);
    await this.#knowledge.deleteEdge(item);
  }

  /** Deletes a node by name locally, then in the persisted graph. */
  async deleteNodeByName(name) {
    await this.local.deleteNode(name);
    await this.#knowledge.deleteNodeByName(name);
  }
};
2417
+
2418
+ // packages/knowledge/knowledge.manager.service.ts
2419
+ import * as fs from "fs/promises";
2420
+ import { fileTypeFromBuffer } from "file-type";
2421
+ import path3 from "path";
2422
+
2423
+ // packages/knowledge/article/article.knowledge.service.ts
2424
+ import { getHash as getHash2 } from "@shenghuabi/knowledge/util";
2425
+ import { createNormalizeVfs as createNormalizeVfs3, path as path2 } from "@cyia/vfs2";
2426
+ import { computed as computed4, inject as inject11 } from "static-injector";
2427
+ import { QdrantClientService as QdrantClientService10 } from "@shenghuabi/knowledge/qdrant";
2428
+ import { promise as fastq5 } from "fastq";
2429
+
2430
+ // packages/knowledge/article/define/payload.ts
2431
+ import * as v16 from "valibot";
2432
/**
 * Payload schema of an article record: six string fields plus `loc`, which
 * is checked with `Boolean` (i.e. any truthy value is accepted).
 */
var ArticlePayload = v16.object({
  fileHash: v16.string(),
  fullName: v16.string(),
  name: v16.string(),
  dir: v16.string(),
  chunk: v16.string(),
  hash: v16.string(),
  loc: v16.custom(Boolean)
});
2441
+
2442
+ // packages/knowledge/article/article.knowledge.service.ts
2443
+ import * as v17 from "valibot";
2444
/**
 * Knowledge base variant for articles (files addressed by their full name
 * inside a directory).
 */
var ArticleKnowledgeService = class extends NormalKnowledgeService {
  #text2vec = inject11(Text2VecToken);
  #reranker = inject11(ReRankerToken);
  #config = inject11(ConfigToken);
  #dir = inject11(DirToken);
  #qdClient = inject11(QdrantClientService10);
  #util = inject11(KnowledgeUtilService);
  #vfs = computed4(() => createNormalizeVfs3({ dir: this.#dir() }));
  KeyWordIndex = ["fullName", "dir", "fileHash"];

  /**
   * Builds the file-level payload fields for an article.
   *
   * @param fileName Full name of the file.
   * @param content File content; only its hash is stored here.
   */
  getPayload(fileName, content) {
    return {
      fileHash: getHash2(content),
      fullName: fileName,
      name: path2.basename(fileName),
      dir: path2.dirname(fileName)
    };
  }

  /**
   * Inserts an article unless a record with the same content hash or the
   * same full name already exists in the active collection.
   */
  async insertItem(fileName, content) {
    const fileHash = getHash2(content);
    const { points } = await this.#qdClient.scroll(
      this.#config().activateName,
      {
        limit: 1,
        filter: {
          should: [
            { key: "fileHash", match: { value: fileHash } },
            { key: "fullName", match: { value: fileName } }
          ]
        }
      }
    );
    if (points.length) {
      return;
    }
    await this.insertItemOnly(fileName, content, this.#config().collectionList);
  }

  /** Deletes every record of the given file from all configured collections. */
  async deleteItem(fileName) {
    await this.#util.multiDelete(
      this.#config().collectionList.map((item) => item.collectionName),
      {
        filter: {
          must: [{ key: "fullName", match: { value: fileName } }],
          should: null
        }
      }
    );
  }

  /**
   * Creates a new collection and fills it by re-inserting every distinct file
   * (by `fileHash`) found in the active collection, reading the content from
   * the virtual file system. On any failure the new collection is deleted and
   * the error is rethrown; on success it becomes the active collection.
   *
   * @param collection Collection definition; `collectionName` identifies it.
   */
  async addCollection(collection) {
    await this.createCollection(collection);
    try {
      // Up to 10 files are processed concurrently.
      const queue = fastq5(async (payload) => {
        payload = v17.parse(ArticlePayload, payload);
        const content = await this.#vfs().readContent(payload.fullName);
        if (!content) {
          return;
        }
        await this.insertItemOnly(payload.fullName, content, [collection]);
      }, 10);
      let offset;
      const seenHashes = /* @__PURE__ */ new Set();
      let queueError;
      queue.error((error) => {
        // The handler is also invoked for successful tasks (error is then falsy).
        if (error) {
          queueError = error;
          queue.killAndDrain();
        }
      });
      do {
        const { points, next_page_offset } = await this.#qdClient.scroll(
          this.#config().activateName,
          {
            limit: 5e3,
            with_payload: true,
            offset
          }
        );
        for (const point of points) {
          // A worker already failed: the collection is going to be dropped,
          // so do not schedule any further work.
          if (queueError) {
            break;
          }
          const id = `${point.payload["fileHash"]}`;
          if (seenHashes.has(id)) {
            continue;
          }
          seenHashes.add(id);
          queue.push(point.payload);
        }
        offset = next_page_offset;
      } while (offset && !queueError);
      await queue.drained();
      if (queueError) {
        throw queueError;
      }
    } catch (error) {
      await this.#qdClient.deleteCollection(collection.collectionName);
      throw error;
    }
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  /** Destroys the knowledge base, passing all configured collection names. */
  destroy() {
    return this.#util.destroyKnowledge(
      this.#config().collectionList.map(({ collectionName }) => collectionName)
    );
  }

  /**
   * Vector search grouped by the `hash` payload field, with every group
   * reranked against the query text and cut back to `group_size` hits.
   *
   * @param text Query text; embedded for the search and used for reranking.
   * @param options `limit` (number of groups), `group_size` (hits kept per
   *   group; the search fetches `group_size * reranker ratio`) and `score`
   *   (score threshold).
   * @param filter Filter passed through to the search.
   */
  async searchGroupByChunk(text, options, filter) {
    const queryResult = await this.#qdClient.searchPointGroups(
      this.#config().activateName,
      {
        group_by: "hash",
        filter,
        with_payload: true,
        with_vector: false,
        score_threshold: options?.score,
        vector: {
          name: "chunk",
          vector: await this.#text2vec(text, this.#config().activateCollection)
        },
        group_size: options.group_size * this.#reranker.getQueryRatio(),
        limit: options.limit
      }
    );
    return Promise.all(
      queryResult.groups.map(async (group) => {
        const resultList = await this.#reranker.run({
          value: text,
          docs: group.hits.map((hit) => hit.payload?.["embeddingChunk"])
        });
        return {
          ...group,
          hits: resultList.slice(0, options.group_size).map(({ index }) => group.hits[index])
        };
      })
    );
  }
};
2580
+
2581
+ // packages/knowledge/article/define/config.ts
2582
+ import * as v18 from "valibot";
2583
/**
 * Schema of one article collection: its name, an optional embedding template
 * (with an optional `entry` template) and a numeric `size`.
 */
var ArticleCollectionDefine = v18.object({
  collectionName: v18.string(),
  embeddingTemplate: v18.optional(
    v18.object({ entry: v18.optional(EmbeddingTemplateDefine) })
  ),
  size: v18.number()
});
2592
/**
 * Configuration schema of an article knowledge base: the base configuration
 * entries, `type` (defaults to "article") and the list of collections. The
 * parsed value is extended with `activateName`.
 */
var ArticleKnowledgeConfigDefine = v18.pipe(
  v18.object({
    ...BaseKnowledgeConfig.entries,
    type: v18.optional(v18.literal("article"), "article"),
    collectionList: v18.array(ArticleCollectionDefine)
  }),
  v18.transform((config) => {
    /** The activated regular knowledge base (file chunks). */
    const activateName = getActivateCollectionName(config.name);
    return { ...config, activateName };
  })
);
2604
+
2605
+ // packages/knowledge/knowledge.manager.service.ts
2606
+ import { QdrantClientService as QdrantClientService11 } from "@shenghuabi/knowledge/qdrant";
2607
+ import { LogToken as LogToken6 } from "@shenghuabi/knowledge/util";
2608
/**
 * Entry point for working with knowledge bases by name. For every knowledge
 * base an injector is created (and cached) whose providers depend on the
 * configured type.
 */
var KnowledgeManagerService = class extends RootStaticInjectOptions2 {
  #injector = inject12(Injector4);
  #fileParser = inject12(FileParserService);
  #cacheMap = /* @__PURE__ */ new Map();
  #qdClient = inject12(QdrantClientService11);

  /**
   * Resolves the configuration of a knowledge base. This base implementation
   * always throws ("未实现" = "not implemented").
   */
  async getConfig(name) {
    throw new Error("未实现");
  }

  /** Service providers registered for a knowledge base type, or `undefined` for an unknown type. */
  #serviceProvidersFor(type) {
    switch (type) {
      case "normal":
        return [NormalKnowledgeService];
      case "article":
        return [ArticleKnowledgeService];
      case "dict":
        return [DictKnowledgeService];
      case "normal-graph":
        return [
          GraphKnolwdgeService,
          GraphLocalService,
          GraphHandleService,
          GraphService,
          GraphKnowledgeUtilService
        ];
      default:
        return void 0;
    }
  }

  /** Main service token of a knowledge base type, or `undefined` for an unknown type. */
  #serviceTokenFor(type) {
    switch (type) {
      case "normal":
        return NormalKnowledgeService;
      case "dict":
        return DictKnowledgeService;
      case "normal-graph":
        return GraphKnolwdgeService;
      case "article":
        return ArticleKnowledgeService;
      default:
        return void 0;
    }
  }

  /**
   * Returns the injector of a knowledge base, creating and caching it on
   * first use.
   *
   * @param name Knowledge base name (cache key).
   * @param extraProviders Additional providers; they only take effect when
   *   the injector is created, a cached injector is returned unchanged.
   * @throws Error when the configured type is not supported.
   */
  async getInjector(name, extraProviders) {
    const cached = this.#cacheMap.get(name);
    if (cached) {
      return cached;
    }
    const config = await this.getConfig(name);
    const type = config().type;
    const serviceProviders = this.#serviceProvidersFor(type);
    if (!serviceProviders) {
      throw new Error(`未知的知识库类型: ${type}`);
    }
    const injector = createInjector2({
      providers: [
        ...serviceProviders,
        { provide: ConfigToken, useValue: config },
        ...extraProviders ?? []
      ],
      parent: this.#injector
    });
    this.#cacheMap.set(name, injector);
    return injector;
  }

  /**
   * Returns the main service instance of a knowledge base.
   *
   * @throws Error when the configured type is not supported.
   */
  async #get(name) {
    const injector = await this.getInjector(name);
    const type = injector.get(ConfigToken)().type;
    const token = this.#serviceTokenFor(type);
    if (!token) {
      throw new Error(`未知的知识库类型: ${type}`);
    }
    return injector.get(token);
  }

  /** Creates the knowledge base with the given (formatted) collection. */
  async create(name, collection) {
    const instance = await this.#get(name);
    await instance.create(instance.formatCollection(collection));
  }

  /**
   * Imports files through the file parser. Used by regular and graph
   * knowledge bases.
   *
   * @param name Knowledge base name.
   * @param filePathList Paths of the files to read and parse.
   * @param signal2 Optional abort signal; checked before every file and every
   *   parsed item, and passed on to `insertItem`.
   */
  async importFiles(name, filePathList, signal2) {
    const injector = await this.getInjector(name);
    const logService = injector.get(LogToken6);
    const instance = await this.#get(name);
    for (const filePath of filePathList) {
      if (signal2?.aborted) {
        return;
      }
      const content = await fs.readFile(filePath);
      const list = await this.#fileParser.parse(filePath, content);
      for (const item of list) {
        if (signal2?.aborted) {
          return;
        }
        logService.info(`正在导入 ${filePath}/${item.title}`);
        const content2 = item.content.trim();
        if (content2) {
          await instance.insertItem(item.title, content2, signal2);
        } else {
          // Items without content are reported and skipped.
          logService.warn(`内容为空 ${filePath}/${item.title}`);
        }
      }
    }
  }

  /** Dictionary knowledge bases only. */
  async importDict(name, input) {
    const instance = await this.#get(name);
    return await instance.importDict(input);
  }

  /** Returns the main service instance of a knowledge base. */
  async get(name) {
    return await this.#get(name);
  }

  /**
   * Imports plain text files relative to `dir`. A file for which
   * `fileTypeFromBuffer` detects a type is skipped.
   *
   * @param name Knowledge base name.
   * @param dir Base directory.
   * @param filePathList File paths relative to `dir`; also used as item names.
   * @param signal2 Optional abort signal, checked before every file.
   */
  async importTextFile(name, dir, filePathList, signal2) {
    const injector = await this.getInjector(name);
    const logService = injector.get(LogToken6);
    const instance = await this.#get(name);
    for (const filePath of filePathList) {
      if (signal2?.aborted) {
        return;
      }
      logService.info(`正在导入 ${filePath}`);
      const buffer = await fs.readFile(path3.join(dir, filePath));
      const type = await fileTypeFromBuffer(buffer);
      if (type) {
        continue;
      }
      let content;
      try {
        content = buffer.toString();
      } catch (error) {
        continue;
      }
      await instance.insertItem(filePath, content);
    }
  }

  /** Deletes an item from the knowledge base. */
  async deleteItem(name, fileName) {
    const instance = await this.#get(name);
    await instance.deleteItem(fileName);
  }

  /** Updates an item of the knowledge base. */
  async updateItem(name, fileName, content) {
    const instance = await this.#get(name);
    await instance.updateItem(fileName, content);
  }

  /** Common interface: adds a (formatted) collection. */
  async addCollection(name, collection) {
    const instance = await this.#get(name);
    await instance.addCollection(instance.formatCollection(collection));
  }

  /** Common interface: deletes a collection. */
  async deleteCollection(name, collectionName) {
    const instance = await this.#get(name);
    return await instance.deleteCollection(collectionName);
  }

  /** Switches the active collection of the knowledge base. */
  async changeActivateCollection(name, collectionName) {
    const instance = await this.#get(name);
    await instance.changeActivateCollection(collectionName);
  }

  /** Drops the cached injector and destroys the knowledge base. */
  async destroy(name) {
    const instance = await this.#get(name);
    this.#cacheMap.delete(name);
    await instance.destroy();
  }

  /** Exports the knowledge base. */
  async export(name) {
    const instance = await this.#get(name);
    return await instance.export();
  }

  /**
   * Restores the given snapshots and activates the imported collection; for
   * graph knowledge bases the graph collection is activated as well.
   *
   * @param name Knowledge base name.
   * @param options `snapshotList` (entries with `collection`, `filePath`,
   *   `checksum`), `activateCollection` and `type`.
   */
  async import(name, options) {
    const logService = (await this.getInjector(name)).get(LogToken6);
    for (const item of options.snapshotList) {
      logService.info(`正在导入 ${item.collection}`);
      await this.#qdClient.recoverSnapshot(item.collection, {
        location: item.filePath,
        priority: "no_sync",
        checksum: item.checksum
      });
    }
    await this.#qdClient.setActivateCollection(
      options.activateCollection,
      getActivateCollectionName(name)
    );
    if (options.type === "normal-graph") {
      await this.#qdClient.setActivateCollection(
        getGraphCollectionName(options.activateCollection),
        getActivateCollectionName(getGraphCollectionName(name))
      );
    }
  }

  /** Returns the `GraphService` of a knowledge base. */
  async getGraph(name) {
    const injector = await this.getInjector(name);
    return injector.get(GraphService);
  }
};
2796
+
2797
+ // packages/knowledge/common/define/chunk.ts
2798
+ import * as v19 from "valibot";
2799
/**
 * Payload schema of a file chunk record: four string fields plus `loc`,
 * which is checked with `Boolean` (i.e. any truthy value is accepted).
 */
var FileChunkPayloadDefine = v19.object({
  chunk: v19.string(),
  fileName: v19.string(),
  loc: v19.custom(Boolean),
  hash: v19.string(),
  embeddingChunk: v19.string()
});
2806
+ export {
2807
+ ArticleCollectionDefine,
2808
+ ArticleKnowledgeConfigDefine,
2809
+ ArticleKnowledgeService,
2810
+ ArticlePayload,
2811
+ BaseKnowledgeConfig,
2812
+ CHAT_INPUT,
2813
+ ConfigToken,
2814
+ ContentParserToken,
2815
+ DICT_PREFIX,
2816
+ DictCollectionDefine,
2817
+ DictKnowledgeConfigDefine,
2818
+ DictKnowledgeService,
2819
+ DirToken,
2820
+ ENTITY_DEFINE,
2821
+ ENTITY_RELATION_DEFINE,
2822
+ EdgeItemDefine,
2823
+ EdgeItemNewDefine,
2824
+ EdgePayloadDefine,
2825
+ EdgePayloadNewDefine,
2826
+ EmbeddingTemplateDefine,
2827
+ EntityExtraDefine,
2828
+ FileChunkPayloadDefine,
2829
+ GetConfigToken,
2830
+ GraphCollectionDefine,
2831
+ GraphHandleService,
2832
+ GraphKnolwdgeService,
2833
+ GraphKnowledgeConfigDefine,
2834
+ GraphLocalService,
2835
+ GraphQueryService,
2836
+ GraphService,
2837
+ KeywordPayloadNewDefine,
2838
+ KnowledgeGraphCreateDefine,
2839
+ KnowledgeManagerService,
2840
+ NodeItemDefine,
2841
+ NodeItemNewDefine,
2842
+ NodePayloadDefine,
2843
+ NodePayloadNewDefine,
2844
+ NormalCollectionDefine,
2845
+ NormalKnowledgeConfigDefine,
2846
+ NormalKnowledgeService,
2847
+ OCRToken,
2848
+ QueryParamsToken,
2849
+ RagChatToken,
2850
+ ReRankerToken,
2851
+ Text2VecToken,
2852
+ TextSplitterToken,
2853
+ getActivateCollectionName,
2854
+ getGraphCollectionName
2855
+ };
2856
+ //# sourceMappingURL=knowledge.mjs.map