@shenghuabi/knowledge 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/embedding/embedding.service.d.ts +4 -0
- package/embedding/index.d.ts +2 -0
- package/embedding/type.d.ts +12 -0
- package/embedding.mjs +61 -0
- package/embedding.mjs.map +7 -0
- package/file-parser/const.d.ts +20 -0
- package/file-parser/dict/dict-format/dsl/dsl-parse.service.d.ts +6 -0
- package/file-parser/dict/dict-format/dsl/dsl.format.d.ts +1 -0
- package/file-parser/dict/dict-format/mdict-parse.service.d.ts +20 -0
- package/file-parser/dict/dict-format/stardict-parse.service.d.ts +12 -0
- package/file-parser/dict/dict-format/yaml-parse.service.d.ts +25 -0
- package/file-parser/dict/dict.service.d.ts +10 -0
- package/file-parser/dict/index.d.ts +2 -0
- package/file-parser/dict/type.d.ts +24 -0
- package/file-parser/document-file-parser.service.d.ts +15 -0
- package/file-parser/document-loader/pdf-img.loader.d.ts +8 -0
- package/file-parser/document-loader/xlsx.loader.d.ts +6 -0
- package/file-parser/file-parser.service.d.ts +13 -0
- package/file-parser/index.d.ts +6 -0
- package/file-parser/text-analyse.d.ts +1 -0
- package/file-parser/text-parser.d.ts +3 -0
- package/file-parser/vl-parser/markdown.parser.d.ts +8 -0
- package/file-parser.mjs +850 -0
- package/file-parser.mjs.map +7 -0
- package/image/convert.d.ts +25 -0
- package/image/extract.d.ts +2 -0
- package/image/image-metadata.d.ts +2 -0
- package/image/index.d.ts +3 -0
- package/image.mjs +134 -0
- package/image.mjs.map +7 -0
- package/knowledge/article/article.knowledge.service.d.ts +53 -0
- package/knowledge/article/define/config.d.ts +60 -0
- package/knowledge/article/define/index.d.ts +2 -0
- package/knowledge/article/define/payload.d.ts +16 -0
- package/knowledge/article/index.d.ts +2 -0
- package/knowledge/common/common.knowledge.service.d.ts +240 -0
- package/knowledge/common/define/base.d.ts +7 -0
- package/knowledge/common/define/chunk.d.ts +14 -0
- package/knowledge/common/define/embedding.d.ts +5 -0
- package/knowledge/common/define/index.d.ts +3 -0
- package/knowledge/common/index.d.ts +1 -0
- package/knowledge/common/query.d.ts +7 -0
- package/knowledge/const.d.ts +95 -0
- package/knowledge/define/index.d.ts +245 -0
- package/knowledge/dict/define/config.d.ts +68 -0
- package/knowledge/dict/define/index.d.ts +1 -0
- package/knowledge/dict/dict.knowledge.service.d.ts +67 -0
- package/knowledge/graph/const.d.ts +20 -0
- package/knowledge/graph/define/config.d.ts +169 -0
- package/knowledge/graph/define/define.d.ts +402 -0
- package/knowledge/graph/define/index.d.ts +2 -0
- package/knowledge/graph/define/query.d.ts +14 -0
- package/knowledge/graph/graph.handle.service.d.ts +28 -0
- package/knowledge/graph/graph.knowledge.service.d.ts +40 -0
- package/knowledge/graph/graph.local.service.d.ts +85 -0
- package/knowledge/graph/graph.query.service.d.ts +160 -0
- package/knowledge/graph/graph.service.d.ts +24 -0
- package/knowledge/graph/graph.util.service.d.ts +31 -0
- package/knowledge/graph/type.d.ts +11 -0
- package/knowledge/graph/util/format-attr.d.ts +48 -0
- package/knowledge/graph/util/graph-util.d.ts +5 -0
- package/knowledge/graph/util.d.ts +1 -0
- package/knowledge/graph/vecotr-format.d.ts +11 -0
- package/knowledge/index.d.ts +17 -0
- package/knowledge/knowledge.manager.service.d.ts +42 -0
- package/knowledge/knowledge.util.service.d.ts +21 -0
- package/knowledge/normal/define/config.d.ts +60 -0
- package/knowledge/normal/define/index.d.ts +1 -0
- package/knowledge/normal/normal.knowledge.service.d.ts +49 -0
- package/knowledge/template.format.d.ts +6 -0
- package/knowledge/type.d.ts +28 -0
- package/knowledge.mjs +2856 -0
- package/knowledge.mjs.map +7 -0
- package/ocr/FileUtils.d.ts +4 -0
- package/ocr/ImageRaw.d.ts +11 -0
- package/ocr/index.d.ts +2 -0
- package/ocr/model-config.d.ts +8 -0
- package/ocr/ocr.d.ts +29 -0
- package/ocr.mjs +351 -0
- package/ocr.mjs.map +7 -0
- package/package.json +105 -0
- package/qdrant/index.d.ts +3 -0
- package/qdrant/qdrant-client.service.d.ts +396 -0
- package/qdrant/qdrant-server.service.d.ts +21 -0
- package/qdrant/type.d.ts +18 -0
- package/qdrant/util.d.ts +1 -0
- package/qdrant.mjs +274 -0
- package/qdrant.mjs.map +7 -0
- package/util/batch-queue.d.ts +6 -0
- package/util/cache-queue.d.ts +10 -0
- package/util/clone.d.ts +1 -0
- package/util/embedding-queue.d.ts +3 -0
- package/util/get-hash.d.ts +2 -0
- package/util/html-to-text/index.d.ts +5 -0
- package/util/index.d.ts +10 -0
- package/util/is-truthy.d.ts +1 -0
- package/util/log.service.d.ts +6 -0
- package/util/promise.d.ts +5 -0
- package/util/type.d.ts +1 -0
- package/util/uniq-object-key.d.ts +1 -0
- package/util.mjs +219 -0
- package/util.mjs.map +7 -0
- package/worker/custom-cache.d.ts +28 -0
- package/worker/ocr/index.d.ts +17 -0
- package/worker/ocr.mjs +75 -0
- package/worker/ocr.mjs.map +7 -0
- package/worker/reranker.mjs +180 -0
- package/worker/reranker.mjs.map +7 -0
- package/worker/set-transformers-config.d.ts +19 -0
- package/worker/text2vec/index.d.ts +9 -0
- package/worker/text2vec.mjs +194 -0
- package/worker/text2vec.mjs.map +7 -0
package/knowledge.mjs
ADDED
|
@@ -0,0 +1,2856 @@
|
|
|
1
|
+
// packages/knowledge/knowledge.manager.service.ts
|
|
2
|
+
import {
|
|
3
|
+
createInjector as createInjector2,
|
|
4
|
+
inject as inject12,
|
|
5
|
+
Injector as Injector4,
|
|
6
|
+
RootStaticInjectOptions as RootStaticInjectOptions2
|
|
7
|
+
} from "static-injector";
|
|
8
|
+
|
|
9
|
+
// packages/knowledge/const.ts
|
|
10
|
+
import { InjectionToken } from "static-injector";
|
|
11
|
+
/**
 * Builds the name of the collection that holds a graph's definitions.
 *
 * @param {string} name - Graph knowledge name.
 * @returns {string} The name wrapped as "[图谱]<name>-定义".
 */
function getGraphCollectionName(name) {
  const prefix = "[图谱]";
  const suffix = "-定义";
  return prefix.concat(name, suffix);
}
|
|
14
|
+
/**
 * Builds the "activated" collection name derived from a knowledge name.
 *
 * @param {string} name - Knowledge name.
 * @returns {string} The name followed by the "[激活]" marker.
 */
function getActivateCollectionName(name) {
  const marker = "[激活]";
  return "".concat(name, marker);
}
|
|
17
|
+
/** Name prefix used for dictionary knowledge. */
var DICT_PREFIX = "[字典]-";

// Injection tokens through which the host supplies its implementations.
/** Token for the text-to-vector (embedding) function. */
var Text2VecToken = new InjectionToken("text2vec");
/** Token for the text splitter function. */
var TextSplitterToken = new InjectionToken("TextSplitter");
/** Token for the knowledge config accessor (called as a function by the services). */
var ConfigToken = new InjectionToken("config");
/** Token registered under the description "getConfig". */
var GetConfigToken = new InjectionToken("getConfig");
/** Token for the OCR function (injected as optional by the dictionary service). */
var OCRToken = new InjectionToken("OCR");
/** Token for the knowledge directory accessor. */
var DirToken = new InjectionToken("dir");
/** Token for the reranker. */
var ReRankerToken = new InjectionToken("reranker");
|
|
29
|
+
|
|
30
|
+
// packages/knowledge/dict/dict.knowledge.service.ts
|
|
31
|
+
import { computed, inject as inject3 } from "static-injector";
|
|
32
|
+
import { QdrantClientService as QdrantClientService3 } from "@shenghuabi/knowledge/qdrant";
|
|
33
|
+
import { DictService } from "@shenghuabi/knowledge/file-parser";
|
|
34
|
+
import { v4 as v42 } from "uuid";
|
|
35
|
+
import { promise as fastq2 } from "fastq";
|
|
36
|
+
import {
|
|
37
|
+
BatchQueue,
|
|
38
|
+
CacheQueue as CacheQueue2,
|
|
39
|
+
html2Text,
|
|
40
|
+
isTruthy,
|
|
41
|
+
LogToken
|
|
42
|
+
} from "@shenghuabi/knowledge/util";
|
|
43
|
+
|
|
44
|
+
// packages/knowledge/template.format.ts
|
|
45
|
+
import hbs from "handlebars";
|
|
46
|
+
/**
 * Compiled Handlebars templates keyed by their source text.
 * Templates come from knowledge configuration, so the set of distinct keys
 * is expected to stay small.
 */
const compiledTemplateCache = new Map();

/**
 * Renders a Handlebars template against a context object.
 *
 * The template is compiled once per distinct source string and reused on
 * later calls; previously it was recompiled on every call, which happens
 * once per chunk while importing.
 *
 * @param {string} input - Handlebars template source.
 * @param {object} value - Context the template is rendered with.
 * @returns {string} The rendered text (HTML escaping disabled).
 */
function interpolate(input, value) {
  let template = compiledTemplateCache.get(input);
  if (!template) {
    template = hbs.compile(input, { noEscape: true, preventIndent: true });
    compiledTemplateCache.set(input, template);
  }
  return template(value, {
    allowProtoPropertiesByDefault: true
  });
}
|
|
51
|
+
/**
 * Produces the text that gets embedded for an entry.
 *
 * When the embedding template is enabled and has a non-empty value, the
 * template is rendered with `ENTRY` set to the payload plus the knowledge
 * name, and the result is trimmed. Otherwise the default content is returned
 * unchanged.
 *
 * @param {object} payload - Entry payload exposed to the template.
 * @param {string} knowledge - Knowledge name, exposed as `ENTRY.knowledge`.
 * @param {string} defaultContent - Fallback when no template applies.
 * @param {{enable: boolean, value?: string}} [embedingTemplate] - Template settings.
 * @returns {string} Rendered and trimmed template output, or `defaultContent`.
 */
function entryFormat(payload, knowledge, defaultContent, embedingTemplate) {
  const templateActive = embedingTemplate?.enable && embedingTemplate?.value;
  if (!templateActive) {
    return defaultContent;
  }
  const context = { ENTRY: { ...payload, knowledge } };
  const rendered = interpolate(embedingTemplate.value, context);
  return rendered.trim();
}
|
|
56
|
+
|
|
57
|
+
// packages/knowledge/dict/dict.knowledge.service.ts
|
|
58
|
+
import { createNormalizeVfs, path } from "@cyia/vfs2";
|
|
59
|
+
|
|
60
|
+
// packages/knowledge/knowledge.util.service.ts
|
|
61
|
+
import { inject, RootStaticInjectOptions } from "static-injector";
|
|
62
|
+
import { CacheQueue } from "@shenghuabi/knowledge/util";
|
|
63
|
+
import { promise as fastq } from "fastq";
|
|
64
|
+
import { QdrantClientService } from "@shenghuabi/knowledge/qdrant";
|
|
65
|
+
/**
 * Shared helpers built on the Qdrant client: bulk delete, batched upsert
 * queues, knowledge teardown and snapshot export.
 */
var KnowledgeUtilService = class extends RootStaticInjectOptions {
  #qdClient = inject(QdrantClientService);

  /**
   * Runs the same delete request against every collection in `list`.
   *
   * @param {string[]} list - Collection names.
   * @param {object} filter - Delete request forwarded to the client.
   * @returns {Promise<Array>} Results of all delete calls.
   */
  multiDelete(list, filter) {
    const pending = list.map((item) => this.#qdClient.delete(item, filter));
    return Promise.all(pending);
  }

  /**
   * Creates a cache queue that upserts batches of points into a collection.
   * The worker queue is created with 8 and the cache queue with 20 as their
   * numeric arguments.
   *
   * @param {string} collectionName - Target collection.
   * @returns {CacheQueue} Queue accepting individual points.
   */
  updatePointsQueue(collectionName) {
    const upsertBatch = (list) => {
      return this.#qdClient.upsert(collectionName, {
        wait: true,
        points: list
      });
    };
    const worker = fastq(upsertBatch, 8);
    return new CacheQueue(worker, 20);
  }

  /**
   * Deletes every listed collection and, when a vfs is given and its root
   * exists, removes that root recursively.
   *
   * @param {string[]} list - Collection names to delete.
   * @param {object} [vfs] - Virtual file system of the knowledge.
   */
  async destroyKnowledge(list, vfs) {
    const removals = list.map((collectionName) => {
      return this.#qdClient.deleteCollection(collectionName);
    });
    await Promise.all(removals);
    if (!vfs) {
      return;
    }
    const rootExists = await vfs.exists("");
    if (!rootExists) {
      return;
    }
    await vfs.rm("", { recursive: true, force: true });
  }

  /**
   * Creates a snapshot of every listed collection.
   *
   * @param {string[]} list - Collection names.
   * @returns {Promise<object[]>} Snapshot results, each tagged with `collection`.
   */
  export(list) {
    const snapshotOf = (collectionName) => {
      const tagWithCollection = (result) => ({
        ...result,
        collection: collectionName
      });
      return this.#qdClient.createSnapshot(collectionName).then(tagWithCollection);
    };
    return Promise.all(list.map((collectionName) => snapshotOf(collectionName)));
  }
};
|
|
103
|
+
|
|
104
|
+
// packages/knowledge/common/define/base.ts
|
|
105
|
+
import * as v from "valibot";
|
|
106
|
+
/** Fields shared by every knowledge configuration schema. */
var BaseKnowledgeConfig = v.object({
  /** Knowledge name. */
  name: v.string(),
  /** Read by the services as the collection key for query embedding and as the collection protected from deletion. */
  activateCollection: v.string()
});
|
|
111
|
+
|
|
112
|
+
// packages/knowledge/normal/define/config.ts
|
|
113
|
+
import * as v3 from "valibot";
|
|
114
|
+
|
|
115
|
+
// packages/knowledge/common/define/embedding.ts
|
|
116
|
+
import * as v2 from "valibot";
|
|
117
|
+
/** Schema of an embedding template: an on/off switch plus optional template text. */
var EmbeddingTemplateDefine = v2.object({
  /** Whether the template is applied. */
  enable: v2.boolean(),
  /** Template source; the template is only used when this is non-empty. */
  value: v2.optional(v2.string())
});
|
|
121
|
+
|
|
122
|
+
// packages/knowledge/normal/define/config.ts
|
|
123
|
+
/** Schema of one collection belonging to a knowledge base. */
var NormalCollectionDefine = v3.object({
  collectionName: v3.string(),
  /** Optional per-collection embedding templates. */
  embeddingTemplate: v3.optional(
    v3.object({ entry: v3.optional(EmbeddingTemplateDefine) })
  ),
  /** Vector size used when the collection is created. */
  size: v3.number()
});

/**
 * Schema of a "normal" knowledge configuration. After validation the config
 * gains `activateName`, derived from the knowledge name.
 */
var NormalKnowledgeConfigDefine = v3.pipe(
  v3.object({
    ...BaseKnowledgeConfig.entries,
    type: v3.optional(v3.literal("normal"), "normal"),
    collectionList: v3.array(NormalCollectionDefine)
  }),
  v3.transform((item) => {
    return {
      ...item,
      /** Name under which the activated normal knowledge base (file chunks) is addressed. */
      activateName: getActivateCollectionName(item.name)
    };
  })
);
|
|
144
|
+
|
|
145
|
+
// packages/knowledge/dict/define/config.ts
|
|
146
|
+
import * as v4 from "valibot";
|
|
147
|
+
/** Dictionary collections use the same schema as normal collections. */
var DictCollectionDefine = NormalCollectionDefine;

/**
 * Schema of a dictionary knowledge configuration. After validation the
 * config gains `activateName`, derived from the knowledge name.
 */
var DictKnowledgeConfigDefine = v4.pipe(
  v4.object({
    ...BaseKnowledgeConfig.entries,
    type: v4.optional(v4.literal("dict"), "dict"),
    collectionList: v4.array(DictCollectionDefine),
    /** Entry (word) extraction. */
    extractorWord: v4.optional(v4.boolean()),
    /** Image recognition (OCR). */
    useOcr: v4.optional(v4.boolean())
  }),
  v4.transform((item) => {
    return {
      ...item,
      /** Name under which the activated collection is addressed. */
      activateName: getActivateCollectionName(item.name)
    };
  })
);
|
|
164
|
+
|
|
165
|
+
// packages/knowledge/dict/dict.knowledge.service.ts
|
|
166
|
+
import * as v5 from "valibot";
|
|
167
|
+
|
|
168
|
+
// packages/knowledge/common/common.knowledge.service.ts
|
|
169
|
+
import { inject as inject2 } from "static-injector";
|
|
170
|
+
import { QdrantClientService as QdrantClientService2 } from "@shenghuabi/knowledge/qdrant";
|
|
171
|
+
/**
 * Base class of the knowledge services: chunk search against the activated
 * collection followed by reranking.
 */
var CommonKnowledgeService = class {
  #qdClient = inject2(QdrantClientService2);
  #config = inject2(ConfigToken);
  #text2vec = inject2(Text2VecToken);
  #reranker = inject2(ReRankerToken);

  /**
   * Searches the "chunk" vector of the activated collection and reranks the
   * hits by their `embeddingChunk` payload.
   *
   * When a limit is given, the vector search requests `limit` multiplied by
   * the reranker's query ratio, and the reranked list is cut back to `limit`.
   *
   * @param {string} text - Query text.
   * @param {object} [filter] - Filter forwarded to the search request.
   * @param {{limit?: number, score?: number, offset?: number}} [options]
   * @returns {Promise<object[]>} Search hits in reranked order.
   */
  async searchChunk(text, filter, options) {
    const targetName = this.#config().activateName;
    const requested = options?.limit;
    const fetchLimit = requested ? requested * this.#reranker.getQueryRatio() : void 0;
    const scoreThreshold = options?.score;
    const pageOffset = options?.offset;
    const queryVector = await this.#text2vec(text, this.#config().activateCollection);
    const queryResult = await this.#qdClient.search(targetName, {
      limit: fetchLimit,
      filter,
      with_payload: true,
      with_vector: false,
      score_threshold: scoreThreshold,
      offset: pageOffset,
      vector: {
        name: "chunk",
        vector: queryVector
      }
    });
    const docs = queryResult.map((item) => item.payload?.["embeddingChunk"]);
    const ranked = await this.#reranker.run({ value: text, docs });
    const kept = ranked.slice(0, options?.limit);
    return kept.map(({ index }) => queryResult[index]);
  }

  /** Fetches the collection info addressed by the config's `activateName`. */
  getCollection() {
    const targetName = this.#config().activateName;
    return this.#qdClient.getCollection(targetName);
  }
};
|
|
206
|
+
|
|
207
|
+
// packages/knowledge/dict/dict.knowledge.service.ts
|
|
208
|
+
/**
 * Knowledge service for dictionary knowledge: imports dictionary entries,
 * embeds each entry's headword ("word" vector) and its content chunks
 * ("chunk" vector), and stores them as points in Qdrant collections.
 */
var DictKnowledgeService = class extends CommonKnowledgeService {
  #text2vec = inject3(Text2VecToken);
  #reranker = inject3(ReRankerToken);
  #textSplitter = inject3(TextSplitterToken);
  #ocr = inject3(OCRToken, { optional: true }) || void 0;
  #config = inject3(ConfigToken);
  #vfs = computed(() => createNormalizeVfs({ dir: this.#dir() }));
  #qdClient = inject3(QdrantClientService3);
  #dict = inject3(DictService);
  #util = inject3(KnowledgeUtilService);
  #dir = inject3(DirToken);
  #log = inject3(LogToken);

  /**
   * Validates raw input against the dictionary collection schema.
   *
   * @param {unknown} input - Raw collection definition.
   * @returns {object} The parsed collection definition.
   */
  formatCollection(input) {
    return v5.parse(DictCollectionDefine, input);
  }

  /**
   * Creates the collection with "chunk" and "word" named vectors and keyword
   * payload indexes on "word", "chunk" and "content".
   *
   * @param {{collectionName: string, size: number}} collection
   * @throws {Error} When a collection with that name already exists.
   */
  async #createCollection(collection) {
    const { exists } = await this.#qdClient.collectionExists(
      collection.collectionName
    );
    if (exists) {
      throw new Error(`集合${collection.collectionName}已存在`);
    }
    const vectorParams = {
      size: collection.size,
      distance: "Cosine",
      on_disk: true
    };
    await this.#qdClient.createCollection(collection.collectionName, {
      vectors: {
        chunk: { ...vectorParams },
        word: { ...vectorParams }
      }
    });
    // Indexes are created one after another, in this order.
    for (const field of ["word", "chunk", "content"]) {
      await this.#qdClient.createPayloadKeywordIndex(
        collection.collectionName,
        field
      );
    }
  }

  /**
   * Creates the first collection of the knowledge and activates it.
   * Only called once; everything afterwards is an insert or update.
   *
   * @param {{collectionName: string, size: number}} collection
   */
  async create(collection) {
    await this.#createCollection(collection);
    // Activate the collection that was just created. The previous code
    // passed the knowledge name here, unlike every other caller of
    // setActivateCollection.
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  /**
   * Turns one dictionary entry into its embeddable chunks.
   *
   * @param {object} entryItem - Entry with `word` and `content` (and optionally
   *   `htmlContent` / `formatedContent`).
   * @param {object} collection - Target collection definition.
   * @param {boolean} assetHandle - True when importing raw dictionary data,
   *   which needs content formatting; false when re-embedding stored payloads.
   * @param {string} assetFolder - Folder passed to the HTML-to-text conversion.
   * @returns {Promise<{word: string, contentList: object[]}|undefined>}
   *   Undefined when the entry yields no non-empty chunk after splitting.
   */
  async #prepareEntry(entryItem, collection, assetHandle, assetFolder) {
    if (assetHandle && this.#config().extractorWord) {
      // Awaited so a failed write surfaces through the import queue's error
      // handling instead of being left as a floating promise.
      await this.#vfs().write(entryItem.word, entryItem.content);
    }
    const formatedContent = assetHandle ? await this.#formatContent(entryItem, assetFolder, {
      useOcr: this.#config().useOcr,
      refReplace: true
    }) : entryItem.formatedContent || entryItem.content;
    const splitContentList = (await this.#textSplitter(
      formatedContent,
      {
        ...entryItem,
        formatedContent
      },
      collection.collectionName
    )).filter((item) => !!item.pageContent.trim());
    if (!splitContentList.length) {
      return void 0;
    }
    const contentList = splitContentList.map((item) => {
      const pageContent = item.pageContent.trim();
      const metadata = {
        ...item.metadata,
        chunk: pageContent
      };
      const embeddingChunk = entryFormat(
        metadata,
        this.#config().name,
        `${metadata["word"]}\n${pageContent}`,
        collection.embeddingTemplate?.entry
      );
      if (!embeddingChunk) {
        this.#log.warn(
          `内容格式化后内容为空,跳过\n${JSON.stringify({ payload: metadata, knowledge: this.#config().name, pageContent })}`
        );
        return;
      }
      metadata["embeddingChunk"] = embeddingChunk;
      return {
        chunk: embeddingChunk,
        metadata
      };
    }).filter(isTruthy);
    return {
      word: entryItem.word,
      contentList
    };
  }

  /**
   * Builds the two-stage import pipeline for a collection: an import queue
   * that prepares and embeds entries, feeding an update queue that upserts
   * the resulting points.
   *
   * An error in either queue is recorded and kills that queue; callers read
   * it through `getError()` after draining.
   *
   * @param {object} collection - Target collection definition.
   * @param {boolean} assetHandle - See `#prepareEntry`.
   * @returns {Promise<{importQueue: object, updateQueue: object, getSum: function(): number, getError: function(): unknown}>}
   */
  async #getImportQueue(collection, assetHandle) {
    const assetFolder = path.join(this.#dir(), "assets");
    const contentBatchQueue = new BatchQueue(
      (str) => this.#text2vec(str, collection.collectionName)
    );
    let sum = 0;
    const updateCacheQueue = this.#util.updatePointsQueue(collection.collectionName);
    let updateError;
    updateCacheQueue.queue.error((error) => {
      if (error) {
        updateError = error;
        updateCacheQueue.queue.killAndDrain();
      }
    });
    const importCacheQueue = new CacheQueue2(
      fastq2(async (wordList) => {
        const preMergeList = (await Promise.all(
          wordList.map(
            (entryItem) => this.#prepareEntry(entryItem, collection, assetHandle, assetFolder)
          )
        )).filter(isTruthy).filter((item) => !!item.contentList.length);
        if (!preMergeList.length) {
          return;
        }
        const [wordVecResult, contentVecResult] = await Promise.all([
          this.#text2vec(
            preMergeList.map((item) => item.word),
            collection.collectionName
          ),
          contentBatchQueue.then(
            Promise.all(
              preMergeList.flatMap(
                (item) => item.contentList.map(
                  (item2) => contentBatchQueue.push(item2.chunk)
                )
              )
            )
          )
        ]);
        // contentVecResult is flat across all entries; walk it with a running
        // offset to hand each entry its own slice of chunk vectors.
        let startIndex = 0;
        for (let i = 0; i < preMergeList.length; i++) {
          const preData = preMergeList[i];
          const wordVector = wordVecResult[i];
          const endIndex = startIndex + preData.contentList.length;
          const extResult = contentVecResult.slice(startIndex, endIndex);
          startIndex = endIndex;
          extResult.forEach((chunkVector, j) => {
            updateCacheQueue.push({
              id: v42(),
              vector: {
                word: wordVector,
                chunk: chunkVector
              },
              payload: preData.contentList[j].metadata
            });
          });
        }
        sum += wordList.length;
        this.#log.info(`已导入 ${sum} 条`);
      }, 2),
      256
    );
    importCacheQueue.queue.error((error) => {
      if (error) {
        updateError = error;
        importCacheQueue.queue.killAndDrain();
      }
    });
    return {
      importQueue: importCacheQueue,
      getSum: () => sum,
      updateQueue: updateCacheQueue,
      getError() {
        return updateError;
      }
    };
  }

  /**
   * Completes and drains the import queue, then the update queue, and
   * rethrows the error recorded by either of them.
   *
   * @param {{importQueue: object, updateQueue: object, getError: function(): unknown}} queues
   */
  async #drainQueues({ importQueue, updateQueue, getError }) {
    importQueue.complete();
    await importQueue.queue.drained();
    updateQueue.complete();
    await updateQueue.queue.drained();
    const error = getError();
    if (error) {
      throw error;
    }
  }

  /**
   * Imports a dictionary into the first collection of the knowledge.
   * On any failure the whole knowledge is destroyed before the error is
   * rethrown.
   *
   * @param {unknown} input - Import input forwarded to the dictionary parser.
   * @returns {Promise<number>} Number of entries processed.
   */
  async importDict(input) {
    const collection = this.#config().collectionList[0];
    try {
      const generator = await this.#dict.importDict(this.#config().name, this.#dir(), input);
      const queues = await this.#getImportQueue(collection, true);
      for await (const item of generator) {
        queues.importQueue.push(item);
      }
      await this.#drainQueues(queues);
      return queues.getSum();
    } catch (rej) {
      this.#log.error(`导入失败`, rej);
      await this.destroy();
      throw rej;
    }
  }

  /**
   * Converts an entry's HTML (or plain content) to text and optionally
   * replaces every "~" with the entry's headword.
   *
   * @param {object} wordItem - Entry with `word`, `content` and optional `htmlContent`.
   * @param {string} assetFolder - Folder passed to the HTML-to-text conversion.
   * @param {{useOcr?: boolean, refReplace?: boolean}} options
   * @returns {Promise<string>} The formatted text.
   */
  async #formatContent(wordItem, assetFolder, options) {
    const result = await html2Text(wordItem.htmlContent ?? wordItem.content, {
      useOcr: options.useOcr,
      ocrFn: this.#ocr,
      assetFolder
    });
    if (!options.refReplace) {
      return result;
    }
    // A replacer function inserts the headword literally; a string
    // replacement would interpret "$&", "$$" and similar sequences in it.
    return result.replaceAll(`~`, () => wordItem.word);
  }

  /**
   * Adds a new collection, re-embeds every distinct entry found in the
   * activated collection into it, and then activates it. The new collection
   * is deleted again if anything fails along the way.
   *
   * @param {{collectionName: string, size: number}} collection
   */
  async addCollection(collection) {
    await this.#createCollection(collection);
    const queues = await this.#getImportQueue(collection, false);
    try {
      let offset;
      const activateCollectionName = this.#config().activateName;
      // One entry is stored as several points (one per chunk); de-duplicate
      // so each entry is re-split and re-embedded only once.
      const wordSet = /* @__PURE__ */ new Set();
      do {
        const { points, next_page_offset } = await this.#qdClient.scroll(
          activateCollectionName,
          {
            limit: 5e3,
            with_payload: true,
            offset
          }
        );
        for (const point of points) {
          const id = `${point.payload["word"]}|${point.payload["formatedContent"] || point.payload["content"]}`;
          if (wordSet.has(id)) {
            continue;
          }
          wordSet.add(id);
          queues.importQueue.push(point.payload);
        }
        offset = next_page_offset;
      } while (offset);
      await this.#drainQueues(queues);
    } catch (error) {
      await this.#qdClient.deleteCollection(collection.collectionName);
      throw error;
    }
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  /**
   * Deletes a collection unless it is the activated one or is not part of
   * this knowledge.
   *
   * @param {string} collectionName
   * @returns {Promise<boolean>} True when the collection was deleted.
   */
  async deleteCollection(collectionName) {
    const isActivate = this.#config().activateCollection === collectionName;
    if (isActivate) {
      return false;
    }
    const collection = this.#config().collectionList.find(
      (item) => item.collectionName === collectionName
    );
    if (!collection) {
      return false;
    }
    await this.#qdClient.deleteCollection(collectionName);
    return true;
  }

  /**
   * Makes the given collection the activated one.
   *
   * @param {string} collectionName
   */
  async changeActivateCollection(collectionName) {
    await this.#qdClient.setActivateCollection(
      collectionName,
      this.#config().activateName
    );
  }

  /** Creates snapshots of all collections of this knowledge. */
  export() {
    return this.#util.export(
      this.#config().collectionList.map(({ collectionName }) => collectionName)
    );
  }

  /** Deletes all collections of this knowledge and its directory. */
  async destroy() {
    return this.#util.destroyKnowledge(
      this.#config().collectionList.map(({ collectionName }) => collectionName),
      this.#vfs()
    );
  }

  /**
   * Searches the "word" vector of the activated collection and reranks the
   * hits by their `word` payload.
   *
   * @param {string} text - Query text.
   * @param {{limit?: number, score?: number, offset?: number}} [options]
   * @returns {Promise<object[]>} Search hits in reranked order.
   */
  async searchWord(text, options) {
    const queryResult = await this.#qdClient.search(
      this.#config().activateName,
      {
        limit: options?.limit ? options.limit * this.#reranker.getQueryRatio() : void 0,
        with_payload: true,
        with_vector: false,
        score_threshold: options?.score,
        offset: options?.offset,
        vector: {
          name: "word",
          vector: await this.#text2vec(text, this.#config().activateCollection)
        }
      }
    );
    const resultList = await this.#reranker.run({
      value: text,
      docs: queryResult.map((item) => item.payload?.["word"])
    });
    return resultList.slice(0, options?.limit).map(({ index }) => queryResult[index]);
  }

  /**
   * Exact headword lookup: scrolls the activated collection for points whose
   * `word` payload equals `text` (the collection is used as a plain database).
   *
   * @param {string} text - Headword to match.
   * @param {{limit?: number}} [options]
   * @returns {Promise<object>} The scroll result.
   */
  matchWord(text, options) {
    return this.#qdClient.scroll(this.#config().activateName, {
      limit: options?.limit,
      filter: {
        must: {
          key: "word",
          match: {
            value: text
          }
        }
      },
      with_payload: true,
      with_vector: false
    });
  }
};
|
|
535
|
+
|
|
536
|
+
// packages/knowledge/graph/graph.knowledge.service.ts
|
|
537
|
+
import { inject as inject7, Injector } from "static-injector";
|
|
538
|
+
import { QdrantClientService as QdrantClientService7 } from "@shenghuabi/knowledge/qdrant";
|
|
539
|
+
import { LogToken as LogToken4 } from "@shenghuabi/knowledge/util";
|
|
540
|
+
import { promise as fastq4 } from "fastq";
|
|
541
|
+
|
|
542
|
+
// packages/knowledge/graph/const.ts
|
|
543
|
+
import { InjectionToken as InjectionToken2 } from "static-injector";
|
|
544
|
+
// Injection tokens and constants of the graph knowledge.
/** Token registered under the description "ContentParser". */
var ContentParserToken = new InjectionToken2("ContentParser");
/** Token registered under the description "QueryParams". */
var QueryParamsToken = new InjectionToken2("QueryParams");
/** Token registered under the description "RagChat". */
var RagChatToken = new InjectionToken2("RagChat");
/** Literal marker string "$$INPUT$$". */
var CHAT_INPUT = "$$INPUT$$";
|
|
550
|
+
|
|
551
|
+
// packages/knowledge/graph/vecotr-format.ts
|
|
552
|
+
/**
 * Builds the text that represents a graph edge for embedding.
 *
 * @param {{keywords?: string[], source: string, target: string, description: string}} options
 * @returns {string} "<keywords joined by ','>,<source>,<target>,<description>";
 *   a missing keyword list contributes an empty string.
 */
function edgeVectorString(options) {
  const { keywords, source, target, description } = options;
  const keywordText = keywords?.join(",") ?? "";
  return "".concat(keywordText, ",", source, ",", target, ",", description);
}
|
|
555
|
+
/**
 * Builds the text that represents a graph node for embedding.
 *
 * @param {{name: string, description: string}} options
 * @returns {string} "<name>,<description>".
 */
function nodeVectorString(options) {
  const { name, description } = options;
  return "".concat(name, ",", description);
}
|
|
558
|
+
|
|
559
|
+
// packages/knowledge/graph/graph.knowledge.service.ts
|
|
560
|
+
import { v4 as v44 } from "uuid";
|
|
561
|
+
|
|
562
|
+
// packages/knowledge/normal/normal.knowledge.service.ts
|
|
563
|
+
import { QdrantClientService as QdrantClientService4 } from "@shenghuabi/knowledge/qdrant";
|
|
564
|
+
import { computed as computed2, inject as inject4 } from "static-injector";
|
|
565
|
+
import { createNormalizeVfs as createNormalizeVfs2 } from "@cyia/vfs2";
|
|
566
|
+
import { promise as fastq3 } from "fastq";
|
|
567
|
+
import { getHash, isTruthy as isTruthy2, runInEmbeddingContext } from "@shenghuabi/knowledge/util";
|
|
568
|
+
import { LogToken as LogToken2 } from "@shenghuabi/knowledge/util";
|
|
569
|
+
import * as v6 from "valibot";
|
|
570
|
+
var NormalKnowledgeService = class extends CommonKnowledgeService {
|
|
571
|
+
#text2vec = inject4(Text2VecToken);
|
|
572
|
+
#textSplitter = inject4(TextSplitterToken);
|
|
573
|
+
#config = inject4(ConfigToken);
|
|
574
|
+
#util = inject4(KnowledgeUtilService);
|
|
575
|
+
#qdClient = inject4(QdrantClientService4);
|
|
576
|
+
#channel = inject4(LogToken2);
|
|
577
|
+
#vfs = computed2(() => createNormalizeVfs2({ dir: this.#dir() }));
|
|
578
|
+
#dir = inject4(DirToken);
|
|
579
|
+
KeyWordIndex = ["fileName"];
|
|
580
|
+
getPayload(fileName, content) {
|
|
581
|
+
return { fileName };
|
|
582
|
+
}
|
|
583
|
+
formatCollection(input) {
|
|
584
|
+
return v6.parse(NormalCollectionDefine, input);
|
|
585
|
+
}
|
|
586
|
+
async createCollection(collection) {
|
|
587
|
+
this.#channel.info(
|
|
588
|
+
`创建集合:${this.#config().name};嵌入长度:${collection.size}`
|
|
589
|
+
);
|
|
590
|
+
const { exists } = await this.#qdClient.collectionExists(
|
|
591
|
+
collection.collectionName
|
|
592
|
+
);
|
|
593
|
+
if (exists) {
|
|
594
|
+
throw new Error(`集合${collection.collectionName}已存在`);
|
|
595
|
+
}
|
|
596
|
+
await this.#qdClient.createCollection(collection.collectionName, {
|
|
597
|
+
vectors: {
|
|
598
|
+
chunk: {
|
|
599
|
+
size: collection.size,
|
|
600
|
+
distance: "Cosine",
|
|
601
|
+
on_disk: true
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
});
|
|
605
|
+
this.#channel.info(`创建索引`);
|
|
606
|
+
for (const keyword of this.KeyWordIndex) {
|
|
607
|
+
await this.#qdClient.createPayloadKeywordIndex(
|
|
608
|
+
collection.collectionName,
|
|
609
|
+
keyword
|
|
610
|
+
);
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
/** 创建知识库 */
|
|
614
|
+
async create(collection) {
|
|
615
|
+
this.#channel.info(`准备创建知识库:${this.#config().name}`);
|
|
616
|
+
await this.createCollection(collection);
|
|
617
|
+
await this.#qdClient.setActivateCollection(
|
|
618
|
+
collection.collectionName,
|
|
619
|
+
this.#config().activateName
|
|
620
|
+
);
|
|
621
|
+
this.#channel.info(`创建完成:${this.#config().name}`);
|
|
622
|
+
}
|
|
623
|
+
async insertItemOnly(fileName, content, collectionList) {
|
|
624
|
+
return await runInEmbeddingContext(
|
|
625
|
+
(t2v) => Promise.all(
|
|
626
|
+
collectionList.map(async (collectionItem) => {
|
|
627
|
+
const chunkList = (await this.#textSplitter(
|
|
628
|
+
content,
|
|
629
|
+
this.getPayload(fileName, content),
|
|
630
|
+
collectionItem.collectionName
|
|
631
|
+
)).filter((item) => !!item.pageContent.trim());
|
|
632
|
+
if (!chunkList.length) {
|
|
633
|
+
return;
|
|
634
|
+
}
|
|
635
|
+
const points = await Promise.all(
|
|
636
|
+
chunkList.map(async (item) => {
|
|
637
|
+
const pageContent = item.pageContent.trim();
|
|
638
|
+
const id = getHash(pageContent);
|
|
639
|
+
const payload = {
|
|
640
|
+
...item.metadata,
|
|
641
|
+
chunk: pageContent,
|
|
642
|
+
hash: id
|
|
643
|
+
};
|
|
644
|
+
const embeddingChunk = entryFormat(
|
|
645
|
+
payload,
|
|
646
|
+
this.#config().name,
|
|
647
|
+
pageContent,
|
|
648
|
+
collectionItem.embeddingTemplate?.entry
|
|
649
|
+
);
|
|
650
|
+
if (!embeddingChunk) {
|
|
651
|
+
this.#channel.warn(
|
|
652
|
+
`内容格式化后内容为空,跳过
|
|
653
|
+
${JSON.stringify({ payload, knowledge: this.#config().name, pageContent })}`
|
|
654
|
+
);
|
|
655
|
+
return;
|
|
656
|
+
}
|
|
657
|
+
payload["embeddingChunk"] = embeddingChunk;
|
|
658
|
+
return {
|
|
659
|
+
id,
|
|
660
|
+
vector: {
|
|
661
|
+
chunk: await t2v(
|
|
662
|
+
embeddingChunk,
|
|
663
|
+
collectionItem.collectionName
|
|
664
|
+
)
|
|
665
|
+
},
|
|
666
|
+
payload
|
|
667
|
+
};
|
|
668
|
+
})
|
|
669
|
+
).then((list) => list.filter(isTruthy2));
|
|
670
|
+
if (points.length) {
|
|
671
|
+
await this.#qdClient.upsert(collectionItem.collectionName, {
|
|
672
|
+
wait: true,
|
|
673
|
+
points
|
|
674
|
+
});
|
|
675
|
+
return points;
|
|
676
|
+
}
|
|
677
|
+
return;
|
|
678
|
+
})
|
|
679
|
+
).then((list) => list.filter(isTruthy2)),
|
|
680
|
+
this.#text2vec
|
|
681
|
+
);
|
|
682
|
+
}
|
|
683
|
+
async _insertItem(fileName, content) {
|
|
684
|
+
if (await this.#vfs().exists(fileName)) {
|
|
685
|
+
return false;
|
|
686
|
+
}
|
|
687
|
+
await this.#vfs().writeFile(fileName, content);
|
|
688
|
+
return await this.insertItemOnly(
|
|
689
|
+
fileName,
|
|
690
|
+
content,
|
|
691
|
+
this.#config().collectionList
|
|
692
|
+
);
|
|
693
|
+
}
|
|
694
|
+
async insertItem(fileName, content, signal2) {
|
|
695
|
+
await this._insertItem(fileName, content);
|
|
696
|
+
}
|
|
697
|
+
async deleteItem(fileName) {
|
|
698
|
+
await this.#util.multiDelete(
|
|
699
|
+
this.#config().collectionList.map((item) => item.collectionName),
|
|
700
|
+
{
|
|
701
|
+
filter: {
|
|
702
|
+
must: [{ key: "fileName", match: { value: fileName } }],
|
|
703
|
+
should: null
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
);
|
|
707
|
+
await this.#vfs().delete(fileName, { force: true });
|
|
708
|
+
}
|
|
709
|
+
async updateItem(fileName, content) {
|
|
710
|
+
await this.deleteItem(fileName);
|
|
711
|
+
await this.insertItem(fileName, content);
|
|
712
|
+
}
|
|
713
|
+
async addCollection(collection) {
|
|
714
|
+
const list = await this.#vfs().exists("") ? await this.#vfs().readdir("") : [];
|
|
715
|
+
await this.createCollection(collection);
|
|
716
|
+
const queue = fastq3(async (fileName) => {
|
|
717
|
+
const content = await this.#vfs().readContent(fileName);
|
|
718
|
+
if (typeof content !== "string" || !content) {
|
|
719
|
+
return;
|
|
720
|
+
}
|
|
721
|
+
await this.insertItemOnly(fileName, content, [collection]);
|
|
722
|
+
}, 20);
|
|
723
|
+
let queueError;
|
|
724
|
+
queue.error((error) => {
|
|
725
|
+
if (error) {
|
|
726
|
+
queueError = error;
|
|
727
|
+
queue.killAndDrain();
|
|
728
|
+
}
|
|
729
|
+
});
|
|
730
|
+
try {
|
|
731
|
+
for (const item of list) {
|
|
732
|
+
queue.push(item);
|
|
733
|
+
}
|
|
734
|
+
await queue.drained();
|
|
735
|
+
if (queueError) {
|
|
736
|
+
throw queueError;
|
|
737
|
+
}
|
|
738
|
+
} catch (error) {
|
|
739
|
+
await this.#qdClient.deleteCollection(collection.collectionName);
|
|
740
|
+
throw error;
|
|
741
|
+
}
|
|
742
|
+
await this.#qdClient.setActivateCollection(
|
|
743
|
+
collection.collectionName,
|
|
744
|
+
this.#config().activateName
|
|
745
|
+
);
|
|
746
|
+
}
|
|
747
|
+
// 激活collection不可删除,所以这里不应该有删除切换的问题
|
|
748
|
+
async deleteCollection(collectionName) {
|
|
749
|
+
const isActivate = this.#config().activateCollection === collectionName;
|
|
750
|
+
if (isActivate) {
|
|
751
|
+
return false;
|
|
752
|
+
}
|
|
753
|
+
const collection = this.#config().collectionList.find(
|
|
754
|
+
(item) => item.collectionName === collectionName
|
|
755
|
+
);
|
|
756
|
+
if (!collection) {
|
|
757
|
+
return false;
|
|
758
|
+
}
|
|
759
|
+
await this.#qdClient.deleteCollection(collectionName);
|
|
760
|
+
return true;
|
|
761
|
+
}
|
|
762
|
+
async changeActivateCollection(collectionName) {
|
|
763
|
+
await this.#qdClient.setActivateCollection(
|
|
764
|
+
collectionName,
|
|
765
|
+
this.#config().activateName
|
|
766
|
+
);
|
|
767
|
+
}
|
|
768
|
+
export() {
|
|
769
|
+
return this.#util.export(
|
|
770
|
+
this.#config().collectionList.map(({ collectionName }) => collectionName)
|
|
771
|
+
);
|
|
772
|
+
}
|
|
773
|
+
async destroy() {
|
|
774
|
+
return this.#util.destroyKnowledge(
|
|
775
|
+
this.#config().collectionList.map(({ collectionName }) => collectionName),
|
|
776
|
+
this.#vfs()
|
|
777
|
+
);
|
|
778
|
+
}
|
|
779
|
+
};
|
|
780
|
+
|
|
781
|
+
// packages/knowledge/graph/define/config.ts
|
|
782
|
+
import * as v7 from "valibot";
|
|
783
|
+
/**
 * Schema for one collection entry of a graph knowledge base.
 *
 * After validation the entry additionally carries `graphCollectionName`,
 * derived from `collectionName` through `getGraphCollectionName`.
 */
var GraphCollectionDefine = v7.pipe(
  v7.object({
    collectionName: v7.string(),
    size: v7.number(),
    // Optional embedding templates, one per kind of embedded item.
    embeddingTemplate: v7.optional(
      v7.object({
        entry: v7.optional(EmbeddingTemplateDefine),
        node: v7.optional(EmbeddingTemplateDefine),
        edge: v7.optional(EmbeddingTemplateDefine)
      })
    )
  }),
  v7.transform((parsed) => {
    const graphCollectionName = getGraphCollectionName(parsed.collectionName);
    return { ...parsed, graphCollectionName };
  })
);
|
|
800
|
+
/**
 * Configuration schema of a graph knowledge base: the base knowledge config
 * plus graph specific fields. Validation adds the two derived names
 * `activateName` and `activateGraphName`.
 */
var GraphKnowledgeConfigDefine = v7.pipe(
  v7.object({
    ...BaseKnowledgeConfig.entries,
    type: v7.optional(v7.literal("normal-graph"), "normal-graph"),
    maxChunkAsync: v7.number(),
    collectionList: v7.array(GraphCollectionDefine)
  }),
  v7.transform((config) => {
    /** Active plain knowledge base (file chunks). */
    const activateName = getActivateCollectionName(config.name);
    /** Active graph database. */
    const activateGraphName = getActivateCollectionName(
      getGraphCollectionName(config.name)
    );
    return { ...config, activateName, activateGraphName };
  })
);
|
|
817
|
+
|
|
818
|
+
// packages/knowledge/graph/graph.knowledge.service.ts
|
|
819
|
+
import * as v11 from "valibot";
|
|
820
|
+
|
|
821
|
+
// packages/knowledge/graph/graph.util.service.ts
|
|
822
|
+
import { runInEmbeddingContext as runInEmbeddingContext2 } from "@shenghuabi/knowledge/util";
|
|
823
|
+
import { inject as inject5 } from "static-injector";
|
|
824
|
+
import { QdrantClientService as QdrantClientService5 } from "@shenghuabi/knowledge/qdrant";
|
|
825
|
+
|
|
826
|
+
// packages/knowledge/graph/define/define.ts
|
|
827
|
+
import { uniqBy } from "lodash-es";
|
|
828
|
+
import * as v8 from "valibot";
|
|
829
|
+
|
|
830
|
+
// packages/knowledge/graph/util.ts
|
|
831
|
+
/**
 * Builds the key that identifies an edge from its two endpoints.
 *
 * The key is the source and target joined by a single comma, so endpoint
 * names that themselves contain a comma can yield the same key for
 * different pairs.
 *
 * @param source Name of the source node.
 * @param target Name of the target node.
 * @returns The string `"<source>,<target>"`.
 */
function getEdgeName(source, target) {
  const edgeKey = `${source},${target}`;
  return edgeKey;
}
|
|
834
|
+
|
|
835
|
+
// packages/knowledge/graph/define/define.ts
|
|
836
|
+
import { v4 as v43 } from "uuid";
|
|
837
|
+
/** Schema of a single extracted entity. */
var ENTITY_DEFINE = v8.object({
  name: v8.string(),
  // A missing description becomes the empty string.
  description: v8.optional(v8.string(), ""),
  /** Entity type */
  type: v8.string()
});
/**
 * Schema of a relation between two entities.
 * `keywords` may be given as a comma separated string (split and trimmed)
 * or directly as an array of strings.
 */
var ENTITY_RELATION_DEFINE = v8.object({
  source: v8.string(),
  target: v8.string(),
  description: v8.optional(v8.string(), ""),
  strength: v8.optional(v8.number(), 5),
  keywords: v8.optional(
    v8.union([
      v8.pipe(
        v8.string(),
        v8.transform((joined) => joined.split(",").map((part) => part.trim()))
      ),
      v8.array(v8.string())
    ])
  )
});
/**
 * Schema of one extraction result: entities, relations and keywords.
 * Entities and relations lacking a required text field are dropped and the
 * remainder is de-duplicated; keywords are trimmed.
 */
var EntityExtraDefine = v8.object({
  entity: v8.pipe(
    v8.nullish(v8.array(ENTITY_DEFINE), []),
    v8.transform((list) => {
      // Keep only entities that have both a name and a description.
      const described = (list || []).filter(
        (entity) => Boolean(entity.name) && Boolean(entity.description)
      );
      return uniqBy(
        described,
        (entity) => `${entity.name}|${entity.type}|${entity.description}`
      );
    })
  ),
  entity_relation: v8.pipe(
    v8.nullish(v8.array(ENTITY_RELATION_DEFINE), []),
    v8.transform((list) => {
      // Keep only relations with source, target and description present.
      const described = (list || []).filter(
        (relation) => Boolean(relation.source) && Boolean(relation.target) && Boolean(relation.description)
      );
      return uniqBy(
        described,
        (relation) => `${relation.source}|${relation.target}|${relation.description}`
      );
    })
  ),
  keyword: v8.pipe(
    v8.nullish(v8.array(v8.string()), []),
    v8.transform((keywords) => keywords.map((keyword) => keyword.trim()))
  )
});
|
|
884
|
+
/** Stored payload of a graph node: the entity fields plus bookkeeping ids. */
var NodePayloadDefine = v8.object({
  ...ENTITY_DEFINE.entries,
  kind: v8.optional(v8.literal("node"), "node"),
  id: v8.string(),
  chunkId: v8.string(),
  fileName: v8.string()
});
/** Node item: the payload without `kind` on input, with `kind: "node"` on output. */
var NodeItemDefine = v8.pipe(
  v8.omit(NodePayloadDefine, ["kind"]),
  v8.transform((node) => {
    return { ...node, kind: "node" };
  })
);
/** Payload of a node about to be written: no `id`, but with its embedding text. */
var NodePayloadNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(NodePayloadDefine, ["id"]).entries,
    embeddingChunk: v8.string()
  })
);
/** New node item: `id` is optional and defaults to a freshly generated UUID. */
var NodeItemNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(NodePayloadDefine, ["kind", "id"]).entries,
    id: v8.optional(NodePayloadDefine.entries.id, () => v43())
  }),
  v8.transform((node) => {
    return { ...node, kind: "node" };
  })
);
|
|
908
|
+
/** Stored payload of a graph edge: the relation fields plus bookkeeping ids. */
var EdgePayloadDefine = v8.object({
  ...ENTITY_RELATION_DEFINE.entries,
  chunkId: v8.string(),
  kind: v8.optional(v8.literal("edge"), "edge"),
  name: v8.string(),
  fileName: v8.string(),
  id: v8.string()
});
/**
 * Edge item: `kind` and `name` are not accepted on input; both are set on
 * output, with `name` derived from the endpoints via `getEdgeName`.
 */
var EdgeItemDefine = v8.pipe(
  v8.omit(EdgePayloadDefine, ["kind", "name"]),
  v8.transform((edge) => {
    const name = getEdgeName(edge.source, edge.target);
    return { ...edge, name, kind: "edge" };
  })
);
/**
 * Payload of an edge about to be written: no `id`, carries its embedding
 * text, and gets `name` derived from the endpoints.
 */
var EdgePayloadNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(EdgePayloadDefine, ["name", "id"]).entries,
    embeddingChunk: v8.string()
  }),
  v8.transform((edge) => {
    const name = getEdgeName(edge.source, edge.target);
    return { ...edge, name };
  })
);
|
|
934
|
+
/**
 * New edge item: `id` is optional and defaults to a freshly generated UUID.
 * `name` and `kind` are not accepted on input; the transform sets
 * `kind: "edge"` and derives `name` from the endpoints via `getEdgeName`.
 */
var EdgeItemNewDefine = v8.pipe(
  v8.object({
    ...v8.omit(EdgePayloadDefine, ["kind", "name", "id"]).entries,
    // Take the id schema from the edge payload (not the node payload) so this
    // definition keeps following the edge schema if the two ever diverge.
    id: v8.optional(EdgePayloadDefine.entries.id, () => v43())
  }),
  v8.transform((data) => ({
    ...data,
    name: getEdgeName(data.source, data.target),
    kind: "edge"
  }))
);
|
|
945
|
+
/**
 * Input for creating graph data of one chunk: the owning file and chunk plus
 * optional entity and relation lists (both default to an empty list).
 */
var KnowledgeGraphCreateDefine = v8.object({
  fileName: v8.string(),
  chunkId: v8.string(),
  nodeList: v8.optional(v8.array(ENTITY_DEFINE), []),
  edgeList: v8.optional(v8.array(ENTITY_RELATION_DEFINE), [])
});
/** Payload of a keyword point; `kind` defaults to `"keyword"`. */
var KeywordPayloadNewDefine = v8.pipe(
  v8.object({
    kind: v8.optional(v8.literal("keyword"), "keyword"),
    keyword: v8.string(),
    chunkId: v8.string(),
    fileName: v8.string()
  })
);
|
|
959
|
+
|
|
960
|
+
// packages/knowledge/graph/graph.util.service.ts
|
|
961
|
+
import * as v9 from "valibot";
|
|
962
|
+
/**
 * Applies graph mutations (upserts and deletions) to the graph collection of
 * every configured collection.
 */
var GraphKnowledgeUtilService = class {
  #qdClient = inject5(QdrantClientService5);
  #config = inject5(ConfigToken);
  #text2vec = inject5(Text2VecToken);
  /**
   * Runs `fn` inside `runInEmbeddingContext2` and persists the changes it
   * describes.
   *
   * @param fn Async callback resolving to an object with optional `upsert`
   *   (`nodes`, `edges`, `keywords`; each item has `id` and `payload`) and
   *   optional `delete` (`nodes`, `edges`; each either an array of items
   *   with `id`, or an object carrying a `filter`).
   * @returns The result of `runInEmbeddingContext2`, whose callback resolves
   *   once every upsert and delete request has settled successfully.
   */
  updateContext(fn) {
    return runInEmbeddingContext2(async (t2v) => {
      const result = await fn();
      const pending = [];
      if (result.upsert) {
        const upsertJobs = this.#config().collectionList.map(
          async ({ graphCollectionName, collectionName, embeddingTemplate }) => {
            const jobs = [];
            // Converts all items to points, then writes them in one upsert.
            const embedAndStore = (items, toPoint) => Promise.all(items.map(toPoint)).then(
              (points) => this.#qdClient.upsert(graphCollectionName, { points })
            );
            if (result.upsert?.nodes?.length) {
              jobs.push(
                embedAndStore(result.upsert.nodes, async (node) => {
                  const embeddingChunk = entryFormat(
                    node.payload,
                    this.#config().name,
                    nodeVectorString(node.payload),
                    embeddingTemplate?.node
                  );
                  const payload = v9.parse(NodePayloadNewDefine, {
                    ...node.payload,
                    embeddingChunk
                  });
                  const chunk = await t2v(embeddingChunk, collectionName);
                  return { id: node.id, payload, vector: { chunk } };
                })
              );
            }
            if (result.upsert?.edges?.length) {
              jobs.push(
                embedAndStore(result.upsert.edges, async (edge) => {
                  const embeddingChunk = entryFormat(
                    edge.payload,
                    this.#config().name,
                    edgeVectorString(edge.payload),
                    embeddingTemplate?.edge
                  );
                  const payload = v9.parse(EdgePayloadNewDefine, {
                    ...edge.payload,
                    embeddingChunk
                  });
                  const chunk = await t2v(embeddingChunk, collectionName);
                  return { id: edge.id, payload, vector: { chunk } };
                })
              );
            }
            if (result.upsert?.keywords?.length) {
              jobs.push(
                embedAndStore(result.upsert.keywords, async (entry) => {
                  // Keywords are embedded as-is, without a template.
                  const payload = v9.parse(KeywordPayloadNewDefine, entry.payload);
                  const chunk = await t2v(entry.payload.keyword, collectionName);
                  return { id: entry.id, payload, vector: { chunk } };
                })
              );
            }
            return Promise.all(jobs);
          }
        );
        pending.push(...upsertJobs);
      }
      if (result.delete) {
        const deleteJobs = this.#config().collectionList.map(
          async ({ graphCollectionName }) => {
            await Promise.all(
              [result.delete?.nodes, result.delete?.edges].map((target) => {
                if (!Array.isArray(target)) {
                  // A non-array target is deleted through its filter.
                  return target ? this.#qdClient.delete(graphCollectionName, {
                    filter: target.filter
                  }) : void 0;
                }
                // An array target is deleted by point id; empty arrays are skipped.
                return target.length ? this.#qdClient.delete(graphCollectionName, {
                  points: target.map((item) => item.id)
                }) : void 0;
              })
            );
          }
        );
        pending.push(...deleteJobs);
      }
      return Promise.all(pending);
    }, this.#text2vec);
  }
};
|
|
1080
|
+
|
|
1081
|
+
// packages/knowledge/graph/graph.local.service.ts
|
|
1082
|
+
import { QdrantClientService as QdrantClientService6 } from "@shenghuabi/knowledge/qdrant";
|
|
1083
|
+
import { computed as computed3, inject as inject6, signal, untracked } from "static-injector";
|
|
1084
|
+
import Graph from "graphology";
|
|
1085
|
+
import * as v10 from "valibot";
|
|
1086
|
+
import { LogToken as LogToken3 } from "@shenghuabi/knowledge/util";
|
|
1087
|
+
var GraphLocalService = class {
|
|
1088
|
+
#qdClient = inject6(QdrantClientService6);
|
|
1089
|
+
#config = inject6(ConfigToken);
|
|
1090
|
+
#limit = 5e3;
|
|
1091
|
+
#graph;
|
|
1092
|
+
#log = inject6(LogToken3);
|
|
1093
|
+
update$ = signal(0);
|
|
1094
|
+
graphExport$$ = computed3(() => {
|
|
1095
|
+
this.update$();
|
|
1096
|
+
return this.#graph.export();
|
|
1097
|
+
});
|
|
1098
|
+
getGraph() {
|
|
1099
|
+
return this.#graph;
|
|
1100
|
+
}
|
|
1101
|
+
/**
 * Computed that creates a fresh graph and starts loading its data.
 * Its value is the promise returned by `loadingData`, which is invoked
 * inside `untracked`.
 */
loadDataInitGraph$$ = computed3(() => {
  this.initGraph();
  const load = () => this.loadingData();
  return untracked(load);
});
|
|
1105
|
+
/** 只初始化数据 */
|
|
1106
|
+
initGraph() {
|
|
1107
|
+
this.#graph = new Graph();
|
|
1108
|
+
return this.#graph;
|
|
1109
|
+
}
|
|
1110
|
+
#getEdgesFromSource(name) {
|
|
1111
|
+
try {
|
|
1112
|
+
return this.#graph.outEdges(name);
|
|
1113
|
+
} catch (error) {
|
|
1114
|
+
this.#log.warn(error);
|
|
1115
|
+
return [];
|
|
1116
|
+
}
|
|
1117
|
+
}
|
|
1118
|
+
#getEdgesFromTarget(name) {
|
|
1119
|
+
try {
|
|
1120
|
+
return this.#graph.inEdges(name);
|
|
1121
|
+
} catch (error) {
|
|
1122
|
+
this.#log.warn(error);
|
|
1123
|
+
return [];
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
1126
|
+
getTargetListFromSource(name) {
|
|
1127
|
+
return this.#graph.hasNode(name) ? this.#getEdgesFromSource(name).flatMap(
|
|
1128
|
+
(name2) => this.#graph.getTargetAttributes(name2).list
|
|
1129
|
+
) : [];
|
|
1130
|
+
}
|
|
1131
|
+
edgeToNode(item, type) {
|
|
1132
|
+
return {
|
|
1133
|
+
name: item[type],
|
|
1134
|
+
description: item.description,
|
|
1135
|
+
type: "未知",
|
|
1136
|
+
chunkId: item.chunkId,
|
|
1137
|
+
fileName: item.fileName
|
|
1138
|
+
};
|
|
1139
|
+
}
|
|
1140
|
+
createOrUpdateNodeAttr(item) {
|
|
1141
|
+
const payload = v10.parse(NodeItemNewDefine, item);
|
|
1142
|
+
if (this.#graph.hasNode(payload.name)) {
|
|
1143
|
+
const obj = this.#graph.getNodeAttributes(payload.name);
|
|
1144
|
+
for (let index = 0; index < obj.list.length; index++) {
|
|
1145
|
+
const item2 = obj.list[index];
|
|
1146
|
+
if (item2.id === payload.id) {
|
|
1147
|
+
obj.list[index] = payload;
|
|
1148
|
+
this.#graph.replaceNodeAttributes(payload.name, obj);
|
|
1149
|
+
return;
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
1152
|
+
obj.list.push(payload);
|
|
1153
|
+
this.#graph.replaceNodeAttributes(payload.name, obj);
|
|
1154
|
+
} else {
|
|
1155
|
+
this.#graph.addNode(payload.name, {
|
|
1156
|
+
list: [payload],
|
|
1157
|
+
name: payload.name
|
|
1158
|
+
});
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1161
|
+
// source/target可能不存在,需要临时节点
|
|
1162
|
+
createOrUpdateEdgeAttr(item) {
|
|
1163
|
+
const payload = v10.parse(EdgeItemNewDefine, item);
|
|
1164
|
+
if (this.#graph.hasEdge(payload.name)) {
|
|
1165
|
+
const obj = this.#graph.getEdgeAttributes(payload.name);
|
|
1166
|
+
for (let index = 0; index < obj.list.length; index++) {
|
|
1167
|
+
const item2 = obj.list[index];
|
|
1168
|
+
if (item2.id === payload.id) {
|
|
1169
|
+
obj.list[index] = payload;
|
|
1170
|
+
this.#graph.replaceEdgeAttributes(item2.name, obj);
|
|
1171
|
+
return;
|
|
1172
|
+
}
|
|
1173
|
+
}
|
|
1174
|
+
obj.list.push(payload);
|
|
1175
|
+
this.#graph.replaceEdgeAttributes(payload.name, obj);
|
|
1176
|
+
} else {
|
|
1177
|
+
if (!this.#graph.hasNode(item.source)) {
|
|
1178
|
+
this.#graph.addNode(item.source, {
|
|
1179
|
+
list: [v10.parse(NodeItemNewDefine, this.edgeToNode(item, "source"))],
|
|
1180
|
+
name: item.source
|
|
1181
|
+
});
|
|
1182
|
+
}
|
|
1183
|
+
if (!this.#graph.hasNode(item.target)) {
|
|
1184
|
+
this.#graph.addNode(item.target, {
|
|
1185
|
+
list: [v10.parse(NodeItemNewDefine, this.edgeToNode(item, "target"))],
|
|
1186
|
+
name: item.target
|
|
1187
|
+
});
|
|
1188
|
+
}
|
|
1189
|
+
this.#graph.addEdgeWithKey(payload.name, item.source, item.target, {
|
|
1190
|
+
list: [payload],
|
|
1191
|
+
name: payload.name,
|
|
1192
|
+
source: payload.source,
|
|
1193
|
+
target: payload.target
|
|
1194
|
+
});
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
#deleteNodeAttr(payload) {
|
|
1198
|
+
if (this.#graph.hasNode(payload.name)) {
|
|
1199
|
+
const attr = this.#graph.getNodeAttributes(payload.name);
|
|
1200
|
+
const index = attr.list.findIndex((item) => item.id === payload.id);
|
|
1201
|
+
if (index !== -1) {
|
|
1202
|
+
attr.list.splice(index, 1);
|
|
1203
|
+
if (attr.list.length === 0) {
|
|
1204
|
+
this.#graph.dropNode(payload.name);
|
|
1205
|
+
} else {
|
|
1206
|
+
this.#graph.replaceNodeAttributes(payload.name, attr);
|
|
1207
|
+
}
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
1211
|
+
#deleteEdgeAttr(item) {
|
|
1212
|
+
const name = getEdgeName(item.source, item.target);
|
|
1213
|
+
if (!this.#graph.hasEdge(name)) {
|
|
1214
|
+
return;
|
|
1215
|
+
}
|
|
1216
|
+
const attr = this.#graph.getEdgeAttributes(name);
|
|
1217
|
+
for (let index = 0; index < attr.list.length; index++) {
|
|
1218
|
+
const edgeItem = attr.list[index];
|
|
1219
|
+
if (edgeItem.id === item.id) {
|
|
1220
|
+
attr.list.splice(index, 1);
|
|
1221
|
+
if (attr.list.length === 0) {
|
|
1222
|
+
this.#graph.dropEdge(name);
|
|
1223
|
+
} else {
|
|
1224
|
+
this.#graph.replaceEdgeAttributes(name, attr);
|
|
1225
|
+
}
|
|
1226
|
+
return;
|
|
1227
|
+
}
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
#updateGraph(payload, update) {
|
|
1231
|
+
if (payload.kind === "node") {
|
|
1232
|
+
this.createOrUpdateNodeAttr(payload);
|
|
1233
|
+
} else if (payload.kind === "edge") {
|
|
1234
|
+
this.createOrUpdateEdgeAttr(payload);
|
|
1235
|
+
}
|
|
1236
|
+
}
|
|
1237
|
+
async loadingData() {
|
|
1238
|
+
let offset;
|
|
1239
|
+
const [nodeList, edgeList] = await Promise.all(
|
|
1240
|
+
["node", "edge"].map(async (kind) => {
|
|
1241
|
+
const nodePoints = [];
|
|
1242
|
+
do {
|
|
1243
|
+
const { points, next_page_offset } = await this.#qdClient.scroll(
|
|
1244
|
+
this.#config().activateGraphName,
|
|
1245
|
+
{
|
|
1246
|
+
limit: this.#limit,
|
|
1247
|
+
filter: {
|
|
1248
|
+
must: {
|
|
1249
|
+
key: "kind",
|
|
1250
|
+
match: {
|
|
1251
|
+
value: kind
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1254
|
+
},
|
|
1255
|
+
with_payload: true,
|
|
1256
|
+
offset
|
|
1257
|
+
}
|
|
1258
|
+
);
|
|
1259
|
+
nodePoints.push(() => {
|
|
1260
|
+
for (const item of points) {
|
|
1261
|
+
this.#updateGraph({ ...item.payload, id: item.id });
|
|
1262
|
+
}
|
|
1263
|
+
});
|
|
1264
|
+
offset = next_page_offset;
|
|
1265
|
+
} while (offset);
|
|
1266
|
+
return nodePoints;
|
|
1267
|
+
})
|
|
1268
|
+
);
|
|
1269
|
+
nodeList.forEach((fn) => fn());
|
|
1270
|
+
edgeList.forEach((fn) => fn());
|
|
1271
|
+
}
|
|
1272
|
+
/** 一个节点分多个,需要删除原来的节点和边,然后插入
|
|
1273
|
+
* 拆分后的节点有可能是存在的
|
|
1274
|
+
*/
|
|
1275
|
+
async splitNode(options) {
|
|
1276
|
+
const nodeAttr = this.#graph.getNodeAttributes(options.node);
|
|
1277
|
+
options.list.map((replaceNodeName) => {
|
|
1278
|
+
nodeAttr.list.forEach((item) => {
|
|
1279
|
+
this.createOrUpdateNodeAttr({ ...item, name: replaceNodeName });
|
|
1280
|
+
});
|
|
1281
|
+
});
|
|
1282
|
+
const sourceEdges = this.#getEdgesFromSource(options.node);
|
|
1283
|
+
const targetEdges = this.#getEdgesFromTarget(options.node);
|
|
1284
|
+
for (const edge of sourceEdges) {
|
|
1285
|
+
const attr = this.#graph.getEdgeAttributes(edge);
|
|
1286
|
+
for (const replaceNodeName of options.list) {
|
|
1287
|
+
attr.list.forEach((attrItem) => {
|
|
1288
|
+
this.createOrUpdateEdgeAttr({ ...attrItem, source: replaceNodeName });
|
|
1289
|
+
});
|
|
1290
|
+
}
|
|
1291
|
+
}
|
|
1292
|
+
for (const edge of targetEdges) {
|
|
1293
|
+
const attr = this.#graph.getEdgeAttributes(edge);
|
|
1294
|
+
for (const replaceNodeName of options.list) {
|
|
1295
|
+
attr.list.forEach((attrItem) => {
|
|
1296
|
+
this.createOrUpdateEdgeAttr({ ...attrItem, target: replaceNodeName });
|
|
1297
|
+
});
|
|
1298
|
+
}
|
|
1299
|
+
}
|
|
1300
|
+
this.#graph.dropNode(options.node);
|
|
1301
|
+
this.update$.update((a) => a + 1);
|
|
1302
|
+
}
|
|
1303
|
+
async mergeNode(options) {
|
|
1304
|
+
const sourceEdges = options.list.flatMap(
|
|
1305
|
+
(item) => this.#getEdgesFromSource(item)
|
|
1306
|
+
);
|
|
1307
|
+
const targetEdges = options.list.flatMap(
|
|
1308
|
+
(item) => this.#getEdgesFromTarget(item)
|
|
1309
|
+
);
|
|
1310
|
+
options.list.forEach((node) => {
|
|
1311
|
+
const attr = this.#graph.getNodeAttributes(node);
|
|
1312
|
+
attr.list.forEach((item) => {
|
|
1313
|
+
this.createOrUpdateNodeAttr({ ...item, name: options.node });
|
|
1314
|
+
});
|
|
1315
|
+
});
|
|
1316
|
+
for (const edge of sourceEdges) {
|
|
1317
|
+
const data = this.#graph.getEdgeAttributes(edge);
|
|
1318
|
+
data.list.forEach((item) => {
|
|
1319
|
+
this.createOrUpdateEdgeAttr({ ...item, source: options.node });
|
|
1320
|
+
});
|
|
1321
|
+
}
|
|
1322
|
+
for (const edge of targetEdges) {
|
|
1323
|
+
const data = this.#graph.getEdgeAttributes(edge);
|
|
1324
|
+
data.list.forEach((item) => {
|
|
1325
|
+
this.createOrUpdateEdgeAttr({ ...item, target: options.node });
|
|
1326
|
+
});
|
|
1327
|
+
}
|
|
1328
|
+
options.list.forEach((item) => {
|
|
1329
|
+
this.#graph.dropNode(item);
|
|
1330
|
+
});
|
|
1331
|
+
this.update$.update((a) => a + 1);
|
|
1332
|
+
}
|
|
1333
|
+
async changeNodeDescription(payload) {
|
|
1334
|
+
this.createOrUpdateNodeAttr(payload);
|
|
1335
|
+
this.update$.update((a) => a + 1);
|
|
1336
|
+
}
|
|
1337
|
+
async changeEdge(item, oldItem) {
|
|
1338
|
+
this.createOrUpdateEdgeAttr(item);
|
|
1339
|
+
if (item.source !== oldItem.source || item.target !== oldItem.target) {
|
|
1340
|
+
this.#deleteEdgeAttr(oldItem);
|
|
1341
|
+
}
|
|
1342
|
+
this.update$.update((a) => a + 1);
|
|
1343
|
+
}
|
|
1344
|
+
async add(input) {
|
|
1345
|
+
input.nodes?.forEach((item) => {
|
|
1346
|
+
this.createOrUpdateNodeAttr(item);
|
|
1347
|
+
});
|
|
1348
|
+
input.edges?.forEach((item) => {
|
|
1349
|
+
this.createOrUpdateEdgeAttr(item);
|
|
1350
|
+
});
|
|
1351
|
+
this.update$.update((a) => a + 1);
|
|
1352
|
+
}
|
|
1353
|
+
async deleteNodeItem(item) {
|
|
1354
|
+
this.#deleteNodeAttr(item);
|
|
1355
|
+
this.update$.update((a) => a + 1);
|
|
1356
|
+
}
|
|
1357
|
+
deleteEdgeItem(item) {
|
|
1358
|
+
this.#deleteEdgeAttr(item);
|
|
1359
|
+
this.update$.update((a) => a + 1);
|
|
1360
|
+
}
|
|
1361
|
+
deleteNode(name) {
|
|
1362
|
+
this.#graph.dropNode(name);
|
|
1363
|
+
this.update$.update((a) => a + 1);
|
|
1364
|
+
}
|
|
1365
|
+
/**
|
|
1366
|
+
* node=>edge
|
|
1367
|
+
* 确定最相似的chunk(第一位)
|
|
1368
|
+
* 获取传入节点的所有变
|
|
1369
|
+
*/
|
|
1370
|
+
getEdgeByNode(nodes) {
|
|
1371
|
+
const repeatList = /* @__PURE__ */ new Set();
|
|
1372
|
+
const list = [];
|
|
1373
|
+
for (const node of nodes) {
|
|
1374
|
+
for (const edgeName of [
|
|
1375
|
+
...this.#graph.outEdges(node.name),
|
|
1376
|
+
...this.#graph.inEdges(node.name)
|
|
1377
|
+
]) {
|
|
1378
|
+
const key = `${edgeName}|${node.chunkId}`;
|
|
1379
|
+
if (repeatList.has(key)) {
|
|
1380
|
+
continue;
|
|
1381
|
+
}
|
|
1382
|
+
repeatList.add(key);
|
|
1383
|
+
list.push(
|
|
1384
|
+
...this.#graph.getEdgeAttributes(edgeName).list.filter((item) => item.chunkId === node.chunkId)
|
|
1385
|
+
);
|
|
1386
|
+
}
|
|
1387
|
+
}
|
|
1388
|
+
return list;
|
|
1389
|
+
}
|
|
1390
|
+
/**
|
|
1391
|
+
* 根据边查节点
|
|
1392
|
+
* 通过边上的chunkid,查找同在这个chunkid上的souce/target节点,权重为边
|
|
1393
|
+
*/
|
|
1394
|
+
getNodeByEdge(edges) {
|
|
1395
|
+
const repeatList = /* @__PURE__ */ new Set();
|
|
1396
|
+
const list = [];
|
|
1397
|
+
for (const edge of edges) {
|
|
1398
|
+
for (const nodeName of [edge.source, edge.target]) {
|
|
1399
|
+
const key = `${nodeName}|${edge.chunkId}`;
|
|
1400
|
+
if (repeatList.has(key)) {
|
|
1401
|
+
continue;
|
|
1402
|
+
}
|
|
1403
|
+
repeatList.add(key);
|
|
1404
|
+
list.push(
|
|
1405
|
+
...this.#graph.getNodeAttributes(nodeName).list.filter((item) => item.chunkId === edge.chunkId)
|
|
1406
|
+
);
|
|
1407
|
+
}
|
|
1408
|
+
}
|
|
1409
|
+
return list;
|
|
1410
|
+
}
|
|
1411
|
+
getChunkEdgeByNode(node) {
|
|
1412
|
+
const list = [];
|
|
1413
|
+
for (const edgeName of [
|
|
1414
|
+
...this.#graph.outEdges(node.name),
|
|
1415
|
+
...this.#graph.inEdges(node.name)
|
|
1416
|
+
]) {
|
|
1417
|
+
const edgeAttr = this.#graph.getEdgeAttributes(edgeName);
|
|
1418
|
+
list.push(
|
|
1419
|
+
...edgeAttr.list.filter((item) => node.chunkId === item.chunkId)
|
|
1420
|
+
);
|
|
1421
|
+
}
|
|
1422
|
+
return list;
|
|
1423
|
+
}
|
|
1424
|
+
};
|
|
1425
|
+
|
|
1426
|
+
// packages/knowledge/graph/graph.knowledge.service.ts
|
|
1427
|
+
import { BatchQueue as BatchQueue2 } from "@shenghuabi/knowledge/util";
|
|
1428
|
+
var GraphKnolwdgeService = class extends NormalKnowledgeService {
|
|
1429
|
+
#text2vec = inject7(Text2VecToken);
|
|
1430
|
+
#config = inject7(ConfigToken);
|
|
1431
|
+
#util = inject7(KnowledgeUtilService);
|
|
1432
|
+
#graphUtil = inject7(GraphKnowledgeUtilService);
|
|
1433
|
+
#qdClient = inject7(QdrantClientService7);
|
|
1434
|
+
#channel = inject7(LogToken4);
|
|
1435
|
+
#injector = inject7(Injector);
|
|
1436
|
+
#contentParser = inject7(ContentParserToken);
|
|
1437
|
+
#graphLocal = inject7(GraphLocalService);
|
|
1438
|
+
formatCollection(input) {
|
|
1439
|
+
return v11.parse(GraphCollectionDefine, input);
|
|
1440
|
+
}
|
|
1441
|
+
async #createCollection(collection) {
|
|
1442
|
+
const collectionName = getGraphCollectionName(collection.collectionName);
|
|
1443
|
+
this.#channel.info(`创建图集合:${collectionName}`);
|
|
1444
|
+
const { exists } = await this.#qdClient.collectionExists(collectionName);
|
|
1445
|
+
if (exists) {
|
|
1446
|
+
throw new Error(`集合${collectionName}已存在`);
|
|
1447
|
+
}
|
|
1448
|
+
await this.#qdClient.createCollection(collectionName, {
|
|
1449
|
+
vectors: {
|
|
1450
|
+
chunk: {
|
|
1451
|
+
size: collection.size,
|
|
1452
|
+
distance: "Cosine",
|
|
1453
|
+
on_disk: true
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
});
|
|
1457
|
+
this.#channel.info(`创建图索引`);
|
|
1458
|
+
await this.#qdClient.createPayloadKeywordIndex(collectionName, "kind");
|
|
1459
|
+
await this.#qdClient.createPayloadKeywordIndex(collectionName, "fileName");
|
|
1460
|
+
await this.#qdClient.createPayloadKeywordIndex(collectionName, "name");
|
|
1461
|
+
await this.#qdClient.createPayloadKeywordIndex(collectionName, "source");
|
|
1462
|
+
await this.#qdClient.createPayloadKeywordIndex(collectionName, "target");
|
|
1463
|
+
}
|
|
1464
|
+
async create(collection) {
|
|
1465
|
+
await super.create(collection);
|
|
1466
|
+
await this.#createCollection(collection);
|
|
1467
|
+
await this.#qdClient.setActivateCollection(
|
|
1468
|
+
collection.graphCollectionName,
|
|
1469
|
+
this.#config().activateGraphName
|
|
1470
|
+
);
|
|
1471
|
+
}
|
|
1472
|
+
/** 图谱知识库不允许改chunksize,因为改了后切片就不一样了,那么生成的关系一定也不一样了 */
|
|
1473
|
+
async insertItem(fileName, content, signal2) {
|
|
1474
|
+
const list = await this._insertItem(fileName, content);
|
|
1475
|
+
if (!list || !list.length) {
|
|
1476
|
+
return;
|
|
1477
|
+
}
|
|
1478
|
+
const result = list[0].map(({ payload }) => payload);
|
|
1479
|
+
await this.#insert(result, fileName, signal2);
|
|
1480
|
+
}
|
|
1481
|
+
async #insert(chunkList, fileName, signal2) {
|
|
1482
|
+
const countObj = {
|
|
1483
|
+
success: 0,
|
|
1484
|
+
error: 0
|
|
1485
|
+
};
|
|
1486
|
+
const hasGraph = !!this.#graphLocal.getGraph();
|
|
1487
|
+
const llmAsyncQueue = fastq4(async (document) => {
|
|
1488
|
+
if (signal2?.aborted) {
|
|
1489
|
+
return;
|
|
1490
|
+
}
|
|
1491
|
+
const extractData = await this.#contentParser.parse(document, signal2);
|
|
1492
|
+
countObj.success++;
|
|
1493
|
+
const nodes = extractData.entity.map((item) => ({
|
|
1494
|
+
id: v44(),
|
|
1495
|
+
payload: {
|
|
1496
|
+
...item,
|
|
1497
|
+
fileName,
|
|
1498
|
+
chunkId: document.hash
|
|
1499
|
+
}
|
|
1500
|
+
}));
|
|
1501
|
+
const edges = extractData.entity_relation.map((item) => ({
|
|
1502
|
+
id: v44(),
|
|
1503
|
+
payload: {
|
|
1504
|
+
...item,
|
|
1505
|
+
fileName,
|
|
1506
|
+
chunkId: document.hash
|
|
1507
|
+
}
|
|
1508
|
+
}));
|
|
1509
|
+
const keywords = extractData.keyword.map((item) => ({
|
|
1510
|
+
id: v44(),
|
|
1511
|
+
payload: {
|
|
1512
|
+
keyword: item,
|
|
1513
|
+
chunkId: document.hash,
|
|
1514
|
+
fileName
|
|
1515
|
+
}
|
|
1516
|
+
}));
|
|
1517
|
+
await this.#graphUtil.updateContext(async () => ({
|
|
1518
|
+
upsert: {
|
|
1519
|
+
nodes,
|
|
1520
|
+
edges,
|
|
1521
|
+
keywords
|
|
1522
|
+
}
|
|
1523
|
+
}));
|
|
1524
|
+
if (hasGraph) {
|
|
1525
|
+
this.#graphLocal.add({
|
|
1526
|
+
nodes: nodes.map((item) => ({ ...item.payload, id: item.id })),
|
|
1527
|
+
edges: edges.map((item) => ({ ...item.payload, id: item.id }))
|
|
1528
|
+
});
|
|
1529
|
+
}
|
|
1530
|
+
}, this.#config().maxChunkAsync);
|
|
1531
|
+
llmAsyncQueue.error((error, task) => {
|
|
1532
|
+
if (error) {
|
|
1533
|
+
countObj.error++;
|
|
1534
|
+
this.#channel.warn(`[${fileName}]解析失败:
|
|
1535
|
+
${task.chunk}
|
|
1536
|
+
`, error);
|
|
1537
|
+
}
|
|
1538
|
+
});
|
|
1539
|
+
for (const item of chunkList) {
|
|
1540
|
+
llmAsyncQueue.push(item);
|
|
1541
|
+
}
|
|
1542
|
+
await llmAsyncQueue.drained();
|
|
1543
|
+
if (signal2?.aborted) {
|
|
1544
|
+
return this.deleteItem(fileName);
|
|
1545
|
+
}
|
|
1546
|
+
if (chunkList.length === countObj.error) {
|
|
1547
|
+
await super.deleteItem(fileName);
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1550
|
+
async deleteItem(fileName) {
|
|
1551
|
+
await super.deleteItem(fileName);
|
|
1552
|
+
await this.#util.multiDelete(
|
|
1553
|
+
this.#config().collectionList.map((item) => item.graphCollectionName),
|
|
1554
|
+
{
|
|
1555
|
+
filter: {
|
|
1556
|
+
must: [{ key: "fileName", match: { value: fileName } }],
|
|
1557
|
+
should: null
|
|
1558
|
+
}
|
|
1559
|
+
}
|
|
1560
|
+
);
|
|
1561
|
+
}
|
|
1562
|
+
async updateItem(fileName, content) {
|
|
1563
|
+
await this.deleteItem(fileName);
|
|
1564
|
+
await this.insertItem(fileName, content);
|
|
1565
|
+
}
|
|
1566
|
+
async addCollection(collection) {
|
|
1567
|
+
const activateCollectionName = this.#config().activateGraphName;
|
|
1568
|
+
await super.addCollection(collection);
|
|
1569
|
+
await this.#createCollection(collection);
|
|
1570
|
+
const newCollectionName = collection.graphCollectionName;
|
|
1571
|
+
const batchQueue = new BatchQueue2(
|
|
1572
|
+
(item) => this.#text2vec(item, collection.collectionName)
|
|
1573
|
+
);
|
|
1574
|
+
const queue = this.#util.updatePointsQueue(newCollectionName);
|
|
1575
|
+
let queueError = void 0;
|
|
1576
|
+
queue.queue.error((err) => {
|
|
1577
|
+
if (err) {
|
|
1578
|
+
queueError = err;
|
|
1579
|
+
queue.queue.killAndDrain();
|
|
1580
|
+
}
|
|
1581
|
+
});
|
|
1582
|
+
let offset;
|
|
1583
|
+
do {
|
|
1584
|
+
const { points, next_page_offset } = await this.#qdClient.scroll(
|
|
1585
|
+
activateCollectionName,
|
|
1586
|
+
{
|
|
1587
|
+
limit: 5e3,
|
|
1588
|
+
with_payload: true,
|
|
1589
|
+
with_vector: false,
|
|
1590
|
+
offset
|
|
1591
|
+
}
|
|
1592
|
+
);
|
|
1593
|
+
await batchQueue.then(
|
|
1594
|
+
Promise.all([
|
|
1595
|
+
Promise.all(
|
|
1596
|
+
points.map(async (point) => {
|
|
1597
|
+
let embeddingChunk;
|
|
1598
|
+
if (point.payload["kind"] === "node") {
|
|
1599
|
+
embeddingChunk = entryFormat(
|
|
1600
|
+
point.payload,
|
|
1601
|
+
this.#config().name,
|
|
1602
|
+
nodeVectorString(point.payload),
|
|
1603
|
+
collection.embeddingTemplate?.node
|
|
1604
|
+
);
|
|
1605
|
+
} else if (point.payload["kind"] === "edge") {
|
|
1606
|
+
embeddingChunk = entryFormat(
|
|
1607
|
+
point.payload,
|
|
1608
|
+
this.#config().name,
|
|
1609
|
+
edgeVectorString(point.payload),
|
|
1610
|
+
collection.embeddingTemplate?.edge
|
|
1611
|
+
);
|
|
1612
|
+
} else {
|
|
1613
|
+
embeddingChunk = point.payload["keyword"];
|
|
1614
|
+
}
|
|
1615
|
+
const vector = await batchQueue.push(embeddingChunk);
|
|
1616
|
+
queue.push({
|
|
1617
|
+
id: point.id,
|
|
1618
|
+
payload: {
|
|
1619
|
+
...point.payload,
|
|
1620
|
+
embeddingChunk
|
|
1621
|
+
},
|
|
1622
|
+
vector: {
|
|
1623
|
+
chunk: vector
|
|
1624
|
+
}
|
|
1625
|
+
});
|
|
1626
|
+
})
|
|
1627
|
+
)
|
|
1628
|
+
])
|
|
1629
|
+
);
|
|
1630
|
+
offset = next_page_offset;
|
|
1631
|
+
} while (offset);
|
|
1632
|
+
queue.complete();
|
|
1633
|
+
await queue.queue.drained();
|
|
1634
|
+
if (queueError) {
|
|
1635
|
+
await this.#qdClient.deleteCollection(collection.collectionName);
|
|
1636
|
+
await this.#qdClient.deleteCollection(collection.graphCollectionName);
|
|
1637
|
+
throw queueError;
|
|
1638
|
+
}
|
|
1639
|
+
await this.#qdClient.setActivateCollection(
|
|
1640
|
+
newCollectionName,
|
|
1641
|
+
this.#config().activateGraphName
|
|
1642
|
+
);
|
|
1643
|
+
}
|
|
1644
|
+
async deleteCollection(collectionName) {
|
|
1645
|
+
const result = await super.deleteCollection(collectionName);
|
|
1646
|
+
if (!result) {
|
|
1647
|
+
return result;
|
|
1648
|
+
}
|
|
1649
|
+
const collection = this.#config().collectionList.find(
|
|
1650
|
+
(item) => item.collectionName === collectionName
|
|
1651
|
+
);
|
|
1652
|
+
await this.#qdClient.deleteCollection(collection.graphCollectionName);
|
|
1653
|
+
return true;
|
|
1654
|
+
}
|
|
1655
|
+
async changeActivateCollection(collectionName) {
|
|
1656
|
+
await super.changeActivateCollection(collectionName);
|
|
1657
|
+
await this.#qdClient.setActivateCollection(
|
|
1658
|
+
getGraphCollectionName(collectionName),
|
|
1659
|
+
this.#config().activateGraphName
|
|
1660
|
+
);
|
|
1661
|
+
}
|
|
1662
|
+
export() {
|
|
1663
|
+
return this.#util.export(
|
|
1664
|
+
this.#config().collectionList.flatMap(
|
|
1665
|
+
({ collectionName, graphCollectionName }) => [
|
|
1666
|
+
collectionName,
|
|
1667
|
+
graphCollectionName
|
|
1668
|
+
]
|
|
1669
|
+
)
|
|
1670
|
+
);
|
|
1671
|
+
}
|
|
1672
|
+
async destroy() {
|
|
1673
|
+
await super.destroy();
|
|
1674
|
+
return this.#util.destroyKnowledge(
|
|
1675
|
+
this.#config().collectionList.map(
|
|
1676
|
+
({ graphCollectionName }) => graphCollectionName
|
|
1677
|
+
)
|
|
1678
|
+
);
|
|
1679
|
+
}
|
|
1680
|
+
};
|
|
1681
|
+
|
|
1682
|
+
// packages/knowledge/knowledge.manager.service.ts
|
|
1683
|
+
import { FileParserService } from "@shenghuabi/knowledge/file-parser";
|
|
1684
|
+
|
|
1685
|
+
// packages/knowledge/graph/graph.handle.service.ts
|
|
1686
|
+
import { QdrantClientService as QdrantClientService8 } from "@shenghuabi/knowledge/qdrant";
|
|
1687
|
+
import { inject as inject8, Injector as Injector2 } from "static-injector";
|
|
1688
|
+
import { v4 as v45 } from "uuid";
|
|
1689
|
+
import * as v12 from "valibot";
|
|
1690
|
+
import { isTruthy as isTruthy3, LogToken as LogToken5 } from "@shenghuabi/knowledge/util";
|
|
1691
|
+
/** Page size used by the graph scroll requests that want "everything". */
var MAX_LIMIT = 99999;

/** Builds a filter condition matching points whose `kind` payload equals `kind`. */
var createKindFilter = (kind) => ({ key: "kind", match: { value: kind } });

/** Condition selecting edge points. */
var FilterEdge = createKindFilter("edge");

/** Condition selecting node points. */
var FilterNode = createKindFilter("node");
|
|
1704
|
+
/**
 * Applies graph edits (split, merge, change, add, delete) to the active graph
 * collection. Each public method hands `updateContext` a callback that
 * resolves to an `{ upsert, delete }` description of the change.
 */
var GraphHandleService = class {
  #text2vec = inject8(Text2VecToken);
  #config = inject8(ConfigToken);
  #util = inject8(KnowledgeUtilService);
  #qdClient = inject8(QdrantClientService8);
  #channel = inject8(LogToken5);
  #injector = inject8(Injector2);
  #graphUtil = inject8(GraphKnowledgeUtilService);

  /**
   * Scrolls the active graph collection with the given filter, asking for
   * payloads but no vectors, capped at MAX_LIMIT points.
   */
  #scrollActiveGraph(filter) {
    return this.#qdClient.scroll(this.#config().activateGraphName, {
      limit: MAX_LIMIT,
      filter,
      with_payload: true,
      with_vector: false
    });
  }

  /**
   * Splits a node: every stored entry of `options.node`, and every edge that
   * touches it, is copied once per name in `options.list`; the originals are
   * scheduled for deletion.
   *
   * @param {{ node: string, list: string[] }} options
   */
  async splitNode(options) {
    await this.#graphUtil.updateContext(async () => {
      const [{ points: nodes }, { points: edges }] = await Promise.all([
        this.#scrollActiveGraph({
          must: [FilterNode, { key: "name", match: { value: options.node } }]
        }),
        this.#scrollActiveGraph({
          must: [FilterEdge],
          should: [
            { key: "source", match: { value: options.node } },
            { key: "target", match: { value: options.node } }
          ]
        })
      ]);
      const updateNodes = nodes.flatMap(
        (node) => options.list.map((nodeName) => ({
          payload: { ...node.payload, name: nodeName },
          id: v45()
        }))
      );
      const updateEdges = edges.flatMap(
        (edge) => options.list.map((nodeName) => {
          // Only one endpoint is rewritten; `source` wins when it matches.
          const payload = edge.payload["source"] === options.node
            ? { ...edge.payload, source: nodeName }
            : { ...edge.payload, target: nodeName };
          payload["name"] = getEdgeName(payload["source"], payload["target"]);
          return { payload, id: v45() };
        })
      );
      return {
        upsert: { nodes: updateNodes, edges: updateEdges },
        delete: { nodes, edges }
      };
    });
  }

  /**
   * Merges the nodes named in `options.list` into `options.node`. Edges whose
   * both endpoints are in the list are dropped instead of being rewritten.
   *
   * @param {{ node: string, list: string[] }} options
   */
  async mergeNode(options) {
    // A Set, not a plain object: object lookups also hit inherited keys such
    // as "constructor", which would wrongly count as a merged name.
    const mergedNames = new Set(options.list);
    await this.#graphUtil.updateContext(async () => {
      const [{ points: nodes }, { points: edges }] = await Promise.all([
        this.#scrollActiveGraph({
          must: [FilterNode, { key: "name", match: { any: options.list } }]
        }),
        this.#scrollActiveGraph({
          must: [FilterEdge],
          should: [
            { key: "source", match: { any: options.list } },
            { key: "target", match: { any: options.list } }
          ]
        })
      ]);
      const updateNodes = nodes.map((node) => ({
        payload: { ...node.payload, name: options.node },
        id: v45()
      }));
      const updateEdges = edges.flatMap((edge) => {
        const hasSource = mergedNames.has(edge.payload["source"]);
        const hasTarget = mergedNames.has(edge.payload["target"]);
        if (hasSource && hasTarget) {
          return [];
        }
        const payload = hasSource
          ? { ...edge.payload, source: options.node }
          : { ...edge.payload, target: options.node };
        payload["name"] = getEdgeName(payload["source"], payload["target"]);
        return [{ payload, id: v45() }];
      });
      return {
        upsert: { nodes: updateNodes, edges: updateEdges },
        delete: { nodes, edges }
      };
    });
  }

  /**
   * Upserts one node entry under its existing id.
   * Other fields could technically be changed this way, but only the
   * description is meant to be edited.
   */
  async changeNodeDescription(item) {
    const { id, ...payload } = v12.parse(NodeItemDefine, item);
    await this.#graphUtil.updateContext(async () => ({
      upsert: {
        nodes: [{ id, payload }]
      }
    }));
  }

  /**
   * Upserts one edge entry under its existing id.
   * (Original note: if the relation was modified, the edge need not change.)
   */
  async changeEdge(item) {
    const id = item.id;
    // Remove `id` from the parsed payload, not from the caller's object: the
    // old `delete item.id` mutated the input and left any parsed id in place.
    const { id: _parsedId, ...payload } = v12.parse(EdgeItemDefine, item);
    await this.#graphUtil.updateContext(async () => ({
      upsert: {
        edges: [{ id, payload }]
      }
    }));
  }

  /** Adds new node and/or edge entries, each under a freshly generated id. */
  async addNodeItem(input) {
    await this.#graphUtil.updateContext(async () => {
      const nodes = (input.nodes ?? []).map((node) => ({
        id: v45(),
        payload: v12.parse(NodeItemNewDefine, node)
      }));
      const edges = (input.edges ?? []).map((edge) => ({
        id: v45(),
        payload: v12.parse(EdgeItemNewDefine, edge)
      }));
      return {
        upsert: { nodes, edges }
      };
    });
  }

  /**
   * Deletes a single entry of a node. When the one-point lookup by node name
   * returns a point, the edges touching that name are deleted by filter too.
   */
  async deleteNodeItem(item) {
    await this.#graphUtil.updateContext(async () => {
      const { points } = await this.#qdClient.scroll(
        this.#config().activateGraphName,
        {
          limit: 1,
          filter: {
            must: [FilterNode, { key: "name", match: { value: item.name } }]
          }
        }
      );
      const edgesTouchingNode = {
        filter: {
          must: [FilterEdge],
          should: [
            { key: "source", match: { value: item.name } },
            { key: "target", match: { value: item.name } }
          ]
        }
      };
      return {
        delete: {
          nodes: [item],
          edges: points.length === 1 ? edgesTouchingNode : []
        }
      };
    });
  }

  /** Deletes a whole edge; nodes are not affected. */
  async deleteEdge(item) {
    await this.#graphUtil.updateContext(async () => ({
      delete: {
        edges: [item]
      }
    }));
  }

  /** Deletes every entry of the named node and every edge touching it. */
  async deleteNodeByName(name) {
    await this.#graphUtil.updateContext(async () => ({
      delete: {
        nodes: {
          filter: {
            must: [FilterNode, { key: "name", match: { value: name } }],
            should: null
          }
        },
        edges: {
          filter: {
            must: [FilterEdge],
            should: [
              { key: "source", match: { value: name } },
              { key: "target", match: { value: name } }
            ]
          }
        }
      }
    }));
  }
};
|
|
1962
|
+
|
|
1963
|
+
// packages/knowledge/graph/graph.service.ts
|
|
1964
|
+
import { createInjector, inject as inject10, Injector as Injector3 } from "static-injector";
|
|
1965
|
+
|
|
1966
|
+
// packages/knowledge/graph/graph.query.service.ts
|
|
1967
|
+
import { QdrantClientService as QdrantClientService9 } from "@shenghuabi/knowledge/qdrant";
|
|
1968
|
+
import { inject as inject9 } from "static-injector";
|
|
1969
|
+
import { differenceBy, uniqBy as uniqBy2 } from "lodash-es";
|
|
1970
|
+
|
|
1971
|
+
// packages/knowledge/graph/util/graph-util.ts
|
|
1972
|
+
/**
 * Builds the row of display values for one node.
 *
 * @param {{ name: *, type: *, list: Array<{ description: * }>, degree: number }} item
 * @param {number} index - zero-based position; the row shows it one-based.
 * @returns {Array} `[ordinal, name, type, descriptions joined by ";", degree as a whole-number percent string]`
 */
function getNodeStrList(item, index) {
  const { name, type, list, degree } = item;
  const ordinal = index + 1;
  const descriptions = list.map((entry) => entry.description).join(";");
  const degreePercent = (degree * 100).toFixed(0);
  return [ordinal, name, type, descriptions, degreePercent];
}
|
|
1981
|
+
/**
 * Builds the row of display values for one edge.
 *
 * @param {{ source: *, target: *, list: Array<{ description: * }>, degree: number }} item
 * @param {number} i - zero-based position; the row shows it one-based.
 * @returns {Array} `[ordinal, source, target, descriptions joined by ";", degree as a whole-number percent string]`
 */
function getEdgeStrList(item, i) {
  const { source, target, list, degree } = item;
  const ordinal = i + 1;
  const descriptions = list.map((entry) => entry.description).join(";");
  const degreePercent = (degree * 100).toFixed(0);
  return [ordinal, source, target, descriptions, degreePercent];
}
|
|
1990
|
+
|
|
1991
|
+
// packages/knowledge/graph/util/format-attr.ts
|
|
1992
|
+
import * as v13 from "valibot";
|
|
1993
|
+
import { countBy, maxBy } from "lodash-es";
|
|
1994
|
+
/**
 * Picks the most frequent `type` among `list`, ignoring the "未知" type.
 *
 * @param {Array<{ type: string }>} list
 * @returns {string} the winning type, or "未知" when no other type occurs.
 */
function getNodeType(list) {
  const { ["未知"]: _unknownCount, ...knownCounts } = countBy(
    list,
    (item) => item.type
  );
  const mostFrequent = maxBy(
    Object.entries(knownCounts),
    ([, count]) => count
  );
  return mostFrequent?.[0] ?? "未知";
}
|
|
1999
|
+
/**
 * Collapses the scored entries of one node into a single summary.
 *
 * Every entry's `data` is validated against `NodePayloadDefine`; all parsed
 * entries are then given the same `type` (the one chosen by `getNodeType`).
 *
 * @param {Array<{ data: object, score: number }>} list - entries of one node;
 *   must not be empty, because the name is read from the first entry.
 * @returns {{ list: object[], name: string, type: string, degree: number }}
 *   `degree` is the arithmetic mean of the entry scores.
 */
function formatNodeAttr2(list) {
  let scoreSum = 0;
  const attrList = list.map(({ data, score }) => {
    scoreSum += score;
    return v13.parse(NodePayloadDefine, data);
  });
  // Computed once. The previous version also built an unused countBy table
  // and called getNodeType a second time, which returned the same value
  // because every entry had just been assigned this type.
  const type = getNodeType(attrList);
  for (const attr of attrList) {
    attr.type = type;
  }
  return {
    list: attrList,
    name: attrList[0].name,
    type,
    degree: scoreSum / list.length
  };
}
|
|
2019
|
+
/**
 * Collapses the scored entries of one edge into a single summary.
 *
 * @param {Array<{ data: object, score: number }>} list - entries of one edge;
 *   name, source and target are read from the first parsed entry.
 * @returns {{ list: object[], name: *, source: *, target: *, degree: number }}
 *   `degree` is the arithmetic mean of the entry scores.
 */
function formatEdgeAttr2(list) {
  const attrList = [];
  let scoreSum = 0;
  for (const entry of list) {
    scoreSum += entry.score;
    attrList.push(v13.parse(EdgePayloadDefine, entry.data));
  }
  const degree = scoreSum / list.length;
  const first = attrList[0];
  return {
    list: attrList,
    name: first.name,
    source: first.source,
    target: first.target,
    degree
  };
}
|
|
2034
|
+
|
|
2035
|
+
// packages/knowledge/graph/define/query.ts
|
|
2036
|
+
import * as v14 from "valibot";
|
|
2037
|
+
/**
 * Schema for graph relation queries: `node` and `edge` are both optional
 * strings, but at least one of them has to be a string. A failure of that
 * check is forwarded to the `node` path.
 */
var GraphRelationQueryDefine = (() => {
  const shape = v14.object({
    node: v14.optional(v14.string()),
    edge: v14.optional(v14.string())
  });
  const hasNodeOrEdge = (input) =>
    typeof input.node === "string" || typeof input.edge === "string";
  const requireNodeOrEdge = v14.partialCheck(
    [["node"], ["edge"]],
    hasNodeOrEdge,
    "节点或边必须存在一个"
  );
  return v14.pipe(shape, v14.forward(requireNodeOrEdge, ["node"]));
})();
|
|
2051
|
+
|
|
2052
|
+
// packages/knowledge/graph/graph.query.service.ts
|
|
2053
|
+
import * as v15 from "valibot";
|
|
2054
|
+
/**
 * Read-side queries against the active knowledge and graph collections:
 * vector search for nodes/edges, reranking, and size-limited context assembly.
 */
var GraphQueryService = class {
  #qdClient = inject9(QdrantClientService9);
  #config = inject9(ConfigToken);
  #graphChange = inject9(GraphLocalService);
  #queryParams = inject9(QueryParamsToken);
  #text2vec = inject9(Text2VecToken);
  #reranker = inject9(ReRankerToken);

  /**
   * Returns the longest prefix of `list` whose rendered strings stay within
   * `limit` characters in total. The item that crosses the limit is excluded.
   */
  #listLimit(list, strListFn, limit) {
    let count = 0;
    const strList = list.map(strListFn);
    for (let i = 0; i < strList.length; i++) {
      count += strList[i].length;
      if (count > limit) {
        return list.slice(0, i);
      }
    }
    return list;
  }

  /** Applies the configured length limits to nodes, edges and chunks. */
  #contextLimit(context) {
    const { lengthLimit } = this.#queryParams;
    return {
      nodes: this.#listLimit(
        context.nodes,
        (item, i) => getNodeStrList(item, i).join("|"),
        lengthLimit.node
      ),
      edges: this.#listLimit(
        context.edges,
        (item, i) => getEdgeStrList(item, i).join("|"),
        lengthLimit.edge
      ),
      chunks: this.#listLimit(
        context.chunks,
        (item) => item.chunk,
        lengthLimit.chunk
      )
    };
  }

  /**
   * Vector-searches the active graph collection for points of the given
   * `kind` and returns their payloads with the point id merged in.
   */
  async #queryGraphCollection(content, kind) {
    const collectionName = this.#config().activateGraphName;
    const limit = this.#queryParams.topK * this.#reranker.getQueryRatio();
    const vector = await this.#text2vec(
      content,
      this.#config().activateCollection
    );
    const hits = await this.#qdClient.search(collectionName, {
      limit,
      vector: { name: "chunk", vector },
      filter: {
        must: {
          key: "kind",
          match: { value: kind }
        }
      },
      with_payload: true
    });
    return hits.map((hit) => ({ ...hit.payload, id: hit.id }));
  }

  /** Retrieves chunk payloads by id and tags each with the knowledge name. */
  async #findChunkDataById(ids) {
    const points = await this.#qdClient.retrieve(this.#config().activateName, {
      ids,
      with_payload: true
    });
    return points.map((point) => ({
      ...point.payload,
      knowledge: this.#config().name
    }));
  }

  /** Retrieves graph points for the given local items, id merged into payload. */
  async #retrieveGraphPoints(items) {
    const points = await this.#qdClient.retrieve(
      this.#config().activateGraphName,
      { ids: items.map((item) => item.id) }
    );
    return points.map((point) => ({ ...point.payload, id: point.id }));
  }

  #getEdgeByNode(nodes) {
    return this.#retrieveGraphPoints(this.#graphChange.getEdgeByNode(nodes));
  }

  #getNodeByEdge(edges) {
    return this.#retrieveGraphPoints(this.#graphChange.getNodeByEdge(edges));
  }

  /** Node-first lookup: matching nodes, their edges, and the nodes' chunks. */
  async #queryLocal(keywords) {
    const nodeGroupResult = await this.#queryGraphCollection(keywords, "node");
    const chunkResult = await this.#findChunkDataById(
      nodeGroupResult.map((item) => item.chunkId)
    );
    return {
      nodes: nodeGroupResult,
      edges: await this.#getEdgeByNode(nodeGroupResult),
      chunks: chunkResult
    };
  }

  /** Edge-first lookup: matching edges, their nodes, and the edges' chunks. */
  async #queryGlobal(keywords) {
    const edgeGroupResult = await this.#queryGraphCollection(keywords, "edge");
    const chunkResult = await this.#findChunkDataById(
      edgeGroupResult.map((item) => item["chunkId"])
    );
    return {
      nodes: await this.#getNodeByEdge(edgeGroupResult),
      edges: edgeGroupResult,
      chunks: chunkResult
    };
  }

  /**
   * Keeps the top 30% (rounded up) of a reranker result and groups the
   * referenced items by `name`. A Map is used so that names like
   * "constructor" or "__proto__" cannot collide with object prototype keys.
   */
  #groupTopRanked(ranked, items) {
    const keep = Math.ceil(ranked.length * 0.3);
    const groups = new Map();
    for (const { index, score } of ranked.slice(0, keep)) {
      const data = items[index];
      const group = groups.get(data.name);
      if (group) {
        group.push({ data, score });
      } else {
        groups.set(data.name, [{ data, score }]);
      }
    }
    return groups;
  }

  /**
   * Cuts a group where the accumulated description length exceeds `limit`.
   * At least one entry is always kept: the formatters read the first entry,
   * so an empty slice (first description alone over the limit) would throw.
   */
  #takeWithinDescriptionLimit(entries, limit) {
    let total = 0;
    const cut = entries.findIndex(({ data }) => {
      total += data.description.length;
      return total > limit;
    });
    return cut === -1 ? entries.slice() : entries.slice(0, Math.max(1, cut));
  }

  /**
   * Builds a reranked, size-limited context for `question`.
   *
   * @param {{ node?: string, edge?: string }} params - search text for the
   *   node-first and/or edge-first lookup.
   * @param {string} question - text the reranker scores candidates against.
   * @returns {Promise<{ nodes: object[], edges: object[], chunks: object[] }>}
   * @throws {Error} when neither `node` nor `edge` is a non-empty string.
   */
  async query(params, question) {
    const parsed = v15.parse(GraphRelationQueryDefine, params);
    await this.#graphChange.loadDataInitGraph$$();
    let context;
    if (parsed.node && parsed.edge) {
      const nodeContext = await this.#queryLocal(parsed.node);
      const edgeContext = await this.#queryGlobal(parsed.edge);
      const extraNodes = differenceBy(
        edgeContext.nodes,
        nodeContext.nodes,
        (item) => item.id
      );
      const extraEdges = differenceBy(
        nodeContext.edges,
        edgeContext.edges,
        (item) => item.id
      );
      context = {
        nodes: [...nodeContext.nodes, ...extraNodes],
        edges: [...edgeContext.edges, ...extraEdges],
        chunks: uniqBy2(
          [...nodeContext.chunks, ...edgeContext.chunks],
          (item) => item.chunk
        )
      };
    } else if (parsed.node) {
      context = await this.#queryLocal(parsed.node);
    } else if (parsed.edge) {
      context = await this.#queryGlobal(parsed.edge);
    } else {
      // Reachable with empty strings, which pass the schema's typeof check.
      throw new Error("节点或边必须存在一个");
    }

    const nodeRanked = await this.#reranker.run({
      value: question,
      docs: context.nodes.map((item) => item.embeddingChunk)
    });
    const nodeGroups = this.#groupTopRanked(nodeRanked, context.nodes);
    const edgeRanked = await this.#reranker.run({
      value: question,
      docs: context.edges.map((item) => item.embeddingChunk)
    });
    // Sized by the edge ranking itself; it used to be cut with the node
    // ranking's length, which looks like a copy-paste slip.
    const edgeGroups = this.#groupTopRanked(edgeRanked, context.edges);

    // NOTE(review): edges are limited with `nodeDescription` too, as before;
    // confirm whether a separate edge limit was intended.
    const descriptionLimit = this.#queryParams.lengthLimit.nodeDescription;
    const byDegreeDesc = (a, b) => b.degree - a.degree;
    const nodes = [...nodeGroups.values()]
      .map((group) => formatNodeAttr2(
        this.#takeWithinDescriptionLimit(group, descriptionLimit)
      ))
      .sort(byDegreeDesc);
    const edges = [...edgeGroups.values()]
      .map((group) => formatEdgeAttr2(
        this.#takeWithinDescriptionLimit(group, descriptionLimit)
      ))
      .sort(byDegreeDesc);
    const chunkRanked = await this.#reranker.run({
      value: question,
      docs: context.chunks.map((item) => item.embeddingChunk)
    });
    return this.#contextLimit({
      nodes,
      edges,
      chunks: chunkRanked.map((item) => context.chunks[item.index])
    });
  }

  /**
   * Returns up to 20 node names of the local graph that contain `str` and are
   * not already in `selectedList`.
   */
  async searchNode(str, selectedList) {
    await this.#graphChange.loadDataInitGraph$$();
    const limit = 20;
    const list = [];
    for (const nodeName of this.#graphChange.getGraph().nodes()) {
      if (selectedList.includes(nodeName) || !nodeName.includes(str)) {
        continue;
      }
      list.push(nodeName);
      if (list.length === limit) {
        break;
      }
    }
    return list;
  }

  /** Returns the groups of a group-by-`fileName` query on the active collection. */
  async getFileNameList() {
    const result = await this.#qdClient.queryGroups(
      this.#config().activateName,
      {
        limit: 9999,
        group_by: "fileName",
        group_size: 1,
        with_payload: []
      }
    );
    return result.groups;
  }

  /** Returns the points of one file, each payload tagged with the knowledge name. */
  async getChunkContent(fileName) {
    const { points } = await this.#qdClient.query(this.#config().activateName, {
      limit: 9999,
      filter: {
        must: {
          key: "fileName",
          match: { value: fileName }
        }
      },
      with_payload: true
    });
    return points.map((item) => ({
      ...item,
      payload: {
        ...item.payload,
        knowledge: this.#config().name
      }
    }));
  }

  /** Returns the exported local graph after making sure it is loaded. */
  async getGraphData() {
    await this.#graphChange.loadDataInitGraph$$();
    return this.#graphChange.getGraph().export();
  }

  /** Scrolls node points whose name is in `list`, up to `options.nodeSizeLimit`. */
  async queryNode(list, options) {
    const { points } = await this.#qdClient.scroll(
      this.#config().activateGraphName,
      {
        limit: options.nodeSizeLimit,
        filter: {
          must: [
            { key: "kind", match: { value: "node" } },
            { key: "name", match: { any: list } }
          ]
        },
        with_payload: true
      }
    );
    return points;
  }

  /** Vector-searches node points for `content`, grouped by node name. */
  async fuzzyQueryNode(content, options) {
    const collectionName = this.#config().activateGraphName;
    const vector = await this.#text2vec(
      content,
      this.#config().activateCollection
    );
    const { groups } = await this.#qdClient.searchPointGroups(collectionName, {
      limit: options.nodeLimit,
      score_threshold: options.score,
      vector: { name: "chunk", vector },
      group_by: "name",
      group_size: options.nodeSizeLimit,
      filter: {
        must: {
          key: "kind",
          match: { value: "node" }
        }
      },
      with_payload: true
    });
    return groups;
  }
};
|
|
2360
|
+
|
|
2361
|
+
// packages/knowledge/graph/graph.service.ts
|
|
2362
|
+
/**
 * Facade over the local graph and the stored graph. Every mutating method
 * updates the local graph first and then applies the same change through
 * GraphHandleService.
 */
var GraphService = class {
  local = inject10(GraphLocalService);
  #knowledge = inject10(GraphHandleService);
  knowledge = inject10(GraphKnolwdgeService);
  #injector = inject10(Injector3);
  graphExport$$ = this.local.graphExport$$;

  /**
   * Creates a GraphQueryService in a child injector that provides
   * `queryParams` under QueryParamsToken.
   */
  createQuery(queryParams) {
    const queryInjector = createInjector({
      providers: [
        GraphQueryService,
        { provide: QueryParamsToken, useValue: queryParams }
      ],
      parent: this.#injector
    });
    return queryInjector.get(GraphQueryService);
  }

  loadDataInit$$ = this.local.loadDataInitGraph$$;

  /** Delegates to the local graph's `initGraph`. */
  initGraph() {
    const { local } = this;
    return local.initGraph();
  }

  /** Delegates to the local graph's `getGraph`. */
  getGraph() {
    const { local } = this;
    return local.getGraph();
  }

  /** Splits a node locally, then in the stored graph. */
  async splitNode(options) {
    await this.local.splitNode(options);
    await this.#knowledge.splitNode(options);
  }

  /** Merges nodes locally, then in the stored graph. */
  async mergeNode(options) {
    await this.local.mergeNode(options);
    await this.#knowledge.mergeNode(options);
  }

  /** Changes a node entry locally, then in the stored graph. */
  async changeNodeDescription(item) {
    await this.local.changeNodeDescription(item);
    await this.#knowledge.changeNodeDescription(item);
  }

  /** Changes an edge; only the local update receives `oldItem`. */
  async changeEdge(item, oldItem) {
    await this.local.changeEdge(item, oldItem);
    await this.#knowledge.changeEdge(item);
  }

  /** Adds nodes/edges locally, then in the stored graph. */
  async add(input) {
    await this.local.add(input);
    await this.#knowledge.addNodeItem(input);
  }

  /** Deletes one node entry locally, then in the stored graph. */
  async deleteNodeItem(item) {
    await this.local.deleteNodeItem(item);
    await this.#knowledge.deleteNodeItem(item);
  }

  /** Deletes an edge locally (`deleteEdgeItem`), then in the stored graph. */
  async deleteEdge(item) {
    await this.local.deleteEdgeItem(item);
    await this.#knowledge.deleteEdge(item);
  }

  /** Deletes a node by name locally (`deleteNode`), then in the stored graph. */
  async deleteNodeByName(name) {
    await this.local.deleteNode(name);
    await this.#knowledge.deleteNodeByName(name);
  }
};
|
|
2417
|
+
|
|
2418
|
+
// packages/knowledge/knowledge.manager.service.ts
|
|
2419
|
+
import * as fs from "fs/promises";
|
|
2420
|
+
import { fileTypeFromBuffer } from "file-type";
|
|
2421
|
+
import path3 from "path";
|
|
2422
|
+
|
|
2423
|
+
// packages/knowledge/article/article.knowledge.service.ts
|
|
2424
|
+
import { getHash as getHash2 } from "@shenghuabi/knowledge/util";
|
|
2425
|
+
import { createNormalizeVfs as createNormalizeVfs3, path as path2 } from "@cyia/vfs2";
|
|
2426
|
+
import { computed as computed4, inject as inject11 } from "static-injector";
|
|
2427
|
+
import { QdrantClientService as QdrantClientService10 } from "@shenghuabi/knowledge/qdrant";
|
|
2428
|
+
import { promise as fastq5 } from "fastq";
|
|
2429
|
+
|
|
2430
|
+
// packages/knowledge/article/define/payload.ts
|
|
2431
|
+
import * as v16 from "valibot";
|
|
2432
|
+
/** Payload schema of a stored article chunk. */
var ArticlePayload = v16.object({
  // File-level fields.
  fileHash: v16.string(),
  fullName: v16.string(),
  name: v16.string(),
  dir: v16.string(),
  // Chunk-level fields.
  chunk: v16.string(),
  hash: v16.string(),
  // Checked with `Boolean` as the custom predicate.
  loc: v16.custom(Boolean)
});
|
|
2441
|
+
|
|
2442
|
+
// packages/knowledge/article/article.knowledge.service.ts
|
|
2443
|
+
import * as v17 from "valibot";
|
|
2444
|
+
/**
 * Knowledge service for "article" knowledge bases.
 *
 * Extends `NormalKnowledgeService` and relies on members it inherits from
 * there (`insertItemOnly`, `createCollection`). All Qdrant access goes through
 * the injected `QdrantClientService`; the active collection is addressed via
 * the `activateName` alias from the injected config.
 */
var ArticleKnowledgeService = class extends NormalKnowledgeService {
  #text2vec = inject11(Text2VecToken);
  #reranker = inject11(ReRankerToken);
  #config = inject11(ConfigToken);
  #dir = inject11(DirToken);
  #qdClient = inject11(QdrantClientService10);
  #util = inject11(KnowledgeUtilService);
  // Virtual file system rooted at the injected directory; recomputed when `#dir` changes.
  #vfs = computed4(() => createNormalizeVfs3({ dir: this.#dir() }));
  KeyWordIndex = ["fullName", "dir", "fileHash"];

  /**
   * Builds the file-level part of a point payload.
   *
   * @param fileName File name; stored as `fullName` and split into `name` / `dir`.
   * @param content  File content; only its hash is stored.
   * @returns {{fileHash: string, fullName: string, name: string, dir: string}}
   */
  getPayload(fileName, content) {
    return {
      fileHash: getHash2(content),
      fullName: fileName,
      name: path2.basename(fileName),
      dir: path2.dirname(fileName)
    };
  }

  /**
   * Inserts a file unless the active collection already holds a point with
   * the same content hash OR the same `fullName`.
   *
   * @param fileName File name used as `fullName`.
   * @param content  File content.
   * @returns {Promise<void>}
   */
  async insertItem(fileName, content) {
    const { points } = await this.#qdClient.scroll(
      this.#config().activateName,
      {
        // One hit is enough to know the file is already indexed.
        limit: 1,
        filter: {
          should: [
            { key: "fileHash", match: { value: getHash2(content) } },
            { key: "fullName", match: { value: fileName } }
          ]
        }
      }
    );
    if (points.length) {
      return;
    }
    await this.insertItemOnly(fileName, content, this.#config().collectionList);
  }

  /**
   * Deletes every point whose `fullName` equals `fileName` from all
   * collections listed in the config.
   *
   * @param fileName Value matched against the `fullName` payload key.
   * @returns {Promise<void>}
   */
  async deleteItem(fileName) {
    await this.#util.multiDelete(
      this.#config().collectionList.map((item) => item.collectionName),
      {
        filter: {
          must: [{ key: "fullName", match: { value: fileName } }],
          should: null
        }
      }
    );
  }

  /**
   * Creates a new collection and back-fills it with every distinct file
   * (by `fileHash`) found in the currently active collection, then makes the
   * new collection the active one.
   *
   * Files are re-read from the virtual file system and re-inserted with a
   * concurrency of 10. If anything fails, the new collection is deleted and
   * the error is rethrown; the active alias is left untouched.
   *
   * @param collection Collection definition; must expose `collectionName`.
   * @returns {Promise<void>}
   * @throws Rethrows the first scroll or worker error after rolling back.
   */
  async addCollection(collection) {
    await this.createCollection(collection);
    try {
      const queue = fastq5(async (payload) => {
        payload = v17.parse(ArticlePayload, payload);
        const content = await this.#vfs().readContent(payload.fullName);
        if (!content) {
          // File is missing or empty: nothing to index.
          return;
        }
        await this.insertItemOnly(payload.fullName, content, [collection]);
      }, 10);
      let offset;
      // Content hashes already queued; each file is re-indexed once even
      // though it is stored as many chunk points.
      const wordSet = /* @__PURE__ */ new Set();
      let queueError;
      queue.error((error) => {
        if (error) {
          queueError = error;
          // Drop everything still waiting; tasks already running finish.
          queue.killAndDrain();
        }
      });
      do {
        const { points, next_page_offset } = await this.#qdClient.scroll(
          this.#config().activateName,
          {
            limit: 5e3,
            with_payload: true,
            offset
          }
        );
        if (queueError) {
          // A worker failed while this page was loading. The collection is
          // going to be rolled back, so do not enqueue or fetch any more.
          break;
        }
        for (const point of points) {
          const id = `${point.payload["fileHash"]}`;
          if (wordSet.has(id)) {
            continue;
          }
          wordSet.add(id);
          queue.push(point.payload);
        }
        offset = next_page_offset;
      } while (offset);
      await queue.drained();
      if (queueError) {
        throw queueError;
      }
    } catch (error) {
      // Roll back the half-filled collection before surfacing the failure.
      await this.#qdClient.deleteCollection(collection.collectionName);
      throw error;
    }
    await this.#qdClient.setActivateCollection(
      collection.collectionName,
      this.#config().activateName
    );
  }

  /**
   * Destroys the knowledge base by handing all configured collection names
   * to `KnowledgeUtilService.destroyKnowledge`.
   *
   * @returns Whatever `destroyKnowledge` returns.
   */
  destroy() {
    return this.#util.destroyKnowledge(
      this.#config().collectionList.map(({ collectionName }) => collectionName)
    );
  }

  /**
   * Vector search on the active collection, grouped by the `hash` payload
   * key, with every group re-ranked against the query text.
   *
   * More hits per group are requested than needed (`group_size` multiplied by
   * the reranker's query ratio), then each group is cut back to `group_size`
   * in reranker order.
   *
   * @param text    Query text; embedded with the active collection's settings.
   * @param options `{ score?, group_size, limit }`; `score` is used as the
   *                score threshold.
   * @param filter  Optional filter forwarded to the group search.
   * @returns {Promise<Array>} The groups with `hits` replaced by the re-ranked subset.
   */
  async searchGroupByChunk(text, options, filter) {
    const queryResult = await this.#qdClient.searchPointGroups(
      this.#config().activateName,
      {
        group_by: "hash",
        filter,
        with_payload: true,
        with_vector: false,
        score_threshold: options?.score,
        vector: {
          name: "chunk",
          vector: await this.#text2vec(text, this.#config().activateCollection)
        },
        group_size: options.group_size * this.#reranker.getQueryRatio(),
        limit: options.limit
      }
    );
    return Promise.all(
      queryResult.groups.map(async (item) => {
        const resultList = await this.#reranker.run({
          value: text,
          docs: item.hits.map(
            (item2) => item2.payload?.["embeddingChunk"]
          )
        });
        return {
          ...item,
          // `index` refers back into the original `item.hits` array.
          hits: resultList.slice(0, options.group_size).map(({ index }) => item.hits[index])
        };
      })
    );
  }
};
|
|
2580
|
+
|
|
2581
|
+
// packages/knowledge/article/define/config.ts
|
|
2582
|
+
import * as v18 from "valibot";
|
|
2583
|
+
/**
 * Valibot schema describing one collection of an article knowledge base.
 */
var ArticleCollectionDefine = v18.object({
  // Name of the collection.
  collectionName: v18.string(),
  // Optional embedding templates; the only recognised slot is `entry`,
  // which is itself optional.
  embeddingTemplate: v18.optional(
    v18.object({ entry: v18.optional(EmbeddingTemplateDefine) })
  ),
  // Required number. NOTE(review): presumably the vector dimension; confirm against createCollection.
  size: v18.number()
});
|
|
2592
|
+
/**
 * Valibot schema for the configuration of an article knowledge base.
 *
 * Validates the shared base entries plus the article-specific fields, then
 * adds a derived `activateName` property to the parsed result.
 */
var ArticleKnowledgeConfigDefine = v18.pipe(
  v18.object({
    ...BaseKnowledgeConfig.entries,
    // Defaults to "article" when the field is omitted.
    type: v18.optional(v18.literal("article"), "article"),
    collectionList: v18.array(ArticleCollectionDefine)
  }),
  v18.transform((item) => {
    /** Activated normal knowledge base (file chunks). */
    const activateName = getActivateCollectionName(item.name);
    return { ...item, activateName };
  })
);
|
|
2604
|
+
|
|
2605
|
+
// packages/knowledge/knowledge.manager.service.ts
|
|
2606
|
+
import { QdrantClientService as QdrantClientService11 } from "@shenghuabi/knowledge/qdrant";
|
|
2607
|
+
import { LogToken as LogToken6 } from "@shenghuabi/knowledge/util";
|
|
2608
|
+
/**
 * Facade that resolves a knowledge base by name and forwards operations to
 * the matching service (`normal`, `article`, `dict` or `normal-graph`).
 *
 * One child injector is built per knowledge base name and cached in
 * `#cacheMap`. Subclasses are expected to override `getConfig`; the
 * implementation here always throws.
 */
var KnowledgeManagerService = class extends RootStaticInjectOptions2 {
  #injector = inject12(Injector4);
  #fileParser = inject12(FileParserService);
  // name -> child injector created by `getInjector`.
  #cacheMap = /* @__PURE__ */ new Map();
  #qdClient = inject12(QdrantClientService11);

  /**
   * Returns the config for a knowledge base. Must be overridden.
   *
   * `getInjector` calls the resolved value as a function (`config()`), so an
   * override has to resolve to a callable that returns the config object.
   *
   * @param name Knowledge base name.
   * @throws {Error} Always, in this base implementation.
   */
  async getConfig(name) {
    throw new Error("未实现");
  }

  /**
   * Returns the cached injector for `name`, creating it on first use.
   *
   * `extraProviders` are only applied when the injector is created; on a
   * cache hit they are ignored.
   *
   * @param name             Knowledge base name (cache key).
   * @param [extraProviders] Additional providers appended after `ConfigToken`.
   * @returns {Promise<object>} The child injector.
   * @throws {Error} When the config `type` is not one of the supported kinds.
   */
  async getInjector(name, extraProviders) {
    let injector = this.#cacheMap.get(name);
    if (!injector) {
      const config = await this.getConfig(name);
      const CommonProviders = [
        { provide: ConfigToken, useValue: config },
        ...extraProviders ?? []
      ];
      const type = config().type;
      switch (type) {
        case "normal":
          injector = createInjector2({
            providers: [NormalKnowledgeService, ...CommonProviders],
            parent: this.#injector
          });
          break;
        case "article":
          injector = createInjector2({
            providers: [ArticleKnowledgeService, ...CommonProviders],
            parent: this.#injector
          });
          break;
        case "dict":
          injector = createInjector2({
            providers: [DictKnowledgeService, ...CommonProviders],
            parent: this.#injector
          });
          break;
        case "normal-graph":
          injector = createInjector2({
            providers: [
              GraphKnolwdgeService,
              GraphLocalService,
              GraphHandleService,
              GraphService,
              GraphKnowledgeUtilService,
              ...CommonProviders
            ],
            parent: this.#injector
          });
          break;
        default:
          // Throw a real Error (with stack and context) rather than a bare string.
          throw new Error(
            `Unsupported knowledge type "${String(type)}" for knowledge base "${String(name)}"`
          );
      }
      this.#cacheMap.set(name, injector);
    }
    return injector;
  }

  /**
   * Resolves the service instance that handles the knowledge base `name`.
   *
   * @param name Knowledge base name.
   * @returns {Promise<object>} The service matching the config `type`.
   * @throws {Error} When the config `type` is not one of the supported kinds.
   */
  async #get(name) {
    const injector = await this.getInjector(name);
    const config = injector.get(ConfigToken);
    const type = config().type;
    switch (type) {
      case "normal":
        return injector.get(NormalKnowledgeService);
      case "dict":
        return injector.get(DictKnowledgeService);
      case "normal-graph":
        return injector.get(GraphKnolwdgeService);
      case "article":
        return injector.get(ArticleKnowledgeService);
      default:
        throw new Error(
          `Unsupported knowledge type "${String(type)}" for knowledge base "${String(name)}"`
        );
    }
  }

  /**
   * Creates the knowledge base with an initial collection.
   *
   * @param name       Knowledge base name.
   * @param collection Raw collection definition; normalised by the service's
   *                   `formatCollection` before creation.
   */
  async create(name, collection) {
    const instance = await this.#get(name);
    await instance.create(instance.formatCollection(collection));
  }

  /**
   * Used by normal and graph knowledge bases.
   *
   * Reads and parses each file, then inserts every parsed item whose trimmed
   * content is non-empty. Stops silently as soon as `signal2` is aborted.
   *
   * @param name         Knowledge base name.
   * @param filePathList Paths handed directly to `fs.readFile`.
   * @param [signal2]    Optional abort signal, also forwarded to `insertItem`.
   */
  async importFiles(name, filePathList, signal2) {
    const injector = await this.getInjector(name);
    const logService = injector.get(LogToken6);
    const instance = await this.#get(name);
    for (const filePath of filePathList) {
      if (signal2?.aborted) {
        return;
      }
      const content = await fs.readFile(filePath);
      const list = await this.#fileParser.parse(filePath, content);
      for (const item of list) {
        if (signal2?.aborted) {
          return;
        }
        logService.info(`正在导入 ${filePath}/${item.title}`);
        const content2 = item.content.trim();
        if (content2) {
          await instance.insertItem(item.title, content2, signal2);
        } else {
          logService.warn(`内容为空 ${filePath}/${item.title}`);
        }
      }
    }
  }

  /**
   * Dictionary only.
   *
   * @param name  Knowledge base name.
   * @param input Forwarded unchanged to the service's `importDict`.
   * @returns Whatever the service's `importDict` resolves to.
   */
  async importDict(name, input) {
    const instance = await this.#get(name);
    return await instance.importDict(input);
  }

  /**
   * Public accessor for the service instance behind `name`.
   *
   * @param name Knowledge base name.
   */
  async get(name) {
    return await this.#get(name);
  }

  /**
   * Imports plain-text files located under `dir`.
   *
   * Any file for which `fileTypeFromBuffer` detects a type is skipped; the
   * remaining buffers are decoded with `Buffer#toString()` and inserted under
   * their relative path. Stops silently when `signal2` is aborted.
   *
   * @param name         Knowledge base name.
   * @param dir          Base directory the relative paths are joined onto.
   * @param filePathList Paths relative to `dir`; also used as item names.
   * @param [signal2]    Optional abort signal.
   */
  async importTextFile(name, dir, filePathList, signal2) {
    const injector = await this.getInjector(name);
    const logService = injector.get(LogToken6);
    const instance = await this.#get(name);
    for (const filePath of filePathList) {
      if (signal2?.aborted) {
        return;
      }
      logService.info(`正在导入 ${filePath}`);
      const buffer = await fs.readFile(path3.join(dir, filePath));
      const type = await fileTypeFromBuffer(buffer);
      if (type) {
        // A type was detected, so the file is treated as non-text and skipped.
        continue;
      }
      let content;
      try {
        content = buffer.toString();
      } catch (error) {
        // Best effort: a file that cannot be turned into a string is skipped.
        continue;
      }
      await instance.insertItem(filePath, content);
    }
  }

  /**
   * Deletes one item from the knowledge base.
   *
   * @param name     Knowledge base name.
   * @param fileName Item identifier forwarded to the service.
   */
  async deleteItem(name, fileName) {
    const instance = await this.#get(name);
    await instance.deleteItem(fileName);
  }

  /**
   * Updates one item of the knowledge base.
   *
   * @param name     Knowledge base name.
   * @param fileName Item identifier forwarded to the service.
   * @param content  New content forwarded to the service.
   */
  async updateItem(name, fileName, content) {
    const instance = await this.#get(name);
    await instance.updateItem(fileName, content);
  }

  /**
   * Generic interface: adds a collection to the knowledge base.
   *
   * @param name       Knowledge base name.
   * @param collection Raw collection definition; normalised by `formatCollection`.
   */
  async addCollection(name, collection) {
    const instance = await this.#get(name);
    await instance.addCollection(instance.formatCollection(collection));
  }

  /**
   * Generic interface: deletes a collection from the knowledge base.
   *
   * @param name           Knowledge base name.
   * @param collectionName Collection to delete.
   * @returns Whatever the service's `deleteCollection` resolves to.
   */
  async deleteCollection(name, collectionName) {
    const instance = await this.#get(name);
    return await instance.deleteCollection(collectionName);
  }

  /**
   * Switches the active collection of the knowledge base.
   *
   * @param name           Knowledge base name.
   * @param collectionName Collection to activate.
   */
  async changeActivateCollection(name, collectionName) {
    const instance = await this.#get(name);
    await instance.changeActivateCollection(collectionName);
  }

  /**
   * Destroys the knowledge base and forgets its cached injector.
   *
   * The cache entry is removed before `destroy()` runs, so it stays removed
   * even if the service's `destroy()` rejects.
   *
   * @param name Knowledge base name.
   */
  async destroy(name) {
    const instance = await this.#get(name);
    this.#cacheMap.delete(name);
    await instance.destroy();
  }

  /**
   * Exports the knowledge base.
   *
   * @param name Knowledge base name.
   * @returns Whatever the service's `export` resolves to.
   */
  async export(name) {
    const instance = await this.#get(name);
    return await instance.export();
  }

  /**
   * Restores collections from snapshots, one after another, and re-points
   * the active-collection alias(es).
   *
   * @param name    Knowledge base name.
   * @param options `{ snapshotList: [{ collection, filePath, checksum }],
   *                activateCollection, type }`.
   */
  async import(name, options) {
    const logService = (await this.getInjector(name)).get(LogToken6);
    for (const item of options.snapshotList) {
      logService.info(`正在导入 ${item.collection}`);
      await this.#qdClient.recoverSnapshot(item.collection, {
        location: item.filePath,
        priority: "no_sync",
        checksum: item.checksum
      });
    }
    await this.#qdClient.setActivateCollection(
      options.activateCollection,
      getActivateCollectionName(name)
    );
    if (options.type === "normal-graph") {
      // Graph knowledge bases also get an alias for the companion graph collection.
      await this.#qdClient.setActivateCollection(
        getGraphCollectionName(options.activateCollection),
        getActivateCollectionName(getGraphCollectionName(name))
      );
    }
  }

  /**
   * Returns the `GraphService` registered in the injector of `name`.
   *
   * @param name Knowledge base name.
   */
  async getGraph(name) {
    const injector = await this.getInjector(name);
    return injector.get(GraphService);
  }
};
|
|
2796
|
+
|
|
2797
|
+
// packages/knowledge/common/define/chunk.ts
|
|
2798
|
+
import * as v19 from "valibot";
|
|
2799
|
+
/**
 * Valibot schema for the payload of a file-chunk point.
 */
var FileChunkPayloadDefine = v19.object({
  chunk: v19.string(),
  fileName: v19.string(),
  // Custom check: passes for any value for which `Boolean(value)` is true.
  loc: v19.custom(Boolean),
  hash: v19.string(),
  // Text handed to the reranker as the document body in `searchGroupByChunk`.
  embeddingChunk: v19.string()
});
|
|
2806
|
+
export {
|
|
2807
|
+
ArticleCollectionDefine,
|
|
2808
|
+
ArticleKnowledgeConfigDefine,
|
|
2809
|
+
ArticleKnowledgeService,
|
|
2810
|
+
ArticlePayload,
|
|
2811
|
+
BaseKnowledgeConfig,
|
|
2812
|
+
CHAT_INPUT,
|
|
2813
|
+
ConfigToken,
|
|
2814
|
+
ContentParserToken,
|
|
2815
|
+
DICT_PREFIX,
|
|
2816
|
+
DictCollectionDefine,
|
|
2817
|
+
DictKnowledgeConfigDefine,
|
|
2818
|
+
DictKnowledgeService,
|
|
2819
|
+
DirToken,
|
|
2820
|
+
ENTITY_DEFINE,
|
|
2821
|
+
ENTITY_RELATION_DEFINE,
|
|
2822
|
+
EdgeItemDefine,
|
|
2823
|
+
EdgeItemNewDefine,
|
|
2824
|
+
EdgePayloadDefine,
|
|
2825
|
+
EdgePayloadNewDefine,
|
|
2826
|
+
EmbeddingTemplateDefine,
|
|
2827
|
+
EntityExtraDefine,
|
|
2828
|
+
FileChunkPayloadDefine,
|
|
2829
|
+
GetConfigToken,
|
|
2830
|
+
GraphCollectionDefine,
|
|
2831
|
+
GraphHandleService,
|
|
2832
|
+
GraphKnolwdgeService,
|
|
2833
|
+
GraphKnowledgeConfigDefine,
|
|
2834
|
+
GraphLocalService,
|
|
2835
|
+
GraphQueryService,
|
|
2836
|
+
GraphService,
|
|
2837
|
+
KeywordPayloadNewDefine,
|
|
2838
|
+
KnowledgeGraphCreateDefine,
|
|
2839
|
+
KnowledgeManagerService,
|
|
2840
|
+
NodeItemDefine,
|
|
2841
|
+
NodeItemNewDefine,
|
|
2842
|
+
NodePayloadDefine,
|
|
2843
|
+
NodePayloadNewDefine,
|
|
2844
|
+
NormalCollectionDefine,
|
|
2845
|
+
NormalKnowledgeConfigDefine,
|
|
2846
|
+
NormalKnowledgeService,
|
|
2847
|
+
OCRToken,
|
|
2848
|
+
QueryParamsToken,
|
|
2849
|
+
RagChatToken,
|
|
2850
|
+
ReRankerToken,
|
|
2851
|
+
Text2VecToken,
|
|
2852
|
+
TextSplitterToken,
|
|
2853
|
+
getActivateCollectionName,
|
|
2854
|
+
getGraphCollectionName
|
|
2855
|
+
};
|
|
2856
|
+
//# sourceMappingURL=knowledge.mjs.map
|