@ontos-ai/knowhere-claw 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/connect-builder.d.ts +2 -0
- package/dist/connect-builder.js +9 -10
- package/dist/graph-builder.d.ts +4 -1
- package/dist/graph-builder.js +15 -10
- package/dist/index.js +1 -7
- package/dist/kg-service.js +8 -3
- package/dist/parser.d.ts +4 -8
- package/dist/parser.js +25 -243
- package/dist/store.d.ts +4 -14
- package/dist/store.js +21 -106
- package/dist/text.js +1 -13
- package/dist/tools.js +14 -847
- package/dist/types.d.ts +1 -58
- package/openclaw.plugin.json +71 -1
- package/package.json +1 -1
- package/skills/knowhere_memory/SKILL.md +80 -98
- package/skills/knowhere/SKILL.md +0 -285
- /package/dist/__tests__/{read-result-file-tool.test.d.ts → storage-layout.test.d.ts} +0 -0
package/dist/store.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { isNodeError } from "./types.js";
|
|
2
|
-
import {
|
|
2
|
+
import { extractKnowhereResultArchive, readStoredKnowhereResultSummary } from "./parser.js";
|
|
3
3
|
import { hashString, normalizeWhitespace, sanitizeStringArray, slugify } from "./text.js";
|
|
4
4
|
import { deriveMessageContextScopeKey, findConversationSegmentValue, parseConversationSessionKey } from "./session.js";
|
|
5
5
|
import fs from "node:fs/promises";
|
|
@@ -7,10 +7,8 @@ import path from "node:path";
|
|
|
7
7
|
import { randomUUID } from "node:crypto";
|
|
8
8
|
//#region src/store.ts
|
|
9
9
|
const INDEX_VERSION = 1;
|
|
10
|
-
const
|
|
11
|
-
const DOCUMENT_PAYLOAD_CACHE_LIMIT = 16;
|
|
10
|
+
const METADATA_DIRECTORY_NAME = "metadata";
|
|
12
11
|
const METADATA_FILE_NAME = "metadata.json";
|
|
13
|
-
const RESULT_DIRECTORY_NAME = "result";
|
|
14
12
|
const ROUTES_FILE_NAME = "routes.json";
|
|
15
13
|
async function pathExists(targetPath) {
|
|
16
14
|
try {
|
|
@@ -119,10 +117,9 @@ function createEmptyIndex(scope) {
|
|
|
119
117
|
function buildStoredDocumentPaths(scope, docId) {
|
|
120
118
|
const documentDir = path.join(scope.documentsDir, docId);
|
|
121
119
|
return {
|
|
122
|
-
browseIndexPath: path.join(documentDir, BROWSE_INDEX_FILE_NAME),
|
|
123
120
|
documentDir,
|
|
124
|
-
metadataPath: path.join(documentDir, METADATA_FILE_NAME),
|
|
125
|
-
resultDir: path.join(documentDir, RESULT_DIRECTORY_NAME)
|
|
121
|
+
metadataPath: path.join(scope.metadataDir, `${docId}.json`),
|
|
122
|
+
legacyMetadataPath: path.join(documentDir, METADATA_FILE_NAME)
|
|
126
123
|
};
|
|
127
124
|
}
|
|
128
125
|
async function readStoredDocumentMetadata(metadataPath) {
|
|
@@ -142,7 +139,6 @@ var KnowhereStore = class {
|
|
|
142
139
|
scopeMode;
|
|
143
140
|
logger;
|
|
144
141
|
indexCache;
|
|
145
|
-
documentPayloadCache;
|
|
146
142
|
scopeAccessChains;
|
|
147
143
|
scopeKeyAliases;
|
|
148
144
|
sessionScopeKeysBySessionId;
|
|
@@ -154,7 +150,6 @@ var KnowhereStore = class {
|
|
|
154
150
|
this.scopeMode = params.scopeMode;
|
|
155
151
|
this.logger = params.logger;
|
|
156
152
|
this.indexCache = /* @__PURE__ */ new Map();
|
|
157
|
-
this.documentPayloadCache = /* @__PURE__ */ new Map();
|
|
158
153
|
this.scopeAccessChains = /* @__PURE__ */ new Map();
|
|
159
154
|
this.scopeKeyAliases = /* @__PURE__ */ new Map();
|
|
160
155
|
this.sessionScopeKeysBySessionId = /* @__PURE__ */ new Map();
|
|
@@ -277,80 +272,32 @@ var KnowhereStore = class {
|
|
|
277
272
|
key: rawKey,
|
|
278
273
|
label: rawKey === "global" ? "global" : `${mode}:${rawKey}`,
|
|
279
274
|
rootDir: scopeRoot,
|
|
275
|
+
metadataDir: path.join(scopeRoot, METADATA_DIRECTORY_NAME),
|
|
280
276
|
documentsDir: path.join(scopeRoot, "documents"),
|
|
281
277
|
indexPath: path.join(scopeRoot, "index.json")
|
|
282
278
|
};
|
|
283
279
|
}
|
|
280
|
+
async readDocumentMetadata(scope, docId) {
|
|
281
|
+
const paths = buildStoredDocumentPaths(scope, docId);
|
|
282
|
+
const preferredMetadata = await readStoredDocumentMetadata(paths.metadataPath);
|
|
283
|
+
if (preferredMetadata) return preferredMetadata;
|
|
284
|
+
return readStoredDocumentMetadata(paths.legacyMetadataPath);
|
|
285
|
+
}
|
|
284
286
|
async listDocuments(scope) {
|
|
285
287
|
return this.runWithScopeAccessLock(scope, async () => {
|
|
286
288
|
return [...(await this.getIndex(scope, true)).documents].sort((left, right) => String(right.updatedAt || right.ingestedAt || "").localeCompare(String(left.updatedAt || left.ingestedAt || "")));
|
|
287
289
|
});
|
|
288
290
|
}
|
|
289
|
-
async loadDocumentPayload(scope, docId) {
|
|
290
|
-
return this.runWithScopeAccessLock(scope, async () => {
|
|
291
|
-
const cacheKey = this.buildDocumentPayloadCacheKey(scope, docId);
|
|
292
|
-
const cachedPayload = this.documentPayloadCache.get(cacheKey);
|
|
293
|
-
if (cachedPayload) {
|
|
294
|
-
this.touchDocumentPayloadCache(cacheKey, cachedPayload);
|
|
295
|
-
return cachedPayload;
|
|
296
|
-
}
|
|
297
|
-
const paths = buildStoredDocumentPaths(scope, docId);
|
|
298
|
-
const metadata = await readStoredDocumentMetadata(paths.metadataPath);
|
|
299
|
-
if (!metadata) return null;
|
|
300
|
-
const resultContent = await readStoredKnowhereResultContent(paths.resultDir);
|
|
301
|
-
const browseIndex = await this.loadOrBuildBrowseIndex(paths, resultContent.manifest, resultContent.chunks);
|
|
302
|
-
const payload = {
|
|
303
|
-
version: metadata.version,
|
|
304
|
-
document: metadata.document,
|
|
305
|
-
manifest: resultContent.manifest,
|
|
306
|
-
jobResult: metadata.jobResult,
|
|
307
|
-
fullMarkdown: resultContent.fullMarkdown,
|
|
308
|
-
hierarchy: resultContent.hierarchy,
|
|
309
|
-
browseIndex,
|
|
310
|
-
rawZipSha1: metadata.rawZipSha1,
|
|
311
|
-
chunks: resultContent.chunks
|
|
312
|
-
};
|
|
313
|
-
this.touchDocumentPayloadCache(cacheKey, payload);
|
|
314
|
-
return payload;
|
|
315
|
-
});
|
|
316
|
-
}
|
|
317
|
-
getResultFileAbsolutePath(scope, docId, relativePath) {
|
|
318
|
-
return resolveResultEntryPath(buildStoredDocumentPaths(scope, docId).resultDir, relativePath);
|
|
319
|
-
}
|
|
320
|
-
async readResultFile(scope, docId, relativePath) {
|
|
321
|
-
return this.runWithScopeAccessLock(scope, async () => {
|
|
322
|
-
const paths = buildStoredDocumentPaths(scope, docId);
|
|
323
|
-
const metadata = await readStoredDocumentMetadata(paths.metadataPath);
|
|
324
|
-
if (!metadata) return null;
|
|
325
|
-
const filePath = resolveResultEntryPath(paths.resultDir, relativePath);
|
|
326
|
-
try {
|
|
327
|
-
const text = await fs.readFile(filePath, "utf-8");
|
|
328
|
-
return {
|
|
329
|
-
document: metadata.document,
|
|
330
|
-
relativePath: path.posix.normalize(relativePath.replace(/\\/g, "/")),
|
|
331
|
-
text
|
|
332
|
-
};
|
|
333
|
-
} catch (error) {
|
|
334
|
-
if (isNodeError(error) && error.code === "ENOENT") return {
|
|
335
|
-
document: metadata.document,
|
|
336
|
-
relativePath: path.posix.normalize(relativePath.replace(/\\/g, "/")),
|
|
337
|
-
text: null
|
|
338
|
-
};
|
|
339
|
-
throw error;
|
|
340
|
-
}
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
291
|
async saveDownloadedDocument(scope, payload, options = {}) {
|
|
344
292
|
return this.runWithScopeAccessLock(scope, async () => {
|
|
345
293
|
const index = await this.getIndex(scope, true);
|
|
346
294
|
const existingIds = new Set(index.documents.map((document) => document.id));
|
|
347
295
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
348
296
|
const tempPaths = buildStoredDocumentPaths(scope, `.tmp-${randomUUID()}`);
|
|
297
|
+
let finalPaths = null;
|
|
349
298
|
try {
|
|
350
|
-
await extractKnowhereResultArchive(payload.downloadedResult, tempPaths.resultDir);
|
|
351
|
-
const resultSummary = await readStoredKnowhereResultSummary(tempPaths.resultDir);
|
|
352
|
-
const resultContent = await readStoredKnowhereResultContent(tempPaths.resultDir);
|
|
353
|
-
const browseIndex = await buildStoredBrowseIndex(tempPaths.resultDir, resultContent.manifest, resultContent.chunks);
|
|
299
|
+
await extractKnowhereResultArchive(payload.downloadedResult, tempPaths.documentDir);
|
|
300
|
+
const resultSummary = await readStoredKnowhereResultSummary(tempPaths.documentDir);
|
|
354
301
|
const documentIdSeed = [
|
|
355
302
|
payload.sourceType,
|
|
356
303
|
payload.source,
|
|
@@ -364,6 +311,7 @@ var KnowhereStore = class {
|
|
|
364
311
|
if (payload.docId && existingDocument && options.overwrite !== true) throw new Error(`Document ${documentIdCandidate} already exists in scope ${scope.label}.`);
|
|
365
312
|
const docId = existingDocument && options.overwrite === true ? existingDocument.id : buildUniqueDocumentId(documentIdCandidate, documentIdSeed, existingIds);
|
|
366
313
|
const paths = buildStoredDocumentPaths(scope, docId);
|
|
314
|
+
finalPaths = paths;
|
|
367
315
|
const originalFileName = deriveOriginalFileName(payload, resultSummary.manifest);
|
|
368
316
|
const documentRecord = {
|
|
369
317
|
id: docId,
|
|
@@ -392,16 +340,17 @@ var KnowhereStore = class {
|
|
|
392
340
|
rawZipSha1: payload.downloadedResult.rawZipSha1
|
|
393
341
|
};
|
|
394
342
|
await writeJsonAtomic(tempPaths.metadataPath, metadata);
|
|
395
|
-
await writeJsonAtomic(tempPaths.browseIndexPath, browseIndex);
|
|
396
343
|
await this.removeDocumentArtifacts(paths);
|
|
397
344
|
await ensureDir(scope.documentsDir);
|
|
345
|
+
await ensureDir(scope.metadataDir);
|
|
398
346
|
await fs.rename(tempPaths.documentDir, paths.documentDir);
|
|
347
|
+
await fs.rename(tempPaths.metadataPath, paths.metadataPath);
|
|
399
348
|
index.documents = index.documents.filter((document) => document.id !== docId).concat(documentRecord);
|
|
400
|
-
this.deleteDocumentPayloadCache(scope, docId);
|
|
401
349
|
await this.persistIndex(scope, index);
|
|
402
350
|
return documentRecord;
|
|
403
351
|
} catch (error) {
|
|
404
352
|
await this.removeDocumentArtifacts(tempPaths).catch(() => void 0);
|
|
353
|
+
if (finalPaths) await this.removeDocumentArtifacts(finalPaths).catch(() => void 0);
|
|
405
354
|
throw error;
|
|
406
355
|
}
|
|
407
356
|
});
|
|
@@ -413,7 +362,6 @@ var KnowhereStore = class {
|
|
|
413
362
|
if (!existingDocument) return null;
|
|
414
363
|
await this.removeDocumentArtifacts(buildStoredDocumentPaths(scope, docId));
|
|
415
364
|
index.documents = index.documents.filter((document) => document.id !== docId);
|
|
416
|
-
this.deleteDocumentPayloadCache(scope, docId);
|
|
417
365
|
await this.persistIndex(scope, index);
|
|
418
366
|
return existingDocument;
|
|
419
367
|
});
|
|
@@ -426,7 +374,6 @@ var KnowhereStore = class {
|
|
|
426
374
|
force: true
|
|
427
375
|
});
|
|
428
376
|
this.indexCache.delete(scope.rootDir);
|
|
429
|
-
this.deleteScopeDocumentPayloadCaches(scope);
|
|
430
377
|
this.logger.info(`knowhere: cleared scope ${scope.label} (${removedDocuments.length} document${removedDocuments.length === 1 ? "" : "s"})`);
|
|
431
378
|
return removedDocuments;
|
|
432
379
|
});
|
|
@@ -473,39 +420,8 @@ var KnowhereStore = class {
|
|
|
473
420
|
recursive: true,
|
|
474
421
|
force: true
|
|
475
422
|
});
|
|
476
|
-
}
|
|
477
|
-
buildDocumentPayloadCacheKey(scope, docId) {
|
|
478
|
-
return `${scope.rootDir}:${docId}`;
|
|
479
|
-
}
|
|
480
|
-
touchDocumentPayloadCache(cacheKey, payload) {
|
|
481
|
-
this.documentPayloadCache.delete(cacheKey);
|
|
482
|
-
this.documentPayloadCache.set(cacheKey, payload);
|
|
483
|
-
while (this.documentPayloadCache.size > DOCUMENT_PAYLOAD_CACHE_LIMIT) {
|
|
484
|
-
const oldestKey = this.documentPayloadCache.keys().next().value;
|
|
485
|
-
if (!oldestKey) break;
|
|
486
|
-
this.documentPayloadCache.delete(oldestKey);
|
|
487
|
-
}
|
|
488
|
-
}
|
|
489
|
-
deleteDocumentPayloadCache(scope, docId) {
|
|
490
|
-
this.documentPayloadCache.delete(this.buildDocumentPayloadCacheKey(scope, docId));
|
|
491
|
-
}
|
|
492
|
-
deleteScopeDocumentPayloadCaches(scope) {
|
|
493
|
-
const cacheKeyPrefix = `${scope.rootDir}:`;
|
|
494
|
-
for (const cacheKey of this.documentPayloadCache.keys()) if (cacheKey.startsWith(cacheKeyPrefix)) this.documentPayloadCache.delete(cacheKey);
|
|
495
|
-
}
|
|
496
|
-
async loadOrBuildBrowseIndex(paths, manifest, chunks) {
|
|
497
|
-
if (await pathExists(paths.browseIndexPath)) {
|
|
498
|
-
try {
|
|
499
|
-
const existingBrowseIndex = await readJson(paths.browseIndexPath, null);
|
|
500
|
-
if (isStoredBrowseIndex(existingBrowseIndex)) return existingBrowseIndex;
|
|
501
|
-
} catch (error) {
|
|
502
|
-
this.logger.warn(`knowhere: failed to read browse index ${paths.browseIndexPath}; rebuilding. ${error instanceof Error ? error.message : String(error)}`);
|
|
503
|
-
}
|
|
504
|
-
this.logger.info(`knowhere: rebuilding browse index for ${paths.documentDir} (expected version 2)`);
|
|
505
|
-
}
|
|
506
|
-
const browseIndex = await buildStoredBrowseIndex(paths.resultDir, manifest, chunks);
|
|
507
|
-
await writeJsonAtomic(paths.browseIndexPath, browseIndex);
|
|
508
|
-
return browseIndex;
|
|
423
|
+
if (await pathExists(paths.metadataPath)) await fs.rm(paths.metadataPath, { force: true });
|
|
424
|
+
if (await pathExists(paths.legacyMetadataPath)) await fs.rm(paths.legacyMetadataPath, { force: true });
|
|
509
425
|
}
|
|
510
426
|
buildRouteKey(channelId, conversationId) {
|
|
511
427
|
const normalizedChannel = normalizeWhitespace(channelId)?.toLowerCase();
|
|
@@ -557,8 +473,7 @@ var KnowhereStore = class {
|
|
|
557
473
|
const documentEntries = await fs.readdir(scope.documentsDir, { withFileTypes: true });
|
|
558
474
|
for (const documentEntry of documentEntries) {
|
|
559
475
|
if (!documentEntry.isDirectory()) continue;
|
|
560
|
-
const documentPath = path.join(scope.documentsDir, documentEntry.name);
|
|
561
|
-
const metadata = await readStoredDocumentMetadata(path.join(documentPath, METADATA_FILE_NAME));
|
|
476
|
+
const metadata = await this.readDocumentMetadata(scope, documentEntry.name);
|
|
562
477
|
if (!metadata?.document) continue;
|
|
563
478
|
rebuiltIndex.documents.push(metadata.document);
|
|
564
479
|
}
|
package/dist/text.js
CHANGED
|
@@ -18,17 +18,5 @@ function sanitizeStringArray(value) {
|
|
|
18
18
|
}
|
|
19
19
|
return [];
|
|
20
20
|
}
|
|
21
|
-
function stripHtmlTags(text) {
|
|
22
|
-
return text.replace(/<[^>]*>/g, "");
|
|
23
|
-
}
|
|
24
|
-
function stripLatex(text) {
|
|
25
|
-
return text.replace(/\$([^$]*)\$/g, "$1").replace(/\\text\{([^}]*)}/g, "$1").replace(/\\(?:text(?:bf|it|tt|sf|sc|rm)|math(?:rm|bf|it|sf|tt|cal|bb|frak))\{([^}]*)}/g, "$1").replace(/\\(?:emph|underline|overline)\{([^}]*)}/g, "$1").replace(/\\([%$&#_])/g, "$1").replace(/\\(?:right|Right)arrow/g, "→").replace(/\\(?:left|Left)arrow/g, "←").replace(/\\leftrightarrow/g, "↔").replace(/\\times/g, "×").replace(/\\cdot/g, "·").replace(/\\pm/g, "±").replace(/\\leq/g, "≤").replace(/\\geq/g, "≥").replace(/\\neq/g, "≠").replace(/\\approx/g, "≈").replace(/\\(sup|inf|max|min|log|ln|sin|cos|tan|exp|lim)\b/g, "$1").replace(/\\([{}])/g, "$1").replace(/\\\\/g, " ").replace(/\\[a-zA-Z]+/g, "");
|
|
26
|
-
}
|
|
27
|
-
function normalizeUnicode(text) {
|
|
28
|
-
return text.replace(/[\u2018\u2019\u201A]/g, "'").replace(/[\u201C\u201D\u201E]/g, "\"").replace(/[\u2013\u2014]/g, "-").replace(/[\u00A0\u2009\u200A\u200B\u2007\u202F]/g, " ").replace(/\u2026/g, "...").replace(/\u2022/g, "-");
|
|
29
|
-
}
|
|
30
|
-
function normalizeForGrep(text) {
|
|
31
|
-
return stripHtmlTags(stripLatex(normalizeUnicode(text))).replace(/\s+/g, " ").trim();
|
|
32
|
-
}
|
|
33
21
|
//#endregion
|
|
34
|
-
export { hashString,
|
|
22
|
+
export { hashString, normalizeWhitespace, sanitizeStringArray, slugify };
|