@ontos-ai/knowhere-claw 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +163 -0
  2. package/dist/_virtual/_rolldown/runtime.js +37 -0
  3. package/dist/client.d.ts +33 -0
  4. package/dist/client.js +395 -0
  5. package/dist/config.d.ts +6 -0
  6. package/dist/config.js +132 -0
  7. package/dist/error-message.d.ts +1 -0
  8. package/dist/error-message.js +48 -0
  9. package/dist/hooks.d.ts +8 -0
  10. package/dist/hooks.js +415 -0
  11. package/dist/index.d.ts +9 -0
  12. package/dist/index.js +43 -0
  13. package/dist/node_modules/.pnpm/@knowhere-ai_sdk@0.1.1/node_modules/@knowhere-ai/sdk/dist/index.js +717 -0
  14. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/adapters.js +83 -0
  15. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/fetch.js +170 -0
  16. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/xhr.js +106 -0
  17. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/axios.js +57 -0
  18. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/CancelToken.js +90 -0
  19. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/CanceledError.js +20 -0
  20. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/isCancel.js +6 -0
  21. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/Axios.js +174 -0
  22. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/AxiosError.js +70 -0
  23. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/AxiosHeaders.js +204 -0
  24. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/InterceptorManager.js +60 -0
  25. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/buildFullPath.js +20 -0
  26. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/dispatchRequest.js +52 -0
  27. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/mergeConfig.js +81 -0
  28. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/settle.js +18 -0
  29. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/transformData.js +25 -0
  30. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/defaults/index.js +107 -0
  31. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/defaults/transitional.js +9 -0
  32. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/env/data.js +4 -0
  33. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/AxiosURLSearchParams.js +50 -0
  34. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/HttpStatusCode.js +77 -0
  35. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/bind.js +15 -0
  36. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/buildURL.js +40 -0
  37. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/combineURLs.js +14 -0
  38. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/composeSignals.js +39 -0
  39. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/cookies.js +31 -0
  40. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/formDataToJSON.js +67 -0
  41. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isAbsoluteURL.js +14 -0
  42. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isAxiosError.js +14 -0
  43. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isURLSameOrigin.js +8 -0
  44. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/parseHeaders.js +53 -0
  45. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/parseProtocol.js +7 -0
  46. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/progressEventReducer.js +38 -0
  47. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/resolveConfig.js +36 -0
  48. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/speedometer.js +36 -0
  49. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/spread.js +29 -0
  50. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/throttle.js +38 -0
  51. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/toFormData.js +151 -0
  52. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/toURLEncodedForm.js +18 -0
  53. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/trackStream.js +69 -0
  54. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/validator.js +76 -0
  55. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/Blob.js +4 -0
  56. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/FormData.js +4 -0
  57. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/URLSearchParams.js +5 -0
  58. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/index.js +22 -0
  59. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/common/utils.js +46 -0
  60. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/index.js +9 -0
  61. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/utils.js +698 -0
  62. package/dist/node_modules/.pnpm/fflate@0.8.2/node_modules/fflate/esm/browser.js +426 -0
  63. package/dist/node_modules/.pnpm/jszip@3.10.1/node_modules/jszip/dist/jszip.min.js +3110 -0
  64. package/dist/parser.d.ts +16 -0
  65. package/dist/parser.js +323 -0
  66. package/dist/session.d.ts +11 -0
  67. package/dist/session.js +78 -0
  68. package/dist/store.d.ts +62 -0
  69. package/dist/store.js +482 -0
  70. package/dist/text.d.ts +10 -0
  71. package/dist/text.js +34 -0
  72. package/dist/tools.d.ts +9 -0
  73. package/dist/tools.js +1177 -0
  74. package/dist/tracker-progress.d.ts +8 -0
  75. package/dist/tracker-progress.js +197 -0
  76. package/dist/types.d.ts +247 -0
  77. package/dist/types.js +9 -0
  78. package/openclaw.plugin.json +107 -0
  79. package/package.json +61 -0
  80. package/skills/knowhere/SKILL.md +243 -0
package/dist/store.js ADDED
@@ -0,0 +1,482 @@
1
+ import { isNodeError } from "./types.js";
2
+ import { hashString, normalizeWhitespace, sanitizeStringArray, slugify } from "./text.js";
3
+ import { deriveMessageContextScopeKey } from "./session.js";
4
+ import { buildStoredBrowseIndex, extractKnowhereResultArchive, isStoredBrowseIndex, readStoredKnowhereResultContent, readStoredKnowhereResultSummary, resolveResultEntryPath } from "./parser.js";
5
+ import path from "node:path";
6
+ import fs from "node:fs/promises";
7
+ import { randomUUID } from "node:crypto";
8
+ //#region src/store.ts
9
// Schema version stamped into every scope index and document metadata file;
// isCurrentIndex() compares a loaded index against this value.
+ const INDEX_VERSION = 1;
10
// File name of the per-document browse index stored inside each document directory.
+ const BROWSE_INDEX_FILE_NAME = "browse-index.json";
11
// Maximum number of entries kept in the in-memory document payload cache
// before the oldest entries are evicted.
+ const DOCUMENT_PAYLOAD_CACHE_LIMIT = 16;
12
// File name of the per-document metadata record stored inside each document directory.
+ const METADATA_FILE_NAME = "metadata.json";
13
// Name of the sub-directory (inside a document directory) that holds the extracted result archive.
+ const RESULT_DIRECTORY_NAME = "result";
14
/**
 * Reports whether `targetPath` is accessible on disk.
 *
 * Any failure of `fs.access` (missing path, permission error, ...) is
 * reported as `false`; the promise never rejects.
 *
 * @param targetPath Path to probe.
 * @returns Promise resolving to `true` when the path is accessible, `false` otherwise.
 */
async function pathExists(targetPath) {
  return fs.access(targetPath).then(
    () => true,
    () => false
  );
}
22
/**
 * Creates `targetPath` together with any missing parent directories.
 * Succeeds without error when the directory already exists.
 *
 * @param targetPath Directory to create.
 */
async function ensureDir(targetPath) {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(targetPath, mkdirOptions);
}
25
+ async function readJson(targetPath, fallback) {
26
+ try {
27
+ const raw = await fs.readFile(targetPath, "utf-8");
28
+ return JSON.parse(raw);
29
+ } catch (error) {
30
+ if (isNodeError(error) && error.code === "ENOENT") return fallback;
31
+ throw error;
32
+ }
33
+ }
34
+ async function writeJsonAtomic(targetPath, value) {
35
+ await ensureDir(path.dirname(targetPath));
36
+ const tempPath = `${targetPath}.${process.pid}.${Date.now()}.${randomUUID()}.tmp`;
37
+ await fs.writeFile(tempPath, `${JSON.stringify(value, null, 2)}\n`, "utf-8");
38
+ await fs.rename(tempPath, targetPath);
39
+ }
40
+ function deriveDocumentTitle(payload, manifest) {
41
+ const candidates = [
42
+ payload.title,
43
+ manifest.source_file_name,
44
+ payload.jobResult.file_name,
45
+ payload.fileName,
46
+ payload.source
47
+ ];
48
+ for (const candidate of candidates) if (typeof candidate === "string" && normalizeWhitespace(candidate)) {
49
+ const value = normalizeWhitespace(candidate);
50
+ try {
51
+ if (/^https?:\/\//i.test(value)) {
52
+ const url = new URL(value);
53
+ const fileName = path.posix.basename(url.pathname);
54
+ if (fileName) return fileName;
55
+ }
56
+ } catch {
57
+ return value;
58
+ }
59
+ return path.basename(value);
60
+ }
61
+ return "Document";
62
+ }
63
+ function extractFileNameFromSource(source) {
64
+ const value = normalizeWhitespace(source);
65
+ if (!value) return null;
66
+ try {
67
+ if (/^https?:\/\//i.test(value)) {
68
+ const url = new URL(value);
69
+ const fileName = path.posix.basename(url.pathname);
70
+ if (fileName && fileName !== "/" && fileName !== ".") return fileName;
71
+ return null;
72
+ }
73
+ } catch {
74
+ return path.basename(value);
75
+ }
76
+ const fileName = path.basename(value);
77
+ return fileName && fileName !== "." ? fileName : null;
78
+ }
79
+ function deriveOriginalFileName(payload, manifest) {
80
+ const candidates = [
81
+ payload.fileName,
82
+ manifest.source_file_name,
83
+ payload.jobResult.file_name,
84
+ extractFileNameFromSource(payload.source)
85
+ ];
86
+ for (const candidate of candidates) if (typeof candidate === "string" && normalizeWhitespace(candidate)) return normalizeWhitespace(candidate);
87
+ return null;
88
+ }
89
+ function buildDocumentId(payload, manifest) {
90
+ if (payload.docId) return slugify(payload.docId, "document");
91
+ const seed = [
92
+ payload.sourceType,
93
+ payload.source,
94
+ payload.fileName,
95
+ payload.title,
96
+ manifest.data_id
97
+ ].filter(Boolean).join("|");
98
+ return `${slugify(deriveDocumentTitle(payload, manifest), "document")}-${hashString(seed || String(Date.now())).slice(0, 8)}`;
99
+ }
100
+ function buildUniqueDocumentId(docId, seed, existingIds) {
101
+ let nextDocId = docId;
102
+ let counter = 1;
103
+ while (existingIds.has(nextDocId)) {
104
+ nextDocId = `${docId}-${hashString(`${seed}-${counter}`).slice(0, 6)}`;
105
+ counter += 1;
106
+ }
107
+ return nextDocId;
108
+ }
109
/**
 * Creates a fresh, empty index record for `scope`.
 *
 * @param scope Resolved scope (reads `mode` and `key`).
 * @returns An index at the current `INDEX_VERSION` with no documents and no update timestamp.
 */
function createEmptyIndex(scope) {
  const emptyIndex = {
    version: INDEX_VERSION,
    scopeMode: scope.mode,
    scopeKey: scope.key,
    updatedAt: null,
    documents: []
  };
  return emptyIndex;
}
118
/**
 * Computes the on-disk locations used for one stored document.
 *
 * `docId` becomes a directory name directly under `scope.documentsDir`, so it
 * must be a single path segment. Ids that are empty, ".", "..", or contain a
 * path separator or NUL are rejected; joining such a value would resolve
 * outside the scope's documents directory.
 *
 * @param scope Resolved scope (reads `documentsDir`).
 * @param docId Document id; also used for the `.tmp-<uuid>` staging names.
 * @returns `{ browseIndexPath, documentDir, metadataPath, resultDir }`.
 * @throws {Error} When `docId` is not a safe single path segment.
 */
function buildStoredDocumentPaths(scope, docId) {
  const isSingleSegment =
    typeof docId === "string" &&
    docId !== "" &&
    docId !== "." &&
    docId !== ".." &&
    !/[\\/\0]/.test(docId);
  if (!isSingleSegment) throw new Error(`Invalid document id: ${JSON.stringify(docId)}`);

  const documentDir = path.join(scope.documentsDir, docId);
  return {
    browseIndexPath: path.join(documentDir, BROWSE_INDEX_FILE_NAME),
    documentDir,
    metadataPath: path.join(documentDir, METADATA_FILE_NAME),
    resultDir: path.join(documentDir, RESULT_DIRECTORY_NAME)
  };
}
127
+ async function readStoredDocumentMetadata(metadataPath) {
128
+ try {
129
+ const rawDocument = await fs.readFile(metadataPath, "utf-8");
130
+ return JSON.parse(rawDocument);
131
+ } catch (error) {
132
+ if (isNodeError(error) && error.code === "ENOENT") return null;
133
+ throw error;
134
+ }
135
+ }
136
+ function isCurrentIndex(index, scope) {
137
+ return index.version === INDEX_VERSION && index.scopeMode === scope.mode && index.scopeKey === scope.key;
138
+ }
139
+ var KnowhereStore = class {
140
+ rootDir;
141
+ scopeMode;
142
+ logger;
143
+ indexCache;
144
+ documentPayloadCache;
145
+ scopeAccessChains;
146
+ scopeKeyAliases;
147
+ sessionScopeKeysBySessionId;
148
+ sessionScopeKeysBySessionKey;
149
+ constructor(params) {
150
+ this.rootDir = params.rootDir;
151
+ this.scopeMode = params.scopeMode;
152
+ this.logger = params.logger;
153
+ this.indexCache = /* @__PURE__ */ new Map();
154
+ this.documentPayloadCache = /* @__PURE__ */ new Map();
155
+ this.scopeAccessChains = /* @__PURE__ */ new Map();
156
+ this.scopeKeyAliases = /* @__PURE__ */ new Map();
157
+ this.sessionScopeKeysBySessionId = /* @__PURE__ */ new Map();
158
+ this.sessionScopeKeysBySessionKey = /* @__PURE__ */ new Map();
159
+ }
160
+ registerScopeAlias(context) {
161
+ const canonicalScopeKey = this.resolveCanonicalScopeKey(context.scopeKey);
162
+ const sessionKey = normalizeWhitespace(context.sessionKey);
163
+ const sessionId = normalizeWhitespace(context.sessionId);
164
+ if (sessionKey) this.sessionScopeKeysBySessionKey.set(sessionKey, canonicalScopeKey);
165
+ if (sessionId) this.sessionScopeKeysBySessionId.set(sessionId, canonicalScopeKey);
166
+ return canonicalScopeKey;
167
+ }
168
+ registerMessageScope(context) {
169
+ const sessionKey = normalizeWhitespace(context.sessionKey);
170
+ const sessionId = normalizeWhitespace(context.sessionId);
171
+ if (sessionKey || sessionId) return this.registerScopeAlias({
172
+ scopeKey: sessionKey || sessionId || "global",
173
+ sessionKey,
174
+ sessionId
175
+ });
176
+ const messageScopeKey = deriveMessageContextScopeKey(context.messageContext);
177
+ if (!messageScopeKey) return;
178
+ return this.registerScopeAlias({
179
+ scopeKey: messageScopeKey,
180
+ sessionKey,
181
+ sessionId
182
+ });
183
+ }
184
+ inheritScopeAlias(context) {
185
+ const requesterScopeKey = this.resolveKnownScopeKey({
186
+ sessionId: context.requesterSessionId,
187
+ sessionKey: context.requesterSessionKey
188
+ });
189
+ if (!requesterScopeKey) return;
190
+ return this.registerScopeAlias({
191
+ scopeKey: requesterScopeKey,
192
+ sessionKey: context.childSessionKey,
193
+ sessionId: context.childSessionId
194
+ });
195
+ }
196
+ resolveScope(context = {}) {
197
+ const agentId = normalizeWhitespace(context.agentId);
198
+ const sessionKey = normalizeWhitespace(context.sessionKey);
199
+ const sessionId = normalizeWhitespace(context.sessionId);
200
+ let rawKey = "global";
201
+ const resolvedScopeKey = this.resolveKnownScopeKey(context);
202
+ if (this.scopeMode === "session") rawKey = resolvedScopeKey || sessionKey || sessionId || agentId || "global";
203
+ else if (this.scopeMode === "agent") rawKey = agentId || resolvedScopeKey || sessionKey || sessionId || "global";
204
+ const mode = rawKey === "global" ? "global" : this.scopeMode;
205
+ const hash = hashString(rawKey).slice(0, 10);
206
+ const dirName = rawKey === "global" ? "global" : `${mode}-${slugify(rawKey, "scope").slice(0, 40)}-${hash}`;
207
+ const scopeRoot = path.join(this.rootDir, dirName);
208
+ return {
209
+ mode,
210
+ key: rawKey,
211
+ label: rawKey === "global" ? "global" : `${mode}:${rawKey}`,
212
+ rootDir: scopeRoot,
213
+ documentsDir: path.join(scopeRoot, "documents"),
214
+ indexPath: path.join(scopeRoot, "index.json")
215
+ };
216
+ }
217
+ async listDocuments(scope) {
218
+ return this.runWithScopeAccessLock(scope, async () => {
219
+ return [...(await this.getIndex(scope, true)).documents].sort((left, right) => String(right.updatedAt || right.ingestedAt || "").localeCompare(String(left.updatedAt || left.ingestedAt || "")));
220
+ });
221
+ }
222
+ async loadDocumentPayload(scope, docId) {
223
+ return this.runWithScopeAccessLock(scope, async () => {
224
+ const cacheKey = this.buildDocumentPayloadCacheKey(scope, docId);
225
+ const cachedPayload = this.documentPayloadCache.get(cacheKey);
226
+ if (cachedPayload) {
227
+ this.touchDocumentPayloadCache(cacheKey, cachedPayload);
228
+ return cachedPayload;
229
+ }
230
+ const paths = buildStoredDocumentPaths(scope, docId);
231
+ const metadata = await readStoredDocumentMetadata(paths.metadataPath);
232
+ if (!metadata) return null;
233
+ const resultContent = await readStoredKnowhereResultContent(paths.resultDir);
234
+ const browseIndex = await this.loadOrBuildBrowseIndex(paths, resultContent.manifest, resultContent.chunks);
235
+ const payload = {
236
+ version: metadata.version,
237
+ document: metadata.document,
238
+ manifest: resultContent.manifest,
239
+ jobResult: metadata.jobResult,
240
+ fullMarkdown: resultContent.fullMarkdown,
241
+ hierarchy: resultContent.hierarchy,
242
+ browseIndex,
243
+ rawZipSha1: metadata.rawZipSha1,
244
+ chunks: resultContent.chunks
245
+ };
246
+ this.touchDocumentPayloadCache(cacheKey, payload);
247
+ return payload;
248
+ });
249
+ }
250
+ async readResultFile(scope, docId, relativePath) {
251
+ return this.runWithScopeAccessLock(scope, async () => {
252
+ const paths = buildStoredDocumentPaths(scope, docId);
253
+ const metadata = await readStoredDocumentMetadata(paths.metadataPath);
254
+ if (!metadata) return null;
255
+ const filePath = resolveResultEntryPath(paths.resultDir, relativePath);
256
+ try {
257
+ const text = await fs.readFile(filePath, "utf-8");
258
+ return {
259
+ document: metadata.document,
260
+ relativePath: path.posix.normalize(relativePath.replace(/\\/g, "/")),
261
+ text
262
+ };
263
+ } catch (error) {
264
+ if (isNodeError(error) && error.code === "ENOENT") return {
265
+ document: metadata.document,
266
+ relativePath: path.posix.normalize(relativePath.replace(/\\/g, "/")),
267
+ text: null
268
+ };
269
+ throw error;
270
+ }
271
+ });
272
+ }
273
+ async saveDownloadedDocument(scope, payload, options = {}) {
274
+ return this.runWithScopeAccessLock(scope, async () => {
275
+ const index = await this.getIndex(scope, true);
276
+ const existingIds = new Set(index.documents.map((document) => document.id));
277
+ const now = (/* @__PURE__ */ new Date()).toISOString();
278
+ const tempPaths = buildStoredDocumentPaths(scope, `.tmp-${randomUUID()}`);
279
+ try {
280
+ await extractKnowhereResultArchive(payload.downloadedResult, tempPaths.resultDir);
281
+ const resultSummary = await readStoredKnowhereResultSummary(tempPaths.resultDir);
282
+ const resultContent = await readStoredKnowhereResultContent(tempPaths.resultDir);
283
+ const browseIndex = await buildStoredBrowseIndex(tempPaths.resultDir, resultContent.manifest, resultContent.chunks);
284
+ const documentIdSeed = [
285
+ payload.sourceType,
286
+ payload.source,
287
+ payload.fileName,
288
+ payload.title,
289
+ resultSummary.manifest.data_id,
290
+ String(now)
291
+ ].filter(Boolean).join("|");
292
+ const documentIdCandidate = buildDocumentId(payload, resultSummary.manifest);
293
+ const existingDocument = index.documents.find((document) => document.id === documentIdCandidate);
294
+ if (payload.docId && existingDocument && options.overwrite !== true) throw new Error(`Document ${documentIdCandidate} already exists in scope ${scope.label}.`);
295
+ const docId = existingDocument && options.overwrite === true ? existingDocument.id : buildUniqueDocumentId(documentIdCandidate, documentIdSeed, existingIds);
296
+ const paths = buildStoredDocumentPaths(scope, docId);
297
+ const originalFileName = deriveOriginalFileName(payload, resultSummary.manifest);
298
+ const documentRecord = {
299
+ id: docId,
300
+ title: deriveDocumentTitle(payload, resultSummary.manifest),
301
+ sourceType: payload.sourceType,
302
+ source: payload.source,
303
+ sourceLabel: payload.sourceLabel || (payload.sourceType === "url" ? payload.source : originalFileName || payload.source),
304
+ fileName: payload.fileName,
305
+ originalFileName,
306
+ dataId: payload.dataId || resultSummary.manifest.data_id || null,
307
+ jobId: payload.jobResult.job_id || payload.job.job_id,
308
+ checksum: resultSummary.manifest.checksum?.value || null,
309
+ resultUrl: payload.jobResult.result_url || null,
310
+ resultUrlExpiresAt: payload.jobResult.result_url_expires_at || null,
311
+ scopeLabel: scope.label,
312
+ tags: sanitizeStringArray(payload.tags),
313
+ ingestedAt: existingDocument?.ingestedAt || now,
314
+ updatedAt: now,
315
+ chunkCount: resultSummary.chunkCount,
316
+ statistics: resultSummary.statistics
317
+ };
318
+ const metadata = {
319
+ version: INDEX_VERSION,
320
+ document: documentRecord,
321
+ jobResult: payload.jobResult,
322
+ rawZipSha1: payload.downloadedResult.rawZipSha1
323
+ };
324
+ await writeJsonAtomic(tempPaths.metadataPath, metadata);
325
+ await writeJsonAtomic(tempPaths.browseIndexPath, browseIndex);
326
+ await this.removeDocumentArtifacts(paths);
327
+ await ensureDir(scope.documentsDir);
328
+ await fs.rename(tempPaths.documentDir, paths.documentDir);
329
+ index.documents = index.documents.filter((document) => document.id !== docId).concat(documentRecord);
330
+ this.deleteDocumentPayloadCache(scope, docId);
331
+ await this.persistIndex(scope, index);
332
+ return documentRecord;
333
+ } catch (error) {
334
+ await this.removeDocumentArtifacts(tempPaths).catch(() => void 0);
335
+ throw error;
336
+ }
337
+ });
338
+ }
339
+ async removeDocument(scope, docId) {
340
+ return this.runWithScopeAccessLock(scope, async () => {
341
+ const index = await this.getIndex(scope, true);
342
+ const existingDocument = index.documents.find((document) => document.id === docId);
343
+ if (!existingDocument) return null;
344
+ await this.removeDocumentArtifacts(buildStoredDocumentPaths(scope, docId));
345
+ index.documents = index.documents.filter((document) => document.id !== docId);
346
+ this.deleteDocumentPayloadCache(scope, docId);
347
+ await this.persistIndex(scope, index);
348
+ return existingDocument;
349
+ });
350
+ }
351
+ async clearScope(scope) {
352
+ return this.runWithScopeAccessLock(scope, async () => {
353
+ const removedDocuments = [...(await this.getIndex(scope, true)).documents];
354
+ if (await pathExists(scope.rootDir)) await fs.rm(scope.rootDir, {
355
+ recursive: true,
356
+ force: true
357
+ });
358
+ this.indexCache.delete(scope.rootDir);
359
+ this.deleteScopeDocumentPayloadCaches(scope);
360
+ this.logger.info(`knowhere: cleared scope ${scope.label} (${removedDocuments.length} document${removedDocuments.length === 1 ? "" : "s"})`);
361
+ return removedDocuments;
362
+ });
363
+ }
364
+ async getIndex(scope, isScopeLocked = false) {
365
+ const cacheKey = scope.rootDir;
366
+ const cached = this.indexCache.get(cacheKey);
367
+ if (cached) return cached;
368
+ const index = await readJson(scope.indexPath, createEmptyIndex(scope));
369
+ if (!isCurrentIndex(index, scope)) {
370
+ if (!isScopeLocked) return this.runWithScopeAccessLock(scope, async () => this.getIndex(scope, true));
371
+ const rebuiltIndex = await this.rebuildIndex(scope);
372
+ this.indexCache.set(cacheKey, rebuiltIndex);
373
+ return rebuiltIndex;
374
+ }
375
+ this.indexCache.set(cacheKey, index);
376
+ return index;
377
+ }
378
+ async persistIndex(scope, index) {
379
+ index.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
380
+ await writeJsonAtomic(scope.indexPath, index);
381
+ this.indexCache.set(scope.rootDir, index);
382
+ this.logger.debug?.(`knowhere: persisted index for scope ${scope.label}`);
383
+ }
384
+ async runWithScopeAccessLock(scope, operation) {
385
+ const accessKey = scope.rootDir;
386
+ const previous = this.scopeAccessChains.get(accessKey) ?? Promise.resolve();
387
+ let releaseCurrent;
388
+ const current = new Promise((resolve) => {
389
+ releaseCurrent = resolve;
390
+ });
391
+ const queued = previous.catch(() => void 0).then(() => current);
392
+ this.scopeAccessChains.set(accessKey, queued);
393
+ await previous.catch(() => void 0);
394
+ try {
395
+ return await operation();
396
+ } finally {
397
+ releaseCurrent?.();
398
+ if (this.scopeAccessChains.get(accessKey) === queued) this.scopeAccessChains.delete(accessKey);
399
+ }
400
+ }
401
+ async removeDocumentArtifacts(paths) {
402
+ if (await pathExists(paths.documentDir)) await fs.rm(paths.documentDir, {
403
+ recursive: true,
404
+ force: true
405
+ });
406
+ }
407
+ buildDocumentPayloadCacheKey(scope, docId) {
408
+ return `${scope.rootDir}:${docId}`;
409
+ }
410
+ touchDocumentPayloadCache(cacheKey, payload) {
411
+ this.documentPayloadCache.delete(cacheKey);
412
+ this.documentPayloadCache.set(cacheKey, payload);
413
+ while (this.documentPayloadCache.size > DOCUMENT_PAYLOAD_CACHE_LIMIT) {
414
+ const oldestKey = this.documentPayloadCache.keys().next().value;
415
+ if (!oldestKey) break;
416
+ this.documentPayloadCache.delete(oldestKey);
417
+ }
418
+ }
419
+ deleteDocumentPayloadCache(scope, docId) {
420
+ this.documentPayloadCache.delete(this.buildDocumentPayloadCacheKey(scope, docId));
421
+ }
422
+ deleteScopeDocumentPayloadCaches(scope) {
423
+ const cacheKeyPrefix = `${scope.rootDir}:`;
424
+ for (const cacheKey of this.documentPayloadCache.keys()) if (cacheKey.startsWith(cacheKeyPrefix)) this.documentPayloadCache.delete(cacheKey);
425
+ }
426
+ async loadOrBuildBrowseIndex(paths, manifest, chunks) {
427
+ if (await pathExists(paths.browseIndexPath)) {
428
+ try {
429
+ const existingBrowseIndex = await readJson(paths.browseIndexPath, null);
430
+ if (isStoredBrowseIndex(existingBrowseIndex)) return existingBrowseIndex;
431
+ } catch (error) {
432
+ this.logger.warn(`knowhere: failed to read browse index ${paths.browseIndexPath}; rebuilding. ${error instanceof Error ? error.message : String(error)}`);
433
+ }
434
+ this.logger.info(`knowhere: rebuilding browse index for ${paths.documentDir} (expected version 2)`);
435
+ }
436
+ const browseIndex = await buildStoredBrowseIndex(paths.resultDir, manifest, chunks);
437
+ await writeJsonAtomic(paths.browseIndexPath, browseIndex);
438
+ return browseIndex;
439
+ }
440
+ resolveCanonicalScopeKey(scopeKey) {
441
+ let currentKey = normalizeWhitespace(scopeKey) || scopeKey;
442
+ const seen = /* @__PURE__ */ new Set();
443
+ while (!seen.has(currentKey)) {
444
+ seen.add(currentKey);
445
+ const alias = this.scopeKeyAliases.get(currentKey);
446
+ if (!alias) break;
447
+ currentKey = alias;
448
+ }
449
+ return currentKey;
450
+ }
451
+ resolveKnownScopeKey(context) {
452
+ const sessionId = normalizeWhitespace(context.sessionId);
453
+ if (sessionId) {
454
+ const scopeKey = this.sessionScopeKeysBySessionId.get(sessionId);
455
+ if (scopeKey) return this.resolveCanonicalScopeKey(scopeKey);
456
+ }
457
+ const sessionKey = normalizeWhitespace(context.sessionKey);
458
+ if (sessionKey) {
459
+ const scopeKey = this.sessionScopeKeysBySessionKey.get(sessionKey);
460
+ if (scopeKey) return this.resolveCanonicalScopeKey(scopeKey);
461
+ }
462
+ }
463
+ async rebuildIndex(scope) {
464
+ const rebuiltIndex = createEmptyIndex(scope);
465
+ if (!await pathExists(scope.documentsDir)) {
466
+ await this.persistIndex(scope, rebuiltIndex);
467
+ return rebuiltIndex;
468
+ }
469
+ const documentEntries = await fs.readdir(scope.documentsDir, { withFileTypes: true });
470
+ for (const documentEntry of documentEntries) {
471
+ if (!documentEntry.isDirectory()) continue;
472
+ const documentPath = path.join(scope.documentsDir, documentEntry.name);
473
+ const metadata = await readStoredDocumentMetadata(path.join(documentPath, METADATA_FILE_NAME));
474
+ if (!metadata?.document) continue;
475
+ rebuiltIndex.documents.push(metadata.document);
476
+ }
477
+ await this.persistIndex(scope, rebuiltIndex);
478
+ return rebuiltIndex;
479
+ }
480
+ };
481
+ //#endregion
482
+ export { KnowhereStore };
package/dist/text.d.ts ADDED
@@ -0,0 +1,10 @@
1
/** Collapses every run of whitespace to a single space and trims both ends; returns "" for non-string input. */
+ export declare function normalizeWhitespace(value: unknown): string;
2
/** NOTE(review): no implementation is visible alongside this declaration; presumably shortens `value` to at most `maxChars` characters — confirm. */
+ export declare function truncateText(value: unknown, maxChars: number): string;
3
/**
 * Converts `value` to a lower-case slug: characters outside a-z and 0-9 become
 * single dashes, leading and trailing dashes are removed and the result is cut
 * to 64 characters. Returns `fallback` (default "item") when nothing remains.
 */
+ export declare function slugify(value: unknown, fallback?: string): string;
4
/** Returns the hexadecimal SHA-1 digest of `value`. */
+ export declare function hashString(value: string): string;
5
/**
 * Normalizes a tag-like input into a list of non-empty, whitespace-normalized
 * strings. Accepts an array (non-string entries are dropped) or a single
 * string; anything else yields an empty array.
 */
+ export declare function sanitizeStringArray(value: unknown): string[];
6
/** NOTE(review): no implementation is visible alongside this declaration; presumably produces a normalized form for text comparison — confirm. */
+ export declare function toComparableText(value: unknown): string;
7
/** Removes HTML-style tags from `text`, keeping the text between them. */
+ export declare function stripHtmlTags(text: string): string;
8
/** Unwraps or removes LaTeX markup in `text` and maps a set of symbol commands to Unicode characters. */
+ export declare function stripLatex(text: string): string;
9
/** Replaces typographic quotes, dashes, special spaces, ellipsis and bullet characters with plain ASCII equivalents. */
+ export declare function normalizeUnicode(text: string): string;
10
/** Applies normalizeUnicode, stripLatex and stripHtmlTags in that order, then collapses whitespace and trims. */
+ export declare function normalizeForGrep(text: string): string;
package/dist/text.js ADDED
@@ -0,0 +1,34 @@
1
+ import { createHash } from "node:crypto";
2
+ //#region src/text.ts
3
/**
 * Collapses every run of whitespace in `value` to a single space and removes
 * leading and trailing whitespace.
 *
 * @param value Any value; only strings are processed.
 * @returns The normalized string, or "" when `value` is not a string.
 */
function normalizeWhitespace(value) {
  if (typeof value !== "string") return "";
  // Splitting on whitespace runs and dropping empty edge tokens equals replace + trim.
  const words = value.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
7
+ function slugify(value, fallback = "item") {
8
+ return normalizeWhitespace(value).toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 64) || fallback;
9
+ }
10
/**
 * Computes the SHA-1 digest of `value`.
 *
 * @param value Text to hash.
 * @returns The digest as a 40-character lower-case hexadecimal string.
 */
function hashString(value) {
  const hasher = createHash("sha1");
  hasher.update(value);
  return hasher.digest("hex");
}
13
+ function sanitizeStringArray(value) {
14
+ if (Array.isArray(value)) return value.filter((entry) => typeof entry === "string").map((entry) => normalizeWhitespace(entry)).filter(Boolean);
15
+ if (typeof value === "string") {
16
+ const normalized = normalizeWhitespace(value);
17
+ return normalized ? [normalized] : [];
18
+ }
19
+ return [];
20
+ }
21
/**
 * Removes HTML/XML markup from `text`, keeping the text between tags.
 *
 * Only spans that look like markup are removed: opening, closing and
 * self-closing tags whose name starts with a letter, declarations and
 * processing instructions (`<!DOCTYPE ...>`, `<?xml ...?>`) and comments
 * (`<!-- ... -->`). A bare "<" followed by a space, digit or operator is left
 * alone, so prose such as "x < 5 and y > 3" is not truncated.
 *
 * @param text Input text.
 * @returns `text` without markup.
 */
function stripHtmlTags(text) {
  return text.replace(/<(?:!--[\s\S]*?--|[!/?]?[a-zA-Z][^>]*)>/g, "");
}
24
/**
 * LaTeX symbol commands and their Unicode replacements, applied in order.
 * Each pattern ends with `(?![a-zA-Z])` so that a command only matches as a
 * whole: `\cdot` must not match inside `\cdots`, nor `\leq` inside `\leqslant`.
 */
const LATEX_SYMBOL_REPLACEMENTS = [
  { pattern: /\\(?:right|Right)arrow(?![a-zA-Z])/g, symbol: "→" },
  { pattern: /\\(?:left|Left)arrow(?![a-zA-Z])/g, symbol: "←" },
  { pattern: /\\leftrightarrow(?![a-zA-Z])/g, symbol: "↔" },
  { pattern: /\\times(?![a-zA-Z])/g, symbol: "×" },
  { pattern: /\\cdot(?![a-zA-Z])/g, symbol: "·" },
  { pattern: /\\pm(?![a-zA-Z])/g, symbol: "±" },
  { pattern: /\\leq(?![a-zA-Z])/g, symbol: "≤" },
  { pattern: /\\geq(?![a-zA-Z])/g, symbol: "≥" },
  { pattern: /\\neq(?![a-zA-Z])/g, symbol: "≠" },
  { pattern: /\\approx(?![a-zA-Z])/g, symbol: "≈" }
];
/**
 * Strips LaTeX markup from `text`, leaving readable plain text.
 *
 * Steps, in order: unwrap `$...$` math, unwrap `\text{}`, font and emphasis
 * commands, unescape `\% \$ \& \# \_`, map known symbol commands to Unicode,
 * keep the names of common functions (`\sin` becomes `sin`), unescape braces,
 * turn `\\` line breaks into a space, and finally drop any remaining `\command`.
 *
 * A "$" preceded by an odd number of backslashes is an escaped literal dollar
 * sign, not a math delimiter, so "\$5 and \$10" becomes "$5 and $10".
 *
 * @param text Input text, possibly containing LaTeX.
 * @returns The text with LaTeX markup removed or replaced.
 */
function stripLatex(text) {
  let result = text
    // The lookbehinds reject a "$" that directly follows an odd-length run of backslashes.
    .replace(/(?<!(?<!\\)(?:\\\\)*\\)\$([^$]*)(?<!(?<!\\)(?:\\\\)*\\)\$/g, "$1")
    .replace(/\\text\{([^}]*)}/g, "$1")
    .replace(/\\(?:text(?:bf|it|tt|sf|sc|rm)|math(?:rm|bf|it|sf|tt|cal|bb|frak))\{([^}]*)}/g, "$1")
    .replace(/\\(?:emph|underline|overline)\{([^}]*)}/g, "$1")
    .replace(/\\([%$&#_])/g, "$1");
  for (const { pattern, symbol } of LATEX_SYMBOL_REPLACEMENTS) result = result.replace(pattern, symbol);
  return result
    .replace(/\\(sup|inf|max|min|log|ln|sin|cos|tan|exp|lim)\b/g, "$1")
    .replace(/\\([{}])/g, "$1")
    .replace(/\\\\/g, " ")
    .replace(/\\[a-zA-Z]+/g, "");
}
27
/**
 * Replaces typographic Unicode characters with plain ASCII equivalents:
 * single and double curly quotes, en/em dashes, special spaces (including the
 * zero-width space, which becomes a regular space), the ellipsis and the bullet.
 *
 * @param text Input text.
 * @returns The text with those characters replaced.
 */
function normalizeUnicode(text) {
  const substitutions = [
    { pattern: /[\u2018\u2019\u201A]/g, replacement: "'" },
    { pattern: /[\u201C\u201D\u201E]/g, replacement: "\"" },
    { pattern: /[\u2013\u2014]/g, replacement: "-" },
    { pattern: /[\u00A0\u2009\u200A\u200B\u2007\u202F]/g, replacement: " " },
    { pattern: /\u2026/g, replacement: "..." },
    { pattern: /\u2022/g, replacement: "-" }
  ];
  let normalized = text;
  for (const { pattern, replacement } of substitutions) {
    normalized = normalized.replace(pattern, replacement);
  }
  return normalized;
}
30
+ function normalizeForGrep(text) {
31
+ return stripHtmlTags(stripLatex(normalizeUnicode(text))).replace(/\s+/g, " ").trim();
32
+ }
33
+ //#endregion
34
+ export { hashString, normalizeForGrep, normalizeWhitespace, sanitizeStringArray, slugify };
package/dist/tools.d.ts ADDED
@@ -0,0 +1,9 @@
1
+ import type { AnyAgentTool, OpenClawPluginApi } from "openclaw/plugin-sdk/core";
2
+ import { KnowhereStore } from "./store";
3
+ import type { KnowhereAutoGroundingController, ResolvedKnowhereConfig, ToolRuntimeContext } from "./types";
4
/**
 * Creates the factory that produces the plugin's agent tools.
 *
 * @param params Dependencies for the tools: the OpenClaw plugin API (`api`),
 *   the resolved plugin configuration (`config`), the document store (`store`)
 *   and an optional auto-grounding controller (`autoGroundingController`).
 * @returns A function that takes a tool runtime context and returns the list
 *   of agent tools for that context.
 */
+ export declare function createKnowhereToolFactory(params: {
5
+ api: OpenClawPluginApi;
6
+ config: ResolvedKnowhereConfig;
7
+ store: KnowhereStore;
8
+ autoGroundingController?: KnowhereAutoGroundingController;
9
+ }): (ctx: ToolRuntimeContext) => AnyAgentTool[];