@ontos-ai/knowhere-claw 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -122,6 +122,12 @@ Within each scope, the plugin keeps:
122
122
  4. When needed, the agent can preview structure, search chunks, read raw result
123
123
  files, or clear stored documents.
124
124
 
125
+ ## Schema v2.1 Media Handling
126
+
127
+ - `knowhere_read_chunks` now treats `[images/...]` and `[tables/...]` path references in `chunks.json` content as the primary media enrichment path.
128
+ - Standalone `image` and `table` chunks resolve their real asset locations from `metadata.file_path`.
129
+ - Assets without `metadata.file_path` are ignored by the runtime enrichment and delivery pipeline.
130
+
125
131
  ## Troubleshooting
126
132
 
127
133
  - Missing API key: `apiKey` config is optional. You can set
@@ -0,0 +1 @@
1
+ export {};
@@ -10,10 +10,34 @@ const KNOWHERE_PROMPT_CONTEXT = [
10
10
  "- If the file is in the cloud (e.g. Feishu Drive), first obtain the download URL via the appropriate channel tool, then use the `url` parameter.",
11
11
  "- Refer to your **knowhere_memory** skill for the complete step-by-step workflow.",
12
12
  "",
13
+ "### ⚠️ Feishu / Lark Cloud Files",
14
+ "**Never** pass a raw `open.feishu.cn` or `feishu.cn/drive/file/...` URL directly to `knowhere_ingest_document`.",
15
+ "These URLs require authentication and will redirect to a login page, causing Knowhere to parse HTML instead of the actual document.",
16
+ "Instead:",
17
+ "1. Use `feishu_drive` (action: `download`) or equivalents to obtain an **authenticated temporary download URL**.",
18
+ "2. Then pass that authenticated URL to `knowhere_ingest_document(url: ...)`.",
19
+ "",
20
+ "### Empty File Rejection",
21
+ "If a parsed result contains 0 usable chunks, it will be **automatically rejected** and not stored.",
22
+ "This typically means the source file was corrupt, empty, or required authentication that was not provided.",
23
+ "",
13
24
  "### Knowledge Retrieval",
14
- "When answering questions about documents or the knowledge base:",
15
- "- ✅ Use `knowhere_get_map`, `knowhere_get_structure`, `knowhere_read_chunks`, `knowhere_kg_query`",
16
- "- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`"
25
+ "Use this **single retrieval path** do not skip steps:",
26
+ "1. `knowhere_get_map` get the KG overview: which files exist, their keywords, importance, and cross-file edges.",
27
+ "2. `knowhere_get_structure` inspect the chapter/section hierarchy of a specific document.",
28
+ "3. `knowhere_read_chunks` — fetch content for a specific section (use `sectionPath` to narrow scope).",
29
+ "If you're unsure which file contains the answer, also call `knowhere_discover_files` for keyword-based file discovery.",
30
+ "- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`",
31
+ "- ❌ Do NOT skip `knowhere_get_map` and jump directly to `knowhere_read_chunks`",
32
+ "",
33
+ "### 📷 Image Delivery",
34
+ "**`knowhere_read_chunks` has built-in automatic image delivery.** When it returns chunks containing images,",
35
+ "those images are automatically sent to the user's channel (Telegram/Feishu/etc). You do NOT need to send them again.",
36
+ "- The tool result will contain `resolved_assets` with `mode: 'image_sent'` for successfully delivered images.",
37
+ "- If the user asks to **see** or **view** an image from the knowledge base, call `knowhere_read_chunks` with the relevant section — images will be auto-delivered.",
38
+ "- `knowhere_view_image` is for **AI visual analysis only** (it loads image pixels into your context for you to describe/analyze). It does NOT send the image to the user.",
39
+ "- If the user asks you to re-send a specific image, use the `message` tool with the staged file path from `~/.openclaw/knowhere-assets/`.",
40
+ "- **Never tell the user you cannot send images.** You CAN — via `knowhere_read_chunks` auto-delivery or `message` tool."
17
41
  ].join("\n");
18
42
  const KNOWHERE_DIR_PATTERN = ".knowhere";
19
43
  const BLOCK_REASON = "Do not use exec to read .knowhere/ directly. Use knowhere retrieval tools instead: knowhere_get_map, knowhere_get_structure, knowhere_read_chunks, knowhere_kg_query.";
package/dist/index.js CHANGED
@@ -45,12 +45,12 @@ const plugin = {
45
45
  "knowhere_get_job_status",
46
46
  "knowhere_import_completed_job",
47
47
  "knowhere_set_api_key",
48
- "knowhere_kg_list",
49
- "knowhere_kg_query",
50
48
  "knowhere_get_map",
51
49
  "knowhere_get_structure",
52
50
  "knowhere_read_chunks",
53
- "knowhere_discover_files"
51
+ "knowhere_view_image",
52
+ "knowhere_discover_files",
53
+ "knowhere_delete_document"
54
54
  ] });
55
55
  }
56
56
  };
@@ -23,6 +23,7 @@ export declare class KnowledgeGraphService {
23
23
  keywords: string[];
24
24
  metadata: Record<string, unknown>;
25
25
  }): Promise<void>;
26
+ removeDocumentFromKb(kbId: string, docId: string): Promise<void>;
26
27
  scheduleBuild(kbId: string, task: () => Promise<void>): Promise<void>;
27
28
  buildKnowledgeGraph(kbId: string): Promise<void>;
28
29
  private updateKbMetadata;
@@ -1,12 +1,20 @@
1
1
  import { resolveStoredKnowhereResultRoot } from "./parser.js";
2
2
  import { buildConnections, init_connect_builder } from "./connect-builder.js";
3
3
  import { buildKnowledgeGraph } from "./graph-builder.js";
4
+ import fs from "node:fs/promises";
4
5
  import path from "node:path";
5
6
  import os from "node:os";
6
7
  import { spawn } from "node:child_process";
7
- import fs from "fs-extra";
8
+ import fs$1 from "fs-extra";
8
9
  //#region src/kg-service.ts
9
10
  init_connect_builder();
11
+ /**
12
+ * Directories that belong to the Store layer and must be excluded when the KG
13
+ * scans a kb directory for document entries. This matters when the kb
14
+ * directory coincides with the Store root (e.g. both resolve to
15
+ * `~/.knowhere/global/`).
16
+ */
17
+ const STORE_INFRA_DIRS = new Set(["documents", "metadata"]);
10
18
  const DEFAULT_CONNECT_CONFIG = {
11
19
  minKeywordOverlap: 3,
12
20
  keywordScoreWeight: 1,
@@ -101,19 +109,43 @@ var KnowledgeGraphService = class {
101
109
  }
102
110
  async ensureKbDirectory(kbId) {
103
111
  const kbPath = this.getKbPath(kbId);
104
- await fs.ensureDir(kbPath);
112
+ await fs$1.ensureDir(kbPath);
105
113
  return kbPath;
106
114
  }
107
115
  async saveDocumentToKb(params) {
108
116
  const kbPath = await this.ensureKbDirectory(params.kbId);
109
- const docDir = path.join(kbPath, params.docId);
110
- await fs.ensureDir(docDir);
117
+ const linkPath = path.join(kbPath, params.docId);
111
118
  const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
112
- await fs.copy(sourceResultRoot, docDir, { overwrite: true });
113
- const metadataPath = path.join(docDir, "metadata.json");
114
- await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
119
+ try {
120
+ const existingStat = await fs.lstat(linkPath).catch(() => null);
121
+ if (existingStat) {
122
+ if (existingStat.isSymbolicLink()) if (await fs.readlink(linkPath) === sourceResultRoot) this.logger.debug?.(`knowhere: saveDocumentToKb symlink already correct doc=${params.docId}`);
123
+ else {
124
+ await fs.unlink(linkPath);
125
+ await fs.symlink(sourceResultRoot, linkPath);
126
+ }
127
+ else if (existingStat.isDirectory()) {
128
+ await fs$1.remove(linkPath);
129
+ await fs.symlink(sourceResultRoot, linkPath);
130
+ this.logger.info(`knowhere: saveDocumentToKb replaced legacy copy with symlink doc=${params.docId}`);
131
+ }
132
+ } else await fs.symlink(sourceResultRoot, linkPath);
133
+ } catch (symlinkError) {
134
+ this.logger.warn(`knowhere: symlink failed for doc=${params.docId}, falling back to copy: ${formatUnknownError(symlinkError)}`);
135
+ await fs$1.ensureDir(linkPath);
136
+ await fs$1.copy(sourceResultRoot, linkPath, { overwrite: true });
137
+ }
115
138
  this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
116
139
  }
140
+ async removeDocumentFromKb(kbId, docId) {
141
+ const kbPath = this.getKbPath(kbId);
142
+ const docPath = path.join(kbPath, docId);
143
+ const stat = await fs.lstat(docPath).catch(() => null);
144
+ if (!stat) return;
145
+ if (stat.isSymbolicLink()) await fs.unlink(docPath);
146
+ else if (stat.isDirectory()) await fs$1.remove(docPath);
147
+ this.logger.info(`Document removed from knowledge base: kb=${kbId} doc=${docId}`);
148
+ }
117
149
  async scheduleBuild(kbId, task) {
118
150
  if ((this.config.concurrentBuildStrategy || "queue") === "skip") {
119
151
  if (this.buildQueues.has(kbId)) {
@@ -132,11 +164,12 @@ var KnowledgeGraphService = class {
132
164
  }
133
165
  async buildKnowledgeGraph(kbId) {
134
166
  const kbPath = this.getKbPath(kbId);
135
- const docs = await fs.readdir(kbPath);
167
+ const docs = await fs$1.readdir(kbPath);
136
168
  const docDirs = [];
137
169
  for (const doc of docs) {
170
+ if (doc.startsWith(".") || STORE_INFRA_DIRS.has(doc)) continue;
138
171
  const docPath = path.join(kbPath, doc);
139
- if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
172
+ if ((await fs$1.stat(docPath).catch(() => null))?.isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
140
173
  }
141
174
  if (docDirs.length < 1) {
142
175
  this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
@@ -147,8 +180,8 @@ var KnowledgeGraphService = class {
147
180
  const allChunks = [];
148
181
  for (const docDir of docDirs) {
149
182
  const chunksPath = path.join(kbPath, docDir, "chunks.json");
150
- if (await fs.pathExists(chunksPath)) {
151
- const chunksData = await fs.readJSON(chunksPath);
183
+ if (await fs$1.pathExists(chunksPath)) {
184
+ const chunksData = await fs$1.readJSON(chunksPath);
152
185
  if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks.map((c) => ({
153
186
  ...c,
154
187
  fileKey: docDir
@@ -164,10 +197,10 @@ var KnowledgeGraphService = class {
164
197
  this.logger.info(`Built ${connections.length} connections`);
165
198
  const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
166
199
  let chunkStats = {};
167
- if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
200
+ if (await fs$1.pathExists(chunkStatsPath)) chunkStats = await fs$1.readJSON(chunkStatsPath);
168
201
  const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
169
202
  const graphFile = path.join(kbPath, "knowledge_graph.json");
170
- await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
203
+ await fs$1.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
171
204
  this.logger.info(`Knowledge graph saved to ${graphFile}`);
172
205
  await this.updateKbMetadata(kbPath, {
173
206
  lastUpdated: (/* @__PURE__ */ new Date()).toISOString(),
@@ -182,34 +215,34 @@ var KnowledgeGraphService = class {
182
215
  async updateKbMetadata(kbPath, updates) {
183
216
  const metadataPath = path.join(kbPath, "kb_metadata.json");
184
217
  let metadata = {};
185
- if (await fs.pathExists(metadataPath)) metadata = await fs.readJSON(metadataPath);
218
+ if (await fs$1.pathExists(metadataPath)) metadata = await fs$1.readJSON(metadataPath);
186
219
  const updated = {
187
220
  ...metadata,
188
221
  ...updates
189
222
  };
190
- await fs.writeJSON(metadataPath, updated, { spaces: 2 });
223
+ await fs$1.writeJSON(metadataPath, updated, { spaces: 2 });
191
224
  }
192
225
  async queryGraph(kbId, fileKey) {
193
226
  const graphPath = path.join(this.getKbPath(kbId), "knowledge_graph.json");
194
- if (!await fs.pathExists(graphPath)) return [];
195
- const graph = await fs.readJSON(graphPath);
227
+ if (!await fs$1.pathExists(graphPath)) return [];
228
+ const graph = await fs$1.readJSON(graphPath);
196
229
  if (!fileKey) return graph.edges;
197
230
  return graph.edges.filter((edge) => edge.source === fileKey || edge.target === fileKey);
198
231
  }
199
232
  async getKnowledgeGraph(kbId) {
200
233
  const graphPath = path.join(this.getKbPath(kbId), "knowledge_graph.json");
201
- if (!await fs.pathExists(graphPath)) return null;
202
- return await fs.readJSON(graphPath);
234
+ if (!await fs$1.pathExists(graphPath)) return null;
235
+ return await fs$1.readJSON(graphPath);
203
236
  }
204
237
  async listKnowledgeBases() {
205
238
  const knowhereRoot = path.join(os.homedir(), ".knowhere");
206
- if (!await fs.pathExists(knowhereRoot)) return [];
207
- return (await fs.readdir(knowhereRoot, { withFileTypes: true })).filter((e) => e.isDirectory()).map((e) => e.name);
239
+ if (!await fs$1.pathExists(knowhereRoot)) return [];
240
+ return (await fs$1.readdir(knowhereRoot, { withFileTypes: true })).filter((e) => e.isDirectory()).map((e) => e.name);
208
241
  }
209
242
  async getKbMetadata(kbId) {
210
243
  const metadataPath = path.join(this.getKbPath(kbId), "kb_metadata.json");
211
- if (!await fs.pathExists(metadataPath)) return null;
212
- return await fs.readJSON(metadataPath);
244
+ if (!await fs$1.pathExists(metadataPath)) return null;
245
+ return await fs$1.readJSON(metadataPath);
213
246
  }
214
247
  isEnabled() {
215
248
  return this.degradationMode !== "disabled";
package/dist/tools.d.ts CHANGED
@@ -2,9 +2,69 @@ import { type AnyAgentTool, type OpenClawPluginApi } from "openclaw/plugin-sdk/c
2
2
  import { KnowhereStore } from "./store";
3
3
  import type { KnowledgeGraphService } from "./kg-service";
4
4
  import type { ResolvedKnowhereConfig, ToolRuntimeContext } from "./types";
5
+ interface T2ChunkRelation {
6
+ relation?: string;
7
+ target?: string;
8
+ ref?: string;
9
+ [key: string]: unknown;
10
+ }
11
+ interface T2ChunkMetadata {
12
+ summary?: string;
13
+ keywords?: string[];
14
+ tokens?: string[];
15
+ file_path?: string;
16
+ connect_to?: T2ChunkRelation[];
17
+ [key: string]: unknown;
18
+ }
19
+ interface T2ChunkSlim {
20
+ chunk_id?: string;
21
+ type: string;
22
+ path: string;
23
+ content: string;
24
+ summary: string;
25
+ file_path?: string;
26
+ connect_to?: T2ChunkRelation[];
27
+ metadata?: T2ChunkMetadata;
28
+ }
29
+ interface T2EnrichResult {
30
+ chunks: T2ChunkSlim[];
31
+ /** Image paths that were inlined into text via placeholder replacement (need delivery). */
32
+ inlinedImagePaths: ReadonlySet<string>;
33
+ }
34
+ /**
35
+ * Runtime-only enrichment of chunks returned to the AI:
36
+ * 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
37
+ * 2. Normalize standalone image/table chunks to file_path-based content
38
+ * 3. Remove standalone table/image chunks that were already inlined into text
39
+ *
40
+ * Does NOT modify chunks.json on disk.
41
+ */
42
+ declare function t2EnrichChunks(chunks: T2ChunkSlim[], docDir: string): Promise<T2EnrichResult>;
43
+ interface T2ResolvedAsset {
44
+ chunk_id: string;
45
+ type: "image" | "table";
46
+ relative_path: string;
47
+ summary: string;
48
+ mode: "image_sent" | "image_failed" | "table_inline";
49
+ html_content?: string;
50
+ }
51
+ declare function t2ResolveAssets(params: {
52
+ api: OpenClawPluginApi;
53
+ store: KnowhereStore;
54
+ ctx: ToolRuntimeContext;
55
+ docDir: string;
56
+ returnedChunks: T2ChunkSlim[];
57
+ /** Image paths inlined by t2EnrichChunks that still need channel delivery. */
58
+ enrichedImagePaths?: ReadonlySet<string>;
59
+ }): Promise<T2ResolvedAsset[]>;
5
60
  export declare function createKnowhereToolFactory(params: {
6
61
  api: OpenClawPluginApi;
7
62
  config: ResolvedKnowhereConfig;
8
63
  store: KnowhereStore;
9
64
  kgService: KnowledgeGraphService;
10
65
  }): (ctx: ToolRuntimeContext) => AnyAgentTool[];
66
+ export declare const __internal: {
67
+ t2EnrichChunks: typeof t2EnrichChunks;
68
+ t2ResolveAssets: typeof t2ResolveAssets;
69
+ };
70
+ export {};
package/dist/tools.js CHANGED
@@ -125,6 +125,11 @@ async function persistIngestedDocument(params) {
125
125
  jobResult: params.ingestResult.jobResult,
126
126
  downloadedResult: params.ingestResult.downloadedResult
127
127
  }, { overwrite: params.overwrite });
128
+ if (storedDocument.chunkCount === 0) {
129
+ params.api.logger.warn(`knowhere: rejecting empty document scope=${params.scope.label} docId=${storedDocument.id} title=${JSON.stringify(storedDocument.title)} — chunkCount is 0; removing from store`);
130
+ await params.store.removeDocument(params.scope, storedDocument.id);
131
+ throw new Error(`Parsed result for "${storedDocument.title || storedDocument.id}" contains no usable content (0 chunks). The file may be corrupt, empty, or require authentication to download. For cloud files (e.g. Feishu Drive), make sure to obtain an authenticated download URL first.`);
132
+ }
128
133
  params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${params.scope.label} jobId=${params.ingestResult.job.job_id} docId=${storedDocument.id}`);
129
134
  startKnowledgeGraphBuild({
130
135
  api: params.api,
@@ -959,13 +964,7 @@ async function t2LoadChunks(docDir) {
959
964
  if (Array.isArray(data)) chunks = data;
960
965
  else if (isRecord(data) && Array.isArray(data.chunks)) chunks = data.chunks;
961
966
  else continue;
962
- if (fname === "chunks.json") return chunks.map((c) => ({
963
- type: c.type || "text",
964
- path: c.path || "",
965
- content: c.content || "",
966
- summary: c.metadata?.summary || c.summary || ""
967
- }));
968
- return chunks;
967
+ return chunks.map((c) => t2ToSlimChunk(c));
969
968
  } catch {
970
969
  continue;
971
970
  }
@@ -974,7 +973,68 @@ async function t2LoadChunks(docDir) {
974
973
  function t2NormalizePath(s) {
975
974
  return s.replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 65248)).replace(/[\s\u3000\u00A0]+/g, "").toLowerCase();
976
975
  }
977
- const PLACEHOLDER_RE = /(?:IMAGE|TABLE)_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
976
+ const PATH_REF_RE = /\[((?:images|tables)\/[^\]\n]+)\]/g;
977
+ function t2ReadConnectTo(value) {
978
+ if (!Array.isArray(value)) return;
979
+ const relations = value.filter(isRecord);
980
+ return relations.length > 0 ? relations : void 0;
981
+ }
982
+ function t2GetChunkFilePath(chunk) {
983
+ if (typeof chunk.file_path === "string" && chunk.file_path) return chunk.file_path;
984
+ if (typeof chunk.metadata?.file_path === "string" && chunk.metadata.file_path) return chunk.metadata.file_path;
985
+ }
986
+ function t2GetChunkAssetPath(chunk) {
987
+ return t2GetChunkFilePath(chunk);
988
+ }
989
+ function t2ToSlimChunk(chunk) {
990
+ const connectTo = t2ReadConnectTo(chunk.metadata?.connect_to);
991
+ const filePath = t2GetChunkFilePath(chunk);
992
+ return {
993
+ chunk_id: chunk.chunk_id || void 0,
994
+ type: chunk.type || "text",
995
+ path: chunk.path || "",
996
+ content: chunk.content || "",
997
+ summary: chunk.metadata?.summary || chunk.summary || "",
998
+ file_path: filePath,
999
+ connect_to: connectTo,
1000
+ metadata: chunk.metadata ? {
1001
+ ...chunk.metadata,
1002
+ file_path: filePath,
1003
+ connect_to: connectTo
1004
+ } : void 0
1005
+ };
1006
+ }
1007
+ function t2HydrateChunk(chunk, idToRaw, pathToRaw) {
1008
+ const raw = (chunk.chunk_id ? idToRaw.get(chunk.chunk_id) : void 0) || (chunk.path ? pathToRaw.get(chunk.path) : void 0);
1009
+ if (!raw) return chunk;
1010
+ const rawFilePath = t2GetChunkFilePath(raw);
1011
+ const connectTo = chunk.connect_to || t2ReadConnectTo(raw.metadata?.connect_to);
1012
+ return {
1013
+ ...chunk,
1014
+ chunk_id: chunk.chunk_id || raw.chunk_id,
1015
+ file_path: chunk.file_path || rawFilePath,
1016
+ connect_to: connectTo,
1017
+ metadata: {
1018
+ ...raw.metadata || {},
1019
+ ...chunk.metadata || {},
1020
+ file_path: chunk.file_path || rawFilePath,
1021
+ connect_to: connectTo
1022
+ }
1023
+ };
1024
+ }
1025
+ async function t2ReadTableHtml(docDir, relativePath) {
1026
+ try {
1027
+ return await fs.readFile(path.join(docDir, relativePath), "utf-8");
1028
+ } catch {
1029
+ return null;
1030
+ }
1031
+ }
1032
+ function t2HasUnresolvedMediaReference(text) {
1033
+ PATH_REF_RE.lastIndex = 0;
1034
+ const hasPathRef = PATH_REF_RE.test(text);
1035
+ PATH_REF_RE.lastIndex = 0;
1036
+ return hasPathRef;
1037
+ }
978
1038
  async function t2LoadRawChunks(docDir) {
979
1039
  try {
980
1040
  const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
@@ -988,110 +1048,93 @@ async function t2LoadRawChunks(docDir) {
988
1048
  }
989
1049
  /**
990
1050
  * Runtime-only enrichment of chunks returned to the AI:
991
- * 1. Replace IMAGE_uuid_IMAGE placeholders with [📷 path] in text chunks
992
- * 2. Replace TABLE_uuid_TABLE placeholders with actual HTML content in text chunks
993
- * 3. Remove standalone table chunks that were inlined via placeholders
994
- * 4. Strip self-referencing placeholders from image/table chunk content & summary
1051
+ * 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
1052
+ * 2. Normalize standalone image/table chunks to file_path-based content
1053
+ * 3. Remove standalone table/image chunks that were already inlined into text
995
1054
  *
996
1055
  * Does NOT modify chunks.json on disk.
997
1056
  */
998
1057
  async function t2EnrichChunks(chunks, docDir) {
999
1058
  const rawChunks = await t2LoadRawChunks(docDir);
1000
1059
  const idToRaw = /* @__PURE__ */ new Map();
1001
- for (const rc of rawChunks) if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
1002
- const manifestPaths = /* @__PURE__ */ new Map();
1003
- try {
1004
- const manifest = JSON.parse(await fs.readFile(path.join(docDir, "manifest.json"), "utf-8"));
1005
- if (isRecord(manifest) && isRecord(manifest.files)) {
1006
- const files = manifest.files;
1007
- for (const entry of Array.isArray(files.images) ? files.images : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
1008
- type: "image",
1009
- filePath: entry.file_path
1010
- });
1011
- for (const entry of Array.isArray(files.tables) ? files.tables : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
1012
- type: "table",
1013
- filePath: entry.file_path
1014
- });
1015
- }
1016
- } catch {}
1060
+ const pathToRaw = /* @__PURE__ */ new Map();
1061
+ for (const rc of rawChunks) {
1062
+ if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
1063
+ if (rc.path) pathToRaw.set(rc.path, rc);
1064
+ }
1065
+ chunks = chunks.map((chunk) => t2HydrateChunk(chunk, idToRaw, pathToRaw));
1017
1066
  const inlinedTablePaths = /* @__PURE__ */ new Set();
1018
1067
  const inlinedImagePaths = /* @__PURE__ */ new Set();
1019
1068
  for (const chunk of chunks) {
1020
- if (!chunk.content) continue;
1021
- PLACEHOLDER_RE.lastIndex = 0;
1022
- if (PLACEHOLDER_RE.test(chunk.content)) {
1023
- PLACEHOLDER_RE.lastIndex = 0;
1024
- chunk.content = await replacePlaceholders(chunk.content, idToRaw, docDir, chunk.type === "text" ? inlinedTablePaths : void 0, chunk.type === "text" ? inlinedImagePaths : void 0, manifestPaths);
1025
- PLACEHOLDER_RE.lastIndex = 0;
1026
- if (chunk.type !== "text" && chunk.path && PLACEHOLDER_RE.test(chunk.content)) {
1027
- if (chunk.type === "table") try {
1028
- const html = await fs.readFile(path.join(docDir, chunk.path), "utf-8");
1029
- chunk.content = chunk.content.replace(PLACEHOLDER_RE, `\n${html.slice(0, 8e3)}\n`);
1030
- } catch {
1031
- chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📊 ${chunk.path}]`);
1032
- }
1033
- else if (chunk.type === "image") chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📷 ${chunk.path}]`);
1034
- }
1069
+ const relativePath = t2GetChunkAssetPath(chunk);
1070
+ if (!relativePath) continue;
1071
+ chunk.file_path = relativePath;
1072
+ chunk.metadata = {
1073
+ ...chunk.metadata || {},
1074
+ file_path: relativePath,
1075
+ connect_to: chunk.connect_to
1076
+ };
1077
+ if (chunk.type === "image") {
1078
+ chunk.content = `[📷 ${relativePath}]`;
1079
+ continue;
1035
1080
  }
1036
- if (chunk.summary) {
1037
- PLACEHOLDER_RE.lastIndex = 0;
1038
- if (PLACEHOLDER_RE.test(chunk.summary)) {
1039
- PLACEHOLDER_RE.lastIndex = 0;
1040
- chunk.summary = await replacePlaceholders(chunk.summary, idToRaw, docDir, void 0, void 0, manifestPaths);
1081
+ if (chunk.type === "table") {
1082
+ const html = await t2ReadTableHtml(docDir, relativePath);
1083
+ if (html) chunk.content = html.slice(0, 8e3);
1084
+ else if (!chunk.content) chunk.content = `[📊 ${relativePath}]`;
1085
+ }
1086
+ }
1087
+ for (const chunk of chunks) {
1088
+ const relativePath = t2GetChunkAssetPath(chunk);
1089
+ if (chunk.content) {
1090
+ if (chunk.type === "text") chunk.content = await replacePathReferences(chunk.content, docDir, inlinedTablePaths, inlinedImagePaths);
1091
+ if (chunk.type !== "text" && relativePath && t2HasUnresolvedMediaReference(chunk.content)) {
1092
+ if (chunk.type === "table") {
1093
+ const html = await t2ReadTableHtml(docDir, relativePath);
1094
+ chunk.content = html ? html.slice(0, 8e3) : `[📊 ${relativePath}]`;
1095
+ } else if (chunk.type === "image") chunk.content = `[📷 ${relativePath}]`;
1041
1096
  }
1042
1097
  }
1098
+ if (chunk.summary) chunk.summary = await replacePathReferences(chunk.summary, docDir);
1043
1099
  }
1044
1100
  chunks = chunks.filter((c) => {
1045
- if (c.type === "table" && inlinedTablePaths.has(c.path)) return false;
1046
- if (c.type === "image" && inlinedImagePaths.has(c.path)) return false;
1101
+ const relativePath = t2GetChunkAssetPath(c) || "";
1102
+ if (c.type === "table" && relativePath && inlinedTablePaths.has(relativePath)) return false;
1103
+ if (c.type === "image" && relativePath && inlinedImagePaths.has(relativePath)) return false;
1047
1104
  return true;
1048
1105
  });
1049
- return chunks;
1106
+ return {
1107
+ chunks,
1108
+ inlinedImagePaths
1109
+ };
1050
1110
  }
1051
- async function replacePlaceholders(text, idToRaw, docDir, inlinedTablePaths, inlinedImagePaths, manifestPaths) {
1111
+ async function replacePathReferences(text, docDir, inlinedTablePaths, inlinedImagePaths) {
1052
1112
  const matches = [];
1053
- const re = /(?:(IMAGE|TABLE))_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
1054
- let m;
1055
- while ((m = re.exec(text)) !== null) matches.push({
1056
- full: m[0],
1057
- type: m[1],
1058
- id: m[2],
1059
- start: m.index,
1060
- end: m.index + m[0].length
1113
+ let match;
1114
+ PATH_REF_RE.lastIndex = 0;
1115
+ while ((match = PATH_REF_RE.exec(text)) !== null) matches.push({
1116
+ full: match[0],
1117
+ relativePath: match[1],
1118
+ start: match.index,
1119
+ end: match.index + match[0].length
1061
1120
  });
1121
+ PATH_REF_RE.lastIndex = 0;
1062
1122
  if (matches.length === 0) return text;
1063
1123
  const replacements = [];
1064
- for (const match of matches) {
1065
- const raw = idToRaw.get(match.id);
1066
- let resolvedPath = raw?.path;
1067
- if (!resolvedPath && manifestPaths) {
1068
- const mEntry = manifestPaths.get(match.id);
1069
- if (mEntry) resolvedPath = mEntry.filePath;
1070
- }
1071
- if (!resolvedPath) {
1072
- replacements.push(match.full);
1124
+ for (const ref of matches) {
1125
+ if (ref.relativePath.startsWith("images/")) {
1126
+ replacements.push(`[📷 ${ref.relativePath}]`);
1127
+ inlinedImagePaths?.add(ref.relativePath);
1073
1128
  continue;
1074
1129
  }
1075
- if (match.type === "IMAGE") {
1076
- replacements.push(`[📷 ${resolvedPath}]`);
1077
- inlinedImagePaths?.add(resolvedPath);
1078
- } else {
1079
- const htmlPath = path.join(docDir, resolvedPath);
1080
- try {
1081
- const html = await fs.readFile(htmlPath, "utf-8");
1082
- replacements.push(`\n${html.slice(0, 8e3)}\n`);
1083
- inlinedTablePaths?.add(resolvedPath);
1084
- } catch {
1085
- const tableContent = raw?.content || "";
1086
- if (tableContent && tableContent.includes("<")) {
1087
- replacements.push(`\n${tableContent}\n`);
1088
- inlinedTablePaths?.add(resolvedPath);
1089
- } else replacements.push(`[📊 ${resolvedPath}]`);
1090
- }
1091
- }
1130
+ const html = await t2ReadTableHtml(docDir, ref.relativePath);
1131
+ if (html) {
1132
+ replacements.push(`\n${html.slice(0, 8e3)}\n`);
1133
+ inlinedTablePaths?.add(ref.relativePath);
1134
+ } else replacements.push(`[📊 ${ref.relativePath}]`);
1092
1135
  }
1093
1136
  let result = text;
1094
- for (let i = matches.length - 1; i >= 0; i--) result = result.slice(0, matches[i].start) + replacements[i] + result.slice(matches[i].end);
1137
+ for (let index = matches.length - 1; index >= 0; index -= 1) result = result.slice(0, matches[index].start) + replacements[index] + result.slice(matches[index].end);
1095
1138
  return result;
1096
1139
  }
1097
1140
  function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
@@ -1213,13 +1256,25 @@ async function t2ResolveAssets(params) {
1213
1256
  params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
1214
1257
  }
1215
1258
  };
1216
- for (const chunk of params.returnedChunks) if ((chunk.type === "image" || chunk.type === "table") && chunk.path) {
1217
- if (chunk.type === "table" && chunk.content && !/TABLE_[a-f0-9-]+_TABLE/.test(chunk.content)) continue;
1259
+ for (const chunk of params.returnedChunks) {
1260
+ const relativePath = t2GetChunkAssetPath(chunk);
1261
+ if ((chunk.type === "image" || chunk.type === "table") && relativePath) {
1262
+ if (chunk.type === "table" && chunk.content && !t2HasUnresolvedMediaReference(chunk.content)) continue;
1263
+ await resolveOne({
1264
+ chunkId: chunk.chunk_id || relativePath,
1265
+ type: chunk.type,
1266
+ relativePath,
1267
+ summary: chunk.summary || chunk.content?.slice(0, 200) || ""
1268
+ });
1269
+ }
1270
+ }
1271
+ if (params.enrichedImagePaths && params.enrichedImagePaths.size > 0) for (const relativePath of params.enrichedImagePaths) {
1272
+ if (processedPaths.has(path.join(params.docDir, relativePath))) continue;
1218
1273
  await resolveOne({
1219
- chunkId: chunk.path,
1220
- type: chunk.type,
1221
- relativePath: chunk.path,
1222
- summary: chunk.summary || chunk.content?.slice(0, 200) || ""
1274
+ chunkId: relativePath,
1275
+ type: "image",
1276
+ relativePath,
1277
+ summary: path.basename(relativePath)
1223
1278
  });
1224
1279
  }
1225
1280
  return assets;
@@ -1436,7 +1491,8 @@ function createReadChunksTool(_params) {
1436
1491
  await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
1437
1492
  }
1438
1493
  } catch {}
1439
- chunks = await t2EnrichChunks(chunks, docDir);
1494
+ const enrichResult = await t2EnrichChunks(chunks, docDir);
1495
+ chunks = enrichResult.chunks;
1440
1496
  let resolvedAssets = [];
1441
1497
  try {
1442
1498
  resolvedAssets = await t2ResolveAssets({
@@ -1444,7 +1500,8 @@ function createReadChunksTool(_params) {
1444
1500
  store: _params.store,
1445
1501
  ctx: _params.ctx,
1446
1502
  docDir,
1447
- returnedChunks: chunks
1503
+ returnedChunks: chunks,
1504
+ enrichedImagePaths: enrichResult.inlinedImagePaths
1448
1505
  });
1449
1506
  } catch (err) {
1450
1507
  _params.api.logger.debug?.(`knowhere: read_chunks asset resolution failed: ${err instanceof Error ? err.message : String(err)}`);
@@ -1602,6 +1659,51 @@ function createDiscoverFilesTool(_params) {
1602
1659
  }
1603
1660
  };
1604
1661
  }
1662
+ function createDeleteDocumentTool(params) {
1663
+ return {
1664
+ name: "knowhere_delete_document",
1665
+ label: "Knowhere Delete Document",
1666
+ description: "Delete a parsed document from the underlying storage and remove it from the Knowledge Graph mapping. Use this to completely remove a document or file from the user's knowledge base. You must provide the exact docId obtained from knowhere_kg_query or the ingest result.",
1667
+ parameters: {
1668
+ type: "object",
1669
+ additionalProperties: false,
1670
+ properties: { docId: {
1671
+ type: "string",
1672
+ description: "The targeted document ID to delete."
1673
+ } },
1674
+ required: ["docId"]
1675
+ },
1676
+ execute: async (_toolCallId, rawParams) => {
1677
+ const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
1678
+ if (!docId) throw new Error("docId is required.");
1679
+ const scope = params.store.resolveScope(params.ctx);
1680
+ const kbId = params.kgService.resolveKbId(params.ctx);
1681
+ let wasRemovedFromStore = false;
1682
+ try {
1683
+ if (await params.store.removeDocument(scope, docId)) {
1684
+ wasRemovedFromStore = true;
1685
+ params.api.logger.info(`knowhere: document ${docId} removed from store`);
1686
+ }
1687
+ } catch (error) {
1688
+ params.api.logger.warn(`knowhere: store.removeDocument failed for ${docId}: ${formatErrorMessage(error)}`);
1689
+ }
1690
+ let wasRemovedFromKg = false;
1691
+ if (kbId) try {
1692
+ await params.kgService.removeDocumentFromKb(kbId, docId);
1693
+ wasRemovedFromKg = true;
1694
+ params.kgService.scheduleBuild(kbId, async () => {
1695
+ await params.kgService.buildKnowledgeGraph(kbId);
1696
+ }).catch((e) => {
1697
+ params.api.logger.warn(`knowhere: rebuild failed after doc removal: ${formatErrorMessage(e)}`);
1698
+ });
1699
+ } catch (error) {
1700
+ params.api.logger.warn(`knowhere: kgService.removeDocumentFromKb failed for ${docId}: ${formatErrorMessage(error)}`);
1701
+ }
1702
+ if (wasRemovedFromStore || wasRemovedFromKg) return textResult(`Success: The document "${docId}" has been deleted from the knowledge base.\nThe Knowledge Graph is being rebuilt in the background.`);
1703
+ else return textResult(`Failed: The document "${docId}" could not be found or removed.`);
1704
+ }
1705
+ };
1706
+ }
1605
1707
  function createKnowhereToolFactory(params) {
1606
1708
  return (ctx) => [
1607
1709
  createIngestTool({
@@ -1652,7 +1754,13 @@ function createKnowhereToolFactory(params) {
1652
1754
  ctx
1653
1755
  }),
1654
1756
  createViewImageTool({ api: params.api }),
1655
- createDiscoverFilesTool({ api: params.api })
1757
+ createDiscoverFilesTool({ api: params.api }),
1758
+ createDeleteDocumentTool({
1759
+ api: params.api,
1760
+ store: params.store,
1761
+ kgService: params.kgService,
1762
+ ctx
1763
+ })
1656
1764
  ];
1657
1765
  }
1658
1766
  //#endregion
package/dist/types.d.ts CHANGED
@@ -232,7 +232,8 @@ export interface FileEdge {
232
232
  }>;
233
233
  }
234
234
  /**
235
- * File metadata in knowledge graph (matches SKILL.md schema)
235
+ * File metadata in knowledge graph (v2.0 schema — matches graph-builder.ts output).
236
+ * `hit_count` and `last_hit` are maintained at runtime by `knowhere_read_chunks`.
236
237
  */
237
238
  export interface FileMetadata {
238
239
  chunks_count: number;
@@ -240,12 +241,22 @@ export interface FileMetadata {
240
241
  top_keywords: string[];
241
242
  top_summary: string;
242
243
  importance: number;
244
+ /** ISO timestamp of when this file entry was first created in the graph. */
245
+ created_at: string;
246
+ /** Number of times chunks from this file have been read via knowhere_read_chunks. */
247
+ hit_count?: number;
248
+ /** ISO timestamp of the last knowhere_read_chunks access for this file. */
249
+ last_hit?: string;
243
250
  }
244
251
  /**
245
- * Knowledge graph structure (matches SKILL.md schema)
252
+ * Knowledge graph structure (v2.0 schema — matches graph-builder.ts output).
246
253
  */
247
254
  export interface KnowledgeGraph {
248
255
  version: string;
256
+ /** ISO timestamp of the last graph build or partial update. */
257
+ updated_at: string;
258
+ /** Knowledge base ID this graph belongs to. */
259
+ kb_id: string;
249
260
  stats: {
250
261
  total_files: number;
251
262
  total_chunks: number;
@@ -3,7 +3,7 @@
3
3
  "name": "Knowhere",
4
4
  "description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
5
5
  "skills": ["./skills"],
6
- "version": "0.2.7",
6
+ "version": "0.2.9",
7
7
  "uiHints": {
8
8
  "apiKey": {
9
9
  "label": "Knowhere API Key",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontos-ai/knowhere-claw",
3
- "version": "0.2.7",
3
+ "version": "0.2.9",
4
4
  "description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
5
5
  "files": [
6
6
  "dist/",
@@ -55,8 +55,10 @@ The plugin handles everything automatically:
55
55
  - Uploads/fetches the file for parsing
56
56
  - Polls until parsing completes
57
57
  - Downloads and extracts the result package
58
- - Copies parsed data to `~/.knowhere/{kbId}/`
58
+ - Stores parsed data under `~/.knowhere/global/documents/{docId}/`
59
+ - Creates a symlink in `~/.knowhere/{kbId}/{docId}` → the stored document
59
60
  - Builds/updates `knowledge_graph.json`
61
+ - **Rejects** files that parse to 0 chunks (empty, corrupt, or auth-gated)
60
62
 
61
63
  After ingest completes, the new document is immediately searchable via the retrieval workflow below.
62
64
 
@@ -70,27 +72,36 @@ All knowledge data lives under `~/.knowhere/{kb_id}/`:
70
72
 
71
73
  ```text
72
74
  ~/.knowhere/
73
- └── {kb_id}/ # e.g. "telegram"
75
+ ├── global/ # Store: document storage (scopeMode=global)
76
+ │ ├── index.json # Store document index
77
+ │ ├── documents/
78
+ │ │ └── {docId}/ # One subdir per parsed document
79
+ │ │ ├── chunks.json # All chunks (the actual content)
80
+ │ │ ├── hierarchy.json # Document structure tree
81
+ │ │ ├── images/ # Extracted images
82
+ │ │ └── tables/ # Extracted tables (HTML)
83
+ │ └── metadata/
84
+ │ └── {docId}.json # Document metadata
85
+ └── {kb_id}/ # KG: knowledge graph layer
74
86
  ├── knowledge_graph.json # File-level overview + cross-file edges
87
+ ├── kb_metadata.json # KG metadata
75
88
  ├── chunk_stats.json # Usage stats per chunk
76
- └── {document_name}/ # One subdir per parsed document
77
- ├── chunks.json # All chunks (the actual content)
78
- ├── hierarchy.json # Document structure tree
79
- ├── images/ # Extracted images
80
- └── tables/ # Extracted tables (HTML)
89
+ └── {docId} → ../global/documents/{docId} # Symlink to Store
81
90
  ```
82
91
 
83
- ### Strategy: Prefer tools, fall back to files
92
+ ### Strategy: Use the Tier-2 retrieval tools
84
93
 
85
- #### If `knowhere_kg_list` / `knowhere_kg_query` tools are available use them
94
+ The canonical retrieval path is **always** the Tier-2 tool chain — do not skip steps:
86
95
 
87
- These tools provide efficient access to the knowledge graph:
96
+ 1. `knowhere_get_map` — get the full KG overview: which files exist, their keywords, importance scores, and cross-file edges. Pass `kbId` if known, or leave empty to scan all knowledge bases.
97
+ 2. `knowhere_discover_files` — if you're unsure which file contains the answer, run a keyword search across all KB documents and merge with the `get_map` results.
98
+ 3. `knowhere_get_structure` — inspect the chapter/section hierarchy of the most relevant document.
99
+ 4. `knowhere_read_chunks` — fetch the actual content. Use `sectionPath` to narrow to the specific chapter and minimize token usage.
88
100
 
89
- 1. `knowhere_kg_list` list all available knowledge bases
90
- 2. `knowhere_kg_query(kbId)` returns the full knowledge graph (files, keywords, edges)
91
- 3. Then read individual `chunks.json` files with your file reading tool for detailed content
101
+ Do **not** use `exec` or shell commands to read `~/.knowhere/` files directly.
102
+ Do **not** skip `knowhere_get_map` and jump straight to `knowhere_read_chunks`.
92
103
 
93
- #### If no KG tools are available → self-navigate using file tools
104
+ #### If no Knowhere tools are available → self-navigate using file tools
94
105
 
95
106
  Follow this pattern — do NOT explore the filesystem blindly:
96
107
 
@@ -106,13 +117,17 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
106
117
  ```json
107
118
  {
108
119
  "version": "2.0",
109
- "stats": { "total_files": 5, "total_chunks": 327 },
120
+ "updated_at": "2026-04-09T10:00:00.000Z",
121
+ "kb_id": "telegram",
122
+ "stats": { "total_files": 5, "total_chunks": 327, "total_cross_file_edges": 12 },
110
123
  "files": {
111
124
  "report.docx": {
112
125
  "chunks_count": 198,
113
126
  "types": { "text": 135, "table": 21, "image": 42 },
114
127
  "top_keywords": ["excavation", "retaining", "construction"],
115
- "importance": 0.85
128
+ "top_summary": "Construction safety report for the Lujiazui project.",
129
+ "importance": 0.85,
130
+ "created_at": "2026-04-09T08:00:00.000Z"
116
131
  }
117
132
  },
118
133
  "edges": [
@@ -120,8 +135,16 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
120
135
  "source": "file_A.docx",
121
136
  "target": "file_B.pdf",
122
137
  "connection_count": 20,
138
+ "avg_score": 0.91,
123
139
  "top_connections": [
124
- { "source_chunk": "Chapter 3", "target_chunk": "Safety Policy", "score": 1.0 }
140
+ {
141
+ "source_chunk": "Chapter 3",
142
+ "source_id": "uuid-a",
143
+ "target_chunk": "Safety Policy",
144
+ "target_id": "uuid-b",
145
+ "relation": "keyword",
146
+ "score": 1.0
147
+ }
125
148
  ]
126
149
  }
127
150
  ]
@@ -165,3 +188,19 @@ Check `edges` from Step 1 for cross-document connections. If related files weren
165
188
  - **Show connections**: mention cross-file relationships from edges
166
189
  - **No internal IDs**: never expose `chunk_id` or UUID paths to the user
167
190
  - **User's language**: reply in the same language the user is using
191
+
192
+ ## Part 3: Deleting Knowledge
193
+
194
+ When the user asks to "delete", "remove", or "forget" a specific document:
195
+
196
+ 1. Use `knowhere_get_map` to get an overview of all files in the knowledge base, then identify the correct `docId` that uniquely corresponds to the document the user named.
197
+ 2. If the user provided a filename, use it to disambiguate across multiple hits.
198
+ 3. Call `knowhere_delete_document` with the discovered `docId`.
199
+
200
+ The `knowhere_delete_document` tool natively handles all internal consistency logic:
201
+
202
+ - Deeply cleaning up the `chunks.json`, `images/`, and `tables/` locally.
203
+ - Removing the symlink mapping from the knowledge base profile.
204
+ - Dispatching a background rebuild for `knowledge_graph.json` so that the reference disappears from future queries.
205
+
206
+ **Rule:** DO NOT try to execute Unix file deletion (`rm`) commands on `~/.knowhere/` directly. Always use `knowhere_delete_document`.