npm - @ontos-ai/knowhere-claw - Versions diffs - 0.2.7 → 0.2.9 - Mend

@ontos-ai/knowhere-claw 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +6 -0
package/dist/__tests__/read-chunks-schema-v21.test.d.ts +1 -0
package/dist/agent-hooks.js +27 -3
package/dist/index.js +3 -3
package/dist/kg-service.d.ts +1 -0
package/dist/kg-service.js +56 -23
package/dist/tools.d.ts +60 -0
package/dist/tools.js +204 -96
package/dist/types.d.ts +13 -2
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/skills/knowhere_memory/SKILL.md +56 -17

package/README.md CHANGED Viewed

@@ -122,6 +122,12 @@ Within each scope, the plugin keeps:
 4. When needed, the agent can preview structure, search chunks, read raw result
    files, or clear stored documents.
+## Schema v2.1 Media Handling
+- `knowhere_read_chunks` now treats `[images/...]` and `[tables/...]` path references in `chunks.json` content as the primary media enrichment path.
+- Standalone `image` and `table` chunks resolve their real asset locations from `metadata.file_path`.
+- Assets without `metadata.file_path` are ignored by the runtime enrichment and delivery pipeline.
 ## Troubleshooting
 - Missing API key: `apiKey` config is optional. You can set

package/dist/__tests__/read-chunks-schema-v21.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/agent-hooks.js CHANGED Viewed

@@ -10,10 +10,34 @@ const KNOWHERE_PROMPT_CONTEXT = [
 	"- If the file is in the cloud (e.g. Feishu Drive), first obtain the download URL via the appropriate channel tool, then use the `url` parameter.",
 	"- Refer to your **knowhere_memory** skill for the complete step-by-step workflow.",
 	"",
+	"### ⚠️ Feishu / Lark Cloud Files",
+	"**Never** pass a raw `open.feishu.cn` or `feishu.cn/drive/file/...` URL directly to `knowhere_ingest_document`.",
+	"These URLs require authentication and will redirect to a login page, causing Knowhere to parse HTML instead of the actual document.",
+	"Instead:",
+	"1. Use `feishu_drive` (action: `download`) or equivalents to obtain an **authenticated temporary download URL**.",
+	"2. Then pass that authenticated URL to `knowhere_ingest_document(url: ...)`.",
+	"",
+	"### Empty File Rejection",
+	"If a parsed result contains 0 usable chunks, it will be **automatically rejected** and not stored.",
+	"This typically means the source file was corrupt, empty, or required authentication that was not provided.",
+	"",
 	"### Knowledge Retrieval",
-	"When answering questions about documents or the knowledge base:",
-	"- ✅ Use `knowhere_get_map`, `knowhere_get_structure`, `knowhere_read_chunks`, `knowhere_kg_query`",
-	"- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`"
+	"Use this **single retrieval path** — do not skip steps:",
+	"1. `knowhere_get_map` — get the KG overview: which files exist, their keywords, importance, and cross-file edges.",
+	"2. `knowhere_get_structure` — inspect the chapter/section hierarchy of a specific document.",
+	"3. `knowhere_read_chunks` — fetch content for a specific section (use `sectionPath` to narrow scope).",
+	"If you're unsure which file contains the answer, also call `knowhere_discover_files` for keyword-based file discovery.",
+	"- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`",
+	"- ❌ Do NOT skip `knowhere_get_map` and jump directly to `knowhere_read_chunks`",
+	"",
+	"### 📷 Image Delivery",
+	"**`knowhere_read_chunks` has built-in automatic image delivery.** When it returns chunks containing images,",
+	"those images are automatically sent to the user's channel (Telegram/Feishu/etc). You do NOT need to send them again.",
+	"- The tool result will contain `resolved_assets` with `mode: 'image_sent'` for successfully delivered images.",
+	"- If the user asks to **see** or **view** an image from the knowledge base, call `knowhere_read_chunks` with the relevant section — images will be auto-delivered.",
+	"- `knowhere_view_image` is for **AI visual analysis only** (it loads image pixels into your context for you to describe/analyze). It does NOT send the image to the user.",
+	"- If the user asks you to re-send a specific image, use the `message` tool with the staged file path from `~/.openclaw/knowhere-assets/`.",
+	"- **Never tell the user you cannot send images.** You CAN — via `knowhere_read_chunks` auto-delivery or `message` tool."
 ].join("\n");
 const KNOWHERE_DIR_PATTERN = ".knowhere";
 const BLOCK_REASON = "Do not use exec to read .knowhere/ directly. Use knowhere retrieval tools instead: knowhere_get_map, knowhere_get_structure, knowhere_read_chunks, knowhere_kg_query.";

package/dist/index.js CHANGED Viewed

@@ -45,12 +45,12 @@ const plugin = {
 			"knowhere_get_job_status",
 			"knowhere_import_completed_job",
 			"knowhere_set_api_key",
-			"knowhere_kg_list",
-			"knowhere_kg_query",
 			"knowhere_get_map",
 			"knowhere_get_structure",
 			"knowhere_read_chunks",
-			"knowhere_discover_files"
+			"knowhere_view_image",
+			"knowhere_discover_files",
+			"knowhere_delete_document"
 		] });
 	}
 };

package/dist/kg-service.d.ts CHANGED Viewed

@@ -23,6 +23,7 @@ export declare class KnowledgeGraphService {
         keywords: string[];
         metadata: Record<string, unknown>;
     }): Promise<void>;
+    removeDocumentFromKb(kbId: string, docId: string): Promise<void>;
     scheduleBuild(kbId: string, task: () => Promise<void>): Promise<void>;
     buildKnowledgeGraph(kbId: string): Promise<void>;
     private updateKbMetadata;

package/dist/kg-service.js CHANGED Viewed

@@ -1,12 +1,20 @@
 import { resolveStoredKnowhereResultRoot } from "./parser.js";
 import { buildConnections, init_connect_builder } from "./connect-builder.js";
 import { buildKnowledgeGraph } from "./graph-builder.js";
+import fs from "node:fs/promises";
 import path from "node:path";
 import os from "node:os";
 import { spawn } from "node:child_process";
-import fs from "fs-extra";
+import fs$1 from "fs-extra";
 //#region src/kg-service.ts
 init_connect_builder();
+/**
+* Directories that belong to the Store layer and must be excluded when the KG
+* scans a kb directory for document entries.  This matters when the kb
+* directory coincides with the Store root (e.g. both resolve to
+* `~/.knowhere/global/`).
+*/
+const STORE_INFRA_DIRS = new Set(["documents", "metadata"]);
 const DEFAULT_CONNECT_CONFIG = {
 	minKeywordOverlap: 3,
 	keywordScoreWeight: 1,
@@ -101,19 +109,43 @@ var KnowledgeGraphService = class {
 	}
 	async ensureKbDirectory(kbId) {
 		const kbPath = this.getKbPath(kbId);
-		await fs.ensureDir(kbPath);
+		await fs$1.ensureDir(kbPath);
 		return kbPath;
 	}
 	async saveDocumentToKb(params) {
 		const kbPath = await this.ensureKbDirectory(params.kbId);
-		const docDir = path.join(kbPath, params.docId);
-		await fs.ensureDir(docDir);
+		const linkPath = path.join(kbPath, params.docId);
 		const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
-		await fs.copy(sourceResultRoot, docDir, { overwrite: true });
-		const metadataPath = path.join(docDir, "metadata.json");
-		await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
+		try {
+			const existingStat = await fs.lstat(linkPath).catch(() => null);
+			if (existingStat) {
+				if (existingStat.isSymbolicLink()) if (await fs.readlink(linkPath) === sourceResultRoot) this.logger.debug?.(`knowhere: saveDocumentToKb symlink already correct doc=${params.docId}`);
+				else {
+					await fs.unlink(linkPath);
+					await fs.symlink(sourceResultRoot, linkPath);
+				}
+				else if (existingStat.isDirectory()) {
+					await fs$1.remove(linkPath);
+					await fs.symlink(sourceResultRoot, linkPath);
+					this.logger.info(`knowhere: saveDocumentToKb replaced legacy copy with symlink doc=${params.docId}`);
+				}
+			} else await fs.symlink(sourceResultRoot, linkPath);
+		} catch (symlinkError) {
+			this.logger.warn(`knowhere: symlink failed for doc=${params.docId}, falling back to copy: ${formatUnknownError(symlinkError)}`);
+			await fs$1.ensureDir(linkPath);
+			await fs$1.copy(sourceResultRoot, linkPath, { overwrite: true });
+		}
 		this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
 	}
+	async removeDocumentFromKb(kbId, docId) {
+		const kbPath = this.getKbPath(kbId);
+		const docPath = path.join(kbPath, docId);
+		const stat = await fs.lstat(docPath).catch(() => null);
+		if (!stat) return;
+		if (stat.isSymbolicLink()) await fs.unlink(docPath);
+		else if (stat.isDirectory()) await fs$1.remove(docPath);
+		this.logger.info(`Document removed from knowledge base: kb=${kbId} doc=${docId}`);
+	}
 	async scheduleBuild(kbId, task) {
 		if ((this.config.concurrentBuildStrategy || "queue") === "skip") {
 			if (this.buildQueues.has(kbId)) {
@@ -132,11 +164,12 @@ var KnowledgeGraphService = class {
 	}
 	async buildKnowledgeGraph(kbId) {
 		const kbPath = this.getKbPath(kbId);
-		const docs = await fs.readdir(kbPath);
+		const docs = await fs$1.readdir(kbPath);
 		const docDirs = [];
 		for (const doc of docs) {
+			if (doc.startsWith(".") || STORE_INFRA_DIRS.has(doc)) continue;
 			const docPath = path.join(kbPath, doc);
-			if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
+			if ((await fs$1.stat(docPath).catch(() => null))?.isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
 		}
 		if (docDirs.length < 1) {
 			this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
@@ -147,8 +180,8 @@ var KnowledgeGraphService = class {
 			const allChunks = [];
 			for (const docDir of docDirs) {
 				const chunksPath = path.join(kbPath, docDir, "chunks.json");
-				if (await fs.pathExists(chunksPath)) {
-					const chunksData = await fs.readJSON(chunksPath);
+				if (await fs$1.pathExists(chunksPath)) {
+					const chunksData = await fs$1.readJSON(chunksPath);
 					if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks.map((c) => ({
 						...c,
 						fileKey: docDir
@@ -164,10 +197,10 @@ var KnowledgeGraphService = class {
 			this.logger.info(`Built ${connections.length} connections`);
 			const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
 			let chunkStats = {};
-			if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
+			if (await fs$1.pathExists(chunkStatsPath)) chunkStats = await fs$1.readJSON(chunkStatsPath);
 			const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
 			const graphFile = path.join(kbPath, "knowledge_graph.json");
-			await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
+			await fs$1.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
 			this.logger.info(`Knowledge graph saved to ${graphFile}`);
 			await this.updateKbMetadata(kbPath, {
 				lastUpdated: (/* @__PURE__ */ new Date()).toISOString(),
@@ -182,34 +215,34 @@ var KnowledgeGraphService = class {
 	async updateKbMetadata(kbPath, updates) {
 		const metadataPath = path.join(kbPath, "kb_metadata.json");
 		let metadata = {};
-		if (await fs.pathExists(metadataPath)) metadata = await fs.readJSON(metadataPath);
+		if (await fs$1.pathExists(metadataPath)) metadata = await fs$1.readJSON(metadataPath);
 		const updated = {
 			...metadata,
 			...updates
 		};
-		await fs.writeJSON(metadataPath, updated, { spaces: 2 });
+		await fs$1.writeJSON(metadataPath, updated, { spaces: 2 });
 	}
 	async queryGraph(kbId, fileKey) {
 		const graphPath = path.join(this.getKbPath(kbId), "knowledge_graph.json");
-		if (!await fs.pathExists(graphPath)) return [];
-		const graph = await fs.readJSON(graphPath);
+		if (!await fs$1.pathExists(graphPath)) return [];
+		const graph = await fs$1.readJSON(graphPath);
 		if (!fileKey) return graph.edges;
 		return graph.edges.filter((edge) => edge.source === fileKey || edge.target === fileKey);
 	}
 	async getKnowledgeGraph(kbId) {
 		const graphPath = path.join(this.getKbPath(kbId), "knowledge_graph.json");
-		if (!await fs.pathExists(graphPath)) return null;
-		return await fs.readJSON(graphPath);
+		if (!await fs$1.pathExists(graphPath)) return null;
+		return await fs$1.readJSON(graphPath);
 	}
 	async listKnowledgeBases() {
 		const knowhereRoot = path.join(os.homedir(), ".knowhere");
-		if (!await fs.pathExists(knowhereRoot)) return [];
-		return (await fs.readdir(knowhereRoot, { withFileTypes: true })).filter((e) => e.isDirectory()).map((e) => e.name);
+		if (!await fs$1.pathExists(knowhereRoot)) return [];
+		return (await fs$1.readdir(knowhereRoot, { withFileTypes: true })).filter((e) => e.isDirectory()).map((e) => e.name);
 	}
 	async getKbMetadata(kbId) {
 		const metadataPath = path.join(this.getKbPath(kbId), "kb_metadata.json");
-		if (!await fs.pathExists(metadataPath)) return null;
-		return await fs.readJSON(metadataPath);
+		if (!await fs$1.pathExists(metadataPath)) return null;
+		return await fs$1.readJSON(metadataPath);
 	}
 	isEnabled() {
 		return this.degradationMode !== "disabled";

package/dist/tools.d.ts CHANGED Viewed

@@ -2,9 +2,69 @@ import { type AnyAgentTool, type OpenClawPluginApi } from "openclaw/plugin-sdk/c
 import { KnowhereStore } from "./store";
 import type { KnowledgeGraphService } from "./kg-service";
 import type { ResolvedKnowhereConfig, ToolRuntimeContext } from "./types";
+interface T2ChunkRelation {
+    relation?: string;
+    target?: string;
+    ref?: string;
+    [key: string]: unknown;
+}
+interface T2ChunkMetadata {
+    summary?: string;
+    keywords?: string[];
+    tokens?: string[];
+    file_path?: string;
+    connect_to?: T2ChunkRelation[];
+    [key: string]: unknown;
+}
+interface T2ChunkSlim {
+    chunk_id?: string;
+    type: string;
+    path: string;
+    content: string;
+    summary: string;
+    file_path?: string;
+    connect_to?: T2ChunkRelation[];
+    metadata?: T2ChunkMetadata;
+}
+interface T2EnrichResult {
+    chunks: T2ChunkSlim[];
+    /** Image paths that were inlined into text via placeholder replacement (need delivery). */
+    inlinedImagePaths: ReadonlySet<string>;
+}
+/**
+ * Runtime-only enrichment of chunks returned to the AI:
+ * 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
+ * 2. Normalize standalone image/table chunks to file_path-based content
+ * 3. Remove standalone table/image chunks that were already inlined into text
+ *
+ * Does NOT modify chunks.json on disk.
+ */
+declare function t2EnrichChunks(chunks: T2ChunkSlim[], docDir: string): Promise<T2EnrichResult>;
+interface T2ResolvedAsset {
+    chunk_id: string;
+    type: "image" | "table";
+    relative_path: string;
+    summary: string;
+    mode: "image_sent" | "image_failed" | "table_inline";
+    html_content?: string;
+}
+declare function t2ResolveAssets(params: {
+    api: OpenClawPluginApi;
+    store: KnowhereStore;
+    ctx: ToolRuntimeContext;
+    docDir: string;
+    returnedChunks: T2ChunkSlim[];
+    /** Image paths inlined by t2EnrichChunks that still need channel delivery. */
+    enrichedImagePaths?: ReadonlySet<string>;
+}): Promise<T2ResolvedAsset[]>;
 export declare function createKnowhereToolFactory(params: {
     api: OpenClawPluginApi;
     config: ResolvedKnowhereConfig;
     store: KnowhereStore;
     kgService: KnowledgeGraphService;
 }): (ctx: ToolRuntimeContext) => AnyAgentTool[];
+export declare const __internal: {
+    t2EnrichChunks: typeof t2EnrichChunks;
+    t2ResolveAssets: typeof t2ResolveAssets;
+};
+export {};

package/dist/tools.js CHANGED Viewed

@@ -125,6 +125,11 @@ async function persistIngestedDocument(params) {
 		jobResult: params.ingestResult.jobResult,
 		downloadedResult: params.ingestResult.downloadedResult
 	}, { overwrite: params.overwrite });
+	if (storedDocument.chunkCount === 0) {
+		params.api.logger.warn(`knowhere: rejecting empty document scope=${params.scope.label} docId=${storedDocument.id} title=${JSON.stringify(storedDocument.title)} — chunkCount is 0; removing from store`);
+		await params.store.removeDocument(params.scope, storedDocument.id);
+		throw new Error(`Parsed result for "${storedDocument.title || storedDocument.id}" contains no usable content (0 chunks). The file may be corrupt, empty, or require authentication to download. For cloud files (e.g. Feishu Drive), make sure to obtain an authenticated download URL first.`);
+	}
 	params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${params.scope.label} jobId=${params.ingestResult.job.job_id} docId=${storedDocument.id}`);
 	startKnowledgeGraphBuild({
 		api: params.api,
@@ -959,13 +964,7 @@ async function t2LoadChunks(docDir) {
 		if (Array.isArray(data)) chunks = data;
 		else if (isRecord(data) && Array.isArray(data.chunks)) chunks = data.chunks;
 		else continue;
-		if (fname === "chunks.json") return chunks.map((c) => ({
-			type: c.type || "text",
-			path: c.path || "",
-			content: c.content || "",
-			summary: c.metadata?.summary || c.summary || ""
-		}));
-		return chunks;
+		return chunks.map((c) => t2ToSlimChunk(c));
 	} catch {
 		continue;
 	}
@@ -974,7 +973,68 @@ async function t2LoadChunks(docDir) {
 function t2NormalizePath(s) {
 	return s.replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 65248)).replace(/[\s\u3000\u00A0]+/g, "").toLowerCase();
 }
-const PLACEHOLDER_RE = /(?:IMAGE|TABLE)_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
+const PATH_REF_RE = /\[((?:images|tables)\/[^\]\n]+)\]/g;
+function t2ReadConnectTo(value) {
+	if (!Array.isArray(value)) return;
+	const relations = value.filter(isRecord);
+	return relations.length > 0 ? relations : void 0;
+}
+function t2GetChunkFilePath(chunk) {
+	if (typeof chunk.file_path === "string" && chunk.file_path) return chunk.file_path;
+	if (typeof chunk.metadata?.file_path === "string" && chunk.metadata.file_path) return chunk.metadata.file_path;
+}
+function t2GetChunkAssetPath(chunk) {
+	return t2GetChunkFilePath(chunk);
+}
+function t2ToSlimChunk(chunk) {
+	const connectTo = t2ReadConnectTo(chunk.metadata?.connect_to);
+	const filePath = t2GetChunkFilePath(chunk);
+	return {
+		chunk_id: chunk.chunk_id || void 0,
+		type: chunk.type || "text",
+		path: chunk.path || "",
+		content: chunk.content || "",
+		summary: chunk.metadata?.summary || chunk.summary || "",
+		file_path: filePath,
+		connect_to: connectTo,
+		metadata: chunk.metadata ? {
+			...chunk.metadata,
+			file_path: filePath,
+			connect_to: connectTo
+		} : void 0
+	};
+}
+function t2HydrateChunk(chunk, idToRaw, pathToRaw) {
+	const raw = (chunk.chunk_id ? idToRaw.get(chunk.chunk_id) : void 0) || (chunk.path ? pathToRaw.get(chunk.path) : void 0);
+	if (!raw) return chunk;
+	const rawFilePath = t2GetChunkFilePath(raw);
+	const connectTo = chunk.connect_to || t2ReadConnectTo(raw.metadata?.connect_to);
+	return {
+		...chunk,
+		chunk_id: chunk.chunk_id || raw.chunk_id,
+		file_path: chunk.file_path || rawFilePath,
+		connect_to: connectTo,
+		metadata: {
+			...raw.metadata || {},
+			...chunk.metadata || {},
+			file_path: chunk.file_path || rawFilePath,
+			connect_to: connectTo
+		}
+	};
+}
+async function t2ReadTableHtml(docDir, relativePath) {
+	try {
+		return await fs.readFile(path.join(docDir, relativePath), "utf-8");
+	} catch {
+		return null;
+	}
+}
+function t2HasUnresolvedMediaReference(text) {
+	PATH_REF_RE.lastIndex = 0;
+	const hasPathRef = PATH_REF_RE.test(text);
+	PATH_REF_RE.lastIndex = 0;
+	return hasPathRef;
+}
 async function t2LoadRawChunks(docDir) {
 	try {
 		const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
@@ -988,110 +1048,93 @@ async function t2LoadRawChunks(docDir) {
 }
 /**
 * Runtime-only enrichment of chunks returned to the AI:
-* 1. Replace IMAGE_uuid_IMAGE placeholders with [📷 path] in text chunks
-* 2. Replace TABLE_uuid_TABLE placeholders with actual HTML content in text chunks
-* 3. Remove standalone table chunks that were inlined via placeholders
-* 4. Strip self-referencing placeholders from image/table chunk content & summary
+* 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
+* 2. Normalize standalone image/table chunks to file_path-based content
+* 3. Remove standalone table/image chunks that were already inlined into text
 *
 * Does NOT modify chunks.json on disk.
 */
 async function t2EnrichChunks(chunks, docDir) {
 	const rawChunks = await t2LoadRawChunks(docDir);
 	const idToRaw = /* @__PURE__ */ new Map();
-	for (const rc of rawChunks) if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
-	const manifestPaths = /* @__PURE__ */ new Map();
-	try {
-		const manifest = JSON.parse(await fs.readFile(path.join(docDir, "manifest.json"), "utf-8"));
-		if (isRecord(manifest) && isRecord(manifest.files)) {
-			const files = manifest.files;
-			for (const entry of Array.isArray(files.images) ? files.images : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
-				type: "image",
-				filePath: entry.file_path
-			});
-			for (const entry of Array.isArray(files.tables) ? files.tables : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
-				type: "table",
-				filePath: entry.file_path
-			});
-		}
-	} catch {}
+	const pathToRaw = /* @__PURE__ */ new Map();
+	for (const rc of rawChunks) {
+		if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
+		if (rc.path) pathToRaw.set(rc.path, rc);
+	}
+	chunks = chunks.map((chunk) => t2HydrateChunk(chunk, idToRaw, pathToRaw));
 	const inlinedTablePaths = /* @__PURE__ */ new Set();
 	const inlinedImagePaths = /* @__PURE__ */ new Set();
 	for (const chunk of chunks) {
-		if (!chunk.content) continue;
-		PLACEHOLDER_RE.lastIndex = 0;
-		if (PLACEHOLDER_RE.test(chunk.content)) {
-			PLACEHOLDER_RE.lastIndex = 0;
-			chunk.content = await replacePlaceholders(chunk.content, idToRaw, docDir, chunk.type === "text" ? inlinedTablePaths : void 0, chunk.type === "text" ? inlinedImagePaths : void 0, manifestPaths);
-			PLACEHOLDER_RE.lastIndex = 0;
-			if (chunk.type !== "text" && chunk.path && PLACEHOLDER_RE.test(chunk.content)) {
-				if (chunk.type === "table") try {
-					const html = await fs.readFile(path.join(docDir, chunk.path), "utf-8");
-					chunk.content = chunk.content.replace(PLACEHOLDER_RE, `\n${html.slice(0, 8e3)}\n`);
-				} catch {
-					chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📊 ${chunk.path}]`);
-				}
-				else if (chunk.type === "image") chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📷 ${chunk.path}]`);
-			}
+		const relativePath = t2GetChunkAssetPath(chunk);
+		if (!relativePath) continue;
+		chunk.file_path = relativePath;
+		chunk.metadata = {
+			...chunk.metadata || {},
+			file_path: relativePath,
+			connect_to: chunk.connect_to
+		};
+		if (chunk.type === "image") {
+			chunk.content = `[📷 ${relativePath}]`;
+			continue;
 		}
-		if (chunk.summary) {
-			PLACEHOLDER_RE.lastIndex = 0;
-			if (PLACEHOLDER_RE.test(chunk.summary)) {
-				PLACEHOLDER_RE.lastIndex = 0;
-				chunk.summary = await replacePlaceholders(chunk.summary, idToRaw, docDir, void 0, void 0, manifestPaths);
+		if (chunk.type === "table") {
+			const html = await t2ReadTableHtml(docDir, relativePath);
+			if (html) chunk.content = html.slice(0, 8e3);
+			else if (!chunk.content) chunk.content = `[📊 ${relativePath}]`;
+		}
+	}
+	for (const chunk of chunks) {
+		const relativePath = t2GetChunkAssetPath(chunk);
+		if (chunk.content) {
+			if (chunk.type === "text") chunk.content = await replacePathReferences(chunk.content, docDir, inlinedTablePaths, inlinedImagePaths);
+			if (chunk.type !== "text" && relativePath && t2HasUnresolvedMediaReference(chunk.content)) {
+				if (chunk.type === "table") {
+					const html = await t2ReadTableHtml(docDir, relativePath);
+					chunk.content = html ? html.slice(0, 8e3) : `[📊 ${relativePath}]`;
+				} else if (chunk.type === "image") chunk.content = `[📷 ${relativePath}]`;
 			}
 		}
+		if (chunk.summary) chunk.summary = await replacePathReferences(chunk.summary, docDir);
 	}
 	chunks = chunks.filter((c) => {
-		if (c.type === "table" && inlinedTablePaths.has(c.path)) return false;
-		if (c.type === "image" && inlinedImagePaths.has(c.path)) return false;
+		const relativePath = t2GetChunkAssetPath(c) || "";
+		if (c.type === "table" && relativePath && inlinedTablePaths.has(relativePath)) return false;
+		if (c.type === "image" && relativePath && inlinedImagePaths.has(relativePath)) return false;
 		return true;
 	});
-	return chunks;
+	return {
+		chunks,
+		inlinedImagePaths
+	};
 }
-async function replacePlaceholders(text, idToRaw, docDir, inlinedTablePaths, inlinedImagePaths, manifestPaths) {
+async function replacePathReferences(text, docDir, inlinedTablePaths, inlinedImagePaths) {
 	const matches = [];
-	const re = /(?:(IMAGE|TABLE))_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
-	let m;
-	while ((m = re.exec(text)) !== null) matches.push({
-		full: m[0],
-		type: m[1],
-		id: m[2],
-		start: m.index,
-		end: m.index + m[0].length
+	let match;
+	PATH_REF_RE.lastIndex = 0;
+	while ((match = PATH_REF_RE.exec(text)) !== null) matches.push({
+		full: match[0],
+		relativePath: match[1],
+		start: match.index,
+		end: match.index + match[0].length
 	});
+	PATH_REF_RE.lastIndex = 0;
 	if (matches.length === 0) return text;
 	const replacements = [];
-	for (const match of matches) {
-		const raw = idToRaw.get(match.id);
-		let resolvedPath = raw?.path;
-		if (!resolvedPath && manifestPaths) {
-			const mEntry = manifestPaths.get(match.id);
-			if (mEntry) resolvedPath = mEntry.filePath;
-		}
-		if (!resolvedPath) {
-			replacements.push(match.full);
+	for (const ref of matches) {
+		if (ref.relativePath.startsWith("images/")) {
+			replacements.push(`[📷 ${ref.relativePath}]`);
+			inlinedImagePaths?.add(ref.relativePath);
 			continue;
 		}
-		if (match.type === "IMAGE") {
-			replacements.push(`[📷 ${resolvedPath}]`);
-			inlinedImagePaths?.add(resolvedPath);
-		} else {
-			const htmlPath = path.join(docDir, resolvedPath);
-			try {
-				const html = await fs.readFile(htmlPath, "utf-8");
-				replacements.push(`\n${html.slice(0, 8e3)}\n`);
-				inlinedTablePaths?.add(resolvedPath);
-			} catch {
-				const tableContent = raw?.content || "";
-				if (tableContent && tableContent.includes("<")) {
-					replacements.push(`\n${tableContent}\n`);
-					inlinedTablePaths?.add(resolvedPath);
-				} else replacements.push(`[📊 ${resolvedPath}]`);
-			}
-		}
+		const html = await t2ReadTableHtml(docDir, ref.relativePath);
+		if (html) {
+			replacements.push(`\n${html.slice(0, 8e3)}\n`);
+			inlinedTablePaths?.add(ref.relativePath);
+		} else replacements.push(`[📊 ${ref.relativePath}]`);
 	}
 	let result = text;
-	for (let i = matches.length - 1; i >= 0; i--) result = result.slice(0, matches[i].start) + replacements[i] + result.slice(matches[i].end);
+	for (let index = matches.length - 1; index >= 0; index -= 1) result = result.slice(0, matches[index].start) + replacements[index] + result.slice(matches[index].end);
 	return result;
 }
 function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
@@ -1213,13 +1256,25 @@ async function t2ResolveAssets(params) {
 			params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
 		}
 	};
-	for (const chunk of params.returnedChunks) if ((chunk.type === "image" || chunk.type === "table") && chunk.path) {
-		if (chunk.type === "table" && chunk.content && !/TABLE_[a-f0-9-]+_TABLE/.test(chunk.content)) continue;
+	for (const chunk of params.returnedChunks) {
+		const relativePath = t2GetChunkAssetPath(chunk);
+		if ((chunk.type === "image" || chunk.type === "table") && relativePath) {
+			if (chunk.type === "table" && chunk.content && !t2HasUnresolvedMediaReference(chunk.content)) continue;
+			await resolveOne({
+				chunkId: chunk.chunk_id || relativePath,
+				type: chunk.type,
+				relativePath,
+				summary: chunk.summary || chunk.content?.slice(0, 200) || ""
+			});
+		}
+	}
+	if (params.enrichedImagePaths && params.enrichedImagePaths.size > 0) for (const relativePath of params.enrichedImagePaths) {
+		if (processedPaths.has(path.join(params.docDir, relativePath))) continue;
 		await resolveOne({
-			chunkId: chunk.path,
-			type: chunk.type,
-			relativePath: chunk.path,
-			summary: chunk.summary || chunk.content?.slice(0, 200) || ""
+			chunkId: relativePath,
+			type: "image",
+			relativePath,
+			summary: path.basename(relativePath)
 		});
 	}
 	return assets;
@@ -1436,7 +1491,8 @@ function createReadChunksTool(_params) {
 					await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
 				}
 			} catch {}
-			chunks = await t2EnrichChunks(chunks, docDir);
+			const enrichResult = await t2EnrichChunks(chunks, docDir);
+			chunks = enrichResult.chunks;
 			let resolvedAssets = [];
 			try {
 				resolvedAssets = await t2ResolveAssets({
@@ -1444,7 +1500,8 @@ function createReadChunksTool(_params) {
 					store: _params.store,
 					ctx: _params.ctx,
 					docDir,
-					returnedChunks: chunks
+					returnedChunks: chunks,
+					enrichedImagePaths: enrichResult.inlinedImagePaths
 				});
 			} catch (err) {
 				_params.api.logger.debug?.(`knowhere: read_chunks asset resolution failed: ${err instanceof Error ? err.message : String(err)}`);
@@ -1602,6 +1659,51 @@ function createDiscoverFilesTool(_params) {
 		}
 	};
 }
+function createDeleteDocumentTool(params) {
+	return {
+		name: "knowhere_delete_document",
+		label: "Knowhere Delete Document",
+		description: "Delete a parsed document from the underlying storage and remove it from the Knowledge Graph mapping. Use this to completely remove a document or file from the user's knowledge base. You must provide the exact docId obtained from knowhere_kg_query or the ingest result.",
+		parameters: {
+			type: "object",
+			additionalProperties: false,
+			properties: { docId: {
+				type: "string",
+				description: "The targeted document ID to delete."
+			} },
+			required: ["docId"]
+		},
+		execute: async (_toolCallId, rawParams) => {
+			const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
+			if (!docId) throw new Error("docId is required.");
+			const scope = params.store.resolveScope(params.ctx);
+			const kbId = params.kgService.resolveKbId(params.ctx);
+			let wasRemovedFromStore = false;
+			try {
+				if (await params.store.removeDocument(scope, docId)) {
+					wasRemovedFromStore = true;
+					params.api.logger.info(`knowhere: document ${docId} removed from store`);
+				}
+			} catch (error) {
+				params.api.logger.warn(`knowhere: store.removeDocument failed for ${docId}: ${formatErrorMessage(error)}`);
+			}
+			let wasRemovedFromKg = false;
+			if (kbId) try {
+				await params.kgService.removeDocumentFromKb(kbId, docId);
+				wasRemovedFromKg = true;
+				params.kgService.scheduleBuild(kbId, async () => {
+					await params.kgService.buildKnowledgeGraph(kbId);
+				}).catch((e) => {
+					params.api.logger.warn(`knowhere: rebuild failed after doc removal: ${formatErrorMessage(e)}`);
+				});
+			} catch (error) {
+				params.api.logger.warn(`knowhere: kgService.removeDocumentFromKb failed for ${docId}: ${formatErrorMessage(error)}`);
+			}
+			if (wasRemovedFromStore || wasRemovedFromKg) return textResult(`Success: The document "${docId}" has been deleted from the knowledge base.\nThe Knowledge Graph is being rebuilt in the background.`);
+			else return textResult(`Failed: The document "${docId}" could not be found or removed.`);
+		}
+	};
+}
 function createKnowhereToolFactory(params) {
 	return (ctx) => [
 		createIngestTool({
@@ -1652,7 +1754,13 @@ function createKnowhereToolFactory(params) {
 			ctx
 		}),
 		createViewImageTool({ api: params.api }),
-		createDiscoverFilesTool({ api: params.api })
+		createDiscoverFilesTool({ api: params.api }),
+		createDeleteDocumentTool({
+			api: params.api,
+			store: params.store,
+			kgService: params.kgService,
+			ctx
+		})
 	];
 }
 //#endregion

package/dist/types.d.ts CHANGED Viewed

@@ -232,7 +232,8 @@ export interface FileEdge {
     }>;
 }
 /**
- * File metadata in knowledge graph (matches SKILL.md schema)
+ * File metadata in knowledge graph (v2.0 schema — matches graph-builder.ts output).
+ * `hit_count` and `last_hit` are maintained at runtime by `knowhere_read_chunks`.
  */
 export interface FileMetadata {
     chunks_count: number;
@@ -240,12 +241,22 @@ export interface FileMetadata {
     top_keywords: string[];
     top_summary: string;
     importance: number;
+    /** ISO timestamp of when this file entry was first created in the graph. */
+    created_at: string;
+    /** Number of times chunks from this file have been read via knowhere_read_chunks. */
+    hit_count?: number;
+    /** ISO timestamp of the last knowhere_read_chunks access for this file. */
+    last_hit?: string;
 }
 /**
- * Knowledge graph structure (matches SKILL.md schema)
+ * Knowledge graph structure (v2.0 schema — matches graph-builder.ts output).
  */
 export interface KnowledgeGraph {
     version: string;
+    /** ISO timestamp of the last graph build or partial update. */
+    updated_at: string;
+    /** Knowledge base ID this graph belongs to. */
+    kb_id: string;
     stats: {
         total_files: number;
         total_chunks: number;

package/openclaw.plugin.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "name": "Knowhere",
   "description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
   "skills": ["./skills"],
-  "version": "0.2.7",
+  "version": "0.2.9",
   "uiHints": {
     "apiKey": {
       "label": "Knowhere API Key",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ontos-ai/knowhere-claw",
-  "version": "0.2.7",
+  "version": "0.2.9",
   "description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
   "files": [
     "dist/",

package/skills/knowhere_memory/SKILL.md CHANGED Viewed

@@ -55,8 +55,10 @@ The plugin handles everything automatically:
 - Uploads/fetches the file for parsing
 - Polls until parsing completes
 - Downloads and extracts the result package
-- Copies parsed data to `~/.knowhere/{kbId}/`
+- Stores parsed data under `~/.knowhere/global/documents/{docId}/`
+- Creates a symlink in `~/.knowhere/{kbId}/{docId}` → the stored document
 - Builds/updates `knowledge_graph.json`
+- **Rejects** files that parse to 0 chunks (empty, corrupt, or auth-gated)
 After ingest completes, the new document is immediately searchable via the retrieval workflow below.
@@ -70,27 +72,36 @@ All knowledge data lives under `~/.knowhere/{kb_id}/`:
 ```text
 ~/.knowhere/
-└── {kb_id}/                          # e.g. "telegram"
+├── global/                           # Store: document storage (scopeMode=global)
+│   ├── index.json                    # Store document index
+│   ├── documents/
+│   │   └── {docId}/                  # One subdir per parsed document
+│   │       ├── chunks.json           # All chunks (the actual content)
+│   │       ├── hierarchy.json        # Document structure tree
+│   │       ├── images/               # Extracted images
+│   │       └── tables/               # Extracted tables (HTML)
+│   └── metadata/
+│       └── {docId}.json              # Document metadata
+└── {kb_id}/                          # KG: knowledge graph layer
     ├── knowledge_graph.json          # File-level overview + cross-file edges
+    ├── kb_metadata.json              # KG metadata
     ├── chunk_stats.json              # Usage stats per chunk
-    └── {document_name}/              # One subdir per parsed document
-        ├── chunks.json               # All chunks (the actual content)
-        ├── hierarchy.json            # Document structure tree
-        ├── images/                   # Extracted images
-        └── tables/                   # Extracted tables (HTML)
+    └── {docId} → ../global/documents/{docId}   # Symlink to Store
 ```
-### Strategy: Prefer tools, fall back to files
+### Strategy: Use the Tier-2 retrieval tools
-#### If `knowhere_kg_list` / `knowhere_kg_query` tools are available → use them
+The canonical retrieval path is **always** the Tier-2 tool chain — do not skip steps:
-These tools provide efficient access to the knowledge graph:
+1. `knowhere_get_map` — get the full KG overview: which files exist, their keywords, importance scores, and cross-file edges. Pass `kbId` if known, or leave empty to scan all knowledge bases.
+2. `knowhere_discover_files` — if you're unsure which file contains the answer, run a keyword search across all KB documents and merge with the `get_map` results.
+3. `knowhere_get_structure` — inspect the chapter/section hierarchy of the most relevant document.
+4. `knowhere_read_chunks` — fetch the actual content. Use `sectionPath` to narrow to the specific chapter and minimize token usage.
-1. `knowhere_kg_list` — list all available knowledge bases
-2. `knowhere_kg_query(kbId)` — returns the full knowledge graph (files, keywords, edges)
-3. Then read individual `chunks.json` files with your file reading tool for detailed content
+❌ Do **not** use `exec` or shell commands to read `~/.knowhere/` files directly.
+❌ Do **not** skip `knowhere_get_map` and jump straight to `knowhere_read_chunks`.
-#### If no KG tools are available → self-navigate using file tools
+#### If no Knowhere tools are available → self-navigate using file tools
 Follow this pattern — do NOT explore the filesystem blindly:
@@ -106,13 +117,17 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
 ```json
 {
   "version": "2.0",
-  "stats": { "total_files": 5, "total_chunks": 327 },
+  "updated_at": "2026-04-09T10:00:00.000Z",
+  "kb_id": "telegram",
+  "stats": { "total_files": 5, "total_chunks": 327, "total_cross_file_edges": 12 },
   "files": {
     "report.docx": {
       "chunks_count": 198,
       "types": { "text": 135, "table": 21, "image": 42 },
       "top_keywords": ["excavation", "retaining", "construction"],
-      "importance": 0.85
+      "top_summary": "Construction safety report for the Lujiazui project.",
+      "importance": 0.85,
+      "created_at": "2026-04-09T08:00:00.000Z"
     }
   },
   "edges": [
@@ -120,8 +135,16 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
       "source": "file_A.docx",
       "target": "file_B.pdf",
       "connection_count": 20,
+      "avg_score": 0.91,
       "top_connections": [
-        { "source_chunk": "Chapter 3", "target_chunk": "Safety Policy", "score": 1.0 }
+        {
+          "source_chunk": "Chapter 3",
+          "source_id": "uuid-a",
+          "target_chunk": "Safety Policy",
+          "target_id": "uuid-b",
+          "relation": "keyword",
+          "score": 1.0
+        }
       ]
     }
   ]
@@ -165,3 +188,19 @@ Check `edges` from Step 1 for cross-document connections. If related files weren
 - **Show connections**: mention cross-file relationships from edges
 - **No internal IDs**: never expose `chunk_id` or UUID paths to the user
 - **User's language**: reply in the same language the user is using
+## Part 3: Deleting Knowledge
+When the user asks to "delete", "remove", or "forget" a specific document:
+1. Use `knowhere_get_map` to get an overview of all files in the knowledge base, then identify the correct `docId` that uniquely corresponds to the document the user named.
+2. If the user provided a filename, use it to disambiguate across multiple hits.
+3. Call `knowhere_delete_document` with the discovered `docId`.
+The `knowhere_delete_document` tool handles all of the internal cleanup and consistency steps itself:
+- Deleting the document's stored `chunks.json`, `images/`, and `tables/` from local storage.
+- Removing the symlink mapping from the knowledge base profile.
+- Dispatching a background rebuild for `knowledge_graph.json` so that the reference disappears from future queries.
+**Rule:** DO NOT try to execute Unix file deletion (`rm`) commands on `~/.knowhere/` directly. Always use `knowhere_delete_document`.