npm - @ontos-ai/knowhere-claw - Versions diffs - 0.2.4 → 0.2.5 - Mend

@ontos-ai/knowhere-claw 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/client.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { isRecord } from "./types.js";
 import { formatErrorMessage } from "./error-message.js";
+import { openAsBlob } from "node:fs";
 import path from "node:path";
 import { createHash } from "node:crypto";
-import { openAsBlob } from "node:fs";
 import { Knowhere } from "@knowhere-ai/sdk";
 //#region src/client.ts
 const RETRYABLE_STATUS_CODES = new Set([

package/dist/config.d.ts CHANGED Viewed

@@ -2,6 +2,14 @@ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
 import type { JsonSchemaObject, ResolvedKnowhereConfig, StringRecord, KnowledgeGraphConfig } from "./types";
 export declare const DEFAULT_BASE_URL = "https://api.knowhereto.ai";
 export declare const knowherePluginConfigSchema: JsonSchemaObject;
+/**
+ * Return the effective plugin config object, merging the persisted
+ * resolved-config when the live pluginConfig is missing explicit fields
+ * (i.e. in agent subprocesses).  Both `resolveKnowhereConfig` and
+ * `resolveKnowledgeGraphConfig` should read from this merged result so
+ * that subprocess instances inherit the gateway's full configuration.
+ */
+export declare function resolveEffectivePluginConfig(api: OpenClawPluginApi): StringRecord;
 export declare function resolveKnowhereConfig(api: OpenClawPluginApi): ResolvedKnowhereConfig;
 export declare const API_KEY_URL = "https://knowhereto.ai/api-keys";
 export declare const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";

package/dist/config.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { isRecord } from "./types.js";
+import { readFileSync } from "node:fs";
 import fs from "node:fs/promises";
 import path from "node:path";
 //#region src/config.ts
@@ -147,36 +148,83 @@ function readScopeMode(raw) {
 	if (value === "session" || value === "agent" || value === "global") return value;
 	return KNOWHERE_PLUGIN_DEFAULTS.scopeMode;
 }
+const RESOLVED_CONFIG_STATE_FILE = "resolved-config.json";
+function readPersistedResolvedConfigSync(stateDir) {
+	const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
+	try {
+		const raw = readFileSync(filePath, "utf-8");
+		const parsed = JSON.parse(raw);
+		if (isRecord(parsed)) return parsed;
+		return null;
+	} catch {
+		return null;
+	}
+}
+async function persistResolvedConfig(stateDir, config) {
+	await fs.mkdir(stateDir, { recursive: true });
+	const filePath = path.join(stateDir, RESOLVED_CONFIG_STATE_FILE);
+	await fs.writeFile(filePath, JSON.stringify(config, null, 2), "utf-8");
+}
+function hasExplicitPluginConfig(raw) {
+	return Boolean(readString(raw, "scopeMode") || readString(raw, "storageDir"));
+}
+/**
+* Return the effective plugin config object, merging the persisted
+* resolved-config when the live pluginConfig is missing explicit fields
+* (i.e. in agent subprocesses).  Both `resolveKnowhereConfig` and
+* `resolveKnowledgeGraphConfig` should read from this merged result so
+* that subprocess instances inherit the gateway's full configuration.
+*/
+function resolveEffectivePluginConfig(api) {
+	const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
+	const stateDir = api.runtime.state.resolveStateDir();
+	if (!hasExplicitPluginConfig(raw)) {
+		const persisted = readPersistedResolvedConfigSync(stateDir);
+		if (persisted) return {
+			...persisted,
+			...raw
+		};
+	}
+	return raw;
+}
 function resolveKnowhereConfig(api) {
 	const raw = isRecord(api.pluginConfig) ? api.pluginConfig : {};
 	const stateDir = api.runtime.state.resolveStateDir();
-	const storageDirRaw = readString(raw, "storageDir");
-	return {
+	const hasExplicit = hasExplicitPluginConfig(raw);
+	const effective = resolveEffectivePluginConfig(api);
+	const storageDirRaw = readString(effective, "storageDir");
+	const config = {
 		apiKey: readString(raw, "apiKey") || process.env.KNOWHERE_API_KEY || "",
 		baseUrl: readString(raw, "baseUrl") || process.env.KNOWHERE_BASE_URL || "https://api.knowhereto.ai",
 		storageDir: storageDirRaw ? api.resolvePath(storageDirRaw) : path.join(stateDir, "plugins", api.id),
-		scopeMode: readScopeMode(raw),
-		pollIntervalMs: readNumber(raw, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
+		scopeMode: readScopeMode(effective),
+		pollIntervalMs: readNumber(effective, "pollIntervalMs", KNOWHERE_PLUGIN_DEFAULTS.pollIntervalMs, {
 			min: 1e3,
 			max: 6e4,
 			integer: true
 		}),
-		pollTimeoutMs: readNumber(raw, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
+		pollTimeoutMs: readNumber(effective, "pollTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.pollTimeoutMs, {
 			min: 1e4,
 			max: 72e5,
 			integer: true
 		}),
-		requestTimeoutMs: readNumber(raw, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
+		requestTimeoutMs: readNumber(effective, "requestTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.requestTimeoutMs, {
 			min: 1e3,
 			max: 3e5,
 			integer: true
 		}),
-		uploadTimeoutMs: readNumber(raw, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
+		uploadTimeoutMs: readNumber(effective, "uploadTimeoutMs", KNOWHERE_PLUGIN_DEFAULTS.uploadTimeoutMs, {
 			min: 1e3,
 			max: 72e5,
 			integer: true
 		})
 	};
+	if (hasExplicit) persistResolvedConfig(stateDir, {
+		scopeMode: config.scopeMode,
+		storageDir: config.storageDir,
+		knowledgeGraph: raw.knowledgeGraph
+	}).catch(() => void 0);
+	return config;
 }
 const API_KEY_URL = "https://knowhereto.ai/api-keys";
 const PURCHASE_CREDITS_URL = "https://knowhereto.ai/usage?buy=true";
@@ -257,4 +305,4 @@ function resolveKnowledgeGraphConfig(raw) {
 	};
 }
 //#endregion
-export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig };
+export { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, knowherePluginConfigSchema, persistApiKey, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig };

package/dist/graph-builder.d.ts CHANGED Viewed

@@ -62,7 +62,7 @@ export interface ChunkStats {
  * Main function to build knowledge graph
  * Equivalent to Python: build_knowledge_graph
  */
-export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
+export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, _jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
 /**
  * Incremental update: match new chunks against existing chunks
  * Equivalent to Python: _incremental_connections

package/dist/graph-builder.js CHANGED Viewed

@@ -1,12 +1,5 @@
-import * as nodejieba from "nodejieba";
 //#region src/graph-builder.ts
 /**
-* Graph Builder Module
-*
-* TypeScript implementation of knowhere-api/apps/worker/app/services/connect_builder/graph_builder.py
-* Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
-*/
-/**
 * Extract file key from a chunk.
 * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
 */
@@ -27,29 +20,18 @@ function extractLabel(path) {
 * Extract tokens from text using jieba
 * Equivalent to Python: _extract_tokens_from_content
 */
-function extractTokensFromContent(content, jiebaInitialized) {
-	const cleanContent = content.replace(/<[^>]*>/g, " ");
-	if (!jiebaInitialized) return cleanContent.split(/\s+/).filter((w) => w.length > 1);
-	try {
-		return nodejieba.cut(cleanContent).filter((token) => {
-			if (token.length <= 1) return false;
-			if (/^\d+$/.test(token)) return false;
-			if (/^[^\w\u4e00-\u9fa5]+$/.test(token)) return false;
-			return true;
-		});
-	} catch {
-		return cleanContent.split(/\s+/).filter((w) => w.length > 1);
-	}
+function extractTokensFromContent(content) {
+	return content.replace(/<[^>]*>/g, " ").split(/\s+/).filter((w) => w.length > 1);
 }
 /**
 * Compute TF-IDF top keywords for a file
 * Equivalent to Python: _compute_tfidf_top_keywords
 */
-function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized) {
+function computeTfidfTopKeywords(fileChunks, allChunks, topK) {
 	if (fileChunks.length === 0) return [];
 	const fileTokens = [];
 	for (const chunk of fileChunks) {
-		const tokens = extractTokensFromContent(chunk.content, jiebaInitialized);
+		const tokens = extractTokensFromContent(chunk.content);
 		fileTokens.push(...tokens);
 	}
 	if (fileTokens.length === 0) return [];
@@ -60,7 +42,7 @@ function computeTfidfTopKeywords(fileChunks, allChunks, topK, jiebaInitialized)
 	}
 	const docFreq = /* @__PURE__ */ new Map();
 	for (const chunk of allChunks) {
-		const tokens = new Set(extractTokensFromContent(chunk.content, jiebaInitialized).map((t) => t.toLowerCase()));
+		const tokens = new Set(extractTokensFromContent(chunk.content).map((t) => t.toLowerCase()));
 		for (const token of tokens) docFreq.set(token, (docFreq.get(token) || 0) + 1);
 	}
 	const totalDocs = allChunks.length;
@@ -161,7 +143,7 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
 * Main function to build knowledge graph
 * Equivalent to Python: build_knowledge_graph
 */
-function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized, logger, kbId) {
+function buildKnowledgeGraph(chunks, connections, chunkStats, _jiebaInitialized, logger, kbId) {
 	logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
 	const chunkById = /* @__PURE__ */ new Map();
 	for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
@@ -178,7 +160,7 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
 			const type = chunk.metadata.type || "text";
 			typeCount[type] = (typeCount[type] || 0) + 1;
 		}
-		const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10, jiebaInitialized);
+		const topKeywords = computeTfidfTopKeywords(fileChunks, chunks, 10);
 		const importance = computeFileImportance(fileKey, fileChunks, chunks, chunkStats);
 		let topSummary = "";
 		for (const chunk of fileChunks) if (chunk.metadata.summary && typeof chunk.metadata.summary === "string") {

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { knowherePluginConfigSchema, readPersistedApiKey, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
+import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
 import { KnowhereStore } from "./store.js";
 import { createKnowhereToolFactory } from "./tools.js";
 import { KnowledgeGraphService } from "./kg-service.js";
@@ -10,7 +10,7 @@ const plugin = {
 	configSchema: knowherePluginConfigSchema,
 	register(api) {
 		const config = resolveKnowhereConfig(api);
-		const kgConfig = resolveKnowledgeGraphConfig(api.pluginConfig && typeof api.pluginConfig === "object" ? api.pluginConfig : {});
+		const kgConfig = resolveKnowledgeGraphConfig(resolveEffectivePluginConfig(api));
 		const store = new KnowhereStore({
 			rootDir: config.storageDir,
 			scopeMode: config.scopeMode,

package/dist/kg-service.d.ts CHANGED Viewed

@@ -9,11 +9,9 @@ export declare class KnowledgeGraphService {
     private readonly logger;
     private degradationMode;
     private buildQueues;
-    private jiebaInitialized;
     constructor(params: KnowledgeGraphServiceParams);
     initialize(): Promise<void>;
     private checkPythonEnvironment;
-    private initializeJieba;
     extractKeywords(text: string, topK?: number): Promise<string[]>;
     resolveKbId(context: ToolRuntimeContext): string | null;
     getKbPath(kbId: string): string;

package/dist/kg-service.js CHANGED Viewed

@@ -2,10 +2,9 @@ import { resolveStoredKnowhereResultRoot } from "./parser.js";
 import { buildConnections, init_connect_builder } from "./connect-builder.js";
 import { buildKnowledgeGraph } from "./graph-builder.js";
 import path from "node:path";
+import os from "node:os";
 import { spawn } from "node:child_process";
 import fs from "fs-extra";
-import os from "node:os";
-import * as nodejieba from "nodejieba";
 //#region src/kg-service.ts
 init_connect_builder();
 const DEFAULT_CONNECT_CONFIG = {
@@ -26,18 +25,11 @@ const DEFAULT_KG_CONFIG = {
 function formatUnknownError(error) {
 	return error instanceof Error ? error.message : String(error);
 }
-function extractKeywordText(item) {
-	if (typeof item === "string") return item;
-	if (typeof item === "number" || typeof item === "boolean" || typeof item === "bigint") return String(item);
-	if (typeof item === "object" && item !== null && "word" in item && typeof item.word === "string") return item.word;
-	return null;
-}
 var KnowledgeGraphService = class {
 	config;
 	logger;
 	degradationMode = "full";
 	buildQueues = /* @__PURE__ */ new Map();
-	jiebaInitialized = false;
 	constructor(params) {
 		this.config = {
 			...DEFAULT_KG_CONFIG,
@@ -58,18 +50,12 @@ var KnowledgeGraphService = class {
 		}
 		try {
 			await this.checkPythonEnvironment();
-			await this.initializeJieba();
 			this.degradationMode = "full";
 			this.logger.info("Knowledge graph service initialized in full mode");
 		} catch (error) {
 			this.logger.warn(`Knowledge graph initialization failed: ${error instanceof Error ? error.message : String(error)}`);
-			if (error.code === "PYTHON_MISSING") {
-				this.degradationMode = "disabled";
-				this.logger.warn("Python not found, knowledge graph disabled");
-			} else if (error.code === "NODEJIEBA_MISSING") {
-				this.degradationMode = "basic";
-				this.logger.warn("Nodejieba missing, using basic tokenization");
-			} else this.degradationMode = "disabled";
+			this.degradationMode = "disabled";
+			this.logger.warn("Python not found, knowledge graph disabled");
 		}
 	}
 	async checkPythonEnvironment() {
@@ -92,30 +78,8 @@ var KnowledgeGraphService = class {
 			});
 		});
 	}
-	async initializeJieba() {
-		try {
-			nodejieba.load();
-			this.jiebaInitialized = true;
-			this.logger.info("Nodejieba initialized successfully");
-		} catch {
-			const err = /* @__PURE__ */ new Error("Failed to initialize nodejieba");
-			err.code = "NODEJIEBA_MISSING";
-			throw err;
-		}
-	}
 	async extractKeywords(text, topK = 20) {
 		if (this.degradationMode === "disabled") return [];
-		if (this.degradationMode === "full" && this.jiebaInitialized) try {
-			const rawKeywords = nodejieba.extract(text, topK);
-			return (Array.isArray(rawKeywords) ? rawKeywords : []).map((item) => extractKeywordText(item)).filter((keyword) => keyword !== null).filter((kw) => {
-				if (kw.length <= 1) return false;
-				if (/^\d+$/.test(kw)) return false;
-				return true;
-			}).slice(0, topK);
-		} catch (error) {
-			this.logger.warn(`Jieba extraction failed, falling back to basic: ${formatUnknownError(error)}`);
-			this.degradationMode = "basic";
-		}
 		return text.split(/\s+/).filter((w) => w.length > 1).slice(0, topK);
 	}
 	resolveKbId(context) {
@@ -146,8 +110,6 @@ var KnowledgeGraphService = class {
 		await fs.ensureDir(docDir);
 		const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
 		await fs.copy(sourceResultRoot, docDir, { overwrite: true });
-		const keywordsPath = path.join(docDir, "keywords.json");
-		await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
 		const metadataPath = path.join(docDir, "metadata.json");
 		await fs.writeJSON(metadataPath, params.metadata, { spaces: 2 });
 		this.logger.info(`Document saved to knowledge base: kb=${params.kbId} doc=${params.docId}`);
@@ -176,7 +138,7 @@ var KnowledgeGraphService = class {
 			const docPath = path.join(kbPath, doc);
 			if ((await fs.stat(docPath)).isDirectory() && doc !== "knowledge_graph.json" && doc !== "chunk_stats.json" && doc !== "kb_metadata.json") docDirs.push(doc);
 		}
-		if (docDirs.length < 2) {
+		if (docDirs.length < 1) {
 			this.logger.info(`Not enough documents for graph building (need >=2, have ${docDirs.length}), skipping`);
 			return;
 		}
@@ -203,7 +165,7 @@ var KnowledgeGraphService = class {
 			const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
 			let chunkStats = {};
 			if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
-			const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, this.jiebaInitialized, this.logger, kbId);
+			const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, false, this.logger, kbId);
 			const graphFile = path.join(kbPath, "knowledge_graph.json");
 			await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
 			this.logger.info(`Knowledge graph saved to ${graphFile}`);

package/dist/tools.js CHANGED Viewed

@@ -7,6 +7,7 @@ import { KnowhereClient } from "./client.js";
 import { sendTrackerProgress } from "./tracker-progress.js";
 import fs from "node:fs/promises";
 import path from "node:path";
+import os from "node:os";
 //#region src/tools.ts
 const TERMINAL_JOB_STATUSES = new Set([
 	"cancelled",
@@ -784,6 +785,24 @@ function createImportCompletedJobTool(params) {
 				downloadedResult: importResult.downloadedResult
 			}, { overwrite });
 			params.api.logger.info(`knowhere: knowhere_import_completed_job stored imported document scope=${scope.label} jobId=${importResult.jobResult.job_id} docId=${document.id}`);
+			try {
+				const importKbId = params.kgService.resolveKbId(params.ctx);
+				if (importKbId && params.kgService.isEnabled()) {
+					params.api.logger.info(`knowhere: triggering KG build after import kbId=${importKbId}`);
+					buildKnowledgeGraphAsync({
+						kgService: params.kgService,
+						kbId: importKbId,
+						docId: document.id,
+						documentPayload: { downloadedResult: importResult.downloadedResult },
+						scope,
+						store: params.store,
+						ctx: params.ctx,
+						api: params.api
+					}).catch((e) => params.api.logger.error(`knowhere: KG build after import failed: ${formatErrorMessage(e)}`));
+				}
+			} catch (kgError) {
+				params.api.logger.warn(`knowhere: import KG trigger error: ${formatErrorMessage(kgError)}`);
+			}
 			return textResult([
 				"Import complete.",
 				...buildStoredDocumentSummaryLines({
@@ -914,6 +933,380 @@ function createKgQueryTool(params) {
 		}
 	};
 }
+const T2_KNOWHERE_HOME = path.join(os.homedir(), ".knowhere");
+async function t2FindDocDir(kbDir, docName) {
+	const exactPath = path.join(kbDir, docName);
+	try {
+		await fs.access(exactPath);
+		return exactPath;
+	} catch {}
+	let entries;
+	try {
+		entries = await fs.readdir(kbDir, { withFileTypes: true });
+	} catch {
+		return null;
+	}
+	for (const e of entries) if (e.isDirectory() && String(e.name).includes(docName)) return path.join(kbDir, String(e.name));
+	return null;
+}
+async function t2LoadChunks(docDir) {
+	for (const fname of ["chunks_slim.json", "chunks.json"]) try {
+		const raw = await fs.readFile(path.join(docDir, fname), "utf-8");
+		const data = JSON.parse(raw);
+		let chunks;
+		if (Array.isArray(data)) chunks = data;
+		else if (isRecord(data) && Array.isArray(data.chunks)) chunks = data.chunks;
+		else continue;
+		if (fname === "chunks.json") return chunks.map((c) => ({
+			type: c.type || "text",
+			path: c.path || "",
+			content: c.content || "",
+			summary: c.metadata?.summary || c.summary || ""
+		}));
+		return chunks;
+	} catch {
+		continue;
+	}
+	return [];
+}
+async function t2LoadRawChunks(docDir) {
+	try {
+		const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
+		const data = JSON.parse(raw);
+		if (Array.isArray(data)) return data;
+		if (isRecord(data) && Array.isArray(data.chunks)) return data.chunks;
+		return [];
+	} catch {
+		return [];
+	}
+}
+function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
+	const df = {};
+	const tf = {};
+	const totalDocs = rawChunks.length || 1;
+	for (const c of rawChunks) {
+		const tokens = Array.isArray(c.metadata?.tokens) ? c.metadata.tokens : [];
+		const keywords = Array.isArray(c.metadata?.keywords) ? c.metadata.keywords : [];
+		const allTerms = [...tokens, ...keywords];
+		const seen = /* @__PURE__ */ new Set();
+		for (const t of allTerms) {
+			if (!t || t.length <= 1) continue;
+			if (/^\d+[.,%]*$/.test(t)) continue;
+			const lower = t.toLowerCase();
+			tf[lower] = (tf[lower] || 0) + 1;
+			if (!seen.has(lower)) {
+				df[lower] = (df[lower] || 0) + 1;
+				seen.add(lower);
+			}
+		}
+	}
+	const scored = Object.entries(tf).map(([term, freq]) => {
+		return {
+			term,
+			score: freq * (Math.log(totalDocs / (df[term] || 1)) + 1)
+		};
+	});
+	scored.sort((a, b) => b.score - a.score);
+	return scored.slice(0, topK).map((s) => s.term);
+}
+function t2KeywordsNeedRepair(keywords) {
+	if (!Array.isArray(keywords) || keywords.length === 0) return true;
+	let bad = 0;
+	for (const kw of keywords) if (!kw || typeof kw === "string" && (kw.length <= 1 || /^\d+[.,%]*$/.test(kw) || /^[a-z]{1,2}$/i.test(kw))) bad++;
+	return bad >= keywords.length * .5;
+}
+function t2JsonResult(data) {
+	return {
+		content: [{
+			type: "text",
+			text: JSON.stringify(data, null, 2)
+		}],
+		details: {}
+	};
+}
+async function t2ListDocDirs(kbRoot) {
+	let entries;
+	try {
+		entries = await fs.readdir(kbRoot, { withFileTypes: true });
+	} catch {
+		return [];
+	}
+	const docs = [];
+	for (const e of entries) {
+		if (!e.isDirectory()) continue;
+		try {
+			await fs.access(path.join(kbRoot, String(e.name), "chunks.json"));
+			docs.push(String(e.name));
+		} catch {
+			continue;
+		}
+	}
+	return docs;
+}
+function createGetMapTool(_params) {
+	return {
+		name: "knowhere_get_map",
+		label: "Knowhere Get Map",
+		description: "获取知识库全局概览。查询知识时必须先调此工具，了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure 查看具体文档的章节目录。",
+		parameters: {
+			type: "object",
+			additionalProperties: false,
+			properties: { kbId: {
+				type: "string",
+				description: "Optional: specific KB ID. Leave empty to scan all."
+			} }
+		},
+		execute: async (_toolCallId, rawParams) => {
+			const kbId = readString((isRecord(rawParams) ? rawParams : {}).kbId) || "";
+			try {
+				await fs.access(T2_KNOWHERE_HOME);
+			} catch {
+				return textResult(`未找到知识库目录 ${T2_KNOWHERE_HOME}`);
+			}
+			const entries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
+			const kbs = [];
+			for (const e of entries) {
+				if (!e.isDirectory()) continue;
+				if (kbId && e.name !== kbId) continue;
+				const kbRoot = path.join(T2_KNOWHERE_HOME, e.name);
+				const kgPath = path.join(kbRoot, "knowledge_graph.json");
+				try {
+					const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
+					let kgDirty = false;
+					const files = g.files || {};
+					for (const [docName, info] of Object.entries(files)) if (t2KeywordsNeedRepair(info.top_keywords)) {
+						const rawChunks = await t2LoadRawChunks(path.join(kbRoot, docName));
+						if (rawChunks.length > 0) {
+							const repaired = t2ComputeTfIdfKeywords(rawChunks);
+							if (repaired.length > 0) {
+								info.top_keywords = repaired;
+								const types = {};
+								for (const c of rawChunks) {
+									const t = c.type || "text";
+									types[t] = (types[t] || 0) + 1;
+								}
+								info.types = types;
+								info.chunks_count = rawChunks.length;
+								kgDirty = true;
+							}
+						}
+					}
+					if (kgDirty) {
+						g.updated_at = (/* @__PURE__ */ new Date()).toISOString();
+						try {
+							await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
+						} catch {}
+					}
+					kbs.push({
+						kb_id: e.name,
+						version: g.version || "1.0",
+						updated_at: g.updated_at || "",
+						stats: g.stats || {},
+						files: g.files || {},
+						edges: g.edges || []
+					});
+				} catch {
+					const docs = await t2ListDocDirs(kbRoot);
+					if (docs.length > 0) kbs.push({
+						kb_id: e.name,
+						version: "pending",
+						files: Object.fromEntries(docs.map((d) => [d, {}])),
+						edges: []
+					});
+				}
+			}
+			if (kbs.length === 0) return textResult("未找到知识库。");
+			return t2JsonResult({
+				status: "ok",
+				knowledge_bases: kbs
+			});
+		}
+	};
+}
+function createGetStructureTool(_params) {
+	return {
+		name: "knowhere_get_structure",
+		label: "Knowhere Get Structure",
+		description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后，用此工具查看章节结构，然后用 knowhere_read_chunks 读取内容。",
+		parameters: {
+			type: "object",
+			additionalProperties: false,
+			properties: {
+				kbId: {
+					type: "string",
+					description: "Knowledge base ID (from knowhere_get_map result)"
+				},
+				docName: {
+					type: "string",
+					description: "Document name (supports fuzzy match)"
+				}
+			},
+			required: ["kbId", "docName"]
+		},
+		execute: async (_toolCallId, rawParams) => {
+			const paramsRecord = isRecord(rawParams) ? rawParams : {};
+			const kbId = readString(paramsRecord.kbId);
+			const docName = readString(paramsRecord.docName);
+			if (!kbId || !docName) throw new Error("kbId and docName are required.");
+			const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
+			if (!docDir) return textResult(`文档 '${docName}' 在 kb=${kbId} 中不存在`);
+			try {
+				const h = JSON.parse(await fs.readFile(path.join(docDir, "hierarchy.json"), "utf-8"));
+				return t2JsonResult({
+					status: "ok",
+					kb_id: kbId,
+					doc_name: path.basename(docDir),
+					hierarchy: h
+				});
+			} catch {
+				const chunks = await t2LoadChunks(docDir);
+				const paths = [...new Set(chunks.map((c) => c.path).filter(Boolean))].sort();
+				return t2JsonResult({
+					status: "ok",
+					kb_id: kbId,
+					doc_name: path.basename(docDir),
+					hierarchy: null,
+					chunk_paths: paths,
+					hint: "无 hierarchy.json，已返回 chunk 路径列表"
+				});
+			}
+		}
+	};
+}
+function createReadChunksTool(_params) {
+	return {
+		name: "knowhere_read_chunks",
+		label: "Knowhere Read Chunks",
+		description: "读取文档内容。先调 knowhere_get_structure 确定章节后，用此工具读取具体内容。可通过 sectionPath 过滤特定章节，减少 token 消耗。",
+		parameters: {
+			type: "object",
+			additionalProperties: false,
+			properties: {
+				kbId: {
+					type: "string",
+					description: "Knowledge base ID"
+				},
+				docName: {
+					type: "string",
+					description: "Document name"
+				},
+				sectionPath: {
+					type: "string",
+					description: "Optional: section path prefix to filter (e.g. '一、工程概况')"
+				},
+				maxChunks: {
+					type: "number",
+					description: "Max chunks to return (default 50)"
+				}
+			},
+			required: ["kbId", "docName"]
+		},
+		execute: async (_toolCallId, rawParams) => {
+			const paramsRecord = isRecord(rawParams) ? rawParams : {};
+			const kbId = readString(paramsRecord.kbId);
+			const docName = readString(paramsRecord.docName);
+			const sectionPath = readString(paramsRecord.sectionPath);
+			const maxChunks = readNumber(paramsRecord.maxChunks, 50);
+			if (!kbId || !docName) throw new Error("kbId and docName are required.");
+			const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
+			if (!docDir) return textResult(`文档 '${docName}' 不存在`);
+			let chunks = await t2LoadChunks(docDir);
+			if (sectionPath) chunks = chunks.filter((c) => c.path.includes(sectionPath));
+			const total = chunks.length;
+			const limit = maxChunks || 50;
+			chunks = chunks.slice(0, limit);
+			try {
+				const kgPath = path.join(T2_KNOWHERE_HOME, kbId, "knowledge_graph.json");
+				const g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
+				const dn = path.basename(docDir);
+				if (g.files?.[dn]) {
+					g.files[dn].hit_count = (g.files[dn].hit_count || 0) + 1;
+					g.files[dn].last_hit = (/* @__PURE__ */ new Date()).toISOString();
+					g.updated_at = (/* @__PURE__ */ new Date()).toISOString();
+					await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
+				}
+			} catch {}
+			return t2JsonResult({
+				status: "ok",
+				kb_id: kbId,
+				doc_name: path.basename(docDir),
+				section_path: sectionPath || null,
+				total_chunks: total,
+				returned: chunks.length,
+				truncated: total > limit,
+				chunks
+			});
+		}
+	};
+}
+function createDiscoverFilesTool(_params) {
+	return {
+		name: "knowhere_discover_files",
+		label: "Knowhere Discover Files",
+		description: "在所有知识库文档中搜索关键词，返回命中文件和次数。用于和 knowhere_get_map 做并集，避免遗漏相关文件。只返回文件名，不返回内容。",
+		parameters: {
+			type: "object",
+			additionalProperties: false,
+			properties: {
+				query: {
+					type: "string",
+					description: "Search keywords"
+				},
+				kbId: {
+					type: "string",
+					description: "Optional: limit to specific KB"
+				}
+			},
+			required: ["query"]
+		},
+		execute: async (_toolCallId, rawParams) => {
+			const paramsRecord = isRecord(rawParams) ? rawParams : {};
+			const query = readString(paramsRecord.query);
+			const kbId = readString(paramsRecord.kbId);
+			if (!query) throw new Error("query is required.");
+			const terms = query.split(/[\s,;，；。！？、\-/]+/).filter((t) => t.length > 1);
+			if (terms.length === 0) return textResult("查询词为空");
+			try {
+				await fs.access(T2_KNOWHERE_HOME);
+			} catch {
+				return textResult("未找到知识库。");
+			}
+			const results = [];
+			const kbEntries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
+			for (const kbE of kbEntries) {
+				if (!kbE.isDirectory()) continue;
+				if (kbId && kbE.name !== kbId) continue;
+				let docEntries;
+				try {
+					docEntries = await fs.readdir(path.join(T2_KNOWHERE_HOME, String(kbE.name)), { withFileTypes: true });
+				} catch {
+					continue;
+				}
+				for (const docE of docEntries) {
+					if (!docE.isDirectory()) continue;
+					const chunks = await t2LoadChunks(path.join(T2_KNOWHERE_HOME, String(kbE.name), String(docE.name)));
+					let hits = 0;
+					for (const c of chunks) {
+						const text = `${c.content} ${c.summary}`;
+						for (const t of terms) if (text.includes(t)) hits++;
+					}
+					if (hits > 0) results.push({
+						kb_id: String(kbE.name),
+						doc_name: String(docE.name),
+						hit_count: hits
+					});
+				}
+			}
+			results.sort((a, b) => b.hit_count - a.hit_count);
+			return t2JsonResult({
+				status: "ok",
+				query,
+				terms,
+				discovered_files: results
+			});
+		}
+	};
+}
 function createKnowhereToolFactory(params) {
 	return (ctx) => [
 		createIngestTool({
@@ -939,6 +1332,7 @@ function createKnowhereToolFactory(params) {
 			api: params.api,
 			config: params.config,
 			store: params.store,
+			kgService: params.kgService,
 			ctx
 		}),
 		createSetApiKeyTool({
@@ -954,7 +1348,11 @@ function createKnowhereToolFactory(params) {
 			api: params.api,
 			kgService: params.kgService,
 			ctx
-		})
+		}),
+		createGetMapTool({ api: params.api }),
+		createGetStructureTool({ api: params.api }),
+		createReadChunksTool({ api: params.api }),
+		createDiscoverFilesTool({ api: params.api })
 	];
 }
 //#endregion

package/openclaw.plugin.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "name": "Knowhere",
   "description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
   "skills": ["./skills"],
-  "version": "0.2.4",
+  "version": "0.2.5",
   "uiHints": {
     "apiKey": {
       "label": "Knowhere API Key",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ontos-ai/knowhere-claw",
-  "version": "0.2.4",
+  "version": "0.2.5",
   "description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
   "files": [
     "dist/",
@@ -40,8 +40,7 @@
   "dependencies": {
     "@knowhere-ai/sdk": "^0.1.1",
     "fflate": "^0.8.2",
-    "fs-extra": "^11.2.0",
-    "nodejieba": "^2.6.0"
+    "fs-extra": "^11.2.0"
   },
   "devDependencies": {
     "@changesets/changelog-github": "^0.6.0",