@ontos-ai/knowhere-claw 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -122,6 +122,12 @@ Within each scope, the plugin keeps:
122
122
  4. When needed, the agent can preview structure, search chunks, read raw result
123
123
  files, or clear stored documents.
124
124
 
125
+ ## Schema v2.1 Media Handling
126
+
127
+ - `knowhere_read_chunks` now treats `[images/...]` and `[tables/...]` path references in `chunks.json` content as the primary media enrichment path.
128
+ - Standalone `image` and `table` chunks resolve their real asset locations from `metadata.file_path`.
129
+ - Assets without `metadata.file_path` are ignored by the runtime enrichment and delivery pipeline.
130
+
125
131
  ## Troubleshooting
126
132
 
127
133
  - Missing API key: `apiKey` config is optional. You can set
@@ -0,0 +1 @@
1
+ export {};
@@ -22,9 +22,13 @@ const KNOWHERE_PROMPT_CONTEXT = [
22
22
  "This typically means the source file was corrupt, empty, or required authentication that was not provided.",
23
23
  "",
24
24
  "### Knowledge Retrieval",
25
- "When answering questions about documents or the knowledge base:",
26
- "- ✅ Use `knowhere_get_map`, `knowhere_get_structure`, `knowhere_read_chunks`, `knowhere_kg_query`",
25
+ "Use this **single retrieval path** do not skip steps:",
26
+ "1. `knowhere_get_map` get the KG overview: which files exist, their keywords, importance, and cross-file edges.",
27
+ "2. `knowhere_get_structure` — inspect the chapter/section hierarchy of a specific document.",
28
+ "3. `knowhere_read_chunks` — fetch content for a specific section (use `sectionPath` to narrow scope).",
29
+ "If you're unsure which file contains the answer, also call `knowhere_discover_files` for keyword-based file discovery.",
27
30
  "- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`",
31
+ "- ❌ Do NOT skip `knowhere_get_map` and jump directly to `knowhere_read_chunks`",
28
32
  "",
29
33
  "### 📷 Image Delivery",
30
34
  "**`knowhere_read_chunks` has built-in automatic image delivery.** When it returns chunks containing images,",
package/dist/index.js CHANGED
@@ -45,11 +45,10 @@ const plugin = {
45
45
  "knowhere_get_job_status",
46
46
  "knowhere_import_completed_job",
47
47
  "knowhere_set_api_key",
48
- "knowhere_kg_list",
49
- "knowhere_kg_query",
50
48
  "knowhere_get_map",
51
49
  "knowhere_get_structure",
52
50
  "knowhere_read_chunks",
51
+ "knowhere_view_image",
53
52
  "knowhere_discover_files",
54
53
  "knowhere_delete_document"
55
54
  ] });
package/dist/tools.d.ts CHANGED
@@ -2,9 +2,69 @@ import { type AnyAgentTool, type OpenClawPluginApi } from "openclaw/plugin-sdk/c
2
2
  import { KnowhereStore } from "./store";
3
3
  import type { KnowledgeGraphService } from "./kg-service";
4
4
  import type { ResolvedKnowhereConfig, ToolRuntimeContext } from "./types";
5
+ interface T2ChunkRelation {
6
+ relation?: string;
7
+ target?: string;
8
+ ref?: string;
9
+ [key: string]: unknown;
10
+ }
11
+ interface T2ChunkMetadata {
12
+ summary?: string;
13
+ keywords?: string[];
14
+ tokens?: string[];
15
+ file_path?: string;
16
+ connect_to?: T2ChunkRelation[];
17
+ [key: string]: unknown;
18
+ }
19
+ interface T2ChunkSlim {
20
+ chunk_id?: string;
21
+ type: string;
22
+ path: string;
23
+ content: string;
24
+ summary: string;
25
+ file_path?: string;
26
+ connect_to?: T2ChunkRelation[];
27
+ metadata?: T2ChunkMetadata;
28
+ }
29
+ interface T2EnrichResult {
30
+ chunks: T2ChunkSlim[];
31
+ /** Image paths that were inlined into text via placeholder replacement (need delivery). */
32
+ inlinedImagePaths: ReadonlySet<string>;
33
+ }
34
+ /**
35
+ * Runtime-only enrichment of chunks returned to the AI:
36
+ * 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
37
+ * 2. Normalize standalone image/table chunks to file_path-based content
38
+ * 3. Remove standalone table/image chunks that were already inlined into text
39
+ *
40
+ * Does NOT modify chunks.json on disk.
41
+ */
42
+ declare function t2EnrichChunks(chunks: T2ChunkSlim[], docDir: string): Promise<T2EnrichResult>;
43
+ interface T2ResolvedAsset {
44
+ chunk_id: string;
45
+ type: "image" | "table";
46
+ relative_path: string;
47
+ summary: string;
48
+ mode: "image_sent" | "image_failed" | "table_inline";
49
+ html_content?: string;
50
+ }
51
+ declare function t2ResolveAssets(params: {
52
+ api: OpenClawPluginApi;
53
+ store: KnowhereStore;
54
+ ctx: ToolRuntimeContext;
55
+ docDir: string;
56
+ returnedChunks: T2ChunkSlim[];
57
+ /** Image paths inlined by t2EnrichChunks that still need channel delivery. */
58
+ enrichedImagePaths?: ReadonlySet<string>;
59
+ }): Promise<T2ResolvedAsset[]>;
5
60
  export declare function createKnowhereToolFactory(params: {
6
61
  api: OpenClawPluginApi;
7
62
  config: ResolvedKnowhereConfig;
8
63
  store: KnowhereStore;
9
64
  kgService: KnowledgeGraphService;
10
65
  }): (ctx: ToolRuntimeContext) => AnyAgentTool[];
66
+ export declare const __internal: {
67
+ t2EnrichChunks: typeof t2EnrichChunks;
68
+ t2ResolveAssets: typeof t2ResolveAssets;
69
+ };
70
+ export {};
package/dist/tools.js CHANGED
@@ -964,13 +964,7 @@ async function t2LoadChunks(docDir) {
964
964
  if (Array.isArray(data)) chunks = data;
965
965
  else if (isRecord(data) && Array.isArray(data.chunks)) chunks = data.chunks;
966
966
  else continue;
967
- if (fname === "chunks.json") return chunks.map((c) => ({
968
- type: c.type || "text",
969
- path: c.path || "",
970
- content: c.content || "",
971
- summary: c.metadata?.summary || c.summary || ""
972
- }));
973
- return chunks;
967
+ return chunks.map((c) => t2ToSlimChunk(c));
974
968
  } catch {
975
969
  continue;
976
970
  }
@@ -979,7 +973,68 @@ async function t2LoadChunks(docDir) {
979
973
  function t2NormalizePath(s) {
980
974
  return s.replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 65248)).replace(/[\s\u3000\u00A0]+/g, "").toLowerCase();
981
975
  }
982
- const PLACEHOLDER_RE = /(?:IMAGE|TABLE)_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
976
+ const PATH_REF_RE = /\[((?:images|tables)\/[^\]\n]+)\]/g;
977
+ function t2ReadConnectTo(value) {
978
+ if (!Array.isArray(value)) return;
979
+ const relations = value.filter(isRecord);
980
+ return relations.length > 0 ? relations : void 0;
981
+ }
982
+ function t2GetChunkFilePath(chunk) {
983
+ if (typeof chunk.file_path === "string" && chunk.file_path) return chunk.file_path;
984
+ if (typeof chunk.metadata?.file_path === "string" && chunk.metadata.file_path) return chunk.metadata.file_path;
985
+ }
986
+ function t2GetChunkAssetPath(chunk) {
987
+ return t2GetChunkFilePath(chunk);
988
+ }
989
+ function t2ToSlimChunk(chunk) {
990
+ const connectTo = t2ReadConnectTo(chunk.metadata?.connect_to);
991
+ const filePath = t2GetChunkFilePath(chunk);
992
+ return {
993
+ chunk_id: chunk.chunk_id || void 0,
994
+ type: chunk.type || "text",
995
+ path: chunk.path || "",
996
+ content: chunk.content || "",
997
+ summary: chunk.metadata?.summary || chunk.summary || "",
998
+ file_path: filePath,
999
+ connect_to: connectTo,
1000
+ metadata: chunk.metadata ? {
1001
+ ...chunk.metadata,
1002
+ file_path: filePath,
1003
+ connect_to: connectTo
1004
+ } : void 0
1005
+ };
1006
+ }
1007
+ function t2HydrateChunk(chunk, idToRaw, pathToRaw) {
1008
+ const raw = (chunk.chunk_id ? idToRaw.get(chunk.chunk_id) : void 0) || (chunk.path ? pathToRaw.get(chunk.path) : void 0);
1009
+ if (!raw) return chunk;
1010
+ const rawFilePath = t2GetChunkFilePath(raw);
1011
+ const connectTo = chunk.connect_to || t2ReadConnectTo(raw.metadata?.connect_to);
1012
+ return {
1013
+ ...chunk,
1014
+ chunk_id: chunk.chunk_id || raw.chunk_id,
1015
+ file_path: chunk.file_path || rawFilePath,
1016
+ connect_to: connectTo,
1017
+ metadata: {
1018
+ ...raw.metadata || {},
1019
+ ...chunk.metadata || {},
1020
+ file_path: chunk.file_path || rawFilePath,
1021
+ connect_to: connectTo
1022
+ }
1023
+ };
1024
+ }
1025
+ async function t2ReadTableHtml(docDir, relativePath) {
1026
+ try {
1027
+ return await fs.readFile(path.join(docDir, relativePath), "utf-8");
1028
+ } catch {
1029
+ return null;
1030
+ }
1031
+ }
1032
+ function t2HasUnresolvedMediaReference(text) {
1033
+ PATH_REF_RE.lastIndex = 0;
1034
+ const hasPathRef = PATH_REF_RE.test(text);
1035
+ PATH_REF_RE.lastIndex = 0;
1036
+ return hasPathRef;
1037
+ }
983
1038
  async function t2LoadRawChunks(docDir) {
984
1039
  try {
985
1040
  const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
@@ -993,62 +1048,59 @@ async function t2LoadRawChunks(docDir) {
993
1048
  }
994
1049
  /**
995
1050
  * Runtime-only enrichment of chunks returned to the AI:
996
- * 1. Replace IMAGE_uuid_IMAGE placeholders with [📷 path] in text chunks
997
- * 2. Replace TABLE_uuid_TABLE placeholders with actual HTML content in text chunks
998
- * 3. Remove standalone table chunks that were inlined via placeholders
999
- * 4. Strip self-referencing placeholders from image/table chunk content & summary
1051
+ * 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
1052
+ * 2. Normalize standalone image/table chunks to file_path-based content
1053
+ * 3. Remove standalone table/image chunks that were already inlined into text
1000
1054
  *
1001
1055
  * Does NOT modify chunks.json on disk.
1002
1056
  */
1003
1057
  async function t2EnrichChunks(chunks, docDir) {
1004
1058
  const rawChunks = await t2LoadRawChunks(docDir);
1005
1059
  const idToRaw = /* @__PURE__ */ new Map();
1006
- for (const rc of rawChunks) if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
1007
- const manifestPaths = /* @__PURE__ */ new Map();
1008
- try {
1009
- const manifest = JSON.parse(await fs.readFile(path.join(docDir, "manifest.json"), "utf-8"));
1010
- if (isRecord(manifest) && isRecord(manifest.files)) {
1011
- const files = manifest.files;
1012
- for (const entry of Array.isArray(files.images) ? files.images : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
1013
- type: "image",
1014
- filePath: entry.file_path
1015
- });
1016
- for (const entry of Array.isArray(files.tables) ? files.tables : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
1017
- type: "table",
1018
- filePath: entry.file_path
1019
- });
1020
- }
1021
- } catch {}
1060
+ const pathToRaw = /* @__PURE__ */ new Map();
1061
+ for (const rc of rawChunks) {
1062
+ if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
1063
+ if (rc.path) pathToRaw.set(rc.path, rc);
1064
+ }
1065
+ chunks = chunks.map((chunk) => t2HydrateChunk(chunk, idToRaw, pathToRaw));
1022
1066
  const inlinedTablePaths = /* @__PURE__ */ new Set();
1023
1067
  const inlinedImagePaths = /* @__PURE__ */ new Set();
1024
1068
  for (const chunk of chunks) {
1025
- if (!chunk.content) continue;
1026
- PLACEHOLDER_RE.lastIndex = 0;
1027
- if (PLACEHOLDER_RE.test(chunk.content)) {
1028
- PLACEHOLDER_RE.lastIndex = 0;
1029
- chunk.content = await replacePlaceholders(chunk.content, idToRaw, docDir, chunk.type === "text" ? inlinedTablePaths : void 0, chunk.type === "text" ? inlinedImagePaths : void 0, manifestPaths);
1030
- PLACEHOLDER_RE.lastIndex = 0;
1031
- if (chunk.type !== "text" && chunk.path && PLACEHOLDER_RE.test(chunk.content)) {
1032
- if (chunk.type === "table") try {
1033
- const html = await fs.readFile(path.join(docDir, chunk.path), "utf-8");
1034
- chunk.content = chunk.content.replace(PLACEHOLDER_RE, `\n${html.slice(0, 8e3)}\n`);
1035
- } catch {
1036
- chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📊 ${chunk.path}]`);
1037
- }
1038
- else if (chunk.type === "image") chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📷 ${chunk.path}]`);
1039
- }
1069
+ const relativePath = t2GetChunkAssetPath(chunk);
1070
+ if (!relativePath) continue;
1071
+ chunk.file_path = relativePath;
1072
+ chunk.metadata = {
1073
+ ...chunk.metadata || {},
1074
+ file_path: relativePath,
1075
+ connect_to: chunk.connect_to
1076
+ };
1077
+ if (chunk.type === "image") {
1078
+ chunk.content = `[📷 ${relativePath}]`;
1079
+ continue;
1040
1080
  }
1041
- if (chunk.summary) {
1042
- PLACEHOLDER_RE.lastIndex = 0;
1043
- if (PLACEHOLDER_RE.test(chunk.summary)) {
1044
- PLACEHOLDER_RE.lastIndex = 0;
1045
- chunk.summary = await replacePlaceholders(chunk.summary, idToRaw, docDir, void 0, void 0, manifestPaths);
1081
+ if (chunk.type === "table") {
1082
+ const html = await t2ReadTableHtml(docDir, relativePath);
1083
+ if (html) chunk.content = html.slice(0, 8e3);
1084
+ else if (!chunk.content) chunk.content = `[📊 ${relativePath}]`;
1085
+ }
1086
+ }
1087
+ for (const chunk of chunks) {
1088
+ const relativePath = t2GetChunkAssetPath(chunk);
1089
+ if (chunk.content) {
1090
+ if (chunk.type === "text") chunk.content = await replacePathReferences(chunk.content, docDir, inlinedTablePaths, inlinedImagePaths);
1091
+ if (chunk.type !== "text" && relativePath && t2HasUnresolvedMediaReference(chunk.content)) {
1092
+ if (chunk.type === "table") {
1093
+ const html = await t2ReadTableHtml(docDir, relativePath);
1094
+ chunk.content = html ? html.slice(0, 8e3) : `[📊 ${relativePath}]`;
1095
+ } else if (chunk.type === "image") chunk.content = `[📷 ${relativePath}]`;
1046
1096
  }
1047
1097
  }
1098
+ if (chunk.summary) chunk.summary = await replacePathReferences(chunk.summary, docDir);
1048
1099
  }
1049
1100
  chunks = chunks.filter((c) => {
1050
- if (c.type === "table" && inlinedTablePaths.has(c.path)) return false;
1051
- if (c.type === "image" && inlinedImagePaths.has(c.path)) return false;
1101
+ const relativePath = t2GetChunkAssetPath(c) || "";
1102
+ if (c.type === "table" && relativePath && inlinedTablePaths.has(relativePath)) return false;
1103
+ if (c.type === "image" && relativePath && inlinedImagePaths.has(relativePath)) return false;
1052
1104
  return true;
1053
1105
  });
1054
1106
  return {
@@ -1056,50 +1108,33 @@ async function t2EnrichChunks(chunks, docDir) {
1056
1108
  inlinedImagePaths
1057
1109
  };
1058
1110
  }
1059
- async function replacePlaceholders(text, idToRaw, docDir, inlinedTablePaths, inlinedImagePaths, manifestPaths) {
1111
+ async function replacePathReferences(text, docDir, inlinedTablePaths, inlinedImagePaths) {
1060
1112
  const matches = [];
1061
- const re = /(?:(IMAGE|TABLE))_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
1062
- let m;
1063
- while ((m = re.exec(text)) !== null) matches.push({
1064
- full: m[0],
1065
- type: m[1],
1066
- id: m[2],
1067
- start: m.index,
1068
- end: m.index + m[0].length
1113
+ let match;
1114
+ PATH_REF_RE.lastIndex = 0;
1115
+ while ((match = PATH_REF_RE.exec(text)) !== null) matches.push({
1116
+ full: match[0],
1117
+ relativePath: match[1],
1118
+ start: match.index,
1119
+ end: match.index + match[0].length
1069
1120
  });
1121
+ PATH_REF_RE.lastIndex = 0;
1070
1122
  if (matches.length === 0) return text;
1071
1123
  const replacements = [];
1072
- for (const match of matches) {
1073
- const raw = idToRaw.get(match.id);
1074
- let resolvedPath = raw?.path;
1075
- if (!resolvedPath && manifestPaths) {
1076
- const mEntry = manifestPaths.get(match.id);
1077
- if (mEntry) resolvedPath = mEntry.filePath;
1078
- }
1079
- if (!resolvedPath) {
1080
- replacements.push(match.full);
1124
+ for (const ref of matches) {
1125
+ if (ref.relativePath.startsWith("images/")) {
1126
+ replacements.push(`[📷 ${ref.relativePath}]`);
1127
+ inlinedImagePaths?.add(ref.relativePath);
1081
1128
  continue;
1082
1129
  }
1083
- if (match.type === "IMAGE") {
1084
- replacements.push(`[📷 ${resolvedPath}]`);
1085
- inlinedImagePaths?.add(resolvedPath);
1086
- } else {
1087
- const htmlPath = path.join(docDir, resolvedPath);
1088
- try {
1089
- const html = await fs.readFile(htmlPath, "utf-8");
1090
- replacements.push(`\n${html.slice(0, 8e3)}\n`);
1091
- inlinedTablePaths?.add(resolvedPath);
1092
- } catch {
1093
- const tableContent = raw?.content || "";
1094
- if (tableContent && tableContent.includes("<")) {
1095
- replacements.push(`\n${tableContent}\n`);
1096
- inlinedTablePaths?.add(resolvedPath);
1097
- } else replacements.push(`[📊 ${resolvedPath}]`);
1098
- }
1099
- }
1130
+ const html = await t2ReadTableHtml(docDir, ref.relativePath);
1131
+ if (html) {
1132
+ replacements.push(`\n${html.slice(0, 8e3)}\n`);
1133
+ inlinedTablePaths?.add(ref.relativePath);
1134
+ } else replacements.push(`[📊 ${ref.relativePath}]`);
1100
1135
  }
1101
1136
  let result = text;
1102
- for (let i = matches.length - 1; i >= 0; i--) result = result.slice(0, matches[i].start) + replacements[i] + result.slice(matches[i].end);
1137
+ for (let index = matches.length - 1; index >= 0; index -= 1) result = result.slice(0, matches[index].start) + replacements[index] + result.slice(matches[index].end);
1103
1138
  return result;
1104
1139
  }
1105
1140
  function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
@@ -1221,14 +1256,17 @@ async function t2ResolveAssets(params) {
1221
1256
  params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
1222
1257
  }
1223
1258
  };
1224
- for (const chunk of params.returnedChunks) if ((chunk.type === "image" || chunk.type === "table") && chunk.path) {
1225
- if (chunk.type === "table" && chunk.content && !/TABLE_[a-f0-9-]+_TABLE/.test(chunk.content)) continue;
1226
- await resolveOne({
1227
- chunkId: chunk.path,
1228
- type: chunk.type,
1229
- relativePath: chunk.path,
1230
- summary: chunk.summary || chunk.content?.slice(0, 200) || ""
1231
- });
1259
+ for (const chunk of params.returnedChunks) {
1260
+ const relativePath = t2GetChunkAssetPath(chunk);
1261
+ if ((chunk.type === "image" || chunk.type === "table") && relativePath) {
1262
+ if (chunk.type === "table" && chunk.content && !t2HasUnresolvedMediaReference(chunk.content)) continue;
1263
+ await resolveOne({
1264
+ chunkId: chunk.chunk_id || relativePath,
1265
+ type: chunk.type,
1266
+ relativePath,
1267
+ summary: chunk.summary || chunk.content?.slice(0, 200) || ""
1268
+ });
1269
+ }
1232
1270
  }
1233
1271
  if (params.enrichedImagePaths && params.enrichedImagePaths.size > 0) for (const relativePath of params.enrichedImagePaths) {
1234
1272
  if (processedPaths.has(path.join(params.docDir, relativePath))) continue;
package/dist/types.d.ts CHANGED
@@ -232,7 +232,8 @@ export interface FileEdge {
232
232
  }>;
233
233
  }
234
234
  /**
235
- * File metadata in knowledge graph (matches SKILL.md schema)
235
+ * File metadata in knowledge graph (v2.0 schema — matches graph-builder.ts output).
236
+ * `hit_count` and `last_hit` are maintained at runtime by `knowhere_read_chunks`.
236
237
  */
237
238
  export interface FileMetadata {
238
239
  chunks_count: number;
@@ -240,12 +241,22 @@ export interface FileMetadata {
240
241
  top_keywords: string[];
241
242
  top_summary: string;
242
243
  importance: number;
244
+ /** ISO timestamp of when this file entry was first created in the graph. */
245
+ created_at: string;
246
+ /** Number of times chunks from this file have been read via knowhere_read_chunks. */
247
+ hit_count?: number;
248
+ /** ISO timestamp of the last knowhere_read_chunks access for this file. */
249
+ last_hit?: string;
243
250
  }
244
251
  /**
245
- * Knowledge graph structure (matches SKILL.md schema)
252
+ * Knowledge graph structure (v2.0 schema — matches graph-builder.ts output).
246
253
  */
247
254
  export interface KnowledgeGraph {
248
255
  version: string;
256
+ /** ISO timestamp of the last graph build or partial update. */
257
+ updated_at: string;
258
+ /** Knowledge base ID this graph belongs to. */
259
+ kb_id: string;
249
260
  stats: {
250
261
  total_files: number;
251
262
  total_chunks: number;
@@ -3,7 +3,7 @@
3
3
  "name": "Knowhere",
4
4
  "description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
5
5
  "skills": ["./skills"],
6
- "version": "0.2.8",
6
+ "version": "0.2.9",
7
7
  "uiHints": {
8
8
  "apiKey": {
9
9
  "label": "Knowhere API Key",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontos-ai/knowhere-claw",
3
- "version": "0.2.8",
3
+ "version": "0.2.9",
4
4
  "description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
5
5
  "files": [
6
6
  "dist/",
@@ -89,17 +89,19 @@ All knowledge data lives under `~/.knowhere/{kb_id}/`:
89
89
  └── {docId} → ../global/documents/{docId} # Symlink to Store
90
90
  ```
91
91
 
92
- ### Strategy: Prefer tools, fall back to files
92
+ ### Strategy: Use the Tier-2 retrieval tools
93
93
 
94
- #### If `knowhere_kg_list` / `knowhere_kg_query` tools are available → use them
94
+ The canonical retrieval path is **always** the Tier-2 tool chain — do not skip steps:
95
95
 
96
- These tools provide efficient access to the knowledge graph:
96
+ 1. `knowhere_get_map` — get the full KG overview: which files exist, their keywords, importance scores, and cross-file edges. Pass `kbId` if known, or leave empty to scan all knowledge bases.
97
+ 2. `knowhere_discover_files` — if you're unsure which file contains the answer, run a keyword search across all KB documents and merge with the `get_map` results.
98
+ 3. `knowhere_get_structure` — inspect the chapter/section hierarchy of the most relevant document.
99
+ 4. `knowhere_read_chunks` — fetch the actual content. Use `sectionPath` to narrow to the specific chapter and minimize token usage.
97
100
 
98
- 1. `knowhere_kg_list` — list all available knowledge bases
99
- 2. `knowhere_kg_query(kbId)` returns the full knowledge graph (files, keywords, edges)
100
- 3. Then read individual `chunks.json` files with your file reading tool for detailed content
101
+ Do **not** use `exec` or shell commands to read `~/.knowhere/` files directly.
102
+ Do **not** skip `knowhere_get_map` and jump straight to `knowhere_read_chunks`.
101
103
 
102
- #### If no KG tools are available → self-navigate using file tools
104
+ #### If no Knowhere tools are available → self-navigate using file tools
103
105
 
104
106
  Follow this pattern — do NOT explore the filesystem blindly:
105
107
 
@@ -115,13 +117,17 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
115
117
  ```json
116
118
  {
117
119
  "version": "2.0",
118
- "stats": { "total_files": 5, "total_chunks": 327 },
120
+ "updated_at": "2026-04-09T10:00:00.000Z",
121
+ "kb_id": "telegram",
122
+ "stats": { "total_files": 5, "total_chunks": 327, "total_cross_file_edges": 12 },
119
123
  "files": {
120
124
  "report.docx": {
121
125
  "chunks_count": 198,
122
126
  "types": { "text": 135, "table": 21, "image": 42 },
123
127
  "top_keywords": ["excavation", "retaining", "construction"],
124
- "importance": 0.85
128
+ "top_summary": "Construction safety report for the Lujiazui project.",
129
+ "importance": 0.85,
130
+ "created_at": "2026-04-09T08:00:00.000Z"
125
131
  }
126
132
  },
127
133
  "edges": [
@@ -129,8 +135,16 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
129
135
  "source": "file_A.docx",
130
136
  "target": "file_B.pdf",
131
137
  "connection_count": 20,
138
+ "avg_score": 0.91,
132
139
  "top_connections": [
133
- { "source_chunk": "Chapter 3", "target_chunk": "Safety Policy", "score": 1.0 }
140
+ {
141
+ "source_chunk": "Chapter 3",
142
+ "source_id": "uuid-a",
143
+ "target_chunk": "Safety Policy",
144
+ "target_id": "uuid-b",
145
+ "relation": "keyword",
146
+ "score": 1.0
147
+ }
134
148
  ]
135
149
  }
136
150
  ]
@@ -179,8 +193,8 @@ Check `edges` from Step 1 for cross-document connections. If related files weren
179
193
 
180
194
  When the user asks to "delete", "remove", or "forget" a specific document:
181
195
 
182
- 1. Use `knowhere_kg_query` to search the Knowledge Graph to find the correct `docId` that uniquely identifies the document.
183
- 2. If the user provided a filename, use it to disambiguate and cross-check multiple hits.
196
+ 1. Use `knowhere_get_map` to get an overview of all files in the knowledge base, then identify the correct `docId` that uniquely corresponds to the document the user named.
197
+ 2. If the user provided a filename, use it to disambiguate across multiple hits.
184
198
  3. Call `knowhere_delete_document` with the discovered `docId`.
185
199
 
186
200
  The `knowhere_delete_document` tool natively handles all internal consistency logic: