@ontos-ai/knowhere-claw 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/__tests__/read-chunks-schema-v21.test.d.ts +1 -0
- package/dist/agent-hooks.js +6 -2
- package/dist/index.js +1 -2
- package/dist/tools.d.ts +60 -0
- package/dist/tools.js +132 -94
- package/dist/types.d.ts +13 -2
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/skills/knowhere_memory/SKILL.md +26 -12
package/README.md
CHANGED
|
@@ -122,6 +122,12 @@ Within each scope, the plugin keeps:
|
|
|
122
122
|
4. When needed, the agent can preview structure, search chunks, read raw result
|
|
123
123
|
files, or clear stored documents.
|
|
124
124
|
|
|
125
|
+
## Schema v2.1 Media Handling
|
|
126
|
+
|
|
127
|
+
- `knowhere_read_chunks` now treats `[images/...]` and `[tables/...]` path references in `chunks.json` content as the primary media enrichment path.
|
|
128
|
+
- Standalone `image` and `table` chunks resolve their real asset locations from `metadata.file_path`.
|
|
129
|
+
- Assets without `metadata.file_path` are ignored by the runtime enrichment and delivery pipeline.
|
|
130
|
+
|
|
125
131
|
## Troubleshooting
|
|
126
132
|
|
|
127
133
|
- Missing API key: `apiKey` config is optional. You can set
|
package/dist/__tests__/read-chunks-schema-v21.test.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/agent-hooks.js
CHANGED
|
@@ -22,9 +22,13 @@ const KNOWHERE_PROMPT_CONTEXT = [
|
|
|
22
22
|
"This typically means the source file was corrupt, empty, or required authentication that was not provided.",
|
|
23
23
|
"",
|
|
24
24
|
"### Knowledge Retrieval",
|
|
25
|
-
"
|
|
26
|
-
"
|
|
25
|
+
"Use this **single retrieval path** — do not skip steps:",
|
|
26
|
+
"1. `knowhere_get_map` — get the KG overview: which files exist, their keywords, importance, and cross-file edges.",
|
|
27
|
+
"2. `knowhere_get_structure` — inspect the chapter/section hierarchy of a specific document.",
|
|
28
|
+
"3. `knowhere_read_chunks` — fetch content for a specific section (use `sectionPath` to narrow scope).",
|
|
29
|
+
"If you're unsure which file contains the answer, also call `knowhere_discover_files` for keyword-based file discovery.",
|
|
27
30
|
"- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`",
|
|
31
|
+
"- ❌ Do NOT skip `knowhere_get_map` and jump directly to `knowhere_read_chunks`",
|
|
28
32
|
"",
|
|
29
33
|
"### 📷 Image Delivery",
|
|
30
34
|
"**`knowhere_read_chunks` has built-in automatic image delivery.** When it returns chunks containing images,",
|
package/dist/index.js
CHANGED
|
@@ -45,11 +45,10 @@ const plugin = {
|
|
|
45
45
|
"knowhere_get_job_status",
|
|
46
46
|
"knowhere_import_completed_job",
|
|
47
47
|
"knowhere_set_api_key",
|
|
48
|
-
"knowhere_kg_list",
|
|
49
|
-
"knowhere_kg_query",
|
|
50
48
|
"knowhere_get_map",
|
|
51
49
|
"knowhere_get_structure",
|
|
52
50
|
"knowhere_read_chunks",
|
|
51
|
+
"knowhere_view_image",
|
|
53
52
|
"knowhere_discover_files",
|
|
54
53
|
"knowhere_delete_document"
|
|
55
54
|
] });
|
package/dist/tools.d.ts
CHANGED
|
@@ -2,9 +2,69 @@ import { type AnyAgentTool, type OpenClawPluginApi } from "openclaw/plugin-sdk/c
|
|
|
2
2
|
import { KnowhereStore } from "./store";
|
|
3
3
|
import type { KnowledgeGraphService } from "./kg-service";
|
|
4
4
|
import type { ResolvedKnowhereConfig, ToolRuntimeContext } from "./types";
|
|
5
|
+
interface T2ChunkRelation {
|
|
6
|
+
relation?: string;
|
|
7
|
+
target?: string;
|
|
8
|
+
ref?: string;
|
|
9
|
+
[key: string]: unknown;
|
|
10
|
+
}
|
|
11
|
+
interface T2ChunkMetadata {
|
|
12
|
+
summary?: string;
|
|
13
|
+
keywords?: string[];
|
|
14
|
+
tokens?: string[];
|
|
15
|
+
file_path?: string;
|
|
16
|
+
connect_to?: T2ChunkRelation[];
|
|
17
|
+
[key: string]: unknown;
|
|
18
|
+
}
|
|
19
|
+
interface T2ChunkSlim {
|
|
20
|
+
chunk_id?: string;
|
|
21
|
+
type: string;
|
|
22
|
+
path: string;
|
|
23
|
+
content: string;
|
|
24
|
+
summary: string;
|
|
25
|
+
file_path?: string;
|
|
26
|
+
connect_to?: T2ChunkRelation[];
|
|
27
|
+
metadata?: T2ChunkMetadata;
|
|
28
|
+
}
|
|
29
|
+
interface T2EnrichResult {
|
|
30
|
+
chunks: T2ChunkSlim[];
|
|
31
|
+
/** Image paths that were inlined into text via placeholder replacement (need delivery). */
|
|
32
|
+
inlinedImagePaths: ReadonlySet<string>;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Runtime-only enrichment of chunks returned to the AI:
|
|
36
|
+
* 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
|
|
37
|
+
* 2. Normalize standalone image/table chunks to file_path-based content
|
|
38
|
+
* 3. Remove standalone table/image chunks that were already inlined into text
|
|
39
|
+
*
|
|
40
|
+
* Does NOT modify chunks.json on disk.
|
|
41
|
+
*/
|
|
42
|
+
declare function t2EnrichChunks(chunks: T2ChunkSlim[], docDir: string): Promise<T2EnrichResult>;
|
|
43
|
+
interface T2ResolvedAsset {
|
|
44
|
+
chunk_id: string;
|
|
45
|
+
type: "image" | "table";
|
|
46
|
+
relative_path: string;
|
|
47
|
+
summary: string;
|
|
48
|
+
mode: "image_sent" | "image_failed" | "table_inline";
|
|
49
|
+
html_content?: string;
|
|
50
|
+
}
|
|
51
|
+
declare function t2ResolveAssets(params: {
|
|
52
|
+
api: OpenClawPluginApi;
|
|
53
|
+
store: KnowhereStore;
|
|
54
|
+
ctx: ToolRuntimeContext;
|
|
55
|
+
docDir: string;
|
|
56
|
+
returnedChunks: T2ChunkSlim[];
|
|
57
|
+
/** Image paths inlined by t2EnrichChunks that still need channel delivery. */
|
|
58
|
+
enrichedImagePaths?: ReadonlySet<string>;
|
|
59
|
+
}): Promise<T2ResolvedAsset[]>;
|
|
5
60
|
export declare function createKnowhereToolFactory(params: {
|
|
6
61
|
api: OpenClawPluginApi;
|
|
7
62
|
config: ResolvedKnowhereConfig;
|
|
8
63
|
store: KnowhereStore;
|
|
9
64
|
kgService: KnowledgeGraphService;
|
|
10
65
|
}): (ctx: ToolRuntimeContext) => AnyAgentTool[];
|
|
66
|
+
export declare const __internal: {
|
|
67
|
+
t2EnrichChunks: typeof t2EnrichChunks;
|
|
68
|
+
t2ResolveAssets: typeof t2ResolveAssets;
|
|
69
|
+
};
|
|
70
|
+
export {};
|
package/dist/tools.js
CHANGED
|
@@ -964,13 +964,7 @@ async function t2LoadChunks(docDir) {
|
|
|
964
964
|
if (Array.isArray(data)) chunks = data;
|
|
965
965
|
else if (isRecord(data) && Array.isArray(data.chunks)) chunks = data.chunks;
|
|
966
966
|
else continue;
|
|
967
|
-
|
|
968
|
-
type: c.type || "text",
|
|
969
|
-
path: c.path || "",
|
|
970
|
-
content: c.content || "",
|
|
971
|
-
summary: c.metadata?.summary || c.summary || ""
|
|
972
|
-
}));
|
|
973
|
-
return chunks;
|
|
967
|
+
return chunks.map((c) => t2ToSlimChunk(c));
|
|
974
968
|
} catch {
|
|
975
969
|
continue;
|
|
976
970
|
}
|
|
@@ -979,7 +973,68 @@ async function t2LoadChunks(docDir) {
|
|
|
979
973
|
function t2NormalizePath(s) {
|
|
980
974
|
return s.replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 65248)).replace(/[\s\u3000\u00A0]+/g, "").toLowerCase();
|
|
981
975
|
}
|
|
982
|
-
const
|
|
976
|
+
const PATH_REF_RE = /\[((?:images|tables)\/[^\]\n]+)\]/g;
|
|
977
|
+
function t2ReadConnectTo(value) {
|
|
978
|
+
if (!Array.isArray(value)) return;
|
|
979
|
+
const relations = value.filter(isRecord);
|
|
980
|
+
return relations.length > 0 ? relations : void 0;
|
|
981
|
+
}
|
|
982
|
+
function t2GetChunkFilePath(chunk) {
|
|
983
|
+
if (typeof chunk.file_path === "string" && chunk.file_path) return chunk.file_path;
|
|
984
|
+
if (typeof chunk.metadata?.file_path === "string" && chunk.metadata.file_path) return chunk.metadata.file_path;
|
|
985
|
+
}
|
|
986
|
+
function t2GetChunkAssetPath(chunk) {
|
|
987
|
+
return t2GetChunkFilePath(chunk);
|
|
988
|
+
}
|
|
989
|
+
function t2ToSlimChunk(chunk) {
|
|
990
|
+
const connectTo = t2ReadConnectTo(chunk.metadata?.connect_to);
|
|
991
|
+
const filePath = t2GetChunkFilePath(chunk);
|
|
992
|
+
return {
|
|
993
|
+
chunk_id: chunk.chunk_id || void 0,
|
|
994
|
+
type: chunk.type || "text",
|
|
995
|
+
path: chunk.path || "",
|
|
996
|
+
content: chunk.content || "",
|
|
997
|
+
summary: chunk.metadata?.summary || chunk.summary || "",
|
|
998
|
+
file_path: filePath,
|
|
999
|
+
connect_to: connectTo,
|
|
1000
|
+
metadata: chunk.metadata ? {
|
|
1001
|
+
...chunk.metadata,
|
|
1002
|
+
file_path: filePath,
|
|
1003
|
+
connect_to: connectTo
|
|
1004
|
+
} : void 0
|
|
1005
|
+
};
|
|
1006
|
+
}
|
|
1007
|
+
function t2HydrateChunk(chunk, idToRaw, pathToRaw) {
|
|
1008
|
+
const raw = (chunk.chunk_id ? idToRaw.get(chunk.chunk_id) : void 0) || (chunk.path ? pathToRaw.get(chunk.path) : void 0);
|
|
1009
|
+
if (!raw) return chunk;
|
|
1010
|
+
const rawFilePath = t2GetChunkFilePath(raw);
|
|
1011
|
+
const connectTo = chunk.connect_to || t2ReadConnectTo(raw.metadata?.connect_to);
|
|
1012
|
+
return {
|
|
1013
|
+
...chunk,
|
|
1014
|
+
chunk_id: chunk.chunk_id || raw.chunk_id,
|
|
1015
|
+
file_path: chunk.file_path || rawFilePath,
|
|
1016
|
+
connect_to: connectTo,
|
|
1017
|
+
metadata: {
|
|
1018
|
+
...raw.metadata || {},
|
|
1019
|
+
...chunk.metadata || {},
|
|
1020
|
+
file_path: chunk.file_path || rawFilePath,
|
|
1021
|
+
connect_to: connectTo
|
|
1022
|
+
}
|
|
1023
|
+
};
|
|
1024
|
+
}
|
|
1025
|
+
async function t2ReadTableHtml(docDir, relativePath) {
|
|
1026
|
+
try {
|
|
1027
|
+
return await fs.readFile(path.join(docDir, relativePath), "utf-8");
|
|
1028
|
+
} catch {
|
|
1029
|
+
return null;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
function t2HasUnresolvedMediaReference(text) {
|
|
1033
|
+
PATH_REF_RE.lastIndex = 0;
|
|
1034
|
+
const hasPathRef = PATH_REF_RE.test(text);
|
|
1035
|
+
PATH_REF_RE.lastIndex = 0;
|
|
1036
|
+
return hasPathRef;
|
|
1037
|
+
}
|
|
983
1038
|
async function t2LoadRawChunks(docDir) {
|
|
984
1039
|
try {
|
|
985
1040
|
const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
|
|
@@ -993,62 +1048,59 @@ async function t2LoadRawChunks(docDir) {
|
|
|
993
1048
|
}
|
|
994
1049
|
/**
|
|
995
1050
|
* Runtime-only enrichment of chunks returned to the AI:
|
|
996
|
-
* 1.
|
|
997
|
-
* 2.
|
|
998
|
-
* 3. Remove standalone table chunks that were inlined
|
|
999
|
-
* 4. Strip self-referencing placeholders from image/table chunk content & summary
|
|
1051
|
+
* 1. Prefer Schema v2.1 path refs ([images/...], [tables/...]) in text chunks
|
|
1052
|
+
* 2. Normalize standalone image/table chunks to file_path-based content
|
|
1053
|
+
* 3. Remove standalone table/image chunks that were already inlined into text
|
|
1000
1054
|
*
|
|
1001
1055
|
* Does NOT modify chunks.json on disk.
|
|
1002
1056
|
*/
|
|
1003
1057
|
async function t2EnrichChunks(chunks, docDir) {
|
|
1004
1058
|
const rawChunks = await t2LoadRawChunks(docDir);
|
|
1005
1059
|
const idToRaw = /* @__PURE__ */ new Map();
|
|
1006
|
-
|
|
1007
|
-
const
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
for (const entry of Array.isArray(files.images) ? files.images : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
|
|
1013
|
-
type: "image",
|
|
1014
|
-
filePath: entry.file_path
|
|
1015
|
-
});
|
|
1016
|
-
for (const entry of Array.isArray(files.tables) ? files.tables : []) if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
|
|
1017
|
-
type: "table",
|
|
1018
|
-
filePath: entry.file_path
|
|
1019
|
-
});
|
|
1020
|
-
}
|
|
1021
|
-
} catch {}
|
|
1060
|
+
const pathToRaw = /* @__PURE__ */ new Map();
|
|
1061
|
+
for (const rc of rawChunks) {
|
|
1062
|
+
if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);
|
|
1063
|
+
if (rc.path) pathToRaw.set(rc.path, rc);
|
|
1064
|
+
}
|
|
1065
|
+
chunks = chunks.map((chunk) => t2HydrateChunk(chunk, idToRaw, pathToRaw));
|
|
1022
1066
|
const inlinedTablePaths = /* @__PURE__ */ new Set();
|
|
1023
1067
|
const inlinedImagePaths = /* @__PURE__ */ new Set();
|
|
1024
1068
|
for (const chunk of chunks) {
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
chunk.
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📊 ${chunk.path}]`);
|
|
1037
|
-
}
|
|
1038
|
-
else if (chunk.type === "image") chunk.content = chunk.content.replace(PLACEHOLDER_RE, `[📷 ${chunk.path}]`);
|
|
1039
|
-
}
|
|
1069
|
+
const relativePath = t2GetChunkAssetPath(chunk);
|
|
1070
|
+
if (!relativePath) continue;
|
|
1071
|
+
chunk.file_path = relativePath;
|
|
1072
|
+
chunk.metadata = {
|
|
1073
|
+
...chunk.metadata || {},
|
|
1074
|
+
file_path: relativePath,
|
|
1075
|
+
connect_to: chunk.connect_to
|
|
1076
|
+
};
|
|
1077
|
+
if (chunk.type === "image") {
|
|
1078
|
+
chunk.content = `[📷 ${relativePath}]`;
|
|
1079
|
+
continue;
|
|
1040
1080
|
}
|
|
1041
|
-
if (chunk.
|
|
1042
|
-
|
|
1043
|
-
if (
|
|
1044
|
-
|
|
1045
|
-
|
|
1081
|
+
if (chunk.type === "table") {
|
|
1082
|
+
const html = await t2ReadTableHtml(docDir, relativePath);
|
|
1083
|
+
if (html) chunk.content = html.slice(0, 8e3);
|
|
1084
|
+
else if (!chunk.content) chunk.content = `[📊 ${relativePath}]`;
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
for (const chunk of chunks) {
|
|
1088
|
+
const relativePath = t2GetChunkAssetPath(chunk);
|
|
1089
|
+
if (chunk.content) {
|
|
1090
|
+
if (chunk.type === "text") chunk.content = await replacePathReferences(chunk.content, docDir, inlinedTablePaths, inlinedImagePaths);
|
|
1091
|
+
if (chunk.type !== "text" && relativePath && t2HasUnresolvedMediaReference(chunk.content)) {
|
|
1092
|
+
if (chunk.type === "table") {
|
|
1093
|
+
const html = await t2ReadTableHtml(docDir, relativePath);
|
|
1094
|
+
chunk.content = html ? html.slice(0, 8e3) : `[📊 ${relativePath}]`;
|
|
1095
|
+
} else if (chunk.type === "image") chunk.content = `[📷 ${relativePath}]`;
|
|
1046
1096
|
}
|
|
1047
1097
|
}
|
|
1098
|
+
if (chunk.summary) chunk.summary = await replacePathReferences(chunk.summary, docDir);
|
|
1048
1099
|
}
|
|
1049
1100
|
chunks = chunks.filter((c) => {
|
|
1050
|
-
|
|
1051
|
-
if (c.type === "
|
|
1101
|
+
const relativePath = t2GetChunkAssetPath(c) || "";
|
|
1102
|
+
if (c.type === "table" && relativePath && inlinedTablePaths.has(relativePath)) return false;
|
|
1103
|
+
if (c.type === "image" && relativePath && inlinedImagePaths.has(relativePath)) return false;
|
|
1052
1104
|
return true;
|
|
1053
1105
|
});
|
|
1054
1106
|
return {
|
|
@@ -1056,50 +1108,33 @@ async function t2EnrichChunks(chunks, docDir) {
|
|
|
1056
1108
|
inlinedImagePaths
|
|
1057
1109
|
};
|
|
1058
1110
|
}
|
|
1059
|
-
async function
|
|
1111
|
+
async function replacePathReferences(text, docDir, inlinedTablePaths, inlinedImagePaths) {
|
|
1060
1112
|
const matches = [];
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
while ((
|
|
1064
|
-
full:
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
end: m.index + m[0].length
|
|
1113
|
+
let match;
|
|
1114
|
+
PATH_REF_RE.lastIndex = 0;
|
|
1115
|
+
while ((match = PATH_REF_RE.exec(text)) !== null) matches.push({
|
|
1116
|
+
full: match[0],
|
|
1117
|
+
relativePath: match[1],
|
|
1118
|
+
start: match.index,
|
|
1119
|
+
end: match.index + match[0].length
|
|
1069
1120
|
});
|
|
1121
|
+
PATH_REF_RE.lastIndex = 0;
|
|
1070
1122
|
if (matches.length === 0) return text;
|
|
1071
1123
|
const replacements = [];
|
|
1072
|
-
for (const
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
const mEntry = manifestPaths.get(match.id);
|
|
1077
|
-
if (mEntry) resolvedPath = mEntry.filePath;
|
|
1078
|
-
}
|
|
1079
|
-
if (!resolvedPath) {
|
|
1080
|
-
replacements.push(match.full);
|
|
1124
|
+
for (const ref of matches) {
|
|
1125
|
+
if (ref.relativePath.startsWith("images/")) {
|
|
1126
|
+
replacements.push(`[📷 ${ref.relativePath}]`);
|
|
1127
|
+
inlinedImagePaths?.add(ref.relativePath);
|
|
1081
1128
|
continue;
|
|
1082
1129
|
}
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
try {
|
|
1089
|
-
const html = await fs.readFile(htmlPath, "utf-8");
|
|
1090
|
-
replacements.push(`\n${html.slice(0, 8e3)}\n`);
|
|
1091
|
-
inlinedTablePaths?.add(resolvedPath);
|
|
1092
|
-
} catch {
|
|
1093
|
-
const tableContent = raw?.content || "";
|
|
1094
|
-
if (tableContent && tableContent.includes("<")) {
|
|
1095
|
-
replacements.push(`\n${tableContent}\n`);
|
|
1096
|
-
inlinedTablePaths?.add(resolvedPath);
|
|
1097
|
-
} else replacements.push(`[📊 ${resolvedPath}]`);
|
|
1098
|
-
}
|
|
1099
|
-
}
|
|
1130
|
+
const html = await t2ReadTableHtml(docDir, ref.relativePath);
|
|
1131
|
+
if (html) {
|
|
1132
|
+
replacements.push(`\n${html.slice(0, 8e3)}\n`);
|
|
1133
|
+
inlinedTablePaths?.add(ref.relativePath);
|
|
1134
|
+
} else replacements.push(`[📊 ${ref.relativePath}]`);
|
|
1100
1135
|
}
|
|
1101
1136
|
let result = text;
|
|
1102
|
-
for (let
|
|
1137
|
+
for (let index = matches.length - 1; index >= 0; index -= 1) result = result.slice(0, matches[index].start) + replacements[index] + result.slice(matches[index].end);
|
|
1103
1138
|
return result;
|
|
1104
1139
|
}
|
|
1105
1140
|
function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
|
|
@@ -1221,14 +1256,17 @@ async function t2ResolveAssets(params) {
|
|
|
1221
1256
|
params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
|
|
1222
1257
|
}
|
|
1223
1258
|
};
|
|
1224
|
-
for (const chunk of params.returnedChunks)
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1259
|
+
for (const chunk of params.returnedChunks) {
|
|
1260
|
+
const relativePath = t2GetChunkAssetPath(chunk);
|
|
1261
|
+
if ((chunk.type === "image" || chunk.type === "table") && relativePath) {
|
|
1262
|
+
if (chunk.type === "table" && chunk.content && !t2HasUnresolvedMediaReference(chunk.content)) continue;
|
|
1263
|
+
await resolveOne({
|
|
1264
|
+
chunkId: chunk.chunk_id || relativePath,
|
|
1265
|
+
type: chunk.type,
|
|
1266
|
+
relativePath,
|
|
1267
|
+
summary: chunk.summary || chunk.content?.slice(0, 200) || ""
|
|
1268
|
+
});
|
|
1269
|
+
}
|
|
1232
1270
|
}
|
|
1233
1271
|
if (params.enrichedImagePaths && params.enrichedImagePaths.size > 0) for (const relativePath of params.enrichedImagePaths) {
|
|
1234
1272
|
if (processedPaths.has(path.join(params.docDir, relativePath))) continue;
|
package/dist/types.d.ts
CHANGED
|
@@ -232,7 +232,8 @@ export interface FileEdge {
|
|
|
232
232
|
}>;
|
|
233
233
|
}
|
|
234
234
|
/**
|
|
235
|
-
* File metadata in knowledge graph (matches
|
|
235
|
+
* File metadata in knowledge graph (v2.0 schema — matches graph-builder.ts output).
|
|
236
|
+
* `hit_count` and `last_hit` are maintained at runtime by `knowhere_read_chunks`.
|
|
236
237
|
*/
|
|
237
238
|
export interface FileMetadata {
|
|
238
239
|
chunks_count: number;
|
|
@@ -240,12 +241,22 @@ export interface FileMetadata {
|
|
|
240
241
|
top_keywords: string[];
|
|
241
242
|
top_summary: string;
|
|
242
243
|
importance: number;
|
|
244
|
+
/** ISO timestamp of when this file entry was first created in the graph. */
|
|
245
|
+
created_at: string;
|
|
246
|
+
/** Number of times chunks from this file have been read via knowhere_read_chunks. */
|
|
247
|
+
hit_count?: number;
|
|
248
|
+
/** ISO timestamp of the last knowhere_read_chunks access for this file. */
|
|
249
|
+
last_hit?: string;
|
|
243
250
|
}
|
|
244
251
|
/**
|
|
245
|
-
* Knowledge graph structure (matches
|
|
252
|
+
* Knowledge graph structure (v2.0 schema — matches graph-builder.ts output).
|
|
246
253
|
*/
|
|
247
254
|
export interface KnowledgeGraph {
|
|
248
255
|
version: string;
|
|
256
|
+
/** ISO timestamp of the last graph build or partial update. */
|
|
257
|
+
updated_at: string;
|
|
258
|
+
/** Knowledge base ID this graph belongs to. */
|
|
259
|
+
kb_id: string;
|
|
249
260
|
stats: {
|
|
250
261
|
total_files: number;
|
|
251
262
|
total_chunks: number;
|
package/openclaw.plugin.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"name": "Knowhere",
|
|
4
4
|
"description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
|
|
5
5
|
"skills": ["./skills"],
|
|
6
|
-
"version": "0.2.8",
|
|
6
|
+
"version": "0.2.9",
|
|
7
7
|
"uiHints": {
|
|
8
8
|
"apiKey": {
|
|
9
9
|
"label": "Knowhere API Key",
|
package/package.json
CHANGED
|
package/skills/knowhere_memory/SKILL.md
CHANGED
|
@@ -89,17 +89,19 @@ All knowledge data lives under `~/.knowhere/{kb_id}/`:
|
|
|
89
89
|
└── {docId} → ../global/documents/{docId} # Symlink to Store
|
|
90
90
|
```
|
|
91
91
|
|
|
92
|
-
### Strategy:
|
|
92
|
+
### Strategy: Use the Tier-2 retrieval tools
|
|
93
93
|
|
|
94
|
-
|
|
94
|
+
The canonical retrieval path is **always** the Tier-2 tool chain — do not skip steps:
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
1. `knowhere_get_map` — get the full KG overview: which files exist, their keywords, importance scores, and cross-file edges. Pass `kbId` if known, or leave empty to scan all knowledge bases.
|
|
97
|
+
2. `knowhere_discover_files` — if you're unsure which file contains the answer, run a keyword search across all KB documents and merge with the `get_map` results.
|
|
98
|
+
3. `knowhere_get_structure` — inspect the chapter/section hierarchy of the most relevant document.
|
|
99
|
+
4. `knowhere_read_chunks` — fetch the actual content. Use `sectionPath` to narrow to the specific chapter and minimize token usage.
|
|
97
100
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
3. Then read individual `chunks.json` files with your file reading tool for detailed content
|
|
101
|
+
❌ Do **not** use `exec` or shell commands to read `~/.knowhere/` files directly.
|
|
102
|
+
❌ Do **not** skip `knowhere_get_map` and jump straight to `knowhere_read_chunks`.
|
|
101
103
|
|
|
102
|
-
#### If no
|
|
104
|
+
#### If no Knowhere tools are available → self-navigate using file tools
|
|
103
105
|
|
|
104
106
|
Follow this pattern — do NOT explore the filesystem blindly:
|
|
105
107
|
|
|
@@ -115,13 +117,17 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
|
|
|
115
117
|
```json
|
|
116
118
|
{
|
|
117
119
|
"version": "2.0",
|
|
118
|
-
"
|
|
120
|
+
"updated_at": "2026-04-09T10:00:00.000Z",
|
|
121
|
+
"kb_id": "telegram",
|
|
122
|
+
"stats": { "total_files": 5, "total_chunks": 327, "total_cross_file_edges": 12 },
|
|
119
123
|
"files": {
|
|
120
124
|
"report.docx": {
|
|
121
125
|
"chunks_count": 198,
|
|
122
126
|
"types": { "text": 135, "table": 21, "image": 42 },
|
|
123
127
|
"top_keywords": ["excavation", "retaining", "construction"],
|
|
124
|
-
"
|
|
128
|
+
"top_summary": "Construction safety report for the Lujiazui project.",
|
|
129
|
+
"importance": 0.85,
|
|
130
|
+
"created_at": "2026-04-09T08:00:00.000Z"
|
|
125
131
|
}
|
|
126
132
|
},
|
|
127
133
|
"edges": [
|
|
@@ -129,8 +135,16 @@ Read `~/.knowhere/{kb_id}/knowledge_graph.json`:
|
|
|
129
135
|
"source": "file_A.docx",
|
|
130
136
|
"target": "file_B.pdf",
|
|
131
137
|
"connection_count": 20,
|
|
138
|
+
"avg_score": 0.91,
|
|
132
139
|
"top_connections": [
|
|
133
|
-
{
|
|
140
|
+
{
|
|
141
|
+
"source_chunk": "Chapter 3",
|
|
142
|
+
"source_id": "uuid-a",
|
|
143
|
+
"target_chunk": "Safety Policy",
|
|
144
|
+
"target_id": "uuid-b",
|
|
145
|
+
"relation": "keyword",
|
|
146
|
+
"score": 1.0
|
|
147
|
+
}
|
|
134
148
|
]
|
|
135
149
|
}
|
|
136
150
|
]
|
|
@@ -179,8 +193,8 @@ Check `edges` from Step 1 for cross-document connections. If related files weren
|
|
|
179
193
|
|
|
180
194
|
When the user asks to "delete", "remove", or "forget" a specific document:
|
|
181
195
|
|
|
182
|
-
1. Use `
|
|
183
|
-
2. If the user provided a filename, use it to disambiguate
|
|
196
|
+
1. Use `knowhere_get_map` to get an overview of all files in the knowledge base, then identify the correct `docId` that uniquely corresponds to the document the user named.
|
|
197
|
+
2. If the user provided a filename, use it to disambiguate across multiple hits.
|
|
184
198
|
3. Call `knowhere_delete_document` with the discovered `docId`.
|
|
185
199
|
|
|
186
200
|
The `knowhere_delete_document` tool natively handles all internal consistency logic:
|