@ontos-ai/knowhere-claw 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-hooks.d.ts +3 -0
- package/dist/agent-hooks.js +46 -0
- package/dist/index.js +9 -2
- package/dist/tools.js +308 -8
- package/openclaw.plugin.json +11 -1
- package/package.json +1 -1
- package/skills/knowhere_memory/SKILL.md +31 -13
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { isRecord } from "./types.js";
|
|
2
|
+
//#region src/agent-hooks.ts
|
|
3
|
+
/**
 * Markdown guidance block handed back from the `before_prompt_build` hook as
 * `prependContext`. The lines are joined with "\n" into a single string.
 */
const KNOWHERE_PROMPT_CONTEXT = [
  "## 🔧 Knowhere Plugin Guidance",
  "",
  "### File Ingestion",
  "When the user sends a file or attachment, or asks to parse/ingest a document,",
  "**always** use `knowhere_ingest_document` to process it.",
  "- If a `[media attached: ...]` marker is present, the file is already on disk — use the `filePath` parameter.",
  "- If the file is in the cloud (e.g. Feishu Drive), first obtain the download URL via the appropriate channel tool, then use the `url` parameter.",
  "- Refer to your **knowhere_memory** skill for the complete step-by-step workflow.",
  "",
  "### Knowledge Retrieval",
  "When answering questions about documents or the knowledge base:",
  "- ✅ Use `knowhere_get_map`, `knowhere_get_structure`, `knowhere_read_chunks`, `knowhere_kg_query`",
  "- ❌ Do NOT use `exec` or shell commands to read files inside `~/.knowhere/`"
].join("\n");
// Substring looked for (via String.prototype.includes) in an `exec` tool call's command text.
const KNOWHERE_DIR_PATTERN = ".knowhere";
// Returned as `blockReason` when an `exec` call is blocked by the before_tool_call hook.
const BLOCK_REASON = "Do not use exec to read .knowhere/ directly. Use knowhere retrieval tools instead: knowhere_get_map, knowhere_get_structure, knowhere_read_chunks, knowhere_kg_query.";
|
|
20
|
+
/**
 * Extracts the `agentHooks` section from the raw plugin configuration.
 *
 * Tolerates a missing or malformed config: when `raw` is not a record, when
 * `raw.agentHooks` is not a record, or when `enabled` is not a boolean, the
 * hooks default to enabled.
 *
 * @param {unknown} raw - Raw plugin configuration (may be null/undefined).
 * @returns {{ enabled: boolean }} Resolved agent-hooks settings.
 */
function resolveAgentHooksConfig(raw) {
  // Guard the root object too, so a null/undefined config cannot throw here.
  const root = isRecord(raw) ? raw : {};
  const section = isRecord(root.agentHooks) ? root.agentHooks : {};
  return { enabled: typeof section.enabled === "boolean" ? section.enabled : true };
}
|
|
24
|
+
/**
 * Registers the Knowhere agent hooks on the plugin API unless they are
 * disabled through the `agentHooks.enabled` config flag.
 *
 * Two hooks are installed:
 *  - `before_prompt_build`: returns the Knowhere guidance text as `prependContext`.
 *  - `before_tool_call`: for the `exec` tool, blocks the call when its command
 *    string (`params.command`, else `params.cmd`) contains the `.knowhere`
 *    directory pattern.
 *
 * @param api - Plugin API; this function uses `api.on(...)` and `api.logger.info(...)`.
 * @param rawConfig - Raw plugin configuration passed to resolveAgentHooksConfig.
 * @returns {void}
 */
function registerAgentHooks(api, rawConfig) {
  const { enabled } = resolveAgentHooksConfig(rawConfig);
  if (!enabled) {
    api.logger.info("knowhere: agent hooks disabled via config");
    return;
  }

  const provideGuidance = async () => ({ prependContext: KNOWHERE_PROMPT_CONTEXT });

  const guardExecCalls = async (event) => {
    if (event.toolName !== "exec") return;
    const args = isRecord(event.params) ? event.params : {};
    // `command` takes precedence over `cmd`; anything non-string counts as empty.
    let commandText = "";
    if (typeof args.command === "string") commandText = args.command;
    else if (typeof args.cmd === "string") commandText = args.cmd;
    if (!commandText.includes(KNOWHERE_DIR_PATTERN)) return;
    api.logger.info(`knowhere: blocked exec touching ${KNOWHERE_DIR_PATTERN}`);
    return {
      block: true,
      blockReason: BLOCK_REASON
    };
  };

  api.on("before_prompt_build", provideGuidance);
  api.on("before_tool_call", guardExecCalls);
  api.logger.info("knowhere: agent hooks registered (prompt context + tool governance)");
}
|
|
45
|
+
//#endregion
|
|
46
|
+
export { registerAgentHooks };
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { registerAgentHooks } from "./agent-hooks.js";
|
|
1
2
|
import { knowherePluginConfigSchema, readPersistedApiKey, resolveEffectivePluginConfig, resolveKnowhereConfig, resolveKnowledgeGraphConfig } from "./config.js";
|
|
2
3
|
import { KnowhereStore } from "./store.js";
|
|
3
4
|
import { createKnowhereToolFactory } from "./tools.js";
|
|
@@ -10,7 +11,9 @@ const plugin = {
|
|
|
10
11
|
configSchema: knowherePluginConfigSchema,
|
|
11
12
|
register(api) {
|
|
12
13
|
const config = resolveKnowhereConfig(api);
|
|
13
|
-
const
|
|
14
|
+
const effectiveRaw = resolveEffectivePluginConfig(api);
|
|
15
|
+
const kgConfig = resolveKnowledgeGraphConfig(effectiveRaw);
|
|
16
|
+
registerAgentHooks(api, effectiveRaw);
|
|
14
17
|
const store = new KnowhereStore({
|
|
15
18
|
rootDir: config.storageDir,
|
|
16
19
|
scopeMode: config.scopeMode,
|
|
@@ -43,7 +46,11 @@ const plugin = {
|
|
|
43
46
|
"knowhere_import_completed_job",
|
|
44
47
|
"knowhere_set_api_key",
|
|
45
48
|
"knowhere_kg_list",
|
|
46
|
-
"knowhere_kg_query"
|
|
49
|
+
"knowhere_kg_query",
|
|
50
|
+
"knowhere_get_map",
|
|
51
|
+
"knowhere_get_structure",
|
|
52
|
+
"knowhere_read_chunks",
|
|
53
|
+
"knowhere_discover_files"
|
|
47
54
|
] });
|
|
48
55
|
}
|
|
49
56
|
};
|
package/dist/tools.js
CHANGED
|
@@ -4,9 +4,11 @@ import { resolveStoredKnowhereArtifactPath } from "./parser.js";
|
|
|
4
4
|
import { sanitizeStringArray } from "./text.js";
|
|
5
5
|
import { formatErrorMessage } from "./error-message.js";
|
|
6
6
|
import { KnowhereClient } from "./client.js";
|
|
7
|
+
import { deliverChannelMessage } from "./channel-delivery.js";
|
|
7
8
|
import { sendTrackerProgress } from "./tracker-progress.js";
|
|
8
9
|
import fs from "node:fs/promises";
|
|
9
10
|
import path from "node:path";
|
|
11
|
+
import crypto from "node:crypto";
|
|
10
12
|
import os from "node:os";
|
|
11
13
|
//#region src/tools.ts
|
|
12
14
|
const TERMINAL_JOB_STATUSES = new Set([
|
|
@@ -969,6 +971,10 @@ async function t2LoadChunks(docDir) {
|
|
|
969
971
|
}
|
|
970
972
|
return [];
|
|
971
973
|
}
|
|
974
|
+
/**
 * Normalizes a section path for loose matching:
 *  1. maps full-width ASCII variants (U+FF01–U+FF5E) to their half-width
 *     counterparts by subtracting 65248 from the code unit,
 *  2. removes all whitespace (including U+3000 and U+00A0),
 *  3. lower-cases the result.
 *
 * @param {string} s - Path text to normalize.
 * @returns {string} The normalized path.
 */
function t2NormalizePath(s) {
  const toHalfWidth = (ch) => String.fromCharCode(ch.charCodeAt(0) - 65248);
  const halfWidth = s.replace(/[\uFF01-\uFF5E]/g, toHalfWidth);
  const withoutSpaces = halfWidth.replace(/[\s\u3000\u00A0]+/g, "");
  return withoutSpaces.toLowerCase();
}
// Matches IMAGE_<id>_IMAGE / TABLE_<id>_TABLE placeholders; group 1 is the id.
// Global flag: `.test`/`.exec` on this regex are stateful via `lastIndex`.
const PLACEHOLDER_RE = /(?:IMAGE|TABLE)_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
|
|
972
978
|
async function t2LoadRawChunks(docDir) {
|
|
973
979
|
try {
|
|
974
980
|
const raw = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
|
|
@@ -980,6 +986,114 @@ async function t2LoadRawChunks(docDir) {
|
|
|
980
986
|
return [];
|
|
981
987
|
}
|
|
982
988
|
}
|
|
989
|
+
/**
 * Runtime-only enrichment of chunks returned to the AI:
 * 1. Replace IMAGE_uuid_IMAGE placeholders with [📷 path] in text chunks
 * 2. Replace TABLE_uuid_TABLE placeholders with actual HTML content in text chunks
 * 3. Remove standalone table/image chunks that were inlined via placeholders
 * 4. Strip self-referencing placeholders from image/table chunk content & summary
 *
 * Does NOT modify chunks.json on disk. The chunk objects passed in are
 * mutated in place (`content` / `summary`), and a filtered array is returned.
 *
 * @param {Array<object>} chunks - Chunks about to be returned to the agent.
 * @param {string} docDir - Document directory containing chunks.json / manifest.json.
 * @returns {Promise<Array<object>>} Enriched chunks, minus inlined table/image chunks.
 */
async function t2EnrichChunks(chunks, docDir) {
  // Non-global copy of the placeholder pattern: `.test` on it is stateless,
  // so no `lastIndex` bookkeeping on the shared /g regex is needed.
  const placeholderProbe = new RegExp(PLACEHOLDER_RE.source);
  const hasPlaceholder = (text) => placeholderProbe.test(text);
  // Always replace through a function: with a string replacement, `$1`, `$&`,
  // `$'` etc. inside table HTML or file paths would be expanded as
  // substitution patterns and corrupt the output (e.g. a price "$1,000").
  const fillPlaceholders = (text, replacement) => text.replace(PLACEHOLDER_RE, () => replacement);

  const rawChunks = await t2LoadRawChunks(docDir);
  const idToRaw = /* @__PURE__ */ new Map();
  for (const rc of rawChunks) if (rc.chunk_id) idToRaw.set(rc.chunk_id, rc);

  // Fallback id -> file path lookup built from manifest.json (best effort).
  const manifestPaths = /* @__PURE__ */ new Map();
  const collectManifestEntries = (entries, type) => {
    if (!Array.isArray(entries)) return;
    for (const entry of entries) {
      // Skip malformed entries instead of letting one bad entry abort the rest.
      if (!isRecord(entry)) continue;
      if (typeof entry.id === "string" && typeof entry.file_path === "string") manifestPaths.set(entry.id, {
        type,
        filePath: entry.file_path
      });
    }
  };
  try {
    const manifest = JSON.parse(await fs.readFile(path.join(docDir, "manifest.json"), "utf-8"));
    if (isRecord(manifest) && isRecord(manifest.files)) {
      collectManifestEntries(manifest.files.images, "image");
      collectManifestEntries(manifest.files.tables, "table");
    }
  } catch {
    // manifest.json is optional: a missing or unparsable manifest only
    // disables the fallback lookup.
  }

  const inlinedTablePaths = /* @__PURE__ */ new Set();
  const inlinedImagePaths = /* @__PURE__ */ new Set();
  for (const chunk of chunks) {
    // Chunks without content are skipped entirely (their summary included).
    if (!chunk.content) continue;
    if (hasPlaceholder(chunk.content)) {
      const isText = chunk.type === "text";
      // Only text chunks record which assets they inlined; those standalone
      // asset chunks are filtered out below.
      chunk.content = await replacePlaceholders(chunk.content, idToRaw, docDir, isText ? inlinedTablePaths : void 0, isText ? inlinedImagePaths : void 0, manifestPaths);
      // Placeholders still present in an image/table chunk are treated as
      // referring to the chunk's own asset and resolved via `chunk.path`.
      if (!isText && chunk.path && hasPlaceholder(chunk.content)) {
        if (chunk.type === "table") {
          let replacement;
          try {
            const html = await fs.readFile(path.join(docDir, chunk.path), "utf-8");
            replacement = `\n${html.slice(0, 8e3)}\n`;
          } catch {
            // Table file unreadable: fall back to a path marker.
            replacement = `[📊 ${chunk.path}]`;
          }
          chunk.content = fillPlaceholders(chunk.content, replacement);
        } else if (chunk.type === "image") chunk.content = fillPlaceholders(chunk.content, `[📷 ${chunk.path}]`);
      }
    }
    if (chunk.summary && hasPlaceholder(chunk.summary)) {
      chunk.summary = await replacePlaceholders(chunk.summary, idToRaw, docDir, void 0, void 0, manifestPaths);
    }
  }
  return chunks.filter((c) => {
    if (c.type === "table" && inlinedTablePaths.has(c.path)) return false;
    if (c.type === "image" && inlinedImagePaths.has(c.path)) return false;
    return true;
  });
}
|
|
1051
|
+
/**
 * Substitutes IMAGE_<id>_IMAGE / TABLE_<id>_TABLE placeholders found in `text`.
 *
 * The asset path for each id is taken from the raw chunk (`idToRaw`), falling
 * back to `manifestPaths`. Placeholders with no known path are left untouched.
 *  - IMAGE placeholders become `[📷 <path>]`.
 *  - TABLE placeholders become the table file's HTML (first 8000 chars) wrapped
 *    in newlines; if the file cannot be read, the raw chunk's content is used
 *    when it contains "<", otherwise a `[📊 <path>]` marker is emitted.
 *
 * @param {string} text - Text that may contain placeholders.
 * @param {Map<string, object>} idToRaw - Raw chunks keyed by chunk id.
 * @param {string} docDir - Directory that table paths are joined against.
 * @param {Set<string>} [inlinedTablePaths] - Receives paths of tables whose HTML was inlined.
 * @param {Set<string>} [inlinedImagePaths] - Receives paths of images that were referenced.
 * @param {Map<string, {type: string, filePath: string}>} [manifestPaths] - Fallback id -> path lookup.
 * @returns {Promise<string>} The text with placeholders substituted.
 */
async function replacePlaceholders(text, idToRaw, docDir, inlinedTablePaths, inlinedImagePaths, manifestPaths) {
  const pattern = /(?:(IMAGE|TABLE))_([a-f0-9-]+)_(?:IMAGE|TABLE)/g;
  const found = [];
  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    found.push({
      full: hit[0],
      type: hit[1],
      id: hit[2],
      start: hit.index,
      end: hit.index + hit[0].length
    });
  }
  if (found.length === 0) return text;

  // Resolves a single placeholder to its substitution text.
  const substitutionFor = async (placeholder) => {
    const rawChunk = idToRaw.get(placeholder.id);
    let assetPath = rawChunk?.path;
    if (!assetPath && manifestPaths) {
      const manifestEntry = manifestPaths.get(placeholder.id);
      if (manifestEntry) assetPath = manifestEntry.filePath;
    }
    // Unknown asset: keep the placeholder text as-is.
    if (!assetPath) return placeholder.full;

    if (placeholder.type === "IMAGE") {
      inlinedImagePaths?.add(assetPath);
      return `[📷 ${assetPath}]`;
    }

    try {
      const html = await fs.readFile(path.join(docDir, assetPath), "utf-8");
      inlinedTablePaths?.add(assetPath);
      return `\n${html.slice(0, 8e3)}\n`;
    } catch {
      const fallbackHtml = rawChunk?.content || "";
      if (fallbackHtml && fallbackHtml.includes("<")) {
        inlinedTablePaths?.add(assetPath);
        return `\n${fallbackHtml}\n`;
      }
      return `[📊 ${assetPath}]`;
    }
  };

  // Rebuild the text left to right, resolving placeholders one at a time.
  const pieces = [];
  let cursor = 0;
  for (const placeholder of found) {
    pieces.push(text.slice(cursor, placeholder.start));
    pieces.push(await substitutionFor(placeholder));
    cursor = placeholder.end;
  }
  pieces.push(text.slice(cursor));
  return pieces.join("");
}
|
|
983
1097
|
function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
|
|
984
1098
|
const df = {};
|
|
985
1099
|
const tf = {};
|
|
@@ -1043,11 +1157,78 @@ async function t2ListDocDirs(kbRoot) {
|
|
|
1043
1157
|
}
|
|
1044
1158
|
return docs;
|
|
1045
1159
|
}
|
|
1160
|
+
/**
 * Copies a file into the staging directory under a name derived from its
 * source path.
 *
 * The staged name is `knowhere-asset-<hash><ext>`, where `<hash>` is the first
 * 12 hex characters of the SHA-1 of the source *path string* (not the file
 * contents) and `<ext>` is the source extension. The staging directory is
 * created if necessary.
 *
 * @param {string} srcPath - Path of the file to stage.
 * @param {string} stagingDir - Directory to copy the file into.
 * @returns {Promise<string>} Path of the staged copy.
 */
async function t2StageFile(srcPath, stagingDir) {
  await fs.mkdir(stagingDir, { recursive: true });
  const extension = path.extname(srcPath);
  const pathDigest = crypto.createHash("sha1").update(srcPath).digest("hex");
  const stagedName = `knowhere-asset-${pathDigest.slice(0, 12)}${extension}`;
  const destination = path.join(stagingDir, stagedName);
  await fs.copyFile(srcPath, destination);
  return destination;
}
|
|
1168
|
+
/**
 * Resolves the image/table assets referenced by the chunks being returned.
 *
 * For every returned chunk of type "image" or "table" that has a `path`
 * (each distinct absolute path handled once):
 *  - tables: the file is read and its first 8000 characters are reported
 *    inline (`mode: "table_inline"`). Table chunks whose non-empty content
 *    holds no TABLE_<id>_TABLE placeholder are skipped.
 *  - images: the file is staged under `~/.openclaw/knowhere-assets` and sent
 *    through `deliverChannelMessage`; the outcome is reported as
 *    `image_sent` or `image_failed`.
 * Failures for an individual asset are logged at debug level and skipped.
 *
 * @param {object} params
 * @param params.api - Plugin API (uses `api.logger.debug`; also passed to deliverChannelMessage).
 * @param params.store - Store exposing `resolveChannelRoute({ sessionKey })`.
 * @param params.ctx - Tool context; `ctx.sessionKey` is read.
 * @param {string} params.docDir - Document directory that chunk paths are relative to.
 * @param {Array<object>} params.returnedChunks - Chunks being returned to the agent.
 * @returns {Promise<Array<object>>} One descriptor per resolved asset.
 */
async function t2ResolveAssets(params) {
  const collected = [];
  const stagingDir = path.join(os.homedir(), ".openclaw", "knowhere-assets");
  const seenPaths = /* @__PURE__ */ new Set();

  // Channel route lookup is best effort; on failure it stays undefined.
  let channelRoute;
  try {
    channelRoute = await params.store.resolveChannelRoute({ sessionKey: params.ctx.sessionKey });
  } catch {}

  // Reads a table file and records its (truncated) HTML.
  const inlineTable = async (asset, absolutePath) => {
    try {
      const html = await fs.readFile(absolutePath, "utf-8");
      collected.push({
        chunk_id: asset.chunkId,
        type: "table",
        relative_path: asset.relativePath,
        summary: asset.summary.slice(0, 200),
        mode: "table_inline",
        html_content: html.slice(0, 8e3)
      });
    } catch {
      params.api.logger.debug?.(`knowhere: t2ResolveAssets table read failed: ${absolutePath}`);
    }
  };

  // Stages an image and delivers it to the user's channel.
  const deliverImage = async (asset, absolutePath) => {
    try {
      await fs.access(absolutePath);
      const stagedPath = await t2StageFile(absolutePath, stagingDir);
      const delivery = await deliverChannelMessage({
        api: params.api,
        operationLabel: "t2-asset-image",
        sessionKey: params.ctx.sessionKey,
        channelRoute,
        text: `📷 ${asset.summary.slice(0, 100)}`,
        mediaUrl: stagedPath,
        mediaLocalRoots: [stagingDir]
      });
      collected.push({
        chunk_id: asset.chunkId,
        type: "image",
        relative_path: asset.relativePath,
        summary: asset.summary.slice(0, 200),
        mode: delivery.delivered ? "image_sent" : "image_failed"
      });
    } catch (err) {
      params.api.logger.debug?.(`knowhere: t2ResolveAssets image delivery failed: ${absolutePath} — ${err instanceof Error ? err.message : String(err)}`);
    }
  };

  for (const chunk of params.returnedChunks) {
    const isAssetChunk = chunk.type === "image" || chunk.type === "table";
    if (!isAssetChunk || !chunk.path) continue;
    // Table chunk with content but no remaining TABLE placeholder: nothing to resolve.
    if (chunk.type === "table" && chunk.content && !/TABLE_[a-f0-9-]+_TABLE/.test(chunk.content)) continue;

    const asset = {
      chunkId: chunk.path,
      relativePath: chunk.path,
      summary: chunk.summary || chunk.content?.slice(0, 200) || ""
    };
    const absolutePath = path.join(params.docDir, asset.relativePath);
    if (seenPaths.has(absolutePath)) continue;
    seenPaths.add(absolutePath);

    if (chunk.type === "table") await inlineTable(asset, absolutePath);
    else await deliverImage(asset, absolutePath);
  }
  return collected;
}
|
|
1046
1227
|
function createGetMapTool(_params) {
|
|
1047
1228
|
return {
|
|
1048
1229
|
name: "knowhere_get_map",
|
|
1049
1230
|
label: "Knowhere Get Map",
|
|
1050
|
-
description: "获取知识库全局概览。查询知识时必须先调此工具,了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure
|
|
1231
|
+
description: "获取知识库全局概览。查询知识时必须先调此工具,了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure 查看具体文档的章节目录。重要:knowhere_read_chunks 已内置图片/表格自动投递功能,读取内容时会自动发送多媒体到用户频道。除非用户明确要求下载原始文件,否则请勿主动发送 .docx/.pdf 等文档。",
|
|
1051
1232
|
parameters: {
|
|
1052
1233
|
type: "object",
|
|
1053
1234
|
additionalProperties: false,
|
|
@@ -1127,7 +1308,7 @@ function createGetStructureTool(_params) {
|
|
|
1127
1308
|
return {
|
|
1128
1309
|
name: "knowhere_get_structure",
|
|
1129
1310
|
label: "Knowhere Get Structure",
|
|
1130
|
-
description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后,用此工具查看章节结构,然后用 knowhere_read_chunks
|
|
1311
|
+
description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后,用此工具查看章节结构,然后用 knowhere_read_chunks 读取内容。重要:knowhere_read_chunks 已内置图片/表格自动投递功能,除非用户要求下载文件,否则无需额外发送原始文档。",
|
|
1131
1312
|
parameters: {
|
|
1132
1313
|
type: "object",
|
|
1133
1314
|
additionalProperties: false,
|
|
@@ -1177,7 +1358,7 @@ function createReadChunksTool(_params) {
|
|
|
1177
1358
|
return {
|
|
1178
1359
|
name: "knowhere_read_chunks",
|
|
1179
1360
|
label: "Knowhere Read Chunks",
|
|
1180
|
-
description: "读取文档内容。先调 knowhere_get_structure 确定章节后,用此工具读取具体内容。可通过 sectionPath 过滤特定章节,减少 token
|
|
1361
|
+
description: "读取文档内容。先调 knowhere_get_structure 确定章节后,用此工具读取具体内容。可通过 sectionPath 过滤特定章节,减少 token 消耗。此工具已内置图片/表格自动投递:读取时会自动将多媒体内容发送到用户频道。除非用户明确要求下载原始文件,否则无需再用 message 发送文档。",
|
|
1181
1362
|
parameters: {
|
|
1182
1363
|
type: "object",
|
|
1183
1364
|
additionalProperties: false,
|
|
@@ -1211,7 +1392,36 @@ function createReadChunksTool(_params) {
|
|
|
1211
1392
|
const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
|
|
1212
1393
|
if (!docDir) return textResult(`文档 '${docName}' 不存在`);
|
|
1213
1394
|
let chunks = await t2LoadChunks(docDir);
|
|
1214
|
-
|
|
1395
|
+
const totalAll = chunks.length;
|
|
1396
|
+
if (sectionPath) {
|
|
1397
|
+
let filtered = chunks.filter((c) => c.path.includes(sectionPath));
|
|
1398
|
+
if (filtered.length === 0) {
|
|
1399
|
+
const normQuery = t2NormalizePath(sectionPath);
|
|
1400
|
+
filtered = chunks.filter((c) => t2NormalizePath(c.path).includes(normQuery));
|
|
1401
|
+
}
|
|
1402
|
+
if (filtered.length === 0) {
|
|
1403
|
+
const sections = [...new Set(chunks.filter((c) => c.type === "text" && c.path).map((c) => {
|
|
1404
|
+
const parts = c.path.split("/");
|
|
1405
|
+
return parts.length > 1 ? parts.slice(-1)[0] : c.path;
|
|
1406
|
+
}))].slice(0, 30);
|
|
1407
|
+
return t2JsonResult({
|
|
1408
|
+
status: "no_match",
|
|
1409
|
+
message: `sectionPath '${sectionPath}' 未匹配到任何切片。请检查路径或从以下章节中选择:`,
|
|
1410
|
+
available_sections: sections
|
|
1411
|
+
});
|
|
1412
|
+
}
|
|
1413
|
+
chunks = filtered;
|
|
1414
|
+
} else if (totalAll > 20) {
|
|
1415
|
+
const sections = [...new Set(chunks.filter((c) => c.type === "text" && c.path).map((c) => {
|
|
1416
|
+
const parts = c.path.split("/");
|
|
1417
|
+
return parts.length > 1 ? parts.slice(-1)[0] : c.path;
|
|
1418
|
+
}))].slice(0, 30);
|
|
1419
|
+
return t2JsonResult({
|
|
1420
|
+
status: "too_many",
|
|
1421
|
+
message: `该文档共有 ${totalAll} 个切片,请先用 knowhere_get_structure 查看目录,然后用 sectionPath 参数指定章节。`,
|
|
1422
|
+
available_sections: sections
|
|
1423
|
+
});
|
|
1424
|
+
}
|
|
1215
1425
|
const total = chunks.length;
|
|
1216
1426
|
const limit = maxChunks || 50;
|
|
1217
1427
|
chunks = chunks.slice(0, limit);
|
|
@@ -1226,7 +1436,20 @@ function createReadChunksTool(_params) {
|
|
|
1226
1436
|
await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
|
|
1227
1437
|
}
|
|
1228
1438
|
} catch {}
|
|
1229
|
-
|
|
1439
|
+
chunks = await t2EnrichChunks(chunks, docDir);
|
|
1440
|
+
let resolvedAssets = [];
|
|
1441
|
+
try {
|
|
1442
|
+
resolvedAssets = await t2ResolveAssets({
|
|
1443
|
+
api: _params.api,
|
|
1444
|
+
store: _params.store,
|
|
1445
|
+
ctx: _params.ctx,
|
|
1446
|
+
docDir,
|
|
1447
|
+
returnedChunks: chunks
|
|
1448
|
+
});
|
|
1449
|
+
} catch (err) {
|
|
1450
|
+
_params.api.logger.debug?.(`knowhere: read_chunks asset resolution failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1451
|
+
}
|
|
1452
|
+
const result = {
|
|
1230
1453
|
status: "ok",
|
|
1231
1454
|
kb_id: kbId,
|
|
1232
1455
|
doc_name: path.basename(docDir),
|
|
@@ -1235,7 +1458,79 @@ function createReadChunksTool(_params) {
|
|
|
1235
1458
|
returned: chunks.length,
|
|
1236
1459
|
truncated: total > limit,
|
|
1237
1460
|
chunks
|
|
1238
|
-
}
|
|
1461
|
+
};
|
|
1462
|
+
if (resolvedAssets.length > 0) {
|
|
1463
|
+
result.resolved_assets = resolvedAssets;
|
|
1464
|
+
const sent = resolvedAssets.filter((a) => a.mode === "image_sent").length;
|
|
1465
|
+
const tables = resolvedAssets.filter((a) => a.mode === "table_inline").length;
|
|
1466
|
+
const notes = [];
|
|
1467
|
+
if (sent > 0) notes.push(`${sent} 张图片已自动发送到用户频道`);
|
|
1468
|
+
if (tables > 0) notes.push(`${tables} 个表格已内联为 HTML`);
|
|
1469
|
+
result.asset_summary = notes.join(";");
|
|
1470
|
+
result._agent_note = "上述多媒体资源(图片/表格/视频)已自动投递到用户频道,用户已经可以看到。请直接用文字回答用户的问题,不要再重复发送这些图片或表格。也不要主动发送原始文档文件(.docx/.pdf),除非用户明确要求下载原始文件。如果用户要求你分析、描述或对比图片内容,请使用 knowhere_view_image 工具传入 content 中的 [📷 path] 路径来获取图片数据进行视觉分析。";
|
|
1471
|
+
}
|
|
1472
|
+
return t2JsonResult(result);
|
|
1473
|
+
}
|
|
1474
|
+
};
|
|
1475
|
+
}
|
|
1476
|
+
/**
 * Builds the `knowhere_view_image` tool, which returns an image stored inside
 * a knowledge-base document directory as a base64 data URL.
 *
 * The tool parameters are `kbId`, `docName` and `imagePath` (the relative path
 * shown in a `[📷 ...]` marker). `imagePath` comes from model-supplied tool
 * parameters and is therefore untrusted: it is resolved against the document
 * directory, and any path that escapes that directory (e.g. via `..`) is
 * rejected rather than read.
 *
 * @param _params - Tool factory parameters (unused in this function).
 * @returns The tool definition (name, label, description, parameters, execute).
 */
function createViewImageTool(_params) {
  return {
    name: "knowhere_view_image",
    label: "Knowhere View Image",
    description: "分析知识库图片的像素内容。当 knowhere_read_chunks 返回的文本中包含 [📷 path] 标记时，可用此工具传入该 path 来获取图片的 base64 数据进行视觉分析。需要提供 kbId、docName 和 imagePath（即 [📷 ...] 中的相对路径）。",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        kbId: {
          type: "string",
          description: "Knowledge base ID"
        },
        docName: {
          type: "string",
          description: "Document name"
        },
        imagePath: {
          type: "string",
          description: "Image relative path from [📷 ...] marker, e.g. 'images/image-9 助力手推车.jpeg'"
        }
      },
      required: [
        "kbId",
        "docName",
        "imagePath"
      ]
    },
    execute: async (_toolCallId, rawParams) => {
      const paramsRecord = isRecord(rawParams) ? rawParams : {};
      const kbId = readString(paramsRecord.kbId);
      const docName = readString(paramsRecord.docName);
      const imagePath = readString(paramsRecord.imagePath);
      if (!kbId || !docName || !imagePath) throw new Error("kbId, docName, and imagePath are required.");
      const docDir = await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName);
      if (!docDir) return textResult(`文档 '${docName}' 不存在`);

      // Containment check: the normalized target must be strictly inside the
      // document directory. Without it, `../../…` in imagePath would let the
      // tool read arbitrary files. NOTE(review): symlinks inside docDir are
      // not resolved here.
      const docRoot = path.resolve(docDir);
      const absolutePath = path.join(docRoot, imagePath);
      const relativeToDoc = path.relative(docRoot, absolutePath);
      const escapesDocDir = relativeToDoc === "" || relativeToDoc === ".." || relativeToDoc.startsWith(`..${path.sep}`) || path.isAbsolute(relativeToDoc);
      if (escapesDocDir) return textResult(`图片路径无效: ${imagePath}`);

      try {
        await fs.access(absolutePath);
      } catch {
        return textResult(`图片文件不存在: ${imagePath}`);
      }
      const base64Data = (await fs.readFile(absolutePath)).toString("base64");
      // Media type is chosen from the extension; anything unrecognized is
      // reported as JPEG.
      const ext = path.extname(imagePath).toLowerCase();
      let mediaType = "image/jpeg";
      if (ext === ".png") mediaType = "image/png";
      else if (ext === ".gif") mediaType = "image/gif";
      else if (ext === ".webp") mediaType = "image/webp";
      return {
        content: [{
          type: "text",
          text: `你正在查看图片: ${imagePath}。图片数据已附在下方，请直接用你的视觉能力分析图片内容，`
        }, {
          type: "image_url",
          image_url: { url: `data:${mediaType};base64,${base64Data}` }
        }],
        details: {}
      };
    }
  };
}
|
|
@@ -1243,7 +1538,7 @@ function createDiscoverFilesTool(_params) {
|
|
|
1243
1538
|
return {
|
|
1244
1539
|
name: "knowhere_discover_files",
|
|
1245
1540
|
label: "Knowhere Discover Files",
|
|
1246
|
-
description: "在所有知识库文档中搜索关键词,返回命中文件和次数。用于和 knowhere_get_map
|
|
1541
|
+
description: "在所有知识库文档中搜索关键词,返回命中文件和次数。用于和 knowhere_get_map 做并集,避免遗漏相关文件。只返回文件名,不返回内容。注意:后续用 knowhere_read_chunks 读取时会自动投递图片/表格到用户频道,除非用户要求下载文件,否则无需额外发送原始文档。",
|
|
1247
1542
|
parameters: {
|
|
1248
1543
|
type: "object",
|
|
1249
1544
|
additionalProperties: false,
|
|
@@ -1351,7 +1646,12 @@ function createKnowhereToolFactory(params) {
|
|
|
1351
1646
|
}),
|
|
1352
1647
|
createGetMapTool({ api: params.api }),
|
|
1353
1648
|
createGetStructureTool({ api: params.api }),
|
|
1354
|
-
createReadChunksTool({
|
|
1649
|
+
createReadChunksTool({
|
|
1650
|
+
api: params.api,
|
|
1651
|
+
store: params.store,
|
|
1652
|
+
ctx
|
|
1653
|
+
}),
|
|
1654
|
+
createViewImageTool({ api: params.api }),
|
|
1355
1655
|
createDiscoverFilesTool({ api: params.api })
|
|
1356
1656
|
];
|
|
1357
1657
|
}
|
package/openclaw.plugin.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"name": "Knowhere",
|
|
4
4
|
"description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
|
|
5
5
|
"skills": ["./skills"],
|
|
6
|
-
"version": "0.2.
|
|
6
|
+
"version": "0.2.6",
|
|
7
7
|
"uiHints": {
|
|
8
8
|
"apiKey": {
|
|
9
9
|
"label": "Knowhere API Key",
|
|
@@ -152,6 +152,16 @@
|
|
|
152
152
|
}
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
|
+
},
|
|
156
|
+
"agentHooks": {
|
|
157
|
+
"type": "object",
|
|
158
|
+
"additionalProperties": false,
|
|
159
|
+
"properties": {
|
|
160
|
+
"enabled": {
|
|
161
|
+
"type": "boolean",
|
|
162
|
+
"default": true
|
|
163
|
+
}
|
|
164
|
+
}
|
|
155
165
|
}
|
|
156
166
|
}
|
|
157
167
|
}
|
package/package.json
CHANGED
|
@@ -19,28 +19,46 @@ Activate this skill when:
|
|
|
19
19
|
|
|
20
20
|
## Part 1: Ingesting New Documents
|
|
21
21
|
|
|
22
|
-
When a
|
|
22
|
+
When a user sends, uploads, or mentions a file, **always** use `knowhere_ingest_document` to parse it into the knowledge base. Two delivery modes exist depending on how the file arrives:
|
|
23
23
|
|
|
24
|
-
###
|
|
24
|
+
### Mode A: Local File (Telegram, Discord, Signal, …)
|
|
25
25
|
|
|
26
|
-
When
|
|
26
|
+
When the channel downloads the file to disk, a marker appears in the prompt:
|
|
27
27
|
|
|
28
28
|
```text
|
|
29
29
|
[media attached: /absolute/path/to/file.pdf (application/pdf) | handbook.pdf]
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
-
Use the exact absolute path
|
|
32
|
+
Use the exact absolute path from the marker:
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
```
|
|
35
|
+
knowhere_ingest_document(filePath: "/absolute/path/to/file.pdf", fileName: "handbook.pdf")
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Mode B: Cloud File (Feishu, …)
|
|
39
|
+
|
|
40
|
+
When the file stays in a cloud service (no `[media attached:]` marker):
|
|
41
|
+
|
|
42
|
+
1. Use the channel's file tool to locate the file and obtain a download URL
|
|
43
|
+
- Feishu: use `feishu_drive` with `action: "list"` to find the file, then get its download URL
|
|
44
|
+
- Other cloud channels: use the equivalent tool to get a direct download URL
|
|
45
|
+
2. Pass the URL to knowhere:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
knowhere_ingest_document(url: "https://download-url-from-channel-tool/...")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### After Ingestion
|
|
52
|
+
|
|
53
|
+
The plugin handles everything automatically:
|
|
54
|
+
|
|
55
|
+
- Uploads/fetches the file for parsing
|
|
56
|
+
- Polls until parsing completes
|
|
57
|
+
- Downloads and extracts the result package
|
|
58
|
+
- Copies parsed data to `~/.knowhere/{kbId}/`
|
|
59
|
+
- Builds/updates `knowledge_graph.json`
|
|
35
60
|
|
|
36
|
-
|
|
37
|
-
2. The plugin handles everything automatically:
|
|
38
|
-
- Uploads the file to Knowhere API for parsing
|
|
39
|
-
- Polls until parsing completes
|
|
40
|
-
- Downloads and extracts the result package
|
|
41
|
-
- **Automatically** copies parsed data to `~/.knowhere/{kbId}/`
|
|
42
|
-
- **Automatically** builds/updates `knowledge_graph.json`
|
|
43
|
-
3. After ingest completes, the new document is immediately searchable via the retrieval workflow below
|
|
61
|
+
After ingest completes, the new document is immediately searchable via the retrieval workflow below.
|
|
44
62
|
|
|
45
63
|
Supported formats: PDF, DOCX, XLSX, PPTX, TXT, MD, images (JPG, PNG)
|
|
46
64
|
|