@ontos-ai/knowhere-claw 0.1.0-beta.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -132
- package/dist/__tests__/channel-route.test.d.ts +1 -0
- package/dist/__tests__/ingest-tool.test.d.ts +1 -0
- package/dist/__tests__/read-result-file-tool.test.d.ts +1 -0
- package/dist/__tests__/tracker-progress.test.d.ts +1 -0
- package/dist/channel-delivery.d.ts +21 -0
- package/dist/channel-delivery.js +337 -0
- package/dist/config.d.ts +6 -0
- package/dist/config.js +37 -23
- package/dist/index.js +13 -11
- package/dist/parser.js +1 -1
- package/dist/session.js +1 -0
- package/dist/store.d.ts +18 -1
- package/dist/store.js +91 -3
- package/dist/tools.d.ts +2 -3
- package/dist/tools.js +473 -105
- package/dist/tracker-progress.d.ts +3 -1
- package/dist/tracker-progress.js +8 -190
- package/dist/types.d.ts +7 -6
- package/openclaw.plugin.json +3 -21
- package/package.json +7 -5
- package/skills/knowhere/SKILL.md +57 -20
- package/dist/hooks.d.ts +0 -8
- package/dist/hooks.js +0 -415
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
|
|
2
|
+
import type { ChannelRouteRecord } from "./types";
|
|
2
3
|
export declare function sendTrackerProgress(params: {
|
|
3
4
|
api: OpenClawPluginApi;
|
|
4
5
|
context?: unknown;
|
|
5
6
|
sessionKey?: string;
|
|
6
7
|
messages?: unknown[];
|
|
8
|
+
channelRoute?: ChannelRouteRecord;
|
|
7
9
|
text: string;
|
|
8
|
-
}): Promise<void>;
|
|
10
|
+
}): Promise<boolean>;
|
package/dist/tracker-progress.js
CHANGED
|
@@ -1,197 +1,15 @@
|
|
|
1
|
-
import { isRecord } from "./types.js";
|
|
2
|
-
import { normalizeWhitespace } from "./text.js";
|
|
3
|
-
import { findConversationSegmentValue, parseConversationSessionKey } from "./session.js";
|
|
1
|
+
import { deliverChannelMessage } from "./channel-delivery.js";
|
|
4
2
|
//#region src/tracker-progress.ts
|
|
5
|
-
const TRACKER_PROGRESS_SURFACES = new Set([
|
|
6
|
-
"discord",
|
|
7
|
-
"imessage",
|
|
8
|
-
"line",
|
|
9
|
-
"signal",
|
|
10
|
-
"slack",
|
|
11
|
-
"telegram",
|
|
12
|
-
"whatsapp"
|
|
13
|
-
]);
|
|
14
|
-
function readString(value) {
|
|
15
|
-
return normalizeWhitespace(value) || void 0;
|
|
16
|
-
}
|
|
17
|
-
function readInteger(value) {
|
|
18
|
-
const normalized = readString(value);
|
|
19
|
-
if (!normalized || !/^-?\d+$/.test(normalized)) return;
|
|
20
|
-
const parsed = Number(normalized);
|
|
21
|
-
return Number.isSafeInteger(parsed) ? parsed : void 0;
|
|
22
|
-
}
|
|
23
|
-
function normalizeTrackerProgressSurface(value) {
|
|
24
|
-
const normalized = readString(value)?.toLowerCase();
|
|
25
|
-
if (!normalized || !TRACKER_PROGRESS_SURFACES.has(normalized)) return;
|
|
26
|
-
return normalized;
|
|
27
|
-
}
|
|
28
|
-
function extractReplyReferenceFromValue(value, seen = /* @__PURE__ */ new WeakSet()) {
|
|
29
|
-
if (typeof value === "string") return readString(value.match(/"(?:messageId|message_id|replyTo|reply_to|threadTs|thread_ts|ts)"\s*:\s*"([^"]+)"/)?.[1]);
|
|
30
|
-
if (Array.isArray(value)) {
|
|
31
|
-
if (seen.has(value)) return;
|
|
32
|
-
seen.add(value);
|
|
33
|
-
for (let index = value.length - 1; index >= 0; index -= 1) {
|
|
34
|
-
const replyReference = extractReplyReferenceFromValue(value[index], seen);
|
|
35
|
-
if (replyReference) return replyReference;
|
|
36
|
-
}
|
|
37
|
-
return;
|
|
38
|
-
}
|
|
39
|
-
if (!isRecord(value)) return;
|
|
40
|
-
if (seen.has(value)) return;
|
|
41
|
-
seen.add(value);
|
|
42
|
-
for (const key of [
|
|
43
|
-
"messageId",
|
|
44
|
-
"message_id",
|
|
45
|
-
"replyTo",
|
|
46
|
-
"reply_to",
|
|
47
|
-
"threadTs",
|
|
48
|
-
"thread_ts",
|
|
49
|
-
"ts"
|
|
50
|
-
]) {
|
|
51
|
-
const replyReference = readString(value[key]);
|
|
52
|
-
if (replyReference) return replyReference;
|
|
53
|
-
}
|
|
54
|
-
const preferredKeys = [
|
|
55
|
-
"content",
|
|
56
|
-
"text",
|
|
57
|
-
"body",
|
|
58
|
-
"message",
|
|
59
|
-
"messages",
|
|
60
|
-
"items"
|
|
61
|
-
];
|
|
62
|
-
const preferredKeySet = new Set(preferredKeys);
|
|
63
|
-
for (const key of preferredKeys) {
|
|
64
|
-
const replyReference = extractReplyReferenceFromValue(value[key], seen);
|
|
65
|
-
if (replyReference) return replyReference;
|
|
66
|
-
}
|
|
67
|
-
for (const [key, nestedValue] of Object.entries(value).reverse()) {
|
|
68
|
-
if (preferredKeySet.has(key)) continue;
|
|
69
|
-
const replyReference = extractReplyReferenceFromValue(nestedValue, seen);
|
|
70
|
-
if (replyReference) return replyReference;
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
function resolveTrackerProgressTarget(params) {
|
|
74
|
-
const context = isRecord(params.context) ? params.context : void 0;
|
|
75
|
-
const session = parseConversationSessionKey(params.sessionKey);
|
|
76
|
-
const surface = normalizeTrackerProgressSurface(context?.channelId) || normalizeTrackerProgressSurface(context?.provider) || normalizeTrackerProgressSurface(context?.surface) || normalizeTrackerProgressSurface(session?.surface);
|
|
77
|
-
if (!surface) return;
|
|
78
|
-
const replyReference = readString(context?.messageId) || readString(context?.replyTo) || readString(context?.threadTs) || (params.messages ? extractReplyReferenceFromValue(params.messages) : void 0);
|
|
79
|
-
switch (surface) {
|
|
80
|
-
case "discord": {
|
|
81
|
-
const threadId = readString(context?.threadId) || findConversationSegmentValue(session, "thread");
|
|
82
|
-
const channelId = findConversationSegmentValue(session, "channel", "guild");
|
|
83
|
-
const directId = findConversationSegmentValue(session, "direct", "user", "member");
|
|
84
|
-
const to = threadId && `channel:${threadId}` || readString(context?.conversationId) || channelId && `channel:${channelId}` || directId && `user:${directId}`;
|
|
85
|
-
if (!to) return;
|
|
86
|
-
return {
|
|
87
|
-
surface,
|
|
88
|
-
to,
|
|
89
|
-
accountId: "tracker",
|
|
90
|
-
replyTo: replyReference
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
case "slack": {
|
|
94
|
-
const directId = findConversationSegmentValue(session, "direct", "user", "member");
|
|
95
|
-
const channelId = findConversationSegmentValue(session, "channel", "group");
|
|
96
|
-
const to = readString(context?.conversationId) || channelId || (directId ? `user:${directId}` : void 0);
|
|
97
|
-
if (!to) return;
|
|
98
|
-
return {
|
|
99
|
-
surface,
|
|
100
|
-
to,
|
|
101
|
-
accountId: readString(context?.accountId),
|
|
102
|
-
threadTs: readString(context?.threadTs) || replyReference
|
|
103
|
-
};
|
|
104
|
-
}
|
|
105
|
-
case "telegram": {
|
|
106
|
-
const chatId = findConversationSegmentValue(session, "direct", "group", "channel", "chat", "room");
|
|
107
|
-
const to = readString(context?.conversationId) || chatId;
|
|
108
|
-
if (!to) return;
|
|
109
|
-
return {
|
|
110
|
-
surface,
|
|
111
|
-
to,
|
|
112
|
-
accountId: readString(context?.accountId),
|
|
113
|
-
replyToMessageId: readInteger(replyReference),
|
|
114
|
-
messageThreadId: readInteger(context?.threadId)
|
|
115
|
-
};
|
|
116
|
-
}
|
|
117
|
-
case "line": {
|
|
118
|
-
const directId = findConversationSegmentValue(session, "direct", "user", "member");
|
|
119
|
-
const groupId = findConversationSegmentValue(session, "group");
|
|
120
|
-
const roomId = findConversationSegmentValue(session, "room");
|
|
121
|
-
const chatId = findConversationSegmentValue(session, "channel", "chat", "space");
|
|
122
|
-
const to = readString(context?.conversationId) || (groupId ? `line:group:${groupId}` : void 0) || (roomId ? `line:room:${roomId}` : void 0) || (directId ? `line:${directId}` : void 0) || (chatId ? `line:${chatId}` : void 0);
|
|
123
|
-
if (!to) return;
|
|
124
|
-
return {
|
|
125
|
-
surface,
|
|
126
|
-
to,
|
|
127
|
-
accountId: readString(context?.accountId),
|
|
128
|
-
replyToken: readString(context?.replyToken)
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
|
-
case "imessage":
|
|
132
|
-
case "signal":
|
|
133
|
-
case "whatsapp": {
|
|
134
|
-
const conversationId = findConversationSegmentValue(session, "direct", "group", "channel", "chat", "room");
|
|
135
|
-
const to = readString(context?.conversationId) || conversationId;
|
|
136
|
-
if (!to) return;
|
|
137
|
-
return {
|
|
138
|
-
surface,
|
|
139
|
-
to,
|
|
140
|
-
accountId: readString(context?.accountId)
|
|
141
|
-
};
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
3
|
async function sendTrackerProgress(params) {
|
|
146
|
-
const target = resolveTrackerProgressTarget({
|
4
|
+
return (await deliverChannelMessage({
|
|
5
|
+
api: params.api,
|
|
6
|
+
operationLabel: "tracker progress",
|
|
147
7
|
context: params.context,
|
|
148
8
|
sessionKey: params.sessionKey,
|
|
149
|
-
messages: params.messages
|
|
150
|
-
});
|
151
|
-
if (!target) return;
|
152
|
-
try {
|
153
|
-
switch (target.surface) {
|
|
154
|
-
case "discord":
|
|
155
|
-
await params.api.runtime.channel.discord.sendMessageDiscord(target.to, params.text, {
|
|
156
|
-
accountId: target.accountId,
|
|
157
|
-
replyTo: target.replyTo
|
|
158
|
-
});
|
|
159
|
-
break;
|
|
160
|
-
case "slack":
|
|
161
|
-
await params.api.runtime.channel.slack.sendMessageSlack(target.to, params.text, {
|
|
162
|
-
accountId: target.accountId,
|
|
163
|
-
threadTs: target.threadTs
|
|
164
|
-
});
|
|
165
|
-
break;
|
|
166
|
-
case "telegram":
|
|
167
|
-
await params.api.runtime.channel.telegram.sendMessageTelegram(target.to, params.text, {
|
|
168
|
-
accountId: target.accountId,
|
|
169
|
-
messageThreadId: target.messageThreadId,
|
|
170
|
-
replyToMessageId: target.replyToMessageId
|
|
171
|
-
});
|
|
172
|
-
break;
|
|
173
|
-
case "line":
|
|
174
|
-
await params.api.runtime.channel.line.sendMessageLine(target.to, params.text, {
|
|
175
|
-
accountId: target.accountId,
|
|
176
|
-
replyToken: target.replyToken
|
|
177
|
-
});
|
|
178
|
-
break;
|
|
179
|
-
case "whatsapp":
|
|
180
|
-
await params.api.runtime.channel.whatsapp.sendMessageWhatsApp(target.to, params.text, {
|
|
181
|
-
accountId: target.accountId,
|
|
182
|
-
verbose: false
|
|
183
|
-
});
|
|
184
|
-
break;
|
|
185
|
-
case "signal":
|
|
186
|
-
await params.api.runtime.channel.signal.sendMessageSignal(target.to, params.text, { accountId: target.accountId });
|
|
187
|
-
break;
|
|
188
|
-
case "imessage":
|
|
189
|
-
await params.api.runtime.channel.imessage.sendMessageIMessage(target.to, params.text, { accountId: target.accountId });
|
|
190
|
-
break;
|
|
191
|
-
}
|
|
192
|
-
} catch (error) {
|
|
193
|
-
params.api.logger.warn(`knowhere: tracker progress send failed. ${error instanceof Error ? error.message : String(error)}`);
|
|
194
|
-
}
|
|
9
|
+
messages: params.messages,
|
|
10
|
+
channelRoute: params.channelRoute,
|
|
11
|
+
text: params.text
|
|
12
|
+
})).delivered;
|
|
195
13
|
}
|
|
196
14
|
//#endregion
|
|
197
15
|
export { sendTrackerProgress };
|
package/dist/types.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export interface PluginLogger {
|
|
|
9
9
|
export interface ToolRuntimeContext {
|
|
10
10
|
workspaceDir?: string;
|
|
11
11
|
agentId?: string;
|
|
12
|
+
accountId?: string;
|
|
12
13
|
sessionKey?: string;
|
|
13
14
|
sessionId?: string;
|
|
14
15
|
}
|
|
@@ -17,8 +18,6 @@ export interface ResolvedKnowhereConfig {
|
|
|
17
18
|
baseUrl: string;
|
|
18
19
|
storageDir: string;
|
|
19
20
|
scopeMode: ScopeMode;
|
|
20
|
-
autoGrounding: boolean;
|
|
21
|
-
maxContextChars: number;
|
|
22
21
|
pollIntervalMs: number;
|
|
23
22
|
pollTimeoutMs: number;
|
|
24
23
|
requestTimeoutMs: number;
|
|
@@ -179,10 +178,6 @@ export interface KnowhereScope {
|
|
|
179
178
|
documentsDir: string;
|
|
180
179
|
indexPath: string;
|
|
181
180
|
}
|
|
182
|
-
export interface KnowhereAutoGroundingController {
|
|
183
|
-
forgetDocument(scope: KnowhereScope, docId: string): void;
|
|
184
|
-
forgetScope(scope: KnowhereScope): void;
|
|
185
|
-
}
|
|
186
181
|
export interface StoredDocumentRecord {
|
|
187
182
|
id: string;
|
|
188
183
|
title: string;
|
|
@@ -243,5 +238,11 @@ export interface SaveStoredDocumentPayload {
|
|
|
243
238
|
export interface JsonSchemaObject {
|
|
244
239
|
[key: string]: unknown;
|
|
245
240
|
}
|
|
241
|
+
export interface ChannelRouteRecord {
|
|
242
|
+
readonly channelId: string;
|
|
243
|
+
readonly accountId: string;
|
|
244
|
+
readonly conversationId?: string;
|
|
245
|
+
readonly updatedAt: string;
|
|
246
|
+
}
|
|
246
247
|
export declare function isRecord(value: unknown): value is StringRecord;
|
|
247
248
|
export declare function isNodeError(error: unknown): error is NodeJS.ErrnoException;
|
package/openclaw.plugin.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
|
-
"id": "knowhere",
|
|
2
|
+
"id": "knowhere-claw",
|
|
3
3
|
"name": "Knowhere",
|
|
4
4
|
"description": "Parse documents with Knowhere and expose the stored result as tool-queryable document state for OpenClaw agents.",
|
|
5
5
|
"skills": ["./skills"],
|
|
6
|
-
"version": "0.1.
|
|
6
|
+
"version": "0.1.1",
|
|
7
7
|
"uiHints": {
|
|
8
8
|
"apiKey": {
|
|
9
9
|
"label": "Knowhere API Key",
|
|
@@ -22,14 +22,6 @@
|
|
|
22
22
|
"label": "Document Scope",
|
|
23
23
|
"help": "Choose whether ingested documents are isolated per session, shared per agent, or global."
|
|
24
24
|
},
|
|
25
|
-
"autoGrounding": {
|
|
26
|
-
"label": "Auto Grounding",
|
|
27
|
-
"help": "Automatically ingest attachments and inject compact Knowhere document availability/status guidance before prompt construction."
|
|
28
|
-
},
|
|
29
|
-
"maxContextChars": {
|
|
30
|
-
"label": "Grounding Context Limit",
|
|
31
|
-
"help": "Maximum characters injected into the prompt for Knowhere document handle/status context."
|
|
32
|
-
},
|
|
33
25
|
"pollIntervalMs": {
|
|
34
26
|
"label": "Poll Interval",
|
|
35
27
|
"help": "Polling interval in milliseconds while waiting for Knowhere jobs to finish."
|
|
@@ -57,7 +49,7 @@
|
|
|
57
49
|
},
|
|
58
50
|
"baseUrl": {
|
|
59
51
|
"type": "string",
|
|
60
|
-
"format": "
|
|
52
|
+
"format": "string",
|
|
61
53
|
"default": "https://api.knowhereto.ai"
|
|
62
54
|
},
|
|
63
55
|
"storageDir": {
|
|
@@ -68,16 +60,6 @@
|
|
|
68
60
|
"enum": ["session", "agent", "global"],
|
|
69
61
|
"default": "session"
|
|
70
62
|
},
|
|
71
|
-
"autoGrounding": {
|
|
72
|
-
"type": "boolean",
|
|
73
|
-
"default": true
|
|
74
|
-
},
|
|
75
|
-
"maxContextChars": {
|
|
76
|
-
"type": "integer",
|
|
77
|
-
"minimum": 500,
|
|
78
|
-
"maximum": 12000,
|
|
79
|
-
"default": 4000
|
|
80
|
-
},
|
|
81
63
|
"pollIntervalMs": {
|
|
82
64
|
"type": "integer",
|
|
83
65
|
"minimum": 1000,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ontos-ai/knowhere-claw",
|
|
3
|
-
"version": "0.1.0-beta.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "OpenClaw plugin for Knowhere-powered document ingestion and automatic grounding.",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/",
|
|
@@ -26,10 +26,11 @@
|
|
|
26
26
|
"smoke:tools": "vite-node --mode smoke ./smoketest/run-tool.ts",
|
|
27
27
|
"tsgo": "tsgo --noEmit -p tsconfig.json",
|
|
28
28
|
"typecheck": "pnpm tsgo",
|
|
29
|
-
"clean": "rm -rf dist",
|
|
30
29
|
"check:plugin-version": "node ./scripts/release-guard.mjs plugin-version",
|
|
31
30
|
"check:beta-version": "node ./scripts/release-guard.mjs beta-version",
|
|
32
|
-
"prepack": "pnpm build"
|
|
31
|
+
"prepack": "pnpm build",
|
|
32
|
+
"test": "vitest --run",
|
|
33
|
+
"clean": "rm -rf dist"
|
|
33
34
|
},
|
|
34
35
|
"dependencies": {
|
|
35
36
|
"@knowhere-ai/sdk": "^0.1.1",
|
|
@@ -44,7 +45,8 @@
|
|
|
44
45
|
"oxlint-tsgolint": "^0.16.0",
|
|
45
46
|
"rolldown": "^1.0.0-rc.8",
|
|
46
47
|
"typescript": "^5.9.3",
|
|
47
|
-
"vite-node": "5.2.0"
|
|
48
|
+
"vite-node": "5.2.0",
|
|
49
|
+
"vitest": "^4.1.0"
|
|
48
50
|
},
|
|
49
51
|
"peerDependencies": {
|
|
50
52
|
"openclaw": ">=2026.3.8"
|
|
@@ -58,4 +60,4 @@
|
|
|
58
60
|
"./dist/index.js"
|
|
59
61
|
]
|
|
60
62
|
}
|
|
61
|
-
}
|
|
63
|
+
}
|
package/skills/knowhere/SKILL.md
CHANGED
|
@@ -1,12 +1,27 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: knowhere
|
|
3
|
-
description: Use Knowhere
|
|
3
|
+
description: First-class document reader. Use Knowhere tools whenever the task involves reading, understanding, searching, or reasoning over document files (PDF, DOCX, XLSX, PPTX, images, TXT, MD) or URLs — even if the user does not mention "Knowhere" by name. Prefer Knowhere over generic file reads for all supported formats. Covers ingestion, structured browsing, chunk search, result file inspection, job management, and scope cleanup.
|
|
4
4
|
user-invocable: false
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# Knowhere Skill
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Knowhere is the first-class way to read, understand, and answer questions about document files. When a task involves reading or reasoning over a supported file (`.pdf`, `.docx`, `.xlsx`, `.pptx`, `.txt`, `.md`, `.jpg`, `.jpeg`, `.png`), always prefer Knowhere tools over generic file-reading approaches. Knowhere parses documents into structured chunks with hierarchy, summaries, tables, and images — far richer than raw text extraction.
|
|
10
|
+
|
|
11
|
+
Use the `knowhere_*` tools for explicit document ingestion and browse-first stored-result workflows. Before starting a new ingest, prefer checking whether the current scope already has the same document stored.
|
|
12
|
+
|
|
13
|
+
## When to prefer Knowhere
|
|
14
|
+
|
|
15
|
+
Reach for Knowhere tools first whenever:
|
|
16
|
+
|
|
17
|
+
- The user asks you to read, summarize, analyze, compare, or quote from a document file — even if they don't mention "Knowhere" by name.
|
|
18
|
+
- The user references an attached file or a file path pointing to a supported format.
|
|
19
|
+
- The user asks a question that likely requires content from a document (e.g., "what does the contract say about…", "find the table in the report…").
|
|
20
|
+
- You encounter a supported file in the workspace and need to understand its content to complete a task.
|
|
21
|
+
|
|
22
|
+
Do not attempt to read supported document files (especially PDFs, DOCX, XLSX, PPTX) with generic file-read tools or shell commands. These formats are binary or semi-structured and will produce garbled or incomplete output. Knowhere handles them properly.
|
|
23
|
+
|
|
24
|
+
For plain text files (`.txt`, `.md`), Knowhere still adds value through chunking, hierarchy extraction, and search — but direct reads are acceptable for quick one-off checks.
|
|
10
25
|
|
|
11
26
|
## Terminology
|
|
12
27
|
|
|
@@ -22,20 +37,28 @@ Use these terms consistently:
|
|
|
22
37
|
|
|
23
38
|
Keep tool-driven replies short and labeled.
|
|
24
39
|
|
|
40
|
+
- Always reply in the same language the user is using. Tool outputs are in English — translate status messages, labels, and explanations to the user's language.
|
|
41
|
+
- Never expose internal identifiers (`docId`, `chunkId`, UUID-based filenames, raw paths like `Default_Root/...`) in user-facing replies. These are for your internal reasoning and tool calls only. Cite sources using human-readable section or chapter names derived from the path.
|
|
25
42
|
- Reuse labels such as `Scope`, `Source`, `File`, `Chunks`, `Job ID`, and `Next` when relaying tool results.
|
|
26
43
|
- Prefer one short status line plus the key fields the user needs for the next step.
|
|
27
44
|
|
|
28
45
|
## When to use Knowhere
|
|
29
46
|
|
|
30
|
-
Use Knowhere
|
|
47
|
+
Use Knowhere whenever the task involves document content — whether the user mentions "Knowhere" or not:
|
|
48
|
+
|
|
49
|
+
- Read, summarize, analyze, compare, or quote from any supported file (PDF, DOCX, XLSX, PPTX, images, TXT, MD)
|
|
50
|
+
- Parse a document from a URL (pass the URL directly to `knowhere_ingest_document` — no local download needed)
|
|
51
|
+
- Answer questions that depend on document content ("what does section 3 say?", "find the revenue table", "compare these two reports")
|
|
52
|
+
- Inspect, browse, or search previously ingested documents
|
|
53
|
+
- Inspect ingest jobs or import a completed Knowhere job
|
|
54
|
+
- Preview, list, remove, or clear stored documents
|
|
55
|
+
- Understand what fields exist inside the stored result package
|
|
56
|
+
|
|
57
|
+
When the user provides a URL to a document (e.g., a link to a PDF, web page, or online report), use `knowhere_ingest_document` with the `url` parameter. Knowhere fetches it directly — no need to download the file locally first.
|
|
31
58
|
|
|
32
|
-
|
|
33
|
-
- inspect, summarize, or quote previously ingested documents
|
|
34
|
-
- inspect ingest jobs or import a completed Knowhere job
|
|
35
|
-
- preview, list, remove, or clear stored documents
|
|
36
|
-
- understand what fields exist inside the stored result package
|
|
59
|
+
Before calling `knowhere_ingest_document` for a file or URL that may already be available in the current scope, call `knowhere_list_documents` and compare `Source`, `File`, and `Title`. If a matching stored document already exists, reuse it instead of ingesting again unless the user explicitly asks for a fresh parse, different parsing settings, or overwrite behavior.
|
|
37
60
|
|
|
38
|
-
Do not assume an uploaded attachment was already ingested. If the user asks you to use an attached file
|
|
61
|
+
Do not assume an uploaded attachment was already ingested. If the user asks you to use an attached file, first check for an existing matching Knowhere result in the current scope. If no existing result clearly covers it, call `knowhere_ingest_document` yourself.
|
|
39
62
|
|
|
40
63
|
## Attachment markers
|
|
41
64
|
|
|
@@ -96,9 +119,9 @@ When `truncatedStrings: true` appears, retry with a higher value (e.g. 12000, up
|
|
|
96
119
|
|
|
97
120
|
## Tool selection
|
|
98
121
|
|
|
99
|
-
- `knowhere_ingest_document` for new local files or URLs
|
|
100
|
-
- `knowhere_list_documents` to find candidate document IDs in the current scope
|
|
101
|
-
- `knowhere_read_result_file` for `manifest.json`, `hierarchy.json`, `kb.csv`, table HTML files, or other
|
|
122
|
+
- `knowhere_ingest_document` for new local files or URLs after confirming the document is not already stored in the current scope, or when the user explicitly wants a fresh parse
|
|
123
|
+
- `knowhere_list_documents` to find candidate document IDs in the current scope and check whether the same document is already stored before a new ingest
|
|
124
|
+
- `knowhere_read_result_file` for `manifest.json`, `hierarchy.json`, `kb.csv`, table HTML files, image assets (e.g., `images/img-0.png`), or other files under `result/`. Image files are staged into an attachment-ready local path and returned with a `message` tool handoff instead of inline image bytes. When the result mode is `image_attachment`, never call `read` or any other file-reading tool on `data.stagedPath`; immediately call `message` with `data.sendWithMessageTool`.
|
|
102
125
|
- `knowhere_preview_document` for a quick overview: markdown preview plus the structural path tree (like a book index)
|
|
103
126
|
- `knowhere_grep` to search chunks with composable AND conditions across fields. This is the recommended default for text search — just pass `conditions: [{ pattern: "your query" }]` with no target to search all text fields at once. Use targeted conditions only when you need to narrow by specific fields like `chunk.type` or `chunk.path`.
|
|
104
127
|
- `knowhere_list_jobs`, `knowhere_get_job_status`, and `knowhere_import_completed_job` for async Knowhere jobs
|
|
@@ -108,23 +131,26 @@ After ingesting a document, use the returned document or job identifiers for fol
|
|
|
108
131
|
|
|
109
132
|
## Recommended workflow
|
|
110
133
|
|
|
111
|
-
1.
|
|
112
|
-
2.
|
|
113
|
-
3. Call `
|
|
114
|
-
4.
|
|
115
|
-
5.
|
|
116
|
-
6. Call `
|
|
134
|
+
1. If the document may already exist in the current scope, call `knowhere_list_documents` first and compare `Source`, `File`, and `Title` to find an existing match.
|
|
135
|
+
2. Ingest or import the document only if it is not already in the store, or if the user explicitly wants a fresh parse. After calling `knowhere_ingest_document`, you receive a job ID immediately while parsing continues in the background. Wait for the plugin's completion message that the document is ready before proceeding. If no completion message arrives, check with `knowhere_get_job_status`.
|
|
136
|
+
3. Call `knowhere_list_documents` again if you need to confirm the right `docId`.
|
|
137
|
+
4. Call `knowhere_preview_document` to get a structural overview (table of contents with summaries).
|
|
138
|
+
5. When you know what to search for, call `knowhere_grep` with `conditions: [{ pattern: "your query" }]` — this searches all text fields (content, summary, keywords, path) in one call. Add more conditions to narrow results (e.g. filter by `chunk.type` or `chunk.path`).
|
|
139
|
+
6. Call `knowhere_grep` with a path condition to narrow results to a specific branch when browsing by structure.
|
|
140
|
+
7. Call `knowhere_read_result_file` for `hierarchy.json`, `kb.csv`, table HTML, or image assets when the answer depends on parser rows, rich table structure, or visual content.
|
|
117
141
|
|
|
118
142
|
## Reasoning rules
|
|
119
143
|
|
|
120
144
|
- Prefer `knowhere_grep` for all text search. It supports composable AND conditions, regex, and normalizes HTML/LaTeX/unicode before matching. Use `knowhere_preview_document` when you need a quick overview and structural browsing by path.
|
|
121
145
|
- Use `knowhere_preview_document` before broad reads when the document is large or the relevant branch is unclear.
|
|
122
146
|
- Keep `path` in your reasoning and in your answer when possible. It restores section position and improves grounding.
|
|
123
|
-
-
|
|
124
|
-
- For image or table questions, inspect matching `image` or `table` chunks and the related manifest asset entries before answering.
|
|
147
|
+
- Use `chunkId` and `path` internally for your own reasoning and tool calls, but do not expose them to the user. When citing sources, use human-readable section names derived from the path (e.g., "第7章 维护、保养" instead of `Default_Root/f339a970...-->7 维护、保养`). Never show raw `docId`, `chunkId`, or internal file paths in user-facing replies.
|
|
148
|
+
- For image or table questions, inspect matching `image` or `table` chunks and the related manifest asset entries before answering. Use `knowhere_read_result_file` with the chunk's `assetFilePath` to prepare image assets for delivery, then use the returned `message` tool handoff when the user wants to see the image. Do not call `read` on the staged image path because it may live outside the agent sandbox.
|
|
125
149
|
- Do not rely on `full.md` alone if the question depends on exact section boundaries, tables, or images.
|
|
126
150
|
- If the task needs raw `kb.csv`, raw HTML tables, or another stored text file under `result/`, read it directly with `knowhere_read_result_file`.
|
|
127
151
|
- When a tool response contains `truncatedStrings: true`, retry with `maxStringChars: 12000` (or up to 20000) before answering from incomplete content.
|
|
152
|
+
- When `knowhere_grep` returns hints after the JSON (separated by `---`), follow the suggested next steps before answering. The hints guide you to refine your search iteratively.
|
|
153
|
+
- Iterate on grep: if 0 matches, broaden; if results are capped, add a path condition; if truncated, re-query fewer results with higher maxStringChars.
|
|
128
154
|
|
|
129
155
|
## Tool usage examples
|
|
130
156
|
|
|
@@ -229,6 +255,17 @@ Read a table HTML file:
|
|
|
229
255
|
}
|
|
230
256
|
```
|
|
231
257
|
|
|
258
|
+
Read an image asset:
|
|
259
|
+
|
|
260
|
+
```json
|
|
261
|
+
{
|
|
262
|
+
"docId": "handbook-1234",
|
|
263
|
+
"filePath": "images/img-0.png"
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
If the result returns `mode: "image_attachment"`, call `message` with `data.sendWithMessageTool` exactly as returned. Do not call `read` on `data.stagedPath`.
|
|
268
|
+
|
|
232
269
|
Example workflow for a question like `What does the handbook say about hotel limits?`:
|
|
233
270
|
|
|
234
271
|
1. Call `knowhere_list_documents` to find the right `docId` if you do not already have it.
|
package/dist/hooks.d.ts
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
|
|
2
|
-
import type { KnowhereAutoGroundingController, ResolvedKnowhereConfig } from "./types";
|
|
3
|
-
import { KnowhereStore } from "./store";
|
|
4
|
-
export declare function registerKnowhereAutoGrounding(params: {
|
|
5
|
-
api: OpenClawPluginApi;
|
|
6
|
-
config: ResolvedKnowhereConfig;
|
|
7
|
-
store: KnowhereStore;
|
|
8
|
-
}): KnowhereAutoGroundingController;
|