@ontos-ai/knowhere-claw 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/dist/connect-builder.d.ts +2 -0
- package/dist/connect-builder.js +9 -10
- package/dist/graph-builder.d.ts +4 -1
- package/dist/graph-builder.js +15 -10
- package/dist/index.js +1 -7
- package/dist/kg-service.js +8 -3
- package/dist/parser.d.ts +4 -8
- package/dist/parser.js +25 -243
- package/dist/store.d.ts +4 -14
- package/dist/store.js +21 -106
- package/dist/text.js +1 -13
- package/dist/tools.js +135 -879
- package/dist/types.d.ts +1 -58
- package/openclaw.plugin.json +71 -1
- package/package.json +1 -1
- package/skills/knowhere_memory/SKILL.md +80 -98
- package/skills/knowhere/SKILL.md +0 -280
- /package/dist/__tests__/{read-result-file-tool.test.d.ts → storage-layout.test.d.ts} +0 -0
package/dist/tools.js
CHANGED
|
@@ -1,29 +1,28 @@
|
|
|
1
1
|
import { isRecord } from "./types.js";
|
|
2
2
|
import { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, persistApiKey } from "./config.js";
|
|
3
|
-
import {
|
|
3
|
+
import { resolveStoredKnowhereArtifactPath } from "./parser.js";
|
|
4
|
+
import { sanitizeStringArray } from "./text.js";
|
|
4
5
|
import { formatErrorMessage } from "./error-message.js";
|
|
5
6
|
import { KnowhereClient } from "./client.js";
|
|
6
|
-
import { deliverChannelMessage } from "./channel-delivery.js";
|
|
7
7
|
import { sendTrackerProgress } from "./tracker-progress.js";
|
|
8
8
|
import fs from "node:fs/promises";
|
|
9
9
|
import path from "node:path";
|
|
10
|
-
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/core";
|
|
11
10
|
//#region src/tools.ts
|
|
11
|
+
const TERMINAL_JOB_STATUSES = new Set([
|
|
12
|
+
"cancelled",
|
|
13
|
+
"canceled",
|
|
14
|
+
"done",
|
|
15
|
+
"error",
|
|
16
|
+
"failed"
|
|
17
|
+
]);
|
|
12
18
|
async function buildKnowledgeGraphAsync(params) {
|
|
13
19
|
const docDir = path.join(params.scope.documentsDir, params.docId);
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
try {
|
|
17
|
-
payload = await fs.readFile(metadataPath, "utf-8");
|
|
18
|
-
} catch {
|
|
19
|
-
payload = null;
|
|
20
|
-
}
|
|
21
|
-
if (!payload) {
|
|
20
|
+
const payloadData = await params.store.readDocumentMetadata(params.scope, params.docId);
|
|
21
|
+
if (!payloadData) {
|
|
22
22
|
params.api.logger.warn(`knowhere: cannot build KG, metadata not found for docId=${params.docId}`);
|
|
23
23
|
return;
|
|
24
24
|
}
|
|
25
|
-
const
|
|
26
|
-
const fullMarkdownPath = path.join(docDir, "full_markdown.txt");
|
|
25
|
+
const fullMarkdownPath = await resolveStoredKnowhereArtifactPath(docDir, "full_markdown.txt");
|
|
27
26
|
let fullMarkdown = "";
|
|
28
27
|
try {
|
|
29
28
|
fullMarkdown = await fs.readFile(fullMarkdownPath, "utf-8");
|
|
@@ -37,8 +36,8 @@ async function buildKnowledgeGraphAsync(params) {
|
|
|
37
36
|
sourcePath: docDir,
|
|
38
37
|
keywords,
|
|
39
38
|
metadata: {
|
|
40
|
-
title: payloadData.document
|
|
41
|
-
sourceLabel: payloadData.document
|
|
39
|
+
title: payloadData.document.title || "Untitled",
|
|
40
|
+
sourceLabel: payloadData.document.sourceLabel || "Unknown",
|
|
42
41
|
checksum: params.documentPayload.downloadedResult.rawZipSha1,
|
|
43
42
|
ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
44
43
|
}
|
|
@@ -48,7 +47,6 @@ async function buildKnowledgeGraphAsync(params) {
|
|
|
48
47
|
});
|
|
49
48
|
params.api.logger.info(`knowhere: knowledge graph build completed kbId=${params.kbId} docId=${params.docId}`);
|
|
50
49
|
}
|
|
51
|
-
const PREVIEW_SUMMARY_MAX_CHARS = 120;
|
|
52
50
|
const INGEST_TRACKER_LANGUAGES = new Set(["ch", "en"]);
|
|
53
51
|
function textResult(text) {
|
|
54
52
|
return {
|
|
@@ -69,13 +67,6 @@ function deriveStoredDocumentDisplayName(document) {
|
|
|
69
67
|
function deriveStoredDocumentFileLabel(document, manifestSourceFileName) {
|
|
70
68
|
return document.originalFileName || document.fileName || manifestSourceFileName || "unknown";
|
|
71
69
|
}
|
|
72
|
-
function formatStoredDocumentNotFound(docId, scopeLabel) {
|
|
73
|
-
return [
|
|
74
|
-
"Stored document not found.",
|
|
75
|
-
`Document ID: ${docId}`,
|
|
76
|
-
`Scope: ${scopeLabel}`
|
|
77
|
-
].join("\n");
|
|
78
|
-
}
|
|
79
70
|
function buildStoredDocumentSummaryLines(params) {
|
|
80
71
|
const lines = [`Stored document: "${params.document.title}" [${params.document.id}]`, `Scope: ${params.scopeLabel}`];
|
|
81
72
|
if (params.includeSource) lines.push(`Source: ${params.document.sourceLabel}`);
|
|
@@ -95,6 +86,69 @@ function buildStoredDocumentSummaryLines(params) {
|
|
|
95
86
|
if (params.includeUpdatedAt) lines.push(`Updated: ${params.document.updatedAt}`);
|
|
96
87
|
return lines;
|
|
97
88
|
}
|
|
89
|
+
function isTerminalJobStatus(status, hasError) {
|
|
90
|
+
return TERMINAL_JOB_STATUSES.has(status.trim().toLowerCase()) || hasError;
|
|
91
|
+
}
|
|
92
|
+
function startKnowledgeGraphBuild(params) {
|
|
93
|
+
if (!params.kgService.isEnabled()) return;
|
|
94
|
+
const kbId = params.kgService.resolveKbId(params.ctx);
|
|
95
|
+
if (!kbId) return;
|
|
96
|
+
params.api.logger.info(`knowhere: initiating knowledge graph build kbId=${kbId} docId=${params.document.id}`);
|
|
97
|
+
buildKnowledgeGraphAsync({
|
|
98
|
+
kgService: params.kgService,
|
|
99
|
+
kbId,
|
|
100
|
+
docId: params.document.id,
|
|
101
|
+
documentPayload: params.ingestResult,
|
|
102
|
+
scope: params.scope,
|
|
103
|
+
store: params.store,
|
|
104
|
+
ctx: params.ctx,
|
|
105
|
+
api: params.api,
|
|
106
|
+
channelRoute: params.channelRoute,
|
|
107
|
+
sessionKey: params.sessionKey
|
|
108
|
+
}).catch((kgError) => {
|
|
109
|
+
params.api.logger.error(`knowhere: knowledge graph build failed kbId=${kbId} docId=${params.document.id}: ${formatErrorMessage(kgError)}`);
|
|
110
|
+
});
|
|
111
|
+
}
|
|
112
|
+
async function persistIngestedDocument(params) {
|
|
113
|
+
const storedDocument = await params.store.saveDownloadedDocument(params.scope, {
|
|
114
|
+
sourceType: params.sourceType,
|
|
115
|
+
source: params.source,
|
|
116
|
+
fileName: params.fileName,
|
|
117
|
+
docId: params.docId,
|
|
118
|
+
title: params.title,
|
|
119
|
+
dataId: params.dataId,
|
|
120
|
+
tags: params.tags,
|
|
121
|
+
job: params.ingestResult.job,
|
|
122
|
+
jobResult: params.ingestResult.jobResult,
|
|
123
|
+
downloadedResult: params.ingestResult.downloadedResult
|
|
124
|
+
}, { overwrite: params.overwrite });
|
|
125
|
+
params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${params.scope.label} jobId=${params.ingestResult.job.job_id} docId=${storedDocument.id}`);
|
|
126
|
+
startKnowledgeGraphBuild({
|
|
127
|
+
api: params.api,
|
|
128
|
+
channelRoute: params.channelRoute,
|
|
129
|
+
ctx: params.ctx,
|
|
130
|
+
document: storedDocument,
|
|
131
|
+
ingestResult: params.ingestResult,
|
|
132
|
+
kgService: params.kgService,
|
|
133
|
+
scope: params.scope,
|
|
134
|
+
store: params.store,
|
|
135
|
+
sessionKey: params.sessionKey
|
|
136
|
+
});
|
|
137
|
+
return storedDocument;
|
|
138
|
+
}
|
|
139
|
+
function formatCompletedIngestResult(params) {
|
|
140
|
+
return [
|
|
141
|
+
"Ingest complete.",
|
|
142
|
+
...buildStoredDocumentSummaryLines({
|
|
143
|
+
document: params.document,
|
|
144
|
+
scopeLabel: params.scopeLabel,
|
|
145
|
+
includeJobId: true,
|
|
146
|
+
includeSource: true
|
|
147
|
+
}),
|
|
148
|
+
`Source type: ${params.sourceType}`,
|
|
149
|
+
"Next: use knowhere_kg_query to search the knowledge graph for related content."
|
|
150
|
+
].join("\n");
|
|
151
|
+
}
|
|
98
152
|
function readString(value) {
|
|
99
153
|
return typeof value === "string" && value.trim() ? value.trim() : void 0;
|
|
100
154
|
}
|
|
@@ -216,22 +270,6 @@ function normalizeParsingParams(rawParsing) {
|
|
|
216
270
|
}
|
|
217
271
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
218
272
|
}
|
|
219
|
-
function formatDocumentList(documents, scopeLabel) {
|
|
220
|
-
if (documents.length === 0) return `No stored documents in scope ${scopeLabel}.`;
|
|
221
|
-
return documents.map((document, index) => {
|
|
222
|
-
const displayTitle = deriveStoredDocumentDisplayName(document);
|
|
223
|
-
const lines = [
|
|
224
|
-
`${index + 1}. ${displayTitle} [${document.id}]`,
|
|
225
|
-
`Source: ${document.sourceLabel}`,
|
|
226
|
-
`Chunks: ${document.chunkCount}`,
|
|
227
|
-
`Updated: ${document.updatedAt || document.ingestedAt}`
|
|
228
|
-
];
|
|
229
|
-
if (document.title && document.title !== displayTitle) lines.push(`Title: ${document.title}`);
|
|
230
|
-
if (document.tags.length > 0) lines.push(`Tags: ${document.tags.join(", ")}`);
|
|
231
|
-
if (document.resultUrl) lines.push(`Result URL: ${document.resultUrl}`);
|
|
232
|
-
return lines.join("\n");
|
|
233
|
-
}).join("\n\n");
|
|
234
|
-
}
|
|
235
273
|
function formatJobList(params) {
|
|
236
274
|
if (params.jobList.jobs.length === 0) return `No Knowhere jobs on page ${params.jobList.page}.`;
|
|
237
275
|
const lines = [`Knowhere jobs ${params.jobList.page}/${params.jobList.totalPages || 1} (${params.jobList.total} total).`, ""];
|
|
@@ -266,239 +304,6 @@ function buildHistoryJobSourceLabel(jobId, fileName) {
|
|
|
266
304
|
function mergeTags(tags, extras) {
|
|
267
305
|
return Array.from(new Set([...tags, ...extras]));
|
|
268
306
|
}
|
|
269
|
-
function formatScopeClearResult(documents, scopeLabel) {
|
|
270
|
-
if (documents.length === 0) return `Scope ${scopeLabel} is already empty.`;
|
|
271
|
-
const lines = [`Removed ${documents.length} stored document${documents.length === 1 ? "" : "s"} from scope ${scopeLabel}.`];
|
|
272
|
-
for (const [index, document] of documents.entries()) lines.push(`${index + 1}. ${deriveStoredDocumentDisplayName(document)} [${document.id}]`);
|
|
273
|
-
return lines.join("\n");
|
|
274
|
-
}
|
|
275
|
-
function readResultFileReadMode(value) {
|
|
276
|
-
return value === "json" || value === "csv" || value === "text" ? value : "text";
|
|
277
|
-
}
|
|
278
|
-
function normalizeResultFilePath(filePath) {
|
|
279
|
-
const value = normalizeWhitespace(filePath);
|
|
280
|
-
if (!value) return;
|
|
281
|
-
return path.posix.normalize(value.replace(/\\/g, "/")).replace(/^\/+/, "");
|
|
282
|
-
}
|
|
283
|
-
function buildChunkOrderIndex(browseIndex) {
|
|
284
|
-
return new Map(browseIndex.chunkOrder.map((chunkId, index) => [chunkId, index]));
|
|
285
|
-
}
|
|
286
|
-
function sortChunksByBrowseOrder(chunks, browseIndex) {
|
|
287
|
-
const orderIndex = buildChunkOrderIndex(browseIndex);
|
|
288
|
-
return [...chunks].sort((left, right) => {
|
|
289
|
-
return (orderIndex.get(left.chunkId) ?? Number.MAX_SAFE_INTEGER) - (orderIndex.get(right.chunkId) ?? Number.MAX_SAFE_INTEGER) || left.chunkId.localeCompare(right.chunkId);
|
|
290
|
-
});
|
|
291
|
-
}
|
|
292
|
-
function findResultFile(browseIndex, relativePath) {
|
|
293
|
-
return browseIndex.resultFiles.find((entry) => entry.relativePath === relativePath);
|
|
294
|
-
}
|
|
295
|
-
function isTextReadableResultFile(fileRecord) {
|
|
296
|
-
return fileRecord.kind !== "image";
|
|
297
|
-
}
|
|
298
|
-
const IMAGE_EXTENSION_MIME_TYPES = {
|
|
299
|
-
".png": "image/png",
|
|
300
|
-
".jpg": "image/jpeg",
|
|
301
|
-
".jpeg": "image/jpeg",
|
|
302
|
-
".gif": "image/gif",
|
|
303
|
-
".webp": "image/webp",
|
|
304
|
-
".svg": "image/svg+xml",
|
|
305
|
-
".bmp": "image/bmp",
|
|
306
|
-
".tiff": "image/tiff",
|
|
307
|
-
".tif": "image/tiff"
|
|
308
|
-
};
|
|
309
|
-
function inferImageMimeType(filePath) {
|
|
310
|
-
return IMAGE_EXTENSION_MIME_TYPES[path.extname(filePath).toLowerCase()] || "image/png";
|
|
311
|
-
}
|
|
312
|
-
async function buildImageToolResult(params) {
|
|
313
|
-
const mimeType = inferImageMimeType(params.absolutePath);
|
|
314
|
-
const stagedImage = await stageImageResultFileForDelivery({
|
|
315
|
-
absolutePath: params.absolutePath,
|
|
316
|
-
documentTitle: params.documentTitle,
|
|
317
|
-
relativePath: params.filePath,
|
|
318
|
-
workspaceDir: params.workspaceDir
|
|
319
|
-
});
|
|
320
|
-
const stagedImagePath = stagedImage.stagedPath;
|
|
321
|
-
const fileName = path.basename(stagedImagePath);
|
|
322
|
-
const caption = `${params.documentTitle} - ${params.filePath}`;
|
|
323
|
-
const directDelivery = await deliverChannelMessage({
|
|
324
|
-
api: params.api,
|
|
325
|
-
operationLabel: "read result image",
|
|
326
|
-
context: params.context,
|
|
327
|
-
sessionKey: params.sessionKey,
|
|
328
|
-
channelRoute: params.channelRoute,
|
|
329
|
-
text: caption,
|
|
330
|
-
mediaUrl: stagedImagePath,
|
|
331
|
-
mediaLocalRoots: [path.dirname(stagedImagePath)]
|
|
332
|
-
});
|
|
333
|
-
if (directDelivery.delivered) {
|
|
334
|
-
const payload = {
|
|
335
|
-
scope: params.scopeLabel,
|
|
336
|
-
docId: params.docId,
|
|
337
|
-
documentTitle: params.documentTitle,
|
|
338
|
-
file: params.file,
|
|
339
|
-
mode: "image_sent",
|
|
340
|
-
data: {
|
|
341
|
-
mimeType,
|
|
342
|
-
sourceRelativePath: params.filePath,
|
|
343
|
-
stagedPath: stagedImagePath,
|
|
344
|
-
fileName,
|
|
345
|
-
caption,
|
|
346
|
-
note: "Image already sent to the current channel by the plugin. Do not call read on stagedPath. Do not call the message tool or attach this file again. If you reply, send only a brief confirmation.",
|
|
347
|
-
delivery: {
|
|
348
|
-
method: "direct_runtime",
|
|
349
|
-
surface: directDelivery.surface,
|
|
350
|
-
target: directDelivery.to,
|
|
351
|
-
accountId: directDelivery.accountId
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
};
|
|
355
|
-
return {
|
|
356
|
-
content: [{
|
|
357
|
-
type: "text",
|
|
358
|
-
text: `${JSON.stringify(payload, null, 2)}\n`
|
|
359
|
-
}],
|
|
360
|
-
details: payload
|
|
361
|
-
};
|
|
362
|
-
}
|
|
363
|
-
const sendWithMessageTool = {
|
|
364
|
-
action: "send",
|
|
365
|
-
path: stagedImagePath,
|
|
366
|
-
filePath: stagedImagePath,
|
|
367
|
-
filename: fileName,
|
|
368
|
-
caption
|
|
369
|
-
};
|
|
370
|
-
const replyFallback = stagedImage.workspaceRelativePath ? {
|
|
371
|
-
instructions: "If the message tool is unavailable, send your normal user-visible reply and include this exact line on its own line to attach the image.",
|
|
372
|
-
workspaceRelativePath: stagedImage.workspaceRelativePath,
|
|
373
|
-
replyWithMediaDirective: `MEDIA:${stagedImage.workspaceRelativePath}`
|
|
374
|
-
} : void 0;
|
|
375
|
-
const note = replyFallback ? "Image bytes are not inlined. Do not call read on stagedPath. If the user wants to see this image, use the message tool with sendWithMessageTool. If the message tool is unavailable, send your user-visible reply normally and include replyFallback.replyWithMediaDirective on its own line." : "Image bytes are not inlined. Do not call read on stagedPath. If the user wants to see this image, call the message tool with sendWithMessageTool.";
|
|
376
|
-
const nextActionInstructions = replyFallback ? "Do not call read on stagedPath. Call the message tool with sendWithMessageTool to attach this image. If the message tool is unavailable, use replyFallback.replyWithMediaDirective in your normal reply instead." : "Do not call read on stagedPath. Call the message tool with sendWithMessageTool to attach this image.";
|
|
377
|
-
const payload = {
|
|
378
|
-
scope: params.scopeLabel,
|
|
379
|
-
docId: params.docId,
|
|
380
|
-
documentTitle: params.documentTitle,
|
|
381
|
-
file: params.file,
|
|
382
|
-
mode: "image_attachment",
|
|
383
|
-
data: {
|
|
384
|
-
mimeType,
|
|
385
|
-
sourceRelativePath: params.filePath,
|
|
386
|
-
stagedPath: stagedImagePath,
|
|
387
|
-
fileName,
|
|
388
|
-
caption,
|
|
389
|
-
note,
|
|
390
|
-
nextAction: {
|
|
391
|
-
tool: "message",
|
|
392
|
-
instructions: nextActionInstructions,
|
|
393
|
-
args: sendWithMessageTool
|
|
394
|
-
},
|
|
395
|
-
sendWithMessageTool,
|
|
396
|
-
...replyFallback ? { replyFallback } : {}
|
|
397
|
-
}
|
|
398
|
-
};
|
|
399
|
-
return {
|
|
400
|
-
content: [{
|
|
401
|
-
type: "text",
|
|
402
|
-
text: `${JSON.stringify(payload, null, 2)}\n`
|
|
403
|
-
}],
|
|
404
|
-
details: payload
|
|
405
|
-
};
|
|
406
|
-
}
|
|
407
|
-
function normalizeWorkspaceDir(workspaceDir) {
|
|
408
|
-
const trimmed = readString(workspaceDir);
|
|
409
|
-
return trimmed ? path.resolve(trimmed) : void 0;
|
|
410
|
-
}
|
|
411
|
-
function toWorkspaceRelativeMediaPath(params) {
|
|
412
|
-
const relativePath = path.relative(params.workspaceDir, params.stagedPath);
|
|
413
|
-
if (!relativePath || relativePath.startsWith("..") || path.isAbsolute(relativePath)) return;
|
|
414
|
-
const normalizedRelativePath = relativePath.split(path.sep).join("/");
|
|
415
|
-
return normalizedRelativePath.startsWith("./") || normalizedRelativePath.startsWith("../") ? normalizedRelativePath : `./${normalizedRelativePath}`;
|
|
416
|
-
}
|
|
417
|
-
async function stageImageResultFileForDelivery(params) {
|
|
418
|
-
const extension = path.extname(params.relativePath) || path.extname(params.absolutePath) || ".png";
|
|
419
|
-
const imageBaseName = path.basename(params.relativePath, extension) || "image";
|
|
420
|
-
const workspaceDir = normalizeWorkspaceDir(params.workspaceDir);
|
|
421
|
-
let stagedDir;
|
|
422
|
-
if (workspaceDir) {
|
|
423
|
-
const workspaceStageRoot = path.join(workspaceDir, ".openclaw");
|
|
424
|
-
await fs.mkdir(workspaceStageRoot, { recursive: true });
|
|
425
|
-
stagedDir = await fs.mkdtemp(path.join(workspaceStageRoot, "knowhere-read-result-file-"));
|
|
426
|
-
} else stagedDir = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "knowhere-read-result-file-"));
|
|
427
|
-
const stagedFileName = `${slugify(`${params.documentTitle}-${imageBaseName}`, "knowhere-image")}${extension.toLowerCase()}`;
|
|
428
|
-
const stagedPath = path.join(stagedDir, stagedFileName);
|
|
429
|
-
await fs.copyFile(params.absolutePath, stagedPath);
|
|
430
|
-
return {
|
|
431
|
-
stagedPath,
|
|
432
|
-
...workspaceDir ? { workspaceRelativePath: toWorkspaceRelativeMediaPath({
|
|
433
|
-
workspaceDir,
|
|
434
|
-
stagedPath
|
|
435
|
-
}) } : {}
|
|
436
|
-
};
|
|
437
|
-
}
|
|
438
|
-
function stripUtf8Bom(text) {
|
|
439
|
-
return text.charCodeAt(0) === 65279 ? text.slice(1) : text;
|
|
440
|
-
}
|
|
441
|
-
function buildTextFilePayload(text, maxChars) {
|
|
442
|
-
return {
|
|
443
|
-
content: truncatePreview(text, maxChars),
|
|
444
|
-
lineCount: text === "" ? 0 : text.split(/\r\n|\n|\r/).length
|
|
445
|
-
};
|
|
446
|
-
}
|
|
447
|
-
function buildCsvFilePayload(text, maxChars) {
|
|
448
|
-
const normalized = stripUtf8Bom(text);
|
|
449
|
-
const [headerLine = ""] = normalized.split(/\r\n|\n|\r/, 1);
|
|
450
|
-
return {
|
|
451
|
-
content: truncatePreview(normalized, maxChars),
|
|
452
|
-
header: headerLine.trim() || null,
|
|
453
|
-
lineCount: normalized === "" ? 0 : normalized.split(/\r\n|\n|\r/).length
|
|
454
|
-
};
|
|
455
|
-
}
|
|
456
|
-
function truncatePreview(value, maxChars) {
|
|
457
|
-
if (typeof value !== "string") return "";
|
|
458
|
-
const normalized = value.replace(/\r\n/g, "\n").trim();
|
|
459
|
-
if (!normalized) return "";
|
|
460
|
-
if (normalized.length <= maxChars) return normalized;
|
|
461
|
-
return `${normalized.slice(0, Math.max(0, maxChars - 3)).trimEnd()}...`;
|
|
462
|
-
}
|
|
463
|
-
function truncateJsonValue(value, maxStringChars) {
|
|
464
|
-
if (typeof value === "string") {
|
|
465
|
-
if (value.length <= maxStringChars) return {
|
|
466
|
-
value,
|
|
467
|
-
truncated: false
|
|
468
|
-
};
|
|
469
|
-
return {
|
|
470
|
-
value: `${value.slice(0, Math.max(0, maxStringChars - 1))}…`,
|
|
471
|
-
truncated: true
|
|
472
|
-
};
|
|
473
|
-
}
|
|
474
|
-
if (Array.isArray(value)) {
|
|
475
|
-
let truncated = false;
|
|
476
|
-
return {
|
|
477
|
-
value: value.map((entry) => {
|
|
478
|
-
const result = truncateJsonValue(entry, maxStringChars);
|
|
479
|
-
truncated = truncated || result.truncated;
|
|
480
|
-
return result.value;
|
|
481
|
-
}),
|
|
482
|
-
truncated
|
|
483
|
-
};
|
|
484
|
-
}
|
|
485
|
-
if (isRecord(value)) {
|
|
486
|
-
let truncated = false;
|
|
487
|
-
const entries = Object.entries(value).map(([key, entry]) => {
|
|
488
|
-
const result = truncateJsonValue(entry, maxStringChars);
|
|
489
|
-
truncated = truncated || result.truncated;
|
|
490
|
-
return [key, result.value];
|
|
491
|
-
});
|
|
492
|
-
return {
|
|
493
|
-
value: Object.fromEntries(entries),
|
|
494
|
-
truncated
|
|
495
|
-
};
|
|
496
|
-
}
|
|
497
|
-
return {
|
|
498
|
-
value,
|
|
499
|
-
truncated: false
|
|
500
|
-
};
|
|
501
|
-
}
|
|
502
307
|
function formatJsonToolResult(value) {
|
|
503
308
|
return textResult(`${JSON.stringify(value, null, 2)}\n`);
|
|
504
309
|
}
|
|
@@ -531,7 +336,7 @@ function createIngestTool(params) {
|
|
|
531
336
|
return {
|
|
532
337
|
name: "knowhere_ingest_document",
|
|
533
338
|
label: "Knowhere Ingest",
|
|
534
|
-
description: "Parse a local file or remote URL with Knowhere and store the result in the current scope.
|
|
339
|
+
description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. When the user provides a URL to a document (PDF link, web page, etc.), pass it as the url parameter — Knowhere fetches it directly, no local download needed. Knowhere must be the only parser for supported files. If Knowhere returns an error, surface that exact error to the user and do not fall back to other parsing methods or fabricate a preview. By default blockUntilComplete is false, so this tool is fire-and-forget and returns a job ID while parsing continues in the background. Set blockUntilComplete to true only when the current turn explicitly needs the parsed result before continuing. Use lang to control the language of any user-facing background status update (`en` by default, `ch` for Chinese). Provide either filePath or url, not both.",
|
|
535
340
|
parameters: {
|
|
536
341
|
type: "object",
|
|
537
342
|
additionalProperties: false,
|
|
@@ -573,6 +378,10 @@ function createIngestTool(params) {
|
|
|
573
378
|
type: "boolean",
|
|
574
379
|
description: "Replace an existing stored document with the same docId."
|
|
575
380
|
},
|
|
381
|
+
blockUntilComplete: {
|
|
382
|
+
type: "boolean",
|
|
383
|
+
description: "When true, wait for Knowhere to finish parsing, store the result, and return a ready-to-use stored-document summary. Defaults to false, which returns immediately with a job ID and continues parsing in the background."
|
|
384
|
+
},
|
|
576
385
|
lang: {
|
|
577
386
|
type: "string",
|
|
578
387
|
description: "Language for any user-facing background status update sent after parsing completes or fails. Supports en and ch; unsupported values fall back to en."
|
|
@@ -628,13 +437,14 @@ function createIngestTool(params) {
|
|
|
628
437
|
filePath: resolvedFilePath,
|
|
629
438
|
url: urlParam
|
|
630
439
|
});
|
|
440
|
+
const blockUntilComplete = readBoolean(paramsRecord.blockUntilComplete, false);
|
|
631
441
|
const tags = sanitizeStringArray(paramsRecord.tags);
|
|
632
442
|
const overwrite = readBoolean(paramsRecord.overwrite, false);
|
|
633
443
|
const trackerLanguage = readIngestTrackerLanguage(paramsRecord.lang);
|
|
634
444
|
const sessionKey = params.ctx.sessionKey;
|
|
635
445
|
const sourceType = urlParam ? "url" : "file";
|
|
636
446
|
const channelRoute = await params.store.resolveChannelRoute({ sessionKey });
|
|
637
|
-
params.api.logger.info(`knowhere: knowhere_ingest_document starting
|
|
447
|
+
params.api.logger.info(`knowhere: knowhere_ingest_document starting ingest scope=${scope.label} sourceType=${sourceType} label=${JSON.stringify(progressLabel)} mode=${blockUntilComplete ? "blocking" : "background"} overwrite=${overwrite} docId=${docId ?? "auto"} dataId=${dataId ?? "none"} lang=${trackerLanguage} routeState=${channelRoute ? "resolved" : "missing"} routeAccountId=${channelRoute?.accountId ?? "none"}`);
|
|
638
448
|
let resolveJobCreated;
|
|
639
449
|
const jobCreatedPromise = new Promise((resolve) => {
|
|
640
450
|
resolveJobCreated = resolve;
|
|
@@ -653,40 +463,52 @@ function createIngestTool(params) {
|
|
|
653
463
|
resolveJobCreated(job);
|
|
654
464
|
}
|
|
655
465
|
});
|
|
466
|
+
if (blockUntilComplete) {
|
|
467
|
+
const ingestResult = await ingestPromise.catch(rethrowWithPaymentHint);
|
|
468
|
+
params.api.logger.info(`knowhere: knowhere_ingest_document download completed scope=${scope.label} jobId=${ingestResult.job.job_id}; storing extracted result`);
|
|
469
|
+
return textResult(formatCompletedIngestResult({
|
|
470
|
+
document: await persistIngestedDocument({
|
|
471
|
+
api: params.api,
|
|
472
|
+
channelRoute,
|
|
473
|
+
ctx: params.ctx,
|
|
474
|
+
dataId,
|
|
475
|
+
docId,
|
|
476
|
+
fileName,
|
|
477
|
+
ingestResult,
|
|
478
|
+
kgService: params.kgService,
|
|
479
|
+
overwrite,
|
|
480
|
+
scope,
|
|
481
|
+
sessionKey,
|
|
482
|
+
source: urlParam || resolvedFilePath || "",
|
|
483
|
+
sourceType,
|
|
484
|
+
store: params.store,
|
|
485
|
+
tags,
|
|
486
|
+
title
|
|
487
|
+
}),
|
|
488
|
+
scopeLabel: scope.label,
|
|
489
|
+
sourceType
|
|
490
|
+
}));
|
|
491
|
+
}
|
|
656
492
|
ingestPromise.then(async (ingestResult) => {
|
|
657
493
|
params.api.logger.info(`knowhere: knowhere_ingest_document download completed scope=${scope.label} jobId=${ingestResult.job.job_id}; storing extracted result`);
|
|
658
|
-
const storedDocument = await
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
docId,
|
|
663
|
-
title,
|
|
494
|
+
const storedDocument = await persistIngestedDocument({
|
|
495
|
+
api: params.api,
|
|
496
|
+
channelRoute,
|
|
497
|
+
ctx: params.ctx,
|
|
664
498
|
dataId,
|
|
499
|
+
docId,
|
|
500
|
+
fileName,
|
|
501
|
+
ingestResult,
|
|
502
|
+
kgService: params.kgService,
|
|
503
|
+
overwrite,
|
|
504
|
+
scope,
|
|
505
|
+
sessionKey,
|
|
506
|
+
source: urlParam || resolvedFilePath || "",
|
|
507
|
+
sourceType,
|
|
508
|
+
store: params.store,
|
|
665
509
|
tags,
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
downloadedResult: ingestResult.downloadedResult
|
|
669
|
-
}, { overwrite });
|
|
670
|
-
params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${scope.label} jobId=${ingestResult.job.job_id} docId=${storedDocument.id} label=${JSON.stringify(progressLabel)}`);
|
|
671
|
-
if (params.kgService.isEnabled()) {
|
|
672
|
-
const kbId = params.kgService.resolveKbId(params.ctx);
|
|
673
|
-
if (kbId) {
|
|
674
|
-
params.api.logger.info(`knowhere: initiating knowledge graph build kbId=${kbId} docId=${storedDocument.id}`);
|
|
675
|
-
buildKnowledgeGraphAsync({
|
|
676
|
-
kgService: params.kgService,
|
|
677
|
-
kbId,
|
|
678
|
-
docId: storedDocument.id,
|
|
679
|
-
documentPayload: ingestResult,
|
|
680
|
-
scope,
|
|
681
|
-
ctx: params.ctx,
|
|
682
|
-
api: params.api,
|
|
683
|
-
channelRoute,
|
|
684
|
-
sessionKey
|
|
685
|
-
}).catch((kgError) => {
|
|
686
|
-
params.api.logger.error(`knowhere: knowledge graph build failed kbId=${kbId} docId=${storedDocument.id}: ${formatErrorMessage(kgError)}`);
|
|
687
|
-
});
|
|
688
|
-
}
|
|
689
|
-
}
|
|
510
|
+
title
|
|
511
|
+
});
|
|
690
512
|
await notifyBackgroundIngestOutcome({
|
|
691
513
|
api: params.api,
|
|
692
514
|
context: params.ctx,
|
|
@@ -730,14 +552,14 @@ function createIngestTool(params) {
|
|
|
730
552
|
})]);
|
|
731
553
|
if (typeof createdJob === "symbol") {
|
|
732
554
|
params.api.logger.warn(`knowhere: knowhere_ingest_document ingest completed before job-created callback scope=${scope.label} label=${JSON.stringify(progressLabel)}`);
|
|
733
|
-
return textResult("Ingest completed synchronously.
|
|
555
|
+
return textResult("Ingest completed synchronously. The document is now stored and indexed.");
|
|
734
556
|
}
|
|
735
557
|
return textResult([
|
|
736
558
|
"Ingest job created. Parsing in background.",
|
|
737
559
|
`Job ID: ${createdJob.job_id}`,
|
|
738
560
|
`File: ${progressLabel}`,
|
|
739
561
|
`Scope: ${scope.label}`,
|
|
740
|
-
"
|
|
562
|
+
"This call does not include parsed content yet."
|
|
741
563
|
].join("\n"));
|
|
742
564
|
}
|
|
743
565
|
};
|
|
@@ -746,7 +568,7 @@ function createJobStatusTool(params) {
|
|
|
746
568
|
return {
|
|
747
569
|
name: "knowhere_get_job_status",
|
|
748
570
|
label: "Knowhere Job Status",
|
|
749
|
-
description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job.",
|
|
571
|
+
description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job. Do not assume a running job is stuck just because progress is unchanged or slow. Only treat the job as failed or stuck when Knowhere returns an explicit failure status or error code.",
|
|
750
572
|
parameters: {
|
|
751
573
|
type: "object",
|
|
752
574
|
additionalProperties: false,
|
|
@@ -798,6 +620,10 @@ function createJobStatusTool(params) {
|
|
|
798
620
|
lines.push(`Result URL: ${job.result_url}`);
|
|
799
621
|
if (job.result_url_expires_at) lines.push(`Result URL expires: ${job.result_url_expires_at}`);
|
|
800
622
|
}
|
|
623
|
+
const hasExplicitError = Boolean(job.error?.code || job.error?.message);
|
|
624
|
+
if (job.status.trim().toLowerCase() === "done") lines.push("Interpretation: completed.");
|
|
625
|
+
else if (isTerminalJobStatus(job.status, hasExplicitError)) lines.push("Interpretation: Knowhere reported an explicit failure. Surface this error to the user and do not fall back to other parsing methods.");
|
|
626
|
+
else lines.push("Interpretation: still running. Do not describe this job as stuck or failed unless a later Knowhere API response returns an explicit failure status or error code.");
|
|
801
627
|
if (matchingDocuments.length === 0) lines.push("Stored docs in scope: none");
|
|
802
628
|
else {
|
|
803
629
|
lines.push("Stored docs in scope:");
|
|
@@ -898,7 +724,7 @@ function createImportCompletedJobTool(params) {
|
|
|
898
724
|
return {
|
|
899
725
|
name: "knowhere_import_completed_job",
|
|
900
726
|
label: "Knowhere Import Completed Job",
|
|
901
|
-
description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally
|
|
727
|
+
description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally. Use knowhere_list_jobs to find available completed jobs.",
|
|
902
728
|
parameters: {
|
|
903
729
|
type: "object",
|
|
904
730
|
additionalProperties: false,
|
|
@@ -968,551 +794,11 @@ function createImportCompletedJobTool(params) {
|
|
|
968
794
|
}),
|
|
969
795
|
`Imported from job: ${importResult.jobResult.job_id}`,
|
|
970
796
|
`Source type: ${importResult.jobResult.source_type}`,
|
|
971
|
-
"
|
|
797
|
+
"Document imported successfully. Use knowhere_kg_query to search its content."
|
|
972
798
|
].join("\n"));
|
|
973
799
|
}
|
|
974
800
|
};
|
|
975
801
|
}
|
|
976
|
-
const GREP_VALID_TARGETS = new Set([
|
|
977
|
-
"chunk.content",
|
|
978
|
-
"chunk.summary",
|
|
979
|
-
"chunk.keywords",
|
|
980
|
-
"chunk.path",
|
|
981
|
-
"chunk.type",
|
|
982
|
-
"chunk.chunkId"
|
|
983
|
-
]);
|
|
984
|
-
const GREP_TEXT_TARGETS = [
|
|
985
|
-
"chunk.content",
|
|
986
|
-
"chunk.summary",
|
|
987
|
-
"chunk.keywords",
|
|
988
|
-
"chunk.path"
|
|
989
|
-
];
|
|
990
|
-
function parseGrepConditions(raw) {
|
|
991
|
-
if (!Array.isArray(raw)) return [];
|
|
992
|
-
return raw.filter(isRecord).map((entry) => {
|
|
993
|
-
const target = readString(entry.target);
|
|
994
|
-
if (target && !GREP_VALID_TARGETS.has(target)) throw new Error(`Invalid grep target: "${target}". Valid targets: ${[...GREP_VALID_TARGETS].join(", ")}`);
|
|
995
|
-
const pattern = typeof entry.pattern === "string" ? entry.pattern : "";
|
|
996
|
-
return {
|
|
997
|
-
...target ? { target } : {},
|
|
998
|
-
pattern,
|
|
999
|
-
...typeof entry.regex === "boolean" ? { regex: entry.regex } : {},
|
|
1000
|
-
...typeof entry.caseSensitive === "boolean" ? { caseSensitive: entry.caseSensitive } : {}
|
|
1001
|
-
};
|
|
1002
|
-
});
|
|
1003
|
-
}
|
|
1004
|
-
function resolveGrepFieldValue(chunk, target) {
|
|
1005
|
-
switch (target) {
|
|
1006
|
-
case "chunk.content": return chunk.content || "";
|
|
1007
|
-
case "chunk.summary": return chunk.summary || "";
|
|
1008
|
-
case "chunk.keywords": return chunk.keywords || [];
|
|
1009
|
-
case "chunk.path": return chunk.path || "";
|
|
1010
|
-
case "chunk.type": return chunk.type;
|
|
1011
|
-
case "chunk.chunkId": return chunk.chunkId;
|
|
1012
|
-
default: return "";
|
|
1013
|
-
}
|
|
1014
|
-
}
|
|
1015
|
-
function testGrepMatch(text, pattern, useRegex, caseSensitive) {
|
|
1016
|
-
if (pattern === "") return true;
|
|
1017
|
-
if (useRegex) {
|
|
1018
|
-
const flags = caseSensitive ? "" : "i";
|
|
1019
|
-
return new RegExp(pattern, flags).test(text);
|
|
1020
|
-
}
|
|
1021
|
-
if (caseSensitive) return text.includes(pattern);
|
|
1022
|
-
return text.toLowerCase().includes(pattern.toLowerCase());
|
|
1023
|
-
}
|
|
1024
|
-
function testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive) {
|
|
1025
|
-
const useRegex = condition.regex ?? outerRegex;
|
|
1026
|
-
const caseSensitive = condition.caseSensitive ?? outerCaseSensitive;
|
|
1027
|
-
const matchedTargets = [];
|
|
1028
|
-
const targets = condition.target ? [condition.target] : GREP_TEXT_TARGETS;
|
|
1029
|
-
for (const target of targets) {
|
|
1030
|
-
const fieldValue = target === "chunk.type" || target === "chunk.chunkId" ? resolveGrepFieldValue(chunk, target) : normalizedFields.get(target) ?? resolveGrepFieldValue(chunk, target);
|
|
1031
|
-
if (Array.isArray(fieldValue)) {
|
|
1032
|
-
if (fieldValue.some((entry) => testGrepMatch(entry, condition.pattern, useRegex, caseSensitive))) matchedTargets.push(target);
|
|
1033
|
-
} else if (testGrepMatch(fieldValue, condition.pattern, useRegex, caseSensitive)) matchedTargets.push(target);
|
|
1034
|
-
}
|
|
1035
|
-
return matchedTargets;
|
|
1036
|
-
}
|
|
1037
|
-
function buildNormalizedFields(chunk) {
|
|
1038
|
-
const fields = /* @__PURE__ */ new Map();
|
|
1039
|
-
fields.set("chunk.content", normalizeForGrep(chunk.content || ""));
|
|
1040
|
-
fields.set("chunk.summary", normalizeForGrep(chunk.summary || ""));
|
|
1041
|
-
fields.set("chunk.keywords", (chunk.keywords || []).map((k) => normalizeForGrep(k)));
|
|
1042
|
-
fields.set("chunk.path", normalizeForGrep(chunk.path || ""));
|
|
1043
|
-
return fields;
|
|
1044
|
-
}
|
|
1045
|
-
function buildGrepHints(params) {
|
|
1046
|
-
const hints = [];
|
|
1047
|
-
const maxHints = 3;
|
|
1048
|
-
if (params.totalMatches === 0 && params.conditionCount > 0) hints.push("No matches. Try broadening: remove a condition, use a shorter pattern, or check for typos. Call knowhere_preview_document to see the document structure first.");
|
|
1049
|
-
if (hints.length < maxHints && params.totalMatches > params.returned) {
|
|
1050
|
-
let hint = `Showing ${params.returned} of ${params.totalMatches} matches. Add another condition (e.g., target chunk.path to a specific section) to narrow results.`;
|
|
1051
|
-
if (!params.hasPathCondition) hint += " Use knowhere_preview_document to find section paths.";
|
|
1052
|
-
hints.push(hint);
|
|
1053
|
-
}
|
|
1054
|
-
if (hints.length < maxHints && params.truncatedStrings) if (params.returned > 3) hints.push(`Fields truncated at ${params.maxStringChars} chars. Reduce maxResults to 1-3 and increase maxStringChars to 12000-20000 for full content.`);
|
|
1055
|
-
else hints.push(`Fields truncated at ${params.maxStringChars} chars. Increase maxStringChars (up to 20000) for full content.`);
|
|
1056
|
-
if (hints.length < maxHints && params.totalMatches >= 1 && params.totalMatches <= 5 && !params.includeContext) hints.push("Tip: set includeContext=true to discover sibling chunks in the same section.");
|
|
1057
|
-
if (hints.length < maxHints && params.totalChunks > 0 && params.totalMatches > params.totalChunks * .5 && params.conditionCount <= 1) hints.push("Pattern matches over half the document. Add a second condition to narrow.");
|
|
1058
|
-
return hints;
|
|
1059
|
-
}
|
|
1060
|
-
function createGrepTool(params) {
|
|
1061
|
-
return {
|
|
1062
|
-
name: "knowhere_grep",
|
|
1063
|
-
label: "Knowhere Grep",
|
|
1064
|
-
description: "Search a stored document's chunks with composable AND conditions. Returns matching chunks with content, summary, keywords, path, and chunkId. Supports substring and regex matching with text normalization (HTML stripping, LaTeX cleanup, unicode normalization). Omit conditions to list all chunks. Omit the target field in a condition to search across all text fields (content, summary, keywords, path) — this is the recommended default. When answering questions from results, cite the chunkId and path. Tip: set maxStringChars up to 20000 when you need full untruncated content from a small number of results (e.g., maxResults=1). The default 4000 may truncate long chunks. Search strategy: (1) Start with knowhere_preview_document to see document structure. (2) Search broadly with a single short pattern, then narrow by adding conditions. (3) If zero results, broaden or try synonyms. If too many, add a path condition. (4) Once you find the right chunks, re-query with maxResults=1-3 and maxStringChars=12000-20000 to read full content.",
|
|
1065
|
-
parameters: {
|
|
1066
|
-
type: "object",
|
|
1067
|
-
additionalProperties: false,
|
|
1068
|
-
properties: {
|
|
1069
|
-
docId: {
|
|
1070
|
-
type: "string",
|
|
1071
|
-
description: "Identifier of the stored document to search."
|
|
1072
|
-
},
|
|
1073
|
-
conditions: {
|
|
1074
|
-
type: "array",
|
|
1075
|
-
items: {
|
|
1076
|
-
type: "object",
|
|
1077
|
-
additionalProperties: false,
|
|
1078
|
-
properties: {
|
|
1079
|
-
target: {
|
|
1080
|
-
type: "string",
|
|
1081
|
-
enum: [...GREP_VALID_TARGETS],
|
|
1082
|
-
description: "Chunk field to search. Omit to search all text fields (content, summary, keywords, path) — this is the recommended default."
|
|
1083
|
-
},
|
|
1084
|
-
pattern: {
|
|
1085
|
-
type: "string",
|
|
1086
|
-
description: "Search pattern. Empty string matches all."
|
|
1087
|
-
},
|
|
1088
|
-
regex: {
|
|
1089
|
-
type: "boolean",
|
|
1090
|
-
description: "Use regex matching for this condition. Overrides outer regex default."
|
|
1091
|
-
},
|
|
1092
|
-
caseSensitive: {
|
|
1093
|
-
type: "boolean",
|
|
1094
|
-
description: "Case-sensitive matching for this condition. Overrides outer default."
|
|
1095
|
-
}
|
|
1096
|
-
},
|
|
1097
|
-
required: ["pattern"]
|
|
1098
|
-
},
|
|
1099
|
-
description: "ANDed search conditions. Each condition must match for a chunk to be returned. Default [] matches all chunks (useful for browsing). Omit target in a condition to search all text fields. Use multiple conditions to narrow results (e.g., path contains 'chapter 3' AND content contains 'algorithm')."
|
|
1100
|
-
},
|
|
1101
|
-
regex: {
|
|
1102
|
-
type: "boolean",
|
|
1103
|
-
description: "Default regex mode for all conditions. Defaults to false."
|
|
1104
|
-
},
|
|
1105
|
-
caseSensitive: {
|
|
1106
|
-
type: "boolean",
|
|
1107
|
-
description: "Default case-sensitivity for all conditions. Defaults to false."
|
|
1108
|
-
},
|
|
1109
|
-
includeContext: {
|
|
1110
|
-
type: "boolean",
|
|
1111
|
-
description: "Include sibling chunk IDs sharing the same document path for each matched chunk. Useful for navigating to adjacent chunks in the same section — re-query with a condition on chunk.chunkId to fetch a specific sibling."
|
|
1112
|
-
},
|
|
1113
|
-
maxResults: {
|
|
1114
|
-
type: "integer",
|
|
1115
|
-
minimum: 1,
|
|
1116
|
-
maximum: 50,
|
|
1117
|
-
description: "Maximum number of matching chunks to return. Defaults to 10. Use a low value (1–3) with high maxStringChars to read specific chunks in full. Use a higher value (10–50) with lower maxStringChars to scan broadly."
|
|
1118
|
-
},
|
|
1119
|
-
maxStringChars: {
|
|
1120
|
-
type: "integer",
|
|
1121
|
-
minimum: 100,
|
|
1122
|
-
maximum: 2e4,
|
|
1123
|
-
description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 when retrieving full content from a small number of results (e.g., reading a single chunk in full). Reduce below 4000 when scanning many results to save tokens."
|
|
1124
|
-
}
|
|
1125
|
-
},
|
|
1126
|
-
required: ["docId"]
|
|
1127
|
-
},
|
|
1128
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1129
|
-
const paramsRecord = isRecord(rawParams) ? rawParams : {};
|
|
1130
|
-
const docId = readString(paramsRecord.docId);
|
|
1131
|
-
if (!docId) throw new Error("docId is required.");
|
|
1132
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1133
|
-
const payload = await params.store.loadDocumentPayload(scope, docId);
|
|
1134
|
-
if (!payload) {
|
|
1135
|
-
params.api.logger.warn(`knowhere: knowhere_grep document not found scope=${scope.label} docId=${docId}`);
|
|
1136
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1137
|
-
}
|
|
1138
|
-
const conditions = parseGrepConditions(paramsRecord.conditions);
|
|
1139
|
-
const outerRegex = readBoolean(paramsRecord.regex, false);
|
|
1140
|
-
const outerCaseSensitive = readBoolean(paramsRecord.caseSensitive, false);
|
|
1141
|
-
const includeContext = readBoolean(paramsRecord.includeContext, false);
|
|
1142
|
-
const maxResults = Math.min(50, Math.max(1, Math.trunc(readNumber(paramsRecord.maxResults, 10))));
|
|
1143
|
-
const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(paramsRecord.maxStringChars, 4e3))));
|
|
1144
|
-
params.api.logger.info(`knowhere: knowhere_grep searching document scope=${scope.label} docId=${docId} conditions=${conditions.length} regex=${outerRegex} caseSensitive=${outerCaseSensitive} includeContext=${includeContext} maxResults=${maxResults} maxStringChars=${maxStringChars}`);
|
|
1145
|
-
const pathChunkIndex = includeContext ? new Map(payload.browseIndex.paths.map((p) => [p.path, p.chunkIds])) : void 0;
|
|
1146
|
-
const sortedChunks = sortChunksByBrowseOrder(payload.chunks, payload.browseIndex);
|
|
1147
|
-
const results = [];
|
|
1148
|
-
for (const chunk of sortedChunks) {
|
|
1149
|
-
if (results.length >= maxResults) break;
|
|
1150
|
-
const normalizedFields = buildNormalizedFields(chunk);
|
|
1151
|
-
const allMatchedTargets = /* @__PURE__ */ new Set();
|
|
1152
|
-
let allConditionsPassed = true;
|
|
1153
|
-
for (const condition of conditions) {
|
|
1154
|
-
const matched = testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive);
|
|
1155
|
-
if (matched.length === 0) {
|
|
1156
|
-
allConditionsPassed = false;
|
|
1157
|
-
break;
|
|
1158
|
-
}
|
|
1159
|
-
for (const target of matched) allMatchedTargets.add(target);
|
|
1160
|
-
}
|
|
1161
|
-
if (!allConditionsPassed) continue;
|
|
1162
|
-
const entry = {
|
|
1163
|
-
chunk,
|
|
1164
|
-
matchedOn: [...allMatchedTargets]
|
|
1165
|
-
};
|
|
1166
|
-
if (includeContext && pathChunkIndex && chunk.path) {
|
|
1167
|
-
const siblings = pathChunkIndex.get(chunk.path);
|
|
1168
|
-
if (siblings) entry.siblingChunkIds = siblings.filter((id) => id !== chunk.chunkId);
|
|
1169
|
-
}
|
|
1170
|
-
results.push(entry);
|
|
1171
|
-
}
|
|
1172
|
-
let totalMatches = results.length;
|
|
1173
|
-
if (results.length >= maxResults) {
|
|
1174
|
-
const remainingChunks = sortedChunks.slice(sortedChunks.indexOf(results[results.length - 1].chunk) + 1);
|
|
1175
|
-
for (const chunk of remainingChunks) {
|
|
1176
|
-
const normalizedFields = buildNormalizedFields(chunk);
|
|
1177
|
-
let passed = true;
|
|
1178
|
-
for (const condition of conditions) if (testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive).length === 0) {
|
|
1179
|
-
passed = false;
|
|
1180
|
-
break;
|
|
1181
|
-
}
|
|
1182
|
-
if (passed) totalMatches++;
|
|
1183
|
-
}
|
|
1184
|
-
}
|
|
1185
|
-
const truncated = truncateJsonValue(results.map((entry) => {
|
|
1186
|
-
const projected = {
|
|
1187
|
-
chunkId: entry.chunk.chunkId,
|
|
1188
|
-
type: entry.chunk.type,
|
|
1189
|
-
path: entry.chunk.path,
|
|
1190
|
-
content: entry.chunk.content,
|
|
1191
|
-
summary: entry.chunk.summary,
|
|
1192
|
-
keywords: entry.chunk.keywords,
|
|
1193
|
-
tokens: entry.chunk.tokens,
|
|
1194
|
-
assetFilePath: entry.chunk.assetFilePath,
|
|
1195
|
-
matchedOn: entry.matchedOn
|
|
1196
|
-
};
|
|
1197
|
-
if (entry.siblingChunkIds) projected.siblingChunkIds = entry.siblingChunkIds;
|
|
1198
|
-
return projected;
|
|
1199
|
-
}), maxStringChars);
|
|
1200
|
-
params.api.logger.info(`knowhere: knowhere_grep completed search scope=${scope.label} docId=${docId} returned=${results.length} totalMatches=${totalMatches} truncated=${truncated.truncated}`);
|
|
1201
|
-
const hasPathCondition = conditions.some((c) => c.target === "chunk.path");
|
|
1202
|
-
const hints = buildGrepHints({
|
|
1203
|
-
totalMatches,
|
|
1204
|
-
returned: results.length,
|
|
1205
|
-
maxResults,
|
|
1206
|
-
maxStringChars,
|
|
1207
|
-
truncatedStrings: truncated.truncated,
|
|
1208
|
-
conditionCount: conditions.length,
|
|
1209
|
-
includeContext,
|
|
1210
|
-
totalChunks: sortedChunks.length,
|
|
1211
|
-
hasPathCondition
|
|
1212
|
-
});
|
|
1213
|
-
const jsonResult = formatJsonToolResult({
|
|
1214
|
-
totalMatches,
|
|
1215
|
-
returned: results.length,
|
|
1216
|
-
results: truncated.value,
|
|
1217
|
-
maxStringChars,
|
|
1218
|
-
truncatedStrings: truncated.truncated
|
|
1219
|
-
});
|
|
1220
|
-
if (hints.length === 0) return jsonResult;
|
|
1221
|
-
return textResult(`${jsonResult.content[0].text}\n---\n${hints.join("\n")}`);
|
|
1222
|
-
}
|
|
1223
|
-
};
|
|
1224
|
-
}
|
|
1225
|
-
function createReadResultFileTool(params) {
|
|
1226
|
-
return {
|
|
1227
|
-
name: "knowhere_read_result_file",
|
|
1228
|
-
label: "Knowhere Read Result File",
|
|
1229
|
-
description: "Read a raw result file from the stored document's extracted ZIP package. Common files: manifest.json (parsing metadata), hierarchy.json (document structure), kb.csv (knowledge base export), table HTML files (e.g., tables/table-1.html), or image assets (e.g., images/img-0.png). Image files are staged into a local attachment path and sent directly to the current channel when routing can be resolved. If direct delivery is unavailable, the tool returns a message-tool handoff and, when the run has a workspace, a workspace-relative MEDIA fallback for a normal assistant reply. When the result mode is image_attachment, do not call generic file-read tools on data.stagedPath; use data.sendWithMessageTool or data.replyFallback as returned. When the result mode is image_sent, the plugin already delivered the image. Use mode='json' for JSON files, mode='csv' for CSV files, or mode='text' (default) for everything else. Increase maxStringChars (up to 20000) for large files.",
|
|
1230
|
-
parameters: {
|
|
1231
|
-
type: "object",
|
|
1232
|
-
additionalProperties: false,
|
|
1233
|
-
properties: {
|
|
1234
|
-
docId: {
|
|
1235
|
-
type: "string",
|
|
1236
|
-
description: "Identifier of the stored document to read from."
|
|
1237
|
-
},
|
|
1238
|
-
filePath: {
|
|
1239
|
-
type: "string",
|
|
1240
|
-
description: "Relative path under the stored result directory, for example manifest.json or tables/table-1.html."
|
|
1241
|
-
},
|
|
1242
|
-
mode: {
|
|
1243
|
-
type: "string",
|
|
1244
|
-
enum: [
|
|
1245
|
-
"text",
|
|
1246
|
-
"json",
|
|
1247
|
-
"csv"
|
|
1248
|
-
],
|
|
1249
|
-
description: "text returns trimmed text, json parses JSON, and csv returns a raw CSV preview. Defaults to text."
|
|
1250
|
-
},
|
|
1251
|
-
maxStringChars: {
|
|
1252
|
-
type: "integer",
|
|
1253
|
-
minimum: 100,
|
|
1254
|
-
maximum: 2e4,
|
|
1255
|
-
description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 for large files like hierarchy.json or kb.csv."
|
|
1256
|
-
}
|
|
1257
|
-
},
|
|
1258
|
-
required: ["docId", "filePath"]
|
|
1259
|
-
},
|
|
1260
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1261
|
-
const paramsRecord = isRecord(rawParams) ? rawParams : {};
|
|
1262
|
-
const docId = readString(paramsRecord.docId);
|
|
1263
|
-
const filePath = normalizeResultFilePath(paramsRecord.filePath);
|
|
1264
|
-
if (!docId) throw new Error("docId is required.");
|
|
1265
|
-
if (!filePath) throw new Error("filePath is required.");
|
|
1266
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1267
|
-
const payload = await params.store.loadDocumentPayload(scope, docId);
|
|
1268
|
-
if (!payload) {
|
|
1269
|
-
params.api.logger.warn(`knowhere: knowhere_read_result_file document not found scope=${scope.label} docId=${docId}`);
|
|
1270
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1271
|
-
}
|
|
1272
|
-
const resultFile = findResultFile(payload.browseIndex, filePath);
|
|
1273
|
-
if (!resultFile) {
|
|
1274
|
-
params.api.logger.warn(`knowhere: knowhere_read_result_file result file not found scope=${scope.label} docId=${docId} filePath=${filePath}`);
|
|
1275
|
-
return textResult([
|
|
1276
|
-
"Result file not found.",
|
|
1277
|
-
`File path: ${filePath}`,
|
|
1278
|
-
`Document ID: ${docId}`,
|
|
1279
|
-
`Scope: ${scope.label}`
|
|
1280
|
-
].join("\n"));
|
|
1281
|
-
}
|
|
1282
|
-
if (resultFile.kind === "image") {
|
|
1283
|
-
const absolutePath = params.store.getResultFileAbsolutePath(scope, docId, filePath);
|
|
1284
|
-
const channelRoute = await params.store.resolveChannelRoute({ sessionKey: params.ctx.sessionKey });
|
|
1285
|
-
params.api.logger.info(`knowhere: knowhere_read_result_file staging image asset scope=${scope.label} docId=${docId} filePath=${filePath}`);
|
|
1286
|
-
return await buildImageToolResult({
|
|
1287
|
-
api: params.api,
|
|
1288
|
-
absolutePath,
|
|
1289
|
-
channelRoute,
|
|
1290
|
-
context: params.ctx,
|
|
1291
|
-
docId: payload.document.id,
|
|
1292
|
-
documentTitle: payload.document.title,
|
|
1293
|
-
filePath,
|
|
1294
|
-
file: resultFile,
|
|
1295
|
-
sessionKey: params.ctx.sessionKey,
|
|
1296
|
-
scopeLabel: scope.label,
|
|
1297
|
-
workspaceDir: params.ctx.workspaceDir
|
|
1298
|
-
});
|
|
1299
|
-
}
|
|
1300
|
-
if (!isTextReadableResultFile(resultFile)) {
|
|
1301
|
-
params.api.logger.warn(`knowhere: knowhere_read_result_file unreadable result kind scope=${scope.label} docId=${docId} filePath=${filePath} kind=${resultFile.kind}`);
|
|
1302
|
-
return textResult([
|
|
1303
|
-
"Result file is not readable as text through this tool.",
|
|
1304
|
-
`File path: ${filePath}`,
|
|
1305
|
-
`Kind: ${resultFile.kind}`,
|
|
1306
|
-
`Document ID: ${docId}`,
|
|
1307
|
-
`Scope: ${scope.label}`
|
|
1308
|
-
].join("\n"));
|
|
1309
|
-
}
|
|
1310
|
-
const storedFile = await params.store.readResultFile(scope, docId, filePath);
|
|
1311
|
-
if (!storedFile) {
|
|
1312
|
-
params.api.logger.warn(`knowhere: knowhere_read_result_file payload disappeared scope=${scope.label} docId=${docId} filePath=${filePath}`);
|
|
1313
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1314
|
-
}
|
|
1315
|
-
if (storedFile.text === null) {
|
|
1316
|
-
params.api.logger.warn(`knowhere: knowhere_read_result_file text content missing scope=${scope.label} docId=${docId} filePath=${filePath}`);
|
|
1317
|
-
return textResult([
|
|
1318
|
-
"Result file not found.",
|
|
1319
|
-
`File path: ${filePath}`,
|
|
1320
|
-
`Document ID: ${docId}`,
|
|
1321
|
-
`Scope: ${scope.label}`
|
|
1322
|
-
].join("\n"));
|
|
1323
|
-
}
|
|
1324
|
-
const mode = readResultFileReadMode(paramsRecord.mode);
|
|
1325
|
-
const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(paramsRecord.maxStringChars, 4e3))));
|
|
1326
|
-
const normalizedText = stripUtf8Bom(storedFile.text);
|
|
1327
|
-
params.api.logger.info(`knowhere: knowhere_read_result_file reading file scope=${scope.label} docId=${docId} filePath=${filePath} kind=${resultFile.kind} mode=${mode} maxStringChars=${maxStringChars}`);
|
|
1328
|
-
if (mode === "json") {
|
|
1329
|
-
let parsedJson;
|
|
1330
|
-
try {
|
|
1331
|
-
parsedJson = JSON.parse(normalizedText);
|
|
1332
|
-
} catch (error) {
|
|
1333
|
-
params.api.logger.warn(`knowhere: knowhere_read_result_file invalid json scope=${scope.label} docId=${docId} filePath=${filePath} error=${formatErrorMessage(error)}`);
|
|
1334
|
-
throw new Error(`Result file ${filePath} is not valid JSON. ${formatErrorMessage(error)}`, { cause: error });
|
|
1335
|
-
}
|
|
1336
|
-
const truncatedJson = truncateJsonValue(parsedJson, maxStringChars);
|
|
1337
|
-
params.api.logger.info(`knowhere: knowhere_read_result_file parsed json scope=${scope.label} docId=${docId} filePath=${filePath} truncated=${truncatedJson.truncated}`);
|
|
1338
|
-
return formatJsonToolResult({
|
|
1339
|
-
scope: scope.label,
|
|
1340
|
-
docId: payload.document.id,
|
|
1341
|
-
documentTitle: payload.document.title,
|
|
1342
|
-
file: resultFile,
|
|
1343
|
-
mode,
|
|
1344
|
-
maxStringChars,
|
|
1345
|
-
truncatedStrings: truncatedJson.truncated,
|
|
1346
|
-
data: truncatedJson.value
|
|
1347
|
-
});
|
|
1348
|
-
}
|
|
1349
|
-
const data = mode === "csv" ? buildCsvFilePayload(normalizedText, maxStringChars) : buildTextFilePayload(normalizedText, maxStringChars);
|
|
1350
|
-
params.api.logger.info(`knowhere: knowhere_read_result_file prepared text payload scope=${scope.label} docId=${docId} filePath=${filePath} lineCount=${data.lineCount}`);
|
|
1351
|
-
return formatJsonToolResult({
|
|
1352
|
-
scope: scope.label,
|
|
1353
|
-
docId: payload.document.id,
|
|
1354
|
-
documentTitle: payload.document.title,
|
|
1355
|
-
file: resultFile,
|
|
1356
|
-
mode,
|
|
1357
|
-
maxStringChars,
|
|
1358
|
-
data
|
|
1359
|
-
});
|
|
1360
|
-
}
|
|
1361
|
-
};
|
|
1362
|
-
}
|
|
1363
|
-
function createPreviewDocumentTool(params) {
|
|
1364
|
-
return {
|
|
1365
|
-
name: "knowhere_preview_document",
|
|
1366
|
-
label: "Knowhere Preview Document",
|
|
1367
|
-
description: "Get a structural overview of a stored Knowhere document. Returns the document metadata and a hierarchical table of contents showing sections, subsections, and chunk counts per path (text, image, table). Use this as the first step after identifying a docId to understand the document's structure before searching with knowhere_grep.",
|
|
1368
|
-
parameters: {
|
|
1369
|
-
type: "object",
|
|
1370
|
-
additionalProperties: false,
|
|
1371
|
-
properties: { docId: {
|
|
1372
|
-
type: "string",
|
|
1373
|
-
description: "Identifier of the stored document to preview."
|
|
1374
|
-
} },
|
|
1375
|
-
required: ["docId"]
|
|
1376
|
-
},
|
|
1377
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1378
|
-
const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
|
|
1379
|
-
if (!docId) throw new Error("docId is required.");
|
|
1380
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1381
|
-
const payload = await params.store.loadDocumentPayload(scope, docId);
|
|
1382
|
-
if (!payload) {
|
|
1383
|
-
params.api.logger.warn(`knowhere: knowhere_preview_document document not found scope=${scope.label} docId=${docId}`);
|
|
1384
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1385
|
-
}
|
|
1386
|
-
const { document } = payload;
|
|
1387
|
-
const pathSummaryMap = /* @__PURE__ */ new Map();
|
|
1388
|
-
for (const chunk of payload.chunks) {
|
|
1389
|
-
if (!chunk.path || pathSummaryMap.has(chunk.path)) continue;
|
|
1390
|
-
const raw = (chunk.summary || chunk.content || "").trim();
|
|
1391
|
-
if (raw) {
|
|
1392
|
-
const oneLine = raw.replace(/\n+/g, " ").slice(0, PREVIEW_SUMMARY_MAX_CHARS);
|
|
1393
|
-
pathSummaryMap.set(chunk.path, oneLine + (raw.length > PREVIEW_SUMMARY_MAX_CHARS ? "..." : ""));
|
|
1394
|
-
}
|
|
1395
|
-
}
|
|
1396
|
-
const lines = [
|
|
1397
|
-
`"${document.title}" [${document.id}]`,
|
|
1398
|
-
`Scope: ${scope.label}`,
|
|
1399
|
-
`Source: ${document.sourceLabel}`,
|
|
1400
|
-
`Chunks: ${document.chunkCount}`
|
|
1401
|
-
];
|
|
1402
|
-
const pathByName = /* @__PURE__ */ new Map();
|
|
1403
|
-
for (const p of payload.browseIndex.paths) pathByName.set(p.path, p);
|
|
1404
|
-
const roots = payload.browseIndex.paths.filter((p) => p.depth === 1);
|
|
1405
|
-
params.api.logger.info(`knowhere: knowhere_preview_document building preview scope=${scope.label} docId=${docId} paths=${payload.browseIndex.paths.length} roots=${roots.length} chunks=${payload.chunks.length}`);
|
|
1406
|
-
if (roots.length > 0) {
|
|
1407
|
-
lines.push("");
|
|
1408
|
-
lines.push("## Table of Contents");
|
|
1409
|
-
lines.push("");
|
|
1410
|
-
const renderTree = (pathRecord, indent) => {
|
|
1411
|
-
const prefix = " ".repeat(indent);
|
|
1412
|
-
const segments = pathRecord.path.split(/\/|-->/);
|
|
1413
|
-
const label = segments[segments.length - 1] || pathRecord.path;
|
|
1414
|
-
const counts = [];
|
|
1415
|
-
if (pathRecord.textChunkCount > 0) counts.push(`${pathRecord.textChunkCount} text`);
|
|
1416
|
-
if (pathRecord.imageChunkCount > 0) counts.push(`${pathRecord.imageChunkCount} img`);
|
|
1417
|
-
if (pathRecord.tableChunkCount > 0) counts.push(`${pathRecord.tableChunkCount} tbl`);
|
|
1418
|
-
const countStr = counts.length > 0 ? ` (${counts.join(", ")})` : "";
|
|
1419
|
-
const summary = pathSummaryMap.get(pathRecord.path);
|
|
1420
|
-
const summaryStr = summary ? ` — ${summary}` : "";
|
|
1421
|
-
lines.push(`${prefix}- ${label}${countStr}${summaryStr}`);
|
|
1422
|
-
for (const childPath of pathRecord.childPaths) {
|
|
1423
|
-
const child = pathByName.get(childPath);
|
|
1424
|
-
if (child) renderTree(child, indent + 1);
|
|
1425
|
-
}
|
|
1426
|
-
};
|
|
1427
|
-
for (const root of roots) renderTree(root, 0);
|
|
1428
|
-
} else {
|
|
1429
|
-
lines.push("");
|
|
1430
|
-
lines.push("No structural paths available for this document.");
|
|
1431
|
-
params.api.logger.warn(`knowhere: knowhere_preview_document no structural paths scope=${scope.label} docId=${docId}`);
|
|
1432
|
-
}
|
|
1433
|
-
return textResult(lines.join("\n"));
|
|
1434
|
-
}
|
|
1435
|
-
};
|
|
1436
|
-
}
|
|
1437
|
-
function createListTool(params) {
|
|
1438
|
-
return {
|
|
1439
|
-
name: "knowhere_list_documents",
|
|
1440
|
-
label: "Knowhere List",
|
|
1441
|
-
description: "List all Knowhere documents stored in the current scope. Returns each document's ID, title, source, chunk count, tags, and last-updated timestamp. Use this first to discover available documents, check whether a file or URL is already stored, and find the right docId before calling other tools.",
|
|
1442
|
-
parameters: {
|
|
1443
|
-
type: "object",
|
|
1444
|
-
additionalProperties: false,
|
|
1445
|
-
properties: {}
|
|
1446
|
-
},
|
|
1447
|
-
execute: async () => {
|
|
1448
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1449
|
-
const documents = await params.store.listDocuments(scope);
|
|
1450
|
-
params.api.logger.info(`knowhere: knowhere_list_documents listed documents scope=${scope.label} count=${documents.length}`);
|
|
1451
|
-
return textResult(formatDocumentList(documents, scope.label));
|
|
1452
|
-
}
|
|
1453
|
-
};
|
|
1454
|
-
}
|
|
1455
|
-
function createRemoveTool(params) {
|
|
1456
|
-
return {
|
|
1457
|
-
name: "knowhere_remove_document",
|
|
1458
|
-
label: "Knowhere Remove",
|
|
1459
|
-
description: "Remove a stored Knowhere document and all its extracted data from the current scope. This is irreversible — the document must be re-ingested or re-imported to restore it.",
|
|
1460
|
-
parameters: {
|
|
1461
|
-
type: "object",
|
|
1462
|
-
additionalProperties: false,
|
|
1463
|
-
properties: { docId: {
|
|
1464
|
-
type: "string",
|
|
1465
|
-
description: "Identifier of the stored document to remove."
|
|
1466
|
-
} },
|
|
1467
|
-
required: ["docId"]
|
|
1468
|
-
},
|
|
1469
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1470
|
-
const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
|
|
1471
|
-
if (!docId) throw new Error("docId is required.");
|
|
1472
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1473
|
-
params.api.logger.info(`knowhere: knowhere_remove_document removing document scope=${scope.label} docId=${docId}`);
|
|
1474
|
-
const removed = await params.store.removeDocument(scope, docId);
|
|
1475
|
-
if (!removed) {
|
|
1476
|
-
params.api.logger.warn(`knowhere: knowhere_remove_document document not found scope=${scope.label} docId=${docId}`);
|
|
1477
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1478
|
-
}
|
|
1479
|
-
params.api.logger.info(`knowhere: knowhere_remove_document removed document scope=${scope.label} docId=${removed.id}`);
|
|
1480
|
-
return textResult([
|
|
1481
|
-
"Removed stored document.",
|
|
1482
|
-
`Document ID: ${removed.id}`,
|
|
1483
|
-
`Title: ${removed.title}`,
|
|
1484
|
-
`Scope: ${scope.label}`
|
|
1485
|
-
].join("\n"));
|
|
1486
|
-
}
|
|
1487
|
-
};
|
|
1488
|
-
}
|
|
1489
|
-
function createClearScopeTool(params) {
|
|
1490
|
-
return {
|
|
1491
|
-
name: "knowhere_clear_scope",
|
|
1492
|
-
label: "Knowhere Clear Scope",
|
|
1493
|
-
description: "Remove all stored Knowhere documents from the current scope. This is irreversible and affects every document in the scope. Set confirm=true to execute.",
|
|
1494
|
-
parameters: {
|
|
1495
|
-
type: "object",
|
|
1496
|
-
additionalProperties: false,
|
|
1497
|
-
properties: { confirm: {
|
|
1498
|
-
type: "boolean",
|
|
1499
|
-
description: "Must be true to clear the current scope."
|
|
1500
|
-
} }
|
|
1501
|
-
},
|
|
1502
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1503
|
-
const paramsRecord = isRecord(rawParams) ? rawParams : {};
|
|
1504
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1505
|
-
if (!readBoolean(paramsRecord.confirm, false)) {
|
|
1506
|
-
params.api.logger.warn(`knowhere: knowhere_clear_scope skipped without confirm scope=${scope.label}`);
|
|
1507
|
-
return textResult(`Set confirm=true to clear scope ${scope.label}.`);
|
|
1508
|
-
}
|
|
1509
|
-
params.api.logger.info(`knowhere: knowhere_clear_scope clearing scope scope=${scope.label}`);
|
|
1510
|
-
const removedDocuments = await params.store.clearScope(scope);
|
|
1511
|
-
params.api.logger.info(`knowhere: knowhere_clear_scope cleared scope scope=${scope.label} removed=${removedDocuments.length}`);
|
|
1512
|
-
return textResult(formatScopeClearResult(removedDocuments, scope.label));
|
|
1513
|
-
}
|
|
1514
|
-
};
|
|
1515
|
-
}
|
|
1516
802
|
function createSetApiKeyTool(params) {
|
|
1517
803
|
return {
|
|
1518
804
|
name: "knowhere_set_api_key",
|
|
@@ -1655,36 +941,6 @@ function createKnowhereToolFactory(params) {
|
|
|
1655
941
|
store: params.store,
|
|
1656
942
|
ctx
|
|
1657
943
|
}),
|
|
1658
|
-
createGrepTool({
|
|
1659
|
-
api: params.api,
|
|
1660
|
-
store: params.store,
|
|
1661
|
-
ctx
|
|
1662
|
-
}),
|
|
1663
|
-
createReadResultFileTool({
|
|
1664
|
-
api: params.api,
|
|
1665
|
-
store: params.store,
|
|
1666
|
-
ctx
|
|
1667
|
-
}),
|
|
1668
|
-
createPreviewDocumentTool({
|
|
1669
|
-
api: params.api,
|
|
1670
|
-
store: params.store,
|
|
1671
|
-
ctx
|
|
1672
|
-
}),
|
|
1673
|
-
createListTool({
|
|
1674
|
-
api: params.api,
|
|
1675
|
-
store: params.store,
|
|
1676
|
-
ctx
|
|
1677
|
-
}),
|
|
1678
|
-
createRemoveTool({
|
|
1679
|
-
api: params.api,
|
|
1680
|
-
store: params.store,
|
|
1681
|
-
ctx
|
|
1682
|
-
}),
|
|
1683
|
-
createClearScopeTool({
|
|
1684
|
-
api: params.api,
|
|
1685
|
-
store: params.store,
|
|
1686
|
-
ctx
|
|
1687
|
-
}),
|
|
1688
944
|
createSetApiKeyTool({
|
|
1689
945
|
api: params.api,
|
|
1690
946
|
config: params.config
|