@ontos-ai/knowhere-claw 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/client.js +1 -1
- package/dist/config.d.ts +8 -0
- package/dist/config.js +56 -8
- package/dist/connect-builder.d.ts +2 -0
- package/dist/connect-builder.js +9 -10
- package/dist/graph-builder.d.ts +4 -1
- package/dist/graph-builder.js +21 -34
- package/dist/index.js +3 -9
- package/dist/kg-service.d.ts +0 -2
- package/dist/kg-service.js +12 -45
- package/dist/parser.d.ts +4 -8
- package/dist/parser.js +25 -243
- package/dist/store.d.ts +4 -14
- package/dist/store.js +21 -106
- package/dist/text.js +1 -13
- package/dist/tools.js +413 -848
- package/dist/types.d.ts +1 -58
- package/openclaw.plugin.json +71 -1
- package/package.json +2 -3
- package/skills/knowhere_memory/SKILL.md +80 -98
- package/skills/knowhere/SKILL.md +0 -285
- /package/dist/__tests__/{read-result-file-tool.test.d.ts → storage-layout.test.d.ts} +0 -0
package/dist/tools.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import { isRecord } from "./types.js";
|
|
2
2
|
import { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, persistApiKey } from "./config.js";
|
|
3
|
-
import {
|
|
3
|
+
import { resolveStoredKnowhereArtifactPath } from "./parser.js";
|
|
4
|
+
import { sanitizeStringArray } from "./text.js";
|
|
4
5
|
import { formatErrorMessage } from "./error-message.js";
|
|
5
6
|
import { KnowhereClient } from "./client.js";
|
|
6
|
-
import { deliverChannelMessage } from "./channel-delivery.js";
|
|
7
7
|
import { sendTrackerProgress } from "./tracker-progress.js";
|
|
8
8
|
import fs from "node:fs/promises";
|
|
9
9
|
import path from "node:path";
|
|
10
|
-
import
|
|
10
|
+
import os from "node:os";
|
|
11
11
|
//#region src/tools.ts
|
|
12
12
|
const TERMINAL_JOB_STATUSES = new Set([
|
|
13
13
|
"cancelled",
|
|
@@ -18,19 +18,12 @@ const TERMINAL_JOB_STATUSES = new Set([
|
|
|
18
18
|
]);
|
|
19
19
|
async function buildKnowledgeGraphAsync(params) {
|
|
20
20
|
const docDir = path.join(params.scope.documentsDir, params.docId);
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
try {
|
|
24
|
-
payload = await fs.readFile(metadataPath, "utf-8");
|
|
25
|
-
} catch {
|
|
26
|
-
payload = null;
|
|
27
|
-
}
|
|
28
|
-
if (!payload) {
|
|
21
|
+
const payloadData = await params.store.readDocumentMetadata(params.scope, params.docId);
|
|
22
|
+
if (!payloadData) {
|
|
29
23
|
params.api.logger.warn(`knowhere: cannot build KG, metadata not found for docId=${params.docId}`);
|
|
30
24
|
return;
|
|
31
25
|
}
|
|
32
|
-
const
|
|
33
|
-
const fullMarkdownPath = path.join(docDir, "full_markdown.txt");
|
|
26
|
+
const fullMarkdownPath = await resolveStoredKnowhereArtifactPath(docDir, "full_markdown.txt");
|
|
34
27
|
let fullMarkdown = "";
|
|
35
28
|
try {
|
|
36
29
|
fullMarkdown = await fs.readFile(fullMarkdownPath, "utf-8");
|
|
@@ -44,8 +37,8 @@ async function buildKnowledgeGraphAsync(params) {
|
|
|
44
37
|
sourcePath: docDir,
|
|
45
38
|
keywords,
|
|
46
39
|
metadata: {
|
|
47
|
-
title: payloadData.document
|
|
48
|
-
sourceLabel: payloadData.document
|
|
40
|
+
title: payloadData.document.title || "Untitled",
|
|
41
|
+
sourceLabel: payloadData.document.sourceLabel || "Unknown",
|
|
49
42
|
checksum: params.documentPayload.downloadedResult.rawZipSha1,
|
|
50
43
|
ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
51
44
|
}
|
|
@@ -55,7 +48,6 @@ async function buildKnowledgeGraphAsync(params) {
|
|
|
55
48
|
});
|
|
56
49
|
params.api.logger.info(`knowhere: knowledge graph build completed kbId=${params.kbId} docId=${params.docId}`);
|
|
57
50
|
}
|
|
58
|
-
const PREVIEW_SUMMARY_MAX_CHARS = 120;
|
|
59
51
|
const INGEST_TRACKER_LANGUAGES = new Set(["ch", "en"]);
|
|
60
52
|
function textResult(text) {
|
|
61
53
|
return {
|
|
@@ -76,13 +68,6 @@ function deriveStoredDocumentDisplayName(document) {
|
|
|
76
68
|
function deriveStoredDocumentFileLabel(document, manifestSourceFileName) {
|
|
77
69
|
return document.originalFileName || document.fileName || manifestSourceFileName || "unknown";
|
|
78
70
|
}
|
|
79
|
-
function formatStoredDocumentNotFound(docId, scopeLabel) {
|
|
80
|
-
return [
|
|
81
|
-
"Stored document not found.",
|
|
82
|
-
`Document ID: ${docId}`,
|
|
83
|
-
`Scope: ${scopeLabel}`
|
|
84
|
-
].join("\n");
|
|
85
|
-
}
|
|
86
71
|
function buildStoredDocumentSummaryLines(params) {
|
|
87
72
|
const lines = [`Stored document: "${params.document.title}" [${params.document.id}]`, `Scope: ${params.scopeLabel}`];
|
|
88
73
|
if (params.includeSource) lines.push(`Source: ${params.document.sourceLabel}`);
|
|
@@ -116,6 +101,7 @@ function startKnowledgeGraphBuild(params) {
|
|
|
116
101
|
docId: params.document.id,
|
|
117
102
|
documentPayload: params.ingestResult,
|
|
118
103
|
scope: params.scope,
|
|
104
|
+
store: params.store,
|
|
119
105
|
ctx: params.ctx,
|
|
120
106
|
api: params.api,
|
|
121
107
|
channelRoute: params.channelRoute,
|
|
@@ -146,6 +132,7 @@ async function persistIngestedDocument(params) {
|
|
|
146
132
|
ingestResult: params.ingestResult,
|
|
147
133
|
kgService: params.kgService,
|
|
148
134
|
scope: params.scope,
|
|
135
|
+
store: params.store,
|
|
149
136
|
sessionKey: params.sessionKey
|
|
150
137
|
});
|
|
151
138
|
return storedDocument;
|
|
@@ -160,7 +147,7 @@ function formatCompletedIngestResult(params) {
|
|
|
160
147
|
includeSource: true
|
|
161
148
|
}),
|
|
162
149
|
`Source type: ${params.sourceType}`,
|
|
163
|
-
"Next: use
|
|
150
|
+
"Next: use knowhere_kg_query to search the knowledge graph for related content."
|
|
164
151
|
].join("\n");
|
|
165
152
|
}
|
|
166
153
|
function readString(value) {
|
|
@@ -284,22 +271,6 @@ function normalizeParsingParams(rawParsing) {
|
|
|
284
271
|
}
|
|
285
272
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
286
273
|
}
|
|
287
|
-
function formatDocumentList(documents, scopeLabel) {
|
|
288
|
-
if (documents.length === 0) return `No stored documents in scope ${scopeLabel}.`;
|
|
289
|
-
return documents.map((document, index) => {
|
|
290
|
-
const displayTitle = deriveStoredDocumentDisplayName(document);
|
|
291
|
-
const lines = [
|
|
292
|
-
`${index + 1}. ${displayTitle} [${document.id}]`,
|
|
293
|
-
`Source: ${document.sourceLabel}`,
|
|
294
|
-
`Chunks: ${document.chunkCount}`,
|
|
295
|
-
`Updated: ${document.updatedAt || document.ingestedAt}`
|
|
296
|
-
];
|
|
297
|
-
if (document.title && document.title !== displayTitle) lines.push(`Title: ${document.title}`);
|
|
298
|
-
if (document.tags.length > 0) lines.push(`Tags: ${document.tags.join(", ")}`);
|
|
299
|
-
if (document.resultUrl) lines.push(`Result URL: ${document.resultUrl}`);
|
|
300
|
-
return lines.join("\n");
|
|
301
|
-
}).join("\n\n");
|
|
302
|
-
}
|
|
303
274
|
function formatJobList(params) {
|
|
304
275
|
if (params.jobList.jobs.length === 0) return `No Knowhere jobs on page ${params.jobList.page}.`;
|
|
305
276
|
const lines = [`Knowhere jobs ${params.jobList.page}/${params.jobList.totalPages || 1} (${params.jobList.total} total).`, ""];
|
|
@@ -334,239 +305,6 @@ function buildHistoryJobSourceLabel(jobId, fileName) {
|
|
|
334
305
|
function mergeTags(tags, extras) {
|
|
335
306
|
return Array.from(new Set([...tags, ...extras]));
|
|
336
307
|
}
|
|
337
|
-
function formatScopeClearResult(documents, scopeLabel) {
|
|
338
|
-
if (documents.length === 0) return `Scope ${scopeLabel} is already empty.`;
|
|
339
|
-
const lines = [`Removed ${documents.length} stored document${documents.length === 1 ? "" : "s"} from scope ${scopeLabel}.`];
|
|
340
|
-
for (const [index, document] of documents.entries()) lines.push(`${index + 1}. ${deriveStoredDocumentDisplayName(document)} [${document.id}]`);
|
|
341
|
-
return lines.join("\n");
|
|
342
|
-
}
|
|
343
|
-
function readResultFileReadMode(value) {
|
|
344
|
-
return value === "json" || value === "csv" || value === "text" ? value : "text";
|
|
345
|
-
}
|
|
346
|
-
function normalizeResultFilePath(filePath) {
|
|
347
|
-
const value = normalizeWhitespace(filePath);
|
|
348
|
-
if (!value) return;
|
|
349
|
-
return path.posix.normalize(value.replace(/\\/g, "/")).replace(/^\/+/, "");
|
|
350
|
-
}
|
|
351
|
-
function buildChunkOrderIndex(browseIndex) {
|
|
352
|
-
return new Map(browseIndex.chunkOrder.map((chunkId, index) => [chunkId, index]));
|
|
353
|
-
}
|
|
354
|
-
function sortChunksByBrowseOrder(chunks, browseIndex) {
|
|
355
|
-
const orderIndex = buildChunkOrderIndex(browseIndex);
|
|
356
|
-
return [...chunks].sort((left, right) => {
|
|
357
|
-
return (orderIndex.get(left.chunkId) ?? Number.MAX_SAFE_INTEGER) - (orderIndex.get(right.chunkId) ?? Number.MAX_SAFE_INTEGER) || left.chunkId.localeCompare(right.chunkId);
|
|
358
|
-
});
|
|
359
|
-
}
|
|
360
|
-
function findResultFile(browseIndex, relativePath) {
|
|
361
|
-
return browseIndex.resultFiles.find((entry) => entry.relativePath === relativePath);
|
|
362
|
-
}
|
|
363
|
-
function isTextReadableResultFile(fileRecord) {
|
|
364
|
-
return fileRecord.kind !== "image";
|
|
365
|
-
}
|
|
366
|
-
const IMAGE_EXTENSION_MIME_TYPES = {
|
|
367
|
-
".png": "image/png",
|
|
368
|
-
".jpg": "image/jpeg",
|
|
369
|
-
".jpeg": "image/jpeg",
|
|
370
|
-
".gif": "image/gif",
|
|
371
|
-
".webp": "image/webp",
|
|
372
|
-
".svg": "image/svg+xml",
|
|
373
|
-
".bmp": "image/bmp",
|
|
374
|
-
".tiff": "image/tiff",
|
|
375
|
-
".tif": "image/tiff"
|
|
376
|
-
};
|
|
377
|
-
function inferImageMimeType(filePath) {
|
|
378
|
-
return IMAGE_EXTENSION_MIME_TYPES[path.extname(filePath).toLowerCase()] || "image/png";
|
|
379
|
-
}
|
|
380
|
-
async function buildImageToolResult(params) {
|
|
381
|
-
const mimeType = inferImageMimeType(params.absolutePath);
|
|
382
|
-
const stagedImage = await stageImageResultFileForDelivery({
|
|
383
|
-
absolutePath: params.absolutePath,
|
|
384
|
-
documentTitle: params.documentTitle,
|
|
385
|
-
relativePath: params.filePath,
|
|
386
|
-
workspaceDir: params.workspaceDir
|
|
387
|
-
});
|
|
388
|
-
const stagedImagePath = stagedImage.stagedPath;
|
|
389
|
-
const fileName = path.basename(stagedImagePath);
|
|
390
|
-
const caption = `${params.documentTitle} - ${params.filePath}`;
|
|
391
|
-
const directDelivery = await deliverChannelMessage({
|
|
392
|
-
api: params.api,
|
|
393
|
-
operationLabel: "read result image",
|
|
394
|
-
context: params.context,
|
|
395
|
-
sessionKey: params.sessionKey,
|
|
396
|
-
channelRoute: params.channelRoute,
|
|
397
|
-
text: caption,
|
|
398
|
-
mediaUrl: stagedImagePath,
|
|
399
|
-
mediaLocalRoots: [path.dirname(stagedImagePath)]
|
|
400
|
-
});
|
|
401
|
-
if (directDelivery.delivered) {
|
|
402
|
-
const payload = {
|
|
403
|
-
scope: params.scopeLabel,
|
|
404
|
-
docId: params.docId,
|
|
405
|
-
documentTitle: params.documentTitle,
|
|
406
|
-
file: params.file,
|
|
407
|
-
mode: "image_sent",
|
|
408
|
-
data: {
|
|
409
|
-
mimeType,
|
|
410
|
-
sourceRelativePath: params.filePath,
|
|
411
|
-
stagedPath: stagedImagePath,
|
|
412
|
-
fileName,
|
|
413
|
-
caption,
|
|
414
|
-
note: "Image already sent to the current channel by the plugin. Do not call read on stagedPath. Do not call the message tool or attach this file again. If you reply, send only a brief confirmation.",
|
|
415
|
-
delivery: {
|
|
416
|
-
method: "direct_runtime",
|
|
417
|
-
surface: directDelivery.surface,
|
|
418
|
-
target: directDelivery.to,
|
|
419
|
-
accountId: directDelivery.accountId
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
};
|
|
423
|
-
return {
|
|
424
|
-
content: [{
|
|
425
|
-
type: "text",
|
|
426
|
-
text: `${JSON.stringify(payload, null, 2)}\n`
|
|
427
|
-
}],
|
|
428
|
-
details: payload
|
|
429
|
-
};
|
|
430
|
-
}
|
|
431
|
-
const sendWithMessageTool = {
|
|
432
|
-
action: "send",
|
|
433
|
-
path: stagedImagePath,
|
|
434
|
-
filePath: stagedImagePath,
|
|
435
|
-
filename: fileName,
|
|
436
|
-
caption
|
|
437
|
-
};
|
|
438
|
-
const replyFallback = stagedImage.workspaceRelativePath ? {
|
|
439
|
-
instructions: "If the message tool is unavailable, send your normal user-visible reply and include this exact line on its own line to attach the image.",
|
|
440
|
-
workspaceRelativePath: stagedImage.workspaceRelativePath,
|
|
441
|
-
replyWithMediaDirective: `MEDIA:${stagedImage.workspaceRelativePath}`
|
|
442
|
-
} : void 0;
|
|
443
|
-
const note = replyFallback ? "Image bytes are not inlined. Do not call read on stagedPath. If the user wants to see this image, use the message tool with sendWithMessageTool. If the message tool is unavailable, send your user-visible reply normally and include replyFallback.replyWithMediaDirective on its own line." : "Image bytes are not inlined. Do not call read on stagedPath. If the user wants to see this image, call the message tool with sendWithMessageTool.";
|
|
444
|
-
const nextActionInstructions = replyFallback ? "Do not call read on stagedPath. Call the message tool with sendWithMessageTool to attach this image. If the message tool is unavailable, use replyFallback.replyWithMediaDirective in your normal reply instead." : "Do not call read on stagedPath. Call the message tool with sendWithMessageTool to attach this image.";
|
|
445
|
-
const payload = {
|
|
446
|
-
scope: params.scopeLabel,
|
|
447
|
-
docId: params.docId,
|
|
448
|
-
documentTitle: params.documentTitle,
|
|
449
|
-
file: params.file,
|
|
450
|
-
mode: "image_attachment",
|
|
451
|
-
data: {
|
|
452
|
-
mimeType,
|
|
453
|
-
sourceRelativePath: params.filePath,
|
|
454
|
-
stagedPath: stagedImagePath,
|
|
455
|
-
fileName,
|
|
456
|
-
caption,
|
|
457
|
-
note,
|
|
458
|
-
nextAction: {
|
|
459
|
-
tool: "message",
|
|
460
|
-
instructions: nextActionInstructions,
|
|
461
|
-
args: sendWithMessageTool
|
|
462
|
-
},
|
|
463
|
-
sendWithMessageTool,
|
|
464
|
-
...replyFallback ? { replyFallback } : {}
|
|
465
|
-
}
|
|
466
|
-
};
|
|
467
|
-
return {
|
|
468
|
-
content: [{
|
|
469
|
-
type: "text",
|
|
470
|
-
text: `${JSON.stringify(payload, null, 2)}\n`
|
|
471
|
-
}],
|
|
472
|
-
details: payload
|
|
473
|
-
};
|
|
474
|
-
}
|
|
475
|
-
function normalizeWorkspaceDir(workspaceDir) {
|
|
476
|
-
const trimmed = readString(workspaceDir);
|
|
477
|
-
return trimmed ? path.resolve(trimmed) : void 0;
|
|
478
|
-
}
|
|
479
|
-
function toWorkspaceRelativeMediaPath(params) {
|
|
480
|
-
const relativePath = path.relative(params.workspaceDir, params.stagedPath);
|
|
481
|
-
if (!relativePath || relativePath.startsWith("..") || path.isAbsolute(relativePath)) return;
|
|
482
|
-
const normalizedRelativePath = relativePath.split(path.sep).join("/");
|
|
483
|
-
return normalizedRelativePath.startsWith("./") || normalizedRelativePath.startsWith("../") ? normalizedRelativePath : `./${normalizedRelativePath}`;
|
|
484
|
-
}
|
|
485
|
-
async function stageImageResultFileForDelivery(params) {
|
|
486
|
-
const extension = path.extname(params.relativePath) || path.extname(params.absolutePath) || ".png";
|
|
487
|
-
const imageBaseName = path.basename(params.relativePath, extension) || "image";
|
|
488
|
-
const workspaceDir = normalizeWorkspaceDir(params.workspaceDir);
|
|
489
|
-
let stagedDir;
|
|
490
|
-
if (workspaceDir) {
|
|
491
|
-
const workspaceStageRoot = path.join(workspaceDir, ".openclaw");
|
|
492
|
-
await fs.mkdir(workspaceStageRoot, { recursive: true });
|
|
493
|
-
stagedDir = await fs.mkdtemp(path.join(workspaceStageRoot, "knowhere-read-result-file-"));
|
|
494
|
-
} else stagedDir = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "knowhere-read-result-file-"));
|
|
495
|
-
const stagedFileName = `${slugify(`${params.documentTitle}-${imageBaseName}`, "knowhere-image")}${extension.toLowerCase()}`;
|
|
496
|
-
const stagedPath = path.join(stagedDir, stagedFileName);
|
|
497
|
-
await fs.copyFile(params.absolutePath, stagedPath);
|
|
498
|
-
return {
|
|
499
|
-
stagedPath,
|
|
500
|
-
...workspaceDir ? { workspaceRelativePath: toWorkspaceRelativeMediaPath({
|
|
501
|
-
workspaceDir,
|
|
502
|
-
stagedPath
|
|
503
|
-
}) } : {}
|
|
504
|
-
};
|
|
505
|
-
}
|
|
506
|
-
function stripUtf8Bom(text) {
|
|
507
|
-
return text.charCodeAt(0) === 65279 ? text.slice(1) : text;
|
|
508
|
-
}
|
|
509
|
-
function buildTextFilePayload(text, maxChars) {
|
|
510
|
-
return {
|
|
511
|
-
content: truncatePreview(text, maxChars),
|
|
512
|
-
lineCount: text === "" ? 0 : text.split(/\r\n|\n|\r/).length
|
|
513
|
-
};
|
|
514
|
-
}
|
|
515
|
-
function buildCsvFilePayload(text, maxChars) {
|
|
516
|
-
const normalized = stripUtf8Bom(text);
|
|
517
|
-
const [headerLine = ""] = normalized.split(/\r\n|\n|\r/, 1);
|
|
518
|
-
return {
|
|
519
|
-
content: truncatePreview(normalized, maxChars),
|
|
520
|
-
header: headerLine.trim() || null,
|
|
521
|
-
lineCount: normalized === "" ? 0 : normalized.split(/\r\n|\n|\r/).length
|
|
522
|
-
};
|
|
523
|
-
}
|
|
524
|
-
function truncatePreview(value, maxChars) {
|
|
525
|
-
if (typeof value !== "string") return "";
|
|
526
|
-
const normalized = value.replace(/\r\n/g, "\n").trim();
|
|
527
|
-
if (!normalized) return "";
|
|
528
|
-
if (normalized.length <= maxChars) return normalized;
|
|
529
|
-
return `${normalized.slice(0, Math.max(0, maxChars - 3)).trimEnd()}...`;
|
|
530
|
-
}
|
|
531
|
-
function truncateJsonValue(value, maxStringChars) {
|
|
532
|
-
if (typeof value === "string") {
|
|
533
|
-
if (value.length <= maxStringChars) return {
|
|
534
|
-
value,
|
|
535
|
-
truncated: false
|
|
536
|
-
};
|
|
537
|
-
return {
|
|
538
|
-
value: `${value.slice(0, Math.max(0, maxStringChars - 1))}…`,
|
|
539
|
-
truncated: true
|
|
540
|
-
};
|
|
541
|
-
}
|
|
542
|
-
if (Array.isArray(value)) {
|
|
543
|
-
let truncated = false;
|
|
544
|
-
return {
|
|
545
|
-
value: value.map((entry) => {
|
|
546
|
-
const result = truncateJsonValue(entry, maxStringChars);
|
|
547
|
-
truncated = truncated || result.truncated;
|
|
548
|
-
return result.value;
|
|
549
|
-
}),
|
|
550
|
-
truncated
|
|
551
|
-
};
|
|
552
|
-
}
|
|
553
|
-
if (isRecord(value)) {
|
|
554
|
-
let truncated = false;
|
|
555
|
-
const entries = Object.entries(value).map(([key, entry]) => {
|
|
556
|
-
const result = truncateJsonValue(entry, maxStringChars);
|
|
557
|
-
truncated = truncated || result.truncated;
|
|
558
|
-
return [key, result.value];
|
|
559
|
-
});
|
|
560
|
-
return {
|
|
561
|
-
value: Object.fromEntries(entries),
|
|
562
|
-
truncated
|
|
563
|
-
};
|
|
564
|
-
}
|
|
565
|
-
return {
|
|
566
|
-
value,
|
|
567
|
-
truncated: false
|
|
568
|
-
};
|
|
569
|
-
}
|
|
570
308
|
function formatJsonToolResult(value) {
|
|
571
309
|
return textResult(`${JSON.stringify(value, null, 2)}\n`);
|
|
572
310
|
}
|
|
@@ -599,7 +337,7 @@ function createIngestTool(params) {
|
|
|
599
337
|
return {
|
|
600
338
|
name: "knowhere_ingest_document",
|
|
601
339
|
label: "Knowhere Ingest",
|
|
602
|
-
description: "Parse a local file or remote URL with Knowhere and store the result in the current scope.
|
|
340
|
+
description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. When the user provides a URL to a document (PDF link, web page, etc.), pass it as the url parameter — Knowhere fetches it directly, no local download needed. Knowhere must be the only parser for supported files. If Knowhere returns an error, surface that exact error to the user and do not fall back to other parsing methods or fabricate a preview. By default blockUntilComplete is false, so this tool is fire-and-forget and returns a job ID while parsing continues in the background. Set blockUntilComplete to true only when the current turn explicitly needs the parsed result before continuing. Use lang to control the language of any user-facing background status update (`en` by default, `ch` for Chinese). Provide either filePath or url, not both.",
|
|
603
341
|
parameters: {
|
|
604
342
|
type: "object",
|
|
605
343
|
additionalProperties: false,
|
|
@@ -815,7 +553,7 @@ function createIngestTool(params) {
|
|
|
815
553
|
})]);
|
|
816
554
|
if (typeof createdJob === "symbol") {
|
|
817
555
|
params.api.logger.warn(`knowhere: knowhere_ingest_document ingest completed before job-created callback scope=${scope.label} label=${JSON.stringify(progressLabel)}`);
|
|
818
|
-
return textResult("Ingest completed synchronously.
|
|
556
|
+
return textResult("Ingest completed synchronously. The document is now stored and indexed.");
|
|
819
557
|
}
|
|
820
558
|
return textResult([
|
|
821
559
|
"Ingest job created. Parsing in background.",
|
|
@@ -987,7 +725,7 @@ function createImportCompletedJobTool(params) {
|
|
|
987
725
|
return {
|
|
988
726
|
name: "knowhere_import_completed_job",
|
|
989
727
|
label: "Knowhere Import Completed Job",
|
|
990
|
-
description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally
|
|
728
|
+
description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally. Use knowhere_list_jobs to find available completed jobs.",
|
|
991
729
|
parameters: {
|
|
992
730
|
type: "object",
|
|
993
731
|
additionalProperties: false,
|
|
@@ -1047,6 +785,24 @@ function createImportCompletedJobTool(params) {
|
|
|
1047
785
|
downloadedResult: importResult.downloadedResult
|
|
1048
786
|
}, { overwrite });
|
|
1049
787
|
params.api.logger.info(`knowhere: knowhere_import_completed_job stored imported document scope=${scope.label} jobId=${importResult.jobResult.job_id} docId=${document.id}`);
|
|
788
|
+
try {
|
|
789
|
+
const importKbId = params.kgService.resolveKbId(params.ctx);
|
|
790
|
+
if (importKbId && params.kgService.isEnabled()) {
|
|
791
|
+
params.api.logger.info(`knowhere: triggering KG build after import kbId=${importKbId}`);
|
|
792
|
+
buildKnowledgeGraphAsync({
|
|
793
|
+
kgService: params.kgService,
|
|
794
|
+
kbId: importKbId,
|
|
795
|
+
docId: document.id,
|
|
796
|
+
documentPayload: { downloadedResult: importResult.downloadedResult },
|
|
797
|
+
scope,
|
|
798
|
+
store: params.store,
|
|
799
|
+
ctx: params.ctx,
|
|
800
|
+
api: params.api
|
|
801
|
+
}).catch((e) => params.api.logger.error(`knowhere: KG build after import failed: ${formatErrorMessage(e)}`));
|
|
802
|
+
}
|
|
803
|
+
} catch (kgError) {
|
|
804
|
+
params.api.logger.warn(`knowhere: import KG trigger error: ${formatErrorMessage(kgError)}`);
|
|
805
|
+
}
|
|
1050
806
|
return textResult([
|
|
1051
807
|
"Import complete.",
|
|
1052
808
|
...buildStoredDocumentSummaryLines({
|
|
@@ -1057,551 +813,11 @@ function createImportCompletedJobTool(params) {
|
|
|
1057
813
|
}),
|
|
1058
814
|
`Imported from job: ${importResult.jobResult.job_id}`,
|
|
1059
815
|
`Source type: ${importResult.jobResult.source_type}`,
|
|
1060
|
-
"
|
|
1061
|
-
].join("\n"));
|
|
1062
|
-
}
|
|
1063
|
-
};
|
|
1064
|
-
}
|
|
1065
|
-
const GREP_VALID_TARGETS = new Set([
|
|
1066
|
-
"chunk.content",
|
|
1067
|
-
"chunk.summary",
|
|
1068
|
-
"chunk.keywords",
|
|
1069
|
-
"chunk.path",
|
|
1070
|
-
"chunk.type",
|
|
1071
|
-
"chunk.chunkId"
|
|
1072
|
-
]);
|
|
1073
|
-
const GREP_TEXT_TARGETS = [
|
|
1074
|
-
"chunk.content",
|
|
1075
|
-
"chunk.summary",
|
|
1076
|
-
"chunk.keywords",
|
|
1077
|
-
"chunk.path"
|
|
1078
|
-
];
|
|
1079
|
-
function parseGrepConditions(raw) {
|
|
1080
|
-
if (!Array.isArray(raw)) return [];
|
|
1081
|
-
return raw.filter(isRecord).map((entry) => {
|
|
1082
|
-
const target = readString(entry.target);
|
|
1083
|
-
if (target && !GREP_VALID_TARGETS.has(target)) throw new Error(`Invalid grep target: "${target}". Valid targets: ${[...GREP_VALID_TARGETS].join(", ")}`);
|
|
1084
|
-
const pattern = typeof entry.pattern === "string" ? entry.pattern : "";
|
|
1085
|
-
return {
|
|
1086
|
-
...target ? { target } : {},
|
|
1087
|
-
pattern,
|
|
1088
|
-
...typeof entry.regex === "boolean" ? { regex: entry.regex } : {},
|
|
1089
|
-
...typeof entry.caseSensitive === "boolean" ? { caseSensitive: entry.caseSensitive } : {}
|
|
1090
|
-
};
|
|
1091
|
-
});
|
|
1092
|
-
}
|
|
1093
|
-
function resolveGrepFieldValue(chunk, target) {
|
|
1094
|
-
switch (target) {
|
|
1095
|
-
case "chunk.content": return chunk.content || "";
|
|
1096
|
-
case "chunk.summary": return chunk.summary || "";
|
|
1097
|
-
case "chunk.keywords": return chunk.keywords || [];
|
|
1098
|
-
case "chunk.path": return chunk.path || "";
|
|
1099
|
-
case "chunk.type": return chunk.type;
|
|
1100
|
-
case "chunk.chunkId": return chunk.chunkId;
|
|
1101
|
-
default: return "";
|
|
1102
|
-
}
|
|
1103
|
-
}
|
|
1104
|
-
function testGrepMatch(text, pattern, useRegex, caseSensitive) {
|
|
1105
|
-
if (pattern === "") return true;
|
|
1106
|
-
if (useRegex) {
|
|
1107
|
-
const flags = caseSensitive ? "" : "i";
|
|
1108
|
-
return new RegExp(pattern, flags).test(text);
|
|
1109
|
-
}
|
|
1110
|
-
if (caseSensitive) return text.includes(pattern);
|
|
1111
|
-
return text.toLowerCase().includes(pattern.toLowerCase());
|
|
1112
|
-
}
|
|
1113
|
-
function testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive) {
|
|
1114
|
-
const useRegex = condition.regex ?? outerRegex;
|
|
1115
|
-
const caseSensitive = condition.caseSensitive ?? outerCaseSensitive;
|
|
1116
|
-
const matchedTargets = [];
|
|
1117
|
-
const targets = condition.target ? [condition.target] : GREP_TEXT_TARGETS;
|
|
1118
|
-
for (const target of targets) {
|
|
1119
|
-
const fieldValue = target === "chunk.type" || target === "chunk.chunkId" ? resolveGrepFieldValue(chunk, target) : normalizedFields.get(target) ?? resolveGrepFieldValue(chunk, target);
|
|
1120
|
-
if (Array.isArray(fieldValue)) {
|
|
1121
|
-
if (fieldValue.some((entry) => testGrepMatch(entry, condition.pattern, useRegex, caseSensitive))) matchedTargets.push(target);
|
|
1122
|
-
} else if (testGrepMatch(fieldValue, condition.pattern, useRegex, caseSensitive)) matchedTargets.push(target);
|
|
1123
|
-
}
|
|
1124
|
-
return matchedTargets;
|
|
1125
|
-
}
|
|
1126
|
-
function buildNormalizedFields(chunk) {
|
|
1127
|
-
const fields = /* @__PURE__ */ new Map();
|
|
1128
|
-
fields.set("chunk.content", normalizeForGrep(chunk.content || ""));
|
|
1129
|
-
fields.set("chunk.summary", normalizeForGrep(chunk.summary || ""));
|
|
1130
|
-
fields.set("chunk.keywords", (chunk.keywords || []).map((k) => normalizeForGrep(k)));
|
|
1131
|
-
fields.set("chunk.path", normalizeForGrep(chunk.path || ""));
|
|
1132
|
-
return fields;
|
|
1133
|
-
}
|
|
1134
|
-
function buildGrepHints(params) {
|
|
1135
|
-
const hints = [];
|
|
1136
|
-
const maxHints = 3;
|
|
1137
|
-
if (params.totalMatches === 0 && params.conditionCount > 0) hints.push("No matches. Try broadening: remove a condition, use a shorter pattern, or check for typos. Call knowhere_preview_document to see the document structure first.");
|
|
1138
|
-
if (hints.length < maxHints && params.totalMatches > params.returned) {
|
|
1139
|
-
let hint = `Showing ${params.returned} of ${params.totalMatches} matches. Add another condition (e.g., target chunk.path to a specific section) to narrow results.`;
|
|
1140
|
-
if (!params.hasPathCondition) hint += " Use knowhere_preview_document to find section paths.";
|
|
1141
|
-
hints.push(hint);
|
|
1142
|
-
}
|
|
1143
|
-
if (hints.length < maxHints && params.truncatedStrings) if (params.returned > 3) hints.push(`Fields truncated at ${params.maxStringChars} chars. Reduce maxResults to 1-3 and increase maxStringChars to 12000-20000 for full content.`);
|
|
1144
|
-
else hints.push(`Fields truncated at ${params.maxStringChars} chars. Increase maxStringChars (up to 20000) for full content.`);
|
|
1145
|
-
if (hints.length < maxHints && params.totalMatches >= 1 && params.totalMatches <= 5 && !params.includeContext) hints.push("Tip: set includeContext=true to discover sibling chunks in the same section.");
|
|
1146
|
-
if (hints.length < maxHints && params.totalChunks > 0 && params.totalMatches > params.totalChunks * .5 && params.conditionCount <= 1) hints.push("Pattern matches over half the document. Add a second condition to narrow.");
|
|
1147
|
-
return hints;
|
|
1148
|
-
}
|
|
1149
|
-
function createGrepTool(params) {
|
|
1150
|
-
return {
|
|
1151
|
-
name: "knowhere_grep",
|
|
1152
|
-
label: "Knowhere Grep",
|
|
1153
|
-
description: "Search a stored document's chunks with composable AND conditions. Returns matching chunks with content, summary, keywords, path, and chunkId. Supports substring and regex matching with text normalization (HTML stripping, LaTeX cleanup, unicode normalization). Omit conditions to list all chunks. Omit the target field in a condition to search across all text fields (content, summary, keywords, path) — this is the recommended default. When answering questions from results, cite the chunkId and path. Tip: set maxStringChars up to 20000 when you need full untruncated content from a small number of results (e.g., maxResults=1). The default 4000 may truncate long chunks. Search strategy: (1) Start with knowhere_preview_document to see document structure. (2) Search broadly with a single short pattern, then narrow by adding conditions. (3) If zero results, broaden or try synonyms. If too many, add a path condition. (4) Once you find the right chunks, re-query with maxResults=1-3 and maxStringChars=12000-20000 to read full content.",
|
|
1154
|
-
parameters: {
|
|
1155
|
-
type: "object",
|
|
1156
|
-
additionalProperties: false,
|
|
1157
|
-
properties: {
|
|
1158
|
-
docId: {
|
|
1159
|
-
type: "string",
|
|
1160
|
-
description: "Identifier of the stored document to search."
|
|
1161
|
-
},
|
|
1162
|
-
conditions: {
|
|
1163
|
-
type: "array",
|
|
1164
|
-
items: {
|
|
1165
|
-
type: "object",
|
|
1166
|
-
additionalProperties: false,
|
|
1167
|
-
properties: {
|
|
1168
|
-
target: {
|
|
1169
|
-
type: "string",
|
|
1170
|
-
enum: [...GREP_VALID_TARGETS],
|
|
1171
|
-
description: "Chunk field to search. Omit to search all text fields (content, summary, keywords, path) — this is the recommended default."
|
|
1172
|
-
},
|
|
1173
|
-
pattern: {
|
|
1174
|
-
type: "string",
|
|
1175
|
-
description: "Search pattern. Empty string matches all."
|
|
1176
|
-
},
|
|
1177
|
-
regex: {
|
|
1178
|
-
type: "boolean",
|
|
1179
|
-
description: "Use regex matching for this condition. Overrides outer regex default."
|
|
1180
|
-
},
|
|
1181
|
-
caseSensitive: {
|
|
1182
|
-
type: "boolean",
|
|
1183
|
-
description: "Case-sensitive matching for this condition. Overrides outer default."
|
|
1184
|
-
}
|
|
1185
|
-
},
|
|
1186
|
-
required: ["pattern"]
|
|
1187
|
-
},
|
|
1188
|
-
description: "ANDed search conditions. Each condition must match for a chunk to be returned. Default [] matches all chunks (useful for browsing). Omit target in a condition to search all text fields. Use multiple conditions to narrow results (e.g., path contains 'chapter 3' AND content contains 'algorithm')."
|
|
1189
|
-
},
|
|
1190
|
-
regex: {
|
|
1191
|
-
type: "boolean",
|
|
1192
|
-
description: "Default regex mode for all conditions. Defaults to false."
|
|
1193
|
-
},
|
|
1194
|
-
caseSensitive: {
|
|
1195
|
-
type: "boolean",
|
|
1196
|
-
description: "Default case-sensitivity for all conditions. Defaults to false."
|
|
1197
|
-
},
|
|
1198
|
-
includeContext: {
|
|
1199
|
-
type: "boolean",
|
|
1200
|
-
description: "Include sibling chunk IDs sharing the same document path for each matched chunk. Useful for navigating to adjacent chunks in the same section — re-query with a condition on chunk.chunkId to fetch a specific sibling."
|
|
1201
|
-
},
|
|
1202
|
-
maxResults: {
|
|
1203
|
-
type: "integer",
|
|
1204
|
-
minimum: 1,
|
|
1205
|
-
maximum: 50,
|
|
1206
|
-
description: "Maximum number of matching chunks to return. Defaults to 10. Use a low value (1–3) with high maxStringChars to read specific chunks in full. Use a higher value (10–50) with lower maxStringChars to scan broadly."
|
|
1207
|
-
},
|
|
1208
|
-
maxStringChars: {
|
|
1209
|
-
type: "integer",
|
|
1210
|
-
minimum: 100,
|
|
1211
|
-
maximum: 2e4,
|
|
1212
|
-
description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 when retrieving full content from a small number of results (e.g., reading a single chunk in full). Reduce below 4000 when scanning many results to save tokens."
|
|
1213
|
-
}
|
|
1214
|
-
},
|
|
1215
|
-
required: ["docId"]
|
|
1216
|
-
},
|
|
1217
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1218
|
-
const paramsRecord = isRecord(rawParams) ? rawParams : {};
|
|
1219
|
-
const docId = readString(paramsRecord.docId);
|
|
1220
|
-
if (!docId) throw new Error("docId is required.");
|
|
1221
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1222
|
-
const payload = await params.store.loadDocumentPayload(scope, docId);
|
|
1223
|
-
if (!payload) {
|
|
1224
|
-
params.api.logger.warn(`knowhere: knowhere_grep document not found scope=${scope.label} docId=${docId}`);
|
|
1225
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1226
|
-
}
|
|
1227
|
-
const conditions = parseGrepConditions(paramsRecord.conditions);
|
|
1228
|
-
const outerRegex = readBoolean(paramsRecord.regex, false);
|
|
1229
|
-
const outerCaseSensitive = readBoolean(paramsRecord.caseSensitive, false);
|
|
1230
|
-
const includeContext = readBoolean(paramsRecord.includeContext, false);
|
|
1231
|
-
const maxResults = Math.min(50, Math.max(1, Math.trunc(readNumber(paramsRecord.maxResults, 10))));
|
|
1232
|
-
const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(paramsRecord.maxStringChars, 4e3))));
|
|
1233
|
-
params.api.logger.info(`knowhere: knowhere_grep searching document scope=${scope.label} docId=${docId} conditions=${conditions.length} regex=${outerRegex} caseSensitive=${outerCaseSensitive} includeContext=${includeContext} maxResults=${maxResults} maxStringChars=${maxStringChars}`);
|
|
1234
|
-
const pathChunkIndex = includeContext ? new Map(payload.browseIndex.paths.map((p) => [p.path, p.chunkIds])) : void 0;
|
|
1235
|
-
const sortedChunks = sortChunksByBrowseOrder(payload.chunks, payload.browseIndex);
|
|
1236
|
-
const results = [];
|
|
1237
|
-
for (const chunk of sortedChunks) {
|
|
1238
|
-
if (results.length >= maxResults) break;
|
|
1239
|
-
const normalizedFields = buildNormalizedFields(chunk);
|
|
1240
|
-
const allMatchedTargets = /* @__PURE__ */ new Set();
|
|
1241
|
-
let allConditionsPassed = true;
|
|
1242
|
-
for (const condition of conditions) {
|
|
1243
|
-
const matched = testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive);
|
|
1244
|
-
if (matched.length === 0) {
|
|
1245
|
-
allConditionsPassed = false;
|
|
1246
|
-
break;
|
|
1247
|
-
}
|
|
1248
|
-
for (const target of matched) allMatchedTargets.add(target);
|
|
1249
|
-
}
|
|
1250
|
-
if (!allConditionsPassed) continue;
|
|
1251
|
-
const entry = {
|
|
1252
|
-
chunk,
|
|
1253
|
-
matchedOn: [...allMatchedTargets]
|
|
1254
|
-
};
|
|
1255
|
-
if (includeContext && pathChunkIndex && chunk.path) {
|
|
1256
|
-
const siblings = pathChunkIndex.get(chunk.path);
|
|
1257
|
-
if (siblings) entry.siblingChunkIds = siblings.filter((id) => id !== chunk.chunkId);
|
|
1258
|
-
}
|
|
1259
|
-
results.push(entry);
|
|
1260
|
-
}
|
|
1261
|
-
let totalMatches = results.length;
|
|
1262
|
-
if (results.length >= maxResults) {
|
|
1263
|
-
const remainingChunks = sortedChunks.slice(sortedChunks.indexOf(results[results.length - 1].chunk) + 1);
|
|
1264
|
-
for (const chunk of remainingChunks) {
|
|
1265
|
-
const normalizedFields = buildNormalizedFields(chunk);
|
|
1266
|
-
let passed = true;
|
|
1267
|
-
for (const condition of conditions) if (testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive).length === 0) {
|
|
1268
|
-
passed = false;
|
|
1269
|
-
break;
|
|
1270
|
-
}
|
|
1271
|
-
if (passed) totalMatches++;
|
|
1272
|
-
}
|
|
1273
|
-
}
|
|
1274
|
-
const truncated = truncateJsonValue(results.map((entry) => {
|
|
1275
|
-
const projected = {
|
|
1276
|
-
chunkId: entry.chunk.chunkId,
|
|
1277
|
-
type: entry.chunk.type,
|
|
1278
|
-
path: entry.chunk.path,
|
|
1279
|
-
content: entry.chunk.content,
|
|
1280
|
-
summary: entry.chunk.summary,
|
|
1281
|
-
keywords: entry.chunk.keywords,
|
|
1282
|
-
tokens: entry.chunk.tokens,
|
|
1283
|
-
assetFilePath: entry.chunk.assetFilePath,
|
|
1284
|
-
matchedOn: entry.matchedOn
|
|
1285
|
-
};
|
|
1286
|
-
if (entry.siblingChunkIds) projected.siblingChunkIds = entry.siblingChunkIds;
|
|
1287
|
-
return projected;
|
|
1288
|
-
}), maxStringChars);
|
|
1289
|
-
params.api.logger.info(`knowhere: knowhere_grep completed search scope=${scope.label} docId=${docId} returned=${results.length} totalMatches=${totalMatches} truncated=${truncated.truncated}`);
|
|
1290
|
-
const hasPathCondition = conditions.some((c) => c.target === "chunk.path");
|
|
1291
|
-
const hints = buildGrepHints({
|
|
1292
|
-
totalMatches,
|
|
1293
|
-
returned: results.length,
|
|
1294
|
-
maxResults,
|
|
1295
|
-
maxStringChars,
|
|
1296
|
-
truncatedStrings: truncated.truncated,
|
|
1297
|
-
conditionCount: conditions.length,
|
|
1298
|
-
includeContext,
|
|
1299
|
-
totalChunks: sortedChunks.length,
|
|
1300
|
-
hasPathCondition
|
|
1301
|
-
});
|
|
1302
|
-
const jsonResult = formatJsonToolResult({
|
|
1303
|
-
totalMatches,
|
|
1304
|
-
returned: results.length,
|
|
1305
|
-
results: truncated.value,
|
|
1306
|
-
maxStringChars,
|
|
1307
|
-
truncatedStrings: truncated.truncated
|
|
1308
|
-
});
|
|
1309
|
-
if (hints.length === 0) return jsonResult;
|
|
1310
|
-
return textResult(`${jsonResult.content[0].text}\n---\n${hints.join("\n")}`);
|
|
1311
|
-
}
|
|
1312
|
-
};
|
|
1313
|
-
}
|
|
1314
|
-
/**
 * Build the `knowhere_read_result_file` tool definition.
 *
 * The tool looks up one file of a stored document's result package and returns
 * it according to the file kind and the requested mode:
 * - image files are handed to `buildImageToolResult` together with routing info;
 * - files that are not text-readable produce an explanatory text result;
 * - `mode: "json"` parses the file and truncates long strings;
 * - every other mode returns a CSV or plain-text payload.
 *
 * @param params Tool context; `params.store`, `params.api.logger` and `params.ctx` are read at call time.
 * @returns Tool descriptor with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createReadResultFileTool(params) {
	const parameters = {
		type: "object",
		additionalProperties: false,
		properties: {
			docId: {
				type: "string",
				description: "Identifier of the stored document to read from."
			},
			filePath: {
				type: "string",
				description: "Relative path under the stored result directory, for example manifest.json or tables/table-1.html."
			},
			mode: {
				type: "string",
				enum: ["text", "json", "csv"],
				description: "text returns trimmed text, json parses JSON, and csv returns a raw CSV preview. Defaults to text."
			},
			maxStringChars: {
				type: "integer",
				minimum: 100,
				maximum: 20000,
				description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 for large files like hierarchy.json or kb.csv."
			}
		},
		required: ["docId", "filePath"]
	};

	/**
	 * @throws {Error} When `docId` or `filePath` is missing, or when `mode` is "json" and the file is not valid JSON.
	 */
	const execute = async (_toolCallId, rawParams) => {
		const input = isRecord(rawParams) ? rawParams : {};
		const docId = readString(input.docId);
		const filePath = normalizeResultFilePath(input.filePath);
		if (!docId) throw new Error("docId is required.");
		if (!filePath) throw new Error("filePath is required.");

		const scope = params.store.resolveScope(params.ctx);
		const logTag = "knowhere: knowhere_read_result_file";
		const docRef = `scope=${scope.label} docId=${docId}`;
		const fileRef = `${docRef} filePath=${filePath}`;
		// Shared by the "not in index" and "no text content" outcomes.
		const fileNotFoundText = () => [
			"Result file not found.",
			`File path: ${filePath}`,
			`Document ID: ${docId}`,
			`Scope: ${scope.label}`
		].join("\n");

		const stored = await params.store.loadDocumentPayload(scope, docId);
		if (!stored) {
			params.api.logger.warn(`${logTag} document not found ${docRef}`);
			return textResult(formatStoredDocumentNotFound(docId, scope.label));
		}

		const fileEntry = findResultFile(stored.browseIndex, filePath);
		if (!fileEntry) {
			params.api.logger.warn(`${logTag} result file not found ${fileRef}`);
			return textResult(fileNotFoundText());
		}

		if (fileEntry.kind === "image") {
			const absolutePath = params.store.getResultFileAbsolutePath(scope, docId, filePath);
			const channelRoute = await params.store.resolveChannelRoute({ sessionKey: params.ctx.sessionKey });
			params.api.logger.info(`${logTag} staging image asset ${fileRef}`);
			return await buildImageToolResult({
				api: params.api,
				absolutePath,
				channelRoute,
				context: params.ctx,
				docId: stored.document.id,
				documentTitle: stored.document.title,
				filePath,
				file: fileEntry,
				sessionKey: params.ctx.sessionKey,
				scopeLabel: scope.label,
				workspaceDir: params.ctx.workspaceDir
			});
		}

		if (!isTextReadableResultFile(fileEntry)) {
			params.api.logger.warn(`${logTag} unreadable result kind ${fileRef} kind=${fileEntry.kind}`);
			return textResult([
				"Result file is not readable as text through this tool.",
				`File path: ${filePath}`,
				`Kind: ${fileEntry.kind}`,
				`Document ID: ${docId}`,
				`Scope: ${scope.label}`
			].join("\n"));
		}

		const fileContents = await params.store.readResultFile(scope, docId, filePath);
		if (!fileContents) {
			params.api.logger.warn(`${logTag} payload disappeared ${fileRef}`);
			return textResult(formatStoredDocumentNotFound(docId, scope.label));
		}
		if (fileContents.text === null) {
			params.api.logger.warn(`${logTag} text content missing ${fileRef}`);
			return textResult(fileNotFoundText());
		}

		const mode = readResultFileReadMode(input.mode);
		// Clamp the requested limit into the 100..20000 range advertised by the schema.
		const requestedChars = Math.trunc(readNumber(input.maxStringChars, 4000));
		const maxStringChars = Math.min(20000, Math.max(100, requestedChars));
		const normalizedText = stripUtf8Bom(fileContents.text);
		params.api.logger.info(`${logTag} reading file ${fileRef} kind=${fileEntry.kind} mode=${mode} maxStringChars=${maxStringChars}`);

		// Fields common to both the JSON and the text/CSV responses.
		const header = {
			scope: scope.label,
			docId: stored.document.id,
			documentTitle: stored.document.title,
			file: fileEntry,
			mode,
			maxStringChars
		};

		if (mode !== "json") {
			const data = mode === "csv"
				? buildCsvFilePayload(normalizedText, maxStringChars)
				: buildTextFilePayload(normalizedText, maxStringChars);
			params.api.logger.info(`${logTag} prepared text payload ${fileRef} lineCount=${data.lineCount}`);
			return formatJsonToolResult({ ...header, data });
		}

		let parsedJson;
		try {
			parsedJson = JSON.parse(normalizedText);
		} catch (error) {
			params.api.logger.warn(`${logTag} invalid json ${fileRef} error=${formatErrorMessage(error)}`);
			throw new Error(`Result file ${filePath} is not valid JSON. ${formatErrorMessage(error)}`, { cause: error });
		}
		const truncatedJson = truncateJsonValue(parsedJson, maxStringChars);
		params.api.logger.info(`${logTag} parsed json ${fileRef} truncated=${truncatedJson.truncated}`);
		return formatJsonToolResult({
			...header,
			truncatedStrings: truncatedJson.truncated,
			data: truncatedJson.value
		});
	};

	return {
		name: "knowhere_read_result_file",
		label: "Knowhere Read Result File",
		description: "Read a raw result file from the stored document's extracted ZIP package. Common files: manifest.json (parsing metadata), hierarchy.json (document structure), kb.csv (knowledge base export), table HTML files (e.g., tables/table-1.html), or image assets (e.g., images/img-0.png). Image files are staged into a local attachment path and sent directly to the current channel when routing can be resolved. If direct delivery is unavailable, the tool returns a message-tool handoff and, when the run has a workspace, a workspace-relative MEDIA fallback for a normal assistant reply. When the result mode is image_attachment, do not call generic file-read tools on data.stagedPath; use data.sendWithMessageTool or data.replyFallback as returned. When the result mode is image_sent, the plugin already delivered the image. Use mode='json' for JSON files, mode='csv' for CSV files, or mode='text' (default) for everything else. Increase maxStringChars (up to 20000) for large files.",
		parameters,
		execute
	};
}
|
|
1452
|
-
/**
 * Build the `knowhere_preview_document` tool definition.
 *
 * The tool renders a plain-text overview of one stored document: a short
 * metadata header followed by a table of contents built from the browse
 * index, with per-path chunk counts and a one-line summary taken from the
 * first chunk of each path that has non-empty summary or content.
 *
 * @param params Tool context; `params.store`, `params.api.logger` and `params.ctx` are read at call time.
 * @returns Tool descriptor with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createPreviewDocumentTool(params) {
	return {
		name: "knowhere_preview_document",
		label: "Knowhere Preview Document",
		description: "Get a structural overview of a stored Knowhere document. Returns the document metadata and a hierarchical table of contents showing sections, subsections, and chunk counts per path (text, image, table). Use this as the first step after identifying a docId to understand the document's structure before searching with knowhere_grep.",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: { docId: {
				type: "string",
				description: "Identifier of the stored document to preview."
			} },
			required: ["docId"]
		},
		/** @throws {Error} When `docId` is missing. */
		execute: async (_toolCallId, rawParams) => {
			const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
			if (!docId) throw new Error("docId is required.");
			const scope = params.store.resolveScope(params.ctx);
			const payload = await params.store.loadDocumentPayload(scope, docId);
			if (!payload) {
				params.api.logger.warn(`knowhere: knowhere_preview_document document not found scope=${scope.label} docId=${docId}`);
				return textResult(formatStoredDocumentNotFound(docId, scope.label));
			}
			const { document } = payload;

			// One summary line per path, taken from the first chunk that has any text.
			const pathSummaryMap = /* @__PURE__ */ new Map();
			for (const chunk of payload.chunks) {
				if (!chunk.path || pathSummaryMap.has(chunk.path)) continue;
				const raw = (chunk.summary || chunk.content || "").trim();
				if (!raw) continue;
				const oneLine = raw.replace(/\n+/g, " ");
				// Decide on the ellipsis from the collapsed text that is actually sliced;
				// comparing the raw length would add "..." to text that was not cut.
				const wasCut = oneLine.length > PREVIEW_SUMMARY_MAX_CHARS;
				pathSummaryMap.set(chunk.path, wasCut ? `${oneLine.slice(0, PREVIEW_SUMMARY_MAX_CHARS)}...` : oneLine);
			}

			const lines = [
				`"${document.title}" [${document.id}]`,
				`Scope: ${scope.label}`,
				`Source: ${document.sourceLabel}`,
				`Chunks: ${document.chunkCount}`
			];
			const pathByName = /* @__PURE__ */ new Map();
			for (const p of payload.browseIndex.paths) pathByName.set(p.path, p);
			const roots = payload.browseIndex.paths.filter((p) => p.depth === 1);
			params.api.logger.info(`knowhere: knowhere_preview_document building preview scope=${scope.label} docId=${docId} paths=${payload.browseIndex.paths.length} roots=${roots.length} chunks=${payload.chunks.length}`);

			if (roots.length === 0) {
				lines.push("");
				lines.push("No structural paths available for this document.");
				params.api.logger.warn(`knowhere: knowhere_preview_document no structural paths scope=${scope.label} docId=${docId}`);
				return textResult(lines.join("\n"));
			}

			lines.push("");
			lines.push("## Table of Contents");
			lines.push("");
			// Depth-first rendering: one bullet per path, children indented one level deeper.
			const renderTree = (pathRecord, indent) => {
				const prefix = " ".repeat(indent);
				// The label is the last segment of the path, split on "/" or "-->".
				const segments = pathRecord.path.split(/\/|-->/);
				const label = segments[segments.length - 1] || pathRecord.path;
				const counts = [];
				if (pathRecord.textChunkCount > 0) counts.push(`${pathRecord.textChunkCount} text`);
				if (pathRecord.imageChunkCount > 0) counts.push(`${pathRecord.imageChunkCount} img`);
				if (pathRecord.tableChunkCount > 0) counts.push(`${pathRecord.tableChunkCount} tbl`);
				const countStr = counts.length > 0 ? ` (${counts.join(", ")})` : "";
				const summary = pathSummaryMap.get(pathRecord.path);
				const summaryStr = summary ? ` — ${summary}` : "";
				lines.push(`${prefix}- ${label}${countStr}${summaryStr}`);
				for (const childPath of pathRecord.childPaths) {
					const child = pathByName.get(childPath);
					if (child) renderTree(child, indent + 1);
				}
			};
			for (const root of roots) renderTree(root, 0);
			return textResult(lines.join("\n"));
		}
	};
}
|
|
1526
|
-
/**
 * Build the `knowhere_list_documents` tool definition.
 *
 * The tool takes no arguments: it resolves the current scope, asks the store
 * for that scope's documents, logs the count and returns the formatted list.
 *
 * @param params Tool context; `params.store`, `params.api.logger` and `params.ctx` are read at call time.
 * @returns Tool descriptor with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createListTool(params) {
	const listDocuments = async () => {
		const scope = params.store.resolveScope(params.ctx);
		const found = await params.store.listDocuments(scope);
		params.api.logger.info(`knowhere: knowhere_list_documents listed documents scope=${scope.label} count=${found.length}`);
		const listing = formatDocumentList(found, scope.label);
		return textResult(listing);
	};

	return {
		name: "knowhere_list_documents",
		label: "Knowhere List",
		description: "List all Knowhere documents stored in the current scope. Returns each document's ID, title, source, chunk count, tags, and last-updated timestamp. Use this first to discover available documents, check whether a file or URL is already stored, and find the right docId before calling other tools.",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {}
		},
		execute: listDocuments
	};
}
|
|
1544
|
-
function createRemoveTool(params) {
|
|
1545
|
-
return {
|
|
1546
|
-
name: "knowhere_remove_document",
|
|
1547
|
-
label: "Knowhere Remove",
|
|
1548
|
-
description: "Remove a stored Knowhere document and all its extracted data from the current scope. This is irreversible — the document must be re-ingested or re-imported to restore it.",
|
|
1549
|
-
parameters: {
|
|
1550
|
-
type: "object",
|
|
1551
|
-
additionalProperties: false,
|
|
1552
|
-
properties: { docId: {
|
|
1553
|
-
type: "string",
|
|
1554
|
-
description: "Identifier of the stored document to remove."
|
|
1555
|
-
} },
|
|
1556
|
-
required: ["docId"]
|
|
1557
|
-
},
|
|
1558
|
-
execute: async (_toolCallId, rawParams) => {
|
|
1559
|
-
const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
|
|
1560
|
-
if (!docId) throw new Error("docId is required.");
|
|
1561
|
-
const scope = params.store.resolveScope(params.ctx);
|
|
1562
|
-
params.api.logger.info(`knowhere: knowhere_remove_document removing document scope=${scope.label} docId=${docId}`);
|
|
1563
|
-
const removed = await params.store.removeDocument(scope, docId);
|
|
1564
|
-
if (!removed) {
|
|
1565
|
-
params.api.logger.warn(`knowhere: knowhere_remove_document document not found scope=${scope.label} docId=${docId}`);
|
|
1566
|
-
return textResult(formatStoredDocumentNotFound(docId, scope.label));
|
|
1567
|
-
}
|
|
1568
|
-
params.api.logger.info(`knowhere: knowhere_remove_document removed document scope=${scope.label} docId=${removed.id}`);
|
|
1569
|
-
return textResult([
|
|
1570
|
-
"Removed stored document.",
|
|
1571
|
-
`Document ID: ${removed.id}`,
|
|
1572
|
-
`Title: ${removed.title}`,
|
|
1573
|
-
`Scope: ${scope.label}`
|
|
816
|
+
"Document imported successfully. Use knowhere_kg_query to search its content."
|
|
1574
817
|
].join("\n"));
|
|
1575
818
|
}
|
|
1576
819
|
};
|
|
1577
820
|
}
|
|
1578
|
-
/**
 * Build the `knowhere_clear_scope` tool definition.
 *
 * The tool removes every stored document in the current scope, but only when
 * the caller passes `confirm: true`; otherwise it logs a warning and returns a
 * reminder without touching the store.
 *
 * @param params Tool context; `params.store`, `params.api.logger` and `params.ctx` are read at call time.
 * @returns Tool descriptor with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createClearScopeTool(params) {
	const clearScope = async (_toolCallId, rawParams) => {
		const input = isRecord(rawParams) ? rawParams : {};
		const scope = params.store.resolveScope(params.ctx);
		const confirmed = readBoolean(input.confirm, false);

		if (confirmed) {
			params.api.logger.info(`knowhere: knowhere_clear_scope clearing scope scope=${scope.label}`);
			const removedDocuments = await params.store.clearScope(scope);
			params.api.logger.info(`knowhere: knowhere_clear_scope cleared scope scope=${scope.label} removed=${removedDocuments.length}`);
			return textResult(formatScopeClearResult(removedDocuments, scope.label));
		}

		// Destructive operation: do nothing unless explicitly confirmed.
		params.api.logger.warn(`knowhere: knowhere_clear_scope skipped without confirm scope=${scope.label}`);
		return textResult(`Set confirm=true to clear scope ${scope.label}.`);
	};

	return {
		name: "knowhere_clear_scope",
		label: "Knowhere Clear Scope",
		description: "Remove all stored Knowhere documents from the current scope. This is irreversible and affects every document in the scope. Set confirm=true to execute.",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				confirm: {
					type: "boolean",
					description: "Must be true to clear the current scope."
				}
			}
		},
		execute: clearScope
	};
}
|
|
1605
821
|
function createSetApiKeyTool(params) {
|
|
1606
822
|
return {
|
|
1607
823
|
name: "knowhere_set_api_key",
|
|
@@ -1717,6 +933,380 @@ function createKgQueryTool(params) {
|
|
|
1717
933
|
}
|
|
1718
934
|
};
|
|
1719
935
|
}
|
|
936
|
+
// Root of the on-disk knowledge-base tree: the ".knowhere" directory inside the current user's home directory.
const T2_KNOWHERE_HOME = path.join(os.homedir(), ".knowhere");
|
|
937
|
+
/**
 * Locate a document directory inside a knowledge-base directory.
 *
 * Resolution order:
 * 1. `kbDir/docName`, when it is an existing directory located inside `kbDir`;
 * 2. otherwise the first sub-directory of `kbDir` whose name contains `docName`.
 *
 * `docName` comes from tool arguments, so the exact lookup refuses names that
 * resolve outside `kbDir` (for example "../other") and paths that are regular
 * files rather than directories.
 *
 * @param {string} kbDir Knowledge-base directory to search.
 * @param {string} docName Exact or partial document directory name.
 * @returns {Promise<string|null>} Path of the matching directory, or null when nothing matches or `kbDir` cannot be read.
 */
async function t2FindDocDir(kbDir, docName) {
	const exactPath = path.join(kbDir, docName);
	const relative = path.relative(path.resolve(kbDir), path.resolve(exactPath));
	const escapesKbDir = relative === "" || relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
	if (!escapesKbDir) {
		try {
			if ((await fs.stat(exactPath)).isDirectory()) return exactPath;
		} catch {
			// No exact match on disk; fall through to the substring search.
		}
	}
	let entries;
	try {
		entries = await fs.readdir(kbDir, { withFileTypes: true });
	} catch {
		return null;
	}
	const match = entries.find((entry) => entry.isDirectory() && String(entry.name).includes(docName));
	return match ? path.join(kbDir, String(match.name)) : null;
}
|
|
952
|
+
/**
 * Load the chunks of a document directory.
 *
 * Tries `chunks_slim.json` first and falls back to `chunks.json`. Each file may
 * hold either a bare array or an object with a `chunks` array. Entries read
 * from `chunks.json` are projected to `{ type, path, content, summary }`;
 * entries from `chunks_slim.json` are returned as stored.
 *
 * Entries that are not objects are dropped, so one malformed element no longer
 * causes the whole file to be discarded.
 *
 * @param {string} docDir Document directory.
 * @returns {Promise<object[]>} Chunk objects, or an empty array when no usable file exists.
 */
async function t2LoadChunks(docDir) {
	for (const fname of ["chunks_slim.json", "chunks.json"]) {
		let data;
		try {
			data = JSON.parse(await fs.readFile(path.join(docDir, fname), "utf-8"));
		} catch {
			// Missing, unreadable or invalid file: try the next candidate.
			continue;
		}
		let entries;
		if (Array.isArray(data)) entries = data;
		else if (isRecord(data) && Array.isArray(data.chunks)) entries = data.chunks;
		else continue;
		const chunks = entries.filter((entry) => isRecord(entry));
		if (fname !== "chunks.json") return chunks;
		return chunks.map((c) => ({
			type: c.type || "text",
			path: c.path || "",
			content: c.content || "",
			summary: c.metadata?.summary || c.summary || ""
		}));
	}
	return [];
}
|
|
972
|
+
/**
 * Read the unprojected chunk list from `chunks.json` in a document directory.
 *
 * Accepts either a bare array or an object carrying a `chunks` array.
 *
 * @param {string} docDir Document directory.
 * @returns {Promise<Array>} The stored chunk array, or an empty array when the file is missing, unreadable, invalid or of another shape.
 */
async function t2LoadRawChunks(docDir) {
	let parsed;
	try {
		const text = await fs.readFile(path.join(docDir, "chunks.json"), "utf-8");
		parsed = JSON.parse(text);
	} catch {
		return [];
	}
	if (Array.isArray(parsed)) return parsed;
	const wrapped = isRecord(parsed) && Array.isArray(parsed.chunks);
	return wrapped ? parsed.chunks : [];
}
|
|
983
|
+
function t2ComputeTfIdfKeywords(rawChunks, topK = 10) {
|
|
984
|
+
const df = {};
|
|
985
|
+
const tf = {};
|
|
986
|
+
const totalDocs = rawChunks.length || 1;
|
|
987
|
+
for (const c of rawChunks) {
|
|
988
|
+
const tokens = Array.isArray(c.metadata?.tokens) ? c.metadata.tokens : [];
|
|
989
|
+
const keywords = Array.isArray(c.metadata?.keywords) ? c.metadata.keywords : [];
|
|
990
|
+
const allTerms = [...tokens, ...keywords];
|
|
991
|
+
const seen = /* @__PURE__ */ new Set();
|
|
992
|
+
for (const t of allTerms) {
|
|
993
|
+
if (!t || t.length <= 1) continue;
|
|
994
|
+
if (/^\d+[.,%]*$/.test(t)) continue;
|
|
995
|
+
const lower = t.toLowerCase();
|
|
996
|
+
tf[lower] = (tf[lower] || 0) + 1;
|
|
997
|
+
if (!seen.has(lower)) {
|
|
998
|
+
df[lower] = (df[lower] || 0) + 1;
|
|
999
|
+
seen.add(lower);
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
}
|
|
1003
|
+
const scored = Object.entries(tf).map(([term, freq]) => {
|
|
1004
|
+
return {
|
|
1005
|
+
term,
|
|
1006
|
+
score: freq * (Math.log(totalDocs / (df[term] || 1)) + 1)
|
|
1007
|
+
};
|
|
1008
|
+
});
|
|
1009
|
+
scored.sort((a, b) => b.score - a.score);
|
|
1010
|
+
return scored.slice(0, topK).map((s) => s.term);
|
|
1011
|
+
}
|
|
1012
|
+
/**
 * Decide whether a stored keyword list is too poor to keep.
 *
 * A keyword is unusable when it is not a string, is at most one character
 * long, is purely numeric (optionally followed by `.`, `,` or `%`), or consists
 * of only one or two ASCII letters. Non-string entries are counted as unusable
 * too, so a list of numbers or objects is no longer treated as healthy.
 *
 * @param {*} keywords Value stored as a document's keyword list.
 * @returns {boolean} True when the value is not a non-empty array or at least half of its entries are unusable.
 */
function t2KeywordsNeedRepair(keywords) {
	if (!Array.isArray(keywords) || keywords.length === 0) return true;
	const isUsable = (kw) => typeof kw === "string"
		&& kw.length > 1
		&& !/^\d+[.,%]*$/.test(kw)
		&& !/^[a-z]{1,2}$/i.test(kw);
	let bad = 0;
	for (const kw of keywords) if (!isUsable(kw)) bad++;
	return bad >= keywords.length * 0.5;
}
|
|
1018
|
+
/**
 * Wrap a value as a tool result whose single text item is the value
 * pretty-printed as JSON with two-space indentation.
 *
 * @param {*} data Value to serialise.
 * @returns {{content: Array<{type: string, text: string}>, details: object}} Tool result with empty `details`.
 */
function t2JsonResult(data) {
	const text = JSON.stringify(data, null, 2);
	const textItem = { type: "text", text };
	return { content: [textItem], details: {} };
}
|
|
1027
|
+
/**
 * List the sub-directories of a knowledge-base root that contain a
 * `chunks.json` file.
 *
 * @param {string} kbRoot Knowledge-base root directory.
 * @returns {Promise<string[]>} Names of the matching sub-directories, in the order the directory listing returned them; empty when `kbRoot` cannot be read.
 */
async function t2ListDocDirs(kbRoot) {
	let children;
	try {
		children = await fs.readdir(kbRoot, { withFileTypes: true });
	} catch {
		return [];
	}
	const names = [];
	for (const child of children) {
		if (!child.isDirectory()) continue;
		const name = String(child.name);
		// A directory counts as a document only when its chunks.json is accessible.
		const hasChunks = await fs.access(path.join(kbRoot, name, "chunks.json")).then(() => true, () => false);
		if (hasChunks) names.push(name);
	}
	return names;
}
|
|
1046
|
+
/**
 * Refresh the graph entries whose stored keywords look unusable.
 *
 * For each such entry the document's raw chunks are loaded and `top_keywords`,
 * `types` (chunk count per chunk type) and `chunks_count` are recomputed in
 * place. Each entry is handled independently so that one malformed entry or
 * chunk file cannot abort the others.
 *
 * @param {string} kbRoot Knowledge-base directory holding the document folders.
 * @param {object} files The `files` mapping of the graph (document name → info object).
 * @returns {Promise<boolean>} True when at least one entry was modified.
 */
async function t2RepairGraphKeywords(kbRoot, files) {
	let changed = false;
	for (const [docName, info] of Object.entries(files)) {
		if (!isRecord(info) || !t2KeywordsNeedRepair(info.top_keywords)) continue;
		try {
			const loaded = await t2LoadRawChunks(path.join(kbRoot, docName));
			const rawChunks = loaded.filter((chunk) => isRecord(chunk));
			if (rawChunks.length === 0) continue;
			const repaired = t2ComputeTfIdfKeywords(rawChunks);
			if (repaired.length === 0) continue;
			// Count in a Map so chunk types named like inherited object members are counted correctly.
			const typeCounts = /* @__PURE__ */ new Map();
			for (const chunk of rawChunks) {
				const type = chunk.type || "text";
				typeCounts.set(type, (typeCounts.get(type) ?? 0) + 1);
			}
			info.top_keywords = repaired;
			info.types = Object.fromEntries(typeCounts);
			info.chunks_count = rawChunks.length;
			changed = true;
		} catch {
			// Repair is opportunistic: leave this entry as stored and carry on.
			continue;
		}
	}
	return changed;
}

/**
 * Build the `knowhere_get_map` tool definition.
 *
 * The tool scans the sub-directories of T2_KNOWHERE_HOME (optionally only the
 * one named `kbId`) and reports one entry per knowledge base:
 * - when `knowledge_graph.json` parses to an object, its version, stats, files
 *   and edges are returned; entries with unusable keywords are repaired first
 *   and the repaired graph is written back on a best-effort basis;
 * - otherwise the KB is reported with version "pending" and the document
 *   folders that contain a `chunks.json`, if there are any.
 *
 * @param _params Unused.
 * @returns Tool descriptor with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createGetMapTool(_params) {
	return {
		name: "knowhere_get_map",
		label: "Knowhere Get Map",
		description: "获取知识库全局概览。查询知识时必须先调此工具,了解有哪些文档、关键词、重要性和跨文件关联。然后用 knowhere_get_structure 查看具体文档的章节目录。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: { kbId: {
				type: "string",
				description: "Optional: specific KB ID. Leave empty to scan all."
			} }
		},
		execute: async (_toolCallId, rawParams) => {
			const kbId = readString((isRecord(rawParams) ? rawParams : {}).kbId) || "";
			// List the home directory in one step: a separate existence check would
			// leave a window in which the listing can still fail with an uncaught error.
			let entries;
			try {
				entries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
			} catch {
				return textResult(`未找到知识库目录 ${T2_KNOWHERE_HOME}`);
			}
			const kbs = [];
			for (const e of entries) {
				if (!e.isDirectory()) continue;
				if (kbId && e.name !== kbId) continue;
				const kbRoot = path.join(T2_KNOWHERE_HOME, e.name);
				const kgPath = path.join(kbRoot, "knowledge_graph.json");
				let g = null;
				try {
					g = JSON.parse(await fs.readFile(kgPath, "utf-8"));
				} catch {
					// Missing or invalid graph file: handled by the "pending" branch below.
				}
				if (!isRecord(g)) {
					const docs = await t2ListDocDirs(kbRoot);
					if (docs.length > 0) kbs.push({
						kb_id: e.name,
						version: "pending",
						files: Object.fromEntries(docs.map((d) => [d, {}])),
						edges: []
					});
					continue;
				}
				const kgDirty = await t2RepairGraphKeywords(kbRoot, isRecord(g.files) ? g.files : {});
				if (kgDirty) {
					g.updated_at = (/* @__PURE__ */ new Date()).toISOString();
					try {
						await fs.writeFile(kgPath, JSON.stringify(g, null, 2), "utf-8");
					} catch {
						// Persisting the repair is best effort; the repaired graph is still returned.
					}
				}
				kbs.push({
					kb_id: e.name,
					version: g.version || "1.0",
					updated_at: g.updated_at || "",
					stats: g.stats || {},
					files: g.files || {},
					edges: g.edges || []
				});
			}
			if (kbs.length === 0) return textResult("未找到知识库。");
			return t2JsonResult({
				status: "ok",
				knowledge_bases: kbs
			});
		}
	};
}
|
|
1126
|
+
/**
 * Build the `knowhere_get_structure` tool definition.
 *
 * The tool resolves a document directory under T2_KNOWHERE_HOME/<kbId> and
 * returns its parsed `hierarchy.json`. When that file cannot be read or parsed
 * it returns the sorted, de-duplicated chunk paths of the document instead.
 *
 * `kbId` is a tool argument that ends up in a filesystem path, so only a
 * single directory name is accepted; values containing path separators, "."
 * or ".." are answered with the same "not found" text as an unknown document.
 *
 * @param _params Unused.
 * @returns Tool descriptor with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createGetStructureTool(_params) {
	return {
		name: "knowhere_get_structure",
		label: "Knowhere Get Structure",
		description: "获取文档章节目录。先调 knowhere_get_map 确定 kbId 和文档名后,用此工具查看章节结构,然后用 knowhere_read_chunks 读取内容。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				kbId: {
					type: "string",
					description: "Knowledge base ID (from knowhere_get_map result)"
				},
				docName: {
					type: "string",
					description: "Document name (supports fuzzy match)"
				}
			},
			required: ["kbId", "docName"]
		},
		/** @throws {Error} When `kbId` or `docName` is missing. */
		execute: async (_toolCallId, rawParams) => {
			const paramsRecord = isRecord(rawParams) ? rawParams : {};
			const kbId = readString(paramsRecord.kbId);
			const docName = readString(paramsRecord.docName);
			if (!kbId || !docName) throw new Error("kbId and docName are required.");
			// A KB id must name a direct child of the home directory, never a path.
			const kbIdIsDirName = kbId === path.basename(kbId) && kbId !== "." && kbId !== "..";
			const docDir = kbIdIsDirName ? await t2FindDocDir(path.join(T2_KNOWHERE_HOME, kbId), docName) : null;
			if (!docDir) return textResult(`文档 '${docName}' 在 kb=${kbId} 中不存在`);
			let hierarchy;
			try {
				hierarchy = JSON.parse(await fs.readFile(path.join(docDir, "hierarchy.json"), "utf-8"));
			} catch {
				// No usable hierarchy.json: fall back to the list of chunk paths.
				const chunks = await t2LoadChunks(docDir);
				const paths = [...new Set(chunks.map((c) => c.path).filter(Boolean))].sort();
				return t2JsonResult({
					status: "ok",
					kb_id: kbId,
					doc_name: path.basename(docDir),
					hierarchy: null,
					chunk_paths: paths,
					hint: "无 hierarchy.json,已返回 chunk 路径列表"
				});
			}
			return t2JsonResult({
				status: "ok",
				kb_id: kbId,
				doc_name: path.basename(docDir),
				hierarchy
			});
		}
	};
}
|
|
1176
|
+
/**
 * Build the `knowhere_read_chunks` tool definition.
 *
 * The tool looks up a document directory under `T2_KNOWHERE_HOME/<kbId>`,
 * loads its chunks, optionally keeps only the chunks whose `path` contains
 * `sectionPath`, and returns at most `maxChunks` of them.
 *
 * @param {object} _params - Factory parameters; not read by this tool.
 * @returns {object} Tool descriptor with `name`, `label`, `description`,
 *   a JSON-schema `parameters` object and an async `execute` handler.
 */
function createReadChunksTool(_params) {
	const DEFAULT_MAX_CHUNKS = 50;
	return {
		name: "knowhere_read_chunks",
		label: "Knowhere Read Chunks",
		description: "读取文档内容。先调 knowhere_get_structure 确定章节后,用此工具读取具体内容。可通过 sectionPath 过滤特定章节,减少 token 消耗。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				kbId: {
					type: "string",
					description: "Knowledge base ID"
				},
				docName: {
					type: "string",
					description: "Document name"
				},
				sectionPath: {
					type: "string",
					description: "Optional: section path prefix to filter (e.g. '一、工程概况')"
				},
				maxChunks: {
					type: "number",
					description: "Max chunks to return (default 50)"
				}
			},
			required: ["kbId", "docName"]
		},
		/**
		 * @param {string} _toolCallId - Unused tool-call identifier.
		 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
		 * @returns {Promise<object>} A text result when the document is not found,
		 *   otherwise a JSON result holding the selected chunks and counters.
		 * @throws {Error} When `kbId`/`docName` are missing or `kbId` points outside the Knowhere home.
		 */
		execute: async (_toolCallId, rawParams) => {
			const paramsRecord = isRecord(rawParams) ? rawParams : {};
			const kbId = readString(paramsRecord.kbId);
			const docName = readString(paramsRecord.docName);
			const sectionPath = readString(paramsRecord.sectionPath);
			const requestedMax = readNumber(paramsRecord.maxChunks, DEFAULT_MAX_CHUNKS);
			if (!kbId || !docName) throw new Error("kbId and docName are required.");

			// kbId comes from the tool caller; refuse values such as "../.." that
			// would make the reads and the knowledge_graph.json rewrite below
			// land outside the knowledge-base root.
			const kbDir = path.join(T2_KNOWHERE_HOME, kbId);
			const relativeToHome = path.relative(path.resolve(T2_KNOWHERE_HOME), path.resolve(kbDir));
			if (relativeToHome === ".." || relativeToHome.startsWith(`..${path.sep}`) || path.isAbsolute(relativeToHome)) {
				throw new Error("kbId must resolve to a directory inside the Knowhere home.");
			}

			const docDir = await t2FindDocDir(kbDir, docName);
			if (!docDir) return textResult(`文档 '${docName}' 不存在`);

			let chunks = await t2LoadChunks(docDir);
			if (sectionPath) {
				// Substring match on the chunk path. Chunks without a string path
				// can never match and must not raise a TypeError.
				chunks = chunks.filter((chunk) => typeof chunk.path === "string" && chunk.path.includes(sectionPath));
			}
			const total = chunks.length;

			// Only a finite limit >= 1 is honoured. A negative value handed to
			// slice() would silently drop chunks from the end instead of capping.
			const limit = typeof requestedMax === "number" && Number.isFinite(requestedMax) && requestedMax >= 1
				? Math.floor(requestedMax)
				: DEFAULT_MAX_CHUNKS;
			chunks = chunks.slice(0, limit);

			const resolvedDocName = path.basename(docDir);
			try {
				const kgPath = path.join(kbDir, "knowledge_graph.json");
				const graph = JSON.parse(await fs.readFile(kgPath, "utf-8"));
				const fileEntry = graph.files?.[resolvedDocName];
				if (fileEntry) {
					const now = (/* @__PURE__ */ new Date()).toISOString();
					fileEntry.hit_count = (fileEntry.hit_count || 0) + 1;
					fileEntry.last_hit = now;
					graph.updated_at = now;
					await fs.writeFile(kgPath, JSON.stringify(graph, null, 2), "utf-8");
				}
			} catch {
				// Hit statistics are best-effort bookkeeping: a missing, unreadable
				// or malformed knowledge_graph.json must not fail the read itself.
			}

			return t2JsonResult({
				status: "ok",
				kb_id: kbId,
				doc_name: resolvedDocName,
				section_path: sectionPath || null,
				total_chunks: total,
				returned: chunks.length,
				truncated: total > limit,
				chunks
			});
		}
	};
}
|
|
1242
|
+
/**
 * Build the `knowhere_discover_files` tool definition.
 *
 * The tool splits the query into keywords, walks every
 * `<T2_KNOWHERE_HOME>/<kb>/<doc>` directory (optionally restricted to one
 * knowledge base), counts keyword occurrences across each document's chunk
 * `content` and `summary`, and returns the matching documents ordered by
 * hit count. Only names and counts are returned, never chunk text.
 *
 * @param {object} _params - Factory parameters; not read by this tool.
 * @returns {object} Tool descriptor with `name`, `label`, `description`,
 *   a JSON-schema `parameters` object and an async `execute` handler.
 */
function createDiscoverFilesTool(_params) {
	return {
		name: "knowhere_discover_files",
		label: "Knowhere Discover Files",
		description: "在所有知识库文档中搜索关键词,返回命中文件和次数。用于和 knowhere_get_map 做并集,避免遗漏相关文件。只返回文件名,不返回内容。",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				query: {
					type: "string",
					description: "Search keywords"
				},
				kbId: {
					type: "string",
					description: "Optional: limit to specific KB"
				}
			},
			required: ["query"]
		},
		/**
		 * @param {string} _toolCallId - Unused tool-call identifier.
		 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
		 * @returns {Promise<object>} A text result when there are no usable terms or no
		 *   knowledge-base root, otherwise a JSON result listing `discovered_files`.
		 * @throws {Error} When `query` is missing.
		 */
		execute: async (_toolCallId, rawParams) => {
			const paramsRecord = isRecord(rawParams) ? rawParams : {};
			const query = readString(paramsRecord.query);
			const kbId = readString(paramsRecord.kbId);
			if (!query) throw new Error("query is required.");

			// Keep terms longer than one character, de-duplicated case-insensitively
			// so a repeated keyword cannot inflate a document's hit count.
			const termsByNeedle = new Map();
			for (const rawTerm of query.split(/[\s,;,;。!?、\-/]+/)) {
				if (rawTerm.length <= 1) continue;
				const needle = rawTerm.toLowerCase();
				if (!termsByNeedle.has(needle)) termsByNeedle.set(needle, rawTerm);
			}
			const terms = [...termsByNeedle.values()];
			const needles = [...termsByNeedle.keys()];
			if (terms.length === 0) return textResult("查询词为空");

			try {
				await fs.access(T2_KNOWHERE_HOME);
			} catch {
				return textResult("未找到知识库。");
			}

			const results = [];
			const kbEntries = await fs.readdir(T2_KNOWHERE_HOME, { withFileTypes: true });
			for (const kbEntry of kbEntries) {
				if (!kbEntry.isDirectory()) continue;
				if (kbId && kbEntry.name !== kbId) continue;
				const kbDir = path.join(T2_KNOWHERE_HOME, kbEntry.name);
				let docEntries;
				try {
					docEntries = await fs.readdir(kbDir, { withFileTypes: true });
				} catch {
					// An unreadable knowledge-base directory is skipped, not fatal.
					continue;
				}
				for (const docEntry of docEntries) {
					if (!docEntry.isDirectory()) continue;
					const chunks = await t2LoadChunks(path.join(kbDir, docEntry.name));
					let hits = 0;
					for (const chunk of chunks) {
						// `?? ""` stops missing fields from being searched as the
						// literal words "undefined" / "null".
						const haystack = `${chunk.content ?? ""} ${chunk.summary ?? ""}`.toLowerCase();
						for (const needle of needles) if (haystack.includes(needle)) hits++;
					}
					if (hits > 0) results.push({
						kb_id: kbEntry.name,
						doc_name: docEntry.name,
						hit_count: hits
					});
				}
			}
			results.sort((a, b) => b.hit_count - a.hit_count);
			return t2JsonResult({
				status: "ok",
				query,
				terms,
				discovered_files: results
			});
		}
	};
}
|
|
1720
1310
|
function createKnowhereToolFactory(params) {
|
|
1721
1311
|
return (ctx) => [
|
|
1722
1312
|
createIngestTool({
|
|
@@ -1742,36 +1332,7 @@ function createKnowhereToolFactory(params) {
|
|
|
1742
1332
|
api: params.api,
|
|
1743
1333
|
config: params.config,
|
|
1744
1334
|
store: params.store,
|
|
1745
|
-
|
|
1746
|
-
}),
|
|
1747
|
-
createGrepTool({
|
|
1748
|
-
api: params.api,
|
|
1749
|
-
store: params.store,
|
|
1750
|
-
ctx
|
|
1751
|
-
}),
|
|
1752
|
-
createReadResultFileTool({
|
|
1753
|
-
api: params.api,
|
|
1754
|
-
store: params.store,
|
|
1755
|
-
ctx
|
|
1756
|
-
}),
|
|
1757
|
-
createPreviewDocumentTool({
|
|
1758
|
-
api: params.api,
|
|
1759
|
-
store: params.store,
|
|
1760
|
-
ctx
|
|
1761
|
-
}),
|
|
1762
|
-
createListTool({
|
|
1763
|
-
api: params.api,
|
|
1764
|
-
store: params.store,
|
|
1765
|
-
ctx
|
|
1766
|
-
}),
|
|
1767
|
-
createRemoveTool({
|
|
1768
|
-
api: params.api,
|
|
1769
|
-
store: params.store,
|
|
1770
|
-
ctx
|
|
1771
|
-
}),
|
|
1772
|
-
createClearScopeTool({
|
|
1773
|
-
api: params.api,
|
|
1774
|
-
store: params.store,
|
|
1335
|
+
kgService: params.kgService,
|
|
1775
1336
|
ctx
|
|
1776
1337
|
}),
|
|
1777
1338
|
createSetApiKeyTool({
|
|
@@ -1787,7 +1348,11 @@ function createKnowhereToolFactory(params) {
|
|
|
1787
1348
|
api: params.api,
|
|
1788
1349
|
kgService: params.kgService,
|
|
1789
1350
|
ctx
|
|
1790
|
-
})
|
|
1351
|
+
}),
|
|
1352
|
+
createGetMapTool({ api: params.api }),
|
|
1353
|
+
createGetStructureTool({ api: params.api }),
|
|
1354
|
+
createReadChunksTool({ api: params.api }),
|
|
1355
|
+
createDiscoverFilesTool({ api: params.api })
|
|
1791
1356
|
];
|
|
1792
1357
|
}
|
|
1793
1358
|
//#endregion
|