@ontos-ai/knowhere-claw 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tools.js CHANGED
@@ -1,29 +1,28 @@
1
1
  import { isRecord } from "./types.js";
2
2
  import { assertKnowhereApiKey, formatPaymentRequiredMessage, isPaymentRequiredError, persistApiKey } from "./config.js";
3
- import { normalizeForGrep, normalizeWhitespace, sanitizeStringArray, slugify } from "./text.js";
3
+ import { resolveStoredKnowhereArtifactPath } from "./parser.js";
4
+ import { sanitizeStringArray } from "./text.js";
4
5
  import { formatErrorMessage } from "./error-message.js";
5
6
  import { KnowhereClient } from "./client.js";
6
- import { deliverChannelMessage } from "./channel-delivery.js";
7
7
  import { sendTrackerProgress } from "./tracker-progress.js";
8
8
  import fs from "node:fs/promises";
9
9
  import path from "node:path";
10
- import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/core";
11
10
  //#region src/tools.ts
11
+ const TERMINAL_JOB_STATUSES = new Set([
12
+ "cancelled",
13
+ "canceled",
14
+ "done",
15
+ "error",
16
+ "failed"
17
+ ]);
12
18
  async function buildKnowledgeGraphAsync(params) {
13
19
  const docDir = path.join(params.scope.documentsDir, params.docId);
14
- const metadataPath = path.join(docDir, "metadata.json");
15
- let payload = null;
16
- try {
17
- payload = await fs.readFile(metadataPath, "utf-8");
18
- } catch {
19
- payload = null;
20
- }
21
- if (!payload) {
20
+ const payloadData = await params.store.readDocumentMetadata(params.scope, params.docId);
21
+ if (!payloadData) {
22
22
  params.api.logger.warn(`knowhere: cannot build KG, metadata not found for docId=${params.docId}`);
23
23
  return;
24
24
  }
25
- const payloadData = JSON.parse(payload);
26
- const fullMarkdownPath = path.join(docDir, "full_markdown.txt");
25
+ const fullMarkdownPath = await resolveStoredKnowhereArtifactPath(docDir, "full_markdown.txt");
27
26
  let fullMarkdown = "";
28
27
  try {
29
28
  fullMarkdown = await fs.readFile(fullMarkdownPath, "utf-8");
@@ -37,8 +36,8 @@ async function buildKnowledgeGraphAsync(params) {
37
36
  sourcePath: docDir,
38
37
  keywords,
39
38
  metadata: {
40
- title: payloadData.document?.title || "Untitled",
41
- sourceLabel: payloadData.document?.sourceLabel || "Unknown",
39
+ title: payloadData.document.title || "Untitled",
40
+ sourceLabel: payloadData.document.sourceLabel || "Unknown",
42
41
  checksum: params.documentPayload.downloadedResult.rawZipSha1,
43
42
  ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
44
43
  }
@@ -48,7 +47,6 @@ async function buildKnowledgeGraphAsync(params) {
48
47
  });
49
48
  params.api.logger.info(`knowhere: knowledge graph build completed kbId=${params.kbId} docId=${params.docId}`);
50
49
  }
51
- const PREVIEW_SUMMARY_MAX_CHARS = 120;
52
50
  const INGEST_TRACKER_LANGUAGES = new Set(["ch", "en"]);
53
51
  function textResult(text) {
54
52
  return {
@@ -69,13 +67,6 @@ function deriveStoredDocumentDisplayName(document) {
69
67
  function deriveStoredDocumentFileLabel(document, manifestSourceFileName) {
70
68
  return document.originalFileName || document.fileName || manifestSourceFileName || "unknown";
71
69
  }
72
- function formatStoredDocumentNotFound(docId, scopeLabel) {
73
- return [
74
- "Stored document not found.",
75
- `Document ID: ${docId}`,
76
- `Scope: ${scopeLabel}`
77
- ].join("\n");
78
- }
79
70
  function buildStoredDocumentSummaryLines(params) {
80
71
  const lines = [`Stored document: "${params.document.title}" [${params.document.id}]`, `Scope: ${params.scopeLabel}`];
81
72
  if (params.includeSource) lines.push(`Source: ${params.document.sourceLabel}`);
@@ -95,6 +86,69 @@ function buildStoredDocumentSummaryLines(params) {
95
86
  if (params.includeUpdatedAt) lines.push(`Updated: ${params.document.updatedAt}`);
96
87
  return lines;
97
88
  }
89
+ function isTerminalJobStatus(status, hasError) {
90
+ return TERMINAL_JOB_STATUSES.has(status.trim().toLowerCase()) || hasError;
91
+ }
92
+ function startKnowledgeGraphBuild(params) {
93
+ if (!params.kgService.isEnabled()) return;
94
+ const kbId = params.kgService.resolveKbId(params.ctx);
95
+ if (!kbId) return;
96
+ params.api.logger.info(`knowhere: initiating knowledge graph build kbId=${kbId} docId=${params.document.id}`);
97
+ buildKnowledgeGraphAsync({
98
+ kgService: params.kgService,
99
+ kbId,
100
+ docId: params.document.id,
101
+ documentPayload: params.ingestResult,
102
+ scope: params.scope,
103
+ store: params.store,
104
+ ctx: params.ctx,
105
+ api: params.api,
106
+ channelRoute: params.channelRoute,
107
+ sessionKey: params.sessionKey
108
+ }).catch((kgError) => {
109
+ params.api.logger.error(`knowhere: knowledge graph build failed kbId=${kbId} docId=${params.document.id}: ${formatErrorMessage(kgError)}`);
110
+ });
111
+ }
112
+ async function persistIngestedDocument(params) {
113
+ const storedDocument = await params.store.saveDownloadedDocument(params.scope, {
114
+ sourceType: params.sourceType,
115
+ source: params.source,
116
+ fileName: params.fileName,
117
+ docId: params.docId,
118
+ title: params.title,
119
+ dataId: params.dataId,
120
+ tags: params.tags,
121
+ job: params.ingestResult.job,
122
+ jobResult: params.ingestResult.jobResult,
123
+ downloadedResult: params.ingestResult.downloadedResult
124
+ }, { overwrite: params.overwrite });
125
+ params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${params.scope.label} jobId=${params.ingestResult.job.job_id} docId=${storedDocument.id}`);
126
+ startKnowledgeGraphBuild({
127
+ api: params.api,
128
+ channelRoute: params.channelRoute,
129
+ ctx: params.ctx,
130
+ document: storedDocument,
131
+ ingestResult: params.ingestResult,
132
+ kgService: params.kgService,
133
+ scope: params.scope,
134
+ store: params.store,
135
+ sessionKey: params.sessionKey
136
+ });
137
+ return storedDocument;
138
+ }
139
+ function formatCompletedIngestResult(params) {
140
+ return [
141
+ "Ingest complete.",
142
+ ...buildStoredDocumentSummaryLines({
143
+ document: params.document,
144
+ scopeLabel: params.scopeLabel,
145
+ includeJobId: true,
146
+ includeSource: true
147
+ }),
148
+ `Source type: ${params.sourceType}`,
149
+ "Next: use knowhere_kg_query to search the knowledge graph for related content."
150
+ ].join("\n");
151
+ }
98
152
  function readString(value) {
99
153
  return typeof value === "string" && value.trim() ? value.trim() : void 0;
100
154
  }
@@ -216,22 +270,6 @@ function normalizeParsingParams(rawParsing) {
216
270
  }
217
271
  return Object.keys(result).length > 0 ? result : void 0;
218
272
  }
219
- function formatDocumentList(documents, scopeLabel) {
220
- if (documents.length === 0) return `No stored documents in scope ${scopeLabel}.`;
221
- return documents.map((document, index) => {
222
- const displayTitle = deriveStoredDocumentDisplayName(document);
223
- const lines = [
224
- `${index + 1}. ${displayTitle} [${document.id}]`,
225
- `Source: ${document.sourceLabel}`,
226
- `Chunks: ${document.chunkCount}`,
227
- `Updated: ${document.updatedAt || document.ingestedAt}`
228
- ];
229
- if (document.title && document.title !== displayTitle) lines.push(`Title: ${document.title}`);
230
- if (document.tags.length > 0) lines.push(`Tags: ${document.tags.join(", ")}`);
231
- if (document.resultUrl) lines.push(`Result URL: ${document.resultUrl}`);
232
- return lines.join("\n");
233
- }).join("\n\n");
234
- }
235
273
  function formatJobList(params) {
236
274
  if (params.jobList.jobs.length === 0) return `No Knowhere jobs on page ${params.jobList.page}.`;
237
275
  const lines = [`Knowhere jobs ${params.jobList.page}/${params.jobList.totalPages || 1} (${params.jobList.total} total).`, ""];
@@ -266,239 +304,6 @@ function buildHistoryJobSourceLabel(jobId, fileName) {
266
304
  function mergeTags(tags, extras) {
267
305
  return Array.from(new Set([...tags, ...extras]));
268
306
  }
269
- function formatScopeClearResult(documents, scopeLabel) {
270
- if (documents.length === 0) return `Scope ${scopeLabel} is already empty.`;
271
- const lines = [`Removed ${documents.length} stored document${documents.length === 1 ? "" : "s"} from scope ${scopeLabel}.`];
272
- for (const [index, document] of documents.entries()) lines.push(`${index + 1}. ${deriveStoredDocumentDisplayName(document)} [${document.id}]`);
273
- return lines.join("\n");
274
- }
275
- function readResultFileReadMode(value) {
276
- return value === "json" || value === "csv" || value === "text" ? value : "text";
277
- }
278
- function normalizeResultFilePath(filePath) {
279
- const value = normalizeWhitespace(filePath);
280
- if (!value) return;
281
- return path.posix.normalize(value.replace(/\\/g, "/")).replace(/^\/+/, "");
282
- }
283
- function buildChunkOrderIndex(browseIndex) {
284
- return new Map(browseIndex.chunkOrder.map((chunkId, index) => [chunkId, index]));
285
- }
286
- function sortChunksByBrowseOrder(chunks, browseIndex) {
287
- const orderIndex = buildChunkOrderIndex(browseIndex);
288
- return [...chunks].sort((left, right) => {
289
- return (orderIndex.get(left.chunkId) ?? Number.MAX_SAFE_INTEGER) - (orderIndex.get(right.chunkId) ?? Number.MAX_SAFE_INTEGER) || left.chunkId.localeCompare(right.chunkId);
290
- });
291
- }
292
- function findResultFile(browseIndex, relativePath) {
293
- return browseIndex.resultFiles.find((entry) => entry.relativePath === relativePath);
294
- }
295
- function isTextReadableResultFile(fileRecord) {
296
- return fileRecord.kind !== "image";
297
- }
298
- const IMAGE_EXTENSION_MIME_TYPES = {
299
- ".png": "image/png",
300
- ".jpg": "image/jpeg",
301
- ".jpeg": "image/jpeg",
302
- ".gif": "image/gif",
303
- ".webp": "image/webp",
304
- ".svg": "image/svg+xml",
305
- ".bmp": "image/bmp",
306
- ".tiff": "image/tiff",
307
- ".tif": "image/tiff"
308
- };
309
- function inferImageMimeType(filePath) {
310
- return IMAGE_EXTENSION_MIME_TYPES[path.extname(filePath).toLowerCase()] || "image/png";
311
- }
312
- async function buildImageToolResult(params) {
313
- const mimeType = inferImageMimeType(params.absolutePath);
314
- const stagedImage = await stageImageResultFileForDelivery({
315
- absolutePath: params.absolutePath,
316
- documentTitle: params.documentTitle,
317
- relativePath: params.filePath,
318
- workspaceDir: params.workspaceDir
319
- });
320
- const stagedImagePath = stagedImage.stagedPath;
321
- const fileName = path.basename(stagedImagePath);
322
- const caption = `${params.documentTitle} - ${params.filePath}`;
323
- const directDelivery = await deliverChannelMessage({
324
- api: params.api,
325
- operationLabel: "read result image",
326
- context: params.context,
327
- sessionKey: params.sessionKey,
328
- channelRoute: params.channelRoute,
329
- text: caption,
330
- mediaUrl: stagedImagePath,
331
- mediaLocalRoots: [path.dirname(stagedImagePath)]
332
- });
333
- if (directDelivery.delivered) {
334
- const payload = {
335
- scope: params.scopeLabel,
336
- docId: params.docId,
337
- documentTitle: params.documentTitle,
338
- file: params.file,
339
- mode: "image_sent",
340
- data: {
341
- mimeType,
342
- sourceRelativePath: params.filePath,
343
- stagedPath: stagedImagePath,
344
- fileName,
345
- caption,
346
- note: "Image already sent to the current channel by the plugin. Do not call read on stagedPath. Do not call the message tool or attach this file again. If you reply, send only a brief confirmation.",
347
- delivery: {
348
- method: "direct_runtime",
349
- surface: directDelivery.surface,
350
- target: directDelivery.to,
351
- accountId: directDelivery.accountId
352
- }
353
- }
354
- };
355
- return {
356
- content: [{
357
- type: "text",
358
- text: `${JSON.stringify(payload, null, 2)}\n`
359
- }],
360
- details: payload
361
- };
362
- }
363
- const sendWithMessageTool = {
364
- action: "send",
365
- path: stagedImagePath,
366
- filePath: stagedImagePath,
367
- filename: fileName,
368
- caption
369
- };
370
- const replyFallback = stagedImage.workspaceRelativePath ? {
371
- instructions: "If the message tool is unavailable, send your normal user-visible reply and include this exact line on its own line to attach the image.",
372
- workspaceRelativePath: stagedImage.workspaceRelativePath,
373
- replyWithMediaDirective: `MEDIA:${stagedImage.workspaceRelativePath}`
374
- } : void 0;
375
- const note = replyFallback ? "Image bytes are not inlined. Do not call read on stagedPath. If the user wants to see this image, use the message tool with sendWithMessageTool. If the message tool is unavailable, send your user-visible reply normally and include replyFallback.replyWithMediaDirective on its own line." : "Image bytes are not inlined. Do not call read on stagedPath. If the user wants to see this image, call the message tool with sendWithMessageTool.";
376
- const nextActionInstructions = replyFallback ? "Do not call read on stagedPath. Call the message tool with sendWithMessageTool to attach this image. If the message tool is unavailable, use replyFallback.replyWithMediaDirective in your normal reply instead." : "Do not call read on stagedPath. Call the message tool with sendWithMessageTool to attach this image.";
377
- const payload = {
378
- scope: params.scopeLabel,
379
- docId: params.docId,
380
- documentTitle: params.documentTitle,
381
- file: params.file,
382
- mode: "image_attachment",
383
- data: {
384
- mimeType,
385
- sourceRelativePath: params.filePath,
386
- stagedPath: stagedImagePath,
387
- fileName,
388
- caption,
389
- note,
390
- nextAction: {
391
- tool: "message",
392
- instructions: nextActionInstructions,
393
- args: sendWithMessageTool
394
- },
395
- sendWithMessageTool,
396
- ...replyFallback ? { replyFallback } : {}
397
- }
398
- };
399
- return {
400
- content: [{
401
- type: "text",
402
- text: `${JSON.stringify(payload, null, 2)}\n`
403
- }],
404
- details: payload
405
- };
406
- }
407
- function normalizeWorkspaceDir(workspaceDir) {
408
- const trimmed = readString(workspaceDir);
409
- return trimmed ? path.resolve(trimmed) : void 0;
410
- }
411
- function toWorkspaceRelativeMediaPath(params) {
412
- const relativePath = path.relative(params.workspaceDir, params.stagedPath);
413
- if (!relativePath || relativePath.startsWith("..") || path.isAbsolute(relativePath)) return;
414
- const normalizedRelativePath = relativePath.split(path.sep).join("/");
415
- return normalizedRelativePath.startsWith("./") || normalizedRelativePath.startsWith("../") ? normalizedRelativePath : `./${normalizedRelativePath}`;
416
- }
417
- async function stageImageResultFileForDelivery(params) {
418
- const extension = path.extname(params.relativePath) || path.extname(params.absolutePath) || ".png";
419
- const imageBaseName = path.basename(params.relativePath, extension) || "image";
420
- const workspaceDir = normalizeWorkspaceDir(params.workspaceDir);
421
- let stagedDir;
422
- if (workspaceDir) {
423
- const workspaceStageRoot = path.join(workspaceDir, ".openclaw");
424
- await fs.mkdir(workspaceStageRoot, { recursive: true });
425
- stagedDir = await fs.mkdtemp(path.join(workspaceStageRoot, "knowhere-read-result-file-"));
426
- } else stagedDir = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "knowhere-read-result-file-"));
427
- const stagedFileName = `${slugify(`${params.documentTitle}-${imageBaseName}`, "knowhere-image")}${extension.toLowerCase()}`;
428
- const stagedPath = path.join(stagedDir, stagedFileName);
429
- await fs.copyFile(params.absolutePath, stagedPath);
430
- return {
431
- stagedPath,
432
- ...workspaceDir ? { workspaceRelativePath: toWorkspaceRelativeMediaPath({
433
- workspaceDir,
434
- stagedPath
435
- }) } : {}
436
- };
437
- }
438
- function stripUtf8Bom(text) {
439
- return text.charCodeAt(0) === 65279 ? text.slice(1) : text;
440
- }
441
- function buildTextFilePayload(text, maxChars) {
442
- return {
443
- content: truncatePreview(text, maxChars),
444
- lineCount: text === "" ? 0 : text.split(/\r\n|\n|\r/).length
445
- };
446
- }
447
- function buildCsvFilePayload(text, maxChars) {
448
- const normalized = stripUtf8Bom(text);
449
- const [headerLine = ""] = normalized.split(/\r\n|\n|\r/, 1);
450
- return {
451
- content: truncatePreview(normalized, maxChars),
452
- header: headerLine.trim() || null,
453
- lineCount: normalized === "" ? 0 : normalized.split(/\r\n|\n|\r/).length
454
- };
455
- }
456
- function truncatePreview(value, maxChars) {
457
- if (typeof value !== "string") return "";
458
- const normalized = value.replace(/\r\n/g, "\n").trim();
459
- if (!normalized) return "";
460
- if (normalized.length <= maxChars) return normalized;
461
- return `${normalized.slice(0, Math.max(0, maxChars - 3)).trimEnd()}...`;
462
- }
463
- function truncateJsonValue(value, maxStringChars) {
464
- if (typeof value === "string") {
465
- if (value.length <= maxStringChars) return {
466
- value,
467
- truncated: false
468
- };
469
- return {
470
- value: `${value.slice(0, Math.max(0, maxStringChars - 1))}…`,
471
- truncated: true
472
- };
473
- }
474
- if (Array.isArray(value)) {
475
- let truncated = false;
476
- return {
477
- value: value.map((entry) => {
478
- const result = truncateJsonValue(entry, maxStringChars);
479
- truncated = truncated || result.truncated;
480
- return result.value;
481
- }),
482
- truncated
483
- };
484
- }
485
- if (isRecord(value)) {
486
- let truncated = false;
487
- const entries = Object.entries(value).map(([key, entry]) => {
488
- const result = truncateJsonValue(entry, maxStringChars);
489
- truncated = truncated || result.truncated;
490
- return [key, result.value];
491
- });
492
- return {
493
- value: Object.fromEntries(entries),
494
- truncated
495
- };
496
- }
497
- return {
498
- value,
499
- truncated: false
500
- };
501
- }
502
307
  function formatJsonToolResult(value) {
503
308
  return textResult(`${JSON.stringify(value, null, 2)}\n`);
504
309
  }
@@ -531,7 +336,7 @@ function createIngestTool(params) {
531
336
  return {
532
337
  name: "knowhere_ingest_document",
533
338
  label: "Knowhere Ingest",
534
- description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. Before calling this for a document that might already be stored in the current scope, use knowhere_list_documents and reuse the existing stored document when Source, File, or Title clearly match unless the user explicitly asks for a fresh parse or overwrite. When the user provides a URL to a document (PDF link, web page, etc.), pass it as the url parameter Knowhere fetches it directly, no local download needed. Returns immediately with a job ID while parsing continues in the background. Use knowhere_get_job_status only when the current turn needs the parsed result. Use lang to control the language of any user-facing background status update (`en` by default, `ch` for Chinese). Provide either filePath or url, not both.",
339
+ description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. When the user provides a URL to a document (PDF link, web page, etc.), pass it as the url parameter Knowhere fetches it directly, no local download needed. Knowhere must be the only parser for supported files. If Knowhere returns an error, surface that exact error to the user and do not fall back to other parsing methods or fabricate a preview. By default blockUntilComplete is false, so this tool is fire-and-forget and returns a job ID while parsing continues in the background. Set blockUntilComplete to true only when the current turn explicitly needs the parsed result before continuing. Use lang to control the language of any user-facing background status update (`en` by default, `ch` for Chinese). Provide either filePath or url, not both.",
535
340
  parameters: {
536
341
  type: "object",
537
342
  additionalProperties: false,
@@ -573,6 +378,10 @@ function createIngestTool(params) {
573
378
  type: "boolean",
574
379
  description: "Replace an existing stored document with the same docId."
575
380
  },
381
+ blockUntilComplete: {
382
+ type: "boolean",
383
+ description: "When true, wait for Knowhere to finish parsing, store the result, and return a ready-to-use stored-document summary. Defaults to false, which returns immediately with a job ID and continues parsing in the background."
384
+ },
576
385
  lang: {
577
386
  type: "string",
578
387
  description: "Language for any user-facing background status update sent after parsing completes or fails. Supports en and ch; unsupported values fall back to en."
@@ -628,13 +437,14 @@ function createIngestTool(params) {
628
437
  filePath: resolvedFilePath,
629
438
  url: urlParam
630
439
  });
440
+ const blockUntilComplete = readBoolean(paramsRecord.blockUntilComplete, false);
631
441
  const tags = sanitizeStringArray(paramsRecord.tags);
632
442
  const overwrite = readBoolean(paramsRecord.overwrite, false);
633
443
  const trackerLanguage = readIngestTrackerLanguage(paramsRecord.lang);
634
444
  const sessionKey = params.ctx.sessionKey;
635
445
  const sourceType = urlParam ? "url" : "file";
636
446
  const channelRoute = await params.store.resolveChannelRoute({ sessionKey });
637
- params.api.logger.info(`knowhere: knowhere_ingest_document starting background ingest scope=${scope.label} sourceType=${sourceType} label=${JSON.stringify(progressLabel)} overwrite=${overwrite} docId=${docId ?? "auto"} dataId=${dataId ?? "none"} lang=${trackerLanguage} routeState=${channelRoute ? "resolved" : "missing"} routeAccountId=${channelRoute?.accountId ?? "none"}`);
447
+ params.api.logger.info(`knowhere: knowhere_ingest_document starting ingest scope=${scope.label} sourceType=${sourceType} label=${JSON.stringify(progressLabel)} mode=${blockUntilComplete ? "blocking" : "background"} overwrite=${overwrite} docId=${docId ?? "auto"} dataId=${dataId ?? "none"} lang=${trackerLanguage} routeState=${channelRoute ? "resolved" : "missing"} routeAccountId=${channelRoute?.accountId ?? "none"}`);
638
448
  let resolveJobCreated;
639
449
  const jobCreatedPromise = new Promise((resolve) => {
640
450
  resolveJobCreated = resolve;
@@ -653,40 +463,52 @@ function createIngestTool(params) {
653
463
  resolveJobCreated(job);
654
464
  }
655
465
  });
466
+ if (blockUntilComplete) {
467
+ const ingestResult = await ingestPromise.catch(rethrowWithPaymentHint);
468
+ params.api.logger.info(`knowhere: knowhere_ingest_document download completed scope=${scope.label} jobId=${ingestResult.job.job_id}; storing extracted result`);
469
+ return textResult(formatCompletedIngestResult({
470
+ document: await persistIngestedDocument({
471
+ api: params.api,
472
+ channelRoute,
473
+ ctx: params.ctx,
474
+ dataId,
475
+ docId,
476
+ fileName,
477
+ ingestResult,
478
+ kgService: params.kgService,
479
+ overwrite,
480
+ scope,
481
+ sessionKey,
482
+ source: urlParam || resolvedFilePath || "",
483
+ sourceType,
484
+ store: params.store,
485
+ tags,
486
+ title
487
+ }),
488
+ scopeLabel: scope.label,
489
+ sourceType
490
+ }));
491
+ }
656
492
  ingestPromise.then(async (ingestResult) => {
657
493
  params.api.logger.info(`knowhere: knowhere_ingest_document download completed scope=${scope.label} jobId=${ingestResult.job.job_id}; storing extracted result`);
658
- const storedDocument = await params.store.saveDownloadedDocument(scope, {
659
- sourceType,
660
- source: urlParam || resolvedFilePath || "",
661
- fileName,
662
- docId,
663
- title,
494
+ const storedDocument = await persistIngestedDocument({
495
+ api: params.api,
496
+ channelRoute,
497
+ ctx: params.ctx,
664
498
  dataId,
499
+ docId,
500
+ fileName,
501
+ ingestResult,
502
+ kgService: params.kgService,
503
+ overwrite,
504
+ scope,
505
+ sessionKey,
506
+ source: urlParam || resolvedFilePath || "",
507
+ sourceType,
508
+ store: params.store,
665
509
  tags,
666
- job: ingestResult.job,
667
- jobResult: ingestResult.jobResult,
668
- downloadedResult: ingestResult.downloadedResult
669
- }, { overwrite });
670
- params.api.logger.info(`knowhere: knowhere_ingest_document stored document scope=${scope.label} jobId=${ingestResult.job.job_id} docId=${storedDocument.id} label=${JSON.stringify(progressLabel)}`);
671
- if (params.kgService.isEnabled()) {
672
- const kbId = params.kgService.resolveKbId(params.ctx);
673
- if (kbId) {
674
- params.api.logger.info(`knowhere: initiating knowledge graph build kbId=${kbId} docId=${storedDocument.id}`);
675
- buildKnowledgeGraphAsync({
676
- kgService: params.kgService,
677
- kbId,
678
- docId: storedDocument.id,
679
- documentPayload: ingestResult,
680
- scope,
681
- ctx: params.ctx,
682
- api: params.api,
683
- channelRoute,
684
- sessionKey
685
- }).catch((kgError) => {
686
- params.api.logger.error(`knowhere: knowledge graph build failed kbId=${kbId} docId=${storedDocument.id}: ${formatErrorMessage(kgError)}`);
687
- });
688
- }
689
- }
510
+ title
511
+ });
690
512
  await notifyBackgroundIngestOutcome({
691
513
  api: params.api,
692
514
  context: params.ctx,
@@ -730,14 +552,14 @@ function createIngestTool(params) {
730
552
  })]);
731
553
  if (typeof createdJob === "symbol") {
732
554
  params.api.logger.warn(`knowhere: knowhere_ingest_document ingest completed before job-created callback scope=${scope.label} label=${JSON.stringify(progressLabel)}`);
733
- return textResult("Ingest completed synchronously. Use knowhere_list_documents to find the stored document.");
555
+ return textResult("Ingest completed synchronously. The document is now stored and indexed.");
734
556
  }
735
557
  return textResult([
736
558
  "Ingest job created. Parsing in background.",
737
559
  `Job ID: ${createdJob.job_id}`,
738
560
  `File: ${progressLabel}`,
739
561
  `Scope: ${scope.label}`,
740
- "Use knowhere_get_job_status only if this turn needs the parsed result."
562
+ "This call does not include parsed content yet."
741
563
  ].join("\n"));
742
564
  }
743
565
  };
@@ -746,7 +568,7 @@ function createJobStatusTool(params) {
746
568
  return {
747
569
  name: "knowhere_get_job_status",
748
570
  label: "Knowhere Job Status",
749
- description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job.",
571
+ description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job. Do not assume a running job is stuck just because progress is unchanged or slow. Only treat the job as failed or stuck when Knowhere returns an explicit failure status or error code.",
750
572
  parameters: {
751
573
  type: "object",
752
574
  additionalProperties: false,
@@ -798,6 +620,10 @@ function createJobStatusTool(params) {
798
620
  lines.push(`Result URL: ${job.result_url}`);
799
621
  if (job.result_url_expires_at) lines.push(`Result URL expires: ${job.result_url_expires_at}`);
800
622
  }
623
+ const hasExplicitError = Boolean(job.error?.code || job.error?.message);
624
+ if (job.status.trim().toLowerCase() === "done") lines.push("Interpretation: completed.");
625
+ else if (isTerminalJobStatus(job.status, hasExplicitError)) lines.push("Interpretation: Knowhere reported an explicit failure. Surface this error to the user and do not fall back to other parsing methods.");
626
+ else lines.push("Interpretation: still running. Do not describe this job as stuck or failed unless a later Knowhere API response returns an explicit failure status or error code.");
801
627
  if (matchingDocuments.length === 0) lines.push("Stored docs in scope: none");
802
628
  else {
803
629
  lines.push("Stored docs in scope:");
@@ -898,7 +724,7 @@ function createImportCompletedJobTool(params) {
898
724
  return {
899
725
  name: "knowhere_import_completed_job",
900
726
  label: "Knowhere Import Completed Job",
901
- description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally, making it available for knowhere_preview_document, knowhere_grep, and knowhere_read_result_file. Use knowhere_list_jobs to find available completed jobs.",
727
+ description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally. Use knowhere_list_jobs to find available completed jobs.",
902
728
  parameters: {
903
729
  type: "object",
904
730
  additionalProperties: false,
@@ -968,551 +794,11 @@ function createImportCompletedJobTool(params) {
968
794
  }),
969
795
  `Imported from job: ${importResult.jobResult.job_id}`,
970
796
  `Source type: ${importResult.jobResult.source_type}`,
971
- "Next: read manifest.json with knowhere_read_result_file or preview the document with knowhere_preview_document."
797
+ "Document imported successfully. Use knowhere_kg_query to search its content."
972
798
  ].join("\n"));
973
799
  }
974
800
  };
975
801
  }
976
- const GREP_VALID_TARGETS = new Set([
977
- "chunk.content",
978
- "chunk.summary",
979
- "chunk.keywords",
980
- "chunk.path",
981
- "chunk.type",
982
- "chunk.chunkId"
983
- ]);
984
- const GREP_TEXT_TARGETS = [
985
- "chunk.content",
986
- "chunk.summary",
987
- "chunk.keywords",
988
- "chunk.path"
989
- ];
990
- function parseGrepConditions(raw) {
991
- if (!Array.isArray(raw)) return [];
992
- return raw.filter(isRecord).map((entry) => {
993
- const target = readString(entry.target);
994
- if (target && !GREP_VALID_TARGETS.has(target)) throw new Error(`Invalid grep target: "${target}". Valid targets: ${[...GREP_VALID_TARGETS].join(", ")}`);
995
- const pattern = typeof entry.pattern === "string" ? entry.pattern : "";
996
- return {
997
- ...target ? { target } : {},
998
- pattern,
999
- ...typeof entry.regex === "boolean" ? { regex: entry.regex } : {},
1000
- ...typeof entry.caseSensitive === "boolean" ? { caseSensitive: entry.caseSensitive } : {}
1001
- };
1002
- });
1003
- }
1004
- function resolveGrepFieldValue(chunk, target) {
1005
- switch (target) {
1006
- case "chunk.content": return chunk.content || "";
1007
- case "chunk.summary": return chunk.summary || "";
1008
- case "chunk.keywords": return chunk.keywords || [];
1009
- case "chunk.path": return chunk.path || "";
1010
- case "chunk.type": return chunk.type;
1011
- case "chunk.chunkId": return chunk.chunkId;
1012
- default: return "";
1013
- }
1014
- }
1015
- function testGrepMatch(text, pattern, useRegex, caseSensitive) {
1016
- if (pattern === "") return true;
1017
- if (useRegex) {
1018
- const flags = caseSensitive ? "" : "i";
1019
- return new RegExp(pattern, flags).test(text);
1020
- }
1021
- if (caseSensitive) return text.includes(pattern);
1022
- return text.toLowerCase().includes(pattern.toLowerCase());
1023
- }
1024
- function testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive) {
1025
- const useRegex = condition.regex ?? outerRegex;
1026
- const caseSensitive = condition.caseSensitive ?? outerCaseSensitive;
1027
- const matchedTargets = [];
1028
- const targets = condition.target ? [condition.target] : GREP_TEXT_TARGETS;
1029
- for (const target of targets) {
1030
- const fieldValue = target === "chunk.type" || target === "chunk.chunkId" ? resolveGrepFieldValue(chunk, target) : normalizedFields.get(target) ?? resolveGrepFieldValue(chunk, target);
1031
- if (Array.isArray(fieldValue)) {
1032
- if (fieldValue.some((entry) => testGrepMatch(entry, condition.pattern, useRegex, caseSensitive))) matchedTargets.push(target);
1033
- } else if (testGrepMatch(fieldValue, condition.pattern, useRegex, caseSensitive)) matchedTargets.push(target);
1034
- }
1035
- return matchedTargets;
1036
- }
1037
- function buildNormalizedFields(chunk) {
1038
- const fields = /* @__PURE__ */ new Map();
1039
- fields.set("chunk.content", normalizeForGrep(chunk.content || ""));
1040
- fields.set("chunk.summary", normalizeForGrep(chunk.summary || ""));
1041
- fields.set("chunk.keywords", (chunk.keywords || []).map((k) => normalizeForGrep(k)));
1042
- fields.set("chunk.path", normalizeForGrep(chunk.path || ""));
1043
- return fields;
1044
- }
1045
- function buildGrepHints(params) {
1046
- const hints = [];
1047
- const maxHints = 3;
1048
- if (params.totalMatches === 0 && params.conditionCount > 0) hints.push("No matches. Try broadening: remove a condition, use a shorter pattern, or check for typos. Call knowhere_preview_document to see the document structure first.");
1049
- if (hints.length < maxHints && params.totalMatches > params.returned) {
1050
- let hint = `Showing ${params.returned} of ${params.totalMatches} matches. Add another condition (e.g., target chunk.path to a specific section) to narrow results.`;
1051
- if (!params.hasPathCondition) hint += " Use knowhere_preview_document to find section paths.";
1052
- hints.push(hint);
1053
- }
1054
- if (hints.length < maxHints && params.truncatedStrings) if (params.returned > 3) hints.push(`Fields truncated at ${params.maxStringChars} chars. Reduce maxResults to 1-3 and increase maxStringChars to 12000-20000 for full content.`);
1055
- else hints.push(`Fields truncated at ${params.maxStringChars} chars. Increase maxStringChars (up to 20000) for full content.`);
1056
- if (hints.length < maxHints && params.totalMatches >= 1 && params.totalMatches <= 5 && !params.includeContext) hints.push("Tip: set includeContext=true to discover sibling chunks in the same section.");
1057
- if (hints.length < maxHints && params.totalChunks > 0 && params.totalMatches > params.totalChunks * .5 && params.conditionCount <= 1) hints.push("Pattern matches over half the document. Add a second condition to narrow.");
1058
- return hints;
1059
- }
1060
- function createGrepTool(params) {
1061
- return {
1062
- name: "knowhere_grep",
1063
- label: "Knowhere Grep",
1064
- description: "Search a stored document's chunks with composable AND conditions. Returns matching chunks with content, summary, keywords, path, and chunkId. Supports substring and regex matching with text normalization (HTML stripping, LaTeX cleanup, unicode normalization). Omit conditions to list all chunks. Omit the target field in a condition to search across all text fields (content, summary, keywords, path) — this is the recommended default. When answering questions from results, cite the chunkId and path. Tip: set maxStringChars up to 20000 when you need full untruncated content from a small number of results (e.g., maxResults=1). The default 4000 may truncate long chunks. Search strategy: (1) Start with knowhere_preview_document to see document structure. (2) Search broadly with a single short pattern, then narrow by adding conditions. (3) If zero results, broaden or try synonyms. If too many, add a path condition. (4) Once you find the right chunks, re-query with maxResults=1-3 and maxStringChars=12000-20000 to read full content.",
1065
- parameters: {
1066
- type: "object",
1067
- additionalProperties: false,
1068
- properties: {
1069
- docId: {
1070
- type: "string",
1071
- description: "Identifier of the stored document to search."
1072
- },
1073
- conditions: {
1074
- type: "array",
1075
- items: {
1076
- type: "object",
1077
- additionalProperties: false,
1078
- properties: {
1079
- target: {
1080
- type: "string",
1081
- enum: [...GREP_VALID_TARGETS],
1082
- description: "Chunk field to search. Omit to search all text fields (content, summary, keywords, path) — this is the recommended default."
1083
- },
1084
- pattern: {
1085
- type: "string",
1086
- description: "Search pattern. Empty string matches all."
1087
- },
1088
- regex: {
1089
- type: "boolean",
1090
- description: "Use regex matching for this condition. Overrides outer regex default."
1091
- },
1092
- caseSensitive: {
1093
- type: "boolean",
1094
- description: "Case-sensitive matching for this condition. Overrides outer default."
1095
- }
1096
- },
1097
- required: ["pattern"]
1098
- },
1099
- description: "ANDed search conditions. Each condition must match for a chunk to be returned. Default [] matches all chunks (useful for browsing). Omit target in a condition to search all text fields. Use multiple conditions to narrow results (e.g., path contains 'chapter 3' AND content contains 'algorithm')."
1100
- },
1101
- regex: {
1102
- type: "boolean",
1103
- description: "Default regex mode for all conditions. Defaults to false."
1104
- },
1105
- caseSensitive: {
1106
- type: "boolean",
1107
- description: "Default case-sensitivity for all conditions. Defaults to false."
1108
- },
1109
- includeContext: {
1110
- type: "boolean",
1111
- description: "Include sibling chunk IDs sharing the same document path for each matched chunk. Useful for navigating to adjacent chunks in the same section — re-query with a condition on chunk.chunkId to fetch a specific sibling."
1112
- },
1113
- maxResults: {
1114
- type: "integer",
1115
- minimum: 1,
1116
- maximum: 50,
1117
- description: "Maximum number of matching chunks to return. Defaults to 10. Use a low value (1–3) with high maxStringChars to read specific chunks in full. Use a higher value (10–50) with lower maxStringChars to scan broadly."
1118
- },
1119
- maxStringChars: {
1120
- type: "integer",
1121
- minimum: 100,
1122
- maximum: 2e4,
1123
- description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 when retrieving full content from a small number of results (e.g., reading a single chunk in full). Reduce below 4000 when scanning many results to save tokens."
1124
- }
1125
- },
1126
- required: ["docId"]
1127
- },
1128
- execute: async (_toolCallId, rawParams) => {
1129
- const paramsRecord = isRecord(rawParams) ? rawParams : {};
1130
- const docId = readString(paramsRecord.docId);
1131
- if (!docId) throw new Error("docId is required.");
1132
- const scope = params.store.resolveScope(params.ctx);
1133
- const payload = await params.store.loadDocumentPayload(scope, docId);
1134
- if (!payload) {
1135
- params.api.logger.warn(`knowhere: knowhere_grep document not found scope=${scope.label} docId=${docId}`);
1136
- return textResult(formatStoredDocumentNotFound(docId, scope.label));
1137
- }
1138
- const conditions = parseGrepConditions(paramsRecord.conditions);
1139
- const outerRegex = readBoolean(paramsRecord.regex, false);
1140
- const outerCaseSensitive = readBoolean(paramsRecord.caseSensitive, false);
1141
- const includeContext = readBoolean(paramsRecord.includeContext, false);
1142
- const maxResults = Math.min(50, Math.max(1, Math.trunc(readNumber(paramsRecord.maxResults, 10))));
1143
- const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(paramsRecord.maxStringChars, 4e3))));
1144
- params.api.logger.info(`knowhere: knowhere_grep searching document scope=${scope.label} docId=${docId} conditions=${conditions.length} regex=${outerRegex} caseSensitive=${outerCaseSensitive} includeContext=${includeContext} maxResults=${maxResults} maxStringChars=${maxStringChars}`);
1145
- const pathChunkIndex = includeContext ? new Map(payload.browseIndex.paths.map((p) => [p.path, p.chunkIds])) : void 0;
1146
- const sortedChunks = sortChunksByBrowseOrder(payload.chunks, payload.browseIndex);
1147
- const results = [];
1148
- for (const chunk of sortedChunks) {
1149
- if (results.length >= maxResults) break;
1150
- const normalizedFields = buildNormalizedFields(chunk);
1151
- const allMatchedTargets = /* @__PURE__ */ new Set();
1152
- let allConditionsPassed = true;
1153
- for (const condition of conditions) {
1154
- const matched = testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive);
1155
- if (matched.length === 0) {
1156
- allConditionsPassed = false;
1157
- break;
1158
- }
1159
- for (const target of matched) allMatchedTargets.add(target);
1160
- }
1161
- if (!allConditionsPassed) continue;
1162
- const entry = {
1163
- chunk,
1164
- matchedOn: [...allMatchedTargets]
1165
- };
1166
- if (includeContext && pathChunkIndex && chunk.path) {
1167
- const siblings = pathChunkIndex.get(chunk.path);
1168
- if (siblings) entry.siblingChunkIds = siblings.filter((id) => id !== chunk.chunkId);
1169
- }
1170
- results.push(entry);
1171
- }
1172
- let totalMatches = results.length;
1173
- if (results.length >= maxResults) {
1174
- const remainingChunks = sortedChunks.slice(sortedChunks.indexOf(results[results.length - 1].chunk) + 1);
1175
- for (const chunk of remainingChunks) {
1176
- const normalizedFields = buildNormalizedFields(chunk);
1177
- let passed = true;
1178
- for (const condition of conditions) if (testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive).length === 0) {
1179
- passed = false;
1180
- break;
1181
- }
1182
- if (passed) totalMatches++;
1183
- }
1184
- }
1185
- const truncated = truncateJsonValue(results.map((entry) => {
1186
- const projected = {
1187
- chunkId: entry.chunk.chunkId,
1188
- type: entry.chunk.type,
1189
- path: entry.chunk.path,
1190
- content: entry.chunk.content,
1191
- summary: entry.chunk.summary,
1192
- keywords: entry.chunk.keywords,
1193
- tokens: entry.chunk.tokens,
1194
- assetFilePath: entry.chunk.assetFilePath,
1195
- matchedOn: entry.matchedOn
1196
- };
1197
- if (entry.siblingChunkIds) projected.siblingChunkIds = entry.siblingChunkIds;
1198
- return projected;
1199
- }), maxStringChars);
1200
- params.api.logger.info(`knowhere: knowhere_grep completed search scope=${scope.label} docId=${docId} returned=${results.length} totalMatches=${totalMatches} truncated=${truncated.truncated}`);
1201
- const hasPathCondition = conditions.some((c) => c.target === "chunk.path");
1202
- const hints = buildGrepHints({
1203
- totalMatches,
1204
- returned: results.length,
1205
- maxResults,
1206
- maxStringChars,
1207
- truncatedStrings: truncated.truncated,
1208
- conditionCount: conditions.length,
1209
- includeContext,
1210
- totalChunks: sortedChunks.length,
1211
- hasPathCondition
1212
- });
1213
- const jsonResult = formatJsonToolResult({
1214
- totalMatches,
1215
- returned: results.length,
1216
- results: truncated.value,
1217
- maxStringChars,
1218
- truncatedStrings: truncated.truncated
1219
- });
1220
- if (hints.length === 0) return jsonResult;
1221
- return textResult(`${jsonResult.content[0].text}\n---\n${hints.join("\n")}`);
1222
- }
1223
- };
1224
- }
1225
- function createReadResultFileTool(params) {
1226
- return {
1227
- name: "knowhere_read_result_file",
1228
- label: "Knowhere Read Result File",
1229
- description: "Read a raw result file from the stored document's extracted ZIP package. Common files: manifest.json (parsing metadata), hierarchy.json (document structure), kb.csv (knowledge base export), table HTML files (e.g., tables/table-1.html), or image assets (e.g., images/img-0.png). Image files are staged into a local attachment path and sent directly to the current channel when routing can be resolved. If direct delivery is unavailable, the tool returns a message-tool handoff and, when the run has a workspace, a workspace-relative MEDIA fallback for a normal assistant reply. When the result mode is image_attachment, do not call generic file-read tools on data.stagedPath; use data.sendWithMessageTool or data.replyFallback as returned. When the result mode is image_sent, the plugin already delivered the image. Use mode='json' for JSON files, mode='csv' for CSV files, or mode='text' (default) for everything else. Increase maxStringChars (up to 20000) for large files.",
1230
- parameters: {
1231
- type: "object",
1232
- additionalProperties: false,
1233
- properties: {
1234
- docId: {
1235
- type: "string",
1236
- description: "Identifier of the stored document to read from."
1237
- },
1238
- filePath: {
1239
- type: "string",
1240
- description: "Relative path under the stored result directory, for example manifest.json or tables/table-1.html."
1241
- },
1242
- mode: {
1243
- type: "string",
1244
- enum: [
1245
- "text",
1246
- "json",
1247
- "csv"
1248
- ],
1249
- description: "text returns trimmed text, json parses JSON, and csv returns a raw CSV preview. Defaults to text."
1250
- },
1251
- maxStringChars: {
1252
- type: "integer",
1253
- minimum: 100,
1254
- maximum: 2e4,
1255
- description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 for large files like hierarchy.json or kb.csv."
1256
- }
1257
- },
1258
- required: ["docId", "filePath"]
1259
- },
1260
- execute: async (_toolCallId, rawParams) => {
1261
- const paramsRecord = isRecord(rawParams) ? rawParams : {};
1262
- const docId = readString(paramsRecord.docId);
1263
- const filePath = normalizeResultFilePath(paramsRecord.filePath);
1264
- if (!docId) throw new Error("docId is required.");
1265
- if (!filePath) throw new Error("filePath is required.");
1266
- const scope = params.store.resolveScope(params.ctx);
1267
- const payload = await params.store.loadDocumentPayload(scope, docId);
1268
- if (!payload) {
1269
- params.api.logger.warn(`knowhere: knowhere_read_result_file document not found scope=${scope.label} docId=${docId}`);
1270
- return textResult(formatStoredDocumentNotFound(docId, scope.label));
1271
- }
1272
- const resultFile = findResultFile(payload.browseIndex, filePath);
1273
- if (!resultFile) {
1274
- params.api.logger.warn(`knowhere: knowhere_read_result_file result file not found scope=${scope.label} docId=${docId} filePath=${filePath}`);
1275
- return textResult([
1276
- "Result file not found.",
1277
- `File path: ${filePath}`,
1278
- `Document ID: ${docId}`,
1279
- `Scope: ${scope.label}`
1280
- ].join("\n"));
1281
- }
1282
- if (resultFile.kind === "image") {
1283
- const absolutePath = params.store.getResultFileAbsolutePath(scope, docId, filePath);
1284
- const channelRoute = await params.store.resolveChannelRoute({ sessionKey: params.ctx.sessionKey });
1285
- params.api.logger.info(`knowhere: knowhere_read_result_file staging image asset scope=${scope.label} docId=${docId} filePath=${filePath}`);
1286
- return await buildImageToolResult({
1287
- api: params.api,
1288
- absolutePath,
1289
- channelRoute,
1290
- context: params.ctx,
1291
- docId: payload.document.id,
1292
- documentTitle: payload.document.title,
1293
- filePath,
1294
- file: resultFile,
1295
- sessionKey: params.ctx.sessionKey,
1296
- scopeLabel: scope.label,
1297
- workspaceDir: params.ctx.workspaceDir
1298
- });
1299
- }
1300
- if (!isTextReadableResultFile(resultFile)) {
1301
- params.api.logger.warn(`knowhere: knowhere_read_result_file unreadable result kind scope=${scope.label} docId=${docId} filePath=${filePath} kind=${resultFile.kind}`);
1302
- return textResult([
1303
- "Result file is not readable as text through this tool.",
1304
- `File path: ${filePath}`,
1305
- `Kind: ${resultFile.kind}`,
1306
- `Document ID: ${docId}`,
1307
- `Scope: ${scope.label}`
1308
- ].join("\n"));
1309
- }
1310
- const storedFile = await params.store.readResultFile(scope, docId, filePath);
1311
- if (!storedFile) {
1312
- params.api.logger.warn(`knowhere: knowhere_read_result_file payload disappeared scope=${scope.label} docId=${docId} filePath=${filePath}`);
1313
- return textResult(formatStoredDocumentNotFound(docId, scope.label));
1314
- }
1315
- if (storedFile.text === null) {
1316
- params.api.logger.warn(`knowhere: knowhere_read_result_file text content missing scope=${scope.label} docId=${docId} filePath=${filePath}`);
1317
- return textResult([
1318
- "Result file not found.",
1319
- `File path: ${filePath}`,
1320
- `Document ID: ${docId}`,
1321
- `Scope: ${scope.label}`
1322
- ].join("\n"));
1323
- }
1324
- const mode = readResultFileReadMode(paramsRecord.mode);
1325
- const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(paramsRecord.maxStringChars, 4e3))));
1326
- const normalizedText = stripUtf8Bom(storedFile.text);
1327
- params.api.logger.info(`knowhere: knowhere_read_result_file reading file scope=${scope.label} docId=${docId} filePath=${filePath} kind=${resultFile.kind} mode=${mode} maxStringChars=${maxStringChars}`);
1328
- if (mode === "json") {
1329
- let parsedJson;
1330
- try {
1331
- parsedJson = JSON.parse(normalizedText);
1332
- } catch (error) {
1333
- params.api.logger.warn(`knowhere: knowhere_read_result_file invalid json scope=${scope.label} docId=${docId} filePath=${filePath} error=${formatErrorMessage(error)}`);
1334
- throw new Error(`Result file ${filePath} is not valid JSON. ${formatErrorMessage(error)}`, { cause: error });
1335
- }
1336
- const truncatedJson = truncateJsonValue(parsedJson, maxStringChars);
1337
- params.api.logger.info(`knowhere: knowhere_read_result_file parsed json scope=${scope.label} docId=${docId} filePath=${filePath} truncated=${truncatedJson.truncated}`);
1338
- return formatJsonToolResult({
1339
- scope: scope.label,
1340
- docId: payload.document.id,
1341
- documentTitle: payload.document.title,
1342
- file: resultFile,
1343
- mode,
1344
- maxStringChars,
1345
- truncatedStrings: truncatedJson.truncated,
1346
- data: truncatedJson.value
1347
- });
1348
- }
1349
- const data = mode === "csv" ? buildCsvFilePayload(normalizedText, maxStringChars) : buildTextFilePayload(normalizedText, maxStringChars);
1350
- params.api.logger.info(`knowhere: knowhere_read_result_file prepared text payload scope=${scope.label} docId=${docId} filePath=${filePath} lineCount=${data.lineCount}`);
1351
- return formatJsonToolResult({
1352
- scope: scope.label,
1353
- docId: payload.document.id,
1354
- documentTitle: payload.document.title,
1355
- file: resultFile,
1356
- mode,
1357
- maxStringChars,
1358
- data
1359
- });
1360
- }
1361
- };
1362
- }
1363
- function createPreviewDocumentTool(params) {
1364
- return {
1365
- name: "knowhere_preview_document",
1366
- label: "Knowhere Preview Document",
1367
- description: "Get a structural overview of a stored Knowhere document. Returns the document metadata and a hierarchical table of contents showing sections, subsections, and chunk counts per path (text, image, table). Use this as the first step after identifying a docId to understand the document's structure before searching with knowhere_grep.",
1368
- parameters: {
1369
- type: "object",
1370
- additionalProperties: false,
1371
- properties: { docId: {
1372
- type: "string",
1373
- description: "Identifier of the stored document to preview."
1374
- } },
1375
- required: ["docId"]
1376
- },
1377
- execute: async (_toolCallId, rawParams) => {
1378
- const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
1379
- if (!docId) throw new Error("docId is required.");
1380
- const scope = params.store.resolveScope(params.ctx);
1381
- const payload = await params.store.loadDocumentPayload(scope, docId);
1382
- if (!payload) {
1383
- params.api.logger.warn(`knowhere: knowhere_preview_document document not found scope=${scope.label} docId=${docId}`);
1384
- return textResult(formatStoredDocumentNotFound(docId, scope.label));
1385
- }
1386
- const { document } = payload;
1387
- const pathSummaryMap = /* @__PURE__ */ new Map();
1388
- for (const chunk of payload.chunks) {
1389
- if (!chunk.path || pathSummaryMap.has(chunk.path)) continue;
1390
- const raw = (chunk.summary || chunk.content || "").trim();
1391
- if (raw) {
1392
- const oneLine = raw.replace(/\n+/g, " ").slice(0, PREVIEW_SUMMARY_MAX_CHARS);
1393
- pathSummaryMap.set(chunk.path, oneLine + (raw.length > PREVIEW_SUMMARY_MAX_CHARS ? "..." : ""));
1394
- }
1395
- }
1396
- const lines = [
1397
- `"${document.title}" [${document.id}]`,
1398
- `Scope: ${scope.label}`,
1399
- `Source: ${document.sourceLabel}`,
1400
- `Chunks: ${document.chunkCount}`
1401
- ];
1402
- const pathByName = /* @__PURE__ */ new Map();
1403
- for (const p of payload.browseIndex.paths) pathByName.set(p.path, p);
1404
- const roots = payload.browseIndex.paths.filter((p) => p.depth === 1);
1405
- params.api.logger.info(`knowhere: knowhere_preview_document building preview scope=${scope.label} docId=${docId} paths=${payload.browseIndex.paths.length} roots=${roots.length} chunks=${payload.chunks.length}`);
1406
- if (roots.length > 0) {
1407
- lines.push("");
1408
- lines.push("## Table of Contents");
1409
- lines.push("");
1410
- const renderTree = (pathRecord, indent) => {
1411
- const prefix = " ".repeat(indent);
1412
- const segments = pathRecord.path.split(/\/|-->/);
1413
- const label = segments[segments.length - 1] || pathRecord.path;
1414
- const counts = [];
1415
- if (pathRecord.textChunkCount > 0) counts.push(`${pathRecord.textChunkCount} text`);
1416
- if (pathRecord.imageChunkCount > 0) counts.push(`${pathRecord.imageChunkCount} img`);
1417
- if (pathRecord.tableChunkCount > 0) counts.push(`${pathRecord.tableChunkCount} tbl`);
1418
- const countStr = counts.length > 0 ? ` (${counts.join(", ")})` : "";
1419
- const summary = pathSummaryMap.get(pathRecord.path);
1420
- const summaryStr = summary ? ` — ${summary}` : "";
1421
- lines.push(`${prefix}- ${label}${countStr}${summaryStr}`);
1422
- for (const childPath of pathRecord.childPaths) {
1423
- const child = pathByName.get(childPath);
1424
- if (child) renderTree(child, indent + 1);
1425
- }
1426
- };
1427
- for (const root of roots) renderTree(root, 0);
1428
- } else {
1429
- lines.push("");
1430
- lines.push("No structural paths available for this document.");
1431
- params.api.logger.warn(`knowhere: knowhere_preview_document no structural paths scope=${scope.label} docId=${docId}`);
1432
- }
1433
- return textResult(lines.join("\n"));
1434
- }
1435
- };
1436
- }
1437
- function createListTool(params) {
1438
- return {
1439
- name: "knowhere_list_documents",
1440
- label: "Knowhere List",
1441
- description: "List all Knowhere documents stored in the current scope. Returns each document's ID, title, source, chunk count, tags, and last-updated timestamp. Use this first to discover available documents, check whether a file or URL is already stored, and find the right docId before calling other tools.",
1442
- parameters: {
1443
- type: "object",
1444
- additionalProperties: false,
1445
- properties: {}
1446
- },
1447
- execute: async () => {
1448
- const scope = params.store.resolveScope(params.ctx);
1449
- const documents = await params.store.listDocuments(scope);
1450
- params.api.logger.info(`knowhere: knowhere_list_documents listed documents scope=${scope.label} count=${documents.length}`);
1451
- return textResult(formatDocumentList(documents, scope.label));
1452
- }
1453
- };
1454
- }
1455
- function createRemoveTool(params) {
1456
- return {
1457
- name: "knowhere_remove_document",
1458
- label: "Knowhere Remove",
1459
- description: "Remove a stored Knowhere document and all its extracted data from the current scope. This is irreversible — the document must be re-ingested or re-imported to restore it.",
1460
- parameters: {
1461
- type: "object",
1462
- additionalProperties: false,
1463
- properties: { docId: {
1464
- type: "string",
1465
- description: "Identifier of the stored document to remove."
1466
- } },
1467
- required: ["docId"]
1468
- },
1469
- execute: async (_toolCallId, rawParams) => {
1470
- const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
1471
- if (!docId) throw new Error("docId is required.");
1472
- const scope = params.store.resolveScope(params.ctx);
1473
- params.api.logger.info(`knowhere: knowhere_remove_document removing document scope=${scope.label} docId=${docId}`);
1474
- const removed = await params.store.removeDocument(scope, docId);
1475
- if (!removed) {
1476
- params.api.logger.warn(`knowhere: knowhere_remove_document document not found scope=${scope.label} docId=${docId}`);
1477
- return textResult(formatStoredDocumentNotFound(docId, scope.label));
1478
- }
1479
- params.api.logger.info(`knowhere: knowhere_remove_document removed document scope=${scope.label} docId=${removed.id}`);
1480
- return textResult([
1481
- "Removed stored document.",
1482
- `Document ID: ${removed.id}`,
1483
- `Title: ${removed.title}`,
1484
- `Scope: ${scope.label}`
1485
- ].join("\n"));
1486
- }
1487
- };
1488
- }
1489
- function createClearScopeTool(params) {
1490
- return {
1491
- name: "knowhere_clear_scope",
1492
- label: "Knowhere Clear Scope",
1493
- description: "Remove all stored Knowhere documents from the current scope. This is irreversible and affects every document in the scope. Set confirm=true to execute.",
1494
- parameters: {
1495
- type: "object",
1496
- additionalProperties: false,
1497
- properties: { confirm: {
1498
- type: "boolean",
1499
- description: "Must be true to clear the current scope."
1500
- } }
1501
- },
1502
- execute: async (_toolCallId, rawParams) => {
1503
- const paramsRecord = isRecord(rawParams) ? rawParams : {};
1504
- const scope = params.store.resolveScope(params.ctx);
1505
- if (!readBoolean(paramsRecord.confirm, false)) {
1506
- params.api.logger.warn(`knowhere: knowhere_clear_scope skipped without confirm scope=${scope.label}`);
1507
- return textResult(`Set confirm=true to clear scope ${scope.label}.`);
1508
- }
1509
- params.api.logger.info(`knowhere: knowhere_clear_scope clearing scope scope=${scope.label}`);
1510
- const removedDocuments = await params.store.clearScope(scope);
1511
- params.api.logger.info(`knowhere: knowhere_clear_scope cleared scope scope=${scope.label} removed=${removedDocuments.length}`);
1512
- return textResult(formatScopeClearResult(removedDocuments, scope.label));
1513
- }
1514
- };
1515
- }
1516
802
  function createSetApiKeyTool(params) {
1517
803
  return {
1518
804
  name: "knowhere_set_api_key",
@@ -1655,36 +941,6 @@ function createKnowhereToolFactory(params) {
1655
941
  store: params.store,
1656
942
  ctx
1657
943
  }),
1658
- createGrepTool({
1659
- api: params.api,
1660
- store: params.store,
1661
- ctx
1662
- }),
1663
- createReadResultFileTool({
1664
- api: params.api,
1665
- store: params.store,
1666
- ctx
1667
- }),
1668
- createPreviewDocumentTool({
1669
- api: params.api,
1670
- store: params.store,
1671
- ctx
1672
- }),
1673
- createListTool({
1674
- api: params.api,
1675
- store: params.store,
1676
- ctx
1677
- }),
1678
- createRemoveTool({
1679
- api: params.api,
1680
- store: params.store,
1681
- ctx
1682
- }),
1683
- createClearScopeTool({
1684
- api: params.api,
1685
- store: params.store,
1686
- ctx
1687
- }),
1688
944
  createSetApiKeyTool({
1689
945
  api: params.api,
1690
946
  config: params.config