@ontos-ai/knowhere-claw 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +163 -0
  2. package/dist/_virtual/_rolldown/runtime.js +37 -0
  3. package/dist/client.d.ts +33 -0
  4. package/dist/client.js +395 -0
  5. package/dist/config.d.ts +6 -0
  6. package/dist/config.js +132 -0
  7. package/dist/error-message.d.ts +1 -0
  8. package/dist/error-message.js +48 -0
  9. package/dist/hooks.d.ts +8 -0
  10. package/dist/hooks.js +415 -0
  11. package/dist/index.d.ts +9 -0
  12. package/dist/index.js +43 -0
  13. package/dist/node_modules/.pnpm/@knowhere-ai_sdk@0.1.1/node_modules/@knowhere-ai/sdk/dist/index.js +717 -0
  14. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/adapters.js +83 -0
  15. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/fetch.js +170 -0
  16. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/adapters/xhr.js +106 -0
  17. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/axios.js +57 -0
  18. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/CancelToken.js +90 -0
  19. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/CanceledError.js +20 -0
  20. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/cancel/isCancel.js +6 -0
  21. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/Axios.js +174 -0
  22. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/AxiosError.js +70 -0
  23. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/AxiosHeaders.js +204 -0
  24. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/InterceptorManager.js +60 -0
  25. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/buildFullPath.js +20 -0
  26. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/dispatchRequest.js +52 -0
  27. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/mergeConfig.js +81 -0
  28. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/settle.js +18 -0
  29. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/core/transformData.js +25 -0
  30. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/defaults/index.js +107 -0
  31. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/defaults/transitional.js +9 -0
  32. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/env/data.js +4 -0
  33. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/AxiosURLSearchParams.js +50 -0
  34. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/HttpStatusCode.js +77 -0
  35. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/bind.js +15 -0
  36. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/buildURL.js +40 -0
  37. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/combineURLs.js +14 -0
  38. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/composeSignals.js +39 -0
  39. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/cookies.js +31 -0
  40. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/formDataToJSON.js +67 -0
  41. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isAbsoluteURL.js +14 -0
  42. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isAxiosError.js +14 -0
  43. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/isURLSameOrigin.js +8 -0
  44. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/parseHeaders.js +53 -0
  45. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/parseProtocol.js +7 -0
  46. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/progressEventReducer.js +38 -0
  47. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/resolveConfig.js +36 -0
  48. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/speedometer.js +36 -0
  49. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/spread.js +29 -0
  50. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/throttle.js +38 -0
  51. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/toFormData.js +151 -0
  52. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/toURLEncodedForm.js +18 -0
  53. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/trackStream.js +69 -0
  54. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/helpers/validator.js +76 -0
  55. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/Blob.js +4 -0
  56. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/FormData.js +4 -0
  57. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/classes/URLSearchParams.js +5 -0
  58. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/browser/index.js +22 -0
  59. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/common/utils.js +46 -0
  60. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/platform/index.js +9 -0
  61. package/dist/node_modules/.pnpm/axios@1.13.6/node_modules/axios/lib/utils.js +698 -0
  62. package/dist/node_modules/.pnpm/fflate@0.8.2/node_modules/fflate/esm/browser.js +426 -0
  63. package/dist/node_modules/.pnpm/jszip@3.10.1/node_modules/jszip/dist/jszip.min.js +3110 -0
  64. package/dist/parser.d.ts +16 -0
  65. package/dist/parser.js +323 -0
  66. package/dist/session.d.ts +11 -0
  67. package/dist/session.js +78 -0
  68. package/dist/store.d.ts +62 -0
  69. package/dist/store.js +482 -0
  70. package/dist/text.d.ts +10 -0
  71. package/dist/text.js +34 -0
  72. package/dist/tools.d.ts +9 -0
  73. package/dist/tools.js +1177 -0
  74. package/dist/tracker-progress.d.ts +8 -0
  75. package/dist/tracker-progress.js +197 -0
  76. package/dist/types.d.ts +247 -0
  77. package/dist/types.js +9 -0
  78. package/openclaw.plugin.json +107 -0
  79. package/package.json +61 -0
  80. package/skills/knowhere/SKILL.md +243 -0
package/dist/tools.js ADDED
@@ -0,0 +1,1177 @@
1
+ import { isRecord } from "./types.js";
2
+ import { assertKnowhereApiKey } from "./config.js";
3
+ import { formatErrorMessage } from "./error-message.js";
4
+ import { KnowhereClient } from "./client.js";
5
+ import { normalizeForGrep, normalizeWhitespace, sanitizeStringArray } from "./text.js";
6
+ import { sendTrackerProgress } from "./tracker-progress.js";
7
+ import path from "node:path";
8
+ import fs from "node:fs/promises";
9
+ //#region src/tools.ts
10
+ const PREVIEW_SUMMARY_MAX_CHARS = 120;
11
/**
 * Wraps a plain string in the tool-result envelope returned by the tools in this file.
 *
 * @param {string} text - Message body to expose as the single text content item.
 * @returns {{content: Array<{type: string, text: string}>, details: object}}
 *   Result object with one `text` content entry and an empty `details` object.
 */
function textResult(text) {
  const textItem = { type: "text", text };
  return { content: [textItem], details: {} };
}
20
/**
 * Picks the name shown for a stored document.
 *
 * @param {{originalFileName?: string, title?: string}} document - Stored document record.
 * @returns {string|undefined} `originalFileName` when it is truthy, otherwise `title`.
 */
function deriveStoredDocumentDisplayName(document) {
  const preferredName = document.originalFileName;
  return preferredName ? preferredName : document.title;
}
23
/**
 * Chooses the file label for a stored document.
 *
 * Candidates are tried in order: `document.originalFileName`, `document.fileName`,
 * then `manifestSourceFileName`. The first truthy one wins.
 *
 * @param {{originalFileName?: string, fileName?: string}} document - Stored document record.
 * @param {string} [manifestSourceFileName] - Fallback name supplied by the caller.
 * @returns {string} The first truthy candidate, or `"unknown"` when none is set.
 */
function deriveStoredDocumentFileLabel(document, manifestSourceFileName) {
  const candidates = [document.originalFileName, document.fileName, manifestSourceFileName];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return "unknown";
}
26
/**
 * Formats the three-line "not found" message for a stored-document lookup.
 *
 * @param {string} docId - Document identifier that was looked up.
 * @param {string} scopeLabel - Label of the scope that was searched.
 * @returns {string} Newline-separated message.
 */
function formatStoredDocumentNotFound(docId, scopeLabel) {
  return `Stored document not found.\nDocument ID: ${docId}\nScope: ${scopeLabel}`;
}
33
/**
 * Builds the human-readable summary lines for a stored document.
 *
 * @param {object} params
 * @param {object} params.document - Stored document record (reads `title`, `id`,
 *   `sourceLabel`, `chunkCount`, `statistics`, `jobId`, `resultUrl`, `updatedAt`).
 * @param {string} params.scopeLabel - Label of the scope the document lives in.
 * @param {boolean} [params.includeSource] - Add a `Source:` line.
 * @param {boolean} [params.includeJobId] - Add a `Job ID:` line when the document has one.
 * @param {boolean} [params.includeUpdatedAt] - Add an `Updated:` line.
 * @param {string} [params.manifestSourceFileName] - Fallback used for the `File:` line.
 * @returns {string[]} Summary lines in display order.
 */
function buildStoredDocumentSummaryLines(params) {
  const doc = params.document;
  const summary = [];
  summary.push(`Stored document: "${doc.title}" [${doc.id}]`);
  summary.push(`Scope: ${params.scopeLabel}`);
  if (params.includeSource) summary.push(`Source: ${doc.sourceLabel}`);
  summary.push(`File: ${deriveStoredDocumentFileLabel(doc, params.manifestSourceFileName)}`);
  summary.push(`Chunks: ${doc.chunkCount}`);

  const statistics = doc.statistics;
  if (statistics) {
    // Only chunk kinds with a truthy count appear in the breakdown.
    const breakdown = [
      [statistics.text_chunks, "text"],
      [statistics.image_chunks, "image"],
      [statistics.table_chunks, "table"],
    ]
      .filter(([count]) => count)
      .map(([count, kind]) => `${count} ${kind}`);
    if (breakdown.length > 0) summary.push(`Breakdown: ${breakdown.join(", ")}`);
    if (statistics.total_pages) summary.push(`Pages: ${statistics.total_pages}`);
  }

  if (params.includeJobId && doc.jobId) summary.push(`Job ID: ${doc.jobId}`);
  if (doc.resultUrl) summary.push(`Result URL: ${doc.resultUrl}`);
  if (params.includeUpdatedAt) summary.push(`Updated: ${doc.updatedAt}`);
  return summary;
}
52
/**
 * Reads a non-blank string parameter.
 *
 * @param {unknown} value - Raw parameter value.
 * @returns {string|undefined} The trimmed string, or `undefined` when `value` is
 *   not a string or is blank after trimming.
 */
function readString(value) {
  if (typeof value !== "string") return void 0;
  const trimmed = value.trim();
  return trimmed ? trimmed : void 0;
}
55
/**
 * Reads a boolean parameter.
 *
 * @param {unknown} value - Raw parameter value.
 * @param {boolean} [fallback=false] - Returned when `value` is not a boolean.
 * @returns {boolean} `value` when it is a real boolean, otherwise `fallback`.
 */
function readBoolean(value, fallback = false) {
  if (typeof value === "boolean") return value;
  return fallback;
}
58
/**
 * Reads a finite number parameter, accepting numeric strings.
 *
 * @param {unknown} value - Raw parameter value (number or non-blank string).
 * @param {*} fallback - Returned when no finite number can be read.
 * @returns {*} The finite number, or `fallback`.
 */
function readNumber(value, fallback) {
  let candidate = NaN;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim()) {
    // Blank strings are rejected up front because Number("") would yield 0.
    candidate = Number(value);
  }
  return Number.isFinite(candidate) ? candidate : fallback;
}
66
/**
 * Resolves a user-supplied path against the workspace directory.
 *
 * @param {string} filePath - Absolute or relative path.
 * @param {{workspaceDir?: string}} ctx - Tool context; `workspaceDir` is the base
 *   for relative paths, falling back to `process.cwd()` when it is falsy.
 * @returns {string} `filePath` unchanged when absolute, otherwise the resolved path.
 */
function resolveWorkspacePath(filePath, ctx) {
  if (!path.isAbsolute(filePath)) {
    const baseDir = ctx.workspaceDir || process.cwd();
    return path.resolve(baseDir, filePath);
  }
  return filePath;
}
70
/**
 * Parses a URL and rejects anything that must not be sent to Knowhere ingest.
 *
 * Only `http:` and `https:` URLs with a non-empty host are accepted, and
 * loopback targets are refused. The loopback check handles forms the plain
 * string comparison used to miss:
 *   - IPv6 literals, which `URL.hostname` returns wrapped in brackets (`[::1]`);
 *   - a trailing root dot (`localhost.`);
 *   - names under the reserved `.localhost` domain;
 *   - IPv4-mapped IPv6 loopback (`[::ffff:127.0.0.1]`).
 *
 * @param {string} urlText - URL text supplied by the caller.
 * @returns {URL} The parsed URL.
 * @throws {TypeError} When `urlText` cannot be parsed by `new URL`.
 * @throws {Error} When the protocol is unsupported, the host is empty, or the
 *   host is a loopback address.
 */
function parseAllowedIngestUrl(urlText) {
  const parsedUrl = new URL(urlText);
  if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
    throw new Error(`Only http and https URLs are supported for Knowhere ingest. URL: ${urlText}`);
  }
  const hostName = parsedUrl.hostname.trim().toLowerCase();
  if (!hostName) throw new Error(`URL host is required for Knowhere ingest. URL: ${urlText}`);

  // Strip IPv6 brackets and a trailing root dot so the comparisons below see the bare host.
  const bareHost = hostName.replace(/^\[(.*)\]$/, "$1").replace(/\.$/, "");
  const isLoopback =
    bareHost === "localhost" ||
    bareHost.endsWith(".localhost") ||
    bareHost === "::1" ||
    bareHost.startsWith("127.") ||
    // IPv4-mapped loopback: serialized in hex (::ffff:7f00:1); the dotted form is matched defensively.
    /^::ffff:(?:7f[0-9a-f]{2}:[0-9a-f]{1,4}$|127\.)/.test(bareHost);
  if (isLoopback) throw new Error(`Loopback URLs are not allowed for Knowhere ingest. URL: ${urlText}`);
  return parsedUrl;
}
78
/**
 * Derives the short label used in ingest progress messages.
 *
 * Preference order: trimmed `fileName`, base name of `filePath`, last path
 * segment of `url` (percent-decoded), the URL host, then the raw URL text.
 *
 * @param {{fileName?: string, filePath?: string, url?: string}} params
 * @returns {string} Label; `"document"` when neither a name, path nor URL is given.
 */
function buildIngestProgressLabel(params) {
  const explicitName = typeof params.fileName === "string" ? params.fileName.trim() : "";
  if (explicitName) return explicitName;
  if (params.filePath) return path.basename(params.filePath);
  if (!params.url) return "document";
  try {
    const target = new URL(params.url);
    const lastSegment = path.posix.basename(decodeURIComponent(target.pathname));
    const isUsableSegment = lastSegment && lastSegment !== "/" && lastSegment !== ".";
    if (isUsableSegment) return lastSegment;
    return target.host || params.url;
  } catch {
    // Unparseable URL or malformed percent-encoding: fall back to the raw text.
    return params.url;
  }
}
92
/**
 * Converts the tool's `parsing` argument into snake_case Knowhere parsing params.
 *
 * Both camelCase and snake_case input keys are accepted. For string options the
 * camelCase key takes precedence (`??`). For boolean options the snake_case key
 * is read last, so it wins when both are booleans. Unknown `model` values and
 * non-boolean flags are dropped.
 *
 * @param {unknown} rawParsing - Raw `parsing` argument; non-records are treated as empty.
 * @returns {object|undefined} Normalized params, or `undefined` when nothing was set.
 */
function normalizeParsingParams(rawParsing) {
  const source = isRecord(rawParsing) ? rawParsing : {};
  const normalized = {};

  const model = readString(source.model);
  if (model === "base" || model === "advanced") normalized.model = model;

  const stringOptions = [
    ["kbDir", "kb_dir"],
    ["docType", "doc_type"],
    ["addFragDesc", "add_frag_desc"],
  ];
  for (const [camelKey, snakeKey] of stringOptions) {
    const text = readString(source[camelKey] ?? source[snakeKey]);
    if (text) normalized[snakeKey] = text;
  }

  const booleanOptions = [
    ["ocrEnabled", "ocr_enabled"],
    ["smartTitleParse", "smart_title_parse"],
    ["summaryImage", "summary_image"],
    ["summaryTable", "summary_table"],
    ["summaryText", "summary_txt"],
  ];
  for (const [camelKey, snakeKey] of booleanOptions) {
    // camelCase first, snake_case second: a boolean under the snake_case key overrides.
    for (const inputKey of [camelKey, snakeKey]) {
      const flag = source[inputKey];
      if (typeof flag === "boolean") normalized[snakeKey] = flag;
    }
  }

  if (Object.keys(normalized).length === 0) return void 0;
  return normalized;
}
120
/**
 * Formats the stored documents of a scope as a numbered, blank-line-separated list.
 *
 * @param {object[]} documents - Stored document records (reads `id`, `sourceLabel`,
 *   `chunkCount`, `updatedAt`, `ingestedAt`, `title`, `tags`, `resultUrl`).
 * @param {string} scopeLabel - Label used in the empty-scope message.
 * @returns {string} Formatted list, or a "no stored documents" message.
 */
function formatDocumentList(documents, scopeLabel) {
  if (documents.length === 0) return `No stored documents in scope ${scopeLabel}.`;
  const blocks = documents.map((document, position) => {
    const displayTitle = deriveStoredDocumentDisplayName(document);
    const block = [];
    block.push(`${position + 1}. ${displayTitle} [${document.id}]`);
    block.push(`Source: ${document.sourceLabel}`);
    block.push(`Chunks: ${document.chunkCount}`);
    block.push(`Updated: ${document.updatedAt || document.ingestedAt}`);
    // The title is listed separately only when it differs from the display name.
    const hasDistinctTitle = document.title && document.title !== displayTitle;
    if (hasDistinctTitle) block.push(`Title: ${document.title}`);
    if (document.tags.length > 0) block.push(`Tags: ${document.tags.join(", ")}`);
    if (document.resultUrl) block.push(`Result URL: ${document.resultUrl}`);
    return block.join("\n");
  });
  return blocks.join("\n\n");
}
136
/**
 * Formats one page of Knowhere jobs together with the stored documents that
 * reference each job.
 *
 * @param {object} params
 * @param {{jobs: object[], page: number, totalPages?: number, total: number}} params.jobList
 *   Job page to format.
 * @param {Map<string, Array<{id: string}>>} params.documentsByJobId
 *   Stored documents grouped by job ID.
 * @returns {string} Header line followed by one blank-line-separated entry per job,
 *   or a "no jobs" message for an empty page.
 */
function formatJobList(params) {
  const { jobList, documentsByJobId } = params;
  if (jobList.jobs.length === 0) return `No Knowhere jobs on page ${jobList.page}.`;

  const header = `Knowhere jobs ${jobList.page}/${jobList.totalPages || 1} (${jobList.total} total).`;
  const entries = [];
  for (const [position, job] of jobList.jobs.entries()) {
    const linkedDocuments = documentsByJobId.get(job.job_id) ?? [];
    const entry = [
      `${position + 1}. ${job.job_id}`,
      `Status: ${job.status}`,
      `Source type: ${job.source_type}`,
    ];
    if (job.file_name) entry.push(`File: ${job.file_name}`);
    if (job.created_at) entry.push(`Created: ${job.created_at}`);
    entry.push(job.result_url ? `Result URL: ${job.result_url}` : "Result package: not ready");
    if (linkedDocuments.length > 0) {
      const documentIds = linkedDocuments.map((document) => document.id);
      entry.push(`Stored docs in scope: ${documentIds.join(", ")}`);
    } else {
      entry.push("Stored docs in scope: none");
    }
    if (job.error?.message) entry.push(`Error: ${job.error.message}`);
    entries.push(entry.join("\n"));
  }
  // Header, blank line, then entries separated by blank lines.
  return `${header}\n\n${entries.join("\n\n")}`;
}
156
/**
 * Reads the `recentDays` filter, which only accepts the values 1, 7 and 30.
 *
 * @param {unknown} value - Raw parameter value (number or numeric string).
 * @returns {number|undefined} The accepted value, or `undefined` for anything else.
 */
function readRecentDays(value) {
  const days = readNumber(value, NaN);
  switch (days) {
    case 1:
    case 7:
    case 30:
      return days;
    default:
      return void 0;
  }
}
160
/**
 * Builds the source identifier string for a document imported from a past job.
 *
 * @param {string} jobId - Knowhere job ID.
 * @returns {string} `knowhere-job:` followed by the job ID.
 */
function buildHistoryJobSource(jobId) {
  const scheme = "knowhere-job";
  return `${scheme}:${jobId}`;
}
163
/**
 * Builds the source label for a document imported from a past job.
 *
 * @param {string} jobId - Knowhere job ID, used in the fallback label.
 * @param {unknown} fileName - File name reported for the job, if any.
 * @returns {string} The trimmed file name when it is a non-blank string,
 *   otherwise `Imported Knowhere job <jobId>`.
 */
function buildHistoryJobSourceLabel(jobId, fileName) {
  const trimmedName = typeof fileName === "string" ? fileName.trim() : "";
  if (trimmedName) return trimmedName;
  return `Imported Knowhere job ${jobId}`;
}
167
/**
 * Concatenates two tag lists and removes duplicates, keeping first-seen order.
 *
 * @param {Iterable<string>} tags - Primary tags.
 * @param {Iterable<string>} extras - Tags appended after `tags`.
 * @returns {string[]} New array of unique tags.
 */
function mergeTags(tags, extras) {
  const combined = [...tags, ...extras];
  const unique = new Set();
  for (const tag of combined) unique.add(tag);
  return [...unique];
}
170
/**
 * Formats the outcome of clearing a scope.
 *
 * @param {object[]} documents - Documents that were removed.
 * @param {string} scopeLabel - Label of the cleared scope.
 * @returns {string} A count line followed by a numbered list of removed documents,
 *   or an "already empty" message.
 */
function formatScopeClearResult(documents, scopeLabel) {
  if (documents.length === 0) return `Scope ${scopeLabel} is already empty.`;
  const pluralSuffix = documents.length === 1 ? "" : "s";
  const report = [`Removed ${documents.length} stored document${pluralSuffix} from scope ${scopeLabel}.`];
  documents.forEach((document, position) => {
    report.push(`${position + 1}. ${deriveStoredDocumentDisplayName(document)} [${document.id}]`);
  });
  return report.join("\n");
}
176
/**
 * Reads the result-file read mode.
 *
 * @param {unknown} value - Raw parameter value.
 * @returns {"json"|"csv"|"text"} `value` when it is one of the supported modes,
 *   otherwise `"text"`.
 */
function readResultFileReadMode(value) {
  switch (value) {
    case "json":
    case "csv":
    case "text":
      return value;
    default:
      return "text";
  }
}
179
/**
 * Normalizes a result-file path to a POSIX-style relative path.
 *
 * Backslashes become forward slashes, the path is collapsed with
 * `path.posix.normalize`, and leading slashes are removed.
 *
 * @param {unknown} filePath - Raw path; passed through `normalizeWhitespace` first.
 * @returns {string|undefined} Normalized path, or `undefined` when the cleaned
 *   input is empty.
 */
function normalizeResultFilePath(filePath) {
  const cleaned = normalizeWhitespace(filePath);
  if (!cleaned) return;
  const forwardSlashed = cleaned.replace(/\\/g, "/");
  const collapsed = path.posix.normalize(forwardSlashed);
  return collapsed.replace(/^\/+/, "");
}
184
/**
 * Builds a lookup from chunk ID to its position in `browseIndex.chunkOrder`.
 *
 * @param {{chunkOrder: string[]}} browseIndex - Browse index holding the chunk order.
 * @returns {Map<string, number>} Chunk ID → index; for duplicate IDs the last index wins.
 */
function buildChunkOrderIndex(browseIndex) {
  const positions = new Map();
  for (const [position, chunkId] of browseIndex.chunkOrder.entries()) {
    positions.set(chunkId, position);
  }
  return positions;
}
187
/**
 * Returns a copy of `chunks` sorted by their position in the browse index.
 *
 * Chunks missing from the index sort after all indexed chunks; ties are broken
 * by comparing `chunkId` with `localeCompare`.
 *
 * @param {Iterable<{chunkId: string}>} chunks - Chunks to sort (not mutated).
 * @param {{chunkOrder: string[]}} browseIndex - Browse index holding the chunk order.
 * @returns {Array<{chunkId: string}>} New sorted array.
 */
function sortChunksByBrowseOrder(chunks, browseIndex) {
  const positions = buildChunkOrderIndex(browseIndex);
  const rankOf = (chunk) => positions.get(chunk.chunkId) ?? Number.MAX_SAFE_INTEGER;
  const sorted = [...chunks];
  sorted.sort((left, right) => {
    const delta = rankOf(left) - rankOf(right);
    if (delta) return delta;
    return left.chunkId.localeCompare(right.chunkId);
  });
  return sorted;
}
193
/**
 * Looks up a result file record by its exact relative path.
 *
 * @param {{resultFiles: Array<{relativePath: string}>}} browseIndex - Browse index.
 * @param {string} relativePath - Path to match against `relativePath`.
 * @returns {object|undefined} The first matching record, or `undefined`.
 */
function findResultFile(browseIndex, relativePath) {
  for (const fileRecord of browseIndex.resultFiles) {
    if (fileRecord.relativePath === relativePath) return fileRecord;
  }
  return void 0;
}
196
/**
 * Tells whether a result file can be read as text.
 *
 * @param {{kind: string}} fileRecord - Result file record.
 * @returns {boolean} `false` only when `kind` is `"image"`.
 */
function isTextReadableResultFile(fileRecord) {
  const isImage = fileRecord.kind === "image";
  return !isImage;
}
199
/**
 * Removes a leading byte-order mark (U+FEFF) from decoded text.
 *
 * @param {string} text - Decoded file content.
 * @returns {string} `text` without its first character when that character is
 *   U+FEFF, otherwise `text` unchanged.
 */
function stripUtf8Bom(text) {
  const BYTE_ORDER_MARK = 0xfeff;
  if (text.charCodeAt(0) === BYTE_ORDER_MARK) return text.slice(1);
  return text;
}
202
/**
 * Builds the payload returned when a result file is read as plain text.
 *
 * @param {string} text - Full file content.
 * @param {number} maxChars - Character budget passed to `truncatePreview`.
 * @returns {{content: string, lineCount: number}} Truncated content plus the
 *   number of segments produced by splitting on CRLF, LF or CR (0 for an empty string).
 */
function buildTextFilePayload(text, maxChars) {
  const content = truncatePreview(text, maxChars);
  let lineCount = 0;
  if (text !== "") lineCount = text.split(/\r\n|\n|\r/).length;
  return { content, lineCount };
}
208
/**
 * Builds the payload returned when a result file is read as CSV.
 *
 * @param {string} text - Full file content; a leading BOM is stripped first.
 * @param {number} maxChars - Character budget passed to `truncatePreview`.
 * @returns {{content: string, header: string|null, lineCount: number}} Truncated
 *   content, the trimmed first line (`null` when blank), and the number of
 *   segments produced by splitting on CRLF, LF or CR (0 for an empty string).
 */
function buildCsvFilePayload(text, maxChars) {
  const LINE_BREAK = /\r\n|\n|\r/;
  const withoutBom = stripUtf8Bom(text);
  // A limit of 1 stops the split after the first line.
  const firstLine = withoutBom.split(LINE_BREAK, 1)[0] ?? "";
  const content = truncatePreview(withoutBom, maxChars);
  const header = firstLine.trim() || null;
  const lineCount = withoutBom === "" ? 0 : withoutBom.split(LINE_BREAK).length;
  return { content, header, lineCount };
}
217
/**
 * Produces a trimmed preview of a string, shortened with `...` when too long.
 *
 * CRLF is converted to LF and surrounding whitespace is removed before the
 * length check. When shortening, three characters are reserved for the ellipsis
 * and trailing whitespace before it is removed.
 *
 * @param {unknown} value - Text to preview; non-strings yield `""`.
 * @param {number} maxChars - Maximum preview length.
 * @returns {string} The cleaned text, or its shortened form ending in `...`.
 */
function truncatePreview(value, maxChars) {
  if (typeof value !== "string") return "";
  const cleaned = value.replace(/\r\n/g, "\n").trim();
  const fitsBudget = cleaned.length === 0 || cleaned.length <= maxChars;
  if (fitsBudget) return cleaned;
  const keepLength = Math.max(0, maxChars - 3);
  const head = cleaned.slice(0, keepLength).trimEnd();
  return `${head}...`;
}
224
/**
 * Recursively shortens every string inside a JSON-like value.
 *
 * Strings longer than `maxStringChars` are cut to `maxStringChars - 1`
 * characters followed by `…`. Arrays and records are rebuilt with their
 * entries processed the same way; any other value is returned as-is.
 *
 * @param {unknown} value - JSON-like value to process.
 * @param {number} maxStringChars - Maximum length allowed for each string.
 * @returns {{value: unknown, truncated: boolean}} The processed value and whether
 *   any string inside it was shortened.
 */
function truncateJsonValue(value, maxStringChars) {
  if (typeof value === "string") {
    if (value.length <= maxStringChars) return { value, truncated: false };
    const keepLength = Math.max(0, maxStringChars - 1);
    return { value: `${value.slice(0, keepLength)}…`, truncated: true };
  }

  if (Array.isArray(value)) {
    const results = value.map((item) => truncateJsonValue(item, maxStringChars));
    return {
      value: results.map((result) => result.value),
      truncated: results.some((result) => result.truncated),
    };
  }

  if (isRecord(value)) {
    const results = Object.entries(value).map(([key, item]) => [key, truncateJsonValue(item, maxStringChars)]);
    return {
      value: Object.fromEntries(results.map(([key, result]) => [key, result.value])),
      truncated: results.some(([, result]) => result.truncated),
    };
  }

  return { value, truncated: false };
}
263
/**
 * Serializes a value as pretty-printed JSON and wraps it in a text tool result.
 *
 * @param {unknown} value - Value to serialize with two-space indentation.
 * @returns {object} Result from `textResult` whose text is the JSON plus a trailing newline.
 */
function formatJsonToolResult(value) {
  const serialized = JSON.stringify(value, null, 2);
  return textResult(`${serialized}\n`);
}
266
/**
 * Renders an arbitrary value as a compact single-line string.
 *
 * @param {unknown} value - Value to render.
 * @returns {string|null} `null` for `undefined` and blank strings; `"null"` for
 *   `null`; the trimmed text for strings; a printable form for numbers, booleans,
 *   bigints, symbols and functions; compact JSON for everything else. When JSON
 *   serialization throws or yields `undefined`, the `Object.prototype.toString`
 *   tag is used instead.
 */
function formatJsonInline(value) {
  if (value === null) return "null";
  switch (typeof value) {
    case "undefined":
      return null;
    case "string":
      return value.trim() || null;
    case "number":
    case "boolean":
      return String(value);
    case "bigint":
      return value.toString();
    case "symbol":
      return value.description ? `Symbol(${value.description})` : "Symbol()";
    case "function":
      return value.name ? `[function ${value.name}]` : "[function]";
    default:
      break;
  }
  const describeTag = () => Object.prototype.toString.call(value);
  try {
    return JSON.stringify(value) ?? describeTag();
  } catch {
    // For example circular structures or nested BigInt values.
    return describeTag();
  }
}
280
/**
 * Creates a `KnowhereClient` from the plugin configuration.
 *
 * @param {object} params
 * @param {object} params.config - Plugin config; supplies the API key, base URL,
 *   and the request, upload and polling timing settings.
 * @param {{logger: object}} params.api - Host API; its `logger` is handed to the client.
 * @returns {KnowhereClient} Configured client instance.
 */
function createClient(params) {
  const { config } = params;
  const clientOptions = {
    apiKey: config.apiKey,
    baseUrl: config.baseUrl,
    requestTimeoutMs: config.requestTimeoutMs,
    uploadTimeoutMs: config.uploadTimeoutMs,
    pollIntervalMs: config.pollIntervalMs,
    pollTimeoutMs: config.pollTimeoutMs,
    logger: params.api.logger,
  };
  return new KnowhereClient(clientOptions);
}
291
/**
 * Builds the `knowhere_ingest_document` tool definition.
 *
 * The tool's `execute` handler validates that exactly one of `filePath` / `url`
 * is given, checks the API key, verifies the local file exists (or that the URL
 * is allowed), runs `client.ingestDocument`, stores the result through
 * `params.store.saveDownloadedDocument`, and reports progress with
 * `sendTrackerProgress` before, during and after the job. On failure a progress
 * message is sent and the original error is rethrown.
 *
 * @param {object} params
 * @param {object} params.api - Host API (used for the logger and progress messages).
 * @param {object} params.config - Plugin configuration.
 * @param {object} params.store - Document store (`resolveScope`, `saveDownloadedDocument`).
 * @param {object} params.ctx - Tool context (`sessionKey`, `workspaceDir`).
 * @returns {object} Tool definition with `name`, `label`, `description`,
 *   `parameters` and `execute`.
 */
function createIngestTool(params) {
  const parsingSchema = {
    type: "object",
    additionalProperties: false,
    properties: {
      model: {
        type: "string",
        enum: ["base", "advanced"],
      },
      ocrEnabled: { type: "boolean" },
      kbDir: { type: "string" },
      docType: {
        type: "string",
        description: "Optional Knowhere parsing_params.doc_type value passed through as-is.",
      },
      smartTitleParse: { type: "boolean" },
      summaryImage: { type: "boolean" },
      summaryTable: { type: "boolean" },
      summaryText: { type: "boolean" },
      addFragDesc: { type: "string" },
    },
  };

  const parameters = {
    type: "object",
    additionalProperties: false,
    properties: {
      filePath: {
        type: "string",
        description: "Local file path to parse. Use either filePath or url.",
      },
      fileName: {
        type: "string",
        description: "Original filename to send to Knowhere and store locally when filePath points to a temporary attachment path.",
      },
      originalFileName: {
        type: "string",
        description: "Alias of fileName. Useful when an attachment was saved locally under a UUID path but the prompt still shows the visible filename.",
      },
      url: {
        type: "string",
        description: "Remote document URL to parse. Use either url or filePath.",
      },
      docId: {
        type: "string",
        description: "Optional stable identifier to use in the local store.",
      },
      title: {
        type: "string",
        description: "Optional display title stored with the document.",
      },
      tags: {
        type: "array",
        items: { type: "string" },
        description: "Optional tags stored alongside the document.",
      },
      dataId: {
        type: "string",
        description: "Optional Knowhere data_id for correlation/idempotency.",
      },
      overwrite: {
        type: "boolean",
        description: "Replace an existing stored document with the same docId.",
      },
      parsing: parsingSchema,
    },
  };

  // Sends one progress line to the tracker for the current session.
  const notify = (text) =>
    sendTrackerProgress({
      api: params.api,
      sessionKey: params.ctx.sessionKey,
      text,
    });

  const execute = async (_toolCallId, rawParams) => {
    const input = isRecord(rawParams) ? rawParams : {};
    const filePathParam = readString(input.filePath);
    const urlParam = readString(input.url);
    if (!filePathParam && !urlParam) throw new Error("filePath or url is required.");
    if (filePathParam && urlParam) throw new Error("Provide either filePath or url, not both.");
    assertKnowhereApiKey(params.config);

    const scope = params.store.resolveScope(params.ctx);
    const client = createClient({ api: params.api, config: params.config });

    let resolvedFilePath;
    if (filePathParam) {
      resolvedFilePath = resolveWorkspacePath(filePathParam, params.ctx);
      // Any stat failure is reported the same way as a missing file.
      let fileStats = null;
      try {
        fileStats = await fs.stat(resolvedFilePath);
      } catch {
        fileStats = null;
      }
      if (!fileStats || !fileStats.isFile()) throw new Error(`File not found: ${resolvedFilePath}`);
    }
    if (urlParam) parseAllowedIngestUrl(urlParam);

    const dataId = readString(input.dataId);
    const docId = readString(input.docId);
    const title = readString(input.title);
    const requestedFileName = readString(input.fileName) ?? readString(input.originalFileName);
    const progressLabel = buildIngestProgressLabel({
      fileName: requestedFileName,
      filePath: resolvedFilePath,
      url: urlParam,
    });
    const tags = sanitizeStringArray(input.tags);
    const overwrite = readBoolean(input.overwrite, false);

    // Captured from onJobCreated so the failure message can name the job.
    let createdJobId;
    await notify(`Tracker: creating Knowhere job for \`${progressLabel}\` for this request.`);
    try {
      const ingestResult = await client.ingestDocument({
        filePath: resolvedFilePath,
        fileName: requestedFileName,
        url: urlParam,
        dataId,
        parsingParams: normalizeParsingParams(input.parsing),
        onJobCreated: async (job) => {
          createdJobId = job.job_id;
          await notify(`Tracker: ingesting \`${progressLabel}\` into Knowhere for this request. Job ID: \`${job.job_id}\`.`);
        },
      });

      const derivedFileName = resolvedFilePath ? path.basename(resolvedFilePath) : null;
      const fileName = requestedFileName || derivedFileName;
      const storedDocument = await params.store.saveDownloadedDocument(
        scope,
        {
          sourceType: urlParam ? "url" : "file",
          source: urlParam || resolvedFilePath || "",
          fileName,
          docId,
          title,
          dataId,
          tags,
          job: ingestResult.job,
          jobResult: ingestResult.jobResult,
          downloadedResult: ingestResult.downloadedResult,
        },
        { overwrite }
      );

      await notify(`Tracker: \`${progressLabel}\` is parsed and ready. Job ID: \`${ingestResult.job.job_id}\`. The agent can use it now.`);

      const summaryLines = buildStoredDocumentSummaryLines({
        document: storedDocument,
        scopeLabel: scope.label,
        includeSource: true,
        includeJobId: true,
      });
      const report = [
        "Ingest complete.",
        ...summaryLines,
        "Next: read manifest.json with knowhere_read_result_file or preview the document with knowhere_preview_document.",
      ];
      return textResult(report.join("\n"));
    } catch (error) {
      const errorText = formatErrorMessage(error);
      const jobSuffix = createdJobId ? ` (job \`${createdJobId}\`)` : "";
      await notify(`Tracker: failed to ingest \`${progressLabel}\`${jobSuffix}: ${errorText}`);
      throw error;
    }
  };

  return {
    name: "knowhere_ingest_document",
    label: "Knowhere Ingest",
    description: "Parse a local file or remote URL with Knowhere and store the result in the current scope. This uploads the document to the Knowhere API, waits for parsing to complete, downloads the result package, and extracts it locally. After ingestion, use knowhere_preview_document to see the document structure, knowhere_grep to search its content, or knowhere_read_result_file to access raw result files. Provide either filePath or url, not both.",
    parameters,
    execute,
  };
}
452
/**
 * Builds the `knowhere_get_job_status` tool definition.
 *
 * The tool's `execute` handler fetches the job with `client.getJob`, lists the
 * stored documents of the current scope, and returns a text report with the
 * job's fields plus the stored documents whose `jobId` matches.
 *
 * @param {object} params
 * @param {object} params.api - Host API (used for the client logger).
 * @param {object} params.config - Plugin configuration.
 * @param {object} params.store - Document store (`resolveScope`, `listDocuments`).
 * @param {object} params.ctx - Tool context used to resolve the scope.
 * @returns {object} Tool definition with `name`, `label`, `description`,
 *   `parameters` and `execute`.
 */
function createJobStatusTool(params) {
  const parameters = {
    type: "object",
    additionalProperties: false,
    properties: {
      jobId: {
        type: "string",
        description: "Knowhere job ID, for example job_f6f12930906a.",
      },
    },
    required: ["jobId"],
  };

  const execute = async (_toolCallId, rawParams) => {
    const input = isRecord(rawParams) ? rawParams : {};
    const jobId = readString(input.jobId);
    if (!jobId) throw new Error("jobId is required.");
    assertKnowhereApiKey(params.config);

    const client = createClient({ api: params.api, config: params.config });
    const scope = params.store.resolveScope(params.ctx);
    const job = await client.getJob(jobId);
    const scopedDocuments = await params.store.listDocuments(scope);
    const matchingDocuments = scopedDocuments.filter((document) => document.jobId === job.job_id);

    const report = [
      `Knowhere job ${job.job_id}`,
      `Scope: ${scope.label}`,
      `Status: ${job.status}`,
      `Source type: ${job.source_type}`,
    ];

    // Optional fields that are printed only when truthy.
    const optionalFields = [
      ["File", job.file_name],
      ["Data ID", job.data_id],
      ["Created", job.created_at],
      ["Model", job.model],
    ];
    for (const [label, fieldValue] of optionalFields) {
      if (fieldValue) report.push(`${label}: ${fieldValue}`);
    }
    if (typeof job.ocr_enabled === "boolean") report.push(`OCR enabled: ${job.ocr_enabled}`);
    if (typeof job.duration_seconds === "number") report.push(`Duration seconds: ${job.duration_seconds}`);
    if (typeof job.credits_spent === "number") report.push(`Credits spent: ${job.credits_spent}`);

    const progressText = formatJsonInline(job.progress);
    if (progressText) report.push(`Progress: ${progressText}`);

    if (job.error) {
      const jobError = job.error;
      const errorParts = [];
      if (jobError.code) errorParts.push(jobError.code);
      if (jobError.message) errorParts.push(jobError.message);
      if (jobError.request_id) errorParts.push(`request_id=${jobError.request_id}`);
      if (jobError.details !== void 0) {
        errorParts.push(`details=${formatJsonInline(jobError.details) || "unavailable"}`);
      }
      if (errorParts.length > 0) report.push(`Error: ${errorParts.join(" | ")}`);
    }

    if (job.result_url) {
      report.push(`Result URL: ${job.result_url}`);
      if (job.result_url_expires_at) report.push(`Result URL expires: ${job.result_url_expires_at}`);
    }

    if (matchingDocuments.length > 0) {
      report.push("Stored docs in scope:");
      matchingDocuments.forEach((document, position) => {
        report.push(`${position + 1}. ${deriveStoredDocumentDisplayName(document)} [${document.id}]`);
      });
    } else {
      report.push("Stored docs in scope: none");
    }

    return textResult(report.join("\n"));
  };

  return {
    name: "knowhere_get_job_status",
    label: "Knowhere Job Status",
    description: "Check the status of a Knowhere parsing job by job ID. Returns job status, progress, duration, credits spent, and whether the result is already stored locally. Use this to monitor a running job or inspect a past job before importing it with knowhere_import_completed_job.",
    parameters,
    execute,
  };
}
514
/**
 * Builds the `knowhere_list_jobs` tool definition.
 *
 * The tool's `execute` handler clamps `page` to at least 1 and `pageSize` to
 * 1–100, then requests the job page and the scope's stored documents in
 * parallel, groups the documents by `jobId`, and returns the list formatted by
 * `formatJobList`.
 *
 * @param {object} params
 * @param {object} params.api - Host API (used for the client logger).
 * @param {object} params.config - Plugin configuration.
 * @param {object} params.store - Document store (`resolveScope`, `listDocuments`).
 * @param {object} params.ctx - Tool context used to resolve the scope.
 * @returns {object} Tool definition with `name`, `label`, `description`,
 *   `parameters` and `execute`.
 */
function createJobListTool(params) {
  const parameters = {
    type: "object",
    additionalProperties: false,
    properties: {
      page: {
        type: "integer",
        minimum: 1,
        description: "Page number. Defaults to 1.",
      },
      pageSize: {
        type: "integer",
        minimum: 1,
        maximum: 100,
        description: "Page size. Defaults to 20.",
      },
      jobStatus: {
        type: "string",
        description: "Optional Knowhere job status filter.",
      },
      jobType: {
        type: "string",
        description: "Optional Knowhere job type filter.",
      },
      recentDays: {
        type: "integer",
        enum: [1, 7, 30],
        description: "Optional recent-days filter supported by Knowhere.",
      },
      startTime: {
        type: "string",
        description: "Optional ISO start timestamp. Overrides recentDays lower bound.",
      },
      endTime: {
        type: "string",
        description: "Optional ISO end timestamp.",
      },
    },
  };

  const execute = async (_toolCallId, rawParams) => {
    const input = isRecord(rawParams) ? rawParams : {};
    assertKnowhereApiKey(params.config);

    const requestedPage = Math.trunc(readNumber(input.page, 1));
    const page = Math.max(1, requestedPage);
    const requestedPageSize = Math.trunc(readNumber(input.pageSize, 20));
    const pageSize = Math.min(100, Math.max(1, requestedPageSize));

    const client = createClient({ api: params.api, config: params.config });
    const scope = params.store.resolveScope(params.ctx);

    const filters = {
      page,
      pageSize,
      jobStatus: readString(input.jobStatus),
      jobType: readString(input.jobType),
      recentDays: readRecentDays(input.recentDays),
      startTime: readString(input.startTime),
      endTime: readString(input.endTime),
    };
    // The remote job listing and the local document listing are independent, so run both at once.
    const [jobList, documents] = await Promise.all([
      client.listJobs(filters),
      params.store.listDocuments(scope),
    ]);

    const documentsByJobId = new Map();
    for (const document of documents) {
      if (!document.jobId) continue;
      if (!documentsByJobId.has(document.jobId)) documentsByJobId.set(document.jobId, []);
      documentsByJobId.get(document.jobId).push(document);
    }

    return textResult(formatJobList({ jobList, documentsByJobId }));
  };

  return {
    name: "knowhere_list_jobs",
    label: "Knowhere Job List",
    description: "List Knowhere parsing jobs for the current API key with optional filters. Returns job IDs, statuses, file names, and whether each job is already stored locally. Use this to find completed jobs that can be imported with knowhere_import_completed_job.",
    parameters,
    execute,
  };
}
594
/**
 * Builds the `knowhere_import_completed_job` tool descriptor.
 *
 * When executed, the tool asks the API client for the result of a completed
 * job, saves it into the store under the scope resolved from `params.ctx`,
 * and returns a text summary of the stored document.
 *
 * @param {object} params - Dependencies read by the tool: `api`, `config`, `store`, `ctx`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createImportCompletedJobTool(params) {
	const parameters = {
		type: "object",
		additionalProperties: false,
		properties: {
			jobId: {
				type: "string",
				description: "Completed Knowhere job ID to import."
			},
			docId: {
				type: "string",
				description: "Optional stable local document ID to use in the store."
			},
			title: {
				type: "string",
				description: "Optional display title for the stored document."
			},
			tags: {
				type: "array",
				items: { type: "string" },
				description: "Optional tags stored alongside the imported document."
			},
			overwrite: {
				type: "boolean",
				description: "Replace an existing stored document with the same derived or explicit docId."
			}
		},
		required: ["jobId"]
	};

	/**
	 * @param {unknown} _toolCallId - Unused.
	 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
	 * @throws {Error} When `jobId` is missing.
	 */
	const execute = async (_toolCallId, rawParams) => {
		const input = isRecord(rawParams) ? rawParams : {};
		const jobId = readString(input.jobId);
		if (!jobId) throw new Error("jobId is required.");
		// The API key is validated only after the cheap argument check has passed.
		assertKnowhereApiKey(params.config);
		const client = createClient({
			api: params.api,
			config: params.config
		});
		const scope = params.store.resolveScope(params.ctx);
		const importResult = await client.getCompletedJobResult(jobId);
		const overwrite = readBoolean(input.overwrite, false);
		const { jobResult } = importResult;
		const importedJobId = jobResult.job_id;
		const tags = mergeTags(sanitizeStringArray(input.tags), ["history-imported", `job:${importedJobId}`]);
		const fileName = jobResult.file_name || null;
		// Anything other than an explicit "url" source is stored as a file source.
		const sourceType = jobResult.source_type === "url" ? "url" : "file";
		const storedDocument = await params.store.saveDownloadedDocument(scope, {
			sourceType,
			source: buildHistoryJobSource(importedJobId),
			sourceLabel: buildHistoryJobSourceLabel(importedJobId, fileName),
			fileName,
			docId: readString(input.docId),
			title: readString(input.title),
			dataId: jobResult.data_id || void 0,
			tags,
			job: importResult.job,
			jobResult,
			downloadedResult: importResult.downloadedResult
		}, { overwrite });
		const summaryLines = buildStoredDocumentSummaryLines({
			document: storedDocument,
			scopeLabel: scope.label,
			includeSource: true,
			includeJobId: true
		});
		const reportLines = [
			"Import complete.",
			...summaryLines,
			`Imported from job: ${importedJobId}`,
			`Source type: ${jobResult.source_type}`,
			"Next: read manifest.json with knowhere_read_result_file or preview the document with knowhere_preview_document."
		];
		return textResult(reportLines.join("\n"));
	};

	return {
		name: "knowhere_import_completed_job",
		label: "Knowhere Import Completed Job",
		description: "Import a previously completed Knowhere job into the current scope. Downloads the result package and extracts it locally, making it available for knowhere_preview_document, knowhere_grep, and knowhere_read_result_file. Use knowhere_list_jobs to find available completed jobs.",
		parameters,
		execute
	};
}
669
/** Chunk fields searched when a grep condition does not name a target. */
const GREP_TEXT_TARGETS = [
	"chunk.content",
	"chunk.summary",
	"chunk.keywords",
	"chunk.path"
];
/** Every chunk field a grep condition may name: the text fields plus type and chunk ID. */
const GREP_VALID_TARGETS = new Set([
	...GREP_TEXT_TARGETS,
	"chunk.type",
	"chunk.chunkId"
]);
683
/**
 * Converts the raw `conditions` tool argument into a list of condition objects.
 *
 * Non-array input yields an empty list and non-record entries are skipped.
 * Each resulting object always has `pattern` (a non-string pattern becomes
 * `""`), and carries `target`, `regex` and `caseSensitive` only when the entry
 * supplied a usable value for them.
 *
 * @param {unknown} raw - Raw `conditions` value from the tool call.
 * @returns {Array<object>} Parsed conditions in input order.
 * @throws {Error} When an entry names a target outside `GREP_VALID_TARGETS`.
 */
function parseGrepConditions(raw) {
	if (!Array.isArray(raw)) return [];
	const conditions = [];
	for (const entry of raw) {
		if (!isRecord(entry)) continue;
		const target = readString(entry.target);
		if (target && !GREP_VALID_TARGETS.has(target)) {
			throw new Error(`Invalid grep target: "${target}". Valid targets: ${[...GREP_VALID_TARGETS].join(", ")}`);
		}
		// Keys are added in the order target, pattern, regex, caseSensitive.
		const condition = {};
		if (target) condition.target = target;
		condition.pattern = typeof entry.pattern === "string" ? entry.pattern : "";
		if (typeof entry.regex === "boolean") condition.regex = entry.regex;
		if (typeof entry.caseSensitive === "boolean") condition.caseSensitive = entry.caseSensitive;
		conditions.push(condition);
	}
	return conditions;
}
697
/**
 * Reads the raw value of a grep target from a chunk.
 *
 * Text fields fall back to `""` (or `[]` for keywords) when missing. The
 * scalar `chunk.type` and `chunk.chunkId` fields are also normalized to a
 * string, so a chunk lacking either one cannot hand `undefined` or `null` to
 * the string matcher.
 *
 * @param {object} chunk - Chunk record to read from.
 * @param {string} target - One of the `chunk.*` target names.
 * @returns {string | string[]} The field value; `""` for an unknown target.
 */
function resolveGrepFieldValue(chunk, target) {
	switch (target) {
		case "chunk.content": return chunk.content || "";
		case "chunk.summary": return chunk.summary || "";
		case "chunk.keywords": return chunk.keywords || [];
		case "chunk.path": return chunk.path || "";
		// `== null` covers both null and undefined; String() leaves real strings untouched.
		case "chunk.type": return chunk.type == null ? "" : String(chunk.type);
		case "chunk.chunkId": return chunk.chunkId == null ? "" : String(chunk.chunkId);
		default: return "";
	}
}
708
/**
 * Tests one piece of text against a grep pattern.
 *
 * An empty pattern matches everything. In regex mode the pattern is compiled
 * with the `i` flag unless `caseSensitive` is set; otherwise a substring test
 * is used, lower-casing both sides when case-insensitive.
 *
 * @param {unknown} text - Text to search; non-string values are coerced (null/undefined become `""`).
 * @param {string} pattern - Substring or regular-expression source.
 * @param {boolean} useRegex - Interpret `pattern` as a regular expression.
 * @param {boolean} caseSensitive - Match case exactly.
 * @returns {boolean} Whether the text matches.
 * @throws {SyntaxError} When `useRegex` is set and `pattern` is not a valid regular expression.
 */
function testGrepMatch(text, pattern, useRegex, caseSensitive) {
	if (pattern === "") return true;
	// Guard against non-string field values so matching never dies on `.includes`/`.toLowerCase`.
	const haystack = typeof text === "string" ? text : String(text ?? "");
	if (useRegex) {
		let matcher;
		try {
			matcher = new RegExp(pattern, caseSensitive ? "" : "i");
		} catch (error) {
			// The pattern comes from the tool caller, so name it in the message
			// instead of surfacing the bare engine error; the type stays SyntaxError.
			const reason = error instanceof Error ? error.message : String(error);
			throw new SyntaxError(`Invalid grep regex pattern "${pattern}": ${reason}`, { cause: error });
		}
		return matcher.test(haystack);
	}
	if (caseSensitive) return haystack.includes(pattern);
	return haystack.toLowerCase().includes(pattern.toLowerCase());
}
717
/**
 * Evaluates one grep condition against a chunk.
 *
 * The condition's own `regex` / `caseSensitive` settings win over the outer
 * defaults. With no `target`, every entry of `GREP_TEXT_TARGETS` is tried.
 * `chunk.type` and `chunk.chunkId` are always read straight from the chunk;
 * other targets use the normalized value when one is present and fall back
 * to the raw field otherwise. Array values match when any element matches.
 *
 * @param {object} chunk - Chunk being tested.
 * @param {Map<string, string | string[]>} normalizedFields - Normalized field values keyed by target.
 * @param {object} condition - Parsed condition (`pattern`, optional `target`, `regex`, `caseSensitive`).
 * @param {boolean} outerRegex - Default regex mode.
 * @param {boolean} outerCaseSensitive - Default case sensitivity.
 * @returns {string[]} Targets on which the condition matched, in evaluation order.
 */
function testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive) {
	const useRegex = condition.regex ?? outerRegex;
	const caseSensitive = condition.caseSensitive ?? outerCaseSensitive;
	const candidates = condition.target ? [condition.target] : GREP_TEXT_TARGETS;
	const matches = (value) => testGrepMatch(value, condition.pattern, useRegex, caseSensitive);
	return candidates.filter((target) => {
		const isRawOnlyField = target === "chunk.type" || target === "chunk.chunkId";
		const value = isRawOnlyField
			? resolveGrepFieldValue(chunk, target)
			: normalizedFields.get(target) ?? resolveGrepFieldValue(chunk, target);
		return Array.isArray(value) ? value.some((item) => matches(item)) : matches(value);
	});
}
730
/**
 * Pre-computes the grep-normalized form of a chunk's text fields.
 *
 * Missing string fields are normalized from `""`; missing keywords produce
 * an empty list.
 *
 * @param {object} chunk - Chunk whose `content`, `summary`, `keywords` and `path` are read.
 * @returns {Map<string, string | string[]>} Normalized values keyed by `chunk.*` target name.
 */
function buildNormalizedFields(chunk) {
	return new Map([
		["chunk.content", normalizeForGrep(chunk.content || "")],
		["chunk.summary", normalizeForGrep(chunk.summary || "")],
		["chunk.keywords", (chunk.keywords || []).map((keyword) => normalizeForGrep(keyword))],
		["chunk.path", normalizeForGrep(chunk.path || "")]
	]);
}
738
/**
 * Builds the `knowhere_grep` tool descriptor.
 *
 * When executed, the tool loads the stored document payload for `docId`,
 * walks its chunks in browse order, and returns those for which every
 * condition matches, up to `maxResults`. `totalMatches` counts every matching
 * chunk, including those beyond the returned page.
 *
 * Matching and counting happen in a single pass over the chunks. This replaces
 * an earlier second loop that repeated the match logic and located its start
 * position with `indexOf` on the last returned chunk.
 *
 * @param {object} params - Dependencies read by the tool: `store`, `ctx`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createGrepTool(params) {
	return {
		name: "knowhere_grep",
		label: "Knowhere Grep",
		description: "Search a stored document's chunks with composable AND conditions. Returns matching chunks with content, summary, keywords, path, and chunkId. Supports substring and regex matching with text normalization (HTML stripping, LaTeX cleanup, unicode normalization). Omit conditions to list all chunks. Omit the target field in a condition to search across all text fields (content, summary, keywords, path) — this is the recommended default. When answering questions from results, cite the chunkId and path. Tip: set maxStringChars up to 20000 when you need full untruncated content from a small number of results (e.g., maxResults=1). The default 4000 may truncate long chunks.",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {
				docId: {
					type: "string",
					description: "Identifier of the stored document to search."
				},
				conditions: {
					type: "array",
					items: {
						type: "object",
						additionalProperties: false,
						properties: {
							target: {
								type: "string",
								enum: [...GREP_VALID_TARGETS],
								description: "Chunk field to search. Omit to search all text fields (content, summary, keywords, path) — this is the recommended default."
							},
							pattern: {
								type: "string",
								description: "Search pattern. Empty string matches all."
							},
							regex: {
								type: "boolean",
								description: "Use regex matching for this condition. Overrides outer regex default."
							},
							caseSensitive: {
								type: "boolean",
								description: "Case-sensitive matching for this condition. Overrides outer default."
							}
						},
						required: ["pattern"]
					},
					description: "ANDed search conditions. Each condition must match for a chunk to be returned. Default [] matches all chunks (useful for browsing). Omit target in a condition to search all text fields. Use multiple conditions to narrow results (e.g., path contains 'chapter 3' AND content contains 'algorithm')."
				},
				regex: {
					type: "boolean",
					description: "Default regex mode for all conditions. Defaults to false."
				},
				caseSensitive: {
					type: "boolean",
					description: "Default case-sensitivity for all conditions. Defaults to false."
				},
				includeContext: {
					type: "boolean",
					description: "Include sibling chunk IDs sharing the same document path for each matched chunk. Useful for navigating to adjacent chunks in the same section — re-query with a condition on chunk.chunkId to fetch a specific sibling."
				},
				maxResults: {
					type: "integer",
					minimum: 1,
					maximum: 50,
					description: "Maximum number of matching chunks to return. Defaults to 10. Use a low value (1–3) with high maxStringChars to read specific chunks in full. Use a higher value (10–50) with lower maxStringChars to scan broadly."
				},
				maxStringChars: {
					type: "integer",
					minimum: 100,
					maximum: 2e4,
					description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 when retrieving full content from a small number of results (e.g., reading a single chunk in full). Reduce below 4000 when scanning many results to save tokens."
				}
			},
			required: ["docId"]
		},
		/**
		 * @param {unknown} _toolCallId - Unused.
		 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
		 * @throws {Error} When `docId` is missing.
		 */
		execute: async (_toolCallId, rawParams) => {
			const paramsRecord = isRecord(rawParams) ? rawParams : {};
			const docId = readString(paramsRecord.docId);
			if (!docId) throw new Error("docId is required.");
			const scope = params.store.resolveScope(params.ctx);
			const payload = await params.store.loadDocumentPayload(scope, docId);
			if (!payload) return textResult(formatStoredDocumentNotFound(docId, scope.label));
			const conditions = parseGrepConditions(paramsRecord.conditions);
			const outerRegex = readBoolean(paramsRecord.regex, false);
			const outerCaseSensitive = readBoolean(paramsRecord.caseSensitive, false);
			const includeContext = readBoolean(paramsRecord.includeContext, false);
			// Clamp to the ranges advertised in the parameter schema.
			const maxResults = Math.min(50, Math.max(1, Math.trunc(readNumber(paramsRecord.maxResults, 10))));
			const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(paramsRecord.maxStringChars, 4e3))));
			// The path -> chunk IDs index is only built when sibling context was requested.
			const pathChunkIndex = includeContext ? new Map(payload.browseIndex.paths.map((p) => [p.path, p.chunkIds])) : void 0;
			const sortedChunks = sortChunksByBrowseOrder(payload.chunks, payload.browseIndex);

			// Returns the set of targets the chunk matched on, or null as soon as
			// one condition fails (conditions are ANDed).
			const matchAllConditions = (chunk) => {
				const normalizedFields = buildNormalizedFields(chunk);
				const matchedTargets = /* @__PURE__ */ new Set();
				for (const condition of conditions) {
					const matched = testGrepCondition(chunk, normalizedFields, condition, outerRegex, outerCaseSensitive);
					if (matched.length === 0) return null;
					for (const target of matched) matchedTargets.add(target);
				}
				return matchedTargets;
			};

			const results = [];
			let totalMatches = 0;
			for (const chunk of sortedChunks) {
				const matchedTargets = matchAllConditions(chunk);
				if (matchedTargets === null) continue;
				totalMatches++;
				// Past the page limit we keep scanning only to finish the total count.
				if (results.length >= maxResults) continue;
				const entry = {
					chunk,
					matchedOn: [...matchedTargets]
				};
				if (includeContext && pathChunkIndex && chunk.path) {
					const siblings = pathChunkIndex.get(chunk.path);
					if (siblings) entry.siblingChunkIds = siblings.filter((id) => id !== chunk.chunkId);
				}
				results.push(entry);
			}

			const truncated = truncateJsonValue(results.map((entry) => {
				const projected = {
					chunkId: entry.chunk.chunkId,
					type: entry.chunk.type,
					path: entry.chunk.path,
					content: entry.chunk.content,
					summary: entry.chunk.summary,
					keywords: entry.chunk.keywords,
					tokens: entry.chunk.tokens,
					assetFilePath: entry.chunk.assetFilePath,
					matchedOn: entry.matchedOn
				};
				if (entry.siblingChunkIds) projected.siblingChunkIds = entry.siblingChunkIds;
				return projected;
			}), maxStringChars);
			return formatJsonToolResult({
				totalMatches,
				returned: results.length,
				results: truncated.value,
				maxStringChars,
				truncatedStrings: truncated.truncated
			});
		}
	};
}
884
/**
 * Builds the `knowhere_read_result_file` tool descriptor.
 *
 * When executed, the tool looks the requested file up in the stored
 * document's browse index, reads it through the store, and returns it as
 * parsed JSON (`mode: "json"`), a CSV payload (`mode: "csv"`) or a text
 * payload (any other mode), with strings capped at `maxStringChars`.
 *
 * @param {object} params - Dependencies read by the tool: `store`, `ctx`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createReadResultFileTool(params) {
	const parameters = {
		type: "object",
		additionalProperties: false,
		properties: {
			docId: {
				type: "string",
				description: "Identifier of the stored document to read from."
			},
			filePath: {
				type: "string",
				description: "Relative path under the stored result directory, for example manifest.json or tables/table-1.html."
			},
			mode: {
				type: "string",
				enum: [
					"text",
					"json",
					"csv"
				],
				description: "text returns trimmed text, json parses JSON, and csv returns a raw CSV preview. Defaults to text."
			},
			maxStringChars: {
				type: "integer",
				minimum: 100,
				maximum: 2e4,
				description: "Maximum characters per string field before truncation. Defaults to 4000. Increase up to 20000 for large files like hierarchy.json or kb.csv."
			}
		},
		required: ["docId", "filePath"]
	};

	/**
	 * @param {unknown} _toolCallId - Unused.
	 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
	 * @throws {Error} When `docId` or `filePath` is missing, or when JSON mode is
	 *   requested for a file that does not parse as JSON.
	 */
	const execute = async (_toolCallId, rawParams) => {
		const input = isRecord(rawParams) ? rawParams : {};
		const docId = readString(input.docId);
		const filePath = normalizeResultFilePath(input.filePath);
		if (!docId) throw new Error("docId is required.");
		if (!filePath) throw new Error("filePath is required.");

		const scope = params.store.resolveScope(params.ctx);
		const documentNotFound = () => textResult(formatStoredDocumentNotFound(docId, scope.label));
		const fileNotFound = () => textResult([
			"Result file not found.",
			`File path: ${filePath}`,
			`Document ID: ${docId}`,
			`Scope: ${scope.label}`
		].join("\n"));

		const payload = await params.store.loadDocumentPayload(scope, docId);
		if (!payload) return documentNotFound();

		const resultFile = findResultFile(payload.browseIndex, filePath);
		if (!resultFile) return fileNotFound();
		if (!isTextReadableResultFile(resultFile)) {
			return textResult([
				"Result file is not readable as text through this tool.",
				`File path: ${filePath}`,
				`Kind: ${resultFile.kind}`,
				`Document ID: ${docId}`,
				`Scope: ${scope.label}`
			].join("\n"));
		}

		const storedFile = await params.store.readResultFile(scope, docId, filePath);
		if (!storedFile) return documentNotFound();
		// The file is listed in the index but the store returned no text for it.
		if (storedFile.text === null) return fileNotFound();

		const mode = readResultFileReadMode(input.mode);
		const maxStringChars = Math.min(2e4, Math.max(100, Math.trunc(readNumber(input.maxStringChars, 4e3))));
		const normalizedText = stripUtf8Bom(storedFile.text);

		// Built lazily so the envelope fields are read only after the file
		// content has been processed, in the key order callers receive.
		const describeFile = () => ({
			scope: scope.label,
			docId: payload.document.id,
			documentTitle: payload.document.title,
			file: resultFile,
			mode,
			maxStringChars
		});

		if (mode === "json") {
			let parsedJson;
			try {
				parsedJson = JSON.parse(normalizedText);
			} catch (error) {
				throw new Error(`Result file ${filePath} is not valid JSON. ${formatErrorMessage(error)}`, { cause: error });
			}
			const truncatedJson = truncateJsonValue(parsedJson, maxStringChars);
			return formatJsonToolResult({
				...describeFile(),
				truncatedStrings: truncatedJson.truncated,
				data: truncatedJson.value
			});
		}

		const buildPayload = mode === "csv" ? buildCsvFilePayload : buildTextFilePayload;
		const data = buildPayload(normalizedText, maxStringChars);
		return formatJsonToolResult({
			...describeFile(),
			data
		});
	};

	return {
		name: "knowhere_read_result_file",
		label: "Knowhere Read Result File",
		description: "Read a raw result file from the stored document's extracted ZIP package. Common files: manifest.json (parsing metadata), hierarchy.json (document structure), kb.csv (knowledge base export), or table HTML files (e.g., tables/table-1.html). Use mode='json' for JSON files, mode='csv' for CSV files, or mode='text' (default) for everything else. Increase maxStringChars (up to 20000) for large files.",
		parameters,
		execute
	};
}
985
/**
 * Builds the `knowhere_preview_document` tool descriptor.
 *
 * When executed, the tool loads the stored document payload and renders a
 * text overview: a header with title, ID, scope, source label and chunk
 * count, followed by a table of contents built from the browse index paths
 * whose `depth` is 1 and their `childPaths`. Each entry shows per-type chunk
 * counts and a one-line summary taken from the first chunk on that path that
 * has a non-empty summary or content.
 *
 * The "..." suffix is appended only when the one-line summary was actually
 * shortened: the length check uses the newline-collapsed text rather than the
 * raw text.
 *
 * @param {object} params - Dependencies read by the tool: `store`, `ctx`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createPreviewDocumentTool(params) {
	return {
		name: "knowhere_preview_document",
		label: "Knowhere Preview Document",
		description: "Get a structural overview of a stored Knowhere document. Returns the document metadata and a hierarchical table of contents showing sections, subsections, and chunk counts per path (text, image, table). Use this as the first step after identifying a docId to understand the document's structure before searching with knowhere_grep.",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: { docId: {
				type: "string",
				description: "Identifier of the stored document to preview."
			} },
			required: ["docId"]
		},
		/**
		 * @param {unknown} _toolCallId - Unused.
		 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
		 * @throws {Error} When `docId` is missing.
		 */
		execute: async (_toolCallId, rawParams) => {
			const docId = readString((isRecord(rawParams) ? rawParams : {}).docId);
			if (!docId) throw new Error("docId is required.");
			const scope = params.store.resolveScope(params.ctx);
			const payload = await params.store.loadDocumentPayload(scope, docId);
			if (!payload) return textResult(formatStoredDocumentNotFound(docId, scope.label));
			const { document } = payload;

			// First chunk with usable text wins for each path.
			const pathSummaryMap = /* @__PURE__ */ new Map();
			for (const chunk of payload.chunks) {
				if (!chunk.path || pathSummaryMap.has(chunk.path)) continue;
				const raw = (chunk.summary || chunk.content || "").trim();
				if (!raw) continue;
				const collapsed = raw.replace(/\n+/g, " ");
				// Decide on the ellipsis from the collapsed text: collapsing newline
				// runs can bring a long raw string under the limit.
				const oneLine = collapsed.length > PREVIEW_SUMMARY_MAX_CHARS
					? `${collapsed.slice(0, PREVIEW_SUMMARY_MAX_CHARS)}...`
					: collapsed;
				pathSummaryMap.set(chunk.path, oneLine);
			}

			const lines = [
				`"${document.title}" [${document.id}]`,
				`Scope: ${scope.label}`,
				`Source: ${document.sourceLabel}`,
				`Chunks: ${document.chunkCount}`
			];
			const pathByName = /* @__PURE__ */ new Map();
			for (const p of payload.browseIndex.paths) pathByName.set(p.path, p);
			const roots = payload.browseIndex.paths.filter((p) => p.depth === 1);
			if (roots.length > 0) {
				lines.push("");
				lines.push("## Table of Contents");
				lines.push("");
				const renderTree = (pathRecord, indent) => {
					const prefix = " ".repeat(indent);
					// The label is the last segment of the path, split on "/" or "-->".
					const segments = pathRecord.path.split(/\/|-->/);
					const label = segments[segments.length - 1] || pathRecord.path;
					const counts = [];
					if (pathRecord.textChunkCount > 0) counts.push(`${pathRecord.textChunkCount} text`);
					if (pathRecord.imageChunkCount > 0) counts.push(`${pathRecord.imageChunkCount} img`);
					if (pathRecord.tableChunkCount > 0) counts.push(`${pathRecord.tableChunkCount} tbl`);
					const countStr = counts.length > 0 ? ` (${counts.join(", ")})` : "";
					const summary = pathSummaryMap.get(pathRecord.path);
					const summaryStr = summary ? ` — ${summary}` : "";
					lines.push(`${prefix}- ${label}${countStr}${summaryStr}`);
					// Child paths missing from the index are skipped; a record
					// without a child list is treated as a leaf.
					for (const childPath of pathRecord.childPaths ?? []) {
						const child = pathByName.get(childPath);
						if (child) renderTree(child, indent + 1);
					}
				};
				for (const root of roots) renderTree(root, 0);
			} else {
				lines.push("");
				lines.push("No structural paths available for this document.");
			}
			return textResult(lines.join("\n"));
		}
	};
}
1054
/**
 * Builds the `knowhere_list_documents` tool descriptor.
 *
 * The tool takes no arguments. When executed it lists the documents the store
 * holds for the scope resolved from `params.ctx` and returns them as
 * formatted text.
 *
 * @param {object} params - Dependencies read by the tool: `store`, `ctx`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createListTool(params) {
	const execute = async () => {
		const scope = params.store.resolveScope(params.ctx);
		const documents = await params.store.listDocuments(scope);
		return textResult(formatDocumentList(documents, scope.label));
	};
	return {
		name: "knowhere_list_documents",
		label: "Knowhere List",
		description: "List all Knowhere documents stored in the current scope. Returns each document's ID, title, source, chunk count, tags, and last-updated timestamp. Use this first to discover available documents and their docId values before calling other tools.",
		parameters: {
			type: "object",
			additionalProperties: false,
			properties: {}
		},
		execute
	};
}
1070
/**
 * Builds the `knowhere_remove_document` tool descriptor.
 *
 * When executed, the tool removes the document from the store for the scope
 * resolved from `params.ctx`. If a document was removed and an auto-grounding
 * controller is present, the controller is told to forget it.
 *
 * @param {object} params - Dependencies read by the tool: `store`, `ctx`, optional `autoGroundingController`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createRemoveTool(params) {
	const parameters = {
		type: "object",
		additionalProperties: false,
		properties: {
			docId: {
				type: "string",
				description: "Identifier of the stored document to remove."
			}
		},
		required: ["docId"]
	};

	/**
	 * @param {unknown} _toolCallId - Unused.
	 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
	 * @throws {Error} When `docId` is missing.
	 */
	const execute = async (_toolCallId, rawParams) => {
		const input = isRecord(rawParams) ? rawParams : {};
		const docId = readString(input.docId);
		if (!docId) throw new Error("docId is required.");
		const scope = params.store.resolveScope(params.ctx);
		const removed = await params.store.removeDocument(scope, docId);
		if (!removed) {
			return textResult(formatStoredDocumentNotFound(docId, scope.label));
		}
		// The controller is notified with the ID of the removed record, not the requested docId.
		params.autoGroundingController?.forgetDocument(scope, removed.id);
		const reportLines = [
			"Removed stored document.",
			`Document ID: ${removed.id}`,
			`Title: ${removed.title}`,
			`Scope: ${scope.label}`
		];
		return textResult(reportLines.join("\n"));
	};

	return {
		name: "knowhere_remove_document",
		label: "Knowhere Remove",
		description: "Remove a stored Knowhere document and all its extracted data from the current scope. This is irreversible — the document must be re-ingested or re-imported to restore it.",
		parameters,
		execute
	};
}
1100
/**
 * Builds the `knowhere_clear_scope` tool descriptor.
 *
 * When executed without `confirm: true`, the tool only returns a prompt to
 * set it. With confirmation, the auto-grounding controller (if present) is
 * told to forget the scope and the store then clears it.
 *
 * @param {object} params - Dependencies read by the tool: `store`, `ctx`, optional `autoGroundingController`.
 * @returns {object} Descriptor with `name`, `label`, `description`, `parameters` and an async `execute`.
 */
function createClearScopeTool(params) {
	const parameters = {
		type: "object",
		additionalProperties: false,
		properties: {
			confirm: {
				type: "boolean",
				description: "Must be true to clear the current scope."
			}
		}
	};

	/**
	 * @param {unknown} _toolCallId - Unused.
	 * @param {unknown} rawParams - Raw tool arguments; non-record values are treated as `{}`.
	 */
	const execute = async (_toolCallId, rawParams) => {
		const input = isRecord(rawParams) ? rawParams : {};
		// The scope is resolved first because the refusal message names it.
		const scope = params.store.resolveScope(params.ctx);
		const confirmed = readBoolean(input.confirm, false);
		if (!confirmed) {
			return textResult(`Set confirm=true to clear scope ${scope.label}.`);
		}
		params.autoGroundingController?.forgetScope(scope);
		const clearResult = await params.store.clearScope(scope);
		return textResult(formatScopeClearResult(clearResult, scope.label));
	};

	return {
		name: "knowhere_clear_scope",
		label: "Knowhere Clear Scope",
		description: "Remove all stored Knowhere documents from the current scope. This is irreversible and affects every document in the scope. Set confirm=true to execute.",
		parameters,
		execute
	};
}
1122
/**
 * Creates a factory that builds the full Knowhere tool list for a context.
 *
 * The returned function produces, in order: ingest, job list, job status,
 * import-completed-job, grep, read-result-file, preview, list, remove and
 * clear-scope tools. The first four receive `api`, `config`, `store` and
 * `ctx`; the next four receive `store` and `ctx`; the last two additionally
 * receive `autoGroundingController`.
 *
 * @param {object} params - Shared dependencies: `api`, `config`, `store`, optional `autoGroundingController`.
 * @returns {(ctx: unknown) => object[]} Function mapping a context to its tool descriptors.
 */
function createKnowhereToolFactory(params) {
	return (ctx) => {
		// Each tool gets its own freshly built dependency object.
		const remoteDeps = () => ({
			api: params.api,
			config: params.config,
			store: params.store,
			ctx
		});
		const storeDeps = () => ({
			store: params.store,
			ctx
		});
		const groundedDeps = () => ({
			store: params.store,
			ctx,
			autoGroundingController: params.autoGroundingController
		});
		const tools = [];
		tools.push(createIngestTool(remoteDeps()));
		tools.push(createJobListTool(remoteDeps()));
		tools.push(createJobStatusTool(remoteDeps()));
		tools.push(createImportCompletedJobTool(remoteDeps()));
		tools.push(createGrepTool(storeDeps()));
		tools.push(createReadResultFileTool(storeDeps()));
		tools.push(createPreviewDocumentTool(storeDeps()));
		tools.push(createListTool(storeDeps()));
		tools.push(createRemoveTool(groundedDeps()));
		tools.push(createClearScopeTool(groundedDeps()));
		return tools;
	};
}
1176
+ //#endregion
1177
+ export { createKnowhereToolFactory };