@ontos-ai/knowhere-sdk 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -43,6 +43,8 @@ __export(index_exports, {
43
43
  Jobs: () => Jobs,
44
44
  Knowhere: () => Knowhere,
45
45
  KnowhereError: () => KnowhereError,
46
+ Knowledge: () => Knowledge,
47
+ LocalKnowledgeStore: () => LocalKnowledgeStore,
46
48
  NetworkError: () => NetworkError,
47
49
  NotFoundError: () => NotFoundError,
48
50
  PaymentRequiredError: () => PaymentRequiredError,
@@ -59,7 +61,7 @@ __export(index_exports, {
59
61
  module.exports = __toCommonJS(index_exports);
60
62
 
61
63
  // src/client.ts
62
- var import_path2 = __toESM(require("path"));
64
+ var import_path3 = __toESM(require("path"));
63
65
 
64
66
  // src/lib/http-client.ts
65
67
  var import_axios = __toESM(require("axios"));
@@ -101,6 +103,7 @@ var NetworkError = class extends KnowhereError {
101
103
  this.cause = cause;
102
104
  this.name = "NetworkError";
103
105
  }
106
+ cause;
104
107
  };
105
108
  var TimeoutError = class extends NetworkError {
106
109
  constructor(message = "Request timed out") {
@@ -114,6 +117,7 @@ var PollingTimeoutError = class extends KnowhereError {
114
117
  this.elapsedMs = elapsedMs;
115
118
  this.name = "PollingTimeoutError";
116
119
  }
120
+ elapsedMs;
117
121
  };
118
122
  var ChecksumError = class extends KnowhereError {
119
123
  constructor(message = "Checksum verification failed", expected, actual) {
@@ -122,6 +126,8 @@ var ChecksumError = class extends KnowhereError {
122
126
  this.actual = actual;
123
127
  this.name = "ChecksumError";
124
128
  }
129
+ expected;
130
+ actual;
125
131
  };
126
132
  var ValidationError = class extends KnowhereError {
127
133
  constructor(message) {
@@ -147,6 +153,11 @@ var APIError = class extends KnowhereError {
147
153
  this.body = body;
148
154
  this.name = "APIError";
149
155
  }
156
+ statusCode;
157
+ code;
158
+ requestId;
159
+ details;
160
+ body;
150
161
  };
151
162
  var BadRequestError = class extends APIError {
152
163
  constructor(message, code, requestId, details, body) {
@@ -190,6 +201,7 @@ var RateLimitError = class extends APIError {
190
201
  this.retryAfter = retryAfter;
191
202
  this.name = "RateLimitError";
192
203
  }
204
+ retryAfter;
193
205
  };
194
206
  var InternalServerError = class extends APIError {
195
207
  constructor(message = "Internal server error", code, requestId, details, body) {
@@ -245,11 +257,13 @@ var JobFailedError = class extends KnowhereError {
245
257
  this.jobResult = jobResult;
246
258
  this.name = "JobFailedError";
247
259
  }
260
+ code;
261
+ jobResult;
248
262
  };
249
263
 
250
264
  // src/lib/utils.ts
251
265
  function sleep(ms) {
252
- return new Promise((resolve) => setTimeout(resolve, ms));
266
+ return new Promise((resolve2) => setTimeout(resolve2, ms));
253
267
  }
254
268
  function snakeToCamel(str) {
255
269
  return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
@@ -355,8 +369,8 @@ function enrichParseResult(parseResult2, scope) {
355
369
  }
356
370
  return parseResult2;
357
371
  }
358
- function sanitizePath(path2) {
359
- let sanitized = path2.replace(/^\/+/, "");
372
+ function sanitizePath(path3) {
373
+ let sanitized = path3.replace(/^\/+/, "");
360
374
  sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
361
375
  sanitized = sanitized.replace(/\\/g, "/");
362
376
  return sanitized;
@@ -470,7 +484,7 @@ async function withRetry(fn, maxRetries, onRetry) {
470
484
  if (onRetry) {
471
485
  onRetry(attempt + 1, error);
472
486
  }
473
- await new Promise((resolve) => setTimeout(resolve, delay));
487
+ await new Promise((resolve2) => setTimeout(resolve2, delay));
474
488
  }
475
489
  }
476
490
  throw lastError;
@@ -483,17 +497,19 @@ var HttpClient = class {
483
497
  uploadTimeout;
484
498
  httpAgent;
485
499
  httpsAgent;
500
+ authTokenProvider;
486
501
  constructor(options) {
487
502
  this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
488
503
  this.uploadTimeout = options.uploadTimeout ?? 6e5;
489
504
  this.httpAgent = options.httpAgent;
490
505
  this.httpsAgent = options.httpsAgent;
506
+ this.authTokenProvider = options.authTokenProvider;
491
507
  this.axios = import_axios.default.create({
492
508
  baseURL: options.baseURL,
493
509
  timeout: options.timeout ?? DEFAULT_TIMEOUT,
494
510
  headers: {
495
511
  "User-Agent": `knowhere-node-sdk/${VERSION}`,
496
- Authorization: `Bearer ${options.apiKey}`,
512
+ ...options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : {},
497
513
  "Content-Type": "application/json",
498
514
  ...options.defaultHeaders
499
515
  },
@@ -505,6 +521,9 @@ var HttpClient = class {
505
521
  setupInterceptors() {
506
522
  this.axios.interceptors.request.use(
507
523
  (config) => {
524
+ if (this.authTokenProvider) {
525
+ return this.attachDynamicAuthorization(config);
526
+ }
508
527
  if (config.data && typeof config.data === "object") {
509
528
  config.data = keysToSnake(config.data);
510
529
  }
@@ -528,6 +547,19 @@ var HttpClient = class {
528
547
  }
529
548
  );
530
549
  }
550
+ async attachDynamicAuthorization(config) {
551
+ const token = await this.authTokenProvider?.();
552
+ if (!token) {
553
+ throw new ValidationError("Authentication token provider returned an empty token");
554
+ }
555
+ const headers = import_axios.AxiosHeaders.from(config.headers);
556
+ headers.set("Authorization", `Bearer ${token}`);
557
+ config.headers = headers;
558
+ if (config.data && typeof config.data === "object") {
559
+ config.data = keysToSnake(config.data);
560
+ }
561
+ return config;
562
+ }
531
563
  handleError(error) {
532
564
  if (!error.response) {
533
565
  if (error.code === "ECONNABORTED" || error.message.includes("timeout")) {
@@ -817,6 +849,9 @@ async function parseResult(httpClient, resultUrl, options) {
817
849
  const zipBuffer = await httpClient.download(resultUrl);
818
850
  if (options?.verifyChecksum !== false) {
819
851
  }
852
+ return parseResultBuffer(zipBuffer);
853
+ }
854
+ async function parseResultBuffer(zipBuffer) {
820
855
  const zip = await import_jszip.default.loadAsync(zipBuffer);
821
856
  const manifestFile = zip.file("manifest.json");
822
857
  if (!manifestFile) {
@@ -881,7 +916,7 @@ async function parseResult(httpClient, resultUrl, options) {
881
916
  if (hierarchyViewFile) {
882
917
  hierarchyViewHtml = await hierarchyViewFile.async("string");
883
918
  }
884
- const result = {
919
+ return createParseResult({
885
920
  manifest,
886
921
  chunks,
887
922
  docNav,
@@ -892,6 +927,136 @@ async function parseResult(httpClient, resultUrl, options) {
892
927
  hierarchy,
893
928
  tocHierarchies,
894
929
  kbCsv,
930
+ hierarchyViewHtml
931
+ });
932
+ }
933
+ async function parseResultDirectory(directory) {
934
+ const manifestContent = await readRequiredTextFile(directory, "manifest.json");
935
+ let manifest = JSON.parse(manifestContent);
936
+ manifest = keysToCamel(manifest);
937
+ manifest = parseDates(manifest);
938
+ const chunksContent = await readRequiredTextFile(directory, "chunks.json");
939
+ let chunksData = JSON.parse(chunksContent);
940
+ chunksData = keysToCamel(chunksData);
941
+ const rawChunks = extractChunks(chunksData);
942
+ const chunks = [];
943
+ for (const chunkData of rawChunks) {
944
+ chunks.push(await processDirectoryChunk(directory, chunkData));
945
+ }
946
+ const fullMarkdown = await readOptionalTextFile(directory, "full.md");
947
+ const rawDocNav = await readOptionalJsonFile(directory, "doc_nav.json");
948
+ const docNav = rawDocNav === void 0 ? void 0 : keysToCamel(rawDocNav);
949
+ const hierarchy = await readOptionalJsonFile(directory, "hierarchy.json");
950
+ const rawChunksSlim = await readOptionalJsonFile(directory, "chunks_slim.json");
951
+ const chunksSlim = rawChunksSlim === void 0 ? void 0 : extractSlimChunks(keysToCamel(rawChunksSlim));
952
+ const rawTocHierarchies = await readOptionalJsonFile(directory, "toc_hierarchies.json");
953
+ const tocHierarchies = rawTocHierarchies === void 0 ? void 0 : keysToCamel(rawTocHierarchies);
954
+ const kbCsv = await readOptionalTextFile(directory, "kb.csv");
955
+ const hierarchyViewHtml = await readOptionalTextFile(directory, "hierarchy_view.html");
956
+ return createParseResult({
957
+ manifest,
958
+ chunks,
959
+ docNav,
960
+ fullMarkdown,
961
+ rawZip: Buffer.alloc(0),
962
+ chunksSlim,
963
+ hierarchy,
964
+ tocHierarchies,
965
+ kbCsv,
966
+ hierarchyViewHtml
967
+ });
968
+ }
969
+ async function saveExpandedParseResult(result, directory) {
970
+ if (result.rawZip.length > 0) {
971
+ const didExtractZip = await tryExtractRawZip(result.rawZip, directory);
972
+ if (didExtractZip) {
973
+ return directory;
974
+ }
975
+ }
976
+ await import_fs2.promises.mkdir(directory, { recursive: true });
977
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "manifest.json"), JSON.stringify(result.manifest, null, 2));
978
+ if (result.docNav) {
979
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "doc_nav.json"), JSON.stringify(result.docNav, null, 2));
980
+ }
981
+ await import_fs2.promises.writeFile(
982
+ (0, import_path.join)(directory, "chunks.json"),
983
+ JSON.stringify(serializeChunks(result.chunks), null, 2)
984
+ );
985
+ if (result.chunksSlim) {
986
+ await import_fs2.promises.writeFile(
987
+ (0, import_path.join)(directory, "chunks_slim.json"),
988
+ JSON.stringify({ chunks: result.chunksSlim }, null, 2)
989
+ );
990
+ }
991
+ if (result.fullMarkdown) {
992
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "full.md"), result.fullMarkdown);
993
+ }
994
+ if (result.hierarchy) {
995
+ await import_fs2.promises.writeFile(
996
+ (0, import_path.join)(directory, "hierarchy.json"),
997
+ JSON.stringify(result.hierarchy, null, 2)
998
+ );
999
+ }
1000
+ if (result.tocHierarchies) {
1001
+ await import_fs2.promises.writeFile(
1002
+ (0, import_path.join)(directory, "toc_hierarchies.json"),
1003
+ JSON.stringify(result.tocHierarchies, null, 2)
1004
+ );
1005
+ }
1006
+ if (result.kbCsv) {
1007
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "kb.csv"), result.kbCsv);
1008
+ }
1009
+ if (result.hierarchyViewHtml) {
1010
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "hierarchy_view.html"), result.hierarchyViewHtml);
1011
+ }
1012
+ for (const imageChunk of result.imageChunks) {
1013
+ await writeBinaryAsset(directory, imageChunk.filePath, imageChunk.data);
1014
+ }
1015
+ for (const tableChunk of result.tableChunks) {
1016
+ await writeTextAsset(directory, tableChunk.filePath, tableChunk.html);
1017
+ }
1018
+ return directory;
1019
+ }
1020
+ async function tryExtractRawZip(zipBuffer, directory) {
1021
+ try {
1022
+ const zip = await import_jszip.default.loadAsync(zipBuffer);
1023
+ await import_fs2.promises.mkdir(directory, { recursive: true });
1024
+ for (const entry of Object.values(zip.files)) {
1025
+ if (entry.dir || entry.name === "result.zip") {
1026
+ continue;
1027
+ }
1028
+ const outputPath = resolveAssetPath(directory, entry.name);
1029
+ await import_fs2.promises.mkdir((0, import_path.dirname)(outputPath), { recursive: true });
1030
+ await import_fs2.promises.writeFile(outputPath, await entry.async("nodebuffer"));
1031
+ }
1032
+ return true;
1033
+ } catch {
1034
+ return false;
1035
+ }
1036
+ }
1037
+ function createParseResult(parts) {
1038
+ const {
1039
+ manifest,
1040
+ chunks,
1041
+ docNav,
1042
+ fullMarkdown,
1043
+ rawZip,
1044
+ chunksSlim,
1045
+ hierarchy,
1046
+ tocHierarchies,
1047
+ kbCsv,
1048
+ hierarchyViewHtml
1049
+ } = parts;
1050
+ return {
1051
+ manifest,
1052
+ chunks,
1053
+ docNav,
1054
+ fullMarkdown,
1055
+ rawZip,
1056
+ chunksSlim,
1057
+ hierarchy,
1058
+ tocHierarchies,
1059
+ kbCsv,
895
1060
  hierarchyViewHtml,
896
1061
  get textChunks() {
897
1062
  return chunks.filter((c) => c.type === "text");
@@ -948,11 +1113,10 @@ async function parseResult(httpClient, resultUrl, options) {
948
1113
  for (const tableChunk of this.tableChunks) {
949
1114
  await tableChunk.save(directory);
950
1115
  }
951
- await import_fs2.promises.writeFile((0, import_path.join)(directory, "result.zip"), zipBuffer);
1116
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "result.zip"), rawZip);
952
1117
  return directory;
953
1118
  }
954
1119
  };
955
- return result;
956
1120
  }
957
1121
  function extractChunks(payload) {
958
1122
  if (Array.isArray(payload)) {
@@ -985,6 +1149,37 @@ function buildTextChunk(chunkData) {
985
1149
  metadata: chunkData.metadata ?? {}
986
1150
  };
987
1151
  }
1152
+ function buildImageChunk(chunkData, filePath, imageBuffer) {
1153
+ return {
1154
+ chunkId: chunkData.chunkId ?? "",
1155
+ type: "image",
1156
+ content: chunkData.content ?? "",
1157
+ path: chunkData.path ?? "",
1158
+ filePath,
1159
+ data: imageBuffer,
1160
+ metadata: chunkData.metadata ?? {},
1161
+ get format() {
1162
+ return getFileExtension(this.filePath);
1163
+ },
1164
+ async save(directory) {
1165
+ return writeBinaryAsset(directory, this.filePath, this.data);
1166
+ }
1167
+ };
1168
+ }
1169
+ function buildTableChunk(chunkData, filePath, html) {
1170
+ return {
1171
+ chunkId: chunkData.chunkId ?? "",
1172
+ type: "table",
1173
+ content: chunkData.content ?? "",
1174
+ path: chunkData.path ?? "",
1175
+ filePath,
1176
+ html,
1177
+ metadata: chunkData.metadata ?? {},
1178
+ async save(directory) {
1179
+ return writeTextAsset(directory, this.filePath, this.html);
1180
+ }
1181
+ };
1182
+ }
988
1183
  async function processChunk(zip, chunkData) {
989
1184
  if (chunkData.type === "text") {
990
1185
  return buildTextChunk(chunkData);
@@ -1000,26 +1195,7 @@ async function processChunk(zip, chunkData) {
1000
1195
  throw new KnowhereError(`Image file not found: ${filePath}`);
1001
1196
  }
1002
1197
  const imageBuffer = await imageFile.async("nodebuffer");
1003
- const enrichedChunk = {
1004
- chunkId: chunkData.chunkId ?? "",
1005
- type: "image",
1006
- content: chunkData.content ?? "",
1007
- path: chunkData.path ?? "",
1008
- filePath,
1009
- data: imageBuffer,
1010
- metadata: chunkData.metadata ?? {},
1011
- get format() {
1012
- return getFileExtension(this.filePath);
1013
- },
1014
- async save(directory) {
1015
- const outputPath = (0, import_path.join)(directory, sanitizePath(this.filePath));
1016
- const outputDir = (0, import_path.dirname)(outputPath);
1017
- await import_fs2.promises.mkdir(outputDir, { recursive: true });
1018
- await import_fs2.promises.writeFile(outputPath, this.data);
1019
- return outputPath;
1020
- }
1021
- };
1022
- return enrichedChunk;
1198
+ return buildImageChunk(chunkData, filePath, imageBuffer);
1023
1199
  }
1024
1200
  if (chunkData.type === "table") {
1025
1201
  const filePath = getChunkFilePath(chunkData);
@@ -1032,26 +1208,112 @@ async function processChunk(zip, chunkData) {
1032
1208
  throw new KnowhereError(`Table file not found: ${filePath}`);
1033
1209
  }
1034
1210
  const html = await htmlFile.async("string");
1035
- const enrichedChunk = {
1036
- chunkId: chunkData.chunkId ?? "",
1037
- type: "table",
1038
- content: chunkData.content ?? "",
1039
- path: chunkData.path ?? "",
1040
- filePath,
1041
- html,
1042
- metadata: chunkData.metadata ?? {},
1043
- async save(directory) {
1044
- const outputPath = (0, import_path.join)(directory, sanitizePath(this.filePath));
1045
- const outputDir = (0, import_path.dirname)(outputPath);
1046
- await import_fs2.promises.mkdir(outputDir, { recursive: true });
1047
- await import_fs2.promises.writeFile(outputPath, this.html);
1048
- return outputPath;
1211
+ return buildTableChunk(chunkData, filePath, html);
1212
+ }
1213
+ return buildTextChunk(chunkData);
1214
+ }
1215
+ async function processDirectoryChunk(directory, chunkData) {
1216
+ if (chunkData.type === "text") {
1217
+ return buildTextChunk(chunkData);
1218
+ }
1219
+ if (chunkData.type === "image") {
1220
+ const filePath = getChunkFilePath(chunkData);
1221
+ if (!filePath) {
1222
+ throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
1223
+ }
1224
+ try {
1225
+ const imageBuffer = await import_fs2.promises.readFile(resolveAssetPath(directory, filePath));
1226
+ return buildImageChunk(chunkData, filePath, imageBuffer);
1227
+ } catch (error) {
1228
+ if (isMissingFileError(error)) {
1229
+ throw new KnowhereError(`Image file not found: ${filePath}`);
1049
1230
  }
1050
- };
1051
- return enrichedChunk;
1231
+ throw error;
1232
+ }
1233
+ }
1234
+ if (chunkData.type === "table") {
1235
+ const filePath = getChunkFilePath(chunkData);
1236
+ if (!filePath) {
1237
+ throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
1238
+ }
1239
+ try {
1240
+ const html = await import_fs2.promises.readFile(resolveAssetPath(directory, filePath), "utf8");
1241
+ return buildTableChunk(chunkData, filePath, html);
1242
+ } catch (error) {
1243
+ if (isMissingFileError(error)) {
1244
+ throw new KnowhereError(`Table file not found: ${filePath}`);
1245
+ }
1246
+ throw error;
1247
+ }
1052
1248
  }
1053
1249
  return buildTextChunk(chunkData);
1054
1250
  }
1251
+ function serializeChunks(chunks) {
1252
+ return {
1253
+ chunks: chunks.map((chunk) => {
1254
+ const rawChunk = {
1255
+ chunkId: chunk.chunkId,
1256
+ type: chunk.type,
1257
+ content: chunk.content,
1258
+ path: chunk.path,
1259
+ metadata: chunk.metadata
1260
+ };
1261
+ if (chunk.type === "image" || chunk.type === "table") {
1262
+ rawChunk.filePath = chunk.filePath;
1263
+ }
1264
+ return rawChunk;
1265
+ })
1266
+ };
1267
+ }
1268
+ async function readRequiredTextFile(directory, fileName) {
1269
+ try {
1270
+ return await import_fs2.promises.readFile((0, import_path.join)(directory, fileName), "utf8");
1271
+ } catch (error) {
1272
+ if (isMissingFileError(error)) {
1273
+ throw new KnowhereError(`${fileName} not found in result directory`);
1274
+ }
1275
+ throw error;
1276
+ }
1277
+ }
1278
+ async function readOptionalTextFile(directory, fileName) {
1279
+ try {
1280
+ return await import_fs2.promises.readFile((0, import_path.join)(directory, fileName), "utf8");
1281
+ } catch (error) {
1282
+ if (isMissingFileError(error)) {
1283
+ return void 0;
1284
+ }
1285
+ throw error;
1286
+ }
1287
+ }
1288
+ async function readOptionalJsonFile(directory, fileName) {
1289
+ const content = await readOptionalTextFile(directory, fileName);
1290
+ return content === void 0 ? void 0 : JSON.parse(content);
1291
+ }
1292
+ async function writeBinaryAsset(directory, filePath, data) {
1293
+ const outputPath = resolveAssetPath(directory, filePath);
1294
+ const outputDir = (0, import_path.dirname)(outputPath);
1295
+ await import_fs2.promises.mkdir(outputDir, { recursive: true });
1296
+ await import_fs2.promises.writeFile(outputPath, data);
1297
+ return outputPath;
1298
+ }
1299
+ async function writeTextAsset(directory, filePath, text) {
1300
+ const outputPath = resolveAssetPath(directory, filePath);
1301
+ const outputDir = (0, import_path.dirname)(outputPath);
1302
+ await import_fs2.promises.mkdir(outputDir, { recursive: true });
1303
+ await import_fs2.promises.writeFile(outputPath, text);
1304
+ return outputPath;
1305
+ }
1306
+ function resolveAssetPath(directory, filePath) {
1307
+ const root = (0, import_path.resolve)(directory);
1308
+ const outputPath = (0, import_path.resolve)(root, sanitizePath(filePath));
1309
+ if (outputPath !== root && !outputPath.startsWith(`${root}${import_path.sep}`)) {
1310
+ throw new KnowhereError(`Invalid result asset path: ${filePath}`);
1311
+ }
1312
+ return outputPath;
1313
+ }
1314
+ function isMissingFileError(error) {
1315
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
1316
+ }
1055
1317
 
1056
1318
  // src/resources/jobs.ts
1057
1319
  var Jobs = class extends BaseResource {
@@ -1060,11 +1322,7 @@ var Jobs = class extends BaseResource {
1060
1322
  * Create a new parsing job
1061
1323
  */
1062
1324
  async create(params) {
1063
- const job = await this.httpClient.post(
1064
- "/v1/jobs",
1065
- params
1066
- );
1067
- delete job.documentId;
1325
+ const job = await this.httpClient.post("/v1/jobs", params);
1068
1326
  if (job.uploadUrl) {
1069
1327
  this.pendingUploadJobs.set(job.jobId, job);
1070
1328
  }
@@ -1248,22 +1506,741 @@ var Documents = class extends BaseResource {
1248
1506
  }
1249
1507
  };
1250
1508
 
1509
+ // src/knowledge/local-store.ts
1510
+ var import_crypto = require("crypto");
1511
+ var import_os = __toESM(require("os"));
1512
+ var import_fs3 = require("fs");
1513
+ var import_path2 = __toESM(require("path"));
1514
+ var STORE_VERSION = 1;
1515
+ var LOCAL_DOCUMENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
1516
+ var LocalKnowledgeStore = class {
1517
+ cacheDirectory;
1518
+ indexPath;
1519
+ resultCache = /* @__PURE__ */ new Map();
1520
+ constructor(cacheDirectory) {
1521
+ this.cacheDirectory = cacheDirectory ?? import_path2.default.join(import_os.default.homedir(), ".knowhere-node-sdk", "knowledge");
1522
+ this.indexPath = import_path2.default.join(this.cacheDirectory, "index.json");
1523
+ }
1524
+ async saveResult(result, options) {
1525
+ await import_fs3.promises.mkdir(this.cacheDirectory, { recursive: true });
1526
+ const now = /* @__PURE__ */ new Date();
1527
+ const index = await this.readIndex();
1528
+ const localDocumentId = validateLocalDocumentId(
1529
+ options?.localDocumentId ?? createLocalDocumentId(result)
1530
+ );
1531
+ const resultDirectoryPath = this.getResultDirectoryPath(localDocumentId);
1532
+ await import_fs3.promises.rm(resultDirectoryPath, { recursive: true, force: true });
1533
+ await saveExpandedParseResult(result, resultDirectoryPath);
1534
+ this.resultCache.set(localDocumentId, result);
1535
+ const existing = index.documents.find(
1536
+ (document) => document.localDocumentId === localDocumentId
1537
+ );
1538
+ const stored = {
1539
+ localDocumentId,
1540
+ jobId: result.jobId,
1541
+ documentId: result.documentId,
1542
+ namespace: result.namespace,
1543
+ sourceFileName: result.manifest.sourceFileName,
1544
+ chunkCount: result.chunks.length,
1545
+ typeCounts: countChunkTypes(result),
1546
+ resultDirectoryPath,
1547
+ createdAt: existing?.createdAt ?? now.toISOString(),
1548
+ updatedAt: now.toISOString()
1549
+ };
1550
+ const nextDocuments = [
1551
+ stored,
1552
+ ...index.documents.filter((document) => document.localDocumentId !== localDocumentId)
1553
+ ];
1554
+ const asyncParseJobs = (index.asyncParseJobs ?? []).map(
1555
+ (job) => job.jobId === result.jobId ? {
1556
+ ...job,
1557
+ localDocumentId,
1558
+ cacheStatus: "cached",
1559
+ updatedAt: now.toISOString()
1560
+ } : job
1561
+ );
1562
+ await this.writeIndex({
1563
+ version: STORE_VERSION,
1564
+ documents: nextDocuments,
1565
+ asyncParseJobs
1566
+ });
1567
+ return toLocalKnowledgeDocument(stored);
1568
+ }
1569
+ async saveAsyncParseJob(params) {
1570
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1571
+ const index = await this.readIndex();
1572
+ const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
1573
+ const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
1574
+ const stored = {
1575
+ jobId: params.jobId,
1576
+ localDocumentId: localDocumentId ?? existing?.localDocumentId,
1577
+ cacheStatus: existing?.cacheStatus ?? "pending",
1578
+ createdAt: existing?.createdAt ?? now,
1579
+ updatedAt: now
1580
+ };
1581
+ await this.writeIndex({
1582
+ version: STORE_VERSION,
1583
+ documents: index.documents,
1584
+ asyncParseJobs: [
1585
+ stored,
1586
+ ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
1587
+ ]
1588
+ });
1589
+ }
1590
+ async getAsyncParseJob(jobId) {
1591
+ const index = await this.readIndex();
1592
+ return (index.asyncParseJobs ?? []).find((job) => job.jobId === jobId);
1593
+ }
1594
+ async listRecoverableAsyncParseJobs() {
1595
+ const index = await this.readIndex();
1596
+ return (index.asyncParseJobs ?? []).filter((job) => job.cacheStatus === "pending" || job.cacheStatus === "not_available").map(toLocalKnowledgeAsyncParseJob);
1597
+ }
1598
+ async updateAsyncParseJobCacheStatus(params) {
1599
+ const index = await this.readIndex();
1600
+ const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
1601
+ const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
1602
+ if (!existing) {
1603
+ return;
1604
+ }
1605
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1606
+ const stored = {
1607
+ ...existing,
1608
+ localDocumentId: localDocumentId ?? existing.localDocumentId,
1609
+ cacheStatus: params.cacheStatus,
1610
+ updatedAt: now
1611
+ };
1612
+ await this.writeIndex({
1613
+ version: STORE_VERSION,
1614
+ documents: index.documents,
1615
+ asyncParseJobs: [
1616
+ stored,
1617
+ ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
1618
+ ]
1619
+ });
1620
+ }
1621
+ async listDocuments() {
1622
+ const index = await this.readIndex();
1623
+ return index.documents.map(toLocalKnowledgeDocument);
1624
+ }
1625
+ async getDocument(localDocumentId) {
1626
+ validateLocalDocumentId(localDocumentId);
1627
+ const index = await this.readIndex();
1628
+ const stored = index.documents.find((document) => document.localDocumentId === localDocumentId);
1629
+ return stored ? toLocalKnowledgeDocument(stored) : void 0;
1630
+ }
1631
+ async loadResult(localDocumentId) {
1632
+ const document = await this.getDocument(localDocumentId);
1633
+ if (!document) {
1634
+ throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
1635
+ }
1636
+ const cachedResult = this.resultCache.get(localDocumentId);
1637
+ if (cachedResult) {
1638
+ return { document, result: cachedResult };
1639
+ }
1640
+ const result = await this.loadStoredResult(document);
1641
+ result.namespace = document.namespace;
1642
+ result.documentId = document.documentId;
1643
+ this.resultCache.set(localDocumentId, result);
1644
+ return { document, result };
1645
+ }
1646
+ getResultDirectoryPath(localDocumentId) {
1647
+ const documentsDirectory = import_path2.default.resolve(this.cacheDirectory, "documents");
1648
+ const resultDirectoryPath = import_path2.default.resolve(documentsDirectory, localDocumentId);
1649
+ if (!isPathInsideDirectory(resultDirectoryPath, documentsDirectory)) {
1650
+ throw new Error(`Local Knowhere document ID resolves outside the cache: ${localDocumentId}`);
1651
+ }
1652
+ return resultDirectoryPath;
1653
+ }
1654
+ async loadStoredResult(document) {
1655
+ return parseResultDirectory(document.resultDirectoryPath);
1656
+ }
1657
+ async readIndex() {
1658
+ try {
1659
+ const raw = await import_fs3.promises.readFile(this.indexPath, "utf8");
1660
+ const parsed = JSON.parse(raw);
1661
+ if (parsed.version !== STORE_VERSION || !Array.isArray(parsed.documents)) {
1662
+ return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
1663
+ }
1664
+ return {
1665
+ version: STORE_VERSION,
1666
+ documents: parsed.documents,
1667
+ asyncParseJobs: Array.isArray(parsed.asyncParseJobs) ? parsed.asyncParseJobs : []
1668
+ };
1669
+ } catch (error) {
1670
+ if (isMissingFileError2(error)) {
1671
+ return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
1672
+ }
1673
+ throw error;
1674
+ }
1675
+ }
1676
+ async writeIndex(index) {
1677
+ await import_fs3.promises.mkdir(this.cacheDirectory, { recursive: true });
1678
+ await import_fs3.promises.writeFile(this.indexPath, JSON.stringify(index, null, 2));
1679
+ }
1680
+ };
1681
+ function validateLocalDocumentId(localDocumentId) {
1682
+ if (!LOCAL_DOCUMENT_ID_PATTERN.test(localDocumentId) || localDocumentId.includes("..") || import_path2.default.basename(localDocumentId) !== localDocumentId) {
1683
+ throw new Error(
1684
+ "Local Knowhere document ID must be a safe slug containing only letters, numbers, dots, underscores, or hyphens"
1685
+ );
1686
+ }
1687
+ return localDocumentId;
1688
+ }
1689
+ function isPathInsideDirectory(targetPath, parentDirectory) {
1690
+ const relativePath = import_path2.default.relative(parentDirectory, targetPath);
1691
+ return relativePath.length === 0 || !relativePath.startsWith("..") && !import_path2.default.isAbsolute(relativePath);
1692
+ }
1693
+ function createLocalDocumentId(result) {
1694
+ const hash = (0, import_crypto.createHash)("sha256").update(result.jobId).update("\0").update(result.manifest.sourceFileName).digest("hex").slice(0, 16);
1695
+ return `local_${hash}`;
1696
+ }
1697
+ function countChunkTypes(result) {
1698
+ return result.chunks.reduce(
1699
+ (counts, chunk) => {
1700
+ counts[chunk.type] += 1;
1701
+ return counts;
1702
+ },
1703
+ { text: 0, image: 0, table: 0 }
1704
+ );
1705
+ }
1706
+ function toLocalKnowledgeDocument(stored) {
1707
+ return {
1708
+ localDocumentId: stored.localDocumentId,
1709
+ jobId: stored.jobId,
1710
+ documentId: stored.documentId,
1711
+ namespace: stored.namespace,
1712
+ sourceFileName: stored.sourceFileName,
1713
+ chunkCount: stored.chunkCount,
1714
+ typeCounts: stored.typeCounts,
1715
+ resultDirectoryPath: stored.resultDirectoryPath,
1716
+ createdAt: new Date(stored.createdAt),
1717
+ updatedAt: new Date(stored.updatedAt)
1718
+ };
1719
+ }
1720
+ function toLocalKnowledgeAsyncParseJob(stored) {
1721
+ return {
1722
+ ...stored,
1723
+ createdAt: new Date(stored.createdAt),
1724
+ updatedAt: new Date(stored.updatedAt)
1725
+ };
1726
+ }
1727
+ function isMissingFileError2(error) {
1728
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
1729
+ }
1730
+
1731
+ // src/knowledge/knowledge.ts
1732
+ var DEFAULT_READ_LIMIT = 12;
1733
+ var MAX_READ_LIMIT = 40;
1734
+ var DEFAULT_GREP_LIMIT = 20;
1735
+ var MAX_GREP_LIMIT = 50;
1736
+ var DEFAULT_CONTEXT_CHARS = 80;
1737
/**
 * Local knowledge tools layered on a Knowhere client.
 *
 * Parse results are persisted through a LocalKnowledgeStore; outline, read
 * and grep requests are answered from that store, while search() forwards the
 * query to the client's retrieval resource and maps hits back to local
 * documents.
 */
var Knowledge = class _Knowledge {
  client;
  store;
  /**
   * @param {object} client - Knowhere client used for parse, jobs and retrieval calls.
   * @param {{ cacheDirectory?: string }} [options] - `cacheDirectory` is handed to LocalKnowledgeStore.
   */
  constructor(client, options) {
    this.client = client;
    this.store = new LocalKnowledgeStore(options?.cacheDirectory);
  }
  /** Returns a new Knowledge bound to the same client but a different cache directory. */
  withCacheDirectory(cacheDirectory) {
    return new _Knowledge(this.client, { cacheDirectory });
  }
  /** Parses through the client, saves the result in the store, and returns both. */
  async parse(params) {
    const result = await this.client.parse(params);
    const saveOptions = { localDocumentId: params.localDocumentId };
    const document = await this.store.saveResult(result, saveOptions);
    return { document, result };
  }
  /** Starts a parse job and records it in the store so it can be picked up later. */
  async startParse(params) {
    const job = await this.client.startParse(params);
    const { localDocumentId } = params;
    await this.store.saveAsyncParseJob({ jobId: job.jobId, localDocumentId });
    return { job, localDocumentId };
  }
  /** Fetches the remote job and resolves the state of its local cache entry. */
  async getJobStatus(jobId) {
    const job = await this.client.jobs.get(jobId);
    const cache = await this.resolveAsyncCache(jobId, job.isDone, job.isFailed);
    return { job, cache };
  }
  /** Re-checks every recoverable tracked job, one after another. */
  async recoverPendingAsyncParseJobs() {
    const pending = await this.store.listRecoverableAsyncParseJobs();
    const results = [];
    for (const { jobId } of pending) {
      const status = await this.getJobStatus(jobId);
      results.push(status);
    }
    return { checkedJobs: pending.length, results };
  }
  /** Loads a job's result through the client and saves it in the store. */
  async cacheJobResult(params) {
    const loadOptions = { verifyChecksum: params.verifyChecksum };
    const result = await this.client.jobs.load(params.jobId, loadOptions);
    const document = await this.store.saveResult(result, {
      localDocumentId: params.localDocumentId
    });
    return { document, result };
  }
  /**
   * Decides the cache status for a tracked job given the remote job flags.
   *
   * @returns {Promise<object>} An object whose `status` is one of "untracked",
   *   "already_cached", "failed", "pending", "cached" or "not_available".
   */
  async resolveAsyncCache(jobId, isDone, isFailed) {
    const tracked = await this.store.getAsyncParseJob(jobId);
    if (!tracked) {
      return { status: "untracked" };
    }
    const { localDocumentId } = tracked;
    if (tracked.cacheStatus === "cached" && localDocumentId) {
      const document = await this.store.getDocument(localDocumentId);
      if (document) {
        return { status: "already_cached", localDocumentId, document };
      }
      // Marked as cached but the document is missing: fall through and rebuild.
    }
    if (isFailed) {
      await this.store.updateAsyncParseJobCacheStatus({ jobId, cacheStatus: "failed" });
      return { status: "failed", localDocumentId };
    }
    if (!isDone) {
      return { status: "pending", localDocumentId };
    }
    try {
      const { document } = await this.cacheJobResult({ jobId, localDocumentId });
      return {
        status: "cached",
        localDocumentId: document.localDocumentId,
        document
      };
    } catch (error) {
      await this.store.updateAsyncParseJobCacheStatus({
        jobId,
        cacheStatus: "not_available"
      });
      const message = error instanceof Error ? error.message : String(error);
      return { status: "not_available", localDocumentId, error: message };
    }
  }
  /** Lists the documents known to the store. */
  async listDocuments() {
    return this.store.listDocuments();
  }
  /** Builds the flat section list and the section tree for a stored document. */
  async getDocumentOutline(localDocumentId) {
    const { document, result } = await this.store.loadResult(localDocumentId);
    const chunks = indexChunks(result);
    const sections = buildFlatSections(result, chunks);
    const navSections = result.docNav?.sections;
    let sectionTree;
    if (navSections && navSections.length > 0) {
      sectionTree = navSections.map(
        (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
      );
    } else {
      // No navigation data: derive the tree from the flat section paths.
      sectionTree = nestSections(sections);
    }
    return {
      document,
      totalChunks: chunks.length,
      typeCounts: document.typeCounts,
      sections,
      sectionTree
    };
  }
  /** Returns a bounded window of chunks plus the position that follows it, if any. */
  async readChunks(params) {
    const { document, result } = await this.store.loadResult(params.localDocumentId);
    const limit = clampLimit(params.limit, DEFAULT_READ_LIMIT, MAX_READ_LIMIT);
    const inScope = indexChunks(result).filter((chunk) => matchesReadScope(chunk, params));
    const selected = selectReadWindow(inScope, params, limit);
    const tail = selected[selected.length - 1];
    let nextChunk;
    if (tail && selected.length < inScope.length) {
      nextChunk = inScope[inScope.indexOf(tail) + 1]?.position;
    }
    return {
      document,
      chunks: selected.map(toReadChunk),
      nextChunk
    };
  }
  /**
   * Scans scoped chunks for a literal or regex pattern.
   *
   * @throws {ValidationError} When `params.pattern` is missing or empty.
   */
  async grepChunks(params) {
    if (!params.pattern) {
      throw new ValidationError("pattern is required");
    }
    const { document, result } = await this.store.loadResult(params.localDocumentId);
    const maxResults = clampLimit(params.maxResults, DEFAULT_GREP_LIMIT, MAX_GREP_LIMIT);
    const contextChars = params.contextChars ?? DEFAULT_CONTEXT_CHARS;
    const matcher = createMatcher(params);
    const scopedChunks = indexChunks(result).filter((chunk) => matchesGrepScope(chunk, params));
    const matches = [];
    for (let i = 0; i < scopedChunks.length; i += 1) {
      const chunk = scopedChunks[i];
      for (const { startOffset, endOffset } of matcher(chunk.content)) {
        matches.push({
          position: chunk.position,
          chunkId: chunk.chunkId,
          chunkType: chunk.chunkType,
          sectionPath: chunk.sectionPath,
          sourceChunkPath: chunk.sourceChunkPath,
          filePath: chunk.filePath,
          startOffset,
          endOffset,
          snippet: buildSnippet(chunk.content, startOffset, endOffset, contextChars)
        });
        if (matches.length >= maxResults) {
          // Stop as soon as the cap is hit; i + 1 chunks were scanned so far.
          return { document, matches, scannedChunks: i + 1, truncated: true };
        }
      }
    }
    return { document, matches, scannedChunks: scopedChunks.length, truncated: false };
  }
  /** Runs a remote retrieval query and annotates hits with local document ids. */
  async search(params) {
    const localDocuments = await this.resolveSearchDocuments(params.localDocumentIds);
    const rawResponse = await this.client.retrieval.query({
      query: params.query,
      namespace: params.namespace,
      topK: params.topK,
      useAgentic: params.useAgentic ?? false
    });
    // Index local documents by their server-side id; later entries win on duplicates.
    const documentByServerId = /* @__PURE__ */ new Map();
    for (const localDocument of localDocuments) {
      if (localDocument.documentId) {
        documentByServerId.set(localDocument.documentId, localDocument);
      }
    }
    const chunkReferences = rawResponse.referencedChunks.map((reference) => {
      const { documentId } = reference;
      return {
        localDocumentId: documentId ? documentByServerId.get(documentId)?.localDocumentId : void 0,
        documentId,
        chunkId: reference.chunkId,
        sectionPath: reference.sectionPath,
        chunkType: reference.chunkType
      };
    });
    const resultReferences = rawResponse.results.map(
      (result) => toResultReference(result, documentByServerId)
    );
    const results = rawResponse.results.map(
      (result) => toRemoteSearchResult(result, documentByServerId)
    );
    return {
      namespace: rawResponse.namespace,
      query: rawResponse.query,
      evidenceText: rawResponse.evidenceText,
      references: [...chunkReferences, ...resultReferences],
      results,
      rawResponse
    };
  }
  /** Returns all stored documents, or only those whose local id was requested. */
  async resolveSearchDocuments(localDocumentIds) {
    const documents = await this.store.listDocuments();
    const hasFilter = Boolean(localDocumentIds) && localDocumentIds.length > 0;
    if (!hasFilter) {
      return documents;
    }
    const requested = new Set(localDocumentIds);
    return documents.filter(({ localDocumentId }) => requested.has(localDocumentId));
  }
};
1951
/**
 * Wraps each parsed chunk in an indexed record.
 *
 * @param {object} result - Parse result exposing `chunks` and `manifest.sourceFileName`.
 * @returns {object[]} One record per chunk; `position` is the 1-based index in `result.chunks`.
 */
function indexChunks(result) {
  return result.chunks.map((chunk, index) => {
    const filePath = getChunkFilePath2(chunk);
    const { chunkId, type, content, path: sourceChunkPath, metadata } = chunk;
    const sectionPath = normalizeSectionPath(sourceChunkPath, result.manifest.sourceFileName);
    return {
      source: chunk,
      position: index + 1,
      chunkId,
      chunkType: type,
      content,
      sectionPath,
      sourceChunkPath,
      filePath,
      metadata
    };
  });
}
1967
/**
 * Resolves the file path attached to a chunk, if any.
 *
 * Image and table chunks expose it as `chunk.filePath`; every other chunk
 * type is looked up under `chunk.metadata.filePath`.
 *
 * @param {object} chunk - Parsed chunk with `type` and optional `filePath` / `metadata`.
 * @returns {string | undefined} The path for image/table chunks as stored; for
 *   other chunks the metadata path when it is a string, otherwise undefined.
 */
function getChunkFilePath2(chunk) {
  if (chunk.type === "image" || chunk.type === "table") {
    return chunk.filePath;
  }
  // A chunk without metadata simply has no file path; do not throw on it.
  const filePath = chunk.metadata?.filePath;
  return typeof filePath === "string" ? filePath : void 0;
}
1974
/**
 * Converts a slash-separated chunk path into a " / "-separated section path.
 *
 * - Empty input yields "".
 * - Paths starting with "images/" or "tables/" are returned untouched.
 * - When `sourceFileName` appears as a segment, everything up to and including
 *   it is dropped (the file name itself is returned if nothing follows).
 * - Otherwise the first segment is dropped, unless it is the only one.
 *
 * @param {string} path3 - Raw chunk or section path.
 * @param {string} [sourceFileName] - Source file name to strip from the path.
 * @returns {string} Normalized section path.
 */
function normalizeSectionPath(path3, sourceFileName) {
  if (!path3) {
    return "";
  }
  const isAssetPath = path3.startsWith("images/") || path3.startsWith("tables/");
  if (isAssetPath) {
    return path3;
  }
  const segments = path3.split("/").filter((segment) => segment !== "");
  const fileNameAt = sourceFileName ? segments.indexOf(sourceFileName) : -1;
  if (fileNameAt >= 0) {
    const remainder = segments.slice(fileNameAt + 1).join(" / ");
    return remainder || sourceFileName;
  }
  return segments.length <= 1 ? segments[0] ?? "" : segments.slice(1).join(" / ");
}
1993
/**
 * Produces the flat list of sections for a parse result.
 *
 * When the result carries navigation sections they are converted and
 * flattened; otherwise sections are derived by grouping chunks on their
 * section path (falling back to the raw chunk path) and ordered with
 * compareSections.
 *
 * @param {object} result - Parse result (optionally with `docNav.sections`).
 * @param {object[]} chunks - Indexed chunks for the same result.
 * @returns {object[]} Flat section list.
 */
function buildFlatSections(result, chunks) {
  const navSections = result.docNav?.sections;
  if (navSections && navSections.length > 0) {
    const converted = navSections.map(
      (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
    );
    return flattenSections(converted);
  }
  const sectionsByPath = /* @__PURE__ */ new Map();
  chunks.forEach((chunk) => {
    const key = chunk.sectionPath || chunk.sourceChunkPath;
    const known = sectionsByPath.get(key);
    if (known) {
      addChunkToSection(known, chunk);
      return;
    }
    sectionsByPath.set(key, createSectionFromChunk(key, chunk));
  });
  return Array.from(sectionsByPath.values()).sort(compareSections);
}
2013
/**
 * Converts a navigation section (and, recursively, its children) into a
 * knowledge section annotated with the chunks that fall inside it.
 *
 * @param {object} section - Navigation node with `path`, `title`, `level`, `summary`, `children`.
 * @param {object[]} chunks - Indexed chunks of the whole document.
 * @param {string} [sourceFileName] - Passed to normalizeSectionPath.
 * @returns {object} Section with chunk range, counts and converted children.
 */
function toKnowledgeSection(section, chunks, sourceFileName) {
  const sectionPath = normalizeSectionPath(section.path, sourceFileName);
  const members = chunks.filter((candidate) => isInSection(candidate.sectionPath, sectionPath));
  const children = [];
  for (const child of section.children) {
    children.push(toKnowledgeSection(child, chunks, sourceFileName));
  }
  const { title, level, summary } = section;
  return {
    sectionPath,
    sectionTitle: title,
    sectionLevel: level,
    summary,
    startChunk: minPosition(members),
    endChunk: maxPosition(members),
    chunkCount: members.length,
    typeCounts: countIndexedTypes(members),
    children
  };
}
2031
/**
 * Creates a one-chunk section for a " / "-separated section path.
 *
 * The title is the last path segment (or the path itself when there are no
 * segments) and the level is the segment count, never below 1.
 *
 * @param {string} pathValue - Section path used as the section key.
 * @param {object} chunk - Indexed chunk supplying `position` and `chunkType`.
 * @returns {object} New section with no children.
 */
function createSectionFromChunk(pathValue, chunk) {
  const segments = pathValue.split(" / ").filter((segment) => segment !== "");
  const depth = segments.length;
  const typeCounts = { text: 0, image: 0, table: 0 };
  typeCounts[chunk.chunkType] = 1;
  const { position } = chunk;
  return {
    sectionPath: pathValue,
    sectionTitle: segments[depth - 1] ?? pathValue,
    sectionLevel: depth > 1 ? depth : 1,
    startChunk: position,
    endChunk: position,
    chunkCount: 1,
    typeCounts,
    children: []
  };
}
2044
/**
 * Folds one more chunk into an existing section, in place.
 *
 * Widens the section's start/end chunk range to include the chunk's position,
 * bumps the chunk count and increments the per-type counter.
 *
 * @param {object} section - Section to mutate (`startChunk`, `endChunk`, `chunkCount`, `typeCounts`).
 * @param {object} chunk - Indexed chunk supplying `position` and `chunkType`.
 * @returns {void}
 */
function addChunkToSection(section, chunk) {
  const { position, chunkType } = chunk;
  section.startChunk = Math.min(section.startChunk ?? position, position);
  section.endChunk = Math.max(section.endChunk ?? position, position);
  section.chunkCount += 1;
  // A type the section has not seen yet has no counter; start it at 0 so the
  // increment does not produce NaN.
  section.typeCounts[chunkType] = (section.typeCounts[chunkType] ?? 0) + 1;
}
2050
/**
 * Flattens a section tree depth-first, each parent before its descendants.
 *
 * @param {object[]} sections - Sections whose `children` are arrays of sections.
 * @returns {object[]} New array holding the same section objects in pre-order.
 */
function flattenSections(sections) {
  const flat = [];
  const visit = (nodes) => {
    for (const node of nodes) {
      flat.push(node);
      visit(node.children);
    }
  };
  visit(sections);
  return flat;
}
2053
/**
 * Rebuilds a tree from a flat section list using the " / "-separated paths.
 *
 * Works on shallow copies (each with a fresh `children` array), so the input
 * sections are not modified. A section whose parent path is not in the list
 * becomes a root.
 *
 * @param {object[]} sections - Flat sections with `sectionPath`.
 * @returns {object[]} Root sections with nested `children`.
 */
function nestSections(sections) {
  const copies = sections.map((section) => ({ ...section, children: [] }));
  const copyByPath = /* @__PURE__ */ new Map();
  for (const copy of copies) {
    copyByPath.set(copy.sectionPath, copy);
  }
  const roots = [];
  copies.forEach((copy) => {
    const parentPath = getParentSectionPath(copy.sectionPath);
    const parent = parentPath ? copyByPath.get(parentPath) : void 0;
    const siblings = parent ? parent.children : roots;
    siblings.push(copy);
  });
  return roots;
}
2071
/**
 * Returns the path of the enclosing section, or undefined for a top-level
 * (single-segment or empty) path.
 *
 * @param {string} sectionPath - " / "-separated section path.
 * @returns {string | undefined} Parent path without the last segment.
 */
function getParentSectionPath(sectionPath) {
  const segments = sectionPath.split(" / ").filter((segment) => segment !== "");
  if (segments.length < 2) {
    return void 0;
  }
  segments.pop();
  return segments.join(" / ");
}
2078
/**
 * Sort comparator ordering sections by `startChunk`; sections without one
 * sort last.
 *
 * @returns {number} Negative, zero or positive per Array.prototype.sort.
 */
function compareSections(left, right) {
  const rank = (section) => section.startChunk ?? Number.MAX_SAFE_INTEGER;
  return rank(left) - rank(right);
}
2081
/**
 * Smallest `position` among the given chunks.
 *
 * Folds with Math.min pairwise instead of spreading the whole array into a
 * single call, because spreading a very large array can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * @param {object[]} chunks - Indexed chunks with a numeric `position`.
 * @returns {number | undefined} The minimum position, or undefined for an empty list.
 */
function minPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((lowest, chunk) => Math.min(lowest, chunk.position), Infinity);
}
2087
/**
 * Largest `position` among the given chunks.
 *
 * Folds with Math.max pairwise instead of spreading the whole array into a
 * single call, because spreading a very large array can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * @param {object[]} chunks - Indexed chunks with a numeric `position`.
 * @returns {number | undefined} The maximum position, or undefined for an empty list.
 */
function maxPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((highest, chunk) => Math.max(highest, chunk.position), -Infinity);
}
2093
/**
 * Counts chunks per chunk type.
 *
 * The result always contains `text`, `image` and `table`; any other chunk
 * type encountered gets its own counter starting from 0 rather than turning
 * into NaN.
 *
 * @param {object[]} chunks - Indexed chunks with a `chunkType`.
 * @returns {Record<string, number>} Counter per chunk type.
 */
function countIndexedTypes(chunks) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const { chunkType } of chunks) {
    counts[chunkType] = (counts[chunkType] ?? 0) + 1;
  }
  return counts;
}
2102
/**
 * Tells whether a chunk's section path is the given section or one nested
 * below it (i.e. it continues with " / " after the section path).
 *
 * @returns {boolean}
 */
function isInSection(chunkSectionPath, sectionPath) {
  if (chunkSectionPath === sectionPath) {
    return true;
  }
  const nestedPrefix = `${sectionPath} / `;
  return chunkSectionPath.startsWith(nestedPrefix);
}
2105
/**
 * Normalizes a caller-supplied limit.
 *
 * Missing (`undefined` / `null`) or non-numeric values fall back to
 * `defaultValue`; anything else is floored and clamped to `[1, maxValue]`.
 * Without the NaN guard a bad value would propagate as NaN, which makes
 * `slice(0, NaN)` return nothing and `length >= NaN` never trigger a cap.
 *
 * @param {number | undefined | null} value - Requested limit.
 * @param {number} defaultValue - Limit used when none is usable.
 * @param {number} maxValue - Upper bound for the returned limit.
 * @returns {number} Usable limit.
 */
function clampLimit(value, defaultValue, maxValue) {
  if (value === void 0 || value === null) {
    return defaultValue;
  }
  const floored = Math.floor(value);
  if (Number.isNaN(floored)) {
    return defaultValue;
  }
  return Math.min(Math.max(floored, 1), maxValue);
}
2111
/**
 * Filter predicate for readChunks: a chunk passes when it satisfies every
 * scope field that is set on `params` (`chunkId`, `chunkType`, `sectionPath`).
 *
 * @returns {boolean}
 */
function matchesReadScope(chunk, params) {
  const { chunkId, chunkType, sectionPath } = params;
  const idMatches = !chunkId || chunk.chunkId === chunkId;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  if (!(idMatches && typeMatches)) {
    return false;
  }
  return !sectionPath || isInSection(chunk.sectionPath, sectionPath);
}
2123
/**
 * Picks at most `limit` chunks to return from an already scoped list.
 *
 * With `params.chunkId` set the list is only truncated. Otherwise chunks are
 * kept when their position lies in `[startChunk, endChunk]`, where the start
 * defaults to the first chunk's position (or 1) and the end is unbounded.
 *
 * @returns {object[]} The selected chunks, in input order.
 */
function selectReadWindow(chunks, params, limit) {
  if (params.chunkId) {
    return chunks.slice(0, limit);
  }
  const first = params.startChunk ?? chunks[0]?.position ?? 1;
  const last = params.endChunk ?? Number.MAX_SAFE_INTEGER;
  const isInWindow = ({ position }) => position >= first && position <= last;
  return chunks.filter(isInWindow).slice(0, limit);
}
2131
/**
 * Projects an indexed chunk onto the fields returned by readChunks, leaving
 * out the internal `source` reference.
 *
 * @param {object} chunk - Indexed chunk.
 * @returns {object} Plain chunk record.
 */
function toReadChunk(chunk) {
  const {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  } = chunk;
  return {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  };
}
2143
/**
 * Filter predicate for grepChunks: a chunk passes when it has the requested
 * `chunkType` (if set) and its section path starts with `sectionPathPrefix`
 * (if set). The prefix check is a plain string prefix comparison.
 *
 * @returns {boolean}
 */
function matchesGrepScope(chunk, params) {
  const { chunkType, sectionPathPrefix } = params;
  const wrongType = Boolean(chunkType) && chunk.chunkType !== chunkType;
  if (wrongType) {
    return false;
  }
  const outsidePrefix = Boolean(sectionPathPrefix) && !chunk.sectionPath.startsWith(sectionPathPrefix);
  return !outsidePrefix;
}
2152
/**
 * Builds a function that finds every occurrence of a pattern in a string.
 *
 * @param {object} params
 * @param {string} params.pattern - Regex source when `isRegex` is set, otherwise a literal needle.
 * @param {boolean} [params.isRegex] - Treat `pattern` as a regular expression.
 * @param {boolean} [params.isCaseSensitive] - Match case-sensitively (default: case-insensitive).
 * @returns {(content: string) => Array<{ startOffset: number, endOffset: number }>}
 *   Matcher returning match offsets in order of appearance.
 * @throws {SyntaxError} From `new RegExp` when `isRegex` is set and the pattern is invalid.
 */
function createMatcher(params) {
  const { pattern, isRegex, isCaseSensitive } = params;
  if (isRegex) {
    const regex = new RegExp(pattern, isCaseSensitive ? "g" : "gi");
    return (content) => {
      const matches = [];
      let sawEmptyMatch = false;
      // matchAll advances past zero-length matches on its own, so the scan can
      // safely continue after one instead of stopping and losing later matches.
      for (const match of content.matchAll(regex)) {
        const startOffset = match.index ?? 0;
        const length = (match[0] ?? "").length;
        if (length === 0) {
          // Report at most one empty match so a pattern such as `x*` does not
          // yield an entry for every position in the content.
          if (sawEmptyMatch) {
            continue;
          }
          sawEmptyMatch = true;
        }
        matches.push({ startOffset, endOffset: startOffset + length });
      }
      return matches;
    };
  }
  if (!pattern) {
    // An empty needle is found at every index, and indexOf clamps the start
    // position to the string length, so the search loop below would never end.
    return () => [];
  }
  const needle = isCaseSensitive ? pattern : pattern.toLowerCase();
  const step = needle.length;
  return (content) => {
    // NOTE(review): for the rare characters whose lowercase form has a different
    // length, offsets into the lowercased copy may not line up with `content`.
    const haystack = isCaseSensitive ? content : content.toLowerCase();
    const matches = [];
    for (let index = haystack.indexOf(needle); index >= 0; index = haystack.indexOf(needle, index + step)) {
      matches.push({ startOffset: index, endOffset: index + step });
    }
    return matches;
  };
}
2181
/**
 * Cuts the matched range out of `content` together with up to `contextChars`
 * characters on each side, clipped to the bounds of the string.
 *
 * A negative or non-numeric `contextChars` is treated as 0 so the snippet
 * always contains at least the match itself.
 *
 * @param {string} content - Text the match was found in.
 * @param {number} startOffset - Start of the match (inclusive).
 * @param {number} endOffset - End of the match (exclusive).
 * @param {number} contextChars - Context to include on each side.
 * @returns {string} The snippet.
 */
function buildSnippet(content, startOffset, endOffset, contextChars) {
  const requested = Number(contextChars);
  // `NaN > 0` is false, so NaN falls back to 0 together with negative values.
  const padding = requested > 0 ? requested : 0;
  const start = Math.max(0, startOffset - padding);
  const end = Math.min(content.length, endOffset + padding);
  return content.slice(start, end);
}
2186
/**
 * Builds a reference entry for a remote search result, attaching the local
 * document id when the result's server document id is known locally.
 *
 * @param {object} result - Remote result with `source`, `chunkType`, `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by server document id.
 * @returns {object} Reference with null-ish source fields normalized to undefined.
 */
function toResultReference(result, documentByServerId) {
  const { source, chunkType, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    sectionPath: source.sectionPath ?? void 0,
    chunkType,
    score
  };
}
2196
/**
 * Converts a remote search result into the shape returned by
 * Knowledge.search(), attaching the local document id when the result's
 * server document id is known locally.
 *
 * @param {object} result - Remote result with `source`, `chunkType`, `content`, `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by server document id.
 * @returns {object} Search result with null-ish source fields normalized to undefined.
 */
function toRemoteSearchResult(result, documentByServerId) {
  const { source, chunkType, content, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    chunkType,
    content,
    score,
    sectionPath: source.sectionPath ?? void 0,
    sourceFileName: source.sourceFileName ?? void 0
  };
}
2208
+
1251
2209
  // src/client.ts
1252
2210
  function inferFileName(file, explicitFileName) {
1253
2211
  if (explicitFileName) {
1254
2212
  return explicitFileName;
1255
2213
  }
1256
2214
  if (typeof file === "string") {
1257
- return import_path2.default.basename(file);
2215
+ return import_path3.default.basename(file);
1258
2216
  }
1259
2217
  if (isReadStream2(file) && typeof file.path === "string") {
1260
- return import_path2.default.basename(file.path);
2218
+ return import_path3.default.basename(file.path);
1261
2219
  }
1262
2220
  return void 0;
1263
2221
  }
1264
2222
  function isReadStream2(file) {
1265
2223
  return typeof file === "object" && file !== null && "pipe" in file && typeof file.pipe === "function";
1266
2224
  }
2225
/**
 * Collects the parsing options set on `params` into the `parsingParams`
 * payload, renaming `ocr` to `ocrEnabled`.
 *
 * Options that are `undefined` are left out; other values (including `false`
 * and `null`) are kept.
 *
 * @param {object} params - Parse parameters.
 * @returns {object | undefined} The payload, or undefined when no option is set.
 */
function buildParsingParams(params) {
  const candidates = [
    ["model", params.model],
    ["ocrEnabled", params.ocr],
    ["docType", params.docType],
    ["smartTitleParse", params.smartTitleParse],
    ["summaryImage", params.summaryImage],
    ["summaryTable", params.summaryTable],
    ["summaryTxt", params.summaryTxt],
    ["addFragDesc", params.addFragDesc],
    ["kbDir", params.kbDir]
  ];
  const parsingParams = {};
  let hasAny = false;
  for (const [key, value] of candidates) {
    if (value !== void 0) {
      parsingParams[key] = value;
      hasAny = true;
    }
  }
  return hasAny ? parsingParams : void 0;
}
1267
2244
  var Knowhere = class {
1268
2245
  /** Jobs resource for low-level API */
1269
2246
  jobs;
@@ -1271,21 +2248,25 @@ var Knowhere = class {
1271
2248
  retrieval;
1272
2249
  /** Documents resource for canonical document lifecycle operations */
1273
2250
  documents;
2251
+ /** Client-side local knowledge tools over parsed Knowhere results */
2252
+ knowledge;
1274
2253
  httpClient;
1275
2254
  /**
1276
2255
  * Create a new Knowhere client
1277
2256
  */
1278
2257
  constructor(options = {}) {
1279
2258
  const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
1280
- if (!apiKey) {
2259
+ const authTokenProvider = apiKey ? void 0 : options.authTokenProvider;
2260
+ if (!apiKey && !authTokenProvider) {
1281
2261
  throw new ValidationError(
1282
- `API key is required. Provide it via options.apiKey or ${ENV.API_KEY} environment variable.`
2262
+ `API authentication is required. Provide it via options.apiKey, options.authTokenProvider, or ${ENV.API_KEY} environment variable.`
1283
2263
  );
1284
2264
  }
1285
2265
  const baseURL = options.baseURL ?? process.env[ENV.BASE_URL] ?? DEFAULT_BASE_URL;
1286
2266
  this.httpClient = new HttpClient({
1287
2267
  baseURL,
1288
2268
  apiKey,
2269
+ authTokenProvider,
1289
2270
  timeout: options.timeout,
1290
2271
  uploadTimeout: options.uploadTimeout,
1291
2272
  maxRetries: options.maxRetries,
@@ -1296,6 +2277,7 @@ var Knowhere = class {
1296
2277
  this.jobs = new Jobs(this.httpClient);
1297
2278
  this.retrieval = new Retrieval(this.httpClient);
1298
2279
  this.documents = new Documents(this.httpClient);
2280
+ this.knowledge = new Knowledge(this);
1299
2281
  }
1300
2282
  /**
1301
2283
  * High-level API: Parse a document and return structured results
@@ -1318,6 +2300,24 @@ var Knowhere = class {
1318
2300
  * ```
1319
2301
  */
1320
2302
  async parse(params) {
2303
+ const job = await this.startParse(params);
2304
+ const jobResult = await this.jobs.wait(job.jobId, {
2305
+ pollInterval: params.pollInterval,
2306
+ pollTimeout: params.pollTimeout,
2307
+ onProgress: params.onPollProgress,
2308
+ signal: params.signal
2309
+ });
2310
+ const result = await this.jobs.load(jobResult, {
2311
+ verifyChecksum: params.verifyChecksum
2312
+ });
2313
+ return enrichParseResult(result, jobResult);
2314
+ }
2315
+ /**
2316
+ * Start a parse job and return immediately after the URL job is created or
2317
+ * the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
2318
+ * to inspect completion and load results later.
2319
+ */
2320
+ async startParse(params) {
1321
2321
  if (!params.url && !params.file) {
1322
2322
  throw new ValidationError("Either url or file must be provided");
1323
2323
  }
@@ -1331,22 +2331,6 @@ var Knowhere = class {
1331
2331
  "fileName is required when file is a Buffer, Uint8Array, or stream without a path."
1332
2332
  );
1333
2333
  }
1334
- const parsingParams = {
1335
- model: params.model,
1336
- ocrEnabled: params.ocr,
1337
- docType: params.docType,
1338
- smartTitleParse: params.smartTitleParse,
1339
- summaryImage: params.summaryImage,
1340
- summaryTable: params.summaryTable,
1341
- summaryTxt: params.summaryTxt,
1342
- addFragDesc: params.addFragDesc,
1343
- kbDir: params.kbDir
1344
- };
1345
- Object.keys(parsingParams).forEach((key) => {
1346
- if (parsingParams[key] === void 0) {
1347
- delete parsingParams[key];
1348
- }
1349
- });
1350
2334
  const webhook = params.webhook;
1351
2335
  const job = await this.jobs.create({
1352
2336
  sourceType,
@@ -1355,7 +2339,8 @@ var Knowhere = class {
1355
2339
  dataId: params.dataId,
1356
2340
  namespace: params.namespace,
1357
2341
  documentId: params.documentId,
1358
- parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
2342
+ documentMetadata: params.documentMetadata,
2343
+ parsingParams: buildParsingParams(params),
1359
2344
  webhook
1360
2345
  });
1361
2346
  if (params.file) {
@@ -1365,16 +2350,7 @@ var Knowhere = class {
1365
2350
  signal: params.signal
1366
2351
  });
1367
2352
  }
1368
- const jobResult = await this.jobs.wait(job.jobId, {
1369
- pollInterval: params.pollInterval,
1370
- pollTimeout: params.pollTimeout,
1371
- onProgress: params.onPollProgress,
1372
- signal: params.signal
1373
- });
1374
- const result = await this.jobs.load(jobResult, {
1375
- verifyChecksum: params.verifyChecksum
1376
- });
1377
- return enrichParseResult(result, jobResult);
2353
+ return job;
1378
2354
  }
1379
2355
  };
1380
2356
  // Annotate the CommonJS export names for ESM import in node:
@@ -1392,6 +2368,8 @@ var Knowhere = class {
1392
2368
  Jobs,
1393
2369
  Knowhere,
1394
2370
  KnowhereError,
2371
+ Knowledge,
2372
+ LocalKnowledgeStore,
1395
2373
  NetworkError,
1396
2374
  NotFoundError,
1397
2375
  PaymentRequiredError,