@ontos-ai/knowhere-sdk 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -43,6 +43,8 @@ __export(index_exports, {
43
43
  Jobs: () => Jobs,
44
44
  Knowhere: () => Knowhere,
45
45
  KnowhereError: () => KnowhereError,
46
+ Knowledge: () => Knowledge,
47
+ LocalKnowledgeStore: () => LocalKnowledgeStore,
46
48
  NetworkError: () => NetworkError,
47
49
  NotFoundError: () => NotFoundError,
48
50
  PaymentRequiredError: () => PaymentRequiredError,
@@ -59,7 +61,7 @@ __export(index_exports, {
59
61
  module.exports = __toCommonJS(index_exports);
60
62
 
61
63
  // src/client.ts
62
- var import_path2 = __toESM(require("path"));
64
+ var import_path3 = __toESM(require("path"));
63
65
 
64
66
  // src/lib/http-client.ts
65
67
  var import_axios = __toESM(require("axios"));
@@ -101,6 +103,7 @@ var NetworkError = class extends KnowhereError {
101
103
  this.cause = cause;
102
104
  this.name = "NetworkError";
103
105
  }
106
+ cause;
104
107
  };
105
108
  var TimeoutError = class extends NetworkError {
106
109
  constructor(message = "Request timed out") {
@@ -114,6 +117,7 @@ var PollingTimeoutError = class extends KnowhereError {
114
117
  this.elapsedMs = elapsedMs;
115
118
  this.name = "PollingTimeoutError";
116
119
  }
120
+ elapsedMs;
117
121
  };
118
122
  var ChecksumError = class extends KnowhereError {
119
123
  constructor(message = "Checksum verification failed", expected, actual) {
@@ -122,6 +126,8 @@ var ChecksumError = class extends KnowhereError {
122
126
  this.actual = actual;
123
127
  this.name = "ChecksumError";
124
128
  }
129
+ expected;
130
+ actual;
125
131
  };
126
132
  var ValidationError = class extends KnowhereError {
127
133
  constructor(message) {
@@ -147,6 +153,11 @@ var APIError = class extends KnowhereError {
147
153
  this.body = body;
148
154
  this.name = "APIError";
149
155
  }
156
+ statusCode;
157
+ code;
158
+ requestId;
159
+ details;
160
+ body;
150
161
  };
151
162
  var BadRequestError = class extends APIError {
152
163
  constructor(message, code, requestId, details, body) {
@@ -190,6 +201,7 @@ var RateLimitError = class extends APIError {
190
201
  this.retryAfter = retryAfter;
191
202
  this.name = "RateLimitError";
192
203
  }
204
+ retryAfter;
193
205
  };
194
206
  var InternalServerError = class extends APIError {
195
207
  constructor(message = "Internal server error", code, requestId, details, body) {
@@ -245,11 +257,13 @@ var JobFailedError = class extends KnowhereError {
245
257
  this.jobResult = jobResult;
246
258
  this.name = "JobFailedError";
247
259
  }
260
+ code;
261
+ jobResult;
248
262
  };
249
263
 
250
264
  // src/lib/utils.ts
251
265
  function sleep(ms) {
252
- return new Promise((resolve) => setTimeout(resolve, ms));
266
+ return new Promise((resolve2) => setTimeout(resolve2, ms));
253
267
  }
254
268
  function snakeToCamel(str) {
255
269
  return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
@@ -355,8 +369,8 @@ function enrichParseResult(parseResult2, scope) {
355
369
  }
356
370
  return parseResult2;
357
371
  }
358
- function sanitizePath(path2) {
359
- let sanitized = path2.replace(/^\/+/, "");
372
+ function sanitizePath(path3) {
373
+ let sanitized = path3.replace(/^\/+/, "");
360
374
  sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
361
375
  sanitized = sanitized.replace(/\\/g, "/");
362
376
  return sanitized;
@@ -470,7 +484,7 @@ async function withRetry(fn, maxRetries, onRetry) {
470
484
  if (onRetry) {
471
485
  onRetry(attempt + 1, error);
472
486
  }
473
- await new Promise((resolve) => setTimeout(resolve, delay));
487
+ await new Promise((resolve2) => setTimeout(resolve2, delay));
474
488
  }
475
489
  }
476
490
  throw lastError;
@@ -483,17 +497,19 @@ var HttpClient = class {
483
497
  uploadTimeout;
484
498
  httpAgent;
485
499
  httpsAgent;
500
+ authTokenProvider;
486
501
  constructor(options) {
487
502
  this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
488
503
  this.uploadTimeout = options.uploadTimeout ?? 6e5;
489
504
  this.httpAgent = options.httpAgent;
490
505
  this.httpsAgent = options.httpsAgent;
506
+ this.authTokenProvider = options.authTokenProvider;
491
507
  this.axios = import_axios.default.create({
492
508
  baseURL: options.baseURL,
493
509
  timeout: options.timeout ?? DEFAULT_TIMEOUT,
494
510
  headers: {
495
511
  "User-Agent": `knowhere-node-sdk/${VERSION}`,
496
- Authorization: `Bearer ${options.apiKey}`,
512
+ ...options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : {},
497
513
  "Content-Type": "application/json",
498
514
  ...options.defaultHeaders
499
515
  },
@@ -505,6 +521,9 @@ var HttpClient = class {
505
521
  setupInterceptors() {
506
522
  this.axios.interceptors.request.use(
507
523
  (config) => {
524
+ if (this.authTokenProvider) {
525
+ return this.attachDynamicAuthorization(config);
526
+ }
508
527
  if (config.data && typeof config.data === "object") {
509
528
  config.data = keysToSnake(config.data);
510
529
  }
@@ -528,6 +547,19 @@ var HttpClient = class {
528
547
  }
529
548
  );
530
549
  }
550
+ async attachDynamicAuthorization(config) {
551
+ const token = await this.authTokenProvider?.();
552
+ if (!token) {
553
+ throw new ValidationError("Authentication token provider returned an empty token");
554
+ }
555
+ const headers = import_axios.AxiosHeaders.from(config.headers);
556
+ headers.set("Authorization", `Bearer ${token}`);
557
+ config.headers = headers;
558
+ if (config.data && typeof config.data === "object") {
559
+ config.data = keysToSnake(config.data);
560
+ }
561
+ return config;
562
+ }
531
563
  handleError(error) {
532
564
  if (!error.response) {
533
565
  if (error.code === "ECONNABORTED" || error.message.includes("timeout")) {
@@ -817,6 +849,9 @@ async function parseResult(httpClient, resultUrl, options) {
817
849
  const zipBuffer = await httpClient.download(resultUrl);
818
850
  if (options?.verifyChecksum !== false) {
819
851
  }
852
+ return parseResultBuffer(zipBuffer);
853
+ }
854
+ async function parseResultBuffer(zipBuffer) {
820
855
  const zip = await import_jszip.default.loadAsync(zipBuffer);
821
856
  const manifestFile = zip.file("manifest.json");
822
857
  if (!manifestFile) {
@@ -881,7 +916,7 @@ async function parseResult(httpClient, resultUrl, options) {
881
916
  if (hierarchyViewFile) {
882
917
  hierarchyViewHtml = await hierarchyViewFile.async("string");
883
918
  }
884
- const result = {
919
+ return createParseResult({
885
920
  manifest,
886
921
  chunks,
887
922
  docNav,
@@ -892,6 +927,136 @@ async function parseResult(httpClient, resultUrl, options) {
892
927
  hierarchy,
893
928
  tocHierarchies,
894
929
  kbCsv,
930
+ hierarchyViewHtml
931
+ });
932
+ }
933
+ async function parseResultDirectory(directory) {
934
+ const manifestContent = await readRequiredTextFile(directory, "manifest.json");
935
+ let manifest = JSON.parse(manifestContent);
936
+ manifest = keysToCamel(manifest);
937
+ manifest = parseDates(manifest);
938
+ const chunksContent = await readRequiredTextFile(directory, "chunks.json");
939
+ let chunksData = JSON.parse(chunksContent);
940
+ chunksData = keysToCamel(chunksData);
941
+ const rawChunks = extractChunks(chunksData);
942
+ const chunks = [];
943
+ for (const chunkData of rawChunks) {
944
+ chunks.push(await processDirectoryChunk(directory, chunkData));
945
+ }
946
+ const fullMarkdown = await readOptionalTextFile(directory, "full.md");
947
+ const rawDocNav = await readOptionalJsonFile(directory, "doc_nav.json");
948
+ const docNav = rawDocNav === void 0 ? void 0 : keysToCamel(rawDocNav);
949
+ const hierarchy = await readOptionalJsonFile(directory, "hierarchy.json");
950
+ const rawChunksSlim = await readOptionalJsonFile(directory, "chunks_slim.json");
951
+ const chunksSlim = rawChunksSlim === void 0 ? void 0 : extractSlimChunks(keysToCamel(rawChunksSlim));
952
+ const rawTocHierarchies = await readOptionalJsonFile(directory, "toc_hierarchies.json");
953
+ const tocHierarchies = rawTocHierarchies === void 0 ? void 0 : keysToCamel(rawTocHierarchies);
954
+ const kbCsv = await readOptionalTextFile(directory, "kb.csv");
955
+ const hierarchyViewHtml = await readOptionalTextFile(directory, "hierarchy_view.html");
956
+ return createParseResult({
957
+ manifest,
958
+ chunks,
959
+ docNav,
960
+ fullMarkdown,
961
+ rawZip: Buffer.alloc(0),
962
+ chunksSlim,
963
+ hierarchy,
964
+ tocHierarchies,
965
+ kbCsv,
966
+ hierarchyViewHtml
967
+ });
968
+ }
969
+ async function saveExpandedParseResult(result, directory) {
970
+ if (result.rawZip.length > 0) {
971
+ const didExtractZip = await tryExtractRawZip(result.rawZip, directory);
972
+ if (didExtractZip) {
973
+ return directory;
974
+ }
975
+ }
976
+ await import_fs2.promises.mkdir(directory, { recursive: true });
977
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "manifest.json"), JSON.stringify(result.manifest, null, 2));
978
+ if (result.docNav) {
979
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "doc_nav.json"), JSON.stringify(result.docNav, null, 2));
980
+ }
981
+ await import_fs2.promises.writeFile(
982
+ (0, import_path.join)(directory, "chunks.json"),
983
+ JSON.stringify(serializeChunks(result.chunks), null, 2)
984
+ );
985
+ if (result.chunksSlim) {
986
+ await import_fs2.promises.writeFile(
987
+ (0, import_path.join)(directory, "chunks_slim.json"),
988
+ JSON.stringify({ chunks: result.chunksSlim }, null, 2)
989
+ );
990
+ }
991
+ if (result.fullMarkdown) {
992
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "full.md"), result.fullMarkdown);
993
+ }
994
+ if (result.hierarchy) {
995
+ await import_fs2.promises.writeFile(
996
+ (0, import_path.join)(directory, "hierarchy.json"),
997
+ JSON.stringify(result.hierarchy, null, 2)
998
+ );
999
+ }
1000
+ if (result.tocHierarchies) {
1001
+ await import_fs2.promises.writeFile(
1002
+ (0, import_path.join)(directory, "toc_hierarchies.json"),
1003
+ JSON.stringify(result.tocHierarchies, null, 2)
1004
+ );
1005
+ }
1006
+ if (result.kbCsv) {
1007
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "kb.csv"), result.kbCsv);
1008
+ }
1009
+ if (result.hierarchyViewHtml) {
1010
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "hierarchy_view.html"), result.hierarchyViewHtml);
1011
+ }
1012
+ for (const imageChunk of result.imageChunks) {
1013
+ await writeBinaryAsset(directory, imageChunk.filePath, imageChunk.data);
1014
+ }
1015
+ for (const tableChunk of result.tableChunks) {
1016
+ await writeTextAsset(directory, tableChunk.filePath, tableChunk.html);
1017
+ }
1018
+ return directory;
1019
+ }
1020
+ async function tryExtractRawZip(zipBuffer, directory) {
1021
+ try {
1022
+ const zip = await import_jszip.default.loadAsync(zipBuffer);
1023
+ await import_fs2.promises.mkdir(directory, { recursive: true });
1024
+ for (const entry of Object.values(zip.files)) {
1025
+ if (entry.dir || entry.name === "result.zip") {
1026
+ continue;
1027
+ }
1028
+ const outputPath = resolveAssetPath(directory, entry.name);
1029
+ await import_fs2.promises.mkdir((0, import_path.dirname)(outputPath), { recursive: true });
1030
+ await import_fs2.promises.writeFile(outputPath, await entry.async("nodebuffer"));
1031
+ }
1032
+ return true;
1033
+ } catch {
1034
+ return false;
1035
+ }
1036
+ }
1037
+ function createParseResult(parts) {
1038
+ const {
1039
+ manifest,
1040
+ chunks,
1041
+ docNav,
1042
+ fullMarkdown,
1043
+ rawZip,
1044
+ chunksSlim,
1045
+ hierarchy,
1046
+ tocHierarchies,
1047
+ kbCsv,
1048
+ hierarchyViewHtml
1049
+ } = parts;
1050
+ return {
1051
+ manifest,
1052
+ chunks,
1053
+ docNav,
1054
+ fullMarkdown,
1055
+ rawZip,
1056
+ chunksSlim,
1057
+ hierarchy,
1058
+ tocHierarchies,
1059
+ kbCsv,
895
1060
  hierarchyViewHtml,
896
1061
  get textChunks() {
897
1062
  return chunks.filter((c) => c.type === "text");
@@ -948,11 +1113,10 @@ async function parseResult(httpClient, resultUrl, options) {
948
1113
  for (const tableChunk of this.tableChunks) {
949
1114
  await tableChunk.save(directory);
950
1115
  }
951
- await import_fs2.promises.writeFile((0, import_path.join)(directory, "result.zip"), zipBuffer);
1116
+ await import_fs2.promises.writeFile((0, import_path.join)(directory, "result.zip"), rawZip);
952
1117
  return directory;
953
1118
  }
954
1119
  };
955
- return result;
956
1120
  }
957
1121
  function extractChunks(payload) {
958
1122
  if (Array.isArray(payload)) {
@@ -985,6 +1149,37 @@ function buildTextChunk(chunkData) {
985
1149
  metadata: chunkData.metadata ?? {}
986
1150
  };
987
1151
  }
1152
+ function buildImageChunk(chunkData, filePath, imageBuffer) {
1153
+ return {
1154
+ chunkId: chunkData.chunkId ?? "",
1155
+ type: "image",
1156
+ content: chunkData.content ?? "",
1157
+ path: chunkData.path ?? "",
1158
+ filePath,
1159
+ data: imageBuffer,
1160
+ metadata: chunkData.metadata ?? {},
1161
+ get format() {
1162
+ return getFileExtension(this.filePath);
1163
+ },
1164
+ async save(directory) {
1165
+ return writeBinaryAsset(directory, this.filePath, this.data);
1166
+ }
1167
+ };
1168
+ }
1169
+ function buildTableChunk(chunkData, filePath, html) {
1170
+ return {
1171
+ chunkId: chunkData.chunkId ?? "",
1172
+ type: "table",
1173
+ content: chunkData.content ?? "",
1174
+ path: chunkData.path ?? "",
1175
+ filePath,
1176
+ html,
1177
+ metadata: chunkData.metadata ?? {},
1178
+ async save(directory) {
1179
+ return writeTextAsset(directory, this.filePath, this.html);
1180
+ }
1181
+ };
1182
+ }
988
1183
  async function processChunk(zip, chunkData) {
989
1184
  if (chunkData.type === "text") {
990
1185
  return buildTextChunk(chunkData);
@@ -1000,26 +1195,7 @@ async function processChunk(zip, chunkData) {
1000
1195
  throw new KnowhereError(`Image file not found: ${filePath}`);
1001
1196
  }
1002
1197
  const imageBuffer = await imageFile.async("nodebuffer");
1003
- const enrichedChunk = {
1004
- chunkId: chunkData.chunkId ?? "",
1005
- type: "image",
1006
- content: chunkData.content ?? "",
1007
- path: chunkData.path ?? "",
1008
- filePath,
1009
- data: imageBuffer,
1010
- metadata: chunkData.metadata ?? {},
1011
- get format() {
1012
- return getFileExtension(this.filePath);
1013
- },
1014
- async save(directory) {
1015
- const outputPath = (0, import_path.join)(directory, sanitizePath(this.filePath));
1016
- const outputDir = (0, import_path.dirname)(outputPath);
1017
- await import_fs2.promises.mkdir(outputDir, { recursive: true });
1018
- await import_fs2.promises.writeFile(outputPath, this.data);
1019
- return outputPath;
1020
- }
1021
- };
1022
- return enrichedChunk;
1198
+ return buildImageChunk(chunkData, filePath, imageBuffer);
1023
1199
  }
1024
1200
  if (chunkData.type === "table") {
1025
1201
  const filePath = getChunkFilePath(chunkData);
@@ -1032,26 +1208,112 @@ async function processChunk(zip, chunkData) {
1032
1208
  throw new KnowhereError(`Table file not found: ${filePath}`);
1033
1209
  }
1034
1210
  const html = await htmlFile.async("string");
1035
- const enrichedChunk = {
1036
- chunkId: chunkData.chunkId ?? "",
1037
- type: "table",
1038
- content: chunkData.content ?? "",
1039
- path: chunkData.path ?? "",
1040
- filePath,
1041
- html,
1042
- metadata: chunkData.metadata ?? {},
1043
- async save(directory) {
1044
- const outputPath = (0, import_path.join)(directory, sanitizePath(this.filePath));
1045
- const outputDir = (0, import_path.dirname)(outputPath);
1046
- await import_fs2.promises.mkdir(outputDir, { recursive: true });
1047
- await import_fs2.promises.writeFile(outputPath, this.html);
1048
- return outputPath;
1211
+ return buildTableChunk(chunkData, filePath, html);
1212
+ }
1213
+ return buildTextChunk(chunkData);
1214
+ }
1215
+ async function processDirectoryChunk(directory, chunkData) {
1216
+ if (chunkData.type === "text") {
1217
+ return buildTextChunk(chunkData);
1218
+ }
1219
+ if (chunkData.type === "image") {
1220
+ const filePath = getChunkFilePath(chunkData);
1221
+ if (!filePath) {
1222
+ throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
1223
+ }
1224
+ try {
1225
+ const imageBuffer = await import_fs2.promises.readFile(resolveAssetPath(directory, filePath));
1226
+ return buildImageChunk(chunkData, filePath, imageBuffer);
1227
+ } catch (error) {
1228
+ if (isMissingFileError(error)) {
1229
+ throw new KnowhereError(`Image file not found: ${filePath}`);
1049
1230
  }
1050
- };
1051
- return enrichedChunk;
1231
+ throw error;
1232
+ }
1233
+ }
1234
+ if (chunkData.type === "table") {
1235
+ const filePath = getChunkFilePath(chunkData);
1236
+ if (!filePath) {
1237
+ throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
1238
+ }
1239
+ try {
1240
+ const html = await import_fs2.promises.readFile(resolveAssetPath(directory, filePath), "utf8");
1241
+ return buildTableChunk(chunkData, filePath, html);
1242
+ } catch (error) {
1243
+ if (isMissingFileError(error)) {
1244
+ throw new KnowhereError(`Table file not found: ${filePath}`);
1245
+ }
1246
+ throw error;
1247
+ }
1052
1248
  }
1053
1249
  return buildTextChunk(chunkData);
1054
1250
  }
1251
+ function serializeChunks(chunks) {
1252
+ return {
1253
+ chunks: chunks.map((chunk) => {
1254
+ const rawChunk = {
1255
+ chunkId: chunk.chunkId,
1256
+ type: chunk.type,
1257
+ content: chunk.content,
1258
+ path: chunk.path,
1259
+ metadata: chunk.metadata
1260
+ };
1261
+ if (chunk.type === "image" || chunk.type === "table") {
1262
+ rawChunk.filePath = chunk.filePath;
1263
+ }
1264
+ return rawChunk;
1265
+ })
1266
+ };
1267
+ }
1268
+ async function readRequiredTextFile(directory, fileName) {
1269
+ try {
1270
+ return await import_fs2.promises.readFile((0, import_path.join)(directory, fileName), "utf8");
1271
+ } catch (error) {
1272
+ if (isMissingFileError(error)) {
1273
+ throw new KnowhereError(`${fileName} not found in result directory`);
1274
+ }
1275
+ throw error;
1276
+ }
1277
+ }
1278
+ async function readOptionalTextFile(directory, fileName) {
1279
+ try {
1280
+ return await import_fs2.promises.readFile((0, import_path.join)(directory, fileName), "utf8");
1281
+ } catch (error) {
1282
+ if (isMissingFileError(error)) {
1283
+ return void 0;
1284
+ }
1285
+ throw error;
1286
+ }
1287
+ }
1288
+ async function readOptionalJsonFile(directory, fileName) {
1289
+ const content = await readOptionalTextFile(directory, fileName);
1290
+ return content === void 0 ? void 0 : JSON.parse(content);
1291
+ }
1292
+ async function writeBinaryAsset(directory, filePath, data) {
1293
+ const outputPath = resolveAssetPath(directory, filePath);
1294
+ const outputDir = (0, import_path.dirname)(outputPath);
1295
+ await import_fs2.promises.mkdir(outputDir, { recursive: true });
1296
+ await import_fs2.promises.writeFile(outputPath, data);
1297
+ return outputPath;
1298
+ }
1299
+ async function writeTextAsset(directory, filePath, text) {
1300
+ const outputPath = resolveAssetPath(directory, filePath);
1301
+ const outputDir = (0, import_path.dirname)(outputPath);
1302
+ await import_fs2.promises.mkdir(outputDir, { recursive: true });
1303
+ await import_fs2.promises.writeFile(outputPath, text);
1304
+ return outputPath;
1305
+ }
1306
+ function resolveAssetPath(directory, filePath) {
1307
+ const root = (0, import_path.resolve)(directory);
1308
+ const outputPath = (0, import_path.resolve)(root, sanitizePath(filePath));
1309
+ if (outputPath !== root && !outputPath.startsWith(`${root}${import_path.sep}`)) {
1310
+ throw new KnowhereError(`Invalid result asset path: ${filePath}`);
1311
+ }
1312
+ return outputPath;
1313
+ }
1314
+ function isMissingFileError(error) {
1315
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
1316
+ }
1055
1317
 
1056
1318
  // src/resources/jobs.ts
1057
1319
  var Jobs = class extends BaseResource {
@@ -1248,22 +1510,741 @@ var Documents = class extends BaseResource {
1248
1510
  }
1249
1511
  };
1250
1512
 
1513
+ // src/knowledge/local-store.ts
1514
+ var import_crypto = require("crypto");
1515
+ var import_os = __toESM(require("os"));
1516
+ var import_fs3 = require("fs");
1517
+ var import_path2 = __toESM(require("path"));
1518
+ var STORE_VERSION = 1;
1519
+ var LOCAL_DOCUMENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
1520
+ var LocalKnowledgeStore = class {
1521
+ cacheDirectory;
1522
+ indexPath;
1523
+ resultCache = /* @__PURE__ */ new Map();
1524
+ constructor(cacheDirectory) {
1525
+ this.cacheDirectory = cacheDirectory ?? import_path2.default.join(import_os.default.homedir(), ".knowhere-node-sdk", "knowledge");
1526
+ this.indexPath = import_path2.default.join(this.cacheDirectory, "index.json");
1527
+ }
1528
+ async saveResult(result, options) {
1529
+ await import_fs3.promises.mkdir(this.cacheDirectory, { recursive: true });
1530
+ const now = /* @__PURE__ */ new Date();
1531
+ const index = await this.readIndex();
1532
+ const localDocumentId = validateLocalDocumentId(
1533
+ options?.localDocumentId ?? createLocalDocumentId(result)
1534
+ );
1535
+ const resultDirectoryPath = this.getResultDirectoryPath(localDocumentId);
1536
+ await import_fs3.promises.rm(resultDirectoryPath, { recursive: true, force: true });
1537
+ await saveExpandedParseResult(result, resultDirectoryPath);
1538
+ this.resultCache.set(localDocumentId, result);
1539
+ const existing = index.documents.find(
1540
+ (document) => document.localDocumentId === localDocumentId
1541
+ );
1542
+ const stored = {
1543
+ localDocumentId,
1544
+ jobId: result.jobId,
1545
+ documentId: result.documentId,
1546
+ namespace: result.namespace,
1547
+ sourceFileName: result.manifest.sourceFileName,
1548
+ chunkCount: result.chunks.length,
1549
+ typeCounts: countChunkTypes(result),
1550
+ resultDirectoryPath,
1551
+ createdAt: existing?.createdAt ?? now.toISOString(),
1552
+ updatedAt: now.toISOString()
1553
+ };
1554
+ const nextDocuments = [
1555
+ stored,
1556
+ ...index.documents.filter((document) => document.localDocumentId !== localDocumentId)
1557
+ ];
1558
+ const asyncParseJobs = (index.asyncParseJobs ?? []).map(
1559
+ (job) => job.jobId === result.jobId ? {
1560
+ ...job,
1561
+ localDocumentId,
1562
+ cacheStatus: "cached",
1563
+ updatedAt: now.toISOString()
1564
+ } : job
1565
+ );
1566
+ await this.writeIndex({
1567
+ version: STORE_VERSION,
1568
+ documents: nextDocuments,
1569
+ asyncParseJobs
1570
+ });
1571
+ return toLocalKnowledgeDocument(stored);
1572
+ }
1573
+ async saveAsyncParseJob(params) {
1574
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1575
+ const index = await this.readIndex();
1576
+ const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
1577
+ const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
1578
+ const stored = {
1579
+ jobId: params.jobId,
1580
+ localDocumentId: localDocumentId ?? existing?.localDocumentId,
1581
+ cacheStatus: existing?.cacheStatus ?? "pending",
1582
+ createdAt: existing?.createdAt ?? now,
1583
+ updatedAt: now
1584
+ };
1585
+ await this.writeIndex({
1586
+ version: STORE_VERSION,
1587
+ documents: index.documents,
1588
+ asyncParseJobs: [
1589
+ stored,
1590
+ ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
1591
+ ]
1592
+ });
1593
+ }
1594
+ async getAsyncParseJob(jobId) {
1595
+ const index = await this.readIndex();
1596
+ return (index.asyncParseJobs ?? []).find((job) => job.jobId === jobId);
1597
+ }
1598
+ async listRecoverableAsyncParseJobs() {
1599
+ const index = await this.readIndex();
1600
+ return (index.asyncParseJobs ?? []).filter((job) => job.cacheStatus === "pending" || job.cacheStatus === "not_available").map(toLocalKnowledgeAsyncParseJob);
1601
+ }
1602
+ async updateAsyncParseJobCacheStatus(params) {
1603
+ const index = await this.readIndex();
1604
+ const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
1605
+ const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
1606
+ if (!existing) {
1607
+ return;
1608
+ }
1609
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1610
+ const stored = {
1611
+ ...existing,
1612
+ localDocumentId: localDocumentId ?? existing.localDocumentId,
1613
+ cacheStatus: params.cacheStatus,
1614
+ updatedAt: now
1615
+ };
1616
+ await this.writeIndex({
1617
+ version: STORE_VERSION,
1618
+ documents: index.documents,
1619
+ asyncParseJobs: [
1620
+ stored,
1621
+ ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
1622
+ ]
1623
+ });
1624
+ }
1625
+ async listDocuments() {
1626
+ const index = await this.readIndex();
1627
+ return index.documents.map(toLocalKnowledgeDocument);
1628
+ }
1629
+ async getDocument(localDocumentId) {
1630
+ validateLocalDocumentId(localDocumentId);
1631
+ const index = await this.readIndex();
1632
+ const stored = index.documents.find((document) => document.localDocumentId === localDocumentId);
1633
+ return stored ? toLocalKnowledgeDocument(stored) : void 0;
1634
+ }
1635
+ async loadResult(localDocumentId) {
1636
+ const document = await this.getDocument(localDocumentId);
1637
+ if (!document) {
1638
+ throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
1639
+ }
1640
+ const cachedResult = this.resultCache.get(localDocumentId);
1641
+ if (cachedResult) {
1642
+ return { document, result: cachedResult };
1643
+ }
1644
+ const result = await this.loadStoredResult(document);
1645
+ result.namespace = document.namespace;
1646
+ result.documentId = document.documentId;
1647
+ this.resultCache.set(localDocumentId, result);
1648
+ return { document, result };
1649
+ }
1650
+ getResultDirectoryPath(localDocumentId) {
1651
+ const documentsDirectory = import_path2.default.resolve(this.cacheDirectory, "documents");
1652
+ const resultDirectoryPath = import_path2.default.resolve(documentsDirectory, localDocumentId);
1653
+ if (!isPathInsideDirectory(resultDirectoryPath, documentsDirectory)) {
1654
+ throw new Error(`Local Knowhere document ID resolves outside the cache: ${localDocumentId}`);
1655
+ }
1656
+ return resultDirectoryPath;
1657
+ }
1658
+ async loadStoredResult(document) {
1659
+ return parseResultDirectory(document.resultDirectoryPath);
1660
+ }
1661
+ async readIndex() {
1662
+ try {
1663
+ const raw = await import_fs3.promises.readFile(this.indexPath, "utf8");
1664
+ const parsed = JSON.parse(raw);
1665
+ if (parsed.version !== STORE_VERSION || !Array.isArray(parsed.documents)) {
1666
+ return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
1667
+ }
1668
+ return {
1669
+ version: STORE_VERSION,
1670
+ documents: parsed.documents,
1671
+ asyncParseJobs: Array.isArray(parsed.asyncParseJobs) ? parsed.asyncParseJobs : []
1672
+ };
1673
+ } catch (error) {
1674
+ if (isMissingFileError2(error)) {
1675
+ return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
1676
+ }
1677
+ throw error;
1678
+ }
1679
+ }
1680
+ async writeIndex(index) {
1681
+ await import_fs3.promises.mkdir(this.cacheDirectory, { recursive: true });
1682
+ await import_fs3.promises.writeFile(this.indexPath, JSON.stringify(index, null, 2));
1683
+ }
1684
+ };
1685
+ function validateLocalDocumentId(localDocumentId) {
1686
+ if (!LOCAL_DOCUMENT_ID_PATTERN.test(localDocumentId) || localDocumentId.includes("..") || import_path2.default.basename(localDocumentId) !== localDocumentId) {
1687
+ throw new Error(
1688
+ "Local Knowhere document ID must be a safe slug containing only letters, numbers, dots, underscores, or hyphens"
1689
+ );
1690
+ }
1691
+ return localDocumentId;
1692
+ }
1693
+ function isPathInsideDirectory(targetPath, parentDirectory) {
1694
+ const relativePath = import_path2.default.relative(parentDirectory, targetPath);
1695
+ return relativePath.length === 0 || !relativePath.startsWith("..") && !import_path2.default.isAbsolute(relativePath);
1696
+ }
1697
+ function createLocalDocumentId(result) {
1698
+ const hash = (0, import_crypto.createHash)("sha256").update(result.jobId).update("\0").update(result.manifest.sourceFileName).digest("hex").slice(0, 16);
1699
+ return `local_${hash}`;
1700
+ }
1701
+ function countChunkTypes(result) {
1702
+ return result.chunks.reduce(
1703
+ (counts, chunk) => {
1704
+ counts[chunk.type] += 1;
1705
+ return counts;
1706
+ },
1707
+ { text: 0, image: 0, table: 0 }
1708
+ );
1709
+ }
1710
+ function toLocalKnowledgeDocument(stored) {
1711
+ return {
1712
+ localDocumentId: stored.localDocumentId,
1713
+ jobId: stored.jobId,
1714
+ documentId: stored.documentId,
1715
+ namespace: stored.namespace,
1716
+ sourceFileName: stored.sourceFileName,
1717
+ chunkCount: stored.chunkCount,
1718
+ typeCounts: stored.typeCounts,
1719
+ resultDirectoryPath: stored.resultDirectoryPath,
1720
+ createdAt: new Date(stored.createdAt),
1721
+ updatedAt: new Date(stored.updatedAt)
1722
+ };
1723
+ }
1724
+ function toLocalKnowledgeAsyncParseJob(stored) {
1725
+ return {
1726
+ ...stored,
1727
+ createdAt: new Date(stored.createdAt),
1728
+ updatedAt: new Date(stored.updatedAt)
1729
+ };
1730
+ }
1731
+ function isMissingFileError2(error) {
1732
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
1733
+ }
1734
+
1735
+ // src/knowledge/knowledge.ts
1736
+ var DEFAULT_READ_LIMIT = 12;
1737
+ var MAX_READ_LIMIT = 40;
1738
+ var DEFAULT_GREP_LIMIT = 20;
1739
+ var MAX_GREP_LIMIT = 50;
1740
+ var DEFAULT_CONTEXT_CHARS = 80;
1741
+ var Knowledge = class _Knowledge {
1742
+ client;
1743
+ store;
1744
+ constructor(client, options) {
1745
+ this.client = client;
1746
+ this.store = new LocalKnowledgeStore(options?.cacheDirectory);
1747
+ }
1748
+ withCacheDirectory(cacheDirectory) {
1749
+ return new _Knowledge(this.client, { cacheDirectory });
1750
+ }
1751
+ async parse(params) {
1752
+ const result = await this.client.parse(params);
1753
+ const document = await this.store.saveResult(result, {
1754
+ localDocumentId: params.localDocumentId
1755
+ });
1756
+ return { document, result };
1757
+ }
1758
+ async startParse(params) {
1759
+ const job = await this.client.startParse(params);
1760
+ await this.store.saveAsyncParseJob({
1761
+ jobId: job.jobId,
1762
+ localDocumentId: params.localDocumentId
1763
+ });
1764
+ return {
1765
+ job,
1766
+ localDocumentId: params.localDocumentId
1767
+ };
1768
+ }
1769
+ async getJobStatus(jobId) {
1770
+ const job = await this.client.jobs.get(jobId);
1771
+ return {
1772
+ job,
1773
+ cache: await this.resolveAsyncCache(jobId, job.isDone, job.isFailed)
1774
+ };
1775
+ }
1776
+ async recoverPendingAsyncParseJobs() {
1777
+ const jobs = await this.store.listRecoverableAsyncParseJobs();
1778
+ const results = [];
1779
+ for (const job of jobs) {
1780
+ results.push(await this.getJobStatus(job.jobId));
1781
+ }
1782
+ return {
1783
+ checkedJobs: jobs.length,
1784
+ results
1785
+ };
1786
+ }
1787
+ async cacheJobResult(params) {
1788
+ const result = await this.client.jobs.load(params.jobId, {
1789
+ verifyChecksum: params.verifyChecksum
1790
+ });
1791
+ const document = await this.store.saveResult(result, {
1792
+ localDocumentId: params.localDocumentId
1793
+ });
1794
+ return { document, result };
1795
+ }
1796
+ async resolveAsyncCache(jobId, isDone, isFailed) {
1797
+ const trackedJob = await this.store.getAsyncParseJob(jobId);
1798
+ if (!trackedJob) {
1799
+ return { status: "untracked" };
1800
+ }
1801
+ if (trackedJob.cacheStatus === "cached" && trackedJob.localDocumentId) {
1802
+ const existingDocument = await this.store.getDocument(trackedJob.localDocumentId);
1803
+ if (existingDocument) {
1804
+ return {
1805
+ status: "already_cached",
1806
+ localDocumentId: trackedJob.localDocumentId,
1807
+ document: existingDocument
1808
+ };
1809
+ }
1810
+ }
1811
+ if (isFailed) {
1812
+ await this.store.updateAsyncParseJobCacheStatus({
1813
+ jobId,
1814
+ cacheStatus: "failed"
1815
+ });
1816
+ return {
1817
+ status: "failed",
1818
+ localDocumentId: trackedJob.localDocumentId
1819
+ };
1820
+ }
1821
+ if (!isDone) {
1822
+ return {
1823
+ status: "pending",
1824
+ localDocumentId: trackedJob.localDocumentId
1825
+ };
1826
+ }
1827
+ try {
1828
+ const cached = await this.cacheJobResult({
1829
+ jobId,
1830
+ localDocumentId: trackedJob.localDocumentId
1831
+ });
1832
+ return {
1833
+ status: "cached",
1834
+ localDocumentId: cached.document.localDocumentId,
1835
+ document: cached.document
1836
+ };
1837
+ } catch (error) {
1838
+ await this.store.updateAsyncParseJobCacheStatus({
1839
+ jobId,
1840
+ cacheStatus: "not_available"
1841
+ });
1842
+ return {
1843
+ status: "not_available",
1844
+ localDocumentId: trackedJob.localDocumentId,
1845
+ error: error instanceof Error ? error.message : String(error)
1846
+ };
1847
+ }
1848
+ }
1849
+ async listDocuments() {
1850
+ return this.store.listDocuments();
1851
+ }
1852
+ async getDocumentOutline(localDocumentId) {
1853
+ const { document, result } = await this.store.loadResult(localDocumentId);
1854
+ const chunks = indexChunks(result);
1855
+ const sections = buildFlatSections(result, chunks);
1856
+ const sectionTree = result.docNav?.sections && result.docNav.sections.length > 0 ? result.docNav.sections.map(
1857
+ (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
1858
+ ) : nestSections(sections);
1859
+ return {
1860
+ document,
1861
+ totalChunks: chunks.length,
1862
+ typeCounts: document.typeCounts,
1863
+ sections,
1864
+ sectionTree
1865
+ };
1866
+ }
1867
+ async readChunks(params) {
1868
+ const { document, result } = await this.store.loadResult(params.localDocumentId);
1869
+ const limit = clampLimit(params.limit, DEFAULT_READ_LIMIT, MAX_READ_LIMIT);
1870
+ const chunks = indexChunks(result).filter((chunk) => matchesReadScope(chunk, params));
1871
+ const selected = selectReadWindow(chunks, params, limit);
1872
+ const lastSelected = selected[selected.length - 1];
1873
+ const nextChunk = lastSelected && selected.length < chunks.length ? chunks[chunks.indexOf(lastSelected) + 1]?.position : void 0;
1874
+ return {
1875
+ document,
1876
+ chunks: selected.map(toReadChunk),
1877
+ nextChunk
1878
+ };
1879
+ }
1880
+ async grepChunks(params) {
1881
+ if (!params.pattern) {
1882
+ throw new ValidationError("pattern is required");
1883
+ }
1884
+ const { document, result } = await this.store.loadResult(params.localDocumentId);
1885
+ const maxResults = clampLimit(params.maxResults, DEFAULT_GREP_LIMIT, MAX_GREP_LIMIT);
1886
+ const contextChars = params.contextChars ?? DEFAULT_CONTEXT_CHARS;
1887
+ const matcher = createMatcher(params);
1888
+ const scopedChunks = indexChunks(result).filter((chunk) => matchesGrepScope(chunk, params));
1889
+ const matches = [];
1890
+ let scannedChunks = 0;
1891
+ for (const chunk of scopedChunks) {
1892
+ scannedChunks += 1;
1893
+ const chunkMatches = matcher(chunk.content);
1894
+ for (const match of chunkMatches) {
1895
+ matches.push({
1896
+ position: chunk.position,
1897
+ chunkId: chunk.chunkId,
1898
+ chunkType: chunk.chunkType,
1899
+ sectionPath: chunk.sectionPath,
1900
+ sourceChunkPath: chunk.sourceChunkPath,
1901
+ filePath: chunk.filePath,
1902
+ startOffset: match.startOffset,
1903
+ endOffset: match.endOffset,
1904
+ snippet: buildSnippet(chunk.content, match.startOffset, match.endOffset, contextChars)
1905
+ });
1906
+ if (matches.length >= maxResults) {
1907
+ return { document, matches, scannedChunks, truncated: true };
1908
+ }
1909
+ }
1910
+ }
1911
+ return { document, matches, scannedChunks, truncated: false };
1912
+ }
1913
+ async search(params) {
1914
+ const localDocuments = await this.resolveSearchDocuments(params.localDocumentIds);
1915
+ const rawResponse = await this.client.retrieval.query({
1916
+ query: params.query,
1917
+ namespace: params.namespace,
1918
+ topK: params.topK,
1919
+ useAgentic: params.useAgentic ?? false
1920
+ });
1921
+ const documentByServerId = new Map(
1922
+ localDocuments.filter((document) => document.documentId).map((document) => [document.documentId, document])
1923
+ );
1924
+ return {
1925
+ namespace: rawResponse.namespace,
1926
+ query: rawResponse.query,
1927
+ evidenceText: rawResponse.evidenceText,
1928
+ references: [
1929
+ ...rawResponse.referencedChunks.map(
1930
+ (reference) => ({
1931
+ localDocumentId: reference.documentId ? documentByServerId.get(reference.documentId)?.localDocumentId : void 0,
1932
+ documentId: reference.documentId,
1933
+ chunkId: reference.chunkId,
1934
+ sectionPath: reference.sectionPath,
1935
+ chunkType: reference.chunkType
1936
+ })
1937
+ ),
1938
+ ...rawResponse.results.map((result) => toResultReference(result, documentByServerId))
1939
+ ],
1940
+ results: rawResponse.results.map(
1941
+ (result) => toRemoteSearchResult(result, documentByServerId)
1942
+ ),
1943
+ rawResponse
1944
+ };
1945
+ }
1946
+ async resolveSearchDocuments(localDocumentIds) {
1947
+ const documents = await this.store.listDocuments();
1948
+ if (!localDocumentIds || localDocumentIds.length === 0) {
1949
+ return documents;
1950
+ }
1951
+ const requested = new Set(localDocumentIds);
1952
+ return documents.filter((document) => requested.has(document.localDocumentId));
1953
+ }
1954
+ };
1955
/**
 * Turn the chunks of a parse result into indexed records.
 *
 * @param {object} result - Parse result with `chunks` and `manifest.sourceFileName`.
 * @returns {object[]} One record per chunk carrying the original chunk
 *   (`source`), a 1-based `position`, and normalized path/file fields.
 */
function indexChunks(result) {
  const toIndexed = (chunk, index) => ({
    source: chunk,
    position: index + 1, // positions are 1-based
    chunkId: chunk.chunkId,
    chunkType: chunk.type,
    content: chunk.content,
    sectionPath: normalizeSectionPath(chunk.path, result.manifest.sourceFileName),
    sourceChunkPath: chunk.path,
    filePath: getChunkFilePath2(chunk),
    metadata: chunk.metadata
  });
  return result.chunks.map((chunk, index) => toIndexed(chunk, index));
}
1971
/**
 * Resolve the file path associated with a chunk.
 *
 * Image and table chunks expose it directly as `chunk.filePath`; every other
 * chunk type is looked up in `chunk.metadata.filePath`.
 *
 * @param {object} chunk - Parsed chunk with a `type` and optional `metadata`.
 * @returns {string|undefined} The path, or undefined when the chunk has no
 *   metadata or the metadata value is not a string.
 */
function getChunkFilePath2(chunk) {
  if (chunk.type === "image" || chunk.type === "table") {
    return chunk.filePath;
  }
  // Optional chaining: a chunk without metadata yields undefined instead of throwing.
  const filePath = chunk.metadata?.filePath;
  return typeof filePath === "string" ? filePath : void 0;
}
1978
/**
 * Convert a slash-separated chunk path into a " / "-separated section path.
 *
 * - Empty input gives "".
 * - Paths starting with "images/" or "tables/" are returned unchanged.
 * - If `sourceFileName` appears as a segment, everything after it is kept
 *   (or the file name itself when nothing follows it).
 * - Otherwise the first segment is dropped, unless it is the only one.
 *
 * @param {string} path3 - Raw chunk path.
 * @param {string} [sourceFileName] - File name used to locate the document root.
 * @returns {string}
 */
function normalizeSectionPath(path3, sourceFileName) {
  if (!path3) {
    return "";
  }
  const isAssetPath = ["images/", "tables/"].some((prefix) => path3.startsWith(prefix));
  if (isAssetPath) {
    return path3;
  }

  const segments = path3.split("/").filter(Boolean);
  const fileAt = sourceFileName ? segments.indexOf(sourceFileName) : -1;
  if (fileAt >= 0) {
    const below = segments.slice(fileAt + 1).join(" / ");
    return below || sourceFileName;
  }

  return segments.length > 1 ? segments.slice(1).join(" / ") : (segments[0] ?? "");
}
1997
/**
 * Produce a flat list of sections for a parse result.
 *
 * When the result has `docNav.sections`, they are converted and flattened.
 * Otherwise sections are derived by grouping chunks on their section path
 * (falling back to the raw chunk path) and sorted with `compareSections`.
 *
 * @param {object} result - Parse result.
 * @param {object[]} chunks - Indexed chunks for that result.
 * @returns {object[]}
 */
function buildFlatSections(result, chunks) {
  const navSections = result.docNav?.sections;
  if (navSections && navSections.length > 0) {
    const converted = navSections.map(
      (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
    );
    return flattenSections(converted);
  }

  const sectionsByPath = new Map();
  for (const chunk of chunks) {
    const key = chunk.sectionPath || chunk.sourceChunkPath;
    const section = sectionsByPath.get(key);
    if (!section) {
      sectionsByPath.set(key, createSectionFromChunk(key, chunk));
      continue;
    }
    addChunkToSection(section, chunk);
  }
  return Array.from(sectionsByPath.values()).sort(compareSections);
}
2017
/**
 * Convert a navigation section (and, recursively, its children) into a
 * knowledge section annotated with chunk statistics.
 *
 * @param {object} section - Navigation node with `path`, `title`, `level`,
 *   `summary` and `children`.
 * @param {object[]} chunks - Indexed chunks of the whole document.
 * @param {string} [sourceFileName] - Used to normalize the section path.
 * @returns {object} Section record; chunk statistics cover every chunk whose
 *   section path is this section or lies beneath it.
 */
function toKnowledgeSection(section, chunks, sourceFileName) {
  const sectionPath = normalizeSectionPath(section.path, sourceFileName);
  const ownChunks = chunks.filter((chunk) => isInSection(chunk.sectionPath, sectionPath));
  const children = section.children.map(
    (child) => toKnowledgeSection(child, chunks, sourceFileName)
  );
  const { title: sectionTitle, level: sectionLevel, summary } = section;
  return {
    sectionPath,
    sectionTitle,
    sectionLevel,
    summary,
    startChunk: minPosition(ownChunks),
    endChunk: maxPosition(ownChunks),
    chunkCount: ownChunks.length,
    typeCounts: countIndexedTypes(ownChunks),
    children
  };
}
2035
/**
 * Create a section record seeded with a single chunk.
 *
 * @param {string} pathValue - " / "-separated section path.
 * @param {object} chunk - Indexed chunk with `position` and `chunkType`.
 * @returns {object} Section whose title is the last path segment (or the whole
 *   path when there is none) and whose level is the segment count, at least 1.
 */
function createSectionFromChunk(pathValue, chunk) {
  const segments = pathValue.split(" / ").filter(Boolean);
  const { position, chunkType } = chunk;
  const typeCounts = { text: 0, image: 0, table: 0 };
  typeCounts[chunkType] = 1;
  return {
    sectionPath: pathValue,
    sectionTitle: segments.length > 0 ? segments[segments.length - 1] ?? pathValue : pathValue,
    sectionLevel: segments.length > 1 ? segments.length : 1,
    startChunk: position,
    endChunk: position,
    chunkCount: 1,
    typeCounts,
    children: []
  };
}
2048
/**
 * Fold one more chunk into an existing section record (mutates `section`).
 *
 * Widens the section's chunk range, bumps its chunk count and increments the
 * tally for the chunk's type. A type that has no tally yet starts from 0, so
 * the count never becomes NaN.
 *
 * @param {object} section - Section with `startChunk`, `endChunk`,
 *   `chunkCount` and `typeCounts`.
 * @param {object} chunk - Indexed chunk with `position` and `chunkType`.
 * @returns {void}
 */
function addChunkToSection(section, chunk) {
  const { position, chunkType } = chunk;
  section.startChunk = Math.min(section.startChunk ?? position, position);
  section.endChunk = Math.max(section.endChunk ?? position, position);
  section.chunkCount += 1;
  section.typeCounts[chunkType] = (section.typeCounts[chunkType] ?? 0) + 1;
}
2054
/**
 * Flatten a section tree into a list in pre-order (each parent immediately
 * followed by its descendants).
 *
 * @param {object[]} sections - Tree roots; every node has a `children` array.
 * @returns {object[]} The same node objects, in pre-order.
 */
function flattenSections(sections) {
  const flat = [];
  const visit = (nodes) => {
    for (const node of nodes) {
      flat.push(node);
      visit(node.children);
    }
  };
  visit(sections);
  return flat;
}
2057
/**
 * Rebuild a section tree from a flat list using the " / "-separated paths.
 *
 * Each input section is shallow-copied with a fresh, empty `children` array,
 * so the input objects are not modified. A section whose parent path is not
 * present in the list becomes a root.
 *
 * @param {object[]} sections - Flat sections with `sectionPath`.
 * @returns {object[]} Root sections with populated `children`.
 */
function nestSections(sections) {
  const copies = sections.map((section) => ({ ...section, children: [] }));
  const copyByPath = new Map();
  for (const copy of copies) {
    copyByPath.set(copy.sectionPath, copy);
  }

  const roots = [];
  for (const copy of copies) {
    const parentPath = getParentSectionPath(copy.sectionPath);
    const parent = parentPath ? copyByPath.get(parentPath) : void 0;
    const siblings = parent ? parent.children : roots;
    siblings.push(copy);
  }
  return roots;
}
2075
/**
 * Compute the parent of a " / "-separated section path.
 *
 * @param {string} sectionPath
 * @returns {string|undefined} The path without its last segment, or undefined
 *   when the path has at most one non-empty segment.
 */
function getParentSectionPath(sectionPath) {
  const segments = sectionPath.split(" / ").filter(Boolean);
  const hasParent = segments.length > 1;
  if (!hasParent) {
    return void 0;
  }
  segments.pop();
  return segments.join(" / ");
}
2082
/**
 * Sort comparator ordering sections by their first chunk position.
 * Sections without a `startChunk` sort last.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Negative, zero or positive per the Array#sort contract.
 */
function compareSections(left, right) {
  const rank = (section) => section.startChunk ?? Number.MAX_SAFE_INTEGER;
  return rank(left) - rank(right);
}
2085
/**
 * Smallest `position` among the given chunks.
 *
 * Uses a fold rather than spreading the positions into `Math.min`, so very
 * large chunk lists cannot exceed the engine's argument-count limit.
 *
 * @param {object[]} chunks - Indexed chunks with a numeric `position`.
 * @returns {number|undefined} The minimum, or undefined for an empty list.
 */
function minPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((lowest, chunk) => Math.min(lowest, chunk.position), Infinity);
}
2091
/**
 * Largest `position` among the given chunks.
 *
 * Uses a fold rather than spreading the positions into `Math.max`, so very
 * large chunk lists cannot exceed the engine's argument-count limit.
 *
 * @param {object[]} chunks - Indexed chunks with a numeric `position`.
 * @returns {number|undefined} The maximum, or undefined for an empty list.
 */
function maxPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((highest, chunk) => Math.max(highest, chunk.position), -Infinity);
}
2097
/**
 * Tally indexed chunks by `chunkType`.
 *
 * The result always contains `text`, `image` and `table` (0 when absent).
 * Any other chunk type is counted from 0 as well, instead of producing NaN.
 *
 * @param {object[]} chunks - Indexed chunks with a `chunkType`.
 * @returns {object} Map of chunk type to count.
 */
function countIndexedTypes(chunks) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const { chunkType } of chunks) {
    counts[chunkType] = (counts[chunkType] ?? 0) + 1;
  }
  return counts;
}
2106
/**
 * Test whether a chunk's section path is the given section or nested below it.
 *
 * @param {string} chunkSectionPath - " / "-separated path of the chunk.
 * @param {string} sectionPath - " / "-separated path of the section.
 * @returns {boolean}
 */
function isInSection(chunkSectionPath, sectionPath) {
  if (chunkSectionPath === sectionPath) {
    return true;
  }
  // The trailing separator stops "AB" from matching section "A".
  const descendantPrefix = `${sectionPath} / `;
  return chunkSectionPath.startsWith(descendantPrefix);
}
2109
/**
 * Normalize a caller-supplied limit to an integer in [1, maxValue].
 *
 * @param {number} [value] - Requested limit; fractional values are floored.
 * @param {number} defaultValue - Returned when `value` is undefined or does
 *   not floor to a number (NaN), so an invalid limit cannot leak through.
 * @param {number} maxValue - Upper bound.
 * @returns {number}
 */
function clampLimit(value, defaultValue, maxValue) {
  if (value === void 0) {
    return defaultValue;
  }
  const floored = Math.floor(value);
  if (Number.isNaN(floored)) {
    return defaultValue;
  }
  return Math.min(Math.max(floored, 1), maxValue);
}
2115
/**
 * Decide whether an indexed chunk passes the read filters in `params`.
 *
 * Each of `chunkId`, `chunkType` and `sectionPath` is applied only when it is
 * set (truthy); `sectionPath` also accepts chunks nested below that section.
 *
 * @param {object} chunk - Indexed chunk.
 * @param {object} params - Read parameters.
 * @returns {boolean}
 */
function matchesReadScope(chunk, params) {
  const { chunkId, chunkType, sectionPath } = params;
  const idMatches = !chunkId || chunk.chunkId === chunkId;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  // The section test runs last and only when the cheaper filters passed.
  return idMatches && typeMatches && (!sectionPath || isInSection(chunk.sectionPath, sectionPath));
}
2127
/**
 * Pick the chunks to return for a read request.
 *
 * With `params.chunkId` set, the position window is ignored and only the
 * limit applies. Otherwise chunks are restricted to positions within
 * [startChunk, endChunk] (defaulting to the first chunk's position, or 1,
 * and to no upper bound) before the limit is applied.
 *
 * @param {object[]} chunks - In-scope indexed chunks.
 * @param {object} params - Read parameters.
 * @param {number} limit - Maximum number of chunks to return.
 * @returns {object[]}
 */
function selectReadWindow(chunks, params, limit) {
  let candidates = chunks;
  if (!params.chunkId) {
    const first = params.startChunk ?? chunks[0]?.position ?? 1;
    const last = params.endChunk ?? Number.MAX_SAFE_INTEGER;
    candidates = chunks.filter(({ position }) => position >= first && position <= last);
  }
  return candidates.slice(0, limit);
}
2135
/**
 * Project an indexed chunk onto the shape returned to readers, leaving out
 * the `source` back-reference.
 *
 * @param {object} chunk - Indexed chunk.
 * @returns {object} Copy holding only the reader-facing fields.
 */
function toReadChunk(chunk) {
  const {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  } = chunk;
  return {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  };
}
2147
/**
 * Decide whether an indexed chunk should be scanned by a grep request.
 *
 * `chunkType` must match exactly when set; `sectionPathPrefix`, when set, is
 * compared as a plain string prefix of the chunk's section path.
 *
 * @param {object} chunk - Indexed chunk.
 * @param {object} params - Grep parameters.
 * @returns {boolean}
 */
function matchesGrepScope(chunk, params) {
  const { chunkType, sectionPathPrefix } = params;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  return typeMatches && (!sectionPathPrefix || chunk.sectionPath.startsWith(sectionPathPrefix));
}
2156
/**
 * Build a function that finds every occurrence of `params.pattern` in a string.
 *
 * - `params.isRegex`: the pattern is compiled as a regular expression (flag
 *   "g", plus "i" unless `params.isCaseSensitive`). Scanning stops after the
 *   first zero-length match.
 * - Otherwise the pattern is a literal; matches do not overlap.
 *
 * Fixes over the previous implementation:
 * - An empty literal pattern now yields no matches instead of looping forever
 *   (`indexOf("", i)` never returns -1).
 * - Case-insensitive literal search no longer reports shifted offsets when
 *   lower-casing changes the content length (for example "\u0130"); in that
 *   case the content is scanned with an escaped case-insensitive regex so
 *   offsets refer to the original string.
 *
 * @param {object} params
 * @param {string} params.pattern
 * @param {boolean} [params.isRegex]
 * @param {boolean} [params.isCaseSensitive]
 * @returns {(content: string) => Array<{startOffset: number, endOffset: number}>}
 * @throws {SyntaxError} When `isRegex` is set and the pattern is not a valid regex.
 */
function createMatcher(params) {
  // Collect regex matches; a zero-length match ends the scan.
  const scanWithRegex = (regex, content) => {
    const found = [];
    for (const match of content.matchAll(regex)) {
      const startOffset = match.index ?? 0;
      const text = match[0] ?? "";
      found.push({ startOffset, endOffset: startOffset + text.length });
      if (text.length === 0) {
        break;
      }
    }
    return found;
  };

  // Collect non-overlapping literal occurrences of `needle` in `haystack`.
  const scanLiteral = (haystack, needle) => {
    const found = [];
    let index = haystack.indexOf(needle);
    while (index >= 0) {
      found.push({ startOffset: index, endOffset: index + needle.length });
      index = haystack.indexOf(needle, index + Math.max(needle.length, 1));
    }
    return found;
  };

  if (params.isRegex) {
    const flags = params.isCaseSensitive ? "g" : "gi";
    const regex = new RegExp(params.pattern, flags);
    return (content) => scanWithRegex(regex, content);
  }

  const pattern = params.pattern;
  if (pattern === "") {
    // Nothing to search for; also avoids the non-terminating indexOf loop.
    return () => [];
  }

  if (params.isCaseSensitive) {
    return (content) => scanLiteral(content, pattern);
  }

  const needle = pattern.toLowerCase();
  const escapedPattern = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const fallbackRegex = new RegExp(escapedPattern, "gi");
  return (content) => {
    const haystack = content.toLowerCase();
    if (haystack.length !== content.length) {
      // Lower-casing changed the length, so haystack offsets would not line
      // up with `content`; match against the original text instead.
      return scanWithRegex(fallbackRegex, content);
    }
    return scanLiteral(haystack, needle);
  };
}
2185
/**
 * Cut a snippet around a match, extended by up to `contextChars` characters
 * on each side and clipped to the bounds of `content`.
 *
 * A negative, NaN or missing `contextChars` is treated as 0, so the snippet
 * always contains at least the match itself.
 *
 * @param {string} content - Text the match was found in.
 * @param {number} startOffset - Start of the match (inclusive).
 * @param {number} endOffset - End of the match (exclusive).
 * @param {number} contextChars - Characters of context on each side.
 * @returns {string}
 */
function buildSnippet(content, startOffset, endOffset, contextChars) {
  const context = contextChars > 0 ? contextChars : 0;
  const start = Math.max(0, startOffset - context);
  const end = Math.min(content.length, endOffset + context);
  return content.slice(start, end);
}
2190
/**
 * Build a reference entry from a remote search result.
 *
 * @param {object} result - Remote result with `source`, `chunkType` and `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by
 *   their server-side document id.
 * @returns {object} Reference in which null/missing source fields are reported
 *   as undefined and `localDocumentId` is filled in when the document is known.
 */
function toResultReference(result, documentByServerId) {
  const { source, chunkType, score } = result;
  const documentId = source.documentId ?? void 0;
  let localDocumentId;
  if (documentId) {
    localDocumentId = documentByServerId.get(documentId)?.localDocumentId;
  }
  return {
    localDocumentId,
    documentId,
    sectionPath: source.sectionPath ?? void 0,
    chunkType,
    score
  };
}
2200
/**
 * Convert a remote search result into the shape returned by `search`.
 *
 * @param {object} result - Remote result with `source`, `chunkType`, `content`
 *   and `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by
 *   their server-side document id.
 * @returns {object} Result in which null/missing source fields are reported as
 *   undefined and `localDocumentId` is filled in when the document is known.
 */
function toRemoteSearchResult(result, documentByServerId) {
  const { source, chunkType, content, score } = result;
  const documentId = source.documentId ?? void 0;
  let localDocumentId;
  if (documentId) {
    localDocumentId = documentByServerId.get(documentId)?.localDocumentId;
  }
  return {
    localDocumentId,
    documentId,
    chunkType,
    content,
    score,
    sectionPath: source.sectionPath ?? void 0,
    sourceFileName: source.sourceFileName ?? void 0
  };
}
2212
+
1251
2213
  // src/client.ts
1252
2214
  function inferFileName(file, explicitFileName) {
1253
2215
  if (explicitFileName) {
1254
2216
  return explicitFileName;
1255
2217
  }
1256
2218
  if (typeof file === "string") {
1257
- return import_path2.default.basename(file);
2219
+ return import_path3.default.basename(file);
1258
2220
  }
1259
2221
  if (isReadStream2(file) && typeof file.path === "string") {
1260
- return import_path2.default.basename(file.path);
2222
+ return import_path3.default.basename(file.path);
1261
2223
  }
1262
2224
  return void 0;
1263
2225
  }
1264
2226
  function isReadStream2(file) {
1265
2227
  return typeof file === "object" && file !== null && "pipe" in file && typeof file.pipe === "function";
1266
2228
  }
2229
/**
 * Collect the parsing options from parse parameters into the object sent with
 * a job, renaming `ocr` to `ocrEnabled`.
 *
 * @param {object} params - Parse parameters.
 * @returns {object|undefined} Only the options that are not undefined, or
 *   undefined when none is set.
 */
function buildParsingParams(params) {
  const candidates = [
    ["model", params.model],
    ["ocrEnabled", params.ocr],
    ["docType", params.docType],
    ["smartTitleParse", params.smartTitleParse],
    ["summaryImage", params.summaryImage],
    ["summaryTable", params.summaryTable],
    ["summaryTxt", params.summaryTxt],
    ["addFragDesc", params.addFragDesc],
    ["kbDir", params.kbDir]
  ];
  // Only undefined is dropped; false, null, 0 and "" are kept.
  const provided = candidates.filter(([, value]) => value !== void 0);
  if (provided.length === 0) {
    return void 0;
  }
  return Object.fromEntries(provided);
}
1267
2248
  var Knowhere = class {
1268
2249
  /** Jobs resource for low-level API */
1269
2250
  jobs;
@@ -1271,21 +2252,25 @@ var Knowhere = class {
1271
2252
  retrieval;
1272
2253
  /** Documents resource for canonical document lifecycle operations */
1273
2254
  documents;
2255
+ /** Client-side local knowledge tools over parsed Knowhere results */
2256
+ knowledge;
1274
2257
  httpClient;
1275
2258
  /**
1276
2259
  * Create a new Knowhere client
1277
2260
  */
1278
2261
  constructor(options = {}) {
1279
2262
  const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
1280
- if (!apiKey) {
2263
+ const authTokenProvider = apiKey ? void 0 : options.authTokenProvider;
2264
+ if (!apiKey && !authTokenProvider) {
1281
2265
  throw new ValidationError(
1282
- `API key is required. Provide it via options.apiKey or ${ENV.API_KEY} environment variable.`
2266
+ `API authentication is required. Provide it via options.apiKey, options.authTokenProvider, or ${ENV.API_KEY} environment variable.`
1283
2267
  );
1284
2268
  }
1285
2269
  const baseURL = options.baseURL ?? process.env[ENV.BASE_URL] ?? DEFAULT_BASE_URL;
1286
2270
  this.httpClient = new HttpClient({
1287
2271
  baseURL,
1288
2272
  apiKey,
2273
+ authTokenProvider,
1289
2274
  timeout: options.timeout,
1290
2275
  uploadTimeout: options.uploadTimeout,
1291
2276
  maxRetries: options.maxRetries,
@@ -1296,6 +2281,7 @@ var Knowhere = class {
1296
2281
  this.jobs = new Jobs(this.httpClient);
1297
2282
  this.retrieval = new Retrieval(this.httpClient);
1298
2283
  this.documents = new Documents(this.httpClient);
2284
+ this.knowledge = new Knowledge(this);
1299
2285
  }
1300
2286
  /**
1301
2287
  * High-level API: Parse a document and return structured results
@@ -1318,6 +2304,24 @@ var Knowhere = class {
1318
2304
  * ```
1319
2305
  */
1320
2306
  async parse(params) {
2307
+ const job = await this.startParse(params);
2308
+ const jobResult = await this.jobs.wait(job.jobId, {
2309
+ pollInterval: params.pollInterval,
2310
+ pollTimeout: params.pollTimeout,
2311
+ onProgress: params.onPollProgress,
2312
+ signal: params.signal
2313
+ });
2314
+ const result = await this.jobs.load(jobResult, {
2315
+ verifyChecksum: params.verifyChecksum
2316
+ });
2317
+ return enrichParseResult(result, jobResult);
2318
+ }
2319
+ /**
2320
+ * Start a parse job and return immediately after the URL job is created or
2321
+ * the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
2322
+ * to inspect completion and load results later.
2323
+ */
2324
+ async startParse(params) {
1321
2325
  if (!params.url && !params.file) {
1322
2326
  throw new ValidationError("Either url or file must be provided");
1323
2327
  }
@@ -1331,22 +2335,6 @@ var Knowhere = class {
1331
2335
  "fileName is required when file is a Buffer, Uint8Array, or stream without a path."
1332
2336
  );
1333
2337
  }
1334
- const parsingParams = {
1335
- model: params.model,
1336
- ocrEnabled: params.ocr,
1337
- docType: params.docType,
1338
- smartTitleParse: params.smartTitleParse,
1339
- summaryImage: params.summaryImage,
1340
- summaryTable: params.summaryTable,
1341
- summaryTxt: params.summaryTxt,
1342
- addFragDesc: params.addFragDesc,
1343
- kbDir: params.kbDir
1344
- };
1345
- Object.keys(parsingParams).forEach((key) => {
1346
- if (parsingParams[key] === void 0) {
1347
- delete parsingParams[key];
1348
- }
1349
- });
1350
2338
  const webhook = params.webhook;
1351
2339
  const job = await this.jobs.create({
1352
2340
  sourceType,
@@ -1355,7 +2343,7 @@ var Knowhere = class {
1355
2343
  dataId: params.dataId,
1356
2344
  namespace: params.namespace,
1357
2345
  documentId: params.documentId,
1358
- parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
2346
+ parsingParams: buildParsingParams(params),
1359
2347
  webhook
1360
2348
  });
1361
2349
  if (params.file) {
@@ -1365,16 +2353,7 @@ var Knowhere = class {
1365
2353
  signal: params.signal
1366
2354
  });
1367
2355
  }
1368
- const jobResult = await this.jobs.wait(job.jobId, {
1369
- pollInterval: params.pollInterval,
1370
- pollTimeout: params.pollTimeout,
1371
- onProgress: params.onPollProgress,
1372
- signal: params.signal
1373
- });
1374
- const result = await this.jobs.load(jobResult, {
1375
- verifyChecksum: params.verifyChecksum
1376
- });
1377
- return enrichParseResult(result, jobResult);
2356
+ return job;
1378
2357
  }
1379
2358
  };
1380
2359
  // Annotate the CommonJS export names for ESM import in node:
@@ -1392,6 +2371,8 @@ var Knowhere = class {
1392
2371
  Jobs,
1393
2372
  Knowhere,
1394
2373
  KnowhereError,
2374
+ Knowledge,
2375
+ LocalKnowledgeStore,
1395
2376
  NetworkError,
1396
2377
  NotFoundError,
1397
2378
  PaymentRequiredError,