@ontos-ai/knowhere-sdk 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,8 +1,10 @@
1
1
  // src/client.ts
2
- import path from "path";
2
+ import path2 from "path";
3
3
 
4
4
  // src/lib/http-client.ts
5
- import axios from "axios";
5
+ import axios, {
6
+ AxiosHeaders
7
+ } from "axios";
6
8
 
7
9
  // src/version.ts
8
10
  var VERSION = "0.1.0";
@@ -41,6 +43,7 @@ var NetworkError = class extends KnowhereError {
41
43
  this.cause = cause;
42
44
  this.name = "NetworkError";
43
45
  }
46
+ cause;
44
47
  };
45
48
  var TimeoutError = class extends NetworkError {
46
49
  constructor(message = "Request timed out") {
@@ -54,6 +57,7 @@ var PollingTimeoutError = class extends KnowhereError {
54
57
  this.elapsedMs = elapsedMs;
55
58
  this.name = "PollingTimeoutError";
56
59
  }
60
+ elapsedMs;
57
61
  };
58
62
  var ChecksumError = class extends KnowhereError {
59
63
  constructor(message = "Checksum verification failed", expected, actual) {
@@ -62,6 +66,8 @@ var ChecksumError = class extends KnowhereError {
62
66
  this.actual = actual;
63
67
  this.name = "ChecksumError";
64
68
  }
69
+ expected;
70
+ actual;
65
71
  };
66
72
  var ValidationError = class extends KnowhereError {
67
73
  constructor(message) {
@@ -87,6 +93,11 @@ var APIError = class extends KnowhereError {
87
93
  this.body = body;
88
94
  this.name = "APIError";
89
95
  }
96
+ statusCode;
97
+ code;
98
+ requestId;
99
+ details;
100
+ body;
90
101
  };
91
102
  var BadRequestError = class extends APIError {
92
103
  constructor(message, code, requestId, details, body) {
@@ -130,6 +141,7 @@ var RateLimitError = class extends APIError {
130
141
  this.retryAfter = retryAfter;
131
142
  this.name = "RateLimitError";
132
143
  }
144
+ retryAfter;
133
145
  };
134
146
  var InternalServerError = class extends APIError {
135
147
  constructor(message = "Internal server error", code, requestId, details, body) {
@@ -185,11 +197,13 @@ var JobFailedError = class extends KnowhereError {
185
197
  this.jobResult = jobResult;
186
198
  this.name = "JobFailedError";
187
199
  }
200
+ code;
201
+ jobResult;
188
202
  };
189
203
 
190
204
  // src/lib/utils.ts
191
205
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Settles with no value once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
194
208
  function snakeToCamel(str) {
195
209
  return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
@@ -295,8 +309,8 @@ function enrichParseResult(parseResult2, scope) {
295
309
  }
296
310
  return parseResult2;
297
311
  }
298
- function sanitizePath(path2) {
299
- let sanitized = path2.replace(/^\/+/, "");
312
+ function sanitizePath(path3) {
313
+ let sanitized = path3.replace(/^\/+/, "");
300
314
  sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
301
315
  sanitized = sanitized.replace(/\\/g, "/");
302
316
  return sanitized;
@@ -410,7 +424,7 @@ async function withRetry(fn, maxRetries, onRetry) {
410
424
  if (onRetry) {
411
425
  onRetry(attempt + 1, error);
412
426
  }
413
- await new Promise((resolve) => setTimeout(resolve, delay));
427
+ await new Promise((resolve2) => setTimeout(resolve2, delay));
414
428
  }
415
429
  }
416
430
  throw lastError;
@@ -423,17 +437,19 @@ var HttpClient = class {
423
437
  uploadTimeout;
424
438
  httpAgent;
425
439
  httpsAgent;
440
+ authTokenProvider;
426
441
  constructor(options) {
427
442
  this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
428
443
  this.uploadTimeout = options.uploadTimeout ?? 6e5;
429
444
  this.httpAgent = options.httpAgent;
430
445
  this.httpsAgent = options.httpsAgent;
446
+ this.authTokenProvider = options.authTokenProvider;
431
447
  this.axios = axios.create({
432
448
  baseURL: options.baseURL,
433
449
  timeout: options.timeout ?? DEFAULT_TIMEOUT,
434
450
  headers: {
435
451
  "User-Agent": `knowhere-node-sdk/${VERSION}`,
436
- Authorization: `Bearer ${options.apiKey}`,
452
+ ...options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : {},
437
453
  "Content-Type": "application/json",
438
454
  ...options.defaultHeaders
439
455
  },
@@ -445,6 +461,9 @@ var HttpClient = class {
445
461
  setupInterceptors() {
446
462
  this.axios.interceptors.request.use(
447
463
  (config) => {
464
+ if (this.authTokenProvider) {
465
+ return this.attachDynamicAuthorization(config);
466
+ }
448
467
  if (config.data && typeof config.data === "object") {
449
468
  config.data = keysToSnake(config.data);
450
469
  }
@@ -468,6 +487,19 @@ var HttpClient = class {
468
487
  }
469
488
  );
470
489
  }
490
+ async attachDynamicAuthorization(config) {
491
+ const token = await this.authTokenProvider?.();
492
+ if (!token) {
493
+ throw new ValidationError("Authentication token provider returned an empty token");
494
+ }
495
+ const headers = AxiosHeaders.from(config.headers);
496
+ headers.set("Authorization", `Bearer ${token}`);
497
+ config.headers = headers;
498
+ if (config.data && typeof config.data === "object") {
499
+ config.data = keysToSnake(config.data);
500
+ }
501
+ return config;
502
+ }
471
503
  handleError(error) {
472
504
  if (!error.response) {
473
505
  if (error.code === "ECONNABORTED" || error.message.includes("timeout")) {
@@ -752,11 +784,14 @@ async function pollJobStatus(httpClient, jobId, options) {
752
784
  // src/lib/result-parser.ts
753
785
  import JSZip from "jszip";
754
786
  import { promises as fs2 } from "fs";
755
- import { join, dirname } from "path";
787
+ import { join, dirname, resolve, sep } from "path";
756
788
/**
 * Download a result archive and parse it.
 *
 * @param {object} httpClient - Client exposing `download(url)`.
 * @param {string} resultUrl - Location of the result ZIP.
 * @param {object} [options] - Optional settings; only `verifyChecksum` is read.
 * @returns {Promise<object>} Whatever `parseResultBuffer` produces for the archive.
 */
async function parseResult(httpClient, resultUrl, options) {
  const archive = await httpClient.download(resultUrl);
  const checksumRequested = options?.verifyChecksum !== false;
  if (checksumRequested) {
    // NOTE(review): this branch is empty in the bundled output, so no checksum
    // verification actually takes place here — confirm whether that is intended.
  }
  return parseResultBuffer(archive);
}
794
+ async function parseResultBuffer(zipBuffer) {
760
795
  const zip = await JSZip.loadAsync(zipBuffer);
761
796
  const manifestFile = zip.file("manifest.json");
762
797
  if (!manifestFile) {
@@ -821,7 +856,7 @@ async function parseResult(httpClient, resultUrl, options) {
821
856
  if (hierarchyViewFile) {
822
857
  hierarchyViewHtml = await hierarchyViewFile.async("string");
823
858
  }
824
- const result = {
859
+ return createParseResult({
825
860
  manifest,
826
861
  chunks,
827
862
  docNav,
@@ -832,6 +867,136 @@ async function parseResult(httpClient, resultUrl, options) {
832
867
  hierarchy,
833
868
  tocHierarchies,
834
869
  kbCsv,
870
+ hierarchyViewHtml
871
+ });
872
+ }
873
/**
 * Rebuild a parse result from a directory of already-expanded result files.
 *
 * `manifest.json` and `chunks.json` are required; `full.md`, `doc_nav.json`,
 * `hierarchy.json`, `chunks_slim.json`, `toc_hierarchies.json`, `kb.csv` and
 * `hierarchy_view.html` are read only when present. Files are read one at a
 * time, in that fixed order.
 *
 * @param {string} directory - Directory containing the expanded result.
 * @returns {Promise<object>} Result built by `createParseResult`, with an empty `rawZip`.
 */
async function parseResultDirectory(directory) {
  const manifestJson = JSON.parse(await readRequiredTextFile(directory, "manifest.json"));
  const manifest = parseDates(keysToCamel(manifestJson));

  const chunksJson = JSON.parse(await readRequiredTextFile(directory, "chunks.json"));
  const chunkPayload = keysToCamel(chunksJson);
  const chunks = [];
  for (const rawChunk of extractChunks(chunkPayload)) {
    chunks.push(await processDirectoryChunk(directory, rawChunk));
  }

  const fullMarkdown = await readOptionalTextFile(directory, "full.md");

  const docNavJson = await readOptionalJsonFile(directory, "doc_nav.json");
  let docNav;
  if (docNavJson !== void 0) {
    docNav = keysToCamel(docNavJson);
  }

  const hierarchy = await readOptionalJsonFile(directory, "hierarchy.json");

  const slimJson = await readOptionalJsonFile(directory, "chunks_slim.json");
  let chunksSlim;
  if (slimJson !== void 0) {
    chunksSlim = extractSlimChunks(keysToCamel(slimJson));
  }

  const tocJson = await readOptionalJsonFile(directory, "toc_hierarchies.json");
  let tocHierarchies;
  if (tocJson !== void 0) {
    tocHierarchies = keysToCamel(tocJson);
  }

  const kbCsv = await readOptionalTextFile(directory, "kb.csv");
  const hierarchyViewHtml = await readOptionalTextFile(directory, "hierarchy_view.html");

  return createParseResult({
    manifest,
    chunks,
    docNav,
    fullMarkdown,
    // No archive exists for a directory-backed result.
    rawZip: Buffer.alloc(0),
    chunksSlim,
    hierarchy,
    tocHierarchies,
    kbCsv,
    hierarchyViewHtml
  });
}
909
/**
 * Write a parse result to disk as individual files.
 *
 * When the result carries a non-empty `rawZip`, the archive is extracted via
 * `tryExtractRawZip`; if that succeeds nothing else is written. Otherwise the
 * files are regenerated from the in-memory result: JSON documents first, then
 * text documents, then image and table assets.
 *
 * @param {object} result - Parse result to persist.
 * @param {string} directory - Destination directory (created if needed).
 * @returns {Promise<string>} The `directory` argument.
 */
async function saveExpandedParseResult(result, directory) {
  if (result.rawZip.length > 0 && (await tryExtractRawZip(result.rawZip, directory))) {
    return directory;
  }

  await fs2.mkdir(directory, { recursive: true });

  const pretty = (value) => JSON.stringify(value, null, 2);
  const put = (fileName, contents) => fs2.writeFile(join(directory, fileName), contents);

  await put("manifest.json", pretty(result.manifest));
  if (result.docNav) {
    await put("doc_nav.json", pretty(result.docNav));
  }
  await put("chunks.json", pretty(serializeChunks(result.chunks)));
  if (result.chunksSlim) {
    await put("chunks_slim.json", pretty({ chunks: result.chunksSlim }));
  }
  if (result.fullMarkdown) {
    await put("full.md", result.fullMarkdown);
  }
  if (result.hierarchy) {
    await put("hierarchy.json", pretty(result.hierarchy));
  }
  if (result.tocHierarchies) {
    await put("toc_hierarchies.json", pretty(result.tocHierarchies));
  }
  if (result.kbCsv) {
    await put("kb.csv", result.kbCsv);
  }
  if (result.hierarchyViewHtml) {
    await put("hierarchy_view.html", result.hierarchyViewHtml);
  }

  for (const image of result.imageChunks) {
    await writeBinaryAsset(directory, image.filePath, image.data);
  }
  for (const table of result.tableChunks) {
    await writeTextAsset(directory, table.filePath, table.html);
  }
  return directory;
}
960
/**
 * Best-effort extraction of a result archive into `directory`.
 *
 * Directory entries and an entry named `result.zip` are skipped. Every other
 * entry is written to the location `resolveAssetPath` returns for its name.
 *
 * @param {Buffer} zipBuffer - Raw archive bytes.
 * @param {string} directory - Destination directory (created if needed).
 * @returns {Promise<boolean>} `true` when every entry was written, `false`
 *   when anything in the process threw (the error itself is discarded).
 */
async function tryExtractRawZip(zipBuffer, directory) {
  try {
    const archive = await JSZip.loadAsync(zipBuffer);
    await fs2.mkdir(directory, { recursive: true });

    for (const entry of Object.values(archive.files)) {
      const isExtractable = !entry.dir && entry.name !== "result.zip";
      if (isExtractable) {
        const target = resolveAssetPath(directory, entry.name);
        await fs2.mkdir(dirname(target), { recursive: true });
        const bytes = await entry.async("nodebuffer");
        await fs2.writeFile(target, bytes);
      }
    }
    return true;
  } catch {
    // Deliberate: callers fall back to regenerating the files themselves.
    return false;
  }
}
977
+ function createParseResult(parts) {
978
+ const {
979
+ manifest,
980
+ chunks,
981
+ docNav,
982
+ fullMarkdown,
983
+ rawZip,
984
+ chunksSlim,
985
+ hierarchy,
986
+ tocHierarchies,
987
+ kbCsv,
988
+ hierarchyViewHtml
989
+ } = parts;
990
+ return {
991
+ manifest,
992
+ chunks,
993
+ docNav,
994
+ fullMarkdown,
995
+ rawZip,
996
+ chunksSlim,
997
+ hierarchy,
998
+ tocHierarchies,
999
+ kbCsv,
835
1000
  hierarchyViewHtml,
836
1001
  get textChunks() {
837
1002
  return chunks.filter((c) => c.type === "text");
@@ -888,11 +1053,10 @@ async function parseResult(httpClient, resultUrl, options) {
888
1053
  for (const tableChunk of this.tableChunks) {
889
1054
  await tableChunk.save(directory);
890
1055
  }
891
- await fs2.writeFile(join(directory, "result.zip"), zipBuffer);
1056
+ await fs2.writeFile(join(directory, "result.zip"), rawZip);
892
1057
  return directory;
893
1058
  }
894
1059
  };
895
- return result;
896
1060
  }
897
1061
  function extractChunks(payload) {
898
1062
  if (Array.isArray(payload)) {
@@ -925,6 +1089,37 @@ function buildTextChunk(chunkData) {
925
1089
  metadata: chunkData.metadata ?? {}
926
1090
  };
927
1091
  }
1092
/**
 * Assemble an image chunk object from raw chunk data and its file contents.
 *
 * Missing (`null`/`undefined`) `chunkId`, `content` and `path` become empty
 * strings and missing `metadata` becomes an empty object.
 *
 * @param {object} chunkData - Raw chunk record.
 * @param {string} filePath - Asset path of the image within the result.
 * @param {Buffer} imageBuffer - Image bytes.
 * @returns {object} Chunk with a lazy `format` getter and a `save(directory)` method.
 */
function buildImageChunk(chunkData, filePath, imageBuffer) {
  const { chunkId, content, path: chunkPath, metadata } = chunkData;
  return {
    chunkId: chunkId ?? "",
    type: "image",
    content: content ?? "",
    path: chunkPath ?? "",
    filePath,
    data: imageBuffer,
    metadata: metadata ?? {},
    // Derived from the current filePath each time it is read.
    get format() {
      const assetPath = this.filePath;
      return getFileExtension(assetPath);
    },
    async save(directory) {
      const { filePath: assetPath, data } = this;
      return writeBinaryAsset(directory, assetPath, data);
    }
  };
}
1109
/**
 * Assemble a table chunk object from raw chunk data and its HTML.
 *
 * Missing (`null`/`undefined`) `chunkId`, `content` and `path` become empty
 * strings and missing `metadata` becomes an empty object.
 *
 * @param {object} chunkData - Raw chunk record.
 * @param {string} filePath - Asset path of the table file within the result.
 * @param {string} html - Table markup.
 * @returns {object} Chunk with a `save(directory)` method that writes the HTML.
 */
function buildTableChunk(chunkData, filePath, html) {
  const { chunkId, content, path: chunkPath, metadata } = chunkData;
  return {
    chunkId: chunkId ?? "",
    type: "table",
    content: content ?? "",
    path: chunkPath ?? "",
    filePath,
    html,
    metadata: metadata ?? {},
    async save(directory) {
      const { filePath: assetPath, html: markup } = this;
      return writeTextAsset(directory, assetPath, markup);
    }
  };
}
928
1123
  async function processChunk(zip, chunkData) {
929
1124
  if (chunkData.type === "text") {
930
1125
  return buildTextChunk(chunkData);
@@ -940,26 +1135,7 @@ async function processChunk(zip, chunkData) {
940
1135
  throw new KnowhereError(`Image file not found: ${filePath}`);
941
1136
  }
942
1137
  const imageBuffer = await imageFile.async("nodebuffer");
943
- const enrichedChunk = {
944
- chunkId: chunkData.chunkId ?? "",
945
- type: "image",
946
- content: chunkData.content ?? "",
947
- path: chunkData.path ?? "",
948
- filePath,
949
- data: imageBuffer,
950
- metadata: chunkData.metadata ?? {},
951
- get format() {
952
- return getFileExtension(this.filePath);
953
- },
954
- async save(directory) {
955
- const outputPath = join(directory, sanitizePath(this.filePath));
956
- const outputDir = dirname(outputPath);
957
- await fs2.mkdir(outputDir, { recursive: true });
958
- await fs2.writeFile(outputPath, this.data);
959
- return outputPath;
960
- }
961
- };
962
- return enrichedChunk;
1138
+ return buildImageChunk(chunkData, filePath, imageBuffer);
963
1139
  }
964
1140
  if (chunkData.type === "table") {
965
1141
  const filePath = getChunkFilePath(chunkData);
@@ -972,26 +1148,112 @@ async function processChunk(zip, chunkData) {
972
1148
  throw new KnowhereError(`Table file not found: ${filePath}`);
973
1149
  }
974
1150
  const html = await htmlFile.async("string");
975
- const enrichedChunk = {
976
- chunkId: chunkData.chunkId ?? "",
977
- type: "table",
978
- content: chunkData.content ?? "",
979
- path: chunkData.path ?? "",
980
- filePath,
981
- html,
982
- metadata: chunkData.metadata ?? {},
983
- async save(directory) {
984
- const outputPath = join(directory, sanitizePath(this.filePath));
985
- const outputDir = dirname(outputPath);
986
- await fs2.mkdir(outputDir, { recursive: true });
987
- await fs2.writeFile(outputPath, this.html);
988
- return outputPath;
1151
+ return buildTableChunk(chunkData, filePath, html);
1152
+ }
1153
+ return buildTextChunk(chunkData);
1154
+ }
1155
/**
 * Turn one raw chunk record into an enriched chunk, loading image and table
 * assets from an expanded result directory.
 *
 * Image and table chunks must name a file; any other type (including "text")
 * is built as a text chunk.
 *
 * @param {string} directory - Expanded result directory.
 * @param {object} chunkData - Raw chunk record.
 * @returns {Promise<object>} The enriched chunk.
 * @throws {KnowhereError} When an image/table chunk has no file path or its
 *   file does not exist. Other read errors are rethrown unchanged.
 */
async function processDirectoryChunk(directory, chunkData) {
  switch (chunkData.type) {
    case "image": {
      const filePath = getChunkFilePath(chunkData);
      if (!filePath) {
        throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
      }
      try {
        const assetLocation = resolveAssetPath(directory, filePath);
        const imageBuffer = await fs2.readFile(assetLocation);
        return buildImageChunk(chunkData, filePath, imageBuffer);
      } catch (error) {
        if (!isMissingFileError(error)) {
          throw error;
        }
        throw new KnowhereError(`Image file not found: ${filePath}`);
      }
    }
    case "table": {
      const filePath = getChunkFilePath(chunkData);
      if (!filePath) {
        throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
      }
      try {
        const assetLocation = resolveAssetPath(directory, filePath);
        const html = await fs2.readFile(assetLocation, "utf8");
        return buildTableChunk(chunkData, filePath, html);
      } catch (error) {
        if (!isMissingFileError(error)) {
          throw error;
        }
        throw new KnowhereError(`Table file not found: ${filePath}`);
      }
    }
    default:
      // Covers "text" as well as any type this function does not recognise.
      return buildTextChunk(chunkData);
  }
}
1191
/**
 * Reduce enriched chunks to plain records suitable for `chunks.json`.
 *
 * Only `chunkId`, `type`, `content`, `path` and `metadata` are kept; image and
 * table chunks additionally keep `filePath`.
 *
 * @param {object[]} chunks - Enriched chunks.
 * @returns {{chunks: object[]}} Wrapper object holding the plain records.
 */
function serializeChunks(chunks) {
  const toPlainRecord = (chunk) => {
    const { chunkId, type, content, path: chunkPath, metadata } = chunk;
    const record = { chunkId, type, content, path: chunkPath, metadata };
    const hasAssetFile = type === "image" || type === "table";
    return hasAssetFile ? { ...record, filePath: chunk.filePath } : record;
  };
  return { chunks: chunks.map(toPlainRecord) };
}
1208
/**
 * Read a UTF-8 file that must exist inside a result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name relative to `directory`.
 * @returns {Promise<string>} File contents.
 * @throws {KnowhereError} When the file is missing; other errors pass through.
 */
async function readRequiredTextFile(directory, fileName) {
  const target = join(directory, fileName);
  return fs2.readFile(target, "utf8").catch((error) => {
    if (!isMissingFileError(error)) {
      throw error;
    }
    throw new KnowhereError(`${fileName} not found in result directory`);
  });
}
1218
/**
 * Read a UTF-8 file that may be absent from a result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name relative to `directory`.
 * @returns {Promise<string|undefined>} File contents, or `undefined` when the
 *   file does not exist. Any other read error is rethrown.
 */
async function readOptionalTextFile(directory, fileName) {
  const target = join(directory, fileName);
  return fs2.readFile(target, "utf8").catch((error) => {
    if (!isMissingFileError(error)) {
      throw error;
    }
    return void 0;
  });
}
1228
/**
 * Read and parse a JSON file that may be absent from a result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name relative to `directory`.
 * @returns {Promise<*>} Parsed JSON value, or `undefined` when
 *   `readOptionalTextFile` reports no content.
 */
async function readOptionalJsonFile(directory, fileName) {
  const text = await readOptionalTextFile(directory, fileName);
  if (text === void 0) {
    return void 0;
  }
  return JSON.parse(text);
}
1232
/**
 * Write a binary asset beneath `directory`, creating parent folders as needed.
 *
 * @param {string} directory - Root directory for assets.
 * @param {string} filePath - Asset path, resolved through `resolveAssetPath`.
 * @param {Buffer|Uint8Array} data - Bytes to write.
 * @returns {Promise<string>} The resolved path that was written.
 */
async function writeBinaryAsset(directory, filePath, data) {
  const target = resolveAssetPath(directory, filePath);
  await fs2.mkdir(dirname(target), { recursive: true });
  await fs2.writeFile(target, data);
  return target;
}
1239
/**
 * Write a text asset beneath `directory`, creating parent folders as needed.
 *
 * @param {string} directory - Root directory for assets.
 * @param {string} filePath - Asset path, resolved through `resolveAssetPath`.
 * @param {string} text - Text to write.
 * @returns {Promise<string>} The resolved path that was written.
 */
async function writeTextAsset(directory, filePath, text) {
  const destination = resolveAssetPath(directory, filePath);
  const parentFolder = dirname(destination);
  await fs2.mkdir(parentFolder, { recursive: true });
  await fs2.writeFile(destination, text);
  return destination;
}
1246
/**
 * Resolve an asset path against a root directory and refuse anything that
 * lands outside that root.
 *
 * The path is first passed through `sanitizePath`, then resolved; the
 * containment check on the resolved result is what actually guards against
 * traversal, so it must stay even though the input was sanitised.
 *
 * @param {string} directory - Root directory assets must stay within.
 * @param {string} filePath - Asset path as recorded in the result.
 * @returns {string} Absolute path of the asset.
 * @throws {KnowhereError} When the resolved path escapes `directory`.
 */
function resolveAssetPath(directory, filePath) {
  const root = resolve(directory);
  const outputPath = resolve(root, sanitizePath(filePath));
  // A filesystem root ("/" or "C:\") already ends with the separator; adding
  // another would make the prefix "//" and reject every legitimate child.
  const rootPrefix = root.endsWith(sep) ? root : `${root}${sep}`;
  if (outputPath !== root && !outputPath.startsWith(rootPrefix)) {
    throw new KnowhereError(`Invalid result asset path: ${filePath}`);
  }
  return outputPath;
}
1254
/**
 * Tell whether a caught value is a "file does not exist" error.
 *
 * @param {*} error - Any caught value.
 * @returns {boolean} `true` only for a non-null object whose `code` is "ENOENT".
 */
function isMissingFileError(error) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
995
1257
 
996
1258
  // src/resources/jobs.ts
997
1259
  var Jobs = class extends BaseResource {
@@ -1000,11 +1262,7 @@ var Jobs = class extends BaseResource {
1000
1262
  * Create a new parsing job
1001
1263
  */
1002
1264
  async create(params) {
1003
- const job = await this.httpClient.post(
1004
- "/v1/jobs",
1005
- params
1006
- );
1007
- delete job.documentId;
1265
+ const job = await this.httpClient.post("/v1/jobs", params);
1008
1266
  if (job.uploadUrl) {
1009
1267
  this.pendingUploadJobs.set(job.jobId, job);
1010
1268
  }
@@ -1188,22 +1446,741 @@ var Documents = class extends BaseResource {
1188
1446
  }
1189
1447
  };
1190
1448
 
1449
+ // src/knowledge/local-store.ts
1450
+ import { createHash } from "crypto";
1451
+ import os from "os";
1452
+ import { promises as fs3 } from "fs";
1453
+ import path from "path";
1454
+ var STORE_VERSION = 1;
1455
+ var LOCAL_DOCUMENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
1456
/**
 * File-backed store for parse results.
 *
 * Keeps an `index.json` catalogue (documents plus tracked async parse jobs) in
 * the cache directory and one expanded result directory per document under
 * `documents/`. Loaded and saved results are also held in an in-memory map.
 */
var LocalKnowledgeStore = class {
  /** Directory holding `index.json` and the `documents/` tree. */
  cacheDirectory;
  /** Path of `index.json` inside `cacheDirectory`. */
  indexPath;
  /** In-memory results keyed by local document ID. */
  resultCache = new Map();

  /**
   * @param {string} [cacheDirectory] - Cache location; defaults to
   *   `~/.knowhere-node-sdk/knowledge`.
   */
  constructor(cacheDirectory) {
    this.cacheDirectory = cacheDirectory ?? path.join(os.homedir(), ".knowhere-node-sdk", "knowledge");
    this.indexPath = path.join(this.cacheDirectory, "index.json");
  }

  /**
   * Persist a parse result and record it in the index.
   *
   * Any existing result directory for the same ID is removed first. Tracked
   * async jobs whose `jobId` matches the result are marked "cached".
   *
   * @param {object} result - Parse result to store.
   * @param {{localDocumentId?: string}} [options] - Optional explicit ID;
   *   otherwise one is derived with `createLocalDocumentId`.
   * @returns {Promise<object>} The stored document record with Date timestamps.
   * @throws {Error} When the document ID is not a safe slug.
   */
  async saveResult(result, options) {
    await fs3.mkdir(this.cacheDirectory, { recursive: true });
    const now = new Date();
    const index = await this.readIndex();
    const localDocumentId = validateLocalDocumentId(
      options?.localDocumentId ?? createLocalDocumentId(result)
    );
    const resultDirectoryPath = this.getResultDirectoryPath(localDocumentId);
    await fs3.rm(resultDirectoryPath, { recursive: true, force: true });
    await saveExpandedParseResult(result, resultDirectoryPath);
    this.resultCache.set(localDocumentId, result);
    const existing = index.documents.find(
      (document) => document.localDocumentId === localDocumentId
    );
    const stored = {
      localDocumentId,
      jobId: result.jobId,
      documentId: result.documentId,
      namespace: result.namespace,
      sourceFileName: result.manifest.sourceFileName,
      chunkCount: result.chunks.length,
      typeCounts: countChunkTypes(result),
      resultDirectoryPath,
      // Re-saving keeps the original creation time.
      createdAt: existing?.createdAt ?? now.toISOString(),
      updatedAt: now.toISOString()
    };
    // Most recently saved document goes first.
    const nextDocuments = [
      stored,
      ...index.documents.filter((document) => document.localDocumentId !== localDocumentId)
    ];
    const asyncParseJobs = (index.asyncParseJobs ?? []).map(
      (job) => job.jobId === result.jobId ? {
        ...job,
        localDocumentId,
        cacheStatus: "cached",
        updatedAt: now.toISOString()
      } : job
    );
    await this.writeIndex({
      version: STORE_VERSION,
      documents: nextDocuments,
      asyncParseJobs
    });
    return toLocalKnowledgeDocument(stored);
  }

  /**
   * Start (or refresh) tracking of an async parse job.
   *
   * An already-tracked job keeps its `createdAt`, its `cacheStatus`, and its
   * `localDocumentId` unless a new one is supplied.
   *
   * @param {{jobId: string, localDocumentId?: string}} params
   * @returns {Promise<void>}
   */
  async saveAsyncParseJob(params) {
    const now = new Date().toISOString();
    const index = await this.readIndex();
    const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
    const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
    const stored = {
      jobId: params.jobId,
      localDocumentId: localDocumentId ?? existing?.localDocumentId,
      cacheStatus: existing?.cacheStatus ?? "pending",
      createdAt: existing?.createdAt ?? now,
      updatedAt: now
    };
    await this.writeIndex({
      version: STORE_VERSION,
      documents: index.documents,
      asyncParseJobs: [
        stored,
        ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
      ]
    });
  }

  /**
   * @param {string} jobId
   * @returns {Promise<object|undefined>} The raw tracked-job record, if any.
   */
  async getAsyncParseJob(jobId) {
    const index = await this.readIndex();
    return (index.asyncParseJobs ?? []).find((job) => job.jobId === jobId);
  }

  /**
   * @returns {Promise<object[]>} Tracked jobs whose cache status is "pending"
   *   or "not_available", with Date timestamps.
   */
  async listRecoverableAsyncParseJobs() {
    const index = await this.readIndex();
    return (index.asyncParseJobs ?? []).filter((job) => job.cacheStatus === "pending" || job.cacheStatus === "not_available").map(toLocalKnowledgeAsyncParseJob);
  }

  /**
   * Update the cache status of a tracked job; does nothing for untracked jobs.
   *
   * @param {{jobId: string, cacheStatus: string, localDocumentId?: string}} params
   * @returns {Promise<void>}
   */
  async updateAsyncParseJobCacheStatus(params) {
    const index = await this.readIndex();
    const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
    const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
    if (!existing) {
      return;
    }
    const now = new Date().toISOString();
    const stored = {
      ...existing,
      localDocumentId: localDocumentId ?? existing.localDocumentId,
      cacheStatus: params.cacheStatus,
      updatedAt: now
    };
    await this.writeIndex({
      version: STORE_VERSION,
      documents: index.documents,
      asyncParseJobs: [
        stored,
        ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
      ]
    });
  }

  /** @returns {Promise<object[]>} All indexed documents with Date timestamps. */
  async listDocuments() {
    const index = await this.readIndex();
    return index.documents.map(toLocalKnowledgeDocument);
  }

  /**
   * @param {string} localDocumentId - Must be a safe slug.
   * @returns {Promise<object|undefined>} The document record, if indexed.
   */
  async getDocument(localDocumentId) {
    validateLocalDocumentId(localDocumentId);
    const index = await this.readIndex();
    const stored = index.documents.find((document) => document.localDocumentId === localDocumentId);
    return stored ? toLocalKnowledgeDocument(stored) : void 0;
  }

  /**
   * Load a stored result, preferring the in-memory copy.
   *
   * A result read from disk gets `namespace` and `documentId` copied onto it
   * from the index record before it is cached.
   *
   * @param {string} localDocumentId
   * @returns {Promise<{document: object, result: object}>}
   * @throws {Error} When the document is not in the index.
   */
  async loadResult(localDocumentId) {
    const document = await this.getDocument(localDocumentId);
    if (!document) {
      throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
    }
    const cachedResult = this.resultCache.get(localDocumentId);
    if (cachedResult) {
      return { document, result: cachedResult };
    }
    const result = await this.loadStoredResult(document);
    result.namespace = document.namespace;
    result.documentId = document.documentId;
    this.resultCache.set(localDocumentId, result);
    return { document, result };
  }

  /**
   * @param {string} localDocumentId
   * @returns {string} Absolute result directory under `<cache>/documents`.
   * @throws {Error} When the ID would resolve outside that directory.
   */
  getResultDirectoryPath(localDocumentId) {
    const documentsDirectory = path.resolve(this.cacheDirectory, "documents");
    const resultDirectoryPath = path.resolve(documentsDirectory, localDocumentId);
    if (!isPathInsideDirectory(resultDirectoryPath, documentsDirectory)) {
      throw new Error(`Local Knowhere document ID resolves outside the cache: ${localDocumentId}`);
    }
    return resultDirectoryPath;
  }

  /** Read a result back from the directory recorded on the document. */
  async loadStoredResult(document) {
    return parseResultDirectory(document.resultDirectoryPath);
  }

  /**
   * Read `index.json`.
   *
   * A missing file, a non-object JSON value, a different `version`, or a
   * non-array `documents` all yield an empty index. Unparseable JSON and other
   * read errors are thrown.
   *
   * @returns {Promise<{version: number, documents: object[], asyncParseJobs: object[]}>}
   */
  async readIndex() {
    try {
      const raw = await fs3.readFile(this.indexPath, "utf8");
      const parsed = JSON.parse(raw);
      // JSON.parse can legitimately return null or a primitive; guard before
      // dereferencing so such a file is treated like any other invalid index.
      const isIndexObject = parsed !== null && typeof parsed === "object";
      if (!isIndexObject || parsed.version !== STORE_VERSION || !Array.isArray(parsed.documents)) {
        return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
      }
      return {
        version: STORE_VERSION,
        documents: parsed.documents,
        asyncParseJobs: Array.isArray(parsed.asyncParseJobs) ? parsed.asyncParseJobs : []
      };
    } catch (error) {
      if (isMissingFileError2(error)) {
        return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
      }
      throw error;
    }
  }

  /** Serialise the index to `index.json`, creating the cache directory if needed. */
  async writeIndex(index) {
    await fs3.mkdir(this.cacheDirectory, { recursive: true });
    await fs3.writeFile(this.indexPath, JSON.stringify(index, null, 2));
  }
};
1621
/**
 * Ensure a local document ID is safe to use as a single directory name.
 *
 * The ID must match `LOCAL_DOCUMENT_ID_PATTERN`, must not contain "..", and
 * must equal its own basename.
 *
 * @param {string} localDocumentId - Candidate ID.
 * @returns {string} The same ID when it is acceptable.
 * @throws {Error} When any of the checks fails.
 */
function validateLocalDocumentId(localDocumentId) {
  const isSafeSlug =
    LOCAL_DOCUMENT_ID_PATTERN.test(localDocumentId) &&
    !localDocumentId.includes("..") &&
    path.basename(localDocumentId) === localDocumentId;
  if (isSafeSlug) {
    return localDocumentId;
  }
  throw new Error(
    "Local Knowhere document ID must be a safe slug containing only letters, numbers, dots, underscores, or hyphens"
  );
}
1629
/**
 * Tell whether `targetPath` is `parentDirectory` itself or lies beneath it.
 *
 * The decision is made on whole path segments: only a relative path that is
 * exactly ".." or starts with a ".." segment counts as outside, so a child
 * whose name merely begins with two dots (e.g. "..hidden") is still inside.
 *
 * @param {string} targetPath - Path to test.
 * @param {string} parentDirectory - Directory that should contain it.
 * @returns {boolean} `true` when the target is the parent or a descendant.
 */
function isPathInsideDirectory(targetPath, parentDirectory) {
  const relativePath = path.relative(parentDirectory, targetPath);
  if (relativePath.length === 0) {
    return true;
  }
  // path.relative yields an absolute path when no relative route exists
  // (e.g. different drives on Windows).
  if (path.isAbsolute(relativePath)) {
    return false;
  }
  return relativePath !== ".." && !relativePath.startsWith(`..${path.sep}`);
}
1633
/**
 * Derive a deterministic local document ID for a parse result.
 *
 * Hashes the job ID and the manifest's source file name (separated by a NUL
 * byte) with SHA-256 and keeps the first 16 hex characters.
 *
 * @param {object} result - Parse result providing `jobId` and `manifest.sourceFileName`.
 * @returns {string} ID of the form `local_<16 hex chars>`.
 */
function createLocalDocumentId(result) {
  const hasher = createHash("sha256");
  hasher.update(result.jobId);
  hasher.update("\0");
  hasher.update(result.manifest.sourceFileName);
  const shortDigest = hasher.digest("hex").slice(0, 16);
  return `local_${shortDigest}`;
}
1637
/**
 * Count a result's chunks by type.
 *
 * `text`, `image` and `table` are always present (starting at 0). A chunk
 * whose type is none of those gets its own counter, starting from 0, instead
 * of producing a `NaN` entry.
 *
 * @param {{chunks: Array<{type: string}>}} result - Parse result.
 * @returns {Object<string, number>} Count per chunk type.
 */
function countChunkTypes(result) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const chunk of result.chunks) {
    // Own-property check so inherited names (e.g. "constructor") start at 0 too.
    const seenSoFar = Object.hasOwn(counts, chunk.type) ? counts[chunk.type] : 0;
    counts[chunk.type] = seenSoFar + 1;
  }
  return counts;
}
1646
/**
 * Convert a stored index record into the public document shape.
 *
 * Copies the known fields only and turns the `createdAt` / `updatedAt`
 * values into Date objects.
 *
 * @param {object} stored - Document record as kept in the index.
 * @returns {object} Document with Date timestamps.
 */
function toLocalKnowledgeDocument(stored) {
  const {
    localDocumentId,
    jobId,
    documentId,
    namespace,
    sourceFileName,
    chunkCount,
    typeCounts,
    resultDirectoryPath,
    createdAt,
    updatedAt
  } = stored;
  return {
    localDocumentId,
    jobId,
    documentId,
    namespace,
    sourceFileName,
    chunkCount,
    typeCounts,
    resultDirectoryPath,
    createdAt: new Date(createdAt),
    updatedAt: new Date(updatedAt)
  };
}
1660
/**
 * Convert a stored async-job record into the public shape.
 *
 * Every own field is copied; `createdAt` and `updatedAt` are replaced by
 * Date objects built from the stored values.
 *
 * @param {object} stored - Job record as kept in the index.
 * @returns {object} Shallow copy with Date timestamps.
 */
function toLocalKnowledgeAsyncParseJob(stored) {
  const { createdAt, updatedAt } = stored;
  return Object.assign({}, stored, {
    createdAt: new Date(createdAt),
    updatedAt: new Date(updatedAt)
  });
}
1667
/**
 * Tell whether a caught value is a "file does not exist" error.
 *
 * @param {*} error - Any caught value.
 * @returns {boolean} `true` only for a non-null object whose `code` is "ENOENT".
 */
function isMissingFileError2(error) {
  const isObjectLike = typeof error === "object" && error !== null;
  if (!isObjectLike) {
    return false;
  }
  const hasCode = "code" in error;
  return hasCode && error.code === "ENOENT";
}
1670
+
1671
+ // src/knowledge/knowledge.ts
1672
+ var DEFAULT_READ_LIMIT = 12;
1673
+ var MAX_READ_LIMIT = 40;
1674
+ var DEFAULT_GREP_LIMIT = 20;
1675
+ var MAX_GREP_LIMIT = 50;
1676
+ var DEFAULT_CONTEXT_CHARS = 80;
1677
+ var Knowledge = class _Knowledge {
1678
+ client;
1679
+ store;
1680
+ constructor(client, options) {
1681
+ this.client = client;
1682
+ this.store = new LocalKnowledgeStore(options?.cacheDirectory);
1683
+ }
1684
+ withCacheDirectory(cacheDirectory) {
1685
+ return new _Knowledge(this.client, { cacheDirectory });
1686
+ }
1687
+ async parse(params) {
1688
+ const result = await this.client.parse(params);
1689
+ const document = await this.store.saveResult(result, {
1690
+ localDocumentId: params.localDocumentId
1691
+ });
1692
+ return { document, result };
1693
+ }
1694
+ async startParse(params) {
1695
+ const job = await this.client.startParse(params);
1696
+ await this.store.saveAsyncParseJob({
1697
+ jobId: job.jobId,
1698
+ localDocumentId: params.localDocumentId
1699
+ });
1700
+ return {
1701
+ job,
1702
+ localDocumentId: params.localDocumentId
1703
+ };
1704
+ }
1705
+ async getJobStatus(jobId) {
1706
+ const job = await this.client.jobs.get(jobId);
1707
+ return {
1708
+ job,
1709
+ cache: await this.resolveAsyncCache(jobId, job.isDone, job.isFailed)
1710
+ };
1711
+ }
1712
+ async recoverPendingAsyncParseJobs() {
1713
+ const jobs = await this.store.listRecoverableAsyncParseJobs();
1714
+ const results = [];
1715
+ for (const job of jobs) {
1716
+ results.push(await this.getJobStatus(job.jobId));
1717
+ }
1718
+ return {
1719
+ checkedJobs: jobs.length,
1720
+ results
1721
+ };
1722
+ }
1723
+ async cacheJobResult(params) {
1724
+ const result = await this.client.jobs.load(params.jobId, {
1725
+ verifyChecksum: params.verifyChecksum
1726
+ });
1727
+ const document = await this.store.saveResult(result, {
1728
+ localDocumentId: params.localDocumentId
1729
+ });
1730
+ return { document, result };
1731
+ }
1732
+ async resolveAsyncCache(jobId, isDone, isFailed) {
1733
+ const trackedJob = await this.store.getAsyncParseJob(jobId);
1734
+ if (!trackedJob) {
1735
+ return { status: "untracked" };
1736
+ }
1737
+ if (trackedJob.cacheStatus === "cached" && trackedJob.localDocumentId) {
1738
+ const existingDocument = await this.store.getDocument(trackedJob.localDocumentId);
1739
+ if (existingDocument) {
1740
+ return {
1741
+ status: "already_cached",
1742
+ localDocumentId: trackedJob.localDocumentId,
1743
+ document: existingDocument
1744
+ };
1745
+ }
1746
+ }
1747
+ if (isFailed) {
1748
+ await this.store.updateAsyncParseJobCacheStatus({
1749
+ jobId,
1750
+ cacheStatus: "failed"
1751
+ });
1752
+ return {
1753
+ status: "failed",
1754
+ localDocumentId: trackedJob.localDocumentId
1755
+ };
1756
+ }
1757
+ if (!isDone) {
1758
+ return {
1759
+ status: "pending",
1760
+ localDocumentId: trackedJob.localDocumentId
1761
+ };
1762
+ }
1763
+ try {
1764
+ const cached = await this.cacheJobResult({
1765
+ jobId,
1766
+ localDocumentId: trackedJob.localDocumentId
1767
+ });
1768
+ return {
1769
+ status: "cached",
1770
+ localDocumentId: cached.document.localDocumentId,
1771
+ document: cached.document
1772
+ };
1773
+ } catch (error) {
1774
+ await this.store.updateAsyncParseJobCacheStatus({
1775
+ jobId,
1776
+ cacheStatus: "not_available"
1777
+ });
1778
+ return {
1779
+ status: "not_available",
1780
+ localDocumentId: trackedJob.localDocumentId,
1781
+ error: error instanceof Error ? error.message : String(error)
1782
+ };
1783
+ }
1784
+ }
1785
+ async listDocuments() {
1786
+ return this.store.listDocuments();
1787
+ }
1788
+ async getDocumentOutline(localDocumentId) {
1789
+ const { document, result } = await this.store.loadResult(localDocumentId);
1790
+ const chunks = indexChunks(result);
1791
+ const sections = buildFlatSections(result, chunks);
1792
+ const sectionTree = result.docNav?.sections && result.docNav.sections.length > 0 ? result.docNav.sections.map(
1793
+ (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
1794
+ ) : nestSections(sections);
1795
+ return {
1796
+ document,
1797
+ totalChunks: chunks.length,
1798
+ typeCounts: document.typeCounts,
1799
+ sections,
1800
+ sectionTree
1801
+ };
1802
+ }
1803
+ async readChunks(params) {
1804
+ const { document, result } = await this.store.loadResult(params.localDocumentId);
1805
+ const limit = clampLimit(params.limit, DEFAULT_READ_LIMIT, MAX_READ_LIMIT);
1806
+ const chunks = indexChunks(result).filter((chunk) => matchesReadScope(chunk, params));
1807
+ const selected = selectReadWindow(chunks, params, limit);
1808
+ const lastSelected = selected[selected.length - 1];
1809
+ const nextChunk = lastSelected && selected.length < chunks.length ? chunks[chunks.indexOf(lastSelected) + 1]?.position : void 0;
1810
+ return {
1811
+ document,
1812
+ chunks: selected.map(toReadChunk),
1813
+ nextChunk
1814
+ };
1815
+ }
1816
+ async grepChunks(params) {
1817
+ if (!params.pattern) {
1818
+ throw new ValidationError("pattern is required");
1819
+ }
1820
+ const { document, result } = await this.store.loadResult(params.localDocumentId);
1821
+ const maxResults = clampLimit(params.maxResults, DEFAULT_GREP_LIMIT, MAX_GREP_LIMIT);
1822
+ const contextChars = params.contextChars ?? DEFAULT_CONTEXT_CHARS;
1823
+ const matcher = createMatcher(params);
1824
+ const scopedChunks = indexChunks(result).filter((chunk) => matchesGrepScope(chunk, params));
1825
+ const matches = [];
1826
+ let scannedChunks = 0;
1827
+ for (const chunk of scopedChunks) {
1828
+ scannedChunks += 1;
1829
+ const chunkMatches = matcher(chunk.content);
1830
+ for (const match of chunkMatches) {
1831
+ matches.push({
1832
+ position: chunk.position,
1833
+ chunkId: chunk.chunkId,
1834
+ chunkType: chunk.chunkType,
1835
+ sectionPath: chunk.sectionPath,
1836
+ sourceChunkPath: chunk.sourceChunkPath,
1837
+ filePath: chunk.filePath,
1838
+ startOffset: match.startOffset,
1839
+ endOffset: match.endOffset,
1840
+ snippet: buildSnippet(chunk.content, match.startOffset, match.endOffset, contextChars)
1841
+ });
1842
+ if (matches.length >= maxResults) {
1843
+ return { document, matches, scannedChunks, truncated: true };
1844
+ }
1845
+ }
1846
+ }
1847
+ return { document, matches, scannedChunks, truncated: false };
1848
+ }
1849
+ async search(params) {
1850
+ const localDocuments = await this.resolveSearchDocuments(params.localDocumentIds);
1851
+ const rawResponse = await this.client.retrieval.query({
1852
+ query: params.query,
1853
+ namespace: params.namespace,
1854
+ topK: params.topK,
1855
+ useAgentic: params.useAgentic ?? false
1856
+ });
1857
+ const documentByServerId = new Map(
1858
+ localDocuments.filter((document) => document.documentId).map((document) => [document.documentId, document])
1859
+ );
1860
+ return {
1861
+ namespace: rawResponse.namespace,
1862
+ query: rawResponse.query,
1863
+ evidenceText: rawResponse.evidenceText,
1864
+ references: [
1865
+ ...rawResponse.referencedChunks.map(
1866
+ (reference) => ({
1867
+ localDocumentId: reference.documentId ? documentByServerId.get(reference.documentId)?.localDocumentId : void 0,
1868
+ documentId: reference.documentId,
1869
+ chunkId: reference.chunkId,
1870
+ sectionPath: reference.sectionPath,
1871
+ chunkType: reference.chunkType
1872
+ })
1873
+ ),
1874
+ ...rawResponse.results.map((result) => toResultReference(result, documentByServerId))
1875
+ ],
1876
+ results: rawResponse.results.map(
1877
+ (result) => toRemoteSearchResult(result, documentByServerId)
1878
+ ),
1879
+ rawResponse
1880
+ };
1881
+ }
1882
+ async resolveSearchDocuments(localDocumentIds) {
1883
+ const documents = await this.store.listDocuments();
1884
+ if (!localDocumentIds || localDocumentIds.length === 0) {
1885
+ return documents;
1886
+ }
1887
+ const requested = new Set(localDocumentIds);
1888
+ return documents.filter((document) => requested.has(document.localDocumentId));
1889
+ }
1890
+ };
1891
/**
 * Turn the raw chunks of a parse result into indexed chunk records.
 *
 * @param {object} result - parse result with `chunks` and `manifest.sourceFileName`.
 * @returns {Array<object>} one record per chunk, with a 1-based `position`,
 *   a normalized `sectionPath`, the original chunk under `source`, and the
 *   chunk's id, type, content, raw path, file path and metadata.
 */
function indexChunks(result) {
  return result.chunks.map((source, offset) => {
    const filePath = getChunkFilePath2(source);
    const sectionPath = normalizeSectionPath(source.path, result.manifest.sourceFileName);
    return {
      source,
      position: offset + 1,
      chunkId: source.chunkId,
      chunkType: source.type,
      content: source.content,
      sectionPath,
      sourceChunkPath: source.path,
      filePath,
      metadata: source.metadata
    };
  });
}
1907
/**
 * Resolve the file path associated with a chunk.
 *
 * Image and table chunks expose it as `chunk.filePath`; for any other chunk
 * type it is read from `chunk.metadata.filePath` and only accepted when it
 * is a string.
 *
 * @param {object} chunk
 * @returns {string|undefined} the file path, or undefined when none is available.
 */
function getChunkFilePath2(chunk) {
  if (chunk.type === "image" || chunk.type === "table") {
    return chunk.filePath;
  }
  // Tolerate chunks without a metadata object instead of throwing a TypeError.
  const filePath = chunk.metadata?.filePath;
  return typeof filePath === "string" ? filePath : void 0;
}
1914
/**
 * Convert a slash-separated chunk path into a " / "-separated section path.
 *
 * - Empty input yields "".
 * - Paths starting with "images/" or "tables/" are returned untouched.
 * - When `sourceFileName` occurs as a segment, everything after it is kept
 *   (or the file name itself if nothing follows).
 * - Otherwise the first segment is dropped, unless it is the only one.
 *
 * @param {string} path3 - raw chunk path.
 * @param {string} [sourceFileName]
 * @returns {string}
 */
function normalizeSectionPath(path3, sourceFileName) {
  if (!path3) {
    return "";
  }
  const isAssetPath = ["images/", "tables/"].some((prefix) => path3.startsWith(prefix));
  if (isAssetPath) {
    return path3;
  }

  const segments = path3.split("/").filter((segment) => segment);
  const anchor = sourceFileName ? segments.indexOf(sourceFileName) : -1;
  if (anchor >= 0) {
    const remainder = segments.slice(anchor + 1).join(" / ");
    return remainder || sourceFileName;
  }

  return segments.length <= 1 ? segments[0] ?? "" : segments.slice(1).join(" / ");
}
1933
/**
 * Produce the flat list of sections for a parse result.
 *
 * When the result has a non-empty `docNav.sections`, that tree is converted
 * and flattened. Otherwise sections are synthesized by grouping chunks on
 * their section path (falling back to the raw chunk path) and sorted with
 * `compareSections`.
 *
 * @param {object} result - parse result.
 * @param {Array<object>} chunks - indexed chunks of the same result.
 * @returns {Array<object>} flat section records.
 */
function buildFlatSections(result, chunks) {
  const navSections = result.docNav?.sections;
  if (navSections && navSections.length > 0) {
    const tree = navSections.map(
      (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
    );
    return flattenSections(tree);
  }

  const sectionsByPath = new Map();
  for (const chunk of chunks) {
    const key = chunk.sectionPath || chunk.sourceChunkPath;
    if (sectionsByPath.has(key)) {
      addChunkToSection(sectionsByPath.get(key), chunk);
    } else {
      sectionsByPath.set(key, createSectionFromChunk(key, chunk));
    }
  }
  return Array.from(sectionsByPath.values()).sort(compareSections);
}
1953
/**
 * Convert a navigation section (and, recursively, its children) into a
 * knowledge section annotated with chunk statistics.
 *
 * A chunk counts towards a section when `isInSection` accepts its section
 * path for this section's normalized path.
 *
 * @param {object} section - nav section with `path`, `title`, `level`,
 *   `summary` and `children`.
 * @param {Array<object>} chunks - indexed chunks of the document.
 * @param {string} [sourceFileName]
 * @returns {object} section record with start/end chunk, counts and children.
 */
function toKnowledgeSection(section, chunks, sourceFileName) {
  const sectionPath = normalizeSectionPath(section.path, sourceFileName);
  const members = chunks.filter(
    ({ sectionPath: chunkPath }) => isInSection(chunkPath, sectionPath)
  );
  const children = section.children.map(
    (child) => toKnowledgeSection(child, chunks, sourceFileName)
  );
  const { title, level, summary } = section;
  return {
    sectionPath,
    sectionTitle: title,
    sectionLevel: level,
    summary,
    startChunk: minPosition(members),
    endChunk: maxPosition(members),
    chunkCount: members.length,
    typeCounts: countIndexedTypes(members),
    children
  };
}
1971
/**
 * Create a section record seeded with a single chunk.
 *
 * The title is the last " / "-separated segment of the path (or the whole
 * path when there is none) and the level is the segment count, at least 1.
 *
 * @param {string} pathValue - section path used as the record's identity.
 * @param {object} chunk - indexed chunk providing `position` and `chunkType`.
 * @returns {object} a section covering exactly that chunk.
 */
function createSectionFromChunk(pathValue, chunk) {
  const segments = pathValue.split(" / ").filter(Boolean);
  const depth = segments.length;
  const { position, chunkType } = chunk;
  return {
    sectionPath: pathValue,
    sectionTitle: segments[depth - 1] ?? pathValue,
    sectionLevel: Math.max(depth, 1),
    startChunk: position,
    endChunk: position,
    chunkCount: 1,
    typeCounts: { text: 0, image: 0, table: 0, [chunkType]: 1 },
    children: []
  };
}
1984
/**
 * Fold one more chunk into an existing section record (mutates `section`).
 *
 * Widens the start/end chunk range, bumps `chunkCount`, and increments the
 * counter for the chunk's type. A type that has no counter yet starts from
 * zero, so the count never becomes NaN.
 *
 * @param {object} section - section record with `startChunk`, `endChunk`,
 *   `chunkCount` and `typeCounts`.
 * @param {object} chunk - indexed chunk providing `position` and `chunkType`.
 * @returns {void}
 */
function addChunkToSection(section, chunk) {
  const { position, chunkType } = chunk;
  section.startChunk = Math.min(section.startChunk ?? position, position);
  section.endChunk = Math.max(section.endChunk ?? position, position);
  section.chunkCount += 1;
  section.typeCounts[chunkType] = (section.typeCounts[chunkType] ?? 0) + 1;
}
1990
/**
 * Flatten a section tree into a list in pre-order (parent before children).
 *
 * The returned entries are the original section objects, not copies.
 *
 * @param {Array<object>} sections - sections, each with a `children` array.
 * @returns {Array<object>}
 */
function flattenSections(sections) {
  const flat = [];
  const visit = (nodes) => {
    for (const node of nodes) {
      flat.push(node);
      visit(node.children);
    }
  };
  visit(sections);
  return flat;
}
1993
/**
 * Rebuild a section tree from a flat list using the section paths.
 *
 * Every section is shallow-copied with a fresh `children` array, so the
 * input records are left untouched. A section whose parent path is not
 * present in the list becomes a root.
 *
 * @param {Array<object>} sections - flat section records with `sectionPath`.
 * @returns {Array<object>} root sections with nested `children`.
 */
function nestSections(sections) {
  const copies = [];
  const copyByPath = new Map();
  for (const section of sections) {
    const copy = { ...section, children: [] };
    copies.push(copy);
    copyByPath.set(copy.sectionPath, copy);
  }

  const roots = [];
  for (const copy of copies) {
    const parentPath = getParentSectionPath(copy.sectionPath);
    const parent = parentPath ? copyByPath.get(parentPath) : void 0;
    const siblings = parent ? parent.children : roots;
    siblings.push(copy);
  }
  return roots;
}
2011
/**
 * Compute the parent of a " / "-separated section path.
 *
 * @param {string} sectionPath
 * @returns {string|undefined} the path without its last segment, or
 *   undefined when the path has at most one segment.
 */
function getParentSectionPath(sectionPath) {
  const segments = sectionPath.split(" / ").filter((segment) => segment);
  return segments.length > 1
    ? segments.slice(0, segments.length - 1).join(" / ")
    : void 0;
}
2018
/**
 * Sort comparator ordering sections by `startChunk`, ascending.
 *
 * Sections without a `startChunk` are ranked as `Number.MAX_SAFE_INTEGER`,
 * which places them last.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} negative, zero or positive per `Array.prototype.sort`.
 */
function compareSections(left, right) {
  const rank = (section) => section.startChunk ?? Number.MAX_SAFE_INTEGER;
  return rank(left) - rank(right);
}
2021
/**
 * Smallest `position` among the given chunks.
 *
 * Uses a fold instead of `Math.min(...values)`: spreading a very large
 * array into call arguments can exceed the engine's argument limit and
 * throw a RangeError.
 *
 * @param {Array<{position: number}>} chunks
 * @returns {number|undefined} undefined for an empty list.
 */
function minPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((lowest, chunk) => Math.min(lowest, chunk.position), Infinity);
}
2027
/**
 * Largest `position` among the given chunks.
 *
 * Uses a fold instead of `Math.max(...values)`: spreading a very large
 * array into call arguments can exceed the engine's argument limit and
 * throw a RangeError.
 *
 * @param {Array<{position: number}>} chunks
 * @returns {number|undefined} undefined for an empty list.
 */
function maxPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((highest, chunk) => Math.max(highest, chunk.position), -Infinity);
}
2033
/**
 * Count indexed chunks per chunk type.
 *
 * The result always contains `text`, `image` and `table`. Any other chunk
 * type gets its own counter starting from zero, rather than producing NaN
 * from `undefined + 1`.
 *
 * @param {Array<{chunkType: string}>} chunks
 * @returns {Object<string, number>} counters keyed by chunk type.
 */
function countIndexedTypes(chunks) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const { chunkType } of chunks) {
    counts[chunkType] = (counts[chunkType] ?? 0) + 1;
  }
  return counts;
}
2042
/**
 * Tell whether a chunk's section path lies in a section: either the same
 * path, or a descendant path (the section path followed by " / ").
 *
 * @param {string} chunkSectionPath
 * @param {string} sectionPath
 * @returns {boolean}
 */
function isInSection(chunkSectionPath, sectionPath) {
  if (chunkSectionPath === sectionPath) {
    return true;
  }
  const descendantPrefix = `${sectionPath} / `;
  return chunkSectionPath.startsWith(descendantPrefix);
}
2045
/**
 * Normalize a caller-supplied limit into the range [1, maxValue].
 *
 * Missing values (undefined or null) and values that do not floor to a
 * number fall back to `defaultValue`. Previously NaN propagated as the
 * limit, and null was coerced to 1.
 *
 * @param {number} [value] - requested limit; fractional values are floored.
 * @param {number} defaultValue - returned when no usable value is given.
 * @param {number} maxValue - upper bound.
 * @returns {number}
 */
function clampLimit(value, defaultValue, maxValue) {
  if (value === void 0 || value === null) {
    return defaultValue;
  }
  const floored = Math.floor(value);
  if (Number.isNaN(floored)) {
    return defaultValue;
  }
  return Math.min(Math.max(floored, 1), maxValue);
}
2051
/**
 * Check a chunk against the optional read filters in `params`.
 *
 * Each of `chunkId`, `chunkType` and `sectionPath` is only enforced when
 * it is truthy; the section filter accepts the section itself and its
 * descendants (via `isInSection`).
 *
 * @param {object} chunk - indexed chunk.
 * @param {object} params - read parameters.
 * @returns {boolean} true when the chunk passes every active filter.
 */
function matchesReadScope(chunk, params) {
  const { chunkId, chunkType, sectionPath } = params;
  const idMatches = !chunkId || chunk.chunkId === chunkId;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  return (
    idMatches &&
    typeMatches &&
    (!sectionPath || isInSection(chunk.sectionPath, sectionPath))
  );
}
2063
/**
 * Pick the chunks to return for a read request.
 *
 * With `params.chunkId` set, the first `limit` chunks are returned as-is.
 * Otherwise chunks are restricted to positions in
 * [`startChunk`, `endChunk`] and then capped to `limit`. `startChunk`
 * defaults to the first chunk's position (or 1 when there are no chunks);
 * `endChunk` defaults to `Number.MAX_SAFE_INTEGER`.
 *
 * @param {Array<object>} chunks - already scoped, indexed chunks.
 * @param {object} params - read parameters.
 * @param {number} limit - maximum number of chunks to return.
 * @returns {Array<object>}
 */
function selectReadWindow(chunks, params, limit) {
  if (params.chunkId) {
    return chunks.slice(0, limit);
  }
  const firstPosition = params.startChunk ?? chunks[0]?.position ?? 1;
  const lastPosition = params.endChunk ?? Number.MAX_SAFE_INTEGER;
  const inRange = chunks.filter(
    ({ position }) => position >= firstPosition && position <= lastPosition
  );
  return inRange.slice(0, limit);
}
2071
/**
 * Project an indexed chunk onto the shape returned by chunk reads; the
 * internal `source` reference is not copied.
 *
 * @param {object} chunk - indexed chunk.
 * @returns {object} `{ position, chunkId, chunkType, content, sectionPath,
 *   sourceChunkPath, filePath, metadata }`.
 */
function toReadChunk(chunk) {
  const {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  } = chunk;
  return {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  };
}
2083
/**
 * Check a chunk against the optional grep filters in `params`.
 *
 * `chunkType` must equal the chunk's type and `sectionPathPrefix` must be
 * a plain string prefix of the chunk's section path; each filter only
 * applies when it is truthy.
 *
 * @param {object} chunk - indexed chunk.
 * @param {object} params - grep parameters.
 * @returns {boolean}
 */
function matchesGrepScope(chunk, params) {
  const { chunkType, sectionPathPrefix } = params;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  return (
    typeMatches &&
    (!sectionPathPrefix || chunk.sectionPath.startsWith(sectionPathPrefix))
  );
}
2092
/**
 * Build a function that finds all non-overlapping matches of a pattern in
 * a string and reports them as `{ startOffset, endOffset }` pairs. The
 * offsets are UTF-16 indices into the ORIGINAL content, suitable for
 * slicing it.
 *
 * Modes:
 *  - `isRegex`: the pattern is compiled as a regular expression
 *    (case-insensitive unless `isCaseSensitive`); scanning stops after the
 *    first zero-length match.
 *  - literal, case-sensitive: plain `indexOf` scan.
 *  - literal, case-insensitive: the pattern is escaped and matched with a
 *    case-insensitive regex. Lower-casing the content and searching the
 *    copy is avoided because `toLowerCase()` can change the string length
 *    (e.g. U+0130), which would shift every later offset.
 *
 * An empty literal pattern yields no matches (an `indexOf("")` scan would
 * never terminate).
 *
 * @param {object} params
 * @param {string} params.pattern
 * @param {boolean} [params.isRegex]
 * @param {boolean} [params.isCaseSensitive]
 * @returns {(content: string) => Array<{startOffset: number, endOffset: number}>}
 * @throws {SyntaxError} when `isRegex` is set and the pattern is not a valid regex.
 */
function createMatcher(params) {
  const { pattern, isRegex, isCaseSensitive } = params;

  if (isRegex) {
    const regex = new RegExp(pattern, isCaseSensitive ? "g" : "gi");
    return (content) => {
      const matches = [];
      for (const match of content.matchAll(regex)) {
        const startOffset = match.index ?? 0;
        const text = match[0] ?? "";
        matches.push({ startOffset, endOffset: startOffset + text.length });
        if (text.length === 0) {
          break;
        }
      }
      return matches;
    };
  }

  if (pattern === "") {
    return () => [];
  }

  if (isCaseSensitive) {
    return (content) => {
      const matches = [];
      let index = content.indexOf(pattern);
      while (index >= 0) {
        matches.push({ startOffset: index, endOffset: index + pattern.length });
        index = content.indexOf(pattern, index + pattern.length);
      }
      return matches;
    };
  }

  // Escape every regex syntax character so the pattern is matched literally.
  const escaped = pattern.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
  const literal = new RegExp(escaped, "giu");
  return (content) =>
    Array.from(content.matchAll(literal), (match) => {
      const startOffset = match.index ?? 0;
      return { startOffset, endOffset: startOffset + match[0].length };
    });
}
2121
/**
 * Extract a match plus up to `contextChars` characters on either side.
 *
 * A negative, NaN or missing `contextChars` is treated as 0 so the snippet
 * always contains at least the full match. Previously a negative value
 * cut into the match and NaN produced an empty string.
 *
 * @param {string} content - text the offsets refer to.
 * @param {number} startOffset - start of the match (inclusive).
 * @param {number} endOffset - end of the match (exclusive).
 * @param {number} contextChars - characters of context on each side.
 * @returns {string}
 */
function buildSnippet(content, startOffset, endOffset, contextChars) {
  const padding = contextChars > 0 ? contextChars : 0;
  const start = Math.max(0, startOffset - padding);
  const end = Math.min(content.length, endOffset + padding);
  return content.slice(start, end);
}
2126
/**
 * Build a reference entry from a remote search result.
 *
 * Null/undefined source fields are normalized to undefined, and the local
 * document id is looked up through the server document id when available.
 *
 * @param {object} result - remote result with `source`, `chunkType`, `score`.
 * @param {Map<string, object>} documentByServerId - local documents keyed by
 *   server document id.
 * @returns {object} `{ localDocumentId, documentId, sectionPath, chunkType, score }`.
 */
function toResultReference(result, documentByServerId) {
  const { source, chunkType, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    sectionPath: source.sectionPath ?? void 0,
    chunkType,
    score
  };
}
2136
/**
 * Convert a remote search result into the shape returned by `search`,
 * attaching the local document id when the server document id is known.
 *
 * Null/undefined source fields are normalized to undefined.
 *
 * @param {object} result - remote result with `source`, `chunkType`,
 *   `content` and `score`.
 * @param {Map<string, object>} documentByServerId - local documents keyed by
 *   server document id.
 * @returns {object} `{ localDocumentId, documentId, chunkType, content,
 *   score, sectionPath, sourceFileName }`.
 */
function toRemoteSearchResult(result, documentByServerId) {
  const { source, chunkType, content, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    chunkType,
    content,
    score,
    sectionPath: source.sectionPath ?? void 0,
    sourceFileName: source.sourceFileName ?? void 0
  };
}
2148
+
1191
2149
  // src/client.ts
1192
2150
  function inferFileName(file, explicitFileName) {
1193
2151
  if (explicitFileName) {
1194
2152
  return explicitFileName;
1195
2153
  }
1196
2154
  if (typeof file === "string") {
1197
- return path.basename(file);
2155
+ return path2.basename(file);
1198
2156
  }
1199
2157
  if (isReadStream2(file) && typeof file.path === "string") {
1200
- return path.basename(file.path);
2158
+ return path2.basename(file.path);
1201
2159
  }
1202
2160
  return void 0;
1203
2161
  }
1204
2162
  function isReadStream2(file) {
1205
2163
  return typeof file === "object" && file !== null && "pipe" in file && typeof file.pipe === "function";
1206
2164
  }
2165
/**
 * Collect the parsing options of a parse request into the `parsingParams`
 * payload, keeping only the options that are not undefined.
 *
 * `params.ocr` is sent as `ocrEnabled`; all other options keep their name.
 *
 * @param {object} params - parse request parameters.
 * @returns {object|undefined} the payload, or undefined when no option is set.
 */
function buildParsingParams(params) {
  const candidates = [
    ["model", params.model],
    ["ocrEnabled", params.ocr],
    ["docType", params.docType],
    ["smartTitleParse", params.smartTitleParse],
    ["summaryImage", params.summaryImage],
    ["summaryTable", params.summaryTable],
    ["summaryTxt", params.summaryTxt],
    ["addFragDesc", params.addFragDesc],
    ["kbDir", params.kbDir]
  ];
  const provided = candidates.filter(([, value]) => value !== void 0);
  return provided.length > 0 ? Object.fromEntries(provided) : void 0;
}
1207
2184
  var Knowhere = class {
1208
2185
  /** Jobs resource for low-level API */
1209
2186
  jobs;
@@ -1211,21 +2188,25 @@ var Knowhere = class {
1211
2188
  retrieval;
1212
2189
  /** Documents resource for canonical document lifecycle operations */
1213
2190
  documents;
2191
+ /** Client-side local knowledge tools over parsed Knowhere results */
2192
+ knowledge;
1214
2193
  httpClient;
1215
2194
  /**
1216
2195
  * Create a new Knowhere client
1217
2196
  */
1218
2197
  constructor(options = {}) {
1219
2198
  const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
1220
- if (!apiKey) {
2199
+ const authTokenProvider = apiKey ? void 0 : options.authTokenProvider;
2200
+ if (!apiKey && !authTokenProvider) {
1221
2201
  throw new ValidationError(
1222
- `API key is required. Provide it via options.apiKey or ${ENV.API_KEY} environment variable.`
2202
+ `API authentication is required. Provide it via options.apiKey, options.authTokenProvider, or ${ENV.API_KEY} environment variable.`
1223
2203
  );
1224
2204
  }
1225
2205
  const baseURL = options.baseURL ?? process.env[ENV.BASE_URL] ?? DEFAULT_BASE_URL;
1226
2206
  this.httpClient = new HttpClient({
1227
2207
  baseURL,
1228
2208
  apiKey,
2209
+ authTokenProvider,
1229
2210
  timeout: options.timeout,
1230
2211
  uploadTimeout: options.uploadTimeout,
1231
2212
  maxRetries: options.maxRetries,
@@ -1236,6 +2217,7 @@ var Knowhere = class {
1236
2217
  this.jobs = new Jobs(this.httpClient);
1237
2218
  this.retrieval = new Retrieval(this.httpClient);
1238
2219
  this.documents = new Documents(this.httpClient);
2220
+ this.knowledge = new Knowledge(this);
1239
2221
  }
1240
2222
  /**
1241
2223
  * High-level API: Parse a document and return structured results
@@ -1258,6 +2240,24 @@ var Knowhere = class {
1258
2240
  * ```
1259
2241
  */
1260
2242
  async parse(params) {
2243
+ const job = await this.startParse(params);
2244
+ const jobResult = await this.jobs.wait(job.jobId, {
2245
+ pollInterval: params.pollInterval,
2246
+ pollTimeout: params.pollTimeout,
2247
+ onProgress: params.onPollProgress,
2248
+ signal: params.signal
2249
+ });
2250
+ const result = await this.jobs.load(jobResult, {
2251
+ verifyChecksum: params.verifyChecksum
2252
+ });
2253
+ return enrichParseResult(result, jobResult);
2254
+ }
2255
+ /**
2256
+ * Start a parse job and return immediately after the URL job is created or
2257
+ * the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
2258
+ * to inspect completion and load results later.
2259
+ */
2260
+ async startParse(params) {
1261
2261
  if (!params.url && !params.file) {
1262
2262
  throw new ValidationError("Either url or file must be provided");
1263
2263
  }
@@ -1271,22 +2271,6 @@ var Knowhere = class {
1271
2271
  "fileName is required when file is a Buffer, Uint8Array, or stream without a path."
1272
2272
  );
1273
2273
  }
1274
- const parsingParams = {
1275
- model: params.model,
1276
- ocrEnabled: params.ocr,
1277
- docType: params.docType,
1278
- smartTitleParse: params.smartTitleParse,
1279
- summaryImage: params.summaryImage,
1280
- summaryTable: params.summaryTable,
1281
- summaryTxt: params.summaryTxt,
1282
- addFragDesc: params.addFragDesc,
1283
- kbDir: params.kbDir
1284
- };
1285
- Object.keys(parsingParams).forEach((key) => {
1286
- if (parsingParams[key] === void 0) {
1287
- delete parsingParams[key];
1288
- }
1289
- });
1290
2274
  const webhook = params.webhook;
1291
2275
  const job = await this.jobs.create({
1292
2276
  sourceType,
@@ -1295,7 +2279,8 @@ var Knowhere = class {
1295
2279
  dataId: params.dataId,
1296
2280
  namespace: params.namespace,
1297
2281
  documentId: params.documentId,
1298
- parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
2282
+ documentMetadata: params.documentMetadata,
2283
+ parsingParams: buildParsingParams(params),
1299
2284
  webhook
1300
2285
  });
1301
2286
  if (params.file) {
@@ -1305,16 +2290,7 @@ var Knowhere = class {
1305
2290
  signal: params.signal
1306
2291
  });
1307
2292
  }
1308
- const jobResult = await this.jobs.wait(job.jobId, {
1309
- pollInterval: params.pollInterval,
1310
- pollTimeout: params.pollTimeout,
1311
- onProgress: params.onPollProgress,
1312
- signal: params.signal
1313
- });
1314
- const result = await this.jobs.load(jobResult, {
1315
- verifyChecksum: params.verifyChecksum
1316
- });
1317
- return enrichParseResult(result, jobResult);
2293
+ return job;
1318
2294
  }
1319
2295
  };
1320
2296
  export {
@@ -1331,6 +2307,8 @@ export {
1331
2307
  Jobs,
1332
2308
  Knowhere,
1333
2309
  KnowhereError,
2310
+ Knowledge,
2311
+ LocalKnowledgeStore,
1334
2312
  NetworkError,
1335
2313
  NotFoundError,
1336
2314
  PaymentRequiredError,