@ontos-ai/knowhere-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,8 +1,10 @@
1
1
  // src/client.ts
2
- import path from "path";
2
+ import path2 from "path";
3
3
 
4
4
  // src/lib/http-client.ts
5
- import axios from "axios";
5
+ import axios, {
6
+ AxiosHeaders
7
+ } from "axios";
6
8
 
7
9
  // src/version.ts
8
10
  var VERSION = "0.1.0";
@@ -41,6 +43,7 @@ var NetworkError = class extends KnowhereError {
41
43
  this.cause = cause;
42
44
  this.name = "NetworkError";
43
45
  }
46
+ cause;
44
47
  };
45
48
  var TimeoutError = class extends NetworkError {
46
49
  constructor(message = "Request timed out") {
@@ -54,6 +57,7 @@ var PollingTimeoutError = class extends KnowhereError {
54
57
  this.elapsedMs = elapsedMs;
55
58
  this.name = "PollingTimeoutError";
56
59
  }
60
+ elapsedMs;
57
61
  };
58
62
  var ChecksumError = class extends KnowhereError {
59
63
  constructor(message = "Checksum verification failed", expected, actual) {
@@ -62,6 +66,8 @@ var ChecksumError = class extends KnowhereError {
62
66
  this.actual = actual;
63
67
  this.name = "ChecksumError";
64
68
  }
69
+ expected;
70
+ actual;
65
71
  };
66
72
  var ValidationError = class extends KnowhereError {
67
73
  constructor(message) {
@@ -87,6 +93,11 @@ var APIError = class extends KnowhereError {
87
93
  this.body = body;
88
94
  this.name = "APIError";
89
95
  }
96
+ statusCode;
97
+ code;
98
+ requestId;
99
+ details;
100
+ body;
90
101
  };
91
102
  var BadRequestError = class extends APIError {
92
103
  constructor(message, code, requestId, details, body) {
@@ -130,6 +141,7 @@ var RateLimitError = class extends APIError {
130
141
  this.retryAfter = retryAfter;
131
142
  this.name = "RateLimitError";
132
143
  }
144
+ retryAfter;
133
145
  };
134
146
  var InternalServerError = class extends APIError {
135
147
  constructor(message = "Internal server error", code, requestId, details, body) {
@@ -185,11 +197,13 @@ var JobFailedError = class extends KnowhereError {
185
197
  this.jobResult = jobResult;
186
198
  this.name = "JobFailedError";
187
199
  }
200
+ code;
201
+ jobResult;
188
202
  };
189
203
 
190
204
  // src/lib/utils.ts
191
205
  function sleep(ms) {
192
- return new Promise((resolve) => setTimeout(resolve, ms));
206
+ return new Promise((resolve2) => setTimeout(resolve2, ms));
193
207
  }
194
208
  function snakeToCamel(str) {
195
209
  return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
@@ -295,8 +309,8 @@ function enrichParseResult(parseResult2, scope) {
295
309
  }
296
310
  return parseResult2;
297
311
  }
298
- function sanitizePath(path2) {
299
- let sanitized = path2.replace(/^\/+/, "");
312
+ function sanitizePath(path3) {
313
+ let sanitized = path3.replace(/^\/+/, "");
300
314
  sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
301
315
  sanitized = sanitized.replace(/\\/g, "/");
302
316
  return sanitized;
@@ -410,7 +424,7 @@ async function withRetry(fn, maxRetries, onRetry) {
410
424
  if (onRetry) {
411
425
  onRetry(attempt + 1, error);
412
426
  }
413
- await new Promise((resolve) => setTimeout(resolve, delay));
427
+ await new Promise((resolve2) => setTimeout(resolve2, delay));
414
428
  }
415
429
  }
416
430
  throw lastError;
@@ -423,17 +437,19 @@ var HttpClient = class {
423
437
  uploadTimeout;
424
438
  httpAgent;
425
439
  httpsAgent;
440
+ authTokenProvider;
426
441
  constructor(options) {
427
442
  this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
428
443
  this.uploadTimeout = options.uploadTimeout ?? 6e5;
429
444
  this.httpAgent = options.httpAgent;
430
445
  this.httpsAgent = options.httpsAgent;
446
+ this.authTokenProvider = options.authTokenProvider;
431
447
  this.axios = axios.create({
432
448
  baseURL: options.baseURL,
433
449
  timeout: options.timeout ?? DEFAULT_TIMEOUT,
434
450
  headers: {
435
451
  "User-Agent": `knowhere-node-sdk/${VERSION}`,
436
- Authorization: `Bearer ${options.apiKey}`,
452
+ ...options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : {},
437
453
  "Content-Type": "application/json",
438
454
  ...options.defaultHeaders
439
455
  },
@@ -445,6 +461,9 @@ var HttpClient = class {
445
461
  setupInterceptors() {
446
462
  this.axios.interceptors.request.use(
447
463
  (config) => {
464
+ if (this.authTokenProvider) {
465
+ return this.attachDynamicAuthorization(config);
466
+ }
448
467
  if (config.data && typeof config.data === "object") {
449
468
  config.data = keysToSnake(config.data);
450
469
  }
@@ -468,6 +487,19 @@ var HttpClient = class {
468
487
  }
469
488
  );
470
489
  }
490
+ async attachDynamicAuthorization(config) {
491
+ const token = await this.authTokenProvider?.();
492
+ if (!token) {
493
+ throw new ValidationError("Authentication token provider returned an empty token");
494
+ }
495
+ const headers = AxiosHeaders.from(config.headers);
496
+ headers.set("Authorization", `Bearer ${token}`);
497
+ config.headers = headers;
498
+ if (config.data && typeof config.data === "object") {
499
+ config.data = keysToSnake(config.data);
500
+ }
501
+ return config;
502
+ }
471
503
  handleError(error) {
472
504
  if (!error.response) {
473
505
  if (error.code === "ECONNABORTED" || error.message.includes("timeout")) {
@@ -752,11 +784,14 @@ async function pollJobStatus(httpClient, jobId, options) {
752
784
  // src/lib/result-parser.ts
753
785
  import JSZip from "jszip";
754
786
  import { promises as fs2 } from "fs";
755
- import { join, dirname } from "path";
787
+ import { join, dirname, resolve, sep } from "path";
756
788
  async function parseResult(httpClient, resultUrl, options) {
757
789
  const zipBuffer = await httpClient.download(resultUrl);
758
790
  if (options?.verifyChecksum !== false) {
759
791
  }
792
+ return parseResultBuffer(zipBuffer);
793
+ }
794
+ async function parseResultBuffer(zipBuffer) {
760
795
  const zip = await JSZip.loadAsync(zipBuffer);
761
796
  const manifestFile = zip.file("manifest.json");
762
797
  if (!manifestFile) {
@@ -821,7 +856,7 @@ async function parseResult(httpClient, resultUrl, options) {
821
856
  if (hierarchyViewFile) {
822
857
  hierarchyViewHtml = await hierarchyViewFile.async("string");
823
858
  }
824
- const result = {
859
+ return createParseResult({
825
860
  manifest,
826
861
  chunks,
827
862
  docNav,
@@ -832,6 +867,136 @@ async function parseResult(httpClient, resultUrl, options) {
832
867
  hierarchy,
833
868
  tocHierarchies,
834
869
  kbCsv,
870
+ hierarchyViewHtml
871
+ });
872
+ }
873
+ async function parseResultDirectory(directory) {
874
+ const manifestContent = await readRequiredTextFile(directory, "manifest.json");
875
+ let manifest = JSON.parse(manifestContent);
876
+ manifest = keysToCamel(manifest);
877
+ manifest = parseDates(manifest);
878
+ const chunksContent = await readRequiredTextFile(directory, "chunks.json");
879
+ let chunksData = JSON.parse(chunksContent);
880
+ chunksData = keysToCamel(chunksData);
881
+ const rawChunks = extractChunks(chunksData);
882
+ const chunks = [];
883
+ for (const chunkData of rawChunks) {
884
+ chunks.push(await processDirectoryChunk(directory, chunkData));
885
+ }
886
+ const fullMarkdown = await readOptionalTextFile(directory, "full.md");
887
+ const rawDocNav = await readOptionalJsonFile(directory, "doc_nav.json");
888
+ const docNav = rawDocNav === void 0 ? void 0 : keysToCamel(rawDocNav);
889
+ const hierarchy = await readOptionalJsonFile(directory, "hierarchy.json");
890
+ const rawChunksSlim = await readOptionalJsonFile(directory, "chunks_slim.json");
891
+ const chunksSlim = rawChunksSlim === void 0 ? void 0 : extractSlimChunks(keysToCamel(rawChunksSlim));
892
+ const rawTocHierarchies = await readOptionalJsonFile(directory, "toc_hierarchies.json");
893
+ const tocHierarchies = rawTocHierarchies === void 0 ? void 0 : keysToCamel(rawTocHierarchies);
894
+ const kbCsv = await readOptionalTextFile(directory, "kb.csv");
895
+ const hierarchyViewHtml = await readOptionalTextFile(directory, "hierarchy_view.html");
896
+ return createParseResult({
897
+ manifest,
898
+ chunks,
899
+ docNav,
900
+ fullMarkdown,
901
+ rawZip: Buffer.alloc(0),
902
+ chunksSlim,
903
+ hierarchy,
904
+ tocHierarchies,
905
+ kbCsv,
906
+ hierarchyViewHtml
907
+ });
908
+ }
909
+ async function saveExpandedParseResult(result, directory) {
910
+ if (result.rawZip.length > 0) {
911
+ const didExtractZip = await tryExtractRawZip(result.rawZip, directory);
912
+ if (didExtractZip) {
913
+ return directory;
914
+ }
915
+ }
916
+ await fs2.mkdir(directory, { recursive: true });
917
+ await fs2.writeFile(join(directory, "manifest.json"), JSON.stringify(result.manifest, null, 2));
918
+ if (result.docNav) {
919
+ await fs2.writeFile(join(directory, "doc_nav.json"), JSON.stringify(result.docNav, null, 2));
920
+ }
921
+ await fs2.writeFile(
922
+ join(directory, "chunks.json"),
923
+ JSON.stringify(serializeChunks(result.chunks), null, 2)
924
+ );
925
+ if (result.chunksSlim) {
926
+ await fs2.writeFile(
927
+ join(directory, "chunks_slim.json"),
928
+ JSON.stringify({ chunks: result.chunksSlim }, null, 2)
929
+ );
930
+ }
931
+ if (result.fullMarkdown) {
932
+ await fs2.writeFile(join(directory, "full.md"), result.fullMarkdown);
933
+ }
934
+ if (result.hierarchy) {
935
+ await fs2.writeFile(
936
+ join(directory, "hierarchy.json"),
937
+ JSON.stringify(result.hierarchy, null, 2)
938
+ );
939
+ }
940
+ if (result.tocHierarchies) {
941
+ await fs2.writeFile(
942
+ join(directory, "toc_hierarchies.json"),
943
+ JSON.stringify(result.tocHierarchies, null, 2)
944
+ );
945
+ }
946
+ if (result.kbCsv) {
947
+ await fs2.writeFile(join(directory, "kb.csv"), result.kbCsv);
948
+ }
949
+ if (result.hierarchyViewHtml) {
950
+ await fs2.writeFile(join(directory, "hierarchy_view.html"), result.hierarchyViewHtml);
951
+ }
952
+ for (const imageChunk of result.imageChunks) {
953
+ await writeBinaryAsset(directory, imageChunk.filePath, imageChunk.data);
954
+ }
955
+ for (const tableChunk of result.tableChunks) {
956
+ await writeTextAsset(directory, tableChunk.filePath, tableChunk.html);
957
+ }
958
+ return directory;
959
+ }
960
+ async function tryExtractRawZip(zipBuffer, directory) {
961
+ try {
962
+ const zip = await JSZip.loadAsync(zipBuffer);
963
+ await fs2.mkdir(directory, { recursive: true });
964
+ for (const entry of Object.values(zip.files)) {
965
+ if (entry.dir || entry.name === "result.zip") {
966
+ continue;
967
+ }
968
+ const outputPath = resolveAssetPath(directory, entry.name);
969
+ await fs2.mkdir(dirname(outputPath), { recursive: true });
970
+ await fs2.writeFile(outputPath, await entry.async("nodebuffer"));
971
+ }
972
+ return true;
973
+ } catch {
974
+ return false;
975
+ }
976
+ }
977
+ function createParseResult(parts) {
978
+ const {
979
+ manifest,
980
+ chunks,
981
+ docNav,
982
+ fullMarkdown,
983
+ rawZip,
984
+ chunksSlim,
985
+ hierarchy,
986
+ tocHierarchies,
987
+ kbCsv,
988
+ hierarchyViewHtml
989
+ } = parts;
990
+ return {
991
+ manifest,
992
+ chunks,
993
+ docNav,
994
+ fullMarkdown,
995
+ rawZip,
996
+ chunksSlim,
997
+ hierarchy,
998
+ tocHierarchies,
999
+ kbCsv,
835
1000
  hierarchyViewHtml,
836
1001
  get textChunks() {
837
1002
  return chunks.filter((c) => c.type === "text");
@@ -888,11 +1053,10 @@ async function parseResult(httpClient, resultUrl, options) {
888
1053
  for (const tableChunk of this.tableChunks) {
889
1054
  await tableChunk.save(directory);
890
1055
  }
891
- await fs2.writeFile(join(directory, "result.zip"), zipBuffer);
1056
+ await fs2.writeFile(join(directory, "result.zip"), rawZip);
892
1057
  return directory;
893
1058
  }
894
1059
  };
895
- return result;
896
1060
  }
897
1061
  function extractChunks(payload) {
898
1062
  if (Array.isArray(payload)) {
@@ -925,6 +1089,37 @@ function buildTextChunk(chunkData) {
925
1089
  metadata: chunkData.metadata ?? {}
926
1090
  };
927
1091
  }
1092
+ function buildImageChunk(chunkData, filePath, imageBuffer) {
1093
+ return {
1094
+ chunkId: chunkData.chunkId ?? "",
1095
+ type: "image",
1096
+ content: chunkData.content ?? "",
1097
+ path: chunkData.path ?? "",
1098
+ filePath,
1099
+ data: imageBuffer,
1100
+ metadata: chunkData.metadata ?? {},
1101
+ get format() {
1102
+ return getFileExtension(this.filePath);
1103
+ },
1104
+ async save(directory) {
1105
+ return writeBinaryAsset(directory, this.filePath, this.data);
1106
+ }
1107
+ };
1108
+ }
1109
+ function buildTableChunk(chunkData, filePath, html) {
1110
+ return {
1111
+ chunkId: chunkData.chunkId ?? "",
1112
+ type: "table",
1113
+ content: chunkData.content ?? "",
1114
+ path: chunkData.path ?? "",
1115
+ filePath,
1116
+ html,
1117
+ metadata: chunkData.metadata ?? {},
1118
+ async save(directory) {
1119
+ return writeTextAsset(directory, this.filePath, this.html);
1120
+ }
1121
+ };
1122
+ }
928
1123
  async function processChunk(zip, chunkData) {
929
1124
  if (chunkData.type === "text") {
930
1125
  return buildTextChunk(chunkData);
@@ -940,26 +1135,7 @@ async function processChunk(zip, chunkData) {
940
1135
  throw new KnowhereError(`Image file not found: ${filePath}`);
941
1136
  }
942
1137
  const imageBuffer = await imageFile.async("nodebuffer");
943
- const enrichedChunk = {
944
- chunkId: chunkData.chunkId ?? "",
945
- type: "image",
946
- content: chunkData.content ?? "",
947
- path: chunkData.path ?? "",
948
- filePath,
949
- data: imageBuffer,
950
- metadata: chunkData.metadata ?? {},
951
- get format() {
952
- return getFileExtension(this.filePath);
953
- },
954
- async save(directory) {
955
- const outputPath = join(directory, sanitizePath(this.filePath));
956
- const outputDir = dirname(outputPath);
957
- await fs2.mkdir(outputDir, { recursive: true });
958
- await fs2.writeFile(outputPath, this.data);
959
- return outputPath;
960
- }
961
- };
962
- return enrichedChunk;
1138
+ return buildImageChunk(chunkData, filePath, imageBuffer);
963
1139
  }
964
1140
  if (chunkData.type === "table") {
965
1141
  const filePath = getChunkFilePath(chunkData);
@@ -972,26 +1148,112 @@ async function processChunk(zip, chunkData) {
972
1148
  throw new KnowhereError(`Table file not found: ${filePath}`);
973
1149
  }
974
1150
  const html = await htmlFile.async("string");
975
- const enrichedChunk = {
976
- chunkId: chunkData.chunkId ?? "",
977
- type: "table",
978
- content: chunkData.content ?? "",
979
- path: chunkData.path ?? "",
980
- filePath,
981
- html,
982
- metadata: chunkData.metadata ?? {},
983
- async save(directory) {
984
- const outputPath = join(directory, sanitizePath(this.filePath));
985
- const outputDir = dirname(outputPath);
986
- await fs2.mkdir(outputDir, { recursive: true });
987
- await fs2.writeFile(outputPath, this.html);
988
- return outputPath;
1151
+ return buildTableChunk(chunkData, filePath, html);
1152
+ }
1153
+ return buildTextChunk(chunkData);
1154
+ }
1155
+ async function processDirectoryChunk(directory, chunkData) {
1156
+ if (chunkData.type === "text") {
1157
+ return buildTextChunk(chunkData);
1158
+ }
1159
+ if (chunkData.type === "image") {
1160
+ const filePath = getChunkFilePath(chunkData);
1161
+ if (!filePath) {
1162
+ throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
1163
+ }
1164
+ try {
1165
+ const imageBuffer = await fs2.readFile(resolveAssetPath(directory, filePath));
1166
+ return buildImageChunk(chunkData, filePath, imageBuffer);
1167
+ } catch (error) {
1168
+ if (isMissingFileError(error)) {
1169
+ throw new KnowhereError(`Image file not found: ${filePath}`);
989
1170
  }
990
- };
991
- return enrichedChunk;
1171
+ throw error;
1172
+ }
1173
+ }
1174
+ if (chunkData.type === "table") {
1175
+ const filePath = getChunkFilePath(chunkData);
1176
+ if (!filePath) {
1177
+ throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
1178
+ }
1179
+ try {
1180
+ const html = await fs2.readFile(resolveAssetPath(directory, filePath), "utf8");
1181
+ return buildTableChunk(chunkData, filePath, html);
1182
+ } catch (error) {
1183
+ if (isMissingFileError(error)) {
1184
+ throw new KnowhereError(`Table file not found: ${filePath}`);
1185
+ }
1186
+ throw error;
1187
+ }
992
1188
  }
993
1189
  return buildTextChunk(chunkData);
994
1190
  }
1191
+ function serializeChunks(chunks) {
1192
+ return {
1193
+ chunks: chunks.map((chunk) => {
1194
+ const rawChunk = {
1195
+ chunkId: chunk.chunkId,
1196
+ type: chunk.type,
1197
+ content: chunk.content,
1198
+ path: chunk.path,
1199
+ metadata: chunk.metadata
1200
+ };
1201
+ if (chunk.type === "image" || chunk.type === "table") {
1202
+ rawChunk.filePath = chunk.filePath;
1203
+ }
1204
+ return rawChunk;
1205
+ })
1206
+ };
1207
+ }
1208
+ async function readRequiredTextFile(directory, fileName) {
1209
+ try {
1210
+ return await fs2.readFile(join(directory, fileName), "utf8");
1211
+ } catch (error) {
1212
+ if (isMissingFileError(error)) {
1213
+ throw new KnowhereError(`${fileName} not found in result directory`);
1214
+ }
1215
+ throw error;
1216
+ }
1217
+ }
1218
+ async function readOptionalTextFile(directory, fileName) {
1219
+ try {
1220
+ return await fs2.readFile(join(directory, fileName), "utf8");
1221
+ } catch (error) {
1222
+ if (isMissingFileError(error)) {
1223
+ return void 0;
1224
+ }
1225
+ throw error;
1226
+ }
1227
+ }
1228
+ async function readOptionalJsonFile(directory, fileName) {
1229
+ const content = await readOptionalTextFile(directory, fileName);
1230
+ return content === void 0 ? void 0 : JSON.parse(content);
1231
+ }
1232
+ async function writeBinaryAsset(directory, filePath, data) {
1233
+ const outputPath = resolveAssetPath(directory, filePath);
1234
+ const outputDir = dirname(outputPath);
1235
+ await fs2.mkdir(outputDir, { recursive: true });
1236
+ await fs2.writeFile(outputPath, data);
1237
+ return outputPath;
1238
+ }
1239
+ async function writeTextAsset(directory, filePath, text) {
1240
+ const outputPath = resolveAssetPath(directory, filePath);
1241
+ const outputDir = dirname(outputPath);
1242
+ await fs2.mkdir(outputDir, { recursive: true });
1243
+ await fs2.writeFile(outputPath, text);
1244
+ return outputPath;
1245
+ }
1246
+ function resolveAssetPath(directory, filePath) {
1247
+ const root = resolve(directory);
1248
+ const outputPath = resolve(root, sanitizePath(filePath));
1249
+ if (outputPath !== root && !outputPath.startsWith(`${root}${sep}`)) {
1250
+ throw new KnowhereError(`Invalid result asset path: ${filePath}`);
1251
+ }
1252
+ return outputPath;
1253
+ }
1254
+ function isMissingFileError(error) {
1255
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
1256
+ }
995
1257
 
996
1258
  // src/resources/jobs.ts
997
1259
  var Jobs = class extends BaseResource {
@@ -1188,22 +1450,741 @@ var Documents = class extends BaseResource {
1188
1450
  }
1189
1451
  };
1190
1452
 
1453
+ // src/knowledge/local-store.ts
1454
+ import { createHash } from "crypto";
1455
+ import os from "os";
1456
+ import { promises as fs3 } from "fs";
1457
+ import path from "path";
1458
+ var STORE_VERSION = 1;
1459
+ var LOCAL_DOCUMENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
1460
+ var LocalKnowledgeStore = class {
1461
+ cacheDirectory;
1462
+ indexPath;
1463
+ resultCache = /* @__PURE__ */ new Map();
1464
+ constructor(cacheDirectory) {
1465
+ this.cacheDirectory = cacheDirectory ?? path.join(os.homedir(), ".knowhere-node-sdk", "knowledge");
1466
+ this.indexPath = path.join(this.cacheDirectory, "index.json");
1467
+ }
1468
+ async saveResult(result, options) {
1469
+ await fs3.mkdir(this.cacheDirectory, { recursive: true });
1470
+ const now = /* @__PURE__ */ new Date();
1471
+ const index = await this.readIndex();
1472
+ const localDocumentId = validateLocalDocumentId(
1473
+ options?.localDocumentId ?? createLocalDocumentId(result)
1474
+ );
1475
+ const resultDirectoryPath = this.getResultDirectoryPath(localDocumentId);
1476
+ await fs3.rm(resultDirectoryPath, { recursive: true, force: true });
1477
+ await saveExpandedParseResult(result, resultDirectoryPath);
1478
+ this.resultCache.set(localDocumentId, result);
1479
+ const existing = index.documents.find(
1480
+ (document) => document.localDocumentId === localDocumentId
1481
+ );
1482
+ const stored = {
1483
+ localDocumentId,
1484
+ jobId: result.jobId,
1485
+ documentId: result.documentId,
1486
+ namespace: result.namespace,
1487
+ sourceFileName: result.manifest.sourceFileName,
1488
+ chunkCount: result.chunks.length,
1489
+ typeCounts: countChunkTypes(result),
1490
+ resultDirectoryPath,
1491
+ createdAt: existing?.createdAt ?? now.toISOString(),
1492
+ updatedAt: now.toISOString()
1493
+ };
1494
+ const nextDocuments = [
1495
+ stored,
1496
+ ...index.documents.filter((document) => document.localDocumentId !== localDocumentId)
1497
+ ];
1498
+ const asyncParseJobs = (index.asyncParseJobs ?? []).map(
1499
+ (job) => job.jobId === result.jobId ? {
1500
+ ...job,
1501
+ localDocumentId,
1502
+ cacheStatus: "cached",
1503
+ updatedAt: now.toISOString()
1504
+ } : job
1505
+ );
1506
+ await this.writeIndex({
1507
+ version: STORE_VERSION,
1508
+ documents: nextDocuments,
1509
+ asyncParseJobs
1510
+ });
1511
+ return toLocalKnowledgeDocument(stored);
1512
+ }
1513
+ async saveAsyncParseJob(params) {
1514
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1515
+ const index = await this.readIndex();
1516
+ const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
1517
+ const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
1518
+ const stored = {
1519
+ jobId: params.jobId,
1520
+ localDocumentId: localDocumentId ?? existing?.localDocumentId,
1521
+ cacheStatus: existing?.cacheStatus ?? "pending",
1522
+ createdAt: existing?.createdAt ?? now,
1523
+ updatedAt: now
1524
+ };
1525
+ await this.writeIndex({
1526
+ version: STORE_VERSION,
1527
+ documents: index.documents,
1528
+ asyncParseJobs: [
1529
+ stored,
1530
+ ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
1531
+ ]
1532
+ });
1533
+ }
1534
+ async getAsyncParseJob(jobId) {
1535
+ const index = await this.readIndex();
1536
+ return (index.asyncParseJobs ?? []).find((job) => job.jobId === jobId);
1537
+ }
1538
+ async listRecoverableAsyncParseJobs() {
1539
+ const index = await this.readIndex();
1540
+ return (index.asyncParseJobs ?? []).filter((job) => job.cacheStatus === "pending" || job.cacheStatus === "not_available").map(toLocalKnowledgeAsyncParseJob);
1541
+ }
1542
+ async updateAsyncParseJobCacheStatus(params) {
1543
+ const index = await this.readIndex();
1544
+ const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
1545
+ const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
1546
+ if (!existing) {
1547
+ return;
1548
+ }
1549
+ const now = (/* @__PURE__ */ new Date()).toISOString();
1550
+ const stored = {
1551
+ ...existing,
1552
+ localDocumentId: localDocumentId ?? existing.localDocumentId,
1553
+ cacheStatus: params.cacheStatus,
1554
+ updatedAt: now
1555
+ };
1556
+ await this.writeIndex({
1557
+ version: STORE_VERSION,
1558
+ documents: index.documents,
1559
+ asyncParseJobs: [
1560
+ stored,
1561
+ ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
1562
+ ]
1563
+ });
1564
+ }
1565
+ async listDocuments() {
1566
+ const index = await this.readIndex();
1567
+ return index.documents.map(toLocalKnowledgeDocument);
1568
+ }
1569
+ async getDocument(localDocumentId) {
1570
+ validateLocalDocumentId(localDocumentId);
1571
+ const index = await this.readIndex();
1572
+ const stored = index.documents.find((document) => document.localDocumentId === localDocumentId);
1573
+ return stored ? toLocalKnowledgeDocument(stored) : void 0;
1574
+ }
1575
+ async loadResult(localDocumentId) {
1576
+ const document = await this.getDocument(localDocumentId);
1577
+ if (!document) {
1578
+ throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
1579
+ }
1580
+ const cachedResult = this.resultCache.get(localDocumentId);
1581
+ if (cachedResult) {
1582
+ return { document, result: cachedResult };
1583
+ }
1584
+ const result = await this.loadStoredResult(document);
1585
+ result.namespace = document.namespace;
1586
+ result.documentId = document.documentId;
1587
+ this.resultCache.set(localDocumentId, result);
1588
+ return { document, result };
1589
+ }
1590
+ getResultDirectoryPath(localDocumentId) {
1591
+ const documentsDirectory = path.resolve(this.cacheDirectory, "documents");
1592
+ const resultDirectoryPath = path.resolve(documentsDirectory, localDocumentId);
1593
+ if (!isPathInsideDirectory(resultDirectoryPath, documentsDirectory)) {
1594
+ throw new Error(`Local Knowhere document ID resolves outside the cache: ${localDocumentId}`);
1595
+ }
1596
+ return resultDirectoryPath;
1597
+ }
1598
+ async loadStoredResult(document) {
1599
+ return parseResultDirectory(document.resultDirectoryPath);
1600
+ }
1601
+ async readIndex() {
1602
+ try {
1603
+ const raw = await fs3.readFile(this.indexPath, "utf8");
1604
+ const parsed = JSON.parse(raw);
1605
+ if (parsed.version !== STORE_VERSION || !Array.isArray(parsed.documents)) {
1606
+ return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
1607
+ }
1608
+ return {
1609
+ version: STORE_VERSION,
1610
+ documents: parsed.documents,
1611
+ asyncParseJobs: Array.isArray(parsed.asyncParseJobs) ? parsed.asyncParseJobs : []
1612
+ };
1613
+ } catch (error) {
1614
+ if (isMissingFileError2(error)) {
1615
+ return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
1616
+ }
1617
+ throw error;
1618
+ }
1619
+ }
1620
+ async writeIndex(index) {
1621
+ await fs3.mkdir(this.cacheDirectory, { recursive: true });
1622
+ await fs3.writeFile(this.indexPath, JSON.stringify(index, null, 2));
1623
+ }
1624
+ };
1625
+ function validateLocalDocumentId(localDocumentId) {
1626
+ if (!LOCAL_DOCUMENT_ID_PATTERN.test(localDocumentId) || localDocumentId.includes("..") || path.basename(localDocumentId) !== localDocumentId) {
1627
+ throw new Error(
1628
+ "Local Knowhere document ID must be a safe slug containing only letters, numbers, dots, underscores, or hyphens"
1629
+ );
1630
+ }
1631
+ return localDocumentId;
1632
+ }
1633
+ function isPathInsideDirectory(targetPath, parentDirectory) {
1634
+ const relativePath = path.relative(parentDirectory, targetPath);
1635
+ return relativePath.length === 0 || !relativePath.startsWith("..") && !path.isAbsolute(relativePath);
1636
+ }
1637
+ function createLocalDocumentId(result) {
1638
+ const hash = createHash("sha256").update(result.jobId).update("\0").update(result.manifest.sourceFileName).digest("hex").slice(0, 16);
1639
+ return `local_${hash}`;
1640
+ }
1641
+ function countChunkTypes(result) {
1642
+ return result.chunks.reduce(
1643
+ (counts, chunk) => {
1644
+ counts[chunk.type] += 1;
1645
+ return counts;
1646
+ },
1647
+ { text: 0, image: 0, table: 0 }
1648
+ );
1649
+ }
1650
+ function toLocalKnowledgeDocument(stored) {
1651
+ return {
1652
+ localDocumentId: stored.localDocumentId,
1653
+ jobId: stored.jobId,
1654
+ documentId: stored.documentId,
1655
+ namespace: stored.namespace,
1656
+ sourceFileName: stored.sourceFileName,
1657
+ chunkCount: stored.chunkCount,
1658
+ typeCounts: stored.typeCounts,
1659
+ resultDirectoryPath: stored.resultDirectoryPath,
1660
+ createdAt: new Date(stored.createdAt),
1661
+ updatedAt: new Date(stored.updatedAt)
1662
+ };
1663
+ }
1664
+ function toLocalKnowledgeAsyncParseJob(stored) {
1665
+ return {
1666
+ ...stored,
1667
+ createdAt: new Date(stored.createdAt),
1668
+ updatedAt: new Date(stored.updatedAt)
1669
+ };
1670
+ }
1671
+ function isMissingFileError2(error) {
1672
+ return typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
1673
+ }
1674
+
1675
+ // src/knowledge/knowledge.ts
1676
+ var DEFAULT_READ_LIMIT = 12;
1677
+ var MAX_READ_LIMIT = 40;
1678
+ var DEFAULT_GREP_LIMIT = 20;
1679
+ var MAX_GREP_LIMIT = 50;
1680
+ var DEFAULT_CONTEXT_CHARS = 80;
1681
+ var Knowledge = class _Knowledge {
1682
+ client;
1683
+ store;
1684
+ constructor(client, options) {
1685
+ this.client = client;
1686
+ this.store = new LocalKnowledgeStore(options?.cacheDirectory);
1687
+ }
1688
+ withCacheDirectory(cacheDirectory) {
1689
+ return new _Knowledge(this.client, { cacheDirectory });
1690
+ }
1691
+ async parse(params) {
1692
+ const result = await this.client.parse(params);
1693
+ const document = await this.store.saveResult(result, {
1694
+ localDocumentId: params.localDocumentId
1695
+ });
1696
+ return { document, result };
1697
+ }
1698
+ async startParse(params) {
1699
+ const job = await this.client.startParse(params);
1700
+ await this.store.saveAsyncParseJob({
1701
+ jobId: job.jobId,
1702
+ localDocumentId: params.localDocumentId
1703
+ });
1704
+ return {
1705
+ job,
1706
+ localDocumentId: params.localDocumentId
1707
+ };
1708
+ }
1709
+ async getJobStatus(jobId) {
1710
+ const job = await this.client.jobs.get(jobId);
1711
+ return {
1712
+ job,
1713
+ cache: await this.resolveAsyncCache(jobId, job.isDone, job.isFailed)
1714
+ };
1715
+ }
1716
+ async recoverPendingAsyncParseJobs() {
1717
+ const jobs = await this.store.listRecoverableAsyncParseJobs();
1718
+ const results = [];
1719
+ for (const job of jobs) {
1720
+ results.push(await this.getJobStatus(job.jobId));
1721
+ }
1722
+ return {
1723
+ checkedJobs: jobs.length,
1724
+ results
1725
+ };
1726
+ }
1727
+ async cacheJobResult(params) {
1728
+ const result = await this.client.jobs.load(params.jobId, {
1729
+ verifyChecksum: params.verifyChecksum
1730
+ });
1731
+ const document = await this.store.saveResult(result, {
1732
+ localDocumentId: params.localDocumentId
1733
+ });
1734
+ return { document, result };
1735
+ }
1736
+ async resolveAsyncCache(jobId, isDone, isFailed) {
1737
+ const trackedJob = await this.store.getAsyncParseJob(jobId);
1738
+ if (!trackedJob) {
1739
+ return { status: "untracked" };
1740
+ }
1741
+ if (trackedJob.cacheStatus === "cached" && trackedJob.localDocumentId) {
1742
+ const existingDocument = await this.store.getDocument(trackedJob.localDocumentId);
1743
+ if (existingDocument) {
1744
+ return {
1745
+ status: "already_cached",
1746
+ localDocumentId: trackedJob.localDocumentId,
1747
+ document: existingDocument
1748
+ };
1749
+ }
1750
+ }
1751
+ if (isFailed) {
1752
+ await this.store.updateAsyncParseJobCacheStatus({
1753
+ jobId,
1754
+ cacheStatus: "failed"
1755
+ });
1756
+ return {
1757
+ status: "failed",
1758
+ localDocumentId: trackedJob.localDocumentId
1759
+ };
1760
+ }
1761
+ if (!isDone) {
1762
+ return {
1763
+ status: "pending",
1764
+ localDocumentId: trackedJob.localDocumentId
1765
+ };
1766
+ }
1767
+ try {
1768
+ const cached = await this.cacheJobResult({
1769
+ jobId,
1770
+ localDocumentId: trackedJob.localDocumentId
1771
+ });
1772
+ return {
1773
+ status: "cached",
1774
+ localDocumentId: cached.document.localDocumentId,
1775
+ document: cached.document
1776
+ };
1777
+ } catch (error) {
1778
+ await this.store.updateAsyncParseJobCacheStatus({
1779
+ jobId,
1780
+ cacheStatus: "not_available"
1781
+ });
1782
+ return {
1783
+ status: "not_available",
1784
+ localDocumentId: trackedJob.localDocumentId,
1785
+ error: error instanceof Error ? error.message : String(error)
1786
+ };
1787
+ }
1788
+ }
1789
+ async listDocuments() {
1790
+ return this.store.listDocuments();
1791
+ }
1792
+ async getDocumentOutline(localDocumentId) {
1793
+ const { document, result } = await this.store.loadResult(localDocumentId);
1794
+ const chunks = indexChunks(result);
1795
+ const sections = buildFlatSections(result, chunks);
1796
+ const sectionTree = result.docNav?.sections && result.docNav.sections.length > 0 ? result.docNav.sections.map(
1797
+ (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
1798
+ ) : nestSections(sections);
1799
+ return {
1800
+ document,
1801
+ totalChunks: chunks.length,
1802
+ typeCounts: document.typeCounts,
1803
+ sections,
1804
+ sectionTree
1805
+ };
1806
+ }
1807
+ async readChunks(params) {
1808
+ const { document, result } = await this.store.loadResult(params.localDocumentId);
1809
+ const limit = clampLimit(params.limit, DEFAULT_READ_LIMIT, MAX_READ_LIMIT);
1810
+ const chunks = indexChunks(result).filter((chunk) => matchesReadScope(chunk, params));
1811
+ const selected = selectReadWindow(chunks, params, limit);
1812
+ const lastSelected = selected[selected.length - 1];
1813
+ const nextChunk = lastSelected && selected.length < chunks.length ? chunks[chunks.indexOf(lastSelected) + 1]?.position : void 0;
1814
+ return {
1815
+ document,
1816
+ chunks: selected.map(toReadChunk),
1817
+ nextChunk
1818
+ };
1819
+ }
1820
+ async grepChunks(params) {
1821
+ if (!params.pattern) {
1822
+ throw new ValidationError("pattern is required");
1823
+ }
1824
+ const { document, result } = await this.store.loadResult(params.localDocumentId);
1825
+ const maxResults = clampLimit(params.maxResults, DEFAULT_GREP_LIMIT, MAX_GREP_LIMIT);
1826
+ const contextChars = params.contextChars ?? DEFAULT_CONTEXT_CHARS;
1827
+ const matcher = createMatcher(params);
1828
+ const scopedChunks = indexChunks(result).filter((chunk) => matchesGrepScope(chunk, params));
1829
+ const matches = [];
1830
+ let scannedChunks = 0;
1831
+ for (const chunk of scopedChunks) {
1832
+ scannedChunks += 1;
1833
+ const chunkMatches = matcher(chunk.content);
1834
+ for (const match of chunkMatches) {
1835
+ matches.push({
1836
+ position: chunk.position,
1837
+ chunkId: chunk.chunkId,
1838
+ chunkType: chunk.chunkType,
1839
+ sectionPath: chunk.sectionPath,
1840
+ sourceChunkPath: chunk.sourceChunkPath,
1841
+ filePath: chunk.filePath,
1842
+ startOffset: match.startOffset,
1843
+ endOffset: match.endOffset,
1844
+ snippet: buildSnippet(chunk.content, match.startOffset, match.endOffset, contextChars)
1845
+ });
1846
+ if (matches.length >= maxResults) {
1847
+ return { document, matches, scannedChunks, truncated: true };
1848
+ }
1849
+ }
1850
+ }
1851
+ return { document, matches, scannedChunks, truncated: false };
1852
+ }
1853
+ async search(params) {
1854
+ const localDocuments = await this.resolveSearchDocuments(params.localDocumentIds);
1855
+ const rawResponse = await this.client.retrieval.query({
1856
+ query: params.query,
1857
+ namespace: params.namespace,
1858
+ topK: params.topK,
1859
+ useAgentic: params.useAgentic ?? false
1860
+ });
1861
+ const documentByServerId = new Map(
1862
+ localDocuments.filter((document) => document.documentId).map((document) => [document.documentId, document])
1863
+ );
1864
+ return {
1865
+ namespace: rawResponse.namespace,
1866
+ query: rawResponse.query,
1867
+ evidenceText: rawResponse.evidenceText,
1868
+ references: [
1869
+ ...rawResponse.referencedChunks.map(
1870
+ (reference) => ({
1871
+ localDocumentId: reference.documentId ? documentByServerId.get(reference.documentId)?.localDocumentId : void 0,
1872
+ documentId: reference.documentId,
1873
+ chunkId: reference.chunkId,
1874
+ sectionPath: reference.sectionPath,
1875
+ chunkType: reference.chunkType
1876
+ })
1877
+ ),
1878
+ ...rawResponse.results.map((result) => toResultReference(result, documentByServerId))
1879
+ ],
1880
+ results: rawResponse.results.map(
1881
+ (result) => toRemoteSearchResult(result, documentByServerId)
1882
+ ),
1883
+ rawResponse
1884
+ };
1885
+ }
1886
+ async resolveSearchDocuments(localDocumentIds) {
1887
+ const documents = await this.store.listDocuments();
1888
+ if (!localDocumentIds || localDocumentIds.length === 0) {
1889
+ return documents;
1890
+ }
1891
+ const requested = new Set(localDocumentIds);
1892
+ return documents.filter((document) => requested.has(document.localDocumentId));
1893
+ }
1894
+ };
1895
/**
 * Wrap each chunk of a parse result in an indexed record.
 *
 * `position` is 1-based and follows the order of `result.chunks`; the raw
 * chunk stays reachable through `source`.
 *
 * @param {object} result - Parse result with `chunks` and `manifest`.
 * @returns {object[]} One indexed record per chunk.
 */
function indexChunks(result) {
  return result.chunks.map((source, offset) => {
    const { chunkId, type, content, path: sourceChunkPath, metadata } = source;
    return {
      source,
      position: offset + 1,
      chunkId,
      chunkType: type,
      content,
      sectionPath: normalizeSectionPath(sourceChunkPath, result.manifest.sourceFileName),
      sourceChunkPath,
      filePath: getChunkFilePath2(source),
      metadata
    };
  });
}
1911
/**
 * Resolve the file path associated with a chunk.
 *
 * Image and table chunks expose it directly as `chunk.filePath`; any other
 * chunk type is looked up in `chunk.metadata.filePath` and only accepted when
 * it is a string.
 *
 * @param {object} chunk - Raw chunk from a parse result.
 * @returns {string|undefined} The file path, or `undefined` when none is set.
 */
function getChunkFilePath2(chunk) {
  if (chunk.type === "image" || chunk.type === "table") {
    return chunk.filePath;
  }
  // Optional chaining: a chunk without a metadata object must not throw.
  const filePath = chunk.metadata?.filePath;
  return typeof filePath === "string" ? filePath : void 0;
}
1918
/**
 * Convert a raw "/"-separated chunk path into a display section path whose
 * segments are joined with " / ".
 *
 * - Empty input yields "".
 * - Paths starting with "images/" or "tables/" are returned untouched.
 * - When `sourceFileName` appears as a segment, everything after it is kept
 *   (or the file name itself when nothing follows).
 * - Otherwise the first segment is dropped, unless it is the only one.
 *
 * @param {string} path3 - Raw chunk path.
 * @param {string} [sourceFileName] - Source file name used as an anchor.
 * @returns {string} Normalized section path.
 */
function normalizeSectionPath(path3, sourceFileName) {
  if (!path3) {
    return "";
  }
  const isAssetPath = ["images/", "tables/"].some((prefix) => path3.startsWith(prefix));
  if (isAssetPath) {
    return path3;
  }
  const segments = path3.split("/").filter((segment) => segment);
  const fileAt = sourceFileName ? segments.indexOf(sourceFileName) : -1;
  if (fileAt >= 0) {
    const belowFile = segments.slice(fileAt + 1).join(" / ");
    return belowFile || sourceFileName;
  }
  if (segments.length > 1) {
    return segments.slice(1).join(" / ");
  }
  return segments[0] ?? "";
}
1937
/**
 * Produce the flat list of sections for a document.
 *
 * With a non-empty `result.docNav.sections` the navigation tree is converted
 * and flattened. Otherwise sections are synthesized by grouping chunks on
 * their section path (falling back to the raw chunk path) and sorted by the
 * position of their first chunk.
 *
 * @param {object} result - Parse result.
 * @param {object[]} chunks - Indexed chunks of that result.
 * @returns {object[]} Flat section list.
 */
function buildFlatSections(result, chunks) {
  const navSections = result.docNav?.sections;
  if (navSections && navSections.length > 0) {
    const converted = navSections.map(
      (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
    );
    return flattenSections(converted);
  }

  const sectionsByPath = /* @__PURE__ */ new Map();
  chunks.forEach((chunk) => {
    const key = chunk.sectionPath || chunk.sourceChunkPath;
    if (sectionsByPath.has(key)) {
      addChunkToSection(sectionsByPath.get(key), chunk);
    } else {
      sectionsByPath.set(key, createSectionFromChunk(key, chunk));
    }
  });
  return Array.from(sectionsByPath.values()).sort(compareSections);
}
1957
/**
 * Convert a navigation section (and, recursively, its children) into a
 * knowledge section annotated with chunk statistics.
 *
 * A chunk counts towards a section when its section path equals the section's
 * normalized path or lies beneath it.
 *
 * @param {object} section - Navigation node with `path`, `title`, `level`,
 *   `summary` and `children`.
 * @param {object[]} chunks - Indexed chunks of the document.
 * @param {string} [sourceFileName] - Forwarded to path normalization.
 * @returns {object} Knowledge section with nested `children`.
 */
function toKnowledgeSection(section, chunks, sourceFileName) {
  const sectionPath = normalizeSectionPath(section.path, sourceFileName);
  const members = [];
  for (const chunk of chunks) {
    if (isInSection(chunk.sectionPath, sectionPath)) {
      members.push(chunk);
    }
  }
  const children = section.children.map(
    (child) => toKnowledgeSection(child, chunks, sourceFileName)
  );
  const { title, level, summary } = section;
  return {
    sectionPath,
    sectionTitle: title,
    sectionLevel: level,
    summary,
    startChunk: minPosition(members),
    endChunk: maxPosition(members),
    chunkCount: members.length,
    typeCounts: countIndexedTypes(members),
    children
  };
}
1975
/**
 * Create a section record seeded with its first chunk.
 *
 * The title is the last " / "-separated segment of the path and the level is
 * the number of segments (at least 1).
 *
 * @param {string} pathValue - Section path with " / " separators.
 * @param {object} chunk - Indexed chunk that opens the section.
 * @returns {object} New section covering exactly that chunk.
 */
function createSectionFromChunk(pathValue, chunk) {
  const segments = pathValue.split(" / ").filter((segment) => segment);
  const depth = segments.length;
  const typeCounts = { text: 0, image: 0, table: 0 };
  typeCounts[chunk.chunkType] = 1;
  return {
    sectionPath: pathValue,
    sectionTitle: segments[depth - 1] ?? pathValue,
    sectionLevel: Math.max(depth, 1),
    startChunk: chunk.position,
    endChunk: chunk.position,
    chunkCount: 1,
    typeCounts,
    children: []
  };
}
1988
/**
 * Fold one more chunk into an existing section record (mutates `section`).
 *
 * Widens the section's chunk range to include the chunk's position and bumps
 * both the total and the per-type chunk counters.
 *
 * @param {object} section - Section record to update in place.
 * @param {object} chunk - Indexed chunk with `position` and `chunkType`.
 * @returns {void}
 */
function addChunkToSection(section, chunk) {
  section.startChunk = Math.min(section.startChunk ?? chunk.position, chunk.position);
  section.endChunk = Math.max(section.endChunk ?? chunk.position, chunk.position);
  section.chunkCount += 1;
  // A chunk type that is not pre-seeded in the tally starts from 0, not NaN.
  section.typeCounts[chunk.chunkType] = (section.typeCounts[chunk.chunkType] ?? 0) + 1;
}
1994
/**
 * Flatten a section tree depth-first, listing each parent before its
 * descendants.
 *
 * @param {object[]} sections - Sections, each with a `children` array.
 * @returns {object[]} All sections in pre-order.
 */
function flattenSections(sections) {
  const flat = [];
  for (const section of sections) {
    flat.push(section, ...flattenSections(section.children));
  }
  return flat;
}
1997
/**
 * Rebuild a section tree from a flat section list using the section paths.
 *
 * Each section is shallow-copied with a fresh `children` array, so the input
 * is not mutated. A section is attached to the copy whose path equals its
 * parent path; sections without a matching parent become roots.
 *
 * @param {object[]} sections - Flat sections with `sectionPath`.
 * @returns {object[]} Root sections of the rebuilt tree.
 */
function nestSections(sections) {
  const copies = sections.map((section) => Object.assign({}, section, { children: [] }));
  const copyByPath = new Map();
  for (const copy of copies) {
    copyByPath.set(copy.sectionPath, copy);
  }

  const roots = [];
  copies.forEach((copy) => {
    const parentPath = getParentSectionPath(copy.sectionPath);
    const parent = parentPath ? copyByPath.get(parentPath) : void 0;
    const siblings = parent ? parent.children : roots;
    siblings.push(copy);
  });
  return roots;
}
2015
/**
 * Derive the parent of a " / "-separated section path.
 *
 * @param {string} sectionPath - Section path.
 * @returns {string|undefined} The path without its last segment, or
 *   `undefined` for a path with at most one segment.
 */
function getParentSectionPath(sectionPath) {
  const segments = sectionPath.split(" / ").filter((segment) => segment);
  return segments.length > 1 ? segments.slice(0, -1).join(" / ") : void 0;
}
2022
/**
 * Comparator ordering sections by the position of their first chunk.
 * Sections without a `startChunk` sort last.
 *
 * @param {object} left - First section.
 * @param {object} right - Second section.
 * @returns {number} Negative, zero or positive, as expected by `Array#sort`.
 */
function compareSections(left, right) {
  const rankOf = (section) => section.startChunk ?? Number.MAX_SAFE_INTEGER;
  return rankOf(left) - rankOf(right);
}
2025
/**
 * Smallest `position` among the given chunks.
 *
 * Uses a fold instead of `Math.min(...positions)`: spreading passes one call
 * argument per chunk and can throw a RangeError on very large arrays.
 *
 * @param {object[]} chunks - Indexed chunks.
 * @returns {number|undefined} Lowest position, or `undefined` for no chunks.
 */
function minPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((lowest, chunk) => Math.min(lowest, chunk.position), Infinity);
}
2031
/**
 * Largest `position` among the given chunks.
 *
 * Uses a fold instead of `Math.max(...positions)`: spreading passes one call
 * argument per chunk and can throw a RangeError on very large arrays.
 *
 * @param {object[]} chunks - Indexed chunks.
 * @returns {number|undefined} Highest position, or `undefined` for no chunks.
 */
function maxPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((highest, chunk) => Math.max(highest, chunk.position), -Infinity);
}
2037
/**
 * Tally indexed chunks by `chunkType`.
 *
 * The tally always contains `text`, `image` and `table`; any other chunk
 * type encountered is counted under its own key rather than producing NaN.
 *
 * @param {object[]} chunks - Indexed chunks.
 * @returns {object} Map of chunk type to number of chunks.
 */
function countIndexedTypes(chunks) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const chunk of chunks) {
    counts[chunk.chunkType] = (counts[chunk.chunkType] ?? 0) + 1;
  }
  return counts;
}
2046
/**
 * Tell whether a chunk's section path is the given section or nested below it.
 *
 * Nesting is decided on whole segments: the chunk path must continue with the
 * " / " separator right after the section path.
 *
 * @param {string} chunkSectionPath - Section path of the chunk.
 * @param {string} sectionPath - Section path to test against.
 * @returns {boolean} `true` when the chunk belongs to the section.
 */
function isInSection(chunkSectionPath, sectionPath) {
  if (chunkSectionPath === sectionPath) {
    return true;
  }
  const descendantPrefix = `${sectionPath} / `;
  return chunkSectionPath.startsWith(descendantPrefix);
}
2049
/**
 * Normalize a caller-supplied limit into the range `[1, maxValue]`.
 *
 * A missing limit (`undefined`/`null`) or a NaN limit falls back to
 * `defaultValue`. Without the NaN guard the result would be NaN, and
 * `slice(0, NaN)` downstream would silently return nothing.
 *
 * @param {number} [value] - Requested limit; fractional values are floored.
 * @param {number} defaultValue - Limit used when none is usable.
 * @param {number} maxValue - Upper bound.
 * @returns {number} Effective limit.
 */
function clampLimit(value, defaultValue, maxValue) {
  if (value === void 0 || value === null || Number.isNaN(value)) {
    return defaultValue;
  }
  return Math.min(Math.max(Math.floor(value), 1), maxValue);
}
2055
/**
 * Check an indexed chunk against the optional read filters.
 *
 * Each of `params.chunkId`, `params.chunkType` and `params.sectionPath` is
 * applied only when set; the section filter also accepts chunks nested below
 * the given section.
 *
 * @param {object} chunk - Indexed chunk.
 * @param {object} params - Read options holding the filters.
 * @returns {boolean} `true` when the chunk passes every active filter.
 */
function matchesReadScope(chunk, params) {
  const idMatches = !params.chunkId || chunk.chunkId === params.chunkId;
  const typeMatches = !params.chunkType || chunk.chunkType === params.chunkType;
  // The section test stays last and lazy so it only runs once the cheaper
  // filters have passed.
  return idMatches && typeMatches && (!params.sectionPath || isInSection(chunk.sectionPath, params.sectionPath));
}
2067
/**
 * Pick the chunks to return for a read request.
 *
 * When a chunk id was requested, the position window is ignored and only the
 * limit applies. Otherwise chunks are kept when their position lies within
 * `[startChunk, endChunk]` (defaulting to the first chunk's position and no
 * upper bound), then cut to `limit`.
 *
 * @param {object[]} chunks - Scoped indexed chunks.
 * @param {object} params - Read options (`chunkId`, `startChunk`, `endChunk`).
 * @param {number} limit - Maximum number of chunks to return.
 * @returns {object[]} Selected chunks.
 */
function selectReadWindow(chunks, params, limit) {
  if (params.chunkId) {
    return chunks.slice(0, limit);
  }
  const firstPosition = params.startChunk ?? chunks[0]?.position ?? 1;
  const lastPosition = params.endChunk ?? Number.MAX_SAFE_INTEGER;
  const isInWindow = (chunk) => chunk.position >= firstPosition && chunk.position <= lastPosition;
  return chunks.filter(isInWindow).slice(0, limit);
}
2075
/**
 * Project an indexed chunk onto the shape returned by chunk reads.
 * The internal `source` reference is not included.
 *
 * @param {object} chunk - Indexed chunk.
 * @returns {object} Read-chunk record.
 */
function toReadChunk(chunk) {
  const {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  } = chunk;
  return {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  };
}
2087
/**
 * Check an indexed chunk against the optional grep filters.
 *
 * `params.chunkType` must equal the chunk's type, and
 * `params.sectionPathPrefix` is a plain string prefix of the chunk's section
 * path (not segment-aware). Each filter applies only when set.
 *
 * @param {object} chunk - Indexed chunk.
 * @param {object} params - Grep options holding the filters.
 * @returns {boolean} `true` when the chunk passes every active filter.
 */
function matchesGrepScope(chunk, params) {
  const typeMatches = !params.chunkType || chunk.chunkType === params.chunkType;
  return typeMatches && (!params.sectionPathPrefix || chunk.sectionPath.startsWith(params.sectionPathPrefix));
}
2096
/**
 * Build a function that finds every occurrence of a pattern in a string.
 *
 * Regex mode (`params.isRegex`): compiles `params.pattern` with the global
 * flag, adding `i` unless `params.isCaseSensitive` is set. Zero-width matches
 * do not stop the scan; one is reported only when the content has no
 * non-empty match, so anchor-only patterns such as `^` still flag the chunk.
 *
 * Literal mode: finds non-overlapping occurrences of `params.pattern`,
 * lower-casing both sides unless `params.isCaseSensitive` is set. An empty
 * pattern matches nothing.
 *
 * @param {object} params - `pattern`, optional `isRegex`, `isCaseSensitive`.
 * @returns {(content: string) => Array<{startOffset: number, endOffset: number}>}
 *   Matcher returning match offsets in order of appearance.
 * @throws {SyntaxError} In regex mode when `params.pattern` is not a valid
 *   regular expression.
 */
function createMatcher(params) {
  if (params.isRegex) {
    const flags = params.isCaseSensitive ? "g" : "gi";
    const regex = new RegExp(params.pattern, flags);
    return (content) => {
      const matches = [];
      let firstEmptyMatch;
      for (const match of content.matchAll(regex)) {
        const startOffset = match.index ?? 0;
        const text = match[0] ?? "";
        if (text.length === 0) {
          // Remember the first zero-width hit, but keep scanning: a real
          // match may still follow (e.g. /a*/ against "baab").
          firstEmptyMatch ??= { startOffset, endOffset: startOffset };
          continue;
        }
        matches.push({ startOffset, endOffset: startOffset + text.length });
      }
      if (matches.length === 0 && firstEmptyMatch) {
        return [firstEmptyMatch];
      }
      return matches;
    };
  }

  const needle = params.isCaseSensitive ? params.pattern : params.pattern.toLowerCase();
  if (needle.length === 0) {
    // indexOf("", i) never returns -1, so an empty needle would loop forever.
    return () => [];
  }
  return (content) => {
    const haystack = params.isCaseSensitive ? content : content.toLowerCase();
    const matches = [];
    let index = haystack.indexOf(needle);
    while (index >= 0) {
      matches.push({ startOffset: index, endOffset: index + needle.length });
      index = haystack.indexOf(needle, index + needle.length);
    }
    return matches;
  };
}
2125
/**
 * Cut a snippet around a match, padded with surrounding context.
 *
 * A negative or NaN `contextChars` is treated as 0 so the snippet always
 * contains at least the matched text; previously such values produced
 * inverted or NaN slice bounds and an empty snippet.
 *
 * @param {string} content - Full chunk content.
 * @param {number} startOffset - Start of the match (inclusive).
 * @param {number} endOffset - End of the match (exclusive).
 * @param {number} contextChars - Characters of context on each side.
 * @returns {string} Match text with up to `contextChars` on either side.
 */
function buildSnippet(content, startOffset, endOffset, contextChars) {
  const padding = contextChars > 0 ? contextChars : 0;
  const start = Math.max(0, startOffset - padding);
  const end = Math.min(content.length, endOffset + padding);
  return content.slice(start, end);
}
2130
/**
 * Turn a remote search result into a reference entry.
 *
 * `null` source fields are normalized to `undefined`, and the local document
 * id is looked up from the server document id when one is present.
 *
 * @param {object} result - Remote result with `source`, `chunkType`, `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by
 *   server document id.
 * @returns {object} `{ localDocumentId, documentId, sectionPath, chunkType, score }`.
 */
function toResultReference(result, documentByServerId) {
  const { source, chunkType, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    sectionPath: source.sectionPath ?? void 0,
    chunkType,
    score
  };
}
2140
/**
 * Turn a remote search result into the result shape exposed by `search`.
 *
 * `null` source fields are normalized to `undefined`, and the local document
 * id is looked up from the server document id when one is present.
 *
 * @param {object} result - Remote result with `source`, `chunkType`,
 *   `content` and `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by
 *   server document id.
 * @returns {object} Search result annotated with `localDocumentId`.
 */
function toRemoteSearchResult(result, documentByServerId) {
  const { source, chunkType, content, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    chunkType,
    content,
    score,
    sectionPath: source.sectionPath ?? void 0,
    sourceFileName: source.sourceFileName ?? void 0
  };
}
2152
+
1191
2153
  // src/client.ts
1192
2154
  function inferFileName(file, explicitFileName) {
1193
2155
  if (explicitFileName) {
1194
2156
  return explicitFileName;
1195
2157
  }
1196
2158
  if (typeof file === "string") {
1197
- return path.basename(file);
2159
+ return path2.basename(file);
1198
2160
  }
1199
2161
  if (isReadStream2(file) && typeof file.path === "string") {
1200
- return path.basename(file.path);
2162
+ return path2.basename(file.path);
1201
2163
  }
1202
2164
  return void 0;
1203
2165
  }
1204
2166
  function isReadStream2(file) {
1205
2167
  return typeof file === "object" && file !== null && "pipe" in file && typeof file.pipe === "function";
1206
2168
  }
2169
/**
 * Collect the parsing options of a parse request into the payload sent with
 * job creation.
 *
 * Options left `undefined` are omitted (`null` and `false` are kept), and
 * `params.ocr` is sent as `ocrEnabled`.
 *
 * @param {object} params - Parse request options.
 * @returns {object|undefined} Parsing parameters, or `undefined` when no
 *   option was provided.
 */
function buildParsingParams(params) {
  const candidates = [
    ["model", params.model],
    ["ocrEnabled", params.ocr],
    ["docType", params.docType],
    ["smartTitleParse", params.smartTitleParse],
    ["summaryImage", params.summaryImage],
    ["summaryTable", params.summaryTable],
    ["summaryTxt", params.summaryTxt],
    ["addFragDesc", params.addFragDesc],
    ["kbDir", params.kbDir]
  ];
  const provided = candidates.filter(([, value]) => value !== void 0);
  return provided.length > 0 ? Object.fromEntries(provided) : void 0;
}
1207
2188
  var Knowhere = class {
1208
2189
  /** Jobs resource for low-level API */
1209
2190
  jobs;
@@ -1211,21 +2192,25 @@ var Knowhere = class {
1211
2192
  retrieval;
1212
2193
  /** Documents resource for canonical document lifecycle operations */
1213
2194
  documents;
2195
+ /** Client-side local knowledge tools over parsed Knowhere results */
2196
+ knowledge;
1214
2197
  httpClient;
1215
2198
  /**
1216
2199
  * Create a new Knowhere client
1217
2200
  */
1218
2201
  constructor(options = {}) {
1219
2202
  const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
1220
- if (!apiKey) {
2203
+ const authTokenProvider = apiKey ? void 0 : options.authTokenProvider;
2204
+ if (!apiKey && !authTokenProvider) {
1221
2205
  throw new ValidationError(
1222
- `API key is required. Provide it via options.apiKey or ${ENV.API_KEY} environment variable.`
2206
+ `API authentication is required. Provide it via options.apiKey, options.authTokenProvider, or ${ENV.API_KEY} environment variable.`
1223
2207
  );
1224
2208
  }
1225
2209
  const baseURL = options.baseURL ?? process.env[ENV.BASE_URL] ?? DEFAULT_BASE_URL;
1226
2210
  this.httpClient = new HttpClient({
1227
2211
  baseURL,
1228
2212
  apiKey,
2213
+ authTokenProvider,
1229
2214
  timeout: options.timeout,
1230
2215
  uploadTimeout: options.uploadTimeout,
1231
2216
  maxRetries: options.maxRetries,
@@ -1236,6 +2221,7 @@ var Knowhere = class {
1236
2221
  this.jobs = new Jobs(this.httpClient);
1237
2222
  this.retrieval = new Retrieval(this.httpClient);
1238
2223
  this.documents = new Documents(this.httpClient);
2224
+ this.knowledge = new Knowledge(this);
1239
2225
  }
1240
2226
  /**
1241
2227
  * High-level API: Parse a document and return structured results
@@ -1258,6 +2244,24 @@ var Knowhere = class {
1258
2244
  * ```
1259
2245
  */
1260
2246
  async parse(params) {
2247
+ const job = await this.startParse(params);
2248
+ const jobResult = await this.jobs.wait(job.jobId, {
2249
+ pollInterval: params.pollInterval,
2250
+ pollTimeout: params.pollTimeout,
2251
+ onProgress: params.onPollProgress,
2252
+ signal: params.signal
2253
+ });
2254
+ const result = await this.jobs.load(jobResult, {
2255
+ verifyChecksum: params.verifyChecksum
2256
+ });
2257
+ return enrichParseResult(result, jobResult);
2258
+ }
2259
+ /**
2260
+ * Start a parse job and return immediately after the URL job is created or
2261
+ * the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
2262
+ * to inspect completion and load results later.
2263
+ */
2264
+ async startParse(params) {
1261
2265
  if (!params.url && !params.file) {
1262
2266
  throw new ValidationError("Either url or file must be provided");
1263
2267
  }
@@ -1271,22 +2275,6 @@ var Knowhere = class {
1271
2275
  "fileName is required when file is a Buffer, Uint8Array, or stream without a path."
1272
2276
  );
1273
2277
  }
1274
- const parsingParams = {
1275
- model: params.model,
1276
- ocrEnabled: params.ocr,
1277
- docType: params.docType,
1278
- smartTitleParse: params.smartTitleParse,
1279
- summaryImage: params.summaryImage,
1280
- summaryTable: params.summaryTable,
1281
- summaryTxt: params.summaryTxt,
1282
- addFragDesc: params.addFragDesc,
1283
- kbDir: params.kbDir
1284
- };
1285
- Object.keys(parsingParams).forEach((key) => {
1286
- if (parsingParams[key] === void 0) {
1287
- delete parsingParams[key];
1288
- }
1289
- });
1290
2278
  const webhook = params.webhook;
1291
2279
  const job = await this.jobs.create({
1292
2280
  sourceType,
@@ -1295,7 +2283,7 @@ var Knowhere = class {
1295
2283
  dataId: params.dataId,
1296
2284
  namespace: params.namespace,
1297
2285
  documentId: params.documentId,
1298
- parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
2286
+ parsingParams: buildParsingParams(params),
1299
2287
  webhook
1300
2288
  });
1301
2289
  if (params.file) {
@@ -1305,16 +2293,7 @@ var Knowhere = class {
1305
2293
  signal: params.signal
1306
2294
  });
1307
2295
  }
1308
- const jobResult = await this.jobs.wait(job.jobId, {
1309
- pollInterval: params.pollInterval,
1310
- pollTimeout: params.pollTimeout,
1311
- onProgress: params.onPollProgress,
1312
- signal: params.signal
1313
- });
1314
- const result = await this.jobs.load(jobResult, {
1315
- verifyChecksum: params.verifyChecksum
1316
- });
1317
- return enrichParseResult(result, jobResult);
2296
+ return job;
1318
2297
  }
1319
2298
  };
1320
2299
  export {
@@ -1331,6 +2310,8 @@ export {
1331
2310
  Jobs,
1332
2311
  Knowhere,
1333
2312
  KnowhereError,
2313
+ Knowledge,
2314
+ LocalKnowledgeStore,
1334
2315
  NetworkError,
1335
2316
  NotFoundError,
1336
2317
  PaymentRequiredError,