@ontos-ai/knowhere-sdk 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -9
- package/dist/index.d.mts +256 -2
- package/dist/index.d.ts +256 -2
- package/dist/index.js +1059 -81
- package/dist/index.mjs +1061 -83
- package/package.json +28 -21
package/dist/index.js
CHANGED
|
@@ -43,6 +43,8 @@ __export(index_exports, {
|
|
|
43
43
|
Jobs: () => Jobs,
|
|
44
44
|
Knowhere: () => Knowhere,
|
|
45
45
|
KnowhereError: () => KnowhereError,
|
|
46
|
+
Knowledge: () => Knowledge,
|
|
47
|
+
LocalKnowledgeStore: () => LocalKnowledgeStore,
|
|
46
48
|
NetworkError: () => NetworkError,
|
|
47
49
|
NotFoundError: () => NotFoundError,
|
|
48
50
|
PaymentRequiredError: () => PaymentRequiredError,
|
|
@@ -59,7 +61,7 @@ __export(index_exports, {
|
|
|
59
61
|
module.exports = __toCommonJS(index_exports);
|
|
60
62
|
|
|
61
63
|
// src/client.ts
|
|
62
|
-
var
|
|
64
|
+
var import_path3 = __toESM(require("path"));
|
|
63
65
|
|
|
64
66
|
// src/lib/http-client.ts
|
|
65
67
|
var import_axios = __toESM(require("axios"));
|
|
@@ -101,6 +103,7 @@ var NetworkError = class extends KnowhereError {
|
|
|
101
103
|
this.cause = cause;
|
|
102
104
|
this.name = "NetworkError";
|
|
103
105
|
}
|
|
106
|
+
cause;
|
|
104
107
|
};
|
|
105
108
|
var TimeoutError = class extends NetworkError {
|
|
106
109
|
constructor(message = "Request timed out") {
|
|
@@ -114,6 +117,7 @@ var PollingTimeoutError = class extends KnowhereError {
|
|
|
114
117
|
this.elapsedMs = elapsedMs;
|
|
115
118
|
this.name = "PollingTimeoutError";
|
|
116
119
|
}
|
|
120
|
+
elapsedMs;
|
|
117
121
|
};
|
|
118
122
|
var ChecksumError = class extends KnowhereError {
|
|
119
123
|
constructor(message = "Checksum verification failed", expected, actual) {
|
|
@@ -122,6 +126,8 @@ var ChecksumError = class extends KnowhereError {
|
|
|
122
126
|
this.actual = actual;
|
|
123
127
|
this.name = "ChecksumError";
|
|
124
128
|
}
|
|
129
|
+
expected;
|
|
130
|
+
actual;
|
|
125
131
|
};
|
|
126
132
|
var ValidationError = class extends KnowhereError {
|
|
127
133
|
constructor(message) {
|
|
@@ -147,6 +153,11 @@ var APIError = class extends KnowhereError {
|
|
|
147
153
|
this.body = body;
|
|
148
154
|
this.name = "APIError";
|
|
149
155
|
}
|
|
156
|
+
statusCode;
|
|
157
|
+
code;
|
|
158
|
+
requestId;
|
|
159
|
+
details;
|
|
160
|
+
body;
|
|
150
161
|
};
|
|
151
162
|
var BadRequestError = class extends APIError {
|
|
152
163
|
constructor(message, code, requestId, details, body) {
|
|
@@ -190,6 +201,7 @@ var RateLimitError = class extends APIError {
|
|
|
190
201
|
this.retryAfter = retryAfter;
|
|
191
202
|
this.name = "RateLimitError";
|
|
192
203
|
}
|
|
204
|
+
retryAfter;
|
|
193
205
|
};
|
|
194
206
|
var InternalServerError = class extends APIError {
|
|
195
207
|
constructor(message = "Internal server error", code, requestId, details, body) {
|
|
@@ -245,11 +257,13 @@ var JobFailedError = class extends KnowhereError {
|
|
|
245
257
|
this.jobResult = jobResult;
|
|
246
258
|
this.name = "JobFailedError";
|
|
247
259
|
}
|
|
260
|
+
code;
|
|
261
|
+
jobResult;
|
|
248
262
|
};
|
|
249
263
|
|
|
250
264
|
// src/lib/utils.ts
|
|
251
265
|
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed straight to `setTimeout`.
 * @returns {Promise<void>} Promise fulfilled (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
254
268
|
function snakeToCamel(str) {
|
|
255
269
|
return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
|
|
@@ -355,8 +369,8 @@ function enrichParseResult(parseResult2, scope) {
|
|
|
355
369
|
}
|
|
356
370
|
return parseResult2;
|
|
357
371
|
}
|
|
358
|
-
function sanitizePath(
|
|
359
|
-
let sanitized =
|
|
372
|
+
function sanitizePath(path3) {
|
|
373
|
+
let sanitized = path3.replace(/^\/+/, "");
|
|
360
374
|
sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
|
|
361
375
|
sanitized = sanitized.replace(/\\/g, "/");
|
|
362
376
|
return sanitized;
|
|
@@ -470,7 +484,7 @@ async function withRetry(fn, maxRetries, onRetry) {
|
|
|
470
484
|
if (onRetry) {
|
|
471
485
|
onRetry(attempt + 1, error);
|
|
472
486
|
}
|
|
473
|
-
await new Promise((
|
|
487
|
+
await new Promise((resolve2) => setTimeout(resolve2, delay));
|
|
474
488
|
}
|
|
475
489
|
}
|
|
476
490
|
throw lastError;
|
|
@@ -483,17 +497,19 @@ var HttpClient = class {
|
|
|
483
497
|
uploadTimeout;
|
|
484
498
|
httpAgent;
|
|
485
499
|
httpsAgent;
|
|
500
|
+
authTokenProvider;
|
|
486
501
|
constructor(options) {
|
|
487
502
|
this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
488
503
|
this.uploadTimeout = options.uploadTimeout ?? 6e5;
|
|
489
504
|
this.httpAgent = options.httpAgent;
|
|
490
505
|
this.httpsAgent = options.httpsAgent;
|
|
506
|
+
this.authTokenProvider = options.authTokenProvider;
|
|
491
507
|
this.axios = import_axios.default.create({
|
|
492
508
|
baseURL: options.baseURL,
|
|
493
509
|
timeout: options.timeout ?? DEFAULT_TIMEOUT,
|
|
494
510
|
headers: {
|
|
495
511
|
"User-Agent": `knowhere-node-sdk/${VERSION}`,
|
|
496
|
-
Authorization: `Bearer ${options.apiKey}
|
|
512
|
+
...options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : {},
|
|
497
513
|
"Content-Type": "application/json",
|
|
498
514
|
...options.defaultHeaders
|
|
499
515
|
},
|
|
@@ -505,6 +521,9 @@ var HttpClient = class {
|
|
|
505
521
|
setupInterceptors() {
|
|
506
522
|
this.axios.interceptors.request.use(
|
|
507
523
|
(config) => {
|
|
524
|
+
if (this.authTokenProvider) {
|
|
525
|
+
return this.attachDynamicAuthorization(config);
|
|
526
|
+
}
|
|
508
527
|
if (config.data && typeof config.data === "object") {
|
|
509
528
|
config.data = keysToSnake(config.data);
|
|
510
529
|
}
|
|
@@ -528,6 +547,19 @@ var HttpClient = class {
|
|
|
528
547
|
}
|
|
529
548
|
);
|
|
530
549
|
}
|
|
550
|
+
async attachDynamicAuthorization(config) {
|
|
551
|
+
const token = await this.authTokenProvider?.();
|
|
552
|
+
if (!token) {
|
|
553
|
+
throw new ValidationError("Authentication token provider returned an empty token");
|
|
554
|
+
}
|
|
555
|
+
const headers = import_axios.AxiosHeaders.from(config.headers);
|
|
556
|
+
headers.set("Authorization", `Bearer ${token}`);
|
|
557
|
+
config.headers = headers;
|
|
558
|
+
if (config.data && typeof config.data === "object") {
|
|
559
|
+
config.data = keysToSnake(config.data);
|
|
560
|
+
}
|
|
561
|
+
return config;
|
|
562
|
+
}
|
|
531
563
|
handleError(error) {
|
|
532
564
|
if (!error.response) {
|
|
533
565
|
if (error.code === "ECONNABORTED" || error.message.includes("timeout")) {
|
|
@@ -817,6 +849,9 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
817
849
|
const zipBuffer = await httpClient.download(resultUrl);
|
|
818
850
|
if (options?.verifyChecksum !== false) {
|
|
819
851
|
}
|
|
852
|
+
return parseResultBuffer(zipBuffer);
|
|
853
|
+
}
|
|
854
|
+
async function parseResultBuffer(zipBuffer) {
|
|
820
855
|
const zip = await import_jszip.default.loadAsync(zipBuffer);
|
|
821
856
|
const manifestFile = zip.file("manifest.json");
|
|
822
857
|
if (!manifestFile) {
|
|
@@ -881,7 +916,7 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
881
916
|
if (hierarchyViewFile) {
|
|
882
917
|
hierarchyViewHtml = await hierarchyViewFile.async("string");
|
|
883
918
|
}
|
|
884
|
-
|
|
919
|
+
return createParseResult({
|
|
885
920
|
manifest,
|
|
886
921
|
chunks,
|
|
887
922
|
docNav,
|
|
@@ -892,6 +927,136 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
892
927
|
hierarchy,
|
|
893
928
|
tocHierarchies,
|
|
894
929
|
kbCsv,
|
|
930
|
+
hierarchyViewHtml
|
|
931
|
+
});
|
|
932
|
+
}
|
|
933
|
+
/**
 * Rebuild a parse result from a directory of already-extracted result files.
 *
 * `manifest.json` and `chunks.json` are required; `full.md`, `doc_nav.json`,
 * `hierarchy.json`, `chunks_slim.json`, `toc_hierarchies.json`, `kb.csv` and
 * `hierarchy_view.html` are read when present and left `undefined` otherwise.
 * Because there is no archive to attach, `rawZip` is an empty Buffer.
 *
 * @param {string} directory - Directory containing the expanded result files.
 * @returns {Promise<object>} The value produced by `createParseResult`.
 */
async function parseResultDirectory(directory) {
  const manifestJson = JSON.parse(await readRequiredTextFile(directory, "manifest.json"));
  const manifest = parseDates(keysToCamel(manifestJson));

  const chunksJson = JSON.parse(await readRequiredTextFile(directory, "chunks.json"));
  const chunkPayload = keysToCamel(chunksJson);
  const chunks = [];
  // Chunks are loaded one at a time, in file order.
  for (const rawChunk of extractChunks(chunkPayload)) {
    chunks.push(await processDirectoryChunk(directory, rawChunk));
  }

  const fullMarkdown = await readOptionalTextFile(directory, "full.md");

  const docNavJson = await readOptionalJsonFile(directory, "doc_nav.json");
  const docNav = docNavJson === void 0 ? void 0 : keysToCamel(docNavJson);

  // hierarchy.json is passed through without key conversion.
  const hierarchy = await readOptionalJsonFile(directory, "hierarchy.json");

  const slimJson = await readOptionalJsonFile(directory, "chunks_slim.json");
  const chunksSlim = slimJson === void 0 ? void 0 : extractSlimChunks(keysToCamel(slimJson));

  const tocJson = await readOptionalJsonFile(directory, "toc_hierarchies.json");
  const tocHierarchies = tocJson === void 0 ? void 0 : keysToCamel(tocJson);

  const kbCsv = await readOptionalTextFile(directory, "kb.csv");
  const hierarchyViewHtml = await readOptionalTextFile(directory, "hierarchy_view.html");

  return createParseResult({
    manifest,
    chunks,
    docNav,
    fullMarkdown,
    rawZip: Buffer.alloc(0),
    chunksSlim,
    hierarchy,
    tocHierarchies,
    kbCsv,
    hierarchyViewHtml
  });
}
|
|
969
|
+
/**
 * Write a parse result to `directory` as individual files.
 *
 * When the result carries a non-empty `rawZip` and `tryExtractRawZip`
 * succeeds, the archive contents are used and nothing else is written.
 * Otherwise `manifest.json` and `chunks.json` are always written, each
 * optional artefact is written only when its value is truthy, and finally
 * every image and table chunk asset is written under its `filePath`.
 *
 * @param result - Parse result to persist.
 * @param {string} directory - Destination directory (created if missing).
 * @returns {Promise<string>} The `directory` argument.
 */
async function saveExpandedParseResult(result, directory) {
  const hasRawZip = result.rawZip.length > 0;
  if (hasRawZip && (await tryExtractRawZip(result.rawZip, directory))) {
    return directory;
  }

  const writeTopLevelFile = (fileName, contents) =>
    import_fs2.promises.writeFile((0, import_path.join)(directory, fileName), contents);
  const prettyJson = (value) => JSON.stringify(value, null, 2);

  await import_fs2.promises.mkdir(directory, { recursive: true });
  await writeTopLevelFile("manifest.json", prettyJson(result.manifest));
  if (result.docNav) {
    await writeTopLevelFile("doc_nav.json", prettyJson(result.docNav));
  }
  await writeTopLevelFile("chunks.json", prettyJson(serializeChunks(result.chunks)));
  if (result.chunksSlim) {
    await writeTopLevelFile("chunks_slim.json", prettyJson({ chunks: result.chunksSlim }));
  }
  if (result.fullMarkdown) {
    await writeTopLevelFile("full.md", result.fullMarkdown);
  }
  if (result.hierarchy) {
    await writeTopLevelFile("hierarchy.json", prettyJson(result.hierarchy));
  }
  if (result.tocHierarchies) {
    await writeTopLevelFile("toc_hierarchies.json", prettyJson(result.tocHierarchies));
  }
  if (result.kbCsv) {
    await writeTopLevelFile("kb.csv", result.kbCsv);
  }
  if (result.hierarchyViewHtml) {
    await writeTopLevelFile("hierarchy_view.html", result.hierarchyViewHtml);
  }

  for (const { filePath, data } of result.imageChunks) {
    await writeBinaryAsset(directory, filePath, data);
  }
  for (const { filePath, html } of result.tableChunks) {
    await writeTextAsset(directory, filePath, html);
  }
  return directory;
}
|
|
1020
|
+
/**
 * Best-effort extraction of a result archive into `directory`.
 *
 * Directory entries and an entry named `result.zip` are skipped. Every other
 * entry is written to the location `resolveAssetPath` computes for its name.
 * Any failure (unreadable archive, rejected path, filesystem error) is
 * reported as `false` rather than thrown, so the caller can fall back to
 * another strategy; files written before the failure are left in place.
 *
 * @param zipBuffer - Archive bytes handed to JSZip.
 * @param {string} directory - Extraction root (created if missing).
 * @returns {Promise<boolean>} `true` when every entry was written, else `false`.
 */
async function tryExtractRawZip(zipBuffer, directory) {
  try {
    const archive = await import_jszip.default.loadAsync(zipBuffer);
    await import_fs2.promises.mkdir(directory, { recursive: true });

    const fileEntries = Object.values(archive.files).filter(
      (entry) => !(entry.dir || entry.name === "result.zip")
    );
    for (const fileEntry of fileEntries) {
      const destination = resolveAssetPath(directory, fileEntry.name);
      await import_fs2.promises.mkdir((0, import_path.dirname)(destination), { recursive: true });
      const contents = await fileEntry.async("nodebuffer");
      await import_fs2.promises.writeFile(destination, contents);
    }
    return true;
  } catch {
    return false;
  }
}
|
|
1037
|
+
function createParseResult(parts) {
|
|
1038
|
+
const {
|
|
1039
|
+
manifest,
|
|
1040
|
+
chunks,
|
|
1041
|
+
docNav,
|
|
1042
|
+
fullMarkdown,
|
|
1043
|
+
rawZip,
|
|
1044
|
+
chunksSlim,
|
|
1045
|
+
hierarchy,
|
|
1046
|
+
tocHierarchies,
|
|
1047
|
+
kbCsv,
|
|
1048
|
+
hierarchyViewHtml
|
|
1049
|
+
} = parts;
|
|
1050
|
+
return {
|
|
1051
|
+
manifest,
|
|
1052
|
+
chunks,
|
|
1053
|
+
docNav,
|
|
1054
|
+
fullMarkdown,
|
|
1055
|
+
rawZip,
|
|
1056
|
+
chunksSlim,
|
|
1057
|
+
hierarchy,
|
|
1058
|
+
tocHierarchies,
|
|
1059
|
+
kbCsv,
|
|
895
1060
|
hierarchyViewHtml,
|
|
896
1061
|
get textChunks() {
|
|
897
1062
|
return chunks.filter((c) => c.type === "text");
|
|
@@ -948,11 +1113,10 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
948
1113
|
for (const tableChunk of this.tableChunks) {
|
|
949
1114
|
await tableChunk.save(directory);
|
|
950
1115
|
}
|
|
951
|
-
await import_fs2.promises.writeFile((0, import_path.join)(directory, "result.zip"),
|
|
1116
|
+
await import_fs2.promises.writeFile((0, import_path.join)(directory, "result.zip"), rawZip);
|
|
952
1117
|
return directory;
|
|
953
1118
|
}
|
|
954
1119
|
};
|
|
955
|
-
return result;
|
|
956
1120
|
}
|
|
957
1121
|
function extractChunks(payload) {
|
|
958
1122
|
if (Array.isArray(payload)) {
|
|
@@ -985,6 +1149,37 @@ function buildTextChunk(chunkData) {
|
|
|
985
1149
|
metadata: chunkData.metadata ?? {}
|
|
986
1150
|
};
|
|
987
1151
|
}
|
|
1152
|
+
/**
 * Assemble an image chunk object from raw chunk data and its file contents.
 *
 * Missing (`null`/`undefined`) `chunkId`, `content` and `path` default to an
 * empty string, and missing `metadata` to an empty object.
 *
 * @param chunkData - Raw chunk record.
 * @param {string} filePath - Asset path of the image within the result.
 * @param imageBuffer - Image bytes, exposed as `data`.
 * @returns Chunk with a `format` getter (extension of `filePath`) and a
 *   `save(directory)` method that writes `data` via `writeBinaryAsset`.
 */
function buildImageChunk(chunkData, filePath, imageBuffer) {
  const { chunkId, content, path: chunkPath, metadata } = chunkData;
  const imageChunk = {
    chunkId: chunkId ?? "",
    type: "image",
    content: content ?? "",
    path: chunkPath ?? "",
    filePath,
    data: imageBuffer,
    metadata: metadata ?? {},
    get format() {
      return getFileExtension(this.filePath);
    },
    async save(directory) {
      return writeBinaryAsset(directory, this.filePath, this.data);
    }
  };
  return imageChunk;
}
|
|
1169
|
+
/**
 * Assemble a table chunk object from raw chunk data and its HTML contents.
 *
 * Missing (`null`/`undefined`) `chunkId`, `content` and `path` default to an
 * empty string, and missing `metadata` to an empty object.
 *
 * @param chunkData - Raw chunk record.
 * @param {string} filePath - Asset path of the table within the result.
 * @param {string} html - Table markup, exposed as `html`.
 * @returns Chunk with a `save(directory)` method that writes `html` via
 *   `writeTextAsset`.
 */
function buildTableChunk(chunkData, filePath, html) {
  const { chunkId, content, path: chunkPath, metadata } = chunkData;
  const tableChunk = {
    chunkId: chunkId ?? "",
    type: "table",
    content: content ?? "",
    path: chunkPath ?? "",
    filePath,
    html,
    metadata: metadata ?? {},
    async save(directory) {
      return writeTextAsset(directory, this.filePath, this.html);
    }
  };
  return tableChunk;
}
|
|
988
1183
|
async function processChunk(zip, chunkData) {
|
|
989
1184
|
if (chunkData.type === "text") {
|
|
990
1185
|
return buildTextChunk(chunkData);
|
|
@@ -1000,26 +1195,7 @@ async function processChunk(zip, chunkData) {
|
|
|
1000
1195
|
throw new KnowhereError(`Image file not found: ${filePath}`);
|
|
1001
1196
|
}
|
|
1002
1197
|
const imageBuffer = await imageFile.async("nodebuffer");
|
|
1003
|
-
|
|
1004
|
-
chunkId: chunkData.chunkId ?? "",
|
|
1005
|
-
type: "image",
|
|
1006
|
-
content: chunkData.content ?? "",
|
|
1007
|
-
path: chunkData.path ?? "",
|
|
1008
|
-
filePath,
|
|
1009
|
-
data: imageBuffer,
|
|
1010
|
-
metadata: chunkData.metadata ?? {},
|
|
1011
|
-
get format() {
|
|
1012
|
-
return getFileExtension(this.filePath);
|
|
1013
|
-
},
|
|
1014
|
-
async save(directory) {
|
|
1015
|
-
const outputPath = (0, import_path.join)(directory, sanitizePath(this.filePath));
|
|
1016
|
-
const outputDir = (0, import_path.dirname)(outputPath);
|
|
1017
|
-
await import_fs2.promises.mkdir(outputDir, { recursive: true });
|
|
1018
|
-
await import_fs2.promises.writeFile(outputPath, this.data);
|
|
1019
|
-
return outputPath;
|
|
1020
|
-
}
|
|
1021
|
-
};
|
|
1022
|
-
return enrichedChunk;
|
|
1198
|
+
return buildImageChunk(chunkData, filePath, imageBuffer);
|
|
1023
1199
|
}
|
|
1024
1200
|
if (chunkData.type === "table") {
|
|
1025
1201
|
const filePath = getChunkFilePath(chunkData);
|
|
@@ -1032,26 +1208,112 @@ async function processChunk(zip, chunkData) {
|
|
|
1032
1208
|
throw new KnowhereError(`Table file not found: ${filePath}`);
|
|
1033
1209
|
}
|
|
1034
1210
|
const html = await htmlFile.async("string");
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1211
|
+
return buildTableChunk(chunkData, filePath, html);
|
|
1212
|
+
}
|
|
1213
|
+
return buildTextChunk(chunkData);
|
|
1214
|
+
}
|
|
1215
|
+
/**
 * Turn one raw chunk record into a chunk object, loading image/table assets
 * from an expanded result directory.
 *
 * Image and table chunks must carry a file path; their asset is read from the
 * location `resolveAssetPath` computes. Every other chunk type, including
 * unrecognised ones, is built with `buildTextChunk`.
 *
 * @param {string} directory - Expanded result directory.
 * @param chunkData - Raw chunk record (camel-cased keys).
 * @returns {Promise<object>} The built chunk.
 * @throws {KnowhereError} When an image/table chunk has no file path or its
 *   file does not exist. Other read errors are rethrown unchanged.
 */
async function processDirectoryChunk(directory, chunkData) {
  switch (chunkData.type) {
    case "image": {
      const filePath = getChunkFilePath(chunkData);
      if (!filePath) {
        throw new KnowhereError(`Image chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
      }
      try {
        const assetPath = resolveAssetPath(directory, filePath);
        const imageBuffer = await import_fs2.promises.readFile(assetPath);
        return buildImageChunk(chunkData, filePath, imageBuffer);
      } catch (error) {
        if (!isMissingFileError(error)) {
          throw error;
        }
        throw new KnowhereError(`Image file not found: ${filePath}`);
      }
    }
    case "table": {
      const filePath = getChunkFilePath(chunkData);
      if (!filePath) {
        throw new KnowhereError(`Table chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
      }
      try {
        const assetPath = resolveAssetPath(directory, filePath);
        const html = await import_fs2.promises.readFile(assetPath, "utf8");
        return buildTableChunk(chunkData, filePath, html);
      } catch (error) {
        if (!isMissingFileError(error)) {
          throw error;
        }
        throw new KnowhereError(`Table file not found: ${filePath}`);
      }
    }
    default:
      // "text" and any unrecognised type share the text representation.
      return buildTextChunk(chunkData);
  }
}
|
|
1251
|
+
/**
 * Reduce chunk objects to the plain records stored in `chunks.json`.
 *
 * Only `chunkId`, `type`, `content`, `path` and `metadata` are kept; image
 * and table chunks additionally keep `filePath`. Binary data, HTML and
 * methods are dropped.
 *
 * @param {Array<object>} chunks - Chunk objects to serialise.
 * @returns {{chunks: Array<object>}} Wrapper object holding the plain records.
 */
function serializeChunks(chunks) {
  const toRawChunk = (chunk) => {
    const { chunkId, type, content, path: chunkPath, metadata } = chunk;
    const rawChunk = { chunkId, type, content, path: chunkPath, metadata };
    const hasAssetFile = type === "image" || type === "table";
    if (hasAssetFile) {
      rawChunk.filePath = chunk.filePath;
    }
    return rawChunk;
  };
  return { chunks: chunks.map(toRawChunk) };
}
|
|
1268
|
+
/**
 * Read a UTF-8 file that must exist in the result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name, joined onto `directory`.
 * @returns {Promise<string>} File contents.
 * @throws {KnowhereError} When the file does not exist (ENOENT). Any other
 *   read error is rethrown unchanged.
 */
async function readRequiredTextFile(directory, fileName) {
  const filePath = (0, import_path.join)(directory, fileName);
  try {
    return await import_fs2.promises.readFile(filePath, "utf8");
  } catch (error) {
    if (!isMissingFileError(error)) {
      throw error;
    }
    throw new KnowhereError(`${fileName} not found in result directory`);
  }
}
|
|
1278
|
+
/**
 * Read a UTF-8 file that may be absent from the result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name, joined onto `directory`.
 * @returns {Promise<string|undefined>} File contents, or `undefined` when the
 *   file does not exist (ENOENT). Any other read error is rethrown unchanged.
 */
async function readOptionalTextFile(directory, fileName) {
  const filePath = (0, import_path.join)(directory, fileName);
  try {
    const text = await import_fs2.promises.readFile(filePath, "utf8");
    return text;
  } catch (error) {
    if (!isMissingFileError(error)) {
      throw error;
    }
    return void 0;
  }
}
|
|
1288
|
+
/**
 * Read and parse a JSON file that may be absent from the result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name, joined onto `directory`.
 * @returns {Promise<unknown>} Parsed JSON, or `undefined` when
 *   `readOptionalTextFile` reports the file as missing. Malformed JSON makes
 *   `JSON.parse` throw.
 */
async function readOptionalJsonFile(directory, fileName) {
  const text = await readOptionalTextFile(directory, fileName);
  if (text === void 0) {
    return void 0;
  }
  return JSON.parse(text);
}
|
|
1292
|
+
/**
 * Write binary asset data beneath `directory`, creating parent folders.
 *
 * @param {string} directory - Output root.
 * @param {string} filePath - Asset path, resolved with `resolveAssetPath`.
 * @param data - Bytes passed to `fs.promises.writeFile`.
 * @returns {Promise<string>} The resolved path that was written.
 */
async function writeBinaryAsset(directory, filePath, data) {
  const destination = resolveAssetPath(directory, filePath);
  await import_fs2.promises.mkdir((0, import_path.dirname)(destination), { recursive: true });
  await import_fs2.promises.writeFile(destination, data);
  return destination;
}
|
|
1299
|
+
/**
 * Write a text asset beneath `directory`, creating parent folders.
 *
 * @param {string} directory - Output root.
 * @param {string} filePath - Asset path, resolved with `resolveAssetPath`.
 * @param {string} text - Contents passed to `fs.promises.writeFile`.
 * @returns {Promise<string>} The resolved path that was written.
 */
async function writeTextAsset(directory, filePath, text) {
  const destination = resolveAssetPath(directory, filePath);
  await import_fs2.promises.mkdir((0, import_path.dirname)(destination), { recursive: true });
  await import_fs2.promises.writeFile(destination, text);
  return destination;
}
|
|
1306
|
+
/**
 * Resolve an asset path from a result against `directory` and refuse any
 * path that would land outside it.
 *
 * `sanitizePath` is applied first, but the containment check below is what
 * actually guarantees the result stays inside `directory`.
 *
 * @param {string} directory - Output/input root; resolved to an absolute path.
 * @param {string} filePath - Asset path taken from result data or an archive.
 * @returns {string} Absolute path inside (or equal to) the resolved root.
 * @throws {KnowhereError} When the resolved path escapes the root.
 */
function resolveAssetPath(directory, filePath) {
  const root = (0, import_path.resolve)(directory);
  const outputPath = (0, import_path.resolve)(root, sanitizePath(filePath));
  // Compare via the relative path instead of a `root + sep` prefix: when the
  // root is itself a filesystem root ("/" or "C:\") it already ends with a
  // separator, so the prefix form would reject every legitimate asset.
  const relativePath = (0, import_path.relative)(root, outputPath);
  const escapesRoot =
    relativePath === ".." ||
    relativePath.startsWith(`..${import_path.sep}`) ||
    (0, import_path.isAbsolute)(relativePath);
  if (escapesRoot) {
    throw new KnowhereError(`Invalid result asset path: ${filePath}`);
  }
  return outputPath;
}
|
|
1314
|
+
/**
 * Tell whether a caught value is a "file does not exist" error.
 *
 * @param {unknown} error - Value caught from a filesystem call.
 * @returns {boolean} `true` only for a non-null object whose `code` is "ENOENT".
 */
function isMissingFileError(error) {
  if (typeof error !== "object" || error === null) {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
|
|
1055
1317
|
|
|
1056
1318
|
// src/resources/jobs.ts
|
|
1057
1319
|
var Jobs = class extends BaseResource {
|
|
@@ -1060,11 +1322,7 @@ var Jobs = class extends BaseResource {
|
|
|
1060
1322
|
* Create a new parsing job
|
|
1061
1323
|
*/
|
|
1062
1324
|
async create(params) {
|
|
1063
|
-
const job = await this.httpClient.post(
|
|
1064
|
-
"/v1/jobs",
|
|
1065
|
-
params
|
|
1066
|
-
);
|
|
1067
|
-
delete job.documentId;
|
|
1325
|
+
const job = await this.httpClient.post("/v1/jobs", params);
|
|
1068
1326
|
if (job.uploadUrl) {
|
|
1069
1327
|
this.pendingUploadJobs.set(job.jobId, job);
|
|
1070
1328
|
}
|
|
@@ -1248,22 +1506,741 @@ var Documents = class extends BaseResource {
|
|
|
1248
1506
|
}
|
|
1249
1507
|
};
|
|
1250
1508
|
|
|
1509
|
+
// src/knowledge/local-store.ts
|
|
1510
|
+
var import_crypto = require("crypto");
|
|
1511
|
+
var import_os = __toESM(require("os"));
|
|
1512
|
+
var import_fs3 = require("fs");
|
|
1513
|
+
var import_path2 = __toESM(require("path"));
|
|
1514
|
+
var STORE_VERSION = 1;
|
|
1515
|
+
var LOCAL_DOCUMENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
|
|
1516
|
+
/**
 * File-backed store for parse results.
 *
 * Layout under `cacheDirectory`:
 *   - `index.json`            — `{ version, documents, asyncParseJobs }`
 *   - `documents/<localId>/`  — one expanded result per local document ID
 *
 * Results saved or loaded through an instance are also kept in an in-memory
 * map for the lifetime of that instance. Index updates are read-modify-write
 * and are not coordinated between concurrent callers.
 */
var LocalKnowledgeStore = class {
  /** Root directory holding `index.json` and the `documents/` tree. */
  cacheDirectory;
  /** Path of `index.json` inside `cacheDirectory`. */
  indexPath;
  /** Parse results keyed by local document ID. */
  resultCache = /* @__PURE__ */ new Map();

  /**
   * @param {string} [cacheDirectory] - Store root; defaults to
   *   `~/.knowhere-node-sdk/knowledge`.
   */
  constructor(cacheDirectory) {
    this.cacheDirectory = cacheDirectory ?? import_path2.default.join(import_os.default.homedir(), ".knowhere-node-sdk", "knowledge");
    this.indexPath = import_path2.default.join(this.cacheDirectory, "index.json");
  }

  /**
   * Persist a parse result and record it in the index.
   *
   * Any existing directory for the same local document ID is removed first.
   * The document entry is placed at the front of the index (keeping the
   * original `createdAt` on overwrite), and an async-parse-job entry with the
   * same `jobId` is marked `"cached"`.
   *
   * @param result - Parse result to store.
   * @param {{localDocumentId?: string}} [options] - Explicit ID; otherwise one
   *   is derived with `createLocalDocumentId`.
   * @returns The stored document record with `Date` timestamps.
   * @throws {Error} When the local document ID is not a safe slug.
   */
  async saveResult(result, options) {
    await import_fs3.promises.mkdir(this.cacheDirectory, { recursive: true });
    const now = /* @__PURE__ */ new Date();
    const index = await this.readIndex();
    const localDocumentId = validateLocalDocumentId(
      options?.localDocumentId ?? createLocalDocumentId(result)
    );
    const resultDirectoryPath = this.getResultDirectoryPath(localDocumentId);
    await import_fs3.promises.rm(resultDirectoryPath, { recursive: true, force: true });
    await saveExpandedParseResult(result, resultDirectoryPath);
    this.resultCache.set(localDocumentId, result);
    const existing = index.documents.find(
      (document) => document.localDocumentId === localDocumentId
    );
    const stored = {
      localDocumentId,
      jobId: result.jobId,
      documentId: result.documentId,
      namespace: result.namespace,
      sourceFileName: result.manifest.sourceFileName,
      chunkCount: result.chunks.length,
      typeCounts: countChunkTypes(result),
      resultDirectoryPath,
      createdAt: existing?.createdAt ?? now.toISOString(),
      updatedAt: now.toISOString()
    };
    const nextDocuments = [
      stored,
      ...index.documents.filter((document) => document.localDocumentId !== localDocumentId)
    ];
    const asyncParseJobs = (index.asyncParseJobs ?? []).map(
      (job) => job.jobId === result.jobId ? {
        ...job,
        localDocumentId,
        cacheStatus: "cached",
        updatedAt: now.toISOString()
      } : job
    );
    await this.writeIndex({
      version: STORE_VERSION,
      documents: nextDocuments,
      asyncParseJobs
    });
    return toLocalKnowledgeDocument(stored);
  }

  /**
   * Insert or refresh the index entry for an async parse job.
   *
   * An existing entry keeps its `createdAt`, its `cacheStatus`, and (unless a
   * new one is supplied) its `localDocumentId`; a new entry starts as
   * `"pending"`. The entry is moved to the front of the job list.
   *
   * @param {{jobId: string, localDocumentId?: string}} params
   * @throws {Error} When a supplied local document ID is not a safe slug.
   */
  async saveAsyncParseJob(params) {
    const now = (/* @__PURE__ */ new Date()).toISOString();
    const index = await this.readIndex();
    const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
    const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
    const stored = {
      jobId: params.jobId,
      localDocumentId: localDocumentId ?? existing?.localDocumentId,
      cacheStatus: existing?.cacheStatus ?? "pending",
      createdAt: existing?.createdAt ?? now,
      updatedAt: now
    };
    await this.writeIndex({
      version: STORE_VERSION,
      documents: index.documents,
      asyncParseJobs: [
        stored,
        ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
      ]
    });
  }

  /**
   * Look up the raw index entry for an async parse job.
   *
   * @param {string} jobId
   * @returns The stored entry (timestamps as ISO strings) or `undefined`.
   */
  async getAsyncParseJob(jobId) {
    const index = await this.readIndex();
    return (index.asyncParseJobs ?? []).find((job) => job.jobId === jobId);
  }

  /**
   * List async parse jobs whose cache status is `"pending"` or
   * `"not_available"`, with timestamps converted to `Date`.
   */
  async listRecoverableAsyncParseJobs() {
    const index = await this.readIndex();
    return (index.asyncParseJobs ?? []).filter((job) => job.cacheStatus === "pending" || job.cacheStatus === "not_available").map(toLocalKnowledgeAsyncParseJob);
  }

  /**
   * Change the cache status of a known async parse job; unknown job IDs are
   * ignored without writing the index.
   *
   * @param {{jobId: string, cacheStatus: string, localDocumentId?: string}} params
   * @throws {Error} When a supplied local document ID is not a safe slug.
   */
  async updateAsyncParseJobCacheStatus(params) {
    const index = await this.readIndex();
    const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
    const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
    if (!existing) {
      return;
    }
    const now = (/* @__PURE__ */ new Date()).toISOString();
    const stored = {
      ...existing,
      localDocumentId: localDocumentId ?? existing.localDocumentId,
      cacheStatus: params.cacheStatus,
      updatedAt: now
    };
    await this.writeIndex({
      version: STORE_VERSION,
      documents: index.documents,
      asyncParseJobs: [
        stored,
        ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
      ]
    });
  }

  /** List every indexed document, with timestamps converted to `Date`. */
  async listDocuments() {
    const index = await this.readIndex();
    return index.documents.map(toLocalKnowledgeDocument);
  }

  /**
   * Look up one indexed document.
   *
   * @param {string} localDocumentId
   * @returns The document record, or `undefined` when it is not indexed.
   * @throws {Error} When the ID is not a safe slug.
   */
  async getDocument(localDocumentId) {
    validateLocalDocumentId(localDocumentId);
    const index = await this.readIndex();
    const stored = index.documents.find((document) => document.localDocumentId === localDocumentId);
    return stored ? toLocalKnowledgeDocument(stored) : void 0;
  }

  /**
   * Load a stored parse result, preferring the in-memory copy.
   *
   * A result read from disk gets `namespace` and `documentId` copied from the
   * index entry before it is cached.
   *
   * @param {string} localDocumentId
   * @returns {Promise<{document: object, result: object}>}
   * @throws {Error} When the document is not in the index.
   */
  async loadResult(localDocumentId) {
    const document = await this.getDocument(localDocumentId);
    if (!document) {
      throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
    }
    const cachedResult = this.resultCache.get(localDocumentId);
    if (cachedResult) {
      return { document, result: cachedResult };
    }
    const result = await this.loadStoredResult(document);
    result.namespace = document.namespace;
    result.documentId = document.documentId;
    this.resultCache.set(localDocumentId, result);
    return { document, result };
  }

  /**
   * Compute the directory that holds one document's expanded result.
   *
   * @param {string} localDocumentId
   * @returns {string} Absolute path under `<cacheDirectory>/documents`.
   * @throws {Error} When the ID would resolve outside that directory.
   */
  getResultDirectoryPath(localDocumentId) {
    const documentsDirectory = import_path2.default.resolve(this.cacheDirectory, "documents");
    const resultDirectoryPath = import_path2.default.resolve(documentsDirectory, localDocumentId);
    if (!isPathInsideDirectory(resultDirectoryPath, documentsDirectory)) {
      throw new Error(`Local Knowhere document ID resolves outside the cache: ${localDocumentId}`);
    }
    return resultDirectoryPath;
  }

  /** Read a document's result back from its recorded result directory. */
  async loadStoredResult(document) {
    return parseResultDirectory(document.resultDirectoryPath);
  }

  /**
   * Read `index.json`.
   *
   * A missing file, a different `version`, or a non-array `documents` field
   * all yield an empty index. Other failures (including malformed JSON) throw.
   */
  async readIndex() {
    try {
      const raw = await import_fs3.promises.readFile(this.indexPath, "utf8");
      const parsed = JSON.parse(raw);
      if (parsed.version !== STORE_VERSION || !Array.isArray(parsed.documents)) {
        return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
      }
      return {
        version: STORE_VERSION,
        documents: parsed.documents,
        asyncParseJobs: Array.isArray(parsed.asyncParseJobs) ? parsed.asyncParseJobs : []
      };
    } catch (error) {
      if (isMissingFileError2(error)) {
        return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
      }
      throw error;
    }
  }

  /**
   * Replace `index.json` with the given index.
   *
   * The JSON is written to a uniquely named sibling file and then renamed
   * over `index.json`, so an interrupted write cannot leave a truncated index
   * that would make every later `readIndex` throw.
   */
  async writeIndex(index) {
    await import_fs3.promises.mkdir(this.cacheDirectory, { recursive: true });
    const uniqueSuffix = (0, import_crypto.randomBytes)(6).toString("hex");
    const temporaryPath = `${this.indexPath}.${process.pid}.${uniqueSuffix}.tmp`;
    try {
      await import_fs3.promises.writeFile(temporaryPath, JSON.stringify(index, null, 2));
      await import_fs3.promises.rename(temporaryPath, this.indexPath);
    } catch (error) {
      // Best-effort cleanup only; the original failure is what the caller needs.
      await import_fs3.promises.rm(temporaryPath, { force: true }).catch(() => void 0);
      throw error;
    }
  }
};
|
|
1681
|
+
/**
 * Check that a local document ID is safe to use as a directory name.
 *
 * The ID must match `LOCAL_DOCUMENT_ID_PATTERN`, must not contain "..", and
 * must equal its own `path.basename`.
 *
 * @param {string} localDocumentId - Candidate ID.
 * @returns {string} The same ID when it is acceptable.
 * @throws {Error} When any of the checks fails.
 */
function validateLocalDocumentId(localDocumentId) {
  const isSafeSlug =
    LOCAL_DOCUMENT_ID_PATTERN.test(localDocumentId) &&
    !localDocumentId.includes("..") &&
    import_path2.default.basename(localDocumentId) === localDocumentId;
  if (isSafeSlug) {
    return localDocumentId;
  }
  throw new Error(
    "Local Knowhere document ID must be a safe slug containing only letters, numbers, dots, underscores, or hyphens"
  );
}
|
|
1689
|
+
/**
 * Tell whether `targetPath` is `parentDirectory` itself or lies beneath it.
 *
 * @param {string} targetPath - Path to test.
 * @param {string} parentDirectory - Directory that should contain it.
 * @returns {boolean} `true` when the target does not escape the parent.
 */
function isPathInsideDirectory(targetPath, parentDirectory) {
  const relativePath = import_path2.default.relative(parentDirectory, targetPath);
  if (relativePath.length === 0) {
    return true;
  }
  // An absolute "relative" path means the two are on different roots/drives.
  if (import_path2.default.isAbsolute(relativePath)) {
    return false;
  }
  // Only a leading ".." *segment* leaves the parent; a child whose name merely
  // begins with two dots (e.g. "..cache") is still inside.
  const [firstSegment] = relativePath.split(import_path2.default.sep);
  return firstSegment !== "..";
}
|
|
1693
|
+
/**
 * Derive a deterministic local document ID from a parse result.
 *
 * The ID is `local_` followed by the first 16 hex characters of the SHA-256
 * digest of the job ID, a NUL separator and the source file name.
 *
 * @param result Parse result; reads `jobId` and `manifest.sourceFileName`.
 * @returns The generated ID.
 */
function createLocalDocumentId(result) {
  const { createHash } = import_crypto;
  const hasher = createHash("sha256");
  hasher.update(result.jobId);
  hasher.update("\0");
  hasher.update(result.manifest.sourceFileName);
  const shortDigest = hasher.digest("hex").slice(0, 16);
  return `local_${shortDigest}`;
}
|
|
1697
|
+
/**
 * Count the chunks of a parse result per chunk type.
 *
 * The tally always contains `text`, `image` and `table`. A chunk type outside
 * that set gets its own counter starting at zero, instead of turning into
 * `NaN` (which `undefined + 1` would produce).
 *
 * @param result Parse result; reads `chunks[].type`.
 * @returns Object mapping chunk type to number of chunks.
 */
function countChunkTypes(result) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const chunk of result.chunks) {
    counts[chunk.type] = (counts[chunk.type] ?? 0) + 1;
  }
  return counts;
}
|
|
1706
|
+
/**
 * Convert a stored document record into its public shape.
 *
 * Copies the known fields one by one (anything else on the record is left
 * out) and wraps `createdAt` / `updatedAt` in `Date` objects.
 *
 * @param stored Record as kept in the index.
 * @returns Document object with `Date` timestamps.
 */
function toLocalKnowledgeDocument(stored) {
  const {
    localDocumentId,
    jobId,
    documentId,
    namespace,
    sourceFileName,
    chunkCount,
    typeCounts,
    resultDirectoryPath
  } = stored;
  const createdAt = new Date(stored.createdAt);
  const updatedAt = new Date(stored.updatedAt);
  return {
    localDocumentId,
    jobId,
    documentId,
    namespace,
    sourceFileName,
    chunkCount,
    typeCounts,
    resultDirectoryPath,
    createdAt,
    updatedAt
  };
}
|
|
1720
|
+
/**
 * Convert a stored async-parse-job record into its public shape.
 *
 * Every own property is copied as is; `createdAt` and `updatedAt` are replaced
 * by `Date` objects built from the stored values.
 *
 * @param stored Record as kept in the index.
 * @returns Shallow copy with `Date` timestamps.
 */
function toLocalKnowledgeAsyncParseJob(stored) {
  const job = Object.assign({}, stored);
  job.createdAt = new Date(stored.createdAt);
  job.updatedAt = new Date(stored.updatedAt);
  return job;
}
|
|
1727
|
+
/**
 * Check whether a thrown value is a "file not found" error.
 *
 * @param error Any caught value.
 * @returns `true` only for non-null objects whose `code` is "ENOENT".
 */
function isMissingFileError2(error) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
|
|
1730
|
+
|
|
1731
|
+
// src/knowledge/knowledge.ts
|
|
1732
|
+
// Number of chunks readChunks returns when the caller passes no `limit`.
var DEFAULT_READ_LIMIT = 12;
// Upper bound readChunks applies to a caller-supplied `limit`.
var MAX_READ_LIMIT = 40;
// Number of matches grepChunks returns when the caller passes no `maxResults`.
var DEFAULT_GREP_LIMIT = 20;
// Upper bound grepChunks applies to a caller-supplied `maxResults`.
var MAX_GREP_LIMIT = 50;
// Characters of context kept on each side of a grep match when `contextChars` is not given.
var DEFAULT_CONTEXT_CHARS = 80;
|
|
1737
|
+
/**
 * Client-side local knowledge tools over parsed Knowhere results.
 *
 * Combines a Knowhere client (remote parsing, job lookup, retrieval) with a
 * `LocalKnowledgeStore` that keeps parsed results and tracks async parse jobs.
 */
var Knowledge = class _Knowledge {
  client;
  store;
  /**
   * @param client Knowhere client used for all remote calls.
   * @param options Optional settings; `cacheDirectory` is forwarded to the store.
   */
  constructor(client, options) {
    this.client = client;
    this.store = new LocalKnowledgeStore(options?.cacheDirectory);
  }
  /** Create a sibling instance that shares the client but uses another cache directory. */
  withCacheDirectory(cacheDirectory) {
    return new _Knowledge(this.client, { cacheDirectory });
  }
  /** Parse through the client, then save the result in the local store. */
  async parse(params) {
    const parsed = await this.client.parse(params);
    const saveOptions = { localDocumentId: params.localDocumentId };
    const saved = await this.store.saveResult(parsed, saveOptions);
    return { document: saved, result: parsed };
  }
  /** Start a parse job through the client and record it in the store for later pickup. */
  async startParse(params) {
    const job = await this.client.startParse(params);
    await this.store.saveAsyncParseJob({
      jobId: job.jobId,
      localDocumentId: params.localDocumentId
    });
    return { job, localDocumentId: params.localDocumentId };
  }
  /** Fetch the remote job and resolve the local cache state that goes with it. */
  async getJobStatus(jobId) {
    const job = await this.client.jobs.get(jobId);
    const cache = await this.resolveAsyncCache(jobId, job.isDone, job.isFailed);
    return { job, cache };
  }
  /** Check every recoverable tracked job, one after another. */
  async recoverPendingAsyncParseJobs() {
    const pendingJobs = await this.store.listRecoverableAsyncParseJobs();
    const results = [];
    for (let i = 0; i < pendingJobs.length; i += 1) {
      const status = await this.getJobStatus(pendingJobs[i].jobId);
      results.push(status);
    }
    return { checkedJobs: pendingJobs.length, results };
  }
  /** Load a finished job's result through the client and save it in the local store. */
  async cacheJobResult(params) {
    const loadOptions = { verifyChecksum: params.verifyChecksum };
    const loaded = await this.client.jobs.load(params.jobId, loadOptions);
    const saved = await this.store.saveResult(loaded, {
      localDocumentId: params.localDocumentId
    });
    return { document: saved, result: loaded };
  }
  /**
   * Work out the cache state of a tracked async job and, when the job is done,
   * try to cache its result.
   *
   * Possible `status` values: "untracked", "already_cached", "failed",
   * "pending", "cached", "not_available".
   */
  async resolveAsyncCache(jobId, isDone, isFailed) {
    const tracked = await this.store.getAsyncParseJob(jobId);
    if (!tracked) {
      return { status: "untracked" };
    }
    const claimsCached = tracked.cacheStatus === "cached" && tracked.localDocumentId;
    if (claimsCached) {
      // Only trust the "cached" flag when the document is still in the store.
      const cachedDocument = await this.store.getDocument(tracked.localDocumentId);
      if (cachedDocument) {
        return {
          status: "already_cached",
          localDocumentId: tracked.localDocumentId,
          document: cachedDocument
        };
      }
    }
    if (isFailed) {
      await this.store.updateAsyncParseJobCacheStatus({ jobId, cacheStatus: "failed" });
      return { status: "failed", localDocumentId: tracked.localDocumentId };
    }
    if (!isDone) {
      return { status: "pending", localDocumentId: tracked.localDocumentId };
    }
    try {
      const { document } = await this.cacheJobResult({
        jobId,
        localDocumentId: tracked.localDocumentId
      });
      return {
        status: "cached",
        localDocumentId: document.localDocumentId,
        document
      };
    } catch (error) {
      await this.store.updateAsyncParseJobCacheStatus({ jobId, cacheStatus: "not_available" });
      const message = error instanceof Error ? error.message : String(error);
      return {
        status: "not_available",
        localDocumentId: tracked.localDocumentId,
        error: message
      };
    }
  }
  /** List the documents held by the local store. */
  async listDocuments() {
    return this.store.listDocuments();
  }
  /** Build the flat section list and the section tree of a stored document. */
  async getDocumentOutline(localDocumentId) {
    const { document, result } = await this.store.loadResult(localDocumentId);
    const chunks = indexChunks(result);
    const sections = buildFlatSections(result, chunks);
    const navSections = result.docNav?.sections;
    let sectionTree;
    if (navSections && navSections.length > 0) {
      sectionTree = navSections.map(
        (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
      );
    } else {
      sectionTree = nestSections(sections);
    }
    return {
      document,
      totalChunks: chunks.length,
      typeCounts: document.typeCounts,
      sections,
      sectionTree
    };
  }
  /** Read a window of chunks from a stored document, with a cursor to the next one. */
  async readChunks(params) {
    const { document, result } = await this.store.loadResult(params.localDocumentId);
    const limit = clampLimit(params.limit, DEFAULT_READ_LIMIT, MAX_READ_LIMIT);
    const scoped = indexChunks(result).filter((chunk) => matchesReadScope(chunk, params));
    const selected = selectReadWindow(scoped, params, limit);
    const lastSelected = selected[selected.length - 1];
    let nextChunk;
    if (lastSelected && selected.length < scoped.length) {
      const following = scoped[scoped.indexOf(lastSelected) + 1];
      nextChunk = following?.position;
    }
    return {
      document,
      chunks: selected.map(toReadChunk),
      nextChunk
    };
  }
  /** Search chunk contents of a stored document for a literal or regex pattern. */
  async grepChunks(params) {
    if (!params.pattern) {
      throw new ValidationError("pattern is required");
    }
    const { document, result } = await this.store.loadResult(params.localDocumentId);
    const maxResults = clampLimit(params.maxResults, DEFAULT_GREP_LIMIT, MAX_GREP_LIMIT);
    const contextChars = params.contextChars ?? DEFAULT_CONTEXT_CHARS;
    const matcher = createMatcher(params);
    const scopedChunks = indexChunks(result).filter((chunk) => matchesGrepScope(chunk, params));
    const matches = [];
    for (const [chunkIndex, chunk] of scopedChunks.entries()) {
      for (const { startOffset, endOffset } of matcher(chunk.content)) {
        matches.push({
          position: chunk.position,
          chunkId: chunk.chunkId,
          chunkType: chunk.chunkType,
          sectionPath: chunk.sectionPath,
          sourceChunkPath: chunk.sourceChunkPath,
          filePath: chunk.filePath,
          startOffset,
          endOffset,
          snippet: buildSnippet(chunk.content, startOffset, endOffset, contextChars)
        });
        if (matches.length >= maxResults) {
          // Stop as soon as the cap is hit; the current chunk counts as scanned.
          return { document, matches, scannedChunks: chunkIndex + 1, truncated: true };
        }
      }
    }
    return { document, matches, scannedChunks: scopedChunks.length, truncated: false };
  }
  /** Run a remote retrieval query and map server document IDs back to local documents. */
  async search(params) {
    const localDocuments = await this.resolveSearchDocuments(params.localDocumentIds);
    const rawResponse = await this.client.retrieval.query({
      query: params.query,
      namespace: params.namespace,
      topK: params.topK,
      useAgentic: params.useAgentic ?? false
    });
    const documentByServerId = /* @__PURE__ */ new Map();
    for (const localDocument of localDocuments) {
      if (localDocument.documentId) {
        documentByServerId.set(localDocument.documentId, localDocument);
      }
    }
    const chunkReferences = rawResponse.referencedChunks.map((reference) => {
      const serverId = reference.documentId;
      return {
        localDocumentId: serverId ? documentByServerId.get(serverId)?.localDocumentId : void 0,
        documentId: serverId,
        chunkId: reference.chunkId,
        sectionPath: reference.sectionPath,
        chunkType: reference.chunkType
      };
    });
    const resultReferences = rawResponse.results.map(
      (result) => toResultReference(result, documentByServerId)
    );
    const results = rawResponse.results.map(
      (result) => toRemoteSearchResult(result, documentByServerId)
    );
    return {
      namespace: rawResponse.namespace,
      query: rawResponse.query,
      evidenceText: rawResponse.evidenceText,
      references: [...chunkReferences, ...resultReferences],
      results,
      rawResponse
    };
  }
  /** Return all stored documents, or only those whose local ID was requested. */
  async resolveSearchDocuments(localDocumentIds) {
    const documents = await this.store.listDocuments();
    const hasFilter = Boolean(localDocumentIds) && localDocumentIds.length !== 0;
    if (!hasFilter) {
      return documents;
    }
    const requested = new Set(localDocumentIds);
    return documents.filter(({ localDocumentId }) => requested.has(localDocumentId));
  }
};
|
|
1951
|
+
/**
 * Wrap every chunk of a parse result in an indexed view.
 *
 * Each view carries the original chunk (`source`), its 1-based `position`,
 * the normalised `sectionPath`, the raw `sourceChunkPath`, and a resolved
 * `filePath`.
 *
 * @param result Parse result; reads `chunks` and `manifest.sourceFileName`.
 * @returns Array of indexed chunk views in original order.
 */
function indexChunks(result) {
  const toIndexedChunk = (chunk, index) => {
    const filePath = getChunkFilePath2(chunk);
    const sectionPath = normalizeSectionPath(chunk.path, result.manifest.sourceFileName);
    return {
      source: chunk,
      position: index + 1,
      chunkId: chunk.chunkId,
      chunkType: chunk.type,
      content: chunk.content,
      sectionPath,
      sourceChunkPath: chunk.path,
      filePath,
      metadata: chunk.metadata
    };
  };
  return result.chunks.map(toIndexedChunk);
}
|
|
1967
|
+
/**
 * Resolve the file path associated with a chunk.
 *
 * Image and table chunks expose it as `chunk.filePath`; for every other type
 * it is taken from `chunk.metadata.filePath`, but only when that is a string.
 *
 * @param chunk Chunk to inspect.
 * @returns The file path, or `undefined` when none is available.
 */
function getChunkFilePath2(chunk) {
  switch (chunk.type) {
    case "image":
    case "table":
      return chunk.filePath;
    default: {
      const candidate = chunk.metadata.filePath;
      if (typeof candidate === "string") {
        return candidate;
      }
      return void 0;
    }
  }
}
|
|
1974
|
+
/**
 * Turn a raw slash-separated chunk path into a display section path.
 *
 * - Empty input gives "".
 * - Paths under `images/` or `tables/` are returned untouched.
 * - When a segment equals `sourceFileName`, everything after it is joined
 *   with " / " (falling back to the file name when nothing follows).
 * - Otherwise the first segment is dropped, unless it is the only one.
 *
 * @param path3 Raw chunk or section path.
 * @param sourceFileName Name of the source file, if known.
 * @returns Normalised section path.
 */
function normalizeSectionPath(path3, sourceFileName) {
  if (!path3) {
    return "";
  }
  const isAssetPath = ["images/", "tables/"].some((prefix) => path3.startsWith(prefix));
  if (isAssetPath) {
    return path3;
  }
  const segments = path3.split("/").filter((segment) => segment.length > 0);
  const anchorIndex = sourceFileName ? segments.indexOf(sourceFileName) : -1;
  if (anchorIndex >= 0) {
    const remainder = segments.slice(anchorIndex + 1).join(" / ");
    return remainder || sourceFileName;
  }
  const [first, ...rest] = segments;
  if (rest.length === 0) {
    return first ?? "";
  }
  return rest.join(" / ");
}
|
|
1993
|
+
/**
 * Produce the flat list of sections for a document.
 *
 * When the result carries a non-empty `docNav.sections` tree, that tree is
 * converted and flattened. Otherwise sections are derived from the chunks
 * themselves, grouped by section path and ordered with `compareSections`.
 *
 * @param result Parse result; reads `docNav` and `manifest.sourceFileName`.
 * @param chunks Indexed chunks of the same result.
 * @returns Flat array of sections.
 */
function buildFlatSections(result, chunks) {
  const navSections = result.docNav?.sections;
  if (navSections && navSections.length > 0) {
    const tree = navSections.map(
      (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
    );
    return flattenSections(tree);
  }
  const sectionsByPath = /* @__PURE__ */ new Map();
  chunks.forEach((chunk) => {
    const key = chunk.sectionPath || chunk.sourceChunkPath;
    const known = sectionsByPath.get(key);
    if (!known) {
      sectionsByPath.set(key, createSectionFromChunk(key, chunk));
      return;
    }
    addChunkToSection(known, chunk);
  });
  return Array.from(sectionsByPath.values()).sort(compareSections);
}
|
|
2013
|
+
/**
 * Convert a navigation section (and, recursively, its children) into a
 * knowledge section annotated with chunk statistics.
 *
 * A chunk counts towards a section when `isInSection` accepts its section
 * path for the section's normalised path.
 *
 * @param section Navigation node; reads `path`, `title`, `level`, `summary`, `children`.
 * @param chunks Indexed chunks of the whole document.
 * @param sourceFileName Source file name used for path normalisation.
 * @returns Knowledge section with chunk range, counts and converted children.
 */
function toKnowledgeSection(section, chunks, sourceFileName) {
  const sectionPath = normalizeSectionPath(section.path, sourceFileName);
  const members = chunks.filter((chunk) => isInSection(chunk.sectionPath, sectionPath));
  const children = [];
  for (const child of section.children) {
    children.push(toKnowledgeSection(child, chunks, sourceFileName));
  }
  const { title, level, summary } = section;
  return {
    sectionPath,
    sectionTitle: title,
    sectionLevel: level,
    summary,
    startChunk: minPosition(members),
    endChunk: maxPosition(members),
    chunkCount: members.length,
    typeCounts: countIndexedTypes(members),
    children
  };
}
|
|
2031
|
+
/**
 * Create a section entry from the first chunk found at a section path.
 *
 * Title is the last " / "-separated segment (or the whole path when there is
 * none); level is the number of segments, at least 1.
 *
 * @param pathValue Section path the chunk belongs to.
 * @param chunk Indexed chunk; reads `position` and `chunkType`.
 * @returns New section covering exactly that chunk, with no children.
 */
function createSectionFromChunk(pathValue, chunk) {
  const segments = pathValue.split(" / ").filter(Boolean);
  const depth = segments.length;
  const lastSegment = segments[depth - 1];
  const { position } = chunk;
  return {
    sectionPath: pathValue,
    sectionTitle: lastSegment ?? pathValue,
    sectionLevel: depth > 0 ? depth : 1,
    startChunk: position,
    endChunk: position,
    chunkCount: 1,
    typeCounts: { text: 0, image: 0, table: 0, [chunk.chunkType]: 1 },
    children: []
  };
}
|
|
2044
|
+
/**
 * Fold one more chunk into an existing section, in place.
 *
 * Widens the section's chunk range, bumps `chunkCount`, and increments the
 * counter for the chunk's type. A type that has no counter yet starts from
 * zero, so the tally never becomes `NaN`.
 *
 * @param section Section to update (mutated).
 * @param chunk Indexed chunk; reads `position` and `chunkType`.
 */
function addChunkToSection(section, chunk) {
  const { position, chunkType } = chunk;
  section.startChunk = Math.min(section.startChunk ?? position, position);
  section.endChunk = Math.max(section.endChunk ?? position, position);
  section.chunkCount += 1;
  section.typeCounts[chunkType] = (section.typeCounts[chunkType] ?? 0) + 1;
}
|
|
2050
|
+
/**
 * Flatten a section tree into a list, parents before their children
 * (depth-first, pre-order).
 *
 * @param sections Root sections; each needs a `children` array.
 * @returns New array holding the same section objects.
 */
function flattenSections(sections) {
  const flattened = [];
  const visit = (nodes) => {
    for (const node of nodes) {
      flattened.push(node);
      visit(node.children);
    }
  };
  visit(sections);
  return flattened;
}
|
|
2053
|
+
/**
 * Rebuild a tree from a flat section list using the section paths.
 *
 * Works on shallow copies (each with a fresh `children` array), so the input
 * sections are not modified. A section whose parent path is not present in
 * the list becomes a root.
 *
 * @param sections Flat list of sections.
 * @returns Root sections with nested `children`.
 */
function nestSections(sections) {
  const copies = sections.map((section) => Object.assign({}, section, { children: [] }));
  const copyByPath = /* @__PURE__ */ new Map();
  for (const copy of copies) {
    copyByPath.set(copy.sectionPath, copy);
  }
  const roots = [];
  copies.forEach((copy) => {
    const parentPath = getParentSectionPath(copy.sectionPath);
    const parent = parentPath ? copyByPath.get(parentPath) : void 0;
    const siblings = parent ? parent.children : roots;
    siblings.push(copy);
  });
  return roots;
}
|
|
2071
|
+
/**
 * Compute the path of a section's parent.
 *
 * @param sectionPath Section path whose segments are separated by " / ".
 * @returns The path without its last segment, or `undefined` for a
 *          top-level (or empty) path.
 */
function getParentSectionPath(sectionPath) {
  const segments = sectionPath.split(" / ").filter(Boolean);
  if (segments.length > 1) {
    segments.pop();
    return segments.join(" / ");
  }
  return void 0;
}
|
|
2078
|
+
/**
 * Sort comparator ordering sections by their first chunk position.
 * Sections without a `startChunk` sort last.
 *
 * @returns Negative, zero or positive number as expected by `Array.prototype.sort`.
 */
function compareSections(left, right) {
  const rankOf = (section) => section.startChunk ?? Number.MAX_SAFE_INTEGER;
  return rankOf(left) - rankOf(right);
}
|
|
2081
|
+
/**
 * Find the smallest `position` among the given chunks.
 *
 * Uses a fold rather than `Math.min(...values)`: spreading passes every
 * element as a separate argument, which can exceed the engine's argument
 * limit and throw a RangeError on very large chunk lists.
 *
 * @param chunks Indexed chunks.
 * @returns The minimum position, or `undefined` for an empty list.
 */
function minPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((lowest, chunk) => Math.min(lowest, chunk.position), Infinity);
}
|
|
2087
|
+
/**
 * Find the largest `position` among the given chunks.
 *
 * Uses a fold rather than `Math.max(...values)`: spreading passes every
 * element as a separate argument, which can exceed the engine's argument
 * limit and throw a RangeError on very large chunk lists.
 *
 * @param chunks Indexed chunks.
 * @returns The maximum position, or `undefined` for an empty list.
 */
function maxPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  return chunks.reduce((highest, chunk) => Math.max(highest, chunk.position), -Infinity);
}
|
|
2093
|
+
/**
 * Count indexed chunks per chunk type.
 *
 * The tally always contains `text`, `image` and `table`. A chunk type outside
 * that set gets its own counter starting at zero, instead of turning into
 * `NaN` (which `undefined + 1` would produce).
 *
 * @param chunks Indexed chunks; reads `chunkType`.
 * @returns Object mapping chunk type to number of chunks.
 */
function countIndexedTypes(chunks) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const chunk of chunks) {
    counts[chunk.chunkType] = (counts[chunk.chunkType] ?? 0) + 1;
  }
  return counts;
}
|
|
2102
|
+
/**
 * Decide whether a chunk belongs to a section or to one of its descendants.
 *
 * @param chunkSectionPath Section path of the chunk.
 * @param sectionPath Section path to test against.
 * @returns `true` when the paths are equal or the chunk path continues the
 *          section path with a " / " separator.
 */
function isInSection(chunkSectionPath, sectionPath) {
  if (chunkSectionPath === sectionPath) {
    return true;
  }
  const descendantPrefix = `${sectionPath} / `;
  return chunkSectionPath.startsWith(descendantPrefix);
}
|
|
2105
|
+
/**
 * Normalise a caller-supplied limit.
 *
 * @param value Requested limit; may be `undefined`.
 * @param defaultValue Limit used when none (or no usable number) is given.
 * @param maxValue Largest limit allowed.
 * @returns `defaultValue` when `value` is `undefined` or not a number;
 *          otherwise `value` rounded down and clamped to `[1, maxValue]`.
 */
function clampLimit(value, defaultValue, maxValue) {
  if (value === void 0) {
    return defaultValue;
  }
  const floored = Math.floor(value);
  // NaN would survive Math.max/Math.min and end up as a slice bound or a
  // comparison operand, so treat it the same as "no limit given".
  if (Number.isNaN(floored)) {
    return defaultValue;
  }
  return Math.min(Math.max(floored, 1), maxValue);
}
|
|
2111
|
+
/**
 * Check whether a chunk passes the optional filters of a read request.
 *
 * Filters that are not set are ignored; the checks run in the order chunk ID,
 * chunk type, section path, stopping at the first one that fails.
 *
 * @param chunk Indexed chunk.
 * @param params Read parameters; reads `chunkId`, `chunkType`, `sectionPath`.
 * @returns `true` when every set filter matches.
 */
function matchesReadScope(chunk, params) {
  return (
    (!params.chunkId || chunk.chunkId === params.chunkId) &&
    (!params.chunkType || chunk.chunkType === params.chunkType) &&
    (!params.sectionPath || isInSection(chunk.sectionPath, params.sectionPath))
  );
}
|
|
2123
|
+
/**
 * Pick the chunks to return for a read request.
 *
 * With a `chunkId` filter the first `limit` chunks are returned as is.
 * Otherwise chunks are restricted to the inclusive position range
 * `[startChunk, endChunk]` (defaulting to the first chunk's position, or 1,
 * and to no upper bound) before the limit is applied.
 *
 * @param chunks Chunks already filtered by scope.
 * @param params Read parameters; reads `chunkId`, `startChunk`, `endChunk`.
 * @param limit Maximum number of chunks to return.
 * @returns The selected chunks.
 */
function selectReadWindow(chunks, params, limit) {
  if (params.chunkId) {
    return chunks.slice(0, limit);
  }
  const firstPosition = params.startChunk ?? chunks[0]?.position ?? 1;
  const lastPosition = params.endChunk ?? Number.MAX_SAFE_INTEGER;
  const isInWindow = ({ position }) => position >= firstPosition && position <= lastPosition;
  const inWindow = chunks.filter(isInWindow);
  return inWindow.slice(0, limit);
}
|
|
2131
|
+
/**
 * Reduce an indexed chunk to the fields returned by a read request
 * (everything except the `source` back-reference).
 *
 * @param chunk Indexed chunk.
 * @returns Plain object with position, ids, content, paths and metadata.
 */
function toReadChunk(chunk) {
  const {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  } = chunk;
  return {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  };
}
|
|
2143
|
+
/**
 * Check whether a chunk passes the optional filters of a grep request.
 *
 * @param chunk Indexed chunk.
 * @param params Grep parameters; reads `chunkType` and `sectionPathPrefix`.
 * @returns `true` when the chunk type matches (if set) and the chunk's
 *          section path starts with the given prefix (if set).
 */
function matchesGrepScope(chunk, params) {
  const typeMatches = !params.chunkType || chunk.chunkType === params.chunkType;
  if (!typeMatches) {
    return false;
  }
  const prefix = params.sectionPathPrefix;
  return !prefix || chunk.sectionPath.startsWith(prefix);
}
|
|
2152
|
+
/**
 * Build a function that finds every occurrence of a pattern in a string.
 *
 * @param params Grep parameters:
 *   - `pattern`: text or regular-expression source to look for;
 *   - `isRegex`: treat `pattern` as a regular expression;
 *   - `isCaseSensitive`: match case exactly (default is case-insensitive).
 * @returns `(content) => Array<{ startOffset, endOffset }>` listing matches
 *          in order of appearance, `endOffset` exclusive.
 * @throws {SyntaxError} From `new RegExp` when `isRegex` is set and the
 *         pattern is not a valid regular expression.
 */
function createMatcher(params) {
  if (params.isRegex) {
    const flags = params.isCaseSensitive ? "g" : "gi";
    const regex = new RegExp(params.pattern, flags);
    return (content) => {
      const matches = [];
      let sawEmptyMatch = false;
      for (const match of content.matchAll(regex)) {
        const startOffset = match.index ?? 0;
        const text = match[0] ?? "";
        if (text.length === 0) {
          // Report at most one zero-length match per content so patterns such
          // as `a*` cannot flood the result, but keep scanning: `matchAll`
          // advances past empty matches on its own, and later non-empty
          // matches must not be lost.
          if (sawEmptyMatch) {
            continue;
          }
          sawEmptyMatch = true;
        }
        matches.push({ startOffset, endOffset: startOffset + text.length });
      }
      return matches;
    };
  }
  const needle = params.isCaseSensitive ? params.pattern : params.pattern.toLowerCase();
  return (content) => {
    // NOTE(review): in case-insensitive mode offsets are computed on the
    // lowercased copy; for the few characters whose lowercase form has a
    // different length they may not line up with the original content.
    const haystack = params.isCaseSensitive ? content : content.toLowerCase();
    const matches = [];
    let index = haystack.indexOf(needle);
    while (index >= 0) {
      matches.push({ startOffset: index, endOffset: index + needle.length });
      // Step by at least one character so an empty needle cannot loop forever.
      index = haystack.indexOf(needle, index + Math.max(needle.length, 1));
    }
    return matches;
  };
}
|
|
2181
|
+
/**
 * Cut a snippet around a match, with some context on both sides.
 *
 * @param content Full text the match was found in.
 * @param startOffset Start of the match (inclusive).
 * @param endOffset End of the match (exclusive).
 * @param contextChars Characters of context to keep on each side. Negative
 *        or `NaN` values count as 0, so the snippet always contains at least
 *        the match itself.
 * @returns The slice of `content` covering match plus context, clipped to
 *          the bounds of the text.
 */
function buildSnippet(content, startOffset, endOffset, contextChars) {
  // `NaN > 0` is false, so this also neutralises NaN; Infinity still widens
  // the snippet to the whole content.
  const padding = contextChars > 0 ? contextChars : 0;
  const start = Math.max(0, startOffset - padding);
  const end = Math.min(content.length, endOffset + padding);
  return content.slice(start, end);
}
|
|
2186
|
+
/**
 * Build a reference entry from a remote search result.
 *
 * @param result Remote result; reads `source.documentId`, `source.sectionPath`,
 *        `chunkType` and `score`. `null` source fields become `undefined`.
 * @param documentByServerId Map from server document ID to local document.
 * @returns Reference with the matching local document ID, when one is known.
 */
function toResultReference(result, documentByServerId) {
  const { source, chunkType, score } = result;
  const documentId = source.documentId ?? void 0;
  let localDocumentId;
  if (documentId) {
    localDocumentId = documentByServerId.get(documentId)?.localDocumentId;
  }
  return {
    localDocumentId,
    documentId,
    sectionPath: source.sectionPath ?? void 0,
    chunkType,
    score
  };
}
|
|
2196
|
+
/**
 * Convert a remote search result into the shape returned by `search`.
 *
 * @param result Remote result; reads `chunkType`, `content`, `score` and the
 *        `source` fields `documentId`, `sectionPath`, `sourceFileName`.
 *        `null` source fields become `undefined`.
 * @param documentByServerId Map from server document ID to local document.
 * @returns Search result with the matching local document ID, when one is known.
 */
function toRemoteSearchResult(result, documentByServerId) {
  const { source, chunkType, content, score } = result;
  const documentId = source.documentId ?? void 0;
  let localDocumentId;
  if (documentId) {
    localDocumentId = documentByServerId.get(documentId)?.localDocumentId;
  }
  return {
    localDocumentId,
    documentId,
    chunkType,
    content,
    score,
    sectionPath: source.sectionPath ?? void 0,
    sourceFileName: source.sourceFileName ?? void 0
  };
}
|
|
2208
|
+
|
|
1251
2209
|
// src/client.ts
|
|
1252
2210
|
function inferFileName(file, explicitFileName) {
|
|
1253
2211
|
if (explicitFileName) {
|
|
1254
2212
|
return explicitFileName;
|
|
1255
2213
|
}
|
|
1256
2214
|
if (typeof file === "string") {
|
|
1257
|
-
return
|
|
2215
|
+
return import_path3.default.basename(file);
|
|
1258
2216
|
}
|
|
1259
2217
|
if (isReadStream2(file) && typeof file.path === "string") {
|
|
1260
|
-
return
|
|
2218
|
+
return import_path3.default.basename(file.path);
|
|
1261
2219
|
}
|
|
1262
2220
|
return void 0;
|
|
1263
2221
|
}
|
|
1264
2222
|
/**
 * Duck-type check for a stream-like value.
 *
 * @param file Any value.
 * @returns `true` for non-null objects that have a `pipe` function.
 */
function isReadStream2(file) {
  if (file === null || typeof file !== "object") {
    return false;
  }
  return "pipe" in file && typeof file.pipe === "function";
}
|
|
2225
|
+
/**
 * Collect the parsing options of a parse request into the object sent with
 * the job.
 *
 * Options left `undefined` are omitted (other values, including `null` and
 * `false`, are kept). The request's `ocr` option is sent as `ocrEnabled`.
 *
 * @param params Parse request parameters.
 * @returns The parsing options, or `undefined` when none was provided.
 */
function buildParsingParams(params) {
  const candidates = [
    ["model", params.model],
    ["ocrEnabled", params.ocr],
    ["docType", params.docType],
    ["smartTitleParse", params.smartTitleParse],
    ["summaryImage", params.summaryImage],
    ["summaryTable", params.summaryTable],
    ["summaryTxt", params.summaryTxt],
    ["addFragDesc", params.addFragDesc],
    ["kbDir", params.kbDir]
  ];
  const parsingParams = {};
  let provided = 0;
  for (const [key, value] of candidates) {
    if (value !== void 0) {
      parsingParams[key] = value;
      provided += 1;
    }
  }
  return provided > 0 ? parsingParams : void 0;
}
|
|
1267
2244
|
var Knowhere = class {
|
|
1268
2245
|
/** Jobs resource for low-level API */
|
|
1269
2246
|
jobs;
|
|
@@ -1271,21 +2248,25 @@ var Knowhere = class {
|
|
|
1271
2248
|
retrieval;
|
|
1272
2249
|
/** Documents resource for canonical document lifecycle operations */
|
|
1273
2250
|
documents;
|
|
2251
|
+
/** Client-side local knowledge tools over parsed Knowhere results */
|
|
2252
|
+
knowledge;
|
|
1274
2253
|
httpClient;
|
|
1275
2254
|
/**
|
|
1276
2255
|
* Create a new Knowhere client
|
|
1277
2256
|
*/
|
|
1278
2257
|
constructor(options = {}) {
|
|
1279
2258
|
const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
|
|
1280
|
-
|
|
2259
|
+
const authTokenProvider = apiKey ? void 0 : options.authTokenProvider;
|
|
2260
|
+
if (!apiKey && !authTokenProvider) {
|
|
1281
2261
|
throw new ValidationError(
|
|
1282
|
-
`API
|
|
2262
|
+
`API authentication is required. Provide it via options.apiKey, options.authTokenProvider, or ${ENV.API_KEY} environment variable.`
|
|
1283
2263
|
);
|
|
1284
2264
|
}
|
|
1285
2265
|
const baseURL = options.baseURL ?? process.env[ENV.BASE_URL] ?? DEFAULT_BASE_URL;
|
|
1286
2266
|
this.httpClient = new HttpClient({
|
|
1287
2267
|
baseURL,
|
|
1288
2268
|
apiKey,
|
|
2269
|
+
authTokenProvider,
|
|
1289
2270
|
timeout: options.timeout,
|
|
1290
2271
|
uploadTimeout: options.uploadTimeout,
|
|
1291
2272
|
maxRetries: options.maxRetries,
|
|
@@ -1296,6 +2277,7 @@ var Knowhere = class {
|
|
|
1296
2277
|
this.jobs = new Jobs(this.httpClient);
|
|
1297
2278
|
this.retrieval = new Retrieval(this.httpClient);
|
|
1298
2279
|
this.documents = new Documents(this.httpClient);
|
|
2280
|
+
this.knowledge = new Knowledge(this);
|
|
1299
2281
|
}
|
|
1300
2282
|
/**
|
|
1301
2283
|
* High-level API: Parse a document and return structured results
|
|
@@ -1318,6 +2300,24 @@ var Knowhere = class {
|
|
|
1318
2300
|
* ```
|
|
1319
2301
|
*/
|
|
1320
2302
|
async parse(params) {
|
|
2303
|
+
const job = await this.startParse(params);
|
|
2304
|
+
const jobResult = await this.jobs.wait(job.jobId, {
|
|
2305
|
+
pollInterval: params.pollInterval,
|
|
2306
|
+
pollTimeout: params.pollTimeout,
|
|
2307
|
+
onProgress: params.onPollProgress,
|
|
2308
|
+
signal: params.signal
|
|
2309
|
+
});
|
|
2310
|
+
const result = await this.jobs.load(jobResult, {
|
|
2311
|
+
verifyChecksum: params.verifyChecksum
|
|
2312
|
+
});
|
|
2313
|
+
return enrichParseResult(result, jobResult);
|
|
2314
|
+
}
|
|
2315
|
+
/**
|
|
2316
|
+
* Start a parse job and return immediately after the URL job is created or
|
|
2317
|
+
* the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
|
|
2318
|
+
* to inspect completion and load results later.
|
|
2319
|
+
*/
|
|
2320
|
+
async startParse(params) {
|
|
1321
2321
|
if (!params.url && !params.file) {
|
|
1322
2322
|
throw new ValidationError("Either url or file must be provided");
|
|
1323
2323
|
}
|
|
@@ -1331,22 +2331,6 @@ var Knowhere = class {
|
|
|
1331
2331
|
"fileName is required when file is a Buffer, Uint8Array, or stream without a path."
|
|
1332
2332
|
);
|
|
1333
2333
|
}
|
|
1334
|
-
const parsingParams = {
|
|
1335
|
-
model: params.model,
|
|
1336
|
-
ocrEnabled: params.ocr,
|
|
1337
|
-
docType: params.docType,
|
|
1338
|
-
smartTitleParse: params.smartTitleParse,
|
|
1339
|
-
summaryImage: params.summaryImage,
|
|
1340
|
-
summaryTable: params.summaryTable,
|
|
1341
|
-
summaryTxt: params.summaryTxt,
|
|
1342
|
-
addFragDesc: params.addFragDesc,
|
|
1343
|
-
kbDir: params.kbDir
|
|
1344
|
-
};
|
|
1345
|
-
Object.keys(parsingParams).forEach((key) => {
|
|
1346
|
-
if (parsingParams[key] === void 0) {
|
|
1347
|
-
delete parsingParams[key];
|
|
1348
|
-
}
|
|
1349
|
-
});
|
|
1350
2334
|
const webhook = params.webhook;
|
|
1351
2335
|
const job = await this.jobs.create({
|
|
1352
2336
|
sourceType,
|
|
@@ -1355,7 +2339,8 @@ var Knowhere = class {
|
|
|
1355
2339
|
dataId: params.dataId,
|
|
1356
2340
|
namespace: params.namespace,
|
|
1357
2341
|
documentId: params.documentId,
|
|
1358
|
-
|
|
2342
|
+
documentMetadata: params.documentMetadata,
|
|
2343
|
+
parsingParams: buildParsingParams(params),
|
|
1359
2344
|
webhook
|
|
1360
2345
|
});
|
|
1361
2346
|
if (params.file) {
|
|
@@ -1365,16 +2350,7 @@ var Knowhere = class {
|
|
|
1365
2350
|
signal: params.signal
|
|
1366
2351
|
});
|
|
1367
2352
|
}
|
|
1368
|
-
|
|
1369
|
-
pollInterval: params.pollInterval,
|
|
1370
|
-
pollTimeout: params.pollTimeout,
|
|
1371
|
-
onProgress: params.onPollProgress,
|
|
1372
|
-
signal: params.signal
|
|
1373
|
-
});
|
|
1374
|
-
const result = await this.jobs.load(jobResult, {
|
|
1375
|
-
verifyChecksum: params.verifyChecksum
|
|
1376
|
-
});
|
|
1377
|
-
return enrichParseResult(result, jobResult);
|
|
2353
|
+
return job;
|
|
1378
2354
|
}
|
|
1379
2355
|
};
|
|
1380
2356
|
// Annotate the CommonJS export names for ESM import in node:
|
|
@@ -1392,6 +2368,8 @@ var Knowhere = class {
|
|
|
1392
2368
|
Jobs,
|
|
1393
2369
|
Knowhere,
|
|
1394
2370
|
KnowhereError,
|
|
2371
|
+
Knowledge,
|
|
2372
|
+
LocalKnowledgeStore,
|
|
1395
2373
|
NetworkError,
|
|
1396
2374
|
NotFoundError,
|
|
1397
2375
|
PaymentRequiredError,
|