@ontos-ai/knowhere-sdk 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -5
- package/dist/index.d.mts +244 -2
- package/dist/index.d.ts +244 -2
- package/dist/index.js +1057 -76
- package/dist/index.mjs +1059 -78
- package/package.json +28 -21
package/dist/index.mjs
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
// src/client.ts
|
|
2
|
-
import
|
|
2
|
+
import path2 from "path";
|
|
3
3
|
|
|
4
4
|
// src/lib/http-client.ts
|
|
5
|
-
import axios
|
|
5
|
+
import axios, {
|
|
6
|
+
AxiosHeaders
|
|
7
|
+
} from "axios";
|
|
6
8
|
|
|
7
9
|
// src/version.ts
|
|
8
10
|
var VERSION = "0.1.0";
|
|
@@ -41,6 +43,7 @@ var NetworkError = class extends KnowhereError {
|
|
|
41
43
|
this.cause = cause;
|
|
42
44
|
this.name = "NetworkError";
|
|
43
45
|
}
|
|
46
|
+
cause;
|
|
44
47
|
};
|
|
45
48
|
var TimeoutError = class extends NetworkError {
|
|
46
49
|
constructor(message = "Request timed out") {
|
|
@@ -54,6 +57,7 @@ var PollingTimeoutError = class extends KnowhereError {
|
|
|
54
57
|
this.elapsedMs = elapsedMs;
|
|
55
58
|
this.name = "PollingTimeoutError";
|
|
56
59
|
}
|
|
60
|
+
elapsedMs;
|
|
57
61
|
};
|
|
58
62
|
var ChecksumError = class extends KnowhereError {
|
|
59
63
|
constructor(message = "Checksum verification failed", expected, actual) {
|
|
@@ -62,6 +66,8 @@ var ChecksumError = class extends KnowhereError {
|
|
|
62
66
|
this.actual = actual;
|
|
63
67
|
this.name = "ChecksumError";
|
|
64
68
|
}
|
|
69
|
+
expected;
|
|
70
|
+
actual;
|
|
65
71
|
};
|
|
66
72
|
var ValidationError = class extends KnowhereError {
|
|
67
73
|
constructor(message) {
|
|
@@ -87,6 +93,11 @@ var APIError = class extends KnowhereError {
|
|
|
87
93
|
this.body = body;
|
|
88
94
|
this.name = "APIError";
|
|
89
95
|
}
|
|
96
|
+
statusCode;
|
|
97
|
+
code;
|
|
98
|
+
requestId;
|
|
99
|
+
details;
|
|
100
|
+
body;
|
|
90
101
|
};
|
|
91
102
|
var BadRequestError = class extends APIError {
|
|
92
103
|
constructor(message, code, requestId, details, body) {
|
|
@@ -130,6 +141,7 @@ var RateLimitError = class extends APIError {
|
|
|
130
141
|
this.retryAfter = retryAfter;
|
|
131
142
|
this.name = "RateLimitError";
|
|
132
143
|
}
|
|
144
|
+
retryAfter;
|
|
133
145
|
};
|
|
134
146
|
var InternalServerError = class extends APIError {
|
|
135
147
|
constructor(message = "Internal server error", code, requestId, details, body) {
|
|
@@ -185,11 +197,13 @@ var JobFailedError = class extends KnowhereError {
|
|
|
185
197
|
this.jobResult = jobResult;
|
|
186
198
|
this.name = "JobFailedError";
|
|
187
199
|
}
|
|
200
|
+
code;
|
|
201
|
+
jobResult;
|
|
188
202
|
};
|
|
189
203
|
|
|
190
204
|
// src/lib/utils.ts
|
|
191
205
|
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed straight to `setTimeout`.
 * @returns {Promise<void>} Settles (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
194
208
|
function snakeToCamel(str) {
|
|
195
209
|
return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
|
|
@@ -295,8 +309,8 @@ function enrichParseResult(parseResult2, scope) {
|
|
|
295
309
|
}
|
|
296
310
|
return parseResult2;
|
|
297
311
|
}
|
|
298
|
-
function sanitizePath(
|
|
299
|
-
let sanitized =
|
|
312
|
+
function sanitizePath(path3) {
|
|
313
|
+
let sanitized = path3.replace(/^\/+/, "");
|
|
300
314
|
sanitized = sanitized.replace(/\.\.(\/|\\)/g, "");
|
|
301
315
|
sanitized = sanitized.replace(/\\/g, "/");
|
|
302
316
|
return sanitized;
|
|
@@ -410,7 +424,7 @@ async function withRetry(fn, maxRetries, onRetry) {
|
|
|
410
424
|
if (onRetry) {
|
|
411
425
|
onRetry(attempt + 1, error);
|
|
412
426
|
}
|
|
413
|
-
await new Promise((
|
|
427
|
+
await new Promise((resolve2) => setTimeout(resolve2, delay));
|
|
414
428
|
}
|
|
415
429
|
}
|
|
416
430
|
throw lastError;
|
|
@@ -423,17 +437,19 @@ var HttpClient = class {
|
|
|
423
437
|
uploadTimeout;
|
|
424
438
|
httpAgent;
|
|
425
439
|
httpsAgent;
|
|
440
|
+
authTokenProvider;
|
|
426
441
|
constructor(options) {
|
|
427
442
|
this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
428
443
|
this.uploadTimeout = options.uploadTimeout ?? 6e5;
|
|
429
444
|
this.httpAgent = options.httpAgent;
|
|
430
445
|
this.httpsAgent = options.httpsAgent;
|
|
446
|
+
this.authTokenProvider = options.authTokenProvider;
|
|
431
447
|
this.axios = axios.create({
|
|
432
448
|
baseURL: options.baseURL,
|
|
433
449
|
timeout: options.timeout ?? DEFAULT_TIMEOUT,
|
|
434
450
|
headers: {
|
|
435
451
|
"User-Agent": `knowhere-node-sdk/${VERSION}`,
|
|
436
|
-
Authorization: `Bearer ${options.apiKey}
|
|
452
|
+
...options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : {},
|
|
437
453
|
"Content-Type": "application/json",
|
|
438
454
|
...options.defaultHeaders
|
|
439
455
|
},
|
|
@@ -445,6 +461,9 @@ var HttpClient = class {
|
|
|
445
461
|
setupInterceptors() {
|
|
446
462
|
this.axios.interceptors.request.use(
|
|
447
463
|
(config) => {
|
|
464
|
+
if (this.authTokenProvider) {
|
|
465
|
+
return this.attachDynamicAuthorization(config);
|
|
466
|
+
}
|
|
448
467
|
if (config.data && typeof config.data === "object") {
|
|
449
468
|
config.data = keysToSnake(config.data);
|
|
450
469
|
}
|
|
@@ -468,6 +487,19 @@ var HttpClient = class {
|
|
|
468
487
|
}
|
|
469
488
|
);
|
|
470
489
|
}
|
|
490
|
+
async attachDynamicAuthorization(config) {
|
|
491
|
+
const token = await this.authTokenProvider?.();
|
|
492
|
+
if (!token) {
|
|
493
|
+
throw new ValidationError("Authentication token provider returned an empty token");
|
|
494
|
+
}
|
|
495
|
+
const headers = AxiosHeaders.from(config.headers);
|
|
496
|
+
headers.set("Authorization", `Bearer ${token}`);
|
|
497
|
+
config.headers = headers;
|
|
498
|
+
if (config.data && typeof config.data === "object") {
|
|
499
|
+
config.data = keysToSnake(config.data);
|
|
500
|
+
}
|
|
501
|
+
return config;
|
|
502
|
+
}
|
|
471
503
|
handleError(error) {
|
|
472
504
|
if (!error.response) {
|
|
473
505
|
if (error.code === "ECONNABORTED" || error.message.includes("timeout")) {
|
|
@@ -752,11 +784,14 @@ async function pollJobStatus(httpClient, jobId, options) {
|
|
|
752
784
|
// src/lib/result-parser.ts
|
|
753
785
|
import JSZip from "jszip";
|
|
754
786
|
import { promises as fs2 } from "fs";
|
|
755
|
-
import { join, dirname } from "path";
|
|
787
|
+
import { join, dirname, resolve, sep } from "path";
|
|
756
788
|
/**
 * Download a result archive and parse it.
 *
 * @param {object} httpClient - Must expose `download(url)` resolving to the ZIP bytes.
 * @param {string} resultUrl - Location of the result archive.
 * @param {object} [options] - `verifyChecksum` is read but currently has no effect.
 * @returns {Promise<object>} Whatever `parseResultBuffer` produces for the archive.
 */
async function parseResult(httpClient, resultUrl, options) {
  const archiveBytes = await httpClient.download(resultUrl);
  // NOTE(review): this branch is empty in the published bundle, so no checksum
  // is verified regardless of `verifyChecksum` — confirm whether that is intended.
  if (options?.verifyChecksum !== false) {
  }
  return parseResultBuffer(archiveBytes);
}
|
|
794
|
+
async function parseResultBuffer(zipBuffer) {
|
|
760
795
|
const zip = await JSZip.loadAsync(zipBuffer);
|
|
761
796
|
const manifestFile = zip.file("manifest.json");
|
|
762
797
|
if (!manifestFile) {
|
|
@@ -821,7 +856,7 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
821
856
|
if (hierarchyViewFile) {
|
|
822
857
|
hierarchyViewHtml = await hierarchyViewFile.async("string");
|
|
823
858
|
}
|
|
824
|
-
|
|
859
|
+
return createParseResult({
|
|
825
860
|
manifest,
|
|
826
861
|
chunks,
|
|
827
862
|
docNav,
|
|
@@ -832,6 +867,136 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
832
867
|
hierarchy,
|
|
833
868
|
tocHierarchies,
|
|
834
869
|
kbCsv,
|
|
870
|
+
hierarchyViewHtml
|
|
871
|
+
});
|
|
872
|
+
}
|
|
873
|
+
/**
 * Rebuild a parse result from an already-expanded result directory.
 *
 * `manifest.json` and `chunks.json` are required; every other file is optional
 * and yields `undefined` when absent. The returned result carries an empty
 * `rawZip` buffer because no archive is available on this path.
 *
 * @param {string} directory - Directory holding the expanded result files.
 * @returns {Promise<object>} Result assembled by `createParseResult`.
 */
async function parseResultDirectory(directory) {
  // Only camel-case optional payloads that were actually present.
  const camelizeIfPresent = (payload) => payload === void 0 ? void 0 : keysToCamel(payload);

  const manifestText = await readRequiredTextFile(directory, "manifest.json");
  const manifest = parseDates(keysToCamel(JSON.parse(manifestText)));

  const chunksText = await readRequiredTextFile(directory, "chunks.json");
  const chunkRecords = extractChunks(keysToCamel(JSON.parse(chunksText)));
  const chunks = [];
  for (const chunkRecord of chunkRecords) {
    // Sequential on purpose: image/table chunks read their asset from disk.
    chunks.push(await processDirectoryChunk(directory, chunkRecord));
  }

  const fullMarkdown = await readOptionalTextFile(directory, "full.md");
  const docNav = camelizeIfPresent(await readOptionalJsonFile(directory, "doc_nav.json"));
  const hierarchy = await readOptionalJsonFile(directory, "hierarchy.json");

  const slimPayload = await readOptionalJsonFile(directory, "chunks_slim.json");
  const chunksSlim = slimPayload === void 0 ? void 0 : extractSlimChunks(keysToCamel(slimPayload));

  const tocHierarchies = camelizeIfPresent(
    await readOptionalJsonFile(directory, "toc_hierarchies.json")
  );
  const kbCsv = await readOptionalTextFile(directory, "kb.csv");
  const hierarchyViewHtml = await readOptionalTextFile(directory, "hierarchy_view.html");

  return createParseResult({
    manifest,
    chunks,
    docNav,
    fullMarkdown,
    rawZip: Buffer.alloc(0),
    chunksSlim,
    hierarchy,
    tocHierarchies,
    kbCsv,
    hierarchyViewHtml
  });
}
|
|
909
|
+
/**
 * Write a parse result to `directory` as individual files.
 *
 * When the result still carries its original archive, that archive is
 * extracted verbatim and nothing else is written. Otherwise (or when
 * extraction fails) the files are regenerated from the in-memory result.
 *
 * @param {object} result - Parse result to persist.
 * @param {string} directory - Destination directory; created if missing.
 * @returns {Promise<string>} The `directory` argument.
 */
async function saveExpandedParseResult(result, directory) {
  if (result.rawZip.length > 0 && await tryExtractRawZip(result.rawZip, directory)) {
    return directory;
  }

  await fs2.mkdir(directory, { recursive: true });
  const writeText = (fileName, text) => fs2.writeFile(join(directory, fileName), text);
  const writeJson = (fileName, value) => writeText(fileName, JSON.stringify(value, null, 2));

  await writeJson("manifest.json", result.manifest);
  if (result.docNav) {
    await writeJson("doc_nav.json", result.docNav);
  }
  await writeJson("chunks.json", serializeChunks(result.chunks));
  if (result.chunksSlim) {
    await writeJson("chunks_slim.json", { chunks: result.chunksSlim });
  }
  if (result.fullMarkdown) {
    await writeText("full.md", result.fullMarkdown);
  }
  if (result.hierarchy) {
    await writeJson("hierarchy.json", result.hierarchy);
  }
  if (result.tocHierarchies) {
    await writeJson("toc_hierarchies.json", result.tocHierarchies);
  }
  if (result.kbCsv) {
    await writeText("kb.csv", result.kbCsv);
  }
  if (result.hierarchyViewHtml) {
    await writeText("hierarchy_view.html", result.hierarchyViewHtml);
  }

  // Binary and HTML assets referenced by chunks go to their own relative paths.
  for (const { filePath, data } of result.imageChunks) {
    await writeBinaryAsset(directory, filePath, data);
  }
  for (const { filePath, html } of result.tableChunks) {
    await writeTextAsset(directory, filePath, html);
  }
  return directory;
}
|
|
960
|
+
/**
 * Best-effort extraction of a result archive into `directory`.
 *
 * Directory entries and any entry named `result.zip` are skipped. Every
 * failure (unreadable archive, rejected entry path, filesystem error) is
 * reported as `false` rather than thrown; files written before the failure
 * are left in place.
 *
 * @param {Buffer} zipBuffer - Archive bytes.
 * @param {string} directory - Destination directory; created if missing.
 * @returns {Promise<boolean>} `true` when every entry was written.
 */
async function tryExtractRawZip(zipBuffer, directory) {
  try {
    const archive = await JSZip.loadAsync(zipBuffer);
    await fs2.mkdir(directory, { recursive: true });

    const fileEntries = Object.values(archive.files).filter(
      (entry) => !entry.dir && entry.name !== "result.zip"
    );
    for (const entry of fileEntries) {
      // resolveAssetPath rejects entry names that would land outside `directory`.
      const targetPath = resolveAssetPath(directory, entry.name);
      await fs2.mkdir(dirname(targetPath), { recursive: true });
      const contents = await entry.async("nodebuffer");
      await fs2.writeFile(targetPath, contents);
    }
    return true;
  } catch {
    return false;
  }
}
|
|
977
|
+
function createParseResult(parts) {
|
|
978
|
+
const {
|
|
979
|
+
manifest,
|
|
980
|
+
chunks,
|
|
981
|
+
docNav,
|
|
982
|
+
fullMarkdown,
|
|
983
|
+
rawZip,
|
|
984
|
+
chunksSlim,
|
|
985
|
+
hierarchy,
|
|
986
|
+
tocHierarchies,
|
|
987
|
+
kbCsv,
|
|
988
|
+
hierarchyViewHtml
|
|
989
|
+
} = parts;
|
|
990
|
+
return {
|
|
991
|
+
manifest,
|
|
992
|
+
chunks,
|
|
993
|
+
docNav,
|
|
994
|
+
fullMarkdown,
|
|
995
|
+
rawZip,
|
|
996
|
+
chunksSlim,
|
|
997
|
+
hierarchy,
|
|
998
|
+
tocHierarchies,
|
|
999
|
+
kbCsv,
|
|
835
1000
|
hierarchyViewHtml,
|
|
836
1001
|
get textChunks() {
|
|
837
1002
|
return chunks.filter((c) => c.type === "text");
|
|
@@ -888,11 +1053,10 @@ async function parseResult(httpClient, resultUrl, options) {
|
|
|
888
1053
|
for (const tableChunk of this.tableChunks) {
|
|
889
1054
|
await tableChunk.save(directory);
|
|
890
1055
|
}
|
|
891
|
-
await fs2.writeFile(join(directory, "result.zip"),
|
|
1056
|
+
await fs2.writeFile(join(directory, "result.zip"), rawZip);
|
|
892
1057
|
return directory;
|
|
893
1058
|
}
|
|
894
1059
|
};
|
|
895
|
-
return result;
|
|
896
1060
|
}
|
|
897
1061
|
function extractChunks(payload) {
|
|
898
1062
|
if (Array.isArray(payload)) {
|
|
@@ -925,6 +1089,37 @@ function buildTextChunk(chunkData) {
|
|
|
925
1089
|
metadata: chunkData.metadata ?? {}
|
|
926
1090
|
};
|
|
927
1091
|
}
|
|
1092
|
+
/**
 * Assemble an image chunk from its raw record and loaded bytes.
 *
 * @param {object} chunkData - Raw chunk record; nullish fields fall back to
 *   `""` (ids/text) or `{}` (metadata).
 * @param {string} filePath - Asset path relative to the result root.
 * @param {Buffer} imageBuffer - Image bytes, exposed as `data`.
 * @returns {object} Chunk with a `format` getter (extension of `filePath`) and
 *   a `save(directory)` method that writes the bytes under `directory`.
 */
function buildImageChunk(chunkData, filePath, imageBuffer) {
  const { chunkId, content, path: chunkPath, metadata } = chunkData;
  return {
    chunkId: chunkId ?? "",
    type: "image",
    content: content ?? "",
    path: chunkPath ?? "",
    filePath,
    data: imageBuffer,
    metadata: metadata ?? {},
    get format() {
      return getFileExtension(this.filePath);
    },
    async save(directory) {
      return writeBinaryAsset(directory, this.filePath, this.data);
    }
  };
}
|
|
1109
|
+
/**
 * Assemble a table chunk from its raw record and loaded HTML.
 *
 * @param {object} chunkData - Raw chunk record; nullish fields fall back to
 *   `""` (ids/text) or `{}` (metadata).
 * @param {string} filePath - Asset path relative to the result root.
 * @param {string} html - Table markup, exposed as `html`.
 * @returns {object} Chunk with a `save(directory)` method that writes the
 *   markup under `directory`.
 */
function buildTableChunk(chunkData, filePath, html) {
  const { chunkId, content, path: chunkPath, metadata } = chunkData;
  return {
    chunkId: chunkId ?? "",
    type: "table",
    content: content ?? "",
    path: chunkPath ?? "",
    filePath,
    html,
    metadata: metadata ?? {},
    async save(directory) {
      return writeTextAsset(directory, this.filePath, this.html);
    }
  };
}
|
|
928
1123
|
async function processChunk(zip, chunkData) {
|
|
929
1124
|
if (chunkData.type === "text") {
|
|
930
1125
|
return buildTextChunk(chunkData);
|
|
@@ -940,26 +1135,7 @@ async function processChunk(zip, chunkData) {
|
|
|
940
1135
|
throw new KnowhereError(`Image file not found: ${filePath}`);
|
|
941
1136
|
}
|
|
942
1137
|
const imageBuffer = await imageFile.async("nodebuffer");
|
|
943
|
-
|
|
944
|
-
chunkId: chunkData.chunkId ?? "",
|
|
945
|
-
type: "image",
|
|
946
|
-
content: chunkData.content ?? "",
|
|
947
|
-
path: chunkData.path ?? "",
|
|
948
|
-
filePath,
|
|
949
|
-
data: imageBuffer,
|
|
950
|
-
metadata: chunkData.metadata ?? {},
|
|
951
|
-
get format() {
|
|
952
|
-
return getFileExtension(this.filePath);
|
|
953
|
-
},
|
|
954
|
-
async save(directory) {
|
|
955
|
-
const outputPath = join(directory, sanitizePath(this.filePath));
|
|
956
|
-
const outputDir = dirname(outputPath);
|
|
957
|
-
await fs2.mkdir(outputDir, { recursive: true });
|
|
958
|
-
await fs2.writeFile(outputPath, this.data);
|
|
959
|
-
return outputPath;
|
|
960
|
-
}
|
|
961
|
-
};
|
|
962
|
-
return enrichedChunk;
|
|
1138
|
+
return buildImageChunk(chunkData, filePath, imageBuffer);
|
|
963
1139
|
}
|
|
964
1140
|
if (chunkData.type === "table") {
|
|
965
1141
|
const filePath = getChunkFilePath(chunkData);
|
|
@@ -972,26 +1148,112 @@ async function processChunk(zip, chunkData) {
|
|
|
972
1148
|
throw new KnowhereError(`Table file not found: ${filePath}`);
|
|
973
1149
|
}
|
|
974
1150
|
const html = await htmlFile.async("string");
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
1151
|
+
return buildTableChunk(chunkData, filePath, html);
|
|
1152
|
+
}
|
|
1153
|
+
return buildTextChunk(chunkData);
|
|
1154
|
+
}
|
|
1155
|
+
/**
 * Turn one raw chunk record into a chunk object, loading image/table assets
 * from an expanded result directory.
 *
 * Records whose type is neither `"image"` nor `"table"` are built as text chunks.
 *
 * @param {string} directory - Expanded result directory.
 * @param {object} chunkData - Raw (camel-cased) chunk record.
 * @returns {Promise<object>} The built chunk.
 * @throws {KnowhereError} When an image/table record has no file path, or its
 *   asset file does not exist. Other read errors propagate unchanged.
 */
async function processDirectoryChunk(directory, chunkData) {
  // Per-kind messages and loaders; everything else about the two kinds is shared.
  const assetKinds = {
    image: {
      missingPath: (chunkId) => `Image chunk missing file path: ${chunkId}`,
      notFound: (relativePath) => `Image file not found: ${relativePath}`,
      load: async (relativePath) => {
        const bytes = await fs2.readFile(resolveAssetPath(directory, relativePath));
        return buildImageChunk(chunkData, relativePath, bytes);
      }
    },
    table: {
      missingPath: (chunkId) => `Table chunk missing file path: ${chunkId}`,
      notFound: (relativePath) => `Table file not found: ${relativePath}`,
      load: async (relativePath) => {
        const markup = await fs2.readFile(resolveAssetPath(directory, relativePath), "utf8");
        return buildTableChunk(chunkData, relativePath, markup);
      }
    }
  };

  const chunkType = chunkData.type;
  if (chunkType !== "image" && chunkType !== "table") {
    return buildTextChunk(chunkData);
  }

  const kind = assetKinds[chunkType];
  const filePath = getChunkFilePath(chunkData);
  if (!filePath) {
    throw new KnowhereError(kind.missingPath(chunkData.chunkId ?? "unknown"));
  }
  try {
    return await kind.load(filePath);
  } catch (error) {
    if (isMissingFileError(error)) {
      throw new KnowhereError(kind.notFound(filePath));
    }
    throw error;
  }
}
|
|
1191
|
+
/**
 * Reduce chunk objects to the plain records stored in `chunks.json`.
 *
 * Only `chunkId`, `type`, `content`, `path` and `metadata` are kept; image and
 * table chunks additionally keep `filePath`. Payload fields (bytes, HTML) and
 * methods are dropped.
 *
 * @param {object[]} chunks - Chunk objects.
 * @returns {{ chunks: object[] }} Serializable wrapper.
 */
function serializeChunks(chunks) {
  const toRecord = (chunk) => {
    const { chunkId, type, content, path: chunkPath, metadata } = chunk;
    const record = { chunkId, type, content, path: chunkPath, metadata };
    const hasAsset = type === "image" || type === "table";
    return hasAsset ? { ...record, filePath: chunk.filePath } : record;
  };
  return { chunks: chunks.map((chunk) => toRecord(chunk)) };
}
|
|
1208
|
+
/**
 * Read a UTF-8 file that must exist in the result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name joined onto `directory`.
 * @returns {Promise<string>} File contents.
 * @throws {KnowhereError} When the file does not exist; other errors propagate.
 */
async function readRequiredTextFile(directory, fileName) {
  try {
    const text = await fs2.readFile(join(directory, fileName), "utf8");
    return text;
  } catch (error) {
    if (!isMissingFileError(error)) {
      throw error;
    }
    throw new KnowhereError(`${fileName} not found in result directory`);
  }
}
|
|
1218
|
+
/**
 * Read a UTF-8 file that may be absent from the result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name joined onto `directory`.
 * @returns {Promise<string|undefined>} Contents, or `undefined` when the file
 *   does not exist. Any other read error propagates.
 */
async function readOptionalTextFile(directory, fileName) {
  let text;
  try {
    text = await fs2.readFile(join(directory, fileName), "utf8");
  } catch (error) {
    if (!isMissingFileError(error)) {
      throw error;
    }
    text = void 0;
  }
  return text;
}
|
|
1228
|
+
/**
 * Read and parse an optional JSON file from the result directory.
 *
 * @param {string} directory - Result directory.
 * @param {string} fileName - File name joined onto `directory`.
 * @returns {Promise<unknown|undefined>} Parsed value, or `undefined` when the
 *   file does not exist. Invalid JSON makes `JSON.parse` throw.
 */
async function readOptionalJsonFile(directory, fileName) {
  const text = await readOptionalTextFile(directory, fileName);
  if (text === void 0) {
    return void 0;
  }
  return JSON.parse(text);
}
|
|
1232
|
+
/**
 * Write binary asset data beneath `directory`, creating parent folders.
 *
 * @param {string} directory - Root the asset must stay inside.
 * @param {string} filePath - Asset path relative to `directory`.
 * @param {Buffer} data - Bytes to write.
 * @returns {Promise<string>} Absolute path that was written.
 */
async function writeBinaryAsset(directory, filePath, data) {
  const targetPath = resolveAssetPath(directory, filePath);
  await fs2.mkdir(dirname(targetPath), { recursive: true });
  await fs2.writeFile(targetPath, data);
  return targetPath;
}
|
|
1239
|
+
/**
 * Write a text asset beneath `directory`, creating parent folders.
 *
 * @param {string} directory - Root the asset must stay inside.
 * @param {string} filePath - Asset path relative to `directory`.
 * @param {string} text - Contents to write.
 * @returns {Promise<string>} Absolute path that was written.
 */
async function writeTextAsset(directory, filePath, text) {
  const targetPath = resolveAssetPath(directory, filePath);
  const parentDirectory = dirname(targetPath);
  await fs2.mkdir(parentDirectory, { recursive: true });
  await fs2.writeFile(targetPath, text);
  return targetPath;
}
|
|
1246
|
+
/**
 * Resolve an asset path against `directory` and refuse anything that would
 * land outside it.
 *
 * The path is first passed through `sanitizePath`; the containment check below
 * is the actual guard, applied to the fully resolved result.
 *
 * @param {string} directory - Root directory for result assets.
 * @param {string} filePath - Asset path as recorded in the result (untrusted).
 * @returns {string} Absolute path inside (or equal to) the resolved root.
 * @throws {KnowhereError} When the resolved path escapes the root.
 */
function resolveAssetPath(directory, filePath) {
  const root = resolve(directory);
  const outputPath = resolve(root, sanitizePath(filePath));
  // A filesystem root ("/" or "C:\") already ends with the separator. Appending
  // another one would build a prefix such as "//" that no resolved path starts
  // with, so every asset under a root directory would be rejected.
  const rootPrefix = root.endsWith(sep) ? root : `${root}${sep}`;
  if (outputPath !== root && !outputPath.startsWith(rootPrefix)) {
    throw new KnowhereError(`Invalid result asset path: ${filePath}`);
  }
  return outputPath;
}
|
|
1254
|
+
/**
 * Tell whether a caught value is a "file does not exist" error.
 *
 * @param {unknown} error - Value caught from a filesystem call.
 * @returns {boolean} `true` only for a non-null object whose `code` is `"ENOENT"`.
 */
function isMissingFileError(error) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error && error.code === "ENOENT";
}
|
|
995
1257
|
|
|
996
1258
|
// src/resources/jobs.ts
|
|
997
1259
|
var Jobs = class extends BaseResource {
|
|
@@ -1188,22 +1450,741 @@ var Documents = class extends BaseResource {
|
|
|
1188
1450
|
}
|
|
1189
1451
|
};
|
|
1190
1452
|
|
|
1453
|
+
// src/knowledge/local-store.ts
|
|
1454
|
+
import { createHash } from "crypto";
|
|
1455
|
+
import os from "os";
|
|
1456
|
+
import { promises as fs3 } from "fs";
|
|
1457
|
+
import path from "path";
|
|
1458
|
+
var STORE_VERSION = 1;
|
|
1459
|
+
var LOCAL_DOCUMENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
|
|
1460
|
+
/**
 * On-disk store for parsed results plus a JSON index (`index.json`) that lists
 * stored documents and tracked async parse jobs.
 *
 * Layout under `cacheDirectory`: `index.json` and `documents/<localDocumentId>/`.
 * Results saved or loaded through an instance are also memoised in memory.
 * Index updates are read-modify-write with no locking between callers.
 */
var LocalKnowledgeStore = class {
  cacheDirectory;
  indexPath;
  resultCache = /* @__PURE__ */ new Map();
  /**
   * @param {string} [cacheDirectory] - Store root; defaults to
   *   `~/.knowhere-node-sdk/knowledge`.
   */
  constructor(cacheDirectory) {
    this.cacheDirectory = cacheDirectory ?? path.join(os.homedir(), ".knowhere-node-sdk", "knowledge");
    this.indexPath = path.join(this.cacheDirectory, "index.json");
  }
  /**
   * Persist a parse result, replacing any previous copy under the same ID, and
   * record it in the index. A tracked async job with the same `jobId` is
   * marked `"cached"`.
   *
   * @param {object} result - Parse result to store.
   * @param {{ localDocumentId?: string }} [options] - Explicit ID; otherwise
   *   one is derived from the result.
   * @returns {Promise<object>} The stored document record (dates as `Date`).
   */
  async saveResult(result, options) {
    await fs3.mkdir(this.cacheDirectory, { recursive: true });
    const now = /* @__PURE__ */ new Date();
    const index = await this.readIndex();
    const localDocumentId = validateLocalDocumentId(
      options?.localDocumentId ?? createLocalDocumentId(result)
    );
    const resultDirectoryPath = this.getResultDirectoryPath(localDocumentId);
    // Start from an empty directory so files from an older save cannot linger.
    await fs3.rm(resultDirectoryPath, { recursive: true, force: true });
    await saveExpandedParseResult(result, resultDirectoryPath);
    this.resultCache.set(localDocumentId, result);
    const existing = index.documents.find(
      (document) => document.localDocumentId === localDocumentId
    );
    const stored = {
      localDocumentId,
      jobId: result.jobId,
      documentId: result.documentId,
      namespace: result.namespace,
      sourceFileName: result.manifest.sourceFileName,
      chunkCount: result.chunks.length,
      typeCounts: countChunkTypes(result),
      resultDirectoryPath,
      createdAt: existing?.createdAt ?? now.toISOString(),
      updatedAt: now.toISOString()
    };
    // Most recently saved document goes first.
    const nextDocuments = [
      stored,
      ...index.documents.filter((document) => document.localDocumentId !== localDocumentId)
    ];
    const asyncParseJobs = (index.asyncParseJobs ?? []).map(
      (job) => job.jobId === result.jobId ? {
        ...job,
        localDocumentId,
        cacheStatus: "cached",
        updatedAt: now.toISOString()
      } : job
    );
    await this.writeIndex({
      version: STORE_VERSION,
      documents: nextDocuments,
      asyncParseJobs
    });
    return toLocalKnowledgeDocument(stored);
  }
  /**
   * Track an async parse job in the index. An existing entry keeps its
   * `cacheStatus` and `createdAt`; a new one starts as `"pending"`.
   *
   * @param {{ jobId: string, localDocumentId?: string }} params
   * @returns {Promise<void>}
   */
  async saveAsyncParseJob(params) {
    const now = (/* @__PURE__ */ new Date()).toISOString();
    const index = await this.readIndex();
    const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
    const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
    const stored = {
      jobId: params.jobId,
      localDocumentId: localDocumentId ?? existing?.localDocumentId,
      cacheStatus: existing?.cacheStatus ?? "pending",
      createdAt: existing?.createdAt ?? now,
      updatedAt: now
    };
    await this.writeIndex({
      version: STORE_VERSION,
      documents: index.documents,
      asyncParseJobs: [
        stored,
        ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
      ]
    });
  }
  /**
   * @param {string} jobId
   * @returns {Promise<object|undefined>} Raw index entry (ISO date strings), if tracked.
   */
  async getAsyncParseJob(jobId) {
    const index = await this.readIndex();
    return (index.asyncParseJobs ?? []).find((job) => job.jobId === jobId);
  }
  /**
   * @returns {Promise<object[]>} Tracked jobs whose cache status is `"pending"`
   *   or `"not_available"`, with dates converted to `Date`.
   */
  async listRecoverableAsyncParseJobs() {
    const index = await this.readIndex();
    return (index.asyncParseJobs ?? []).filter((job) => job.cacheStatus === "pending" || job.cacheStatus === "not_available").map(toLocalKnowledgeAsyncParseJob);
  }
  /**
   * Update the cache status of a tracked job; does nothing for unknown jobs.
   *
   * @param {{ jobId: string, cacheStatus: string, localDocumentId?: string }} params
   * @returns {Promise<void>}
   */
  async updateAsyncParseJobCacheStatus(params) {
    const index = await this.readIndex();
    const localDocumentId = params.localDocumentId ? validateLocalDocumentId(params.localDocumentId) : void 0;
    const existing = (index.asyncParseJobs ?? []).find((job) => job.jobId === params.jobId);
    if (!existing) {
      return;
    }
    const now = (/* @__PURE__ */ new Date()).toISOString();
    const stored = {
      ...existing,
      localDocumentId: localDocumentId ?? existing.localDocumentId,
      cacheStatus: params.cacheStatus,
      updatedAt: now
    };
    await this.writeIndex({
      version: STORE_VERSION,
      documents: index.documents,
      asyncParseJobs: [
        stored,
        ...(index.asyncParseJobs ?? []).filter((job) => job.jobId !== params.jobId)
      ]
    });
  }
  /** @returns {Promise<object[]>} All stored document records (dates as `Date`). */
  async listDocuments() {
    const index = await this.readIndex();
    return index.documents.map(toLocalKnowledgeDocument);
  }
  /**
   * @param {string} localDocumentId - Must be a valid ID slug.
   * @returns {Promise<object|undefined>} The document record, if present.
   */
  async getDocument(localDocumentId) {
    validateLocalDocumentId(localDocumentId);
    const index = await this.readIndex();
    const stored = index.documents.find((document) => document.localDocumentId === localDocumentId);
    return stored ? toLocalKnowledgeDocument(stored) : void 0;
  }
  /**
   * Load a stored result, from the in-memory cache when available and from
   * disk otherwise. Disk loads get `namespace` and `documentId` copied from
   * the index record.
   *
   * @param {string} localDocumentId
   * @returns {Promise<{ document: object, result: object }>}
   * @throws {Error} When the ID is not in the index.
   */
  async loadResult(localDocumentId) {
    const document = await this.getDocument(localDocumentId);
    if (!document) {
      throw new Error(`Local Knowhere document not found: ${localDocumentId}`);
    }
    const cachedResult = this.resultCache.get(localDocumentId);
    if (cachedResult) {
      return { document, result: cachedResult };
    }
    const result = await this.loadStoredResult(document);
    result.namespace = document.namespace;
    result.documentId = document.documentId;
    this.resultCache.set(localDocumentId, result);
    return { document, result };
  }
  /**
   * @param {string} localDocumentId
   * @returns {string} Absolute directory for the document's files.
   * @throws {Error} When the ID would resolve outside `documents/`.
   */
  getResultDirectoryPath(localDocumentId) {
    const documentsDirectory = path.resolve(this.cacheDirectory, "documents");
    const resultDirectoryPath = path.resolve(documentsDirectory, localDocumentId);
    if (!isPathInsideDirectory(resultDirectoryPath, documentsDirectory)) {
      throw new Error(`Local Knowhere document ID resolves outside the cache: ${localDocumentId}`);
    }
    return resultDirectoryPath;
  }
  /** Parse the result files at the directory recorded for `document`. */
  async loadStoredResult(document) {
    return parseResultDirectory(document.resultDirectoryPath);
  }
  /**
   * Read the index. A missing file, a different `version`, or a non-array
   * `documents` field all yield an empty index; unreadable JSON throws.
   *
   * @returns {Promise<{ version: number, documents: object[], asyncParseJobs: object[] }>}
   */
  async readIndex() {
    try {
      const raw = await fs3.readFile(this.indexPath, "utf8");
      const parsed = JSON.parse(raw);
      if (parsed.version !== STORE_VERSION || !Array.isArray(parsed.documents)) {
        return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
      }
      return {
        version: STORE_VERSION,
        documents: parsed.documents,
        asyncParseJobs: Array.isArray(parsed.asyncParseJobs) ? parsed.asyncParseJobs : []
      };
    } catch (error) {
      if (isMissingFileError2(error)) {
        return { version: STORE_VERSION, documents: [], asyncParseJobs: [] };
      }
      throw error;
    }
  }
  /**
   * Replace the index file with `index`.
   *
   * The JSON is written to a sibling temporary file and then renamed over
   * `index.json`, so an interrupted write cannot leave a truncated index that
   * makes every later `readIndex` fail on `JSON.parse`.
   *
   * @param {object} index - Full index contents.
   * @returns {Promise<void>}
   */
  async writeIndex(index) {
    await fs3.mkdir(this.cacheDirectory, { recursive: true });
    // Unique per call so overlapping writes in one process never share a temp file.
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const temporaryPath = `${this.indexPath}.${uniqueSuffix}.tmp`;
    try {
      await fs3.writeFile(temporaryPath, JSON.stringify(index, null, 2));
      await fs3.rename(temporaryPath, this.indexPath);
    } catch (error) {
      // Do not leave the temp file behind; the original failure is what matters.
      await fs3.rm(temporaryPath, { force: true }).catch(() => {});
      throw error;
    }
  }
};
|
|
1625
|
+
/**
 * Ensure a local document ID is safe to use as a single directory name.
 *
 * Accepts only strings that match the ID pattern, contain no `..`, and have no
 * directory component. Non-string values are rejected with the same error
 * instead of failing later on a string method.
 *
 * @param {string} localDocumentId - Candidate ID.
 * @returns {string} The same ID, unchanged.
 * @throws {Error} When the ID is not a safe slug.
 */
function validateLocalDocumentId(localDocumentId) {
  const isSafeSlug = typeof localDocumentId === "string"
    && LOCAL_DOCUMENT_ID_PATTERN.test(localDocumentId)
    && !localDocumentId.includes("..")
    && path.basename(localDocumentId) === localDocumentId;
  if (!isSafeSlug) {
    throw new Error(
      "Local Knowhere document ID must be a safe slug containing only letters, numbers, dots, underscores, or hyphens"
    );
  }
  return localDocumentId;
}
|
|
1633
|
+
/**
 * Tell whether `targetPath` is `parentDirectory` itself or lies beneath it.
 *
 * @param {string} targetPath - Path to test.
 * @param {string} parentDirectory - Directory that should contain it.
 * @returns {boolean} `true` when the target does not escape the parent.
 */
function isPathInsideDirectory(targetPath, parentDirectory) {
  const relativePath = path.relative(parentDirectory, targetPath);
  if (relativePath.length === 0) {
    return true;
  }
  // An absolute result means the two paths share no root (e.g. different drives).
  if (path.isAbsolute(relativePath)) {
    return false;
  }
  // Only a leading ".." *segment* climbs out of the parent; a child whose name
  // merely begins with two dots (such as "..hidden") is still inside.
  const climbsOut = relativePath === ".." || relativePath.startsWith(`..${path.sep}`);
  return !climbsOut;
}
|
|
1637
|
+
/**
 * Derive a deterministic local document ID from a parse result.
 *
 * The ID is `local_` followed by the first 16 hex characters of a SHA-256 over
 * the job ID, a NUL separator, and the source file name.
 *
 * @param {{ jobId: string, manifest: { sourceFileName: string } }} result
 * @returns {string} ID of the form `local_<16 hex chars>`.
 */
function createLocalDocumentId(result) {
  const hasher = createHash("sha256");
  hasher.update(result.jobId);
  // The separator keeps ("ab", "c") and ("a", "bc") from hashing alike.
  hasher.update("\0");
  hasher.update(result.manifest.sourceFileName);
  const shortDigest = hasher.digest("hex").slice(0, 16);
  return `local_${shortDigest}`;
}
|
|
1641
|
+
/**
 * Count a result's chunks by type.
 *
 * `text`, `image` and `table` are always present (possibly 0). A chunk with
 * any other type gets its own counter starting at 1, rather than turning into
 * `NaN` (which would be stored as `null` once the counts are written as JSON).
 *
 * @param {{ chunks: Array<{ type: string }> }} result
 * @returns {Record<string, number>} Counts keyed by chunk type.
 */
function countChunkTypes(result) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const { type } of result.chunks) {
    // Own-property check so inherited names (e.g. "constructor") start at 0 too.
    const seenSoFar = Object.prototype.hasOwnProperty.call(counts, type) ? counts[type] : 0;
    counts[type] = seenSoFar + 1;
  }
  return counts;
}
|
|
1650
|
+
/**
 * Convert an index document record into its public form.
 *
 * Copies the known fields only and turns the two timestamps into `Date` objects.
 *
 * @param {object} stored - Record as kept in the index (timestamps as strings).
 * @returns {object} Public document record.
 */
function toLocalKnowledgeDocument(stored) {
  const {
    localDocumentId,
    jobId,
    documentId,
    namespace,
    sourceFileName,
    chunkCount,
    typeCounts,
    resultDirectoryPath,
    createdAt,
    updatedAt
  } = stored;
  return {
    localDocumentId,
    jobId,
    documentId,
    namespace,
    sourceFileName,
    chunkCount,
    typeCounts,
    resultDirectoryPath,
    createdAt: new Date(createdAt),
    updatedAt: new Date(updatedAt)
  };
}
|
|
1664
|
+
/**
 * Convert an index job entry into its public form.
 *
 * All fields are copied as-is except the two timestamps, which become `Date`
 * objects. The input entry is not modified.
 *
 * @param {object} stored - Job entry as kept in the index.
 * @returns {object} Shallow copy with `createdAt` / `updatedAt` as `Date`.
 */
function toLocalKnowledgeAsyncParseJob(stored) {
  const job = { ...stored };
  job.createdAt = new Date(stored.createdAt);
  job.updatedAt = new Date(stored.updatedAt);
  return job;
}
|
|
1671
|
+
/**
 * Tell whether a caught value is a "file does not exist" error.
 *
 * @param {unknown} error - Value caught from a filesystem call.
 * @returns {boolean} `true` only for a non-null object whose `code` is `"ENOENT"`.
 */
function isMissingFileError2(error) {
  const isObject = typeof error === "object" && error !== null;
  return isObject ? "code" in error && error.code === "ENOENT" : false;
}
|
|
1674
|
+
|
|
1675
|
+
// src/knowledge/knowledge.ts
|
|
1676
|
+
// Limits used by the Knowledge read/grep helpers in this module:
// - DEFAULT_READ_LIMIT / MAX_READ_LIMIT: default and upper bound passed to
//   clampLimit() for the `limit` option of readChunks().
// - DEFAULT_GREP_LIMIT / MAX_GREP_LIMIT: default and upper bound passed to
//   clampLimit() for the `maxResults` option of grepChunks().
// - DEFAULT_CONTEXT_CHARS: snippet context width used by grepChunks() when
//   `contextChars` is not provided.
var DEFAULT_READ_LIMIT = 12;
|
|
1677
|
+
var MAX_READ_LIMIT = 40;
|
|
1678
|
+
var DEFAULT_GREP_LIMIT = 20;
|
|
1679
|
+
var MAX_GREP_LIMIT = 50;
|
|
1680
|
+
var DEFAULT_CONTEXT_CHARS = 80;
|
|
1681
|
+
/**
 * Client-side knowledge tools built on a Knowhere client and a local store.
 *
 * Parse results are fetched through the client, persisted with
 * LocalKnowledgeStore, and then served back through outline, read, grep and
 * search helpers.
 */
var Knowledge = class _Knowledge {
  client;
  store;

  /**
   * @param {object} client - Knowhere client used for parsing, jobs and retrieval.
   * @param {{ cacheDirectory?: string }} [options] - Optional store location.
   */
  constructor(client, options) {
    this.client = client;
    this.store = new LocalKnowledgeStore(options?.cacheDirectory);
  }

  /** Create a sibling instance that shares the client but uses another cache directory. */
  withCacheDirectory(cacheDirectory) {
    return new _Knowledge(this.client, { cacheDirectory });
  }

  /** Parse through the client, save the result in the store, and return both. */
  async parse(params) {
    const parsed = await this.client.parse(params);
    const { localDocumentId } = params;
    const saved = await this.store.saveResult(parsed, { localDocumentId });
    return { document: saved, result: parsed };
  }

  /** Start a parse job through the client and record it in the store for later caching. */
  async startParse(params) {
    const startedJob = await this.client.startParse(params);
    const { localDocumentId } = params;
    await this.store.saveAsyncParseJob({ jobId: startedJob.jobId, localDocumentId });
    return { job: startedJob, localDocumentId };
  }

  /** Fetch the job from the client and resolve its local cache state. */
  async getJobStatus(jobId) {
    const job = await this.client.jobs.get(jobId);
    const cache = await this.resolveAsyncCache(jobId, job.isDone, job.isFailed);
    return { job, cache };
  }

  /** Re-check every recoverable tracked job, one after another. */
  async recoverPendingAsyncParseJobs() {
    const pendingJobs = await this.store.listRecoverableAsyncParseJobs();
    const results = [];
    for (let i = 0; i < pendingJobs.length; i += 1) {
      const status = await this.getJobStatus(pendingJobs[i].jobId);
      results.push(status);
    }
    return { checkedJobs: pendingJobs.length, results };
  }

  /** Load a job's result through the client and save it in the store. */
  async cacheJobResult(params) {
    const { verifyChecksum } = params;
    const loaded = await this.client.jobs.load(params.jobId, { verifyChecksum });
    const saved = await this.store.saveResult(loaded, {
      localDocumentId: params.localDocumentId
    });
    return { document: saved, result: loaded };
  }

  /**
   * Work out the cache state of a tracked job and cache its result when ready.
   *
   * Possible statuses: "untracked", "already_cached", "failed", "pending",
   * "cached", or "not_available" when caching the finished job throws.
   */
  async resolveAsyncCache(jobId, isDone, isFailed) {
    const trackedJob = await this.store.getAsyncParseJob(jobId);
    if (!trackedJob) {
      return { status: "untracked" };
    }
    const { localDocumentId } = trackedJob;

    if (trackedJob.cacheStatus === "cached" && localDocumentId) {
      const existingDocument = await this.store.getDocument(localDocumentId);
      if (existingDocument) {
        return { status: "already_cached", localDocumentId, document: existingDocument };
      }
      // The job is marked cached but the document is gone: fall through and
      // treat it like any other job.
    }

    if (isFailed) {
      await this.store.updateAsyncParseJobCacheStatus({ jobId, cacheStatus: "failed" });
      return { status: "failed", localDocumentId };
    }

    if (!isDone) {
      return { status: "pending", localDocumentId };
    }

    try {
      const { document } = await this.cacheJobResult({ jobId, localDocumentId });
      return {
        status: "cached",
        localDocumentId: document.localDocumentId,
        document
      };
    } catch (error) {
      await this.store.updateAsyncParseJobCacheStatus({ jobId, cacheStatus: "not_available" });
      const message = error instanceof Error ? error.message : String(error);
      return { status: "not_available", localDocumentId, error: message };
    }
  }

  /** List the documents held by the store. */
  async listDocuments() {
    return this.store.listDocuments();
  }

  /** Build the flat section list and the section tree for a stored document. */
  async getDocumentOutline(localDocumentId) {
    const { document, result } = await this.store.loadResult(localDocumentId);
    const chunks = indexChunks(result);
    const sections = buildFlatSections(result, chunks);

    const navSections = result.docNav?.sections;
    let sectionTree;
    if (navSections && navSections.length > 0) {
      sectionTree = navSections.map(
        (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
      );
    } else {
      // No navigation data: rebuild the hierarchy from the flat section paths.
      sectionTree = nestSections(sections);
    }

    return {
      document,
      totalChunks: chunks.length,
      typeCounts: document.typeCounts,
      sections,
      sectionTree
    };
  }

  /** Read a bounded window of chunks, optionally scoped by id, type or section. */
  async readChunks(params) {
    const { document, result } = await this.store.loadResult(params.localDocumentId);
    const limit = clampLimit(params.limit, DEFAULT_READ_LIMIT, MAX_READ_LIMIT);
    const scoped = indexChunks(result).filter((chunk) => matchesReadScope(chunk, params));
    const page = selectReadWindow(scoped, params, limit);

    const lastOnPage = page[page.length - 1];
    let nextChunk;
    if (lastOnPage && page.length < scoped.length) {
      // Position of the scoped chunk that follows the last one returned, if any.
      nextChunk = scoped[scoped.indexOf(lastOnPage) + 1]?.position;
    }

    return {
      document,
      chunks: page.map((chunk) => toReadChunk(chunk)),
      nextChunk
    };
  }

  /**
   * Search chunk contents for a literal or regex pattern.
   *
   * Stops as soon as `maxResults` matches were collected and reports
   * `truncated: true` in that case.
   */
  async grepChunks(params) {
    if (!params.pattern) {
      throw new ValidationError("pattern is required");
    }
    const { document, result } = await this.store.loadResult(params.localDocumentId);
    const maxResults = clampLimit(params.maxResults, DEFAULT_GREP_LIMIT, MAX_GREP_LIMIT);
    const contextChars = params.contextChars ?? DEFAULT_CONTEXT_CHARS;
    const matcher = createMatcher(params);
    const scopedChunks = indexChunks(result).filter((chunk) => matchesGrepScope(chunk, params));

    const describeMatch = (chunk, { startOffset, endOffset }) => ({
      position: chunk.position,
      chunkId: chunk.chunkId,
      chunkType: chunk.chunkType,
      sectionPath: chunk.sectionPath,
      sourceChunkPath: chunk.sourceChunkPath,
      filePath: chunk.filePath,
      startOffset,
      endOffset,
      snippet: buildSnippet(chunk.content, startOffset, endOffset, contextChars)
    });

    const matches = [];
    let scannedChunks = 0;
    for (const chunk of scopedChunks) {
      scannedChunks += 1;
      for (const hit of matcher(chunk.content)) {
        matches.push(describeMatch(chunk, hit));
        if (matches.length >= maxResults) {
          return { document, matches, scannedChunks, truncated: true };
        }
      }
    }
    return { document, matches, scannedChunks, truncated: false };
  }

  /**
   * Run a retrieval query through the client and attach local document IDs
   * to references and results whose server document ID is known locally.
   */
  async search(params) {
    const localDocuments = await this.resolveSearchDocuments(params.localDocumentIds);
    const rawResponse = await this.client.retrieval.query({
      query: params.query,
      namespace: params.namespace,
      topK: params.topK,
      useAgentic: params.useAgentic ?? false
    });

    // Index local documents by server-side document ID.
    const documentByServerId = new Map();
    for (const localDocument of localDocuments) {
      if (localDocument.documentId) {
        documentByServerId.set(localDocument.documentId, localDocument);
      }
    }

    const chunkReferences = rawResponse.referencedChunks.map((reference) => {
      const { documentId, chunkId, sectionPath, chunkType } = reference;
      const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
      return {
        localDocumentId: localDocument?.localDocumentId,
        documentId,
        chunkId,
        sectionPath,
        chunkType
      };
    });
    const resultReferences = rawResponse.results.map(
      (result) => toResultReference(result, documentByServerId)
    );
    const results = rawResponse.results.map(
      (result) => toRemoteSearchResult(result, documentByServerId)
    );

    return {
      namespace: rawResponse.namespace,
      query: rawResponse.query,
      evidenceText: rawResponse.evidenceText,
      references: [...chunkReferences, ...resultReferences],
      results,
      rawResponse
    };
  }

  /** Return all stored documents, or only those whose local ID was requested. */
  async resolveSearchDocuments(localDocumentIds) {
    const documents = await this.store.listDocuments();
    const wantsEverything = !localDocumentIds || localDocumentIds.length === 0;
    if (wantsEverything) {
      return documents;
    }
    const requestedIds = new Set(localDocumentIds);
    return documents.filter((document) => requestedIds.has(document.localDocumentId));
  }
};
|
|
1895
|
+
/**
 * Turn the raw chunks of a parse result into indexed chunk records.
 *
 * Each record keeps the original chunk as `source`, gets a 1-based
 * `position`, a normalized `sectionPath`, and the resolved `filePath`.
 *
 * @param {object} result - Parse result with `chunks` and `manifest.sourceFileName`.
 * @returns {object[]} Indexed chunk records in document order.
 */
function indexChunks(result) {
  return result.chunks.map((chunk, offset) => {
    const filePath = getChunkFilePath2(chunk);
    const { chunkId, type, content, path: sourceChunkPath, metadata } = chunk;
    const sectionPath = normalizeSectionPath(sourceChunkPath, result.manifest.sourceFileName);
    return {
      source: chunk,
      position: offset + 1,
      chunkId,
      chunkType: type,
      content,
      sectionPath,
      sourceChunkPath,
      filePath,
      metadata
    };
  });
}
|
|
1911
|
+
/**
 * Resolve the file path associated with a chunk.
 *
 * Image and table chunks carry it directly on `filePath`. Other chunks may
 * carry it on `metadata.filePath`, which is only returned when it is a string.
 * A chunk without a `metadata` object yields `undefined` instead of throwing.
 *
 * @param {object} chunk - Raw chunk from a parse result.
 * @returns {string | undefined} The chunk's file path, if any.
 */
function getChunkFilePath2(chunk) {
  if (chunk.type === "image" || chunk.type === "table") {
    return chunk.filePath;
  }
  const filePath = chunk.metadata?.filePath;
  return typeof filePath === "string" ? filePath : void 0;
}
|
|
1918
|
+
/**
 * Convert a slash-separated chunk path into a " / "-separated section path.
 *
 * - Empty input gives "".
 * - Paths starting with "images/" or "tables/" are returned unchanged.
 * - When a segment equals `sourceFileName`, everything after it is kept
 *   (or the file name itself when nothing follows).
 * - Otherwise the first segment is dropped, unless it is the only one.
 *
 * @param {string} path3 - Raw chunk or section path.
 * @param {string} [sourceFileName] - Source file name used as an anchor.
 * @returns {string} Normalized section path.
 */
function normalizeSectionPath(path3, sourceFileName) {
  if (!path3) {
    return "";
  }
  const isAssetPath = ["images/", "tables/"].some((prefix) => path3.startsWith(prefix));
  if (isAssetPath) {
    return path3;
  }

  const segments = path3.split("/").filter((segment) => Boolean(segment));
  const anchorIndex = sourceFileName ? segments.indexOf(sourceFileName) : -1;
  if (anchorIndex >= 0) {
    const afterFileName = segments.slice(anchorIndex + 1).join(" / ");
    return afterFileName || sourceFileName;
  }

  if (segments.length > 1) {
    return segments.slice(1).join(" / ");
  }
  return segments[0] ?? "";
}
|
|
1937
|
+
/**
 * Build the flat list of sections for a parse result.
 *
 * When the result carries navigation sections, they are converted and
 * flattened. Otherwise sections are derived by grouping the indexed chunks
 * by section path (falling back to the raw chunk path) and sorted with
 * compareSections.
 *
 * @param {object} result - Parse result, optionally with `docNav.sections`.
 * @param {object[]} chunks - Indexed chunks of the same result.
 * @returns {object[]} Flat list of sections.
 */
function buildFlatSections(result, chunks) {
  const navSections = result.docNav?.sections;
  if (navSections && navSections.length > 0) {
    const converted = navSections.map(
      (section) => toKnowledgeSection(section, chunks, result.manifest.sourceFileName)
    );
    return flattenSections(converted);
  }

  const sectionsByPath = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const key = chunk.sectionPath || chunk.sourceChunkPath;
    const known = sectionsByPath.get(key);
    if (!known) {
      sectionsByPath.set(key, createSectionFromChunk(key, chunk));
      continue;
    }
    addChunkToSection(known, chunk);
  }
  return Array.from(sectionsByPath.values()).sort(compareSections);
}
|
|
1957
|
+
/**
 * Convert a navigation section (and, recursively, its children) into a
 * knowledge section annotated with chunk statistics.
 *
 * The statistics cover every indexed chunk located in the section or in
 * one of its sub-sections, as decided by isInSection.
 *
 * @param {object} section - Navigation section with `path`, `title`, `level`, `summary`, `children`.
 * @param {object[]} chunks - Indexed chunks of the document.
 * @param {string} [sourceFileName] - Source file name used to normalize paths.
 * @returns {object} Knowledge section with nested `children`.
 */
function toKnowledgeSection(section, chunks, sourceFileName) {
  const sectionPath = normalizeSectionPath(section.path, sourceFileName);
  const members = chunks.filter((chunk) => isInSection(chunk.sectionPath, sectionPath));
  const children = section.children.map(
    (childSection) => toKnowledgeSection(childSection, chunks, sourceFileName)
  );
  const { title, level, summary } = section;
  return {
    sectionPath,
    sectionTitle: title,
    sectionLevel: level,
    summary,
    startChunk: minPosition(members),
    endChunk: maxPosition(members),
    chunkCount: members.length,
    typeCounts: countIndexedTypes(members),
    children
  };
}
|
|
1975
|
+
/**
 * Create a section record from the first chunk found under a section path.
 *
 * The title is the last " / "-separated segment of the path (or the path
 * itself when it has no segments) and the level is the segment count, never
 * below 1.
 *
 * @param {string} pathValue - Section path the chunk belongs to.
 * @param {object} chunk - Indexed chunk with `position` and `chunkType`.
 * @returns {object} New section covering exactly this chunk.
 */
function createSectionFromChunk(pathValue, chunk) {
  const segments = pathValue.split(" / ").filter((segment) => Boolean(segment));
  const depth = segments.length;
  const sectionTitle = depth > 0 ? segments[depth - 1] : pathValue;
  const { position } = chunk;
  return {
    sectionPath: pathValue,
    sectionTitle,
    sectionLevel: depth || 1,
    startChunk: position,
    endChunk: position,
    chunkCount: 1,
    typeCounts: { text: 0, image: 0, table: 0, [chunk.chunkType]: 1 },
    children: []
  };
}
|
|
1988
|
+
/**
 * Fold one more chunk into an existing section record (mutates `section`).
 *
 * Widens the start/end positions, bumps the chunk count and the per-type
 * counter. A type that has no counter yet starts at 1 instead of turning
 * into `NaN`.
 *
 * @param {object} section - Section record to update in place.
 * @param {object} chunk - Indexed chunk with `position` and `chunkType`.
 * @returns {void}
 */
function addChunkToSection(section, chunk) {
  const { position, chunkType } = chunk;
  section.startChunk = Math.min(section.startChunk ?? position, position);
  section.endChunk = Math.max(section.endChunk ?? position, position);
  section.chunkCount += 1;
  section.typeCounts[chunkType] = (section.typeCounts[chunkType] ?? 0) + 1;
}
|
|
1994
|
+
/**
 * Flatten a section tree into a list, parents before their children
 * (depth-first, pre-order).
 *
 * @param {object[]} sections - Sections, each with a `children` array.
 * @returns {object[]} The same section objects in flattened order.
 */
function flattenSections(sections) {
  const flattened = [];
  const visit = (nodes) => {
    for (const node of nodes) {
      flattened.push(node);
      visit(node.children);
    }
  };
  visit(sections);
  return flattened;
}
|
|
1997
|
+
/**
 * Rebuild a section tree from a flat list using the section paths.
 *
 * Works on shallow copies (with fresh `children` arrays), so the input
 * sections are left untouched. A section whose parent path is not in the
 * list becomes a root.
 *
 * @param {object[]} sections - Flat sections with `sectionPath`.
 * @returns {object[]} Root sections with nested `children`.
 */
function nestSections(sections) {
  const copies = sections.map((section) => Object.assign({}, section, { children: [] }));

  const copyByPath = new Map();
  for (const copy of copies) {
    copyByPath.set(copy.sectionPath, copy);
  }

  const roots = [];
  for (const copy of copies) {
    const parentPath = getParentSectionPath(copy.sectionPath);
    const parent = parentPath ? copyByPath.get(parentPath) : void 0;
    const siblings = parent ? parent.children : roots;
    siblings.push(copy);
  }
  return roots;
}
|
|
2015
|
+
/**
 * Return the path of the parent section, i.e. the section path without its
 * last " / "-separated segment.
 *
 * @param {string} sectionPath - Section path such as "A / B / C".
 * @returns {string | undefined} Parent path, or undefined for top-level or empty paths.
 */
function getParentSectionPath(sectionPath) {
  const segments = sectionPath.split(" / ").filter((segment) => Boolean(segment));
  if (segments.length < 2) {
    return void 0;
  }
  segments.pop();
  return segments.join(" / ");
}
|
|
2022
|
+
/**
 * Sort comparator ordering sections by their first chunk position.
 * Sections without a `startChunk` are ranked as Number.MAX_SAFE_INTEGER,
 * which places them last.
 *
 * @param {object} left - First section.
 * @param {object} right - Second section.
 * @returns {number} Negative, zero or positive per Array.prototype.sort.
 */
function compareSections(left, right) {
  const rankOf = (section) => section.startChunk ?? Number.MAX_SAFE_INTEGER;
  return rankOf(left) - rankOf(right);
}
|
|
2025
|
+
/**
 * Smallest `position` among the given chunks.
 *
 * Folds with Math.min one element at a time rather than spreading the whole
 * array into a single call, so very large chunk lists cannot exceed the
 * engine's argument limit.
 *
 * @param {Array<{ position: number }>} chunks - Indexed chunks.
 * @returns {number | undefined} Lowest position, or undefined for an empty list.
 */
function minPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  let lowest = Infinity;
  for (const chunk of chunks) {
    lowest = Math.min(lowest, chunk.position);
  }
  return lowest;
}
|
|
2031
|
+
/**
 * Largest `position` among the given chunks.
 *
 * Folds with Math.max one element at a time rather than spreading the whole
 * array into a single call, so very large chunk lists cannot exceed the
 * engine's argument limit.
 *
 * @param {Array<{ position: number }>} chunks - Indexed chunks.
 * @returns {number | undefined} Highest position, or undefined for an empty list.
 */
function maxPosition(chunks) {
  if (chunks.length === 0) {
    return void 0;
  }
  let highest = -Infinity;
  for (const chunk of chunks) {
    highest = Math.max(highest, chunk.position);
  }
  return highest;
}
|
|
2037
|
+
/**
 * Count indexed chunks by their `chunkType`.
 *
 * The `text`, `image` and `table` counters are always present. A chunk with
 * any other type gets its own counter starting at 1, instead of producing
 * `NaN` from incrementing an undefined entry.
 *
 * @param {Array<{ chunkType: string }>} chunks - Indexed chunks.
 * @returns {Record<string, number>} Per-type chunk counts.
 */
function countIndexedTypes(chunks) {
  const counts = { text: 0, image: 0, table: 0 };
  for (const chunk of chunks) {
    counts[chunk.chunkType] = (counts[chunk.chunkType] ?? 0) + 1;
  }
  return counts;
}
|
|
2046
|
+
/**
 * Tell whether a chunk's section path is the given section or one of its
 * descendants (a path continuing with " / " after the section path).
 *
 * @param {string} chunkSectionPath - Section path of the chunk.
 * @param {string} sectionPath - Section path to test against.
 * @returns {boolean} True when the chunk belongs to the section subtree.
 */
function isInSection(chunkSectionPath, sectionPath) {
  if (chunkSectionPath === sectionPath) {
    return true;
  }
  const descendantPrefix = `${sectionPath} / `;
  return chunkSectionPath.startsWith(descendantPrefix);
}
|
|
2049
|
+
/**
 * Normalize a caller-supplied limit to an integer in `[1, maxValue]`.
 *
 * Missing (`undefined`/`null`) or non-numeric (`NaN`) input falls back to
 * `defaultValue`, so an invalid limit can never propagate as `NaN`.
 * Fractions are floored; values below 1 become 1; values above `maxValue`
 * (including Infinity) become `maxValue`.
 *
 * @param {number | undefined | null} value - Requested limit.
 * @param {number} defaultValue - Limit used when none (or an invalid one) is given.
 * @param {number} maxValue - Upper bound.
 * @returns {number} The effective limit.
 */
function clampLimit(value, defaultValue, maxValue) {
  if (value == null) {
    return defaultValue;
  }
  const floored = Math.floor(value);
  if (Number.isNaN(floored)) {
    return defaultValue;
  }
  return Math.min(Math.max(floored, 1), maxValue);
}
|
|
2055
|
+
/**
 * Decide whether an indexed chunk satisfies the optional read filters.
 *
 * Each of `chunkId`, `chunkType` and `sectionPath` is only applied when it
 * is set (truthy); the section filter also accepts sub-sections.
 *
 * @param {object} chunk - Indexed chunk.
 * @param {object} params - Read parameters carrying the optional filters.
 * @returns {boolean} True when the chunk passes every active filter.
 */
function matchesReadScope(chunk, params) {
  const { chunkId, chunkType, sectionPath } = params;
  const idMatches = !chunkId || chunk.chunkId === chunkId;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  if (!idMatches || !typeMatches) {
    return false;
  }
  if (!sectionPath) {
    return true;
  }
  return isInSection(chunk.sectionPath, sectionPath);
}
|
|
2067
|
+
/**
 * Pick the chunks to return for a read request.
 *
 * With `chunkId` set, the first `limit` chunks are returned as-is.
 * Otherwise chunks are kept when their position lies between `startChunk`
 * (default: position of the first chunk, or 1) and `endChunk` (default:
 * unbounded), then cut to `limit` entries.
 *
 * @param {object[]} chunks - Scoped indexed chunks.
 * @param {object} params - Read parameters (`chunkId`, `startChunk`, `endChunk`).
 * @param {number} limit - Maximum number of chunks to return.
 * @returns {object[]} Selected chunks.
 */
function selectReadWindow(chunks, params, limit) {
  if (params.chunkId) {
    return chunks.slice(0, limit);
  }
  const firstPosition = params.startChunk ?? chunks[0]?.position ?? 1;
  const lastPosition = params.endChunk ?? Number.MAX_SAFE_INTEGER;
  const isWithinWindow = ({ position }) => position >= firstPosition && position <= lastPosition;
  const windowed = chunks.filter(isWithinWindow);
  return windowed.slice(0, limit);
}
|
|
2075
|
+
/**
 * Project an indexed chunk onto the fields returned by readChunks.
 * The internal `source` reference is intentionally left out.
 *
 * @param {object} chunk - Indexed chunk.
 * @returns {object} Public chunk representation.
 */
function toReadChunk(chunk) {
  const {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  } = chunk;
  return {
    position,
    chunkId,
    chunkType,
    content,
    sectionPath,
    sourceChunkPath,
    filePath,
    metadata
  };
}
|
|
2087
|
+
/**
 * Decide whether an indexed chunk satisfies the optional grep filters.
 *
 * `chunkType` must match exactly when set; `sectionPathPrefix`, when set,
 * must be a plain string prefix of the chunk's section path.
 *
 * @param {object} chunk - Indexed chunk.
 * @param {object} params - Grep parameters carrying the optional filters.
 * @returns {boolean} True when the chunk passes every active filter.
 */
function matchesGrepScope(chunk, params) {
  const { chunkType, sectionPathPrefix } = params;
  const typeMatches = !chunkType || chunk.chunkType === chunkType;
  if (!typeMatches) {
    return false;
  }
  return !sectionPathPrefix || chunk.sectionPath.startsWith(sectionPathPrefix);
}
|
|
2096
|
+
/**
 * Build a function that finds every occurrence of a pattern in a string.
 *
 * - Regex mode (`params.isRegex`): the pattern is compiled with the global
 *   flag (plus `i` unless `isCaseSensitive`). Scanning stops after the first
 *   zero-length match so that patterns able to match the empty string do not
 *   flood the result.
 * - Literal mode: non-overlapping occurrences of the pattern, compared in
 *   lower case unless `isCaseSensitive`. An empty pattern matches nothing;
 *   scanning for it would otherwise never terminate, because
 *   `indexOf("", n)` never returns -1.
 *
 * @param {{ pattern: string, isRegex?: boolean, isCaseSensitive?: boolean }} params
 * @returns {(content: string) => Array<{ startOffset: number, endOffset: number }>}
 *   Matcher returning match offsets in order of appearance.
 * @throws {SyntaxError} In regex mode, when the pattern is not a valid regular expression.
 */
function createMatcher(params) {
  if (params.isRegex) {
    const flags = params.isCaseSensitive ? "g" : "gi";
    const regex = new RegExp(params.pattern, flags);
    return (content) => {
      const matches = [];
      for (const match of content.matchAll(regex)) {
        const startOffset = match.index ?? 0;
        const endOffset = startOffset + (match[0] ?? "").length;
        matches.push({ startOffset, endOffset });
        if (endOffset === startOffset) {
          break;
        }
      }
      return matches;
    };
  }

  const needle = params.isCaseSensitive ? params.pattern : params.pattern.toLowerCase();
  if (!needle) {
    // Nothing to look for: avoid the endless scan an empty needle would cause.
    return () => [];
  }
  return (content) => {
    const haystack = params.isCaseSensitive ? content : content.toLowerCase();
    const matches = [];
    let index = haystack.indexOf(needle);
    while (index >= 0) {
      matches.push({ startOffset: index, endOffset: index + needle.length });
      index = haystack.indexOf(needle, index + needle.length);
    }
    return matches;
  };
}
|
|
2125
|
+
/**
 * Cut a snippet around a match: the matched range widened by
 * `contextChars` on both sides and clipped to the bounds of `content`.
 *
 * @param {string} content - Full chunk text.
 * @param {number} startOffset - Start of the match (inclusive).
 * @param {number} endOffset - End of the match (exclusive).
 * @param {number} contextChars - Characters of context to keep on each side.
 * @returns {string} The snippet text.
 */
function buildSnippet(content, startOffset, endOffset, contextChars) {
  const from = startOffset - contextChars;
  const to = endOffset + contextChars;
  const clippedFrom = from < 0 ? 0 : from;
  const clippedTo = to > content.length ? content.length : to;
  return content.slice(clippedFrom, clippedTo);
}
|
|
2130
|
+
/**
 * Build a reference entry from a retrieval result, attaching the local
 * document ID when the result's server document ID is known locally.
 * Nullish `documentId` / `sectionPath` values are normalized to undefined.
 *
 * @param {object} result - Retrieval result with `source`, `chunkType`, `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by server document ID.
 * @returns {object} Reference entry.
 */
function toResultReference(result, documentByServerId) {
  const { source, chunkType, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    sectionPath: source.sectionPath ?? void 0,
    chunkType,
    score
  };
}
|
|
2140
|
+
/**
 * Build a search result entry from a retrieval result, attaching the local
 * document ID when the result's server document ID is known locally.
 * Nullish `documentId`, `sectionPath` and `sourceFileName` values are
 * normalized to undefined.
 *
 * @param {object} result - Retrieval result with `source`, `chunkType`, `content`, `score`.
 * @param {Map<string, object>} documentByServerId - Local documents keyed by server document ID.
 * @returns {object} Search result entry.
 */
function toRemoteSearchResult(result, documentByServerId) {
  const { source, chunkType, content, score } = result;
  const documentId = source.documentId ?? void 0;
  const localDocument = documentId ? documentByServerId.get(documentId) : void 0;
  return {
    localDocumentId: localDocument?.localDocumentId,
    documentId,
    chunkType,
    content,
    score,
    sectionPath: source.sectionPath ?? void 0,
    sourceFileName: source.sourceFileName ?? void 0
  };
}
|
|
2152
|
+
|
|
1191
2153
|
// src/client.ts
|
|
1192
2154
|
/**
 * Work out the file name to use for an upload.
 *
 * An explicit (truthy) name always wins. Otherwise the base name is taken
 * from a string path, or from the `path` of a stream-like object; any other
 * input yields undefined.
 *
 * @param {unknown} file - File path, stream, or in-memory data.
 * @param {string} [explicitFileName] - Caller-provided file name.
 * @returns {string | undefined} Inferred file name, if one can be determined.
 */
function inferFileName(file, explicitFileName) {
  if (explicitFileName) {
    return explicitFileName;
  }
  let sourcePath;
  if (typeof file === "string") {
    sourcePath = file;
  } else if (isReadStream2(file) && typeof file.path === "string") {
    sourcePath = file.path;
  }
  return sourcePath === void 0 ? void 0 : path2.basename(sourcePath);
}
|
|
1204
2166
|
/**
 * Duck-type check for a readable stream: a non-null object exposing a
 * `pipe` function.
 *
 * @param {unknown} file - Value to inspect.
 * @returns {boolean} True when the value looks like a stream.
 */
function isReadStream2(file) {
  if (typeof file !== "object" || file === null) {
    return false;
  }
  if (!("pipe" in file)) {
    return false;
  }
  return typeof file.pipe === "function";
}
|
|
2169
|
+
/**
 * Collect the parsing options of a parse request into the payload sent
 * with job creation.
 *
 * Options left undefined are omitted (`ocr` is sent as `ocrEnabled`). When
 * no option is set at all, undefined is returned instead of an empty object.
 *
 * @param {object} params - Parse request parameters.
 * @returns {object | undefined} Parsing parameters, or undefined when none are set.
 */
function buildParsingParams(params) {
  const candidates = [
    ["model", params.model],
    ["ocrEnabled", params.ocr],
    ["docType", params.docType],
    ["smartTitleParse", params.smartTitleParse],
    ["summaryImage", params.summaryImage],
    ["summaryTable", params.summaryTable],
    ["summaryTxt", params.summaryTxt],
    ["addFragDesc", params.addFragDesc],
    ["kbDir", params.kbDir]
  ];
  const parsingParams = {};
  for (const [key, value] of candidates) {
    if (value !== void 0) {
      parsingParams[key] = value;
    }
  }
  return Object.keys(parsingParams).length > 0 ? parsingParams : void 0;
}
|
|
1207
2188
|
var Knowhere = class {
|
|
1208
2189
|
/** Jobs resource for low-level API */
|
|
1209
2190
|
jobs;
|
|
@@ -1211,21 +2192,25 @@ var Knowhere = class {
|
|
|
1211
2192
|
retrieval;
|
|
1212
2193
|
/** Documents resource for canonical document lifecycle operations */
|
|
1213
2194
|
documents;
|
|
2195
|
+
/** Client-side local knowledge tools over parsed Knowhere results */
|
|
2196
|
+
knowledge;
|
|
1214
2197
|
httpClient;
|
|
1215
2198
|
/**
|
|
1216
2199
|
* Create a new Knowhere client
|
|
1217
2200
|
*/
|
|
1218
2201
|
constructor(options = {}) {
|
|
1219
2202
|
const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
|
|
1220
|
-
|
|
2203
|
+
const authTokenProvider = apiKey ? void 0 : options.authTokenProvider;
|
|
2204
|
+
if (!apiKey && !authTokenProvider) {
|
|
1221
2205
|
throw new ValidationError(
|
|
1222
|
-
`API
|
|
2206
|
+
`API authentication is required. Provide it via options.apiKey, options.authTokenProvider, or ${ENV.API_KEY} environment variable.`
|
|
1223
2207
|
);
|
|
1224
2208
|
}
|
|
1225
2209
|
const baseURL = options.baseURL ?? process.env[ENV.BASE_URL] ?? DEFAULT_BASE_URL;
|
|
1226
2210
|
this.httpClient = new HttpClient({
|
|
1227
2211
|
baseURL,
|
|
1228
2212
|
apiKey,
|
|
2213
|
+
authTokenProvider,
|
|
1229
2214
|
timeout: options.timeout,
|
|
1230
2215
|
uploadTimeout: options.uploadTimeout,
|
|
1231
2216
|
maxRetries: options.maxRetries,
|
|
@@ -1236,6 +2221,7 @@ var Knowhere = class {
|
|
|
1236
2221
|
this.jobs = new Jobs(this.httpClient);
|
|
1237
2222
|
this.retrieval = new Retrieval(this.httpClient);
|
|
1238
2223
|
this.documents = new Documents(this.httpClient);
|
|
2224
|
+
this.knowledge = new Knowledge(this);
|
|
1239
2225
|
}
|
|
1240
2226
|
/**
|
|
1241
2227
|
* High-level API: Parse a document and return structured results
|
|
@@ -1258,6 +2244,24 @@ var Knowhere = class {
|
|
|
1258
2244
|
* ```
|
|
1259
2245
|
*/
|
|
1260
2246
|
async parse(params) {
|
|
2247
|
+
const job = await this.startParse(params);
|
|
2248
|
+
const jobResult = await this.jobs.wait(job.jobId, {
|
|
2249
|
+
pollInterval: params.pollInterval,
|
|
2250
|
+
pollTimeout: params.pollTimeout,
|
|
2251
|
+
onProgress: params.onPollProgress,
|
|
2252
|
+
signal: params.signal
|
|
2253
|
+
});
|
|
2254
|
+
const result = await this.jobs.load(jobResult, {
|
|
2255
|
+
verifyChecksum: params.verifyChecksum
|
|
2256
|
+
});
|
|
2257
|
+
return enrichParseResult(result, jobResult);
|
|
2258
|
+
}
|
|
2259
|
+
/**
|
|
2260
|
+
* Start a parse job and return immediately after the URL job is created or
|
|
2261
|
+
* the local file is uploaded. Use jobs.get()/jobs.wait() and jobs.load()
|
|
2262
|
+
* to inspect completion and load results later.
|
|
2263
|
+
*/
|
|
2264
|
+
async startParse(params) {
|
|
1261
2265
|
if (!params.url && !params.file) {
|
|
1262
2266
|
throw new ValidationError("Either url or file must be provided");
|
|
1263
2267
|
}
|
|
@@ -1271,22 +2275,6 @@ var Knowhere = class {
|
|
|
1271
2275
|
"fileName is required when file is a Buffer, Uint8Array, or stream without a path."
|
|
1272
2276
|
);
|
|
1273
2277
|
}
|
|
1274
|
-
const parsingParams = {
|
|
1275
|
-
model: params.model,
|
|
1276
|
-
ocrEnabled: params.ocr,
|
|
1277
|
-
docType: params.docType,
|
|
1278
|
-
smartTitleParse: params.smartTitleParse,
|
|
1279
|
-
summaryImage: params.summaryImage,
|
|
1280
|
-
summaryTable: params.summaryTable,
|
|
1281
|
-
summaryTxt: params.summaryTxt,
|
|
1282
|
-
addFragDesc: params.addFragDesc,
|
|
1283
|
-
kbDir: params.kbDir
|
|
1284
|
-
};
|
|
1285
|
-
Object.keys(parsingParams).forEach((key) => {
|
|
1286
|
-
if (parsingParams[key] === void 0) {
|
|
1287
|
-
delete parsingParams[key];
|
|
1288
|
-
}
|
|
1289
|
-
});
|
|
1290
2278
|
const webhook = params.webhook;
|
|
1291
2279
|
const job = await this.jobs.create({
|
|
1292
2280
|
sourceType,
|
|
@@ -1295,7 +2283,7 @@ var Knowhere = class {
|
|
|
1295
2283
|
dataId: params.dataId,
|
|
1296
2284
|
namespace: params.namespace,
|
|
1297
2285
|
documentId: params.documentId,
|
|
1298
|
-
parsingParams:
|
|
2286
|
+
parsingParams: buildParsingParams(params),
|
|
1299
2287
|
webhook
|
|
1300
2288
|
});
|
|
1301
2289
|
if (params.file) {
|
|
@@ -1305,16 +2293,7 @@ var Knowhere = class {
|
|
|
1305
2293
|
signal: params.signal
|
|
1306
2294
|
});
|
|
1307
2295
|
}
|
|
1308
|
-
|
|
1309
|
-
pollInterval: params.pollInterval,
|
|
1310
|
-
pollTimeout: params.pollTimeout,
|
|
1311
|
-
onProgress: params.onPollProgress,
|
|
1312
|
-
signal: params.signal
|
|
1313
|
-
});
|
|
1314
|
-
const result = await this.jobs.load(jobResult, {
|
|
1315
|
-
verifyChecksum: params.verifyChecksum
|
|
1316
|
-
});
|
|
1317
|
-
return enrichParseResult(result, jobResult);
|
|
2296
|
+
return job;
|
|
1318
2297
|
}
|
|
1319
2298
|
};
|
|
1320
2299
|
export {
|
|
@@ -1331,6 +2310,8 @@ export {
|
|
|
1331
2310
|
Jobs,
|
|
1332
2311
|
Knowhere,
|
|
1333
2312
|
KnowhereError,
|
|
2313
|
+
Knowledge,
|
|
2314
|
+
LocalKnowledgeStore,
|
|
1334
2315
|
NetworkError,
|
|
1335
2316
|
NotFoundError,
|
|
1336
2317
|
PaymentRequiredError,
|