@ontos-ai/knowhere-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,1187 @@
1
+ // src/client.ts
2
+ import path from "path";
3
+
4
+ // src/lib/http-client.ts
5
+ import axios from "axios";
6
+
7
+ // src/version.ts
8
// SDK version string; HttpClient interpolates it into the User-Agent header.
// NOTE(review): the package header for this build reads 0.2.0 while this
// constant is "0.1.0" — confirm whether the version file was missed in the bump.
var VERSION = "0.1.0";
9
+
10
+ // src/constants.ts
11
// Default API origin (the use site is outside this view).
var DEFAULT_BASE_URL = "https://api.knowhereto.ai";
// Fallback axios request timeout, in milliseconds (60 000).
var DEFAULT_TIMEOUT = 6e4;
// Fallback retry budget handed to withRetry by HttpClient.
var DEFAULT_MAX_RETRIES = 5;
// Initial delay between job-status polls, in milliseconds (10 000).
var DEFAULT_POLL_INTERVAL = 1e4;
// Upper bound the poll delay may grow to, in milliseconds (30 000).
var MAX_POLL_INTERVAL = 3e4;
// Total polling budget before PollingTimeoutError, in milliseconds (1 800 000).
var DEFAULT_POLL_TIMEOUT = 18e5;
// Elapsed milliseconds after which the poll delay starts growing (60 000).
var POLL_INTERVAL_INCREASE_THRESHOLD = 6e4;
// Growth factor applied to the poll delay once past the threshold.
var POLL_INTERVAL_MULTIPLIER = 1.2;
// First retry backoff, in milliseconds.
var INITIAL_RETRY_DELAY = 500;
// Cap on the computed retry backoff, in milliseconds (30 000).
var MAX_RETRY_DELAY = 3e4;
// Exponential base: backoff = INITIAL_RETRY_DELAY * RETRY_DELAY_BASE ** attempt.
var RETRY_DELAY_BASE = 2;
// HTTP statuses eligible for retry. isRetryableError adds conditions on top:
// 429 only with a retry-after hint, 409 only with error code "ABORTED".
var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([409, 429, 502, 503, 504]);
// Job statuses after which polling stops.
var TERMINAL_JOB_STATUSES = /* @__PURE__ */ new Set(["done", "failed"]);
// Presumably the names of environment variables read by the client — the
// reading code is outside this view; verify against the client constructor.
var ENV = {
  API_KEY: "KNOWHERE_API_KEY",
  BASE_URL: "KNOWHERE_BASE_URL",
  LOG_LEVEL: "KNOWHERE_LOG_LEVEL"
};
29
+
30
+ // src/errors/base.ts
31
/**
 * Root of the SDK error hierarchy; every other error class in this file
 * extends it, directly or indirectly.
 *
 * @param message Human-readable description passed through to `Error`.
 */
var KnowhereError = class extends Error {
  constructor(message) {
    super(message);
    this.name = "KnowhereError";
    // Explicitly point the instance at the prototype of the class actually
    // being constructed (new.target), so subclass instances keep their chain.
    Object.setPrototypeOf(this, new.target.prototype);
  }
};
38
/**
 * Failure that happened before any HTTP response was received.
 *
 * @param message Description of the failure.
 * @param cause   Optional underlying error; stored as `this.cause`.
 */
var NetworkError = class extends KnowhereError {
  constructor(message, cause) {
    super(message);
    this.cause = cause;
    this.name = "NetworkError";
  }
};
45
/**
 * NetworkError specialisation for requests that timed out. No cause is
 * forwarded, so `this.cause` is always undefined on these instances.
 *
 * @param message Defaults to "Request timed out".
 */
var TimeoutError = class extends NetworkError {
  constructor(message = "Request timed out") {
    super(message);
    this.name = "TimeoutError";
  }
};
51
/**
 * Raised by job polling when the polling budget is exhausted.
 *
 * @param message   Defaults to "Polling timed out".
 * @param elapsedMs Milliseconds spent polling; stored as `this.elapsedMs`.
 */
var PollingTimeoutError = class extends KnowhereError {
  constructor(message = "Polling timed out", elapsedMs) {
    super(message);
    this.elapsedMs = elapsedMs;
    this.name = "PollingTimeoutError";
  }
};
58
/**
 * Error describing a checksum mismatch.
 * NOTE(review): nothing in the visible part of this file throws it — confirm
 * whether checksum verification is implemented elsewhere.
 *
 * @param message  Defaults to "Checksum verification failed".
 * @param expected Expected checksum; stored as `this.expected`.
 * @param actual   Computed checksum; stored as `this.actual`.
 */
var ChecksumError = class extends KnowhereError {
  constructor(message = "Checksum verification failed", expected, actual) {
    super(message);
    this.expected = expected;
    this.actual = actual;
    this.name = "ChecksumError";
  }
};
66
/**
 * Raised for invalid caller input (for example, an unsupported file type
 * passed to uploadFile).
 *
 * @param message Description of what was invalid.
 */
var ValidationError = class extends KnowhereError {
  constructor(message) {
    super(message);
    this.name = "ValidationError";
  }
};
72
/**
 * Error type for operations attempted in an invalid state. Its throw sites
 * are outside the visible part of this file.
 *
 * @param message Description of the state problem.
 */
var InvalidStateError = class extends KnowhereError {
  constructor(message) {
    super(message);
    this.name = "InvalidStateError";
  }
};
78
+
79
+ // src/errors/api-errors.ts
80
/**
 * Base class for errors built from an HTTP error response.
 *
 * @param message    Message extracted from the response (or a fallback).
 * @param statusCode HTTP status of the response.
 * @param code       Optional machine-readable error code from the body.
 * @param requestId  Optional request identifier (header or body field).
 * @param details    Optional plain-object details from the error body.
 * @param body       Raw response payload as received.
 */
var APIError = class extends KnowhereError {
  constructor(message, statusCode, code, requestId, details, body) {
    super(message);
    this.statusCode = statusCode;
    this.code = code;
    this.requestId = requestId;
    this.details = details;
    this.body = body;
    this.name = "APIError";
  }
};
91
/**
 * APIError fixed to HTTP status 400. Unlike its sibling classes it has no
 * default message. Remaining parameters are forwarded to APIError unchanged.
 */
var BadRequestError = class extends APIError {
  constructor(message, code, requestId, details, body) {
    super(message, 400, code, requestId, details, body);
    this.name = "BadRequestError";
  }
};
97
/**
 * APIError fixed to HTTP status 401; message defaults to
 * "Authentication failed". Remaining parameters are forwarded to APIError.
 */
var AuthenticationError = class extends APIError {
  constructor(message = "Authentication failed", code, requestId, details, body) {
    super(message, 401, code, requestId, details, body);
    this.name = "AuthenticationError";
  }
};
103
/**
 * APIError fixed to HTTP status 402; message defaults to "Payment required".
 * Remaining parameters are forwarded to APIError.
 */
var PaymentRequiredError = class extends APIError {
  constructor(message = "Payment required", code, requestId, details, body) {
    super(message, 402, code, requestId, details, body);
    this.name = "PaymentRequiredError";
  }
};
109
/**
 * APIError fixed to HTTP status 403; message defaults to "Permission denied".
 * Remaining parameters are forwarded to APIError.
 */
var PermissionDeniedError = class extends APIError {
  constructor(message = "Permission denied", code, requestId, details, body) {
    super(message, 403, code, requestId, details, body);
    this.name = "PermissionDeniedError";
  }
};
115
/**
 * APIError fixed to HTTP status 404; message defaults to "Resource not found".
 * Also thrown locally by the Jobs resource when an upload or result URL is
 * missing, in which case only the message is supplied.
 */
var NotFoundError = class extends APIError {
  constructor(message = "Resource not found", code, requestId, details, body) {
    super(message, 404, code, requestId, details, body);
    this.name = "NotFoundError";
  }
};
121
/**
 * APIError fixed to HTTP status 409; message defaults to "Conflict".
 * The retry logic only retries a 409 when its error code is "ABORTED".
 */
var ConflictError = class extends APIError {
  constructor(message = "Conflict", code, requestId, details, body) {
    super(message, 409, code, requestId, details, body);
    this.name = "ConflictError";
  }
};
127
/**
 * APIError fixed to HTTP status 429; message defaults to "Rate limit exceeded".
 *
 * @param retryAfter Optional delay hint stored as `this.retryAfter`.
 *   HttpClient.handleError fills it in whole seconds, and getRetryAfter
 *   multiplies it by 1000 when reading it back.
 */
var RateLimitError = class extends APIError {
  constructor(message = "Rate limit exceeded", code, requestId, details, body, retryAfter) {
    super(message, 429, code, requestId, details, body);
    this.retryAfter = retryAfter;
    this.name = "RateLimitError";
  }
};
134
/**
 * APIError fixed to HTTP status 500; message defaults to
 * "Internal server error". 500 is not in RETRYABLE_STATUS_CODES.
 */
var InternalServerError = class extends APIError {
  constructor(message = "Internal server error", code, requestId, details, body) {
    super(message, 500, code, requestId, details, body);
    this.name = "InternalServerError";
  }
};
140
/**
 * APIError for upstream-unavailable responses. Unlike its siblings it takes
 * the status code as a parameter (default 503) because createAPIError uses
 * it for both 502 and 503.
 */
var ServiceUnavailableError = class extends APIError {
  constructor(message = "Service unavailable", statusCode = 503, code, requestId, details, body) {
    super(message, statusCode, code, requestId, details, body);
    this.name = "ServiceUnavailableError";
  }
};
146
/**
 * APIError fixed to HTTP status 504; message defaults to "Gateway timeout".
 * Remaining parameters are forwarded to APIError.
 */
var GatewayTimeoutError = class extends APIError {
  constructor(message = "Gateway timeout", code, requestId, details, body) {
    super(message, 504, code, requestId, details, body);
    this.name = "GatewayTimeoutError";
  }
};
152
/**
 * Build the most specific APIError subclass for an HTTP status.
 *
 * @param statusCode HTTP status of the failed response.
 * @param message    Error message.
 * @param code       Optional machine-readable error code.
 * @param requestId  Optional request identifier.
 * @param details    Optional plain-object details.
 * @param body       Raw response payload.
 * @param retryAfter Delay hint; only consumed for status 429.
 * @returns An APIError (or subclass) instance; unknown statuses yield a
 *          plain APIError carrying the given status.
 */
function createAPIError(statusCode, message, code, requestId, details, body, retryAfter) {
  // Two statuses need a non-standard constructor call, so handle them first.
  if (statusCode === 429) {
    return new RateLimitError(message, code, requestId, details, body, retryAfter);
  }
  if (statusCode === 502 || statusCode === 503) {
    return new ServiceUnavailableError(message, statusCode, code, requestId, details, body);
  }
  // Map lookup keeps strict numeric matching (a string "400" does not match).
  const fixedStatusClasses = new Map([
    [400, BadRequestError],
    [401, AuthenticationError],
    [402, PaymentRequiredError],
    [403, PermissionDeniedError],
    [404, NotFoundError],
    [409, ConflictError],
    [500, InternalServerError],
    [504, GatewayTimeoutError]
  ]);
  const SpecificError = fixedStatusClasses.get(statusCode);
  if (SpecificError !== undefined) {
    return new SpecificError(message, code, requestId, details, body);
  }
  return new APIError(message, statusCode, code, requestId, details, body);
}
179
+
180
+ // src/errors/job-errors.ts
181
/**
 * Raised by job polling when a job reaches a terminal status other than
 * "done".
 *
 * @param message   Failure description (the job's error message when present).
 * @param code      Error code; polling passes "UNKNOWN_ERROR" when the job
 *                  carries no error object.
 * @param jobResult The job payload that triggered the failure.
 */
var JobFailedError = class extends KnowhereError {
  constructor(message, code, jobResult) {
    super(message);
    this.code = code;
    this.jobResult = jobResult;
    this.name = "JobFailedError";
  }
};
189
+
190
+ // src/lib/utils.ts
191
/**
 * Promise-based delay.
 *
 * @param ms Delay handed to setTimeout (milliseconds).
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
194
/**
 * Convert a snake_case identifier to camelCase.
 * Only an underscore directly followed by a lowercase ASCII letter is
 * rewritten; everything else is left as is.
 *
 * @param str Input identifier.
 * @returns The converted string.
 */
function snakeToCamel(str) {
  const capitalise = (_whole, letter) => letter.toUpperCase();
  return str.replace(/_([a-z])/g, capitalise);
}
197
/**
 * Convert a camelCase identifier to snake_case by prefixing every uppercase
 * ASCII letter with "_" and lowercasing it (so a leading capital yields a
 * leading underscore).
 *
 * @param str Input identifier.
 * @returns The converted string.
 */
function camelToSnake(str) {
  return str.replace(/[A-Z]/g, (upper) => "_" + upper.toLowerCase());
}
200
/**
 * Recursively rename the keys of plain objects from snake_case to camelCase.
 * Arrays are mapped element-wise; anything that is not a plain object
 * (constructor === Object) or an array is returned untouched.
 *
 * @param obj Arbitrary value.
 * @returns A converted copy for arrays and plain objects, otherwise `obj`.
 */
function keysToCamel(obj) {
  if (obj === null || obj === undefined) {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((element) => keysToCamel(element));
  }
  const isPlainObject = typeof obj === "object" && obj.constructor === Object;
  if (!isPlainObject) {
    return obj;
  }
  const converted = {};
  for (const key of Object.keys(obj)) {
    converted[snakeToCamel(key)] = keysToCamel(obj[key]);
  }
  return converted;
}
216
/**
 * Recursively rename the keys of plain objects from camelCase to snake_case.
 * Arrays are mapped element-wise; anything that is not a plain object
 * (constructor === Object) or an array is returned untouched.
 *
 * @param obj Arbitrary value.
 * @returns A converted copy for arrays and plain objects, otherwise `obj`.
 */
function keysToSnake(obj) {
  if (obj === null || obj === undefined) {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((element) => keysToSnake(element));
  }
  const isPlainObject = typeof obj === "object" && obj.constructor === Object;
  if (!isPlainObject) {
    return obj;
  }
  const converted = {};
  for (const key of Object.keys(obj)) {
    converted[camelToSnake(key)] = keysToSnake(obj[key]);
  }
  return converted;
}
232
/**
 * Recursively replace ISO-looking timestamp strings with Date objects.
 * A value is converted only when its key ends in "At" or "Date", it is a
 * string, and it starts with `YYYY-MM-DDT`. Arrays are mapped element-wise;
 * non-plain objects and primitives are returned untouched.
 *
 * @param obj Arbitrary value.
 * @returns A converted copy for arrays and plain objects, otherwise `obj`.
 */
function parseDates(obj) {
  if (obj === null || obj === undefined) {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((element) => parseDates(element));
  }
  if (typeof obj !== "object" || obj.constructor !== Object) {
    return obj;
  }
  const looksLikeTimestamp = (key, value) =>
    (key.endsWith("At") || key.endsWith("Date")) &&
    typeof value === "string" &&
    /^\d{4}-\d{2}-\d{2}T/.test(value);
  const converted = {};
  for (const key of Object.keys(obj)) {
    const value = obj[key];
    converted[key] = looksLikeTimestamp(key, value) ? new Date(value) : parseDates(value);
  }
  return converted;
}
252
/**
 * Report whether a job status is terminal, i.e. a member of
 * TERMINAL_JOB_STATUSES ("done" or "failed").
 *
 * @param status Job status string.
 * @returns true when polling should stop for this status.
 */
function isTerminalStatus(status) {
  return TERMINAL_JOB_STATUSES.has(status);
}
255
/**
 * Attach the computed `isTerminal`, `isDone` and `isFailed` getters to a job
 * payload, in place. A getter is only added when the object has no own
 * property of that name. Added getters are enumerable and configurable and
 * derive their value from `this.status` on each access.
 *
 * @param jobResult Job payload to augment (mutated).
 * @returns The same `jobResult` object.
 */
function enrichJobResult(jobResult) {
  const computedGetters = [
    ["isTerminal", function () {
      return isTerminalStatus(this.status);
    }],
    ["isDone", function () {
      return this.status === "done";
    }],
    ["isFailed", function () {
      return this.status === "failed";
    }]
  ];
  const toDefine = {};
  for (const [property, getter] of computedGetters) {
    if (Object.getOwnPropertyDescriptor(jobResult, property) === undefined) {
      toDefine[property] = { get: getter, enumerable: true, configurable: true };
    }
  }
  if (Object.keys(toDefine).length > 0) {
    Object.defineProperties(jobResult, toDefine);
  }
  return jobResult;
}
289
/**
 * Turn an archive-supplied path into a safe, relative, forward-slash path.
 *
 * The path is split into segments and every empty, "." or ".." segment is
 * dropped. The previous single-pass removal of "../" could be defeated by
 * nested sequences ("....//" collapsed into "../"), and a leading backslash
 * survived as a leading "/". Working per segment means no traversal
 * component can be reassembled.
 *
 * @param path2 Path taken from untrusted archive metadata.
 * @returns A relative path that contains no "..", "." or empty segments.
 */
function sanitizePath(path2) {
  return path2
    .replace(/\\/g, "/") // treat Windows separators like POSIX ones first
    .split("/")
    .filter((segment) => segment !== "" && segment !== "." && segment !== "..")
    .join("/");
}
295
/**
 * Return the lowercase extension of a file name or path, without the dot.
 *
 * The extension may not contain a path separator, so a dot inside a
 * directory name (e.g. "dir.v2/file") is no longer mistaken for the start of
 * an extension.
 *
 * @param filename File name or path.
 * @returns The extension in lowercase, or "" when there is none.
 */
function getFileExtension(filename) {
  const match = filename.match(/\.([^./\\]+)$/);
  return match ? match[1].toLowerCase() : "";
}
299
/**
 * Randomly perturb a value by up to ±`percent` of itself.
 *
 * @param value   Base value.
 * @param percent Maximum relative deviation (0.2 means ±20 %).
 * @returns The perturbed value, rounded to the nearest integer.
 */
function jitter(value, percent = 0.2) {
  const signedUnit = Math.random() * 2 - 1; // uniform in [-1, 1)
  const scale = 1 + signedUnit * percent;
  return Math.round(value * scale);
}
303
+
304
+ // src/lib/retry.ts
305
/**
 * Extract a machine-readable error code from an error-like value.
 * Lookup order: `response.data.error.code`, then `response.data.code`, then
 * the error's own `code`.
 *
 * @param error Any thrown value.
 * @returns The first non-nullish code found, otherwise undefined.
 */
function getErrorCode(error) {
  const payload = error?.response?.data;
  return payload?.error?.code ?? payload?.code ?? error?.code;
}
309
/**
 * Extract the `details` object from an error-like value.
 * Lookup order: `response.data.error.details`, then `response.data.details`,
 * then the error's own `details`.
 *
 * @param error Any thrown value.
 * @returns The first non-nullish details value found, otherwise undefined.
 */
function getErrorDetails(error) {
  const payload = error?.response?.data;
  return payload?.error?.details ?? payload?.details ?? error?.details;
}
313
/**
 * Read a retry-after hint from the error's details object.
 * Accepts `retry_after` or `retryAfter`, as a number or a numeric string
 * interpreted as seconds. Non-finite and negative values are ignored.
 *
 * @param error Any thrown value.
 * @returns The delay in milliseconds, or undefined when no usable hint exists.
 */
function getBodyRetryAfter(error) {
  const details = getErrorDetails(error);
  if (!details) {
    return void 0;
  }
  const raw = details.retry_after ?? details.retryAfter;
  let seconds;
  if (typeof raw === "number") {
    seconds = raw;
  } else if (typeof raw === "string") {
    seconds = Number.parseFloat(raw);
  } else {
    return void 0;
  }
  const usable = Number.isFinite(seconds) && seconds >= 0;
  return usable ? seconds * 1e3 : void 0;
}
330
/**
 * Decide whether a failed attempt should be retried.
 *
 * @param error      The error thrown by the attempt.
 * @param attempt    Zero-based index of the attempt that just failed.
 * @param maxRetries Retry budget; no retry once `attempt >= maxRetries`.
 * @returns true when budget remains and the error is classified retryable.
 */
function shouldRetry(error, attempt, maxRetries) {
  const budgetExhausted = attempt >= maxRetries;
  return !budgetExhausted && isRetryableError(error);
}
339
/**
 * Classify an error as transient (worth retrying) or not.
 *
 * Retryable cases:
 *  - an Error whose message mentions ECONNRESET, ETIMEDOUT, ENOTFOUND or
 *    ECONNREFUSED;
 *  - a status in RETRYABLE_STATUS_CODES, except that 429 additionally needs
 *    a retry-after hint and 409 needs the error code "ABORTED".
 *
 * @param error Any thrown value.
 * @returns true when the error should be retried.
 */
function isRetryableError(error) {
  if (error instanceof Error) {
    const transientMarkers = ["ECONNRESET", "ETIMEDOUT", "ENOTFOUND", "ECONNREFUSED"];
    if (transientMarkers.some((marker) => error.message.includes(marker))) {
      return true;
    }
  }
  const statusCode = error?.response?.status ?? error?.statusCode;
  if (typeof statusCode !== "number" || !RETRYABLE_STATUS_CODES.has(statusCode)) {
    return false;
  }
  switch (statusCode) {
    case 429:
      // Only back off and retry when the server told us how long to wait.
      return getRetryAfter(error) !== void 0;
    case 409:
      return getErrorCode(error) === "ABORTED";
    default:
      return true;
  }
}
358
/**
 * Compute the backoff before the next retry: exponential growth from
 * INITIAL_RETRY_DELAY, jittered, then capped at MAX_RETRY_DELAY.
 *
 * @param attempt Zero-based index of the attempt that just failed.
 * @returns Delay in milliseconds.
 */
function calculateRetryDelay(attempt) {
  const exponential = INITIAL_RETRY_DELAY * RETRY_DELAY_BASE ** attempt;
  // Jitter is applied before the cap, so the result never exceeds the cap.
  return Math.min(jitter(exponential), MAX_RETRY_DELAY);
}
363
/**
 * Determine how long the server asked us to wait before retrying.
 *
 * Sources, in priority order:
 *  1. a numeric `retryAfter` property on the error itself (seconds);
 *  2. a retry-after value in the error body details (see getBodyRetryAfter);
 *  3. the `retry-after` response header, as delay seconds or as a date.
 *
 * The header's seconds form is now validated like the other two sources:
 * negative or non-finite values are rejected instead of being passed on as
 * a negative or infinite timer delay.
 *
 * @param error Any thrown value (raw axios error or an SDK error).
 * @returns Delay in milliseconds, or undefined when no usable hint exists.
 */
function getRetryAfter(error) {
  const explicit = error?.retryAfter;
  if (typeof explicit === "number" && Number.isFinite(explicit) && explicit >= 0) {
    return explicit * 1e3;
  }
  const bodyRetryAfter = getBodyRetryAfter(error);
  if (bodyRetryAfter !== void 0) {
    return bodyRetryAfter;
  }
  const header = error?.response?.headers?.["retry-after"];
  if (!header || typeof header !== "string") {
    return void 0;
  }
  const seconds = Number.parseFloat(header);
  if (!Number.isNaN(seconds)) {
    // Numeric form: accept only finite, non-negative delays.
    return Number.isFinite(seconds) && seconds >= 0 ? seconds * 1e3 : void 0;
  }
  // Date form: wait until that instant, never a negative amount.
  const date = new Date(header);
  if (!Number.isNaN(date.getTime())) {
    return Math.max(0, date.getTime() - Date.now());
  }
  return void 0;
}
386
/**
 * Run an async operation, retrying transient failures.
 *
 * After each failure the error is rethrown immediately unless shouldRetry
 * approves another attempt. The wait before the next attempt is the
 * server-provided retry-after when available, otherwise the jittered
 * exponential backoff.
 *
 * @param fn         Operation to run; called once per attempt.
 * @param maxRetries Number of retries allowed after the first attempt.
 * @param onRetry    Optional callback `(retryNumber, error)` invoked before
 *                   each wait; `retryNumber` starts at 1.
 * @returns Whatever `fn` resolves to.
 * @throws The last error once it is not retryable or the budget is spent.
 */
async function withRetry(fn, maxRetries, onRetry) {
  let lastError;
  let attempt = 0;
  while (attempt <= maxRetries) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (!shouldRetry(error, attempt, maxRetries)) {
        throw error;
      }
      const backoff = calculateRetryDelay(attempt);
      const serverHint = getRetryAfter(error);
      const delay = serverHint !== void 0 ? serverHint : backoff;
      if (onRetry) {
        onRetry(attempt + 1, error);
      }
      await new Promise((wake) => setTimeout(wake, delay));
    }
    attempt += 1;
  }
  throw lastError;
}
409
+
410
+ // src/lib/http-client.ts
411
/**
 * Thin wrapper around axios used by all resources.
 *
 * Responsibilities visible here:
 *  - a preconfigured axios instance (base URL, auth header, timeout, agents);
 *  - request bodies converted to snake_case, responses to camelCase with
 *    timestamp strings turned into Date objects;
 *  - every axios failure normalised into an SDK error (handleError);
 *  - retries for GET, POST and downloads; PUT and upload run once.
 */
var HttpClient = class {
  axios;
  maxRetries;
  uploadTimeout;
  httpAgent;
  httpsAgent;
  /**
   * @param options Client options: `baseURL`, `apiKey`, optional `timeout`,
   *   `maxRetries`, `uploadTimeout` (default 600 000), `defaultHeaders`,
   *   `httpAgent`, `httpsAgent`.
   */
  constructor(options) {
    const { httpAgent, httpsAgent } = options;
    this.maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
    this.uploadTimeout = options.uploadTimeout ?? 6e5;
    this.httpAgent = httpAgent;
    this.httpsAgent = httpsAgent;
    // Caller-supplied default headers are spread last so they can override.
    const headers = {
      "User-Agent": `knowhere-node-sdk/${VERSION}`,
      Authorization: `Bearer ${options.apiKey}`,
      "Content-Type": "application/json",
      ...options.defaultHeaders
    };
    this.axios = axios.create({
      baseURL: options.baseURL,
      timeout: options.timeout ?? DEFAULT_TIMEOUT,
      headers,
      httpAgent,
      httpsAgent
    });
    this.setupInterceptors();
  }
  /** Register the key-case conversion and error-normalising interceptors. */
  setupInterceptors() {
    const rejectNormalised = (error) => Promise.reject(this.handleError(error));
    const toWireFormat = (config) => {
      if (config.data && typeof config.data === "object") {
        config.data = keysToSnake(config.data);
      }
      return config;
    };
    const fromWireFormat = (response) => {
      if (response.data && typeof response.data === "object") {
        response.data = parseDates(keysToCamel(response.data));
      }
      return response;
    };
    this.axios.interceptors.request.use(toWireFormat, rejectNormalised);
    this.axios.interceptors.response.use(fromWireFormat, rejectNormalised);
  }
  /**
   * Convert an axios error into an SDK error.
   * Without a response the result is TimeoutError or NetworkError; with one,
   * the status-specific APIError built by createAPIError.
   */
  handleError(error) {
    const { response } = error;
    if (!response) {
      const timedOut = error.code === "ECONNABORTED" || error.message.includes("timeout");
      if (timedOut) {
        return new TimeoutError("Request timed out");
      }
      return new NetworkError(error.message || "Network error", error);
    }
    const { status, data, headers } = response;
    const errorObject = this.getErrorObject(this.normalizeErrorData(data));
    const message = this.getErrorMessage(errorObject, status);
    const code = this.getErrorCode(errorObject);
    const bodyRequestId = typeof errorObject?.request_id === "string" ? errorObject.request_id : void 0;
    const requestId = headers["x-request-id"] ?? bodyRequestId;
    const rawDetails = errorObject?.details;
    const details = rawDetails && typeof rawDetails === "object" && rawDetails.constructor === Object ? rawDetails : void 0;
    let retryAfter;
    if (status === 429) {
      // getRetryAfter works in milliseconds; RateLimitError stores whole seconds.
      const retryAfterMs = getRetryAfter(error);
      if (retryAfterMs !== void 0) {
        retryAfter = Math.ceil(retryAfterMs / 1e3);
      }
    }
    return createAPIError(status, message, code, requestId, details, data, retryAfter);
  }
  /** Prefer a nested plain-object `error` member over the payload itself. */
  getErrorObject(errorData) {
    if (!errorData) {
      return void 0;
    }
    const nested = errorData.error;
    const nestedIsPlain = nested && typeof nested === "object" && nested.constructor === Object;
    return nestedIsPlain ? nested : errorData;
  }
  /**
   * Coerce an error payload into a plain object: pass plain objects through,
   * otherwise decode to text and try JSON, then XML-style <Code>/<Message>
   * tags, then fall back to the first 300 characters as the message.
   */
  normalizeErrorData(data) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && value.constructor === Object;
    if (isPlainObject(data)) {
      return data;
    }
    const decoded = this.decodeErrorPayload(data);
    if (!decoded) {
      return void 0;
    }
    try {
      const parsed = JSON.parse(decoded);
      if (isPlainObject(parsed)) {
        return parsed;
      }
    } catch {
      // Not JSON — fall through to the XML and plain-text handling below.
    }
    const [, xmlCode] = decoded.match(/<Code>([^<]+)<\/Code>/i) ?? [];
    const [, xmlMessage] = decoded.match(/<Message>([^<]+)<\/Message>/i) ?? [];
    if (xmlCode || xmlMessage) {
      return {
        code: xmlCode,
        message: [xmlCode, xmlMessage].filter(Boolean).join(": ")
      };
    }
    return { message: decoded.slice(0, 300) };
  }
  /** Decode a string or binary payload to trimmed UTF-8 text, else undefined. */
  decodeErrorPayload(data) {
    let text;
    if (typeof data === "string") {
      text = data;
    } else if (data instanceof ArrayBuffer) {
      text = Buffer.from(data).toString("utf8");
    } else if (ArrayBuffer.isView(data)) {
      text = Buffer.from(data.buffer, data.byteOffset, data.byteLength).toString("utf8");
    } else if (Buffer.isBuffer(data)) {
      text = data.toString("utf8");
    } else {
      return void 0;
    }
    return text.trim();
  }
  /** Pick `message`, then a string `error`, then a generic HTTP fallback. */
  getErrorMessage(errorData, status) {
    const fallback = `HTTP ${status} error`;
    if (!errorData) {
      return fallback;
    }
    if (typeof errorData.message === "string") {
      return errorData.message;
    }
    if (typeof errorData.error === "string") {
      return errorData.error;
    }
    return fallback;
  }
  /** Return the payload's `code` when it is a string, else undefined. */
  getErrorCode(errorData) {
    return errorData && typeof errorData.code === "string" ? errorData.code : void 0;
  }
  /**
   * Run a request made with the bare axios export (which bypasses the
   * instance interceptors) and normalise axios failures the same way.
   */
  async requestExternal(request) {
    try {
      return await request();
    } catch (error) {
      throw axios.isAxiosError(error) ? this.handleError(error) : error;
    }
  }
  /**
   * GET request
   */
  async get(url, config) {
    const attemptOnce = async () => (await this.axios.get(url, config)).data;
    const logRetry = (attempt, error) => {
      console.warn(`Retry attempt ${attempt} for GET ${url}:`, error);
    };
    return withRetry(attemptOnce, this.maxRetries, logRetry);
  }
  /**
   * POST request
   */
  async post(url, data, config) {
    const attemptOnce = async () => (await this.axios.post(url, data, config)).data;
    const logRetry = (attempt, error) => {
      console.warn(`Retry attempt ${attempt} for POST ${url}:`, error);
    };
    return withRetry(attemptOnce, this.maxRetries, logRetry);
  }
  /**
   * PUT request (typically for uploads, no retry)
   */
  async put(url, data, config) {
    // The upload timeout always wins over any timeout in `config`.
    const mergedConfig = { ...config, timeout: this.uploadTimeout };
    const { data: payload } = await this.axios.put(url, data, mergedConfig);
    return payload;
  }
  /**
   * Download file as buffer
   */
  async download(url, config) {
    const fetchOnce = () =>
      this.requestExternal(async () => {
        const response = await axios.get(url, {
          ...config,
          responseType: "arraybuffer",
          timeout: config?.timeout ?? this.uploadTimeout,
          httpAgent: this.httpAgent,
          httpsAgent: this.httpsAgent
        });
        return Buffer.from(response.data);
      });
    const logRetry = (attempt, error) => {
      console.warn(`Retry attempt ${attempt} for download ${url}:`, error);
    };
    return withRetry(async () => fetchOnce(), this.maxRetries, logRetry);
  }
  /**
   * Upload file with progress tracking
   */
  async upload(url, data, options) {
    const reportProgress = (progressEvent) => {
      if (!options?.onProgress) {
        return;
      }
      const loaded = progressEvent.loaded;
      const total = progressEvent.total;
      // `total` may be unknown; report 0 % rather than NaN in that case.
      const percent = total ? Math.round(loaded / total * 100) : 0;
      options.onProgress({ loaded, total, percent });
    };
    await this.requestExternal(async () => {
      await axios.put(url, data, {
        headers: { ...options?.headers },
        timeout: this.uploadTimeout,
        signal: options?.signal,
        httpAgent: this.httpAgent,
        httpsAgent: this.httpsAgent,
        onUploadProgress: reportProgress
      });
    });
  }
  /**
   * Get the underlying axios instance
   */
  getAxiosInstance() {
    return this.axios;
  }
};
651
+
652
+ // src/resources/base.ts
653
/**
 * Base class for API resources; it only stores the shared HTTP client so
 * subclasses can reach it as `this.httpClient`.
 *
 * @param httpClient Client instance used for all requests of the resource.
 */
var BaseResource = class {
  httpClient;
  constructor(httpClient) {
    this.httpClient = httpClient;
  }
};
659
+
660
+ // src/lib/upload.ts
661
+ import { createReadStream, promises as fs } from "fs";
662
/**
 * Best-effort size lookup for a file-backed stream that has not been read yet.
 * Returns undefined whenever the size cannot be trusted: no string path,
 * bytes already consumed, a start/end range, or a failing stat.
 */
async function resolveStreamLength(stream) {
  if (typeof stream.path !== "string" || stream.bytesRead) {
    return void 0;
  }
  const ranged = typeof stream.start === "number" || (typeof stream.end === "number" && Number.isFinite(stream.end));
  if (ranged) {
    return void 0;
  }
  try {
    const stats = await fs.stat(stream.path);
    return stats.isFile() ? stats.size : void 0;
  } catch {
    // Size is only an optimisation; let the upload proceed without it.
    return void 0;
  }
}
/**
 * Upload a file to a pre-signed URL through `httpClient.upload`.
 *
 * Accepted inputs: a filesystem path, a Buffer, a readable stream, or a
 * Uint8Array. A Content-Length header is sent whenever the size is known.
 *
 * Fixes relative to the previous version:
 *  - for streams, `bytesRead` (bytes consumed so far, not the payload size)
 *    is no longer used as Content-Length; the size is taken from the backing
 *    file when it can be determined safely, otherwise the header is omitted;
 *  - a known length of 0 no longer suppresses the header.
 *
 * @param httpClient Object exposing `upload(url, data, options)`.
 * @param uploadUrl  Destination URL.
 * @param file       Path, Buffer, readable stream or Uint8Array.
 * @param options    Optional `headers`, `onProgress`, `signal`; caller
 *                   headers override the defaults set here.
 * @throws ValidationError when `file` is none of the supported kinds.
 */
async function uploadFile(httpClient, uploadUrl, file, options) {
  let data;
  let contentLength;
  if (typeof file === "string") {
    const stats = await fs.stat(file);
    contentLength = stats.size;
    data = createReadStream(file);
  } else if (file instanceof Buffer) {
    contentLength = file.length;
    data = file;
  } else if (isReadStream(file)) {
    data = file;
    contentLength = await resolveStreamLength(file);
  } else if (file instanceof Uint8Array) {
    contentLength = file.length;
    data = Buffer.from(file);
  } else {
    throw new ValidationError("Unsupported file type");
  }
  await httpClient.upload(uploadUrl, data, {
    headers: {
      "Content-Type": "application/octet-stream",
      ...(contentLength !== void 0 ? { "Content-Length": contentLength.toString() } : {}),
      ...options?.headers
    },
    onProgress: options?.onProgress,
    signal: options?.signal
  });
}
692
/**
 * Duck-type check for a readable stream: a non-null object that has both
 * `pipe` and `read` members, with `pipe` being a function.
 *
 * @param obj Arbitrary value.
 * @returns true when the value looks like a readable stream.
 */
function isReadStream(obj) {
  if (typeof obj !== "object" || obj === null) {
    return false;
  }
  const hasStreamMembers = "pipe" in obj && "read" in obj;
  return hasStreamMembers && typeof obj.pipe === "function";
}
695
+
696
+ // src/lib/polling.ts
697
/**
 * Poll `/v1/jobs/{jobId}` until the job reaches a terminal status.
 *
 * Each round: check the abort signal, fetch and enrich the job, report
 * progress, resolve or throw on a terminal status, enforce the timeout,
 * then sleep. Once more than POLL_INTERVAL_INCREASE_THRESHOLD has elapsed
 * the sleep grows by POLL_INTERVAL_MULTIPLIER per round up to
 * MAX_POLL_INTERVAL.
 *
 * The abort signal is only consulted at the start of a round, and the
 * timeout only after a fetch, so both can overshoot by one sleep interval.
 *
 * @param httpClient Client exposing `get(url)`.
 * @param jobId      Identifier of the job to poll.
 * @param options    Optional `pollInterval`, `pollTimeout`, `onProgress`,
 *                   `signal`.
 * @returns The enriched job payload once its status is "done".
 * @throws Error("Polling aborted") when the signal is aborted.
 * @throws JobFailedError for a terminal status other than "done".
 * @throws PollingTimeoutError when `pollTimeout` has elapsed.
 */
async function pollJobStatus(httpClient, jobId, options) {
  let waitMs = options?.pollInterval ?? DEFAULT_POLL_INTERVAL;
  const budgetMs = options?.pollTimeout ?? DEFAULT_POLL_TIMEOUT;
  const notify = options?.onProgress;
  const signal = options?.signal;
  const startedAt = Date.now();
  for (;;) {
    if (signal?.aborted) {
      throw new Error("Polling aborted");
    }
    const jobResult = await httpClient.get(`/v1/jobs/${jobId}`);
    enrichJobResult(jobResult);
    const elapsed = Date.now() - startedAt;
    const elapsedSeconds = Math.floor(elapsed / 1e3);
    if (notify) {
      notify({ status: jobResult.status, elapsedSeconds, jobResult });
    }
    if (jobResult.isTerminal) {
      if (jobResult.isDone) {
        return jobResult;
      }
      if (jobResult.isFailed && jobResult.error) {
        const { message, code } = jobResult.error;
        throw new JobFailedError(message, code, jobResult);
      }
      throw new JobFailedError(
        `Job ${jobId} failed with status ${jobResult.status}`,
        "UNKNOWN_ERROR",
        jobResult
      );
    }
    if (elapsed >= budgetMs) {
      throw new PollingTimeoutError(`Polling timeout after ${elapsedSeconds} seconds`, elapsed);
    }
    if (elapsed > POLL_INTERVAL_INCREASE_THRESHOLD) {
      waitMs = Math.min(waitMs * POLL_INTERVAL_MULTIPLIER, MAX_POLL_INTERVAL);
    }
    await sleep(waitMs);
  }
}
742
+
743
+ // src/lib/result-parser.ts
744
+ import JSZip from "jszip";
745
+ import { promises as fs2 } from "fs";
746
+ import { join, dirname } from "path";
747
/**
 * Download a result ZIP and turn it into a parse-result object.
 *
 * Required entries: `manifest.json` and `chunks.json`. Optional entries:
 * `full.md` and `hierarchy.json`. Manifest and chunk keys are camelCased;
 * manifest timestamps are additionally converted to Date objects.
 *
 * @param httpClient Client exposing `download(url)`.
 * @param resultUrl  URL of the result archive.
 * @param options    Optional settings; `verifyChecksum` is read but see the
 *                   note in the body.
 * @returns Object with `manifest`, `chunks`, `fullMarkdown`, `hierarchy`,
 *          `rawZip`, filtered chunk getters, `getChunk(id)` and
 *          `save(directory)`.
 * @throws KnowhereError when a required archive entry is missing.
 */
async function parseResult(httpClient, resultUrl, options) {
  const zipBuffer = await httpClient.download(resultUrl);
  if (options?.verifyChecksum !== false) {
    // NOTE(review): this branch is empty in the shipped build, so no
    // checksum verification happens regardless of the option.
  }
  const zip = await JSZip.loadAsync(zipBuffer);
  // Read a mandatory text entry or fail with the archive-specific message.
  const readRequired = async (entryName) => {
    const entry = zip.file(entryName);
    if (!entry) {
      throw new KnowhereError(`${entryName} not found in ZIP`);
    }
    return entry.async("string");
  };
  // Read an optional text entry; undefined when it is absent.
  const readOptional = async (entryName) => {
    const entry = zip.file(entryName);
    return entry ? entry.async("string") : void 0;
  };
  const manifest = parseDates(keysToCamel(JSON.parse(await readRequired("manifest.json"))));
  const chunksData = keysToCamel(JSON.parse(await readRequired("chunks.json")));
  const chunks = [];
  // Sequential on purpose: the first failing chunk aborts the whole parse.
  for (const rawChunk of extractChunks(chunksData)) {
    chunks.push(await processChunk(zip, rawChunk));
  }
  const fullMarkdown = await readOptional("full.md");
  const hierarchyText = await readOptional("hierarchy.json");
  const hierarchy = hierarchyText !== void 0 ? JSON.parse(hierarchyText) : void 0;
  const ofType = (type) => chunks.filter((c) => c.type === type);
  const result = {
    manifest,
    chunks,
    fullMarkdown,
    hierarchy,
    rawZip: zipBuffer,
    get textChunks() {
      return ofType("text");
    },
    get imageChunks() {
      return ofType("image");
    },
    get tableChunks() {
      return ofType("table");
    },
    get jobId() {
      return manifest.jobId;
    },
    get statistics() {
      return manifest.statistics;
    },
    getChunk(chunkId) {
      return chunks.find((c) => c.chunkId === chunkId);
    },
    /** Write the parsed artefacts and the raw ZIP into `directory`. */
    async save(directory) {
      await fs2.mkdir(directory, { recursive: true });
      const writeInto = (name, contents) => fs2.writeFile(join(directory, name), contents);
      const pretty = (value) => JSON.stringify(value, null, 2);
      await writeInto("manifest.json", pretty(manifest));
      await writeInto("chunks.json", pretty(chunks));
      if (fullMarkdown) {
        await writeInto("full.md", fullMarkdown);
      }
      if (hierarchy) {
        await writeInto("hierarchy.json", pretty(hierarchy));
      }
      for (const imageChunk of this.imageChunks) {
        await imageChunk.save(directory);
      }
      for (const tableChunk of this.tableChunks) {
        await tableChunk.save(directory);
      }
      await writeInto("result.zip", zipBuffer);
      return directory;
    }
  };
  return result;
}
830
/**
 * Locate the chunk array in a parsed `chunks.json` payload, which may be the
 * array itself or an object with a `chunks` array.
 *
 * @param payload Parsed JSON payload.
 * @returns The chunk array, or an empty array when none is found.
 */
function extractChunks(payload) {
  const candidate = Array.isArray(payload) ? payload : payload.chunks;
  return Array.isArray(candidate) ? candidate : [];
}
839
/**
 * Return the chunk's `metadata` member, or a fresh empty object when it is
 * missing or falsy, so callers can read fields without null checks.
 *
 * @param chunkData Raw chunk record.
 * @returns The metadata object or `{}`.
 */
function getChunkMetadata(chunkData) {
  const { metadata } = chunkData;
  return metadata ? metadata : {};
}
845
/**
 * Resolve the archive path of a chunk's backing file.
 * Preference order: `chunkData.filePath`, `metadata.filePath`, then
 * `chunkData.path`.
 *
 * @param chunkData Raw chunk record.
 * @returns The first non-nullish candidate, possibly undefined.
 */
function getChunkFilePath(chunkData) {
  const { filePath: metadataPath } = getChunkMetadata(chunkData);
  const candidates = [chunkData.filePath, metadataPath];
  for (const candidate of candidates) {
    if (candidate !== null && candidate !== undefined) {
      return candidate;
    }
  }
  return chunkData.path;
}
849
/**
 * Build a normalised text chunk from a raw record.
 * For `length`, `tokens`, `keywords`, `summary` and `relationships` the value
 * in `metadata` wins over the top-level one. `chunkId`, `content` and `path`
 * default to "" and `length` to 0.
 *
 * @param chunkData Raw chunk record.
 * @returns Chunk object with `type` fixed to "text".
 */
function normalizeTextChunk(chunkData) {
  const metadata = getChunkMetadata(chunkData);
  // Metadata value first, top-level value as fallback.
  const preferMetadata = (field) => metadata[field] ?? chunkData[field];
  return {
    chunkId: chunkData.chunkId ?? "",
    type: "text",
    content: chunkData.content ?? "",
    path: chunkData.path ?? "",
    length: preferMetadata("length") ?? 0,
    tokens: preferMetadata("tokens"),
    keywords: preferMetadata("keywords"),
    summary: preferMetadata("summary"),
    relationships: preferMetadata("relationships")
  };
}
863
/**
 * Convert one raw chunk record into its enriched form.
 *
 *  - "image": loads the referenced file from the ZIP as a Buffer (`data`) and
 *    adds a `format` getter and a `save(directory)` method;
 *  - "table": loads the referenced file as a string (`html`) and adds a
 *    `save(directory)` method;
 *  - "text" and any other type: normalised as a text chunk.
 *
 * The ZIP lookup and the save destination use the sanitised path; the
 * chunk's `filePath` keeps the original value.
 *
 * @param zip       Loaded JSZip archive.
 * @param chunkData Raw chunk record (camelCased).
 * @returns The enriched chunk.
 * @throws KnowhereError when an image or table chunk has no file path or
 *         its file is missing from the archive.
 */
async function processChunk(zip, chunkData) {
  const { type } = chunkData;
  if (type !== "image" && type !== "table") {
    return normalizeTextChunk(chunkData);
  }
  const label = type === "image" ? "Image" : "Table";
  const metadata = getChunkMetadata(chunkData);
  const filePath = getChunkFilePath(chunkData);
  if (!filePath) {
    throw new KnowhereError(`${label} chunk missing file path: ${chunkData.chunkId ?? "unknown"}`);
  }
  const entry = zip.file(sanitizePath(filePath));
  if (!entry) {
    throw new KnowhereError(`${label} file not found: ${filePath}`);
  }
  // Shared writer: `payloadOf` picks which field of the chunk is written.
  const saveUnder = async (chunk, directory, payloadOf) => {
    const outputPath = join(directory, sanitizePath(chunk.filePath));
    await fs2.mkdir(dirname(outputPath), { recursive: true });
    await fs2.writeFile(outputPath, payloadOf(chunk));
    return outputPath;
  };
  if (type === "image") {
    const imageBuffer = await entry.async("nodebuffer");
    return {
      chunkId: chunkData.chunkId ?? "",
      type: "image",
      content: chunkData.content ?? "",
      path: chunkData.path ?? "",
      length: metadata.length ?? chunkData.length ?? 0,
      filePath,
      summary: metadata.summary ?? chunkData.summary,
      data: imageBuffer,
      get format() {
        return getFileExtension(this.filePath);
      },
      async save(directory) {
        return saveUnder(this, directory, (chunk) => chunk.data);
      }
    };
  }
  const html = await entry.async("string");
  return {
    chunkId: chunkData.chunkId ?? "",
    type: "table",
    content: chunkData.content ?? "",
    path: chunkData.path ?? "",
    length: metadata.length ?? chunkData.length ?? 0,
    filePath,
    tableType: metadata.tableType ?? chunkData.tableType,
    summary: metadata.summary ?? chunkData.summary,
    html,
    async save(directory) {
      return saveUnder(this, directory, (chunk) => chunk.html);
    }
  };
}
935
+
936
+ // src/resources/jobs.ts
937
/**
 * Low-level resource for the `/v1/jobs` endpoints: create a job, upload its
 * file, poll it, and load the parsed result.
 */
var Jobs = class extends BaseResource {
  /**
   * Jobs that were seen with an `uploadUrl`, keyed by job id. This lets
   * `upload()` be called with only the job id after `create()`.
   */
  pendingUploadJobs = /* @__PURE__ */ new Map();

  /**
   * Create a new parsing job.
   *
   * @param params Request body sent to `POST /v1/jobs`.
   * @returns The job returned by the API. When it carries an `uploadUrl`
   *   it is also remembered so `upload(job.jobId, ...)` works later.
   */
  async create(params) {
    const job = await this.httpClient.post("/v1/jobs", params);
    if (job.uploadUrl) {
      this.pendingUploadJobs.set(job.jobId, job);
    }
    return job;
  }

  /**
   * Get job status.
   *
   * @param jobId Identifier of the job to fetch.
   * @returns The response of `GET /v1/jobs/{jobId}` after it has been passed
   *   through `enrichJobResult` (presumably adds the `isDone`-style flags
   *   that `load()` reads; confirm against that helper).
   */
  async get(jobId) {
    const jobResult = await this.httpClient.get(`/v1/jobs/${jobId}`);
    enrichJobResult(jobResult);
    return jobResult;
  }

  /**
   * Upload file for a job.
   *
   * @param jobOrId A job object, the id of a job previously seen with an
   *   upload URL, or a direct `http(s)` upload URL.
   * @param params `file` to send plus optional `onProgress` and `signal`.
   * @throws NotFoundError when the resolved job has no `uploadUrl`.
   * @throws InvalidStateError when a job id is given that is not cached.
   */
  async upload(jobOrId, params) {
    const response = this.resolveUploadJob(jobOrId);
    if (!response.uploadUrl) {
      throw new NotFoundError(
        "Upload URL not available for this job. Pass the Job object returned from create() or a direct upload URL string."
      );
    }
    await uploadFile(this.httpClient, response.uploadUrl, params.file, {
      headers: response.uploadHeaders,
      onProgress: params.onProgress,
      signal: params.signal
    });
    // Only reached when the upload did not throw, so a failed upload keeps
    // the cached job available for another attempt by id.
    this.pendingUploadJobs.delete(response.jobId);
  }

  /**
   * Wait for job completion.
   *
   * @param jobId Identifier of the job to poll.
   * @param options Polling options forwarded unchanged to `pollJobStatus`.
   * @returns Whatever `pollJobStatus` resolves with.
   */
  async wait(jobId, options) {
    return pollJobStatus(this.httpClient, jobId, options);
  }

  /**
   * Load parse result from completed job.
   *
   * @param jobResultOrIdOrUrl A job result object, a job id (fetched via
   *   `get()`), or a direct `http(s)` result URL.
   * @param options Options forwarded unchanged to `parseResult`.
   * @throws InvalidStateError when the job is not done yet.
   * @throws NotFoundError when the job has no `resultUrl`.
   */
  async load(jobResultOrIdOrUrl, options) {
    const jobResult = await this.resolveLoadJobResult(jobResultOrIdOrUrl);
    if (!jobResult.isDone) {
      // Same error class the rest of this resource uses for "wrong job
      // state", so callers can distinguish it from unexpected failures.
      throw new InvalidStateError(
        `Job ${jobResult.jobId} is not done yet (status: ${jobResult.status})`
      );
    }
    if (!jobResult.resultUrl) {
      throw new NotFoundError("Result URL not available");
    }
    return parseResult(this.httpClient, jobResult.resultUrl, options);
  }

  /** True when `value` starts with `http://` or `https://` (case-insensitive). */
  isHttpUrl(value) {
    return /^https?:\/\//i.test(value);
  }

  /**
   * Turn the `upload()` argument into a job-shaped object.
   *
   * - object: returned as is (and cached when it has an `uploadUrl`)
   * - http(s) URL string: wrapped in a placeholder job carrying that URL
   * - any other string: looked up in the pending-upload cache
   *
   * @throws InvalidStateError when a job id is not in the cache.
   */
  resolveUploadJob(jobOrId) {
    if (typeof jobOrId !== "string") {
      if (jobOrId.uploadUrl) {
        this.pendingUploadJobs.set(jobOrId.jobId, jobOrId);
      }
      return jobOrId;
    }
    if (this.isHttpUrl(jobOrId)) {
      // Placeholder job: only `uploadUrl` is meaningful here.
      return {
        jobId: "direct-upload-url",
        status: "waiting-file",
        sourceType: "file",
        createdAt: /* @__PURE__ */ new Date(0),
        uploadUrl: jobOrId
      };
    }
    const cachedJob = this.pendingUploadJobs.get(jobOrId);
    if (cachedJob) {
      return cachedJob;
    }
    throw new InvalidStateError(
      `Upload URL not available for job ${jobOrId}. Pass the Job object returned from create() or a direct upload URL string.`
    );
  }

  /**
   * Turn the `load()` argument into a job result.
   *
   * - object: enriched in place via `enrichJobResult` and returned
   * - http(s) URL string: wrapped in a placeholder "done" result
   * - any other string: treated as a job id and fetched with `get()`
   */
  async resolveLoadJobResult(jobResultOrIdOrUrl) {
    if (typeof jobResultOrIdOrUrl !== "string") {
      enrichJobResult(jobResultOrIdOrUrl);
      return jobResultOrIdOrUrl;
    }
    if (this.isHttpUrl(jobResultOrIdOrUrl)) {
      // Placeholder result: marked done so `load()` proceeds straight to
      // downloading from the supplied URL.
      return {
        jobId: "direct-result-url",
        status: "done",
        sourceType: "file",
        createdAt: /* @__PURE__ */ new Date(0),
        resultUrl: jobResultOrIdOrUrl,
        resultUrlExpiresAt: /* @__PURE__ */ new Date(0),
        isTerminal: true,
        isDone: true,
        isFailed: false
      };
    }
    return this.get(jobResultOrIdOrUrl);
  }
};
1041
+
1042
+ // src/client.ts
1043
/**
 * Decide which file name to report for an upload.
 *
 * An explicit (non-empty) name always wins. Otherwise the name is the
 * basename of the path when `file` is a path string or a stream exposing a
 * string `path`.
 *
 * @param file Upload source: a path string, a stream, or binary data.
 * @param explicitFileName Name supplied by the caller, if any.
 * @returns The resolved name, or `undefined` when none can be inferred.
 */
function inferFileName(file, explicitFileName) {
  if (explicitFileName) {
    return explicitFileName;
  }
  let sourcePath = void 0;
  if (typeof file === "string") {
    sourcePath = file;
  } else if (isReadStream2(file) && typeof file.path === "string") {
    sourcePath = file.path;
  }
  return sourcePath === void 0 ? void 0 : path.basename(sourcePath);
}
1055
/**
 * Duck-type check for a readable stream: a non-null object that has a
 * callable `pipe` member.
 *
 * @param file Value to inspect.
 * @returns `true` when `file` looks like a stream, otherwise `false`.
 */
function isReadStream2(file) {
  if (file === null || typeof file !== "object") {
    return false;
  }
  return "pipe" in file && typeof file.pipe === "function";
}
1058
/**
 * Main SDK client. Owns the shared HTTP client, exposes the low-level `jobs`
 * resource, and offers `parse()` as a one-call create → upload → wait → load
 * workflow.
 */
var Knowhere = class {
  /** Jobs resource for low-level API */
  jobs;
  /** HTTP client shared with the `jobs` resource. */
  httpClient;

  /**
   * Create a new Knowhere client.
   *
   * @param options Client options. `apiKey` falls back to the
   *   `KNOWHERE_API_KEY` environment variable; `baseURL` falls back to
   *   `KNOWHERE_BASE_URL` and then to the built-in default. The remaining
   *   options (`timeout`, `uploadTimeout`, `maxRetries`, `defaultHeaders`,
   *   `httpAgent`, `httpsAgent`) are forwarded to the HTTP client as given.
   * @throws ValidationError when no API key is available.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey ?? process.env[ENV.API_KEY];
    if (!apiKey) {
      throw new ValidationError(
        `API key is required. Provide it via options.apiKey or ${ENV.API_KEY} environment variable.`
      );
    }
    // An environment variable that is set but blank (common with .env files
    // and container configs) must count as "not set"; otherwise the client
    // would be built with an empty base URL instead of the default.
    const envBaseURL = process.env[ENV.BASE_URL]?.trim();
    const baseURL = options.baseURL ?? (envBaseURL ? envBaseURL : DEFAULT_BASE_URL);
    this.httpClient = new HttpClient({
      baseURL,
      apiKey,
      timeout: options.timeout,
      uploadTimeout: options.uploadTimeout,
      maxRetries: options.maxRetries,
      defaultHeaders: options.defaultHeaders,
      httpAgent: options.httpAgent,
      httpsAgent: options.httpsAgent
    });
    this.jobs = new Jobs(this.httpClient);
  }

  /**
   * High-level API: Parse a document and return structured results
   *
   * Creates a job, uploads the file when one is given, waits for the job,
   * then loads its result. Errors raised by those steps propagate to the
   * caller.
   *
   * @param params Exactly one of `url` or `file`, plus optional parsing,
   *   polling, progress, webhook and abort-signal settings.
   * @returns The value resolved by `jobs.load()` for the finished job.
   * @throws ValidationError when neither or both of `url`/`file` are given,
   *   or when a file is given and no file name can be determined.
   *
   * @example
   * ```typescript
   * // Parse from URL
   * const result = await client.parse({ url: 'https://example.com/doc.pdf' });
   *
   * // Parse from file
   * const result = await client.parse({ file: './document.pdf' });
   *
   * // Parse with options
   * const result = await client.parse({
   *   url: 'https://example.com/doc.pdf',
   *   model: 'advanced',
   *   ocr: true,
   *   onUploadProgress: (p) => console.log(`${p.percent}%`),
   * });
   * ```
   */
  async parse(params) {
    if (!params.url && !params.file) {
      throw new ValidationError("Either url or file must be provided");
    }
    if (params.url && params.file) {
      throw new ValidationError("Only one of url or file can be provided");
    }
    const sourceType = params.url ? "url" : "file";
    const resolvedFileName = inferFileName(params.file, params.fileName);
    if (params.file && !resolvedFileName) {
      throw new ValidationError(
        "fileName is required when file is a Buffer, Uint8Array, or stream without a path."
      );
    }
    // Map the public option names onto the request field names and drop
    // every option the caller left undefined.
    const parsingParams = Object.fromEntries(
      Object.entries({
        model: params.model,
        ocrEnabled: params.ocr,
        docType: params.docType,
        smartTitleParse: params.smartTitleParse,
        summaryImage: params.summaryImage,
        summaryTable: params.summaryTable,
        summaryTxt: params.summaryText,
        addFragDesc: params.addFragDesc,
        kbDir: params.kbDir
      }).filter(([, value]) => value !== void 0)
    );
    const job = await this.jobs.create({
      sourceType,
      sourceUrl: params.url,
      fileName: resolvedFileName,
      dataId: params.dataId,
      // Omit the field entirely when no parsing option was supplied.
      parsingParams: Object.keys(parsingParams).length > 0 ? parsingParams : void 0,
      webhook: params.webhook
    });
    if (params.file) {
      await this.jobs.upload(job, {
        file: params.file,
        onProgress: params.onUploadProgress,
        signal: params.signal
      });
    }
    const jobResult = await this.jobs.wait(job.jobId, {
      pollInterval: params.pollInterval,
      pollTimeout: params.pollTimeout,
      onProgress: params.onPollProgress,
      signal: params.signal
    });
    return this.jobs.load(jobResult, {
      verifyChecksum: params.verifyChecksum
    });
  }
};
1163
+ export {
1164
+ APIError,
1165
+ AuthenticationError,
1166
+ BadRequestError,
1167
+ ChecksumError,
1168
+ ConflictError,
1169
+ GatewayTimeoutError,
1170
+ InternalServerError,
1171
+ InvalidStateError,
1172
+ JobFailedError,
1173
+ Jobs,
1174
+ Knowhere,
1175
+ KnowhereError,
1176
+ NetworkError,
1177
+ NotFoundError,
1178
+ PaymentRequiredError,
1179
+ PermissionDeniedError,
1180
+ PollingTimeoutError,
1181
+ RateLimitError,
1182
+ ServiceUnavailableError,
1183
+ TimeoutError,
1184
+ VERSION,
1185
+ ValidationError,
1186
+ Knowhere as default
1187
+ };