@ontos-ai/knowhere-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,668 @@
1
+ import { Agent } from 'http';
2
+ import { Agent as Agent$1 } from 'https';
3
+ import { ReadStream } from 'fs';
4
+ import { AxiosRequestConfig, AxiosInstance } from 'axios';
5
+
6
+ /**
7
+ * Configuration options for the Knowhere client constructor (every field is optional)
8
+ */
9
+ interface KnowhereOptions {
10
+ /** API authentication key (defaults to KNOWHERE_API_KEY env var) */
11
+ apiKey?: string;
12
+ /** API base URL (defaults to https://api.knowhereto.ai) */
13
+ baseURL?: string;
14
+ /** Request timeout in milliseconds (default: 60000) */
15
+ timeout?: number;
16
+ /** Upload timeout in milliseconds (default: 600000) */
17
+ uploadTimeout?: number;
18
+ /** Maximum number of retry attempts (default: 5) */
19
+ maxRetries?: number;
20
+ /** Additional headers to include in all requests */
21
+ defaultHeaders?: Record<string, string>;
22
+ /** Custom HTTP agent for HTTP requests */
23
+ httpAgent?: Agent;
24
+ /** Custom HTTPS agent for HTTPS requests (Agent$1 is the aliased import of Agent from 'https') */
25
+ httpsAgent?: Agent$1;
26
+ }
27
+
28
+ /**
29
+ * Job status; 'done' and 'failed' are the terminal states (see JobResult.isTerminal)
30
+ */
31
+ type JobStatus = 'pending' | 'waiting-file' | 'running' | 'converting' | 'done' | 'failed';
32
+ /**
33
+ * Job creation response, as returned by Jobs.create()
34
+ */
35
+ interface Job {
36
+ /** Unique job identifier */
37
+ jobId: string;
38
+ /** Current job status */
39
+ status: JobStatus;
40
+ /** Source type (file or url); typed as a plain string here, unlike CreateJobParams.sourceType */
41
+ sourceType: string;
42
+ /** Optional custom data identifier */
43
+ dataId?: string;
44
+ /** Job creation timestamp */
45
+ createdAt: Date;
46
+ /** Presigned URL for file upload (if sourceType is 'file') */
47
+ uploadUrl?: string;
48
+ /** Headers to include in upload request */
49
+ uploadHeaders?: Record<string, string>;
50
+ /** Upload URL expiration time in seconds */
51
+ expiresIn?: number;
52
+ }
53
+ /**
54
+ * Job error details, exposed as JobResult.error
55
+ */
56
+ interface JobError {
57
+ /** Error code */
58
+ code: string;
59
+ /** Human-readable error message */
60
+ message: string;
61
+ /** Request ID for debugging (required here, whereas APIError.requestId is optional) */
62
+ requestId: string;
63
+ /** Additional error details */
64
+ details?: Record<string, unknown>;
65
+ }
66
+ /**
67
+ * Job status response with full details, as returned by Jobs.get() and Jobs.wait()
68
+ */
69
+ interface JobResult {
70
+ /** Unique job identifier */
71
+ jobId: string;
72
+ /** Current job status */
73
+ status: JobStatus;
74
+ /** Source type (file or url); typed as a plain string, not the 'file' | 'url' union */
75
+ sourceType: string;
76
+ /** Optional custom data identifier */
77
+ dataId?: string;
78
+ /** Job creation timestamp */
79
+ createdAt: Date;
80
+ /** Processing progress information (untyped key/value record) */
81
+ progress?: Record<string, unknown>;
82
+ /** Error details (if job failed) */
83
+ error?: JobError;
84
+ /** Result metadata (untyped key/value record) */
85
+ result?: Record<string, unknown>;
86
+ /** Presigned URL to download result ZIP */
87
+ resultUrl?: string;
88
+ /** Result URL expiration timestamp */
89
+ resultUrlExpiresAt?: Date;
90
+ /** Original file name */
91
+ fileName?: string;
92
+ /** File extension */
93
+ fileExtension?: string;
94
+ /** Model used for parsing (plain string, not narrowed to ParsingModel) */
95
+ model?: string;
96
+ /** Whether OCR was enabled */
97
+ ocrEnabled?: boolean;
98
+ /** Processing duration in seconds */
99
+ durationSeconds?: number;
100
+ /** Credits consumed */
101
+ creditsSpent?: number;
102
+ /** Whether the job is in a terminal state (done or failed) */
103
+ readonly isTerminal: boolean;
104
+ /** Whether the job completed successfully */
105
+ readonly isDone: boolean;
106
+ /** Whether the job failed */
107
+ readonly isFailed: boolean;
108
+ }
109
+
110
+ /**
111
+ * Parsing model options ('base' is the documented default for ParsingParams.model)
112
+ */
113
+ type ParsingModel = 'base' | 'advanced';
114
+ /**
115
+ * Document type options ('auto' is the documented default for ParsingParams.docType)
116
+ */
117
+ type DocType = 'auto' | 'pdf' | 'docx' | 'txt' | 'md';
118
+ /**
119
+ * Parsing configuration parameters, passed as CreateJobParams.parsingParams
120
+ */
121
+ interface ParsingParams {
122
+ /** Parsing model to use (default: 'base') */
123
+ model?: ParsingModel;
124
+ /** Enable OCR for scanned documents (default: false); cf. ParseParams.ocr */
125
+ ocrEnabled?: boolean;
126
+ /** Knowledge base directory */
127
+ kbDir?: string;
128
+ /** Document type hint (default: 'auto') */
129
+ docType?: DocType;
130
+ /** Enable smart title parsing (default: false) */
131
+ smartTitleParse?: boolean;
132
+ /** Generate image summaries (default: false) */
133
+ summaryImage?: boolean;
134
+ /** Generate table summaries (default: false) */
135
+ summaryTable?: boolean;
136
+ /** Generate text summaries (default: false); note ParseParams spells its counterpart summaryText */
137
+ summaryTxt?: boolean;
138
+ /** Additional fragment description */
139
+ addFragDesc?: string;
140
+ }
141
+ /**
142
+ * Webhook configuration, accepted by CreateJobParams.webhook and ParseParams.webhook
143
+ */
144
+ interface WebhookConfig {
145
+ /** Webhook URL to notify on job completion */
146
+ url: string;
147
+ }
148
+ /**
149
+ * Job creation parameters for Jobs.create()
150
+ */
151
+ interface CreateJobParams {
152
+ /** Source type: 'file' for upload, 'url' for remote document */
153
+ sourceType: 'file' | 'url';
154
+ /** Source URL (required if sourceType is 'url'; the type itself leaves it optional) */
155
+ sourceUrl?: string;
156
+ /** File name (required if sourceType is 'file'; the type itself leaves it optional) */
157
+ fileName?: string;
158
+ /** Optional custom data identifier */
159
+ dataId?: string;
160
+ /** Parsing configuration */
161
+ parsingParams?: ParsingParams;
162
+ /** Webhook configuration */
163
+ webhook?: WebhookConfig;
164
+ }
165
+ /**
166
+ * File upload parameters for Jobs.upload()
167
+ */
168
+ interface UploadParams {
169
+ /** File to upload (path, Buffer, Stream, or Uint8Array); a string is treated as a path per this description */
170
+ file: string | Buffer | ReadStream | Uint8Array;
171
+ /** Upload progress callback */
172
+ onProgress?: (progress: UploadProgress) => void;
173
+ /** Abort signal for cancellation */
174
+ signal?: AbortSignal;
175
+ }
176
+ /**
177
+ * Polling options for Jobs.wait()
178
+ */
179
+ interface WaitOptions {
180
+ /** Polling interval in milliseconds (default: 10000) */
181
+ pollInterval?: number;
182
+ /** Maximum wait time in milliseconds (default: 1800000 = 30 minutes) */
183
+ pollTimeout?: number;
184
+ /** Progress callback receiving a PollProgress snapshot */
185
+ onProgress?: (status: PollProgress) => void;
186
+ /** Abort signal for cancellation */
187
+ signal?: AbortSignal;
188
+ }
189
+ /**
190
+ * Load options for result parsing, accepted by Jobs.load()
191
+ */
192
+ interface LoadOptions {
193
+ /** Whether to verify ZIP checksum (default: true) */
194
+ verifyChecksum?: boolean;
195
+ }
196
+ /**
197
+ * High-level parse parameters for Knowhere.parse()
198
+ */
199
+ interface ParseParams {
200
+ /** Source URL (mutually exclusive with file; both are optional in the type, so this is not compiler-enforced) */
201
+ url?: string;
202
+ /** File to parse (path, Buffer, Stream, or Uint8Array) */
203
+ file?: string | Buffer | ReadStream | Uint8Array;
204
+ /** File name (auto-inferred for file paths and fs.ReadStream with a path) */
205
+ fileName?: string;
206
+ /** Parsing model (default: 'base') */
207
+ model?: ParsingModel;
208
+ /** Enable OCR (default: false); cf. ParsingParams.ocrEnabled */
209
+ ocr?: boolean;
210
+ /** Document type hint */
211
+ docType?: DocType;
212
+ /** Enable smart title parsing */
213
+ smartTitleParse?: boolean;
214
+ /** Generate image summaries */
215
+ summaryImage?: boolean;
216
+ /** Generate table summaries */
217
+ summaryTable?: boolean;
218
+ /** Generate text summaries; cf. ParsingParams.summaryTxt (note the different spelling) */
219
+ summaryText?: boolean;
220
+ /** Custom data identifier */
221
+ dataId?: string;
222
+ /** Additional fragment description */
223
+ addFragDesc?: string;
224
+ /** Knowledge base directory */
225
+ kbDir?: string;
226
+ /** Polling interval in milliseconds; cf. WaitOptions.pollInterval */
227
+ pollInterval?: number;
228
+ /** Maximum wait time in milliseconds; cf. WaitOptions.pollTimeout */
229
+ pollTimeout?: number;
230
+ /** Whether to verify ZIP checksum (default: true) */
231
+ verifyChecksum?: boolean;
232
+ /** Webhook configuration */
233
+ webhook?: WebhookConfig;
234
+ /** Upload progress callback */
235
+ onUploadProgress?: (progress: UploadProgress) => void;
236
+ /** Poll progress callback */
237
+ onPollProgress?: (status: PollProgress) => void;
238
+ /** Abort signal */
239
+ signal?: AbortSignal;
240
+ }
241
+ /**
242
+ * Upload progress information passed to UploadParams.onProgress and ParseParams.onUploadProgress
243
+ */
244
+ interface UploadProgress {
245
+ /** Bytes uploaded */
246
+ loaded: number;
247
+ /** Total bytes (may be undefined for streams) */
248
+ total?: number;
249
+ /** Upload percentage (0-100) */
250
+ percent: number;
251
+ }
252
+ /**
253
+ * Polling progress information passed to WaitOptions.onProgress and ParseParams.onPollProgress
254
+ */
255
+ interface PollProgress {
256
+ /** Current job status */
257
+ status: JobStatus;
258
+ /** Elapsed time in seconds */
259
+ elapsedSeconds: number;
260
+ /** Current job result */
261
+ jobResult: JobResult;
262
+ }
263
+
264
+ /**
265
+ * Statistics about the parsed document, exposed as Manifest.statistics and ParseResult.statistics
266
+ */
267
+ interface Statistics {
268
+ /** Total number of chunks */
269
+ totalChunks: number;
270
+ /** Number of text chunks */
271
+ textChunks: number;
272
+ /** Number of image chunks */
273
+ imageChunks: number;
274
+ /** Number of table chunks */
275
+ tableChunks: number;
276
+ /** Total number of pages (if applicable) */
277
+ totalPages?: number;
278
+ }
279
+ /**
280
+ * File index mapping chunk IDs to file paths (used as Manifest.files)
281
+ */
282
+ interface FileIndex {
283
+ [chunkId: string]: string;
284
+ }
285
+ /**
286
+ * Manifest containing metadata about the parse result, exposed as ParseResult.manifest
287
+ */
288
+ interface Manifest {
289
+ /** Manifest version */
290
+ version: string;
291
+ /** Job ID */
292
+ jobId: string;
293
+ /** Custom data ID */
294
+ dataId?: string;
295
+ /** Original source file name */
296
+ sourceFileName: string;
297
+ /** Processing completion date */
298
+ processingDate: Date;
299
+ /** Statistics */
300
+ statistics: Statistics;
301
+ /** File index (chunk ID to file path) */
302
+ files: FileIndex;
303
+ }
304
+ /**
305
+ * Base chunk properties shared by TextChunk, ImageChunk and TableChunk
306
+ */
307
+ interface BaseChunk {
308
+ /** Unique chunk identifier */
309
+ chunkId: string;
310
+ /** Chunk type; each sub-interface narrows this to a single literal */
311
+ type: 'text' | 'image' | 'table';
312
+ /** Main content */
313
+ content: string;
314
+ /** Relative path in ZIP */
315
+ path: string;
316
+ }
317
+ /**
318
+ * Text chunk (BaseChunk with type narrowed to 'text')
319
+ */
320
+ interface TextChunk extends BaseChunk {
321
+ type: 'text';
322
+ /** Content length */
323
+ length: number;
324
+ /** Tokens or token count, depending on backend payload; callers must narrow number vs string[] */
325
+ tokens?: number | string[];
326
+ /** Extracted keywords */
327
+ keywords?: string[];
328
+ /** Generated summary */
329
+ summary?: string;
330
+ /** Related chunk IDs */
331
+ relationships?: string[];
332
+ }
333
+ /**
334
+ * Image chunk (BaseChunk with type narrowed to 'image')
335
+ */
336
+ interface ImageChunk extends BaseChunk {
337
+ type: 'image';
338
+ /** Content length */
339
+ length: number;
340
+ /** Relative file path in ZIP */
341
+ filePath: string;
342
+ /** Generated summary */
343
+ summary?: string;
344
+ /** Image data buffer */
345
+ data: Buffer;
346
+ /** Image format (derived from file extension) */
347
+ readonly format: string;
348
+ /** Save image to disk; resolves to a string (presumably the written path; confirm against the implementation) */
349
+ save(directory: string): Promise<string>;
350
+ }
351
+ /**
352
+ * Table chunk (BaseChunk with type narrowed to 'table')
353
+ */
354
+ interface TableChunk extends BaseChunk {
355
+ type: 'table';
356
+ /** Content length */
357
+ length: number;
358
+ /** Relative file path in ZIP */
359
+ filePath: string;
360
+ /** Table type */
361
+ tableType?: string;
362
+ /** Generated summary */
363
+ summary?: string;
364
+ /** HTML representation */
365
+ html: string;
366
+ /** Save table HTML to disk; resolves to a string (presumably the written path; confirm against the implementation) */
367
+ save(directory: string): Promise<string>;
368
+ }
369
+ /**
370
+ * Union type of all chunk types; narrow on the literal `type` field ('text' | 'image' | 'table')
371
+ */
372
+ type Chunk = TextChunk | ImageChunk | TableChunk;
373
+ /**
374
+ * Complete parse result, as returned by Jobs.load() and Knowhere.parse()
375
+ */
376
+ interface ParseResult {
377
+ /** Manifest metadata */
378
+ manifest: Manifest;
379
+ /** All chunks */
380
+ chunks: Chunk[];
381
+ /** Full document as Markdown (if available) */
382
+ fullMarkdown?: string;
383
+ /** Document hierarchy (if available); typed unknown, so callers must narrow before use */
384
+ hierarchy?: unknown;
385
+ /** Raw ZIP buffer */
386
+ rawZip: Buffer;
387
+ /** Text chunks only */
388
+ readonly textChunks: TextChunk[];
389
+ /** Image chunks only */
390
+ readonly imageChunks: ImageChunk[];
391
+ /** Table chunks only */
392
+ readonly tableChunks: TableChunk[];
393
+ /** Job ID */
394
+ readonly jobId: string;
395
+ /** Statistics */
396
+ readonly statistics: Statistics;
397
+ /** Find a specific chunk by ID; undefined is part of the return type for the no-match case */
398
+ getChunk(chunkId: string): Chunk | undefined;
399
+ /** Save all results to a directory; resolves to a string (presumably the output path; confirm against the implementation) */
400
+ save(directory: string): Promise<string>;
401
+ }
402
+
403
+ interface HttpClientOptions { // Constructor options for HttpClient: same fields as KnowhereOptions, but baseURL and apiKey are required; not in the export list
404
+ baseURL: string;
405
+ apiKey: string;
406
+ timeout?: number;
407
+ uploadTimeout?: number;
408
+ maxRetries?: number;
409
+ defaultHeaders?: Record<string, string>;
410
+ httpAgent?: Agent;
411
+ httpsAgent?: Agent$1;
412
+ }
413
+ /**
414
+ * HTTP client wrapper around axios with retry logic and error handling (not part of the package's export list)
415
+ */
416
+ declare class HttpClient {
417
+ private axios;
418
+ private maxRetries;
419
+ private uploadTimeout;
420
+ private httpAgent?;
421
+ private httpsAgent?;
422
+ constructor(options: HttpClientOptions);
423
+ private setupInterceptors;
424
+ private handleError;
425
+ private getErrorObject;
426
+ private normalizeErrorData;
427
+ private decodeErrorPayload;
428
+ private getErrorMessage;
429
+ private getErrorCode;
430
+ private requestExternal;
431
+ /**
432
+ * GET request; T defaults to unknown, so callers supply the expected response type
433
+ */
434
+ get<T = unknown>(url: string, config?: AxiosRequestConfig): Promise<T>;
435
+ /**
436
+ * POST request; T defaults to unknown, so callers supply the expected response type
437
+ */
438
+ post<T = unknown>(url: string, data?: unknown, config?: AxiosRequestConfig): Promise<T>;
439
+ /**
440
+ * PUT request (typically for uploads, no retry)
441
+ */
442
+ put<T = unknown>(url: string, data?: unknown, config?: AxiosRequestConfig): Promise<T>;
443
+ /**
444
+ * Download file as buffer
445
+ */
446
+ download(url: string, config?: AxiosRequestConfig): Promise<Buffer>;
447
+ /**
448
+ * Upload file with progress tracking (the onProgress payload has the same shape as UploadProgress)
449
+ */
450
+ upload(url: string, data: unknown, options?: {
451
+ headers?: Record<string, string>;
452
+ onProgress?: (progress: {
453
+ loaded: number;
454
+ total?: number;
455
+ percent: number;
456
+ }) => void;
457
+ signal?: AbortSignal;
458
+ }): Promise<void>;
459
+ /**
460
+ * Get the underlying axios instance
461
+ */
462
+ getAxiosInstance(): AxiosInstance;
463
+ }
464
+
465
+ /**
466
+ * Base class for all API resources; holds the HttpClient passed to its constructor (not in the export list)
467
+ */
468
+ declare abstract class BaseResource {
469
+ protected httpClient: HttpClient;
470
+ constructor(httpClient: HttpClient);
471
+ }
472
+
473
+ /**
474
+ * Jobs resource for managing parsing jobs (exposed on the client as Knowhere.jobs)
475
+ */
476
+ declare class Jobs extends BaseResource {
477
+ private pendingUploadJobs;
478
+ /**
479
+ * Create a new parsing job; resolves to the Job creation response
480
+ */
481
+ create(params: CreateJobParams): Promise<Job>;
482
+ /**
483
+ * Get job status; resolves to the full JobResult
484
+ */
485
+ get(jobId: string): Promise<JobResult>;
486
+ /**
487
+ * Upload file for a job; the first argument is either a job ID string or a Job object
488
+ */
489
+ upload(jobOrId: string | Job, params: UploadParams): Promise<void>;
490
+ /**
491
+ * Wait for job completion; polling behaviour is configured through WaitOptions
492
+ */
493
+ wait(jobId: string, options?: WaitOptions): Promise<JobResult>;
494
+ /**
495
+ * Load parse result from completed job; accepts a JobResult or a string (per the parameter name, a job ID or a URL)
496
+ */
497
+ load(jobResultOrIdOrUrl: JobResult | string, options?: LoadOptions): Promise<ParseResult>;
498
+ private isHttpUrl;
499
+ private resolveUploadJob;
500
+ private resolveLoadJobResult;
501
+ }
502
+
503
+ /**
504
+ * Main Knowhere SDK client (exported both by name and as the package's default export)
505
+ */
506
+ declare class Knowhere {
507
+ /** Jobs resource for low-level API */
508
+ readonly jobs: Jobs;
509
+ private httpClient;
510
+ /**
511
+ * Create a new Knowhere client; the options argument is optional
512
+ */
513
+ constructor(options?: KnowhereOptions);
514
+ /**
515
+ * High-level API: Parse a document and return structured results
516
+ *
517
+ * @example
518
+ * ```typescript
519
+ * // Parse from URL
520
+ * const result = await client.parse({ url: 'https://example.com/doc.pdf' });
521
+ *
522
+ * // Parse from file
523
+ * const result = await client.parse({ file: './document.pdf' });
524
+ *
525
+ * // Parse with options
526
+ * const result = await client.parse({
527
+ * url: 'https://example.com/doc.pdf',
528
+ * model: 'advanced',
529
+ * ocr: true,
530
+ * onUploadProgress: (p) => console.log(`${p.percent}%`),
531
+ * });
532
+ * ```
533
+ */
534
+ parse(params: ParseParams): Promise<ParseResult>;
535
+ }
536
+
537
+ declare const VERSION = "0.1.0"; // NOTE(review): literal is "0.1.0" although this diff is labelled @ontos-ai/knowhere-sdk 0.2.0; confirm the constant is bumped on release
538
+
539
+ /**
540
+ * Base error class for all Knowhere SDK errors (extends the built-in Error)
541
+ */
542
+ declare class KnowhereError extends Error {
543
+ constructor(message: string);
544
+ }
545
+ /**
546
+ * Network-related errors; optionally carries the underlying Error as `cause`
547
+ */
548
+ declare class NetworkError extends KnowhereError {
549
+ readonly cause?: Error | undefined;
550
+ constructor(message: string, cause?: Error | undefined);
551
+ }
552
+ /**
553
+ * Request timeout error (a NetworkError subclass; the message argument is optional)
554
+ */
555
+ declare class TimeoutError extends NetworkError {
556
+ constructor(message?: string);
557
+ }
558
+ /**
559
+ * Polling timeout error; extends KnowhereError directly (not TimeoutError) and records elapsedMs
560
+ */
561
+ declare class PollingTimeoutError extends KnowhereError {
562
+ readonly elapsedMs: number;
563
+ constructor(message: string | undefined, elapsedMs: number);
564
+ }
565
+ /**
566
+ * ZIP checksum verification failed; optionally carries the `expected` and `actual` values as strings
567
+ */
568
+ declare class ChecksumError extends KnowhereError {
569
+ readonly expected?: string | undefined;
570
+ readonly actual?: string | undefined;
571
+ constructor(message?: string, expected?: string | undefined, actual?: string | undefined);
572
+ }
573
+ /**
574
+ * Raised when the caller provides invalid arguments (KnowhereError subclass, not an APIError)
575
+ */
576
+ declare class ValidationError extends KnowhereError {
577
+ constructor(message: string);
578
+ }
579
+ /**
580
+ * Raised when an object is in an unexpected state for the operation (KnowhereError subclass)
581
+ */
582
+ declare class InvalidStateError extends KnowhereError {
583
+ constructor(message: string);
584
+ }
585
+
586
+ /**
587
+ * Base class for all API errors; carries the HTTP status code plus optional code, requestId, details and raw body
588
+ */
589
+ declare class APIError extends KnowhereError {
590
+ readonly statusCode: number;
591
+ readonly code?: string | undefined;
592
+ readonly requestId?: string | undefined;
593
+ readonly details?: Record<string, unknown> | undefined;
594
+ readonly body?: unknown | undefined;
595
+ constructor(message: string, statusCode: number, code?: string | undefined, requestId?: string | undefined, details?: Record<string, unknown> | undefined, body?: unknown | undefined);
596
+ }
597
+ /**
598
+ * 400 Bad Request; message is required here, whereas the other status-specific API errors make it optional
599
+ */
600
+ declare class BadRequestError extends APIError {
601
+ constructor(message: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
602
+ }
603
+ /**
604
+ * 401 Unauthorized (APIError subclass; the constructor takes no statusCode argument)
605
+ */
606
+ declare class AuthenticationError extends APIError {
607
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
608
+ }
609
+ /**
610
+ * 402 Payment Required (APIError subclass; the constructor takes no statusCode argument)
611
+ */
612
+ declare class PaymentRequiredError extends APIError {
613
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
614
+ }
615
+ /**
616
+ * 403 Forbidden (APIError subclass; the constructor takes no statusCode argument)
617
+ */
618
+ declare class PermissionDeniedError extends APIError {
619
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
620
+ }
621
+ /**
622
+ * 404 Not Found (APIError subclass; the constructor takes no statusCode argument)
623
+ */
624
+ declare class NotFoundError extends APIError {
625
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
626
+ }
627
+ /**
628
+ * 409 Conflict (APIError subclass; the constructor takes no statusCode argument)
629
+ */
630
+ declare class ConflictError extends APIError {
631
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
632
+ }
633
+ /**
634
+ * 429 Rate Limit Exceeded; adds an optional numeric retryAfter (its unit is not stated in this declaration)
635
+ */
636
+ declare class RateLimitError extends APIError {
637
+ readonly retryAfter?: number | undefined;
638
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown, retryAfter?: number | undefined);
639
+ }
640
+ /**
641
+ * 500 Internal Server Error (APIError subclass; the constructor takes no statusCode argument)
642
+ */
643
+ declare class InternalServerError extends APIError {
644
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
645
+ }
646
+ /**
647
+ * 502/503 Service Unavailable; unlike the other status-specific API errors, the constructor takes statusCode as its second argument
648
+ */
649
+ declare class ServiceUnavailableError extends APIError {
650
+ constructor(message?: string, statusCode?: number, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
651
+ }
652
+ /**
653
+ * 504 Gateway Timeout; an APIError subclass, unrelated to TimeoutError in the class hierarchy
654
+ */
655
+ declare class GatewayTimeoutError extends APIError {
656
+ constructor(message?: string, code?: string, requestId?: string, details?: Record<string, unknown>, body?: unknown);
657
+ }
658
+
659
+ /**
660
+ * Job execution failed; carries an error code and a JobResult (KnowhereError subclass, not an APIError)
661
+ */
662
+ declare class JobFailedError extends KnowhereError {
663
+ readonly code: string;
664
+ readonly jobResult: JobResult;
665
+ constructor(message: string, code: string, jobResult: JobResult);
666
+ }
667
+
668
+ export { APIError, AuthenticationError, BadRequestError, type BaseChunk, ChecksumError, type Chunk, ConflictError, type CreateJobParams, type DocType, type FileIndex, GatewayTimeoutError, type ImageChunk, InternalServerError, InvalidStateError, type Job, type JobError, JobFailedError, type JobResult, type JobStatus, Jobs, Knowhere, KnowhereError, type KnowhereOptions, type LoadOptions, type Manifest, NetworkError, NotFoundError, type ParseParams, type ParseResult, type ParsingModel, type ParsingParams, PaymentRequiredError, PermissionDeniedError, type PollProgress, PollingTimeoutError, RateLimitError, ServiceUnavailableError, type Statistics, type TableChunk, type TextChunk, TimeoutError, type UploadParams, type UploadProgress, VERSION, ValidationError, type WaitOptions, type WebhookConfig, Knowhere as default };