@superlinked/sie-sdk 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,907 @@
1
+ export { maxsim, maxsimBatch, maxsimDocuments } from './scoring.js';
2
+
3
+ /**
4
+ * Types for the SIE TypeScript SDK
5
+ *
6
+ * These types mirror the Python SDK (packages/sie_sdk/src/sie_sdk/types.py)
7
+ * for full feature parity.
8
+ */
9
+ /**
10
+ * Output dtype options for quantized embeddings.
11
+ * Matches Python DType literal.
12
+ */
13
+ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" | "ubinary";
14
+ /**
15
+ * Output type options for encode operation.
16
+ */
17
+ type OutputType = "dense" | "sparse" | "multivector";
18
+ /**
19
+ * A single item to encode, score, or extract from.
20
+ *
21
+ * For simple text encoding, just use `{ text: "your text here" }`.
22
+ *
23
+ * @example
24
+ * // Simple text
25
+ * { text: "Hello world" }
26
+ *
27
+ * // With ID for tracking through results
28
+ * { id: "doc-1", text: "Document text" }
29
+ *
30
+ * // With images for multimodal models (ColPali, CLIP)
31
+ * { text: "Description", images: [imageBytes] }
32
+ *
33
+ * // Pre-encoded multivector (for use with maxsim utility)
34
+ * { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
35
+ */
36
+ interface Item {
37
+ /** Optional ID to track this item through results */
38
+ id?: string;
39
+ /** Text content to encode */
40
+ text?: string;
41
+ /** Images as byte arrays (JPEG/PNG) for multimodal models */
42
+ images?: Uint8Array[];
43
+ /** Pre-encoded multivector (for use with maxsim utility) */
44
+ multivector?: Float32Array[];
45
+ /** Arbitrary metadata (passed through to results) */
46
+ metadata?: Record<string, unknown>;
47
+ }
48
+ /**
49
+ * Sparse vector result with non-zero indices and values.
50
+ * Used by SPLADE-type models.
51
+ */
52
+ interface SparseResult {
53
+ /** Token indices with non-zero weights */
54
+ indices: Int32Array;
55
+ /** Weight values for each index */
56
+ values: Float32Array;
57
+ }
58
+ /**
59
+ * Server-side timing breakdown for a request.
60
+ */
61
+ interface TimingInfo {
62
+ totalMs?: number;
63
+ queueMs?: number;
64
+ tokenizationMs?: number;
65
+ inferenceMs?: number;
66
+ }
67
+ /**
68
+ * Result of encoding a single item.
69
+ *
70
+ * Contains the item ID (if provided) and one or more output representations
71
+ * depending on what was requested via outputTypes.
72
+ */
73
+ interface EncodeResult {
74
+ /** Item ID (echoed from request if provided) */
75
+ id?: string;
76
+ /** Dense embedding vector, shape [dims] */
77
+ dense?: Float32Array;
78
+ /** Sparse embedding with indices and values */
79
+ sparse?: SparseResult;
80
+ /** Multi-vector embedding for late interaction models, shape [numTokens][tokenDims] */
81
+ multivector?: Float32Array[];
82
+ /** Server-side timing breakdown */
83
+ timing?: TimingInfo;
84
+ }
85
+ /**
86
+ * Model dimension information.
87
+ */
88
+ interface ModelDims {
89
+ dense?: number;
90
+ sparse?: number;
91
+ multivector?: number;
92
+ }
93
+ /**
94
+ * Information about a model returned by listModels().
95
+ */
96
+ interface ModelInfo {
97
+ /** Model name/identifier */
98
+ name: string;
99
+ /** Whether the model is currently loaded in memory */
100
+ loaded: boolean;
101
+ /** Supported input types: ["text"], ["text", "image"], etc. */
102
+ inputs: string[];
103
+ /** Supported output types: ["dense"], ["dense", "sparse"], etc. */
104
+ outputs: string[];
105
+ /** Embedding dimensions for each output type */
106
+ dims?: ModelDims;
107
+ /** Maximum sequence length the model supports */
108
+ maxSequenceLength?: number;
109
+ }
110
+ /**
111
+ * A single score entry from reranking.
112
+ */
113
+ interface ScoreEntry {
114
+ /** ID of the item (from request or auto-generated) */
115
+ itemId: string;
116
+ /** Relevance score (higher = more relevant) */
117
+ score: number;
118
+ /** Position in sorted order (0 = most relevant) */
119
+ rank: number;
120
+ }
121
+ /**
122
+ * Result of scoring items against a query.
123
+ */
124
+ interface ScoreResult {
125
+ /** Model used for scoring */
126
+ model?: string;
127
+ /** Query ID (echoed from request if provided) */
128
+ queryId?: string;
129
+ /** Score entries, sorted by relevance (descending) */
130
+ scores: ScoreEntry[];
131
+ }
132
+ /**
133
+ * A single extracted entity (NER span).
134
+ */
135
+ interface Entity {
136
+ /** The extracted text span */
137
+ text: string;
138
+ /** Entity type/label (e.g., "person", "organization") */
139
+ label: string;
140
+ /** Confidence score */
141
+ score: number;
142
+ /** Start character offset in the original text */
143
+ start?: number;
144
+ /** End character offset in the original text */
145
+ end?: number;
146
+ /** Bounding box [x, y, width, height] for image-based extraction */
147
+ bbox?: number[];
148
+ }
149
+ /**
150
+ * Result of extraction for a single item.
151
+ */
152
+ interface ExtractResult {
153
+ /** Item ID (echoed from request if provided) */
154
+ id?: string;
155
+ /** List of extracted entities */
156
+ entities: Entity[];
157
+ }
158
+ /**
159
+ * Information about a worker in the cluster.
160
+ */
161
+ interface WorkerInfo {
162
+ /** Worker base URL */
163
+ url: string;
164
+ /** GPU type (e.g., "l4", "a100-80gb") */
165
+ gpu: string;
166
+ /** Whether the worker is healthy */
167
+ healthy: boolean;
168
+ /** Number of items in the worker's queue */
169
+ queueDepth: number;
170
+ /** List of model names loaded on this worker */
171
+ loadedModels: string[];
172
+ }
173
+ /**
174
+ * Cluster capacity information returned by getCapacity().
175
+ */
176
+ interface CapacityInfo {
177
+ /** Overall cluster status: "healthy", "degraded", "no_workers" */
178
+ status: string;
179
+ /** Number of healthy workers */
180
+ workerCount: number;
181
+ /** Number of GPUs available */
182
+ gpuCount: number;
183
+ /** Number of unique models loaded across all workers */
184
+ modelsLoaded: number;
185
+ /** GPU types configured in the cluster */
186
+ configuredGpuTypes: string[];
187
+ /** GPU types currently running */
188
+ liveGpuTypes: string[];
189
+ /** List of worker details */
190
+ workers: WorkerInfo[];
191
+ }
192
+ /**
193
+ * Pool specification for creating resource pools.
194
+ */
195
+ interface PoolSpec {
196
+ /** Pool name (used in GPU param as "poolName/gpuType") */
197
+ name: string;
198
+ /** GPU requirements, e.g., { l4: 2, "a100-40gb": 1 } */
199
+ gpus?: Record<string, number>;
200
+ }
201
+ /**
202
+ * Pool status information.
203
+ */
204
+ interface PoolStatus {
205
+ /** Pool state: "pending", "active", "expired" */
206
+ state: string;
207
+ /** Workers assigned to this pool */
208
+ assignedWorkers: Array<{
209
+ name: string;
210
+ url: string;
211
+ gpu: string;
212
+ }>;
213
+ /** Unix timestamp when pool was created */
214
+ createdAt?: number;
215
+ /** Unix timestamp of last lease renewal */
216
+ lastRenewed?: number;
217
+ }
218
+ /**
219
+ * Full pool information.
220
+ */
221
+ interface PoolInfo {
222
+ /** Pool name */
223
+ name: string;
224
+ /** Pool specification */
225
+ spec: {
226
+ gpus?: Record<string, number>;
227
+ };
228
+ /** Pool status */
229
+ status: PoolStatus;
230
+ }
231
+ type ModelState = "available" | "loading" | "loaded" | "unloading";
232
+ interface ClusterSummary {
233
+ worker_count: number;
234
+ gpu_count: number;
235
+ models_loaded: number;
236
+ total_qps: number;
237
+ }
238
+ interface ClusterWorkerInfo {
239
+ url: string;
240
+ gpu: string;
241
+ healthy: boolean;
242
+ queue_depth: number;
243
+ loaded_models: string[];
244
+ }
245
+ interface ModelSummary {
246
+ name: string;
247
+ state: ModelState;
248
+ worker_count: number;
249
+ gpu_types: string[];
250
+ total_queue_depth: number;
251
+ }
252
+ interface ServerInfo {
253
+ version: string;
254
+ uptime_seconds: number;
255
+ user: string;
256
+ working_dir: string;
257
+ pid: number;
258
+ }
259
+ interface GPUMetrics {
260
+ device: string;
261
+ name: string;
262
+ gpu_type: string;
263
+ utilization_pct: number;
264
+ memory_used_bytes: number;
265
+ memory_total_bytes: number;
266
+ memory_threshold_pct?: number;
267
+ }
268
+ interface ModelConfig {
269
+ hf_id: string;
270
+ adapter: string;
271
+ inputs: string[];
272
+ outputs: string[];
273
+ dims: Record<string, number | null>;
274
+ max_sequence_length?: number;
275
+ pooling?: string | null;
276
+ normalize?: boolean;
277
+ adapter_options_loadtime?: Record<string, unknown> | null;
278
+ adapter_options_runtime?: Record<string, unknown> | null;
279
+ }
280
+ interface ModelStatus {
281
+ name: string;
282
+ state: ModelState;
283
+ device: string | null;
284
+ memory_bytes: number;
285
+ config: ModelConfig;
286
+ queue_depth: number;
287
+ queue_pending_items: number;
288
+ }
289
+ interface WorkerStatusMessage {
290
+ timestamp: number;
291
+ name: string;
292
+ gpu: string;
293
+ gpu_count: number;
294
+ bundle: string;
295
+ machine_profile: string;
296
+ loaded_models: string[];
297
+ server: ServerInfo;
298
+ gpus: GPUMetrics[];
299
+ models: ModelStatus[];
300
+ counters: Record<string, Record<string, number>>;
301
+ histograms: Record<string, Record<string, Record<string, unknown>>>;
302
+ }
303
+ interface ClusterStatusMessage {
304
+ timestamp: number;
305
+ cluster: ClusterSummary;
306
+ workers: ClusterWorkerInfo[];
307
+ models: ModelSummary[];
308
+ }
309
+ type StatusMessage = WorkerStatusMessage | ClusterStatusMessage;
310
+ /**
311
+ * Options for SIEClient constructor.
312
+ */
313
+ interface SIEClientOptions {
314
+ /** Request timeout in milliseconds (default: 30000) */
315
+ timeout?: number;
316
+ /** Default GPU type for all requests (e.g., "l4", "a100-80gb") */
317
+ gpu?: string;
318
+ /** API key for authentication (sent as Bearer token) */
319
+ apiKey?: string;
320
+ /** Whether to auto-retry on 202 (provisioning) responses */
321
+ waitForCapacity?: boolean;
322
+ /** Maximum time to wait for provisioning in milliseconds (default: 300000) */
323
+ provisionTimeout?: number;
324
+ }
325
+ /**
326
+ * Options for encode operation.
327
+ */
328
+ interface EncodeOptions {
329
+ /** Output types to request: ["dense"], ["sparse"], ["dense", "sparse", "multivector"] */
330
+ outputTypes?: OutputType[];
331
+ /** Instruction prefix for instruction-tuned models */
332
+ instruction?: string;
333
+ /** Whether this is a query (for asymmetric models) */
334
+ isQuery?: boolean;
335
+ /** Output dtype for quantization */
336
+ outputDtype?: DType;
337
+ /** GPU type for this request (overrides client default) */
338
+ gpu?: string;
339
+ /** Whether to wait for capacity (overrides client default) */
340
+ waitForCapacity?: boolean;
341
+ }
342
+ /**
343
+ * Options for score operation.
344
+ */
345
+ interface ScoreOptions {
346
+ /** Return only top K results */
347
+ topK?: number;
348
+ /** GPU type for this request */
349
+ gpu?: string;
350
+ /** Whether to wait for capacity */
351
+ waitForCapacity?: boolean;
352
+ }
353
+ /**
354
+ * Options for extract operation.
355
+ */
356
+ interface ExtractOptions {
357
+ /** Entity labels to extract (e.g., ["person", "organization"]) */
358
+ labels: string[];
359
+ /** Minimum confidence threshold (0-1) */
360
+ threshold?: number;
361
+ /** GPU type for this request */
362
+ gpu?: string;
363
+ /** Whether to wait for capacity */
364
+ waitForCapacity?: boolean;
365
+ }
366
+ /**
367
+ * Helper to convert typed arrays to regular number array.
368
+ * Useful for JSON serialization or working with libraries that expect number[].
369
+ */
370
+ declare function toNumberArray(arr: Float32Array | Int32Array): number[];
371
+ /**
372
+ * Helper to convert number array to Float32Array.
373
+ */
374
+ declare function toFloat32Array(arr: number[]): Float32Array;
375
+
376
+ /**
377
+ * SIE Client implementation
378
+ *
379
+ * @example
380
+ * ```typescript
381
+ * import { SIEClient } from "@superlinked/sie-sdk";
382
+ *
383
+ * const client = new SIEClient("http://localhost:8080");
384
+ *
385
+ * // Encode single item
386
+ * const result = await client.encode("bge-m3", { text: "Hello world" });
387
+ * console.log(result.dense); // Float32Array
388
+ *
389
+ * // Batch encode
390
+ * const results = await client.encode("bge-m3", [
391
+ * { text: "First document" },
392
+ * { text: "Second document" },
393
+ * ]);
394
+ *
395
+ * // With GPU routing and auto-retry for capacity
396
+ * const resultWithGpu = await client.encode(
397
+ * "bge-m3",
398
+ * { text: "Hello" },
399
+ * { gpu: "l4", waitForCapacity: true },
400
+ * );
401
+ *
402
+ * await client.close();
403
+ * ```
404
+ */
405
+
406
+ /**
407
+ * SIE Client for embedding, scoring, and extraction.
408
+ *
409
+ * All network operations are async (there are no blocking variants) and use native fetch.
410
+ * It handles msgpack serialization, error parsing, and retry logic.
411
+ *
412
+ * @example Resource pool usage
413
+ * ```typescript
414
+ * const client = new SIEClient("http://router:8080");
415
+ *
416
+ * // Create a dedicated pool
417
+ * await client.createPool("eval-bench", { l4: 2 });
418
+ *
419
+ * // Use pool for requests
420
+ * await client.encode("bge-m3", { text: "Hello" }, { gpu: "eval-bench/l4" });
421
+ *
422
+ * // Check pool status
423
+ * const pool = await client.getPool("eval-bench");
424
+ * console.log(`Pool state: ${pool?.status.state}`);
425
+ *
426
+ * // Clean up
427
+ * await client.deletePool("eval-bench");
428
+ * await client.close();
429
+ * ```
430
+ */
431
+ declare class SIEClient {
432
+ private readonly baseUrl;
433
+ private readonly timeout;
434
+ private readonly gpu?;
435
+ private readonly apiKey?;
436
+ private readonly defaultWaitForCapacity;
437
+ private readonly provisionTimeout;
438
+ private readonly pools;
439
+ private versionWarningLogged;
440
+ /**
441
+ * Create a new SIE client.
442
+ *
443
+ * @param baseUrl - Base URL of the SIE server (e.g., "http://localhost:8080")
444
+ * @param options - Client options
445
+ */
446
+ constructor(baseUrl: string, options?: SIEClientOptions);
447
+ /**
448
+ * Get the base URL of the SIE server.
449
+ *
450
+ * @returns The normalized base URL (without trailing slash)
451
+ */
452
+ getBaseUrl(): string;
453
+ /**
454
+ * Encode a single item.
455
+ *
456
+ * @param model - Model name (e.g., "bge-m3")
457
+ * @param item - Item to encode
458
+ * @param options - Encode options
459
+ * @returns Encode result with embeddings
460
+ */
461
+ encode(model: string, item: Item, options?: EncodeOptions): Promise<EncodeResult>;
462
+ /**
463
+ * Encode multiple items.
464
+ *
465
+ * @param model - Model name (e.g., "bge-m3")
466
+ * @param items - Items to encode
467
+ * @param options - Encode options
468
+ * @returns Array of encode results in same order as input
469
+ */
470
+ encode(model: string, items: Item[], options?: EncodeOptions): Promise<EncodeResult[]>;
471
+ /**
472
+ * List available models.
473
+ *
474
+ * @returns Array of model information
475
+ */
476
+ listModels(): Promise<ModelInfo[]>;
477
+ /**
478
+ * Stream real-time status updates from a worker or router.
479
+ *
480
+ * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
481
+ * "auto" detects the endpoint via /health.
482
+ */
483
+ watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
484
+ /**
485
+ * Score items against a query using a reranker model.
486
+ *
487
+ * @param model - Model name (e.g., "bge-reranker-v2")
488
+ * @param query - Query item
489
+ * @param items - Items to score against the query
490
+ * @param options - Score options
491
+ * @returns Score result with sorted scores
492
+ *
493
+ * @example
494
+ * ```typescript
495
+ * const result = await client.score(
496
+ * "bge-reranker-v2",
497
+ * { text: "What is machine learning?" },
498
+ * [
499
+ * { id: "doc-1", text: "Machine learning is..." },
500
+ * { id: "doc-2", text: "Python is..." },
501
+ * ],
502
+ * );
503
+ *
504
+ * // Scores are sorted by relevance (descending)
505
+ * console.log(result.scores[0].itemId); // most relevant
506
+ * ```
507
+ */
508
+ score(model: string, query: Item, items: Item[], options?: ScoreOptions): Promise<ScoreResult>;
509
+ /**
510
+ * Extract entities from a single item.
511
+ *
512
+ * @param model - Model name (e.g., "gliner-multi-v2.1")
513
+ * @param item - Item to extract from
514
+ * @param options - Extract options with labels
515
+ * @returns Extract result with entities
516
+ */
517
+ extract(model: string, item: Item, options: ExtractOptions): Promise<ExtractResult>;
518
+ /**
519
+ * Extract entities from multiple items.
520
+ *
521
+ * @param model - Model name (e.g., "gliner-multi-v2.1")
522
+ * @param items - Items to extract from
523
+ * @param options - Extract options with labels
524
+ * @returns Array of extract results in same order as input
525
+ */
526
+ extract(model: string, items: Item[], options: ExtractOptions): Promise<ExtractResult[]>;
527
+ /**
528
+ * Close the client and cleanup resources.
529
+ *
530
+ * Stops pool lease renewal timers. Note that pools are not deleted
531
+ * automatically - they are garbage collected by the router after inactivity.
532
+ * This allows pool reuse if the client reconnects.
533
+ */
534
+ close(): Promise<void>;
535
+ /**
536
+ * Create a resource pool for isolated capacity.
537
+ *
538
+ * Pools provide dedicated worker capacity, isolated from other clients.
539
+ * Workers are assigned to pools and only serve requests from that pool.
540
+ *
541
+ * @param name - Pool name (used in GPU param as "poolName/machineProfile")
542
+ * @param gpus - Machine profile requirements, e.g., { "l4": 2, "l4-spot": 1 }
543
+ *
544
+ * @example
545
+ * ```typescript
546
+ * // Create a pool with 2 L4 GPUs
547
+ * await client.createPool("eval-bench", { l4: 2 });
548
+ *
549
+ * // Use the pool for requests
550
+ * await client.encode("bge-m3", { text: "Hello" }, { gpu: "eval-bench/l4" });
551
+ *
552
+ * // Clean up when done
553
+ * await client.deletePool("eval-bench");
554
+ * ```
555
+ */
556
+ createPool(name: string, gpus: Record<string, number>): Promise<void>;
557
+ /**
558
+ * Get information about a pool.
559
+ *
560
+ * @param name - Pool name to query
561
+ * @returns PoolInfo if pool exists, null otherwise
562
+ *
563
+ * @example
564
+ * ```typescript
565
+ * await client.createPool("eval-bench", { l4: 2 });
566
+ * const pool = await client.getPool("eval-bench");
567
+ * console.log(`Pool state: ${pool?.status.state}`);
568
+ * console.log(`Workers: ${pool?.status.assignedWorkers.length}`);
569
+ * ```
570
+ */
571
+ getPool(name: string): Promise<PoolInfo | null>;
572
+ /**
573
+ * Delete a pool.
574
+ *
575
+ * @param name - Pool name to delete
576
+ * @returns true if pool was deleted, false if pool didn't exist
577
+ *
578
+ * @example
579
+ * ```typescript
580
+ * // Clean up pool when done
581
+ * const deleted = await client.deletePool("eval-bench");
582
+ * if (deleted) {
583
+ * console.log("Pool deleted successfully");
584
+ * }
585
+ * ```
586
+ */
587
+ deletePool(name: string): Promise<boolean>;
588
+ private checkServerVersion;
589
+ /**
590
+ * Parse GPU parameter into pool and GPU components.
591
+ *
592
+ * Supports "pool/gpu" format for pool routing.
593
+ */
594
+ private parseGpuParam;
595
+ /**
596
+ * Get current cluster capacity information.
597
+ *
598
+ * Queries the router's /health endpoint for cluster state. Useful for
599
+ * checking if specific GPU types are available before sending requests.
600
+ *
601
+ * @param gpu - Optional filter to check specific GPU type availability
602
+ * @returns CapacityInfo with worker count, GPU types, and worker details
603
+ *
604
+ * @example
605
+ * ```typescript
606
+ * // Check cluster state
607
+ * const capacity = await client.getCapacity();
608
+ * console.log(`Workers: ${capacity.workerCount}, GPUs: ${capacity.liveGpuTypes}`);
609
+ *
610
+ * // Check if L4 GPUs are available
611
+ * const l4Capacity = await client.getCapacity("l4");
612
+ * if (l4Capacity.workerCount > 0) {
613
+ * console.log("L4 workers available");
614
+ * }
615
+ * ```
616
+ */
617
+ getCapacity(gpu?: string): Promise<CapacityInfo>;
618
+ /**
619
+ * Wait for GPU capacity to become available.
620
+ *
621
+ * Polls the router until workers with the specified GPU type are online.
622
+ * This is useful for pre-warming the cluster before running benchmarks.
623
+ *
624
+ * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
625
+ * @param options - Wait options
626
+ * @returns CapacityInfo once capacity is available
627
+ *
628
+ * @example
629
+ * ```typescript
630
+ * // Wait for L4 capacity before running benchmarks
631
+ * const capacity = await client.waitForCapacity("l4", { timeout: 300000 });
632
+ * console.log(`Ready with ${capacity.workerCount} L4 workers`);
633
+ *
634
+ * // Wait and pre-load a model
635
+ * const capacityWithModel = await client.waitForCapacity("l4", { model: "bge-m3" });
636
+ * ```
637
+ */
638
+ waitForCapacity(gpu: string, options?: {
639
+ model?: string;
640
+ timeout?: number;
641
+ pollInterval?: number;
642
+ }): Promise<CapacityInfo>;
643
+ /**
644
+ * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
645
+ */
646
+ private requestWithRetry;
647
+ /**
648
+ * Make a single msgpack HTTP request to the SIE server (no retry logic).
649
+ */
650
+ private request;
651
+ /**
652
+ * Make a JSON HTTP request to the SIE server.
653
+ * Used for endpoints that return JSON (e.g., /v1/models, /health).
654
+ */
655
+ private requestJson;
656
+ private buildWsUrl;
657
+ private createWebSocket;
658
+ private detectEndpointType;
659
+ }
660
+
661
+ declare const SDK_VERSION = "0.1.8";
662
+
663
+ /**
664
+ * Error classes for the SIE TypeScript SDK.
665
+ *
666
+ * These errors mirror the Python SDK (packages/sie_sdk/src/sie_sdk/client/errors.py)
667
+ * for consistent error handling across languages.
668
+ *
669
+ * @example
670
+ * // Catching specific error types
671
+ * try {
672
+ * await client.encode("model", { text: "hello" });
673
+ * } catch (error) {
674
+ * if (error instanceof RequestError) {
675
+ * console.error(`Bad request (${error.code}): ${error.message}`);
676
+ * } else if (error instanceof ProvisioningError) {
677
+ * console.log(`GPU ${error.gpu} is provisioning, retry after ${error.retryAfter}ms`);
678
+ * } else if (error instanceof SIEConnectionError) {
679
+ * console.error("Cannot reach server:", error.message);
680
+ * }
681
+ * }
682
+ */
683
+ /**
684
+ * Base error for all SIE SDK errors.
685
+ *
686
+ * All SIE errors extend this class, so you can catch all SDK errors with:
687
+ * `catch (error) { if (error instanceof SIEError) { ... } }`
688
+ */
689
+ declare class SIEError extends Error {
690
+ constructor(message: string);
691
+ }
692
+ /**
693
+ * Error connecting to the SIE server.
694
+ *
695
+ * Raised when:
696
+ * - Network is unreachable
697
+ * - DNS resolution fails
698
+ * - Connection times out
699
+ * - Server refuses connection
700
+ */
701
+ declare class SIEConnectionError extends SIEError {
702
+ constructor(message: string);
703
+ }
704
+ /**
705
+ * Error in the request (4xx responses).
706
+ *
707
+ * Raised when the client sends an invalid request:
708
+ * - 400: Bad request (invalid parameters, malformed body)
709
+ * - 401: Unauthorized (missing or invalid API key)
710
+ * - 403: Forbidden (insufficient permissions)
711
+ * - 404: Not found (invalid endpoint or model)
712
+ * - 422: Validation error (invalid input format)
713
+ */
714
+ declare class RequestError extends SIEError {
715
+ /** Error code from the server (e.g., "INVALID_MODEL", "VALIDATION_ERROR") */
716
+ readonly code: string | undefined;
717
+ /** HTTP status code (400-499) */
718
+ readonly statusCode: number | undefined;
719
+ constructor(message: string, code?: string, statusCode?: number);
720
+ }
721
+ /**
722
+ * Error from the server (5xx responses).
723
+ *
724
+ * Raised when the server encounters an internal error:
725
+ * - 500: Internal server error
726
+ * - 502: Bad gateway
727
+ * - 503: Service unavailable
728
+ * - 504: Gateway timeout
729
+ */
730
+ declare class ServerError extends SIEError {
731
+ /** Error code from the server (e.g., "INTERNAL_ERROR", "LORA_LOADING") */
732
+ readonly code: string | undefined;
733
+ /** HTTP status code (500-599) */
734
+ readonly statusCode: number | undefined;
735
+ constructor(message: string, code?: string, statusCode?: number);
736
+ }
737
+ /**
738
+ * Error when capacity is still being provisioned and the client will not (or can no longer) wait for it.
739
+ *
740
+ * Raised when:
741
+ * - Server returns 202 (no capacity, provisioning)
742
+ * - waitForCapacity is false (caller doesn't want to wait)
743
+ * - Or provisioning timeout exceeded
744
+ *
745
+ * The caller can use `retryAfter` to know when to retry.
746
+ */
747
+ declare class ProvisioningError extends SIEError {
748
+ /** The GPU type that was requested */
749
+ readonly gpu: string | undefined;
750
+ /** Suggested retry delay in milliseconds (from server Retry-After header) */
751
+ readonly retryAfter: number | undefined;
752
+ constructor(message: string, gpu?: string, retryAfter?: number);
753
+ }
754
+ /**
755
+ * Error related to resource pool operations.
756
+ *
757
+ * Raised when:
758
+ * - Pool creation fails (e.g., insufficient capacity)
759
+ * - Pool not found
760
+ * - Pool in invalid state (e.g., expired)
761
+ * - Pool lease renewal fails
762
+ */
763
+ declare class PoolError extends SIEError {
764
+ /** Name of the pool */
765
+ readonly poolName: string | undefined;
766
+ /** Current pool state (if known): "pending", "active", "expired" */
767
+ readonly state: string | undefined;
768
+ constructor(message: string, poolName?: string, state?: string);
769
+ }
770
+ /**
771
+ * Error when LoRA adapter is loading and retry limit exceeded.
772
+ *
773
+ * Raised when:
774
+ * - Server returns 503 with LORA_LOADING code
775
+ * - Retry limit is exceeded
776
+ *
777
+ * This usually means the adapter is being loaded from disk/network
778
+ * and the caller should wait longer or reduce request rate.
779
+ */
780
+ declare class LoraLoadingError extends SIEError {
781
+ /** The LoRA adapter that was requested */
782
+ readonly lora: string | undefined;
783
+ /** The model the LoRA was requested for */
784
+ readonly model: string | undefined;
785
+ constructor(message: string, lora?: string, model?: string);
786
+ }
787
+ /**
788
+ * Error when model is loading and retry limit exceeded.
789
+ *
790
+ * Raised when:
791
+ * - Server returns 503 with MODEL_LOADING code
792
+ * - Retry limit is exceeded
793
+ *
794
+ * This usually means the model is being loaded from disk/HuggingFace
795
+ * and the caller should wait longer.
796
+ */
797
+ declare class ModelLoadingError extends SIEError {
798
+ /** The model that was requested */
799
+ readonly model: string | undefined;
800
+ constructor(message: string, model?: string);
801
+ }
802
+
803
+ /**
804
+ * MessagePack serialization with msgpack-numpy compatibility.
805
+ *
806
+ * The SIE server uses Python's msgpack-numpy library which serializes numpy arrays
807
+ * using extension type 78 ('N'). This module provides compatible encoding/decoding.
808
+ *
809
+ * Wire format for numpy arrays (extension type 78):
810
+ * - dtype string (e.g., '<f4' for float32, '<i4' for int32) terminated by '|'
811
+ * - shape as comma-separated dimensions terminated by '|'
812
+ * - raw array data in little-endian format
813
+ */
814
+ /**
815
+ * Pack a message to MessagePack format (msgpack-numpy compatible)
816
+ */
817
+ declare function packMessage(data: unknown): Uint8Array;
818
+ /**
819
+ * Unpack a MessagePack message (msgpack-numpy compatible)
820
+ *
821
+ * Note: msgpack-numpy uses byte string keys (b'nd', b'type', b'shape', b'data') for numpy
822
+ * array metadata. In JavaScript these become Uint8Array which need to be decoded as text.
823
+ * After decoding, we recursively convert numpy array maps to typed arrays.
824
+ */
825
+ declare function unpackMessage<T = unknown>(data: Uint8Array): T;
826
+
827
+ /**
828
+ * Image handling utilities for the SIE TypeScript SDK.
829
+ *
830
+ * Per design.md Section 4.3, images are serialized as bytes for transport.
831
+ * This module handles conversion from various input formats to Uint8Array.
832
+ *
833
+ * Supported input formats:
834
+ * - Uint8Array (raw bytes)
835
+ * - ArrayBuffer / Buffer (Node.js)
836
+ * - Blob / File (browser)
837
+ * - string (base64 or data URL)
838
+ *
839
+ * @example
840
+ * ```typescript
841
+ * import { toImageBytes } from "@superlinked/sie-sdk";
842
+ *
843
+ * // From file input (browser)
844
+ * const file = document.querySelector('input[type="file"]').files[0];
845
+ * const bytes = await toImageBytes(file);
846
+ *
847
+ * // From base64 string
848
+ * const bytesFromBase64 = await toImageBytes(base64String);
849
+ *
850
+ * // From Uint8Array (passthrough)
851
+ * const bytesPassthrough = await toImageBytes(existingBytes);
852
+ * ```
853
+ */
854
+ /**
855
+ * Type for all supported image input formats.
856
+ */
857
+ type ImageInput = Uint8Array | ArrayBuffer | Blob | string;
858
+ /**
859
+ * Wire format for images sent to the server.
860
+ * Per design.md Section 4.3.
861
+ */
862
+ interface ImageWireFormat {
863
+ data: Uint8Array;
864
+ format: "jpeg" | "png" | "webp";
865
+ }
866
+ /**
867
+ * Convert various image input types to Uint8Array.
868
+ *
869
+ * Accepts:
870
+ * - Uint8Array: passed through as-is
871
+ * - ArrayBuffer / Buffer: wrapped in Uint8Array
872
+ * - Blob / File: read as ArrayBuffer then wrapped
873
+ * - string: decoded from base64 or data URL
874
+ *
875
+ * @param input - Image data in any supported format
876
+ * @returns Image bytes as Uint8Array
877
+ *
878
+ * @example
879
+ * ```typescript
880
+ * // From base64 string
881
+ * const bytes = await toImageBytes(base64String);
882
+ *
883
+ * // From file (browser)
884
+ * const fileBytes = await toImageBytes(file);
885
+ * ```
886
+ */
887
+ declare function toImageBytes(input: ImageInput): Promise<Uint8Array>;
888
+ /**
889
+ * Convert image bytes to wire format for transport.
890
+ *
891
+ * Per design.md Section 4.3, images are sent as:
892
+ * `{ data: <bytes>, format: "jpeg" | "png" | "webp" }`
893
+ *
894
+ * @param input - Image data in any supported format
895
+ * @param format - Image format (defaults to "jpeg")
896
+ * @returns Image in wire format
897
+ */
898
+ declare function toImageWireFormat(input: ImageInput, format?: "jpeg" | "png" | "webp"): Promise<ImageWireFormat>;
899
+ /**
900
+ * Detect image format from bytes (magic number check).
901
+ *
902
+ * @param bytes - Image bytes
903
+ * @returns Detected format or "unknown"
904
+ */
905
+ declare function detectImageFormat(bytes: Uint8Array): "jpeg" | "png" | "webp" | "unknown";
906
+
907
+ export { type CapacityInfo, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, detectImageFormat, packMessage, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };