@superlinked/sie-sdk 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/dist/index.cjs +1406 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +907 -0
- package/dist/index.d.ts +907 -0
- package/dist/index.js +1385 -0
- package/dist/index.js.map +1 -0
- package/dist/scoring.cjs +42 -0
- package/dist/scoring.cjs.map +1 -0
- package/dist/scoring.d.cts +19 -0
- package/dist/scoring.d.ts +19 -0
- package/dist/scoring.js +38 -0
- package/dist/scoring.js.map +1 -0
- package/package.json +79 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,907 @@
|
|
|
1
|
+
export { maxsim, maxsimBatch, maxsimDocuments } from './scoring.cjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Types for the SIE TypeScript SDK
|
|
5
|
+
*
|
|
6
|
+
* These types mirror the Python SDK (packages/sie_sdk/src/sie_sdk/types.py)
|
|
7
|
+
* for full feature parity.
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Output dtype options for quantized embeddings.
|
|
11
|
+
* Matches Python DType literal.
|
|
12
|
+
*/
|
|
13
|
+
type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" | "ubinary";
|
|
14
|
+
/**
|
|
15
|
+
* Output type options for encode operation.
|
|
16
|
+
*/
|
|
17
|
+
type OutputType = "dense" | "sparse" | "multivector";
|
|
18
|
+
/**
|
|
19
|
+
* A single item to encode, score, or extract from.
|
|
20
|
+
*
|
|
21
|
+
* For simple text encoding, just use `{ text: "your text here" }`.
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* // Simple text
|
|
25
|
+
* { text: "Hello world" }
|
|
26
|
+
*
|
|
27
|
+
* // With ID for tracking through results
|
|
28
|
+
* { id: "doc-1", text: "Document text" }
|
|
29
|
+
*
|
|
30
|
+
* // With images for multimodal models (ColPali, CLIP)
|
|
31
|
+
* { text: "Description", images: [imageBytes] }
|
|
32
|
+
*
|
|
33
|
+
* // Pre-encoded multivector (for use with maxsim utility)
|
|
34
|
+
* { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
|
|
35
|
+
*/
|
|
36
|
+
interface Item {
|
|
37
|
+
/** Optional ID to track this item through results */
|
|
38
|
+
id?: string;
|
|
39
|
+
/** Text content to encode */
|
|
40
|
+
text?: string;
|
|
41
|
+
/** Images as byte arrays (JPEG/PNG/WebP) for multimodal models */
|
|
42
|
+
images?: Uint8Array[];
|
|
43
|
+
/** Pre-encoded multivector (for use with maxsim utility) */
|
|
44
|
+
multivector?: Float32Array[];
|
|
45
|
+
/** Arbitrary metadata (passed through to results) */
|
|
46
|
+
metadata?: Record<string, unknown>;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Sparse vector result with non-zero indices and values.
|
|
50
|
+
* Used by SPLADE-type models.
|
|
51
|
+
*/
|
|
52
|
+
interface SparseResult {
|
|
53
|
+
/** Token indices with non-zero weights */
|
|
54
|
+
indices: Int32Array;
|
|
55
|
+
/** Weight values for each index */
|
|
56
|
+
values: Float32Array;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Server-side timing breakdown for a request.
|
|
60
|
+
*/
|
|
61
|
+
interface TimingInfo {
|
|
62
|
+
totalMs?: number;
|
|
63
|
+
queueMs?: number;
|
|
64
|
+
tokenizationMs?: number;
|
|
65
|
+
inferenceMs?: number;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Result of encoding a single item.
|
|
69
|
+
*
|
|
70
|
+
* Contains the item ID (if provided) and one or more output representations
|
|
71
|
+
* depending on what was requested via outputTypes.
|
|
72
|
+
*/
|
|
73
|
+
interface EncodeResult {
|
|
74
|
+
/** Item ID (echoed from request if provided) */
|
|
75
|
+
id?: string;
|
|
76
|
+
/** Dense embedding vector, shape [dims] */
|
|
77
|
+
dense?: Float32Array;
|
|
78
|
+
/** Sparse embedding with indices and values */
|
|
79
|
+
sparse?: SparseResult;
|
|
80
|
+
/** Multi-vector embedding for late interaction models, shape [numTokens][tokenDims] */
|
|
81
|
+
multivector?: Float32Array[];
|
|
82
|
+
/** Server-side timing breakdown */
|
|
83
|
+
timing?: TimingInfo;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Model dimension information.
|
|
87
|
+
*/
|
|
88
|
+
interface ModelDims {
|
|
89
|
+
dense?: number;
|
|
90
|
+
sparse?: number;
|
|
91
|
+
multivector?: number;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Information about a model returned by listModels().
|
|
95
|
+
*/
|
|
96
|
+
interface ModelInfo {
|
|
97
|
+
/** Model name/identifier */
|
|
98
|
+
name: string;
|
|
99
|
+
/** Whether the model is currently loaded in memory */
|
|
100
|
+
loaded: boolean;
|
|
101
|
+
/** Supported input types: ["text"], ["text", "image"], etc. */
|
|
102
|
+
inputs: string[];
|
|
103
|
+
/** Supported output types: ["dense"], ["dense", "sparse"], etc. */
|
|
104
|
+
outputs: string[];
|
|
105
|
+
/** Embedding dimensions for each output type */
|
|
106
|
+
dims?: ModelDims;
|
|
107
|
+
/** Maximum sequence length the model supports */
|
|
108
|
+
maxSequenceLength?: number;
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* A single score entry from reranking.
|
|
112
|
+
*/
|
|
113
|
+
interface ScoreEntry {
|
|
114
|
+
/** ID of the item (from request or auto-generated) */
|
|
115
|
+
itemId: string;
|
|
116
|
+
/** Relevance score (higher = more relevant) */
|
|
117
|
+
score: number;
|
|
118
|
+
/** Position in sorted order (0 = most relevant) */
|
|
119
|
+
rank: number;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* Result of scoring items against a query.
|
|
123
|
+
*/
|
|
124
|
+
interface ScoreResult {
|
|
125
|
+
/** Model used for scoring */
|
|
126
|
+
model?: string;
|
|
127
|
+
/** Query ID (echoed from request if provided) */
|
|
128
|
+
queryId?: string;
|
|
129
|
+
/** Score entries, sorted by relevance (descending) */
|
|
130
|
+
scores: ScoreEntry[];
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* A single extracted entity (NER span).
|
|
134
|
+
*/
|
|
135
|
+
interface Entity {
|
|
136
|
+
/** The extracted text span */
|
|
137
|
+
text: string;
|
|
138
|
+
/** Entity type/label (e.g., "person", "organization") */
|
|
139
|
+
label: string;
|
|
140
|
+
/** Confidence score */
|
|
141
|
+
score: number;
|
|
142
|
+
/** Start character offset in the original text */
|
|
143
|
+
start?: number;
|
|
144
|
+
/** End character offset in the original text */
|
|
145
|
+
end?: number;
|
|
146
|
+
/** Bounding box [x, y, width, height] for image-based extraction */
|
|
147
|
+
bbox?: number[];
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Result of extraction for a single item.
|
|
151
|
+
*/
|
|
152
|
+
interface ExtractResult {
|
|
153
|
+
/** Item ID (echoed from request if provided) */
|
|
154
|
+
id?: string;
|
|
155
|
+
/** List of extracted entities */
|
|
156
|
+
entities: Entity[];
|
|
157
|
+
}
|
|
158
|
+
/**
|
|
159
|
+
* Information about a worker in the cluster.
|
|
160
|
+
*/
|
|
161
|
+
interface WorkerInfo {
|
|
162
|
+
/** Worker base URL */
|
|
163
|
+
url: string;
|
|
164
|
+
/** GPU type (e.g., "l4", "a100-80gb") */
|
|
165
|
+
gpu: string;
|
|
166
|
+
/** Whether the worker is healthy */
|
|
167
|
+
healthy: boolean;
|
|
168
|
+
/** Number of items in the worker's queue */
|
|
169
|
+
queueDepth: number;
|
|
170
|
+
/** List of model names loaded on this worker */
|
|
171
|
+
loadedModels: string[];
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* Cluster capacity information returned by getCapacity().
|
|
175
|
+
*/
|
|
176
|
+
interface CapacityInfo {
|
|
177
|
+
/** Overall cluster status: "healthy", "degraded", "no_workers" */
|
|
178
|
+
status: string;
|
|
179
|
+
/** Number of healthy workers */
|
|
180
|
+
workerCount: number;
|
|
181
|
+
/** Number of GPUs available */
|
|
182
|
+
gpuCount: number;
|
|
183
|
+
/** Number of unique models loaded across all workers */
|
|
184
|
+
modelsLoaded: number;
|
|
185
|
+
/** GPU types configured in the cluster */
|
|
186
|
+
configuredGpuTypes: string[];
|
|
187
|
+
/** GPU types currently running */
|
|
188
|
+
liveGpuTypes: string[];
|
|
189
|
+
/** List of worker details */
|
|
190
|
+
workers: WorkerInfo[];
|
|
191
|
+
}
|
|
192
|
+
/**
|
|
193
|
+
* Pool specification for creating resource pools.
|
|
194
|
+
*/
|
|
195
|
+
interface PoolSpec {
|
|
196
|
+
/** Pool name (used in GPU param as "poolName/gpuType") */
|
|
197
|
+
name: string;
|
|
198
|
+
/** GPU requirements, e.g., { l4: 2, "a100-40gb": 1 } */
|
|
199
|
+
gpus?: Record<string, number>;
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Pool status information.
|
|
203
|
+
*/
|
|
204
|
+
interface PoolStatus {
|
|
205
|
+
/** Pool state: "pending", "active", "expired" */
|
|
206
|
+
state: string;
|
|
207
|
+
/** Workers assigned to this pool */
|
|
208
|
+
assignedWorkers: Array<{
|
|
209
|
+
name: string;
|
|
210
|
+
url: string;
|
|
211
|
+
gpu: string;
|
|
212
|
+
}>;
|
|
213
|
+
/** Unix timestamp when pool was created */
|
|
214
|
+
createdAt?: number;
|
|
215
|
+
/** Unix timestamp of last lease renewal */
|
|
216
|
+
lastRenewed?: number;
|
|
217
|
+
}
|
|
218
|
+
/**
|
|
219
|
+
* Full pool information.
|
|
220
|
+
*/
|
|
221
|
+
interface PoolInfo {
|
|
222
|
+
/** Pool name */
|
|
223
|
+
name: string;
|
|
224
|
+
/** Pool specification */
|
|
225
|
+
spec: {
|
|
226
|
+
gpus?: Record<string, number>;
|
|
227
|
+
};
|
|
228
|
+
/** Pool status */
|
|
229
|
+
status: PoolStatus;
|
|
230
|
+
}
|
|
231
|
+
type ModelState = "available" | "loading" | "loaded" | "unloading";
|
|
232
|
+
interface ClusterSummary {
|
|
233
|
+
worker_count: number;
|
|
234
|
+
gpu_count: number;
|
|
235
|
+
models_loaded: number;
|
|
236
|
+
total_qps: number;
|
|
237
|
+
}
|
|
238
|
+
interface ClusterWorkerInfo {
|
|
239
|
+
url: string;
|
|
240
|
+
gpu: string;
|
|
241
|
+
healthy: boolean;
|
|
242
|
+
queue_depth: number;
|
|
243
|
+
loaded_models: string[];
|
|
244
|
+
}
|
|
245
|
+
interface ModelSummary {
|
|
246
|
+
name: string;
|
|
247
|
+
state: ModelState;
|
|
248
|
+
worker_count: number;
|
|
249
|
+
gpu_types: string[];
|
|
250
|
+
total_queue_depth: number;
|
|
251
|
+
}
|
|
252
|
+
interface ServerInfo {
|
|
253
|
+
version: string;
|
|
254
|
+
uptime_seconds: number;
|
|
255
|
+
user: string;
|
|
256
|
+
working_dir: string;
|
|
257
|
+
pid: number;
|
|
258
|
+
}
|
|
259
|
+
interface GPUMetrics {
|
|
260
|
+
device: string;
|
|
261
|
+
name: string;
|
|
262
|
+
gpu_type: string;
|
|
263
|
+
utilization_pct: number;
|
|
264
|
+
memory_used_bytes: number;
|
|
265
|
+
memory_total_bytes: number;
|
|
266
|
+
memory_threshold_pct?: number;
|
|
267
|
+
}
|
|
268
|
+
interface ModelConfig {
|
|
269
|
+
hf_id: string;
|
|
270
|
+
adapter: string;
|
|
271
|
+
inputs: string[];
|
|
272
|
+
outputs: string[];
|
|
273
|
+
dims: Record<string, number | null>;
|
|
274
|
+
max_sequence_length?: number;
|
|
275
|
+
pooling?: string | null;
|
|
276
|
+
normalize?: boolean;
|
|
277
|
+
adapter_options_loadtime?: Record<string, unknown> | null;
|
|
278
|
+
adapter_options_runtime?: Record<string, unknown> | null;
|
|
279
|
+
}
|
|
280
|
+
interface ModelStatus {
|
|
281
|
+
name: string;
|
|
282
|
+
state: ModelState;
|
|
283
|
+
device: string | null;
|
|
284
|
+
memory_bytes: number;
|
|
285
|
+
config: ModelConfig;
|
|
286
|
+
queue_depth: number;
|
|
287
|
+
queue_pending_items: number;
|
|
288
|
+
}
|
|
289
|
+
interface WorkerStatusMessage {
|
|
290
|
+
timestamp: number;
|
|
291
|
+
name: string;
|
|
292
|
+
gpu: string;
|
|
293
|
+
gpu_count: number;
|
|
294
|
+
bundle: string;
|
|
295
|
+
machine_profile: string;
|
|
296
|
+
loaded_models: string[];
|
|
297
|
+
server: ServerInfo;
|
|
298
|
+
gpus: GPUMetrics[];
|
|
299
|
+
models: ModelStatus[];
|
|
300
|
+
counters: Record<string, Record<string, number>>;
|
|
301
|
+
histograms: Record<string, Record<string, Record<string, unknown>>>;
|
|
302
|
+
}
|
|
303
|
+
interface ClusterStatusMessage {
|
|
304
|
+
timestamp: number;
|
|
305
|
+
cluster: ClusterSummary;
|
|
306
|
+
workers: ClusterWorkerInfo[];
|
|
307
|
+
models: ModelSummary[];
|
|
308
|
+
}
|
|
309
|
+
type StatusMessage = WorkerStatusMessage | ClusterStatusMessage;
|
|
310
|
+
/**
|
|
311
|
+
* Options for SIEClient constructor.
|
|
312
|
+
*/
|
|
313
|
+
interface SIEClientOptions {
|
|
314
|
+
/** Request timeout in milliseconds (default: 30000) */
|
|
315
|
+
timeout?: number;
|
|
316
|
+
/** Default GPU type for all requests (e.g., "l4", "a100-80gb") */
|
|
317
|
+
gpu?: string;
|
|
318
|
+
/** API key for authentication (sent as Bearer token) */
|
|
319
|
+
apiKey?: string;
|
|
320
|
+
/** Whether to auto-retry on 202 (provisioning) responses */
|
|
321
|
+
waitForCapacity?: boolean;
|
|
322
|
+
/** Maximum time to wait for provisioning in milliseconds (default: 300000) */
|
|
323
|
+
provisionTimeout?: number;
|
|
324
|
+
}
|
|
325
|
+
/**
|
|
326
|
+
* Options for encode operation.
|
|
327
|
+
*/
|
|
328
|
+
interface EncodeOptions {
|
|
329
|
+
/** Output types to request: ["dense"], ["sparse"], ["dense", "sparse", "multivector"] */
|
|
330
|
+
outputTypes?: OutputType[];
|
|
331
|
+
/** Instruction prefix for instruction-tuned models */
|
|
332
|
+
instruction?: string;
|
|
333
|
+
/** Whether this is a query (for asymmetric models) */
|
|
334
|
+
isQuery?: boolean;
|
|
335
|
+
/** Output dtype for quantization */
|
|
336
|
+
outputDtype?: DType;
|
|
337
|
+
/** GPU type for this request (overrides client default) */
|
|
338
|
+
gpu?: string;
|
|
339
|
+
/** Whether to wait for capacity (overrides client default) */
|
|
340
|
+
waitForCapacity?: boolean;
|
|
341
|
+
}
|
|
342
|
+
/**
|
|
343
|
+
* Options for score operation.
|
|
344
|
+
*/
|
|
345
|
+
interface ScoreOptions {
|
|
346
|
+
/** Return only top K results */
|
|
347
|
+
topK?: number;
|
|
348
|
+
/** GPU type for this request */
|
|
349
|
+
gpu?: string;
|
|
350
|
+
/** Whether to wait for capacity */
|
|
351
|
+
waitForCapacity?: boolean;
|
|
352
|
+
}
|
|
353
|
+
/**
|
|
354
|
+
* Options for extract operation.
|
|
355
|
+
*/
|
|
356
|
+
interface ExtractOptions {
|
|
357
|
+
/** Entity labels to extract (e.g., ["person", "organization"]) */
|
|
358
|
+
labels: string[];
|
|
359
|
+
/** Minimum confidence threshold (0-1) */
|
|
360
|
+
threshold?: number;
|
|
361
|
+
/** GPU type for this request */
|
|
362
|
+
gpu?: string;
|
|
363
|
+
/** Whether to wait for capacity */
|
|
364
|
+
waitForCapacity?: boolean;
|
|
365
|
+
}
|
|
366
|
+
/**
|
|
367
|
+
* Helper to convert a typed array to a regular number array.
|
|
368
|
+
* Useful for JSON serialization or working with libraries that expect number[].
|
|
369
|
+
*/
|
|
370
|
+
declare function toNumberArray(arr: Float32Array | Int32Array): number[];
|
|
371
|
+
/**
|
|
372
|
+
* Helper to convert a number array to a Float32Array.
|
|
373
|
+
*/
|
|
374
|
+
declare function toFloat32Array(arr: number[]): Float32Array;
|
|
375
|
+
|
|
376
|
+
/**
|
|
377
|
+
* SIE Client implementation
|
|
378
|
+
*
|
|
379
|
+
* @example
|
|
380
|
+
* ```typescript
|
|
381
|
+
* import { SIEClient } from "@superlinked/sie-sdk";
|
|
382
|
+
*
|
|
383
|
+
* const client = new SIEClient("http://localhost:8080");
|
|
384
|
+
*
|
|
385
|
+
* // Encode single item
|
|
386
|
+
* const result = await client.encode("bge-m3", { text: "Hello world" });
|
|
387
|
+
* console.log(result.dense); // Float32Array
|
|
388
|
+
*
|
|
389
|
+
* // Batch encode
|
|
390
|
+
* const results = await client.encode("bge-m3", [
|
|
391
|
+
* { text: "First document" },
|
|
392
|
+
* { text: "Second document" },
|
|
393
|
+
* ]);
|
|
394
|
+
*
|
|
395
|
+
* // With GPU routing and auto-retry for capacity
|
|
396
|
+
* const resultWithGpu = await client.encode(
|
|
397
|
+
* "bge-m3",
|
|
398
|
+
* { text: "Hello" },
|
|
399
|
+
* { gpu: "l4", waitForCapacity: true },
|
|
400
|
+
* );
|
|
401
|
+
*
|
|
402
|
+
* await client.close();
|
|
403
|
+
* ```
|
|
404
|
+
*/
|
|
405
|
+
|
|
406
|
+
/**
|
|
407
|
+
* SIE Client for embedding, scoring, and extraction.
|
|
408
|
+
*
|
|
409
|
+
* The client's request methods are all async (no blocking variants), and it uses native fetch.
|
|
410
|
+
* It handles msgpack serialization, error parsing, and retry logic.
|
|
411
|
+
*
|
|
412
|
+
* @example Resource pool usage
|
|
413
|
+
* ```typescript
|
|
414
|
+
* const client = new SIEClient("http://router:8080");
|
|
415
|
+
*
|
|
416
|
+
* // Create a dedicated pool
|
|
417
|
+
* await client.createPool("eval-bench", { l4: 2 });
|
|
418
|
+
*
|
|
419
|
+
* // Use pool for requests
|
|
420
|
+
* await client.encode("bge-m3", { text: "Hello" }, { gpu: "eval-bench/l4" });
|
|
421
|
+
*
|
|
422
|
+
* // Check pool status
|
|
423
|
+
* const pool = await client.getPool("eval-bench");
|
|
424
|
+
* console.log(`Pool state: ${pool?.status.state}`);
|
|
425
|
+
*
|
|
426
|
+
* // Clean up
|
|
427
|
+
* await client.deletePool("eval-bench");
|
|
428
|
+
* await client.close();
|
|
429
|
+
* ```
|
|
430
|
+
*/
|
|
431
|
+
declare class SIEClient {
|
|
432
|
+
private readonly baseUrl;
|
|
433
|
+
private readonly timeout;
|
|
434
|
+
private readonly gpu?;
|
|
435
|
+
private readonly apiKey?;
|
|
436
|
+
private readonly defaultWaitForCapacity;
|
|
437
|
+
private readonly provisionTimeout;
|
|
438
|
+
private readonly pools;
|
|
439
|
+
private versionWarningLogged;
|
|
440
|
+
/**
|
|
441
|
+
* Create a new SIE client.
|
|
442
|
+
*
|
|
443
|
+
* @param baseUrl - Base URL of the SIE server (e.g., "http://localhost:8080")
|
|
444
|
+
* @param options - Client options
|
|
445
|
+
*/
|
|
446
|
+
constructor(baseUrl: string, options?: SIEClientOptions);
|
|
447
|
+
/**
|
|
448
|
+
* Get the base URL of the SIE server.
|
|
449
|
+
*
|
|
450
|
+
* @returns The normalized base URL (without trailing slash)
|
|
451
|
+
*/
|
|
452
|
+
getBaseUrl(): string;
|
|
453
|
+
/**
|
|
454
|
+
* Encode a single item.
|
|
455
|
+
*
|
|
456
|
+
* @param model - Model name (e.g., "bge-m3")
|
|
457
|
+
* @param item - Item to encode
|
|
458
|
+
* @param options - Encode options
|
|
459
|
+
* @returns Encode result with embeddings
|
|
460
|
+
*/
|
|
461
|
+
encode(model: string, item: Item, options?: EncodeOptions): Promise<EncodeResult>;
|
|
462
|
+
/**
|
|
463
|
+
* Encode multiple items.
|
|
464
|
+
*
|
|
465
|
+
* @param model - Model name (e.g., "bge-m3")
|
|
466
|
+
* @param items - Items to encode
|
|
467
|
+
* @param options - Encode options
|
|
468
|
+
* @returns Array of encode results in same order as input
|
|
469
|
+
*/
|
|
470
|
+
encode(model: string, items: Item[], options?: EncodeOptions): Promise<EncodeResult[]>;
|
|
471
|
+
/**
|
|
472
|
+
* List available models.
|
|
473
|
+
*
|
|
474
|
+
* @returns Array of model information
|
|
475
|
+
*/
|
|
476
|
+
listModels(): Promise<ModelInfo[]>;
|
|
477
|
+
/**
|
|
478
|
+
* Stream real-time status updates from a worker or router.
|
|
479
|
+
*
|
|
480
|
+
* @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
|
|
481
|
+
* "auto" detects the endpoint via /health.
|
|
482
|
+
*/
|
|
483
|
+
watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
|
|
484
|
+
/**
|
|
485
|
+
* Score items against a query using a reranker model.
|
|
486
|
+
*
|
|
487
|
+
* @param model - Model name (e.g., "bge-reranker-v2")
|
|
488
|
+
* @param query - Query item
|
|
489
|
+
* @param items - Items to score against the query
|
|
490
|
+
* @param options - Score options
|
|
491
|
+
* @returns Score result with sorted scores
|
|
492
|
+
*
|
|
493
|
+
* @example
|
|
494
|
+
* ```typescript
|
|
495
|
+
* const result = await client.score(
|
|
496
|
+
* "bge-reranker-v2",
|
|
497
|
+
* { text: "What is machine learning?" },
|
|
498
|
+
* [
|
|
499
|
+
* { id: "doc-1", text: "Machine learning is..." },
|
|
500
|
+
* { id: "doc-2", text: "Python is..." },
|
|
501
|
+
* ],
|
|
502
|
+
* );
|
|
503
|
+
*
|
|
504
|
+
* // Scores are sorted by relevance (descending)
|
|
505
|
+
* console.log(result.scores[0].itemId); // most relevant
|
|
506
|
+
* ```
|
|
507
|
+
*/
|
|
508
|
+
score(model: string, query: Item, items: Item[], options?: ScoreOptions): Promise<ScoreResult>;
|
|
509
|
+
/**
|
|
510
|
+
* Extract entities from a single item.
|
|
511
|
+
*
|
|
512
|
+
* @param model - Model name (e.g., "gliner-multi-v2.1")
|
|
513
|
+
* @param item - Item to extract from
|
|
514
|
+
* @param options - Extract options with labels
|
|
515
|
+
* @returns Extract result with entities
|
|
516
|
+
*/
|
|
517
|
+
extract(model: string, item: Item, options: ExtractOptions): Promise<ExtractResult>;
|
|
518
|
+
/**
|
|
519
|
+
* Extract entities from multiple items.
|
|
520
|
+
*
|
|
521
|
+
* @param model - Model name (e.g., "gliner-multi-v2.1")
|
|
522
|
+
* @param items - Items to extract from
|
|
523
|
+
* @param options - Extract options with labels
|
|
524
|
+
* @returns Array of extract results in same order as input
|
|
525
|
+
*/
|
|
526
|
+
extract(model: string, items: Item[], options: ExtractOptions): Promise<ExtractResult[]>;
|
|
527
|
+
/**
|
|
528
|
+
* Close the client and cleanup resources.
|
|
529
|
+
*
|
|
530
|
+
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
531
|
+
* automatically - they are garbage collected by the router after inactivity.
|
|
532
|
+
* This allows pool reuse if the client reconnects.
|
|
533
|
+
*/
|
|
534
|
+
close(): Promise<void>;
|
|
535
|
+
/**
|
|
536
|
+
* Create a resource pool for isolated capacity.
|
|
537
|
+
*
|
|
538
|
+
* Pools provide dedicated worker capacity, isolated from other clients.
|
|
539
|
+
* Workers are assigned to pools and only serve requests from that pool.
|
|
540
|
+
*
|
|
541
|
+
* @param name - Pool name (used in GPU param as "poolName/machineProfile")
|
|
542
|
+
* @param gpus - Machine profile requirements, e.g., { "l4": 2, "l4-spot": 1 }
|
|
543
|
+
*
|
|
544
|
+
* @example
|
|
545
|
+
* ```typescript
|
|
546
|
+
* // Create a pool with 2 L4 GPUs
|
|
547
|
+
* await client.createPool("eval-bench", { l4: 2 });
|
|
548
|
+
*
|
|
549
|
+
* // Use the pool for requests
|
|
550
|
+
* await client.encode("bge-m3", { text: "Hello" }, { gpu: "eval-bench/l4" });
|
|
551
|
+
*
|
|
552
|
+
* // Clean up when done
|
|
553
|
+
* await client.deletePool("eval-bench");
|
|
554
|
+
* ```
|
|
555
|
+
*/
|
|
556
|
+
createPool(name: string, gpus: Record<string, number>): Promise<void>;
|
|
557
|
+
/**
|
|
558
|
+
* Get information about a pool.
|
|
559
|
+
*
|
|
560
|
+
* @param name - Pool name to query
|
|
561
|
+
* @returns PoolInfo if pool exists, null otherwise
|
|
562
|
+
*
|
|
563
|
+
* @example
|
|
564
|
+
* ```typescript
|
|
565
|
+
* await client.createPool("eval-bench", { l4: 2 });
|
|
566
|
+
* const pool = await client.getPool("eval-bench");
|
|
567
|
+
* console.log(`Pool state: ${pool?.status.state}`);
|
|
568
|
+
* console.log(`Workers: ${pool?.status.assignedWorkers.length}`);
|
|
569
|
+
* ```
|
|
570
|
+
*/
|
|
571
|
+
getPool(name: string): Promise<PoolInfo | null>;
|
|
572
|
+
/**
|
|
573
|
+
* Delete a pool.
|
|
574
|
+
*
|
|
575
|
+
* @param name - Pool name to delete
|
|
576
|
+
* @returns true if pool was deleted, false if pool didn't exist
|
|
577
|
+
*
|
|
578
|
+
* @example
|
|
579
|
+
* ```typescript
|
|
580
|
+
* // Clean up pool when done
|
|
581
|
+
* const deleted = await client.deletePool("eval-bench");
|
|
582
|
+
* if (deleted) {
|
|
583
|
+
* console.log("Pool deleted successfully");
|
|
584
|
+
* }
|
|
585
|
+
* ```
|
|
586
|
+
*/
|
|
587
|
+
deletePool(name: string): Promise<boolean>;
|
|
588
|
+
private checkServerVersion;
|
|
589
|
+
/**
|
|
590
|
+
* Parse GPU parameter into pool and GPU components.
|
|
591
|
+
*
|
|
592
|
+
* Supports "pool/gpu" format for pool routing.
|
|
593
|
+
*/
|
|
594
|
+
private parseGpuParam;
|
|
595
|
+
/**
|
|
596
|
+
* Get current cluster capacity information.
|
|
597
|
+
*
|
|
598
|
+
* Queries the router's /health endpoint for cluster state. Useful for
|
|
599
|
+
* checking if specific GPU types are available before sending requests.
|
|
600
|
+
*
|
|
601
|
+
* @param gpu - Optional filter to check specific GPU type availability
|
|
602
|
+
* @returns CapacityInfo with worker count, GPU types, and worker details
|
|
603
|
+
*
|
|
604
|
+
* @example
|
|
605
|
+
* ```typescript
|
|
606
|
+
* // Check cluster state
|
|
607
|
+
* const capacity = await client.getCapacity();
|
|
608
|
+
* console.log(`Workers: ${capacity.workerCount}, GPUs: ${capacity.liveGpuTypes}`);
|
|
609
|
+
*
|
|
610
|
+
* // Check if L4 GPUs are available
|
|
611
|
+
* const l4Capacity = await client.getCapacity("l4");
|
|
612
|
+
* if (l4Capacity.workerCount > 0) {
|
|
613
|
+
* console.log("L4 workers available");
|
|
614
|
+
* }
|
|
615
|
+
* ```
|
|
616
|
+
*/
|
|
617
|
+
getCapacity(gpu?: string): Promise<CapacityInfo>;
|
|
618
|
+
/**
|
|
619
|
+
* Wait for GPU capacity to become available.
|
|
620
|
+
*
|
|
621
|
+
* Polls the router until workers with the specified GPU type are online.
|
|
622
|
+
* This is useful for pre-warming the cluster before running benchmarks.
|
|
623
|
+
*
|
|
624
|
+
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
625
|
+
* @param options - Wait options
|
|
626
|
+
* @returns CapacityInfo once capacity is available
|
|
627
|
+
*
|
|
628
|
+
* @example
|
|
629
|
+
* ```typescript
|
|
630
|
+
* // Wait for L4 capacity before running benchmarks
|
|
631
|
+
* const capacity = await client.waitForCapacity("l4", { timeout: 300000 });
|
|
632
|
+
* console.log(`Ready with ${capacity.workerCount} L4 workers`);
|
|
633
|
+
*
|
|
634
|
+
* // Wait and pre-load a model
|
|
635
|
+
* const capacityWithModel = await client.waitForCapacity("l4", { model: "bge-m3" });
|
|
636
|
+
* ```
|
|
637
|
+
*/
|
|
638
|
+
waitForCapacity(gpu: string, options?: {
|
|
639
|
+
model?: string;
|
|
640
|
+
timeout?: number;
|
|
641
|
+
pollInterval?: number;
|
|
642
|
+
}): Promise<CapacityInfo>;
|
|
643
|
+
/**
|
|
644
|
+
* Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
|
|
645
|
+
*/
|
|
646
|
+
private requestWithRetry;
|
|
647
|
+
/**
|
|
648
|
+
* Make a single msgpack HTTP request to the SIE server (no retry logic).
|
|
649
|
+
*/
|
|
650
|
+
private request;
|
|
651
|
+
/**
|
|
652
|
+
* Make a JSON HTTP request to the SIE server.
|
|
653
|
+
* Used for endpoints that return JSON (e.g., /v1/models, /health).
|
|
654
|
+
*/
|
|
655
|
+
private requestJson;
|
|
656
|
+
private buildWsUrl;
|
|
657
|
+
private createWebSocket;
|
|
658
|
+
private detectEndpointType;
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
declare const SDK_VERSION = "0.1.8";
|
|
662
|
+
|
|
663
|
+
/**
|
|
664
|
+
* Error classes for the SIE TypeScript SDK.
|
|
665
|
+
*
|
|
666
|
+
* These errors mirror the Python SDK (packages/sie_sdk/src/sie_sdk/client/errors.py)
|
|
667
|
+
* for consistent error handling across languages.
|
|
668
|
+
*
|
|
669
|
+
* @example
|
|
670
|
+
* // Catching specific error types
|
|
671
|
+
* try {
|
|
672
|
+
* await client.encode("model", { text: "hello" });
|
|
673
|
+
* } catch (error) {
|
|
674
|
+
* if (error instanceof RequestError) {
|
|
675
|
+
* console.error(`Bad request (${error.code}): ${error.message}`);
|
|
676
|
+
* } else if (error instanceof ProvisioningError) {
|
|
677
|
+
* console.log(`GPU ${error.gpu} is provisioning, retry after ${error.retryAfter}ms`);
|
|
678
|
+
* } else if (error instanceof SIEConnectionError) {
|
|
679
|
+
* console.error("Cannot reach server:", error.message);
|
|
680
|
+
* }
|
|
681
|
+
* }
|
|
682
|
+
*/
|
|
683
|
+
/**
|
|
684
|
+
* Base error for all SIE SDK errors.
|
|
685
|
+
*
|
|
686
|
+
* All SIE errors extend this class, so you can catch all SDK errors with:
|
|
687
|
+
* `catch (error) { if (error instanceof SIEError) { ... } }`
|
|
688
|
+
*/
|
|
689
|
+
declare class SIEError extends Error {
|
|
690
|
+
constructor(message: string);
|
|
691
|
+
}
|
|
692
|
+
/**
|
|
693
|
+
* Error connecting to the SIE server.
|
|
694
|
+
*
|
|
695
|
+
* Raised when:
|
|
696
|
+
* - Network is unreachable
|
|
697
|
+
* - DNS resolution fails
|
|
698
|
+
* - Connection times out
|
|
699
|
+
* - Server refuses connection
|
|
700
|
+
*/
|
|
701
|
+
declare class SIEConnectionError extends SIEError {
|
|
702
|
+
constructor(message: string);
|
|
703
|
+
}
|
|
704
|
+
/**
|
|
705
|
+
* Error in the request (4xx responses).
|
|
706
|
+
*
|
|
707
|
+
* Raised when the client sends an invalid request:
|
|
708
|
+
* - 400: Bad request (invalid parameters, malformed body)
|
|
709
|
+
* - 401: Unauthorized (missing or invalid API key)
|
|
710
|
+
* - 403: Forbidden (insufficient permissions)
|
|
711
|
+
* - 404: Not found (invalid endpoint or model)
|
|
712
|
+
* - 422: Validation error (invalid input format)
|
|
713
|
+
*/
|
|
714
|
+
declare class RequestError extends SIEError {
|
|
715
|
+
/** Error code from the server (e.g., "INVALID_MODEL", "VALIDATION_ERROR") */
|
|
716
|
+
readonly code: string | undefined;
|
|
717
|
+
/** HTTP status code (400-499) */
|
|
718
|
+
readonly statusCode: number | undefined;
|
|
719
|
+
constructor(message: string, code?: string, statusCode?: number);
|
|
720
|
+
}
|
|
721
|
+
/**
|
|
722
|
+
* Error from the server (5xx responses).
|
|
723
|
+
*
|
|
724
|
+
* Raised when the server encounters an internal error:
|
|
725
|
+
* - 500: Internal server error
|
|
726
|
+
* - 502: Bad gateway
|
|
727
|
+
* - 503: Service unavailable
|
|
728
|
+
* - 504: Gateway timeout
|
|
729
|
+
*/
|
|
730
|
+
declare class ServerError extends SIEError {
|
|
731
|
+
/** Error code from the server (e.g., "INTERNAL_ERROR", "LORA_LOADING") */
|
|
732
|
+
readonly code: string | undefined;
|
|
733
|
+
/** HTTP status code (500-599) */
|
|
734
|
+
readonly statusCode: number | undefined;
|
|
735
|
+
constructor(message: string, code?: string, statusCode?: number);
|
|
736
|
+
}
|
|
737
|
+
/**
|
|
738
|
+
* Error when capacity is not available and the caller opted not to wait, or provisioning timed out.
|
|
739
|
+
*
|
|
740
|
+
* Raised when:
|
|
741
|
+
* - Server returns 202 (no capacity, provisioning)
|
|
742
|
+
* - waitForCapacity is false (caller doesn't want to wait)
|
|
743
|
+
* - Or provisioning timeout exceeded
|
|
744
|
+
*
|
|
745
|
+
* The caller can use `retryAfter` to know when to retry.
|
|
746
|
+
*/
|
|
747
|
+
declare class ProvisioningError extends SIEError {
|
|
748
|
+
/** The GPU type that was requested */
|
|
749
|
+
readonly gpu: string | undefined;
|
|
750
|
+
/** Suggested retry delay in milliseconds (from server Retry-After header) */
|
|
751
|
+
readonly retryAfter: number | undefined;
|
|
752
|
+
constructor(message: string, gpu?: string, retryAfter?: number);
|
|
753
|
+
}
|
|
754
|
+
/**
|
|
755
|
+
* Error related to resource pool operations.
|
|
756
|
+
*
|
|
757
|
+
* Raised when:
|
|
758
|
+
* - Pool creation fails (e.g., insufficient capacity)
|
|
759
|
+
* - Pool not found
|
|
760
|
+
* - Pool in invalid state (e.g., expired)
|
|
761
|
+
* - Pool lease renewal fails
|
|
762
|
+
*/
|
|
763
|
+
declare class PoolError extends SIEError {
|
|
764
|
+
/** Name of the pool */
|
|
765
|
+
readonly poolName: string | undefined;
|
|
766
|
+
/** Current pool state (if known): "pending", "active", "expired" */
|
|
767
|
+
readonly state: string | undefined;
|
|
768
|
+
constructor(message: string, poolName?: string, state?: string);
|
|
769
|
+
}
|
|
770
|
+
/**
|
|
771
|
+
* Error when LoRA adapter is loading and retry limit exceeded.
|
|
772
|
+
*
|
|
773
|
+
* Raised when:
|
|
774
|
+
* - Server returns 503 with LORA_LOADING code
|
|
775
|
+
* - Retry limit is exceeded
|
|
776
|
+
*
|
|
777
|
+
* This usually means the adapter is being loaded from disk/network
|
|
778
|
+
* and the caller should wait longer or reduce request rate.
|
|
779
|
+
*/
|
|
780
|
+
declare class LoraLoadingError extends SIEError {
|
|
781
|
+
/** The LoRA adapter that was requested */
|
|
782
|
+
readonly lora: string | undefined;
|
|
783
|
+
/** The model the LoRA was requested for */
|
|
784
|
+
readonly model: string | undefined;
|
|
785
|
+
constructor(message: string, lora?: string, model?: string);
|
|
786
|
+
}
|
|
787
|
+
/**
|
|
788
|
+
* Error when model is loading and retry limit exceeded.
|
|
789
|
+
*
|
|
790
|
+
* Raised when:
|
|
791
|
+
* - Server returns 503 with MODEL_LOADING code
|
|
792
|
+
* - Retry limit is exceeded
|
|
793
|
+
*
|
|
794
|
+
* This usually means the model is being loaded from disk/HuggingFace
|
|
795
|
+
* and the caller should wait longer.
|
|
796
|
+
*/
|
|
797
|
+
declare class ModelLoadingError extends SIEError {
|
|
798
|
+
/** The model that was requested */
|
|
799
|
+
readonly model: string | undefined;
|
|
800
|
+
constructor(message: string, model?: string);
|
|
801
|
+
}
|
|
802
|
+
|
|
803
|
+
/**
|
|
804
|
+
* MessagePack serialization with msgpack-numpy compatibility.
|
|
805
|
+
*
|
|
806
|
+
* The SIE server uses Python's msgpack-numpy library which serializes numpy arrays
|
|
807
|
+
* using extension type 78 ('N'). This module provides compatible encoding/decoding.
|
|
808
|
+
*
|
|
809
|
+
* Wire format for numpy arrays (extension type 78):
|
|
810
|
+
* - dtype string (e.g., '<f4' for float32, '<i4' for int32) terminated by '|'
|
|
811
|
+
* - shape as comma-separated dimensions terminated by '|'
|
|
812
|
+
* - raw array data in little-endian format
|
|
813
|
+
*/
|
|
814
|
+
/**
|
|
815
|
+
* Pack a message to MessagePack format (msgpack-numpy compatible)
|
|
816
|
+
*/
|
|
817
|
+
declare function packMessage(data: unknown): Uint8Array;
|
|
818
|
+
/**
|
|
819
|
+
* Unpack a MessagePack message (msgpack-numpy compatible)
|
|
820
|
+
*
|
|
821
|
+
* Note: msgpack-numpy uses byte string keys (b'nd', b'type', b'shape', b'data') for numpy
|
|
822
|
+
* array metadata. In JavaScript these keys arrive as Uint8Array values, which must be decoded as text.
|
|
823
|
+
* After decoding, we recursively convert numpy array maps to typed arrays.
|
|
824
|
+
*/
|
|
825
|
+
declare function unpackMessage<T = unknown>(data: Uint8Array): T;
|
|
826
|
+
|
|
827
|
+
/**
|
|
828
|
+
* Image handling utilities for the SIE TypeScript SDK.
|
|
829
|
+
*
|
|
830
|
+
* Per design.md Section 4.3, images are serialized as bytes for transport.
|
|
831
|
+
* This module handles conversion from various input formats to Uint8Array.
|
|
832
|
+
*
|
|
833
|
+
* Supported input formats:
|
|
834
|
+
* - Uint8Array (raw bytes)
|
|
835
|
+
* - ArrayBuffer / Buffer (Node.js)
|
|
836
|
+
* - Blob / File (browser)
|
|
837
|
+
* - string (base64 or data URL)
|
|
838
|
+
*
|
|
839
|
+
* @example
|
|
840
|
+
* ```typescript
|
|
841
|
+
* import { toImageBytes } from "@superlinked/sie-sdk";
|
|
842
|
+
*
|
|
843
|
+
* // From file input (browser)
|
|
844
|
+
* const file = document.querySelector('input[type="file"]').files[0];
|
|
845
|
+
* const bytes = await toImageBytes(file);
|
|
846
|
+
*
|
|
847
|
+
* // From base64 string
|
|
848
|
+
* const base64Bytes = await toImageBytes(base64String);
|
|
849
|
+
*
|
|
850
|
+
* // From Uint8Array (passthrough)
|
|
851
|
+
* const sameBytes = await toImageBytes(existingBytes);
|
|
852
|
+
* ```
|
|
853
|
+
*/
|
|
854
|
+
/**
|
|
855
|
+
* Type for all supported image input formats.
*
* Members:
* - `Uint8Array`: raw image bytes
* - `ArrayBuffer`: raw image bytes held in a buffer
* - `Blob`: binary blob (browser)
* - `string`: base64 text or a data URL
*
* Node.js `Buffer` and browser `File` are not listed separately: `Buffer` is a
* `Uint8Array` subclass and `File` extends `Blob`, so both are assignable to this union.
|
|
856
|
+
*/
|
|
857
|
+
type ImageInput = Uint8Array | ArrayBuffer | Blob | string;
|
|
858
|
+
/**
|
|
859
|
+
* Wire format for images sent to the server.
|
|
860
|
+
* Per design.md Section 4.3.
*
* Pairs the image bytes with a label naming their format.
|
|
861
|
+
*/
|
|
862
|
+
interface ImageWireFormat {
|
|
863
|
+
/** Image bytes. */
data: Uint8Array;
|
|
864
|
+
/** Format label sent alongside `data`; restricted to the three listed literals. */
format: "jpeg" | "png" | "webp";
|
|
865
|
+
}
|
|
866
|
+
/**
|
|
867
|
+
* Convert various image input types to Uint8Array.
|
|
868
|
+
*
|
|
869
|
+
* Accepts:
|
|
870
|
+
* - Uint8Array: passed through as-is
|
|
871
|
+
* - ArrayBuffer / Buffer: wrapped in Uint8Array
|
|
872
|
+
* - Blob / File: read as ArrayBuffer then wrapped
|
|
873
|
+
* - string: decoded from base64 or data URL
|
|
874
|
+
*
|
|
875
|
+
* @param input - Image data in any supported format
|
|
876
|
+
* @returns Promise resolving to the image bytes as a Uint8Array (the call must be awaited
|
* for every input kind, including Uint8Array)
|
877
|
+
*
|
|
878
|
+
* @example
|
|
879
|
+
* ```typescript
|
|
880
|
+
* // From base64 string
|
|
881
|
+
* const fromBase64 = await toImageBytes(base64String);
|
|
882
|
+
*
|
|
883
|
+
* // From file (browser)
|
|
884
|
+
* const fromFile = await toImageBytes(file);
|
|
885
|
+
* ```
|
|
886
|
+
*/
|
|
887
|
+
declare function toImageBytes(input: ImageInput): Promise<Uint8Array>;
|
|
888
|
+
/**
|
|
889
|
+
* Convert an image in any supported input format to the wire format for transport.
|
|
890
|
+
*
|
|
891
|
+
* Per design.md Section 4.3, images are sent as:
|
|
892
|
+
* `{ data: <bytes>, format: "jpeg" | "png" | "webp" }`
|
|
893
|
+
*
|
|
894
|
+
* @param input - Image data in any supported format
|
|
895
|
+
* @param format - Image format (defaults to "jpeg")
* NOTE(review): nothing in this declaration indicates that the default is derived from
* the bytes; for PNG/WebP input, pass `format` explicitly - confirm against the implementation.
|
|
896
|
+
* @returns Promise resolving to the image in wire format
|
|
897
|
+
*/
|
|
898
|
+
declare function toImageWireFormat(input: ImageInput, format?: "jpeg" | "png" | "webp"): Promise<ImageWireFormat>;
|
|
899
|
+
/**
|
|
900
|
+
* Detect image format from bytes (magic number check).
|
|
901
|
+
*
|
|
902
|
+
* @param bytes - Image bytes
|
|
903
|
+
* @returns Detected format or "unknown"
* ("unknown" is not a member of `ImageWireFormat["format"]`, so narrow the result
* before using it as a wire-format label)
|
|
904
|
+
*/
|
|
905
|
+
declare function detectImageFormat(bytes: Uint8Array): "jpeg" | "png" | "webp" | "unknown";
|
|
906
|
+
|
|
907
|
+
export { type CapacityInfo, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, detectImageFormat, packMessage, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|