@superlinked/sie-sdk 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1406 @@
1
+ 'use strict';
2
+
3
+ var msgpack = require('@msgpack/msgpack');
4
+
5
+ // src/errors.ts
6
/** Root of the SDK error hierarchy; every other SDK error class extends it. */
class SIEError extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    this.name = "SIEError";
    // Point the instance at the prototype of the class that was actually constructed.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** Connection-level failure. */
class SIEConnectionError extends SIEError {
  /** @param {string} message */
  constructor(message) {
    super(message);
    this.name = "SIEConnectionError";
  }
}

/**
 * Error carrying a server error code and an HTTP status in the 400-499 range.
 *
 * @property {string} code - Error code from the server (e.g., "INVALID_MODEL", "VALIDATION_ERROR")
 * @property {number} statusCode - HTTP status code (400-499)
 */
class RequestError extends SIEError {
  constructor(message, code, statusCode) {
    super(message);
    this.name = "RequestError";
    this.code = code;
    this.statusCode = statusCode;
  }
}

/**
 * Error carrying a server error code and an HTTP status in the 500-599 range.
 *
 * @property {string} code - Error code from the server (e.g., "INTERNAL_ERROR", "LORA_LOADING")
 * @property {number} statusCode - HTTP status code (500-599)
 */
class ServerError extends SIEError {
  constructor(message, code, statusCode) {
    super(message);
    this.name = "ServerError";
    this.code = code;
    this.statusCode = statusCode;
  }
}

/**
 * Error describing a provisioning problem for a requested GPU type.
 *
 * @property {string|undefined} gpu - The GPU type that was requested
 * @property {number|undefined} retryAfter - Suggested retry delay in milliseconds
 *   (from server Retry-After header)
 */
class ProvisioningError extends SIEError {
  constructor(message, gpu, retryAfter) {
    super(message);
    this.name = "ProvisioningError";
    this.gpu = gpu;
    this.retryAfter = retryAfter;
  }
}

/**
 * Error raised by pool operations.
 *
 * @property {string} poolName - Name of the pool
 * @property {string|undefined} state - Current pool state (if known):
 *   "pending", "active", "expired"
 */
class PoolError extends SIEError {
  constructor(message, poolName, state) {
    super(message);
    this.name = "PoolError";
    this.poolName = poolName;
    this.state = state;
  }
}

/**
 * Error describing a LoRA adapter loading problem.
 *
 * @property {string} lora - The LoRA adapter that was requested
 * @property {string} model - The model the LoRA was requested for
 */
class LoraLoadingError extends SIEError {
  constructor(message, lora, model) {
    super(message);
    this.name = "LoraLoadingError";
    this.lora = lora;
    this.model = model;
  }
}

/**
 * Error describing a model loading problem.
 *
 * @property {string} model - The model that was requested
 */
class ModelLoadingError extends SIEError {
  constructor(message, model) {
    super(message);
    this.name = "ModelLoadingError";
    this.model = model;
  }
}
88
+
89
+ // src/internal/constants.ts
90
// Content types used on the wire.
const MSGPACK_CONTENT_TYPE = "application/msgpack";
const JSON_CONTENT_TYPE = "application/json";

// HTTP status boundaries used when classifying responses.
const HTTP_ACCEPTED = 202;
const HTTP_CLIENT_ERROR_MIN = 400;
const HTTP_CLIENT_ERROR_MAX = 499;
const HTTP_SERVER_ERROR_MIN = 500;
const HTTP_SERVER_ERROR_MAX = 599;

// Timing defaults. DEFAULT_TIMEOUT and DEFAULT_LEASE_RENEWAL_INTERVAL are passed
// to setTimeout, i.e. they are milliseconds; the others are presumably
// milliseconds as well (their use is outside this section).
const DEFAULT_TIMEOUT = 30_000;
const DEFAULT_PROVISION_TIMEOUT = 300_000;
const DEFAULT_RETRY_DELAY = 5_000;
const DEFAULT_LEASE_RENEWAL_INTERVAL = 60_000;

// Retry tuning and server error codes for LoRA / model loading.
const LORA_LOADING_MAX_RETRIES = 10;
const LORA_LOADING_DEFAULT_DELAY = 1_000;
const LORA_LOADING_ERROR_CODE = "LORA_LOADING";
const MODEL_LOADING_DEFAULT_DELAY = 5_000;
const MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";

// Version negotiation headers.
const SDK_VERSION_HEADER = "X-SIE-SDK-Version";
const SERVER_VERSION_HEADER = "X-SIE-Server-Version";

// src/version.ts
const SDK_VERSION = "0.1.8";

// msgpack extension type id under which numpy arrays are registered.
const EXT_TYPE_NUMPY = 78;
112
/**
 * Resolve a numpy dtype string (e.g. "<f4", "|u1") to its element width and a
 * factory that wraps an ArrayBuffer in the matching TypedArray.
 *
 * Only the trailing "<kind><size>" pair selects the type. Half precision ("f2")
 * has no native TypedArray here, so its factory widens the values into a
 * Float32Array.
 *
 * TypedArrays read memory in platform byte order, so a multi-byte dtype that is
 * explicitly big-endian (leading ">") cannot be wrapped as-is; it is rejected
 * instead of being decoded into wrong values.
 *
 * @param {string} dtype - numpy dtype string.
 * @returns {{ size: number, construct: (buf: ArrayBuffer) => ArrayBufferView }}
 *   `size` is the element width in bytes.
 * @throws {Error} "Unsupported numpy dtype: …" for unknown or big-endian multi-byte dtypes.
 */
function parseDtype(dtype) {
  const kindAndSize = dtype.slice(-2);
  const nativeViews = new Map([
    ["f4", Float32Array],
    ["f8", Float64Array],
    ["i4", Int32Array],
    ["i2", Int16Array],
    ["i1", Int8Array],
    ["u1", Uint8Array],
    ["u2", Uint16Array],
    ["u4", Uint32Array],
  ]);
  const unsupported = () => new Error(`Unsupported numpy dtype: ${dtype}`);

  const View = nativeViews.get(kindAndSize);
  const isHalf = kindAndSize === "f2";
  if (View === void 0 && !isHalf) {
    throw unsupported();
  }

  const size = isHalf ? 2 : View.BYTES_PER_ELEMENT;
  if (size > 1 && dtype.startsWith(">")) {
    throw unsupported();
  }

  if (!isHalf) {
    return { size, construct: (buf) => new View(buf) };
  }
  return {
    size,
    construct: (buf) => {
      // Read the raw 16-bit patterns, then widen each one to a float32 value.
      const halves = new Uint16Array(buf);
      const widened = new Float32Array(halves.length);
      for (let i = 0; i < halves.length; i++) {
        widened[i] = float16ToFloat32(halves[i] ?? 0);
      }
      return widened;
    },
  };
}
145
/**
 * Convert one IEEE 754 half-precision bit pattern to a JavaScript number.
 *
 * Layout: 1 sign bit, 5 exponent bits (bias 15), 10 fraction bits.
 *
 * @param {number} h - The 16-bit pattern as an unsigned integer.
 * @returns {number} The decoded value (may be ±0, ±Infinity or NaN).
 */
function float16ToFloat32(h) {
  const negative = (h >>> 15 & 1) === 1;
  const exponent = h >>> 10 & 0x1f;
  const fraction = h & 0x3ff;

  // All-ones exponent: infinities (zero fraction) and NaNs (non-zero fraction).
  if (exponent === 0x1f) {
    if (fraction !== 0) {
      return Number.NaN;
    }
    return negative ? Number.NEGATIVE_INFINITY : Number.POSITIVE_INFINITY;
  }

  const direction = negative ? -1 : 1;

  // Zero exponent: signed zero or a subnormal without the implicit leading 1.
  if (exponent === 0) {
    if (fraction === 0) {
      return negative ? -0 : 0;
    }
    return direction * (fraction / 1024) * 2 ** -14;
  }

  // Normal number: implicit leading 1 and biased exponent.
  return direction * (1 + fraction / 1024) * 2 ** (exponent - 15);
}
161
/**
 * Decode the payload of the numpy msgpack extension into a TypedArray.
 *
 * The payload is laid out as `<dtype>|<shape>|<raw bytes>`, where shape is a
 * comma-separated list of integers (possibly empty). The result is always a
 * flat TypedArray; when a shape is present its product gives the element
 * count, otherwise the count is derived from the number of raw bytes.
 *
 * @param {Uint8Array} data - Raw extension payload.
 * @returns {ArrayBufferView} TypedArray produced by the dtype's factory.
 * @throws {Error} When the dtype is not supported by parseDtype.
 */
function decodeNumpyArray(data) {
  const PIPE = 124; // "|"
  const decoder = new TextDecoder();

  // Index of the next separator at or after `from`; when there is none, the
  // scan stops at the end of the data (or at `from` if that is already past it).
  const nextSeparator = (from) => {
    const at = from < data.length ? data.indexOf(PIPE, from) : -1;
    return at === -1 ? Math.max(from, data.length) : at;
  };

  const dtypeEnd = nextSeparator(0);
  const shapeEnd = nextSeparator(dtypeEnd + 1);

  const dtype = decoder.decode(data.slice(0, dtypeEnd));
  const shapeStr = decoder.decode(data.slice(dtypeEnd + 1, shapeEnd));
  const shape = shapeStr.length > 0 ? shapeStr.split(",").map((dim) => Number.parseInt(dim, 10)) : [];
  const arrayData = data.slice(shapeEnd + 1);

  const { size, construct } = parseDtype(dtype);
  let totalElements;
  if (shape.length > 0) {
    totalElements = shape.reduce((product, dim) => product * dim, 1);
  } else {
    totalElements = arrayData.length / size;
  }

  // Copy into a fresh buffer so the typed view starts at offset 0.
  const byteCount = totalElements * size;
  const buffer = new ArrayBuffer(byteCount);
  new Uint8Array(buffer).set(arrayData.slice(0, byteCount));
  return construct(buffer);
}
182
/**
 * Serialize a Float32Array or Int32Array as `<dtype>|<length>|<raw bytes>`.
 *
 * The shape written is always the one-dimensional element count.
 *
 * @param {Float32Array|Int32Array} arr - Array to serialize.
 * @returns {Uint8Array} Header bytes followed by a copy of the array's bytes.
 * @throws {Error} "Unsupported TypedArray type" for any other input.
 */
function encodeNumpyArray(arr) {
  let dtype;
  if (arr instanceof Float32Array) {
    dtype = "<f4";
  } else if (arr instanceof Int32Array) {
    dtype = "<i4";
  } else {
    throw new Error("Unsupported TypedArray type");
  }

  const header = new TextEncoder().encode(`${dtype}|${arr.length.toString()}|`);
  // View exactly the bytes this array covers; it may be a window on a larger buffer.
  const payload = new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength);

  const encoded = new Uint8Array(header.length + payload.length);
  encoded.set(header, 0);
  encoded.set(payload, header.length);
  return encoded;
}
208
/**
 * Build the msgpack extension codec that carries numpy arrays.
 *
 * Float32Array and Int32Array values are encoded under EXT_TYPE_NUMPY; for any
 * other value the encoder returns null. Incoming payloads of that extension
 * type are decoded with decodeNumpyArray.
 *
 * @returns {msgpack.ExtensionCodec} Codec with the numpy extension registered.
 */
function createExtensionCodec() {
  const isEncodable = (value) => value instanceof Float32Array || value instanceof Int32Array;

  const numpyCodec = new msgpack.ExtensionCodec();
  numpyCodec.register({
    type: EXT_TYPE_NUMPY,
    encode: (value) => (isEncodable(value) ? encodeNumpyArray(value) : null),
    decode: (data) => decodeNumpyArray(data),
  });
  return numpyCodec;
}

// Codec instance used by packMessage and unpackMessage.
var extensionCodec = createExtensionCodec();
225
/**
 * Serialize a value to msgpack using the SDK's extension codec.
 *
 * @param {unknown} data - Value to serialize.
 * @returns {Uint8Array} Encoded bytes.
 */
function packMessage(data) {
  const encodeOptions = { extensionCodec };
  return msgpack.encode(data, encodeOptions);
}
228
/**
 * Report whether a decoded value is a map-encoded numpy array, i.e. an object
 * with `nd === true`, a string `type`, an array `shape` and Uint8Array `data`.
 *
 * @param {unknown} obj - Decoded value to inspect.
 * @returns {boolean}
 */
function isNumpyArrayMap(obj) {
  if (obj === null || typeof obj !== "object") {
    return false;
  }
  if (obj.nd !== true) {
    return false;
  }
  if (typeof obj.type !== "string") {
    return false;
  }
  if (!Array.isArray(obj.shape)) {
    return false;
  }
  return obj.data instanceof Uint8Array;
}
235
/**
 * Convert a map-encoded numpy array into TypedArray form.
 *
 * A two-dimensional shape yields an array of per-row TypedArrays; any other
 * shape yields one flat TypedArray. With an empty shape the element count is
 * derived from the number of data bytes.
 *
 * @param {{ type: string, shape: number[], data: Uint8Array }} map
 * @returns {ArrayBufferView|ArrayBufferView[]}
 * @throws {Error} When the dtype is not supported by parseDtype.
 */
function convertNumpyArrayMap(map) {
  const bytes = map.data;
  const { size, construct } = parseDtype(map.type);

  // Copy `byteLength` bytes starting at `start` into a fresh buffer and wrap it.
  const carve = (start, byteLength) => {
    const fresh = new ArrayBuffer(byteLength);
    new Uint8Array(fresh).set(bytes.slice(start, start + byteLength));
    return construct(fresh);
  };

  const isMatrix = map.shape.length === 2 && map.shape[0] !== void 0 && map.shape[1] !== void 0;
  if (isMatrix) {
    const numRows = map.shape[0];
    const numCols = map.shape[1];
    const rows = [];
    for (let row = 0; row < numRows; row++) {
      rows.push(carve(row * numCols * size, numCols * size));
    }
    return rows;
  }

  let totalElements;
  if (map.shape.length > 0) {
    totalElements = map.shape.reduce((product, dim) => product * dim, 1);
  } else {
    totalElements = bytes.length / size;
  }
  return carve(0, totalElements * size);
}
256
/**
 * Recursively replace map-encoded numpy arrays inside a decoded message with
 * TypedArrays.
 *
 * Arrays and plain objects are rebuilt with their members converted. Every
 * other value is returned unchanged: primitives, TypedArrays / DataViews, and
 * non-plain objects such as Date instances. Non-plain objects must not be
 * rebuilt through Object.entries, because that would replace them with an
 * object holding only their enumerable own properties (an empty object for a
 * Date).
 *
 * @param {unknown} obj - Decoded value.
 * @returns {unknown} The value with numpy array maps converted.
 */
function convertNumpyArrays(obj) {
  if (obj === null || obj === void 0) {
    return obj;
  }
  if (isNumpyArrayMap(obj)) {
    return convertNumpyArrayMap(obj);
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => convertNumpyArrays(item));
  }
  if (typeof obj !== "object" || ArrayBuffer.isView(obj)) {
    return obj;
  }

  // Recurse only into plain records (object literals or null-prototype maps).
  const proto = Object.getPrototypeOf(obj);
  if (proto !== Object.prototype && proto !== null) {
    return obj;
  }

  const result = {};
  for (const [key, value] of Object.entries(obj)) {
    result[key] = convertNumpyArrays(value);
  }
  return result;
}
278
/**
 * Decode msgpack bytes with the SDK's extension codec and convert any
 * map-encoded numpy arrays in the result to TypedArrays.
 *
 * @param {Uint8Array} data - msgpack-encoded bytes.
 * @returns {unknown} The decoded value.
 */
function unpackMessage(data) {
  // Convert byte string keys (Uint8Array) to text strings; string and number
  // keys pass through and anything else is JSON-stringified.
  const toMapKey = (key) => {
    switch (typeof key) {
      case "string":
      case "number":
        return key;
      default:
        return key instanceof Uint8Array ? new TextDecoder().decode(key) : JSON.stringify(key);
    }
  };

  const decoded = msgpack.decode(data, { extensionCodec, mapKeyConverter: toMapKey });
  return convertNumpyArrays(decoded);
}
294
+
295
+ // src/internal/retry.ts
296
/**
 * Convert a Retry-After header value into a delay in milliseconds.
 *
 * The value is tried first as a positive number of seconds and then as a date.
 *
 * @param {string|null|undefined} header - Raw header value.
 * @returns {number|undefined} Delay in milliseconds, or undefined when the
 *   header is missing, unparseable, or does not describe a positive delay.
 */
function getRetryAfter(header) {
  if (!header) {
    return void 0;
  }

  // NaN fails this comparison, so non-numeric values fall through to the date form.
  const seconds = Number.parseInt(header, 10);
  if (seconds > 0) {
    return seconds * 1e3;
  }

  const retryAt = new Date(header).getTime();
  if (Number.isNaN(retryAt)) {
    return void 0;
  }
  const delay = retryAt - Date.now();
  return delay > 0 ? delay : void 0;
}
309
+
310
+ // src/internal/parsing.ts
311
/**
 * Read the Retry-After header of a response and convert it to milliseconds.
 *
 * @param {Response} response - Response whose headers are inspected.
 * @returns {number|undefined} Delay in milliseconds, if the header yields one.
 */
function getRetryAfter2(response) {
  return getRetryAfter(response.headers.get("Retry-After"));
}
315
/**
 * Extract a string error code from an error response body, if there is one.
 *
 * The body is decoded as msgpack or JSON depending on the content type. The
 * code is looked up in `error.code`, then `detail.code`, then top-level `code`.
 * This is best-effort: any failure while reading or decoding the body yields
 * undefined.
 *
 * @param {Response} response - Response whose body is read.
 * @returns {Promise<string|undefined>}
 */
async function getErrorCode(response) {
  try {
    const contentType = response.headers.get("content-type") ?? "";
    const data = contentType.includes(MSGPACK_CONTENT_TYPE)
      ? unpackMessage(new Uint8Array(await response.arrayBuffer()))
      : await response.json();

    for (const field of ["error", "detail"]) {
      const holder = data[field];
      if (holder && typeof holder === "object" && typeof holder.code === "string") {
        return holder.code;
      }
    }
    if (typeof data.code === "string") {
      return data.code;
    }
  } catch {
    // Deliberately ignored: an unreadable body simply has no error code.
  }
  return void 0;
}
344
/**
 * Convert a non-success HTTP response into the matching SDK error and throw it.
 *
 * The body is decoded as msgpack or JSON according to its content type; a
 * missing, unparseable, or non-object body is treated as empty.
 *
 * - code: top-level `code`, else the first string `code` found in `error` or
 *   `detail`, else "UNKNOWN".
 * - message: `detail` when it is a string, else the first string `message`
 *   found in `error` or `detail`, else the JSON form of `detail`, else the
 *   response status text.
 *
 * @param {Response} response - The response to convert.
 * @param {string} [gpu] - GPU type of the request; attached to ProvisioningError.
 * @returns {Promise<never>} Never resolves normally.
 * @throws {ProvisioningError} For status 202; `retryAfter` is derived from the
 *   Retry-After header (seconds or date form).
 * @throws {RequestError} For 4xx statuses.
 * @throws {ServerError} For every other status.
 */
async function handleError(response, gpu) {
  const { status } = response;

  let errorBody = {};
  try {
    const contentType = response.headers.get("content-type") ?? "";
    const parsed = contentType.includes(MSGPACK_CONTENT_TYPE)
      ? unpackMessage(new Uint8Array(await response.arrayBuffer()))
      : await response.json();
    // A body such as JSON `null` or a bare string carries no fields to read.
    if (parsed !== null && typeof parsed === "object") {
      errorBody = parsed;
    }
  } catch {
    // Missing or unparseable body: fall back to the defaults below.
  }

  const isRecord = (value) => value !== null && typeof value === "object";
  const nested = [errorBody.error, errorBody.detail].filter(isRecord);
  const nestedCode = nested.map((part) => part.code).find((value) => typeof value === "string");
  const nestedMessage = nested.map((part) => part.message).find((value) => typeof value === "string");

  const code = errorBody.code ?? nestedCode ?? "UNKNOWN";

  let message;
  if (typeof errorBody.detail === "string") {
    message = errorBody.detail;
  } else if (nestedMessage !== void 0) {
    message = nestedMessage;
  } else if (errorBody.detail != null) {
    // Structured detail without a message: keep it readable instead of "[object Object]".
    message = JSON.stringify(errorBody.detail);
  } else {
    message = response.statusText;
  }

  if (status === HTTP_ACCEPTED) {
    throw new ProvisioningError(message, gpu, getRetryAfter2(response));
  }
  if (status >= HTTP_CLIENT_ERROR_MIN && status <= HTTP_CLIENT_ERROR_MAX) {
    throw new RequestError(message, code, status);
  }
  // 5xx and any other unexpected status are reported as server errors.
  throw new ServerError(message, code, status);
}
369
/**
 * Convert one wire-format encode item into the SDK result shape.
 *
 * Only fields present on the wire item appear in the result: `dense` and
 * `multivector` are unwrapped from their `values` field, and timing keys are
 * renamed from snake_case to camelCase.
 *
 * @param {object} data - Wire-format item.
 * @returns {object} Parsed result.
 */
function parseEncodeResult(data) {
  const { id, dense, sparse, multivector, timing } = data;
  const parsed = {};

  if (id !== void 0) {
    parsed.id = id;
  }
  if (dense) {
    parsed.dense = dense.values;
  }
  if (sparse) {
    parsed.sparse = { indices: sparse.indices, values: sparse.values };
  }
  if (multivector) {
    parsed.multivector = multivector.values;
  }
  if (timing) {
    parsed.timing = {
      totalMs: timing.total_ms,
      queueMs: timing.queue_ms,
      tokenizationMs: timing.tokenization_ms,
      inferenceMs: timing.inference_ms,
    };
  }
  return parsed;
}

/**
 * Convert a list of wire-format encode items.
 *
 * @param {object[]} data - Wire-format items.
 * @returns {object[]} Parsed results, in the same order.
 */
function parseEncodeResults(data) {
  return data.map((item) => parseEncodeResult(item));
}
399
/**
 * Convert one wire-format score entry to camelCase.
 *
 * @param {{ item_id: unknown, score: number, rank: number }} data
 * @returns {{ itemId: unknown, score: number, rank: number }}
 */
function parseScoreEntry(data) {
  const { item_id: itemId, score, rank } = data;
  return { itemId, score, rank };
}

/**
 * Convert a wire-format score response to the SDK shape.
 *
 * @param {{ model: string, query_id: unknown, scores: object[] }} data
 * @returns {{ model: string, queryId: unknown, scores: object[] }}
 */
function parseScoreResult(data) {
  const { model, query_id: queryId } = data;
  const scores = data.scores.map((entry) => parseScoreEntry(entry));
  return { model, queryId, scores };
}
414
/**
 * Copy the known entity fields from a wire-format entity.
 *
 * @param {object} data - Wire-format entity.
 * @returns {{ text: unknown, label: unknown, score: unknown, start: unknown, end: unknown, bbox: unknown }}
 */
function parseEntity(data) {
  const { text, label, score, start, end, bbox } = data;
  return { text, label, score, start, end, bbox };
}

/**
 * Convert one wire-format extract item.
 *
 * @param {{ id: unknown, entities: object[] }} data
 * @returns {{ id: unknown, entities: object[] }}
 */
function parseExtractResult(data) {
  const entities = data.entities.map((entity) => parseEntity(entity));
  return { id: data.id, entities };
}

/**
 * Convert a list of wire-format extract items.
 *
 * @param {object[]} data
 * @returns {object[]} Parsed results, in the same order.
 */
function parseExtractResults(data) {
  return data.map((item) => parseExtractResult(item));
}
433
/**
 * Convert a wire-format capacity payload into the SDK's capacity shape.
 *
 * When `gpuFilter` is given, workers are restricted to those whose GPU matches
 * it case-insensitively, and `workerCount` is the number of matching workers
 * rather than the cluster-wide count. Missing cluster fields default to 0 and
 * missing lists to empty arrays.
 *
 * @param {object} data - Wire-format payload.
 * @param {string} [gpuFilter] - GPU type to restrict workers to.
 * @returns {object} Parsed capacity info.
 */
function parseCapacityInfo(data, gpuFilter) {
  const { cluster } = data;

  let selected = data.workers ?? [];
  if (gpuFilter) {
    const wanted = gpuFilter.toLowerCase();
    selected = selected.filter((worker) => worker.gpu.toLowerCase() === wanted);
  }

  const workers = selected.map((worker) => ({
    url: worker.url,
    gpu: worker.gpu,
    healthy: worker.healthy,
    queueDepth: worker.queue_depth,
    loadedModels: worker.loaded_models,
  }));

  let workerCount;
  if (gpuFilter) {
    workerCount = workers.length;
  } else {
    workerCount = cluster?.worker_count ?? 0;
  }

  return {
    status: data.status,
    workerCount,
    gpuCount: cluster?.gpu_count ?? 0,
    modelsLoaded: cluster?.models_loaded ?? 0,
    configuredGpuTypes: data.configured_gpu_types ?? [],
    liveGpuTypes: data.live_gpu_types ?? [],
    workers,
  };
}
457
+
458
+ // src/client.ts
459
/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
462
/**
 * Sleep for `ms` milliseconds unless the signal aborts first.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} signal - Signal that cuts the sleep short.
 * @returns {Promise<boolean>} true when the sleep ended because of an abort
 *   (including a signal that was already aborted), false when the full delay
 *   elapsed.
 */
function abortableSleep(ms, signal) {
  if (signal.aborted) {
    return Promise.resolve(true);
  }
  return new Promise((resolve) => {
    let timer;
    function handleAbort() {
      clearTimeout(timer);
      resolve(true);
    }
    timer = setTimeout(() => {
      // Delay elapsed normally: stop listening so the handler does not linger on the signal.
      signal.removeEventListener("abort", handleAbort);
      resolve(false);
    }, ms);
    signal.addEventListener("abort", handleAbort, { once: true });
  });
}
// Maximum number of attempts made for a single pool lease renewal.
var _LEASE_RENEWAL_MAX_RETRIES = 5;
477
+ var SIEClient = class {
478
+ baseUrl;
479
+ timeout;
480
+ gpu;
481
+ apiKey;
482
+ defaultWaitForCapacity;
483
+ provisionTimeout;
484
+ // Pool state: track created pools and their lease renewal scheduling
485
+ pools = /* @__PURE__ */ new Map();
486
+ // Version negotiation state
487
+ versionWarningLogged = false;
488
+ // Note: LoRA and model loading retry counters are now local to each method
489
+ // to avoid interference between concurrent requests
490
+ /**
491
+ * Create a new SIE client.
492
+ *
493
+ * @param baseUrl - Base URL of the SIE server (e.g., "http://localhost:8080")
494
+ * @param options - Client options
495
+ */
496
+ constructor(baseUrl, options = {}) {
497
+ this.baseUrl = baseUrl.replace(/\/$/, "");
498
+ this.timeout = options.timeout ?? DEFAULT_TIMEOUT;
499
+ this.gpu = options.gpu;
500
+ this.apiKey = options.apiKey;
501
+ this.defaultWaitForCapacity = options.waitForCapacity ?? false;
502
+ this.provisionTimeout = options.provisionTimeout ?? DEFAULT_PROVISION_TIMEOUT;
503
+ }
504
+ /**
505
+ * Get the base URL of the SIE server.
506
+ *
507
+ * @returns The normalized base URL (without trailing slash)
508
+ */
509
+ getBaseUrl() {
510
+ return this.baseUrl;
511
+ }
512
+ /**
513
+ * Encode one or more items.
514
+ */
515
+ async encode(model, items, options = {}) {
516
+ const isSingleItem = !Array.isArray(items);
517
+ const itemsArray = isSingleItem ? [items] : items;
518
+ const body = {
519
+ items: itemsArray
520
+ };
521
+ const params = {};
522
+ if (options.outputTypes) {
523
+ params.output_types = options.outputTypes;
524
+ }
525
+ if (options.instruction !== void 0) {
526
+ params.instruction = options.instruction;
527
+ }
528
+ if (options.isQuery !== void 0) {
529
+ params.is_query = options.isQuery;
530
+ }
531
+ if (options.outputDtype !== void 0) {
532
+ params.output_dtype = options.outputDtype;
533
+ }
534
+ if (Object.keys(params).length > 0) {
535
+ body.params = params;
536
+ }
537
+ const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
538
+ const { pool, gpu } = this.parseGpuParam(options.gpu);
539
+ const response = await this.requestWithRetry(
540
+ `/v1/encode/${encodeURIComponent(model)}`,
541
+ body,
542
+ pool,
543
+ gpu,
544
+ waitForCapacity,
545
+ model
546
+ );
547
+ const data = unpackMessage(new Uint8Array(await response.arrayBuffer()));
548
+ const results = parseEncodeResults(data.items);
549
+ if (isSingleItem) {
550
+ const first = results[0];
551
+ if (!first) {
552
+ throw new Error("No results returned from encode");
553
+ }
554
+ return first;
555
+ }
556
+ return results;
557
+ }
558
+ /**
559
+ * List available models.
560
+ *
561
+ * @returns Array of model information
562
+ */
563
+ async listModels() {
564
+ const response = await this.requestJson("/v1/models", "GET");
565
+ const data = await response.json();
566
+ return data.models.map((m) => ({
567
+ name: m.name,
568
+ loaded: m.loaded,
569
+ inputs: m.inputs,
570
+ outputs: m.outputs,
571
+ dims: m.dims,
572
+ maxSequenceLength: m.max_sequence_length
573
+ }));
574
+ }
575
+ /**
576
+ * Stream real-time status updates from a worker or router.
577
+ *
578
+ * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
579
+ * "auto" detects the endpoint via /health.
580
+ */
581
+ async *watch(mode = "auto") {
582
+ const endpoint = mode === "auto" ? await this.detectEndpointType() : mode;
583
+ const path = endpoint === "cluster" ? "/ws/cluster-status" : "/ws/status";
584
+ const wsUrl = this.buildWsUrl(path);
585
+ const ws = this.createWebSocket(wsUrl);
586
+ const queue = [];
587
+ let resolveNext = null;
588
+ let rejectNext = null;
589
+ let closed = false;
590
+ const notify = () => {
591
+ if (resolveNext) {
592
+ resolveNext();
593
+ resolveNext = null;
594
+ }
595
+ };
596
+ const fail = (error) => {
597
+ if (rejectNext) {
598
+ rejectNext(error);
599
+ rejectNext = null;
600
+ }
601
+ };
602
+ const waitForMessage = () => new Promise((resolve, reject) => {
603
+ resolveNext = resolve;
604
+ rejectNext = reject;
605
+ });
606
+ const parseMessage = (data) => {
607
+ if (typeof data === "string") {
608
+ return JSON.parse(data);
609
+ }
610
+ if (data instanceof ArrayBuffer) {
611
+ return JSON.parse(new TextDecoder().decode(new Uint8Array(data)));
612
+ }
613
+ if (data instanceof Uint8Array) {
614
+ return JSON.parse(new TextDecoder().decode(data));
615
+ }
616
+ throw new Error("Unsupported WebSocket message type");
617
+ };
618
+ const openPromise = new Promise((resolve, reject) => {
619
+ ws.addEventListener("open", () => resolve());
620
+ ws.addEventListener("error", (event) => reject(event));
621
+ });
622
+ ws.addEventListener("message", (event) => {
623
+ try {
624
+ queue.push(parseMessage(event.data));
625
+ notify();
626
+ } catch (error) {
627
+ fail(error);
628
+ }
629
+ });
630
+ ws.addEventListener("close", () => {
631
+ closed = true;
632
+ notify();
633
+ });
634
+ try {
635
+ await openPromise;
636
+ while (!closed || queue.length > 0) {
637
+ if (queue.length === 0) {
638
+ await waitForMessage();
639
+ continue;
640
+ }
641
+ const next = queue.shift();
642
+ if (next) {
643
+ yield next;
644
+ }
645
+ }
646
+ } finally {
647
+ ws.close();
648
+ }
649
+ }
650
+ /**
651
+ * Score items against a query using a reranker model.
652
+ *
653
+ * @param model - Model name (e.g., "bge-reranker-v2")
654
+ * @param query - Query item
655
+ * @param items - Items to score against the query
656
+ * @param options - Score options
657
+ * @returns Score result with sorted scores
658
+ *
659
+ * @example
660
+ * ```typescript
661
+ * const result = await client.score(
662
+ * "bge-reranker-v2",
663
+ * { text: "What is machine learning?" },
664
+ * [
665
+ * { id: "doc-1", text: "Machine learning is..." },
666
+ * { id: "doc-2", text: "Python is..." },
667
+ * ],
668
+ * );
669
+ *
670
+ * // Scores are sorted by relevance (descending)
671
+ * console.log(result.scores[0].itemId); // most relevant
672
+ * ```
673
+ */
674
+ async score(model, query, items, options = {}) {
675
+ const body = {
676
+ query,
677
+ items
678
+ };
679
+ if (options.topK !== void 0) {
680
+ body.top_k = options.topK;
681
+ }
682
+ const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
683
+ const { pool, gpu } = this.parseGpuParam(options.gpu);
684
+ const response = await this.requestWithRetry(
685
+ `/v1/score/${encodeURIComponent(model)}`,
686
+ body,
687
+ pool,
688
+ gpu,
689
+ waitForCapacity,
690
+ model
691
+ );
692
+ const data = unpackMessage(new Uint8Array(await response.arrayBuffer()));
693
+ return parseScoreResult(data);
694
+ }
695
+ /**
696
+ * Extract entities from one or more items.
697
+ *
698
+ * @example
699
+ * ```typescript
700
+ * const result = await client.extract(
701
+ * "gliner-multi-v2.1",
702
+ * { text: "Apple was founded by Steve Jobs." },
703
+ * { labels: ["person", "organization"] },
704
+ * );
705
+ *
706
+ * for (const entity of result.entities) {
707
+ * console.log(`${entity.text} (${entity.label})`);
708
+ * }
709
+ * // Output:
710
+ * // Apple (organization)
711
+ * // Steve Jobs (person)
712
+ * ```
713
+ */
714
+ async extract(model, items, options) {
715
+ const isSingleItem = !Array.isArray(items);
716
+ const itemsArray = isSingleItem ? [items] : items;
717
+ const body = {
718
+ items: itemsArray
719
+ };
720
+ const params = {
721
+ labels: options.labels
722
+ };
723
+ if (options.threshold !== void 0) {
724
+ params.threshold = options.threshold;
725
+ }
726
+ body.params = params;
727
+ const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
728
+ const { pool, gpu } = this.parseGpuParam(options.gpu);
729
+ const response = await this.requestWithRetry(
730
+ `/v1/extract/${encodeURIComponent(model)}`,
731
+ body,
732
+ pool,
733
+ gpu,
734
+ waitForCapacity,
735
+ model
736
+ );
737
+ const data = unpackMessage(new Uint8Array(await response.arrayBuffer()));
738
+ const results = parseExtractResults(data.items);
739
+ if (isSingleItem) {
740
+ const first = results[0];
741
+ if (!first) {
742
+ throw new Error("No results returned from extract");
743
+ }
744
+ return first;
745
+ }
746
+ return results;
747
+ }
748
+ /**
749
+ * Close the client and cleanup resources.
750
+ *
751
+ * Stops pool lease renewal timers. Note that pools are not deleted
752
+ * automatically - they are garbage collected by the router after inactivity.
753
+ * This allows pool reuse if the client reconnects.
754
+ */
755
+ async close() {
756
+ for (const [, poolState] of this.pools) {
757
+ if (poolState.timeoutId !== null) {
758
+ clearTimeout(poolState.timeoutId);
759
+ }
760
+ poolState.abortController.abort();
761
+ }
762
+ this.pools.clear();
763
+ }
764
+ /**
765
+ * Create a resource pool for isolated capacity.
766
+ *
767
+ * Pools provide dedicated worker capacity, isolated from other clients.
768
+ * Workers are assigned to pools and only serve requests from that pool.
769
+ *
770
+ * @param name - Pool name (used in GPU param as "poolName/machineProfile")
771
+ * @param gpus - Machine profile requirements, e.g., { "l4": 2, "l4-spot": 1 }
772
+ *
773
+ * @example
774
+ * ```typescript
775
+ * // Create a pool with 2 L4 GPUs
776
+ * await client.createPool("eval-bench", { l4: 2 });
777
+ *
778
+ * // Use the pool for requests
779
+ * await client.encode("bge-m3", { text: "Hello" }, { gpu: "eval-bench/l4" });
780
+ *
781
+ * // Clean up when done
782
+ * await client.deletePool("eval-bench");
783
+ * ```
784
+ */
785
+ async createPool(name, gpus) {
786
+ if (this.pools.has(name)) {
787
+ return;
788
+ }
789
+ const requestBody = { name, gpus };
790
+ const url = `${this.baseUrl}/v1/pools`;
791
+ const headers = {
792
+ "Content-Type": JSON_CONTENT_TYPE,
793
+ Accept: JSON_CONTENT_TYPE,
794
+ [SDK_VERSION_HEADER]: SDK_VERSION
795
+ };
796
+ if (this.apiKey) {
797
+ headers.Authorization = `Bearer ${this.apiKey}`;
798
+ }
799
+ const controller = new AbortController();
800
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
801
+ try {
802
+ const response = await fetch(url, {
803
+ method: "POST",
804
+ headers,
805
+ body: JSON.stringify(requestBody),
806
+ signal: controller.signal
807
+ });
808
+ if (response.status >= HTTP_CLIENT_ERROR_MIN) {
809
+ let errorMsg = response.statusText;
810
+ try {
811
+ const data = await response.json();
812
+ errorMsg = data.detail?.message ?? JSON.stringify(data);
813
+ } catch {
814
+ }
815
+ throw new PoolError(`Failed to create pool '${name}': ${errorMsg}`, name);
816
+ }
817
+ const abortController = new AbortController();
818
+ const poolState = {
819
+ timeoutId: null,
820
+ abortController,
821
+ isRenewing: false
822
+ };
823
+ const renewLoop = async () => {
824
+ if (abortController.signal.aborted) return;
825
+ if (poolState.isRenewing) return;
826
+ poolState.isRenewing = true;
827
+ try {
828
+ const renewUrl = `${this.baseUrl}/v1/pools/${encodeURIComponent(name)}/renew`;
829
+ const renewHeaders = {
830
+ Accept: JSON_CONTENT_TYPE
831
+ };
832
+ if (this.apiKey) {
833
+ renewHeaders.Authorization = `Bearer ${this.apiKey}`;
834
+ }
835
+ for (let attempt = 0; attempt < _LEASE_RENEWAL_MAX_RETRIES; attempt++) {
836
+ if (abortController.signal.aborted) return;
837
+ const perAttempt = new AbortController();
838
+ const onPoolAbort = () => perAttempt.abort();
839
+ abortController.signal.addEventListener("abort", onPoolAbort, { once: true });
840
+ const attemptTimeout = setTimeout(() => perAttempt.abort(), this.timeout);
841
+ try {
842
+ const resp = await fetch(renewUrl, {
843
+ method: "POST",
844
+ headers: renewHeaders,
845
+ signal: perAttempt.signal
846
+ });
847
+ if (resp.ok) break;
848
+ } catch (error) {
849
+ if (abortController.signal.aborted) return;
850
+ } finally {
851
+ clearTimeout(attemptTimeout);
852
+ abortController.signal.removeEventListener("abort", onPoolAbort);
853
+ }
854
+ if (attempt < _LEASE_RENEWAL_MAX_RETRIES - 1) {
855
+ const aborted = await abortableSleep(
856
+ Math.min(2 ** attempt * 1e3, 1e4),
857
+ abortController.signal
858
+ );
859
+ if (aborted) return;
860
+ }
861
+ }
862
+ } finally {
863
+ poolState.isRenewing = false;
864
+ }
865
+ if (!abortController.signal.aborted) {
866
+ poolState.timeoutId = setTimeout(renewLoop, DEFAULT_LEASE_RENEWAL_INTERVAL);
867
+ }
868
+ };
869
+ poolState.timeoutId = setTimeout(renewLoop, DEFAULT_LEASE_RENEWAL_INTERVAL);
870
+ this.pools.set(name, poolState);
871
+ } catch (error) {
872
+ if (error instanceof PoolError) {
873
+ throw error;
874
+ }
875
+ if (error instanceof Error && error.name === "AbortError") {
876
+ throw new PoolError(`Timeout creating pool '${name}'`, name);
877
+ }
878
+ throw new PoolError(
879
+ `Failed to create pool '${name}': ${error instanceof Error ? error.message : "Unknown error"}`,
880
+ name
881
+ );
882
+ } finally {
883
+ clearTimeout(timeoutId);
884
+ }
885
+ }
886
+ /**
887
+ * Get information about a pool.
888
+ *
889
+ * @param name - Pool name to query
890
+ * @returns PoolInfo if pool exists, null otherwise
891
+ *
892
+ * @example
893
+ * ```typescript
894
+ * await client.createPool("eval-bench", { l4: 2 });
895
+ * const pool = await client.getPool("eval-bench");
896
+ * console.log(`Pool state: ${pool?.status.state}`);
897
+ * console.log(`Workers: ${pool?.status.assignedWorkers.length}`);
898
+ * ```
899
+ */
900
+ async getPool(name) {
901
+ try {
902
+ const response = await this.requestJson(`/v1/pools/${encodeURIComponent(name)}`);
903
+ const data = await response.json();
904
+ return {
905
+ name: data.name,
906
+ spec: data.spec,
907
+ status: {
908
+ state: data.status.state,
909
+ assignedWorkers: data.status.assigned_workers,
910
+ createdAt: data.status.created_at,
911
+ lastRenewed: data.status.last_renewed
912
+ }
913
+ };
914
+ } catch {
915
+ return null;
916
+ }
917
+ }
918
+ /**
919
+ * Delete a pool.
920
+ *
921
+ * @param name - Pool name to delete
922
+ * @returns true if pool was deleted, false if pool didn't exist
923
+ *
924
+ * @example
925
+ * ```typescript
926
+ * // Clean up pool when done
927
+ * const deleted = await client.deletePool("eval-bench");
928
+ * if (deleted) {
929
+ * console.log("Pool deleted successfully");
930
+ * }
931
+ * ```
932
+ */
933
+ async deletePool(name) {
934
+ const poolState = this.pools.get(name);
935
+ if (poolState) {
936
+ if (poolState.timeoutId !== null) {
937
+ clearTimeout(poolState.timeoutId);
938
+ }
939
+ poolState.abortController.abort();
940
+ this.pools.delete(name);
941
+ }
942
+ try {
943
+ const url = `${this.baseUrl}/v1/pools/${encodeURIComponent(name)}`;
944
+ const headers = {
945
+ Accept: JSON_CONTENT_TYPE
946
+ };
947
+ if (this.apiKey) {
948
+ headers.Authorization = `Bearer ${this.apiKey}`;
949
+ }
950
+ const controller = new AbortController();
951
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
952
+ try {
953
+ const response = await fetch(url, {
954
+ method: "DELETE",
955
+ headers,
956
+ signal: controller.signal
957
+ });
958
+ return response.ok || response.status === 404;
959
+ } finally {
960
+ clearTimeout(timeoutId);
961
+ }
962
+ } catch {
963
+ return false;
964
+ }
965
+ }
966
+ checkServerVersion(response) {
967
+ if (this.versionWarningLogged) return;
968
+ const serverVersion = response.headers.get(SERVER_VERSION_HEADER);
969
+ if (!serverVersion) return;
970
+ try {
971
+ const sdkParts = SDK_VERSION.split(".").map(Number);
972
+ const serverParts = serverVersion.split(".").map(Number);
973
+ if (sdkParts.length < 2 || serverParts.length < 2) return;
974
+ const sdkMajor = sdkParts[0];
975
+ const sdkMinor = sdkParts[1];
976
+ const serverMajor = serverParts[0];
977
+ const serverMinor = serverParts[1];
978
+ if (sdkMajor === void 0 || sdkMinor === void 0 || serverMajor === void 0 || serverMinor === void 0) {
979
+ return;
980
+ }
981
+ if (sdkMajor !== serverMajor || Math.abs(sdkMinor - serverMinor) > 1) {
982
+ console.warn(
983
+ `[SIE SDK] Version skew detected: SDK ${SDK_VERSION}, server ${serverVersion}. Consider upgrading.`
984
+ );
985
+ this.versionWarningLogged = true;
986
+ }
987
+ } catch {
988
+ }
989
+ }
990
+ /**
991
+ * Parse GPU parameter into pool and GPU components.
992
+ *
993
+ * Supports "pool/gpu" format for pool routing.
994
+ */
995
+ parseGpuParam(gpu) {
996
+ const effectiveGpu = gpu ?? this.gpu;
997
+ if (!effectiveGpu) {
998
+ return {};
999
+ }
1000
+ const parts = effectiveGpu.split("/");
1001
+ if (parts.length === 2 && parts[0] && parts[1]) {
1002
+ return { pool: parts[0], gpu: parts[1] };
1003
+ }
1004
+ return { gpu: effectiveGpu };
1005
+ }
1006
+ /**
1007
+ * Get current cluster capacity information.
1008
+ *
1009
+ * Queries the router's /health endpoint for cluster state. Useful for
1010
+ * checking if specific GPU types are available before sending requests.
1011
+ *
1012
+ * @param gpu - Optional filter to check specific GPU type availability
1013
+ * @returns CapacityInfo with worker count, GPU types, and worker details
1014
+ *
1015
+ * @example
1016
+ * ```typescript
1017
+ * // Check cluster state
1018
+ * const capacity = await client.getCapacity();
1019
+ * console.log(`Workers: ${capacity.workerCount}, GPUs: ${capacity.liveGpuTypes}`);
1020
+ *
1021
+ * // Check if L4 GPUs are available
1022
+ * const l4Capacity = await client.getCapacity("l4");
1023
+ * if (l4Capacity.workerCount > 0) {
1024
+ * console.log("L4 workers available");
1025
+ * }
1026
+ * ```
1027
+ */
1028
+ async getCapacity(gpu) {
1029
+ const response = await this.requestJson("/health");
1030
+ const data = await response.json();
1031
+ if (data.type !== "router") {
1032
+ throw new RequestError(
1033
+ "getCapacity() requires a router endpoint. This appears to be a worker.",
1034
+ "not_router",
1035
+ 400
1036
+ );
1037
+ }
1038
+ return parseCapacityInfo(data, gpu);
1039
+ }
1040
+ /**
1041
+ * Wait for GPU capacity to become available.
1042
+ *
1043
+ * Polls the router until workers with the specified GPU type are online.
1044
+ * This is useful for pre-warming the cluster before running benchmarks.
1045
+ *
1046
+ * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
1047
+ * @param options - Wait options
1048
+ * @returns CapacityInfo once capacity is available
1049
+ *
1050
+ * @example
1051
+ * ```typescript
1052
+ * // Wait for L4 capacity before running benchmarks
1053
+ * const capacity = await client.waitForCapacity("l4", { timeout: 300000 });
1054
+ * console.log(`Ready with ${capacity.workerCount} L4 workers`);
1055
+ *
1056
+ * // Wait and pre-load a model
1057
+ * const capacityWithModel = await client.waitForCapacity("l4", { model: "bge-m3" });
1058
+ * ```
1059
+ */
1060
+ async waitForCapacity(gpu, options = {}) {
1061
+ const timeout = options.timeout ?? this.provisionTimeout;
1062
+ const pollInterval = options.pollInterval ?? 5e3;
1063
+ const startTime = Date.now();
1064
+ if (options.model) {
1065
+ await this.encode(options.model, { text: "warmup" }, { gpu, waitForCapacity: true });
1066
+ return this.getCapacity(gpu);
1067
+ }
1068
+ while (true) {
1069
+ try {
1070
+ const capacity = await this.getCapacity(gpu);
1071
+ if (capacity.workerCount > 0) {
1072
+ return capacity;
1073
+ }
1074
+ } catch {
1075
+ }
1076
+ const elapsed = Date.now() - startTime;
1077
+ if (elapsed >= timeout) {
1078
+ throw new ProvisioningError(
1079
+ `Timeout after ${elapsed}ms waiting for GPU '${gpu}' capacity`,
1080
+ gpu
1081
+ );
1082
+ }
1083
+ const remaining = timeout - elapsed;
1084
+ const delay = Math.min(pollInterval, remaining);
1085
+ await sleep(delay);
1086
+ }
1087
+ }
1088
+ /**
1089
+ * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
1090
+ */
1091
+ async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
1092
+ const startTime = Date.now();
1093
+ let loraRetries = 0;
1094
+ while (true) {
1095
+ const response = await this.request(path, body, pool, gpu);
1096
+ if (response.status === HTTP_ACCEPTED) {
1097
+ const retryAfter = getRetryAfter2(response);
1098
+ if (!waitForCapacity) {
1099
+ throw new ProvisioningError(
1100
+ `No capacity available for GPU '${gpu}'. Server is provisioning.`,
1101
+ gpu,
1102
+ retryAfter
1103
+ );
1104
+ }
1105
+ const elapsed = Date.now() - startTime;
1106
+ if (elapsed >= this.provisionTimeout) {
1107
+ throw new ProvisioningError(
1108
+ `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
1109
+ gpu,
1110
+ retryAfter
1111
+ );
1112
+ }
1113
+ const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1114
+ const remaining = this.provisionTimeout - elapsed;
1115
+ const actualDelay = Math.min(delay, remaining);
1116
+ await sleep(actualDelay);
1117
+ continue;
1118
+ }
1119
+ if (response.status === 503) {
1120
+ const clonedResponse = response.clone();
1121
+ const errorCode = await getErrorCode(clonedResponse);
1122
+ if (errorCode === LORA_LOADING_ERROR_CODE) {
1123
+ loraRetries += 1;
1124
+ if (loraRetries > LORA_LOADING_MAX_RETRIES) {
1125
+ throw new LoraLoadingError(
1126
+ `LoRA loading timeout after ${loraRetries} retries`,
1127
+ void 0,
1128
+ // We don't have lora name at this level
1129
+ model
1130
+ );
1131
+ }
1132
+ const retryAfter = getRetryAfter2(response);
1133
+ const delay = retryAfter ?? LORA_LOADING_DEFAULT_DELAY;
1134
+ await sleep(delay);
1135
+ continue;
1136
+ }
1137
+ if (errorCode === MODEL_LOADING_ERROR_CODE) {
1138
+ const elapsed = Date.now() - startTime;
1139
+ if (elapsed >= this.provisionTimeout) {
1140
+ throw new ModelLoadingError(
1141
+ `Model loading timeout after ${(elapsed / 1e3).toFixed(1)}s for '${model}'`,
1142
+ model
1143
+ );
1144
+ }
1145
+ const retryAfter = getRetryAfter2(response);
1146
+ const delay = retryAfter ?? MODEL_LOADING_DEFAULT_DELAY;
1147
+ const remaining = this.provisionTimeout - elapsed;
1148
+ const actualDelay = Math.min(delay, remaining);
1149
+ await sleep(actualDelay);
1150
+ continue;
1151
+ }
1152
+ }
1153
+ if (!response.ok) {
1154
+ await handleError(response, gpu);
1155
+ }
1156
+ this.checkServerVersion(response);
1157
+ return response;
1158
+ }
1159
+ }
1160
+ /**
1161
+ * Make a single msgpack HTTP request to the SIE server (no retry logic).
1162
+ */
1163
+ async request(path, body, pool, gpu) {
1164
+ const url = `${this.baseUrl}${path}`;
1165
+ const headers = {
1166
+ Accept: MSGPACK_CONTENT_TYPE,
1167
+ [SDK_VERSION_HEADER]: SDK_VERSION
1168
+ };
1169
+ if (body !== void 0) {
1170
+ headers["Content-Type"] = MSGPACK_CONTENT_TYPE;
1171
+ }
1172
+ if (pool) {
1173
+ headers["X-SIE-Pool"] = pool;
1174
+ }
1175
+ if (gpu) {
1176
+ headers["X-SIE-MACHINE-PROFILE"] = gpu;
1177
+ }
1178
+ if (this.apiKey) {
1179
+ headers.Authorization = `Bearer ${this.apiKey}`;
1180
+ }
1181
+ const controller = new AbortController();
1182
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
1183
+ try {
1184
+ const response = await fetch(url, {
1185
+ method: "POST",
1186
+ headers,
1187
+ body: body !== void 0 ? packMessage(body) : void 0,
1188
+ signal: controller.signal
1189
+ });
1190
+ return response;
1191
+ } catch (error) {
1192
+ if (error instanceof Error && error.name === "AbortError") {
1193
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1194
+ }
1195
+ if (error instanceof TypeError) {
1196
+ throw new SIEConnectionError(`Connection failed: ${error.message}`);
1197
+ }
1198
+ throw error;
1199
+ } finally {
1200
+ clearTimeout(timeoutId);
1201
+ }
1202
+ }
1203
+ /**
1204
+ * Make a JSON HTTP request to the SIE server.
1205
+ * Used for endpoints that return JSON (e.g., /v1/models, /health).
1206
+ */
1207
+ async requestJson(path, method = "GET") {
1208
+ const url = `${this.baseUrl}${path}`;
1209
+ const headers = {
1210
+ Accept: "application/json",
1211
+ [SDK_VERSION_HEADER]: SDK_VERSION
1212
+ };
1213
+ if (this.apiKey) {
1214
+ headers.Authorization = `Bearer ${this.apiKey}`;
1215
+ }
1216
+ const controller = new AbortController();
1217
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
1218
+ try {
1219
+ const response = await fetch(url, {
1220
+ method,
1221
+ headers,
1222
+ signal: controller.signal
1223
+ });
1224
+ if (!response.ok) {
1225
+ await handleError(response);
1226
+ }
1227
+ return response;
1228
+ } catch (error) {
1229
+ if (error instanceof Error && error.name === "AbortError") {
1230
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1231
+ }
1232
+ if (error instanceof TypeError) {
1233
+ throw new SIEConnectionError(`Connection failed: ${error.message}`);
1234
+ }
1235
+ throw error;
1236
+ } finally {
1237
+ clearTimeout(timeoutId);
1238
+ }
1239
+ }
1240
+ buildWsUrl(path) {
1241
+ const url = new URL(this.baseUrl);
1242
+ url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
1243
+ url.pathname = `${url.pathname.replace(/\/$/, "")}${path}`;
1244
+ url.search = "";
1245
+ return url.toString();
1246
+ }
1247
+ createWebSocket(url) {
1248
+ const headers = this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : void 0;
1249
+ try {
1250
+ if (headers) {
1251
+ return new WebSocket(url, [], { headers });
1252
+ }
1253
+ return new WebSocket(url);
1254
+ } catch (error) {
1255
+ if (headers) {
1256
+ throw new SIEConnectionError(
1257
+ "WebSocket auth headers are not supported in this environment"
1258
+ );
1259
+ }
1260
+ throw error;
1261
+ }
1262
+ }
1263
+ async detectEndpointType() {
1264
+ const url = `${this.baseUrl}/health`;
1265
+ const headers = { Accept: "application/json" };
1266
+ if (this.apiKey) {
1267
+ headers.Authorization = `Bearer ${this.apiKey}`;
1268
+ }
1269
+ const controller = new AbortController();
1270
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
1271
+ try {
1272
+ const response = await fetch(url, {
1273
+ method: "GET",
1274
+ headers,
1275
+ signal: controller.signal
1276
+ });
1277
+ if (!response.ok) {
1278
+ return "worker";
1279
+ }
1280
+ const data = await response.json();
1281
+ return data.type === "router" ? "cluster" : "worker";
1282
+ } catch {
1283
+ return "worker";
1284
+ } finally {
1285
+ clearTimeout(timeoutId);
1286
+ }
1287
+ }
1288
+ };
1289
+
1290
+ // src/types.ts
1291
/**
 * Copy the elements of an array-like or iterable into a plain Array.
 *
 * @param arr - Source values (e.g. a typed array)
 * @returns A new Array holding the same elements in order
 */
function toNumberArray(arr) {
  const numbers = Array.from(arr);
  return numbers;
}
1294
/**
 * Build a Float32Array from the given values.
 *
 * @param arr - Source passed straight to the Float32Array constructor
 * @returns A new Float32Array
 */
function toFloat32Array(arr) {
  const floats = new Float32Array(arr);
  return floats;
}
1297
+
1298
+ // src/scoring.ts
1299
/**
 * MaxSim score between two sets of token vectors.
 *
 * For each query vector, the largest dot product against any document
 * vector is taken, and these maxima are summed. Dot products run over the
 * query vector's length; missing components count as 0.
 *
 * @param query - Query token vectors
 * @param document - Document token vectors
 * @returns The summed score, or 0 when either side has no vectors
 */
function maxsim(query, document) {
  const hasNoTokens = query.length === 0 || document.length === 0;
  if (hasNoTokens) {
    return 0;
  }
  const dot = (a, b) => {
    let acc = 0;
    for (let d = 0; d < a.length; d += 1) {
      acc += (a[d] ?? 0) * (b[d] ?? 0);
    }
    return acc;
  };
  let score = 0;
  for (const queryVec of query) {
    let best = Number.NEGATIVE_INFINITY;
    for (const docVec of document) {
      const similarity = dot(queryVec, docVec);
      // Explicit comparison (not Math.max) so a NaN similarity is ignored.
      best = similarity > best ? similarity : best;
    }
    score += best;
  }
  return score;
}
1319
/**
 * Score one query against several documents with maxsim().
 *
 * @param query - Query token vectors
 * @param documents - List of documents, each a list of token vectors
 * @returns One score per document, in input order
 */
function maxsimDocuments(query, documents) {
  const scoreAgainstQuery = (doc) => maxsim(query, doc);
  return documents.map(scoreAgainstQuery);
}
1322
/**
 * Score every query against every document with maxsim().
 *
 * @param queries - List of queries, each a list of token vectors
 * @param documents - List of documents, each a list of token vectors
 * @returns Float32Array of length `queries.length * documents.length`,
 *   laid out query-major (all documents for the first query, then the
 *   second query, ...)
 */
function maxsimBatch(queries, documents) {
  const columns = documents.length;
  const matrix = new Float32Array(queries.length * columns);
  let row = 0;
  for (const query of queries) {
    let col = 0;
    for (const doc of documents) {
      matrix[row * columns + col] = maxsim(query, doc);
      col += 1;
    }
    row += 1;
  }
  return matrix;
}
1332
+
1333
+ // src/images.ts
1334
/**
 * Normalize an image input to a Uint8Array of raw bytes.
 *
 * Accepted inputs:
 * - Uint8Array: returned as-is (not copied);
 * - ArrayBuffer: wrapped in a Uint8Array view;
 * - Blob (when the runtime defines Blob): read via arrayBuffer();
 * - string: either a base64 data URL or a bare base64 string.
 *
 * Base64 data URLs are recognised with any media type (including an empty
 * one), with extra media-type parameters such as `;charset=utf-8`, and
 * with line breaks inside the payload. Strings that are not base64 data
 * URLs are passed to the base64 decoder unchanged.
 *
 * @param input - Image data in one of the supported forms
 * @returns The image bytes
 * @throws Error when the input type is not supported
 */
async function toImageBytes(input) {
  if (input instanceof Uint8Array) {
    return input;
  }
  if (input instanceof ArrayBuffer) {
    return new Uint8Array(input);
  }
  if (typeof Blob !== "undefined" && input instanceof Blob) {
    const buffer = await input.arrayBuffer();
    return new Uint8Array(buffer);
  }
  if (typeof input === "string") {
    // data:[<mediatype>][;params];base64,<payload> — everything before the
    // first comma is metadata; the `s` flag lets the payload span lines.
    const dataUrlMatch = /^data:[^,]*;base64,(.*)$/is.exec(input);
    if (dataUrlMatch) {
      return base64ToBytes(dataUrlMatch[1]);
    }
    return base64ToBytes(input);
  }
  throw new Error(`Unsupported image input type: ${typeof input}`);
}
1354
/**
 * Decode a base64 string into bytes.
 *
 * Uses the global atob() when it is a function and falls back to
 * Buffer.from(..., "base64") otherwise.
 *
 * @param base64 - Base64-encoded text
 * @returns The decoded bytes as a Uint8Array
 */
function base64ToBytes(base64) {
  if (typeof atob !== "function") {
    return new Uint8Array(Buffer.from(base64, "base64"));
  }
  // atob() returns a binary string: one character per decoded byte.
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
}
1365
/**
 * Build the `{ data, format }` image payload.
 *
 * @param input - Image input, converted to bytes by toImageBytes()
 * @param format - Format label stored alongside the bytes ("jpeg" by default)
 * @returns Object with the image bytes in `data` and the given `format`
 */
async function toImageWireFormat(input, format = "jpeg") {
  return { data: await toImageBytes(input), format };
}
1369
/**
 * Identify an image format from its leading signature bytes.
 *
 * Recognises JPEG (FF D8 FF), PNG (89 50 4E 47) and WebP ("RIFF" at
 * offset 0 plus "WEBP" at offset 8). Inputs shorter than 4 bytes are
 * always "unknown".
 *
 * @param bytes - Image bytes (indexable, with a `length`)
 * @returns "jpeg", "png", "webp" or "unknown"
 */
function detectImageFormat(bytes) {
  if (bytes.length < 4) {
    return "unknown";
  }
  const startsWith = (signature, offset = 0) =>
    signature.every((value, i) => bytes[offset + i] === value);
  if (startsWith([0xff, 0xd8, 0xff])) {
    return "jpeg";
  }
  if (startsWith([0x89, 0x50, 0x4e, 0x47])) {
    return "png";
  }
  const isRiff = startsWith([0x52, 0x49, 0x46, 0x46]);
  if (isRiff && bytes.length >= 12 && startsWith([0x57, 0x45, 0x42, 0x50], 8)) {
    return "webp";
  }
  return "unknown";
}
1384
+
1385
// Public CommonJS API: each name below is attached to `exports` individually
// (error classes, SDK_VERSION, SIEClient, scoring helpers, image helpers and
// the msgpack pack/unpack helpers).
+ exports.LoraLoadingError = LoraLoadingError;
1386
+ exports.ModelLoadingError = ModelLoadingError;
1387
+ exports.PoolError = PoolError;
1388
+ exports.ProvisioningError = ProvisioningError;
1389
+ exports.RequestError = RequestError;
1390
+ exports.SDK_VERSION = SDK_VERSION;
1391
+ exports.SIEClient = SIEClient;
1392
+ exports.SIEConnectionError = SIEConnectionError;
1393
+ exports.SIEError = SIEError;
1394
+ exports.ServerError = ServerError;
1395
+ exports.detectImageFormat = detectImageFormat;
1396
+ exports.maxsim = maxsim;
1397
+ exports.maxsimBatch = maxsimBatch;
1398
+ exports.maxsimDocuments = maxsimDocuments;
1399
+ exports.packMessage = packMessage;
1400
+ exports.toFloat32Array = toFloat32Array;
1401
+ exports.toImageBytes = toImageBytes;
1402
+ exports.toImageWireFormat = toImageWireFormat;
1403
+ exports.toNumberArray = toNumberArray;
1404
+ exports.unpackMessage = unpackMessage;
1405
+ //# sourceMappingURL=index.cjs.map
1406
+ //# sourceMappingURL=index.cjs.map