@superlinked/sie-sdk 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -132,10 +132,61 @@ var InputTooLongError = class extends RequestError {
132
132
  }
133
133
  };
134
134
 
135
+ // src/images.ts
136
+ async function toImageBytes(input) {
137
+ if (input instanceof Uint8Array) {
138
+ return input;
139
+ }
140
+ if (input instanceof ArrayBuffer) {
141
+ return new Uint8Array(input);
142
+ }
143
+ if (typeof Blob !== "undefined" && input instanceof Blob) {
144
+ const buffer = await input.arrayBuffer();
145
+ return new Uint8Array(buffer);
146
+ }
147
+ if (typeof input === "string") {
148
+ const dataUrlMatch = input.match(/^data:[^;]+;base64,(.+)$/);
149
+ if (dataUrlMatch?.[1]) {
150
+ return base64ToBytes(dataUrlMatch[1]);
151
+ }
152
+ return base64ToBytes(input);
153
+ }
154
+ throw new Error(`Unsupported image input type: ${typeof input}`);
155
+ }
156
+ function base64ToBytes(base64) {
157
+ if (typeof atob === "function") {
158
+ const binary = atob(base64);
159
+ const bytes = new Uint8Array(binary.length);
160
+ for (let i = 0; i < binary.length; i++) {
161
+ bytes[i] = binary.charCodeAt(i);
162
+ }
163
+ return bytes;
164
+ }
165
+ return new Uint8Array(Buffer.from(base64, "base64"));
166
+ }
167
+ async function toImageWireFormat(input, format = "jpeg") {
168
+ const data = await toImageBytes(input);
169
+ return { data, format };
170
+ }
171
+ function detectImageFormat(bytes) {
172
+ if (bytes.length < 4) {
173
+ return "unknown";
174
+ }
175
+ if (bytes[0] === 255 && bytes[1] === 216 && bytes[2] === 255) {
176
+ return "jpeg";
177
+ }
178
+ if (bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71) {
179
+ return "png";
180
+ }
181
+ if (bytes[0] === 82 && bytes[1] === 73 && bytes[2] === 70 && bytes[3] === 70 && bytes.length >= 12 && bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80) {
182
+ return "webp";
183
+ }
184
+ return "unknown";
185
+ }
186
+
135
187
  // src/internal/constants.ts
136
188
  var MSGPACK_CONTENT_TYPE = "application/msgpack";
137
189
  var JSON_CONTENT_TYPE = "application/json";
138
- var HTTP_ACCEPTED = 202;
139
190
  var HTTP_CLIENT_ERROR_MIN = 400;
140
191
  var HTTP_CLIENT_ERROR_MAX = 499;
141
192
  var HTTP_SERVER_ERROR_MIN = 500;
@@ -149,6 +200,7 @@ var LORA_LOADING_DEFAULT_DELAY = 1e3;
149
200
  var LORA_LOADING_ERROR_CODE = "LORA_LOADING";
150
201
  var MODEL_LOADING_DEFAULT_DELAY = 5e3;
151
202
  var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
203
+ var PROVISIONING_ERROR_CODE = "PROVISIONING";
152
204
  var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
153
205
  var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
154
206
  var EXT_TYPE_NUMPY = 78;
@@ -336,6 +388,11 @@ function unpackMessage(data) {
336
388
  }
337
389
 
338
390
  // src/internal/retry.ts
391
+ var RETRY_JITTER_FRACTION = 0.25;
392
+ function applyRetryJitter(delay) {
393
+ const low = delay * (1 - RETRY_JITTER_FRACTION);
394
+ return Math.max(0, low + Math.random() * (delay - low));
395
+ }
339
396
  function getRetryAfter(header) {
340
397
  if (!header) return void 0;
341
398
  const seconds = Number.parseInt(header, 10);
@@ -351,6 +408,11 @@ function getRetryAfter(header) {
351
408
  }
352
409
 
353
410
  // src/internal/parsing.ts
411
+ var SIE_ERROR_CODE_HEADER = "X-SIE-Error-Code";
412
+ function normalizeErrorCode(code) {
413
+ if (code === "provisioning") return PROVISIONING_ERROR_CODE;
414
+ return code;
415
+ }
354
416
  function getRetryAfter2(response) {
355
417
  const header = response.headers.get("Retry-After");
356
418
  return getRetryAfter(header);
@@ -379,10 +441,12 @@ async function getErrorDetail(response) {
379
441
  return void 0;
380
442
  }
381
443
  async function getErrorCode(response) {
444
+ const headerCode = response.headers.get(SIE_ERROR_CODE_HEADER);
445
+ if (headerCode) return headerCode;
382
446
  const detail = await getErrorDetail(response);
383
447
  if (!detail) return void 0;
384
448
  const code = detail.code;
385
- return typeof code === "string" ? code : void 0;
449
+ return typeof code === "string" ? normalizeErrorCode(code) : void 0;
386
450
  }
387
451
  async function throwIfModelLoadFailed(response, model) {
388
452
  if (response.status !== 502) return;
@@ -438,7 +502,8 @@ async function handleError(response, gpu) {
438
502
  message = response.statusText;
439
503
  }
440
504
  }
441
- if (status === HTTP_ACCEPTED) {
505
+ code = response.headers.get(SIE_ERROR_CODE_HEADER) ?? normalizeErrorCode(code);
506
+ if (status === 503 && code === PROVISIONING_ERROR_CODE) {
442
507
  const retryAfter = getRetryAfter2(response);
443
508
  throw new ProvisioningError(message, gpu, retryAfter);
444
509
  }
@@ -606,45 +671,38 @@ async function withProvisioningRetry(performFetch, opts) {
606
671
  const startTime = Date.now();
607
672
  while (true) {
608
673
  const response = await performFetch();
609
- if (response.status === HTTP_ACCEPTED) {
610
- if (!opts.waitForCapacity) {
611
- throw new ProvisioningError(
612
- "No capacity available. Server is provisioning.",
613
- opts.gpu,
614
- getRetryAfter2(response)
615
- );
616
- }
617
- const elapsed = Date.now() - startTime;
618
- if (elapsed >= opts.provisionTimeoutMs) {
619
- throw new ProvisioningError(
620
- `Provisioning timeout after ${elapsed}ms`,
621
- opts.gpu,
622
- getRetryAfter2(response)
623
- );
624
- }
625
- const delay = getRetryAfter2(response) ?? DEFAULT_RETRY_DELAY;
626
- await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
627
- continue;
628
- }
629
674
  await throwIfModelLoadFailed(response, opts.model);
630
675
  if (response.status === 503) {
631
676
  const errorCode = await getErrorCode(response.clone());
632
- if (errorCode === MODEL_LOADING_ERROR_CODE) {
677
+ if (errorCode === PROVISIONING_ERROR_CODE) {
678
+ if (!opts.waitForCapacity) {
679
+ throw new ProvisioningError(
680
+ "No capacity available. Server is provisioning.",
681
+ opts.gpu,
682
+ getRetryAfter2(response)
683
+ );
684
+ }
633
685
  const elapsed = Date.now() - startTime;
634
686
  if (elapsed >= opts.provisionTimeoutMs) {
635
- throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
687
+ throw new ProvisioningError(
688
+ `Provisioning timeout after ${elapsed}ms`,
689
+ opts.gpu,
690
+ getRetryAfter2(response)
691
+ );
636
692
  }
637
- const delay = getRetryAfter2(response) ?? MODEL_LOADING_DEFAULT_DELAY;
693
+ const retryAfter = getRetryAfter2(response);
694
+ const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
638
695
  await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
639
696
  continue;
640
697
  }
641
- if (opts.waitForCapacity) {
698
+ if (errorCode === MODEL_LOADING_ERROR_CODE) {
642
699
  const elapsed = Date.now() - startTime;
643
- if (elapsed < opts.provisionTimeoutMs) {
644
- const delay = getRetryAfter2(response) ?? DEFAULT_RETRY_DELAY;
645
- await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
646
- continue;
700
+ if (elapsed >= opts.provisionTimeoutMs) {
701
+ throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
647
702
  }
703
+ const delay = getRetryAfter2(response) ?? MODEL_LOADING_DEFAULT_DELAY;
704
+ await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
705
+ continue;
648
706
  }
649
707
  }
650
708
  if (!response.ok) {
@@ -796,7 +854,7 @@ function extractDataPayload(block) {
796
854
  }
797
855
 
798
856
  // src/version.ts
799
- var SDK_VERSION = "0.6.4";
857
+ var SDK_VERSION = "0.6.5";
800
858
 
801
859
  // src/client.ts
802
860
  function sleep2(ms) {
@@ -817,6 +875,24 @@ function abortableSleep(ms, signal) {
817
875
  });
818
876
  }
819
877
  var _LEASE_RENEWAL_MAX_RETRIES = 5;
878
+ function isImageWireFormat(image) {
879
+ return typeof image === "object" && image !== null && "data" in image;
880
+ }
881
+ async function imageForWire(image) {
882
+ if (isImageWireFormat(image)) {
883
+ return image;
884
+ }
885
+ return toImageWireFormat(image);
886
+ }
887
+ async function itemImagesForWire(item) {
888
+ if (!item.images || item.images.length === 0) {
889
+ return item;
890
+ }
891
+ return { ...item, images: await Promise.all(item.images.map(imageForWire)) };
892
+ }
893
+ async function itemsImagesForWire(items) {
894
+ return Promise.all(items.map(itemImagesForWire));
895
+ }
820
896
  function extractChatChunkError(chunk) {
821
897
  const err = chunk.error;
822
898
  if (!err) return null;
@@ -871,8 +947,9 @@ var SIEClient = class {
871
947
  async encode(model, items, options = {}) {
872
948
  const isSingleItem = !Array.isArray(items);
873
949
  const itemsArray = isSingleItem ? [items] : items;
950
+ const itemsForWire = await itemsImagesForWire(itemsArray);
874
951
  const body = {
875
- items: itemsArray
952
+ items: itemsForWire
876
953
  };
877
954
  const params = {};
878
955
  if (options.outputTypes) {
@@ -1316,11 +1393,11 @@ var SIEClient = class {
1316
1393
  * if the consumer-supplied `extractError` returns an `SIEStreamError`, the
1317
1394
  * generator throws it instead of yielding the chunk.
1318
1395
  *
1319
- * Retry policy mirrors {@link generate}: only the SAFE pre-execution
1320
- * capacity signals — `202` (provisioning) and `503 MODEL_LOADING` — are
1321
- * retried, and only while `waitForCapacity` is set and the provision
1322
- * budget remains. Once the body opens we never retry (the call is
1323
- * non-idempotent; a mid-stream failure must not re-issue generation).
1396
+ * Retry policy mirrors {@link generate}: only explicit SAFE
1397
+ * pre-execution capacity signals — `503 PROVISIONING` and
1398
+ * `503 MODEL_LOADING` — are retried while the provision budget remains.
1399
+ * Once the body opens we never retry (the call is non-idempotent; a
1400
+ * mid-stream failure must not re-issue generation).
1324
1401
  *
1325
1402
  * @internal
1326
1403
  */
@@ -1368,40 +1445,27 @@ var SIEClient = class {
1368
1445
  } finally {
1369
1446
  clearTimeout(preStreamTimeoutId);
1370
1447
  }
1371
- if (attemptResponse.status === HTTP_ACCEPTED) {
1372
- if (!waitForCapacity) {
1373
- throw new ProvisioningError(
1374
- "No capacity available. Server is provisioning.",
1375
- gpu,
1376
- getRetryAfter2(attemptResponse)
1377
- );
1378
- }
1379
- const elapsed = Date.now() - startTime;
1380
- if (elapsed >= this.provisionTimeout) {
1381
- throw new ProvisioningError(
1382
- `Provisioning timeout after ${elapsed}ms`,
1383
- gpu,
1384
- getRetryAfter2(attemptResponse)
1385
- );
1386
- }
1387
- const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
1388
- if (await abortableSleep(
1389
- Math.min(delay, this.provisionTimeout - elapsed),
1390
- controller.signal
1391
- )) {
1392
- throw new SIEConnectionError("Stream aborted while provisioning", "other");
1393
- }
1394
- continue;
1395
- }
1396
1448
  await throwIfModelLoadFailed(attemptResponse, model);
1397
1449
  if (attemptResponse.status === 503) {
1398
1450
  const errorCode = await getErrorCode(attemptResponse.clone());
1399
- if (errorCode === MODEL_LOADING_ERROR_CODE && waitForCapacity) {
1451
+ if (errorCode === PROVISIONING_ERROR_CODE) {
1452
+ if (!waitForCapacity) {
1453
+ throw new ProvisioningError(
1454
+ "No capacity available. Server is provisioning.",
1455
+ gpu,
1456
+ getRetryAfter2(attemptResponse)
1457
+ );
1458
+ }
1400
1459
  const elapsed = Date.now() - startTime;
1401
1460
  if (elapsed >= this.provisionTimeout) {
1402
- throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
1461
+ throw new ProvisioningError(
1462
+ `Provisioning timeout after ${elapsed}ms`,
1463
+ gpu,
1464
+ getRetryAfter2(attemptResponse)
1465
+ );
1403
1466
  }
1404
- const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
1467
+ const retryAfter = getRetryAfter2(attemptResponse);
1468
+ const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
1405
1469
  if (await abortableSleep(
1406
1470
  Math.min(delay, this.provisionTimeout - elapsed),
1407
1471
  controller.signal
@@ -1410,18 +1474,19 @@ var SIEClient = class {
1410
1474
  }
1411
1475
  continue;
1412
1476
  }
1413
- if (waitForCapacity) {
1477
+ if (errorCode === MODEL_LOADING_ERROR_CODE) {
1414
1478
  const elapsed = Date.now() - startTime;
1415
- if (elapsed < this.provisionTimeout) {
1416
- const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
1417
- if (await abortableSleep(
1418
- Math.min(delay, this.provisionTimeout - elapsed),
1419
- controller.signal
1420
- )) {
1421
- throw new SIEConnectionError("Stream aborted while provisioning", "other");
1422
- }
1423
- continue;
1479
+ if (elapsed >= this.provisionTimeout) {
1480
+ throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
1481
+ }
1482
+ const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
1483
+ if (await abortableSleep(
1484
+ Math.min(delay, this.provisionTimeout - elapsed),
1485
+ controller.signal
1486
+ )) {
1487
+ throw new SIEConnectionError("Stream aborted while provisioning", "other");
1424
1488
  }
1489
+ continue;
1425
1490
  }
1426
1491
  }
1427
1492
  if (attemptResponse.status !== 200) {
@@ -1471,9 +1536,11 @@ var SIEClient = class {
1471
1536
  return headers;
1472
1537
  }
1473
1538
  async score(model, query, items, options = {}) {
1539
+ const queryForWire = await itemImagesForWire(query);
1540
+ const itemsForWire = await itemsImagesForWire(items);
1474
1541
  const body = {
1475
- query,
1476
- items
1542
+ query: queryForWire,
1543
+ items: itemsForWire
1477
1544
  };
1478
1545
  const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
1479
1546
  const { pool, gpu } = this.parseGpuParam(options.gpu);
@@ -1510,8 +1577,9 @@ var SIEClient = class {
1510
1577
  async extract(model, items, options) {
1511
1578
  const isSingleItem = !Array.isArray(items);
1512
1579
  const itemsArray = isSingleItem ? [items] : items;
1580
+ const itemsForWire = await itemsImagesForWire(itemsArray);
1513
1581
  const body = {
1514
- items: itemsArray
1582
+ items: itemsForWire
1515
1583
  };
1516
1584
  const params = {
1517
1585
  labels: options.labels
@@ -1897,9 +1965,9 @@ var SIEClient = class {
1897
1965
  /**
1898
1966
  * Make a msgpack HTTP request with retry logic.
1899
1967
  *
1900
- * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
1901
- * - 202 Accepted (provisioning)
1902
- * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
1968
+ * Retried (capped by `provisionTimeout`):
1969
+ * - 503 `PROVISIONING` when `waitForCapacity: true`
1970
+ * - 503 `MODEL_LOADING` / `LORA_LOADING`
1903
1971
  * - `SIEConnectionError` with `kind === "connect"` (issue #95)
1904
1972
  *
1905
1973
  * `kind === "timeout"` is NOT retried — would extend the user-visible
@@ -1924,34 +1992,34 @@ var SIEClient = class {
1924
1992
  }
1925
1993
  throw err;
1926
1994
  }
1927
- if (response.status === HTTP_ACCEPTED) {
1928
- const retryAfter = getRetryAfter2(response);
1929
- if (!waitForCapacity) {
1930
- throw new ProvisioningError(
1931
- `No capacity available for GPU '${gpu}'. Server is provisioning.`,
1932
- gpu,
1933
- retryAfter
1934
- );
1935
- }
1936
- const elapsed = Date.now() - startTime;
1937
- if (elapsed >= this.provisionTimeout) {
1938
- throw new ProvisioningError(
1939
- `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
1940
- gpu,
1941
- retryAfter
1942
- );
1943
- }
1944
- const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1945
- const remaining = this.provisionTimeout - elapsed;
1946
- const actualDelay = Math.min(delay, remaining);
1947
- await sleep2(actualDelay);
1948
- continue;
1949
- }
1950
1995
  await throwIfModelLoadFailed(response, model);
1951
1996
  await throwIfInputTooLong(response, model);
1952
1997
  if (response.status === 503) {
1953
1998
  const clonedResponse = response.clone();
1954
1999
  const errorCode = await getErrorCode(clonedResponse);
2000
+ if (errorCode === PROVISIONING_ERROR_CODE) {
2001
+ const retryAfter = getRetryAfter2(response);
2002
+ if (!waitForCapacity) {
2003
+ throw new ProvisioningError(
2004
+ `No capacity available for GPU '${gpu}'. Server is provisioning.`,
2005
+ gpu,
2006
+ retryAfter
2007
+ );
2008
+ }
2009
+ const elapsed = Date.now() - startTime;
2010
+ if (elapsed >= this.provisionTimeout) {
2011
+ throw new ProvisioningError(
2012
+ `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
2013
+ gpu,
2014
+ retryAfter
2015
+ );
2016
+ }
2017
+ const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
2018
+ const remaining = this.provisionTimeout - elapsed;
2019
+ const actualDelay = Math.min(delay, remaining);
2020
+ await sleep2(actualDelay);
2021
+ continue;
2022
+ }
1955
2023
  if (errorCode === LORA_LOADING_ERROR_CODE) {
1956
2024
  loraRetries += 1;
1957
2025
  if (loraRetries > LORA_LOADING_MAX_RETRIES) {
@@ -1982,17 +2050,6 @@ var SIEClient = class {
1982
2050
  await sleep2(actualDelay);
1983
2051
  continue;
1984
2052
  }
1985
- if (waitForCapacity) {
1986
- const elapsed = Date.now() - startTime;
1987
- if (elapsed < this.provisionTimeout) {
1988
- const retryAfter = getRetryAfter2(response);
1989
- const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1990
- const remaining = this.provisionTimeout - elapsed;
1991
- const actualDelay = Math.min(delay, remaining);
1992
- await sleep2(actualDelay);
1993
- continue;
1994
- }
1995
- }
1996
2053
  }
1997
2054
  if (!response.ok) {
1998
2055
  await handleError(response, gpu);
@@ -2219,58 +2276,6 @@ function maxsimBatch(queries, documents) {
2219
2276
  return scores;
2220
2277
  }
2221
2278
 
2222
- // src/images.ts
2223
- async function toImageBytes(input) {
2224
- if (input instanceof Uint8Array) {
2225
- return input;
2226
- }
2227
- if (input instanceof ArrayBuffer) {
2228
- return new Uint8Array(input);
2229
- }
2230
- if (typeof Blob !== "undefined" && input instanceof Blob) {
2231
- const buffer = await input.arrayBuffer();
2232
- return new Uint8Array(buffer);
2233
- }
2234
- if (typeof input === "string") {
2235
- const dataUrlMatch = input.match(/^data:[^;]+;base64,(.+)$/);
2236
- if (dataUrlMatch?.[1]) {
2237
- return base64ToBytes(dataUrlMatch[1]);
2238
- }
2239
- return base64ToBytes(input);
2240
- }
2241
- throw new Error(`Unsupported image input type: ${typeof input}`);
2242
- }
2243
- function base64ToBytes(base64) {
2244
- if (typeof atob === "function") {
2245
- const binary = atob(base64);
2246
- const bytes = new Uint8Array(binary.length);
2247
- for (let i = 0; i < binary.length; i++) {
2248
- bytes[i] = binary.charCodeAt(i);
2249
- }
2250
- return bytes;
2251
- }
2252
- return new Uint8Array(Buffer.from(base64, "base64"));
2253
- }
2254
- async function toImageWireFormat(input, format = "jpeg") {
2255
- const data = await toImageBytes(input);
2256
- return { data, format };
2257
- }
2258
- function detectImageFormat(bytes) {
2259
- if (bytes.length < 4) {
2260
- return "unknown";
2261
- }
2262
- if (bytes[0] === 255 && bytes[1] === 216 && bytes[2] === 255) {
2263
- return "jpeg";
2264
- }
2265
- if (bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71) {
2266
- return "png";
2267
- }
2268
- if (bytes[0] === 82 && bytes[1] === 73 && bytes[2] === 70 && bytes[3] === 70 && bytes.length >= 12 && bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80) {
2269
- return "webp";
2270
- }
2271
- return "unknown";
2272
- }
2273
-
2274
2279
  export { InputTooLongError, LoraLoadingError, ModelLoadFailedError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, SIEStreamError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
2275
2280
  //# sourceMappingURL=index.js.map
2276
2281
  //# sourceMappingURL=index.js.map