@superlinked/sie-sdk 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -134,10 +134,61 @@ var InputTooLongError = class extends RequestError {
134
134
  }
135
135
  };
136
136
 
137
+ // src/images.ts
138
+ async function toImageBytes(input) {
139
+ if (input instanceof Uint8Array) {
140
+ return input;
141
+ }
142
+ if (input instanceof ArrayBuffer) {
143
+ return new Uint8Array(input);
144
+ }
145
+ if (typeof Blob !== "undefined" && input instanceof Blob) {
146
+ const buffer = await input.arrayBuffer();
147
+ return new Uint8Array(buffer);
148
+ }
149
+ if (typeof input === "string") {
150
+ const dataUrlMatch = input.match(/^data:[^;]+;base64,(.+)$/);
151
+ if (dataUrlMatch?.[1]) {
152
+ return base64ToBytes(dataUrlMatch[1]);
153
+ }
154
+ return base64ToBytes(input);
155
+ }
156
+ throw new Error(`Unsupported image input type: ${typeof input}`);
157
+ }
158
+ function base64ToBytes(base64) {
159
+ if (typeof atob === "function") {
160
+ const binary = atob(base64);
161
+ const bytes = new Uint8Array(binary.length);
162
+ for (let i = 0; i < binary.length; i++) {
163
+ bytes[i] = binary.charCodeAt(i);
164
+ }
165
+ return bytes;
166
+ }
167
+ return new Uint8Array(Buffer.from(base64, "base64"));
168
+ }
169
+ async function toImageWireFormat(input, format = "jpeg") {
170
+ const data = await toImageBytes(input);
171
+ return { data, format };
172
+ }
173
+ function detectImageFormat(bytes) {
174
+ if (bytes.length < 4) {
175
+ return "unknown";
176
+ }
177
+ if (bytes[0] === 255 && bytes[1] === 216 && bytes[2] === 255) {
178
+ return "jpeg";
179
+ }
180
+ if (bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71) {
181
+ return "png";
182
+ }
183
+ if (bytes[0] === 82 && bytes[1] === 73 && bytes[2] === 70 && bytes[3] === 70 && bytes.length >= 12 && bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80) {
184
+ return "webp";
185
+ }
186
+ return "unknown";
187
+ }
188
+
137
189
  // src/internal/constants.ts
138
190
  var MSGPACK_CONTENT_TYPE = "application/msgpack";
139
191
  var JSON_CONTENT_TYPE = "application/json";
140
- var HTTP_ACCEPTED = 202;
141
192
  var HTTP_CLIENT_ERROR_MIN = 400;
142
193
  var HTTP_CLIENT_ERROR_MAX = 499;
143
194
  var HTTP_SERVER_ERROR_MIN = 500;
@@ -151,6 +202,7 @@ var LORA_LOADING_DEFAULT_DELAY = 1e3;
151
202
  var LORA_LOADING_ERROR_CODE = "LORA_LOADING";
152
203
  var MODEL_LOADING_DEFAULT_DELAY = 5e3;
153
204
  var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
205
+ var PROVISIONING_ERROR_CODE = "PROVISIONING";
154
206
  var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
155
207
  var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
156
208
  var EXT_TYPE_NUMPY = 78;
@@ -338,6 +390,11 @@ function unpackMessage(data) {
338
390
  }
339
391
 
340
392
  // src/internal/retry.ts
393
+ var RETRY_JITTER_FRACTION = 0.25;
394
+ function applyRetryJitter(delay) {
395
+ const low = delay * (1 - RETRY_JITTER_FRACTION);
396
+ return Math.max(0, low + Math.random() * (delay - low));
397
+ }
341
398
  function getRetryAfter(header) {
342
399
  if (!header) return void 0;
343
400
  const seconds = Number.parseInt(header, 10);
@@ -353,6 +410,11 @@ function getRetryAfter(header) {
353
410
  }
354
411
 
355
412
  // src/internal/parsing.ts
413
+ var SIE_ERROR_CODE_HEADER = "X-SIE-Error-Code";
414
+ function normalizeErrorCode(code) {
415
+ if (code === "provisioning") return PROVISIONING_ERROR_CODE;
416
+ return code;
417
+ }
356
418
  function getRetryAfter2(response) {
357
419
  const header = response.headers.get("Retry-After");
358
420
  return getRetryAfter(header);
@@ -381,10 +443,12 @@ async function getErrorDetail(response) {
381
443
  return void 0;
382
444
  }
383
445
  async function getErrorCode(response) {
446
+ const headerCode = response.headers.get(SIE_ERROR_CODE_HEADER);
447
+ if (headerCode) return headerCode;
384
448
  const detail = await getErrorDetail(response);
385
449
  if (!detail) return void 0;
386
450
  const code = detail.code;
387
- return typeof code === "string" ? code : void 0;
451
+ return typeof code === "string" ? normalizeErrorCode(code) : void 0;
388
452
  }
389
453
  async function throwIfModelLoadFailed(response, model) {
390
454
  if (response.status !== 502) return;
@@ -440,7 +504,8 @@ async function handleError(response, gpu) {
440
504
  message = response.statusText;
441
505
  }
442
506
  }
443
- if (status === HTTP_ACCEPTED) {
507
+ code = response.headers.get(SIE_ERROR_CODE_HEADER) ?? normalizeErrorCode(code);
508
+ if (status === 503 && code === PROVISIONING_ERROR_CODE) {
444
509
  const retryAfter = getRetryAfter2(response);
445
510
  throw new ProvisioningError(message, gpu, retryAfter);
446
511
  }
@@ -608,45 +673,38 @@ async function withProvisioningRetry(performFetch, opts) {
608
673
  const startTime = Date.now();
609
674
  while (true) {
610
675
  const response = await performFetch();
611
- if (response.status === HTTP_ACCEPTED) {
612
- if (!opts.waitForCapacity) {
613
- throw new ProvisioningError(
614
- "No capacity available. Server is provisioning.",
615
- opts.gpu,
616
- getRetryAfter2(response)
617
- );
618
- }
619
- const elapsed = Date.now() - startTime;
620
- if (elapsed >= opts.provisionTimeoutMs) {
621
- throw new ProvisioningError(
622
- `Provisioning timeout after ${elapsed}ms`,
623
- opts.gpu,
624
- getRetryAfter2(response)
625
- );
626
- }
627
- const delay = getRetryAfter2(response) ?? DEFAULT_RETRY_DELAY;
628
- await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
629
- continue;
630
- }
631
676
  await throwIfModelLoadFailed(response, opts.model);
632
677
  if (response.status === 503) {
633
678
  const errorCode = await getErrorCode(response.clone());
634
- if (errorCode === MODEL_LOADING_ERROR_CODE) {
679
+ if (errorCode === PROVISIONING_ERROR_CODE) {
680
+ if (!opts.waitForCapacity) {
681
+ throw new ProvisioningError(
682
+ "No capacity available. Server is provisioning.",
683
+ opts.gpu,
684
+ getRetryAfter2(response)
685
+ );
686
+ }
635
687
  const elapsed = Date.now() - startTime;
636
688
  if (elapsed >= opts.provisionTimeoutMs) {
637
- throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
689
+ throw new ProvisioningError(
690
+ `Provisioning timeout after ${elapsed}ms`,
691
+ opts.gpu,
692
+ getRetryAfter2(response)
693
+ );
638
694
  }
639
- const delay = getRetryAfter2(response) ?? MODEL_LOADING_DEFAULT_DELAY;
695
+ const retryAfter = getRetryAfter2(response);
696
+ const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
640
697
  await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
641
698
  continue;
642
699
  }
643
- if (opts.waitForCapacity) {
700
+ if (errorCode === MODEL_LOADING_ERROR_CODE) {
644
701
  const elapsed = Date.now() - startTime;
645
- if (elapsed < opts.provisionTimeoutMs) {
646
- const delay = getRetryAfter2(response) ?? DEFAULT_RETRY_DELAY;
647
- await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
648
- continue;
702
+ if (elapsed >= opts.provisionTimeoutMs) {
703
+ throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
649
704
  }
705
+ const delay = getRetryAfter2(response) ?? MODEL_LOADING_DEFAULT_DELAY;
706
+ await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
707
+ continue;
650
708
  }
651
709
  }
652
710
  if (!response.ok) {
@@ -798,7 +856,7 @@ function extractDataPayload(block) {
798
856
  }
799
857
 
800
858
  // src/version.ts
801
- var SDK_VERSION = "0.6.4";
859
+ var SDK_VERSION = "0.6.6";
802
860
 
803
861
  // src/client.ts
804
862
  function sleep2(ms) {
@@ -819,6 +877,24 @@ function abortableSleep(ms, signal) {
819
877
  });
820
878
  }
821
879
  var _LEASE_RENEWAL_MAX_RETRIES = 5;
880
+ function isImageWireFormat(image) {
881
+ return typeof image === "object" && image !== null && "data" in image;
882
+ }
883
+ async function imageForWire(image) {
884
+ if (isImageWireFormat(image)) {
885
+ return image;
886
+ }
887
+ return toImageWireFormat(image);
888
+ }
889
+ async function itemImagesForWire(item) {
890
+ if (!item.images || item.images.length === 0) {
891
+ return item;
892
+ }
893
+ return { ...item, images: await Promise.all(item.images.map(imageForWire)) };
894
+ }
895
+ async function itemsImagesForWire(items) {
896
+ return Promise.all(items.map(itemImagesForWire));
897
+ }
822
898
  function extractChatChunkError(chunk) {
823
899
  const err = chunk.error;
824
900
  if (!err) return null;
@@ -873,8 +949,9 @@ var SIEClient = class {
873
949
  async encode(model, items, options = {}) {
874
950
  const isSingleItem = !Array.isArray(items);
875
951
  const itemsArray = isSingleItem ? [items] : items;
952
+ const itemsForWire = await itemsImagesForWire(itemsArray);
876
953
  const body = {
877
- items: itemsArray
954
+ items: itemsForWire
878
955
  };
879
956
  const params = {};
880
957
  if (options.outputTypes) {
@@ -1318,11 +1395,11 @@ var SIEClient = class {
1318
1395
  * if the consumer-supplied `extractError` returns an `SIEStreamError`, the
1319
1396
  * generator throws it instead of yielding the chunk.
1320
1397
  *
1321
- * Retry policy mirrors {@link generate}: only the SAFE pre-execution
1322
- * capacity signals — `202` (provisioning) and `503 MODEL_LOADING` — are
1323
- * retried, and only while `waitForCapacity` is set and the provision
1324
- * budget remains. Once the body opens we never retry (the call is
1325
- * non-idempotent; a mid-stream failure must not re-issue generation).
1398
+ * Retry policy mirrors {@link generate}: only explicit SAFE
1399
+ * pre-execution capacity signals — `503 PROVISIONING` and
1400
+ * `503 MODEL_LOADING` are retried while the provision budget remains.
1401
+ * Once the body opens we never retry (the call is non-idempotent; a
1402
+ * mid-stream failure must not re-issue generation).
1326
1403
  *
1327
1404
  * @internal
1328
1405
  */
@@ -1370,40 +1447,27 @@ var SIEClient = class {
1370
1447
  } finally {
1371
1448
  clearTimeout(preStreamTimeoutId);
1372
1449
  }
1373
- if (attemptResponse.status === HTTP_ACCEPTED) {
1374
- if (!waitForCapacity) {
1375
- throw new ProvisioningError(
1376
- "No capacity available. Server is provisioning.",
1377
- gpu,
1378
- getRetryAfter2(attemptResponse)
1379
- );
1380
- }
1381
- const elapsed = Date.now() - startTime;
1382
- if (elapsed >= this.provisionTimeout) {
1383
- throw new ProvisioningError(
1384
- `Provisioning timeout after ${elapsed}ms`,
1385
- gpu,
1386
- getRetryAfter2(attemptResponse)
1387
- );
1388
- }
1389
- const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
1390
- if (await abortableSleep(
1391
- Math.min(delay, this.provisionTimeout - elapsed),
1392
- controller.signal
1393
- )) {
1394
- throw new SIEConnectionError("Stream aborted while provisioning", "other");
1395
- }
1396
- continue;
1397
- }
1398
1450
  await throwIfModelLoadFailed(attemptResponse, model);
1399
1451
  if (attemptResponse.status === 503) {
1400
1452
  const errorCode = await getErrorCode(attemptResponse.clone());
1401
- if (errorCode === MODEL_LOADING_ERROR_CODE && waitForCapacity) {
1453
+ if (errorCode === PROVISIONING_ERROR_CODE) {
1454
+ if (!waitForCapacity) {
1455
+ throw new ProvisioningError(
1456
+ "No capacity available. Server is provisioning.",
1457
+ gpu,
1458
+ getRetryAfter2(attemptResponse)
1459
+ );
1460
+ }
1402
1461
  const elapsed = Date.now() - startTime;
1403
1462
  if (elapsed >= this.provisionTimeout) {
1404
- throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
1463
+ throw new ProvisioningError(
1464
+ `Provisioning timeout after ${elapsed}ms`,
1465
+ gpu,
1466
+ getRetryAfter2(attemptResponse)
1467
+ );
1405
1468
  }
1406
- const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
1469
+ const retryAfter = getRetryAfter2(attemptResponse);
1470
+ const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
1407
1471
  if (await abortableSleep(
1408
1472
  Math.min(delay, this.provisionTimeout - elapsed),
1409
1473
  controller.signal
@@ -1412,18 +1476,19 @@ var SIEClient = class {
1412
1476
  }
1413
1477
  continue;
1414
1478
  }
1415
- if (waitForCapacity) {
1479
+ if (errorCode === MODEL_LOADING_ERROR_CODE) {
1416
1480
  const elapsed = Date.now() - startTime;
1417
- if (elapsed < this.provisionTimeout) {
1418
- const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
1419
- if (await abortableSleep(
1420
- Math.min(delay, this.provisionTimeout - elapsed),
1421
- controller.signal
1422
- )) {
1423
- throw new SIEConnectionError("Stream aborted while provisioning", "other");
1424
- }
1425
- continue;
1481
+ if (elapsed >= this.provisionTimeout) {
1482
+ throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
1483
+ }
1484
+ const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
1485
+ if (await abortableSleep(
1486
+ Math.min(delay, this.provisionTimeout - elapsed),
1487
+ controller.signal
1488
+ )) {
1489
+ throw new SIEConnectionError("Stream aborted while provisioning", "other");
1426
1490
  }
1491
+ continue;
1427
1492
  }
1428
1493
  }
1429
1494
  if (attemptResponse.status !== 200) {
@@ -1473,9 +1538,11 @@ var SIEClient = class {
1473
1538
  return headers;
1474
1539
  }
1475
1540
  async score(model, query, items, options = {}) {
1541
+ const queryForWire = await itemImagesForWire(query);
1542
+ const itemsForWire = await itemsImagesForWire(items);
1476
1543
  const body = {
1477
- query,
1478
- items
1544
+ query: queryForWire,
1545
+ items: itemsForWire
1479
1546
  };
1480
1547
  const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
1481
1548
  const { pool, gpu } = this.parseGpuParam(options.gpu);
@@ -1512,8 +1579,9 @@ var SIEClient = class {
1512
1579
  async extract(model, items, options) {
1513
1580
  const isSingleItem = !Array.isArray(items);
1514
1581
  const itemsArray = isSingleItem ? [items] : items;
1582
+ const itemsForWire = await itemsImagesForWire(itemsArray);
1515
1583
  const body = {
1516
- items: itemsArray
1584
+ items: itemsForWire
1517
1585
  };
1518
1586
  const params = {
1519
1587
  labels: options.labels
@@ -1899,9 +1967,9 @@ var SIEClient = class {
1899
1967
  /**
1900
1968
  * Make a msgpack HTTP request with retry logic.
1901
1969
  *
1902
- * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
1903
- * - 202 Accepted (provisioning)
1904
- * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
1970
+ * Retried (capped by `provisionTimeout`):
1971
+ * - 503 `PROVISIONING` when `waitForCapacity: true`
1972
+ * - 503 `MODEL_LOADING` / `LORA_LOADING`
1905
1973
  * - `SIEConnectionError` with `kind === "connect"` (issue #95)
1906
1974
  *
1907
1975
  * `kind === "timeout"` is NOT retried — would extend the user-visible
@@ -1926,34 +1994,34 @@ var SIEClient = class {
1926
1994
  }
1927
1995
  throw err;
1928
1996
  }
1929
- if (response.status === HTTP_ACCEPTED) {
1930
- const retryAfter = getRetryAfter2(response);
1931
- if (!waitForCapacity) {
1932
- throw new ProvisioningError(
1933
- `No capacity available for GPU '${gpu}'. Server is provisioning.`,
1934
- gpu,
1935
- retryAfter
1936
- );
1937
- }
1938
- const elapsed = Date.now() - startTime;
1939
- if (elapsed >= this.provisionTimeout) {
1940
- throw new ProvisioningError(
1941
- `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
1942
- gpu,
1943
- retryAfter
1944
- );
1945
- }
1946
- const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1947
- const remaining = this.provisionTimeout - elapsed;
1948
- const actualDelay = Math.min(delay, remaining);
1949
- await sleep2(actualDelay);
1950
- continue;
1951
- }
1952
1997
  await throwIfModelLoadFailed(response, model);
1953
1998
  await throwIfInputTooLong(response, model);
1954
1999
  if (response.status === 503) {
1955
2000
  const clonedResponse = response.clone();
1956
2001
  const errorCode = await getErrorCode(clonedResponse);
2002
+ if (errorCode === PROVISIONING_ERROR_CODE) {
2003
+ const retryAfter = getRetryAfter2(response);
2004
+ if (!waitForCapacity) {
2005
+ throw new ProvisioningError(
2006
+ `No capacity available for GPU '${gpu}'. Server is provisioning.`,
2007
+ gpu,
2008
+ retryAfter
2009
+ );
2010
+ }
2011
+ const elapsed = Date.now() - startTime;
2012
+ if (elapsed >= this.provisionTimeout) {
2013
+ throw new ProvisioningError(
2014
+ `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
2015
+ gpu,
2016
+ retryAfter
2017
+ );
2018
+ }
2019
+ const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
2020
+ const remaining = this.provisionTimeout - elapsed;
2021
+ const actualDelay = Math.min(delay, remaining);
2022
+ await sleep2(actualDelay);
2023
+ continue;
2024
+ }
1957
2025
  if (errorCode === LORA_LOADING_ERROR_CODE) {
1958
2026
  loraRetries += 1;
1959
2027
  if (loraRetries > LORA_LOADING_MAX_RETRIES) {
@@ -1984,17 +2052,6 @@ var SIEClient = class {
1984
2052
  await sleep2(actualDelay);
1985
2053
  continue;
1986
2054
  }
1987
- if (waitForCapacity) {
1988
- const elapsed = Date.now() - startTime;
1989
- if (elapsed < this.provisionTimeout) {
1990
- const retryAfter = getRetryAfter2(response);
1991
- const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1992
- const remaining = this.provisionTimeout - elapsed;
1993
- const actualDelay = Math.min(delay, remaining);
1994
- await sleep2(actualDelay);
1995
- continue;
1996
- }
1997
- }
1998
2055
  }
1999
2056
  if (!response.ok) {
2000
2057
  await handleError(response, gpu);
@@ -2221,58 +2278,6 @@ function maxsimBatch(queries, documents) {
2221
2278
  return scores;
2222
2279
  }
2223
2280
 
2224
- // src/images.ts
2225
- async function toImageBytes(input) {
2226
- if (input instanceof Uint8Array) {
2227
- return input;
2228
- }
2229
- if (input instanceof ArrayBuffer) {
2230
- return new Uint8Array(input);
2231
- }
2232
- if (typeof Blob !== "undefined" && input instanceof Blob) {
2233
- const buffer = await input.arrayBuffer();
2234
- return new Uint8Array(buffer);
2235
- }
2236
- if (typeof input === "string") {
2237
- const dataUrlMatch = input.match(/^data:[^;]+;base64,(.+)$/);
2238
- if (dataUrlMatch?.[1]) {
2239
- return base64ToBytes(dataUrlMatch[1]);
2240
- }
2241
- return base64ToBytes(input);
2242
- }
2243
- throw new Error(`Unsupported image input type: ${typeof input}`);
2244
- }
2245
- function base64ToBytes(base64) {
2246
- if (typeof atob === "function") {
2247
- const binary = atob(base64);
2248
- const bytes = new Uint8Array(binary.length);
2249
- for (let i = 0; i < binary.length; i++) {
2250
- bytes[i] = binary.charCodeAt(i);
2251
- }
2252
- return bytes;
2253
- }
2254
- return new Uint8Array(Buffer.from(base64, "base64"));
2255
- }
2256
- async function toImageWireFormat(input, format = "jpeg") {
2257
- const data = await toImageBytes(input);
2258
- return { data, format };
2259
- }
2260
- function detectImageFormat(bytes) {
2261
- if (bytes.length < 4) {
2262
- return "unknown";
2263
- }
2264
- if (bytes[0] === 255 && bytes[1] === 216 && bytes[2] === 255) {
2265
- return "jpeg";
2266
- }
2267
- if (bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71) {
2268
- return "png";
2269
- }
2270
- if (bytes[0] === 82 && bytes[1] === 73 && bytes[2] === 70 && bytes[3] === 70 && bytes.length >= 12 && bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80) {
2271
- return "webp";
2272
- }
2273
- return "unknown";
2274
- }
2275
-
2276
2281
  exports.InputTooLongError = InputTooLongError;
2277
2282
  exports.LoraLoadingError = LoraLoadingError;
2278
2283
  exports.ModelLoadFailedError = ModelLoadFailedError;