npm - @superlinked/sie-sdk - Versions diffs - 0.6.4 → 0.6.6 - Mend

@superlinked/sie-sdk 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.cjs (CHANGED)

@@ -134,10 +134,61 @@ var InputTooLongError = class extends RequestError {
   }
 };
+// src/images.ts
+async function toImageBytes(input) {
+  if (input instanceof Uint8Array) {
+    return input;
+  }
+  if (input instanceof ArrayBuffer) {
+    return new Uint8Array(input);
+  }
+  if (typeof Blob !== "undefined" && input instanceof Blob) {
+    const buffer = await input.arrayBuffer();
+    return new Uint8Array(buffer);
+  }
+  if (typeof input === "string") {
+    const dataUrlMatch = input.match(/^data:[^;]+;base64,(.+)$/);
+    if (dataUrlMatch?.[1]) {
+      return base64ToBytes(dataUrlMatch[1]);
+    }
+    return base64ToBytes(input);
+  }
+  throw new Error(`Unsupported image input type: ${typeof input}`);
+}
+function base64ToBytes(base64) {
+  if (typeof atob === "function") {
+    const binary = atob(base64);
+    const bytes = new Uint8Array(binary.length);
+    for (let i = 0; i < binary.length; i++) {
+      bytes[i] = binary.charCodeAt(i);
+    }
+    return bytes;
+  }
+  return new Uint8Array(Buffer.from(base64, "base64"));
+}
+async function toImageWireFormat(input, format = "jpeg") {
+  const data = await toImageBytes(input);
+  return { data, format };
+}
+function detectImageFormat(bytes) {
+  if (bytes.length < 4) {
+    return "unknown";
+  }
+  if (bytes[0] === 255 && bytes[1] === 216 && bytes[2] === 255) {
+    return "jpeg";
+  }
+  if (bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71) {
+    return "png";
+  }
+  if (bytes[0] === 82 && bytes[1] === 73 && bytes[2] === 70 && bytes[3] === 70 && bytes.length >= 12 && bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80) {
+    return "webp";
+  }
+  return "unknown";
+}
 // src/internal/constants.ts
 var MSGPACK_CONTENT_TYPE = "application/msgpack";
 var JSON_CONTENT_TYPE = "application/json";
-var HTTP_ACCEPTED = 202;
 var HTTP_CLIENT_ERROR_MIN = 400;
 var HTTP_CLIENT_ERROR_MAX = 499;
 var HTTP_SERVER_ERROR_MIN = 500;
@@ -151,6 +202,7 @@ var LORA_LOADING_DEFAULT_DELAY = 1e3;
 var LORA_LOADING_ERROR_CODE = "LORA_LOADING";
 var MODEL_LOADING_DEFAULT_DELAY = 5e3;
 var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
+var PROVISIONING_ERROR_CODE = "PROVISIONING";
 var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
 var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
 var EXT_TYPE_NUMPY = 78;
@@ -338,6 +390,11 @@ function unpackMessage(data) {
 }
 // src/internal/retry.ts
+var RETRY_JITTER_FRACTION = 0.25;
+function applyRetryJitter(delay) {
+  const low = delay * (1 - RETRY_JITTER_FRACTION);
+  return Math.max(0, low + Math.random() * (delay - low));
+}
 function getRetryAfter(header) {
   if (!header) return void 0;
   const seconds = Number.parseInt(header, 10);
@@ -353,6 +410,11 @@ function getRetryAfter(header) {
 }
 // src/internal/parsing.ts
+var SIE_ERROR_CODE_HEADER = "X-SIE-Error-Code";
+function normalizeErrorCode(code) {
+  if (code === "provisioning") return PROVISIONING_ERROR_CODE;
+  return code;
+}
 function getRetryAfter2(response) {
   const header = response.headers.get("Retry-After");
   return getRetryAfter(header);
@@ -381,10 +443,12 @@ async function getErrorDetail(response) {
   return void 0;
 }
 async function getErrorCode(response) {
+  const headerCode = response.headers.get(SIE_ERROR_CODE_HEADER);
+  if (headerCode) return headerCode;
   const detail = await getErrorDetail(response);
   if (!detail) return void 0;
   const code = detail.code;
-  return typeof code === "string" ? code : void 0;
+  return typeof code === "string" ? normalizeErrorCode(code) : void 0;
 }
 async function throwIfModelLoadFailed(response, model) {
   if (response.status !== 502) return;
@@ -440,7 +504,8 @@ async function handleError(response, gpu) {
       message = response.statusText;
     }
   }
-  if (status === HTTP_ACCEPTED) {
+  code = response.headers.get(SIE_ERROR_CODE_HEADER) ?? normalizeErrorCode(code);
+  if (status === 503 && code === PROVISIONING_ERROR_CODE) {
     const retryAfter = getRetryAfter2(response);
     throw new ProvisioningError(message, gpu, retryAfter);
   }
@@ -608,45 +673,38 @@ async function withProvisioningRetry(performFetch, opts) {
   const startTime = Date.now();
   while (true) {
     const response = await performFetch();
-    if (response.status === HTTP_ACCEPTED) {
-      if (!opts.waitForCapacity) {
-        throw new ProvisioningError(
-          "No capacity available. Server is provisioning.",
-          opts.gpu,
-          getRetryAfter2(response)
-        );
-      }
-      const elapsed = Date.now() - startTime;
-      if (elapsed >= opts.provisionTimeoutMs) {
-        throw new ProvisioningError(
-          `Provisioning timeout after ${elapsed}ms`,
-          opts.gpu,
-          getRetryAfter2(response)
-        );
-      }
-      const delay = getRetryAfter2(response) ?? DEFAULT_RETRY_DELAY;
-      await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
-      continue;
-    }
     await throwIfModelLoadFailed(response, opts.model);
     if (response.status === 503) {
       const errorCode = await getErrorCode(response.clone());
-      if (errorCode === MODEL_LOADING_ERROR_CODE) {
+      if (errorCode === PROVISIONING_ERROR_CODE) {
+        if (!opts.waitForCapacity) {
+          throw new ProvisioningError(
+            "No capacity available. Server is provisioning.",
+            opts.gpu,
+            getRetryAfter2(response)
+          );
+        }
         const elapsed = Date.now() - startTime;
         if (elapsed >= opts.provisionTimeoutMs) {
-          throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
+          throw new ProvisioningError(
+            `Provisioning timeout after ${elapsed}ms`,
+            opts.gpu,
+            getRetryAfter2(response)
+          );
         }
-        const delay = getRetryAfter2(response) ?? MODEL_LOADING_DEFAULT_DELAY;
+        const retryAfter = getRetryAfter2(response);
+        const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
         await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
         continue;
       }
-      if (opts.waitForCapacity) {
+      if (errorCode === MODEL_LOADING_ERROR_CODE) {
         const elapsed = Date.now() - startTime;
-        if (elapsed < opts.provisionTimeoutMs) {
-          const delay = getRetryAfter2(response) ?? DEFAULT_RETRY_DELAY;
-          await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
-          continue;
+        if (elapsed >= opts.provisionTimeoutMs) {
+          throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
         }
+        const delay = getRetryAfter2(response) ?? MODEL_LOADING_DEFAULT_DELAY;
+        await sleep(Math.min(delay, opts.provisionTimeoutMs - elapsed));
+        continue;
       }
     }
     if (!response.ok) {
@@ -798,7 +856,7 @@ function extractDataPayload(block) {
 }
 // src/version.ts
-var SDK_VERSION = "0.6.4";
+var SDK_VERSION = "0.6.6";
 // src/client.ts
 function sleep2(ms) {
@@ -819,6 +877,24 @@ function abortableSleep(ms, signal) {
   });
 }
 var _LEASE_RENEWAL_MAX_RETRIES = 5;
+function isImageWireFormat(image) {
+  return typeof image === "object" && image !== null && "data" in image;
+}
+async function imageForWire(image) {
+  if (isImageWireFormat(image)) {
+    return image;
+  }
+  return toImageWireFormat(image);
+}
+async function itemImagesForWire(item) {
+  if (!item.images || item.images.length === 0) {
+    return item;
+  }
+  return { ...item, images: await Promise.all(item.images.map(imageForWire)) };
+}
+async function itemsImagesForWire(items) {
+  return Promise.all(items.map(itemImagesForWire));
+}
 function extractChatChunkError(chunk) {
   const err = chunk.error;
   if (!err) return null;
@@ -873,8 +949,9 @@ var SIEClient = class {
   async encode(model, items, options = {}) {
     const isSingleItem = !Array.isArray(items);
     const itemsArray = isSingleItem ? [items] : items;
+    const itemsForWire = await itemsImagesForWire(itemsArray);
     const body = {
-      items: itemsArray
+      items: itemsForWire
     };
     const params = {};
     if (options.outputTypes) {
@@ -1318,11 +1395,11 @@ var SIEClient = class {
    * if the consumer-supplied `extractError` returns an `SIEStreamError`, the
    * generator throws it instead of yielding the chunk.
    *
-   * Retry policy mirrors {@link generate}: only the SAFE pre-execution
-   * capacity signals — `202` (provisioning) and `503 MODEL_LOADING` — are
-   * retried, and only while `waitForCapacity` is set and the provision
-   * budget remains. Once the body opens we never retry (the call is
-   * non-idempotent; a mid-stream failure must not re-issue generation).
+   * Retry policy mirrors {@link generate}: only explicit SAFE
+   * pre-execution capacity signals — `503 PROVISIONING` and
+   * `503 MODEL_LOADING` — are retried while the provision budget remains.
+   * Once the body opens we never retry (the call is non-idempotent; a
+   * mid-stream failure must not re-issue generation).
    *
    * @internal
    */
@@ -1370,40 +1447,27 @@ var SIEClient = class {
         } finally {
           clearTimeout(preStreamTimeoutId);
         }
-        if (attemptResponse.status === HTTP_ACCEPTED) {
-          if (!waitForCapacity) {
-            throw new ProvisioningError(
-              "No capacity available. Server is provisioning.",
-              gpu,
-              getRetryAfter2(attemptResponse)
-            );
-          }
-          const elapsed = Date.now() - startTime;
-          if (elapsed >= this.provisionTimeout) {
-            throw new ProvisioningError(
-              `Provisioning timeout after ${elapsed}ms`,
-              gpu,
-              getRetryAfter2(attemptResponse)
-            );
-          }
-          const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
-          if (await abortableSleep(
-            Math.min(delay, this.provisionTimeout - elapsed),
-            controller.signal
-          )) {
-            throw new SIEConnectionError("Stream aborted while provisioning", "other");
-          }
-          continue;
-        }
         await throwIfModelLoadFailed(attemptResponse, model);
         if (attemptResponse.status === 503) {
           const errorCode = await getErrorCode(attemptResponse.clone());
-          if (errorCode === MODEL_LOADING_ERROR_CODE && waitForCapacity) {
+          if (errorCode === PROVISIONING_ERROR_CODE) {
+            if (!waitForCapacity) {
+              throw new ProvisioningError(
+                "No capacity available. Server is provisioning.",
+                gpu,
+                getRetryAfter2(attemptResponse)
+              );
+            }
             const elapsed = Date.now() - startTime;
             if (elapsed >= this.provisionTimeout) {
-              throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
+              throw new ProvisioningError(
+                `Provisioning timeout after ${elapsed}ms`,
+                gpu,
+                getRetryAfter2(attemptResponse)
+              );
             }
-            const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
+            const retryAfter = getRetryAfter2(attemptResponse);
+            const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
             if (await abortableSleep(
               Math.min(delay, this.provisionTimeout - elapsed),
               controller.signal
@@ -1412,18 +1476,19 @@ var SIEClient = class {
             }
             continue;
           }
-          if (waitForCapacity) {
+          if (errorCode === MODEL_LOADING_ERROR_CODE) {
             const elapsed = Date.now() - startTime;
-            if (elapsed < this.provisionTimeout) {
-              const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
-              if (await abortableSleep(
-                Math.min(delay, this.provisionTimeout - elapsed),
-                controller.signal
-              )) {
-                throw new SIEConnectionError("Stream aborted while provisioning", "other");
-              }
-              continue;
+            if (elapsed >= this.provisionTimeout) {
+              throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
+            }
+            const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
+            if (await abortableSleep(
+              Math.min(delay, this.provisionTimeout - elapsed),
+              controller.signal
+            )) {
+              throw new SIEConnectionError("Stream aborted while provisioning", "other");
             }
+            continue;
           }
         }
         if (attemptResponse.status !== 200) {
@@ -1473,9 +1538,11 @@ var SIEClient = class {
     return headers;
   }
   async score(model, query, items, options = {}) {
+    const queryForWire = await itemImagesForWire(query);
+    const itemsForWire = await itemsImagesForWire(items);
     const body = {
-      query,
-      items
+      query: queryForWire,
+      items: itemsForWire
     };
     const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
     const { pool, gpu } = this.parseGpuParam(options.gpu);
@@ -1512,8 +1579,9 @@ var SIEClient = class {
   async extract(model, items, options) {
     const isSingleItem = !Array.isArray(items);
     const itemsArray = isSingleItem ? [items] : items;
+    const itemsForWire = await itemsImagesForWire(itemsArray);
     const body = {
-      items: itemsArray
+      items: itemsForWire
     };
     const params = {
       labels: options.labels
@@ -1899,9 +1967,9 @@ var SIEClient = class {
   /**
    * Make a msgpack HTTP request with retry logic.
    *
-   * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
-   *  - 202 Accepted (provisioning)
-   *  - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
+   * Retried (capped by `provisionTimeout`):
+   *  - 503 `PROVISIONING` when `waitForCapacity: true`
+   *  - 503 `MODEL_LOADING` / `LORA_LOADING`
    *  - `SIEConnectionError` with `kind === "connect"` (issue #95)
    *
    * `kind === "timeout"` is NOT retried — would extend the user-visible
@@ -1926,34 +1994,34 @@ var SIEClient = class {
         }
         throw err;
       }
-      if (response.status === HTTP_ACCEPTED) {
-        const retryAfter = getRetryAfter2(response);
-        if (!waitForCapacity) {
-          throw new ProvisioningError(
-            `No capacity available for GPU '${gpu}'. Server is provisioning.`,
-            gpu,
-            retryAfter
-          );
-        }
-        const elapsed = Date.now() - startTime;
-        if (elapsed >= this.provisionTimeout) {
-          throw new ProvisioningError(
-            `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
-            gpu,
-            retryAfter
-          );
-        }
-        const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
-        const remaining = this.provisionTimeout - elapsed;
-        const actualDelay = Math.min(delay, remaining);
-        await sleep2(actualDelay);
-        continue;
-      }
       await throwIfModelLoadFailed(response, model);
       await throwIfInputTooLong(response, model);
       if (response.status === 503) {
         const clonedResponse = response.clone();
         const errorCode = await getErrorCode(clonedResponse);
+        if (errorCode === PROVISIONING_ERROR_CODE) {
+          const retryAfter = getRetryAfter2(response);
+          if (!waitForCapacity) {
+            throw new ProvisioningError(
+              `No capacity available for GPU '${gpu}'. Server is provisioning.`,
+              gpu,
+              retryAfter
+            );
+          }
+          const elapsed = Date.now() - startTime;
+          if (elapsed >= this.provisionTimeout) {
+            throw new ProvisioningError(
+              `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
+              gpu,
+              retryAfter
+            );
+          }
+          const delay = retryAfter ?? applyRetryJitter(DEFAULT_RETRY_DELAY);
+          const remaining = this.provisionTimeout - elapsed;
+          const actualDelay = Math.min(delay, remaining);
+          await sleep2(actualDelay);
+          continue;
+        }
         if (errorCode === LORA_LOADING_ERROR_CODE) {
           loraRetries += 1;
           if (loraRetries > LORA_LOADING_MAX_RETRIES) {
@@ -1984,17 +2052,6 @@ var SIEClient = class {
           await sleep2(actualDelay);
           continue;
         }
-        if (waitForCapacity) {
-          const elapsed = Date.now() - startTime;
-          if (elapsed < this.provisionTimeout) {
-            const retryAfter = getRetryAfter2(response);
-            const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
-            const remaining = this.provisionTimeout - elapsed;
-            const actualDelay = Math.min(delay, remaining);
-            await sleep2(actualDelay);
-            continue;
-          }
-        }
       }
       if (!response.ok) {
         await handleError(response, gpu);
@@ -2221,58 +2278,6 @@ function maxsimBatch(queries, documents) {
   return scores;
 }
-// src/images.ts
-async function toImageBytes(input) {
-  if (input instanceof Uint8Array) {
-    return input;
-  }
-  if (input instanceof ArrayBuffer) {
-    return new Uint8Array(input);
-  }
-  if (typeof Blob !== "undefined" && input instanceof Blob) {
-    const buffer = await input.arrayBuffer();
-    return new Uint8Array(buffer);
-  }
-  if (typeof input === "string") {
-    const dataUrlMatch = input.match(/^data:[^;]+;base64,(.+)$/);
-    if (dataUrlMatch?.[1]) {
-      return base64ToBytes(dataUrlMatch[1]);
-    }
-    return base64ToBytes(input);
-  }
-  throw new Error(`Unsupported image input type: ${typeof input}`);
-}
-function base64ToBytes(base64) {
-  if (typeof atob === "function") {
-    const binary = atob(base64);
-    const bytes = new Uint8Array(binary.length);
-    for (let i = 0; i < binary.length; i++) {
-      bytes[i] = binary.charCodeAt(i);
-    }
-    return bytes;
-  }
-  return new Uint8Array(Buffer.from(base64, "base64"));
-}
-async function toImageWireFormat(input, format = "jpeg") {
-  const data = await toImageBytes(input);
-  return { data, format };
-}
-function detectImageFormat(bytes) {
-  if (bytes.length < 4) {
-    return "unknown";
-  }
-  if (bytes[0] === 255 && bytes[1] === 216 && bytes[2] === 255) {
-    return "jpeg";
-  }
-  if (bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71) {
-    return "png";
-  }
-  if (bytes[0] === 82 && bytes[1] === 73 && bytes[2] === 70 && bytes[3] === 70 && bytes.length >= 12 && bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80) {
-    return "webp";
-  }
-  return "unknown";
-}
 exports.InputTooLongError = InputTooLongError;
 exports.LoraLoadingError = LoraLoadingError;
 exports.ModelLoadFailedError = ModelLoadFailedError;