npm - @superlinked/sie-sdk - Versions diffs - 0.1.8 - Mend

@superlinked/sie-sdk 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/index.cjs ADDED Viewed

@@ -0,0 +1,1406 @@
+'use strict';
+var msgpack = require('@msgpack/msgpack');
+// src/errors.ts
+/** Base class for the SDK's error types; the other error classes in this file extend it. */
+var SIEError = class extends Error {
+  constructor(message) {
+    super(message);
+    // Point the instance at the prototype of the class actually being constructed.
+    Object.setPrototypeOf(this, new.target.prototype);
+    this.name = "SIEError";
+  }
+};
+var SIEConnectionError = class extends SIEError { // message-only error; presumably raised for transport failures -- raising sites are not visible here
+  constructor(message) {
+    super(message);
+    this.name = "SIEConnectionError"; // overrides the name set by the SIEError constructor
+  }
+};
+/** Error carrying a server error code together with the HTTP status of the response. */
+var RequestError = class extends SIEError {
+  /** Server-supplied error code (e.g., "INVALID_MODEL", "VALIDATION_ERROR") */
+  code;
+  /** HTTP status code of the response (400-499) */
+  statusCode;
+  constructor(message, code, statusCode) {
+    super(message);
+    this.statusCode = statusCode;
+    this.code = code;
+    this.name = "RequestError";
+  }
+};
+/** Error carrying a server error code together with the HTTP status of the response. */
+var ServerError = class extends SIEError {
+  /** Server-supplied error code (e.g., "INTERNAL_ERROR", "LORA_LOADING") */
+  code;
+  /** HTTP status code of the response (500-599) */
+  statusCode;
+  constructor(message, code, statusCode) {
+    super(message);
+    this.statusCode = statusCode;
+    this.code = code;
+    this.name = "ServerError";
+  }
+};
+/** Error recording which GPU type was requested and an optional retry hint. */
+var ProvisioningError = class extends SIEError {
+  /** GPU type named in the request */
+  gpu;
+  /** Retry delay hint in milliseconds (derived from the server's Retry-After header) */
+  retryAfter;
+  constructor(message, gpu, retryAfter) {
+    super(message);
+    this.retryAfter = retryAfter;
+    this.gpu = gpu;
+    this.name = "ProvisioningError";
+  }
+};
+/** Error raised by pool operations; records the pool name and, optionally, its state. */
+var PoolError = class extends SIEError {
+  /** Pool the failing operation targeted */
+  poolName;
+  /** Pool state when known: "pending", "active", "expired" */
+  state;
+  constructor(message, poolName, state) {
+    super(message);
+    this.state = state;
+    this.poolName = poolName;
+    this.name = "PoolError";
+  }
+};
+/** Error recording the LoRA adapter and the model it was requested for. */
+var LoraLoadingError = class extends SIEError {
+  /** Requested LoRA adapter */
+  lora;
+  /** Model the adapter was requested for */
+  model;
+  constructor(message, lora, model) {
+    super(message);
+    this.model = model;
+    this.lora = lora;
+    this.name = "LoraLoadingError";
+  }
+};
+/** Error recording the model that was requested. */
+var ModelLoadingError = class extends SIEError {
+  /** Requested model */
+  model;
+  constructor(message, model) {
+    super(message);
+    this.model = model;
+    this.name = "ModelLoadingError";
+  }
+};
+// src/internal/constants.ts
+var MSGPACK_CONTENT_TYPE = "application/msgpack";
+var JSON_CONTENT_TYPE = "application/json";
+var HTTP_ACCEPTED = 202; // handleError maps this status to ProvisioningError
+var HTTP_CLIENT_ERROR_MIN = 400;
+var HTTP_CLIENT_ERROR_MAX = 499;
+var HTTP_SERVER_ERROR_MIN = 500;
+var HTTP_SERVER_ERROR_MAX = 599;
+var DEFAULT_TIMEOUT = 3e4; // 30000; default for the client's `timeout`, which is used as a setTimeout delay (ms)
+var DEFAULT_PROVISION_TIMEOUT = 3e5; // 300000; default for the client's `provisionTimeout`
+var DEFAULT_RETRY_DELAY = 5e3; // 5000; presumably milliseconds -- usage not visible in this part of the file
+var DEFAULT_LEASE_RENEWAL_INTERVAL = 6e4; // 60000 ms; setTimeout delay between pool lease renewal rounds
+var LORA_LOADING_MAX_RETRIES = 10;
+var LORA_LOADING_DEFAULT_DELAY = 1e3; // 1000; presumably milliseconds -- usage not visible here
+var LORA_LOADING_ERROR_CODE = "LORA_LOADING";
+var MODEL_LOADING_DEFAULT_DELAY = 5e3; // 5000; presumably milliseconds -- usage not visible here
+var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
+var SDK_VERSION_HEADER = "X-SIE-SDK-Version"; // request header carrying SDK_VERSION
+var SERVER_VERSION_HEADER = "X-SIE-Server-Version"; // response header read by checkServerVersion
+// src/version.ts
+var SDK_VERSION = "0.1.8";
+var EXT_TYPE_NUMPY = 78; // msgpack extension type id registered in createExtensionCodec
+/**
+ * Resolve a numpy dtype string (e.g. "<f4", "|u1") to its element width and a
+ * factory that wraps an ArrayBuffer in the matching typed array.
+ *
+ * Supported kinds: f2 (widened to Float32Array), f4, f8, i1, i2, i4, u1, u2, u4.
+ *
+ * NOTE(review): the byte-order prefix is not inspected. Typed arrays read the
+ * buffer in host byte order, so a big-endian (">") multi-byte dtype would be
+ * decoded incorrectly on a little-endian host.
+ *
+ * @param dtype - numpy dtype string; only its last two characters are used
+ * @returns `{ size, construct }` where `size` is bytes per element and
+ *          `construct(buf)` returns a typed array over `buf`
+ * @throws Error when the dtype is not one of the supported kinds
+ */
+function parseDtype(dtype) {
+  // Kind letter plus single-digit byte width, e.g. "f4".
+  switch (dtype.slice(-2)) {
+    case "f4":
+      return { size: 4, construct: (buf) => new Float32Array(buf) };
+    case "f8":
+      return { size: 8, construct: (buf) => new Float64Array(buf) };
+    case "f2":
+      return {
+        size: 2,
+        // No native half-float array is used here: widen each 16-bit pattern to float32.
+        construct: (buf) => {
+          const halves = new Uint16Array(buf);
+          const widened = new Float32Array(halves.length);
+          for (let i = 0; i < halves.length; i++) {
+            widened[i] = float16ToFloat32(halves[i] ?? 0);
+          }
+          return widened;
+        }
+      };
+    case "i4":
+      return { size: 4, construct: (buf) => new Int32Array(buf) };
+    case "i2":
+      return { size: 2, construct: (buf) => new Int16Array(buf) };
+    case "i1":
+      return { size: 1, construct: (buf) => new Int8Array(buf) };
+    case "u4":
+      return { size: 4, construct: (buf) => new Uint32Array(buf) };
+    case "u2":
+      return { size: 2, construct: (buf) => new Uint16Array(buf) };
+    case "u1":
+      return { size: 1, construct: (buf) => new Uint8Array(buf) };
+    default:
+      throw new Error(`Unsupported numpy dtype: ${dtype}`);
+  }
+}
+/**
+ * Convert a 16-bit half-precision bit pattern to a JavaScript number.
+ *
+ * Layout read from `h`: bit 15 sign, bits 10-14 exponent (bias 15), bits 0-9 fraction.
+ *
+ * @param h - integer holding the half-float bits
+ * @returns the decoded value, including signed zero, subnormals, infinities and NaN
+ */
+function float16ToFloat32(h) {
+  const negative = (h >>> 15 & 1) === 1;
+  const exponent = h >>> 10 & 31;
+  const mantissa = h & 1023;
+  // All-ones exponent: infinity when the fraction is zero, NaN otherwise.
+  if (exponent === 31) {
+    if (mantissa !== 0) {
+      return Number.NaN;
+    }
+    return negative ? Number.NEGATIVE_INFINITY : Number.POSITIVE_INFINITY;
+  }
+  const signFactor = negative ? -1 : 1;
+  // Zero exponent: signed zero or a subnormal (no implicit leading 1).
+  if (exponent === 0) {
+    if (mantissa === 0) {
+      return negative ? -0 : 0;
+    }
+    return signFactor * (mantissa / 1024) * 2 ** -14;
+  }
+  return signFactor * (1 + mantissa / 1024) * 2 ** (exponent - 15);
+}
+/**
+ * Decode an extension payload of the form `dtype|shape|raw-bytes` into a typed array.
+ *
+ * `shape` is a comma-separated list of integers and may be empty, in which case the
+ * element count is derived from the payload length. The result is always flat: a
+ * multi-dimensional shape only determines the total element count.
+ *
+ * @param data - bytes of the extension payload
+ * @returns typed array built by the dtype's constructor over a fresh buffer
+ * @throws Error (from parseDtype) when the dtype is unsupported
+ */
+function decodeNumpyArray(data) {
+  const PIPE = 124; // character code of the "|" separator
+  const utf8 = new TextDecoder();
+  // Index of the next separator at or after `from`; when none exists the result
+  // lands at the end of the data (or at `from` if that is already past the end).
+  const findSeparator = (from) => {
+    const at = data.indexOf(PIPE, from);
+    return at === -1 ? Math.max(from, data.length) : at;
+  };
+  const dtypeEnd = findSeparator(0);
+  const shapeEnd = findSeparator(dtypeEnd + 1);
+  const dtype = utf8.decode(data.slice(0, dtypeEnd));
+  const shapeText = utf8.decode(data.slice(dtypeEnd + 1, shapeEnd));
+  let shape = [];
+  if (shapeText.length > 0) {
+    shape = shapeText.split(",").map((dim) => Number.parseInt(dim, 10));
+  }
+  const payload = data.slice(shapeEnd + 1);
+  const { size, construct } = parseDtype(dtype);
+  let elementCount;
+  if (shape.length > 0) {
+    elementCount = shape.reduce((acc, dim) => acc * dim, 1);
+  } else {
+    elementCount = payload.length / size;
+  }
+  // Copy into a fresh buffer sized from the element count before wrapping it.
+  const byteCount = elementCount * size;
+  const buffer = new ArrayBuffer(byteCount);
+  new Uint8Array(buffer).set(payload.slice(0, byteCount));
+  return construct(buffer);
+}
+/**
+ * Serialize a Float32Array or Int32Array as `dtype|length|raw-bytes`.
+ *
+ * The dtype is "<f4" or "<i4", the shape is the one-dimensional element count, and
+ * the raw bytes are taken from the array's own view of its buffer.
+ *
+ * @param arr - Float32Array or Int32Array to serialize
+ * @returns Uint8Array containing header and data
+ * @throws Error for any other input type
+ */
+function encodeNumpyArray(arr) {
+  let dtype;
+  if (arr instanceof Float32Array) {
+    dtype = "<f4";
+  } else if (arr instanceof Int32Array) {
+    dtype = "<i4";
+  } else {
+    throw new Error("Unsupported TypedArray type");
+  }
+  // Header is pure ASCII: dtype, separator, element count, separator.
+  const header = new TextEncoder().encode(`${dtype}|${arr.length.toString()}|`);
+  const payload = new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength);
+  const out = new Uint8Array(header.length + payload.length);
+  out.set(header, 0);
+  out.set(payload, header.length);
+  return out;
+}
+/**
+ * Build the msgpack extension codec that carries typed arrays as extension
+ * type EXT_TYPE_NUMPY.
+ *
+ * Encoding applies only to Float32Array and Int32Array; for any other value the
+ * encoder returns null. Decoding delegates to decodeNumpyArray.
+ *
+ * @returns the configured ExtensionCodec
+ */
+function createExtensionCodec() {
+  const encodeTypedArray = (value) => {
+    const supported = value instanceof Float32Array || value instanceof Int32Array;
+    return supported ? encodeNumpyArray(value) : null;
+  };
+  const decodeTypedArray = (bytes) => decodeNumpyArray(bytes);
+  const codec = new msgpack.ExtensionCodec();
+  codec.register({
+    type: EXT_TYPE_NUMPY,
+    encode: encodeTypedArray,
+    decode: decodeTypedArray
+  });
+  return codec;
+}
+// Shared codec instance used by packMessage and unpackMessage.
+var extensionCodec = createExtensionCodec();
+/**
+ * Serialize a value to msgpack using the shared extension codec.
+ *
+ * @param data - value to encode
+ * @returns the bytes produced by msgpack.encode
+ */
+function packMessage(data) {
+  const encodeOptions = { extensionCodec };
+  return msgpack.encode(data, encodeOptions);
+}
+/**
+ * Check whether a decoded value has the map shape used for serialized numpy
+ * arrays: `nd === true`, a string `type`, an array `shape` and Uint8Array `data`.
+ *
+ * @param obj - any decoded value
+ * @returns true only when all four fields match
+ */
+function isNumpyArrayMap(obj) {
+  if (obj === null || typeof obj !== "object") {
+    return false;
+  }
+  if (obj.nd !== true) {
+    return false;
+  }
+  if (typeof obj.type !== "string") {
+    return false;
+  }
+  if (!Array.isArray(obj.shape)) {
+    return false;
+  }
+  return obj.data instanceof Uint8Array;
+}
+/**
+ * Turn a serialized numpy-array map into typed array(s).
+ *
+ * A two-dimensional shape produces an array with one typed array per row. Any
+ * other shape produces a single flat typed array whose element count is the
+ * product of the shape, or is derived from the data length when the shape is empty.
+ *
+ * @param map - object with `type` (dtype string), `shape` and `data` (bytes)
+ * @returns an array of row typed arrays, or one flat typed array
+ * @throws Error (from parseDtype) when the dtype is unsupported
+ */
+function convertNumpyArrayMap(map) {
+  const bytes = map.data;
+  const { size, construct } = parseDtype(map.type);
+  // Copy `byteCount` bytes starting at `start` into a fresh buffer and wrap it.
+  const copyOut = (start, byteCount) => {
+    const fresh = new ArrayBuffer(byteCount);
+    new Uint8Array(fresh).set(bytes.slice(start, start + byteCount));
+    return construct(fresh);
+  };
+  const isMatrix = map.shape.length === 2 && map.shape[0] !== void 0 && map.shape[1] !== void 0;
+  if (isMatrix) {
+    const rowCount = map.shape[0];
+    const rowBytes = map.shape[1] * size;
+    const rows = [];
+    for (let r = 0; r < rowCount; r++) {
+      rows.push(copyOut(r * map.shape[1] * size, rowBytes));
+    }
+    return rows;
+  }
+  let elementCount;
+  if (map.shape.length > 0) {
+    elementCount = map.shape.reduce((acc, dim) => acc * dim, 1);
+  } else {
+    elementCount = bytes.length / size;
+  }
+  return copyOut(0, elementCount * size);
+}
+/**
+ * Recursively replace serialized numpy-array maps inside a decoded value.
+ *
+ * Arrays are mapped element by element, typed arrays and other buffer views are
+ * returned untouched, and any other object is rebuilt as a plain object from its
+ * own enumerable entries. Primitives, null and undefined pass through.
+ *
+ * @param obj - decoded value of any type
+ * @returns the value with every numpy-array map converted
+ */
+function convertNumpyArrays(obj) {
+  if (obj === null || obj === void 0) {
+    return obj;
+  }
+  // Must run before the generic object branch so the map is not copied field by field.
+  if (isNumpyArrayMap(obj)) {
+    return convertNumpyArrayMap(obj);
+  }
+  if (Array.isArray(obj)) {
+    return obj.map((element) => convertNumpyArrays(element));
+  }
+  if (ArrayBuffer.isView(obj) || typeof obj !== "object") {
+    return obj;
+  }
+  const converted = {};
+  for (const [field, inner] of Object.entries(obj)) {
+    converted[field] = convertNumpyArrays(inner);
+  }
+  return converted;
+}
+/**
+ * Decode a msgpack payload and convert any serialized numpy arrays inside it.
+ *
+ * Map keys are normalized: strings and numbers are kept, byte-string keys
+ * (Uint8Array) are decoded as text, and anything else is JSON-stringified.
+ *
+ * @param data - msgpack bytes
+ * @returns the decoded value after convertNumpyArrays
+ */
+function unpackMessage(data) {
+  const normalizeKey = (key) => {
+    switch (typeof key) {
+      case "string":
+      case "number":
+        return key;
+      default:
+        return key instanceof Uint8Array ? new TextDecoder().decode(key) : JSON.stringify(key);
+    }
+  };
+  const raw = msgpack.decode(data, {
+    extensionCodec,
+    mapKeyConverter: normalizeKey
+  });
+  return convertNumpyArrays(raw);
+}
+// src/internal/retry.ts
+/**
+ * Parse a Retry-After header value into a delay in milliseconds.
+ *
+ * A positive leading integer is treated as seconds. Otherwise the value is parsed
+ * as a date and the time remaining until it is returned.
+ *
+ * @param header - raw header value, possibly null/undefined/empty
+ * @returns delay in milliseconds, or undefined when the header is missing,
+ *          unparseable, zero/negative, or a date that is not in the future
+ */
+function getRetryAfter(header) {
+  if (!header) return void 0;
+  const asSeconds = Number.parseInt(header, 10);
+  // NaN fails this comparison, so non-numeric values fall through to date parsing.
+  if (asSeconds > 0) {
+    return asSeconds * 1e3;
+  }
+  const targetTime = new Date(header).getTime();
+  if (Number.isNaN(targetTime)) {
+    return void 0;
+  }
+  const remaining = targetTime - Date.now();
+  return remaining > 0 ? remaining : void 0;
+}
+// src/internal/parsing.ts
+/**
+ * Read the Retry-After header of a response and parse it with getRetryAfter.
+ *
+ * @param response - object exposing `headers.get(name)`
+ * @returns whatever getRetryAfter returns for the header value
+ */
+function getRetryAfter2(response) {
+  return getRetryAfter(response.headers.get("Retry-After"));
+}
+/**
+ * Best-effort extraction of an error code from a response body.
+ *
+ * The body is decoded as msgpack when the content type says so, as JSON otherwise.
+ * The first string found at `error.code`, then `detail.code`, then top-level `code`
+ * is returned. Any failure while reading or decoding is deliberately swallowed.
+ *
+ * @param response - fetch-style response (consumes its body)
+ * @returns the error code string, or undefined when none could be read
+ */
+async function getErrorCode(response) {
+  try {
+    const contentType = response.headers.get("content-type") ?? "";
+    const isMsgpack = contentType.includes(MSGPACK_CONTENT_TYPE);
+    const data = isMsgpack ? unpackMessage(new Uint8Array(await response.arrayBuffer())) : await response.json();
+    for (const nested of [data.error, data.detail]) {
+      if (nested && typeof nested === "object" && typeof nested.code === "string") {
+        return nested.code;
+      }
+    }
+    if (typeof data.code === "string") {
+      return data.code;
+    }
+  } catch {
+  }
+  return void 0;
+}
+/**
+ * Convert a non-success HTTP response into the matching SDK error and throw it.
+ *
+ * Status 202 becomes ProvisioningError (with the parsed Retry-After delay),
+ * 400-499 becomes RequestError, and everything else becomes ServerError.
+ *
+ * The error code is taken from top-level `code`, then `error.code`, then
+ * `detail.code`, defaulting to "UNKNOWN". The message is the `detail` string when
+ * there is one, otherwise a nested `message`, otherwise the JSON form of `detail`,
+ * otherwise the response status text.
+ *
+ * @param response - fetch-style response (its body is consumed as JSON)
+ * @param gpu - GPU type of the request, recorded on ProvisioningError
+ * @throws ProvisioningError | RequestError | ServerError - always throws
+ */
+async function handleError(response, gpu) {
+  const { status } = response;
+  let errorBody = {};
+  try {
+    const parsed = await response.json();
+    // Keep object bodies only: JSON `null` or a bare scalar has no fields to read.
+    if (parsed !== null && typeof parsed === "object") {
+      errorBody = parsed;
+    }
+  } catch {
+    // Body is absent or not JSON: fall back to the status text below.
+  }
+  const asRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value) ? value : void 0;
+  const { detail } = errorBody;
+  const errorPart = asRecord(errorBody.error);
+  const detailPart = asRecord(detail);
+  const code = errorBody.code ?? errorPart?.code ?? detailPart?.code ?? "UNKNOWN";
+  const nestedMessage = detailPart?.message ?? errorPart?.message;
+  let message;
+  if (typeof detail === "string") {
+    message = detail;
+  } else if (typeof nestedMessage === "string") {
+    message = nestedMessage;
+  } else if (detail !== void 0 && detail !== null) {
+    // Structured detail (object or array): keep it readable instead of "[object Object]".
+    message = JSON.stringify(detail);
+  } else {
+    message = response.statusText;
+  }
+  if (status === HTTP_ACCEPTED) {
+    const header = response.headers.get("Retry-After");
+    let retryAfter;
+    if (header) {
+      const seconds = Number.parseInt(header, 10);
+      // Numeric values are seconds; anything else may be an HTTP-date.
+      retryAfter = Number.isNaN(seconds) ? getRetryAfter(header) : seconds * 1e3;
+    }
+    throw new ProvisioningError(message, gpu, retryAfter);
+  }
+  if (status >= HTTP_CLIENT_ERROR_MIN && status <= HTTP_CLIENT_ERROR_MAX) {
+    throw new RequestError(message, code, status);
+  }
+  // 5xx and any unexpected status are both reported as ServerError.
+  throw new ServerError(message, code, status);
+}
+/**
+ * Map one wire-format encode item to the SDK result shape.
+ *
+ * Only fields present on the input are copied: `id` when defined; `dense`,
+ * `sparse`, `multivector` and `timing` when truthy. Timing keys are renamed from
+ * snake_case to camelCase.
+ *
+ * @param data - wire-format item
+ * @returns result object containing just the populated fields
+ */
+function parseEncodeResult(data) {
+  const parsed = {};
+  if (data.id !== void 0) {
+    parsed.id = data.id;
+  }
+  if (data.dense) {
+    parsed.dense = data.dense.values;
+  }
+  if (data.sparse) {
+    const { indices, values } = data.sparse;
+    parsed.sparse = { indices, values };
+  }
+  if (data.multivector) {
+    parsed.multivector = data.multivector.values;
+  }
+  if (data.timing) {
+    const { total_ms, queue_ms, tokenization_ms, inference_ms } = data.timing;
+    parsed.timing = {
+      totalMs: total_ms,
+      queueMs: queue_ms,
+      tokenizationMs: tokenization_ms,
+      inferenceMs: inference_ms
+    };
+  }
+  return parsed;
+}
+/**
+ * Map every wire-format encode item through parseEncodeResult.
+ *
+ * @param data - array of wire-format items
+ * @returns array of parsed results, in the same order
+ */
+function parseEncodeResults(data) {
+  return data.map((item) => parseEncodeResult(item));
+}
+/**
+ * Map one wire-format score entry to the SDK shape (`item_id` becomes `itemId`).
+ *
+ * @param data - wire-format entry with `item_id`, `score`, `rank`
+ * @returns `{ itemId, score, rank }`
+ */
+function parseScoreEntry(data) {
+  const { item_id: itemId, score, rank } = data;
+  return { itemId, score, rank };
+}
+/**
+ * Map a wire-format score response to the SDK shape.
+ *
+ * @param data - response with `model`, `query_id` and a `scores` array
+ * @returns `{ model, queryId, scores }` with each score entry parsed
+ */
+function parseScoreResult(data) {
+  const { model, query_id: queryId } = data;
+  const scores = data.scores.map(parseScoreEntry);
+  return { model, queryId, scores };
+}
+/**
+ * Copy the entity fields of a wire-format entity into a fresh object.
+ *
+ * @param data - wire-format entity
+ * @returns object with `text`, `label`, `score`, `start`, `end`, `bbox`
+ */
+function parseEntity(data) {
+  const { text, label, score, start, end, bbox } = data;
+  return { text, label, score, start, end, bbox };
+}
+/**
+ * Map one wire-format extract item to the SDK shape.
+ *
+ * @param data - item with `id` and an `entities` array
+ * @returns `{ id, entities }` with each entity parsed
+ */
+function parseExtractResult(data) {
+  const entities = data.entities.map(parseEntity);
+  return { id: data.id, entities };
+}
+/**
+ * Map every wire-format extract item through parseExtractResult.
+ *
+ * @param data - array of wire-format items
+ * @returns array of parsed results, in the same order
+ */
+function parseExtractResults(data) {
+  return data.map((item) => parseExtractResult(item));
+}
+/**
+ * Map a health payload to the SDK's capacity shape.
+ *
+ * When `gpuFilter` is given, workers are filtered by a case-insensitive match on
+ * their `gpu` field and `workerCount` is the number of matching workers; otherwise
+ * `workerCount` comes from `cluster.worker_count`. The other cluster totals are
+ * never filtered.
+ *
+ * @param data - health payload (`status`, `cluster`, `workers`, GPU type lists)
+ * @param gpuFilter - optional GPU type to restrict the worker list to
+ * @returns capacity info with camelCase fields; missing values default to 0 or []
+ */
+function parseCapacityInfo(data, gpuFilter) {
+  let selected = data.workers ?? [];
+  if (gpuFilter) {
+    const wanted = gpuFilter.toLowerCase();
+    selected = selected.filter((worker) => worker.gpu.toLowerCase() === wanted);
+  }
+  const workers = selected.map(({ url, gpu, healthy, queue_depth, loaded_models }) => ({
+    url,
+    gpu,
+    healthy,
+    queueDepth: queue_depth,
+    loadedModels: loaded_models
+  }));
+  const { cluster } = data;
+  let workerCount;
+  if (gpuFilter) {
+    workerCount = workers.length;
+  } else {
+    workerCount = cluster?.worker_count ?? 0;
+  }
+  return {
+    status: data.status,
+    workerCount,
+    gpuCount: cluster?.gpu_count ?? 0,
+    modelsLoaded: cluster?.models_loaded ?? 0,
+    configuredGpuTypes: data.configured_gpu_types ?? [],
+    liveGpuTypes: data.live_gpu_types ?? [],
+    workers
+  };
+}
+// src/client.ts
+/**
+ * Resolve after the given delay.
+ *
+ * @param ms - delay in milliseconds (passed to setTimeout)
+ * @returns promise that resolves with no value once the timer fires
+ */
+function sleep(ms) {
+  return new Promise((done) => {
+    setTimeout(done, ms);
+  });
+}
+/**
+ * Sleep for `ms` milliseconds unless the signal aborts first.
+ *
+ * @param ms - delay in milliseconds
+ * @param signal - AbortSignal that can cut the sleep short
+ * @returns promise resolving to true when aborted (including already aborted on
+ *          entry) and false when the full delay elapsed
+ */
+function abortableSleep(ms, signal) {
+  if (signal.aborted) return Promise.resolve(true);
+  return new Promise((resolve) => {
+    let timer;
+    // Abort wins: cancel the pending timer and report the interruption.
+    function handleAbort() {
+      clearTimeout(timer);
+      resolve(true);
+    }
+    timer = setTimeout(() => {
+      // Timer wins: drop the listener so it does not linger on the signal.
+      signal.removeEventListener("abort", handleAbort);
+      resolve(false);
+    }, ms);
+    signal.addEventListener("abort", handleAbort, { once: true });
+  });
+}
+var _LEASE_RENEWAL_MAX_RETRIES = 5; // attempts per lease-renewal round in createPool's renew loop
+var SIEClient = class {
+  baseUrl;
+  timeout;
+  gpu;
+  apiKey;
+  defaultWaitForCapacity;
+  provisionTimeout;
+  // Pool state: track created pools and their lease renewal scheduling
+  pools = /* @__PURE__ */ new Map();
+  // Version negotiation state
+  versionWarningLogged = false;
+  // Note: LoRA and model loading retry counters are now local to each method
+  // to avoid interference between concurrent requests
+  /**
+   * Create a new SIE client.
+   *
+   * @param baseUrl - Base URL of the SIE server (e.g., "http://localhost:8080");
+   *                  all trailing slashes are removed
+   * @param options - Client options: `timeout` (default DEFAULT_TIMEOUT), `gpu`,
+   *                  `apiKey`, `waitForCapacity` (default false) and
+   *                  `provisionTimeout` (default DEFAULT_PROVISION_TIMEOUT)
+   */
+  constructor(baseUrl, options = {}) {
+    // Strip every trailing slash, not just one, so paths appended later never
+    // produce "//" after the host.
+    let end = baseUrl.length;
+    while (end > 0 && baseUrl[end - 1] === "/") {
+      end--;
+    }
+    this.baseUrl = baseUrl.slice(0, end);
+    this.timeout = options.timeout ?? DEFAULT_TIMEOUT;
+    this.gpu = options.gpu;
+    this.apiKey = options.apiKey;
+    this.defaultWaitForCapacity = options.waitForCapacity ?? false;
+    this.provisionTimeout = options.provisionTimeout ?? DEFAULT_PROVISION_TIMEOUT;
+  }
+  /**
+   * Return the server base URL as stored by the constructor.
+   *
+   * @returns The normalized base URL (without trailing slash)
+   */
+  getBaseUrl() {
+    const { baseUrl } = this;
+    return baseUrl;
+  }
+  /**
+   * Encode one item or an array of items with the given model.
+   *
+   * A single (non-array) item yields a single result; an array yields an array.
+   *
+   * @param model - Model name; URL-encoded into the request path
+   * @param items - One item or an array of items
+   * @param options - Optional `outputTypes`, `instruction`, `isQuery`, `outputDtype`,
+   *                  `gpu` and `waitForCapacity`
+   * @returns The parsed result, or an array of results for array input
+   * @throws Error when a single item was sent but the response holds no result
+   */
+  async encode(model, items, options = {}) {
+    const wantsSingle = !Array.isArray(items);
+    const body = {
+      items: wantsSingle ? [items] : items
+    };
+    const params = {};
+    if (options.outputTypes) {
+      params.output_types = options.outputTypes;
+    }
+    // These options are forwarded whenever they are defined, even if falsy.
+    const passthrough = [
+      ["instruction", "instruction"],
+      ["isQuery", "is_query"],
+      ["outputDtype", "output_dtype"]
+    ];
+    for (const [optionName, wireName] of passthrough) {
+      if (options[optionName] !== void 0) {
+        params[wireName] = options[optionName];
+      }
+    }
+    // Omit `params` entirely when nothing was set.
+    if (Object.keys(params).length > 0) {
+      body.params = params;
+    }
+    const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
+    const target = this.parseGpuParam(options.gpu);
+    const response = await this.requestWithRetry(
+      `/v1/encode/${encodeURIComponent(model)}`,
+      body,
+      target.pool,
+      target.gpu,
+      waitForCapacity,
+      model
+    );
+    const payload = unpackMessage(new Uint8Array(await response.arrayBuffer()));
+    const results = parseEncodeResults(payload.items);
+    if (!wantsSingle) {
+      return results;
+    }
+    const first = results[0];
+    if (!first) {
+      throw new Error("No results returned from encode");
+    }
+    return first;
+  }
+  /**
+   * Fetch the model list from GET /v1/models.
+   *
+   * @returns Array of model descriptions with `name`, `loaded`, `inputs`,
+   *          `outputs`, `dims` and `maxSequenceLength`
+   */
+  async listModels() {
+    const response = await this.requestJson("/v1/models", "GET");
+    const { models } = await response.json();
+    return models.map(({ name, loaded, inputs, outputs, dims, max_sequence_length }) => ({
+      name,
+      loaded,
+      inputs,
+      outputs,
+      dims,
+      maxSequenceLength: max_sequence_length
+    }));
+  }
+  /**
+   * Stream real-time status updates from a worker or router.
+   *
+   * Opens a WebSocket, queues every message as it is parsed, and yields queued
+   * messages until the socket has closed and the queue is drained. The socket is
+   * closed when the generator finishes, throws, or is abandoned by the consumer.
+   *
+   * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
+   *               "auto" detects the endpoint via /health.
+   * @yields Each message parsed from JSON (string, ArrayBuffer or Uint8Array frames)
+   * @throws The WebSocket "error" event if it fires before "open"; a parse error if
+   *         a message fails to parse while the generator is waiting for one
+   */
+  async *watch(mode = "auto") {
+    const endpoint = mode === "auto" ? await this.detectEndpointType() : mode;
+    const path = endpoint === "cluster" ? "/ws/cluster-status" : "/ws/status";
+    const wsUrl = this.buildWsUrl(path);
+    const ws = this.createWebSocket(wsUrl);
+    const queue = []; // parsed messages not yet yielded
+    let resolveNext = null; // resolver of the pending waitForMessage() promise, if any
+    let rejectNext = null; // rejecter of the pending waitForMessage() promise, if any
+    let closed = false; // set once the socket reports "close"
+    const notify = () => {
+      if (resolveNext) {
+        resolveNext();
+        resolveNext = null;
+      }
+    };
+    const fail = (error) => {
+      if (rejectNext) { // when nobody is waiting the error is dropped
+        rejectNext(error);
+        rejectNext = null;
+      }
+    };
+    const waitForMessage = () => new Promise((resolve, reject) => {
+      resolveNext = resolve;
+      rejectNext = reject;
+    });
+    const parseMessage = (data) => {
+      if (typeof data === "string") {
+        return JSON.parse(data);
+      }
+      if (data instanceof ArrayBuffer) {
+        return JSON.parse(new TextDecoder().decode(new Uint8Array(data)));
+      }
+      if (data instanceof Uint8Array) {
+        return JSON.parse(new TextDecoder().decode(data));
+      }
+      throw new Error("Unsupported WebSocket message type");
+    };
+    const openPromise = new Promise((resolve, reject) => {
+      ws.addEventListener("open", () => resolve());
+      ws.addEventListener("error", (event) => reject(event)); // only has an effect before "open" settles the promise
+    });
+    ws.addEventListener("message", (event) => {
+      try {
+        queue.push(parseMessage(event.data));
+        notify();
+      } catch (error) {
+        fail(error);
+      }
+    });
+    ws.addEventListener("close", () => {
+      closed = true;
+      notify(); // wake the loop so it can observe `closed`
+    });
+    try {
+      await openPromise;
+      while (!closed || queue.length > 0) { // keep draining queued messages after close
+        if (queue.length === 0) {
+          await waitForMessage();
+          continue;
+        }
+        const next = queue.shift();
+        if (next) { // falsy payloads (null, 0, "", false) are skipped
+          yield next;
+        }
+      }
+    } finally {
+      ws.close(); // runs on normal end, on error, and on early exit by the consumer
+    }
+  }
+  /**
+   * Score items against a query with a reranker model.
+   *
+   * @param model - Model name (e.g., "bge-reranker-v2"); URL-encoded into the path
+   * @param query - Query item
+   * @param items - Items to score against the query
+   * @param options - Optional `topK`, `gpu` and `waitForCapacity`
+   * @returns Parsed score result (`model`, `queryId`, `scores`)
+   *
+   * @example
+   * ```typescript
+   * const result = await client.score(
+   *   "bge-reranker-v2",
+   *   { text: "What is machine learning?" },
+   *   [
+   *     { id: "doc-1", text: "Machine learning is..." },
+   *     { id: "doc-2", text: "Python is..." },
+   *   ],
+   * );
+   *
+   * // Scores are sorted by relevance (descending)
+   * console.log(result.scores[0].itemId); // most relevant
+   * ```
+   */
+  async score(model, query, items, options = {}) {
+    const body = { query, items };
+    const { topK } = options;
+    if (topK !== void 0) {
+      body.top_k = topK;
+    }
+    const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
+    const target = this.parseGpuParam(options.gpu);
+    const response = await this.requestWithRetry(
+      `/v1/score/${encodeURIComponent(model)}`,
+      body,
+      target.pool,
+      target.gpu,
+      waitForCapacity,
+      model
+    );
+    const bytes = new Uint8Array(await response.arrayBuffer());
+    return parseScoreResult(unpackMessage(bytes));
+  }
+  /**
+   * Extract entities from one item or an array of items.
+   *
+   * A single (non-array) item yields a single result; an array yields an array.
+   *
+   * @param model - Model name; URL-encoded into the request path
+   * @param items - One item or an array of items
+   * @param options - Required; `labels` is always sent, `threshold` when defined;
+   *                  also accepts `gpu` and `waitForCapacity`
+   * @returns The parsed result, or an array of results for array input
+   * @throws Error when a single item was sent but the response holds no result
+   *
+   * @example
+   * ```typescript
+   * const result = await client.extract(
+   *   "gliner-multi-v2.1",
+   *   { text: "Apple was founded by Steve Jobs." },
+   *   { labels: ["person", "organization"] },
+   * );
+   *
+   * for (const entity of result.entities) {
+   *   console.log(`${entity.text} (${entity.label})`);
+   * }
+   * // Output:
+   * // Apple (organization)
+   * // Steve Jobs (person)
+   * ```
+   */
+  async extract(model, items, options) {
+    const wantsSingle = !Array.isArray(items);
+    const params = { labels: options.labels };
+    if (options.threshold !== void 0) {
+      params.threshold = options.threshold;
+    }
+    const body = {
+      items: wantsSingle ? [items] : items,
+      params
+    };
+    const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
+    const target = this.parseGpuParam(options.gpu);
+    const response = await this.requestWithRetry(
+      `/v1/extract/${encodeURIComponent(model)}`,
+      body,
+      target.pool,
+      target.gpu,
+      waitForCapacity,
+      model
+    );
+    const payload = unpackMessage(new Uint8Array(await response.arrayBuffer()));
+    const results = parseExtractResults(payload.items);
+    if (!wantsSingle) {
+      return results;
+    }
+    const first = results[0];
+    if (!first) {
+      throw new Error("No results returned from extract");
+    }
+    return first;
+  }
+  /**
+   * Close the client and release its local resources.
+   *
+   * Cancels every pending lease-renewal timer, aborts each pool's controller and
+   * forgets all tracked pools. No delete request is sent: per the SDK's notes the
+   * router garbage-collects inactive pools, which lets a reconnecting client
+   * reuse them.
+   */
+  async close() {
+    for (const poolState of this.pools.values()) {
+      const { timeoutId, abortController } = poolState;
+      if (timeoutId !== null) {
+        clearTimeout(timeoutId);
+      }
+      abortController.abort();
+    }
+    this.pools.clear();
+  }
+  /**
+   * Create a resource pool for isolated capacity.
+   *
+   * Pools provide dedicated worker capacity, isolated from other clients.
+   * Workers are assigned to pools and only serve requests from that pool.
+   *
+   * Sends POST /v1/pools and, on success, starts a background lease-renewal loop
+   * that posts to /v1/pools/{name}/renew every DEFAULT_LEASE_RENEWAL_INTERVAL ms
+   * until the pool is removed via deletePool() or close(). A name this client
+   * already tracks is a no-op.
+   *
+   * NOTE(review): the `pools.has` check and the `pools.set` call are separated by
+   * an await, so two overlapping calls with the same name appear able to start
+   * two renewal loops of which only the last is tracked -- confirm.
+   *
+   * @param name - Pool name (used in GPU param as "poolName/machineProfile")
+   * @param gpus - Machine profile requirements, e.g., { "l4": 2, "l4-spot": 1 }
+   * @throws PoolError when the server answers with status >= 400, when the request
+   *         times out, or when it fails for any other reason
+   *
+   * @example
+   * ```typescript
+   * // Create a pool with 2 L4 GPUs
+   * await client.createPool("eval-bench", { l4: 2 });
+   *
+   * // Use the pool for requests
+   * await client.encode("bge-m3", { text: "Hello" }, { gpu: "eval-bench/l4" });
+   *
+   * // Clean up when done
+   * await client.deletePool("eval-bench");
+   * ```
+   */
+  async createPool(name, gpus) {
+    if (this.pools.has(name)) { // already tracked by this client: no request is sent
+      return;
+    }
+    const requestBody = { name, gpus };
+    const url = `${this.baseUrl}/v1/pools`;
+    const headers = {
+      "Content-Type": JSON_CONTENT_TYPE,
+      Accept: JSON_CONTENT_TYPE,
+      [SDK_VERSION_HEADER]: SDK_VERSION
+    };
+    if (this.apiKey) {
+      headers.Authorization = `Bearer ${this.apiKey}`;
+    }
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), this.timeout); // request timeout for the create call
+    try {
+      const response = await fetch(url, {
+        method: "POST",
+        headers,
+        body: JSON.stringify(requestBody),
+        signal: controller.signal
+      });
+      if (response.status >= HTTP_CLIENT_ERROR_MIN) {
+        let errorMsg = response.statusText;
+        try {
+          const data = await response.json();
+          errorMsg = data.detail?.message ?? JSON.stringify(data);
+        } catch {
+        }
+        throw new PoolError(`Failed to create pool '${name}': ${errorMsg}`, name);
+      }
+      const abortController = new AbortController(); // aborted by deletePool()/close() to stop renewals
+      const poolState = {
+        timeoutId: null,
+        abortController,
+        isRenewing: false
+      };
+      const renewLoop = async () => {
+        if (abortController.signal.aborted) return;
+        if (poolState.isRenewing) return; // guard against overlapping renewal rounds
+        poolState.isRenewing = true;
+        try {
+          const renewUrl = `${this.baseUrl}/v1/pools/${encodeURIComponent(name)}/renew`;
+          const renewHeaders = {
+            Accept: JSON_CONTENT_TYPE
+          };
+          if (this.apiKey) {
+            renewHeaders.Authorization = `Bearer ${this.apiKey}`;
+          }
+          for (let attempt = 0; attempt < _LEASE_RENEWAL_MAX_RETRIES; attempt++) {
+            if (abortController.signal.aborted) return;
+            const perAttempt = new AbortController(); // aborts on either per-attempt timeout or pool abort
+            const onPoolAbort = () => perAttempt.abort();
+            abortController.signal.addEventListener("abort", onPoolAbort, { once: true });
+            const attemptTimeout = setTimeout(() => perAttempt.abort(), this.timeout);
+            try {
+              const resp = await fetch(renewUrl, {
+                method: "POST",
+                headers: renewHeaders,
+                signal: perAttempt.signal
+              });
+              if (resp.ok) break; // non-OK responses fall through to the backoff and are retried
+            } catch (error) {
+              if (abortController.signal.aborted) return; // other fetch errors are ignored and retried
+            } finally {
+              clearTimeout(attemptTimeout);
+              abortController.signal.removeEventListener("abort", onPoolAbort);
+            }
+            if (attempt < _LEASE_RENEWAL_MAX_RETRIES - 1) {
+              const aborted = await abortableSleep(
+                Math.min(2 ** attempt * 1e3, 1e4), // backoff of 1s, 2s, 4s, 8s (capped at 10s)
+                abortController.signal
+              );
+              if (aborted) return;
+            }
+          }
+        } finally {
+          poolState.isRenewing = false;
+        }
+        if (!abortController.signal.aborted) { // reschedule even when every attempt in this round failed
+          poolState.timeoutId = setTimeout(renewLoop, DEFAULT_LEASE_RENEWAL_INTERVAL);
+        }
+      };
+      poolState.timeoutId = setTimeout(renewLoop, DEFAULT_LEASE_RENEWAL_INTERVAL);
+      this.pools.set(name, poolState);
+    } catch (error) {
+      if (error instanceof PoolError) {
+        throw error;
+      }
+      if (error instanceof Error && error.name === "AbortError") { // raised when the timeout above aborts the fetch
+        throw new PoolError(`Timeout creating pool '${name}'`, name);
+      }
+      throw new PoolError(
+        `Failed to create pool '${name}': ${error instanceof Error ? error.message : "Unknown error"}`,
+        name
+      );
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+  /**
+   * Look up a pool by name via GET /v1/pools/{name}.
+   *
+   * Any failure -- the request throwing, the body not being JSON, or the payload
+   * lacking a `status` object -- is reported as null rather than as an error.
+   *
+   * @param name - Pool name to query
+   * @returns PoolInfo (`name`, `spec`, `status`) or null when it could not be read
+   *
+   * @example
+   * ```typescript
+   * await client.createPool("eval-bench", { l4: 2 });
+   * const pool = await client.getPool("eval-bench");
+   * console.log(`Pool state: ${pool?.status.state}`);
+   * console.log(`Workers: ${pool?.status.assignedWorkers.length}`);
+   * ```
+   */
+  async getPool(name) {
+    try {
+      const response = await this.requestJson(`/v1/pools/${encodeURIComponent(name)}`);
+      const data = await response.json();
+      const poolName = data.name;
+      const spec = data.spec;
+      const { state, assigned_workers, created_at, last_renewed } = data.status;
+      return {
+        name: poolName,
+        spec,
+        status: {
+          state,
+          assignedWorkers: assigned_workers,
+          createdAt: created_at,
+          lastRenewed: last_renewed
+        }
+      };
+    } catch {
+      return null;
+    }
+  }
+  /**
+   * Delete a pool.
+   *
+   * Local lease renewal for the pool is stopped first (timer cleared, controller
+   * aborted, entry removed) regardless of how the DELETE request turns out.
+   *
+   * @param name - Pool name to delete
+   * @returns true if the DELETE succeeded or the server answered 404 (pool already
+   *          absent); false if the request failed, timed out, or returned any
+   *          other status
+   *
+   * @example
+   * ```typescript
+   * // Clean up pool when done
+   * const deleted = await client.deletePool("eval-bench");
+   * if (deleted) {
+   *   console.log("Pool deleted successfully");
+   * }
+   * ```
+   */
+  async deletePool(name) {
+    const poolState = this.pools.get(name);
+    if (poolState) {
+      if (poolState.timeoutId !== null) {
+        clearTimeout(poolState.timeoutId);
+      }
+      poolState.abortController.abort(); // stops an in-flight renewal round
+      this.pools.delete(name);
+    }
+    try {
+      const url = `${this.baseUrl}/v1/pools/${encodeURIComponent(name)}`;
+      const headers = {
+        Accept: JSON_CONTENT_TYPE
+      };
+      if (this.apiKey) {
+        headers.Authorization = `Bearer ${this.apiKey}`;
+      }
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), this.timeout); // request timeout
+      try {
+        const response = await fetch(url, {
+          method: "DELETE",
+          headers,
+          signal: controller.signal
+        });
+        return response.ok || response.status === 404; // 404 is treated as success
+      } finally {
+        clearTimeout(timeoutId);
+      }
+    } catch {
+      return false; // network errors and timeouts are reported as false, not thrown
+    }
+  }
+  /**
+   * Warn once when the server's reported version is too far from the SDK's.
+   *
+   * Reads the SERVER_VERSION_HEADER response header and compares major/minor with
+   * SDK_VERSION. A differing major version, or minor versions more than one apart,
+   * logs a console warning and sets `versionWarningLogged` so that later calls
+   * return immediately. A missing header or a version without a minor part is ignored.
+   *
+   * @param response - fetch-style response whose headers are inspected
+   */
+  checkServerVersion(response) {
+    if (this.versionWarningLogged) return;
+    const serverVersion = response.headers.get(SERVER_VERSION_HEADER);
+    if (!serverVersion) return;
+    try {
+      const [sdkMajor, sdkMinor] = SDK_VERSION.split(".").map(Number);
+      const [serverMajor, serverMinor] = serverVersion.split(".").map(Number);
+      // An undefined minor means that version string had fewer than two parts.
+      if (sdkMinor === void 0 || serverMinor === void 0) {
+        return;
+      }
+      const majorDiffers = sdkMajor !== serverMajor;
+      if (majorDiffers || Math.abs(sdkMinor - serverMinor) > 1) {
+        console.warn(
+          `[SIE SDK] Version skew detected: SDK ${SDK_VERSION}, server ${serverVersion}. Consider upgrading.`
+        );
+        this.versionWarningLogged = true;
+      }
+    } catch {
+    }
+  }
+  /**
+   * Split a GPU parameter into pool and GPU components.
+   *
+   * Falls back to the client's default `gpu` when none is passed. A value of the
+   * exact form "pool/gpu" (two non-empty segments) is split; any other value is
+   * returned whole as the GPU.
+   *
+   * @param gpu - GPU parameter for this request, if any
+   * @returns `{}` when no GPU applies, `{ pool, gpu }` for "pool/gpu", else `{ gpu }`
+   */
+  parseGpuParam(gpu) {
+    const effectiveGpu = gpu ?? this.gpu;
+    if (!effectiveGpu) {
+      return {};
+    }
+    const [pool, gpuType, ...extra] = effectiveGpu.split("/");
+    const isPoolForm = extra.length === 0 && pool && gpuType;
+    return isPoolForm ? { pool, gpu: gpuType } : { gpu: effectiveGpu };
+  }
+  /**
+   * Get current cluster capacity information.
+   *
+   * Queries the router's /health endpoint for cluster state. Useful for
+   * checking if specific GPU types are available before sending requests.
+   *
+   * @param gpu - Optional filter to check specific GPU type availability
+   * @returns CapacityInfo with worker count, GPU types, and worker details
+   *
+   * @example
+   * ```typescript
+   * // Check cluster state
+   * const capacity = await client.getCapacity();
+   * console.log(`Workers: ${capacity.workerCount}, GPUs: ${capacity.liveGpuTypes}`);
+   *
+   * // Check if L4 GPUs are available
+   * const l4Capacity = await client.getCapacity("l4");
+   * if (l4Capacity.workerCount > 0) {
+   *   console.log("L4 workers available");
+   * }
+   * ```
+   */
+  async getCapacity(gpu) {
+    const healthResponse = await this.requestJson("/health");
+    const health = await healthResponse.json();
+    // The payload must identify itself as a router; anything else is rejected.
+    const isRouter = health.type === "router";
+    if (isRouter) {
+      return parseCapacityInfo(health, gpu);
+    }
+    throw new RequestError(
+      "getCapacity() requires a router endpoint. This appears to be a worker.",
+      "not_router",
+      400
+    );
+  }
+  /**
+   * Wait for GPU capacity to become available.
+   *
+   * Polls the router until workers with the specified GPU type are online.
+   * This is useful for pre-warming the cluster before running benchmarks.
+   *
+   * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
+   * @param options - Wait options
+   * @returns CapacityInfo once capacity is available
+   *
+   * @example
+   * ```typescript
+   * // Wait for L4 capacity before running benchmarks
+   * const capacity = await client.waitForCapacity("l4", { timeout: 300000 });
+   * console.log(`Ready with ${capacity.workerCount} L4 workers`);
+   *
+   * // Wait and pre-load a model
+   * const capacityWithModel = await client.waitForCapacity("l4", { model: "bge-m3" });
+   * ```
+   */
+  async waitForCapacity(gpu, options = {}) {
+    const timeout = options.timeout ?? this.provisionTimeout;
+    const pollInterval = options.pollInterval ?? 5e3;
+    const startTime = Date.now();
+    if (options.model) {
+      // Warm-up path: issue a real encode call that waits for capacity, then
+      // report the resulting cluster state.
+      await this.encode(options.model, { text: "warmup" }, { gpu, waitForCapacity: true });
+      return this.getCapacity(gpu);
+    }
+    while (true) {
+      try {
+        const capacity = await this.getCapacity(gpu);
+        if (capacity.workerCount > 0) {
+          return capacity;
+        }
+      } catch (error) {
+        // A non-router endpoint will not start answering as a router, so
+        // polling until the timeout would only delay a misleading
+        // ProvisioningError. Surface the real cause immediately.
+        if (error instanceof RequestError && error.code === "not_router") {
+          throw error;
+        }
+        // Every other failure is treated as transient and retried until the
+        // deadline below.
+      }
+      const elapsed = Date.now() - startTime;
+      if (elapsed >= timeout) {
+        throw new ProvisioningError(
+          `Timeout after ${elapsed}ms waiting for GPU '${gpu}' capacity`,
+          gpu
+        );
+      }
+      // Never sleep past the deadline.
+      const remaining = timeout - elapsed;
+      const delay = Math.min(pollInterval, remaining);
+      await sleep(delay);
+    }
+  }
+  /**
+   * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
+   */
+  async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
+    // startTime anchors the provisionTimeout budget shared by the 202 and
+    // MODEL_LOADING retry paths; loraRetries bounds the LORA_LOADING path.
+    const startTime = Date.now();
+    let loraRetries = 0;
+    while (true) {
+      const response = await this.request(path, body, pool, gpu);
+      // HTTP_ACCEPTED: handled as "no capacity yet, server is provisioning".
+      if (response.status === HTTP_ACCEPTED) {
+        const retryAfter = getRetryAfter2(response);
+        // Callers that did not opt into waiting get an immediate error.
+        if (!waitForCapacity) {
+          throw new ProvisioningError(
+            `No capacity available for GPU '${gpu}'. Server is provisioning.`,
+            gpu,
+            retryAfter
+          );
+        }
+        const elapsed = Date.now() - startTime;
+        if (elapsed >= this.provisionTimeout) {
+          throw new ProvisioningError(
+            `Provisioning timeout after ${elapsed}ms waiting for GPU '${gpu}'`,
+            gpu,
+            retryAfter
+          );
+        }
+        // Prefer the server-provided delay, but never sleep past the deadline.
+        const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
+        const remaining = this.provisionTimeout - elapsed;
+        const actualDelay = Math.min(delay, remaining);
+        await sleep(actualDelay);
+        continue;
+      }
+      if (response.status === 503) {
+        // The error code is read from a clone so the original response is
+        // still unread when it reaches handleError below.
+        const clonedResponse = response.clone();
+        const errorCode = await getErrorCode(clonedResponse);
+        if (errorCode === LORA_LOADING_ERROR_CODE) {
+          // LoRA loading is bounded by a retry count, not by provisionTimeout.
+          loraRetries += 1;
+          if (loraRetries > LORA_LOADING_MAX_RETRIES) {
+            throw new LoraLoadingError(
+              `LoRA loading timeout after ${loraRetries} retries`,
+              void 0,
+              // We don't have lora name at this level
+              model
+            );
+          }
+          const retryAfter = getRetryAfter2(response);
+          const delay = retryAfter ?? LORA_LOADING_DEFAULT_DELAY;
+          await sleep(delay);
+          continue;
+        }
+        if (errorCode === MODEL_LOADING_ERROR_CODE) {
+          // Model loading shares the provisionTimeout budget measured from startTime.
+          const elapsed = Date.now() - startTime;
+          if (elapsed >= this.provisionTimeout) {
+            throw new ModelLoadingError(
+              `Model loading timeout after ${(elapsed / 1e3).toFixed(1)}s for '${model}'`,
+              model
+            );
+          }
+          const retryAfter = getRetryAfter2(response);
+          const delay = retryAfter ?? MODEL_LOADING_DEFAULT_DELAY;
+          const remaining = this.provisionTimeout - elapsed;
+          const actualDelay = Math.min(delay, remaining);
+          await sleep(actualDelay);
+          continue;
+        }
+        // Any other 503 falls through to the generic error handling below.
+      }
+      if (!response.ok) {
+        // NOTE(review): handleError presumably throws for every non-OK
+        // response; if it ever returns, the non-OK response is returned as-is.
+        await handleError(response, gpu);
+      }
+      this.checkServerVersion(response);
+      return response;
+    }
+  }
+  /**
+   * Make a single msgpack HTTP request to the SIE server (no retry logic).
+   */
+  async request(path, body, pool, gpu) {
+    const target = `${this.baseUrl}${path}`;
+    const hasBody = body !== void 0;
+    // Optional headers are spread in conditionally, in the same order the
+    // request has always sent them.
+    const headers = {
+      Accept: MSGPACK_CONTENT_TYPE,
+      [SDK_VERSION_HEADER]: SDK_VERSION,
+      ...(hasBody ? { "Content-Type": MSGPACK_CONTENT_TYPE } : {}),
+      ...(pool ? { "X-SIE-Pool": pool } : {}),
+      ...(gpu ? { "X-SIE-MACHINE-PROFILE": gpu } : {}),
+      ...(this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {})
+    };
+    // Abort the fetch if it has not resolved within this.timeout milliseconds.
+    const abortController = new AbortController();
+    const timer = setTimeout(() => abortController.abort(), this.timeout);
+    try {
+      return await fetch(target, {
+        method: "POST",
+        headers,
+        body: hasBody ? packMessage(body) : void 0,
+        signal: abortController.signal
+      });
+    } catch (error) {
+      const timedOut = error instanceof Error && error.name === "AbortError";
+      if (timedOut) {
+        throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
+      }
+      // TypeError raised inside the try is reported as a connection failure.
+      if (error instanceof TypeError) {
+        throw new SIEConnectionError(`Connection failed: ${error.message}`);
+      }
+      throw error;
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+  /**
+   * Make a JSON HTTP request to the SIE server.
+   * Used for endpoints that return JSON (e.g., /v1/models, /health).
+   */
+  async requestJson(path, method = "GET") {
+    const target = `${this.baseUrl}${path}`;
+    const headers = {
+      Accept: "application/json",
+      [SDK_VERSION_HEADER]: SDK_VERSION,
+      ...(this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {})
+    };
+    // Abort the fetch if it has not resolved within this.timeout milliseconds.
+    const abortController = new AbortController();
+    const timer = setTimeout(() => abortController.abort(), this.timeout);
+    try {
+      const response = await fetch(target, {
+        method,
+        headers,
+        signal: abortController.signal
+      });
+      if (response.ok) {
+        return response;
+      }
+      // Non-OK responses are handed to the shared error handler.
+      await handleError(response);
+      return response;
+    } catch (error) {
+      const timedOut = error instanceof Error && error.name === "AbortError";
+      if (timedOut) {
+        throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
+      }
+      if (error instanceof TypeError) {
+        throw new SIEConnectionError(`Connection failed: ${error.message}`);
+      }
+      throw error;
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+  /**
+   * Derive a WebSocket URL from the client's base URL.
+   *
+   * "https:" becomes "wss:" and every other scheme becomes "ws:". A single
+   * trailing slash on the base path is dropped before `path` is appended, and
+   * any query string on the base URL is cleared.
+   *
+   * @param path - Path to append to the base URL's path
+   * @returns The WebSocket URL as a string
+   */
+  buildWsUrl(path) {
+    const wsUrl = new URL(this.baseUrl);
+    const isSecure = wsUrl.protocol === "https:";
+    wsUrl.protocol = isSecure ? "wss:" : "ws:";
+    const basePath = wsUrl.pathname.replace(/\/$/, "");
+    wsUrl.pathname = `${basePath}${path}`;
+    wsUrl.search = "";
+    return wsUrl.toString();
+  }
+  /**
+   * Open a WebSocket to `url`, attaching a Bearer Authorization header when
+   * the client has an API key.
+   *
+   * @param url - WebSocket URL to connect to
+   * @returns The constructed WebSocket
+   * @throws SIEConnectionError if construction fails while auth headers are in use
+   */
+  createWebSocket(url) {
+    if (!this.apiKey) {
+      // No credentials: construction errors propagate unchanged.
+      return new WebSocket(url);
+    }
+    const headers = { Authorization: `Bearer ${this.apiKey}` };
+    try {
+      // The third (options) argument carries the headers.
+      return new WebSocket(url, [], { headers });
+    } catch {
+      throw new SIEConnectionError(
+        "WebSocket auth headers are not supported in this environment"
+      );
+    }
+  }
+  /**
+   * Probe `/health` to classify the endpoint.
+   *
+   * @returns "cluster" when the health payload reports `type === "router"`;
+   *   "worker" in every other case, including non-OK responses, timeouts and
+   *   any other failure
+   */
+  async detectEndpointType() {
+    const target = `${this.baseUrl}/health`;
+    const headers = { Accept: "application/json" };
+    if (this.apiKey) {
+      headers.Authorization = `Bearer ${this.apiKey}`;
+    }
+    const abortController = new AbortController();
+    const timer = setTimeout(() => abortController.abort(), this.timeout);
+    try {
+      const response = await fetch(target, {
+        method: "GET",
+        headers,
+        signal: abortController.signal
+      });
+      if (response.ok) {
+        const { type } = await response.json();
+        return type === "router" ? "cluster" : "worker";
+      }
+      return "worker";
+    } catch {
+      // Detection is best-effort: any failure falls back to "worker".
+      return "worker";
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+};
+// src/types.ts
+/**
+ * Copy an iterable or array-like value (e.g. a typed array) into a plain Array.
+ *
+ * @param arr - Source values
+ * @returns A new Array holding the same elements in order
+ */
+function toNumberArray(arr) {
+  const plain = Array.from(arr);
+  return plain;
+}
+/**
+ * Build a Float32Array from the given value using the Float32Array constructor.
+ *
+ * @param arr - Source values
+ * @returns A new Float32Array
+ */
+function toFloat32Array(arr) {
+  const typed = new Float32Array(arr);
+  return typed;
+}
+// src/scoring.ts
+/**
+ * MaxSim score between two sets of token vectors.
+ *
+ * For every query token, take the largest dot product against any document
+ * token, then sum those maxima. Dot products run over the query token's
+ * length; missing components count as 0.
+ *
+ * @param query - Query token vectors
+ * @param document - Document token vectors
+ * @returns The summed score, or 0 when either side is empty
+ */
+function maxsim(query, document) {
+  if (query.length === 0 || document.length === 0) {
+    return 0;
+  }
+  // Dot product over the query token's dimensions.
+  const dot = (queryToken, docToken) => {
+    let acc = 0;
+    for (let dim = 0; dim < queryToken.length; dim += 1) {
+      acc += (queryToken[dim] ?? 0) * (docToken[dim] ?? 0);
+    }
+    return acc;
+  };
+  let total = 0;
+  for (const queryToken of query) {
+    let best = Number.NEGATIVE_INFINITY;
+    for (const docToken of document) {
+      const similarity = dot(queryToken, docToken);
+      best = similarity > best ? similarity : best;
+    }
+    total += best;
+  }
+  return total;
+}
+/**
+ * Score one query against each document with `maxsim`.
+ *
+ * @param query - Query token vectors
+ * @param documents - Documents, each a set of token vectors
+ * @returns One score per document, in input order
+ */
+function maxsimDocuments(query, documents) {
+  const scoreAgainstQuery = (doc) => maxsim(query, doc);
+  return documents.map(scoreAgainstQuery);
+}
+/**
+ * Score every query against every document with `maxsim`.
+ *
+ * @param queries - Queries, each a set of token vectors
+ * @param documents - Documents, each a set of token vectors
+ * @returns Float32Array of length `queries.length * documents.length` in
+ *   row-major order (all documents for the first query, then the second, ...)
+ */
+function maxsimBatch(queries, documents) {
+  const columns = documents.length;
+  const scores = new Float32Array(queries.length * columns);
+  for (let row = 0; row < queries.length; row += 1) {
+    const rowOffset = row * columns;
+    for (let col = 0; col < columns; col += 1) {
+      scores[rowOffset + col] = maxsim(queries[row], documents[col]);
+    }
+  }
+  return scores;
+}
+// src/images.ts
+/**
+ * Normalize a supported image input into raw bytes.
+ *
+ * Accepted inputs:
+ * - Uint8Array: returned as-is (no copy)
+ * - ArrayBuffer: wrapped in a Uint8Array view
+ * - Blob (where the runtime defines it): read via `arrayBuffer()`
+ * - string: either a base64 data URL (`data:<mediatype>[;params];base64,<payload>`)
+ *   or a bare base64 string
+ *
+ * @param input - Image data in one of the supported forms
+ * @returns The image bytes
+ * @throws Error if the input type is not supported
+ */
+async function toImageBytes(input) {
+  if (input instanceof Uint8Array) {
+    return input;
+  }
+  if (input instanceof ArrayBuffer) {
+    return new Uint8Array(input);
+  }
+  if (typeof Blob !== "undefined" && input instanceof Blob) {
+    const buffer = await input.arrayBuffer();
+    return new Uint8Array(buffer);
+  }
+  if (typeof input === "string") {
+    // Everything between "data:" and ";base64," is the media type plus
+    // optional parameters (possibly empty, e.g. "data:;base64,..." or
+    // "data:image/png;charset=utf-8;base64,..."). The payload may be empty or
+    // span several lines, so it is matched with [\s\S]* rather than ".+".
+    const dataUrlMatch = /^data:[^,]*;base64,([\s\S]*)$/i.exec(input);
+    if (dataUrlMatch) {
+      return base64ToBytes(dataUrlMatch[1]);
+    }
+    // Not a data URL: treat the whole string as base64.
+    return base64ToBytes(input);
+  }
+  throw new Error(`Unsupported image input type: ${typeof input}`);
+}
+/**
+ * Decode a base64 string into bytes.
+ *
+ * Uses the global `atob` when it exists and falls back to `Buffer` otherwise.
+ *
+ * @param base64 - Base64-encoded text
+ * @returns The decoded bytes
+ */
+function base64ToBytes(base64) {
+  if (typeof atob !== "function") {
+    return new Uint8Array(Buffer.from(base64, "base64"));
+  }
+  // atob yields a "binary string": one character per byte, code units 0-255.
+  const binary = atob(base64);
+  return Uint8Array.from(binary, (char) => char.charCodeAt(0));
+}
+/**
+ * Pair an image's bytes with a format label.
+ *
+ * The label is taken from the `format` argument as given; it is not derived
+ * from the bytes.
+ *
+ * @param input - Image input accepted by `toImageBytes`
+ * @param format - Format label to attach (defaults to "jpeg")
+ * @returns An object `{ data, format }`
+ */
+async function toImageWireFormat(input, format = "jpeg") {
+  return { data: await toImageBytes(input), format };
+}
+/**
+ * Identify an image format from its leading signature bytes.
+ *
+ * @param bytes - Image bytes (indexable, with a `length`)
+ * @returns "jpeg", "png", "webp", or "unknown" (also for inputs shorter than 4 bytes)
+ */
+function detectImageFormat(bytes) {
+  if (bytes.length < 4) {
+    return "unknown";
+  }
+  // True when `signature` appears in `bytes` starting at `offset`.
+  const matchesAt = (offset, signature) =>
+    signature.every((value, i) => bytes[offset + i] === value);
+  // FF D8 FF
+  if (matchesAt(0, [0xff, 0xd8, 0xff])) {
+    return "jpeg";
+  }
+  // 89 "PNG"
+  if (matchesAt(0, [0x89, 0x50, 0x4e, 0x47])) {
+    return "png";
+  }
+  // "RIFF" at offset 0 and "WEBP" at offset 8
+  const startsWithRiff = matchesAt(0, [0x52, 0x49, 0x46, 0x46]);
+  if (startsWithRiff && bytes.length >= 12 && matchesAt(8, [0x57, 0x45, 0x42, 0x50])) {
+    return "webp";
+  }
+  return "unknown";
+}
+// Public API of the CommonJS bundle: error classes, the client, the SDK
+// version constant, scoring helpers, image helpers, and the msgpack
+// pack/unpack helpers. Each name is assigned individually on `exports`.
+exports.LoraLoadingError = LoraLoadingError;
+exports.ModelLoadingError = ModelLoadingError;
+exports.PoolError = PoolError;
+exports.ProvisioningError = ProvisioningError;
+exports.RequestError = RequestError;
+exports.SDK_VERSION = SDK_VERSION;
+exports.SIEClient = SIEClient;
+exports.SIEConnectionError = SIEConnectionError;
+exports.SIEError = SIEError;
+exports.ServerError = ServerError;
+exports.detectImageFormat = detectImageFormat;
+exports.maxsim = maxsim;
+exports.maxsimBatch = maxsimBatch;
+exports.maxsimDocuments = maxsimDocuments;
+exports.packMessage = packMessage;
+exports.toFloat32Array = toFloat32Array;
+exports.toImageBytes = toImageBytes;
+exports.toImageWireFormat = toImageWireFormat;
+exports.toNumberArray = toNumberArray;
+exports.unpackMessage = unpackMessage;
+//# sourceMappingURL=index.cjs.map
+//# sourceMappingURL=index.cjs.map