@superlinked/sie-sdk 0.1.10 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -11,9 +11,11 @@ var SIEError = class extends Error {
11
11
  }
12
12
  };
13
13
  var SIEConnectionError = class extends SIEError {
14
- constructor(message) {
14
+ kind;
15
+ constructor(message, kind = "other") {
15
16
  super(message);
16
17
  this.name = "SIEConnectionError";
18
+ this.kind = kind;
17
19
  }
18
20
  };
19
21
  var RequestError = class extends SIEError {
@@ -105,9 +107,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
105
107
  var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
106
108
  var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
107
109
  var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
108
-
109
- // src/version.ts
110
- var SDK_VERSION = "0.1.10";
111
110
  var EXT_TYPE_NUMPY = 78;
112
111
  function parseDtype(dtype) {
113
112
  const typeChar = dtype.slice(-2, -1);
@@ -476,6 +475,9 @@ function parseCapacityInfo(data, gpuFilter) {
476
475
  };
477
476
  }
478
477
 
478
+ // src/version.ts
479
+ var SDK_VERSION = "0.3.0";
480
+
479
481
  // src/client.ts
480
482
  function sleep(ms) {
481
483
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -616,9 +618,9 @@ var SIEClient = class {
616
618
  };
617
619
  }
618
620
  /**
619
- * Stream real-time status updates from a worker or router.
621
+ * Stream real-time status updates from a worker or gateway.
620
622
  *
621
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
623
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
622
624
  * "auto" detects the endpoint via /health.
623
625
  */
624
626
  async *watch(mode = "auto") {
@@ -789,7 +791,7 @@ var SIEClient = class {
789
791
  * Close the client and cleanup resources.
790
792
  *
791
793
  * Stops pool lease renewal timers. Note that pools are not deleted
792
- * automatically - they are garbage collected by the router after inactivity.
794
+ * automatically - they are garbage collected by the gateway after inactivity.
793
795
  * This allows pool reuse if the client reconnects.
794
796
  */
795
797
  async close() {
@@ -1046,7 +1048,7 @@ var SIEClient = class {
1046
1048
  /**
1047
1049
  * Get current cluster capacity information.
1048
1050
  *
1049
- * Queries the router's /health endpoint for cluster state. Useful for
1051
+ * Queries the gateway's /health endpoint for cluster state. Useful for
1050
1052
  * checking if specific GPU types are available before sending requests.
1051
1053
  *
1052
1054
  * @param gpu - Optional filter to check specific GPU type availability
@@ -1068,10 +1070,10 @@ var SIEClient = class {
1068
1070
  async getCapacity(gpu) {
1069
1071
  const response = await this.requestJson("/health");
1070
1072
  const data = await response.json();
1071
- if (data.type !== "router") {
1073
+ if (data.type !== "gateway") {
1072
1074
  throw new RequestError(
1073
- "getCapacity() requires a router endpoint. This appears to be a worker.",
1074
- "not_router",
1075
+ "getCapacity() requires a gateway endpoint. This appears to be a worker.",
1076
+ "not_gateway",
1075
1077
  400
1076
1078
  );
1077
1079
  }
@@ -1080,7 +1082,7 @@ var SIEClient = class {
1080
1082
  /**
1081
1083
  * Wait for GPU capacity to become available.
1082
1084
  *
1083
- * Polls the router until workers with the specified GPU type are online.
1085
+ * Polls the gateway until workers with the specified GPU type are online.
1084
1086
  * This is useful for pre-warming the cluster before running benchmarks.
1085
1087
  *
1086
1088
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -1126,13 +1128,35 @@ var SIEClient = class {
1126
1128
  }
1127
1129
  }
1128
1130
  /**
1129
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
1131
+ * Make a msgpack HTTP request with retry logic.
1132
+ *
1133
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
1134
+ * - 202 Accepted (provisioning)
1135
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
1136
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
1137
+ *
1138
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
1139
+ * timeout from `timeout` to `provisionTimeout`.
1130
1140
  */
1131
1141
  async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
1132
1142
  const startTime = Date.now();
1133
1143
  let loraRetries = 0;
1134
1144
  while (true) {
1135
- const response = await this.request(path, body, pool, gpu);
1145
+ let response;
1146
+ try {
1147
+ response = await this.request(path, body, pool, gpu);
1148
+ } catch (err) {
1149
+ if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
1150
+ const elapsed = Date.now() - startTime;
1151
+ if (elapsed < this.provisionTimeout) {
1152
+ const remaining = this.provisionTimeout - elapsed;
1153
+ const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
1154
+ await sleep(delay);
1155
+ continue;
1156
+ }
1157
+ }
1158
+ throw err;
1159
+ }
1136
1160
  if (response.status === HTTP_ACCEPTED) {
1137
1161
  const retryAfter = getRetryAfter2(response);
1138
1162
  if (!waitForCapacity) {
@@ -1189,6 +1213,17 @@ var SIEClient = class {
1189
1213
  await sleep(actualDelay);
1190
1214
  continue;
1191
1215
  }
1216
+ if (waitForCapacity) {
1217
+ const elapsed = Date.now() - startTime;
1218
+ if (elapsed < this.provisionTimeout) {
1219
+ const retryAfter = getRetryAfter2(response);
1220
+ const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1221
+ const remaining = this.provisionTimeout - elapsed;
1222
+ const actualDelay = Math.min(delay, remaining);
1223
+ await sleep(actualDelay);
1224
+ continue;
1225
+ }
1226
+ }
1192
1227
  }
1193
1228
  if (!response.ok) {
1194
1229
  await handleError(response, gpu);
@@ -1230,10 +1265,10 @@ var SIEClient = class {
1230
1265
  return response;
1231
1266
  } catch (error) {
1232
1267
  if (error instanceof Error && error.name === "AbortError") {
1233
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1268
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1234
1269
  }
1235
1270
  if (error instanceof TypeError) {
1236
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1271
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1237
1272
  }
1238
1273
  throw error;
1239
1274
  } finally {
@@ -1267,10 +1302,10 @@ var SIEClient = class {
1267
1302
  return response;
1268
1303
  } catch (error) {
1269
1304
  if (error instanceof Error && error.name === "AbortError") {
1270
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1305
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1271
1306
  }
1272
1307
  if (error instanceof TypeError) {
1273
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1308
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1274
1309
  }
1275
1310
  throw error;
1276
1311
  } finally {
@@ -1318,7 +1353,7 @@ var SIEClient = class {
1318
1353
  return "worker";
1319
1354
  }
1320
1355
  const data = await response.json();
1321
- return data.type === "router" ? "cluster" : "worker";
1356
+ return data.type === "gateway" ? "cluster" : "worker";
1322
1357
  } catch {
1323
1358
  return "worker";
1324
1359
  } finally {