@superlinked/sie-sdk 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -11,9 +11,11 @@ var SIEError = class extends Error {
11
11
  }
12
12
  };
13
13
  var SIEConnectionError = class extends SIEError {
14
- constructor(message) {
14
+ kind;
15
+ constructor(message, kind = "other") {
15
16
  super(message);
16
17
  this.name = "SIEConnectionError";
18
+ this.kind = kind;
17
19
  }
18
20
  };
19
21
  var RequestError = class extends SIEError {
@@ -85,6 +87,28 @@ var ModelLoadingError = class extends SIEError {
85
87
  this.model = model;
86
88
  }
87
89
  };
90
+ var ModelLoadFailedError = class extends ServerError {
91
+ /** The model that was requested */
92
+ model;
93
+ /**
94
+ * Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
95
+ * `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
96
+ * remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
97
+ */
98
+ errorClass;
99
+ /** Whether the failure is non-retryable per server policy. */
100
+ permanent;
101
+ /** How many load attempts the server has logged. */
102
+ attempts;
103
+ constructor(message, options) {
104
+ super(message, "MODEL_LOAD_FAILED", 502);
105
+ this.name = "ModelLoadFailedError";
106
+ this.model = options?.model;
107
+ this.errorClass = options?.errorClass;
108
+ this.permanent = options?.permanent ?? true;
109
+ this.attempts = options?.attempts ?? 1;
110
+ }
111
+ };
88
112
 
89
113
  // src/internal/constants.ts
90
114
  var MSGPACK_CONTENT_TYPE = "application/msgpack";
@@ -105,9 +129,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
105
129
  var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
106
130
  var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
107
131
  var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
108
-
109
- // src/version.ts
110
- var SDK_VERSION = "0.2.0";
111
132
  var EXT_TYPE_NUMPY = 78;
112
133
  function parseDtype(dtype) {
113
134
  const typeChar = dtype.slice(-2, -1);
@@ -312,7 +333,7 @@ function getRetryAfter2(response) {
312
333
  const header = response.headers.get("Retry-After");
313
334
  return getRetryAfter(header);
314
335
  }
315
- async function getErrorCode(response) {
336
+ async function getErrorDetail(response) {
316
337
  try {
317
338
  const contentType = response.headers.get("content-type") ?? "";
318
339
  let data;
@@ -323,24 +344,42 @@ async function getErrorCode(response) {
323
344
  data = await response.json();
324
345
  }
325
346
  if (data.error && typeof data.error === "object") {
326
- const error = data.error;
327
- if (typeof error.code === "string") {
328
- return error.code;
329
- }
347
+ return data.error;
330
348
  }
331
349
  if (data.detail && typeof data.detail === "object") {
332
- const detail = data.detail;
333
- if (typeof detail.code === "string") {
334
- return detail.code;
335
- }
350
+ return data.detail;
336
351
  }
337
352
  if (typeof data.code === "string") {
338
- return data.code;
353
+ return data;
339
354
  }
340
355
  } catch {
341
356
  }
342
357
  return void 0;
343
358
  }
359
+ async function getErrorCode(response) {
360
+ const detail = await getErrorDetail(response);
361
+ if (!detail) return void 0;
362
+ const code = detail.code;
363
+ return typeof code === "string" ? code : void 0;
364
+ }
365
+ async function throwIfModelLoadFailed(response, model) {
366
+ if (response.status !== 502) return;
367
+ const detail = await getErrorDetail(response.clone());
368
+ if (!detail) return;
369
+ if (detail.code !== "MODEL_LOAD_FAILED") return;
370
+ const errorClass = typeof detail.error_class === "string" ? detail.error_class : void 0;
371
+ const permanent = typeof detail.permanent === "boolean" ? detail.permanent : true;
372
+ const attemptsRaw = detail.attempts;
373
+ const parsedAttempts = typeof attemptsRaw === "number" ? attemptsRaw : typeof attemptsRaw === "string" ? Number.parseInt(attemptsRaw, 10) : Number.NaN;
374
+ const attempts = Number.isFinite(parsedAttempts) ? parsedAttempts : 1;
375
+ const message = typeof detail.message === "string" ? detail.message : `Model '${model ?? "?"}' failed to load`;
376
+ throw new ModelLoadFailedError(message, {
377
+ model,
378
+ errorClass,
379
+ permanent,
380
+ attempts
381
+ });
382
+ }
344
383
  async function handleError(response, gpu) {
345
384
  const { status } = response;
346
385
  let errorBody = {};
@@ -476,6 +515,9 @@ function parseCapacityInfo(data, gpuFilter) {
476
515
  };
477
516
  }
478
517
 
518
+ // src/version.ts
519
+ var SDK_VERSION = "0.3.1";
520
+
479
521
  // src/client.ts
480
522
  function sleep(ms) {
481
523
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -616,9 +658,9 @@ var SIEClient = class {
616
658
  };
617
659
  }
618
660
  /**
619
- * Stream real-time status updates from a worker or router.
661
+ * Stream real-time status updates from a worker or gateway.
620
662
  *
621
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
663
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
622
664
  * "auto" detects the endpoint via /health.
623
665
  */
624
666
  async *watch(mode = "auto") {
@@ -789,7 +831,7 @@ var SIEClient = class {
789
831
  * Close the client and cleanup resources.
790
832
  *
791
833
  * Stops pool lease renewal timers. Note that pools are not deleted
792
- * automatically - they are garbage collected by the router after inactivity.
834
+ * automatically - they are garbage collected by the gateway after inactivity.
793
835
  * This allows pool reuse if the client reconnects.
794
836
  */
795
837
  async close() {
@@ -1046,7 +1088,7 @@ var SIEClient = class {
1046
1088
  /**
1047
1089
  * Get current cluster capacity information.
1048
1090
  *
1049
- * Queries the router's /health endpoint for cluster state. Useful for
1091
+ * Queries the gateway's /health endpoint for cluster state. Useful for
1050
1092
  * checking if specific GPU types are available before sending requests.
1051
1093
  *
1052
1094
  * @param gpu - Optional filter to check specific GPU type availability
@@ -1068,10 +1110,10 @@ var SIEClient = class {
1068
1110
  async getCapacity(gpu) {
1069
1111
  const response = await this.requestJson("/health");
1070
1112
  const data = await response.json();
1071
- if (data.type !== "router") {
1113
+ if (data.type !== "gateway") {
1072
1114
  throw new RequestError(
1073
- "getCapacity() requires a router endpoint. This appears to be a worker.",
1074
- "not_router",
1115
+ "getCapacity() requires a gateway endpoint. This appears to be a worker.",
1116
+ "not_gateway",
1075
1117
  400
1076
1118
  );
1077
1119
  }
@@ -1080,7 +1122,7 @@ var SIEClient = class {
1080
1122
  /**
1081
1123
  * Wait for GPU capacity to become available.
1082
1124
  *
1083
- * Polls the router until workers with the specified GPU type are online.
1125
+ * Polls the gateway until workers with the specified GPU type are online.
1084
1126
  * This is useful for pre-warming the cluster before running benchmarks.
1085
1127
  *
1086
1128
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -1126,13 +1168,35 @@ var SIEClient = class {
1126
1168
  }
1127
1169
  }
1128
1170
  /**
1129
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
1171
+ * Make a msgpack HTTP request with retry logic.
1172
+ *
1173
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
1174
+ * - 202 Accepted (provisioning)
1175
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
1176
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
1177
+ *
1178
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
1179
+ * timeout from `timeout` to `provisionTimeout`.
1130
1180
  */
1131
1181
  async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
1132
1182
  const startTime = Date.now();
1133
1183
  let loraRetries = 0;
1134
1184
  while (true) {
1135
- const response = await this.request(path, body, pool, gpu);
1185
+ let response;
1186
+ try {
1187
+ response = await this.request(path, body, pool, gpu);
1188
+ } catch (err) {
1189
+ if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
1190
+ const elapsed = Date.now() - startTime;
1191
+ if (elapsed < this.provisionTimeout) {
1192
+ const remaining = this.provisionTimeout - elapsed;
1193
+ const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
1194
+ await sleep(delay);
1195
+ continue;
1196
+ }
1197
+ }
1198
+ throw err;
1199
+ }
1136
1200
  if (response.status === HTTP_ACCEPTED) {
1137
1201
  const retryAfter = getRetryAfter2(response);
1138
1202
  if (!waitForCapacity) {
@@ -1156,6 +1220,7 @@ var SIEClient = class {
1156
1220
  await sleep(actualDelay);
1157
1221
  continue;
1158
1222
  }
1223
+ await throwIfModelLoadFailed(response, model);
1159
1224
  if (response.status === 503) {
1160
1225
  const clonedResponse = response.clone();
1161
1226
  const errorCode = await getErrorCode(clonedResponse);
@@ -1189,6 +1254,17 @@ var SIEClient = class {
1189
1254
  await sleep(actualDelay);
1190
1255
  continue;
1191
1256
  }
1257
+ if (waitForCapacity) {
1258
+ const elapsed = Date.now() - startTime;
1259
+ if (elapsed < this.provisionTimeout) {
1260
+ const retryAfter = getRetryAfter2(response);
1261
+ const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1262
+ const remaining = this.provisionTimeout - elapsed;
1263
+ const actualDelay = Math.min(delay, remaining);
1264
+ await sleep(actualDelay);
1265
+ continue;
1266
+ }
1267
+ }
1192
1268
  }
1193
1269
  if (!response.ok) {
1194
1270
  await handleError(response, gpu);
@@ -1230,10 +1306,10 @@ var SIEClient = class {
1230
1306
  return response;
1231
1307
  } catch (error) {
1232
1308
  if (error instanceof Error && error.name === "AbortError") {
1233
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1309
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1234
1310
  }
1235
1311
  if (error instanceof TypeError) {
1236
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1312
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1237
1313
  }
1238
1314
  throw error;
1239
1315
  } finally {
@@ -1267,10 +1343,10 @@ var SIEClient = class {
1267
1343
  return response;
1268
1344
  } catch (error) {
1269
1345
  if (error instanceof Error && error.name === "AbortError") {
1270
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1346
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1271
1347
  }
1272
1348
  if (error instanceof TypeError) {
1273
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1349
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1274
1350
  }
1275
1351
  throw error;
1276
1352
  } finally {
@@ -1318,7 +1394,7 @@ var SIEClient = class {
1318
1394
  return "worker";
1319
1395
  }
1320
1396
  const data = await response.json();
1321
- return data.type === "router" ? "cluster" : "worker";
1397
+ return data.type === "gateway" ? "cluster" : "worker";
1322
1398
  } catch {
1323
1399
  return "worker";
1324
1400
  } finally {
@@ -1467,6 +1543,7 @@ function detectImageFormat(bytes) {
1467
1543
  }
1468
1544
 
1469
1545
  exports.LoraLoadingError = LoraLoadingError;
1546
+ exports.ModelLoadFailedError = ModelLoadFailedError;
1470
1547
  exports.ModelLoadingError = ModelLoadingError;
1471
1548
  exports.PoolError = PoolError;
1472
1549
  exports.ProvisioningError = ProvisioningError;