@superlinked/sie-sdk 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +106 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +76 -11
- package/dist/index.d.ts +76 -11
- package/dist/index.js +106 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -11,9 +11,11 @@ var SIEError = class extends Error {
|
|
|
11
11
|
}
|
|
12
12
|
};
|
|
13
13
|
var SIEConnectionError = class extends SIEError {
|
|
14
|
-
|
|
14
|
+
kind;
|
|
15
|
+
constructor(message, kind = "other") {
|
|
15
16
|
super(message);
|
|
16
17
|
this.name = "SIEConnectionError";
|
|
18
|
+
this.kind = kind;
|
|
17
19
|
}
|
|
18
20
|
};
|
|
19
21
|
var RequestError = class extends SIEError {
|
|
@@ -85,6 +87,28 @@ var ModelLoadingError = class extends SIEError {
|
|
|
85
87
|
this.model = model;
|
|
86
88
|
}
|
|
87
89
|
};
|
|
90
|
+
var ModelLoadFailedError = class extends ServerError {
|
|
91
|
+
/** The model that was requested */
|
|
92
|
+
model;
|
|
93
|
+
/**
|
|
94
|
+
* Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
|
|
95
|
+
* `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
|
|
96
|
+
* remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
|
|
97
|
+
*/
|
|
98
|
+
errorClass;
|
|
99
|
+
/** Whether the failure is non-retryable per server policy. */
|
|
100
|
+
permanent;
|
|
101
|
+
/** How many load attempts the server has logged. */
|
|
102
|
+
attempts;
|
|
103
|
+
constructor(message, options) {
|
|
104
|
+
super(message, "MODEL_LOAD_FAILED", 502);
|
|
105
|
+
this.name = "ModelLoadFailedError";
|
|
106
|
+
this.model = options?.model;
|
|
107
|
+
this.errorClass = options?.errorClass;
|
|
108
|
+
this.permanent = options?.permanent ?? true;
|
|
109
|
+
this.attempts = options?.attempts ?? 1;
|
|
110
|
+
}
|
|
111
|
+
};
|
|
88
112
|
|
|
89
113
|
// src/internal/constants.ts
|
|
90
114
|
var MSGPACK_CONTENT_TYPE = "application/msgpack";
|
|
@@ -105,9 +129,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
|
|
|
105
129
|
var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
|
|
106
130
|
var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
|
|
107
131
|
var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
|
|
108
|
-
|
|
109
|
-
// src/version.ts
|
|
110
|
-
var SDK_VERSION = "0.2.0";
|
|
111
132
|
var EXT_TYPE_NUMPY = 78;
|
|
112
133
|
function parseDtype(dtype) {
|
|
113
134
|
const typeChar = dtype.slice(-2, -1);
|
|
@@ -312,7 +333,7 @@ function getRetryAfter2(response) {
|
|
|
312
333
|
const header = response.headers.get("Retry-After");
|
|
313
334
|
return getRetryAfter(header);
|
|
314
335
|
}
|
|
315
|
-
async function getErrorCode(response) {
|
|
336
|
+
async function getErrorDetail(response) {
|
|
316
337
|
try {
|
|
317
338
|
const contentType = response.headers.get("content-type") ?? "";
|
|
318
339
|
let data;
|
|
@@ -323,24 +344,42 @@ async function getErrorCode(response) {
|
|
|
323
344
|
data = await response.json();
|
|
324
345
|
}
|
|
325
346
|
if (data.error && typeof data.error === "object") {
|
|
326
|
-
const error = data.error;
|
327
|
-
if (typeof error.code === "string") {
|
|
328
|
-
return error.code;
|
|
329
|
-
}
|
|
347
|
+
return data.error;
|
|
330
348
|
}
|
|
331
349
|
if (data.detail && typeof data.detail === "object") {
|
|
332
|
-
const detail = data.detail;
|
333
|
-
if (typeof detail.code === "string") {
|
|
334
|
-
return detail.code;
|
|
335
|
-
}
|
|
350
|
+
return data.detail;
|
|
336
351
|
}
|
|
337
352
|
if (typeof data.code === "string") {
|
|
338
|
-
return data.code;
|
|
353
|
+
return data;
|
|
339
354
|
}
|
|
340
355
|
} catch {
|
|
341
356
|
}
|
|
342
357
|
return void 0;
|
|
343
358
|
}
|
|
359
|
+
async function getErrorCode(response) {
|
|
360
|
+
const detail = await getErrorDetail(response);
|
|
361
|
+
if (!detail) return void 0;
|
|
362
|
+
const code = detail.code;
|
|
363
|
+
return typeof code === "string" ? code : void 0;
|
|
364
|
+
}
|
|
365
|
+
async function throwIfModelLoadFailed(response, model) {
|
|
366
|
+
if (response.status !== 502) return;
|
|
367
|
+
const detail = await getErrorDetail(response.clone());
|
|
368
|
+
if (!detail) return;
|
|
369
|
+
if (detail.code !== "MODEL_LOAD_FAILED") return;
|
|
370
|
+
const errorClass = typeof detail.error_class === "string" ? detail.error_class : void 0;
|
|
371
|
+
const permanent = typeof detail.permanent === "boolean" ? detail.permanent : true;
|
|
372
|
+
const attemptsRaw = detail.attempts;
|
|
373
|
+
const parsedAttempts = typeof attemptsRaw === "number" ? attemptsRaw : typeof attemptsRaw === "string" ? Number.parseInt(attemptsRaw, 10) : Number.NaN;
|
|
374
|
+
const attempts = Number.isFinite(parsedAttempts) ? parsedAttempts : 1;
|
|
375
|
+
const message = typeof detail.message === "string" ? detail.message : `Model '${model ?? "?"}' failed to load`;
|
|
376
|
+
throw new ModelLoadFailedError(message, {
|
|
377
|
+
model,
|
|
378
|
+
errorClass,
|
|
379
|
+
permanent,
|
|
380
|
+
attempts
|
|
381
|
+
});
|
|
382
|
+
}
|
|
344
383
|
async function handleError(response, gpu) {
|
|
345
384
|
const { status } = response;
|
|
346
385
|
let errorBody = {};
|
|
@@ -476,6 +515,9 @@ function parseCapacityInfo(data, gpuFilter) {
|
|
|
476
515
|
};
|
|
477
516
|
}
|
|
478
517
|
|
|
518
|
+
// src/version.ts
|
|
519
|
+
var SDK_VERSION = "0.3.1";
|
|
520
|
+
|
|
479
521
|
// src/client.ts
|
|
480
522
|
function sleep(ms) {
|
|
481
523
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -616,9 +658,9 @@ var SIEClient = class {
|
|
|
616
658
|
};
|
|
617
659
|
}
|
|
618
660
|
/**
|
|
619
|
-
* Stream real-time status updates from a worker or
|
|
661
|
+
* Stream real-time status updates from a worker or gateway.
|
|
620
662
|
*
|
|
621
|
-
* @param mode - "cluster" uses
|
|
663
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
622
664
|
* "auto" detects the endpoint via /health.
|
|
623
665
|
*/
|
|
624
666
|
async *watch(mode = "auto") {
|
|
@@ -789,7 +831,7 @@ var SIEClient = class {
|
|
|
789
831
|
* Close the client and cleanup resources.
|
|
790
832
|
*
|
|
791
833
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
792
|
-
* automatically - they are garbage collected by the
|
|
834
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
793
835
|
* This allows pool reuse if the client reconnects.
|
|
794
836
|
*/
|
|
795
837
|
async close() {
|
|
@@ -1046,7 +1088,7 @@ var SIEClient = class {
|
|
|
1046
1088
|
/**
|
|
1047
1089
|
* Get current cluster capacity information.
|
|
1048
1090
|
*
|
|
1049
|
-
* Queries the
|
|
1091
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
1050
1092
|
* checking if specific GPU types are available before sending requests.
|
|
1051
1093
|
*
|
|
1052
1094
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -1068,10 +1110,10 @@ var SIEClient = class {
|
|
|
1068
1110
|
async getCapacity(gpu) {
|
|
1069
1111
|
const response = await this.requestJson("/health");
|
|
1070
1112
|
const data = await response.json();
|
|
1071
|
-
if (data.type !== "
|
|
1113
|
+
if (data.type !== "gateway") {
|
|
1072
1114
|
throw new RequestError(
|
|
1073
|
-
"getCapacity() requires a
|
|
1074
|
-
"
|
|
1115
|
+
"getCapacity() requires a gateway endpoint. This appears to be a worker.",
|
|
1116
|
+
"not_gateway",
|
|
1075
1117
|
400
|
|
1076
1118
|
);
|
|
1077
1119
|
}
|
|
@@ -1080,7 +1122,7 @@ var SIEClient = class {
|
|
|
1080
1122
|
/**
|
|
1081
1123
|
* Wait for GPU capacity to become available.
|
|
1082
1124
|
*
|
|
1083
|
-
* Polls the
|
|
1125
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
1084
1126
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
1085
1127
|
*
|
|
1086
1128
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -1126,13 +1168,35 @@ var SIEClient = class {
|
|
|
1126
1168
|
}
|
|
1127
1169
|
}
|
|
1128
1170
|
/**
|
|
1129
|
-
* Make a msgpack HTTP request with retry logic
|
|
1171
|
+
* Make a msgpack HTTP request with retry logic.
|
|
1172
|
+
*
|
|
1173
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
1174
|
+
* - 202 Accepted (provisioning)
|
|
1175
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
1176
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
1177
|
+
*
|
|
1178
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
1179
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
1130
1180
|
*/
|
|
1131
1181
|
async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
|
|
1132
1182
|
const startTime = Date.now();
|
|
1133
1183
|
let loraRetries = 0;
|
|
1134
1184
|
while (true) {
|
|
1135
|
-
|
|
1185
|
+
let response;
|
|
1186
|
+
try {
|
|
1187
|
+
response = await this.request(path, body, pool, gpu);
|
|
1188
|
+
} catch (err) {
|
|
1189
|
+
if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
|
|
1190
|
+
const elapsed = Date.now() - startTime;
|
|
1191
|
+
if (elapsed < this.provisionTimeout) {
|
|
1192
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1193
|
+
const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
|
|
1194
|
+
await sleep(delay);
|
|
1195
|
+
continue;
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
throw err;
|
|
1199
|
+
}
|
|
1136
1200
|
if (response.status === HTTP_ACCEPTED) {
|
|
1137
1201
|
const retryAfter = getRetryAfter2(response);
|
|
1138
1202
|
if (!waitForCapacity) {
|
|
@@ -1156,6 +1220,7 @@ var SIEClient = class {
|
|
|
1156
1220
|
await sleep(actualDelay);
|
|
1157
1221
|
continue;
|
|
1158
1222
|
}
|
|
1223
|
+
await throwIfModelLoadFailed(response, model);
|
|
1159
1224
|
if (response.status === 503) {
|
|
1160
1225
|
const clonedResponse = response.clone();
|
|
1161
1226
|
const errorCode = await getErrorCode(clonedResponse);
|
|
@@ -1189,6 +1254,17 @@ var SIEClient = class {
|
|
|
1189
1254
|
await sleep(actualDelay);
|
|
1190
1255
|
continue;
|
|
1191
1256
|
}
|
|
1257
|
+
if (waitForCapacity) {
|
|
1258
|
+
const elapsed = Date.now() - startTime;
|
|
1259
|
+
if (elapsed < this.provisionTimeout) {
|
|
1260
|
+
const retryAfter = getRetryAfter2(response);
|
|
1261
|
+
const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
|
|
1262
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1263
|
+
const actualDelay = Math.min(delay, remaining);
|
|
1264
|
+
await sleep(actualDelay);
|
|
1265
|
+
continue;
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1192
1268
|
}
|
|
1193
1269
|
if (!response.ok) {
|
|
1194
1270
|
await handleError(response, gpu);
|
|
@@ -1230,10 +1306,10 @@ var SIEClient = class {
|
|
|
1230
1306
|
return response;
|
|
1231
1307
|
} catch (error) {
|
|
1232
1308
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1233
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
|
|
1309
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1234
1310
|
}
|
|
1235
1311
|
if (error instanceof TypeError) {
|
|
1236
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}`);
|
|
1312
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1237
1313
|
}
|
|
1238
1314
|
throw error;
|
|
1239
1315
|
} finally {
|
|
@@ -1267,10 +1343,10 @@ var SIEClient = class {
|
|
|
1267
1343
|
return response;
|
|
1268
1344
|
} catch (error) {
|
|
1269
1345
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1270
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
|
|
1346
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1271
1347
|
}
|
|
1272
1348
|
if (error instanceof TypeError) {
|
|
1273
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}`);
|
|
1349
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1274
1350
|
}
|
|
1275
1351
|
throw error;
|
|
1276
1352
|
} finally {
|
|
@@ -1318,7 +1394,7 @@ var SIEClient = class {
|
|
|
1318
1394
|
return "worker";
|
|
1319
1395
|
}
|
|
1320
1396
|
const data = await response.json();
|
|
1321
|
-
return data.type === "
|
|
1397
|
+
return data.type === "gateway" ? "cluster" : "worker";
|
|
1322
1398
|
} catch {
|
|
1323
1399
|
return "worker";
|
|
1324
1400
|
} finally {
|
|
@@ -1467,6 +1543,7 @@ function detectImageFormat(bytes) {
|
|
|
1467
1543
|
}
|
|
1468
1544
|
|
|
1469
1545
|
exports.LoraLoadingError = LoraLoadingError;
|
|
1546
|
+
exports.ModelLoadFailedError = ModelLoadFailedError;
|
|
1470
1547
|
exports.ModelLoadingError = ModelLoadingError;
|
|
1471
1548
|
exports.PoolError = PoolError;
|
|
1472
1549
|
exports.ProvisioningError = ProvisioningError;
|