@superlinked/sie-sdk 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +54 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +42 -10
- package/dist/index.d.ts +42 -10
- package/dist/index.js +54 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -11,9 +11,11 @@ var SIEError = class extends Error {
|
|
|
11
11
|
}
|
|
12
12
|
};
|
|
13
13
|
var SIEConnectionError = class extends SIEError {
|
|
14
|
-
constructor(message) {
|
|
14
|
+
kind;
|
|
15
|
+
constructor(message, kind = "other") {
|
|
15
16
|
super(message);
|
|
16
17
|
this.name = "SIEConnectionError";
|
|
18
|
+
this.kind = kind;
|
|
17
19
|
}
|
|
18
20
|
};
|
|
19
21
|
var RequestError = class extends SIEError {
|
|
@@ -105,9 +107,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
|
|
|
105
107
|
var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
|
|
106
108
|
var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
|
|
107
109
|
var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
|
|
108
|
-
|
|
109
|
-
// src/version.ts
|
|
110
|
-
var SDK_VERSION = "0.2.0";
|
|
111
110
|
var EXT_TYPE_NUMPY = 78;
|
|
112
111
|
function parseDtype(dtype) {
|
|
113
112
|
const typeChar = dtype.slice(-2, -1);
|
|
@@ -476,6 +475,9 @@ function parseCapacityInfo(data, gpuFilter) {
|
|
|
476
475
|
};
|
|
477
476
|
}
|
|
478
477
|
|
|
478
|
+
// src/version.ts
|
|
479
|
+
var SDK_VERSION = "0.3.0";
|
|
480
|
+
|
|
479
481
|
// src/client.ts
|
|
480
482
|
function sleep(ms) {
|
|
481
483
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -616,9 +618,9 @@ var SIEClient = class {
|
|
|
616
618
|
};
|
|
617
619
|
}
|
|
618
620
|
/**
|
|
619
|
-
* Stream real-time status updates from a worker or
|
|
621
|
+
* Stream real-time status updates from a worker or gateway.
|
|
620
622
|
*
|
|
621
|
-
* @param mode - "cluster" uses
|
|
623
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
622
624
|
* "auto" detects the endpoint via /health.
|
|
623
625
|
*/
|
|
624
626
|
async *watch(mode = "auto") {
|
|
@@ -789,7 +791,7 @@ var SIEClient = class {
|
|
|
789
791
|
* Close the client and cleanup resources.
|
|
790
792
|
*
|
|
791
793
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
792
|
-
* automatically - they are garbage collected by the
|
|
794
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
793
795
|
* This allows pool reuse if the client reconnects.
|
|
794
796
|
*/
|
|
795
797
|
async close() {
|
|
@@ -1046,7 +1048,7 @@ var SIEClient = class {
|
|
|
1046
1048
|
/**
|
|
1047
1049
|
* Get current cluster capacity information.
|
|
1048
1050
|
*
|
|
1049
|
-
* Queries the
|
|
1051
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
1050
1052
|
* checking if specific GPU types are available before sending requests.
|
|
1051
1053
|
*
|
|
1052
1054
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -1068,10 +1070,10 @@ var SIEClient = class {
|
|
|
1068
1070
|
async getCapacity(gpu) {
|
|
1069
1071
|
const response = await this.requestJson("/health");
|
|
1070
1072
|
const data = await response.json();
|
|
1071
|
-
if (data.type !== "
|
|
1073
|
+
if (data.type !== "gateway") {
|
|
1072
1074
|
throw new RequestError(
|
|
1073
|
-
"getCapacity() requires a
|
|
1074
|
-
"
|
|
1075
|
+
"getCapacity() requires a gateway endpoint. This appears to be a worker.",
|
|
1076
|
+
"not_gateway",
|
|
1075
1077
|
400
|
|
1076
1078
|
);
|
|
1077
1079
|
}
|
|
@@ -1080,7 +1082,7 @@ var SIEClient = class {
|
|
|
1080
1082
|
/**
|
|
1081
1083
|
* Wait for GPU capacity to become available.
|
|
1082
1084
|
*
|
|
1083
|
-
* Polls the
|
|
1085
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
1084
1086
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
1085
1087
|
*
|
|
1086
1088
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -1126,13 +1128,35 @@ var SIEClient = class {
|
|
|
1126
1128
|
}
|
|
1127
1129
|
}
|
|
1128
1130
|
/**
|
|
1129
|
-
* Make a msgpack HTTP request with retry logic
|
|
1131
|
+
* Make a msgpack HTTP request with retry logic.
|
|
1132
|
+
*
|
|
1133
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
1134
|
+
* - 202 Accepted (provisioning)
|
|
1135
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
1136
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
1137
|
+
*
|
|
1138
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
1139
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
1130
1140
|
*/
|
|
1131
1141
|
async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
|
|
1132
1142
|
const startTime = Date.now();
|
|
1133
1143
|
let loraRetries = 0;
|
|
1134
1144
|
while (true) {
|
|
1135
|
-
|
|
1145
|
+
let response;
|
|
1146
|
+
try {
|
|
1147
|
+
response = await this.request(path, body, pool, gpu);
|
|
1148
|
+
} catch (err) {
|
|
1149
|
+
if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
|
|
1150
|
+
const elapsed = Date.now() - startTime;
|
|
1151
|
+
if (elapsed < this.provisionTimeout) {
|
|
1152
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1153
|
+
const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
|
|
1154
|
+
await sleep(delay);
|
|
1155
|
+
continue;
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
throw err;
|
|
1159
|
+
}
|
|
1136
1160
|
if (response.status === HTTP_ACCEPTED) {
|
|
1137
1161
|
const retryAfter = getRetryAfter2(response);
|
|
1138
1162
|
if (!waitForCapacity) {
|
|
@@ -1189,6 +1213,17 @@ var SIEClient = class {
|
|
|
1189
1213
|
await sleep(actualDelay);
|
|
1190
1214
|
continue;
|
|
1191
1215
|
}
|
|
1216
|
+
if (waitForCapacity) {
|
|
1217
|
+
const elapsed = Date.now() - startTime;
|
|
1218
|
+
if (elapsed < this.provisionTimeout) {
|
|
1219
|
+
const retryAfter = getRetryAfter2(response);
|
|
1220
|
+
const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
|
|
1221
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1222
|
+
const actualDelay = Math.min(delay, remaining);
|
|
1223
|
+
await sleep(actualDelay);
|
|
1224
|
+
continue;
|
|
1225
|
+
}
|
|
1226
|
+
}
|
|
1192
1227
|
}
|
|
1193
1228
|
if (!response.ok) {
|
|
1194
1229
|
await handleError(response, gpu);
|
|
@@ -1230,10 +1265,10 @@ var SIEClient = class {
|
|
|
1230
1265
|
return response;
|
|
1231
1266
|
} catch (error) {
|
|
1232
1267
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1233
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
|
|
1268
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1234
1269
|
}
|
|
1235
1270
|
if (error instanceof TypeError) {
|
|
1236
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}`);
|
|
1271
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1237
1272
|
}
|
|
1238
1273
|
throw error;
|
|
1239
1274
|
} finally {
|
|
@@ -1267,10 +1302,10 @@ var SIEClient = class {
|
|
|
1267
1302
|
return response;
|
|
1268
1303
|
} catch (error) {
|
|
1269
1304
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1270
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
|
|
1305
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1271
1306
|
}
|
|
1272
1307
|
if (error instanceof TypeError) {
|
|
1273
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}`);
|
|
1308
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1274
1309
|
}
|
|
1275
1310
|
throw error;
|
|
1276
1311
|
} finally {
|
|
@@ -1318,7 +1353,7 @@ var SIEClient = class {
|
|
|
1318
1353
|
return "worker";
|
|
1319
1354
|
}
|
|
1320
1355
|
const data = await response.json();
|
|
1321
|
-
return data.type === "
|
|
1356
|
+
return data.type === "gateway" ? "cluster" : "worker";
|
|
1322
1357
|
} catch {
|
|
1323
1358
|
return "worker";
|
|
1324
1359
|
} finally {
|