@superlinked/sie-sdk 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +54 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +42 -10
- package/dist/index.d.ts +42 -10
- package/dist/index.js +54 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
|
|
|
15
15
|
* Output type options for encode operation.
|
|
16
16
|
*/
|
|
17
17
|
type OutputType = "dense" | "sparse" | "multivector";
|
|
18
|
+
/**
|
|
19
|
+
* Document input for composite-document extractors (PDF, DOCX, HTML, ...).
|
|
20
|
+
*
|
|
21
|
+
* The wire format is the document bytes plus an optional format hint. The
|
|
22
|
+
* hint is advisory — adapters may sniff the bytes when it is missing or
|
|
23
|
+
* unrecognized.
|
|
24
|
+
*/
|
|
25
|
+
interface DocumentInput {
|
|
26
|
+
/** Document bytes (raw file content) */
|
|
27
|
+
data: Uint8Array;
|
|
28
|
+
/** Document format hint: "pdf", "docx", "html", etc. */
|
|
29
|
+
format?: string;
|
|
30
|
+
}
|
|
18
31
|
/**
|
|
19
32
|
* A single item to encode, score, or extract from.
|
|
20
33
|
*
|
|
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
|
|
|
30
43
|
* // With images for multimodal models (ColPali, CLIP)
|
|
31
44
|
* { text: "Description", images: [imageBytes] }
|
|
32
45
|
*
|
|
46
|
+
* // With a document for composite-document extractors (Docling, ...)
|
|
47
|
+
* { document: { data: pdfBytes, format: "pdf" } }
|
|
48
|
+
*
|
|
33
49
|
* // Pre-encoded multivector (for use with maxsim utility)
|
|
34
50
|
* { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
|
|
35
51
|
*/
|
|
@@ -40,6 +56,8 @@ interface Item {
|
|
|
40
56
|
text?: string;
|
|
41
57
|
/** Images as byte arrays (JPEG/PNG) for multimodal models */
|
|
42
58
|
images?: Uint8Array[];
|
|
59
|
+
/** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
|
|
60
|
+
document?: DocumentInput;
|
|
43
61
|
/** Pre-encoded multivector (for use with maxsim utility) */
|
|
44
62
|
multivector?: Float32Array[];
|
|
45
63
|
/** Arbitrary metadata (passed through to results) */
|
|
@@ -98,7 +116,7 @@ interface ModelInfo {
|
|
|
98
116
|
name: string;
|
|
99
117
|
/** Whether the model is currently loaded in memory */
|
|
100
118
|
loaded: boolean;
|
|
101
|
-
/** Supported input types: ["text"], ["text", "image"], etc. */
|
|
119
|
+
/** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
|
|
102
120
|
inputs: string[];
|
|
103
121
|
/** Supported output types: ["dense"], ["dense", "sparse"], etc. */
|
|
104
122
|
outputs: string[];
|
|
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
|
|
|
448
466
|
*
|
|
449
467
|
* @example Resource pool usage
|
|
450
468
|
* ```typescript
|
|
451
|
-
* const client = new SIEClient("http://
|
|
469
|
+
* const client = new SIEClient("http://gateway:8080");
|
|
452
470
|
*
|
|
453
471
|
* // Create a dedicated pool
|
|
454
472
|
* await client.createPool("eval-bench", { l4: 2 });
|
|
@@ -523,9 +541,9 @@ declare class SIEClient {
|
|
|
523
541
|
*/
|
|
524
542
|
getModel(name: string): Promise<ModelInfo>;
|
|
525
543
|
/**
|
|
526
|
-
* Stream real-time status updates from a worker or
|
|
544
|
+
* Stream real-time status updates from a worker or gateway.
|
|
527
545
|
*
|
|
528
|
-
* @param mode - "cluster" uses
|
|
546
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
529
547
|
* "auto" detects the endpoint via /health.
|
|
530
548
|
*/
|
|
531
549
|
watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
|
|
@@ -576,7 +594,7 @@ declare class SIEClient {
|
|
|
576
594
|
* Close the client and cleanup resources.
|
|
577
595
|
*
|
|
578
596
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
579
|
-
* automatically - they are garbage collected by the
|
|
597
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
580
598
|
* This allows pool reuse if the client reconnects.
|
|
581
599
|
*/
|
|
582
600
|
close(): Promise<void>;
|
|
@@ -643,7 +661,7 @@ declare class SIEClient {
|
|
|
643
661
|
/**
|
|
644
662
|
* Get current cluster capacity information.
|
|
645
663
|
*
|
|
646
|
-
* Queries the
|
|
664
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
647
665
|
* checking if specific GPU types are available before sending requests.
|
|
648
666
|
*
|
|
649
667
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -666,7 +684,7 @@ declare class SIEClient {
|
|
|
666
684
|
/**
|
|
667
685
|
* Wait for GPU capacity to become available.
|
|
668
686
|
*
|
|
669
|
-
* Polls the
|
|
687
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
670
688
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
671
689
|
*
|
|
672
690
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -689,7 +707,15 @@ declare class SIEClient {
|
|
|
689
707
|
pollInterval?: number;
|
|
690
708
|
}): Promise<CapacityInfo>;
|
|
691
709
|
/**
|
|
692
|
-
* Make a msgpack HTTP request with retry logic
|
|
710
|
+
* Make a msgpack HTTP request with retry logic.
|
|
711
|
+
*
|
|
712
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
713
|
+
* - 202 Accepted (provisioning)
|
|
714
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
715
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
716
|
+
*
|
|
717
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
718
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
693
719
|
*/
|
|
694
720
|
private requestWithRetry;
|
|
695
721
|
/**
|
|
@@ -706,7 +732,7 @@ declare class SIEClient {
|
|
|
706
732
|
private detectEndpointType;
|
|
707
733
|
}
|
|
708
734
|
|
|
709
|
-
declare const SDK_VERSION = "0.
|
|
735
|
+
declare const SDK_VERSION = "0.3.0";
|
|
710
736
|
|
|
711
737
|
/**
|
|
712
738
|
* Helpers for converting SIE encode results to plain JavaScript types.
|
|
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
|
|
|
806
832
|
declare class SIEError extends Error {
|
|
807
833
|
constructor(message: string);
|
|
808
834
|
}
|
|
835
|
+
/**
|
|
836
|
+
* `SIEConnectionError` failure category. Only `"connect"` is auto-retried
|
|
837
|
+
* under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
|
|
838
|
+
*/
|
|
839
|
+
type SIEConnectionErrorKind = "connect" | "timeout" | "other";
|
|
809
840
|
/**
|
|
810
841
|
* Error connecting to the SIE server.
|
|
811
842
|
*
|
|
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
|
|
|
816
847
|
* - Server refuses connection
|
|
817
848
|
*/
|
|
818
849
|
declare class SIEConnectionError extends SIEError {
|
|
819
|
-
|
|
850
|
+
readonly kind: SIEConnectionErrorKind;
|
|
851
|
+
constructor(message: string, kind?: SIEConnectionErrorKind);
|
|
820
852
|
}
|
|
821
853
|
/**
|
|
822
854
|
* Error in the request (4xx responses).
|
package/dist/index.d.ts
CHANGED
|
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
|
|
|
15
15
|
* Output type options for encode operation.
|
|
16
16
|
*/
|
|
17
17
|
type OutputType = "dense" | "sparse" | "multivector";
|
|
18
|
+
/**
|
|
19
|
+
* Document input for composite-document extractors (PDF, DOCX, HTML, ...).
|
|
20
|
+
*
|
|
21
|
+
* The wire format is the document bytes plus an optional format hint. The
|
|
22
|
+
* hint is advisory — adapters may sniff the bytes when it is missing or
|
|
23
|
+
* unrecognized.
|
|
24
|
+
*/
|
|
25
|
+
interface DocumentInput {
|
|
26
|
+
/** Document bytes (raw file content) */
|
|
27
|
+
data: Uint8Array;
|
|
28
|
+
/** Document format hint: "pdf", "docx", "html", etc. */
|
|
29
|
+
format?: string;
|
|
30
|
+
}
|
|
18
31
|
/**
|
|
19
32
|
* A single item to encode, score, or extract from.
|
|
20
33
|
*
|
|
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
|
|
|
30
43
|
* // With images for multimodal models (ColPali, CLIP)
|
|
31
44
|
* { text: "Description", images: [imageBytes] }
|
|
32
45
|
*
|
|
46
|
+
* // With a document for composite-document extractors (Docling, ...)
|
|
47
|
+
* { document: { data: pdfBytes, format: "pdf" } }
|
|
48
|
+
*
|
|
33
49
|
* // Pre-encoded multivector (for use with maxsim utility)
|
|
34
50
|
* { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
|
|
35
51
|
*/
|
|
@@ -40,6 +56,8 @@ interface Item {
|
|
|
40
56
|
text?: string;
|
|
41
57
|
/** Images as byte arrays (JPEG/PNG) for multimodal models */
|
|
42
58
|
images?: Uint8Array[];
|
|
59
|
+
/** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
|
|
60
|
+
document?: DocumentInput;
|
|
43
61
|
/** Pre-encoded multivector (for use with maxsim utility) */
|
|
44
62
|
multivector?: Float32Array[];
|
|
45
63
|
/** Arbitrary metadata (passed through to results) */
|
|
@@ -98,7 +116,7 @@ interface ModelInfo {
|
|
|
98
116
|
name: string;
|
|
99
117
|
/** Whether the model is currently loaded in memory */
|
|
100
118
|
loaded: boolean;
|
|
101
|
-
/** Supported input types: ["text"], ["text", "image"], etc. */
|
|
119
|
+
/** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
|
|
102
120
|
inputs: string[];
|
|
103
121
|
/** Supported output types: ["dense"], ["dense", "sparse"], etc. */
|
|
104
122
|
outputs: string[];
|
|
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
|
|
|
448
466
|
*
|
|
449
467
|
* @example Resource pool usage
|
|
450
468
|
* ```typescript
|
|
451
|
-
* const client = new SIEClient("http://
|
|
469
|
+
* const client = new SIEClient("http://gateway:8080");
|
|
452
470
|
*
|
|
453
471
|
* // Create a dedicated pool
|
|
454
472
|
* await client.createPool("eval-bench", { l4: 2 });
|
|
@@ -523,9 +541,9 @@ declare class SIEClient {
|
|
|
523
541
|
*/
|
|
524
542
|
getModel(name: string): Promise<ModelInfo>;
|
|
525
543
|
/**
|
|
526
|
-
* Stream real-time status updates from a worker or
|
|
544
|
+
* Stream real-time status updates from a worker or gateway.
|
|
527
545
|
*
|
|
528
|
-
* @param mode - "cluster" uses
|
|
546
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
529
547
|
* "auto" detects the endpoint via /health.
|
|
530
548
|
*/
|
|
531
549
|
watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
|
|
@@ -576,7 +594,7 @@ declare class SIEClient {
|
|
|
576
594
|
* Close the client and cleanup resources.
|
|
577
595
|
*
|
|
578
596
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
579
|
-
* automatically - they are garbage collected by the
|
|
597
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
580
598
|
* This allows pool reuse if the client reconnects.
|
|
581
599
|
*/
|
|
582
600
|
close(): Promise<void>;
|
|
@@ -643,7 +661,7 @@ declare class SIEClient {
|
|
|
643
661
|
/**
|
|
644
662
|
* Get current cluster capacity information.
|
|
645
663
|
*
|
|
646
|
-
* Queries the
|
|
664
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
647
665
|
* checking if specific GPU types are available before sending requests.
|
|
648
666
|
*
|
|
649
667
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -666,7 +684,7 @@ declare class SIEClient {
|
|
|
666
684
|
/**
|
|
667
685
|
* Wait for GPU capacity to become available.
|
|
668
686
|
*
|
|
669
|
-
* Polls the
|
|
687
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
670
688
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
671
689
|
*
|
|
672
690
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -689,7 +707,15 @@ declare class SIEClient {
|
|
|
689
707
|
pollInterval?: number;
|
|
690
708
|
}): Promise<CapacityInfo>;
|
|
691
709
|
/**
|
|
692
|
-
* Make a msgpack HTTP request with retry logic
|
|
710
|
+
* Make a msgpack HTTP request with retry logic.
|
|
711
|
+
*
|
|
712
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
713
|
+
* - 202 Accepted (provisioning)
|
|
714
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
715
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
716
|
+
*
|
|
717
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
718
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
693
719
|
*/
|
|
694
720
|
private requestWithRetry;
|
|
695
721
|
/**
|
|
@@ -706,7 +732,7 @@ declare class SIEClient {
|
|
|
706
732
|
private detectEndpointType;
|
|
707
733
|
}
|
|
708
734
|
|
|
709
|
-
declare const SDK_VERSION = "0.
|
|
735
|
+
declare const SDK_VERSION = "0.3.0";
|
|
710
736
|
|
|
711
737
|
/**
|
|
712
738
|
* Helpers for converting SIE encode results to plain JavaScript types.
|
|
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
|
|
|
806
832
|
declare class SIEError extends Error {
|
|
807
833
|
constructor(message: string);
|
|
808
834
|
}
|
|
835
|
+
/**
|
|
836
|
+
* `SIEConnectionError` failure category. Only `"connect"` is auto-retried
|
|
837
|
+
* under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
|
|
838
|
+
*/
|
|
839
|
+
type SIEConnectionErrorKind = "connect" | "timeout" | "other";
|
|
809
840
|
/**
|
|
810
841
|
* Error connecting to the SIE server.
|
|
811
842
|
*
|
|
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
|
|
|
816
847
|
* - Server refuses connection
|
|
817
848
|
*/
|
|
818
849
|
declare class SIEConnectionError extends SIEError {
|
|
819
|
-
|
|
850
|
+
readonly kind: SIEConnectionErrorKind;
|
|
851
|
+
constructor(message: string, kind?: SIEConnectionErrorKind);
|
|
820
852
|
}
|
|
821
853
|
/**
|
|
822
854
|
* Error in the request (4xx responses).
|
package/dist/index.js
CHANGED
|
@@ -9,9 +9,11 @@ var SIEError = class extends Error {
|
|
|
9
9
|
}
|
|
10
10
|
};
|
|
11
11
|
var SIEConnectionError = class extends SIEError {
|
|
12
|
-
|
|
12
|
+
kind;
|
|
13
|
+
constructor(message, kind = "other") {
|
|
13
14
|
super(message);
|
|
14
15
|
this.name = "SIEConnectionError";
|
|
16
|
+
this.kind = kind;
|
|
15
17
|
}
|
|
16
18
|
};
|
|
17
19
|
var RequestError = class extends SIEError {
|
|
@@ -103,9 +105,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
|
|
|
103
105
|
var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
|
|
104
106
|
var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
|
|
105
107
|
var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
|
|
106
|
-
|
|
107
|
-
// src/version.ts
|
|
108
|
-
var SDK_VERSION = "0.2.0";
|
|
109
108
|
var EXT_TYPE_NUMPY = 78;
|
|
110
109
|
function parseDtype(dtype) {
|
|
111
110
|
const typeChar = dtype.slice(-2, -1);
|
|
@@ -474,6 +473,9 @@ function parseCapacityInfo(data, gpuFilter) {
|
|
|
474
473
|
};
|
|
475
474
|
}
|
|
476
475
|
|
|
476
|
+
// src/version.ts
|
|
477
|
+
var SDK_VERSION = "0.3.0";
|
|
478
|
+
|
|
477
479
|
// src/client.ts
|
|
478
480
|
function sleep(ms) {
|
|
479
481
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -614,9 +616,9 @@ var SIEClient = class {
|
|
|
614
616
|
};
|
|
615
617
|
}
|
|
616
618
|
/**
|
|
617
|
-
* Stream real-time status updates from a worker or
|
|
619
|
+
* Stream real-time status updates from a worker or gateway.
|
|
618
620
|
*
|
|
619
|
-
* @param mode - "cluster" uses
|
|
621
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
620
622
|
* "auto" detects the endpoint via /health.
|
|
621
623
|
*/
|
|
622
624
|
async *watch(mode = "auto") {
|
|
@@ -787,7 +789,7 @@ var SIEClient = class {
|
|
|
787
789
|
* Close the client and cleanup resources.
|
|
788
790
|
*
|
|
789
791
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
790
|
-
* automatically - they are garbage collected by the
|
|
792
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
791
793
|
* This allows pool reuse if the client reconnects.
|
|
792
794
|
*/
|
|
793
795
|
async close() {
|
|
@@ -1044,7 +1046,7 @@ var SIEClient = class {
|
|
|
1044
1046
|
/**
|
|
1045
1047
|
* Get current cluster capacity information.
|
|
1046
1048
|
*
|
|
1047
|
-
* Queries the
|
|
1049
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
1048
1050
|
* checking if specific GPU types are available before sending requests.
|
|
1049
1051
|
*
|
|
1050
1052
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -1066,10 +1068,10 @@ var SIEClient = class {
|
|
|
1066
1068
|
async getCapacity(gpu) {
|
|
1067
1069
|
const response = await this.requestJson("/health");
|
|
1068
1070
|
const data = await response.json();
|
|
1069
|
-
if (data.type !== "
|
|
1071
|
+
if (data.type !== "gateway") {
|
|
1070
1072
|
throw new RequestError(
|
|
1071
|
-
"getCapacity() requires a
|
|
1072
|
-
"
|
|
1073
|
+
"getCapacity() requires a gateway endpoint. This appears to be a worker.",
|
|
1074
|
+
"not_gateway",
|
|
1073
1075
|
400
|
|
1074
1076
|
);
|
|
1075
1077
|
}
|
|
@@ -1078,7 +1080,7 @@ var SIEClient = class {
|
|
|
1078
1080
|
/**
|
|
1079
1081
|
* Wait for GPU capacity to become available.
|
|
1080
1082
|
*
|
|
1081
|
-
* Polls the
|
|
1083
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
1082
1084
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
1083
1085
|
*
|
|
1084
1086
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -1124,13 +1126,35 @@ var SIEClient = class {
|
|
|
1124
1126
|
}
|
|
1125
1127
|
}
|
|
1126
1128
|
/**
|
|
1127
|
-
* Make a msgpack HTTP request with retry logic
|
|
1129
|
+
* Make a msgpack HTTP request with retry logic.
|
|
1130
|
+
*
|
|
1131
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
1132
|
+
* - 202 Accepted (provisioning)
|
|
1133
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
1134
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
1135
|
+
*
|
|
1136
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
1137
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
1128
1138
|
*/
|
|
1129
1139
|
async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
|
|
1130
1140
|
const startTime = Date.now();
|
|
1131
1141
|
let loraRetries = 0;
|
|
1132
1142
|
while (true) {
|
|
1133
|
-
|
|
1143
|
+
let response;
|
|
1144
|
+
try {
|
|
1145
|
+
response = await this.request(path, body, pool, gpu);
|
|
1146
|
+
} catch (err) {
|
|
1147
|
+
if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
|
|
1148
|
+
const elapsed = Date.now() - startTime;
|
|
1149
|
+
if (elapsed < this.provisionTimeout) {
|
|
1150
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1151
|
+
const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
|
|
1152
|
+
await sleep(delay);
|
|
1153
|
+
continue;
|
|
1154
|
+
}
|
|
1155
|
+
}
|
|
1156
|
+
throw err;
|
|
1157
|
+
}
|
|
1134
1158
|
if (response.status === HTTP_ACCEPTED) {
|
|
1135
1159
|
const retryAfter = getRetryAfter2(response);
|
|
1136
1160
|
if (!waitForCapacity) {
|
|
@@ -1187,6 +1211,17 @@ var SIEClient = class {
|
|
|
1187
1211
|
await sleep(actualDelay);
|
|
1188
1212
|
continue;
|
|
1189
1213
|
}
|
|
1214
|
+
if (waitForCapacity) {
|
|
1215
|
+
const elapsed = Date.now() - startTime;
|
|
1216
|
+
if (elapsed < this.provisionTimeout) {
|
|
1217
|
+
const retryAfter = getRetryAfter2(response);
|
|
1218
|
+
const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
|
|
1219
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1220
|
+
const actualDelay = Math.min(delay, remaining);
|
|
1221
|
+
await sleep(actualDelay);
|
|
1222
|
+
continue;
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1190
1225
|
}
|
|
1191
1226
|
if (!response.ok) {
|
|
1192
1227
|
await handleError(response, gpu);
|
|
@@ -1228,10 +1263,10 @@ var SIEClient = class {
|
|
|
1228
1263
|
return response;
|
|
1229
1264
|
} catch (error) {
|
|
1230
1265
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1231
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms
|
|
1266
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1232
1267
|
}
|
|
1233
1268
|
if (error instanceof TypeError) {
|
|
1234
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}
|
|
1269
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1235
1270
|
}
|
|
1236
1271
|
throw error;
|
|
1237
1272
|
} finally {
|
|
@@ -1265,10 +1300,10 @@ var SIEClient = class {
|
|
|
1265
1300
|
return response;
|
|
1266
1301
|
} catch (error) {
|
|
1267
1302
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1268
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms
|
|
1303
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1269
1304
|
}
|
|
1270
1305
|
if (error instanceof TypeError) {
|
|
1271
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}
|
|
1306
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1272
1307
|
}
|
|
1273
1308
|
throw error;
|
|
1274
1309
|
} finally {
|
|
@@ -1316,7 +1351,7 @@ var SIEClient = class {
|
|
|
1316
1351
|
return "worker";
|
|
1317
1352
|
}
|
|
1318
1353
|
const data = await response.json();
|
|
1319
|
-
return data.type === "
|
|
1354
|
+
return data.type === "gateway" ? "cluster" : "worker";
|
|
1320
1355
|
} catch {
|
|
1321
1356
|
return "worker";
|
|
1322
1357
|
} finally {
|