@superlinked/sie-sdk 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +106 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +76 -11
- package/dist/index.d.ts +76 -11
- package/dist/index.js +106 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
|
|
|
15
15
|
* Output type options for encode operation.
|
|
16
16
|
*/
|
|
17
17
|
type OutputType = "dense" | "sparse" | "multivector";
|
|
18
|
+
/**
|
|
19
|
+
* Document input for composite-document extractors (PDF, DOCX, HTML, ...).
|
|
20
|
+
*
|
|
21
|
+
* The wire format is the document bytes plus an optional format hint. The
|
|
22
|
+
* hint is advisory — adapters may sniff the bytes when it is missing or
|
|
23
|
+
* unrecognized.
|
|
24
|
+
*/
|
|
25
|
+
interface DocumentInput {
|
|
26
|
+
/** Document bytes (raw file content) */
|
|
27
|
+
data: Uint8Array;
|
|
28
|
+
/** Document format hint: "pdf", "docx", "html", etc. */
|
|
29
|
+
format?: string;
|
|
30
|
+
}
|
|
18
31
|
/**
|
|
19
32
|
* A single item to encode, score, or extract from.
|
|
20
33
|
*
|
|
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
|
|
|
30
43
|
* // With images for multimodal models (ColPali, CLIP)
|
|
31
44
|
* { text: "Description", images: [imageBytes] }
|
|
32
45
|
*
|
|
46
|
+
* // With a document for composite-document extractors (Docling, ...)
|
|
47
|
+
* { document: { data: pdfBytes, format: "pdf" } }
|
|
48
|
+
*
|
|
33
49
|
* // Pre-encoded multivector (for use with maxsim utility)
|
|
34
50
|
* { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
|
|
35
51
|
*/
|
|
@@ -40,6 +56,8 @@ interface Item {
|
|
|
40
56
|
text?: string;
|
|
41
57
|
/** Images as byte arrays (JPEG/PNG) for multimodal models */
|
|
42
58
|
images?: Uint8Array[];
|
|
59
|
+
/** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
|
|
60
|
+
document?: DocumentInput;
|
|
43
61
|
/** Pre-encoded multivector (for use with maxsim utility) */
|
|
44
62
|
multivector?: Float32Array[];
|
|
45
63
|
/** Arbitrary metadata (passed through to results) */
|
|
@@ -98,7 +116,7 @@ interface ModelInfo {
|
|
|
98
116
|
name: string;
|
|
99
117
|
/** Whether the model is currently loaded in memory */
|
|
100
118
|
loaded: boolean;
|
|
101
|
-
/** Supported input types: ["text"], ["text", "image"], etc. */
|
|
119
|
+
/** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
|
|
102
120
|
inputs: string[];
|
|
103
121
|
/** Supported output types: ["dense"], ["dense", "sparse"], etc. */
|
|
104
122
|
outputs: string[];
|
|
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
|
|
|
448
466
|
*
|
|
449
467
|
* @example Resource pool usage
|
|
450
468
|
* ```typescript
|
|
451
|
-
* const client = new SIEClient("http://
|
|
469
|
+
* const client = new SIEClient("http://gateway:8080");
|
|
452
470
|
*
|
|
453
471
|
* // Create a dedicated pool
|
|
454
472
|
* await client.createPool("eval-bench", { l4: 2 });
|
|
@@ -523,9 +541,9 @@ declare class SIEClient {
|
|
|
523
541
|
*/
|
|
524
542
|
getModel(name: string): Promise<ModelInfo>;
|
|
525
543
|
/**
|
|
526
|
-
* Stream real-time status updates from a worker or
|
|
544
|
+
* Stream real-time status updates from a worker or gateway.
|
|
527
545
|
*
|
|
528
|
-
* @param mode - "cluster" uses
|
|
546
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
529
547
|
* "auto" detects the endpoint via /health.
|
|
530
548
|
*/
|
|
531
549
|
watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
|
|
@@ -576,7 +594,7 @@ declare class SIEClient {
|
|
|
576
594
|
* Close the client and cleanup resources.
|
|
577
595
|
*
|
|
578
596
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
579
|
-
* automatically - they are garbage collected by the
|
|
597
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
580
598
|
* This allows pool reuse if the client reconnects.
|
|
581
599
|
*/
|
|
582
600
|
close(): Promise<void>;
|
|
@@ -643,7 +661,7 @@ declare class SIEClient {
|
|
|
643
661
|
/**
|
|
644
662
|
* Get current cluster capacity information.
|
|
645
663
|
*
|
|
646
|
-
* Queries the
|
|
664
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
647
665
|
* checking if specific GPU types are available before sending requests.
|
|
648
666
|
*
|
|
649
667
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -666,7 +684,7 @@ declare class SIEClient {
|
|
|
666
684
|
/**
|
|
667
685
|
* Wait for GPU capacity to become available.
|
|
668
686
|
*
|
|
669
|
-
* Polls the
|
|
687
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
670
688
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
671
689
|
*
|
|
672
690
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -689,7 +707,15 @@ declare class SIEClient {
|
|
|
689
707
|
pollInterval?: number;
|
|
690
708
|
}): Promise<CapacityInfo>;
|
|
691
709
|
/**
|
|
692
|
-
* Make a msgpack HTTP request with retry logic
|
|
710
|
+
* Make a msgpack HTTP request with retry logic.
|
|
711
|
+
*
|
|
712
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
713
|
+
* - 202 Accepted (provisioning)
|
|
714
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
715
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
716
|
+
*
|
|
717
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
718
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
693
719
|
*/
|
|
694
720
|
private requestWithRetry;
|
|
695
721
|
/**
|
|
@@ -706,7 +732,7 @@ declare class SIEClient {
|
|
|
706
732
|
private detectEndpointType;
|
|
707
733
|
}
|
|
708
734
|
|
|
709
|
-
declare const SDK_VERSION = "0.2.0";
|
|
735
|
+
declare const SDK_VERSION = "0.3.1";
|
|
710
736
|
|
|
711
737
|
/**
|
|
712
738
|
* Helpers for converting SIE encode results to plain JavaScript types.
|
|
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
|
|
|
806
832
|
declare class SIEError extends Error {
|
|
807
833
|
constructor(message: string);
|
|
808
834
|
}
|
|
835
|
+
/**
|
|
836
|
+
* `SIEConnectionError` failure category. Only `"connect"` is auto-retried
|
|
837
|
+
* under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
|
|
838
|
+
*/
|
|
839
|
+
type SIEConnectionErrorKind = "connect" | "timeout" | "other";
|
|
809
840
|
/**
|
|
810
841
|
* Error connecting to the SIE server.
|
|
811
842
|
*
|
|
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
|
|
|
816
847
|
* - Server refuses connection
|
|
817
848
|
*/
|
|
818
849
|
declare class SIEConnectionError extends SIEError {
|
|
819
|
-
|
|
850
|
+
readonly kind: SIEConnectionErrorKind;
|
|
851
|
+
constructor(message: string, kind?: SIEConnectionErrorKind);
|
|
820
852
|
}
|
|
821
853
|
/**
|
|
822
854
|
* Error in the request (4xx responses).
|
|
@@ -916,6 +948,39 @@ declare class ModelLoadingError extends SIEError {
|
|
|
916
948
|
readonly model: string | undefined;
|
|
917
949
|
constructor(message: string, model?: string);
|
|
918
950
|
}
|
|
951
|
+
/**
|
|
952
|
+
* Error when the server reports a *terminal* model-load failure.
|
|
953
|
+
*
|
|
954
|
+
* Distinct from {@link ModelLoadingError} — this is thrown on the first
|
|
955
|
+
* response (no retry budget consumed) when the server returns HTTP
|
|
956
|
+
* `502 MODEL_LOAD_FAILED`. The server uses this code for permanent-class
|
|
957
|
+
* failures (gated repos, missing dependencies, unrecognised model
|
|
958
|
+
* architectures) where retrying would waste time. See sie-test#85.
|
|
959
|
+
*
|
|
960
|
+
* Permanent failures will not auto-clear; an operator must fix the
|
|
961
|
+
* underlying cause (e.g. set `HF_TOKEN`, accept the model license on
|
|
962
|
+
* HuggingFace, upgrade `transformers`).
|
|
963
|
+
*/
|
|
964
|
+
declare class ModelLoadFailedError extends ServerError {
|
|
965
|
+
/** The model that was requested */
|
|
966
|
+
readonly model: string | undefined;
|
|
967
|
+
/**
|
|
968
|
+
* Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
|
|
969
|
+
* `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
|
|
970
|
+
* remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
|
|
971
|
+
*/
|
|
972
|
+
readonly errorClass: string | undefined;
|
|
973
|
+
/** Whether the failure is non-retryable per server policy. */
|
|
974
|
+
readonly permanent: boolean;
|
|
975
|
+
/** How many load attempts the server has logged. */
|
|
976
|
+
readonly attempts: number;
|
|
977
|
+
constructor(message: string, options?: {
|
|
978
|
+
model?: string;
|
|
979
|
+
errorClass?: string;
|
|
980
|
+
permanent?: boolean;
|
|
981
|
+
attempts?: number;
|
|
982
|
+
});
|
|
983
|
+
}
|
|
919
984
|
|
|
920
985
|
/**
|
|
921
986
|
* MessagePack serialization with msgpack-numpy compatibility.
|
|
@@ -1021,4 +1086,4 @@ declare function toImageWireFormat(input: ImageInput, format?: "jpeg" | "png" |
|
|
|
1021
1086
|
*/
|
|
1022
1087
|
declare function detectImageFormat(bytes: Uint8Array): "jpeg" | "png" | "webp" | "unknown";
|
|
1023
1088
|
|
|
1024
|
-
export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
|
1089
|
+
export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadFailedError, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
package/dist/index.d.ts
CHANGED
|
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
|
|
|
15
15
|
* Output type options for encode operation.
|
|
16
16
|
*/
|
|
17
17
|
type OutputType = "dense" | "sparse" | "multivector";
|
|
18
|
+
/**
|
|
19
|
+
* Document input for composite-document extractors (PDF, DOCX, HTML, ...).
|
|
20
|
+
*
|
|
21
|
+
* The wire format is the document bytes plus an optional format hint. The
|
|
22
|
+
* hint is advisory — adapters may sniff the bytes when it is missing or
|
|
23
|
+
* unrecognized.
|
|
24
|
+
*/
|
|
25
|
+
interface DocumentInput {
|
|
26
|
+
/** Document bytes (raw file content) */
|
|
27
|
+
data: Uint8Array;
|
|
28
|
+
/** Document format hint: "pdf", "docx", "html", etc. */
|
|
29
|
+
format?: string;
|
|
30
|
+
}
|
|
18
31
|
/**
|
|
19
32
|
* A single item to encode, score, or extract from.
|
|
20
33
|
*
|
|
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
|
|
|
30
43
|
* // With images for multimodal models (ColPali, CLIP)
|
|
31
44
|
* { text: "Description", images: [imageBytes] }
|
|
32
45
|
*
|
|
46
|
+
* // With a document for composite-document extractors (Docling, ...)
|
|
47
|
+
* { document: { data: pdfBytes, format: "pdf" } }
|
|
48
|
+
*
|
|
33
49
|
* // Pre-encoded multivector (for use with maxsim utility)
|
|
34
50
|
* { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
|
|
35
51
|
*/
|
|
@@ -40,6 +56,8 @@ interface Item {
|
|
|
40
56
|
text?: string;
|
|
41
57
|
/** Images as byte arrays (JPEG/PNG) for multimodal models */
|
|
42
58
|
images?: Uint8Array[];
|
|
59
|
+
/** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
|
|
60
|
+
document?: DocumentInput;
|
|
43
61
|
/** Pre-encoded multivector (for use with maxsim utility) */
|
|
44
62
|
multivector?: Float32Array[];
|
|
45
63
|
/** Arbitrary metadata (passed through to results) */
|
|
@@ -98,7 +116,7 @@ interface ModelInfo {
|
|
|
98
116
|
name: string;
|
|
99
117
|
/** Whether the model is currently loaded in memory */
|
|
100
118
|
loaded: boolean;
|
|
101
|
-
/** Supported input types: ["text"], ["text", "image"], etc. */
|
|
119
|
+
/** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
|
|
102
120
|
inputs: string[];
|
|
103
121
|
/** Supported output types: ["dense"], ["dense", "sparse"], etc. */
|
|
104
122
|
outputs: string[];
|
|
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
|
|
|
448
466
|
*
|
|
449
467
|
* @example Resource pool usage
|
|
450
468
|
* ```typescript
|
|
451
|
-
* const client = new SIEClient("http://
|
|
469
|
+
* const client = new SIEClient("http://gateway:8080");
|
|
452
470
|
*
|
|
453
471
|
* // Create a dedicated pool
|
|
454
472
|
* await client.createPool("eval-bench", { l4: 2 });
|
|
@@ -523,9 +541,9 @@ declare class SIEClient {
|
|
|
523
541
|
*/
|
|
524
542
|
getModel(name: string): Promise<ModelInfo>;
|
|
525
543
|
/**
|
|
526
|
-
* Stream real-time status updates from a worker or
|
|
544
|
+
* Stream real-time status updates from a worker or gateway.
|
|
527
545
|
*
|
|
528
|
-
* @param mode - "cluster" uses
|
|
546
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
529
547
|
* "auto" detects the endpoint via /health.
|
|
530
548
|
*/
|
|
531
549
|
watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
|
|
@@ -576,7 +594,7 @@ declare class SIEClient {
|
|
|
576
594
|
* Close the client and cleanup resources.
|
|
577
595
|
*
|
|
578
596
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
579
|
-
* automatically - they are garbage collected by the
|
|
597
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
580
598
|
* This allows pool reuse if the client reconnects.
|
|
581
599
|
*/
|
|
582
600
|
close(): Promise<void>;
|
|
@@ -643,7 +661,7 @@ declare class SIEClient {
|
|
|
643
661
|
/**
|
|
644
662
|
* Get current cluster capacity information.
|
|
645
663
|
*
|
|
646
|
-
* Queries the
|
|
664
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
647
665
|
* checking if specific GPU types are available before sending requests.
|
|
648
666
|
*
|
|
649
667
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -666,7 +684,7 @@ declare class SIEClient {
|
|
|
666
684
|
/**
|
|
667
685
|
* Wait for GPU capacity to become available.
|
|
668
686
|
*
|
|
669
|
-
* Polls the
|
|
687
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
670
688
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
671
689
|
*
|
|
672
690
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -689,7 +707,15 @@ declare class SIEClient {
|
|
|
689
707
|
pollInterval?: number;
|
|
690
708
|
}): Promise<CapacityInfo>;
|
|
691
709
|
/**
|
|
692
|
-
* Make a msgpack HTTP request with retry logic
|
|
710
|
+
* Make a msgpack HTTP request with retry logic.
|
|
711
|
+
*
|
|
712
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
713
|
+
* - 202 Accepted (provisioning)
|
|
714
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
715
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
716
|
+
*
|
|
717
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
718
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
693
719
|
*/
|
|
694
720
|
private requestWithRetry;
|
|
695
721
|
/**
|
|
@@ -706,7 +732,7 @@ declare class SIEClient {
|
|
|
706
732
|
private detectEndpointType;
|
|
707
733
|
}
|
|
708
734
|
|
|
709
|
-
declare const SDK_VERSION = "0.2.0";
|
|
735
|
+
declare const SDK_VERSION = "0.3.1";
|
|
710
736
|
|
|
711
737
|
/**
|
|
712
738
|
* Helpers for converting SIE encode results to plain JavaScript types.
|
|
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
|
|
|
806
832
|
declare class SIEError extends Error {
|
|
807
833
|
constructor(message: string);
|
|
808
834
|
}
|
|
835
|
+
/**
|
|
836
|
+
* `SIEConnectionError` failure category. Only `"connect"` is auto-retried
|
|
837
|
+
* under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
|
|
838
|
+
*/
|
|
839
|
+
type SIEConnectionErrorKind = "connect" | "timeout" | "other";
|
|
809
840
|
/**
|
|
810
841
|
* Error connecting to the SIE server.
|
|
811
842
|
*
|
|
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
|
|
|
816
847
|
* - Server refuses connection
|
|
817
848
|
*/
|
|
818
849
|
declare class SIEConnectionError extends SIEError {
|
|
819
|
-
|
|
850
|
+
readonly kind: SIEConnectionErrorKind;
|
|
851
|
+
constructor(message: string, kind?: SIEConnectionErrorKind);
|
|
820
852
|
}
|
|
821
853
|
/**
|
|
822
854
|
* Error in the request (4xx responses).
|
|
@@ -916,6 +948,39 @@ declare class ModelLoadingError extends SIEError {
|
|
|
916
948
|
readonly model: string | undefined;
|
|
917
949
|
constructor(message: string, model?: string);
|
|
918
950
|
}
|
|
951
|
+
/**
|
|
952
|
+
* Error when the server reports a *terminal* model-load failure.
|
|
953
|
+
*
|
|
954
|
+
* Distinct from {@link ModelLoadingError} — this is thrown on the first
|
|
955
|
+
* response (no retry budget consumed) when the server returns HTTP
|
|
956
|
+
* `502 MODEL_LOAD_FAILED`. The server uses this code for permanent-class
|
|
957
|
+
* failures (gated repos, missing dependencies, unrecognised model
|
|
958
|
+
* architectures) where retrying would waste time. See sie-test#85.
|
|
959
|
+
*
|
|
960
|
+
* Permanent failures will not auto-clear; an operator must fix the
|
|
961
|
+
* underlying cause (e.g. set `HF_TOKEN`, accept the model license on
|
|
962
|
+
* HuggingFace, upgrade `transformers`).
|
|
963
|
+
*/
|
|
964
|
+
declare class ModelLoadFailedError extends ServerError {
|
|
965
|
+
/** The model that was requested */
|
|
966
|
+
readonly model: string | undefined;
|
|
967
|
+
/**
|
|
968
|
+
* Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
|
|
969
|
+
* `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
|
|
970
|
+
* remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
|
|
971
|
+
*/
|
|
972
|
+
readonly errorClass: string | undefined;
|
|
973
|
+
/** Whether the failure is non-retryable per server policy. */
|
|
974
|
+
readonly permanent: boolean;
|
|
975
|
+
/** How many load attempts the server has logged. */
|
|
976
|
+
readonly attempts: number;
|
|
977
|
+
constructor(message: string, options?: {
|
|
978
|
+
model?: string;
|
|
979
|
+
errorClass?: string;
|
|
980
|
+
permanent?: boolean;
|
|
981
|
+
attempts?: number;
|
|
982
|
+
});
|
|
983
|
+
}
|
|
919
984
|
|
|
920
985
|
/**
|
|
921
986
|
* MessagePack serialization with msgpack-numpy compatibility.
|
|
@@ -1021,4 +1086,4 @@ declare function toImageWireFormat(input: ImageInput, format?: "jpeg" | "png" |
|
|
|
1021
1086
|
*/
|
|
1022
1087
|
declare function detectImageFormat(bytes: Uint8Array): "jpeg" | "png" | "webp" | "unknown";
|
|
1023
1088
|
|
|
1024
|
-
export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
|
1089
|
+
export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadFailedError, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
package/dist/index.js
CHANGED
|
@@ -9,9 +9,11 @@ var SIEError = class extends Error {
|
|
|
9
9
|
}
|
|
10
10
|
};
|
|
11
11
|
var SIEConnectionError = class extends SIEError {
|
|
12
|
-
|
|
12
|
+
kind;
|
|
13
|
+
constructor(message, kind = "other") {
|
|
13
14
|
super(message);
|
|
14
15
|
this.name = "SIEConnectionError";
|
|
16
|
+
this.kind = kind;
|
|
15
17
|
}
|
|
16
18
|
};
|
|
17
19
|
var RequestError = class extends SIEError {
|
|
@@ -83,6 +85,28 @@ var ModelLoadingError = class extends SIEError {
|
|
|
83
85
|
this.model = model;
|
|
84
86
|
}
|
|
85
87
|
};
|
|
88
|
+
var ModelLoadFailedError = class extends ServerError {
|
|
89
|
+
/** The model that was requested */
|
|
90
|
+
model;
|
|
91
|
+
/**
|
|
92
|
+
* Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
|
|
93
|
+
* `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
|
|
94
|
+
* remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
|
|
95
|
+
*/
|
|
96
|
+
errorClass;
|
|
97
|
+
/** Whether the failure is non-retryable per server policy. */
|
|
98
|
+
permanent;
|
|
99
|
+
/** How many load attempts the server has logged. */
|
|
100
|
+
attempts;
|
|
101
|
+
constructor(message, options) {
|
|
102
|
+
super(message, "MODEL_LOAD_FAILED", 502);
|
|
103
|
+
this.name = "ModelLoadFailedError";
|
|
104
|
+
this.model = options?.model;
|
|
105
|
+
this.errorClass = options?.errorClass;
|
|
106
|
+
this.permanent = options?.permanent ?? true;
|
|
107
|
+
this.attempts = options?.attempts ?? 1;
|
|
108
|
+
}
|
|
109
|
+
};
|
|
86
110
|
|
|
87
111
|
// src/internal/constants.ts
|
|
88
112
|
var MSGPACK_CONTENT_TYPE = "application/msgpack";
|
|
@@ -103,9 +127,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
|
|
|
103
127
|
var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
|
|
104
128
|
var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
|
|
105
129
|
var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
|
|
106
|
-
|
|
107
|
-
// src/version.ts
|
|
108
|
-
var SDK_VERSION = "0.2.0";
|
|
109
130
|
var EXT_TYPE_NUMPY = 78;
|
|
110
131
|
function parseDtype(dtype) {
|
|
111
132
|
const typeChar = dtype.slice(-2, -1);
|
|
@@ -310,7 +331,7 @@ function getRetryAfter2(response) {
|
|
|
310
331
|
const header = response.headers.get("Retry-After");
|
|
311
332
|
return getRetryAfter(header);
|
|
312
333
|
}
|
|
313
|
-
async function getErrorCode(response) {
|
|
334
|
+
async function getErrorDetail(response) {
|
|
314
335
|
try {
|
|
315
336
|
const contentType = response.headers.get("content-type") ?? "";
|
|
316
337
|
let data;
|
|
@@ -321,24 +342,42 @@ async function getErrorCode(response) {
|
|
|
321
342
|
data = await response.json();
|
|
322
343
|
}
|
|
323
344
|
if (data.error && typeof data.error === "object") {
|
|
324
|
-
|
|
325
|
-
if (typeof error.code === "string") {
|
|
326
|
-
return error.code;
|
|
327
|
-
}
|
|
345
|
+
return data.error;
|
|
328
346
|
}
|
|
329
347
|
if (data.detail && typeof data.detail === "object") {
|
|
330
|
-
|
|
331
|
-
if (typeof detail.code === "string") {
|
|
332
|
-
return detail.code;
|
|
333
|
-
}
|
|
348
|
+
return data.detail;
|
|
334
349
|
}
|
|
335
350
|
if (typeof data.code === "string") {
|
|
336
|
-
return data.code;
|
|
351
|
+
return data;
|
|
337
352
|
}
|
|
338
353
|
} catch {
|
|
339
354
|
}
|
|
340
355
|
return void 0;
|
|
341
356
|
}
|
|
357
|
+
async function getErrorCode(response) {
|
|
358
|
+
const detail = await getErrorDetail(response);
|
|
359
|
+
if (!detail) return void 0;
|
|
360
|
+
const code = detail.code;
|
|
361
|
+
return typeof code === "string" ? code : void 0;
|
|
362
|
+
}
|
|
363
|
+
async function throwIfModelLoadFailed(response, model) {
|
|
364
|
+
if (response.status !== 502) return;
|
|
365
|
+
const detail = await getErrorDetail(response.clone());
|
|
366
|
+
if (!detail) return;
|
|
367
|
+
if (detail.code !== "MODEL_LOAD_FAILED") return;
|
|
368
|
+
const errorClass = typeof detail.error_class === "string" ? detail.error_class : void 0;
|
|
369
|
+
const permanent = typeof detail.permanent === "boolean" ? detail.permanent : true;
|
|
370
|
+
const attemptsRaw = detail.attempts;
|
|
371
|
+
const parsedAttempts = typeof attemptsRaw === "number" ? attemptsRaw : typeof attemptsRaw === "string" ? Number.parseInt(attemptsRaw, 10) : Number.NaN;
|
|
372
|
+
const attempts = Number.isFinite(parsedAttempts) ? parsedAttempts : 1;
|
|
373
|
+
const message = typeof detail.message === "string" ? detail.message : `Model '${model ?? "?"}' failed to load`;
|
|
374
|
+
throw new ModelLoadFailedError(message, {
|
|
375
|
+
model,
|
|
376
|
+
errorClass,
|
|
377
|
+
permanent,
|
|
378
|
+
attempts
|
|
379
|
+
});
|
|
380
|
+
}
|
|
342
381
|
async function handleError(response, gpu) {
|
|
343
382
|
const { status } = response;
|
|
344
383
|
let errorBody = {};
|
|
@@ -474,6 +513,9 @@ function parseCapacityInfo(data, gpuFilter) {
|
|
|
474
513
|
};
|
|
475
514
|
}
|
|
476
515
|
|
|
516
|
+
// src/version.ts
|
|
517
|
+
var SDK_VERSION = "0.3.1";
|
|
518
|
+
|
|
477
519
|
// src/client.ts
|
|
478
520
|
function sleep(ms) {
|
|
479
521
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -614,9 +656,9 @@ var SIEClient = class {
|
|
|
614
656
|
};
|
|
615
657
|
}
|
|
616
658
|
/**
|
|
617
|
-
* Stream real-time status updates from a worker or
|
|
659
|
+
* Stream real-time status updates from a worker or gateway.
|
|
618
660
|
*
|
|
619
|
-
* @param mode - "cluster" uses
|
|
661
|
+
* @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
|
|
620
662
|
* "auto" detects the endpoint via /health.
|
|
621
663
|
*/
|
|
622
664
|
async *watch(mode = "auto") {
|
|
@@ -787,7 +829,7 @@ var SIEClient = class {
|
|
|
787
829
|
* Close the client and cleanup resources.
|
|
788
830
|
*
|
|
789
831
|
* Stops pool lease renewal timers. Note that pools are not deleted
|
|
790
|
-
* automatically - they are garbage collected by the
|
|
832
|
+
* automatically - they are garbage collected by the gateway after inactivity.
|
|
791
833
|
* This allows pool reuse if the client reconnects.
|
|
792
834
|
*/
|
|
793
835
|
async close() {
|
|
@@ -1044,7 +1086,7 @@ var SIEClient = class {
|
|
|
1044
1086
|
/**
|
|
1045
1087
|
* Get current cluster capacity information.
|
|
1046
1088
|
*
|
|
1047
|
-
* Queries the
|
|
1089
|
+
* Queries the gateway's /health endpoint for cluster state. Useful for
|
|
1048
1090
|
* checking if specific GPU types are available before sending requests.
|
|
1049
1091
|
*
|
|
1050
1092
|
* @param gpu - Optional filter to check specific GPU type availability
|
|
@@ -1066,10 +1108,10 @@ var SIEClient = class {
|
|
|
1066
1108
|
async getCapacity(gpu) {
|
|
1067
1109
|
const response = await this.requestJson("/health");
|
|
1068
1110
|
const data = await response.json();
|
|
1069
|
-
if (data.type !== "
|
|
1111
|
+
if (data.type !== "gateway") {
|
|
1070
1112
|
throw new RequestError(
|
|
1071
|
-
"getCapacity() requires a
|
|
1072
|
-
"
|
|
1113
|
+
"getCapacity() requires a gateway endpoint. This appears to be a worker.",
|
|
1114
|
+
"not_gateway",
|
|
1073
1115
|
400
|
|
1074
1116
|
);
|
|
1075
1117
|
}
|
|
@@ -1078,7 +1120,7 @@ var SIEClient = class {
|
|
|
1078
1120
|
/**
|
|
1079
1121
|
* Wait for GPU capacity to become available.
|
|
1080
1122
|
*
|
|
1081
|
-
* Polls the
|
|
1123
|
+
* Polls the gateway until workers with the specified GPU type are online.
|
|
1082
1124
|
* This is useful for pre-warming the cluster before running benchmarks.
|
|
1083
1125
|
*
|
|
1084
1126
|
* @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
|
|
@@ -1124,13 +1166,35 @@ var SIEClient = class {
|
|
|
1124
1166
|
}
|
|
1125
1167
|
}
|
|
1126
1168
|
/**
|
|
1127
|
-
* Make a msgpack HTTP request with retry logic
|
|
1169
|
+
* Make a msgpack HTTP request with retry logic.
|
|
1170
|
+
*
|
|
1171
|
+
* Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
|
|
1172
|
+
* - 202 Accepted (provisioning)
|
|
1173
|
+
* - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
|
|
1174
|
+
* - `SIEConnectionError` with `kind === "connect"` (issue #95)
|
|
1175
|
+
*
|
|
1176
|
+
* `kind === "timeout"` is NOT retried — would extend the user-visible
|
|
1177
|
+
* timeout from `timeout` to `provisionTimeout`.
|
|
1128
1178
|
*/
|
|
1129
1179
|
async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
|
|
1130
1180
|
const startTime = Date.now();
|
|
1131
1181
|
let loraRetries = 0;
|
|
1132
1182
|
while (true) {
|
|
1133
|
-
|
|
1183
|
+
let response;
|
|
1184
|
+
try {
|
|
1185
|
+
response = await this.request(path, body, pool, gpu);
|
|
1186
|
+
} catch (err) {
|
|
1187
|
+
if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
|
|
1188
|
+
const elapsed = Date.now() - startTime;
|
|
1189
|
+
if (elapsed < this.provisionTimeout) {
|
|
1190
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1191
|
+
const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
|
|
1192
|
+
await sleep(delay);
|
|
1193
|
+
continue;
|
|
1194
|
+
}
|
|
1195
|
+
}
|
|
1196
|
+
throw err;
|
|
1197
|
+
}
|
|
1134
1198
|
if (response.status === HTTP_ACCEPTED) {
|
|
1135
1199
|
const retryAfter = getRetryAfter2(response);
|
|
1136
1200
|
if (!waitForCapacity) {
|
|
@@ -1154,6 +1218,7 @@ var SIEClient = class {
|
|
|
1154
1218
|
await sleep(actualDelay);
|
|
1155
1219
|
continue;
|
|
1156
1220
|
}
|
|
1221
|
+
await throwIfModelLoadFailed(response, model);
|
|
1157
1222
|
if (response.status === 503) {
|
|
1158
1223
|
const clonedResponse = response.clone();
|
|
1159
1224
|
const errorCode = await getErrorCode(clonedResponse);
|
|
@@ -1187,6 +1252,17 @@ var SIEClient = class {
|
|
|
1187
1252
|
await sleep(actualDelay);
|
|
1188
1253
|
continue;
|
|
1189
1254
|
}
|
|
1255
|
+
if (waitForCapacity) {
|
|
1256
|
+
const elapsed = Date.now() - startTime;
|
|
1257
|
+
if (elapsed < this.provisionTimeout) {
|
|
1258
|
+
const retryAfter = getRetryAfter2(response);
|
|
1259
|
+
const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
|
|
1260
|
+
const remaining = this.provisionTimeout - elapsed;
|
|
1261
|
+
const actualDelay = Math.min(delay, remaining);
|
|
1262
|
+
await sleep(actualDelay);
|
|
1263
|
+
continue;
|
|
1264
|
+
}
|
|
1265
|
+
}
|
|
1190
1266
|
}
|
|
1191
1267
|
if (!response.ok) {
|
|
1192
1268
|
await handleError(response, gpu);
|
|
@@ -1228,10 +1304,10 @@ var SIEClient = class {
|
|
|
1228
1304
|
return response;
|
|
1229
1305
|
} catch (error) {
|
|
1230
1306
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1231
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms
|
|
1307
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1232
1308
|
}
|
|
1233
1309
|
if (error instanceof TypeError) {
|
|
1234
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}
|
|
1310
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1235
1311
|
}
|
|
1236
1312
|
throw error;
|
|
1237
1313
|
} finally {
|
|
@@ -1265,10 +1341,10 @@ var SIEClient = class {
|
|
|
1265
1341
|
return response;
|
|
1266
1342
|
} catch (error) {
|
|
1267
1343
|
if (error instanceof Error && error.name === "AbortError") {
|
|
1268
|
-
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms
|
|
1344
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1269
1345
|
}
|
|
1270
1346
|
if (error instanceof TypeError) {
|
|
1271
|
-
throw new SIEConnectionError(`Connection failed: ${error.message}
|
|
1347
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1272
1348
|
}
|
|
1273
1349
|
throw error;
|
|
1274
1350
|
} finally {
|
|
@@ -1316,7 +1392,7 @@ var SIEClient = class {
|
|
|
1316
1392
|
return "worker";
|
|
1317
1393
|
}
|
|
1318
1394
|
const data = await response.json();
|
|
1319
|
-
return data.type === "
|
|
1395
|
+
return data.type === "gateway" ? "cluster" : "worker";
|
|
1320
1396
|
} catch {
|
|
1321
1397
|
return "worker";
|
|
1322
1398
|
} finally {
|
|
@@ -1464,6 +1540,6 @@ function detectImageFormat(bytes) {
|
|
|
1464
1540
|
return "unknown";
|
|
1465
1541
|
}
|
|
1466
1542
|
|
|
1467
|
-
export { LoraLoadingError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
|
1543
|
+
export { LoraLoadingError, ModelLoadFailedError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
|
1468
1544
|
//# sourceMappingURL=index.js.map
|
|
1469
1545
|
//# sourceMappingURL=index.js.map
|