@superlinked/sie-sdk 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
15
15
  * Output type options for encode operation.
16
16
  */
17
17
  type OutputType = "dense" | "sparse" | "multivector";
18
+ /**
19
+ * Document input for composite-document extractors (PDF, DOCX, HTML, ...).
20
+ *
21
+ * The wire format is the document bytes plus an optional format hint. The
22
+ * hint is advisory — adapters may sniff the bytes when it is missing or
23
+ * unrecognized.
24
+ */
25
+ interface DocumentInput {
26
+ /** Document bytes (raw file content) */
27
+ data: Uint8Array;
28
+ /** Document format hint: "pdf", "docx", "html", etc. */
29
+ format?: string;
30
+ }
18
31
  /**
19
32
  * A single item to encode, score, or extract from.
20
33
  *
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
30
43
  * // With images for multimodal models (ColPali, CLIP)
31
44
  * { text: "Description", images: [imageBytes] }
32
45
  *
46
+ * // With a document for composite-document extractors (Docling, ...)
47
+ * { document: { data: pdfBytes, format: "pdf" } }
48
+ *
33
49
  * // Pre-encoded multivector (for use with maxsim utility)
34
50
  * { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
35
51
  */
@@ -40,6 +56,8 @@ interface Item {
40
56
  text?: string;
41
57
  /** Images as byte arrays (JPEG/PNG) for multimodal models */
42
58
  images?: Uint8Array[];
59
+ /** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
60
+ document?: DocumentInput;
43
61
  /** Pre-encoded multivector (for use with maxsim utility) */
44
62
  multivector?: Float32Array[];
45
63
  /** Arbitrary metadata (passed through to results) */
@@ -98,7 +116,7 @@ interface ModelInfo {
98
116
  name: string;
99
117
  /** Whether the model is currently loaded in memory */
100
118
  loaded: boolean;
101
- /** Supported input types: ["text"], ["text", "image"], etc. */
119
+ /** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
102
120
  inputs: string[];
103
121
  /** Supported output types: ["dense"], ["dense", "sparse"], etc. */
104
122
  outputs: string[];
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
448
466
  *
449
467
  * @example Resource pool usage
450
468
  * ```typescript
451
- * const client = new SIEClient("http://router:8080");
469
+ * const client = new SIEClient("http://gateway:8080");
452
470
  *
453
471
  * // Create a dedicated pool
454
472
  * await client.createPool("eval-bench", { l4: 2 });
@@ -523,9 +541,9 @@ declare class SIEClient {
523
541
  */
524
542
  getModel(name: string): Promise<ModelInfo>;
525
543
  /**
526
- * Stream real-time status updates from a worker or router.
544
+ * Stream real-time status updates from a worker or gateway.
527
545
  *
528
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
546
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
529
547
  * "auto" detects the endpoint via /health.
530
548
  */
531
549
  watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
@@ -576,7 +594,7 @@ declare class SIEClient {
576
594
  * Close the client and cleanup resources.
577
595
  *
578
596
  * Stops pool lease renewal timers. Note that pools are not deleted
579
- * automatically - they are garbage collected by the router after inactivity.
597
+ * automatically - they are garbage collected by the gateway after inactivity.
580
598
  * This allows pool reuse if the client reconnects.
581
599
  */
582
600
  close(): Promise<void>;
@@ -643,7 +661,7 @@ declare class SIEClient {
643
661
  /**
644
662
  * Get current cluster capacity information.
645
663
  *
646
- * Queries the router's /health endpoint for cluster state. Useful for
664
+ * Queries the gateway's /health endpoint for cluster state. Useful for
647
665
  * checking if specific GPU types are available before sending requests.
648
666
  *
649
667
  * @param gpu - Optional filter to check specific GPU type availability
@@ -666,7 +684,7 @@ declare class SIEClient {
666
684
  /**
667
685
  * Wait for GPU capacity to become available.
668
686
  *
669
- * Polls the router until workers with the specified GPU type are online.
687
+ * Polls the gateway until workers with the specified GPU type are online.
670
688
  * This is useful for pre-warming the cluster before running benchmarks.
671
689
  *
672
690
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -689,7 +707,15 @@ declare class SIEClient {
689
707
  pollInterval?: number;
690
708
  }): Promise<CapacityInfo>;
691
709
  /**
692
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
710
+ * Make a msgpack HTTP request with retry logic.
711
+ *
712
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
713
+ * - 202 Accepted (provisioning)
714
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
715
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
716
+ *
717
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
718
+ * timeout from `timeout` to `provisionTimeout`.
693
719
  */
694
720
  private requestWithRetry;
695
721
  /**
@@ -706,7 +732,7 @@ declare class SIEClient {
706
732
  private detectEndpointType;
707
733
  }
708
734
 
709
- declare const SDK_VERSION = "0.2.0";
735
+ declare const SDK_VERSION = "0.3.1";
710
736
 
711
737
  /**
712
738
  * Helpers for converting SIE encode results to plain JavaScript types.
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
806
832
  declare class SIEError extends Error {
807
833
  constructor(message: string);
808
834
  }
835
+ /**
836
+ * `SIEConnectionError` failure category. Only `"connect"` is auto-retried
837
+ * under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
838
+ */
839
+ type SIEConnectionErrorKind = "connect" | "timeout" | "other";
809
840
  /**
810
841
  * Error connecting to the SIE server.
811
842
  *
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
816
847
  * - Server refuses connection
817
848
  */
818
849
  declare class SIEConnectionError extends SIEError {
819
- constructor(message: string);
850
+ readonly kind: SIEConnectionErrorKind;
851
+ constructor(message: string, kind?: SIEConnectionErrorKind);
820
852
  }
821
853
  /**
822
854
  * Error in the request (4xx responses).
@@ -916,6 +948,39 @@ declare class ModelLoadingError extends SIEError {
916
948
  readonly model: string | undefined;
917
949
  constructor(message: string, model?: string);
918
950
  }
951
+ /**
952
+ * Error when the server reports a *terminal* model-load failure.
953
+ *
954
+ * Distinct from {@link ModelLoadingError} — this is thrown on the first
955
+ * response (no retry budget consumed) when the server returns HTTP
956
+ * `502 MODEL_LOAD_FAILED`. The server uses this code for permanent-class
957
+ * failures (gated repos, missing dependencies, unrecognised model
958
+ * architectures) where retrying would waste time. See sie-test#85.
959
+ *
960
+ * Permanent failures will not auto-clear; an operator must fix the
961
+ * underlying cause (e.g. set `HF_TOKEN`, accept the model license on
962
+ * HuggingFace, upgrade `transformers`).
963
+ */
964
+ declare class ModelLoadFailedError extends ServerError {
965
+ /** The model that was requested */
966
+ readonly model: string | undefined;
967
+ /**
968
+ * Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
969
+ * `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
970
+ * remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
971
+ */
972
+ readonly errorClass: string | undefined;
973
+ /** Whether the failure is non-retryable per server policy. */
974
+ readonly permanent: boolean;
975
+ /** How many load attempts the server has logged. */
976
+ readonly attempts: number;
977
+ constructor(message: string, options?: {
978
+ model?: string;
979
+ errorClass?: string;
980
+ permanent?: boolean;
981
+ attempts?: number;
982
+ });
983
+ }
919
984
 
920
985
  /**
921
986
  * MessagePack serialization with msgpack-numpy compatibility.
@@ -1021,4 +1086,4 @@ declare function toImageWireFormat(input: ImageInput, format?: "jpeg" | "png" |
1021
1086
  */
1022
1087
  declare function detectImageFormat(bytes: Uint8Array): "jpeg" | "png" | "webp" | "unknown";
1023
1088
 
1024
- export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
1089
+ export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadFailedError, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
package/dist/index.d.ts CHANGED
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
15
15
  * Output type options for encode operation.
16
16
  */
17
17
  type OutputType = "dense" | "sparse" | "multivector";
18
+ /**
19
+ * Document input for composite-document extractors (PDF, DOCX, HTML, ...).
20
+ *
21
+ * The wire format is the document bytes plus an optional format hint. The
22
+ * hint is advisory — adapters may sniff the bytes when it is missing or
23
+ * unrecognized.
24
+ */
25
+ interface DocumentInput {
26
+ /** Document bytes (raw file content) */
27
+ data: Uint8Array;
28
+ /** Document format hint: "pdf", "docx", "html", etc. */
29
+ format?: string;
30
+ }
18
31
  /**
19
32
  * A single item to encode, score, or extract from.
20
33
  *
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
30
43
  * // With images for multimodal models (ColPali, CLIP)
31
44
  * { text: "Description", images: [imageBytes] }
32
45
  *
46
+ * // With a document for composite-document extractors (Docling, ...)
47
+ * { document: { data: pdfBytes, format: "pdf" } }
48
+ *
33
49
  * // Pre-encoded multivector (for use with maxsim utility)
34
50
  * { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
35
51
  */
@@ -40,6 +56,8 @@ interface Item {
40
56
  text?: string;
41
57
  /** Images as byte arrays (JPEG/PNG) for multimodal models */
42
58
  images?: Uint8Array[];
59
+ /** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
60
+ document?: DocumentInput;
43
61
  /** Pre-encoded multivector (for use with maxsim utility) */
44
62
  multivector?: Float32Array[];
45
63
  /** Arbitrary metadata (passed through to results) */
@@ -98,7 +116,7 @@ interface ModelInfo {
98
116
  name: string;
99
117
  /** Whether the model is currently loaded in memory */
100
118
  loaded: boolean;
101
- /** Supported input types: ["text"], ["text", "image"], etc. */
119
+ /** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
102
120
  inputs: string[];
103
121
  /** Supported output types: ["dense"], ["dense", "sparse"], etc. */
104
122
  outputs: string[];
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
448
466
  *
449
467
  * @example Resource pool usage
450
468
  * ```typescript
451
- * const client = new SIEClient("http://router:8080");
469
+ * const client = new SIEClient("http://gateway:8080");
452
470
  *
453
471
  * // Create a dedicated pool
454
472
  * await client.createPool("eval-bench", { l4: 2 });
@@ -523,9 +541,9 @@ declare class SIEClient {
523
541
  */
524
542
  getModel(name: string): Promise<ModelInfo>;
525
543
  /**
526
- * Stream real-time status updates from a worker or router.
544
+ * Stream real-time status updates from a worker or gateway.
527
545
  *
528
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
546
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
529
547
  * "auto" detects the endpoint via /health.
530
548
  */
531
549
  watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
@@ -576,7 +594,7 @@ declare class SIEClient {
576
594
  * Close the client and cleanup resources.
577
595
  *
578
596
  * Stops pool lease renewal timers. Note that pools are not deleted
579
- * automatically - they are garbage collected by the router after inactivity.
597
+ * automatically - they are garbage collected by the gateway after inactivity.
580
598
  * This allows pool reuse if the client reconnects.
581
599
  */
582
600
  close(): Promise<void>;
@@ -643,7 +661,7 @@ declare class SIEClient {
643
661
  /**
644
662
  * Get current cluster capacity information.
645
663
  *
646
- * Queries the router's /health endpoint for cluster state. Useful for
664
+ * Queries the gateway's /health endpoint for cluster state. Useful for
647
665
  * checking if specific GPU types are available before sending requests.
648
666
  *
649
667
  * @param gpu - Optional filter to check specific GPU type availability
@@ -666,7 +684,7 @@ declare class SIEClient {
666
684
  /**
667
685
  * Wait for GPU capacity to become available.
668
686
  *
669
- * Polls the router until workers with the specified GPU type are online.
687
+ * Polls the gateway until workers with the specified GPU type are online.
670
688
  * This is useful for pre-warming the cluster before running benchmarks.
671
689
  *
672
690
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -689,7 +707,15 @@ declare class SIEClient {
689
707
  pollInterval?: number;
690
708
  }): Promise<CapacityInfo>;
691
709
  /**
692
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
710
+ * Make a msgpack HTTP request with retry logic.
711
+ *
712
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
713
+ * - 202 Accepted (provisioning)
714
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
715
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
716
+ *
717
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
718
+ * timeout from `timeout` to `provisionTimeout`.
693
719
  */
694
720
  private requestWithRetry;
695
721
  /**
@@ -706,7 +732,7 @@ declare class SIEClient {
706
732
  private detectEndpointType;
707
733
  }
708
734
 
709
- declare const SDK_VERSION = "0.2.0";
735
+ declare const SDK_VERSION = "0.3.1";
710
736
 
711
737
  /**
712
738
  * Helpers for converting SIE encode results to plain JavaScript types.
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
806
832
  declare class SIEError extends Error {
807
833
  constructor(message: string);
808
834
  }
835
+ /**
836
+ * `SIEConnectionError` failure category. Only `"connect"` is auto-retried
837
+ * under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
838
+ */
839
+ type SIEConnectionErrorKind = "connect" | "timeout" | "other";
809
840
  /**
810
841
  * Error connecting to the SIE server.
811
842
  *
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
816
847
  * - Server refuses connection
817
848
  */
818
849
  declare class SIEConnectionError extends SIEError {
819
- constructor(message: string);
850
+ readonly kind: SIEConnectionErrorKind;
851
+ constructor(message: string, kind?: SIEConnectionErrorKind);
820
852
  }
821
853
  /**
822
854
  * Error in the request (4xx responses).
@@ -916,6 +948,39 @@ declare class ModelLoadingError extends SIEError {
916
948
  readonly model: string | undefined;
917
949
  constructor(message: string, model?: string);
918
950
  }
951
+ /**
952
+ * Error when the server reports a *terminal* model-load failure.
953
+ *
954
+ * Distinct from {@link ModelLoadingError} — this is thrown on the first
955
+ * response (no retry budget consumed) when the server returns HTTP
956
+ * `502 MODEL_LOAD_FAILED`. The server uses this code for permanent-class
957
+ * failures (gated repos, missing dependencies, unrecognised model
958
+ * architectures) where retrying would waste time. See sie-test#85.
959
+ *
960
+ * Permanent failures will not auto-clear; an operator must fix the
961
+ * underlying cause (e.g. set `HF_TOKEN`, accept the model license on
962
+ * HuggingFace, upgrade `transformers`).
963
+ */
964
+ declare class ModelLoadFailedError extends ServerError {
965
+ /** The model that was requested */
966
+ readonly model: string | undefined;
967
+ /**
968
+ * Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
969
+ * `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
970
+ * remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
971
+ */
972
+ readonly errorClass: string | undefined;
973
+ /** Whether the failure is non-retryable per server policy. */
974
+ readonly permanent: boolean;
975
+ /** How many load attempts the server has logged. */
976
+ readonly attempts: number;
977
+ constructor(message: string, options?: {
978
+ model?: string;
979
+ errorClass?: string;
980
+ permanent?: boolean;
981
+ attempts?: number;
982
+ });
983
+ }
919
984
 
920
985
  /**
921
986
  * MessagePack serialization with msgpack-numpy compatibility.
@@ -1021,4 +1086,4 @@ declare function toImageWireFormat(input: ImageInput, format?: "jpeg" | "png" |
1021
1086
  */
1022
1087
  declare function detectImageFormat(bytes: Uint8Array): "jpeg" | "png" | "webp" | "unknown";
1023
1088
 
1024
- export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
1089
+ export { type CapacityInfo, type Classification, type ClusterStatusMessage, type ClusterSummary, type ClusterWorkerInfo, type DType, type DetectedObject, type EncodeOptions, type EncodeResult, type Entity, type ExtractOptions, type ExtractResult, type GPUMetrics, type ImageInput, type ImageWireFormat, type Item, LoraLoadingError, type ModelConfig, type ModelDims, type ModelInfo, ModelLoadFailedError, ModelLoadingError, type ModelState, type ModelStatus, type ModelSummary, type OutputType, PoolError, type PoolInfo, type PoolSpec, type PoolStatus, ProvisioningError, type Relation, RequestError, SDK_VERSION, SIEClient, type SIEClientOptions, SIEConnectionError, SIEError, type ScoreEntry, type ScoreOptions, type ScoreResult, ServerError, type ServerInfo, type SparseResult, type SparseVector, type StatusMessage, type TimingInfo, type WorkerInfo, type WorkerStatusMessage, denseEmbedding, detectImageFormat, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
package/dist/index.js CHANGED
@@ -9,9 +9,11 @@ var SIEError = class extends Error {
9
9
  }
10
10
  };
11
11
  var SIEConnectionError = class extends SIEError {
12
- constructor(message) {
12
+ kind;
13
+ constructor(message, kind = "other") {
13
14
  super(message);
14
15
  this.name = "SIEConnectionError";
16
+ this.kind = kind;
15
17
  }
16
18
  };
17
19
  var RequestError = class extends SIEError {
@@ -83,6 +85,28 @@ var ModelLoadingError = class extends SIEError {
83
85
  this.model = model;
84
86
  }
85
87
  };
88
+ var ModelLoadFailedError = class extends ServerError {
89
+ /** The model that was requested */
90
+ model;
91
+ /**
92
+ * Server-side classification: one of `GATED`, `OOM`, `DEPENDENCY`,
93
+ * `NOT_FOUND`, `NETWORK`, `UNKNOWN`. Use this to route to specific
94
+ * remediation paths (e.g. surface a "set HF_TOKEN" hint for `GATED`).
95
+ */
96
+ errorClass;
97
+ /** Whether the failure is non-retryable per server policy. */
98
+ permanent;
99
+ /** How many load attempts the server has logged. */
100
+ attempts;
101
+ constructor(message, options) {
102
+ super(message, "MODEL_LOAD_FAILED", 502);
103
+ this.name = "ModelLoadFailedError";
104
+ this.model = options?.model;
105
+ this.errorClass = options?.errorClass;
106
+ this.permanent = options?.permanent ?? true;
107
+ this.attempts = options?.attempts ?? 1;
108
+ }
109
+ };
86
110
 
87
111
  // src/internal/constants.ts
88
112
  var MSGPACK_CONTENT_TYPE = "application/msgpack";
@@ -103,9 +127,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
103
127
  var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
104
128
  var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
105
129
  var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
106
-
107
- // src/version.ts
108
- var SDK_VERSION = "0.2.0";
109
130
  var EXT_TYPE_NUMPY = 78;
110
131
  function parseDtype(dtype) {
111
132
  const typeChar = dtype.slice(-2, -1);
@@ -310,7 +331,7 @@ function getRetryAfter2(response) {
310
331
  const header = response.headers.get("Retry-After");
311
332
  return getRetryAfter(header);
312
333
  }
313
- async function getErrorCode(response) {
334
+ async function getErrorDetail(response) {
314
335
  try {
315
336
  const contentType = response.headers.get("content-type") ?? "";
316
337
  let data;
@@ -321,24 +342,42 @@ async function getErrorCode(response) {
321
342
  data = await response.json();
322
343
  }
323
344
  if (data.error && typeof data.error === "object") {
324
- const error = data.error;
325
- if (typeof error.code === "string") {
326
- return error.code;
327
- }
345
+ return data.error;
328
346
  }
329
347
  if (data.detail && typeof data.detail === "object") {
330
- const detail = data.detail;
331
- if (typeof detail.code === "string") {
332
- return detail.code;
333
- }
348
+ return data.detail;
334
349
  }
335
350
  if (typeof data.code === "string") {
336
- return data.code;
351
+ return data;
337
352
  }
338
353
  } catch {
339
354
  }
340
355
  return void 0;
341
356
  }
357
+ async function getErrorCode(response) {
358
+ const detail = await getErrorDetail(response);
359
+ if (!detail) return void 0;
360
+ const code = detail.code;
361
+ return typeof code === "string" ? code : void 0;
362
+ }
363
+ async function throwIfModelLoadFailed(response, model) {
364
+ if (response.status !== 502) return;
365
+ const detail = await getErrorDetail(response.clone());
366
+ if (!detail) return;
367
+ if (detail.code !== "MODEL_LOAD_FAILED") return;
368
+ const errorClass = typeof detail.error_class === "string" ? detail.error_class : void 0;
369
+ const permanent = typeof detail.permanent === "boolean" ? detail.permanent : true;
370
+ const attemptsRaw = detail.attempts;
371
+ const parsedAttempts = typeof attemptsRaw === "number" ? attemptsRaw : typeof attemptsRaw === "string" ? Number.parseInt(attemptsRaw, 10) : Number.NaN;
372
+ const attempts = Number.isFinite(parsedAttempts) ? parsedAttempts : 1;
373
+ const message = typeof detail.message === "string" ? detail.message : `Model '${model ?? "?"}' failed to load`;
374
+ throw new ModelLoadFailedError(message, {
375
+ model,
376
+ errorClass,
377
+ permanent,
378
+ attempts
379
+ });
380
+ }
342
381
  async function handleError(response, gpu) {
343
382
  const { status } = response;
344
383
  let errorBody = {};
@@ -474,6 +513,9 @@ function parseCapacityInfo(data, gpuFilter) {
474
513
  };
475
514
  }
476
515
 
516
+ // src/version.ts
517
+ var SDK_VERSION = "0.3.1";
518
+
477
519
  // src/client.ts
478
520
  function sleep(ms) {
479
521
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -614,9 +656,9 @@ var SIEClient = class {
614
656
  };
615
657
  }
616
658
  /**
617
- * Stream real-time status updates from a worker or router.
659
+ * Stream real-time status updates from a worker or gateway.
618
660
  *
619
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
661
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
620
662
  * "auto" detects the endpoint via /health.
621
663
  */
622
664
  async *watch(mode = "auto") {
@@ -787,7 +829,7 @@ var SIEClient = class {
787
829
  * Close the client and cleanup resources.
788
830
  *
789
831
  * Stops pool lease renewal timers. Note that pools are not deleted
790
- * automatically - they are garbage collected by the router after inactivity.
832
+ * automatically - they are garbage collected by the gateway after inactivity.
791
833
  * This allows pool reuse if the client reconnects.
792
834
  */
793
835
  async close() {
@@ -1044,7 +1086,7 @@ var SIEClient = class {
1044
1086
  /**
1045
1087
  * Get current cluster capacity information.
1046
1088
  *
1047
- * Queries the router's /health endpoint for cluster state. Useful for
1089
+ * Queries the gateway's /health endpoint for cluster state. Useful for
1048
1090
  * checking if specific GPU types are available before sending requests.
1049
1091
  *
1050
1092
  * @param gpu - Optional filter to check specific GPU type availability
@@ -1066,10 +1108,10 @@ var SIEClient = class {
1066
1108
  async getCapacity(gpu) {
1067
1109
  const response = await this.requestJson("/health");
1068
1110
  const data = await response.json();
1069
- if (data.type !== "router") {
1111
+ if (data.type !== "gateway") {
1070
1112
  throw new RequestError(
1071
- "getCapacity() requires a router endpoint. This appears to be a worker.",
1072
- "not_router",
1113
+ "getCapacity() requires a gateway endpoint. This appears to be a worker.",
1114
+ "not_gateway",
1073
1115
  400
1074
1116
  );
1075
1117
  }
@@ -1078,7 +1120,7 @@ var SIEClient = class {
1078
1120
  /**
1079
1121
  * Wait for GPU capacity to become available.
1080
1122
  *
1081
- * Polls the router until workers with the specified GPU type are online.
1123
+ * Polls the gateway until workers with the specified GPU type are online.
1082
1124
  * This is useful for pre-warming the cluster before running benchmarks.
1083
1125
  *
1084
1126
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -1124,13 +1166,35 @@ var SIEClient = class {
1124
1166
  }
1125
1167
  }
1126
1168
  /**
1127
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
1169
+ * Make a msgpack HTTP request with retry logic.
1170
+ *
1171
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
1172
+ * - 202 Accepted (provisioning)
1173
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
1174
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
1175
+ *
1176
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
1177
+ * timeout from `timeout` to `provisionTimeout`.
1128
1178
  */
1129
1179
  async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
1130
1180
  const startTime = Date.now();
1131
1181
  let loraRetries = 0;
1132
1182
  while (true) {
1133
- const response = await this.request(path, body, pool, gpu);
1183
+ let response;
1184
+ try {
1185
+ response = await this.request(path, body, pool, gpu);
1186
+ } catch (err) {
1187
+ if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
1188
+ const elapsed = Date.now() - startTime;
1189
+ if (elapsed < this.provisionTimeout) {
1190
+ const remaining = this.provisionTimeout - elapsed;
1191
+ const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
1192
+ await sleep(delay);
1193
+ continue;
1194
+ }
1195
+ }
1196
+ throw err;
1197
+ }
1134
1198
  if (response.status === HTTP_ACCEPTED) {
1135
1199
  const retryAfter = getRetryAfter2(response);
1136
1200
  if (!waitForCapacity) {
@@ -1154,6 +1218,7 @@ var SIEClient = class {
1154
1218
  await sleep(actualDelay);
1155
1219
  continue;
1156
1220
  }
1221
+ await throwIfModelLoadFailed(response, model);
1157
1222
  if (response.status === 503) {
1158
1223
  const clonedResponse = response.clone();
1159
1224
  const errorCode = await getErrorCode(clonedResponse);
@@ -1187,6 +1252,17 @@ var SIEClient = class {
1187
1252
  await sleep(actualDelay);
1188
1253
  continue;
1189
1254
  }
1255
+ if (waitForCapacity) {
1256
+ const elapsed = Date.now() - startTime;
1257
+ if (elapsed < this.provisionTimeout) {
1258
+ const retryAfter = getRetryAfter2(response);
1259
+ const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1260
+ const remaining = this.provisionTimeout - elapsed;
1261
+ const actualDelay = Math.min(delay, remaining);
1262
+ await sleep(actualDelay);
1263
+ continue;
1264
+ }
1265
+ }
1190
1266
  }
1191
1267
  if (!response.ok) {
1192
1268
  await handleError(response, gpu);
@@ -1228,10 +1304,10 @@ var SIEClient = class {
1228
1304
  return response;
1229
1305
  } catch (error) {
1230
1306
  if (error instanceof Error && error.name === "AbortError") {
1231
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1307
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1232
1308
  }
1233
1309
  if (error instanceof TypeError) {
1234
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1310
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1235
1311
  }
1236
1312
  throw error;
1237
1313
  } finally {
@@ -1265,10 +1341,10 @@ var SIEClient = class {
1265
1341
  return response;
1266
1342
  } catch (error) {
1267
1343
  if (error instanceof Error && error.name === "AbortError") {
1268
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1344
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1269
1345
  }
1270
1346
  if (error instanceof TypeError) {
1271
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1347
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1272
1348
  }
1273
1349
  throw error;
1274
1350
  } finally {
@@ -1316,7 +1392,7 @@ var SIEClient = class {
1316
1392
  return "worker";
1317
1393
  }
1318
1394
  const data = await response.json();
1319
- return data.type === "router" ? "cluster" : "worker";
1395
+ return data.type === "gateway" ? "cluster" : "worker";
1320
1396
  } catch {
1321
1397
  return "worker";
1322
1398
  } finally {
@@ -1464,6 +1540,6 @@ function detectImageFormat(bytes) {
1464
1540
  return "unknown";
1465
1541
  }
1466
1542
 
1467
- export { LoraLoadingError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
1543
+ export { LoraLoadingError, ModelLoadFailedError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
1468
1544
  //# sourceMappingURL=index.js.map
1469
1545
  //# sourceMappingURL=index.js.map