@superlinked/sie-sdk 0.2.0 → 0.3.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
15
15
  * Output type options for encode operation.
16
16
  */
17
17
  type OutputType = "dense" | "sparse" | "multivector";
18
+ /**
19
+ * Document input for composite-document extractors (PDF, DOCX, HTML, ...).
20
+ *
21
+ * The wire format is the document bytes plus an optional format hint. The
22
+ * hint is advisory — adapters may sniff the bytes when it is missing or
23
+ * unrecognized.
24
+ */
25
+ interface DocumentInput {
26
+ /** Document bytes (raw file content) */
27
+ data: Uint8Array;
28
+ /** Document format hint: "pdf", "docx", "html", etc. */
29
+ format?: string;
30
+ }
18
31
  /**
19
32
  * A single item to encode, score, or extract from.
20
33
  *
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
30
43
  * // With images for multimodal models (ColPali, CLIP)
31
44
  * { text: "Description", images: [imageBytes] }
32
45
  *
46
+ * // With a document for composite-document extractors (Docling, ...)
47
+ * { document: { data: pdfBytes, format: "pdf" } }
48
+ *
33
49
  * // Pre-encoded multivector (for use with maxsim utility)
34
50
  * { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
35
51
  */
@@ -40,6 +56,8 @@ interface Item {
40
56
  text?: string;
41
57
  /** Images as byte arrays (JPEG/PNG) for multimodal models */
42
58
  images?: Uint8Array[];
59
+ /** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
60
+ document?: DocumentInput;
43
61
  /** Pre-encoded multivector (for use with maxsim utility) */
44
62
  multivector?: Float32Array[];
45
63
  /** Arbitrary metadata (passed through to results) */
@@ -98,7 +116,7 @@ interface ModelInfo {
98
116
  name: string;
99
117
  /** Whether the model is currently loaded in memory */
100
118
  loaded: boolean;
101
- /** Supported input types: ["text"], ["text", "image"], etc. */
119
+ /** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
102
120
  inputs: string[];
103
121
  /** Supported output types: ["dense"], ["dense", "sparse"], etc. */
104
122
  outputs: string[];
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
448
466
  *
449
467
  * @example Resource pool usage
450
468
  * ```typescript
451
- * const client = new SIEClient("http://router:8080");
469
+ * const client = new SIEClient("http://gateway:8080");
452
470
  *
453
471
  * // Create a dedicated pool
454
472
  * await client.createPool("eval-bench", { l4: 2 });
@@ -523,9 +541,9 @@ declare class SIEClient {
523
541
  */
524
542
  getModel(name: string): Promise<ModelInfo>;
525
543
  /**
526
- * Stream real-time status updates from a worker or router.
544
+ * Stream real-time status updates from a worker or gateway.
527
545
  *
528
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
546
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
529
547
  * "auto" detects the endpoint via /health.
530
548
  */
531
549
  watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
@@ -576,7 +594,7 @@ declare class SIEClient {
576
594
  * Close the client and cleanup resources.
577
595
  *
578
596
  * Stops pool lease renewal timers. Note that pools are not deleted
579
- * automatically - they are garbage collected by the router after inactivity.
597
+ * automatically - they are garbage collected by the gateway after inactivity.
580
598
  * This allows pool reuse if the client reconnects.
581
599
  */
582
600
  close(): Promise<void>;
@@ -643,7 +661,7 @@ declare class SIEClient {
643
661
  /**
644
662
  * Get current cluster capacity information.
645
663
  *
646
- * Queries the router's /health endpoint for cluster state. Useful for
664
+ * Queries the gateway's /health endpoint for cluster state. Useful for
647
665
  * checking if specific GPU types are available before sending requests.
648
666
  *
649
667
  * @param gpu - Optional filter to check specific GPU type availability
@@ -666,7 +684,7 @@ declare class SIEClient {
666
684
  /**
667
685
  * Wait for GPU capacity to become available.
668
686
  *
669
- * Polls the router until workers with the specified GPU type are online.
687
+ * Polls the gateway until workers with the specified GPU type are online.
670
688
  * This is useful for pre-warming the cluster before running benchmarks.
671
689
  *
672
690
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -689,7 +707,15 @@ declare class SIEClient {
689
707
  pollInterval?: number;
690
708
  }): Promise<CapacityInfo>;
691
709
  /**
692
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
710
+ * Make a msgpack HTTP request with retry logic.
711
+ *
712
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
713
+ * - 202 Accepted (provisioning)
714
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
715
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
716
+ *
717
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
718
+ * timeout from `timeout` to `provisionTimeout`.
693
719
  */
694
720
  private requestWithRetry;
695
721
  /**
@@ -706,7 +732,7 @@ declare class SIEClient {
706
732
  private detectEndpointType;
707
733
  }
708
734
 
709
- declare const SDK_VERSION = "0.2.0";
735
+ declare const SDK_VERSION = "0.3.0";
710
736
 
711
737
  /**
712
738
  * Helpers for converting SIE encode results to plain JavaScript types.
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
806
832
  declare class SIEError extends Error {
807
833
  constructor(message: string);
808
834
  }
835
+ /**
836
+ * `SIEConnectionError` failure category. Only `"connect"` is auto-retried
837
+ * under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
838
+ */
839
+ type SIEConnectionErrorKind = "connect" | "timeout" | "other";
809
840
  /**
810
841
  * Error connecting to the SIE server.
811
842
  *
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
816
847
  * - Server refuses connection
817
848
  */
818
849
  declare class SIEConnectionError extends SIEError {
819
- constructor(message: string);
850
+ readonly kind: SIEConnectionErrorKind;
851
+ constructor(message: string, kind?: SIEConnectionErrorKind);
820
852
  }
821
853
  /**
822
854
  * Error in the request (4xx responses).
package/dist/index.d.ts CHANGED
@@ -15,6 +15,19 @@ type DType = "float32" | "float16" | "bfloat16" | "int8" | "uint8" | "binary" |
15
15
  * Output type options for encode operation.
16
16
  */
17
17
  type OutputType = "dense" | "sparse" | "multivector";
18
+ /**
19
+ * Document input for composite-document extractors (PDF, DOCX, HTML, ...).
20
+ *
21
+ * The wire format is the document bytes plus an optional format hint. The
22
+ * hint is advisory — adapters may sniff the bytes when it is missing or
23
+ * unrecognized.
24
+ */
25
+ interface DocumentInput {
26
+ /** Document bytes (raw file content) */
27
+ data: Uint8Array;
28
+ /** Document format hint: "pdf", "docx", "html", etc. */
29
+ format?: string;
30
+ }
18
31
  /**
19
32
  * A single item to encode, score, or extract from.
20
33
  *
@@ -30,6 +43,9 @@ type OutputType = "dense" | "sparse" | "multivector";
30
43
  * // With images for multimodal models (ColPali, CLIP)
31
44
  * { text: "Description", images: [imageBytes] }
32
45
  *
46
+ * // With a document for composite-document extractors (Docling, ...)
47
+ * { document: { data: pdfBytes, format: "pdf" } }
48
+ *
33
49
  * // Pre-encoded multivector (for use with maxsim utility)
34
50
  * { multivector: [tokenEmbedding1, tokenEmbedding2, ...] }
35
51
  */
@@ -40,6 +56,8 @@ interface Item {
40
56
  text?: string;
41
57
  /** Images as byte arrays (JPEG/PNG) for multimodal models */
42
58
  images?: Uint8Array[];
59
+ /** Document for composite-document extractors (PDF, DOCX, HTML, ...) */
60
+ document?: DocumentInput;
43
61
  /** Pre-encoded multivector (for use with maxsim utility) */
44
62
  multivector?: Float32Array[];
45
63
  /** Arbitrary metadata (passed through to results) */
@@ -98,7 +116,7 @@ interface ModelInfo {
98
116
  name: string;
99
117
  /** Whether the model is currently loaded in memory */
100
118
  loaded: boolean;
101
- /** Supported input types: ["text"], ["text", "image"], etc. */
119
+ /** Supported input types: ["text"], ["text", "image"], ["text", "document"], etc. */
102
120
  inputs: string[];
103
121
  /** Supported output types: ["dense"], ["dense", "sparse"], etc. */
104
122
  outputs: string[];
@@ -448,7 +466,7 @@ declare function toFloat32Array(arr: number[]): Float32Array;
448
466
  *
449
467
  * @example Resource pool usage
450
468
  * ```typescript
451
- * const client = new SIEClient("http://router:8080");
469
+ * const client = new SIEClient("http://gateway:8080");
452
470
  *
453
471
  * // Create a dedicated pool
454
472
  * await client.createPool("eval-bench", { l4: 2 });
@@ -523,9 +541,9 @@ declare class SIEClient {
523
541
  */
524
542
  getModel(name: string): Promise<ModelInfo>;
525
543
  /**
526
- * Stream real-time status updates from a worker or router.
544
+ * Stream real-time status updates from a worker or gateway.
527
545
  *
528
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
546
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
529
547
  * "auto" detects the endpoint via /health.
530
548
  */
531
549
  watch(mode?: "auto" | "cluster" | "worker"): AsyncGenerator<StatusMessage>;
@@ -576,7 +594,7 @@ declare class SIEClient {
576
594
  * Close the client and cleanup resources.
577
595
  *
578
596
  * Stops pool lease renewal timers. Note that pools are not deleted
579
- * automatically - they are garbage collected by the router after inactivity.
597
+ * automatically - they are garbage collected by the gateway after inactivity.
580
598
  * This allows pool reuse if the client reconnects.
581
599
  */
582
600
  close(): Promise<void>;
@@ -643,7 +661,7 @@ declare class SIEClient {
643
661
  /**
644
662
  * Get current cluster capacity information.
645
663
  *
646
- * Queries the router's /health endpoint for cluster state. Useful for
664
+ * Queries the gateway's /health endpoint for cluster state. Useful for
647
665
  * checking if specific GPU types are available before sending requests.
648
666
  *
649
667
  * @param gpu - Optional filter to check specific GPU type availability
@@ -666,7 +684,7 @@ declare class SIEClient {
666
684
  /**
667
685
  * Wait for GPU capacity to become available.
668
686
  *
669
- * Polls the router until workers with the specified GPU type are online.
687
+ * Polls the gateway until workers with the specified GPU type are online.
670
688
  * This is useful for pre-warming the cluster before running benchmarks.
671
689
  *
672
690
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -689,7 +707,15 @@ declare class SIEClient {
689
707
  pollInterval?: number;
690
708
  }): Promise<CapacityInfo>;
691
709
  /**
692
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
710
+ * Make a msgpack HTTP request with retry logic.
711
+ *
712
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
713
+ * - 202 Accepted (provisioning)
714
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
715
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
716
+ *
717
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
718
+ * timeout from `timeout` to `provisionTimeout`.
693
719
  */
694
720
  private requestWithRetry;
695
721
  /**
@@ -706,7 +732,7 @@ declare class SIEClient {
706
732
  private detectEndpointType;
707
733
  }
708
734
 
709
- declare const SDK_VERSION = "0.2.0";
735
+ declare const SDK_VERSION = "0.3.0";
710
736
 
711
737
  /**
712
738
  * Helpers for converting SIE encode results to plain JavaScript types.
@@ -806,6 +832,11 @@ declare function multivectorEmbedding(raw: Float32Array[]): number[][];
806
832
  declare class SIEError extends Error {
807
833
  constructor(message: string);
808
834
  }
835
+ /**
836
+ * `SIEConnectionError` failure category. Only `"connect"` is auto-retried
837
+ * under `waitForCapacity: true`; `"timeout"` and `"other"` fail fast.
838
+ */
839
+ type SIEConnectionErrorKind = "connect" | "timeout" | "other";
809
840
  /**
810
841
  * Error connecting to the SIE server.
811
842
  *
@@ -816,7 +847,8 @@ declare class SIEError extends Error {
816
847
  * - Server refuses connection
817
848
  */
818
849
  declare class SIEConnectionError extends SIEError {
819
- constructor(message: string);
850
+ readonly kind: SIEConnectionErrorKind;
851
+ constructor(message: string, kind?: SIEConnectionErrorKind);
820
852
  }
821
853
  /**
822
854
  * Error in the request (4xx responses).
package/dist/index.js CHANGED
@@ -9,9 +9,11 @@ var SIEError = class extends Error {
9
9
  }
10
10
  };
11
11
  var SIEConnectionError = class extends SIEError {
12
- constructor(message) {
12
+ kind;
13
+ constructor(message, kind = "other") {
13
14
  super(message);
14
15
  this.name = "SIEConnectionError";
16
+ this.kind = kind;
15
17
  }
16
18
  };
17
19
  var RequestError = class extends SIEError {
@@ -103,9 +105,6 @@ var MODEL_LOADING_DEFAULT_DELAY = 5e3;
103
105
  var MODEL_LOADING_ERROR_CODE = "MODEL_LOADING";
104
106
  var SDK_VERSION_HEADER = "X-SIE-SDK-Version";
105
107
  var SERVER_VERSION_HEADER = "X-SIE-Server-Version";
106
-
107
- // src/version.ts
108
- var SDK_VERSION = "0.2.0";
109
108
  var EXT_TYPE_NUMPY = 78;
110
109
  function parseDtype(dtype) {
111
110
  const typeChar = dtype.slice(-2, -1);
@@ -474,6 +473,9 @@ function parseCapacityInfo(data, gpuFilter) {
474
473
  };
475
474
  }
476
475
 
476
+ // src/version.ts
477
+ var SDK_VERSION = "0.3.0";
478
+
477
479
  // src/client.ts
478
480
  function sleep(ms) {
479
481
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -614,9 +616,9 @@ var SIEClient = class {
614
616
  };
615
617
  }
616
618
  /**
617
- * Stream real-time status updates from a worker or router.
619
+ * Stream real-time status updates from a worker or gateway.
618
620
  *
619
- * @param mode - "cluster" uses router /ws/cluster-status, "worker" uses /ws/status.
621
+ * @param mode - "cluster" uses gateway /ws/cluster-status, "worker" uses /ws/status.
620
622
  * "auto" detects the endpoint via /health.
621
623
  */
622
624
  async *watch(mode = "auto") {
@@ -787,7 +789,7 @@ var SIEClient = class {
787
789
  * Close the client and cleanup resources.
788
790
  *
789
791
  * Stops pool lease renewal timers. Note that pools are not deleted
790
- * automatically - they are garbage collected by the router after inactivity.
792
+ * automatically - they are garbage collected by the gateway after inactivity.
791
793
  * This allows pool reuse if the client reconnects.
792
794
  */
793
795
  async close() {
@@ -1044,7 +1046,7 @@ var SIEClient = class {
1044
1046
  /**
1045
1047
  * Get current cluster capacity information.
1046
1048
  *
1047
- * Queries the router's /health endpoint for cluster state. Useful for
1049
+ * Queries the gateway's /health endpoint for cluster state. Useful for
1048
1050
  * checking if specific GPU types are available before sending requests.
1049
1051
  *
1050
1052
  * @param gpu - Optional filter to check specific GPU type availability
@@ -1066,10 +1068,10 @@ var SIEClient = class {
1066
1068
  async getCapacity(gpu) {
1067
1069
  const response = await this.requestJson("/health");
1068
1070
  const data = await response.json();
1069
- if (data.type !== "router") {
1071
+ if (data.type !== "gateway") {
1070
1072
  throw new RequestError(
1071
- "getCapacity() requires a router endpoint. This appears to be a worker.",
1072
- "not_router",
1073
+ "getCapacity() requires a gateway endpoint. This appears to be a worker.",
1074
+ "not_gateway",
1073
1075
  400
1074
1076
  );
1075
1077
  }
@@ -1078,7 +1080,7 @@ var SIEClient = class {
1078
1080
  /**
1079
1081
  * Wait for GPU capacity to become available.
1080
1082
  *
1081
- * Polls the router until workers with the specified GPU type are online.
1083
+ * Polls the gateway until workers with the specified GPU type are online.
1082
1084
  * This is useful for pre-warming the cluster before running benchmarks.
1083
1085
  *
1084
1086
  * @param gpu - GPU type to wait for (e.g., "l4", "a100-80gb")
@@ -1124,13 +1126,35 @@ var SIEClient = class {
1124
1126
  }
1125
1127
  }
1126
1128
  /**
1127
- * Make a msgpack HTTP request with retry logic for 202 and LoRA loading.
1129
+ * Make a msgpack HTTP request with retry logic.
1130
+ *
1131
+ * Retried (only when `waitForCapacity: true`, capped by `provisionTimeout`):
1132
+ * - 202 Accepted (provisioning)
1133
+ * - 503 `MODEL_LOADING` / `LORA_LOADING` / no error code (scale-from-zero)
1134
+ * - `SIEConnectionError` with `kind === "connect"` (issue #95)
1135
+ *
1136
+ * `kind === "timeout"` is NOT retried — would extend the user-visible
1137
+ * timeout from `timeout` to `provisionTimeout`.
1128
1138
  */
1129
1139
  async requestWithRetry(path, body, pool, gpu, waitForCapacity, model) {
1130
1140
  const startTime = Date.now();
1131
1141
  let loraRetries = 0;
1132
1142
  while (true) {
1133
- const response = await this.request(path, body, pool, gpu);
1143
+ let response;
1144
+ try {
1145
+ response = await this.request(path, body, pool, gpu);
1146
+ } catch (err) {
1147
+ if (waitForCapacity && err instanceof SIEConnectionError && err.kind === "connect") {
1148
+ const elapsed = Date.now() - startTime;
1149
+ if (elapsed < this.provisionTimeout) {
1150
+ const remaining = this.provisionTimeout - elapsed;
1151
+ const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
1152
+ await sleep(delay);
1153
+ continue;
1154
+ }
1155
+ }
1156
+ throw err;
1157
+ }
1134
1158
  if (response.status === HTTP_ACCEPTED) {
1135
1159
  const retryAfter = getRetryAfter2(response);
1136
1160
  if (!waitForCapacity) {
@@ -1187,6 +1211,17 @@ var SIEClient = class {
1187
1211
  await sleep(actualDelay);
1188
1212
  continue;
1189
1213
  }
1214
+ if (waitForCapacity) {
1215
+ const elapsed = Date.now() - startTime;
1216
+ if (elapsed < this.provisionTimeout) {
1217
+ const retryAfter = getRetryAfter2(response);
1218
+ const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
1219
+ const remaining = this.provisionTimeout - elapsed;
1220
+ const actualDelay = Math.min(delay, remaining);
1221
+ await sleep(actualDelay);
1222
+ continue;
1223
+ }
1224
+ }
1190
1225
  }
1191
1226
  if (!response.ok) {
1192
1227
  await handleError(response, gpu);
@@ -1228,10 +1263,10 @@ var SIEClient = class {
1228
1263
  return response;
1229
1264
  } catch (error) {
1230
1265
  if (error instanceof Error && error.name === "AbortError") {
1231
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1266
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1232
1267
  }
1233
1268
  if (error instanceof TypeError) {
1234
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1269
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1235
1270
  }
1236
1271
  throw error;
1237
1272
  } finally {
@@ -1265,10 +1300,10 @@ var SIEClient = class {
1265
1300
  return response;
1266
1301
  } catch (error) {
1267
1302
  if (error instanceof Error && error.name === "AbortError") {
1268
- throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`);
1303
+ throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
1269
1304
  }
1270
1305
  if (error instanceof TypeError) {
1271
- throw new SIEConnectionError(`Connection failed: ${error.message}`);
1306
+ throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
1272
1307
  }
1273
1308
  throw error;
1274
1309
  } finally {
@@ -1316,7 +1351,7 @@ var SIEClient = class {
1316
1351
  return "worker";
1317
1352
  }
1318
1353
  const data = await response.json();
1319
- return data.type === "router" ? "cluster" : "worker";
1354
+ return data.type === "gateway" ? "cluster" : "worker";
1320
1355
  } catch {
1321
1356
  return "worker";
1322
1357
  } finally {