@superlinked/sie-sdk 0.3.4 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +709 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +532 -10
- package/dist/index.d.ts +532 -10
- package/dist/index.js +709 -18
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -85,6 +85,21 @@ var ModelLoadingError = class extends SIEError {
|
|
|
85
85
|
this.model = model;
|
|
86
86
|
}
|
|
87
87
|
};
|
|
88
|
+
/**
 * Error describing a failure reported at the stream level: either an
 * `error` chunk delivered inside an open stream, or a stream the SDK
 * refuses to keep buffering.
 */
var SIEStreamError = class extends SIEError {
  /** SIE-native error code (e.g. `context_exceeded`, `cancelled`). */
  code;
  /** OpenAI-style error type (e.g. `context_length_exceeded`, `server_error`). */
  errorType;
  /** Offending field name when known (chat shape only). */
  param;
  /**
   * @param message Human-readable description of the failure.
   * @param options Optional `{ code, errorType, param }` details; every
   *   field that is absent is stored as `undefined`.
   */
  constructor(message, options) {
    super(message);
    const { code, errorType, param } = options ?? {};
    this.name = "SIEStreamError";
    this.code = code;
    this.errorType = errorType;
    this.param = param;
  }
};
|
|
88
103
|
var ModelLoadFailedError = class extends ServerError {
|
|
89
104
|
/** The model that was requested */
|
|
90
105
|
model;
|
|
@@ -324,7 +339,7 @@ function unpackMessage(data) {
|
|
|
324
339
|
function getRetryAfter(header) {
|
|
325
340
|
if (!header) return void 0;
|
|
326
341
|
const seconds = Number.parseInt(header, 10);
|
|
327
|
-
if (!Number.isNaN(seconds) && seconds
|
|
342
|
+
if (!Number.isNaN(seconds) && seconds >= 0) {
|
|
328
343
|
return seconds * 1e3;
|
|
329
344
|
}
|
|
330
345
|
const date = new Date(header);
|
|
@@ -523,6 +538,41 @@ function parseExtractResult(data) {
|
|
|
523
538
|
/**
 * Convert a list of wire-format extract results, one element at a time,
 * using `parseExtractResult`.
 *
 * @param data Array of raw extract results.
 * @returns A new array holding the parsed form of every element, in order.
 */
function parseExtractResults(data) {
  return data.map((entry) => parseExtractResult(entry));
}
|
|
541
|
+
/**
 * Name the runtime type of `value` for use in error messages.
 *
 * Behaves like `typeof`, except that `null` is reported as "null" rather
 * than "object".
 */
function describeType(value) {
  return value === null ? "null" : typeof value;
}
|
|
545
|
+
/**
 * Normalise a token count taken from a response.
 *
 * @param v Candidate value of any type.
 * @returns `v` truncated toward zero when it is a finite number; `0` for
 *   everything else (missing values, strings, NaN, +/-Infinity).
 */
function coerceTokenCount(v) {
  if (typeof v !== "number" || !Number.isFinite(v)) {
    return 0;
  }
  return Math.trunc(v);
}
|
|
548
|
+
/**
 * Translate a wire-format generate response into the SDK result shape.
 *
 * @param data Decoded JSON object returned by the generate endpoint.
 * @returns `{ model, text, finishReason, usage, attemptId, ttftMs, tpotMs }`.
 *   `finishReason` falls back to "stop" when absent, and each usage counter
 *   goes through `coerceTokenCount`.
 * @throws RequestError when `model` or `text` is not a string.
 */
function parseGenerateResult(data) {
  // Both fields are mandatory strings; checked in this order so the first
  // offending field is the one reported.
  for (const field of ["model", "text"]) {
    if (typeof data[field] !== "string") {
      throw new RequestError(
        `Generate response missing string '${field}' field: got ${describeType(data[field])}`
      );
    }
  }
  const counters = data.usage ?? {};
  return {
    model: data.model,
    text: data.text,
    finishReason: data.finish_reason ?? "stop",
    usage: {
      promptTokens: coerceTokenCount(counters.prompt_tokens),
      completionTokens: coerceTokenCount(counters.completion_tokens),
      totalTokens: coerceTokenCount(counters.total_tokens)
    },
    attemptId: data.attempt_id,
    ttftMs: data.ttft_ms,
    tpotMs: data.tpot_ms
  };
}
|
|
526
576
|
function parseCapacityInfo(data, gpuFilter) {
|
|
527
577
|
const wire = data;
|
|
528
578
|
let workers = wire.workers ?? [];
|
|
@@ -548,11 +598,208 @@ function parseCapacityInfo(data, gpuFilter) {
|
|
|
548
598
|
};
|
|
549
599
|
}
|
|
550
600
|
|
|
601
|
+
// src/internal/provisioning.ts
|
|
602
|
+
/**
 * Wait for `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
605
|
+
/**
 * Run `performFetch` until it produces a `200` response, waiting between
 * attempts while the server signals that capacity is not ready yet.
 *
 * Handling per attempt:
 * - `HTTP_ACCEPTED`: throws `ProvisioningError` when `opts.waitForCapacity`
 *   is off or the provision budget is spent; otherwise waits and retries.
 * - `503` with `MODEL_LOADING_ERROR_CODE`: waits and retries until the
 *   budget is spent, then throws `ModelLoadingError`.
 * - any other `503`: retried only when `opts.waitForCapacity` is on and
 *   budget remains.
 * - everything else that is not `200` is handed to `handleError`, with a
 *   `RequestError` as the final fallback.
 *
 * Each wait uses the response's retry-after hint when present (else a
 * default) and is capped to the remaining budget.
 *
 * @param performFetch Zero-argument async function performing one attempt.
 * @param opts `{ model, gpu, waitForCapacity, provisionTimeoutMs }`.
 * @returns The first response whose status is `200`.
 */
async function withProvisioningRetry(performFetch, opts) {
  const startedAt = Date.now();
  // Wait for the server-suggested delay (or the fallback), never longer
  // than what is left of the provision budget.
  const pauseBeforeRetry = (response, fallbackDelay, elapsed) => {
    const retryAfter = getRetryAfter2(response) ?? fallbackDelay;
    return sleep(Math.min(retryAfter, opts.provisionTimeoutMs - elapsed));
  };
  for (;;) {
    const response = await performFetch();
    if (response.status === HTTP_ACCEPTED) {
      if (!opts.waitForCapacity) {
        throw new ProvisioningError(
          "No capacity available. Server is provisioning.",
          opts.gpu,
          getRetryAfter2(response)
        );
      }
      const elapsed = Date.now() - startedAt;
      if (elapsed >= opts.provisionTimeoutMs) {
        throw new ProvisioningError(
          `Provisioning timeout after ${elapsed}ms`,
          opts.gpu,
          getRetryAfter2(response)
        );
      }
      await pauseBeforeRetry(response, DEFAULT_RETRY_DELAY, elapsed);
      continue;
    }
    await throwIfModelLoadFailed(response, opts.model);
    if (response.status === 503) {
      // Inspect a clone so the original body stays readable for handleError.
      const errorCode = await getErrorCode(response.clone());
      const isModelLoading = errorCode === MODEL_LOADING_ERROR_CODE;
      if (isModelLoading || opts.waitForCapacity) {
        const elapsed = Date.now() - startedAt;
        if (isModelLoading) {
          if (elapsed >= opts.provisionTimeoutMs) {
            throw new ModelLoadingError(`Model loading timeout for '${opts.model}'`, opts.model);
          }
          await pauseBeforeRetry(response, MODEL_LOADING_DEFAULT_DELAY, elapsed);
          continue;
        }
        if (elapsed < opts.provisionTimeoutMs) {
          await pauseBeforeRetry(response, DEFAULT_RETRY_DELAY, elapsed);
          continue;
        }
      }
    }
    if (!response.ok) {
      await handleError(response);
    }
    if (response.status !== 200) {
      throw new RequestError(`Unexpected response status ${response.status}`);
    }
    return response;
  }
}
|
|
659
|
+
|
|
660
|
+
// src/sse.ts
|
|
661
|
+
/** Payload that marks the end of the stream. */
var SSE_DONE = "[DONE]";
/** Default cap on buffered, not-yet-terminated stream text (UTF-16 code units). */
var MAX_SSE_BUFFER_CHARS = 8 * 1024 * 1024;

/**
 * Find the earliest blank-line event terminator in `buffer`.
 *
 * Recognises `\n\n` and `\r\n\r\n`; when both occur, the one that starts
 * first wins.
 *
 * @returns `{ index, length }` of the terminator, or `null` if none is present.
 */
function findSseEventBoundary(buffer) {
  const lfIdx = buffer.indexOf("\n\n");
  const crlfIdx = buffer.indexOf("\r\n\r\n");
  if (lfIdx === -1 && crlfIdx === -1) return null;
  if (crlfIdx === -1 || (lfIdx !== -1 && lfIdx < crlfIdx)) {
    return { index: lfIdx, length: 2 };
  }
  return { index: crlfIdx, length: 4 };
}

/**
 * Read one chunk from `reader`, rejecting as soon as `signal` aborts even
 * if the underlying read has not settled yet.
 */
function readSseChunk(reader, signal) {
  if (!signal) return reader.read();
  if (signal.aborted) {
    throw new SIEConnectionError("Stream aborted by caller", "other");
  }
  return new Promise((resolve, reject) => {
    // A promise settles only once, so whichever of abort / read finishes
    // second is a harmless no-op; no extra "settled" bookkeeping needed.
    const onAbortRace = () => {
      reject(new SIEConnectionError("Stream aborted by caller", "other"));
    };
    signal.addEventListener("abort", onAbortRace, { once: true });
    reader.read().then(
      (chunk) => {
        signal.removeEventListener("abort", onAbortRace);
        resolve(chunk);
      },
      (err) => {
        signal.removeEventListener("abort", onAbortRace);
        reject(err);
      }
    );
  });
}

/**
 * Decode a byte stream of server-sent events and yield each event's data
 * payload as a string.
 *
 * Events are split on blank lines and reduced to their payload with
 * `extractDataPayload`; events without a payload are skipped. The
 * generator finishes when a payload equals `SSE_DONE` or when the reader
 * reports completion (an unterminated trailing event is still yielded).
 *
 * On a clean finish the reader lock is released; on any other exit
 * (error, abort, consumer stopping early) the reader is cancelled.
 *
 * @param reader Reader exposing `read()`, `cancel()` and `releaseLock()`.
 * @param signal Optional `AbortSignal`; aborting cancels the reader and
 *   makes the generator throw `SIEConnectionError`.
 * @param maxBufferChars Upper bound on text that may remain buffered after
 *   all complete events have been extracted. Defaults to
 *   `MAX_SSE_BUFFER_CHARS`.
 * @throws SIEConnectionError when aborted.
 * @throws SIEStreamError when the unterminated remainder exceeds
 *   `maxBufferChars`.
 */
async function* parseSseStream(reader, signal, maxBufferChars = MAX_SSE_BUFFER_CHARS) {
  const decoder = new TextDecoder("utf-8");
  let buffer = "";
  let completedCleanly = false;
  const onAbort = () => {
    // Best effort: a cancel failure must not mask the abort itself.
    reader.cancel().catch(() => {
    });
  };
  if (signal) {
    if (signal.aborted) {
      throw new SIEConnectionError("Stream aborted before first read", "other");
    }
    signal.addEventListener("abort", onAbort, { once: true });
  }
  try {
    while (true) {
      let result;
      try {
        result = await readSseChunk(reader, signal);
      } catch (err) {
        if (err instanceof SIEConnectionError) throw err;
        // A read that fails because of the abort is reported as the abort.
        if (signal?.aborted) {
          throw new SIEConnectionError("Stream aborted by caller", "other");
        }
        throw err;
      }
      if (result.done) {
        // Flush any bytes the decoder was still holding.
        buffer += decoder.decode();
        break;
      }
      buffer += decoder.decode(result.value, { stream: true });
      let boundary = findSseEventBoundary(buffer);
      while (boundary !== null) {
        const eventBlock = buffer.slice(0, boundary.index);
        buffer = buffer.slice(boundary.index + boundary.length);
        boundary = findSseEventBoundary(buffer);
        const payload = extractDataPayload(eventBlock);
        if (payload === null) continue;
        if (payload === SSE_DONE) {
          completedCleanly = true;
          return;
        }
        yield payload;
      }
      // Enforce the cap only on what is left AFTER complete events were
      // drained; a large chunk of well-terminated events is legitimate.
      if (buffer.length > maxBufferChars) {
        throw new SIEStreamError(
          `SSE event buffer exceeded ${maxBufferChars} chars without an event terminator`
        );
      }
    }
    const tail = buffer.replace(/\r?\n$/, "");
    if (tail !== "") {
      const payload = extractDataPayload(tail);
      if (payload !== null && payload !== SSE_DONE) {
        yield payload;
      }
    }
    completedCleanly = true;
  } finally {
    if (signal) signal.removeEventListener("abort", onAbort);
    if (completedCleanly) {
      try {
        reader.releaseLock();
      } catch {
        // Releasing is best effort; nothing useful to do on failure.
      }
    } else {
      await reader.cancel().catch(() => {
      });
    }
  }
}
|
|
783
|
+
/**
 * Pull the data payload out of one event block.
 *
 * Only lines starting with `data:` contribute; a single space directly
 * after the colon is dropped. Multiple data lines are joined with `\n`.
 *
 * @param block Text of one event, without its terminating blank line.
 * @returns The joined payload, or `null` when the block has no data line.
 */
function extractDataPayload(block) {
  const dataValues = block
    .split(/\r?\n/)
    .filter((line) => line.startsWith("data:"))
    .map((line) => (line[5] === " " ? line.slice(6) : line.slice(5)));
  return dataValues.length > 0 ? dataValues.join("\n") : null;
}
|
|
797
|
+
|
|
551
798
|
// src/version.ts
|
|
552
|
-
var SDK_VERSION = "0.
|
|
799
|
+
var SDK_VERSION = "0.4.1";
|
|
553
800
|
|
|
554
801
|
// src/client.ts
|
|
555
|
-
function
|
|
802
|
+
/**
 * Wait for `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
558
805
|
function abortableSleep(ms, signal) {
|
|
@@ -570,6 +817,19 @@ function abortableSleep(ms, signal) {
|
|
|
570
817
|
});
|
|
571
818
|
}
|
|
572
819
|
var _LEASE_RENEWAL_MAX_RETRIES = 5;
|
|
820
|
+
/**
 * Turn the `error` member of a chat stream chunk into an `SIEStreamError`.
 *
 * Tolerates chunks that are not objects (a stream payload of `null` parses
 * as valid JSON) and `error` values that are plain strings.
 *
 * @param chunk Parsed stream chunk of any shape.
 * @returns An `SIEStreamError` carrying `code`, `errorType` (from
 *   `error.type`) and `param`, or `null` when the chunk has no truthy
 *   `error`. The message falls back to "stream error".
 */
function extractChatChunkError(chunk) {
  const err = chunk?.error;
  if (!err) return null;
  const message = typeof err === "string" ? err : err.message ?? "stream error";
  return new SIEStreamError(message, {
    code: err.code,
    errorType: err.type,
    param: err.param
  });
}
|
|
829
|
+
/**
 * Turn the `error` member of a generate stream chunk into an
 * `SIEStreamError`.
 *
 * Tolerates chunks that are not objects and `error` values that are plain
 * strings, and falls back to the message "stream error" when the chunk
 * supplies none, matching the chat variant.
 *
 * @param chunk Parsed stream chunk of any shape.
 * @returns An `SIEStreamError` carrying `code`, or `null` when the chunk
 *   has no truthy `error`.
 */
function extractGenerateChunkError(chunk) {
  const err = chunk?.error;
  if (!err) return null;
  const message = typeof err === "string" ? err : err.message ?? "stream error";
  return new SIEStreamError(message, { code: err.code });
}
|
|
573
833
|
var SIEClient = class {
|
|
574
834
|
baseUrl;
|
|
575
835
|
timeout;
|
|
@@ -789,6 +1049,427 @@ var SIEClient = class {
|
|
|
789
1049
|
* console.log(result.scores[0].itemId); // most relevant
|
|
790
1050
|
* ```
|
|
791
1051
|
*/
|
|
1052
|
+
/**
|
|
1053
|
+
* Generate text from a prompt (walking-skeleton SDK surface).
|
|
1054
|
+
*
|
|
1055
|
+
* The SDK does not currently expose streaming chunks. The worker streams
|
|
1056
|
+
* to the gateway, the gateway aggregates, and the SDK returns the
|
|
1057
|
+
* assembled result plus SIE-native timing metadata (TTFT, TPOT,
|
|
1058
|
+
* attempt id).
|
|
1059
|
+
*
|
|
1060
|
+
* @example
|
|
1061
|
+
* ```typescript
|
|
1062
|
+
* const result = await client.generate(
|
|
1063
|
+
* "Qwen__Qwen3-4B-Instruct-2507",
|
|
1064
|
+
* "Write a haiku about the sea.",
|
|
1065
|
+
* { maxNewTokens: 64, temperature: 0.7 },
|
|
1066
|
+
* );
|
|
1067
|
+
* console.log(result.text);
|
|
1068
|
+
* console.log(`TTFT: ${result.ttftMs}ms`);
|
|
1069
|
+
* ```
|
|
1070
|
+
*/
|
|
1071
|
+
async generate(model, prompt, options) {
|
|
1072
|
+
const body = {
|
|
1073
|
+
prompt,
|
|
1074
|
+
max_new_tokens: options.maxNewTokens,
|
|
1075
|
+
temperature: options.temperature ?? 1,
|
|
1076
|
+
top_p: options.topP ?? 1
|
|
1077
|
+
};
|
|
1078
|
+
if (options.stop !== void 0) {
|
|
1079
|
+
body.stop = options.stop;
|
|
1080
|
+
}
|
|
1081
|
+
const { pool, gpu } = this.parseGpuParam(options.gpu);
|
|
1082
|
+
const headers = {
|
|
1083
|
+
Accept: "application/json",
|
|
1084
|
+
"Content-Type": JSON_CONTENT_TYPE,
|
|
1085
|
+
[SDK_VERSION_HEADER]: SDK_VERSION
|
|
1086
|
+
};
|
|
1087
|
+
if (pool) headers["X-SIE-Pool"] = pool;
|
|
1088
|
+
if (gpu) headers["X-SIE-MACHINE-PROFILE"] = gpu;
|
|
1089
|
+
if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
|
|
1090
|
+
const safeModel = model.replaceAll("/", "__");
|
|
1091
|
+
const url = `${this.baseUrl}/v1/generate/${encodeURIComponent(safeModel)}`;
|
|
1092
|
+
const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
|
|
1093
|
+
const response = await withProvisioningRetry(() => this.performJsonPost(url, body, headers), {
|
|
1094
|
+
model,
|
|
1095
|
+
gpu,
|
|
1096
|
+
waitForCapacity,
|
|
1097
|
+
provisionTimeoutMs: this.provisionTimeout
|
|
1098
|
+
});
|
|
1099
|
+
const data = await response.json();
|
|
1100
|
+
if (data === null || typeof data !== "object") {
|
|
1101
|
+
throw new RequestError("Unexpected generate response shape");
|
|
1102
|
+
}
|
|
1103
|
+
return parseGenerateResult(data);
|
|
1104
|
+
}
|
|
1105
|
+
/**
|
|
1106
|
+
* Per-attempt JSON POST used by the non-streaming surfaces
|
|
1107
|
+
* ({@link generate}, {@link chatCompletions}) inside the
|
|
1108
|
+
* {@link withProvisioningRetry} loop.
|
|
1109
|
+
*
|
|
1110
|
+
* Translates low-level transport failures into typed errors that the
|
|
1111
|
+
* retry loop will surface verbatim:
|
|
1112
|
+
* - `AbortError` → `SIEConnectionError` (per-attempt timeout)
|
|
1113
|
+
* - `TypeError` → `SIEConnectionError` (NOT retried — generation is
|
|
1114
|
+
* non-idempotent, so a mid-flight drop must surface instead of
|
|
1115
|
+
* silently re-issuing a billable generation)
|
|
1116
|
+
*
|
|
1117
|
+
* Each call uses a fresh `AbortController` so concurrent retries don't
|
|
1118
|
+
* share state, and the per-attempt timeout is bounded by `this.timeout`
|
|
1119
|
+
* (NOT the cumulative provisioning budget).
|
|
1120
|
+
*/
|
|
1121
|
+
async performJsonPost(url, body, headers) {
|
|
1122
|
+
const controller = new AbortController();
|
|
1123
|
+
const timeoutId = setTimeout(() => controller.abort(), this.timeout);
|
|
1124
|
+
try {
|
|
1125
|
+
return await fetch(url, {
|
|
1126
|
+
method: "POST",
|
|
1127
|
+
headers,
|
|
1128
|
+
body: JSON.stringify(body),
|
|
1129
|
+
signal: controller.signal
|
|
1130
|
+
});
|
|
1131
|
+
} catch (err) {
|
|
1132
|
+
if (err instanceof Error && err.name === "AbortError") {
|
|
1133
|
+
throw new SIEConnectionError(`Request timeout after ${this.timeout}ms`, "timeout");
|
|
1134
|
+
}
|
|
1135
|
+
if (err instanceof TypeError) {
|
|
1136
|
+
throw new SIEConnectionError(`Connection failed: ${err.message}`, "connect");
|
|
1137
|
+
}
|
|
1138
|
+
throw err;
|
|
1139
|
+
} finally {
|
|
1140
|
+
clearTimeout(timeoutId);
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
/**
|
|
1144
|
+
* Non-streaming chat-completion call against `/v1/chat/completions`.
|
|
1145
|
+
*
|
|
1146
|
+
* This is the OpenAI-compatible surface. The request body is forwarded
|
|
1147
|
+
* verbatim as JSON, so any field documented at
|
|
1148
|
+
* <https://platform.openai.com/docs/api-reference/chat/create> can be set;
|
|
1149
|
+
* the gateway will reject fields it does not yet support with
|
|
1150
|
+
* `400 unsupported_field`. SIE-native routing hints (`routing_key`,
|
|
1151
|
+
* `prompt_cache_key`) are part of the same request shape.
|
|
1152
|
+
*
|
|
1153
|
+
* Error semantics mirror `generate()`: 4xx → `RequestError`, 5xx →
|
|
1154
|
+
* `ServerError` (or the more specific `ModelLoadFailedError` for 502
|
|
1155
|
+
* `MODEL_LOAD_FAILED`), connection / timeout failures →
|
|
1156
|
+
* `SIEConnectionError`.
|
|
1157
|
+
*
|
|
1158
|
+
* If `req.stream === true`, this method throws `RequestError` immediately —
|
|
1159
|
+
* use {@link streamChatCompletions} instead. We do not auto-route because
|
|
1160
|
+
* the return type is fundamentally different (`Promise` vs
|
|
1161
|
+
* `AsyncGenerator`) and silently flipping would mis-type the call site.
|
|
1162
|
+
*
|
|
1163
|
+
* @example
|
|
1164
|
+
* ```typescript
|
|
1165
|
+
* const reply = await client.chatCompletions({
|
|
1166
|
+
* model: "Qwen/Qwen3-4B-Instruct-2507",
|
|
1167
|
+
* messages: [{ role: "user", content: "Write a haiku about the sea." }],
|
|
1168
|
+
* max_completion_tokens: 64,
|
|
1169
|
+
* });
|
|
1170
|
+
* console.log(reply.choices[0]?.message.content);
|
|
1171
|
+
* ```
|
|
1172
|
+
*/
|
|
1173
|
+
async chatCompletions(req, options = {}) {
|
|
1174
|
+
if (req.stream === true) {
|
|
1175
|
+
throw new RequestError(
|
|
1176
|
+
"chatCompletions() cannot be used with stream:true \u2014 use streamChatCompletions() instead.",
|
|
1177
|
+
"invalid_request",
|
|
1178
|
+
400
|
|
1179
|
+
);
|
|
1180
|
+
}
|
|
1181
|
+
const body = { ...req, stream: false };
|
|
1182
|
+
const url = `${this.baseUrl}/v1/chat/completions`;
|
|
1183
|
+
const headers = this.buildChatHeaders("application/json");
|
|
1184
|
+
const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
|
|
1185
|
+
const provisionTimeoutMs = options.provisionTimeoutMs ?? this.provisionTimeout;
|
|
1186
|
+
const response = await withProvisioningRetry(() => this.performJsonPost(url, body, headers), {
|
|
1187
|
+
model: req.model,
|
|
1188
|
+
gpu: void 0,
|
|
1189
|
+
waitForCapacity,
|
|
1190
|
+
provisionTimeoutMs
|
|
1191
|
+
});
|
|
1192
|
+
this.checkServerVersion(response);
|
|
1193
|
+
const data = await response.json();
|
|
1194
|
+
if (data === null || typeof data !== "object") {
|
|
1195
|
+
throw new RequestError("Unexpected chat.completion response shape");
|
|
1196
|
+
}
|
|
1197
|
+
return data;
|
|
1198
|
+
}
|
|
1199
|
+
/**
|
|
1200
|
+
* Streaming chat-completion call against `/v1/chat/completions` with
|
|
1201
|
+
* `Accept: text/event-stream`.
|
|
1202
|
+
*
|
|
1203
|
+
* Yields `ChatCompletionChunk` events in the order the gateway emits them.
|
|
1204
|
+
* The terminal chunk carries `finish_reason`; if
|
|
1205
|
+
* `req.stream_options.include_usage === true`, a final usage-only chunk
|
|
1206
|
+
* (`choices: []`, populated `usage`) follows it. The generator completes
|
|
1207
|
+
* cleanly on the `data: [DONE]` sentinel.
|
|
1208
|
+
*
|
|
1209
|
+
* Error semantics:
|
|
1210
|
+
*
|
|
1211
|
+
* - HTTP 4xx / 5xx **before** the stream opens → throws `RequestError` /
|
|
1212
|
+
* `ServerError` (same as {@link chatCompletions}).
|
|
1213
|
+
* - A chunk containing `error: { ... }` mid-stream → throws
|
|
1214
|
+
* {@link SIEStreamError}. The error chunk is consumed, never yielded.
|
|
1215
|
+
* - `signal.abort()` mid-stream → the generator throws
|
|
1216
|
+
* `SIEConnectionError` and releases the underlying reader, which
|
|
1217
|
+
* fires `StreamCancelGuard` on the gateway side.
|
|
1218
|
+
*
|
|
1219
|
+
* `req.stream` is set to `true` automatically; any existing value is
|
|
1220
|
+
* overwritten. We do not validate `req.stream === false` because the
|
|
1221
|
+
* call-site intent is unambiguous.
|
|
1222
|
+
*
|
|
1223
|
+
* @param req The chat-completion request. See {@link ChatCompletionRequest}.
|
|
1224
|
+
* @param signal Optional `AbortSignal` for cooperative cancellation.
|
|
1225
|
+
*
|
|
1226
|
+
* @example
|
|
1227
|
+
* ```typescript
|
|
1228
|
+
* const controller = new AbortController();
|
|
1229
|
+
* try {
|
|
1230
|
+
* for await (const chunk of client.streamChatCompletions(
|
|
1231
|
+
* {
|
|
1232
|
+
* model: "Qwen/Qwen3-4B-Instruct-2507",
|
|
1233
|
+
* messages: [{ role: "user", content: "Count to ten." }],
|
|
1234
|
+
* stream_options: { include_usage: true },
|
|
1235
|
+
* },
|
|
1236
|
+
* controller.signal,
|
|
1237
|
+
* )) {
|
|
1238
|
+
* process.stdout.write(chunk.choices[0]?.delta.content ?? "");
|
|
1239
|
+
* }
|
|
1240
|
+
* } catch (err) {
|
|
1241
|
+
* if (err instanceof SIEStreamError) {
|
|
1242
|
+
* console.error(`mid-stream error: ${err.code} — ${err.message}`);
|
|
1243
|
+
* } else throw err;
|
|
1244
|
+
* }
|
|
1245
|
+
* ```
|
|
1246
|
+
*/
|
|
1247
|
+
async *streamChatCompletions(req, signal) {
|
|
1248
|
+
const body = { ...req, stream: true };
|
|
1249
|
+
const url = `${this.baseUrl}/v1/chat/completions`;
|
|
1250
|
+
yield* this.consumeSseStream(
|
|
1251
|
+
url,
|
|
1252
|
+
body,
|
|
1253
|
+
req.model,
|
|
1254
|
+
signal,
|
|
1255
|
+
(chunk) => extractChatChunkError(chunk)
|
|
1256
|
+
);
|
|
1257
|
+
}
|
|
1258
|
+
/**
|
|
1259
|
+
* Streaming companion to {@link generate} — opens an SSE connection to
|
|
1260
|
+
* `/v1/generate/{model}` with `stream: true` and yields the SIE-native
|
|
1261
|
+
* chunk shape documented in
|
|
1262
|
+
* `packages/sie_gateway/src/handlers/sse.rs::build_generate_chunk_event`.
|
|
1263
|
+
*
|
|
1264
|
+
* The first delta carries `seq: 0` and `text_delta` populated; the
|
|
1265
|
+
* terminal chunk has `done: true`, `finish_reason`, and (typically)
|
|
1266
|
+
* `usage` + `ttft_ms`. The generator completes on the `data: [DONE]`
|
|
1267
|
+
* sentinel.
|
|
1268
|
+
*
|
|
1269
|
+
* Error semantics match {@link streamChatCompletions}: pre-stream HTTP
|
|
1270
|
+
* errors throw normally, mid-stream `error` chunks throw
|
|
1271
|
+
* {@link SIEStreamError}.
|
|
1272
|
+
*
|
|
1273
|
+
* @example
|
|
1274
|
+
* ```typescript
|
|
1275
|
+
* for await (const chunk of client.streamGenerate(
|
|
1276
|
+
* "Qwen/Qwen3-4B-Instruct-2507",
|
|
1277
|
+
* "Write a haiku.",
|
|
1278
|
+
* { maxNewTokens: 64, temperature: 0.7 },
|
|
1279
|
+
* )) {
|
|
1280
|
+
* process.stdout.write(chunk.text_delta);
|
|
1281
|
+
* if (chunk.done) console.log(`\nTTFT: ${chunk.ttft_ms}ms`);
|
|
1282
|
+
* }
|
|
1283
|
+
* ```
|
|
1284
|
+
*/
|
|
1285
|
+
async *streamGenerate(model, prompt, options, signal) {
|
|
1286
|
+
const body = {
|
|
1287
|
+
prompt,
|
|
1288
|
+
max_new_tokens: options.maxNewTokens,
|
|
1289
|
+
temperature: options.temperature ?? 1,
|
|
1290
|
+
top_p: options.topP ?? 1,
|
|
1291
|
+
stream: true
|
|
1292
|
+
};
|
|
1293
|
+
if (options.stop !== void 0) body.stop = options.stop;
|
|
1294
|
+
const safeModel = model.replaceAll("/", "__");
|
|
1295
|
+
const url = `${this.baseUrl}/v1/generate/${encodeURIComponent(safeModel)}`;
|
|
1296
|
+
const { pool, gpu } = this.parseGpuParam(options.gpu);
|
|
1297
|
+
const waitForCapacity = options.waitForCapacity ?? this.defaultWaitForCapacity;
|
|
1298
|
+
yield* this.consumeSseStream(
|
|
1299
|
+
url,
|
|
1300
|
+
body,
|
|
1301
|
+
model,
|
|
1302
|
+
signal,
|
|
1303
|
+
(chunk) => extractGenerateChunkError(chunk),
|
|
1304
|
+
{ pool, gpu },
|
|
1305
|
+
{ waitForCapacity }
|
|
1306
|
+
);
|
|
1307
|
+
}
|
|
1308
|
+
/**
|
|
1309
|
+
* Shared SSE consumption helper for the streaming methods.
|
|
1310
|
+
*
|
|
1311
|
+
* Performs a pre-stream provisioning retry loop (honoring
|
|
1312
|
+
* `waitForCapacity`/`provisionTimeout`), surfaces pre-stream errors via
|
|
1313
|
+
* {@link handleError} (so callers see the same `RequestError` /
|
|
1314
|
+
* `ServerError` hierarchy as the non-streaming endpoints), then iterates
|
|
1315
|
+
* the SSE payloads via {@link parseSseStream}. Each payload is JSON-parsed;
|
|
1316
|
+
* if the consumer-supplied `extractError` returns an `SIEStreamError`, the
|
|
1317
|
+
* generator throws it instead of yielding the chunk.
|
|
1318
|
+
*
|
|
1319
|
+
* Retry policy mirrors {@link generate}: only the SAFE pre-execution
|
|
1320
|
+
* capacity signals — `202` (provisioning) and `503 MODEL_LOADING` — are
|
|
1321
|
+
* retried, and only while `waitForCapacity` is set and the provision
|
|
1322
|
+
* budget remains. Once the body opens we never retry (the call is
|
|
1323
|
+
* non-idempotent; a mid-stream failure must not re-issue generation).
|
|
1324
|
+
*
|
|
1325
|
+
* @internal
|
|
1326
|
+
*/
|
|
1327
|
+
async *consumeSseStream(url, body, model, signal, extractError, routing, provisioning) {
|
|
1328
|
+
const headers = this.buildChatHeaders("text/event-stream");
|
|
1329
|
+
if (routing?.pool) headers["X-SIE-Pool"] = routing.pool;
|
|
1330
|
+
if (routing?.gpu) headers["X-SIE-MACHINE-PROFILE"] = routing.gpu;
|
|
1331
|
+
const waitForCapacity = provisioning?.waitForCapacity ?? this.defaultWaitForCapacity;
|
|
1332
|
+
const gpu = routing?.gpu;
|
|
1333
|
+
const controller = new AbortController();
|
|
1334
|
+
const onCallerAbort = () => controller.abort();
|
|
1335
|
+
if (signal) {
|
|
1336
|
+
if (signal.aborted) {
|
|
1337
|
+
throw new SIEConnectionError("Stream aborted before request", "other");
|
|
1338
|
+
}
|
|
1339
|
+
signal.addEventListener("abort", onCallerAbort, { once: true });
|
|
1340
|
+
}
|
|
1341
|
+
try {
|
|
1342
|
+
const startTime = Date.now();
|
|
1343
|
+
let response;
|
|
1344
|
+
while (true) {
|
|
1345
|
+
if (signal?.aborted) {
|
|
1346
|
+
throw new SIEConnectionError("Stream aborted before request", "other");
|
|
1347
|
+
}
|
|
1348
|
+
const preStreamTimeoutId = setTimeout(() => controller.abort(), this.timeout);
|
|
1349
|
+
let attemptResponse;
|
|
1350
|
+
try {
|
|
1351
|
+
attemptResponse = await fetch(url, {
|
|
1352
|
+
method: "POST",
|
|
1353
|
+
headers,
|
|
1354
|
+
body: JSON.stringify(body),
|
|
1355
|
+
signal: controller.signal
|
|
1356
|
+
});
|
|
1357
|
+
} catch (error) {
|
|
1358
|
+
if (signal?.aborted) {
|
|
1359
|
+
throw new SIEConnectionError("Stream aborted before response", "other");
|
|
1360
|
+
}
|
|
1361
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
1362
|
+
throw new SIEConnectionError(`Stream open timeout after ${this.timeout}ms`, "timeout");
|
|
1363
|
+
}
|
|
1364
|
+
if (error instanceof TypeError) {
|
|
1365
|
+
throw new SIEConnectionError(`Connection failed: ${error.message}`, "connect");
|
|
1366
|
+
}
|
|
1367
|
+
throw error;
|
|
1368
|
+
} finally {
|
|
1369
|
+
clearTimeout(preStreamTimeoutId);
|
|
1370
|
+
}
|
|
1371
|
+
if (attemptResponse.status === HTTP_ACCEPTED) {
|
|
1372
|
+
if (!waitForCapacity) {
|
|
1373
|
+
throw new ProvisioningError(
|
|
1374
|
+
"No capacity available. Server is provisioning.",
|
|
1375
|
+
gpu,
|
|
1376
|
+
getRetryAfter2(attemptResponse)
|
|
1377
|
+
);
|
|
1378
|
+
}
|
|
1379
|
+
const elapsed = Date.now() - startTime;
|
|
1380
|
+
if (elapsed >= this.provisionTimeout) {
|
|
1381
|
+
throw new ProvisioningError(
|
|
1382
|
+
`Provisioning timeout after ${elapsed}ms`,
|
|
1383
|
+
gpu,
|
|
1384
|
+
getRetryAfter2(attemptResponse)
|
|
1385
|
+
);
|
|
1386
|
+
}
|
|
1387
|
+
const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
|
|
1388
|
+
if (await abortableSleep(
|
|
1389
|
+
Math.min(delay, this.provisionTimeout - elapsed),
|
|
1390
|
+
controller.signal
|
|
1391
|
+
)) {
|
|
1392
|
+
throw new SIEConnectionError("Stream aborted while provisioning", "other");
|
|
1393
|
+
}
|
|
1394
|
+
continue;
|
|
1395
|
+
}
|
|
1396
|
+
await throwIfModelLoadFailed(attemptResponse, model);
|
|
1397
|
+
if (attemptResponse.status === 503) {
|
|
1398
|
+
const errorCode = await getErrorCode(attemptResponse.clone());
|
|
1399
|
+
if (errorCode === MODEL_LOADING_ERROR_CODE && waitForCapacity) {
|
|
1400
|
+
const elapsed = Date.now() - startTime;
|
|
1401
|
+
if (elapsed >= this.provisionTimeout) {
|
|
1402
|
+
throw new ModelLoadingError(`Model loading timeout for '${model}'`, model);
|
|
1403
|
+
}
|
|
1404
|
+
const delay = getRetryAfter2(attemptResponse) ?? MODEL_LOADING_DEFAULT_DELAY;
|
|
1405
|
+
if (await abortableSleep(
|
|
1406
|
+
Math.min(delay, this.provisionTimeout - elapsed),
|
|
1407
|
+
controller.signal
|
|
1408
|
+
)) {
|
|
1409
|
+
throw new SIEConnectionError("Stream aborted while provisioning", "other");
|
|
1410
|
+
}
|
|
1411
|
+
continue;
|
|
1412
|
+
}
|
|
1413
|
+
if (waitForCapacity) {
|
|
1414
|
+
const elapsed = Date.now() - startTime;
|
|
1415
|
+
if (elapsed < this.provisionTimeout) {
|
|
1416
|
+
const delay = getRetryAfter2(attemptResponse) ?? DEFAULT_RETRY_DELAY;
|
|
1417
|
+
if (await abortableSleep(
|
|
1418
|
+
Math.min(delay, this.provisionTimeout - elapsed),
|
|
1419
|
+
controller.signal
|
|
1420
|
+
)) {
|
|
1421
|
+
throw new SIEConnectionError("Stream aborted while provisioning", "other");
|
|
1422
|
+
}
|
|
1423
|
+
continue;
|
|
1424
|
+
}
|
|
1425
|
+
}
|
|
1426
|
+
}
|
|
1427
|
+
if (attemptResponse.status !== 200) {
|
|
1428
|
+
await handleError(attemptResponse);
|
|
1429
|
+
}
|
|
1430
|
+
response = attemptResponse;
|
|
1431
|
+
break;
|
|
1432
|
+
}
|
|
1433
|
+
if (!response) {
|
|
1434
|
+
throw new RequestError("Streaming request failed without producing a response");
|
|
1435
|
+
}
|
|
1436
|
+
this.checkServerVersion(response);
|
|
1437
|
+
const bodyStream = response.body;
|
|
1438
|
+
if (!bodyStream) {
|
|
1439
|
+
throw new RequestError("Streaming response has no body");
|
|
1440
|
+
}
|
|
1441
|
+
const reader = bodyStream.getReader();
|
|
1442
|
+
for await (const payload of parseSseStream(reader, signal ?? controller.signal)) {
|
|
1443
|
+
let chunk;
|
|
1444
|
+
try {
|
|
1445
|
+
chunk = JSON.parse(payload);
|
|
1446
|
+
} catch (err) {
|
|
1447
|
+
throw new RequestError(
|
|
1448
|
+
`Failed to parse SSE chunk as JSON: ${err instanceof Error ? err.message : String(err)}`
|
|
1449
|
+
);
|
|
1450
|
+
}
|
|
1451
|
+
const streamErr = extractError(chunk);
|
|
1452
|
+
if (streamErr) throw streamErr;
|
|
1453
|
+
yield chunk;
|
|
1454
|
+
}
|
|
1455
|
+
} finally {
|
|
1456
|
+
if (signal) signal.removeEventListener("abort", onCallerAbort);
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1459
|
+
/**
|
|
1460
|
+
* Build the standard JSON header set for the chat-completions surface.
|
|
1461
|
+
* Pulled out so both the streaming and non-streaming paths agree on
|
|
1462
|
+
* auth / version / content-type wiring.
|
|
1463
|
+
*/
|
|
1464
|
+
buildChatHeaders(accept) {
|
|
1465
|
+
const headers = {
|
|
1466
|
+
Accept: accept,
|
|
1467
|
+
"Content-Type": JSON_CONTENT_TYPE,
|
|
1468
|
+
[SDK_VERSION_HEADER]: SDK_VERSION
|
|
1469
|
+
};
|
|
1470
|
+
if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
|
|
1471
|
+
return headers;
|
|
1472
|
+
}
|
|
792
1473
|
async score(model, query, items, options = {}) {
|
|
793
1474
|
const body = {
|
|
794
1475
|
query,
|
|
@@ -880,17 +1561,18 @@ var SIEClient = class {
|
|
|
880
1561
|
this.pools.clear();
|
|
881
1562
|
}
|
|
882
1563
|
/**
|
|
883
|
-
* Create a resource pool for isolated capacity.
|
|
1564
|
+
* Create or update a resource pool for isolated capacity.
|
|
884
1565
|
*
|
|
885
1566
|
* Pools provide dedicated worker capacity, isolated from other clients.
|
|
886
1567
|
* Workers are assigned to pools and only serve requests from that pool.
|
|
887
1568
|
*
|
|
888
1569
|
* @param name - Pool name (used in GPU param as "poolName/machineProfile")
|
|
889
|
-
* @param gpus -
|
|
1570
|
+
* @param gpus - Optional machine profile requirements for pool readiness, e.g., { "l4": 2, "l4-spot": 1 }
|
|
1571
|
+
* @param gpuCaps - Optional maximum assigned workers per machine profile
|
|
890
1572
|
*
|
|
891
1573
|
* @example
|
|
892
1574
|
* ```typescript
|
|
893
|
-
* // Create a pool with 2 L4 GPUs
|
|
1575
|
+
* // Create or update a pool with 2 L4 GPUs
|
|
894
1576
|
* await client.createPool("eval-bench", { l4: 2 });
|
|
895
1577
|
*
|
|
896
1578
|
* // Use the pool for requests
|
|
@@ -900,11 +1582,17 @@ var SIEClient = class {
|
|
|
900
1582
|
* await client.deletePool("eval-bench");
|
|
901
1583
|
* ```
|
|
902
1584
|
*/
|
|
903
|
-
async createPool(name, gpus) {
|
|
904
|
-
|
|
905
|
-
|
|
1585
|
+
async createPool(name, gpus, gpuCaps) {
|
|
1586
|
+
const alreadyTracking = this.pools.has(name);
|
|
1587
|
+
const requestBody = {
|
|
1588
|
+
name
|
|
1589
|
+
};
|
|
1590
|
+
if (gpus !== void 0) {
|
|
1591
|
+
requestBody.gpus = gpus;
|
|
1592
|
+
}
|
|
1593
|
+
if (gpuCaps) {
|
|
1594
|
+
requestBody.gpu_caps = gpuCaps;
|
|
906
1595
|
}
|
|
907
|
-
const requestBody = { name, gpus };
|
|
908
1596
|
const url = `${this.baseUrl}/v1/pools`;
|
|
909
1597
|
const headers = {
|
|
910
1598
|
"Content-Type": JSON_CONTENT_TYPE,
|
|
@@ -932,6 +1620,9 @@ var SIEClient = class {
|
|
|
932
1620
|
}
|
|
933
1621
|
throw new PoolError(`Failed to create pool '${name}': ${errorMsg}`, name);
|
|
934
1622
|
}
|
|
1623
|
+
if (alreadyTracking || this.pools.has(name)) {
|
|
1624
|
+
return;
|
|
1625
|
+
}
|
|
935
1626
|
const abortController = new AbortController();
|
|
936
1627
|
const poolState = {
|
|
937
1628
|
timeoutId: null,
|
|
@@ -963,7 +1654,7 @@ var SIEClient = class {
|
|
|
963
1654
|
signal: perAttempt.signal
|
|
964
1655
|
});
|
|
965
1656
|
if (resp.ok) break;
|
|
966
|
-
} catch
|
|
1657
|
+
} catch {
|
|
967
1658
|
if (abortController.signal.aborted) return;
|
|
968
1659
|
} finally {
|
|
969
1660
|
clearTimeout(attemptTimeout);
|
|
@@ -1200,7 +1891,7 @@ var SIEClient = class {
|
|
|
1200
1891
|
}
|
|
1201
1892
|
const remaining = timeout - elapsed;
|
|
1202
1893
|
const delay = Math.min(pollInterval, remaining);
|
|
1203
|
-
await
|
|
1894
|
+
await sleep2(delay);
|
|
1204
1895
|
}
|
|
1205
1896
|
}
|
|
1206
1897
|
/**
|
|
@@ -1227,7 +1918,7 @@ var SIEClient = class {
|
|
|
1227
1918
|
if (elapsed < this.provisionTimeout) {
|
|
1228
1919
|
const remaining = this.provisionTimeout - elapsed;
|
|
1229
1920
|
const delay = Math.min(DEFAULT_RETRY_DELAY, remaining);
|
|
1230
|
-
await
|
|
1921
|
+
await sleep2(delay);
|
|
1231
1922
|
continue;
|
|
1232
1923
|
}
|
|
1233
1924
|
}
|
|
@@ -1253,7 +1944,7 @@ var SIEClient = class {
|
|
|
1253
1944
|
const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
|
|
1254
1945
|
const remaining = this.provisionTimeout - elapsed;
|
|
1255
1946
|
const actualDelay = Math.min(delay, remaining);
|
|
1256
|
-
await
|
|
1947
|
+
await sleep2(actualDelay);
|
|
1257
1948
|
continue;
|
|
1258
1949
|
}
|
|
1259
1950
|
await throwIfModelLoadFailed(response, model);
|
|
@@ -1273,7 +1964,7 @@ var SIEClient = class {
|
|
|
1273
1964
|
}
|
|
1274
1965
|
const retryAfter = getRetryAfter2(response);
|
|
1275
1966
|
const delay = retryAfter ?? LORA_LOADING_DEFAULT_DELAY;
|
|
1276
|
-
await
|
|
1967
|
+
await sleep2(delay);
|
|
1277
1968
|
continue;
|
|
1278
1969
|
}
|
|
1279
1970
|
if (errorCode === MODEL_LOADING_ERROR_CODE) {
|
|
@@ -1288,7 +1979,7 @@ var SIEClient = class {
|
|
|
1288
1979
|
const delay = retryAfter ?? MODEL_LOADING_DEFAULT_DELAY;
|
|
1289
1980
|
const remaining = this.provisionTimeout - elapsed;
|
|
1290
1981
|
const actualDelay = Math.min(delay, remaining);
|
|
1291
|
-
await
|
|
1982
|
+
await sleep2(actualDelay);
|
|
1292
1983
|
continue;
|
|
1293
1984
|
}
|
|
1294
1985
|
if (waitForCapacity) {
|
|
@@ -1298,7 +1989,7 @@ var SIEClient = class {
|
|
|
1298
1989
|
const delay = retryAfter ?? DEFAULT_RETRY_DELAY;
|
|
1299
1990
|
const remaining = this.provisionTimeout - elapsed;
|
|
1300
1991
|
const actualDelay = Math.min(delay, remaining);
|
|
1301
|
-
await
|
|
1992
|
+
await sleep2(actualDelay);
|
|
1302
1993
|
continue;
|
|
1303
1994
|
}
|
|
1304
1995
|
}
|
|
@@ -1579,6 +2270,6 @@ function detectImageFormat(bytes) {
|
|
|
1579
2270
|
return "unknown";
|
|
1580
2271
|
}
|
|
1581
2272
|
|
|
1582
|
-
export { InputTooLongError, LoraLoadingError, ModelLoadFailedError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
|
2273
|
+
export { InputTooLongError, LoraLoadingError, ModelLoadFailedError, ModelLoadingError, PoolError, ProvisioningError, RequestError, SDK_VERSION, SIEClient, SIEConnectionError, SIEError, SIEStreamError, ServerError, denseEmbedding, detectImageFormat, maxsim, maxsimBatch, maxsimDocuments, multivectorEmbedding, normalizeSparseVector, packMessage, sparseEmbedding, sparseEmbeddingMap, toFloat32Array, toImageBytes, toImageWireFormat, toNumberArray, unpackMessage };
|
|
1583
2274
|
//# sourceMappingURL=index.js.map
|
|
1584
2275
|
//# sourceMappingURL=index.js.map
|