@infersec/conduit 1.73.0 → 1.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -19921,6 +19921,7 @@ const LLMModelFormatSchema = _enum([
|
|
|
19921
19921
|
// Llama.cpp
|
|
19922
19922
|
"gguf"
|
|
19923
19923
|
]);
|
|
19924
|
+
// Task categories a model can declare: "text-generation" or "embeddings".
const LLMModelTaskTypeSchema = _enum(["text-generation", "embeddings"]);
|
|
19924
19925
|
const LLMModelSchema = object({
|
|
19925
19926
|
format: LLMModelFormatSchema,
|
|
19926
19927
|
id: string$1().min(1),
|
|
@@ -19935,7 +19936,8 @@ const LLMModelSchema = object({
|
|
|
19935
19936
|
slug: string$1().min(1),
|
|
19936
19937
|
type: literal("huggingface")
|
|
19937
19938
|
})
|
|
19938
|
-
])
|
|
19939
|
+
]),
|
|
19940
|
+
taskType: LLMModelTaskTypeSchema
|
|
19939
19941
|
});
|
|
19940
19942
|
object({
|
|
19941
19943
|
filePath: string$1().min(1),
|
|
@@ -20643,6 +20645,34 @@ const CompletionCreateParamsSchema = object({
|
|
|
20643
20645
|
top_p: number$1().min(0).max(1).nullable().optional(),
|
|
20644
20646
|
user: string$1().optional()
|
|
20645
20647
|
});
|
|
20648
|
+
// ==================== EMBEDDINGS ====================
|
|
20649
|
+
// Request body schema used by the POST /v1/embeddings route.
//   dimensions       optional, nullable positive integer
//   encoding_format  optional, nullable: "float" or "base64"
//   input            a string, a list of strings, a list of numbers,
//                    or a list of number lists
//   model            required string
//   user             optional string
const EmbeddingCreateParamsSchema = object({
    dimensions: number$1().int().positive().nullable().optional(),
    encoding_format: _enum(["float", "base64"]).nullable().optional(),
    input: union([string$1(), array(string$1()), array(number$1()), array(array(number$1()))]),
    model: string$1(),
    user: string$1().optional()
});
|
|
20661
|
+
// Token accounting block of an embeddings response: both counters are plain numbers.
const EmbeddingUsageSchema = object({
    prompt_tokens: number$1(),
    total_tokens: number$1()
});
|
|
20665
|
+
// One entry of the embeddings response `data` array.
// `embedding` is normally a list of floats. The request schema in this file
// also allows `encoding_format: "base64"`, in which case OpenAI-compatible
// servers return the vector as a base64 string, so both shapes are accepted.
// `index` is the entry's numeric position; `object` is always "embedding".
const EmbeddingDataSchema = object({
    embedding: union([array(number$1()), string$1()]),
    index: number$1(),
    object: literal("embedding")
});
|
|
20670
|
+
// Schema for the embeddings list response: a "list" object holding the
// per-input entries, the model name and the usage counters.
// The schema is constructed here but not bound to a name.
object({
    data: array(EmbeddingDataSchema),
    model: string$1(),
    object: literal("list"),
    usage: EmbeddingUsageSchema
});
|
|
20646
20676
|
|
|
20647
20677
|
const API_CLIENT_CONDUIT_GENERAL_REFERENCE = {
|
|
20648
20678
|
"/conduit/engine/start": {
|
|
@@ -20708,6 +20738,17 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
|
|
|
20708
20738
|
}
|
|
20709
20739
|
}
|
|
20710
20740
|
},
|
|
20741
|
+
"/v1/embeddings": {
|
|
20742
|
+
POST: {
|
|
20743
|
+
auth: {
|
|
20744
|
+
type: "shared-secret"
|
|
20745
|
+
},
|
|
20746
|
+
body: EmbeddingCreateParamsSchema,
|
|
20747
|
+
response: {
|
|
20748
|
+
type: "text-stream"
|
|
20749
|
+
}
|
|
20750
|
+
}
|
|
20751
|
+
},
|
|
20711
20752
|
"/v1/models": {
|
|
20712
20753
|
GET: {
|
|
20713
20754
|
auth: {
|
|
@@ -20743,6 +20784,12 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
|
|
|
20743
20784
|
endpointID: ULIDSchema.describe("Endpoint identifier")
|
|
20744
20785
|
}}
|
|
20745
20786
|
},
|
|
20787
|
+
"/api/inferencing/:endpointID/oai/v1/embeddings": {
|
|
20788
|
+
POST: {
|
|
20789
|
+
parameters: {
|
|
20790
|
+
endpointID: ULIDSchema.describe("Endpoint identifier")
|
|
20791
|
+
}}
|
|
20792
|
+
},
|
|
20746
20793
|
"/api/inferencing/:endpointID/oai/v1/models": {
|
|
20747
20794
|
GET: {
|
|
20748
20795
|
parameters: {
|
|
@@ -20771,7 +20818,8 @@ object({
|
|
|
20771
20818
|
.min(3)
|
|
20772
20819
|
.refine(value => value.includes("/"), {
|
|
20773
20820
|
message: "Slug must be fully qualified (owner/repo)"
|
|
20774
|
-
})
|
|
20821
|
+
}),
|
|
20822
|
+
taskType: LLMModelTaskTypeSchema.optional()
|
|
20775
20823
|
});
|
|
20776
20824
|
object({
|
|
20777
20825
|
results: array(object({
|
|
@@ -20782,6 +20830,7 @@ object({
|
|
|
20782
20830
|
name: string$1(),
|
|
20783
20831
|
provider: _enum(["storage", "huggingface"]),
|
|
20784
20832
|
providerSlug: string$1(),
|
|
20833
|
+
taskType: LLMModelTaskTypeSchema,
|
|
20785
20834
|
updated: string$1()
|
|
20786
20835
|
}))
|
|
20787
20836
|
});
|
|
@@ -20802,11 +20851,13 @@ object({
|
|
|
20802
20851
|
name: string$1(),
|
|
20803
20852
|
updated: string$1()
|
|
20804
20853
|
})),
|
|
20854
|
+
taskType: LLMModelTaskTypeSchema,
|
|
20805
20855
|
updated: string$1()
|
|
20806
20856
|
});
|
|
20807
20857
|
object({
|
|
20858
|
+
multimodalEnabled: boolean$1().optional(),
|
|
20808
20859
|
name: ResourceNameSchema.optional(),
|
|
20809
|
-
|
|
20860
|
+
taskType: LLMModelTaskTypeSchema.optional()
|
|
20810
20861
|
});
|
|
20811
20862
|
object({
|
|
20812
20863
|
success: literal(true)
|
|
@@ -20851,7 +20902,8 @@ object({
|
|
|
20851
20902
|
modelFormat: LLMModelFormatSchema,
|
|
20852
20903
|
name: string$1(),
|
|
20853
20904
|
provider: _enum(["storage", "huggingface"]),
|
|
20854
|
-
providerSlug: string$1()
|
|
20905
|
+
providerSlug: string$1(),
|
|
20906
|
+
taskType: LLMModelTaskTypeSchema
|
|
20855
20907
|
})
|
|
20856
20908
|
.nullable(),
|
|
20857
20909
|
modelQuantizationLabel: string$1().nullable(),
|
|
@@ -114830,6 +114882,9 @@ async function startVLLM({ enginePort, targetDirectory }) {
|
|
|
114830
114882
|
"--tensor-parallel-size",
|
|
114831
114883
|
String(tensorParallelSize)
|
|
114832
114884
|
];
|
|
114885
|
+
if (this.model.taskType === "embeddings") {
|
|
114886
|
+
args.push("--task", "embed");
|
|
114887
|
+
}
|
|
114833
114888
|
if (device) {
|
|
114834
114889
|
args.push("--device", device);
|
|
114835
114890
|
}
|
|
@@ -116583,6 +116638,9 @@ async function startLlamacpp({ enginePort, targetDirectory }) {
|
|
|
116583
116638
|
"--ctx-size",
|
|
116584
116639
|
String(contextLength)
|
|
116585
116640
|
];
|
|
116641
|
+
if (this.model.taskType === "embeddings") {
|
|
116642
|
+
args.push("--embedding");
|
|
116643
|
+
}
|
|
116586
116644
|
const gpuLayers = typeof engineConfig?.gpuLayers === "number"
|
|
116587
116645
|
? engineConfig.gpuLayers
|
|
116588
116646
|
: Number.parseInt(process.env.LLAMACPP_GPU_LAYERS ?? String(DEFAULT_LLAMACPP_GPU_LAYERS), 10);
|
|
@@ -117688,6 +117746,153 @@ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
|
|
|
117688
117746
|
}
|
|
117689
117747
|
return Math.round(tokensPerSecond);
|
|
117690
117748
|
}
|
|
117749
|
+
/**
 * Forwards an embeddings request to the engine through
 * `modelManager.fetchOpenAI("/v1/embeddings", ...)` and reports prompt metrics
 * for every outcome.
 *
 * Outcomes:
 *  - the fetch rejects: the error is logged via `logEngineMetrics`, a failed
 *    metric is reported, and the (normalized) error is rethrown;
 *  - the upstream response has no body: the event is logged, a failed metric
 *    is reported, and `{ status, statusText }` is returned;
 *  - otherwise: the body is wrapped by `monitorEngineResponseSingle` and
 *    returned as `{ body, headers, status }`; the metric is reported from the
 *    monitor's `onComplete` callback.
 *
 * A rejected `reportMetrics` promise is only logged as a warning; it does not
 * reject this function's promise.
 */
async function proxyEmbeddingsRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }) {
    const EMBEDDINGS_PATH = "/v1/embeddings";

    // Anything that is not a finite, non-negative number counts as zero tokens.
    const toTokenCount = value => typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : 0;

    // Fire-and-forget metric upload; rejections are logged, never rethrown.
    const sendMetrics = payload => {
        reportMetrics(payload).catch(uploadError => {
            logger.warn("Failed to upload LLM prompt metrics", {
                error: asError(uploadError),
                requestUrl: EMBEDDINGS_PATH
            });
        });
    };

    const engineType = conduitConfiguration.engineConfig?.type ?? null;
    const engineConfig = conduitConfiguration.engineConfig?.config ?? null;
    const engineLabel = engineType ?? "unknown";

    // Strings are forwarded verbatim; every other value is JSON-encoded.
    let serializedBody;
    if (!isPlainObject$2(body) && typeof body === "string") {
        serializedBody = body;
    }
    else {
        serializedBody = JSON.stringify(body);
    }
    const requestBodyBytes = Buffer.byteLength(serializedBody, "utf8");
    const requestStartedAt = Date.now();
    // Starts optimistic; overwritten with the real status once the fetch resolves.
    let upstreamResponseOk = true;

    // Metric for a request that yielded no response body (fetch error or empty response).
    const reportFailedRequest = () => {
        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
        sendMetrics({
            bytes: requestBodyBytes,
            completionTokens: 0,
            engine: engineType,
            endpointId: endpointId ?? null,
            latencyMs,
            modelId: modelID,
            promptTokens: 0,
            requestBytes: requestBodyBytes,
            requestId: null,
            requestMethod: "POST",
            requestPath: EMBEDDINGS_PATH,
            responseBytes: 0,
            successful: false,
            timeToFirstTokenMs: null,
            tokensPerSecond: 0,
            totalTokens: 0
        });
    };

    // Invoked by the response monitor once the upstream body has been consumed.
    const onMonitoringComplete = ({ durationMs, error, responseBytes, usage }) => {
        const promptTokens = toTokenCount(usage?.promptTokens);
        // Fall back to the prompt count when the engine reports no total.
        const totalTokens = toTokenCount(usage?.totalTokens ?? promptTokens);
        const latencyMs = Math.max(0, durationMs);
        const tokensPerSecond = calculateTokensPerSecond$2({
            durationMs: latencyMs,
            totalTokens
        });
        sendMetrics({
            bytes: requestBodyBytes + responseBytes,
            completionTokens: 0,
            engine: engineType,
            endpointId: endpointId ?? null,
            latencyMs,
            modelId: modelID,
            promptTokens,
            requestBytes: requestBodyBytes,
            requestId: null,
            requestMethod: "POST",
            requestPath: EMBEDDINGS_PATH,
            responseBytes,
            successful: upstreamResponseOk && !error,
            timeToFirstTokenMs: null,
            tokensPerSecond,
            totalTokens
        });
    };

    // Log, report and rethrow when the upstream call itself rejects.
    const onFetchRejected = fetchError => {
        const err = asError(fetchError);
        logEngineMetrics({
            agentEngineType: engineLabel,
            error: err,
            level: "error",
            logger,
            requestBodyBytes,
            requestPath: EMBEDDINGS_PATH,
            responseBytes: 0,
            usage: null
        });
        reportFailedRequest();
        throw err;
    };

    const response = await modelManager
        .fetchOpenAI(EMBEDDINGS_PATH, {
            body: serializedBody,
            headers: {
                "Content-Type": "application/json"
            },
            method: "POST",
            signal
        })
        .catch(onFetchRejected);
    upstreamResponseOk = response.ok;
    const responseStatusText = response.statusText ?? "Upstream request failed";

    // No body to relay: report the request as failed and hand back status only.
    if (!response.body) {
        logEngineMetrics({
            agentEngineType: engineLabel,
            level: response.ok ? "info" : "error",
            logger,
            requestBodyBytes,
            requestPath: EMBEDDINGS_PATH,
            responseBytes: 0,
            usage: null
        });
        reportFailedRequest();
        return {
            status: response.status,
            statusText: responseStatusText
        };
    }

    // Relay the upstream body through the monitor so metrics fire on completion.
    const { stream } = monitorEngineResponseSingle({
        agentEngineType: engineLabel,
        body: Readable.fromWeb(response.body),
        contextLength: modelManager.contextLength,
        engineConfig,
        engineType: engineLabel,
        logger,
        onComplete: onMonitoringComplete,
        requestBodyBytes,
        requestPath: EMBEDDINGS_PATH,
        requestStartedAt
    });
    return {
        body: stream,
        headers: Object.fromEntries(response.headers.entries()),
        status: response.status
    };
}
|
|
117691
117896
|
async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }) {
|
|
117692
117897
|
function normalizeTokenCount(value) {
|
|
117693
117898
|
if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
|
|
@@ -117710,6 +117915,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
|
|
|
117710
117915
|
const requestStartedAt = Date.now();
|
|
117711
117916
|
const requestBody = JSON.parse(serializedBody);
|
|
117712
117917
|
const streamRequested = requestBody.stream === true;
|
|
117918
|
+
let upstreamResponseOk = true;
|
|
117713
117919
|
const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
|
|
117714
117920
|
const completionTokens = normalizeTokenCount(usage?.completionTokens);
|
|
117715
117921
|
const promptTokens = normalizeTokenCount(usage?.promptTokens);
|
|
@@ -117728,7 +117934,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
|
|
|
117728
117934
|
requestMethod: "POST",
|
|
117729
117935
|
requestPath: path,
|
|
117730
117936
|
responseBytes,
|
|
117731
|
-
successful: !error,
|
|
117937
|
+
successful: upstreamResponseOk && !error,
|
|
117732
117938
|
timeToFirstTokenMs,
|
|
117733
117939
|
tokensPerSecond: calculateTokensPerSecond$2({
|
|
117734
117940
|
durationMs: latencyMs,
|
|
@@ -117779,6 +117985,7 @@ async function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointI
|
|
|
117779
117985
|
});
|
|
117780
117986
|
throw err;
|
|
117781
117987
|
});
|
|
117988
|
+
upstreamResponseOk = response.ok;
|
|
117782
117989
|
const responseStatusText = response.statusText ?? "Upstream request failed";
|
|
117783
117990
|
if (!response.ok) {
|
|
117784
117991
|
if (!response.body) {
|
|
@@ -117923,6 +118130,26 @@ function createConduitOpenAIAPIReferenceHandlers({ apiClient, conduitConfigurati
|
|
|
117923
118130
|
});
|
|
117924
118131
|
}
|
|
117925
118132
|
},
|
|
118133
|
+
"/v1/embeddings": {
|
|
118134
|
+
POST: async ({ body, req, res }) => {
|
|
118135
|
+
const modelID = getModelID();
|
|
118136
|
+
const modelManager = getModelManager();
|
|
118137
|
+
const abortController = new AbortController();
|
|
118138
|
+
res.on("close", () => {
|
|
118139
|
+
abortController.abort();
|
|
118140
|
+
});
|
|
118141
|
+
return proxyEmbeddingsRoute({
|
|
118142
|
+
body,
|
|
118143
|
+
conduitConfiguration: conduitConfiguration(),
|
|
118144
|
+
endpointId: extractEndpointId$1(req),
|
|
118145
|
+
logger,
|
|
118146
|
+
modelID,
|
|
118147
|
+
modelManager,
|
|
118148
|
+
reportMetrics: apiClient.reportPromptMetrics,
|
|
118149
|
+
signal: abortController.signal
|
|
118150
|
+
});
|
|
118151
|
+
}
|
|
118152
|
+
},
|
|
117926
118153
|
"/v1/models": {
|
|
117927
118154
|
GET: async () => {
|
|
117928
118155
|
const modelManager = getModelManager();
|
|
@@ -117962,6 +118189,9 @@ function createPostChatCompletionsHandler(options) {
|
|
|
117962
118189
|
function createPostCompletionsHandler(options) {
|
|
117963
118190
|
return createConduitOpenAIAPIReferenceHandlers(options)["/v1/completions"].POST;
|
|
117964
118191
|
}
|
|
118192
|
+
// Builds the full OpenAI handler table for `options` and returns only the
// POST handler registered under "/v1/embeddings".
function createPostEmbeddingsHandler(options) {
    const { POST } = createConduitOpenAIAPIReferenceHandlers(options)["/v1/embeddings"];
    return POST;
}
|
|
117965
118195
|
|
|
117966
118196
|
function isPlainObject$1(value) {
|
|
117967
118197
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -128707,6 +128937,17 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
128707
128937
|
startup
|
|
128708
128938
|
})
|
|
128709
128939
|
},
|
|
128940
|
+
"/v1/embeddings": {
|
|
128941
|
+
POST: createPostEmbeddingsHandler({
|
|
128942
|
+
apiClient,
|
|
128943
|
+
conduitConfiguration: () => conduitConfiguration,
|
|
128944
|
+
configuration,
|
|
128945
|
+
getModelID: () => conduitConfiguration.targetModel.id,
|
|
128946
|
+
getModelManager: () => modelManager,
|
|
128947
|
+
logger,
|
|
128948
|
+
startup
|
|
128949
|
+
})
|
|
128950
|
+
},
|
|
128710
128951
|
"/v1/models": {
|
|
128711
128952
|
GET: createGetModelsHandler({
|
|
128712
128953
|
apiClient,
|
|
@@ -129711,8 +129952,9 @@ class HuggingFaceClient {
|
|
|
129711
129952
|
}
|
|
129712
129953
|
}
|
|
129713
129954
|
}
|
|
129714
|
-
const
|
|
129715
|
-
|
|
129955
|
+
const taskPriority = new Map();
|
|
129956
|
+
pipelineTasks.forEach((task, index) => taskPriority.set(task, index));
|
|
129957
|
+
const modelsById = new Map();
|
|
129716
129958
|
await Promise.all(queries.map(async ({ task, tag }) => {
|
|
129717
129959
|
const searchParams = {
|
|
129718
129960
|
accessToken: this.apiKey ?? undefined,
|
|
@@ -129724,9 +129966,6 @@ class HuggingFaceClient {
|
|
|
129724
129966
|
}
|
|
129725
129967
|
};
|
|
129726
129968
|
for await (const entry of executeListWithRetry(searchParams)) {
|
|
129727
|
-
if (seenIds.has(entry.id)) {
|
|
129728
|
-
continue;
|
|
129729
|
-
}
|
|
129730
129969
|
const entryForUtils = {
|
|
129731
129970
|
config: entry.config,
|
|
129732
129971
|
gated: entry.gated,
|
|
@@ -129742,10 +129981,15 @@ class HuggingFaceClient {
|
|
|
129742
129981
|
if (targetFormats.length > 0 && !targetFormats.includes(format)) {
|
|
129743
129982
|
continue;
|
|
129744
129983
|
}
|
|
129745
|
-
|
|
129984
|
+
const existing = modelsById.get(entry.id);
|
|
129985
|
+
if (existing &&
|
|
129986
|
+
(taskPriority.get(task) ?? Number.MAX_SAFE_INTEGER) >=
|
|
129987
|
+
(taskPriority.get(existing.pipelineTask) ?? Number.MAX_SAFE_INTEGER)) {
|
|
129988
|
+
continue;
|
|
129989
|
+
}
|
|
129746
129990
|
const parameterCount = parseParameterCount(entry.id, entry.safetensors?.parameters);
|
|
129747
129991
|
const slug = entry.name?.trim() || entry.id;
|
|
129748
|
-
|
|
129992
|
+
modelsById.set(entry.id, {
|
|
129749
129993
|
downloads: entry.downloads,
|
|
129750
129994
|
format,
|
|
129751
129995
|
gated: entry.gated || false,
|
|
@@ -129753,13 +129997,14 @@ class HuggingFaceClient {
|
|
|
129753
129997
|
likes: entry.likes,
|
|
129754
129998
|
name: entry.name || entry.id,
|
|
129755
129999
|
parameterCount,
|
|
130000
|
+
pipelineTask: task,
|
|
129756
130001
|
quantization: extractQuantization(entryForUtils),
|
|
129757
130002
|
slug,
|
|
129758
130003
|
updatedAt: entry.updatedAt
|
|
129759
130004
|
});
|
|
129760
130005
|
}
|
|
129761
130006
|
}));
|
|
129762
|
-
return
|
|
130007
|
+
return Array.from(modelsById.values());
|
|
129763
130008
|
}
|
|
129764
130009
|
}
|
|
129765
130010
|
|
|
@@ -209,4 +209,34 @@ export declare function createPostCompletionsHandler(options: {
|
|
|
209
209
|
status: number;
|
|
210
210
|
statusText: string;
|
|
211
211
|
}>;
|
|
212
|
+
/**
 * Creates the request handler for the embeddings route.
 *
 * @param options - Dependencies the handler closes over: API client,
 *   configuration accessors, model accessors, logger and startup value.
 * @returns An async handler taking the request/response pair and the validated
 *   embeddings body. It resolves to either a readable body with status (and
 *   optional headers), or a body-less result carrying `status` and `statusText`.
 */
export declare function createPostEmbeddingsHandler(options: {
    apiClient: APIClient;
    conduitConfiguration: () => InferenceAgentConfiguration;
    configuration: Configuration;
    getModelID: () => string;
    getModelManager: () => ModelManager;
    logger: Logger;
    startup: number;
}): (params: {
    req: APIRequest;
    res: import("@infersec/fetch").APIResponse;
    parameters: Record<string, never>;
    query: Record<string, never>;
    body: {
        input: string | number[] | string[] | number[][];
        model: string;
        dimensions?: number | null | undefined;
        encoding_format?: "base64" | "float" | null | undefined;
        user?: string | undefined;
    };
    responseSchema: undefined;
}) => Promise<{
    body: import("stream").Readable;
    headers?: Record<string, string>;
    status: number;
} | {
    headers?: Record<string, string>;
    status: number;
    statusText: string;
}>;
|
|
212
242
|
export {};
|
package/dist/utils/openai.d.ts
CHANGED
|
@@ -3,6 +3,23 @@ import { InferenceAgentConfiguration, InferenceAgentLLMMetricsPayload, type ULID
|
|
|
3
3
|
import { Logger } from "@infersec/logger";
|
|
4
4
|
import { Configuration } from "../configuration.js";
|
|
5
5
|
import { ModelManager } from "../modelManagement/ModelManager.js";
|
|
6
|
+
/**
 * Proxies an embeddings request to the engine and reports prompt metrics.
 *
 * @param body - Request payload to forward.
 * @param conduitConfiguration - Agent configuration for this request.
 * @param endpointId - Optional endpoint identifier.
 * @param logger - Logger instance.
 * @param modelID - Identifier of the model.
 * @param modelManager - Model manager instance.
 * @param reportMetrics - Async callback that receives the metrics payload.
 * @param signal - Optional abort signal.
 * @returns Either `{ body, headers, status }`, where `body` is a readable
 *   stream, or `{ status, statusText }` with no body.
 */
export declare function proxyEmbeddingsRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, reportMetrics, signal }: {
    body: unknown;
    conduitConfiguration: InferenceAgentConfiguration;
    endpointId?: ULID | null;
    logger: Logger;
    modelID: ULID;
    modelManager: ModelManager;
    reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
    signal?: AbortSignal;
}): Promise<{
    body: Readable;
    headers: Record<string, string>;
    status: number;
} | {
    status: number;
    statusText: string;
}>;
|
|
6
23
|
export declare function proxyOpenAIStreamingRoute({ body, conduitConfiguration, endpointId, logger, modelID, modelManager, path, reportMetrics, signal }: {
|
|
7
24
|
body: unknown;
|
|
8
25
|
conduitConfiguration: InferenceAgentConfiguration;
|