@infersec/conduit 1.28.0 → 1.28.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/requestHandlers/createConduitGeneralAPIReferenceHandlers.d.ts +92 -0
- package/dist/requestHandlers/createConduitOpenAIAPIReferenceHandlers.d.ts +208 -0
- package/dist/requestHandlers/createHealthHandler.d.ts +2 -0
- package/dist/{start-DtAR3bT0.js → start-CyERvKjg.js} +1494 -1414
- package/package.json +1 -1
|
@@ -12,7 +12,7 @@ import require$$3$1 from 'assert';
|
|
|
12
12
|
import require$$4$1 from 'events';
|
|
13
13
|
import require$$1$1 from 'stream';
|
|
14
14
|
import crypto, { createHash } from 'node:crypto';
|
|
15
|
-
import require$$0$7, { Readable, Transform, getDefaultHighWaterMark, Duplex, Writable
|
|
15
|
+
import require$$0$7, { Readable, Transform, PassThrough, getDefaultHighWaterMark, Duplex, Writable } from 'node:stream';
|
|
16
16
|
import 'argon2';
|
|
17
17
|
import { spawn, ChildProcess, execFile, spawnSync } from 'node:child_process';
|
|
18
18
|
import require$$0$6 from 'node:assert';
|
|
@@ -108649,352 +108649,551 @@ class ModelManager extends EventEmitter {
|
|
|
108649
108649
|
}
|
|
108650
108650
|
}
|
|
108651
108651
|
|
|
108652
|
-
|
|
108653
|
-
|
|
108654
|
-
|
|
108655
|
-
|
|
108656
|
-
|
|
108657
|
-
|
|
108658
|
-
|
|
108659
|
-
|
|
108660
|
-
|
|
108661
|
-
|
|
108662
|
-
}
|
|
108663
|
-
|
|
108664
|
-
|
|
108665
|
-
|
|
108666
|
-
|
|
108667
|
-
|
|
108668
|
-
|
|
108669
|
-
|
|
108670
|
-
|
|
108652
|
+
function createConduitGeneralAPIReferenceHandlers({ cycleEngine, conduitStateManager, getModelManager, logger, setErrorState, startEngine, stopEngine, stopRequestedByControl }) {
|
|
108653
|
+
return {
|
|
108654
|
+
"/conduit/engine/start": {
|
|
108655
|
+
POST: async () => {
|
|
108656
|
+
const modelManager = getModelManager();
|
|
108657
|
+
if (conduitStateManager.getState().state !== "idle") {
|
|
108658
|
+
return {
|
|
108659
|
+
status: 409,
|
|
108660
|
+
statusText: "Engine can only be started from idle state"
|
|
108661
|
+
};
|
|
108662
|
+
}
|
|
108663
|
+
if (!modelManager.canStart) {
|
|
108664
|
+
return {
|
|
108665
|
+
status: 409,
|
|
108666
|
+
statusText: `Engine cannot be started from current state: ${modelManager.state}`
|
|
108667
|
+
};
|
|
108668
|
+
}
|
|
108669
|
+
try {
|
|
108670
|
+
logger.info("Received remote engine start request");
|
|
108671
|
+
await startEngine();
|
|
108672
|
+
return {
|
|
108673
|
+
body: {
|
|
108674
|
+
acknowledged: true
|
|
108675
|
+
},
|
|
108676
|
+
status: 202
|
|
108677
|
+
};
|
|
108678
|
+
}
|
|
108679
|
+
catch (error) {
|
|
108680
|
+
if (stopRequestedByControl() || modelManager.state === "stopped") {
|
|
108681
|
+
return {
|
|
108682
|
+
status: 409,
|
|
108683
|
+
statusText: "Engine start was interrupted"
|
|
108684
|
+
};
|
|
108671
108685
|
}
|
|
108672
|
-
const
|
|
108673
|
-
|
|
108674
|
-
|
|
108675
|
-
|
|
108676
|
-
|
|
108677
|
-
|
|
108678
|
-
|
|
108679
|
-
onRequestEnd,
|
|
108680
|
-
onRequestStart,
|
|
108681
|
-
reportMetrics,
|
|
108682
|
-
request: payload,
|
|
108683
|
-
signal
|
|
108684
|
-
}).catch(error => {
|
|
108685
|
-
logger.error("SSE request handler failed", {
|
|
108686
|
-
error: asError(error),
|
|
108687
|
-
requestMethod: payload.requestID
|
|
108688
|
-
});
|
|
108689
|
-
});
|
|
108690
|
-
},
|
|
108691
|
-
signal
|
|
108692
|
-
});
|
|
108693
|
-
}
|
|
108694
|
-
catch (error) {
|
|
108695
|
-
if (signal?.aborted) {
|
|
108696
|
-
return;
|
|
108686
|
+
const parsedError = asError(error);
|
|
108687
|
+
setErrorState({ error: parsedError.message });
|
|
108688
|
+
return {
|
|
108689
|
+
status: 500,
|
|
108690
|
+
statusText: parsedError.message
|
|
108691
|
+
};
|
|
108692
|
+
}
|
|
108697
108693
|
}
|
|
108698
|
-
|
|
108699
|
-
|
|
108700
|
-
|
|
108701
|
-
|
|
108702
|
-
|
|
108694
|
+
},
|
|
108695
|
+
"/conduit/engine/stop": {
|
|
108696
|
+
POST: async () => {
|
|
108697
|
+
const modelManager = getModelManager();
|
|
108698
|
+
const sourceState = conduitStateManager.getState().state;
|
|
108699
|
+
if (sourceState !== "bootingEngine" && sourceState !== "online") {
|
|
108700
|
+
return {
|
|
108701
|
+
status: 409,
|
|
108702
|
+
statusText: "Engine can only be stopped while booting or online"
|
|
108703
|
+
};
|
|
108704
|
+
}
|
|
108705
|
+
if (!modelManager.canStop) {
|
|
108706
|
+
return {
|
|
108707
|
+
status: 409,
|
|
108708
|
+
statusText: `Engine cannot be stopped from current state: ${modelManager.state}`
|
|
108709
|
+
};
|
|
108710
|
+
}
|
|
108711
|
+
logger.info("Received remote engine stop request");
|
|
108712
|
+
stopEngine({
|
|
108713
|
+
reason: "Remote shutdown requested"
|
|
108714
|
+
}).catch(error => {
|
|
108715
|
+
const parsedError = asError(error);
|
|
108716
|
+
logger.error("Remote engine stop request failed", {
|
|
108717
|
+
error: parsedError
|
|
108718
|
+
});
|
|
108719
|
+
setErrorState({ error: parsedError.message });
|
|
108703
108720
|
});
|
|
108721
|
+
return {
|
|
108722
|
+
body: {
|
|
108723
|
+
acknowledged: true
|
|
108724
|
+
},
|
|
108725
|
+
status: 202
|
|
108726
|
+
};
|
|
108704
108727
|
}
|
|
108705
|
-
|
|
108706
|
-
|
|
108728
|
+
},
|
|
108729
|
+
"/conduit/engine/cycle": {
|
|
108730
|
+
POST: async () => {
|
|
108731
|
+
const modelManager = getModelManager();
|
|
108732
|
+
const sourceState = conduitStateManager.getState().state;
|
|
108733
|
+
if (sourceState !== "bootingEngine" &&
|
|
108734
|
+
sourceState !== "online" &&
|
|
108735
|
+
sourceState !== "idle") {
|
|
108736
|
+
return {
|
|
108737
|
+
status: 409,
|
|
108738
|
+
statusText: "Engine can only be cycled while booting, online, or idle"
|
|
108739
|
+
};
|
|
108740
|
+
}
|
|
108741
|
+
if (sourceState !== "idle" && !modelManager.canStop) {
|
|
108742
|
+
return {
|
|
108743
|
+
status: 409,
|
|
108744
|
+
statusText: `Engine cannot be cycled from current state: ${modelManager.state}`
|
|
108745
|
+
};
|
|
108746
|
+
}
|
|
108747
|
+
try {
|
|
108748
|
+
logger.info("Received remote engine cycle request");
|
|
108749
|
+
await cycleEngine();
|
|
108750
|
+
return {
|
|
108751
|
+
body: {
|
|
108752
|
+
acknowledged: true
|
|
108753
|
+
},
|
|
108754
|
+
status: 202
|
|
108755
|
+
};
|
|
108756
|
+
}
|
|
108757
|
+
catch (error) {
|
|
108758
|
+
const parsedError = asError(error);
|
|
108759
|
+
setErrorState({ error: parsedError.message });
|
|
108760
|
+
return {
|
|
108761
|
+
status: 500,
|
|
108762
|
+
statusText: parsedError.message
|
|
108763
|
+
};
|
|
108764
|
+
}
|
|
108707
108765
|
}
|
|
108708
|
-
|
|
108709
|
-
|
|
108710
|
-
|
|
108711
|
-
|
|
108712
|
-
|
|
108713
|
-
|
|
108766
|
+
}
|
|
108767
|
+
};
|
|
108768
|
+
}
|
|
108769
|
+
function createPostCycleEngineHandler(options) {
|
|
108770
|
+
return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/cycle"].POST;
|
|
108771
|
+
}
|
|
108772
|
+
function createPostStartEngineHandler(options) {
|
|
108773
|
+
return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/start"].POST;
|
|
108774
|
+
}
|
|
108775
|
+
function createPostStopEngineHandler(options) {
|
|
108776
|
+
return createConduitGeneralAPIReferenceHandlers(options)["/conduit/engine/stop"].POST;
|
|
108777
|
+
}
|
|
108778
|
+
|
|
108779
|
+
/**
|
|
108780
|
+
* Coerce non-string tool_calls function.arguments to JSON strings.
|
|
108781
|
+
* Some LLM backends return arguments as parsed objects instead of
|
|
108782
|
+
* JSON strings, violating the OpenAI spec. This mutates in place
|
|
108783
|
+
* and returns true if any coercion was performed.
|
|
108784
|
+
*/
|
|
108785
|
+
function coerceToolCallArguments(parsed) {
|
|
108786
|
+
const choices = parsed.choices;
|
|
108787
|
+
if (!Array.isArray(choices))
|
|
108788
|
+
return false;
|
|
108789
|
+
let modified = false;
|
|
108790
|
+
for (const choice of choices) {
|
|
108791
|
+
if (!choice || typeof choice !== "object")
|
|
108792
|
+
continue;
|
|
108793
|
+
const choiceRecord = choice;
|
|
108794
|
+
const msg = choiceRecord.delta ?? choiceRecord.message;
|
|
108795
|
+
if (!msg || typeof msg !== "object")
|
|
108796
|
+
continue;
|
|
108797
|
+
const toolCalls = msg.tool_calls;
|
|
108798
|
+
if (!Array.isArray(toolCalls))
|
|
108799
|
+
continue;
|
|
108800
|
+
for (const tc of toolCalls) {
|
|
108801
|
+
if (!tc || typeof tc !== "object")
|
|
108802
|
+
continue;
|
|
108803
|
+
const fn = tc.function;
|
|
108804
|
+
if (!fn || typeof fn !== "object")
|
|
108805
|
+
continue;
|
|
108806
|
+
const fnRecord = fn;
|
|
108807
|
+
if (fnRecord.arguments !== undefined && typeof fnRecord.arguments !== "string") {
|
|
108808
|
+
fnRecord.arguments = JSON.stringify(fnRecord.arguments);
|
|
108809
|
+
modified = true;
|
|
108714
108810
|
}
|
|
108715
108811
|
}
|
|
108716
108812
|
}
|
|
108813
|
+
return modified;
|
|
108717
108814
|
}
|
|
108718
|
-
|
|
108719
|
-
|
|
108720
|
-
|
|
108721
|
-
logger.warn("Failed to upload LLM prompt metrics", {
|
|
108722
|
-
error: asError(error),
|
|
108723
|
-
requestUrl: request.path
|
|
108724
|
-
});
|
|
108725
|
-
});
|
|
108726
|
-
}
|
|
108727
|
-
const requestStartedAt = Date.now();
|
|
108728
|
-
const requestBytes = calculateRequestBytes(request.body ?? null);
|
|
108729
|
-
try {
|
|
108730
|
-
await onRequestStart?.(request);
|
|
108731
|
-
const response = await onRequest(request);
|
|
108732
|
-
const responseMetrics = await streamResponse({
|
|
108733
|
-
apiURL,
|
|
108734
|
-
configuration,
|
|
108735
|
-
logger,
|
|
108736
|
-
requestID: request.requestID,
|
|
108737
|
-
requestStartedAt,
|
|
108738
|
-
response,
|
|
108739
|
-
signal
|
|
108740
|
-
});
|
|
108741
|
-
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108742
|
-
const totalTokens = 0;
|
|
108743
|
-
const tokensPerSecond = calculateTokensPerSecond$1({
|
|
108744
|
-
durationMs: latencyMs,
|
|
108745
|
-
totalTokens
|
|
108746
|
-
});
|
|
108747
|
-
reportMetricsSafe({
|
|
108748
|
-
bytes: requestBytes + responseMetrics.responseBytes,
|
|
108749
|
-
completionTokens: 0,
|
|
108750
|
-
engine: configuration.agentEngineType,
|
|
108751
|
-
endpointId: null,
|
|
108752
|
-
latencyMs,
|
|
108753
|
-
modelId: modelID,
|
|
108754
|
-
promptTokens: 0,
|
|
108755
|
-
requestBytes,
|
|
108756
|
-
requestId: request.requestID,
|
|
108757
|
-
requestMethod: request.method,
|
|
108758
|
-
requestPath: request.path,
|
|
108759
|
-
responseBytes: responseMetrics.responseBytes,
|
|
108760
|
-
successful: responseMetrics.status < 400,
|
|
108761
|
-
timeToFirstTokenMs: responseMetrics.timeToFirstTokenMs,
|
|
108762
|
-
tokensPerSecond,
|
|
108763
|
-
totalTokens
|
|
108764
|
-
});
|
|
108765
|
-
}
|
|
108766
|
-
catch (error) {
|
|
108767
|
-
logger.error("SSE request failed", {
|
|
108768
|
-
error: asError(error),
|
|
108769
|
-
requestMethod: request.requestID
|
|
108770
|
-
});
|
|
108771
|
-
const failureMessage = "Bad gateway\n\nProxying failed";
|
|
108772
|
-
const failureBytes = Buffer.byteLength(failureMessage, "utf8");
|
|
108773
|
-
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108774
|
-
const totalTokens = 0;
|
|
108775
|
-
const tokensPerSecond = calculateTokensPerSecond$1({
|
|
108776
|
-
durationMs: latencyMs,
|
|
108777
|
-
totalTokens
|
|
108778
|
-
});
|
|
108779
|
-
const streamHandler = await sendChunkStream({
|
|
108780
|
-
apiURL,
|
|
108781
|
-
configuration,
|
|
108782
|
-
requestID: request.requestID,
|
|
108783
|
-
logger
|
|
108784
|
-
});
|
|
108785
|
-
await streamHandler.sendChunk({
|
|
108786
|
-
data: encodeBinaryChunk(Buffer.from(failureMessage)),
|
|
108787
|
-
sequence: 0,
|
|
108788
|
-
status: 502
|
|
108789
|
-
});
|
|
108790
|
-
await streamHandler.sendChunk({
|
|
108791
|
-
data: null,
|
|
108792
|
-
sequence: 1,
|
|
108793
|
-
status: 502
|
|
108794
|
-
});
|
|
108795
|
-
await streamHandler.end();
|
|
108796
|
-
reportMetricsSafe({
|
|
108797
|
-
bytes: requestBytes + failureBytes,
|
|
108798
|
-
completionTokens: 0,
|
|
108799
|
-
engine: configuration.agentEngineType,
|
|
108800
|
-
endpointId: null,
|
|
108801
|
-
latencyMs,
|
|
108802
|
-
modelId: modelID,
|
|
108803
|
-
promptTokens: 0,
|
|
108804
|
-
requestBytes,
|
|
108805
|
-
requestId: request.requestID,
|
|
108806
|
-
requestMethod: request.method,
|
|
108807
|
-
requestPath: request.path,
|
|
108808
|
-
responseBytes: failureBytes,
|
|
108809
|
-
successful: false,
|
|
108810
|
-
timeToFirstTokenMs: latencyMs,
|
|
108811
|
-
tokensPerSecond,
|
|
108812
|
-
totalTokens
|
|
108813
|
-
});
|
|
108815
|
+
function isEngineUsageChunk(value) {
|
|
108816
|
+
if (!value || typeof value !== "object") {
|
|
108817
|
+
return false;
|
|
108814
108818
|
}
|
|
108815
|
-
|
|
108816
|
-
|
|
108819
|
+
const record = value;
|
|
108820
|
+
if (!record.usage || typeof record.usage !== "object") {
|
|
108821
|
+
return false;
|
|
108817
108822
|
}
|
|
108823
|
+
return true;
|
|
108818
108824
|
}
|
|
108819
|
-
|
|
108820
|
-
|
|
108825
|
+
function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
108826
|
+
const startedAt = requestStartedAt ?? Date.now();
|
|
108827
|
+
const passThrough = new PassThrough();
|
|
108821
108828
|
let responseBytes = 0;
|
|
108822
|
-
let
|
|
108823
|
-
|
|
108824
|
-
|
|
108825
|
-
|
|
108826
|
-
|
|
108827
|
-
|
|
108828
|
-
|
|
108829
|
-
|
|
108830
|
-
for
|
|
108831
|
-
|
|
108832
|
-
|
|
108833
|
-
|
|
108829
|
+
let firstChunkAt = null;
|
|
108830
|
+
let usage = null;
|
|
108831
|
+
let buffer = "";
|
|
108832
|
+
let completed = false;
|
|
108833
|
+
function modifyChunkWithUsage(chunk) {
|
|
108834
|
+
const text = chunk.toString("utf8");
|
|
108835
|
+
const lines = text.split("\n");
|
|
108836
|
+
const modifiedLines = [];
|
|
108837
|
+
for (const rawLine of lines) {
|
|
108838
|
+
const line = rawLine.trim();
|
|
108839
|
+
if (!line.startsWith("data:")) {
|
|
108840
|
+
modifiedLines.push(rawLine);
|
|
108841
|
+
continue;
|
|
108834
108842
|
}
|
|
108835
|
-
const
|
|
108836
|
-
|
|
108837
|
-
|
|
108838
|
-
|
|
108839
|
-
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108843
|
+
const payload = line.slice(5).trim();
|
|
108844
|
+
if (!payload || payload === "[DONE]") {
|
|
108845
|
+
modifiedLines.push(rawLine);
|
|
108846
|
+
continue;
|
|
108840
108847
|
}
|
|
108841
|
-
responseBytes += buffer.length;
|
|
108842
|
-
await streamHandler.sendChunk({
|
|
108843
|
-
data: encodeBinaryChunk(buffer),
|
|
108844
|
-
sequence,
|
|
108845
|
-
status: response.status
|
|
108846
|
-
});
|
|
108847
|
-
sequence += 1;
|
|
108848
|
-
}
|
|
108849
|
-
await streamHandler.sendChunk({
|
|
108850
|
-
data: null,
|
|
108851
|
-
sequence,
|
|
108852
|
-
status: response.status
|
|
108853
|
-
});
|
|
108854
|
-
await streamHandler.end();
|
|
108855
|
-
return {
|
|
108856
|
-
responseBytes,
|
|
108857
|
-
status: response.status,
|
|
108858
|
-
timeToFirstTokenMs
|
|
108859
|
-
};
|
|
108860
|
-
}
|
|
108861
|
-
const responsePayload = response.body
|
|
108862
|
-
? typeof response.body === "string"
|
|
108863
|
-
? response.body
|
|
108864
|
-
: JSON.stringify(response.body)
|
|
108865
|
-
: "";
|
|
108866
|
-
if (responsePayload.length > 0) {
|
|
108867
|
-
responseBytes = Buffer.byteLength(responsePayload, "utf8");
|
|
108868
|
-
timeToFirstTokenMs = Math.max(0, Date.now() - requestStartedAt);
|
|
108869
|
-
}
|
|
108870
|
-
await streamHandler.sendChunk({
|
|
108871
|
-
data: encodeBinaryChunk(Buffer.from(responsePayload)),
|
|
108872
|
-
headers: response.headers,
|
|
108873
|
-
sequence,
|
|
108874
|
-
status: response.status
|
|
108875
|
-
});
|
|
108876
|
-
await streamHandler.sendChunk({
|
|
108877
|
-
data: null,
|
|
108878
|
-
sequence: sequence + 1,
|
|
108879
|
-
status: response.status
|
|
108880
|
-
});
|
|
108881
|
-
await streamHandler.end();
|
|
108882
|
-
logger.info("SSE response queued", {
|
|
108883
|
-
requestMethod: requestID
|
|
108884
|
-
});
|
|
108885
|
-
return {
|
|
108886
|
-
responseBytes,
|
|
108887
|
-
status: response.status,
|
|
108888
|
-
timeToFirstTokenMs
|
|
108889
|
-
};
|
|
108890
|
-
}
|
|
108891
|
-
function encodeBinaryChunk(chunk) {
|
|
108892
|
-
return chunk.toString("base64");
|
|
108893
|
-
}
|
|
108894
|
-
async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
|
|
108895
|
-
const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
|
|
108896
|
-
const maxFlushAttempts = 3;
|
|
108897
|
-
let isAborted = false;
|
|
108898
|
-
let isClosed = false;
|
|
108899
|
-
let activeAbortController = null;
|
|
108900
|
-
const chunks = [];
|
|
108901
|
-
const sendChunk = async (payload) => {
|
|
108902
|
-
if (isAborted || isClosed) {
|
|
108903
|
-
return;
|
|
108904
|
-
}
|
|
108905
|
-
const response = ClientToServerAPIResponseSchema.parse({
|
|
108906
|
-
data: payload.data,
|
|
108907
|
-
headers: payload.headers,
|
|
108908
|
-
requestID,
|
|
108909
|
-
status: payload.status
|
|
108910
|
-
});
|
|
108911
|
-
const chunk = JSON.stringify({
|
|
108912
|
-
...response,
|
|
108913
|
-
sequence: payload.sequence
|
|
108914
|
-
});
|
|
108915
|
-
chunks.push(Buffer.from(chunk + "\n"));
|
|
108916
|
-
if (chunks.length >= 10) {
|
|
108917
|
-
await flushChunks();
|
|
108918
|
-
}
|
|
108919
|
-
};
|
|
108920
|
-
const flushChunks = async () => {
|
|
108921
|
-
if (chunks.length === 0 || isAborted) {
|
|
108922
|
-
return;
|
|
108923
|
-
}
|
|
108924
|
-
const batch = chunks.splice(0, chunks.length);
|
|
108925
|
-
const body = Buffer.concat(batch);
|
|
108926
|
-
for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
|
|
108927
108848
|
try {
|
|
108928
|
-
|
|
108929
|
-
|
|
108930
|
-
|
|
108931
|
-
|
|
108932
|
-
"content-type": "application/json",
|
|
108933
|
-
"x-api-key": configuration.apiKey
|
|
108934
|
-
},
|
|
108935
|
-
method: "POST",
|
|
108936
|
-
signal: activeAbortController.signal
|
|
108937
|
-
}, {
|
|
108938
|
-
maxAttempts: 2,
|
|
108939
|
-
timeoutMs: 15000
|
|
108940
|
-
});
|
|
108941
|
-
if (!response.ok) {
|
|
108942
|
-
throw new Error(`Chunk stream flush failed with status ${response.status}`);
|
|
108849
|
+
const parsed = JSON.parse(payload);
|
|
108850
|
+
let modified = false;
|
|
108851
|
+
if (coerceToolCallArguments(parsed)) {
|
|
108852
|
+
modified = true;
|
|
108943
108853
|
}
|
|
108944
|
-
|
|
108945
|
-
|
|
108946
|
-
|
|
108947
|
-
|
|
108948
|
-
|
|
108854
|
+
if (parsed.usage) {
|
|
108855
|
+
const usageChunk = parsed.usage;
|
|
108856
|
+
const effectiveContext = getEffectiveContextLength({
|
|
108857
|
+
contextLength,
|
|
108858
|
+
engine,
|
|
108859
|
+
parallelism
|
|
108860
|
+
});
|
|
108861
|
+
if (usageChunk.context_usage === undefined &&
|
|
108862
|
+
usageChunk.prompt_tokens !== undefined &&
|
|
108863
|
+
effectiveContext !== null) {
|
|
108864
|
+
usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
|
|
108865
|
+
modified = true;
|
|
108866
|
+
}
|
|
108949
108867
|
}
|
|
108950
|
-
if (
|
|
108951
|
-
|
|
108952
|
-
|
|
108868
|
+
if (modified) {
|
|
108869
|
+
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
108870
|
+
continue;
|
|
108953
108871
|
}
|
|
108954
|
-
logger.warn("Failed to send chunk batch", {
|
|
108955
|
-
...getNetworkErrorAttributes(error),
|
|
108956
|
-
error: asError(error)
|
|
108957
|
-
});
|
|
108958
|
-
await sleep(100 * attempt);
|
|
108959
108872
|
}
|
|
108960
|
-
|
|
108961
|
-
|
|
108873
|
+
catch (_error) {
|
|
108874
|
+
// Ignore malformed chunks
|
|
108962
108875
|
}
|
|
108876
|
+
modifiedLines.push(rawLine);
|
|
108963
108877
|
}
|
|
108964
|
-
|
|
108965
|
-
|
|
108966
|
-
|
|
108967
|
-
|
|
108878
|
+
return Buffer.from(modifiedLines.join("\n"), "utf8");
|
|
108879
|
+
}
|
|
108880
|
+
function parseUsageFromBuffer() {
|
|
108881
|
+
const lines = buffer.split("\n");
|
|
108882
|
+
buffer = lines.pop() ?? "";
|
|
108883
|
+
for (const rawLine of lines) {
|
|
108884
|
+
const line = rawLine.trim();
|
|
108885
|
+
if (!line.startsWith("data:")) {
|
|
108886
|
+
continue;
|
|
108887
|
+
}
|
|
108888
|
+
const payload = line.slice(5).trim();
|
|
108889
|
+
if (!payload || payload === "[DONE]") {
|
|
108890
|
+
continue;
|
|
108891
|
+
}
|
|
108892
|
+
try {
|
|
108893
|
+
const parsed = JSON.parse(payload);
|
|
108894
|
+
if (isEngineUsageChunk(parsed)) {
|
|
108895
|
+
const completionTokens = parsed.usage?.completion_tokens ?? null;
|
|
108896
|
+
const promptTokens = parsed.usage?.prompt_tokens ?? null;
|
|
108897
|
+
const totalTokens = parsed.usage?.total_tokens ?? null;
|
|
108898
|
+
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
108899
|
+
const effectiveContextForUsage = getEffectiveContextLength({
|
|
108900
|
+
contextLength,
|
|
108901
|
+
engine,
|
|
108902
|
+
parallelism
|
|
108903
|
+
});
|
|
108904
|
+
if (contextUsage === null &&
|
|
108905
|
+
promptTokens !== null &&
|
|
108906
|
+
effectiveContextForUsage !== null) {
|
|
108907
|
+
contextUsage = promptTokens / effectiveContextForUsage;
|
|
108908
|
+
}
|
|
108909
|
+
usage = {
|
|
108910
|
+
completionTokens,
|
|
108911
|
+
contextUsage,
|
|
108912
|
+
promptTokens,
|
|
108913
|
+
totalTokens
|
|
108914
|
+
};
|
|
108915
|
+
}
|
|
108916
|
+
}
|
|
108917
|
+
catch (_error) {
|
|
108918
|
+
// Ignore malformed chunks
|
|
108919
|
+
}
|
|
108968
108920
|
}
|
|
108969
|
-
|
|
108970
|
-
|
|
108971
|
-
|
|
108972
|
-
|
|
108973
|
-
isAborted = true;
|
|
108974
|
-
if (activeAbortController) {
|
|
108975
|
-
activeAbortController.abort();
|
|
108921
|
+
}
|
|
108922
|
+
function finalize(error) {
|
|
108923
|
+
if (completed) {
|
|
108924
|
+
return;
|
|
108976
108925
|
}
|
|
108977
|
-
|
|
108978
|
-
if (
|
|
108979
|
-
|
|
108980
|
-
|
|
108926
|
+
completed = true;
|
|
108927
|
+
if (onComplete) {
|
|
108928
|
+
const completion = onComplete({
|
|
108929
|
+
durationMs: Math.max(0, Date.now() - startedAt),
|
|
108930
|
+
error,
|
|
108931
|
+
requestBodyBytes,
|
|
108932
|
+
responseBytes,
|
|
108933
|
+
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
108934
|
+
usage
|
|
108981
108935
|
});
|
|
108936
|
+
if (completion && typeof completion.catch === "function") {
|
|
108937
|
+
completion.catch(error => {
|
|
108938
|
+
logger.error("Engine metrics completion failed", {
|
|
108939
|
+
error: asError(error),
|
|
108940
|
+
requestUrl: requestPath
|
|
108941
|
+
});
|
|
108942
|
+
});
|
|
108943
|
+
}
|
|
108982
108944
|
}
|
|
108983
|
-
};
|
|
108984
|
-
return {
|
|
108985
|
-
sendChunk,
|
|
108986
|
-
end,
|
|
108987
|
-
abort
|
|
108988
|
-
};
|
|
108989
|
-
}
|
|
108990
|
-
function calculateRequestBytes(body) {
|
|
108991
|
-
if (body === null || body === undefined) {
|
|
108992
|
-
return 0;
|
|
108993
108945
|
}
|
|
108994
|
-
|
|
108995
|
-
|
|
108946
|
+
body.on("data", (chunk) => {
|
|
108947
|
+
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
108948
|
+
if (firstChunkAt === null) {
|
|
108949
|
+
firstChunkAt = Date.now();
|
|
108950
|
+
}
|
|
108951
|
+
responseBytes += chunkBuffer.length;
|
|
108952
|
+
buffer += chunkBuffer.toString("utf8");
|
|
108953
|
+
parseUsageFromBuffer();
|
|
108954
|
+
passThrough.write(modifyChunkWithUsage(chunkBuffer));
|
|
108955
|
+
});
|
|
108956
|
+
body.once("error", err => {
|
|
108957
|
+
logEngineMetrics({
|
|
108958
|
+
agentEngineType,
|
|
108959
|
+
error: err,
|
|
108960
|
+
level: "error",
|
|
108961
|
+
logger,
|
|
108962
|
+
requestBodyBytes,
|
|
108963
|
+
requestPath,
|
|
108964
|
+
responseBytes,
|
|
108965
|
+
usage
|
|
108966
|
+
});
|
|
108967
|
+
finalize(err);
|
|
108968
|
+
passThrough.destroy(err);
|
|
108969
|
+
});
|
|
108970
|
+
body.once("end", () => {
|
|
108971
|
+
parseUsageFromBuffer();
|
|
108972
|
+
logEngineMetrics({
|
|
108973
|
+
agentEngineType,
|
|
108974
|
+
level: "info",
|
|
108975
|
+
logger,
|
|
108976
|
+
requestBodyBytes,
|
|
108977
|
+
requestPath,
|
|
108978
|
+
responseBytes,
|
|
108979
|
+
usage
|
|
108980
|
+
});
|
|
108981
|
+
finalize(null);
|
|
108982
|
+
passThrough.end();
|
|
108983
|
+
});
|
|
108984
|
+
body.once("close", () => {
|
|
108985
|
+
if (completed) {
|
|
108986
|
+
if (!passThrough.writableEnded) {
|
|
108987
|
+
passThrough.end();
|
|
108988
|
+
}
|
|
108989
|
+
return;
|
|
108990
|
+
}
|
|
108991
|
+
const closeError = new Error("Engine response stream closed before completion");
|
|
108992
|
+
logEngineMetrics({
|
|
108993
|
+
agentEngineType,
|
|
108994
|
+
error: closeError,
|
|
108995
|
+
level: "error",
|
|
108996
|
+
logger,
|
|
108997
|
+
requestBodyBytes,
|
|
108998
|
+
requestPath,
|
|
108999
|
+
responseBytes,
|
|
109000
|
+
usage
|
|
109001
|
+
});
|
|
109002
|
+
finalize(closeError);
|
|
109003
|
+
if (!passThrough.writableEnded) {
|
|
109004
|
+
passThrough.end();
|
|
109005
|
+
}
|
|
109006
|
+
});
|
|
109007
|
+
return {
|
|
109008
|
+
stream: passThrough
|
|
109009
|
+
};
|
|
109010
|
+
}
|
|
109011
|
+
function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
109012
|
+
const maxUsageCaptureBytes = 1024 * 1024;
|
|
109013
|
+
const startedAt = requestStartedAt ?? Date.now();
|
|
109014
|
+
const passThrough = new PassThrough();
|
|
109015
|
+
let responseBytes = 0;
|
|
109016
|
+
let firstChunkAt = null;
|
|
109017
|
+
let usage = null;
|
|
109018
|
+
const usageChunks = [];
|
|
109019
|
+
let usageBytes = 0;
|
|
109020
|
+
let usageCaptureEnabled = true;
|
|
109021
|
+
let completed = false;
|
|
109022
|
+
function finalize(error) {
|
|
109023
|
+
if (completed) {
|
|
109024
|
+
return;
|
|
109025
|
+
}
|
|
109026
|
+
completed = true;
|
|
109027
|
+
if (onComplete) {
|
|
109028
|
+
const completion = onComplete({
|
|
109029
|
+
durationMs: Math.max(0, Date.now() - startedAt),
|
|
109030
|
+
error,
|
|
109031
|
+
requestBodyBytes,
|
|
109032
|
+
responseBytes,
|
|
109033
|
+
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
109034
|
+
usage
|
|
109035
|
+
});
|
|
109036
|
+
if (completion && typeof completion.catch === "function") {
|
|
109037
|
+
completion.catch(error => {
|
|
109038
|
+
logger.error("Engine metrics completion failed", {
|
|
109039
|
+
error: asError(error),
|
|
109040
|
+
requestUrl: requestPath
|
|
109041
|
+
});
|
|
109042
|
+
});
|
|
109043
|
+
}
|
|
109044
|
+
}
|
|
108996
109045
|
}
|
|
108997
|
-
|
|
109046
|
+
body.on("data", (chunk) => {
|
|
109047
|
+
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
109048
|
+
if (firstChunkAt === null) {
|
|
109049
|
+
firstChunkAt = Date.now();
|
|
109050
|
+
}
|
|
109051
|
+
responseBytes += chunkBuffer.length;
|
|
109052
|
+
if (usageCaptureEnabled) {
|
|
109053
|
+
const nextSize = usageBytes + chunkBuffer.length;
|
|
109054
|
+
if (nextSize <= maxUsageCaptureBytes) {
|
|
109055
|
+
usageChunks.push(chunkBuffer);
|
|
109056
|
+
usageBytes = nextSize;
|
|
109057
|
+
}
|
|
109058
|
+
else {
|
|
109059
|
+
usageCaptureEnabled = false;
|
|
109060
|
+
usageChunks.length = 0;
|
|
109061
|
+
}
|
|
109062
|
+
}
|
|
109063
|
+
passThrough.write(chunkBuffer);
|
|
109064
|
+
});
|
|
109065
|
+
body.once("error", err => {
|
|
109066
|
+
logEngineMetrics({
|
|
109067
|
+
agentEngineType,
|
|
109068
|
+
error: err,
|
|
109069
|
+
level: "error",
|
|
109070
|
+
logger,
|
|
109071
|
+
requestBodyBytes,
|
|
109072
|
+
requestPath,
|
|
109073
|
+
responseBytes,
|
|
109074
|
+
usage
|
|
109075
|
+
});
|
|
109076
|
+
finalize(err);
|
|
109077
|
+
passThrough.destroy(err);
|
|
109078
|
+
});
|
|
109079
|
+
body.once("end", () => {
|
|
109080
|
+
if (usageCaptureEnabled) {
|
|
109081
|
+
try {
|
|
109082
|
+
const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
|
|
109083
|
+
if (parsed.usage) {
|
|
109084
|
+
const usageChunk = parsed.usage;
|
|
109085
|
+
const completionTokens = usageChunk.completion_tokens ?? null;
|
|
109086
|
+
const promptTokens = usageChunk.prompt_tokens ?? null;
|
|
109087
|
+
const totalTokens = usageChunk.total_tokens ?? null;
|
|
109088
|
+
let contextUsage = usageChunk.context_usage ?? null;
|
|
109089
|
+
const effectiveContext = getEffectiveContextLength({
|
|
109090
|
+
contextLength,
|
|
109091
|
+
engine,
|
|
109092
|
+
parallelism
|
|
109093
|
+
});
|
|
109094
|
+
if (contextUsage === null &&
|
|
109095
|
+
promptTokens !== null &&
|
|
109096
|
+
effectiveContext !== null) {
|
|
109097
|
+
contextUsage = promptTokens / effectiveContext;
|
|
109098
|
+
}
|
|
109099
|
+
usage = {
|
|
109100
|
+
completionTokens,
|
|
109101
|
+
contextUsage,
|
|
109102
|
+
promptTokens,
|
|
109103
|
+
totalTokens
|
|
109104
|
+
};
|
|
109105
|
+
}
|
|
109106
|
+
}
|
|
109107
|
+
catch (error) {
|
|
109108
|
+
logger.error("Failed to parse engine response body", {
|
|
109109
|
+
error: asError(error),
|
|
109110
|
+
requestUrl: requestPath
|
|
109111
|
+
});
|
|
109112
|
+
}
|
|
109113
|
+
}
|
|
109114
|
+
logEngineMetrics({
|
|
109115
|
+
agentEngineType,
|
|
109116
|
+
level: "info",
|
|
109117
|
+
logger,
|
|
109118
|
+
requestBodyBytes,
|
|
109119
|
+
requestPath,
|
|
109120
|
+
responseBytes,
|
|
109121
|
+
usage
|
|
109122
|
+
});
|
|
109123
|
+
finalize(null);
|
|
109124
|
+
passThrough.end();
|
|
109125
|
+
});
|
|
109126
|
+
body.once("close", () => {
|
|
109127
|
+
if (completed) {
|
|
109128
|
+
if (!passThrough.writableEnded) {
|
|
109129
|
+
passThrough.end();
|
|
109130
|
+
}
|
|
109131
|
+
return;
|
|
109132
|
+
}
|
|
109133
|
+
const closeError = new Error("Engine response stream closed before completion");
|
|
109134
|
+
logEngineMetrics({
|
|
109135
|
+
agentEngineType,
|
|
109136
|
+
error: closeError,
|
|
109137
|
+
level: "error",
|
|
109138
|
+
logger,
|
|
109139
|
+
requestBodyBytes,
|
|
109140
|
+
requestPath,
|
|
109141
|
+
responseBytes,
|
|
109142
|
+
usage
|
|
109143
|
+
});
|
|
109144
|
+
finalize(closeError);
|
|
109145
|
+
if (!passThrough.writableEnded) {
|
|
109146
|
+
passThrough.end();
|
|
109147
|
+
}
|
|
109148
|
+
});
|
|
109149
|
+
return {
|
|
109150
|
+
stream: passThrough
|
|
109151
|
+
};
|
|
109152
|
+
}
|
|
109153
|
+
function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
|
|
109154
|
+
const metricsMessage = [
|
|
109155
|
+
"LLM engine stream metrics",
|
|
109156
|
+
`path=${requestPath}`,
|
|
109157
|
+
`bytesTo=${requestBodyBytes}`,
|
|
109158
|
+
`bytesFrom=${responseBytes}`,
|
|
109159
|
+
`promptTokens=${usage?.promptTokens ?? "n/a"}`,
|
|
109160
|
+
`completionTokens=${usage?.completionTokens ?? "n/a"}`,
|
|
109161
|
+
`totalTokens=${usage?.totalTokens ?? "n/a"}`,
|
|
109162
|
+
`contextUsage=${usage?.contextUsage ?? "n/a"}`
|
|
109163
|
+
].join(" ");
|
|
109164
|
+
const attributes = {
|
|
109165
|
+
agentEngineType,
|
|
109166
|
+
requestUrl: requestPath
|
|
109167
|
+
};
|
|
109168
|
+
if (error) {
|
|
109169
|
+
attributes.error = error;
|
|
109170
|
+
}
|
|
109171
|
+
logger[level](metricsMessage, attributes);
|
|
109172
|
+
}
|
|
109173
|
+
|
|
109174
|
+
function isPlainObject$1(value) {
|
|
109175
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
109176
|
+
}
|
|
109177
|
+
/**
 * Serialize a request body for the engine and measure its UTF-8 size.
 *
 * - Strings are sent as-is; other non-object values are JSON-encoded.
 * - Object bodies that request streaming (`stream === true`) get
 *   `stream_options.include_usage = true` merged in, so that token usage is
 *   emitted on the stream. Any other `stream_options` keys are preserved.
 * - Object bodies that do not request streaming are serialized untouched:
 *   `stream_options` is only valid alongside `stream: true`, and injecting it
 *   into a non-streaming request can make the upstream reject the call.
 *
 * The caller's object is never mutated.
 *
 * @param {unknown} body - Request body as received from the client.
 * @returns {{ bytes: number, payload: string }} Serialized payload and its byte length.
 */
function serializeRequestBody(body) {
    const measure = payload => ({
        bytes: Buffer.byteLength(payload, "utf8"),
        payload
    });
    if (!isPlainObject$1(body)) {
        return measure(typeof body === "string" ? body : JSON.stringify(body));
    }
    if (body.stream !== true) {
        return measure(JSON.stringify(body));
    }
    const streamOptions = isPlainObject$1(body.stream_options)
        ? body.stream_options
        : {};
    return measure(JSON.stringify({
        ...body,
        stream_options: { ...streamOptions, include_usage: true }
    }));
}
|
|
108999
109198
|
function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
|
|
109000
109199
|
if (durationMs <= 0) {
|
|
@@ -109006,107 +109205,714 @@ function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
|
|
|
109006
109205
|
}
|
|
109007
109206
|
return Math.round(tokensPerSecond);
|
|
109008
109207
|
}
|
|
109009
|
-
|
|
109010
|
-
|
|
109011
|
-
|
|
109012
|
-
|
|
109013
|
-
|
|
109014
|
-
|
|
109015
|
-
|
|
109016
|
-
|
|
109017
|
-
|
|
109208
|
+
/**
 * Forward an OpenAI-compatible POST request to the engine through
 * `modelManager.fetchOpenAI` and return the response wrapped in a monitor
 * that reports usage metrics once the body has been consumed.
 *
 * Behaviour by outcome:
 * - The fetch rejects: engine metrics are logged, a failure metric is
 *   reported, and the original error is rethrown.
 * - The upstream answers with a non-2xx status: the error text is logged. It
 *   is read from a clone of the response so the original body stays
 *   unconsumed and can still be forwarded to the caller.
 * - The response has no body: metrics are logged and reported as a failure,
 *   and `{ status, statusText }` is returned.
 * - Otherwise the body is converted to a Node stream and handed to the
 *   streaming or single-response monitor, depending on whether the request
 *   asked for `stream: true`.
 *
 * Metric uploads are fire-and-forget; upload failures are only logged.
 *
 * @param {object} options
 * @param {unknown} options.body - Client request body.
 * @param {object} options.configuration - Provides `agentEngineType`.
 * @param {object} options.logger - Logger with `warn` and `error` (and the levels used by `logEngineMetrics`).
 * @param {string} options.modelID - Model identifier reported with the metrics.
 * @param {object} options.modelManager - Provides `fetchOpenAI`, `contextLength`, `parallelism`.
 * @param {string} options.path - Upstream path, e.g. "/v1/chat/completions".
 * @param {(payload: object) => Promise<unknown>} options.reportMetrics - Metric uploader.
 * @returns {Promise<object>} `{ body, headers, status }` or, without a body, `{ status, statusText }`.
 */
async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID, modelManager, path, reportMetrics }) {
    function normalizeTokenCount(value) {
        if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
            return value;
        }
        return 0;
    }
    function reportMetricsSafe(payload) {
        reportMetrics(payload).catch(error => {
            logger.warn("Failed to upload LLM prompt metrics", {
                error: asError(error),
                requestUrl: path
            });
        });
    }
    // Metrics for a request that produced no response body at all.
    function reportFailureMetrics() {
        reportMetricsSafe({
            bytes: requestBodyBytes,
            completionTokens: 0,
            engine: configuration.agentEngineType,
            endpointId: null,
            latencyMs: Math.max(0, Date.now() - requestStartedAt),
            modelId: modelID,
            promptTokens: 0,
            requestBytes: requestBodyBytes,
            requestId: null,
            requestMethod: "POST",
            requestPath: path,
            responseBytes: 0,
            successful: false,
            timeToFirstTokenMs: null,
            tokensPerSecond: 0,
            totalTokens: 0
        });
    }
    // Best-effort read of an error body for logging. Reading from a clone
    // keeps the original body unlocked so it can still be streamed onwards.
    async function readErrorBody(upstream) {
        try {
            const source = upstream.body ? upstream.clone() : upstream;
            return await source.text();
        }
        catch {
            return null;
        }
    }
    const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
    const requestStartedAt = Date.now();
    // `?.` guards against payloads that parse to null.
    const streamRequested = JSON.parse(serializedBody)?.stream === true;
    const onMonitoringComplete = ({ durationMs, error, responseBytes, timeToFirstTokenMs, usage }) => {
        const completionTokens = normalizeTokenCount(usage?.completionTokens);
        const promptTokens = normalizeTokenCount(usage?.promptTokens);
        const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
        const latencyMs = Math.max(0, durationMs);
        reportMetricsSafe({
            bytes: requestBodyBytes + responseBytes,
            completionTokens,
            engine: configuration.agentEngineType,
            endpointId: null,
            latencyMs,
            modelId: modelID,
            promptTokens,
            requestBytes: requestBodyBytes,
            requestId: null,
            requestMethod: "POST",
            requestPath: path,
            responseBytes,
            successful: !error,
            timeToFirstTokenMs,
            tokensPerSecond: calculateTokensPerSecond$1({
                durationMs: latencyMs,
                totalTokens
            }),
            totalTokens
        });
    };
    let response;
    try {
        response = await modelManager.fetchOpenAI(path, {
            body: serializedBody,
            headers: {
                "Content-Type": "application/json"
            },
            method: "POST"
        });
    }
    catch (error) {
        logEngineMetrics({
            agentEngineType: configuration.agentEngineType,
            error: error,
            level: "error",
            logger,
            requestBodyBytes,
            requestPath: path,
            responseBytes: 0,
            usage: null
        });
        reportFailureMetrics();
        throw error;
    }
    const responseStatusText = response.statusText ?? "Upstream request failed";
    if (!response.ok) {
        const responseBody = await readErrorBody(response);
        const responseError = new Error(responseBody
            ? `Upstream error response: ${responseBody}`
            : "Upstream error response: empty body");
        logger.error("LLM engine request failed", {
            error: responseError,
            requestUrl: path,
            statusCode: response.status,
            statusText: responseStatusText,
            responseBody: responseBody ?? undefined
        });
    }
    if (!response.body) {
        // Covers both successful and failed responses without a body, so
        // failures are counted in the metrics as well.
        logEngineMetrics({
            agentEngineType: configuration.agentEngineType,
            level: response.ok ? "info" : "error",
            logger,
            requestBodyBytes,
            requestPath: path,
            responseBytes: 0,
            usage: null
        });
        reportFailureMetrics();
        return {
            status: response.status,
            statusText: responseStatusText
        };
    }
    const monitor = streamRequested ? monitorEngineResponseStream : monitorEngineResponseSingle;
    const monitoredResponse = monitor({
        agentEngineType: configuration.agentEngineType,
        body: Readable.fromWeb(response.body),
        contextLength: modelManager.contextLength,
        engine: configuration.agentEngineType,
        logger,
        onComplete: onMonitoringComplete,
        parallelism: modelManager.parallelism,
        requestBodyBytes,
        requestPath: path,
        requestStartedAt
    });
    return {
        body: monitoredResponse.stream,
        headers: Object.fromEntries(response.headers.entries()),
        status: response.status
    };
}
|
|
109042
109379
|
|
|
109043
|
-
|
|
109044
|
-
|
|
109045
|
-
|
|
109046
|
-
|
|
109047
|
-
|
|
109048
|
-
|
|
109049
|
-
|
|
109050
|
-
|
|
109051
|
-
|
|
109052
|
-
|
|
109053
|
-
|
|
109054
|
-
|
|
109055
|
-
|
|
109056
|
-
|
|
109057
|
-
this.logger = logger;
|
|
109058
|
-
this.stateIntervalMs = stateIntervalMs;
|
|
109059
|
-
}
|
|
109060
|
-
async start() {
|
|
109061
|
-
await this.sendConduitState();
|
|
109062
|
-
this.stateInterval = setInterval(() => {
|
|
109063
|
-
this.sendConduitState().catch(error => {
|
|
109064
|
-
this.logger.error("Conduit state update failed", {
|
|
109065
|
-
error: asError(error)
|
|
109380
|
+
/**
 * Build the route table for the OpenAI-compatible endpoints.
 *
 * - `POST /v1/chat/completions` and `POST /v1/completions` proxy the request
 *   body to the engine via `proxyOpenAIStreamingRoute`, resolving the model ID
 *   and model manager at request time.
 * - `GET /v1/models` returns a single-entry model list describing the current
 *   model and its effective context length.
 *
 * @param {object} options
 * @param {object} options.apiClient - Provides `reportPromptMetrics(payload)`.
 * @param {object} options.configuration - Provides `agentEngineType`.
 * @param {() => string} options.getModelID - Returns the current model ID.
 * @param {() => object} options.getModelManager - Returns the current model manager.
 * @param {object} options.logger - Logger passed through to the proxy.
 * @param {number} options.startup - Startup time; divided by 1000 for `created`
 *   (assumed to be epoch milliseconds — TODO confirm against the caller).
 * @returns {object} Map of path -> HTTP method -> async handler.
 */
function createConduitOpenAIAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger, startup }) {
    // Call through apiClient so the method keeps its `this` binding.
    const reportMetrics = payload => apiClient.reportPromptMetrics(payload);
    const createProxyHandler = path => async ({ body }) => {
        const modelID = getModelID();
        const modelManager = getModelManager();
        return proxyOpenAIStreamingRoute({
            body,
            configuration,
            logger,
            modelID,
            modelManager,
            path,
            reportMetrics
        });
    };
    return {
        "/v1/chat/completions": {
            POST: createProxyHandler("/v1/chat/completions")
        },
        "/v1/completions": {
            POST: createProxyHandler("/v1/completions")
        },
        "/v1/models": {
            GET: async () => {
                const modelID = getModelID();
                const modelManager = getModelManager();
                const effectiveContextLength = getEffectiveContextLength({
                    contextLength: modelManager.contextLength,
                    engine: configuration.agentEngineType,
                    parallelism: modelManager.parallelism
                });
                return {
                    body: {
                        data: [
                            {
                                // OpenAI model objects carry `created` as whole seconds.
                                created: Math.floor(startup / 1000),
                                id: modelID,
                                limit: {
                                    context: effectiveContextLength
                                },
                                object: "model",
                                owned_by: "infersec"
                            }
                        ],
                        object: "list"
                    },
                    status: 200
                };
            }
        }
    };
}
|
|
109442
|
+
/**
 * Build the handler for `GET /v1/models`.
 *
 * @param {object} options - Forwarded to `createConduitOpenAIAPIReferenceHandlers`.
 * @returns {Function} The `GET` handler registered under "/v1/models".
 */
function createGetModelsHandler(options) {
    const handlers = createConduitOpenAIAPIReferenceHandlers(options);
    const { GET: getModels } = handlers["/v1/models"];
    return getModels;
}
|
|
109445
|
+
/**
 * Build the handler for `POST /v1/chat/completions`.
 *
 * @param {object} options - Forwarded to `createConduitOpenAIAPIReferenceHandlers`.
 * @returns {Function} The `POST` handler registered under "/v1/chat/completions".
 */
function createPostChatCompletionsHandler(options) {
    const handlers = createConduitOpenAIAPIReferenceHandlers(options);
    const { POST: postChatCompletions } = handlers["/v1/chat/completions"];
    return postChatCompletions;
}
|
|
109448
|
+
/**
 * Build the handler for `POST /v1/completions`.
 *
 * @param {object} options - Forwarded to `createConduitOpenAIAPIReferenceHandlers`.
 * @returns {Function} The `POST` handler registered under "/v1/completions".
 */
function createPostCompletionsHandler(options) {
    const handlers = createConduitOpenAIAPIReferenceHandlers(options);
    const { POST: postCompletions } = handlers["/v1/completions"];
    return postCompletions;
}
|
|
109451
|
+
|
|
109452
|
+
/**
 * Build a health-check handler that always answers `200 OK`.
 *
 * @returns {(req: unknown, res: object) => void} Handler that calls
 *   `res.status(200)` and then `.send("OK")` on the value it returns.
 */
function createHealthHandler() {
    const healthHandler = (_req, res) => {
        const okResponse = res.status(200);
        okResponse.send("OK");
    };
    return healthHandler;
}
|
|
109457
|
+
|
|
109458
|
+
/**
 * Keep an SSE connection to the API open and dispatch every "request" event
 * to `handleRequest`, reconnecting until `signal` is aborted.
 *
 * Reconnect policy, as implemented below:
 * - If `connectSSE` resolves, the loop reconnects straight away.
 * - If it rejects with an error `isTerminatedError` recognises, the loop
 *   reconnects straight away without logging.
 * - Any other rejection is logged and followed by an exponential backoff
 *   (1s minimum, 30s maximum); the attempt counter resets after a connection
 *   that lasted more than 10 seconds.
 *
 * Requests are handled concurrently: `handleRequest` is not awaited, and its
 * failures are only logged.
 *
 * @param {object} options
 * @param {string} options.apiURL - Base URL of the API.
 * @param {object} options.configuration - Provides `inferenceSourceID` and `apiKey`.
 * @param {object} options.logger - Logger with `error` and `warn`.
 * @param {string} options.modelID - Forwarded to `handleRequest`.
 * @param {Function} options.onRequest - Forwarded to `handleRequest`.
 * @param {Function} [options.onRequestEnd] - Forwarded to `handleRequest`.
 * @param {Function} [options.onRequestStart] - Forwarded to `handleRequest`.
 * @param {Function} options.reportMetrics - Forwarded to `handleRequest`.
 * @param {AbortSignal} [options.signal] - Stops the loop when aborted.
 * @returns {Promise<void>} Resolves once the signal is aborted.
 */
async function handleSSERequests({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, signal }) {
    const maxReconnectDelayMs = 30000;
    const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/stream`;
    const onError = (error) => {
        logger.error("SSE connection error", {
            error
        });
    };
    const onMessage = (message) => {
        if (message.event !== "request") {
            return;
        }
        const payload = ServerToClientAPIRequestSchema.parse(JSON.parse(message.data));
        const pending = handleRequest({
            apiURL,
            configuration,
            logger,
            modelID,
            onRequest,
            onRequestEnd,
            onRequestStart,
            reportMetrics,
            request: payload,
            signal
        });
        pending.catch(error => {
            logger.error("SSE request handler failed", {
                error: asError(error),
                requestMethod: payload.requestID
            });
        });
    };
    let reconnectAttempt = 0;
    while (!signal?.aborted) {
        const connectionStartedAt = Date.now();
        try {
            await connectSSE(streamURL, {
                headers: {
                    "x-api-key": configuration.apiKey
                },
                onError,
                onMessage,
                signal
            });
        }
        catch (error) {
            if (signal?.aborted) {
                return;
            }
            if (isTerminatedError(error)) {
                // Terminated connections reconnect immediately; the loop
                // condition re-checks the abort signal.
                continue;
            }
            logger.error("SSE connection failed", {
                ...getNetworkErrorAttributes(error),
                error: asError(error)
            });
            if (signal?.aborted) {
                return;
            }
            const connectionDurationMs = Date.now() - connectionStartedAt;
            reconnectAttempt = connectionDurationMs > 10000 ? 0 : reconnectAttempt + 1;
            const exponentialDelayMs = 1000 * 2 ** Math.min(6, reconnectAttempt);
            const reconnectDelayMs = Math.min(maxReconnectDelayMs, Math.max(1000, exponentialDelayMs));
            logger.warn("SSE disconnected, retrying");
            await sleep(reconnectDelayMs);
        }
    }
}
|
|
109524
|
+
/**
 * Process one request received over SSE: run it through `onRequest`, stream
 * the response back to the API, and report request metrics.
 *
 * On failure a "502 Bad gateway" notification is sent for the request and a
 * failure metric is reported. The failure metric is reported even when the
 * notification itself cannot be delivered; in that case the delivery error
 * still propagates to the caller. `onRequestEnd` always runs last.
 *
 * Token counts are reported as zero on this path; only byte counts and
 * timings are measured here.
 *
 * @param {object} options
 * @param {string} options.apiURL - Base URL of the API.
 * @param {object} options.configuration - Provides `agentEngineType`; forwarded to the stream helpers.
 * @param {object} options.logger - Logger with `warn` and `error`.
 * @param {string} options.modelID - Model identifier reported with the metrics.
 * @param {(request: object) => Promise<object>} options.onRequest - Produces the response.
 * @param {(request: object) => unknown} [options.onRequestEnd] - Called after processing, success or not.
 * @param {(request: object) => unknown} [options.onRequestStart] - Called before processing.
 * @param {(payload: object) => Promise<unknown>} options.reportMetrics - Metric uploader.
 * @param {object} options.request - Request with `requestID`, `method`, `path` and optional `body`.
 * @param {AbortSignal} [options.signal] - Forwarded to `streamResponse`.
 * @returns {Promise<void>}
 */
async function handleRequest({ apiURL, configuration, logger, modelID, onRequest, onRequestEnd, onRequestStart, reportMetrics, request, signal }) {
    function reportMetricsSafe(payload) {
        reportMetrics(payload).catch(error => {
            logger.warn("Failed to upload LLM prompt metrics", {
                error: asError(error),
                requestUrl: request.path
            });
        });
    }
    // Shared shape of the success and failure metrics.
    function reportRequestMetrics({ latencyMs, responseBytes, successful, timeToFirstTokenMs }) {
        const totalTokens = 0;
        reportMetricsSafe({
            bytes: requestBytes + responseBytes,
            completionTokens: 0,
            engine: configuration.agentEngineType,
            endpointId: null,
            latencyMs,
            modelId: modelID,
            promptTokens: 0,
            requestBytes,
            requestId: request.requestID,
            requestMethod: request.method,
            requestPath: request.path,
            responseBytes,
            successful,
            timeToFirstTokenMs,
            tokensPerSecond: calculateTokensPerSecond({
                durationMs: latencyMs,
                totalTokens
            }),
            totalTokens
        });
    }
    const requestStartedAt = Date.now();
    const requestBytes = calculateRequestBytes(request.body ?? null);
    try {
        await onRequestStart?.(request);
        const response = await onRequest(request);
        const responseMetrics = await streamResponse({
            apiURL,
            configuration,
            logger,
            requestID: request.requestID,
            requestStartedAt,
            response,
            signal
        });
        reportRequestMetrics({
            latencyMs: Math.max(0, Date.now() - requestStartedAt),
            responseBytes: responseMetrics.responseBytes,
            successful: responseMetrics.status < 400,
            timeToFirstTokenMs: responseMetrics.timeToFirstTokenMs
        });
    }
    catch (error) {
        logger.error("SSE request failed", {
            error: asError(error),
            requestMethod: request.requestID
        });
        const failureMessage = "Bad gateway\n\nProxying failed";
        const failureBytes = Buffer.byteLength(failureMessage, "utf8");
        // Measured before the notification is sent, so delivery time is excluded.
        const latencyMs = Math.max(0, Date.now() - requestStartedAt);
        try {
            const streamHandler = await sendChunkStream({
                apiURL,
                configuration,
                requestID: request.requestID,
                logger
            });
            await streamHandler.sendChunk({
                data: encodeBinaryChunk(Buffer.from(failureMessage)),
                sequence: 0,
                status: 502
            });
            await streamHandler.sendChunk({
                data: null,
                sequence: 1,
                status: 502
            });
            await streamHandler.end();
        }
        finally {
            // Count the failure even if the notification could not be delivered.
            reportRequestMetrics({
                latencyMs,
                responseBytes: failureBytes,
                successful: false,
                timeToFirstTokenMs: latencyMs
            });
        }
    }
    finally {
        await onRequestEnd?.(request);
    }
}
|
|
109625
|
+
/**
 * Send a handler response back to the API as a sequence of chunks followed
 * by a terminating chunk whose `data` is null.
 *
 * - A `Readable` body is forwarded chunk by chunk; if `signal` is aborted
 *   while iterating, the chunk stream is aborted and an error is thrown.
 * - Any other body is sent as one chunk: strings verbatim, other truthy
 *   values JSON-encoded, falsy values as an empty payload. This path also
 *   attaches `response.headers` to the data chunk and logs once finished.
 *
 * @param {object} options
 * @param {string} options.apiURL - Base URL of the API.
 * @param {object} options.configuration - Forwarded to `sendChunkStream`.
 * @param {object} options.logger - Logger with `info`; forwarded to `sendChunkStream`.
 * @param {string} options.requestID - Identifier of the request being answered.
 * @param {number} options.requestStartedAt - Epoch milliseconds used for time-to-first-token.
 * @param {object} options.response - Response with `body`, `status` and optional `headers`.
 * @param {AbortSignal} [options.signal] - Cancels a streamed body.
 * @returns {Promise<{ responseBytes: number, status: number, timeToFirstTokenMs: number|null }>}
 */
async function streamResponse({ apiURL, configuration, logger, requestID, requestStartedAt, response, signal }) {
    let nextSequence = 0;
    let bytesSent = 0;
    let firstByteMs = null;
    const streamHandler = await sendChunkStream({
        apiURL,
        configuration,
        requestID,
        logger
    });
    const elapsedMs = () => Math.max(0, Date.now() - requestStartedAt);
    const summarize = () => ({
        responseBytes: bytesSent,
        status: response.status,
        timeToFirstTokenMs: firstByteMs
    });
    const finish = async (sequence) => {
        await streamHandler.sendChunk({
            data: null,
            sequence,
            status: response.status
        });
        await streamHandler.end();
    };
    if (!(response.body instanceof Readable)) {
        let payloadText = "";
        if (response.body) {
            payloadText = typeof response.body === "string"
                ? response.body
                : JSON.stringify(response.body);
        }
        if (payloadText.length > 0) {
            bytesSent = Buffer.byteLength(payloadText, "utf8");
            firstByteMs = elapsedMs();
        }
        await streamHandler.sendChunk({
            data: encodeBinaryChunk(Buffer.from(payloadText)),
            headers: response.headers,
            sequence: nextSequence,
            status: response.status
        });
        await finish(nextSequence + 1);
        logger.info("SSE response queued", {
            requestMethod: requestID
        });
        return summarize();
    }
    for await (const chunk of response.body) {
        if (signal?.aborted) {
            streamHandler.abort();
            throw new Error("Request cancelled");
        }
        const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
        if (firstByteMs === null) {
            firstByteMs = elapsedMs();
        }
        bytesSent += buffer.length;
        await streamHandler.sendChunk({
            data: encodeBinaryChunk(buffer),
            sequence: nextSequence,
            status: response.status
        });
        nextSequence += 1;
    }
    await finish(nextSequence);
    return summarize();
}
|
|
109697
|
+
/**
 * Encode a binary chunk as base64 text.
 *
 * @param {Buffer} chunk - Chunk to encode; its `toString("base64")` is used.
 * @returns {string} Base64 representation of the chunk.
 */
function encodeBinaryChunk(chunk) {
    const encoded = chunk.toString("base64");
    return encoded;
}
|
|
109700
|
+
/**
 * Create a batching sender for the response chunks of one request.
 *
 * Chunks are validated, serialized as newline-delimited JSON and buffered;
 * the buffer is POSTed to the API once it holds 10 chunks and again on
 * `end()`. Each flush is attempted up to three times with a short, growing
 * pause in between. When the last attempt fails the batch is put back at the
 * front of the buffer and the error is thrown.
 *
 * @param {object} options
 * @param {string} options.apiURL - Base URL of the API.
 * @param {object} options.configuration - Provides `inferenceSourceID` and `apiKey`.
 * @param {string} options.requestID - Identifier of the request being answered.
 * @param {object} options.logger - Logger with `warn` and `error`.
 * @returns {Promise<{ sendChunk: Function, end: Function, abort: Function }>}
 *   `sendChunk(payload)` buffers a chunk (no-op after `end`/`abort`),
 *   `end()` flushes what is buffered and closes the sender,
 *   `abort(error?)` cancels the in-flight flush, drops buffered chunks and
 *   logs `error` when given.
 */
async function sendChunkStream({ apiURL, configuration, requestID, logger }) {
    const streamURL = `${apiURL}/conduit/api/v1/source/${configuration.inferenceSourceID}/requests/${requestID}/stream`;
    const maxFlushAttempts = 3;
    const maxBufferedChunks = 10;
    let isAborted = false;
    let isClosed = false;
    let activeAbortController = null;
    // Serialized chunks, one newline-terminated JSON document each.
    const chunks = [];
    const flushChunks = async () => {
        if (chunks.length === 0 || isAborted) {
            return;
        }
        const batch = chunks.splice(0, chunks.length);
        const body = batch.join("");
        for (let attempt = 1; attempt <= maxFlushAttempts; attempt += 1) {
            // abort() may have been called while waiting between attempts;
            // an aborted stream must not send anything further.
            if (isAborted) {
                return;
            }
            try {
                activeAbortController = new AbortController();
                const response = await fetchWithRetry(streamURL, {
                    body,
                    headers: {
                        "content-type": "application/json",
                        "x-api-key": configuration.apiKey
                    },
                    method: "POST",
                    signal: activeAbortController.signal
                }, {
                    maxAttempts: 2,
                    timeoutMs: 15000
                });
                if (!response.ok) {
                    throw new Error(`Chunk stream flush failed with status ${response.status}`);
                }
                return;
            }
            catch (error) {
                if (isAborted) {
                    return;
                }
                if (attempt >= maxFlushAttempts) {
                    // Keep the batch so a later flush can retry it in order.
                    chunks.unshift(...batch);
                    throw asError(error);
                }
                logger.warn("Failed to send chunk batch", {
                    ...getNetworkErrorAttributes(error),
                    error: asError(error)
                });
                await sleep(100 * attempt);
            }
            finally {
                activeAbortController = null;
            }
        }
    };
    const sendChunk = async (payload) => {
        if (isAborted || isClosed) {
            return;
        }
        const response = ClientToServerAPIResponseSchema.parse({
            data: payload.data,
            headers: payload.headers,
            requestID,
            status: payload.status
        });
        chunks.push(`${JSON.stringify({
            ...response,
            sequence: payload.sequence
        })}\n`);
        if (chunks.length >= maxBufferedChunks) {
            await flushChunks();
        }
    };
    const end = async () => {
        if (isClosed || isAborted) {
            return;
        }
        await flushChunks();
        isClosed = true;
    };
    const abort = (error) => {
        isAborted = true;
        if (activeAbortController) {
            activeAbortController.abort();
        }
        chunks.length = 0;
        if (error) {
            logger.error("Chunk stream aborted", {
                error: asError(error)
            });
        }
    };
    return {
        sendChunk,
        end,
        abort
    };
}
|
|
109796
|
+
/**
 * Measure the UTF-8 size of a request body.
 *
 * @param {unknown} body - Request body; null/undefined count as empty.
 * @returns {number} 0 for a missing body, the byte length of a string body,
 *   otherwise the byte length of the body's JSON encoding.
 */
function calculateRequestBytes(body) {
    const isMissing = body === undefined || body === null;
    if (isMissing) {
        return 0;
    }
    const serialized = typeof body === "string" ? body : JSON.stringify(body);
    return Buffer.byteLength(serialized, "utf8");
}
|
|
109805
|
+
/**
 * Convert a token count and a duration into a rounded tokens-per-second rate.
 *
 * @param {object} options
 * @param {number} options.durationMs - Elapsed time in milliseconds.
 * @param {number} options.totalTokens - Number of tokens processed.
 * @returns {number} The rounded rate, or 0 when the duration is not positive
 *   or the computed rate is not a finite positive number.
 */
function calculateTokensPerSecond({ durationMs, totalTokens }) {
    if (durationMs <= 0) {
        return 0;
    }
    const elapsedSeconds = durationMs / 1000;
    const rate = totalTokens / elapsedSeconds;
    const isUsable = Number.isFinite(rate) && rate > 0;
    return isUsable ? Math.round(rate) : 0;
}
|
|
109815
|
+
|
|
109816
|
+
/**
 * Proxy server requests to the local inference HTTP server.
 *
 * `:name` placeholders in `request.path` are replaced with the matching
 * entries of `request.parameters` (first occurrence of each), `request.query`
 * entries become query-string parameters, and object bodies are JSON-encoded.
 *
 * The resolved URL must stay on `http://localhost:<configuration.port>`. A
 * path that resolves to another origin (an absolute or protocol-relative URL)
 * is rejected instead of being fetched.
 *
 * @param {object} options
 * @param {object} options.configuration - Provides `port` of the local server.
 * @param {object} options.request - Request with `path`, `method`, `headers`,
 *   `requestID` and optional `parameters`, `query` and `body`.
 * @returns {Promise<{ body: Readable|null, headers: object, requestID: string, status: number }>}
 * @throws {Error} When the resolved URL does not target the local server.
 */
async function proxyRequest({ configuration, request }) {
    const baseURL = `http://localhost:${configuration.port}`;
    let finalPath = request.path;
    if (request.parameters) {
        for (const [key, value] of Object.entries(request.parameters)) {
            // A replacer function inserts the value literally; a replacement
            // string would interpret sequences such as "$&" or "$1".
            finalPath = finalPath.replace(`:${key}`, () => String(value));
        }
    }
    const url = new URL(finalPath, baseURL);
    if (url.origin !== new URL(baseURL).origin) {
        throw new Error(`Refusing to proxy request outside the local inference server: ${request.path}`);
    }
    if (request.query) {
        for (const [key, value] of Object.entries(request.query)) {
            url.searchParams.set(key, value);
        }
    }
    const fetchOptions = {
        method: request.method,
        headers: request.headers
    };
    if (request.body) {
        fetchOptions.body =
            typeof request.body === "object" ? JSON.stringify(request.body) : request.body;
    }
    const response = await undiciExports.fetch(url, fetchOptions);
    return {
        body: response.body ? Readable.fromWeb(response.body) : null,
        headers: Object.fromEntries(response.headers.entries()),
        requestID: request.requestID,
        status: response.status
    };
}
|
|
109848
|
+
|
|
109849
|
+
class ConduitStateReportManager {
|
|
109850
|
+
apiClient;
|
|
109851
|
+
conduitStateManager;
|
|
109852
|
+
downloadProgressReportIntervalMs;
|
|
109853
|
+
logger;
|
|
109854
|
+
stateIntervalMs;
|
|
109855
|
+
conduitStateReportInFlight = false;
|
|
109856
|
+
lastConduitStateReportAt = 0;
|
|
109857
|
+
pendingConduitStateReport = null;
|
|
109858
|
+
stateInterval = null;
|
|
109859
|
+
constructor({ apiClient, conduitStateManager, downloadProgressReportIntervalMs, logger, stateIntervalMs }) {
|
|
109860
|
+
this.apiClient = apiClient;
|
|
109861
|
+
this.conduitStateManager = conduitStateManager;
|
|
109862
|
+
this.downloadProgressReportIntervalMs = downloadProgressReportIntervalMs;
|
|
109863
|
+
this.logger = logger;
|
|
109864
|
+
this.stateIntervalMs = stateIntervalMs;
|
|
109865
|
+
}
|
|
109866
|
+
async start() {
|
|
109867
|
+
await this.sendConduitState();
|
|
109868
|
+
this.stateInterval = setInterval(() => {
|
|
109869
|
+
this.sendConduitState().catch(error => {
|
|
109870
|
+
this.logger.error("Conduit state update failed", {
|
|
109871
|
+
error: asError(error)
|
|
109872
|
+
});
|
|
109873
|
+
});
|
|
109874
|
+
}, this.stateIntervalMs);
|
|
109875
|
+
}
|
|
109876
|
+
stop() {
|
|
109877
|
+
if (this.stateInterval) {
|
|
109878
|
+
clearInterval(this.stateInterval);
|
|
109879
|
+
this.stateInterval = null;
|
|
109880
|
+
}
|
|
109881
|
+
if (this.pendingConduitStateReport) {
|
|
109882
|
+
clearTimeout(this.pendingConduitStateReport);
|
|
109883
|
+
this.pendingConduitStateReport = null;
|
|
109884
|
+
}
|
|
109885
|
+
}
|
|
109886
|
+
reportDownloadProgress() {
|
|
109887
|
+
this.scheduleConduitStateReport();
|
|
109888
|
+
}
|
|
109889
|
+
async reportNow() {
|
|
109890
|
+
if (this.pendingConduitStateReport) {
|
|
109891
|
+
clearTimeout(this.pendingConduitStateReport);
|
|
109892
|
+
this.pendingConduitStateReport = null;
|
|
109893
|
+
}
|
|
109894
|
+
await this.triggerConduitStateReport();
|
|
109895
|
+
}
|
|
109896
|
+
reportStateChange() {
|
|
109897
|
+
if (this.pendingConduitStateReport) {
|
|
109898
|
+
clearTimeout(this.pendingConduitStateReport);
|
|
109899
|
+
this.pendingConduitStateReport = null;
|
|
109900
|
+
}
|
|
109901
|
+
this.triggerConduitStateReport().catch(error => {
|
|
109902
|
+
this.logger.error("Conduit state update failed", {
|
|
109903
|
+
error: asError(error)
|
|
109904
|
+
});
|
|
109905
|
+
});
|
|
109906
|
+
}
|
|
109907
|
+
async sendConduitState() {
|
|
109908
|
+
try {
|
|
109909
|
+
await this.apiClient.reportConduitState(this.conduitStateManager.touch());
|
|
109910
|
+
this.lastConduitStateReportAt = Date.now();
|
|
109911
|
+
}
|
|
109912
|
+
catch (error) {
|
|
109913
|
+
this.logger.error("Conduit state update failed", {
|
|
109914
|
+
...getNetworkErrorAttributes(error),
|
|
109915
|
+
error: asError(error)
|
|
109110
109916
|
});
|
|
109111
109917
|
}
|
|
109112
109918
|
}
|
|
@@ -109171,7 +109977,7 @@ class ConduitStateManager {
|
|
|
109171
109977
|
}
|
|
109172
109978
|
}
|
|
109173
109979
|
|
|
109174
|
-
function isPlainObject
|
|
109980
|
+
function isPlainObject(value) {
|
|
109175
109981
|
if (typeof value !== 'object' || value === null) {
|
|
109176
109982
|
return false;
|
|
109177
109983
|
}
|
|
@@ -109208,7 +110014,7 @@ const normalizeFileUrl = file => file instanceof URL ? fileURLToPath(file) : fil
|
|
|
109208
110014
|
// This also does basic validation on them and on the command file.
|
|
109209
110015
|
const normalizeParameters = (rawFile, rawArguments = [], rawOptions = {}) => {
|
|
109210
110016
|
const filePath = safeNormalizeFileUrl(rawFile, 'First argument');
|
|
109211
|
-
const [commandArguments, options] = isPlainObject
|
|
110017
|
+
const [commandArguments, options] = isPlainObject(rawArguments)
|
|
109212
110018
|
? [[], rawArguments]
|
|
109213
110019
|
: [rawArguments, rawOptions];
|
|
109214
110020
|
|
|
@@ -109226,7 +110032,7 @@ const normalizeParameters = (rawFile, rawArguments = [], rawOptions = {}) => {
|
|
|
109226
110032
|
throw new TypeError(`Arguments cannot contain null bytes ("\\0"): ${nullByteArgument}`);
|
|
109227
110033
|
}
|
|
109228
110034
|
|
|
109229
|
-
if (!isPlainObject
|
|
110035
|
+
if (!isPlainObject(options)) {
|
|
109230
110036
|
throw new TypeError(`Last argument must be an options object: ${options}`);
|
|
109231
110037
|
}
|
|
109232
110038
|
|
|
@@ -109423,7 +110229,7 @@ const parseExpression = expression => {
|
|
|
109423
110229
|
return String(expression);
|
|
109424
110230
|
}
|
|
109425
110231
|
|
|
109426
|
-
if (isPlainObject
|
|
110232
|
+
if (isPlainObject(expression) && ('stdout' in expression || 'isMaxBuffer' in expression)) {
|
|
109427
110233
|
return getSubprocessResult(expression);
|
|
109428
110234
|
}
|
|
109429
110235
|
|
|
@@ -109479,7 +110285,7 @@ const getStdioLength = ({stdio}) => Array.isArray(stdio)
|
|
|
109479
110285
|
? Math.max(stdio.length, STANDARD_STREAMS_ALIASES.length)
|
|
109480
110286
|
: STANDARD_STREAMS_ALIASES.length;
|
|
109481
110287
|
|
|
109482
|
-
const normalizeFdSpecificValue = (optionValue, optionArray, optionName) => isPlainObject
|
|
110288
|
+
const normalizeFdSpecificValue = (optionValue, optionArray, optionName) => isPlainObject(optionValue)
|
|
109483
110289
|
? normalizeOptionObject(optionValue, optionArray, optionName)
|
|
109484
110290
|
: optionArray.fill(optionValue);
|
|
109485
110291
|
|
|
@@ -113962,13 +114768,13 @@ const checkBooleanOption = (value, optionName) => {
|
|
|
113962
114768
|
const isGenerator = value => isAsyncGenerator(value) || isSyncGenerator(value);
|
|
113963
114769
|
const isAsyncGenerator = value => Object.prototype.toString.call(value) === '[object AsyncGeneratorFunction]';
|
|
113964
114770
|
const isSyncGenerator = value => Object.prototype.toString.call(value) === '[object GeneratorFunction]';
|
|
113965
|
-
const isTransformOptions = value => isPlainObject
|
|
114771
|
+
const isTransformOptions = value => isPlainObject(value)
|
|
113966
114772
|
&& (value.transform !== undefined || value.final !== undefined);
|
|
113967
114773
|
|
|
113968
114774
|
const isUrl = value => Object.prototype.toString.call(value) === '[object URL]';
|
|
113969
114775
|
const isRegularUrl = value => isUrl(value) && value.protocol !== 'file:';
|
|
113970
114776
|
|
|
113971
|
-
const isFilePathObject = value => isPlainObject
|
|
114777
|
+
const isFilePathObject = value => isPlainObject(value)
|
|
113972
114778
|
&& Object.keys(value).length > 0
|
|
113973
114779
|
&& Object.keys(value).every(key => FILE_PATH_KEYS.has(key))
|
|
113974
114780
|
&& isFilePathString(value.file);
|
|
@@ -114131,7 +114937,7 @@ const normalizeDuplex = ({
|
|
|
114131
114937
|
};
|
|
114132
114938
|
|
|
114133
114939
|
const normalizeTransformStream = ({stdioItem, stdioItem: {value}, index, newTransforms, direction}) => {
|
|
114134
|
-
const {transform, objectMode} = isPlainObject
|
|
114940
|
+
const {transform, objectMode} = isPlainObject(value) ? value : {transform: value};
|
|
114135
114941
|
const {writableObjectMode, readableObjectMode} = getTransformObjectModes(objectMode, index, newTransforms, direction);
|
|
114136
114942
|
return ({
|
|
114137
114943
|
...stdioItem,
|
|
@@ -114146,7 +114952,7 @@ const normalizeGenerator = ({stdioItem, stdioItem: {value}, index, newTransforms
|
|
|
114146
114952
|
binary: binaryOption = false,
|
|
114147
114953
|
preserveNewlines = false,
|
|
114148
114954
|
objectMode,
|
|
114149
|
-
} = isPlainObject
|
|
114955
|
+
} = isPlainObject(value) ? value : {transform: value};
|
|
114150
114956
|
const binary = binaryOption || BINARY_ENCODINGS.has(encoding);
|
|
114151
114957
|
const {writableObjectMode, readableObjectMode} = getTransformObjectModes(objectMode, index, newTransforms, direction);
|
|
114152
114958
|
return {
|
|
@@ -116909,7 +117715,7 @@ const unpipeOnSignalAbort = async (unpipeSignal, {sourceStream, mergedStream, fi
|
|
|
116909
117715
|
|
|
116910
117716
|
// Pipe a subprocess' `stdout`/`stderr`/`stdio` into another subprocess' `stdin`
|
|
116911
117717
|
const pipeToSubprocess = (sourceInfo, ...pipeArguments) => {
|
|
116912
|
-
if (isPlainObject
|
|
117718
|
+
if (isPlainObject(pipeArguments[0])) {
|
|
116913
117719
|
return pipeToSubprocess.bind(undefined, {
|
|
116914
117720
|
...sourceInfo,
|
|
116915
117721
|
boundOptions: {...sourceInfo.boundOptions, ...pipeArguments[0]},
|
|
@@ -118109,7 +118915,7 @@ const mergeOptions = (boundOptions, options) => {
|
|
|
118109
118915
|
};
|
|
118110
118916
|
|
|
118111
118917
|
const mergeOption = (optionName, boundOptionValue, optionValue) => {
|
|
118112
|
-
if (DEEP_OPTIONS.has(optionName) && isPlainObject
|
|
118918
|
+
if (DEEP_OPTIONS.has(optionName) && isPlainObject(boundOptionValue) && isPlainObject(optionValue)) {
|
|
118113
118919
|
return {...boundOptionValue, ...optionValue};
|
|
118114
118920
|
}
|
|
118115
118921
|
|
|
@@ -118141,7 +118947,7 @@ const createExeca = (mapArguments, boundOptions, deepOptions, setBoundExeca) =>
|
|
|
118141
118947
|
};
|
|
118142
118948
|
|
|
118143
118949
|
const callBoundExeca = ({mapArguments, deepOptions = {}, boundOptions = {}, setBoundExeca, createNested}, firstArgument, ...nextArguments) => {
|
|
118144
|
-
if (isPlainObject
|
|
118950
|
+
if (isPlainObject(firstArgument)) {
|
|
118145
118951
|
return createNested(mapArguments, mergeOptions(boundOptions, firstArgument), setBoundExeca);
|
|
118146
118952
|
}
|
|
118147
118953
|
|
|
@@ -118172,795 +118978,193 @@ const parseArguments = ({mapArguments, firstArgument, nextArguments, deepOptions
|
|
|
118172
118978
|
return {
|
|
118173
118979
|
file,
|
|
118174
118980
|
commandArguments,
|
|
118175
|
-
options,
|
|
118176
|
-
isSync,
|
|
118177
|
-
};
|
|
118178
|
-
};
|
|
118179
|
-
|
|
118180
|
-
// Main logic for `execaCommand()`
|
|
118181
|
-
const mapCommandAsync = ({file, commandArguments}) => parseCommand(file, commandArguments);
|
|
118182
|
-
|
|
118183
|
-
// Main logic for `execaCommandSync()`
|
|
118184
|
-
const mapCommandSync = ({file, commandArguments}) => ({...parseCommand(file, commandArguments), isSync: true});
|
|
118185
|
-
|
|
118186
|
-
// Convert `execaCommand(command)` into `execa(file, ...commandArguments)`
|
|
118187
|
-
const parseCommand = (command, unusedArguments) => {
|
|
118188
|
-
if (unusedArguments.length > 0) {
|
|
118189
|
-
throw new TypeError(`The command and its arguments must be passed as a single string: ${command} ${unusedArguments}.`);
|
|
118190
|
-
}
|
|
118191
|
-
|
|
118192
|
-
const [file, ...commandArguments] = parseCommandString(command);
|
|
118193
|
-
return {file, commandArguments};
|
|
118194
|
-
};
|
|
118195
|
-
|
|
118196
|
-
// Convert `command` string into an array of file or arguments to pass to $`${...fileOrCommandArguments}`
|
|
118197
|
-
const parseCommandString = command => {
|
|
118198
|
-
if (typeof command !== 'string') {
|
|
118199
|
-
throw new TypeError(`The command must be a string: ${String(command)}.`);
|
|
118200
|
-
}
|
|
118201
|
-
|
|
118202
|
-
const trimmedCommand = command.trim();
|
|
118203
|
-
if (trimmedCommand === '') {
|
|
118204
|
-
return [];
|
|
118205
|
-
}
|
|
118206
|
-
|
|
118207
|
-
const tokens = [];
|
|
118208
|
-
for (const token of trimmedCommand.split(SPACES_REGEXP)) {
|
|
118209
|
-
// Allow spaces to be escaped by a backslash if not meant as a delimiter
|
|
118210
|
-
const previousToken = tokens.at(-1);
|
|
118211
|
-
if (previousToken && previousToken.endsWith('\\')) {
|
|
118212
|
-
// Merge previous token with current one
|
|
118213
|
-
tokens[tokens.length - 1] = `${previousToken.slice(0, -1)} ${token}`;
|
|
118214
|
-
} else {
|
|
118215
|
-
tokens.push(token);
|
|
118216
|
-
}
|
|
118217
|
-
}
|
|
118218
|
-
|
|
118219
|
-
return tokens;
|
|
118220
|
-
};
|
|
118221
|
-
|
|
118222
|
-
const SPACES_REGEXP = / +/g;
|
|
118223
|
-
|
|
118224
|
-
// Sets `$.sync` and `$.s`
|
|
118225
|
-
const setScriptSync = (boundExeca, createNested, boundOptions) => {
|
|
118226
|
-
boundExeca.sync = createNested(mapScriptSync, boundOptions);
|
|
118227
|
-
boundExeca.s = boundExeca.sync;
|
|
118228
|
-
};
|
|
118229
|
-
|
|
118230
|
-
// Main logic for `$`
|
|
118231
|
-
const mapScriptAsync = ({options}) => getScriptOptions(options);
|
|
118232
|
-
|
|
118233
|
-
// Main logic for `$.sync`
|
|
118234
|
-
const mapScriptSync = ({options}) => ({...getScriptOptions(options), isSync: true});
|
|
118235
|
-
|
|
118236
|
-
// `$` is like `execa` but with script-friendly options: `{stdin: 'inherit', preferLocal: true}`
|
|
118237
|
-
const getScriptOptions = options => ({options: {...getScriptStdinOption(options), ...options}});
|
|
118238
|
-
|
|
118239
|
-
const getScriptStdinOption = ({input, inputFile, stdio}) => input === undefined && inputFile === undefined && stdio === undefined
|
|
118240
|
-
? {stdin: 'inherit'}
|
|
118241
|
-
: {};
|
|
118242
|
-
|
|
118243
|
-
// When using $(...).pipe(...), most script-friendly options should apply to both commands.
|
|
118244
|
-
// However, some options (like `stdin: 'inherit'`) would create issues with piping, i.e. cannot be deep.
|
|
118245
|
-
const deepScriptOptions = {preferLocal: true};
|
|
118246
|
-
|
|
118247
|
-
const execa = createExeca(() => ({}));
|
|
118248
|
-
createExeca(() => ({isSync: true}));
|
|
118249
|
-
createExeca(mapCommandAsync);
|
|
118250
|
-
createExeca(mapCommandSync);
|
|
118251
|
-
createExeca(mapNode);
|
|
118252
|
-
createExeca(mapScriptAsync, {}, deepScriptOptions, setScriptSync);
|
|
118253
|
-
|
|
118254
|
-
getIpcExport();
|
|
118255
|
-
|
|
118256
|
-
const MACHINE_ID_PATHS = ["/etc/machine-id", "/var/lib/dbus/machine-id"];
|
|
118257
|
-
async function readMachineIdentifier() {
|
|
118258
|
-
for (const path of MACHINE_ID_PATHS) {
|
|
118259
|
-
try {
|
|
118260
|
-
const contents = await readFile(path, "utf8");
|
|
118261
|
-
const trimmed = contents.trim();
|
|
118262
|
-
if (trimmed.length > 0) {
|
|
118263
|
-
return trimmed;
|
|
118264
|
-
}
|
|
118265
|
-
}
|
|
118266
|
-
catch {
|
|
118267
|
-
// Ignore and continue to next candidate
|
|
118268
|
-
}
|
|
118269
|
-
}
|
|
118270
|
-
return os.hostname();
|
|
118271
|
-
}
|
|
118272
|
-
async function detectLlamaCppVersion() {
|
|
118273
|
-
try {
|
|
118274
|
-
const { stdout } = await execa("llama-server", ["--version"]);
|
|
118275
|
-
const versionLine = stdout.trim();
|
|
118276
|
-
return versionLine.length > 0 ? (versionLine.split("\n")[0] ?? null) : null;
|
|
118277
|
-
}
|
|
118278
|
-
catch {
|
|
118279
|
-
return null;
|
|
118280
|
-
}
|
|
118281
|
-
}
|
|
118282
|
-
async function detectVLLMVersion() {
|
|
118283
|
-
try {
|
|
118284
|
-
const { stdout } = await execa("python3", [
|
|
118285
|
-
"-c",
|
|
118286
|
-
"import importlib.metadata as md; print(md.version('vllm'))"
|
|
118287
|
-
]);
|
|
118288
|
-
const version = stdout.trim();
|
|
118289
|
-
return version.length > 0 ? version : null;
|
|
118290
|
-
}
|
|
118291
|
-
catch {
|
|
118292
|
-
return null;
|
|
118293
|
-
}
|
|
118294
|
-
}
|
|
118295
|
-
function normalizeMegabytes(value) {
|
|
118296
|
-
if (typeof value !== "number" || Number.isNaN(value)) {
|
|
118297
|
-
return null;
|
|
118298
|
-
}
|
|
118299
|
-
return Math.round(value * 1024 * 1024);
|
|
118300
|
-
}
|
|
118301
|
-
function resolveCpuValue(value) {
|
|
118302
|
-
if (typeof value === "number" && Number.isFinite(value)) {
|
|
118303
|
-
return value;
|
|
118304
|
-
}
|
|
118305
|
-
if (typeof value === "string") {
|
|
118306
|
-
const parsed = Number(value);
|
|
118307
|
-
return Number.isFinite(parsed) ? parsed : null;
|
|
118308
|
-
}
|
|
118309
|
-
return null;
|
|
118310
|
-
}
|
|
118311
|
-
async function collectMachineMetadata() {
|
|
118312
|
-
const [cpuResult, memResult, osResult, graphicsResult] = await Promise.allSettled([
|
|
118313
|
-
si.cpu(),
|
|
118314
|
-
si.mem(),
|
|
118315
|
-
si.osInfo(),
|
|
118316
|
-
si.graphics()
|
|
118317
|
-
]);
|
|
118318
|
-
const cpuInfo = cpuResult.status === "fulfilled" ? cpuResult.value : null;
|
|
118319
|
-
const memInfo = memResult.status === "fulfilled" ? memResult.value : null;
|
|
118320
|
-
const osInfo = osResult.status === "fulfilled" ? osResult.value : null;
|
|
118321
|
-
const graphicsInfo = graphicsResult.status === "fulfilled"
|
|
118322
|
-
? graphicsResult.value
|
|
118323
|
-
: { controllers: [] };
|
|
118324
|
-
const gpus = (graphicsInfo.controllers ?? []).map((controller) => ({
|
|
118325
|
-
bus: controller.bus ?? null,
|
|
118326
|
-
driverVersion: controller.driverVersion ?? null,
|
|
118327
|
-
memoryFreeBytes: normalizeMegabytes(controller.memoryFree ?? null),
|
|
118328
|
-
memoryTotalBytes: normalizeMegabytes(controller.memoryTotal ?? null),
|
|
118329
|
-
model: controller.model ?? controller.name ?? null,
|
|
118330
|
-
temperatureCelsius: controller.temperatureGpu ?? null,
|
|
118331
|
-
vendor: controller.vendor ?? null
|
|
118332
|
-
}));
|
|
118333
|
-
const machineMetadata = {
|
|
118334
|
-
cpu: {
|
|
118335
|
-
baseClockGHz: resolveCpuValue(cpuInfo?.speed ?? null),
|
|
118336
|
-
logicalCores: cpuInfo?.cores ?? null,
|
|
118337
|
-
maxClockGHz: resolveCpuValue(cpuInfo?.speedMax ?? null),
|
|
118338
|
-
model: cpuInfo?.brand ?? null,
|
|
118339
|
-
physicalCores: cpuInfo?.physicalCores ?? null
|
|
118340
|
-
},
|
|
118341
|
-
gpus,
|
|
118342
|
-
hostname: os.hostname(),
|
|
118343
|
-
llamaCppVersion: await detectLlamaCppVersion(),
|
|
118344
|
-
machineID: await readMachineIdentifier(),
|
|
118345
|
-
memory: {
|
|
118346
|
-
availableBytes: memInfo?.available ?? null,
|
|
118347
|
-
totalBytes: memInfo?.total ?? null
|
|
118348
|
-
},
|
|
118349
|
-
os: {
|
|
118350
|
-
arch: osInfo?.arch ?? os.arch(),
|
|
118351
|
-
platform: osInfo?.platform ?? os.platform(),
|
|
118352
|
-
release: osInfo?.release ?? os.release(),
|
|
118353
|
-
type: osInfo?.kernel ?? null,
|
|
118354
|
-
version: osInfo?.build ?? null
|
|
118355
|
-
},
|
|
118356
|
-
vllmVersion: await detectVLLMVersion()
|
|
118357
|
-
};
|
|
118358
|
-
return machineMetadata;
|
|
118359
|
-
}
|
|
118360
|
-
|
|
118361
|
-
/**
|
|
118362
|
-
* Coerce non-string tool_calls function.arguments to JSON strings.
|
|
118363
|
-
* Some LLM backends return arguments as parsed objects instead of
|
|
118364
|
-
* JSON strings, violating the OpenAI spec. This mutates in place
|
|
118365
|
-
* and returns true if any coercion was performed.
|
|
118366
|
-
*/
|
|
118367
|
-
function coerceToolCallArguments(parsed) {
|
|
118368
|
-
const choices = parsed.choices;
|
|
118369
|
-
if (!Array.isArray(choices))
|
|
118370
|
-
return false;
|
|
118371
|
-
let modified = false;
|
|
118372
|
-
for (const choice of choices) {
|
|
118373
|
-
if (!choice || typeof choice !== "object")
|
|
118374
|
-
continue;
|
|
118375
|
-
const choiceRecord = choice;
|
|
118376
|
-
const msg = choiceRecord.delta ?? choiceRecord.message;
|
|
118377
|
-
if (!msg || typeof msg !== "object")
|
|
118378
|
-
continue;
|
|
118379
|
-
const toolCalls = msg.tool_calls;
|
|
118380
|
-
if (!Array.isArray(toolCalls))
|
|
118381
|
-
continue;
|
|
118382
|
-
for (const tc of toolCalls) {
|
|
118383
|
-
if (!tc || typeof tc !== "object")
|
|
118384
|
-
continue;
|
|
118385
|
-
const fn = tc.function;
|
|
118386
|
-
if (!fn || typeof fn !== "object")
|
|
118387
|
-
continue;
|
|
118388
|
-
const fnRecord = fn;
|
|
118389
|
-
if (fnRecord.arguments !== undefined && typeof fnRecord.arguments !== "string") {
|
|
118390
|
-
fnRecord.arguments = JSON.stringify(fnRecord.arguments);
|
|
118391
|
-
modified = true;
|
|
118392
|
-
}
|
|
118393
|
-
}
|
|
118394
|
-
}
|
|
118395
|
-
return modified;
|
|
118396
|
-
}
|
|
118397
|
-
function isEngineUsageChunk(value) {
|
|
118398
|
-
if (!value || typeof value !== "object") {
|
|
118399
|
-
return false;
|
|
118400
|
-
}
|
|
118401
|
-
const record = value;
|
|
118402
|
-
if (!record.usage || typeof record.usage !== "object") {
|
|
118403
|
-
return false;
|
|
118404
|
-
}
|
|
118405
|
-
return true;
|
|
118406
|
-
}
|
|
118407
|
-
function monitorEngineResponseStream({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
118408
|
-
const startedAt = requestStartedAt ?? Date.now();
|
|
118409
|
-
const passThrough = new PassThrough();
|
|
118410
|
-
let responseBytes = 0;
|
|
118411
|
-
let firstChunkAt = null;
|
|
118412
|
-
let usage = null;
|
|
118413
|
-
let buffer = "";
|
|
118414
|
-
let completed = false;
|
|
118415
|
-
function modifyChunkWithUsage(chunk) {
|
|
118416
|
-
const text = chunk.toString("utf8");
|
|
118417
|
-
const lines = text.split("\n");
|
|
118418
|
-
const modifiedLines = [];
|
|
118419
|
-
for (const rawLine of lines) {
|
|
118420
|
-
const line = rawLine.trim();
|
|
118421
|
-
if (!line.startsWith("data:")) {
|
|
118422
|
-
modifiedLines.push(rawLine);
|
|
118423
|
-
continue;
|
|
118424
|
-
}
|
|
118425
|
-
const payload = line.slice(5).trim();
|
|
118426
|
-
if (!payload || payload === "[DONE]") {
|
|
118427
|
-
modifiedLines.push(rawLine);
|
|
118428
|
-
continue;
|
|
118429
|
-
}
|
|
118430
|
-
try {
|
|
118431
|
-
const parsed = JSON.parse(payload);
|
|
118432
|
-
let modified = false;
|
|
118433
|
-
if (coerceToolCallArguments(parsed)) {
|
|
118434
|
-
modified = true;
|
|
118435
|
-
}
|
|
118436
|
-
if (parsed.usage) {
|
|
118437
|
-
const usageChunk = parsed.usage;
|
|
118438
|
-
const effectiveContext = getEffectiveContextLength({
|
|
118439
|
-
contextLength,
|
|
118440
|
-
engine,
|
|
118441
|
-
parallelism
|
|
118442
|
-
});
|
|
118443
|
-
if (usageChunk.context_usage === undefined &&
|
|
118444
|
-
usageChunk.prompt_tokens !== undefined &&
|
|
118445
|
-
effectiveContext !== null) {
|
|
118446
|
-
usageChunk.context_usage = usageChunk.prompt_tokens / effectiveContext;
|
|
118447
|
-
modified = true;
|
|
118448
|
-
}
|
|
118449
|
-
}
|
|
118450
|
-
if (modified) {
|
|
118451
|
-
modifiedLines.push("data: " + JSON.stringify(parsed));
|
|
118452
|
-
continue;
|
|
118453
|
-
}
|
|
118454
|
-
}
|
|
118455
|
-
catch (_error) {
|
|
118456
|
-
// Ignore malformed chunks
|
|
118457
|
-
}
|
|
118458
|
-
modifiedLines.push(rawLine);
|
|
118459
|
-
}
|
|
118460
|
-
return Buffer.from(modifiedLines.join("\n"), "utf8");
|
|
118461
|
-
}
|
|
118462
|
-
function parseUsageFromBuffer() {
|
|
118463
|
-
const lines = buffer.split("\n");
|
|
118464
|
-
buffer = lines.pop() ?? "";
|
|
118465
|
-
for (const rawLine of lines) {
|
|
118466
|
-
const line = rawLine.trim();
|
|
118467
|
-
if (!line.startsWith("data:")) {
|
|
118468
|
-
continue;
|
|
118469
|
-
}
|
|
118470
|
-
const payload = line.slice(5).trim();
|
|
118471
|
-
if (!payload || payload === "[DONE]") {
|
|
118472
|
-
continue;
|
|
118473
|
-
}
|
|
118474
|
-
try {
|
|
118475
|
-
const parsed = JSON.parse(payload);
|
|
118476
|
-
if (isEngineUsageChunk(parsed)) {
|
|
118477
|
-
const completionTokens = parsed.usage?.completion_tokens ?? null;
|
|
118478
|
-
const promptTokens = parsed.usage?.prompt_tokens ?? null;
|
|
118479
|
-
const totalTokens = parsed.usage?.total_tokens ?? null;
|
|
118480
|
-
let contextUsage = parsed.usage?.context_usage ?? null;
|
|
118481
|
-
const effectiveContextForUsage = getEffectiveContextLength({
|
|
118482
|
-
contextLength,
|
|
118483
|
-
engine,
|
|
118484
|
-
parallelism
|
|
118485
|
-
});
|
|
118486
|
-
if (contextUsage === null &&
|
|
118487
|
-
promptTokens !== null &&
|
|
118488
|
-
effectiveContextForUsage !== null) {
|
|
118489
|
-
contextUsage = promptTokens / effectiveContextForUsage;
|
|
118490
|
-
}
|
|
118491
|
-
usage = {
|
|
118492
|
-
completionTokens,
|
|
118493
|
-
contextUsage,
|
|
118494
|
-
promptTokens,
|
|
118495
|
-
totalTokens
|
|
118496
|
-
};
|
|
118497
|
-
}
|
|
118498
|
-
}
|
|
118499
|
-
catch (_error) {
|
|
118500
|
-
// Ignore malformed chunks
|
|
118501
|
-
}
|
|
118502
|
-
}
|
|
118503
|
-
}
|
|
118504
|
-
function finalize(error) {
|
|
118505
|
-
if (completed) {
|
|
118506
|
-
return;
|
|
118507
|
-
}
|
|
118508
|
-
completed = true;
|
|
118509
|
-
if (onComplete) {
|
|
118510
|
-
const completion = onComplete({
|
|
118511
|
-
durationMs: Math.max(0, Date.now() - startedAt),
|
|
118512
|
-
error,
|
|
118513
|
-
requestBodyBytes,
|
|
118514
|
-
responseBytes,
|
|
118515
|
-
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
118516
|
-
usage
|
|
118517
|
-
});
|
|
118518
|
-
if (completion && typeof completion.catch === "function") {
|
|
118519
|
-
completion.catch(error => {
|
|
118520
|
-
logger.error("Engine metrics completion failed", {
|
|
118521
|
-
error: asError(error),
|
|
118522
|
-
requestUrl: requestPath
|
|
118523
|
-
});
|
|
118524
|
-
});
|
|
118525
|
-
}
|
|
118526
|
-
}
|
|
118527
|
-
}
|
|
118528
|
-
body.on("data", (chunk) => {
|
|
118529
|
-
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
118530
|
-
if (firstChunkAt === null) {
|
|
118531
|
-
firstChunkAt = Date.now();
|
|
118532
|
-
}
|
|
118533
|
-
responseBytes += chunkBuffer.length;
|
|
118534
|
-
buffer += chunkBuffer.toString("utf8");
|
|
118535
|
-
parseUsageFromBuffer();
|
|
118536
|
-
passThrough.write(modifyChunkWithUsage(chunkBuffer));
|
|
118537
|
-
});
|
|
118538
|
-
body.once("error", err => {
|
|
118539
|
-
logEngineMetrics({
|
|
118540
|
-
agentEngineType,
|
|
118541
|
-
error: err,
|
|
118542
|
-
level: "error",
|
|
118543
|
-
logger,
|
|
118544
|
-
requestBodyBytes,
|
|
118545
|
-
requestPath,
|
|
118546
|
-
responseBytes,
|
|
118547
|
-
usage
|
|
118548
|
-
});
|
|
118549
|
-
finalize(err);
|
|
118550
|
-
passThrough.destroy(err);
|
|
118551
|
-
});
|
|
118552
|
-
body.once("end", () => {
|
|
118553
|
-
parseUsageFromBuffer();
|
|
118554
|
-
logEngineMetrics({
|
|
118555
|
-
agentEngineType,
|
|
118556
|
-
level: "info",
|
|
118557
|
-
logger,
|
|
118558
|
-
requestBodyBytes,
|
|
118559
|
-
requestPath,
|
|
118560
|
-
responseBytes,
|
|
118561
|
-
usage
|
|
118562
|
-
});
|
|
118563
|
-
finalize(null);
|
|
118564
|
-
passThrough.end();
|
|
118565
|
-
});
|
|
118566
|
-
body.once("close", () => {
|
|
118567
|
-
if (completed) {
|
|
118568
|
-
if (!passThrough.writableEnded) {
|
|
118569
|
-
passThrough.end();
|
|
118570
|
-
}
|
|
118571
|
-
return;
|
|
118572
|
-
}
|
|
118573
|
-
const closeError = new Error("Engine response stream closed before completion");
|
|
118574
|
-
logEngineMetrics({
|
|
118575
|
-
agentEngineType,
|
|
118576
|
-
error: closeError,
|
|
118577
|
-
level: "error",
|
|
118578
|
-
logger,
|
|
118579
|
-
requestBodyBytes,
|
|
118580
|
-
requestPath,
|
|
118581
|
-
responseBytes,
|
|
118582
|
-
usage
|
|
118583
|
-
});
|
|
118584
|
-
finalize(closeError);
|
|
118585
|
-
if (!passThrough.writableEnded) {
|
|
118586
|
-
passThrough.end();
|
|
118587
|
-
}
|
|
118588
|
-
});
|
|
118589
|
-
return {
|
|
118590
|
-
stream: passThrough
|
|
118591
|
-
};
|
|
118592
|
-
}
|
|
118593
|
-
function monitorEngineResponseSingle({ agentEngineType, body, contextLength, engine, logger, onComplete, parallelism, requestBodyBytes, requestPath, requestStartedAt }) {
|
|
118594
|
-
const maxUsageCaptureBytes = 1024 * 1024;
|
|
118595
|
-
const startedAt = requestStartedAt ?? Date.now();
|
|
118596
|
-
const passThrough = new PassThrough();
|
|
118597
|
-
let responseBytes = 0;
|
|
118598
|
-
let firstChunkAt = null;
|
|
118599
|
-
let usage = null;
|
|
118600
|
-
const usageChunks = [];
|
|
118601
|
-
let usageBytes = 0;
|
|
118602
|
-
let usageCaptureEnabled = true;
|
|
118603
|
-
let completed = false;
|
|
118604
|
-
function finalize(error) {
|
|
118605
|
-
if (completed) {
|
|
118606
|
-
return;
|
|
118607
|
-
}
|
|
118608
|
-
completed = true;
|
|
118609
|
-
if (onComplete) {
|
|
118610
|
-
const completion = onComplete({
|
|
118611
|
-
durationMs: Math.max(0, Date.now() - startedAt),
|
|
118612
|
-
error,
|
|
118613
|
-
requestBodyBytes,
|
|
118614
|
-
responseBytes,
|
|
118615
|
-
timeToFirstTokenMs: firstChunkAt === null ? null : Math.max(0, firstChunkAt - startedAt),
|
|
118616
|
-
usage
|
|
118617
|
-
});
|
|
118618
|
-
if (completion && typeof completion.catch === "function") {
|
|
118619
|
-
completion.catch(error => {
|
|
118620
|
-
logger.error("Engine metrics completion failed", {
|
|
118621
|
-
error: asError(error),
|
|
118622
|
-
requestUrl: requestPath
|
|
118623
|
-
});
|
|
118624
|
-
});
|
|
118625
|
-
}
|
|
118626
|
-
}
|
|
118627
|
-
}
|
|
118628
|
-
body.on("data", (chunk) => {
|
|
118629
|
-
const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
118630
|
-
if (firstChunkAt === null) {
|
|
118631
|
-
firstChunkAt = Date.now();
|
|
118632
|
-
}
|
|
118633
|
-
responseBytes += chunkBuffer.length;
|
|
118634
|
-
if (usageCaptureEnabled) {
|
|
118635
|
-
const nextSize = usageBytes + chunkBuffer.length;
|
|
118636
|
-
if (nextSize <= maxUsageCaptureBytes) {
|
|
118637
|
-
usageChunks.push(chunkBuffer);
|
|
118638
|
-
usageBytes = nextSize;
|
|
118639
|
-
}
|
|
118640
|
-
else {
|
|
118641
|
-
usageCaptureEnabled = false;
|
|
118642
|
-
usageChunks.length = 0;
|
|
118643
|
-
}
|
|
118644
|
-
}
|
|
118645
|
-
passThrough.write(chunkBuffer);
|
|
118646
|
-
});
|
|
118647
|
-
body.once("error", err => {
|
|
118648
|
-
logEngineMetrics({
|
|
118649
|
-
agentEngineType,
|
|
118650
|
-
error: err,
|
|
118651
|
-
level: "error",
|
|
118652
|
-
logger,
|
|
118653
|
-
requestBodyBytes,
|
|
118654
|
-
requestPath,
|
|
118655
|
-
responseBytes,
|
|
118656
|
-
usage
|
|
118657
|
-
});
|
|
118658
|
-
finalize(err);
|
|
118659
|
-
passThrough.destroy(err);
|
|
118660
|
-
});
|
|
118661
|
-
body.once("end", () => {
|
|
118662
|
-
if (usageCaptureEnabled) {
|
|
118663
|
-
try {
|
|
118664
|
-
const parsed = JSON.parse(Buffer.concat(usageChunks).toString("utf8"));
|
|
118665
|
-
if (parsed.usage) {
|
|
118666
|
-
const usageChunk = parsed.usage;
|
|
118667
|
-
const completionTokens = usageChunk.completion_tokens ?? null;
|
|
118668
|
-
const promptTokens = usageChunk.prompt_tokens ?? null;
|
|
118669
|
-
const totalTokens = usageChunk.total_tokens ?? null;
|
|
118670
|
-
let contextUsage = usageChunk.context_usage ?? null;
|
|
118671
|
-
const effectiveContext = getEffectiveContextLength({
|
|
118672
|
-
contextLength,
|
|
118673
|
-
engine,
|
|
118674
|
-
parallelism
|
|
118675
|
-
});
|
|
118676
|
-
if (contextUsage === null &&
|
|
118677
|
-
promptTokens !== null &&
|
|
118678
|
-
effectiveContext !== null) {
|
|
118679
|
-
contextUsage = promptTokens / effectiveContext;
|
|
118680
|
-
}
|
|
118681
|
-
usage = {
|
|
118682
|
-
completionTokens,
|
|
118683
|
-
contextUsage,
|
|
118684
|
-
promptTokens,
|
|
118685
|
-
totalTokens
|
|
118686
|
-
};
|
|
118687
|
-
}
|
|
118688
|
-
}
|
|
118689
|
-
catch (error) {
|
|
118690
|
-
logger.error("Failed to parse engine response body", {
|
|
118691
|
-
error: asError(error),
|
|
118692
|
-
requestUrl: requestPath
|
|
118693
|
-
});
|
|
118694
|
-
}
|
|
118695
|
-
}
|
|
118696
|
-
logEngineMetrics({
|
|
118697
|
-
agentEngineType,
|
|
118698
|
-
level: "info",
|
|
118699
|
-
logger,
|
|
118700
|
-
requestBodyBytes,
|
|
118701
|
-
requestPath,
|
|
118702
|
-
responseBytes,
|
|
118703
|
-
usage
|
|
118704
|
-
});
|
|
118705
|
-
finalize(null);
|
|
118706
|
-
passThrough.end();
|
|
118707
|
-
});
|
|
118708
|
-
body.once("close", () => {
|
|
118709
|
-
if (completed) {
|
|
118710
|
-
if (!passThrough.writableEnded) {
|
|
118711
|
-
passThrough.end();
|
|
118981
|
+
options,
|
|
118982
|
+
isSync,
|
|
118983
|
+
};
|
|
118984
|
+
};
|
|
118985
|
+
|
|
118986
|
+
// Main logic for `execaCommand()`
|
|
118987
|
+
const mapCommandAsync = ({file, commandArguments}) => parseCommand(file, commandArguments);
|
|
118988
|
+
|
|
118989
|
+
// Main logic for `execaCommandSync()`
|
|
118990
|
+
const mapCommandSync = ({file, commandArguments}) => ({...parseCommand(file, commandArguments), isSync: true});
|
|
118991
|
+
|
|
118992
|
+
// Convert `execaCommand(command)` into `execa(file, ...commandArguments)`
|
|
118993
|
+
const parseCommand = (command, unusedArguments) => {
|
|
118994
|
+
if (unusedArguments.length > 0) {
|
|
118995
|
+
throw new TypeError(`The command and its arguments must be passed as a single string: ${command} ${unusedArguments}.`);
|
|
118996
|
+
}
|
|
118997
|
+
|
|
118998
|
+
const [file, ...commandArguments] = parseCommandString(command);
|
|
118999
|
+
return {file, commandArguments};
|
|
119000
|
+
};
|
|
119001
|
+
|
|
119002
|
+
// Convert `command` string into an array of file or arguments to pass to $`${...fileOrCommandArguments}`
|
|
119003
|
+
const parseCommandString = command => {
|
|
119004
|
+
if (typeof command !== 'string') {
|
|
119005
|
+
throw new TypeError(`The command must be a string: ${String(command)}.`);
|
|
119006
|
+
}
|
|
119007
|
+
|
|
119008
|
+
const trimmedCommand = command.trim();
|
|
119009
|
+
if (trimmedCommand === '') {
|
|
119010
|
+
return [];
|
|
119011
|
+
}
|
|
119012
|
+
|
|
119013
|
+
const tokens = [];
|
|
119014
|
+
for (const token of trimmedCommand.split(SPACES_REGEXP)) {
|
|
119015
|
+
// Allow spaces to be escaped by a backslash if not meant as a delimiter
|
|
119016
|
+
const previousToken = tokens.at(-1);
|
|
119017
|
+
if (previousToken && previousToken.endsWith('\\')) {
|
|
119018
|
+
// Merge previous token with current one
|
|
119019
|
+
tokens[tokens.length - 1] = `${previousToken.slice(0, -1)} ${token}`;
|
|
119020
|
+
} else {
|
|
119021
|
+
tokens.push(token);
|
|
119022
|
+
}
|
|
119023
|
+
}
|
|
119024
|
+
|
|
119025
|
+
return tokens;
|
|
119026
|
+
};
|
|
119027
|
+
|
|
119028
|
+
const SPACES_REGEXP = / +/g;
|
|
119029
|
+
|
|
119030
|
+
// Sets `$.sync` and `$.s`
|
|
119031
|
+
const setScriptSync = (boundExeca, createNested, boundOptions) => {
|
|
119032
|
+
boundExeca.sync = createNested(mapScriptSync, boundOptions);
|
|
119033
|
+
boundExeca.s = boundExeca.sync;
|
|
119034
|
+
};
|
|
119035
|
+
|
|
119036
|
+
// Main logic for `$`
|
|
119037
|
+
const mapScriptAsync = ({options}) => getScriptOptions(options);
|
|
119038
|
+
|
|
119039
|
+
// Main logic for `$.sync`
|
|
119040
|
+
const mapScriptSync = ({options}) => ({...getScriptOptions(options), isSync: true});
|
|
119041
|
+
|
|
119042
|
+
// `$` is like `execa` but with script-friendly options: `{stdin: 'inherit', preferLocal: true}`
|
|
119043
|
+
const getScriptOptions = options => ({options: {...getScriptStdinOption(options), ...options}});
|
|
119044
|
+
|
|
119045
|
+
const getScriptStdinOption = ({input, inputFile, stdio}) => input === undefined && inputFile === undefined && stdio === undefined
|
|
119046
|
+
? {stdin: 'inherit'}
|
|
119047
|
+
: {};
|
|
119048
|
+
|
|
119049
|
+
// When using $(...).pipe(...), most script-friendly options should apply to both commands.
|
|
119050
|
+
// However, some options (like `stdin: 'inherit'`) would create issues with piping, i.e. cannot be deep.
|
|
119051
|
+
const deepScriptOptions = {preferLocal: true};
|
|
119052
|
+
|
|
119053
|
+
const execa = createExeca(() => ({}));
|
|
119054
|
+
createExeca(() => ({isSync: true}));
|
|
119055
|
+
createExeca(mapCommandAsync);
|
|
119056
|
+
createExeca(mapCommandSync);
|
|
119057
|
+
createExeca(mapNode);
|
|
119058
|
+
createExeca(mapScriptAsync, {}, deepScriptOptions, setScriptSync);
|
|
119059
|
+
|
|
119060
|
+
getIpcExport();
|
|
119061
|
+
|
|
119062
|
+
const MACHINE_ID_PATHS = ["/etc/machine-id", "/var/lib/dbus/machine-id"];
|
|
119063
|
+
async function readMachineIdentifier() {
|
|
119064
|
+
for (const path of MACHINE_ID_PATHS) {
|
|
119065
|
+
try {
|
|
119066
|
+
const contents = await readFile(path, "utf8");
|
|
119067
|
+
const trimmed = contents.trim();
|
|
119068
|
+
if (trimmed.length > 0) {
|
|
119069
|
+
return trimmed;
|
|
118712
119070
|
}
|
|
118713
|
-
return;
|
|
118714
119071
|
}
|
|
118715
|
-
|
|
118716
|
-
|
|
118717
|
-
agentEngineType,
|
|
118718
|
-
error: closeError,
|
|
118719
|
-
level: "error",
|
|
118720
|
-
logger,
|
|
118721
|
-
requestBodyBytes,
|
|
118722
|
-
requestPath,
|
|
118723
|
-
responseBytes,
|
|
118724
|
-
usage
|
|
118725
|
-
});
|
|
118726
|
-
finalize(closeError);
|
|
118727
|
-
if (!passThrough.writableEnded) {
|
|
118728
|
-
passThrough.end();
|
|
119072
|
+
catch {
|
|
119073
|
+
// Ignore and continue to next candidate
|
|
118729
119074
|
}
|
|
118730
|
-
});
|
|
118731
|
-
return {
|
|
118732
|
-
stream: passThrough
|
|
118733
|
-
};
|
|
118734
|
-
}
|
|
118735
|
-
function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBytes, requestPath, responseBytes, usage }) {
|
|
118736
|
-
const metricsMessage = [
|
|
118737
|
-
"LLM engine stream metrics",
|
|
118738
|
-
`path=${requestPath}`,
|
|
118739
|
-
`bytesTo=${requestBodyBytes}`,
|
|
118740
|
-
`bytesFrom=${responseBytes}`,
|
|
118741
|
-
`promptTokens=${usage?.promptTokens ?? "n/a"}`,
|
|
118742
|
-
`completionTokens=${usage?.completionTokens ?? "n/a"}`,
|
|
118743
|
-
`totalTokens=${usage?.totalTokens ?? "n/a"}`,
|
|
118744
|
-
`contextUsage=${usage?.contextUsage ?? "n/a"}`
|
|
118745
|
-
].join(" ");
|
|
118746
|
-
const attributes = {
|
|
118747
|
-
agentEngineType,
|
|
118748
|
-
requestUrl: requestPath
|
|
118749
|
-
};
|
|
118750
|
-
if (error) {
|
|
118751
|
-
attributes.error = error;
|
|
118752
119075
|
}
|
|
118753
|
-
|
|
118754
|
-
}
|
|
118755
|
-
|
|
118756
|
-
function isPlainObject(value) {
|
|
118757
|
-
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
119076
|
+
return os.hostname();
|
|
118758
119077
|
}
|
|
118759
|
-
function
|
|
118760
|
-
|
|
118761
|
-
const
|
|
118762
|
-
|
|
118763
|
-
|
|
118764
|
-
|
|
118765
|
-
|
|
119078
|
+
async function detectLlamaCppVersion() {
|
|
119079
|
+
try {
|
|
119080
|
+
const { stdout } = await execa("llama-server", ["--version"]);
|
|
119081
|
+
const versionLine = stdout.trim();
|
|
119082
|
+
return versionLine.length > 0 ? (versionLine.split("\n")[0] ?? null) : null;
|
|
119083
|
+
}
|
|
119084
|
+
catch {
|
|
119085
|
+
return null;
|
|
118766
119086
|
}
|
|
118767
|
-
const requestPayload = { ...body };
|
|
118768
|
-
const streamOptions = requestPayload.stream_options;
|
|
118769
|
-
const normalizedStreamOptions = isPlainObject(streamOptions)
|
|
118770
|
-
? { ...streamOptions }
|
|
118771
|
-
: {};
|
|
118772
|
-
normalizedStreamOptions.include_usage = true;
|
|
118773
|
-
requestPayload.stream_options = normalizedStreamOptions;
|
|
118774
|
-
const payload = JSON.stringify(requestPayload);
|
|
118775
|
-
return {
|
|
118776
|
-
bytes: Buffer.byteLength(payload, "utf8"),
|
|
118777
|
-
payload
|
|
118778
|
-
};
|
|
118779
119087
|
}
|
|
118780
|
-
function
|
|
118781
|
-
|
|
118782
|
-
|
|
119088
|
+
async function detectVLLMVersion() {
|
|
119089
|
+
try {
|
|
119090
|
+
const { stdout } = await execa("python3", [
|
|
119091
|
+
"-c",
|
|
119092
|
+
"import importlib.metadata as md; print(md.version('vllm'))"
|
|
119093
|
+
]);
|
|
119094
|
+
const version = stdout.trim();
|
|
119095
|
+
return version.length > 0 ? version : null;
|
|
118783
119096
|
}
|
|
118784
|
-
|
|
118785
|
-
|
|
118786
|
-
return 0;
|
|
119097
|
+
catch {
|
|
119098
|
+
return null;
|
|
118787
119099
|
}
|
|
118788
|
-
return Math.round(tokensPerSecond);
|
|
118789
119100
|
}
|
|
118790
|
-
|
|
118791
|
-
|
|
118792
|
-
|
|
118793
|
-
return value;
|
|
118794
|
-
}
|
|
118795
|
-
return 0;
|
|
118796
|
-
}
|
|
118797
|
-
function reportMetricsSafe(payload) {
|
|
118798
|
-
reportMetrics(payload).catch(error => {
|
|
118799
|
-
logger.warn("Failed to upload LLM prompt metrics", {
|
|
118800
|
-
error: asError(error),
|
|
118801
|
-
requestUrl: path
|
|
118802
|
-
});
|
|
118803
|
-
});
|
|
119101
|
+
function normalizeMegabytes(value) {
|
|
119102
|
+
if (typeof value !== "number" || Number.isNaN(value)) {
|
|
119103
|
+
return null;
|
|
118804
119104
|
}
|
|
118805
|
-
|
|
118806
|
-
|
|
118807
|
-
|
|
118808
|
-
|
|
118809
|
-
|
|
118810
|
-
const completionTokens = normalizeTokenCount(usage?.completionTokens);
|
|
118811
|
-
const promptTokens = normalizeTokenCount(usage?.promptTokens);
|
|
118812
|
-
const totalTokens = normalizeTokenCount(usage?.totalTokens ?? completionTokens + promptTokens);
|
|
118813
|
-
const latencyMs = Math.max(0, durationMs);
|
|
118814
|
-
reportMetricsSafe({
|
|
118815
|
-
bytes: requestBodyBytes + responseBytes,
|
|
118816
|
-
completionTokens,
|
|
118817
|
-
engine: configuration.agentEngineType,
|
|
118818
|
-
endpointId: null,
|
|
118819
|
-
latencyMs,
|
|
118820
|
-
modelId: modelID,
|
|
118821
|
-
promptTokens,
|
|
118822
|
-
requestBytes: requestBodyBytes,
|
|
118823
|
-
requestId: null,
|
|
118824
|
-
requestMethod: "POST",
|
|
118825
|
-
requestPath: path,
|
|
118826
|
-
responseBytes,
|
|
118827
|
-
successful: !error,
|
|
118828
|
-
timeToFirstTokenMs,
|
|
118829
|
-
tokensPerSecond: calculateTokensPerSecond({
|
|
118830
|
-
durationMs: latencyMs,
|
|
118831
|
-
totalTokens
|
|
118832
|
-
}),
|
|
118833
|
-
totalTokens
|
|
118834
|
-
});
|
|
118835
|
-
};
|
|
118836
|
-
const response = await modelManager
|
|
118837
|
-
.fetchOpenAI(path, {
|
|
118838
|
-
body: serializedBody,
|
|
118839
|
-
headers: {
|
|
118840
|
-
"Content-Type": "application/json"
|
|
118841
|
-
},
|
|
118842
|
-
method: "POST"
|
|
118843
|
-
})
|
|
118844
|
-
.catch(error => {
|
|
118845
|
-
logEngineMetrics({
|
|
118846
|
-
agentEngineType: configuration.agentEngineType,
|
|
118847
|
-
error: error,
|
|
118848
|
-
level: "error",
|
|
118849
|
-
logger,
|
|
118850
|
-
requestBodyBytes,
|
|
118851
|
-
requestPath: path,
|
|
118852
|
-
responseBytes: 0,
|
|
118853
|
-
usage: null
|
|
118854
|
-
});
|
|
118855
|
-
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
118856
|
-
reportMetricsSafe({
|
|
118857
|
-
bytes: requestBodyBytes,
|
|
118858
|
-
completionTokens: 0,
|
|
118859
|
-
engine: configuration.agentEngineType,
|
|
118860
|
-
endpointId: null,
|
|
118861
|
-
latencyMs,
|
|
118862
|
-
modelId: modelID,
|
|
118863
|
-
promptTokens: 0,
|
|
118864
|
-
requestBytes: requestBodyBytes,
|
|
118865
|
-
requestId: null,
|
|
118866
|
-
requestMethod: "POST",
|
|
118867
|
-
requestPath: path,
|
|
118868
|
-
responseBytes: 0,
|
|
118869
|
-
successful: false,
|
|
118870
|
-
timeToFirstTokenMs: null,
|
|
118871
|
-
tokensPerSecond: 0,
|
|
118872
|
-
totalTokens: 0
|
|
118873
|
-
});
|
|
118874
|
-
throw error;
|
|
118875
|
-
});
|
|
118876
|
-
const responseStatusText = response.statusText ?? "Upstream request failed";
|
|
118877
|
-
if (!response.ok) {
|
|
118878
|
-
const responseBody = await response.text().catch(() => null);
|
|
118879
|
-
const responseError = new Error(responseBody
|
|
118880
|
-
? `Upstream error response: ${responseBody}`
|
|
118881
|
-
: "Upstream error response: empty body");
|
|
118882
|
-
logger.error("LLM engine request failed", {
|
|
118883
|
-
error: responseError,
|
|
118884
|
-
requestUrl: path,
|
|
118885
|
-
statusCode: response.status,
|
|
118886
|
-
statusText: responseStatusText,
|
|
118887
|
-
responseBody: responseBody ?? undefined
|
|
118888
|
-
});
|
|
118889
|
-
if (!response.body) {
|
|
118890
|
-
return {
|
|
118891
|
-
status: response.status,
|
|
118892
|
-
statusText: responseStatusText
|
|
118893
|
-
};
|
|
118894
|
-
}
|
|
119105
|
+
return Math.round(value * 1024 * 1024);
|
|
119106
|
+
}
|
|
119107
|
+
function resolveCpuValue(value) {
|
|
119108
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
119109
|
+
return value;
|
|
118895
119110
|
}
|
|
118896
|
-
if (
|
|
118897
|
-
|
|
118898
|
-
|
|
118899
|
-
level: response.ok ? "info" : "error",
|
|
118900
|
-
logger,
|
|
118901
|
-
requestBodyBytes,
|
|
118902
|
-
requestPath: path,
|
|
118903
|
-
responseBytes: 0,
|
|
118904
|
-
usage: null
|
|
118905
|
-
});
|
|
118906
|
-
const latencyMs = Math.max(0, Date.now() - requestStartedAt);
|
|
118907
|
-
reportMetricsSafe({
|
|
118908
|
-
bytes: requestBodyBytes,
|
|
118909
|
-
completionTokens: 0,
|
|
118910
|
-
engine: configuration.agentEngineType,
|
|
118911
|
-
endpointId: null,
|
|
118912
|
-
latencyMs,
|
|
118913
|
-
modelId: modelID,
|
|
118914
|
-
promptTokens: 0,
|
|
118915
|
-
requestBytes: requestBodyBytes,
|
|
118916
|
-
requestId: null,
|
|
118917
|
-
requestMethod: "POST",
|
|
118918
|
-
requestPath: path,
|
|
118919
|
-
responseBytes: 0,
|
|
118920
|
-
successful: false,
|
|
118921
|
-
timeToFirstTokenMs: null,
|
|
118922
|
-
tokensPerSecond: 0,
|
|
118923
|
-
totalTokens: 0
|
|
118924
|
-
});
|
|
118925
|
-
return {
|
|
118926
|
-
status: response.status,
|
|
118927
|
-
statusText: responseStatusText
|
|
118928
|
-
};
|
|
119111
|
+
if (typeof value === "string") {
|
|
119112
|
+
const parsed = Number(value);
|
|
119113
|
+
return Number.isFinite(parsed) ? parsed : null;
|
|
118929
119114
|
}
|
|
118930
|
-
|
|
118931
|
-
|
|
118932
|
-
|
|
118933
|
-
|
|
118934
|
-
|
|
118935
|
-
|
|
118936
|
-
|
|
118937
|
-
|
|
118938
|
-
|
|
118939
|
-
|
|
118940
|
-
|
|
118941
|
-
|
|
118942
|
-
|
|
118943
|
-
|
|
118944
|
-
|
|
118945
|
-
|
|
118946
|
-
|
|
118947
|
-
|
|
118948
|
-
|
|
118949
|
-
|
|
118950
|
-
|
|
118951
|
-
|
|
118952
|
-
|
|
118953
|
-
|
|
118954
|
-
|
|
118955
|
-
|
|
118956
|
-
|
|
118957
|
-
|
|
118958
|
-
|
|
119115
|
+
return null;
|
|
119116
|
+
}
|
|
119117
|
+
async function collectMachineMetadata() {
|
|
119118
|
+
const [cpuResult, memResult, osResult, graphicsResult] = await Promise.allSettled([
|
|
119119
|
+
si.cpu(),
|
|
119120
|
+
si.mem(),
|
|
119121
|
+
si.osInfo(),
|
|
119122
|
+
si.graphics()
|
|
119123
|
+
]);
|
|
119124
|
+
const cpuInfo = cpuResult.status === "fulfilled" ? cpuResult.value : null;
|
|
119125
|
+
const memInfo = memResult.status === "fulfilled" ? memResult.value : null;
|
|
119126
|
+
const osInfo = osResult.status === "fulfilled" ? osResult.value : null;
|
|
119127
|
+
const graphicsInfo = graphicsResult.status === "fulfilled"
|
|
119128
|
+
? graphicsResult.value
|
|
119129
|
+
: { controllers: [] };
|
|
119130
|
+
const gpus = (graphicsInfo.controllers ?? []).map((controller) => ({
|
|
119131
|
+
bus: controller.bus ?? null,
|
|
119132
|
+
driverVersion: controller.driverVersion ?? null,
|
|
119133
|
+
memoryFreeBytes: normalizeMegabytes(controller.memoryFree ?? null),
|
|
119134
|
+
memoryTotalBytes: normalizeMegabytes(controller.memoryTotal ?? null),
|
|
119135
|
+
model: controller.model ?? controller.name ?? null,
|
|
119136
|
+
temperatureCelsius: controller.temperatureGpu ?? null,
|
|
119137
|
+
vendor: controller.vendor ?? null
|
|
119138
|
+
}));
|
|
119139
|
+
const machineMetadata = {
|
|
119140
|
+
cpu: {
|
|
119141
|
+
baseClockGHz: resolveCpuValue(cpuInfo?.speed ?? null),
|
|
119142
|
+
logicalCores: cpuInfo?.cores ?? null,
|
|
119143
|
+
maxClockGHz: resolveCpuValue(cpuInfo?.speedMax ?? null),
|
|
119144
|
+
model: cpuInfo?.brand ?? null,
|
|
119145
|
+
physicalCores: cpuInfo?.physicalCores ?? null
|
|
119146
|
+
},
|
|
119147
|
+
gpus,
|
|
119148
|
+
hostname: os.hostname(),
|
|
119149
|
+
llamaCppVersion: await detectLlamaCppVersion(),
|
|
119150
|
+
machineID: await readMachineIdentifier(),
|
|
119151
|
+
memory: {
|
|
119152
|
+
availableBytes: memInfo?.available ?? null,
|
|
119153
|
+
totalBytes: memInfo?.total ?? null
|
|
119154
|
+
},
|
|
119155
|
+
os: {
|
|
119156
|
+
arch: osInfo?.arch ?? os.arch(),
|
|
119157
|
+
platform: osInfo?.platform ?? os.platform(),
|
|
119158
|
+
release: osInfo?.release ?? os.release(),
|
|
119159
|
+
type: osInfo?.kernel ?? null,
|
|
119160
|
+
version: osInfo?.build ?? null
|
|
119161
|
+
},
|
|
119162
|
+
vllmVersion: await detectVLLMVersion()
|
|
118959
119163
|
};
|
|
119164
|
+
return machineMetadata;
|
|
118960
119165
|
}
|
|
118961
119166
|
|
|
118962
119167
|
async function createApplication({ abortController, apiClient, configuration, logger }) {
|
|
118963
|
-
// Fetch configuration
|
|
118964
119168
|
logger.info("Fetching conduit configuration");
|
|
118965
119169
|
let conduitConfiguration = await apiClient.getConduitConfiguration();
|
|
118966
119170
|
logger.info("Received configuration", {
|
|
@@ -118986,7 +119190,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
118986
119190
|
let modelFileName = getConduitModelFileName(conduitConfiguration);
|
|
118987
119191
|
let modelName = getConduitModelName(conduitConfiguration);
|
|
118988
119192
|
const startup = Date.now();
|
|
118989
|
-
// Initialise model manager
|
|
118990
119193
|
let modelManager = new ModelManager({
|
|
118991
119194
|
contextLength: conduitConfiguration.contextLength ?? null,
|
|
118992
119195
|
engine: configuration.agentEngineType,
|
|
@@ -119027,6 +119230,7 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119027
119230
|
});
|
|
119028
119231
|
conduitStateReportManager.reportStateChange();
|
|
119029
119232
|
};
|
|
119233
|
+
let stopRequestedByControl = false;
|
|
119030
119234
|
const attachLifecycleListeners = () => {
|
|
119031
119235
|
modelManager.on("engineError", err => {
|
|
119032
119236
|
logger.error("LLM engine error", {
|
|
@@ -119035,6 +119239,9 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119035
119239
|
stopRequestedByControl = false;
|
|
119036
119240
|
setErrorState({ error: err.message });
|
|
119037
119241
|
});
|
|
119242
|
+
modelManager.on("engineReady", () => {
|
|
119243
|
+
setOnlineState();
|
|
119244
|
+
});
|
|
119038
119245
|
modelManager.on("engineTerminated", () => {
|
|
119039
119246
|
if (stopRequestedByControl) {
|
|
119040
119247
|
stopRequestedByControl = false;
|
|
@@ -119046,9 +119253,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119046
119253
|
});
|
|
119047
119254
|
conduitStateReportManager.reportStateChange();
|
|
119048
119255
|
});
|
|
119049
|
-
modelManager.on("engineReady", () => {
|
|
119050
|
-
setOnlineState();
|
|
119051
|
-
});
|
|
119052
119256
|
};
|
|
119053
119257
|
attachLifecycleListeners();
|
|
119054
119258
|
let lastDownloadKey = "";
|
|
@@ -119074,7 +119278,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119074
119278
|
});
|
|
119075
119279
|
conduitStateReportManager.reportDownloadProgress();
|
|
119076
119280
|
};
|
|
119077
|
-
let stopRequestedByControl = false;
|
|
119078
119281
|
async function startEngine() {
|
|
119079
119282
|
logger.info("Engine start requested");
|
|
119080
119283
|
conduitStateManager.setState({
|
|
@@ -119106,31 +119309,49 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119106
119309
|
});
|
|
119107
119310
|
await conduitStateReportManager.reportNow();
|
|
119108
119311
|
logger.info("Stopping engine process");
|
|
119109
|
-
|
|
119312
|
+
try {
|
|
119313
|
+
await modelManager.stop();
|
|
119314
|
+
}
|
|
119315
|
+
catch (error) {
|
|
119316
|
+
stopRequestedByControl = false;
|
|
119317
|
+
throw error;
|
|
119318
|
+
}
|
|
119110
119319
|
logger.info("Engine process stopped");
|
|
119111
119320
|
setIdleState({ reason });
|
|
119112
119321
|
}
|
|
119113
|
-
|
|
119114
|
-
|
|
119115
|
-
|
|
119116
|
-
|
|
119117
|
-
|
|
119118
|
-
|
|
119119
|
-
});
|
|
119120
|
-
modelManager.on("engineTerminated", () => {
|
|
119121
|
-
if (stopRequestedByControl) {
|
|
119122
|
-
stopRequestedByControl = false;
|
|
119123
|
-
setIdleState({ reason: "Remote shutdown requested" });
|
|
119124
|
-
return;
|
|
119322
|
+
async function cycleEngine() {
|
|
119323
|
+
const sourceState = conduitStateManager.getState().state;
|
|
119324
|
+
if (sourceState !== "idle") {
|
|
119325
|
+
await stopEngine({
|
|
119326
|
+
reason: "Remote cycle requested"
|
|
119327
|
+
});
|
|
119125
119328
|
}
|
|
119126
|
-
|
|
119127
|
-
|
|
119329
|
+
logger.info("Fetching new configuration");
|
|
119330
|
+
const newConduitConfiguration = await apiClient.getConduitConfiguration();
|
|
119331
|
+
logger.info("Received new configuration", {
|
|
119332
|
+
modelID: newConduitConfiguration.targetModel.id
|
|
119128
119333
|
});
|
|
119129
|
-
|
|
119130
|
-
|
|
119131
|
-
|
|
119132
|
-
|
|
119133
|
-
|
|
119334
|
+
logger.info("Updating configuration and model manager");
|
|
119335
|
+
conduitConfiguration = newConduitConfiguration;
|
|
119336
|
+
modelFileName = getConduitModelFileName(conduitConfiguration);
|
|
119337
|
+
modelName = getConduitModelName(conduitConfiguration);
|
|
119338
|
+
modelManager = new ModelManager({
|
|
119339
|
+
contextLength: conduitConfiguration.contextLength ?? null,
|
|
119340
|
+
engine: configuration.agentEngineType,
|
|
119341
|
+
logger,
|
|
119342
|
+
model: conduitConfiguration.targetModel,
|
|
119343
|
+
parallelism: conduitConfiguration.parallelism ?? null,
|
|
119344
|
+
root: configuration.rootDirectory
|
|
119345
|
+
});
|
|
119346
|
+
attachLifecycleListeners();
|
|
119347
|
+
if (sourceState === "idle") {
|
|
119348
|
+
logger.info("Restarting engine from idle");
|
|
119349
|
+
}
|
|
119350
|
+
else {
|
|
119351
|
+
logger.info("Restarting engine");
|
|
119352
|
+
}
|
|
119353
|
+
await startEngine();
|
|
119354
|
+
}
|
|
119134
119355
|
if (configuration.startMode === "idle") {
|
|
119135
119356
|
setIdleState({ reason: "Startup mode is idle" });
|
|
119136
119357
|
}
|
|
@@ -119143,165 +119364,47 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119143
119364
|
setErrorState({ error: parsedError.message });
|
|
119144
119365
|
});
|
|
119145
119366
|
}
|
|
119146
|
-
// #region API routes
|
|
119147
119367
|
const app = express();
|
|
119148
119368
|
const publicRouter = createRouter();
|
|
119149
119369
|
app.use(publicRouter);
|
|
119150
|
-
publicRouter.get("/health", (
|
|
119151
|
-
res.status(200).send("OK");
|
|
119152
|
-
});
|
|
119370
|
+
publicRouter.get("/health", createHealthHandler());
|
|
119153
119371
|
implementAPIReference({
|
|
119154
119372
|
api: {
|
|
119373
|
+
"/conduit/engine/cycle": {
|
|
119374
|
+
POST: createPostCycleEngineHandler({
|
|
119375
|
+
cycleEngine,
|
|
119376
|
+
conduitStateManager,
|
|
119377
|
+
getModelManager: () => modelManager,
|
|
119378
|
+
logger,
|
|
119379
|
+
setErrorState,
|
|
119380
|
+
startEngine,
|
|
119381
|
+
stopEngine,
|
|
119382
|
+
stopRequestedByControl: () => stopRequestedByControl
|
|
119383
|
+
})
|
|
119384
|
+
},
|
|
119155
119385
|
"/conduit/engine/start": {
|
|
119156
|
-
POST:
|
|
119157
|
-
|
|
119158
|
-
|
|
119159
|
-
|
|
119160
|
-
|
|
119161
|
-
|
|
119162
|
-
|
|
119163
|
-
|
|
119164
|
-
|
|
119165
|
-
|
|
119166
|
-
statusText: `Engine cannot be started from current state: ${modelManager.state}`
|
|
119167
|
-
};
|
|
119168
|
-
}
|
|
119169
|
-
try {
|
|
119170
|
-
logger.info("Received remote engine start request");
|
|
119171
|
-
await startEngine();
|
|
119172
|
-
return {
|
|
119173
|
-
body: {
|
|
119174
|
-
acknowledged: true
|
|
119175
|
-
},
|
|
119176
|
-
status: 202
|
|
119177
|
-
};
|
|
119178
|
-
}
|
|
119179
|
-
catch (error) {
|
|
119180
|
-
if (stopRequestedByControl || modelManager.state === "stopped") {
|
|
119181
|
-
return {
|
|
119182
|
-
status: 409,
|
|
119183
|
-
statusText: "Engine start was interrupted"
|
|
119184
|
-
};
|
|
119185
|
-
}
|
|
119186
|
-
const parsedError = asError(error);
|
|
119187
|
-
setErrorState({ error: parsedError.message });
|
|
119188
|
-
return {
|
|
119189
|
-
status: 500,
|
|
119190
|
-
statusText: parsedError.message
|
|
119191
|
-
};
|
|
119192
|
-
}
|
|
119193
|
-
}
|
|
119386
|
+
POST: createPostStartEngineHandler({
|
|
119387
|
+
cycleEngine,
|
|
119388
|
+
conduitStateManager,
|
|
119389
|
+
getModelManager: () => modelManager,
|
|
119390
|
+
logger,
|
|
119391
|
+
setErrorState,
|
|
119392
|
+
startEngine,
|
|
119393
|
+
stopEngine,
|
|
119394
|
+
stopRequestedByControl: () => stopRequestedByControl
|
|
119395
|
+
})
|
|
119194
119396
|
},
|
|
119195
119397
|
"/conduit/engine/stop": {
|
|
119196
|
-
POST:
|
|
119197
|
-
|
|
119198
|
-
|
|
119199
|
-
|
|
119200
|
-
|
|
119201
|
-
|
|
119202
|
-
|
|
119203
|
-
|
|
119204
|
-
|
|
119205
|
-
|
|
119206
|
-
status: 409,
|
|
119207
|
-
statusText: `Engine cannot be stopped from current state: ${modelManager.state}`
|
|
119208
|
-
};
|
|
119209
|
-
}
|
|
119210
|
-
try {
|
|
119211
|
-
logger.info("Received remote engine stop request");
|
|
119212
|
-
stopEngine({
|
|
119213
|
-
reason: "Remote shutdown requested"
|
|
119214
|
-
}).catch(error => {
|
|
119215
|
-
const parsedError = asError(error);
|
|
119216
|
-
logger.error("Remote engine stop request failed", {
|
|
119217
|
-
error: parsedError
|
|
119218
|
-
});
|
|
119219
|
-
setErrorState({ error: parsedError.message });
|
|
119220
|
-
});
|
|
119221
|
-
return {
|
|
119222
|
-
body: {
|
|
119223
|
-
acknowledged: true
|
|
119224
|
-
},
|
|
119225
|
-
status: 202
|
|
119226
|
-
};
|
|
119227
|
-
}
|
|
119228
|
-
catch (error) {
|
|
119229
|
-
const parsedError = asError(error);
|
|
119230
|
-
setErrorState({ error: parsedError.message });
|
|
119231
|
-
return {
|
|
119232
|
-
status: 500,
|
|
119233
|
-
statusText: parsedError.message
|
|
119234
|
-
};
|
|
119235
|
-
}
|
|
119236
|
-
}
|
|
119237
|
-
},
|
|
119238
|
-
"/conduit/engine/cycle": {
|
|
119239
|
-
POST: async ({ body }) => {
|
|
119240
|
-
const sourceState = conduitStateManager.getState().state;
|
|
119241
|
-
if (sourceState !== "bootingEngine" &&
|
|
119242
|
-
sourceState !== "online" &&
|
|
119243
|
-
sourceState !== "idle") {
|
|
119244
|
-
return {
|
|
119245
|
-
status: 409,
|
|
119246
|
-
statusText: "Engine can only be cycled while booting, online, or idle"
|
|
119247
|
-
};
|
|
119248
|
-
}
|
|
119249
|
-
if (sourceState !== "idle" && !modelManager.canStop) {
|
|
119250
|
-
return {
|
|
119251
|
-
status: 409,
|
|
119252
|
-
statusText: `Engine cannot be cycled from current state: ${modelManager.state}`
|
|
119253
|
-
};
|
|
119254
|
-
}
|
|
119255
|
-
try {
|
|
119256
|
-
logger.info("Received remote engine cycle request");
|
|
119257
|
-
const sourceState = conduitStateManager.getState().state;
|
|
119258
|
-
if (sourceState !== "idle") {
|
|
119259
|
-
await stopEngine({
|
|
119260
|
-
reason: "Remote cycle requested"
|
|
119261
|
-
});
|
|
119262
|
-
}
|
|
119263
|
-
logger.info("Fetching new configuration");
|
|
119264
|
-
const newConduitConfiguration = await apiClient.getConduitConfiguration();
|
|
119265
|
-
logger.info("Received new configuration", {
|
|
119266
|
-
modelID: newConduitConfiguration.targetModel.id
|
|
119267
|
-
});
|
|
119268
|
-
logger.info("Updating configuration and model manager");
|
|
119269
|
-
conduitConfiguration = newConduitConfiguration;
|
|
119270
|
-
modelFileName = getConduitModelFileName(conduitConfiguration);
|
|
119271
|
-
modelName = getConduitModelName(conduitConfiguration);
|
|
119272
|
-
modelManager = new ModelManager({
|
|
119273
|
-
contextLength: conduitConfiguration.contextLength ?? null,
|
|
119274
|
-
engine: configuration.agentEngineType,
|
|
119275
|
-
logger,
|
|
119276
|
-
model: conduitConfiguration.targetModel,
|
|
119277
|
-
parallelism: conduitConfiguration.parallelism ?? null,
|
|
119278
|
-
root: configuration.rootDirectory
|
|
119279
|
-
});
|
|
119280
|
-
attachLifecycleListeners();
|
|
119281
|
-
if (sourceState === "idle") {
|
|
119282
|
-
logger.info("Restarting engine from idle");
|
|
119283
|
-
await startEngine();
|
|
119284
|
-
}
|
|
119285
|
-
else {
|
|
119286
|
-
logger.info("Restarting engine");
|
|
119287
|
-
await startEngine();
|
|
119288
|
-
}
|
|
119289
|
-
return {
|
|
119290
|
-
body: {
|
|
119291
|
-
acknowledged: true
|
|
119292
|
-
},
|
|
119293
|
-
status: 202
|
|
119294
|
-
};
|
|
119295
|
-
}
|
|
119296
|
-
catch (error) {
|
|
119297
|
-
const parsedError = asError(error);
|
|
119298
|
-
setErrorState({ error: parsedError.message });
|
|
119299
|
-
return {
|
|
119300
|
-
status: 500,
|
|
119301
|
-
statusText: parsedError.message
|
|
119302
|
-
};
|
|
119303
|
-
}
|
|
119304
|
-
}
|
|
119398
|
+
POST: createPostStopEngineHandler({
|
|
119399
|
+
cycleEngine,
|
|
119400
|
+
conduitStateManager,
|
|
119401
|
+
getModelManager: () => modelManager,
|
|
119402
|
+
logger,
|
|
119403
|
+
setErrorState,
|
|
119404
|
+
startEngine,
|
|
119405
|
+
stopEngine,
|
|
119406
|
+
stopRequestedByControl: () => stopRequestedByControl
|
|
119407
|
+
})
|
|
119305
119408
|
}
|
|
119306
119409
|
},
|
|
119307
119410
|
logger,
|
|
@@ -119311,56 +119414,34 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119311
119414
|
implementAPIReference({
|
|
119312
119415
|
api: {
|
|
119313
119416
|
"/v1/chat/completions": {
|
|
119314
|
-
POST:
|
|
119315
|
-
|
|
119316
|
-
|
|
119317
|
-
|
|
119318
|
-
|
|
119319
|
-
|
|
119320
|
-
|
|
119321
|
-
|
|
119322
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
119323
|
-
});
|
|
119324
|
-
}
|
|
119417
|
+
POST: createPostChatCompletionsHandler({
|
|
119418
|
+
apiClient,
|
|
119419
|
+
configuration,
|
|
119420
|
+
getModelID: () => conduitConfiguration.targetModel.id,
|
|
119421
|
+
getModelManager: () => modelManager,
|
|
119422
|
+
logger,
|
|
119423
|
+
startup
|
|
119424
|
+
})
|
|
119325
119425
|
},
|
|
119326
119426
|
"/v1/completions": {
|
|
119327
|
-
POST:
|
|
119328
|
-
|
|
119329
|
-
|
|
119330
|
-
|
|
119331
|
-
|
|
119332
|
-
|
|
119333
|
-
|
|
119334
|
-
|
|
119335
|
-
reportMetrics: apiClient.reportPromptMetrics
|
|
119336
|
-
});
|
|
119337
|
-
}
|
|
119427
|
+
POST: createPostCompletionsHandler({
|
|
119428
|
+
apiClient,
|
|
119429
|
+
configuration,
|
|
119430
|
+
getModelID: () => conduitConfiguration.targetModel.id,
|
|
119431
|
+
getModelManager: () => modelManager,
|
|
119432
|
+
logger,
|
|
119433
|
+
startup
|
|
119434
|
+
})
|
|
119338
119435
|
},
|
|
119339
119436
|
"/v1/models": {
|
|
119340
|
-
GET:
|
|
119341
|
-
|
|
119342
|
-
|
|
119343
|
-
|
|
119344
|
-
|
|
119345
|
-
|
|
119346
|
-
|
|
119347
|
-
|
|
119348
|
-
object: "list",
|
|
119349
|
-
data: [
|
|
119350
|
-
{
|
|
119351
|
-
id: conduitConfiguration.targetModel.id,
|
|
119352
|
-
object: "model",
|
|
119353
|
-
created: startup / 1000,
|
|
119354
|
-
owned_by: "infersec",
|
|
119355
|
-
limit: {
|
|
119356
|
-
context: effectiveContextLength
|
|
119357
|
-
}
|
|
119358
|
-
}
|
|
119359
|
-
]
|
|
119360
|
-
},
|
|
119361
|
-
status: 200
|
|
119362
|
-
};
|
|
119363
|
-
}
|
|
119437
|
+
GET: createGetModelsHandler({
|
|
119438
|
+
apiClient,
|
|
119439
|
+
configuration,
|
|
119440
|
+
getModelID: () => conduitConfiguration.targetModel.id,
|
|
119441
|
+
getModelManager: () => modelManager,
|
|
119442
|
+
logger,
|
|
119443
|
+
startup
|
|
119444
|
+
})
|
|
119364
119445
|
}
|
|
119365
119446
|
},
|
|
119366
119447
|
logger,
|
|
@@ -119440,7 +119521,6 @@ async function createApplication({ abortController, apiClient, configuration, lo
|
|
|
119440
119521
|
app,
|
|
119441
119522
|
shutdown
|
|
119442
119523
|
};
|
|
119443
|
-
// #endregion
|
|
119444
119524
|
}
|
|
119445
119525
|
function getConduitModelFileName(configuration) {
|
|
119446
119526
|
const { source } = configuration.targetModel;
|