@llmops/app 0.3.2 → 0.3.4-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +124 -42
- package/dist/index.mjs +124 -42
- package/package.json +3 -3
package/dist/index.cjs
CHANGED
|
@@ -16432,7 +16432,8 @@ async function handleDirectProviderRequest(c, next, originalBody, providerSlug,
|
|
|
16432
16432
|
...originalBody,
|
|
16433
16433
|
model: modelName
|
|
16434
16434
|
};
|
|
16435
|
-
|
|
16435
|
+
const path$1 = c.req.path;
|
|
16436
|
+
if (path$1.endsWith("/chat/completions") || path$1.endsWith("/completions")) delete updatedBody.input;
|
|
16436
16437
|
const newHeaders = new Headers(c.req.raw.headers);
|
|
16437
16438
|
newHeaders.set("x-llmops-config", JSON.stringify(portkeyConfig));
|
|
16438
16439
|
if (portkeyConfig.default_input_guardrails) newHeaders.set("x-portkey-default-input-guardrails", JSON.stringify(portkeyConfig.default_input_guardrails));
|
|
@@ -16474,7 +16475,7 @@ const createGatewayAdapterMiddleware = () => {
|
|
|
16474
16475
|
const method = c.req.method;
|
|
16475
16476
|
const contentType = c.req.header("content-type")?.split(";")[0];
|
|
16476
16477
|
const isChatRequest = method === "POST" && contentType === "application/json" && (path$1.endsWith("/chat/completions") || path$1.endsWith("/completions"));
|
|
16477
|
-
if (!configId &&
|
|
16478
|
+
if (!configId && method === "POST" && contentType === "application/json") {
|
|
16478
16479
|
try {
|
|
16479
16480
|
const body = await c.req.json();
|
|
16480
16481
|
const model = body.model;
|
|
@@ -16613,12 +16614,49 @@ const createGatewayAdapterMiddleware = () => {
|
|
|
16613
16614
|
*/
|
|
16614
16615
|
function createStreamingCostExtractor() {
|
|
16615
16616
|
let extractedUsage = null;
|
|
16617
|
+
let extractedHookResults = void 0;
|
|
16616
16618
|
let buffer = "";
|
|
16617
16619
|
let resolveUsage;
|
|
16618
16620
|
const usagePromise = new Promise((resolve) => {
|
|
16619
16621
|
resolveUsage = resolve;
|
|
16620
16622
|
});
|
|
16621
16623
|
const decoder = new TextDecoder();
|
|
16624
|
+
/**
 * Inspect a single SSE message and record any usage or hook_results payload.
 *
 * Nothing is returned: findings are written to the enclosing
 * `extractedUsage` / `extractedHookResults` variables. Blank messages,
 * messages without a `data:` line, the `[DONE]` sentinel and payloads that
 * fail to parse as JSON are ignored.
 */
function parseSSEMessage(message) {
  const text = message.trim();
  if (!text) return;
  let eventName = null;
  let payload = null;
  // If a field appears more than once, the last occurrence wins.
  text.split("\n").forEach((row) => {
    if (row.startsWith("event:")) {
      eventName = row.slice(6).trim();
    } else if (row.startsWith("data:")) {
      payload = row.slice(5).trim();
    }
  });
  if (!payload || payload === "[DONE]") return;
  try {
    // Everything stays inside the try: a JSON `null` payload makes the
    // property reads below throw, and that must be swallowed as well.
    const parsed = JSON.parse(payload);
    const carriesHooks = eventName === "hook_results" || parsed.hook_results;
    if (carriesHooks) {
      // Hook data is either nested under `hook_results` or is the payload itself.
      const hooks = parsed.hook_results || parsed;
      const before = hooks.before_request_hooks;
      const after = hooks.after_request_hooks;
      if (before || after) {
        extractedHookResults = {
          before_request_hooks: before,
          after_request_hooks: after
        };
      }
    }
    const usage = parsed.usage;
    if (!usage) return;
    // Accept both prompt/completion and input/output token naming.
    const promptTokens = usage.prompt_tokens ?? usage.input_tokens ?? 0;
    const completionTokens = usage.completion_tokens ?? usage.output_tokens ?? 0;
    const totalTokens = usage.total_tokens ?? promptTokens + completionTokens;
    const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? usage.input_tokens_details?.cached_tokens;
    extractedUsage = {
      promptTokens,
      completionTokens,
      totalTokens,
      cachedTokens,
      hookResults: extractedHookResults
    };
  } catch {}
}
|
|
16622
16660
|
return {
|
|
16623
16661
|
stream: new TransformStream({
|
|
16624
16662
|
transform(chunk, controller) {
|
|
@@ -16627,39 +16665,17 @@ function createStreamingCostExtractor() {
|
|
|
16627
16665
|
buffer += text;
|
|
16628
16666
|
const messages = buffer.split("\n\n");
|
|
16629
16667
|
buffer = messages.pop() || "";
|
|
16630
|
-
for (const message of messages)
|
|
16631
|
-
const trimmed = message.trim();
|
|
16632
|
-
if (!trimmed) continue;
|
|
16633
|
-
if (!trimmed.startsWith("data:")) continue;
|
|
16634
|
-
const jsonPart = trimmed.slice(5).trim();
|
|
16635
|
-
if (jsonPart === "[DONE]") continue;
|
|
16636
|
-
try {
|
|
16637
|
-
const parsed = JSON.parse(jsonPart);
|
|
16638
|
-
if (parsed.usage) extractedUsage = {
|
|
16639
|
-
promptTokens: parsed.usage.prompt_tokens ?? 0,
|
|
16640
|
-
completionTokens: parsed.usage.completion_tokens ?? 0,
|
|
16641
|
-
totalTokens: parsed.usage.total_tokens ?? 0,
|
|
16642
|
-
cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
|
|
16643
|
-
};
|
|
16644
|
-
} catch {}
|
|
16645
|
-
}
|
|
16668
|
+
for (const message of messages) parseSSEMessage(message);
|
|
16646
16669
|
},
|
|
16647
16670
|
flush(controller) {
|
|
16648
|
-
if (buffer.trim())
|
|
16649
|
-
|
|
16650
|
-
|
|
16651
|
-
|
|
16652
|
-
|
|
16653
|
-
|
|
16654
|
-
|
|
16655
|
-
|
|
16656
|
-
completionTokens: parsed.usage.completion_tokens ?? 0,
|
|
16657
|
-
totalTokens: parsed.usage.total_tokens ?? 0,
|
|
16658
|
-
cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
|
|
16659
|
-
};
|
|
16660
|
-
} catch {}
|
|
16661
|
-
}
|
|
16662
|
-
}
|
|
16671
|
+
if (buffer.trim()) parseSSEMessage(buffer);
|
|
16672
|
+
if (extractedUsage && extractedHookResults) extractedUsage.hookResults = extractedHookResults;
|
|
16673
|
+
else if (!extractedUsage && extractedHookResults) extractedUsage = {
|
|
16674
|
+
promptTokens: 0,
|
|
16675
|
+
completionTokens: 0,
|
|
16676
|
+
totalTokens: 0,
|
|
16677
|
+
hookResults: extractedHookResults
|
|
16678
|
+
};
|
|
16663
16679
|
resolveUsage(extractedUsage);
|
|
16664
16680
|
}
|
|
16665
16681
|
}),
|
|
@@ -16899,6 +16915,47 @@ var PricingProvider = class {
|
|
|
16899
16915
|
};
|
|
16900
16916
|
const pricingProvider = new PricingProvider();
|
|
16901
16917
|
/**
 * Transform gateway hook results to our schema format for telemetry.
 *
 * Every check of every before/after request hook is flattened into one
 * `results` list, and the hook-level `execution_time` values are summed.
 * Malformed entries (hooks without a `checks` array, missing
 * `execution_time`, non-string check ids) are tolerated instead of throwing,
 * so a bad payload cannot break request logging.
 *
 * @param {object|null|undefined} hookResults - Gateway payload with optional
 *   `before_request_hooks` and `after_request_hooks` arrays.
 * @param {boolean} wasBlocked - True when the caller determined the request
 *   was blocked; forces `action` to "blocked".
 * @returns {{results: object[], action: string, totalLatencyMs: number}|null}
 *   `null` when there are no hooks at all; otherwise the flattened checks,
 *   the action ("blocked", "logged" when any check has a falsy verdict, else
 *   "allowed") and the summed hook latency.
 */
function transformHookResultsToGuardrailResults(hookResults, wasBlocked) {
  if (!hookResults) return null;
  // Anything that is not an array (missing key, malformed payload) counts as "no entries".
  const asList = (value) => (Array.isArray(value) ? value : []);
  const hookGroups = [
    ["beforeRequestHook", asList(hookResults.before_request_hooks)],
    ["afterRequestHook", asList(hookResults.after_request_hooks)]
  ];
  if (hookGroups.every(([, hooks]) => hooks.length === 0)) return null;
  const results = [];
  let totalLatencyMs = 0;
  for (const [hookType, hooks] of hookGroups) {
    for (const hook of hooks) {
      if (!hook) continue;
      // A missing execution_time must not turn the running total into NaN.
      if (Number.isFinite(hook.execution_time)) totalLatencyMs += hook.execution_time;
      for (const check of asList(hook.checks)) {
        if (!check) continue;
        const checkId = check.id;
        // For dotted ids the second segment is the function id; otherwise the id is used as-is.
        const functionId = typeof checkId === "string" ? checkId.split(".")[1] || checkId : checkId;
        results.push({
          checkId,
          functionId,
          hookType,
          verdict: check.verdict,
          latencyMs: check.execution_time
        });
      }
    }
  }
  const anyFailed = results.some((r) => !r.verdict);
  let action = "allowed";
  if (wasBlocked) action = "blocked";
  else if (anyFailed) action = "logged";
  return {
    results,
    action,
    totalLatencyMs
  };
}
|
|
16958
|
+
/**
|
|
16902
16959
|
* Creates cost tracking middleware that logs LLM requests with usage and cost data.
|
|
16903
16960
|
*
|
|
16904
16961
|
* Features:
|
|
@@ -16913,7 +16970,18 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16913
16970
|
return async (c, next) => {
|
|
16914
16971
|
if (!enabled) return next();
|
|
16915
16972
|
const path$1 = c.req.path;
|
|
16916
|
-
if (!
|
|
16973
|
+
if (![
|
|
16974
|
+
"/chat/completions",
|
|
16975
|
+
"/completions",
|
|
16976
|
+
"/responses",
|
|
16977
|
+
"/embeddings",
|
|
16978
|
+
"/images/generations",
|
|
16979
|
+
"/images/edits",
|
|
16980
|
+
"/audio/speech",
|
|
16981
|
+
"/audio/transcriptions",
|
|
16982
|
+
"/audio/translations",
|
|
16983
|
+
"/messages"
|
|
16984
|
+
].some((endpoint) => path$1.endsWith(endpoint) || endpoint === "/responses" && path$1.match(/\/responses\/[^/]+$/))) return next();
|
|
16917
16985
|
const requestId = (0, node_crypto.randomUUID)();
|
|
16918
16986
|
const startTime = Date.now();
|
|
16919
16987
|
c.header("x-llmops-request-id", requestId);
|
|
@@ -16976,6 +17044,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16976
17044
|
const { response: wrappedResponse, usagePromise } = wrapStreamingResponse(response);
|
|
16977
17045
|
c.res = wrappedResponse;
|
|
16978
17046
|
usagePromise.then(async (usage) => {
|
|
17047
|
+
const guardrailResults = usage?.hookResults ? transformHookResultsToGuardrailResults(usage.hookResults, statusCode === 446) : null;
|
|
16979
17048
|
await processUsageAndLog({
|
|
16980
17049
|
requestId,
|
|
16981
17050
|
provider,
|
|
@@ -16994,6 +17063,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16994
17063
|
totalTokens: usage.totalTokens,
|
|
16995
17064
|
cachedTokens: usage.cachedTokens
|
|
16996
17065
|
} : null,
|
|
17066
|
+
guardrailResults,
|
|
16997
17067
|
tags: customTags,
|
|
16998
17068
|
batchWriter,
|
|
16999
17069
|
trackErrors,
|
|
@@ -17004,14 +17074,24 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
17004
17074
|
});
|
|
17005
17075
|
} else {
|
|
17006
17076
|
let usage = null;
|
|
17077
|
+
let guardrailResults = null;
|
|
17007
17078
|
try {
|
|
17008
17079
|
const responseBody = await response.clone().json();
|
|
17009
|
-
if (responseBody.usage)
|
|
17010
|
-
promptTokens
|
|
17011
|
-
completionTokens
|
|
17012
|
-
|
|
17013
|
-
|
|
17014
|
-
|
|
17080
|
+
if (responseBody.usage) {
|
|
17081
|
+
const promptTokens = responseBody.usage.prompt_tokens ?? responseBody.usage.input_tokens ?? 0;
|
|
17082
|
+
const completionTokens = responseBody.usage.completion_tokens ?? responseBody.usage.output_tokens ?? 0;
|
|
17083
|
+
usage = {
|
|
17084
|
+
promptTokens,
|
|
17085
|
+
completionTokens,
|
|
17086
|
+
totalTokens: responseBody.usage.total_tokens || promptTokens + completionTokens,
|
|
17087
|
+
cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens ?? responseBody.usage.input_tokens_details?.cached_tokens
|
|
17088
|
+
};
|
|
17089
|
+
}
|
|
17090
|
+
if (responseBody.hook_results) {
|
|
17091
|
+
const wasBlocked = statusCode === 446;
|
|
17092
|
+
guardrailResults = transformHookResultsToGuardrailResults(responseBody.hook_results, wasBlocked);
|
|
17093
|
+
if (guardrailResults) log(`Extracted guardrail results: ${guardrailResults.results.length} checks, action=${guardrailResults.action}`);
|
|
17094
|
+
}
|
|
17015
17095
|
} catch {
|
|
17016
17096
|
log("Failed to parse response body for usage");
|
|
17017
17097
|
}
|
|
@@ -17028,6 +17108,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
17028
17108
|
latencyMs,
|
|
17029
17109
|
isStreaming: false,
|
|
17030
17110
|
usage,
|
|
17111
|
+
guardrailResults,
|
|
17031
17112
|
tags: customTags,
|
|
17032
17113
|
batchWriter,
|
|
17033
17114
|
trackErrors,
|
|
@@ -17040,7 +17121,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
17040
17121
|
* Process usage data and log to batch writer
|
|
17041
17122
|
*/
|
|
17042
17123
|
async function processUsageAndLog(params) {
|
|
17043
|
-
const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, tags = {}, batchWriter, trackErrors, log } = params;
|
|
17124
|
+
const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, guardrailResults, tags = {}, batchWriter, trackErrors, log } = params;
|
|
17044
17125
|
if (!trackErrors && statusCode >= 400) {
|
|
17045
17126
|
log(`Skipping error response (${statusCode})`);
|
|
17046
17127
|
return;
|
|
@@ -17091,7 +17172,8 @@ async function processUsageAndLog(params) {
|
|
|
17091
17172
|
statusCode,
|
|
17092
17173
|
latencyMs,
|
|
17093
17174
|
isStreaming,
|
|
17094
|
-
tags
|
|
17175
|
+
tags,
|
|
17176
|
+
guardrailResults: guardrailResults || null
|
|
17095
17177
|
};
|
|
17096
17178
|
batchWriter.enqueue(requestData);
|
|
17097
17179
|
log(`Enqueued request ${requestId} for logging`);
|
package/dist/index.mjs
CHANGED
|
@@ -16404,7 +16404,8 @@ async function handleDirectProviderRequest(c, next, originalBody, providerSlug,
|
|
|
16404
16404
|
...originalBody,
|
|
16405
16405
|
model: modelName
|
|
16406
16406
|
};
|
|
16407
|
-
|
|
16407
|
+
const path = c.req.path;
|
|
16408
|
+
if (path.endsWith("/chat/completions") || path.endsWith("/completions")) delete updatedBody.input;
|
|
16408
16409
|
const newHeaders = new Headers(c.req.raw.headers);
|
|
16409
16410
|
newHeaders.set("x-llmops-config", JSON.stringify(portkeyConfig));
|
|
16410
16411
|
if (portkeyConfig.default_input_guardrails) newHeaders.set("x-portkey-default-input-guardrails", JSON.stringify(portkeyConfig.default_input_guardrails));
|
|
@@ -16446,7 +16447,7 @@ const createGatewayAdapterMiddleware = () => {
|
|
|
16446
16447
|
const method = c.req.method;
|
|
16447
16448
|
const contentType = c.req.header("content-type")?.split(";")[0];
|
|
16448
16449
|
const isChatRequest = method === "POST" && contentType === "application/json" && (path.endsWith("/chat/completions") || path.endsWith("/completions"));
|
|
16449
|
-
if (!configId &&
|
|
16450
|
+
if (!configId && method === "POST" && contentType === "application/json") {
|
|
16450
16451
|
try {
|
|
16451
16452
|
const body = await c.req.json();
|
|
16452
16453
|
const model = body.model;
|
|
@@ -16585,12 +16586,49 @@ const createGatewayAdapterMiddleware = () => {
|
|
|
16585
16586
|
*/
|
|
16586
16587
|
function createStreamingCostExtractor() {
|
|
16587
16588
|
let extractedUsage = null;
|
|
16589
|
+
let extractedHookResults = void 0;
|
|
16588
16590
|
let buffer = "";
|
|
16589
16591
|
let resolveUsage;
|
|
16590
16592
|
const usagePromise = new Promise((resolve) => {
|
|
16591
16593
|
resolveUsage = resolve;
|
|
16592
16594
|
});
|
|
16593
16595
|
const decoder = new TextDecoder();
|
|
16596
|
+
/**
 * Inspect a single SSE message and record any usage or hook_results payload.
 *
 * Nothing is returned: findings are written to the enclosing
 * `extractedUsage` / `extractedHookResults` variables. Blank messages,
 * messages without a `data:` line, the `[DONE]` sentinel and payloads that
 * fail to parse as JSON are ignored.
 */
function parseSSEMessage(message) {
  const text = message.trim();
  if (!text) return;
  let eventName = null;
  let payload = null;
  // If a field appears more than once, the last occurrence wins.
  text.split("\n").forEach((row) => {
    if (row.startsWith("event:")) {
      eventName = row.slice(6).trim();
    } else if (row.startsWith("data:")) {
      payload = row.slice(5).trim();
    }
  });
  if (!payload || payload === "[DONE]") return;
  try {
    // Everything stays inside the try: a JSON `null` payload makes the
    // property reads below throw, and that must be swallowed as well.
    const parsed = JSON.parse(payload);
    const carriesHooks = eventName === "hook_results" || parsed.hook_results;
    if (carriesHooks) {
      // Hook data is either nested under `hook_results` or is the payload itself.
      const hooks = parsed.hook_results || parsed;
      const before = hooks.before_request_hooks;
      const after = hooks.after_request_hooks;
      if (before || after) {
        extractedHookResults = {
          before_request_hooks: before,
          after_request_hooks: after
        };
      }
    }
    const usage = parsed.usage;
    if (!usage) return;
    // Accept both prompt/completion and input/output token naming.
    const promptTokens = usage.prompt_tokens ?? usage.input_tokens ?? 0;
    const completionTokens = usage.completion_tokens ?? usage.output_tokens ?? 0;
    const totalTokens = usage.total_tokens ?? promptTokens + completionTokens;
    const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? usage.input_tokens_details?.cached_tokens;
    extractedUsage = {
      promptTokens,
      completionTokens,
      totalTokens,
      cachedTokens,
      hookResults: extractedHookResults
    };
  } catch {}
}
|
|
16594
16632
|
return {
|
|
16595
16633
|
stream: new TransformStream({
|
|
16596
16634
|
transform(chunk, controller) {
|
|
@@ -16599,39 +16637,17 @@ function createStreamingCostExtractor() {
|
|
|
16599
16637
|
buffer += text;
|
|
16600
16638
|
const messages = buffer.split("\n\n");
|
|
16601
16639
|
buffer = messages.pop() || "";
|
|
16602
|
-
for (const message of messages)
|
|
16603
|
-
const trimmed = message.trim();
|
|
16604
|
-
if (!trimmed) continue;
|
|
16605
|
-
if (!trimmed.startsWith("data:")) continue;
|
|
16606
|
-
const jsonPart = trimmed.slice(5).trim();
|
|
16607
|
-
if (jsonPart === "[DONE]") continue;
|
|
16608
|
-
try {
|
|
16609
|
-
const parsed = JSON.parse(jsonPart);
|
|
16610
|
-
if (parsed.usage) extractedUsage = {
|
|
16611
|
-
promptTokens: parsed.usage.prompt_tokens ?? 0,
|
|
16612
|
-
completionTokens: parsed.usage.completion_tokens ?? 0,
|
|
16613
|
-
totalTokens: parsed.usage.total_tokens ?? 0,
|
|
16614
|
-
cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
|
|
16615
|
-
};
|
|
16616
|
-
} catch {}
|
|
16617
|
-
}
|
|
16640
|
+
for (const message of messages) parseSSEMessage(message);
|
|
16618
16641
|
},
|
|
16619
16642
|
flush(controller) {
|
|
16620
|
-
if (buffer.trim())
|
|
16621
|
-
|
|
16622
|
-
|
|
16623
|
-
|
|
16624
|
-
|
|
16625
|
-
|
|
16626
|
-
|
|
16627
|
-
|
|
16628
|
-
completionTokens: parsed.usage.completion_tokens ?? 0,
|
|
16629
|
-
totalTokens: parsed.usage.total_tokens ?? 0,
|
|
16630
|
-
cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
|
|
16631
|
-
};
|
|
16632
|
-
} catch {}
|
|
16633
|
-
}
|
|
16634
|
-
}
|
|
16643
|
+
if (buffer.trim()) parseSSEMessage(buffer);
|
|
16644
|
+
if (extractedUsage && extractedHookResults) extractedUsage.hookResults = extractedHookResults;
|
|
16645
|
+
else if (!extractedUsage && extractedHookResults) extractedUsage = {
|
|
16646
|
+
promptTokens: 0,
|
|
16647
|
+
completionTokens: 0,
|
|
16648
|
+
totalTokens: 0,
|
|
16649
|
+
hookResults: extractedHookResults
|
|
16650
|
+
};
|
|
16635
16651
|
resolveUsage(extractedUsage);
|
|
16636
16652
|
}
|
|
16637
16653
|
}),
|
|
@@ -16871,6 +16887,47 @@ var PricingProvider = class {
|
|
|
16871
16887
|
};
|
|
16872
16888
|
const pricingProvider = new PricingProvider();
|
|
16873
16889
|
/**
 * Transform gateway hook results to our schema format for telemetry.
 *
 * Every check of every before/after request hook is flattened into one
 * `results` list, and the hook-level `execution_time` values are summed.
 * Malformed entries (hooks without a `checks` array, missing
 * `execution_time`, non-string check ids) are tolerated instead of throwing,
 * so a bad payload cannot break request logging.
 *
 * @param {object|null|undefined} hookResults - Gateway payload with optional
 *   `before_request_hooks` and `after_request_hooks` arrays.
 * @param {boolean} wasBlocked - True when the caller determined the request
 *   was blocked; forces `action` to "blocked".
 * @returns {{results: object[], action: string, totalLatencyMs: number}|null}
 *   `null` when there are no hooks at all; otherwise the flattened checks,
 *   the action ("blocked", "logged" when any check has a falsy verdict, else
 *   "allowed") and the summed hook latency.
 */
function transformHookResultsToGuardrailResults(hookResults, wasBlocked) {
  if (!hookResults) return null;
  // Anything that is not an array (missing key, malformed payload) counts as "no entries".
  const asList = (value) => (Array.isArray(value) ? value : []);
  const hookGroups = [
    ["beforeRequestHook", asList(hookResults.before_request_hooks)],
    ["afterRequestHook", asList(hookResults.after_request_hooks)]
  ];
  if (hookGroups.every(([, hooks]) => hooks.length === 0)) return null;
  const results = [];
  let totalLatencyMs = 0;
  for (const [hookType, hooks] of hookGroups) {
    for (const hook of hooks) {
      if (!hook) continue;
      // A missing execution_time must not turn the running total into NaN.
      if (Number.isFinite(hook.execution_time)) totalLatencyMs += hook.execution_time;
      for (const check of asList(hook.checks)) {
        if (!check) continue;
        const checkId = check.id;
        // For dotted ids the second segment is the function id; otherwise the id is used as-is.
        const functionId = typeof checkId === "string" ? checkId.split(".")[1] || checkId : checkId;
        results.push({
          checkId,
          functionId,
          hookType,
          verdict: check.verdict,
          latencyMs: check.execution_time
        });
      }
    }
  }
  const anyFailed = results.some((r) => !r.verdict);
  let action = "allowed";
  if (wasBlocked) action = "blocked";
  else if (anyFailed) action = "logged";
  return {
    results,
    action,
    totalLatencyMs
  };
}
|
|
16930
|
+
/**
|
|
16874
16931
|
* Creates cost tracking middleware that logs LLM requests with usage and cost data.
|
|
16875
16932
|
*
|
|
16876
16933
|
* Features:
|
|
@@ -16885,7 +16942,18 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16885
16942
|
return async (c, next) => {
|
|
16886
16943
|
if (!enabled) return next();
|
|
16887
16944
|
const path = c.req.path;
|
|
16888
|
-
if (!
|
|
16945
|
+
if (![
|
|
16946
|
+
"/chat/completions",
|
|
16947
|
+
"/completions",
|
|
16948
|
+
"/responses",
|
|
16949
|
+
"/embeddings",
|
|
16950
|
+
"/images/generations",
|
|
16951
|
+
"/images/edits",
|
|
16952
|
+
"/audio/speech",
|
|
16953
|
+
"/audio/transcriptions",
|
|
16954
|
+
"/audio/translations",
|
|
16955
|
+
"/messages"
|
|
16956
|
+
].some((endpoint) => path.endsWith(endpoint) || endpoint === "/responses" && path.match(/\/responses\/[^/]+$/))) return next();
|
|
16889
16957
|
const requestId = randomUUID();
|
|
16890
16958
|
const startTime = Date.now();
|
|
16891
16959
|
c.header("x-llmops-request-id", requestId);
|
|
@@ -16948,6 +17016,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16948
17016
|
const { response: wrappedResponse, usagePromise } = wrapStreamingResponse(response);
|
|
16949
17017
|
c.res = wrappedResponse;
|
|
16950
17018
|
usagePromise.then(async (usage) => {
|
|
17019
|
+
const guardrailResults = usage?.hookResults ? transformHookResultsToGuardrailResults(usage.hookResults, statusCode === 446) : null;
|
|
16951
17020
|
await processUsageAndLog({
|
|
16952
17021
|
requestId,
|
|
16953
17022
|
provider,
|
|
@@ -16966,6 +17035,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16966
17035
|
totalTokens: usage.totalTokens,
|
|
16967
17036
|
cachedTokens: usage.cachedTokens
|
|
16968
17037
|
} : null,
|
|
17038
|
+
guardrailResults,
|
|
16969
17039
|
tags: customTags,
|
|
16970
17040
|
batchWriter,
|
|
16971
17041
|
trackErrors,
|
|
@@ -16976,14 +17046,24 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
16976
17046
|
});
|
|
16977
17047
|
} else {
|
|
16978
17048
|
let usage = null;
|
|
17049
|
+
let guardrailResults = null;
|
|
16979
17050
|
try {
|
|
16980
17051
|
const responseBody = await response.clone().json();
|
|
16981
|
-
if (responseBody.usage)
|
|
16982
|
-
promptTokens
|
|
16983
|
-
completionTokens
|
|
16984
|
-
|
|
16985
|
-
|
|
16986
|
-
|
|
17052
|
+
if (responseBody.usage) {
|
|
17053
|
+
const promptTokens = responseBody.usage.prompt_tokens ?? responseBody.usage.input_tokens ?? 0;
|
|
17054
|
+
const completionTokens = responseBody.usage.completion_tokens ?? responseBody.usage.output_tokens ?? 0;
|
|
17055
|
+
usage = {
|
|
17056
|
+
promptTokens,
|
|
17057
|
+
completionTokens,
|
|
17058
|
+
totalTokens: responseBody.usage.total_tokens || promptTokens + completionTokens,
|
|
17059
|
+
cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens ?? responseBody.usage.input_tokens_details?.cached_tokens
|
|
17060
|
+
};
|
|
17061
|
+
}
|
|
17062
|
+
if (responseBody.hook_results) {
|
|
17063
|
+
const wasBlocked = statusCode === 446;
|
|
17064
|
+
guardrailResults = transformHookResultsToGuardrailResults(responseBody.hook_results, wasBlocked);
|
|
17065
|
+
if (guardrailResults) log(`Extracted guardrail results: ${guardrailResults.results.length} checks, action=${guardrailResults.action}`);
|
|
17066
|
+
}
|
|
16987
17067
|
} catch {
|
|
16988
17068
|
log("Failed to parse response body for usage");
|
|
16989
17069
|
}
|
|
@@ -17000,6 +17080,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
17000
17080
|
latencyMs,
|
|
17001
17081
|
isStreaming: false,
|
|
17002
17082
|
usage,
|
|
17083
|
+
guardrailResults,
|
|
17003
17084
|
tags: customTags,
|
|
17004
17085
|
batchWriter,
|
|
17005
17086
|
trackErrors,
|
|
@@ -17012,7 +17093,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
|
|
|
17012
17093
|
* Process usage data and log to batch writer
|
|
17013
17094
|
*/
|
|
17014
17095
|
async function processUsageAndLog(params) {
|
|
17015
|
-
const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, tags = {}, batchWriter, trackErrors, log } = params;
|
|
17096
|
+
const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, guardrailResults, tags = {}, batchWriter, trackErrors, log } = params;
|
|
17016
17097
|
if (!trackErrors && statusCode >= 400) {
|
|
17017
17098
|
log(`Skipping error response (${statusCode})`);
|
|
17018
17099
|
return;
|
|
@@ -17063,7 +17144,8 @@ async function processUsageAndLog(params) {
|
|
|
17063
17144
|
statusCode,
|
|
17064
17145
|
latencyMs,
|
|
17065
17146
|
isStreaming,
|
|
17066
|
-
tags
|
|
17147
|
+
tags,
|
|
17148
|
+
guardrailResults: guardrailResults || null
|
|
17067
17149
|
};
|
|
17068
17150
|
batchWriter.enqueue(requestData);
|
|
17069
17151
|
log(`Enqueued request ${requestId} for logging`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@llmops/app",
|
|
3
|
-
"version": "0.3.2",
|
|
3
|
+
"version": "0.3.4-beta.1",
|
|
4
4
|
"description": "LLMOps application with server and client",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -67,8 +67,8 @@
|
|
|
67
67
|
"react-aria-components": "^1.13.0",
|
|
68
68
|
"react-hook-form": "^7.68.0",
|
|
69
69
|
"recharts": "^3.6.0",
|
|
70
|
-
"@llmops/core": "^0.3.
|
|
71
|
-
"@llmops/gateway": "^0.3.
|
|
70
|
+
"@llmops/core": "^0.3.4-beta.1",
|
|
71
|
+
"@llmops/gateway": "^0.3.4-beta.1"
|
|
72
72
|
},
|
|
73
73
|
"peerDependencies": {
|
|
74
74
|
"react": "^19.2.1",
|