@llmops/app 0.3.2 → 0.3.4-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.cjs +124 -42
  2. package/dist/index.mjs +124 -42
  3. package/package.json +3 -3
package/dist/index.cjs CHANGED
@@ -16432,7 +16432,8 @@ async function handleDirectProviderRequest(c, next, originalBody, providerSlug,
16432
16432
  ...originalBody,
16433
16433
  model: modelName
16434
16434
  };
16435
- delete updatedBody.input;
16435
+ const path$1 = c.req.path;
16436
+ if (path$1.endsWith("/chat/completions") || path$1.endsWith("/completions")) delete updatedBody.input;
16436
16437
  const newHeaders = new Headers(c.req.raw.headers);
16437
16438
  newHeaders.set("x-llmops-config", JSON.stringify(portkeyConfig));
16438
16439
  if (portkeyConfig.default_input_guardrails) newHeaders.set("x-portkey-default-input-guardrails", JSON.stringify(portkeyConfig.default_input_guardrails));
@@ -16474,7 +16475,7 @@ const createGatewayAdapterMiddleware = () => {
16474
16475
  const method = c.req.method;
16475
16476
  const contentType = c.req.header("content-type")?.split(";")[0];
16476
16477
  const isChatRequest = method === "POST" && contentType === "application/json" && (path$1.endsWith("/chat/completions") || path$1.endsWith("/completions"));
16477
- if (!configId && isChatRequest) {
16478
+ if (!configId && method === "POST" && contentType === "application/json") {
16478
16479
  try {
16479
16480
  const body = await c.req.json();
16480
16481
  const model = body.model;
@@ -16613,12 +16614,49 @@ const createGatewayAdapterMiddleware = () => {
16613
16614
  */
16614
16615
  function createStreamingCostExtractor() {
16615
16616
  let extractedUsage = null;
16617
+ let extractedHookResults = void 0;
16616
16618
  let buffer = "";
16617
16619
  let resolveUsage;
16618
16620
  const usagePromise = new Promise((resolve) => {
16619
16621
  resolveUsage = resolve;
16620
16622
  });
16621
16623
  const decoder = new TextDecoder();
16624
+ /**
16625
+ * Parse an SSE message and extract usage/hook_results
16626
+ */
16627
+ function parseSSEMessage(message) {
16628
+ const trimmed = message.trim();
16629
+ if (!trimmed) return;
16630
+ const lines = trimmed.split("\n");
16631
+ let eventType = null;
16632
+ let dataLine = null;
16633
+ for (const line of lines) if (line.startsWith("event:")) eventType = line.slice(6).trim();
16634
+ else if (line.startsWith("data:")) dataLine = line.slice(5).trim();
16635
+ if (dataLine === "[DONE]") return;
16636
+ if (!dataLine) return;
16637
+ try {
16638
+ const parsed = JSON.parse(dataLine);
16639
+ if (eventType === "hook_results" || parsed.hook_results) {
16640
+ const hookData = parsed.hook_results || parsed;
16641
+ if (hookData.before_request_hooks || hookData.after_request_hooks) extractedHookResults = {
16642
+ before_request_hooks: hookData.before_request_hooks,
16643
+ after_request_hooks: hookData.after_request_hooks
16644
+ };
16645
+ }
16646
+ const usageData = parsed;
16647
+ if (usageData.usage) {
16648
+ const promptTokens = usageData.usage.prompt_tokens ?? usageData.usage.input_tokens ?? 0;
16649
+ const completionTokens = usageData.usage.completion_tokens ?? usageData.usage.output_tokens ?? 0;
16650
+ extractedUsage = {
16651
+ promptTokens,
16652
+ completionTokens,
16653
+ totalTokens: usageData.usage.total_tokens ?? promptTokens + completionTokens,
16654
+ cachedTokens: usageData.usage.prompt_tokens_details?.cached_tokens ?? usageData.usage.input_tokens_details?.cached_tokens,
16655
+ hookResults: extractedHookResults
16656
+ };
16657
+ }
16658
+ } catch {}
16659
+ }
16622
16660
  return {
16623
16661
  stream: new TransformStream({
16624
16662
  transform(chunk, controller) {
@@ -16627,39 +16665,17 @@ function createStreamingCostExtractor() {
16627
16665
  buffer += text;
16628
16666
  const messages = buffer.split("\n\n");
16629
16667
  buffer = messages.pop() || "";
16630
- for (const message of messages) {
16631
- const trimmed = message.trim();
16632
- if (!trimmed) continue;
16633
- if (!trimmed.startsWith("data:")) continue;
16634
- const jsonPart = trimmed.slice(5).trim();
16635
- if (jsonPart === "[DONE]") continue;
16636
- try {
16637
- const parsed = JSON.parse(jsonPart);
16638
- if (parsed.usage) extractedUsage = {
16639
- promptTokens: parsed.usage.prompt_tokens ?? 0,
16640
- completionTokens: parsed.usage.completion_tokens ?? 0,
16641
- totalTokens: parsed.usage.total_tokens ?? 0,
16642
- cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
16643
- };
16644
- } catch {}
16645
- }
16668
+ for (const message of messages) parseSSEMessage(message);
16646
16669
  },
16647
16670
  flush(controller) {
16648
- if (buffer.trim()) {
16649
- const trimmed = buffer.trim();
16650
- if (trimmed.startsWith("data:")) {
16651
- const jsonPart = trimmed.slice(5).trim();
16652
- if (jsonPart !== "[DONE]") try {
16653
- const parsed = JSON.parse(jsonPart);
16654
- if (parsed.usage) extractedUsage = {
16655
- promptTokens: parsed.usage.prompt_tokens ?? 0,
16656
- completionTokens: parsed.usage.completion_tokens ?? 0,
16657
- totalTokens: parsed.usage.total_tokens ?? 0,
16658
- cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
16659
- };
16660
- } catch {}
16661
- }
16662
- }
16671
+ if (buffer.trim()) parseSSEMessage(buffer);
16672
+ if (extractedUsage && extractedHookResults) extractedUsage.hookResults = extractedHookResults;
16673
+ else if (!extractedUsage && extractedHookResults) extractedUsage = {
16674
+ promptTokens: 0,
16675
+ completionTokens: 0,
16676
+ totalTokens: 0,
16677
+ hookResults: extractedHookResults
16678
+ };
16663
16679
  resolveUsage(extractedUsage);
16664
16680
  }
16665
16681
  }),
@@ -16899,6 +16915,47 @@ var PricingProvider = class {
16899
16915
  };
16900
16916
  const pricingProvider = new PricingProvider();
16901
16917
  /**
16918
+ * Transform gateway hook results to our schema format for telemetry
16919
+ */
16920
+ function transformHookResultsToGuardrailResults(hookResults, wasBlocked) {
16921
+ if (!hookResults) return null;
16922
+ const beforeHooks = hookResults.before_request_hooks || [];
16923
+ const afterHooks = hookResults.after_request_hooks || [];
16924
+ if (beforeHooks.length === 0 && afterHooks.length === 0) return null;
16925
+ const results = [];
16926
+ let totalLatencyMs = 0;
16927
+ for (const hook of beforeHooks) {
16928
+ totalLatencyMs += hook.execution_time;
16929
+ for (const check$1 of hook.checks) results.push({
16930
+ checkId: check$1.id,
16931
+ functionId: check$1.id.split(".")[1] || check$1.id,
16932
+ hookType: "beforeRequestHook",
16933
+ verdict: check$1.verdict,
16934
+ latencyMs: check$1.execution_time
16935
+ });
16936
+ }
16937
+ for (const hook of afterHooks) {
16938
+ totalLatencyMs += hook.execution_time;
16939
+ for (const check$1 of hook.checks) results.push({
16940
+ checkId: check$1.id,
16941
+ functionId: check$1.id.split(".")[1] || check$1.id,
16942
+ hookType: "afterRequestHook",
16943
+ verdict: check$1.verdict,
16944
+ latencyMs: check$1.execution_time
16945
+ });
16946
+ }
16947
+ const anyFailed = results.some((r) => !r.verdict);
16948
+ let action;
16949
+ if (wasBlocked) action = "blocked";
16950
+ else if (anyFailed) action = "logged";
16951
+ else action = "allowed";
16952
+ return {
16953
+ results,
16954
+ action,
16955
+ totalLatencyMs
16956
+ };
16957
+ }
16958
+ /**
16902
16959
  * Creates cost tracking middleware that logs LLM requests with usage and cost data.
16903
16960
  *
16904
16961
  * Features:
@@ -16913,7 +16970,18 @@ function createCostTrackingMiddleware(config$1 = {}) {
16913
16970
  return async (c, next) => {
16914
16971
  if (!enabled) return next();
16915
16972
  const path$1 = c.req.path;
16916
- if (!path$1.endsWith("/chat/completions") && !path$1.endsWith("/completions")) return next();
16973
+ if (![
16974
+ "/chat/completions",
16975
+ "/completions",
16976
+ "/responses",
16977
+ "/embeddings",
16978
+ "/images/generations",
16979
+ "/images/edits",
16980
+ "/audio/speech",
16981
+ "/audio/transcriptions",
16982
+ "/audio/translations",
16983
+ "/messages"
16984
+ ].some((endpoint) => path$1.endsWith(endpoint) || endpoint === "/responses" && path$1.match(/\/responses\/[^/]+$/))) return next();
16917
16985
  const requestId = (0, node_crypto.randomUUID)();
16918
16986
  const startTime = Date.now();
16919
16987
  c.header("x-llmops-request-id", requestId);
@@ -16976,6 +17044,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
16976
17044
  const { response: wrappedResponse, usagePromise } = wrapStreamingResponse(response);
16977
17045
  c.res = wrappedResponse;
16978
17046
  usagePromise.then(async (usage) => {
17047
+ const guardrailResults = usage?.hookResults ? transformHookResultsToGuardrailResults(usage.hookResults, statusCode === 446) : null;
16979
17048
  await processUsageAndLog({
16980
17049
  requestId,
16981
17050
  provider,
@@ -16994,6 +17063,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
16994
17063
  totalTokens: usage.totalTokens,
16995
17064
  cachedTokens: usage.cachedTokens
16996
17065
  } : null,
17066
+ guardrailResults,
16997
17067
  tags: customTags,
16998
17068
  batchWriter,
16999
17069
  trackErrors,
@@ -17004,14 +17074,24 @@ function createCostTrackingMiddleware(config$1 = {}) {
17004
17074
  });
17005
17075
  } else {
17006
17076
  let usage = null;
17077
+ let guardrailResults = null;
17007
17078
  try {
17008
17079
  const responseBody = await response.clone().json();
17009
- if (responseBody.usage) usage = {
17010
- promptTokens: responseBody.usage.prompt_tokens || 0,
17011
- completionTokens: responseBody.usage.completion_tokens || 0,
17012
- totalTokens: responseBody.usage.total_tokens || 0,
17013
- cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens
17014
- };
17080
+ if (responseBody.usage) {
17081
+ const promptTokens = responseBody.usage.prompt_tokens ?? responseBody.usage.input_tokens ?? 0;
17082
+ const completionTokens = responseBody.usage.completion_tokens ?? responseBody.usage.output_tokens ?? 0;
17083
+ usage = {
17084
+ promptTokens,
17085
+ completionTokens,
17086
+ totalTokens: responseBody.usage.total_tokens || promptTokens + completionTokens,
17087
+ cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens ?? responseBody.usage.input_tokens_details?.cached_tokens
17088
+ };
17089
+ }
17090
+ if (responseBody.hook_results) {
17091
+ const wasBlocked = statusCode === 446;
17092
+ guardrailResults = transformHookResultsToGuardrailResults(responseBody.hook_results, wasBlocked);
17093
+ if (guardrailResults) log(`Extracted guardrail results: ${guardrailResults.results.length} checks, action=${guardrailResults.action}`);
17094
+ }
17015
17095
  } catch {
17016
17096
  log("Failed to parse response body for usage");
17017
17097
  }
@@ -17028,6 +17108,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
17028
17108
  latencyMs,
17029
17109
  isStreaming: false,
17030
17110
  usage,
17111
+ guardrailResults,
17031
17112
  tags: customTags,
17032
17113
  batchWriter,
17033
17114
  trackErrors,
@@ -17040,7 +17121,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
17040
17121
  * Process usage data and log to batch writer
17041
17122
  */
17042
17123
  async function processUsageAndLog(params) {
17043
- const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, tags = {}, batchWriter, trackErrors, log } = params;
17124
+ const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, guardrailResults, tags = {}, batchWriter, trackErrors, log } = params;
17044
17125
  if (!trackErrors && statusCode >= 400) {
17045
17126
  log(`Skipping error response (${statusCode})`);
17046
17127
  return;
@@ -17091,7 +17172,8 @@ async function processUsageAndLog(params) {
17091
17172
  statusCode,
17092
17173
  latencyMs,
17093
17174
  isStreaming,
17094
- tags
17175
+ tags,
17176
+ guardrailResults: guardrailResults || null
17095
17177
  };
17096
17178
  batchWriter.enqueue(requestData);
17097
17179
  log(`Enqueued request ${requestId} for logging`);
package/dist/index.mjs CHANGED
@@ -16404,7 +16404,8 @@ async function handleDirectProviderRequest(c, next, originalBody, providerSlug,
16404
16404
  ...originalBody,
16405
16405
  model: modelName
16406
16406
  };
16407
- delete updatedBody.input;
16407
+ const path = c.req.path;
16408
+ if (path.endsWith("/chat/completions") || path.endsWith("/completions")) delete updatedBody.input;
16408
16409
  const newHeaders = new Headers(c.req.raw.headers);
16409
16410
  newHeaders.set("x-llmops-config", JSON.stringify(portkeyConfig));
16410
16411
  if (portkeyConfig.default_input_guardrails) newHeaders.set("x-portkey-default-input-guardrails", JSON.stringify(portkeyConfig.default_input_guardrails));
@@ -16446,7 +16447,7 @@ const createGatewayAdapterMiddleware = () => {
16446
16447
  const method = c.req.method;
16447
16448
  const contentType = c.req.header("content-type")?.split(";")[0];
16448
16449
  const isChatRequest = method === "POST" && contentType === "application/json" && (path.endsWith("/chat/completions") || path.endsWith("/completions"));
16449
- if (!configId && isChatRequest) {
16450
+ if (!configId && method === "POST" && contentType === "application/json") {
16450
16451
  try {
16451
16452
  const body = await c.req.json();
16452
16453
  const model = body.model;
@@ -16585,12 +16586,49 @@ const createGatewayAdapterMiddleware = () => {
16585
16586
  */
16586
16587
  function createStreamingCostExtractor() {
16587
16588
  let extractedUsage = null;
16589
+ let extractedHookResults = void 0;
16588
16590
  let buffer = "";
16589
16591
  let resolveUsage;
16590
16592
  const usagePromise = new Promise((resolve) => {
16591
16593
  resolveUsage = resolve;
16592
16594
  });
16593
16595
  const decoder = new TextDecoder();
16596
+ /**
16597
+ * Parse an SSE message and extract usage/hook_results
16598
+ */
16599
+ function parseSSEMessage(message) {
16600
+ const trimmed = message.trim();
16601
+ if (!trimmed) return;
16602
+ const lines = trimmed.split("\n");
16603
+ let eventType = null;
16604
+ let dataLine = null;
16605
+ for (const line of lines) if (line.startsWith("event:")) eventType = line.slice(6).trim();
16606
+ else if (line.startsWith("data:")) dataLine = line.slice(5).trim();
16607
+ if (dataLine === "[DONE]") return;
16608
+ if (!dataLine) return;
16609
+ try {
16610
+ const parsed = JSON.parse(dataLine);
16611
+ if (eventType === "hook_results" || parsed.hook_results) {
16612
+ const hookData = parsed.hook_results || parsed;
16613
+ if (hookData.before_request_hooks || hookData.after_request_hooks) extractedHookResults = {
16614
+ before_request_hooks: hookData.before_request_hooks,
16615
+ after_request_hooks: hookData.after_request_hooks
16616
+ };
16617
+ }
16618
+ const usageData = parsed;
16619
+ if (usageData.usage) {
16620
+ const promptTokens = usageData.usage.prompt_tokens ?? usageData.usage.input_tokens ?? 0;
16621
+ const completionTokens = usageData.usage.completion_tokens ?? usageData.usage.output_tokens ?? 0;
16622
+ extractedUsage = {
16623
+ promptTokens,
16624
+ completionTokens,
16625
+ totalTokens: usageData.usage.total_tokens ?? promptTokens + completionTokens,
16626
+ cachedTokens: usageData.usage.prompt_tokens_details?.cached_tokens ?? usageData.usage.input_tokens_details?.cached_tokens,
16627
+ hookResults: extractedHookResults
16628
+ };
16629
+ }
16630
+ } catch {}
16631
+ }
16594
16632
  return {
16595
16633
  stream: new TransformStream({
16596
16634
  transform(chunk, controller) {
@@ -16599,39 +16637,17 @@ function createStreamingCostExtractor() {
16599
16637
  buffer += text;
16600
16638
  const messages = buffer.split("\n\n");
16601
16639
  buffer = messages.pop() || "";
16602
- for (const message of messages) {
16603
- const trimmed = message.trim();
16604
- if (!trimmed) continue;
16605
- if (!trimmed.startsWith("data:")) continue;
16606
- const jsonPart = trimmed.slice(5).trim();
16607
- if (jsonPart === "[DONE]") continue;
16608
- try {
16609
- const parsed = JSON.parse(jsonPart);
16610
- if (parsed.usage) extractedUsage = {
16611
- promptTokens: parsed.usage.prompt_tokens ?? 0,
16612
- completionTokens: parsed.usage.completion_tokens ?? 0,
16613
- totalTokens: parsed.usage.total_tokens ?? 0,
16614
- cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
16615
- };
16616
- } catch {}
16617
- }
16640
+ for (const message of messages) parseSSEMessage(message);
16618
16641
  },
16619
16642
  flush(controller) {
16620
- if (buffer.trim()) {
16621
- const trimmed = buffer.trim();
16622
- if (trimmed.startsWith("data:")) {
16623
- const jsonPart = trimmed.slice(5).trim();
16624
- if (jsonPart !== "[DONE]") try {
16625
- const parsed = JSON.parse(jsonPart);
16626
- if (parsed.usage) extractedUsage = {
16627
- promptTokens: parsed.usage.prompt_tokens ?? 0,
16628
- completionTokens: parsed.usage.completion_tokens ?? 0,
16629
- totalTokens: parsed.usage.total_tokens ?? 0,
16630
- cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
16631
- };
16632
- } catch {}
16633
- }
16634
- }
16643
+ if (buffer.trim()) parseSSEMessage(buffer);
16644
+ if (extractedUsage && extractedHookResults) extractedUsage.hookResults = extractedHookResults;
16645
+ else if (!extractedUsage && extractedHookResults) extractedUsage = {
16646
+ promptTokens: 0,
16647
+ completionTokens: 0,
16648
+ totalTokens: 0,
16649
+ hookResults: extractedHookResults
16650
+ };
16635
16651
  resolveUsage(extractedUsage);
16636
16652
  }
16637
16653
  }),
@@ -16871,6 +16887,47 @@ var PricingProvider = class {
16871
16887
  };
16872
16888
  const pricingProvider = new PricingProvider();
16873
16889
  /**
16890
+ * Transform gateway hook results to our schema format for telemetry
16891
+ */
16892
+ function transformHookResultsToGuardrailResults(hookResults, wasBlocked) {
16893
+ if (!hookResults) return null;
16894
+ const beforeHooks = hookResults.before_request_hooks || [];
16895
+ const afterHooks = hookResults.after_request_hooks || [];
16896
+ if (beforeHooks.length === 0 && afterHooks.length === 0) return null;
16897
+ const results = [];
16898
+ let totalLatencyMs = 0;
16899
+ for (const hook of beforeHooks) {
16900
+ totalLatencyMs += hook.execution_time;
16901
+ for (const check$1 of hook.checks) results.push({
16902
+ checkId: check$1.id,
16903
+ functionId: check$1.id.split(".")[1] || check$1.id,
16904
+ hookType: "beforeRequestHook",
16905
+ verdict: check$1.verdict,
16906
+ latencyMs: check$1.execution_time
16907
+ });
16908
+ }
16909
+ for (const hook of afterHooks) {
16910
+ totalLatencyMs += hook.execution_time;
16911
+ for (const check$1 of hook.checks) results.push({
16912
+ checkId: check$1.id,
16913
+ functionId: check$1.id.split(".")[1] || check$1.id,
16914
+ hookType: "afterRequestHook",
16915
+ verdict: check$1.verdict,
16916
+ latencyMs: check$1.execution_time
16917
+ });
16918
+ }
16919
+ const anyFailed = results.some((r) => !r.verdict);
16920
+ let action;
16921
+ if (wasBlocked) action = "blocked";
16922
+ else if (anyFailed) action = "logged";
16923
+ else action = "allowed";
16924
+ return {
16925
+ results,
16926
+ action,
16927
+ totalLatencyMs
16928
+ };
16929
+ }
16930
+ /**
16874
16931
  * Creates cost tracking middleware that logs LLM requests with usage and cost data.
16875
16932
  *
16876
16933
  * Features:
@@ -16885,7 +16942,18 @@ function createCostTrackingMiddleware(config$1 = {}) {
16885
16942
  return async (c, next) => {
16886
16943
  if (!enabled) return next();
16887
16944
  const path = c.req.path;
16888
- if (!path.endsWith("/chat/completions") && !path.endsWith("/completions")) return next();
16945
+ if (![
16946
+ "/chat/completions",
16947
+ "/completions",
16948
+ "/responses",
16949
+ "/embeddings",
16950
+ "/images/generations",
16951
+ "/images/edits",
16952
+ "/audio/speech",
16953
+ "/audio/transcriptions",
16954
+ "/audio/translations",
16955
+ "/messages"
16956
+ ].some((endpoint) => path.endsWith(endpoint) || endpoint === "/responses" && path.match(/\/responses\/[^/]+$/))) return next();
16889
16957
  const requestId = randomUUID();
16890
16958
  const startTime = Date.now();
16891
16959
  c.header("x-llmops-request-id", requestId);
@@ -16948,6 +17016,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
16948
17016
  const { response: wrappedResponse, usagePromise } = wrapStreamingResponse(response);
16949
17017
  c.res = wrappedResponse;
16950
17018
  usagePromise.then(async (usage) => {
17019
+ const guardrailResults = usage?.hookResults ? transformHookResultsToGuardrailResults(usage.hookResults, statusCode === 446) : null;
16951
17020
  await processUsageAndLog({
16952
17021
  requestId,
16953
17022
  provider,
@@ -16966,6 +17035,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
16966
17035
  totalTokens: usage.totalTokens,
16967
17036
  cachedTokens: usage.cachedTokens
16968
17037
  } : null,
17038
+ guardrailResults,
16969
17039
  tags: customTags,
16970
17040
  batchWriter,
16971
17041
  trackErrors,
@@ -16976,14 +17046,24 @@ function createCostTrackingMiddleware(config$1 = {}) {
16976
17046
  });
16977
17047
  } else {
16978
17048
  let usage = null;
17049
+ let guardrailResults = null;
16979
17050
  try {
16980
17051
  const responseBody = await response.clone().json();
16981
- if (responseBody.usage) usage = {
16982
- promptTokens: responseBody.usage.prompt_tokens || 0,
16983
- completionTokens: responseBody.usage.completion_tokens || 0,
16984
- totalTokens: responseBody.usage.total_tokens || 0,
16985
- cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens
16986
- };
17052
+ if (responseBody.usage) {
17053
+ const promptTokens = responseBody.usage.prompt_tokens ?? responseBody.usage.input_tokens ?? 0;
17054
+ const completionTokens = responseBody.usage.completion_tokens ?? responseBody.usage.output_tokens ?? 0;
17055
+ usage = {
17056
+ promptTokens,
17057
+ completionTokens,
17058
+ totalTokens: responseBody.usage.total_tokens || promptTokens + completionTokens,
17059
+ cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens ?? responseBody.usage.input_tokens_details?.cached_tokens
17060
+ };
17061
+ }
17062
+ if (responseBody.hook_results) {
17063
+ const wasBlocked = statusCode === 446;
17064
+ guardrailResults = transformHookResultsToGuardrailResults(responseBody.hook_results, wasBlocked);
17065
+ if (guardrailResults) log(`Extracted guardrail results: ${guardrailResults.results.length} checks, action=${guardrailResults.action}`);
17066
+ }
16987
17067
  } catch {
16988
17068
  log("Failed to parse response body for usage");
16989
17069
  }
@@ -17000,6 +17080,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
17000
17080
  latencyMs,
17001
17081
  isStreaming: false,
17002
17082
  usage,
17083
+ guardrailResults,
17003
17084
  tags: customTags,
17004
17085
  batchWriter,
17005
17086
  trackErrors,
@@ -17012,7 +17093,7 @@ function createCostTrackingMiddleware(config$1 = {}) {
17012
17093
  * Process usage data and log to batch writer
17013
17094
  */
17014
17095
  async function processUsageAndLog(params) {
17015
- const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, tags = {}, batchWriter, trackErrors, log } = params;
17096
+ const { requestId, provider, model, configId, variantId, environmentId, providerConfigId, endpoint, statusCode, latencyMs, isStreaming, usage, guardrailResults, tags = {}, batchWriter, trackErrors, log } = params;
17016
17097
  if (!trackErrors && statusCode >= 400) {
17017
17098
  log(`Skipping error response (${statusCode})`);
17018
17099
  return;
@@ -17063,7 +17144,8 @@ async function processUsageAndLog(params) {
17063
17144
  statusCode,
17064
17145
  latencyMs,
17065
17146
  isStreaming,
17066
- tags
17147
+ tags,
17148
+ guardrailResults: guardrailResults || null
17067
17149
  };
17068
17150
  batchWriter.enqueue(requestData);
17069
17151
  log(`Enqueued request ${requestId} for logging`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@llmops/app",
3
- "version": "0.3.2",
3
+ "version": "0.3.4-beta.1",
4
4
  "description": "LLMOps application with server and client",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -67,8 +67,8 @@
67
67
  "react-aria-components": "^1.13.0",
68
68
  "react-hook-form": "^7.68.0",
69
69
  "recharts": "^3.6.0",
70
- "@llmops/core": "^0.3.2",
71
- "@llmops/gateway": "^0.3.2"
70
+ "@llmops/core": "^0.3.4-beta.1",
71
+ "@llmops/gateway": "^0.3.4-beta.1"
72
72
  },
73
73
  "peerDependencies": {
74
74
  "react": "^19.2.1",