@llmops/app 0.1.5-beta.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/dist/.vite/manifest.json +453 -180
  2. package/dist/assets/Form-B1rZ0Hvr.js +1 -0
  3. package/dist/assets/InternalBackdrop-CXjGP6a9.js +12 -0
  4. package/dist/assets/_environment-zVjmOhBI.js +1 -0
  5. package/dist/assets/_observability-CQ4Ll_WT.js +1 -0
  6. package/dist/assets/_observability-DPSeR4-h.css +1 -0
  7. package/dist/assets/{_settings-BK4LOT_O.js → _settings-Bv8acMq8.js} +1 -1
  8. package/dist/assets/{_tabs-spsrS7rN.js → _tabs-Bi-rsAVV.js} +1 -1
  9. package/dist/assets/{_tabs-DWvdcnbx.js → _tabs-oKfgxR5m.js} +1 -1
  10. package/dist/assets/_variant-Zg5HjNSN.js +30 -0
  11. package/dist/assets/{_variants-DiPU43aS.js → _variants-B36Lo8m_.js} +1 -1
  12. package/dist/assets/{area.css-d_0zeUpe.js → area.css-DTnbbu5M.js} +1 -1
  13. package/dist/assets/button-DftzA6-1.js +1 -0
  14. package/dist/assets/check-PaWYhFn6.js +1 -0
  15. package/dist/assets/chevron-down-BjDiaa62.js +1 -0
  16. package/dist/assets/{chevron-right-CS1Toe_9.js → chevron-right-Dz77YeP_.js} +1 -1
  17. package/dist/assets/configs-CtNuMmVR.js +1 -0
  18. package/dist/assets/{copy-BQ-62Ur4.js → copy-D-DZB-6Z.js} +1 -1
  19. package/dist/assets/costs-DwVSRTwW.js +1 -0
  20. package/dist/assets/en-US-C8ut0f5H.js +1 -0
  21. package/dist/assets/environments-BB95BiC0.js +1 -0
  22. package/dist/assets/formatDistance-Bok3-MB1.js +1 -0
  23. package/dist/assets/getDisabledMountTransitionStyles-CKUvhO4q.js +1 -0
  24. package/dist/assets/index-BCY9aD4r.js +16 -0
  25. package/dist/assets/index-BS3B2BG5.js +1 -0
  26. package/dist/assets/{index-DnQMGf2R.css → index-BiCR_Kj2.css} +1 -1
  27. package/dist/assets/{index-BEbVINNT.js → index-CnOYeKc1.js} +1 -1
  28. package/dist/assets/index-CpAxLUl2.js +1 -0
  29. package/dist/assets/index-D841new6.js +1 -0
  30. package/dist/assets/index-DvzXLQL6.js +4 -0
  31. package/dist/assets/index-xdhK5tU9.js +1 -0
  32. package/dist/assets/index.esm-COTq2pHX.js +1 -0
  33. package/dist/assets/info-box.css-HIaMvhbC.js +1 -0
  34. package/dist/assets/llmops-B3IIte87.css +1 -0
  35. package/dist/assets/llmops-C5uyvq6E.js +1 -0
  36. package/dist/assets/observability-C5jdcUSg.css +1 -0
  37. package/dist/assets/observability.css-dztiak5K.js +1 -0
  38. package/dist/assets/overview-BwCF8A1G.js +1 -0
  39. package/dist/assets/{plus-5oc_diIr.js → plus-C_L0BpbU.js} +1 -1
  40. package/dist/assets/popover-CSR0ctop.js +1 -0
  41. package/dist/assets/popupStateMapping-DkOpwBhG.js +1 -0
  42. package/dist/assets/requests-D8wE_E2M.js +1 -0
  43. package/dist/assets/{route-CpTvuTp9.js → route-CI77gokb.js} +1 -1
  44. package/dist/assets/{route-TG1bSyy8.js → route-CZ2yCnzN.js} +1 -1
  45. package/dist/assets/{route-CYSM3PkP.js → route-vfMzB8KR.js} +1 -1
  46. package/dist/assets/route-wrnLt3bn.js +1 -0
  47. package/dist/assets/secrets-B9u5jvFm.js +1 -0
  48. package/dist/assets/settings-24qWHZEq.js +1 -0
  49. package/dist/assets/settings-BLt538aO.js +1 -0
  50. package/dist/assets/{table-SKwX9qUc.js → table-xLf4iXV8.js} +1 -1
  51. package/dist/assets/{tabs.css-BvfbDFW8.js → tabs.css-fttLH8Xj.js} +1 -1
  52. package/dist/assets/{targeting-rpE0HiKJ.js → targeting-DZIwwaYV.js} +1 -1
  53. package/dist/assets/tooltip-BKoDpdAC.js +1 -0
  54. package/dist/assets/update-or-create-name-ChWLK2jI.js +1 -0
  55. package/dist/assets/useButton-Ct8hIyjz.js +1 -0
  56. package/dist/assets/{useConfigList-CN-eUdau.js → useConfigList-Du__Gu7q.js} +1 -1
  57. package/dist/assets/{useConfigVariants-CIAilMey.js → useConfigVariants-C8frrCcZ.js} +1 -1
  58. package/dist/assets/{useEnvironments-D798jdLi.js → useEnvironments-CYkrqAdO.js} +1 -1
  59. package/dist/assets/useFocus-Du1aH2j1.js +1 -0
  60. package/dist/assets/{useMutation-ppBqjQps.js → useMutation-VUB-GXkv.js} +1 -1
  61. package/dist/assets/usePopupAutoResize-ddPHogUJ.js +1 -0
  62. package/dist/assets/useRole-DP91D1OX.js +1 -0
  63. package/dist/assets/useSetTargeting-D1rPM8vz.js +1 -0
  64. package/dist/assets/useSyncedFloatingRootContext-BzuhbdSw.js +1 -0
  65. package/dist/assets/{useTargetingRules-Bi-hIPWR.js → useTargetingRules-DS57orU0.js} +1 -1
  66. package/dist/assets/useValueChanged-vz8uKgCk.js +1 -0
  67. package/dist/assets/{user-profile-CvRPKkVY.js → user-profile-Bt3_D9Gr.js} +1 -1
  68. package/dist/assets/{variants-CZaBWam0.js → variants-CYXSNKzX.js} +1 -1
  69. package/dist/assets/variants.css-BjGKD3Nv.js +1 -0
  70. package/dist/assets/{workspace-general-umhuExNi.js → workspace-general-BRMpUqb-.js} +1 -1
  71. package/dist/index.cjs +723 -10
  72. package/dist/index.mjs +724 -11
  73. package/package.json +3 -3
  74. package/dist/assets/Form-B1Xes2HX.js +0 -1
  75. package/dist/assets/_environment-BHroMRce.js +0 -1
  76. package/dist/assets/_variant-i8-2ZjCg.js +0 -30
  77. package/dist/assets/button-D46pme1q.js +0 -1
  78. package/dist/assets/check-Cp187nGx.js +0 -1
  79. package/dist/assets/configs-BWQEZkYp.js +0 -1
  80. package/dist/assets/environments-CxEwSvmp.js +0 -1
  81. package/dist/assets/formatDistance-CEmiEHX5.js +0 -1
  82. package/dist/assets/index-CadPnEMv.js +0 -1
  83. package/dist/assets/index-D3G5uHLZ.js +0 -1
  84. package/dist/assets/index-KuidudiV.js +0 -1
  85. package/dist/assets/index-nHWjhrFW.js +0 -16
  86. package/dist/assets/index.esm-DJMnkOur.js +0 -1
  87. package/dist/assets/info-box.css-DyjK40Ax.js +0 -1
  88. package/dist/assets/new-config-state-B8sMe-TC.css +0 -1
  89. package/dist/assets/new-config-state.css-zldOiAQS.js +0 -1
  90. package/dist/assets/secrets-dM6OVjcE.js +0 -1
  91. package/dist/assets/settings-C74FVCr5.js +0 -1
  92. package/dist/assets/settings-D4fugFLx.js +0 -1
  93. package/dist/assets/update-or-create-name-Cd1zyalO.js +0 -4
  94. package/dist/assets/useButton-DfUAk02P.js +0 -1
  95. package/dist/assets/useRole-D2HhbdS4.js +0 -1
  96. package/dist/assets/useSetTargeting-C3RG0vdV.js +0 -1
  97. package/dist/assets/useValueChanged-2Bqw2vWT.js +0 -12
  98. package/dist/assets/variants.css-C4fJhzPe.js +0 -1
package/dist/index.cjs CHANGED
@@ -56,13 +56,14 @@ let __hono_zod_validator = require("@hono/zod-validator");
56
56
  let hono_pretty_json = require("hono/pretty-json");
57
57
  let hono_http_exception = require("hono/http-exception");
58
58
  let hono_cors = require("hono/cors");
59
+ let node_crypto = require("node:crypto");
59
60
  let __llmops_gateway = require("@llmops/gateway");
60
61
  __llmops_gateway = __toESM(__llmops_gateway);
61
62
  let node_process = require("node:process");
62
63
  let __llmops_core_db = require("@llmops/core/db");
63
64
 
64
65
  //#region src/client/index.tsx?url
65
- var client_default = "/assets/index-nHWjhrFW.js";
66
+ var client_default = "/assets/index-BCY9aD4r.js";
66
67
 
67
68
  //#endregion
68
69
  //#region src/client/styles/styles.css?url
@@ -12708,11 +12709,13 @@ var zod_default = external_exports;
12708
12709
  //#endregion
12709
12710
  //#region src/server/lib/zv.ts
12710
12711
  const zv = (target, schema) => (0, __hono_zod_validator.zValidator)(target, schema, (result, c) => {
12711
- if (!result.success)
12712
- /**
12713
- * @todo handle error properly
12714
- */
12715
- return c.json({ message: "Bad Request" }, 400);
12712
+ if (!result.success) return c.json({
12713
+ message: "Bad Request",
12714
+ errors: result.error.issues.map((issue$1) => ({
12715
+ path: issue$1.path.join("."),
12716
+ message: issue$1.message
12717
+ }))
12718
+ }, 400);
12716
12719
  });
12717
12720
 
12718
12721
  //#endregion
@@ -12739,6 +12742,225 @@ const internalServerError = (message, code) => {
12739
12742
  };
12740
12743
  };
12741
12744
 
12745
+ //#endregion
12746
+ //#region src/server/handlers/analytics/index.ts
12747
+ /**
12748
+ * Convert micro-dollars to formatted dollar string
12749
+ */
12750
+ function formatCost(microDollars, decimals = 6) {
12751
+ return `$${(microDollars / 1e6).toFixed(decimals)}`;
12752
+ }
12753
+ /**
12754
+ * Parse ISO date string to Date object
12755
+ * Accepts both ISO strings (2026-01-02T10:30:00.000Z) and date-only strings (2026-01-02)
12756
+ */
12757
+ function parseDate(dateStr) {
12758
+ const date$4 = new Date(dateStr);
12759
+ if (isNaN(date$4.getTime())) throw new Error(`Invalid date string: ${dateStr}`);
12760
+ return date$4;
12761
+ }
12762
+ /**
12763
+ * Parse date string for start of range
12764
+ * - ISO strings are used as-is
12765
+ * - Date-only strings (YYYY-MM-DD) are treated as start of day UTC
12766
+ */
12767
+ function parseStartDate(dateStr) {
12768
+ return parseDate(dateStr);
12769
+ }
12770
+ /**
12771
+ * Parse date string for end of range
12772
+ * - ISO strings are used as-is
12773
+ * - Date-only strings (YYYY-MM-DD) are set to end of day (23:59:59.999 UTC)
12774
+ */
12775
+ function parseEndDate(dateStr) {
12776
+ const date$4 = parseDate(dateStr);
12777
+ if (!dateStr.includes("T")) date$4.setUTCHours(23, 59, 59, 999);
12778
+ return date$4;
12779
+ }
12780
+ /**
12781
+ * Zod schema for ISO date strings
12782
+ * Validates that the string can be parsed as a valid date
12783
+ */
12784
+ const isoDateString = zod_default.string().refine((val) => !isNaN(new Date(val).getTime()), { message: "Invalid date format. Expected ISO 8601 string (e.g., 2026-01-02T10:30:00.000Z) or date string (e.g., 2026-01-02)" });
12785
+ /**
12786
+ * Date range query schema
12787
+ * Accepts ISO 8601 date strings or date-only strings (YYYY-MM-DD)
12788
+ * - startDate: Used as-is for ISO strings, start of day for date-only
12789
+ * - endDate: Used as-is for ISO strings, end of day (23:59:59.999) for date-only
12790
+ */
12791
+ const dateRangeSchema = zod_default.object({
12792
+ startDate: isoDateString.transform(parseStartDate),
12793
+ endDate: isoDateString.transform(parseEndDate)
12794
+ });
12795
+ /**
12796
+ * Analytics API routes for cost and usage tracking
12797
+ */
12798
+ const app$10 = new hono.Hono().get("/requests", zv("query", zod_default.object({
12799
+ limit: zod_default.string().transform(Number).optional(),
12800
+ offset: zod_default.string().transform(Number).optional(),
12801
+ configId: zod_default.string().uuid().optional(),
12802
+ provider: zod_default.string().optional(),
12803
+ model: zod_default.string().optional(),
12804
+ startDate: isoDateString.optional(),
12805
+ endDate: isoDateString.optional()
12806
+ })), async (c) => {
12807
+ const db = c.get("db");
12808
+ const query = c.req.valid("query");
12809
+ try {
12810
+ const requests = await db.listRequests({
12811
+ limit: query.limit,
12812
+ offset: query.offset,
12813
+ configId: query.configId,
12814
+ provider: query.provider,
12815
+ model: query.model,
12816
+ startDate: query.startDate ? parseStartDate(query.startDate) : void 0,
12817
+ endDate: query.endDate ? parseEndDate(query.endDate) : void 0
12818
+ });
12819
+ return c.json(successResponse(requests, 200));
12820
+ } catch (error$45) {
12821
+ console.error("Error fetching requests:", error$45);
12822
+ return c.json(internalServerError("Failed to fetch requests", 500), 500);
12823
+ }
12824
+ }).get("/requests/:requestId", zv("param", zod_default.object({ requestId: zod_default.string().uuid() })), async (c) => {
12825
+ const db = c.get("db");
12826
+ const { requestId } = c.req.valid("param");
12827
+ try {
12828
+ const request = await db.getRequestByRequestId(requestId);
12829
+ if (!request) return c.json({ error: "Request not found" }, 404);
12830
+ return c.json(successResponse(request, 200));
12831
+ } catch (error$45) {
12832
+ console.error("Error fetching request:", error$45);
12833
+ return c.json(internalServerError("Failed to fetch request", 500), 500);
12834
+ }
12835
+ }).get("/costs/total", zv("query", dateRangeSchema), async (c) => {
12836
+ const db = c.get("db");
12837
+ const { startDate, endDate } = c.req.valid("query");
12838
+ try {
12839
+ const data = await db.getTotalCost({
12840
+ startDate,
12841
+ endDate
12842
+ });
12843
+ if (!data) return c.json(successResponse({
12844
+ totalCost: 0,
12845
+ totalCostFormatted: "$0.000000",
12846
+ totalInputCost: 0,
12847
+ totalOutputCost: 0,
12848
+ totalPromptTokens: 0,
12849
+ totalCompletionTokens: 0,
12850
+ totalTokens: 0,
12851
+ requestCount: 0
12852
+ }, 200));
12853
+ return c.json(successResponse({
12854
+ ...data,
12855
+ totalCostFormatted: formatCost(data.totalCost),
12856
+ totalInputCostFormatted: formatCost(data.totalInputCost),
12857
+ totalOutputCostFormatted: formatCost(data.totalOutputCost)
12858
+ }, 200));
12859
+ } catch (error$45) {
12860
+ console.error("Error fetching total costs:", error$45);
12861
+ return c.json(internalServerError("Failed to fetch total costs", 500), 500);
12862
+ }
12863
+ }).get("/costs/by-model", zv("query", dateRangeSchema), async (c) => {
12864
+ const db = c.get("db");
12865
+ const { startDate, endDate } = c.req.valid("query");
12866
+ try {
12867
+ const data = await db.getCostByModel({
12868
+ startDate,
12869
+ endDate
12870
+ });
12871
+ return c.json(successResponse(data, 200));
12872
+ } catch (error$45) {
12873
+ console.error("Error fetching costs by model:", error$45);
12874
+ return c.json(internalServerError("Failed to fetch costs by model", 500), 500);
12875
+ }
12876
+ }).get("/costs/by-provider", zv("query", dateRangeSchema), async (c) => {
12877
+ const db = c.get("db");
12878
+ const { startDate, endDate } = c.req.valid("query");
12879
+ try {
12880
+ const data = await db.getCostByProvider({
12881
+ startDate,
12882
+ endDate
12883
+ });
12884
+ return c.json(successResponse(data, 200));
12885
+ } catch (error$45) {
12886
+ console.error("Error fetching costs by provider:", error$45);
12887
+ return c.json(internalServerError("Failed to fetch costs by provider", 500), 500);
12888
+ }
12889
+ }).get("/costs/by-config", zv("query", dateRangeSchema), async (c) => {
12890
+ const db = c.get("db");
12891
+ const { startDate, endDate } = c.req.valid("query");
12892
+ try {
12893
+ const data = await db.getCostByConfig({
12894
+ startDate,
12895
+ endDate
12896
+ });
12897
+ return c.json(successResponse(data, 200));
12898
+ } catch (error$45) {
12899
+ console.error("Error fetching costs by config:", error$45);
12900
+ return c.json(internalServerError("Failed to fetch costs by config", 500), 500);
12901
+ }
12902
+ }).get("/costs/daily", zv("query", dateRangeSchema), async (c) => {
12903
+ const db = c.get("db");
12904
+ const { startDate, endDate } = c.req.valid("query");
12905
+ try {
12906
+ const data = await db.getDailyCosts({
12907
+ startDate,
12908
+ endDate
12909
+ });
12910
+ return c.json(successResponse(data, 200));
12911
+ } catch (error$45) {
12912
+ console.error("Error fetching daily costs:", error$45);
12913
+ return c.json(internalServerError("Failed to fetch daily costs", 500), 500);
12914
+ }
12915
+ }).get("/costs/summary", zv("query", dateRangeSchema.extend({ groupBy: zod_default.enum([
12916
+ "day",
12917
+ "hour",
12918
+ "model",
12919
+ "provider",
12920
+ "config"
12921
+ ]).optional() })), async (c) => {
12922
+ const db = c.get("db");
12923
+ const { startDate, endDate, groupBy } = c.req.valid("query");
12924
+ try {
12925
+ const data = await db.getCostSummary({
12926
+ startDate,
12927
+ endDate,
12928
+ groupBy
12929
+ });
12930
+ return c.json(successResponse(data, 200));
12931
+ } catch (error$45) {
12932
+ console.error("Error fetching cost summary:", error$45);
12933
+ return c.json(internalServerError("Failed to fetch cost summary", 500), 500);
12934
+ }
12935
+ }).get("/stats", zv("query", dateRangeSchema), async (c) => {
12936
+ const db = c.get("db");
12937
+ const { startDate, endDate } = c.req.valid("query");
12938
+ try {
12939
+ const data = await db.getRequestStats({
12940
+ startDate,
12941
+ endDate
12942
+ });
12943
+ if (!data) return c.json(successResponse({
12944
+ totalRequests: 0,
12945
+ successfulRequests: 0,
12946
+ failedRequests: 0,
12947
+ streamingRequests: 0,
12948
+ avgLatencyMs: 0,
12949
+ maxLatencyMs: 0,
12950
+ minLatencyMs: 0,
12951
+ successRate: 0
12952
+ }, 200));
12953
+ return c.json(successResponse({
12954
+ ...data,
12955
+ successRate: data.totalRequests > 0 ? (data.successfulRequests / data.totalRequests * 100).toFixed(2) : 0
12956
+ }, 200));
12957
+ } catch (error$45) {
12958
+ console.error("Error fetching request stats:", error$45);
12959
+ return c.json(internalServerError("Failed to fetch request stats", 500), 500);
12960
+ }
12961
+ });
12962
+ var analytics_default = app$10;
12963
+
12742
12964
  //#endregion
12743
12965
  //#region src/server/handlers/configs/index.ts
12744
12966
  const app$9 = new hono.Hono().post("/", zv("json", zod_default.object({ name: zod_default.string().min(1) })), async (c) => {
@@ -13396,7 +13618,7 @@ const app$3 = new hono.Hono().use("*", async (c, next) => {
13396
13618
  error: "Auth middleware not configured",
13397
13619
  message: `Auth type "${config$1.auth.type}" requires @llmops/enterprise middleware. Either use basicAuth() from @llmops/sdk or install @llmops/enterprise and add the auth middleware.`
13398
13620
  }, 501);
13399
- }).route("/configs", configs_default).route("/environments", environments_default).route("/providers", providers_default).route("/targeting", targeting_default).route("/variants", variants_default).route("/workspace-settings", workspace_settings_default);
13621
+ }).route("/analytics", analytics_default).route("/configs", configs_default).route("/environments", environments_default).route("/providers", providers_default).route("/targeting", targeting_default).route("/variants", variants_default).route("/workspace-settings", workspace_settings_default);
13400
13622
  var v1_default = app$3;
13401
13623
 
13402
13624
  //#endregion
@@ -13566,7 +13788,7 @@ const createGatewayAdapterMiddleware = () => {
13566
13788
  if (method === "POST" && contentType === "application/json" && (path$1.endsWith("/chat/completions") || path$1.endsWith("/completions"))) {
13567
13789
  const mergedBody = mergeChatCompletionBody(await c.req.json(), variantConfig, data.modelName);
13568
13790
  const newHeaders = new Headers(c.req.raw.headers);
13569
- newHeaders.set("x-portkey-config", JSON.stringify(portkeyConfig));
13791
+ newHeaders.set("x-llmops-config", JSON.stringify(portkeyConfig));
13570
13792
  const newRequest = new Request(c.req.raw.url, {
13571
13793
  method: c.req.raw.method,
13572
13794
  headers: newHeaders,
@@ -13579,9 +13801,11 @@ const createGatewayAdapterMiddleware = () => {
13579
13801
  configurable: true
13580
13802
  });
13581
13803
  c.req.bodyCache = {};
13582
- } else c.req.raw.headers.set("x-portkey-config", JSON.stringify(portkeyConfig));
13804
+ } else c.req.raw.headers.set("x-llmops-config", JSON.stringify(portkeyConfig));
13583
13805
  c.set("variantConfig", variantConfig);
13584
13806
  c.set("variantModel", variantConfig.model || data.modelName);
13807
+ c.set("configId", data.configId);
13808
+ c.set("variantId", data.variantId);
13585
13809
  await next();
13586
13810
  } catch (error$45) {
13587
13811
  console.error("Gateway adapter error:", error$45);
@@ -13593,12 +13817,501 @@ const createGatewayAdapterMiddleware = () => {
13593
13817
  };
13594
13818
  };
13595
13819
 
13820
+ //#endregion
13821
+ //#region src/server/lib/streamingCostExtractor.ts
13822
+ /**
13823
+ * Creates a TransformStream that passes through SSE data while extracting usage info.
13824
+ *
13825
+ * @param onComplete - Callback invoked when stream completes with extracted usage
13826
+ * @returns TransformStream that passes through the original stream
13827
+ *
13828
+ * @example
13829
+ * ```typescript
13830
+ * const { stream, usagePromise } = createStreamingCostExtractor();
13831
+ *
13832
+ * // Pipe the response through the extractor
13833
+ * const transformedResponse = originalResponse.body.pipeThrough(stream);
13834
+ *
13835
+ * // Later, get the usage
13836
+ * const usage = await usagePromise;
13837
+ * if (usage) {
13838
+ * console.log(`Tokens used: ${usage.totalTokens}`);
13839
+ * }
13840
+ * ```
13841
+ */
13842
+ function createStreamingCostExtractor() {
13843
+ let extractedUsage = null;
13844
+ let buffer = "";
13845
+ let resolveUsage;
13846
+ const usagePromise = new Promise((resolve) => {
13847
+ resolveUsage = resolve;
13848
+ });
13849
+ const decoder = new TextDecoder();
13850
+ return {
13851
+ stream: new TransformStream({
13852
+ transform(chunk, controller) {
13853
+ controller.enqueue(chunk);
13854
+ const text = decoder.decode(chunk, { stream: true });
13855
+ buffer += text;
13856
+ const messages = buffer.split("\n\n");
13857
+ buffer = messages.pop() || "";
13858
+ for (const message of messages) {
13859
+ const trimmed = message.trim();
13860
+ if (!trimmed) continue;
13861
+ if (!trimmed.startsWith("data:")) continue;
13862
+ const jsonPart = trimmed.slice(5).trim();
13863
+ if (jsonPart === "[DONE]") continue;
13864
+ try {
13865
+ const parsed = JSON.parse(jsonPart);
13866
+ if (parsed.usage) extractedUsage = {
13867
+ promptTokens: parsed.usage.prompt_tokens ?? 0,
13868
+ completionTokens: parsed.usage.completion_tokens ?? 0,
13869
+ totalTokens: parsed.usage.total_tokens ?? 0,
13870
+ cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
13871
+ };
13872
+ } catch {}
13873
+ }
13874
+ },
13875
+ flush(controller) {
13876
+ if (buffer.trim()) {
13877
+ const trimmed = buffer.trim();
13878
+ if (trimmed.startsWith("data:")) {
13879
+ const jsonPart = trimmed.slice(5).trim();
13880
+ if (jsonPart !== "[DONE]") try {
13881
+ const parsed = JSON.parse(jsonPart);
13882
+ if (parsed.usage) extractedUsage = {
13883
+ promptTokens: parsed.usage.prompt_tokens ?? 0,
13884
+ completionTokens: parsed.usage.completion_tokens ?? 0,
13885
+ totalTokens: parsed.usage.total_tokens ?? 0,
13886
+ cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
13887
+ };
13888
+ } catch {}
13889
+ }
13890
+ }
13891
+ resolveUsage(extractedUsage);
13892
+ }
13893
+ }),
13894
+ usagePromise
13895
+ };
13896
+ }
13897
+ /**
13898
+ * Wraps a Response with a streaming body to extract usage information.
13899
+ *
13900
+ * @param response - Original streaming Response
13901
+ * @returns Object with transformed response and promise for usage data
13902
+ *
13903
+ * @example
13904
+ * ```typescript
13905
+ * const result = wrapStreamingResponse(originalResponse);
13906
+ *
13907
+ * // Return the transformed response to the client
13908
+ * return result.response;
13909
+ *
13910
+ * // After response is sent, get usage for cost tracking
13911
+ * result.usagePromise.then((usage) => {
13912
+ * if (usage) {
13913
+ * trackCost(usage);
13914
+ * }
13915
+ * });
13916
+ * ```
13917
+ */
13918
+ function wrapStreamingResponse(response) {
13919
+ if (!response.body) return {
13920
+ response,
13921
+ usagePromise: Promise.resolve(null)
13922
+ };
13923
+ const { stream, usagePromise } = createStreamingCostExtractor();
13924
+ const transformedBody = response.body.pipeThrough(stream);
13925
+ return {
13926
+ response: new Response(transformedBody, {
13927
+ status: response.status,
13928
+ statusText: response.statusText,
13929
+ headers: response.headers
13930
+ }),
13931
+ usagePromise
13932
+ };
13933
+ }
13934
+ /**
13935
+ * Ensures stream_options.include_usage is set for cost tracking
13936
+ * Modifies the body in place.
13937
+ *
13938
+ * @param body - Request body (will be modified)
13939
+ * @returns Modified body with include_usage enabled
13940
+ */
13941
+ function ensureStreamUsageEnabled(body) {
13942
+ if (body.stream === true) body.stream_options = {
13943
+ ...body.stream_options || {},
13944
+ include_usage: true
13945
+ };
13946
+ return body;
13947
+ }
13948
+
13949
+ //#endregion
13950
+ //#region src/server/services/batchWriter.ts
13951
+ /**
13952
+ * Creates a BatchWriter instance
13953
+ *
13954
+ * @example
13955
+ * ```typescript
13956
+ * const writer = createBatchWriter(
13957
+ * { batchInsertRequests: db.batchInsertRequests },
13958
+ * { flushIntervalMs: 2000 }
13959
+ * );
13960
+ *
13961
+ * // Enqueue a request
13962
+ * writer.enqueue({
13963
+ * requestId: 'req-123',
13964
+ * provider: 'openai',
13965
+ * model: 'gpt-4o',
13966
+ * // ... other fields
13967
+ * });
13968
+ *
13969
+ * // When shutting down
13970
+ * await writer.stop();
13971
+ * ```
13972
+ */
13973
+ function createBatchWriter(deps, config$1 = {}) {
13974
+ const { flushIntervalMs = 2e3, maxBatchSize = 100, debug = false } = config$1;
13975
+ let queue = [];
13976
+ let flushTimer = null;
13977
+ let running = false;
13978
+ let flushing = false;
13979
+ const log = debug ? (msg) => __llmops_core.logger.debug(msg) : () => {};
13980
+ /**
13981
+ * Flush all queued requests to the database
13982
+ */
13983
+ async function flush() {
13984
+ if (flushing || queue.length === 0) return;
13985
+ flushing = true;
13986
+ const batch = queue;
13987
+ queue = [];
13988
+ try {
13989
+ log(`[BatchWriter] Flushing ${batch.length} requests`);
13990
+ log(`[BatchWriter] Flushed ${(await deps.batchInsertRequests(batch)).count} requests successfully`);
13991
+ } catch (error$45) {
13992
+ const errorMsg = error$45 instanceof Error ? error$45.message : String(error$45);
13993
+ __llmops_core.logger.error(`[BatchWriter] Flush failed, re-queuing requests: ${errorMsg}`);
13994
+ queue = [...batch, ...queue];
13995
+ } finally {
13996
+ flushing = false;
13997
+ }
13998
+ }
13999
+ /**
14000
+ * Start the periodic flush timer
14001
+ */
14002
+ function start() {
14003
+ if (running) return;
14004
+ running = true;
14005
+ flushTimer = setInterval(() => {
14006
+ flush().catch((err) => {
14007
+ const errorMsg = err instanceof Error ? err.message : String(err);
14008
+ __llmops_core.logger.error(`[BatchWriter] Periodic flush error: ${errorMsg}`);
14009
+ });
14010
+ }, flushIntervalMs);
14011
+ log(`[BatchWriter] Started with ${flushIntervalMs}ms flush interval`);
14012
+ }
14013
+ /**
14014
+ * Stop the batch writer and flush remaining items
14015
+ */
14016
+ async function stop() {
14017
+ if (!running) return;
14018
+ running = false;
14019
+ if (flushTimer) {
14020
+ clearInterval(flushTimer);
14021
+ flushTimer = null;
14022
+ }
14023
+ await flush();
14024
+ log("[BatchWriter] Stopped");
14025
+ }
14026
+ /**
14027
+ * Add a request to the batch queue
14028
+ */
14029
+ function enqueue(request) {
14030
+ queue.push(request);
14031
+ log(`[BatchWriter] Enqueued request ${request.requestId}, queue size: ${queue.length}`);
14032
+ if (!running) start();
14033
+ if (queue.length >= maxBatchSize) {
14034
+ log(`[BatchWriter] Max batch size reached, forcing flush`);
14035
+ flush().catch((err) => {
14036
+ const errorMsg = err instanceof Error ? err.message : String(err);
14037
+ __llmops_core.logger.error(`[BatchWriter] Forced flush error: ${errorMsg}`);
14038
+ });
14039
+ }
14040
+ }
14041
+ return {
14042
+ enqueue,
14043
+ flush,
14044
+ stop,
14045
+ queueLength: () => queue.length,
14046
+ isRunning: () => running
14047
+ };
14048
+ }
14049
+ /**
14050
+ * Global singleton instance
14051
+ * Lazily initialized when first accessed
14052
+ */
14053
+ let globalWriter = null;
14054
+ /**
14055
+ * Get or create the global BatchWriter instance
14056
+ *
14057
+ * @param deps - Database dependencies (required on first call)
14058
+ * @param config - Optional configuration
14059
+ * @returns The global BatchWriter instance
14060
+ */
14061
+ function getGlobalBatchWriter(deps, config$1) {
14062
+ if (!globalWriter) {
14063
+ if (!deps) throw new Error("BatchWriter dependencies required on first initialization");
14064
+ globalWriter = createBatchWriter(deps, config$1);
14065
+ }
14066
+ return globalWriter;
14067
+ }
14068
+
14069
+ //#endregion
14070
+ //#region src/server/middlewares/costTracking.ts
14071
+ /**
14072
+ * Calculate cost in micro-dollars
14073
+ * 1 dollar = 1,000,000 micro-dollars
14074
+ */
14075
+ function calculateCost(usage, pricing) {
14076
+ const inputCost = Math.round(usage.promptTokens * pricing.inputCostPer1M);
14077
+ const outputCost = Math.round(usage.completionTokens * pricing.outputCostPer1M);
14078
+ return {
14079
+ inputCost,
14080
+ outputCost,
14081
+ totalCost: inputCost + outputCost
14082
+ };
14083
+ }
14084
+ /**
14085
+ * Simple pricing provider that fetches from models.dev
14086
+ */
14087
+ var PricingProvider = class {
14088
+ cache = /* @__PURE__ */ new Map();
14089
+ lastFetch = 0;
14090
+ cacheTTL = 300 * 1e3;
14091
+ fetchPromise = null;
14092
+ getCacheKey(provider, model) {
14093
+ return `${provider.toLowerCase()}:${model.toLowerCase()}`;
14094
+ }
14095
+ async fetchPricingData() {
14096
+ try {
14097
+ const response = await fetch("https://models.dev/api.json");
14098
+ if (!response.ok) return;
14099
+ const data = await response.json();
14100
+ this.cache.clear();
14101
+ for (const [providerId, provider] of Object.entries(data)) {
14102
+ const p = provider;
14103
+ if (!p.models) continue;
14104
+ for (const [, model] of Object.entries(p.models)) {
14105
+ if (!model.cost) continue;
14106
+ const cacheKey = this.getCacheKey(providerId, model.id);
14107
+ this.cache.set(cacheKey, {
14108
+ inputCostPer1M: model.cost.input ?? 0,
14109
+ outputCostPer1M: model.cost.output ?? 0
14110
+ });
14111
+ }
14112
+ }
14113
+ this.lastFetch = Date.now();
14114
+ } catch {}
14115
+ }
14116
+ async ensureFreshCache() {
14117
+ if (Date.now() - this.lastFetch < this.cacheTTL && this.cache.size > 0) return;
14118
+ if (!this.fetchPromise) this.fetchPromise = this.fetchPricingData().finally(() => {
14119
+ this.fetchPromise = null;
14120
+ });
14121
+ await this.fetchPromise;
14122
+ }
14123
+ async getModelPricing(provider, model) {
14124
+ await this.ensureFreshCache();
14125
+ return this.cache.get(this.getCacheKey(provider, model)) || null;
14126
+ }
14127
+ };
14128
+ const pricingProvider = new PricingProvider();
14129
/**
 * Creates cost tracking middleware that logs LLM requests with usage and cost data.
 *
 * Features:
 * - Tracks both streaming and non-streaming requests
 * - Calculates costs using models.dev pricing data
 * - Batches database writes for performance
 * - Adds x-llmops-request-id header for tracing
 *
 * Only requests whose path ends in "/completions" are tracked (this also
 * covers "/chat/completions"); everything else is passed straight to `next`.
 * Tracking is best-effort: a failure while recording usage is logged and never
 * replaces the response produced by the downstream handlers.
 *
 * @param config$1 Optional settings:
 *   - `enabled` (default `true`): when false the middleware is a pass-through.
 *   - `trackErrors` (default `true`): forwarded to `processUsageAndLog`.
 *   - `flushIntervalMs` (default `2000`): forwarded to the global batch writer.
 *   - `debug` (default `false`): enables `[CostTracking]` debug logging.
 * @returns An async `(c, next)` middleware function.
 */
function createCostTrackingMiddleware(config$1 = {}) {
	const { enabled = true, trackErrors = true, flushIntervalMs = 2e3, debug = false } = config$1;
	const log = debug ? (msg) => __llmops_core.logger.debug(`[CostTracking] ${msg}`) : () => {};
	return async (c, next) => {
		if (!enabled) return next();
		const path$1 = c.req.path;
		// "/chat/completions" also ends with "/completions", so one suffix test covers both endpoints.
		if (!path$1.endsWith("/completions")) return next();
		const requestId = (0, node_crypto.randomUUID)();
		const startTime = Date.now();
		c.header("x-llmops-request-id", requestId);
		let body = {};
		let isStreaming = false;
		try {
			// Read from a clone so downstream handlers can still consume the original body.
			const parsed = await c.req.raw.clone().json();
			if (parsed === null || typeof parsed !== "object") {
				// Valid JSON that is not an object (e.g. `null`) carries no model/stream
				// fields; keep the empty default so the property reads below cannot throw.
				log("Request body is not a JSON object");
			} else {
				body = parsed;
				isStreaming = body.stream === true;
				if (isStreaming) {
					body = ensureStreamUsageEnabled(body);
					const newHeaders = new Headers(c.req.raw.headers);
					// The body is re-serialized and may differ in length from what the
					// client sent, so the copied Content-Length would be stale.
					newHeaders.delete("content-length");
					const newRequest = new Request(c.req.raw.url, {
						method: c.req.raw.method,
						headers: newHeaders,
						body: JSON.stringify(body),
						duplex: "half"
					});
					// Swap the rewritten request in for downstream handlers.
					Object.defineProperty(c.req, "raw", {
						value: newRequest,
						writable: true,
						configurable: true
					});
					// Reset cached body parses so later reads see the rewritten body.
					c.req.bodyCache = {};
				}
			}
		} catch {
			log("Failed to parse request body");
		}
		const context = {
			requestId,
			startTime,
			provider: "",
			model: body.model || "",
			configId: c.get("configId"),
			endpoint: path$1,
			isStreaming
		};
		c.set("__costTrackingContext", context);
		await next();
		try {
			const response = c.res;
			const statusCode = response.status;
			// Measured when `next()` settles; for streaming responses this is taken
			// before the stream body has been consumed.
			const latencyMs = Date.now() - startTime;
			const variantModel = c.get("variantModel") || context.model;
			let provider = "unknown";
			const llmopsConfigHeader = c.req.header("x-llmops-config");
			if (llmopsConfigHeader) try {
				provider = JSON.parse(llmopsConfigHeader).provider || provider;
			} catch {}
			if (!variantModel) {
				log(`Skipping request tracking - no model info`);
				return;
			}
			const db = c.get("db");
			const batchWriter = getGlobalBatchWriter({ batchInsertRequests: (requests) => db.batchInsertRequests(requests) }, {
				flushIntervalMs,
				debug
			});
			// Builds the argument object for processUsageAndLog; context values are read
			// at call time, i.e. after the stream ends in the streaming branch.
			const buildLogParams = (streaming, usage) => ({
				requestId,
				provider,
				model: variantModel,
				configId: c.get("configId"),
				variantId: c.get("variantId"),
				endpoint: context.endpoint,
				statusCode,
				latencyMs,
				isStreaming: streaming,
				usage,
				batchWriter,
				trackErrors,
				log
			});
			if (isStreaming && response.body) {
				const { response: wrappedResponse, usagePromise } = wrapStreamingResponse(response);
				c.res = wrappedResponse;
				// Fire-and-forget: usage is only known once the client has drained the stream.
				usagePromise.then(async (usage) => {
					await processUsageAndLog(buildLogParams(true, usage ? {
						promptTokens: usage.promptTokens,
						completionTokens: usage.completionTokens,
						totalTokens: usage.totalTokens,
						cachedTokens: usage.cachedTokens
					} : null));
				}).catch((err) => {
					__llmops_core.logger.error(`[CostTracking] Failed to process streaming usage: ${err}`);
				});
			} else {
				let usage = null;
				try {
					const responseBody = await response.clone().json();
					if (responseBody.usage) usage = {
						promptTokens: responseBody.usage.prompt_tokens || 0,
						completionTokens: responseBody.usage.completion_tokens || 0,
						totalTokens: responseBody.usage.total_tokens || 0,
						cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens
					};
				} catch {
					log("Failed to parse response body for usage");
				}
				await processUsageAndLog(buildLogParams(false, usage));
			}
		} catch (err) {
			// Bookkeeping must never turn a served response into an error.
			__llmops_core.logger.error(`[CostTracking] Failed to track request ${requestId}: ${err}`);
		}
	};
}
14259
/**
 * Process usage data and log to batch writer.
 *
 * Normalizes the token counts, prices the request through `pricingProvider`
 * when any prompt/completion tokens were reported, and enqueues one request
 * record on `params.batchWriter`.
 *
 * @param params Request details: `requestId`, `provider`, `model`, `configId`,
 *   `variantId`, `endpoint`, `statusCode`, `latencyMs`, `isStreaming`, `usage`
 *   (object with `promptTokens` / `completionTokens` / `totalTokens` /
 *   `cachedTokens`, or `null`), `batchWriter` (must expose `enqueue`),
 *   `trackErrors` and `log`.
 * @returns A promise that resolves once the record has been enqueued, or
 *   immediately when `trackErrors` is false and `statusCode >= 400`.
 */
async function processUsageAndLog(params) {
	const { requestId, provider, model, configId, variantId, endpoint, statusCode, latencyMs, isStreaming, usage, batchWriter, trackErrors, log } = params;
	if (!trackErrors && statusCode >= 400) {
		log(`Skipping error response (${statusCode})`);
		return;
	}
	// Missing, non-numeric or negative counts are treated as 0 so that partial
	// usage (e.g. only completionTokens) cannot poison the arithmetic with NaN.
	const toCount = (value) => {
		const count = Number(value);
		return Number.isFinite(count) && count > 0 ? count : 0;
	};
	const promptTokens = toCount(usage?.promptTokens);
	const completionTokens = toCount(usage?.completionTokens);
	// Fall back to the sum when the total was not reported.
	const totalTokens = toCount(usage?.totalTokens) || promptTokens + completionTokens;
	const cachedTokens = toCount(usage?.cachedTokens);
	let cost = 0;
	let inputCost = 0;
	let outputCost = 0;
	if (promptTokens + completionTokens > 0) try {
		const pricing = await pricingProvider.getModelPricing(provider, model);
		if (pricing) {
			const costResult = calculateCost({
				promptTokens,
				completionTokens
			}, pricing);
			cost = costResult.totalCost;
			inputCost = costResult.inputCost;
			outputCost = costResult.outputCost;
			log(`Calculated cost: ${cost} micro-dollars for ${provider}/${model}`);
		} else log(`No pricing found for ${provider}/${model}`);
	} catch (err) {
		// Pricing failures are logged; the request is still recorded with zero cost.
		__llmops_core.logger.error(`[CostTracking] Failed to calculate cost: ${err}`);
	}
	const requestData = {
		requestId,
		configId: configId || null,
		variantId: variantId || null,
		provider,
		model,
		promptTokens,
		completionTokens,
		totalTokens,
		cachedTokens,
		cost,
		inputCost,
		outputCost,
		endpoint,
		statusCode,
		latencyMs,
		isStreaming,
		tags: {}
	};
	batchWriter.enqueue(requestData);
	log(`Enqueued request ${requestId} for logging`);
}
14308
+
13596
14309
  //#endregion
13597
14310
  //#region src/server/handlers/genai/index.ts
13598
14311
  const app$2 = new hono.Hono();
13599
14312
  app$2.use("*", (0, hono_pretty_json.prettyJSON)()).get("/health", async (c) => {
13600
14313
  return c.json({ status: "healthy" });
13601
- }).use("*", requestValidator).use("*", createRequestGuardMiddleware()).use("*", createGatewayAdapterMiddleware()).route("/", __llmops_gateway.default).notFound((c) => c.json({ error: {
14314
+ }).use("*", requestValidator).use("*", createRequestGuardMiddleware()).use("*", createCostTrackingMiddleware()).use("*", createGatewayAdapterMiddleware()).route("/", __llmops_gateway.default).notFound((c) => c.json({ error: {
13602
14315
  message: "Not Found",
13603
14316
  type: "invalid_request_error"
13604
14317
  } }, 404)).onError((err, c) => {