@llmops/app 0.1.4 → 0.1.5-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/dist/.vite/manifest.json +453 -180
  2. package/dist/assets/Form-B1rZ0Hvr.js +1 -0
  3. package/dist/assets/InternalBackdrop-CXjGP6a9.js +12 -0
  4. package/dist/assets/_environment-zVjmOhBI.js +1 -0
  5. package/dist/assets/_observability-CQ4Ll_WT.js +1 -0
  6. package/dist/assets/_observability-DPSeR4-h.css +1 -0
  7. package/dist/assets/{_settings-BK4LOT_O.js → _settings-Bv8acMq8.js} +1 -1
  8. package/dist/assets/{_tabs-spsrS7rN.js → _tabs-Bi-rsAVV.js} +1 -1
  9. package/dist/assets/{_tabs-DWvdcnbx.js → _tabs-oKfgxR5m.js} +1 -1
  10. package/dist/assets/_variant-Zg5HjNSN.js +30 -0
  11. package/dist/assets/{_variants-DiPU43aS.js → _variants-B36Lo8m_.js} +1 -1
  12. package/dist/assets/{area.css-d_0zeUpe.js → area.css-DTnbbu5M.js} +1 -1
  13. package/dist/assets/button-DftzA6-1.js +1 -0
  14. package/dist/assets/check-PaWYhFn6.js +1 -0
  15. package/dist/assets/chevron-down-BjDiaa62.js +1 -0
  16. package/dist/assets/{chevron-right-CS1Toe_9.js → chevron-right-Dz77YeP_.js} +1 -1
  17. package/dist/assets/configs-CtNuMmVR.js +1 -0
  18. package/dist/assets/{copy-BQ-62Ur4.js → copy-D-DZB-6Z.js} +1 -1
  19. package/dist/assets/costs-DwVSRTwW.js +1 -0
  20. package/dist/assets/en-US-C8ut0f5H.js +1 -0
  21. package/dist/assets/environments-BB95BiC0.js +1 -0
  22. package/dist/assets/formatDistance-Bok3-MB1.js +1 -0
  23. package/dist/assets/getDisabledMountTransitionStyles-CKUvhO4q.js +1 -0
  24. package/dist/assets/index-BCY9aD4r.js +16 -0
  25. package/dist/assets/index-BS3B2BG5.js +1 -0
  26. package/dist/assets/{index-DnQMGf2R.css → index-BiCR_Kj2.css} +1 -1
  27. package/dist/assets/{index-BEbVINNT.js → index-CnOYeKc1.js} +1 -1
  28. package/dist/assets/index-CpAxLUl2.js +1 -0
  29. package/dist/assets/index-D841new6.js +1 -0
  30. package/dist/assets/index-DvzXLQL6.js +4 -0
  31. package/dist/assets/index-xdhK5tU9.js +1 -0
  32. package/dist/assets/index.esm-COTq2pHX.js +1 -0
  33. package/dist/assets/info-box.css-HIaMvhbC.js +1 -0
  34. package/dist/assets/llmops-B3IIte87.css +1 -0
  35. package/dist/assets/llmops-C5uyvq6E.js +1 -0
  36. package/dist/assets/observability-C5jdcUSg.css +1 -0
  37. package/dist/assets/observability.css-dztiak5K.js +1 -0
  38. package/dist/assets/overview-BwCF8A1G.js +1 -0
  39. package/dist/assets/{plus-5oc_diIr.js → plus-C_L0BpbU.js} +1 -1
  40. package/dist/assets/popover-CSR0ctop.js +1 -0
  41. package/dist/assets/popupStateMapping-DkOpwBhG.js +1 -0
  42. package/dist/assets/requests-D8wE_E2M.js +1 -0
  43. package/dist/assets/{route-CpTvuTp9.js → route-CI77gokb.js} +1 -1
  44. package/dist/assets/{route-TG1bSyy8.js → route-CZ2yCnzN.js} +1 -1
  45. package/dist/assets/{route-CYSM3PkP.js → route-vfMzB8KR.js} +1 -1
  46. package/dist/assets/route-wrnLt3bn.js +1 -0
  47. package/dist/assets/secrets-B9u5jvFm.js +1 -0
  48. package/dist/assets/settings-24qWHZEq.js +1 -0
  49. package/dist/assets/settings-BLt538aO.js +1 -0
  50. package/dist/assets/{table-SKwX9qUc.js → table-xLf4iXV8.js} +1 -1
  51. package/dist/assets/{tabs.css-BvfbDFW8.js → tabs.css-fttLH8Xj.js} +1 -1
  52. package/dist/assets/{targeting-rpE0HiKJ.js → targeting-DZIwwaYV.js} +1 -1
  53. package/dist/assets/tooltip-BKoDpdAC.js +1 -0
  54. package/dist/assets/update-or-create-name-ChWLK2jI.js +1 -0
  55. package/dist/assets/useButton-Ct8hIyjz.js +1 -0
  56. package/dist/assets/{useConfigList-CN-eUdau.js → useConfigList-Du__Gu7q.js} +1 -1
  57. package/dist/assets/{useConfigVariants-CIAilMey.js → useConfigVariants-C8frrCcZ.js} +1 -1
  58. package/dist/assets/{useEnvironments-D798jdLi.js → useEnvironments-CYkrqAdO.js} +1 -1
  59. package/dist/assets/useFocus-Du1aH2j1.js +1 -0
  60. package/dist/assets/{useMutation-ppBqjQps.js → useMutation-VUB-GXkv.js} +1 -1
  61. package/dist/assets/usePopupAutoResize-ddPHogUJ.js +1 -0
  62. package/dist/assets/useRole-DP91D1OX.js +1 -0
  63. package/dist/assets/useSetTargeting-D1rPM8vz.js +1 -0
  64. package/dist/assets/useSyncedFloatingRootContext-BzuhbdSw.js +1 -0
  65. package/dist/assets/{useTargetingRules-Bi-hIPWR.js → useTargetingRules-DS57orU0.js} +1 -1
  66. package/dist/assets/useValueChanged-vz8uKgCk.js +1 -0
  67. package/dist/assets/{user-profile-CvRPKkVY.js → user-profile-Bt3_D9Gr.js} +1 -1
  68. package/dist/assets/{variants-CZaBWam0.js → variants-CYXSNKzX.js} +1 -1
  69. package/dist/assets/variants.css-BjGKD3Nv.js +1 -0
  70. package/dist/assets/{workspace-general-umhuExNi.js → workspace-general-BRMpUqb-.js} +1 -1
  71. package/dist/index.cjs +723 -10
  72. package/dist/index.mjs +724 -11
  73. package/package.json +3 -3
  74. package/dist/assets/Form-B1Xes2HX.js +0 -1
  75. package/dist/assets/_environment-BHroMRce.js +0 -1
  76. package/dist/assets/_variant-i8-2ZjCg.js +0 -30
  77. package/dist/assets/button-D46pme1q.js +0 -1
  78. package/dist/assets/check-Cp187nGx.js +0 -1
  79. package/dist/assets/configs-BWQEZkYp.js +0 -1
  80. package/dist/assets/environments-CxEwSvmp.js +0 -1
  81. package/dist/assets/formatDistance-CEmiEHX5.js +0 -1
  82. package/dist/assets/index-CadPnEMv.js +0 -1
  83. package/dist/assets/index-D3G5uHLZ.js +0 -1
  84. package/dist/assets/index-KuidudiV.js +0 -1
  85. package/dist/assets/index-nHWjhrFW.js +0 -16
  86. package/dist/assets/index.esm-DJMnkOur.js +0 -1
  87. package/dist/assets/info-box.css-DyjK40Ax.js +0 -1
  88. package/dist/assets/new-config-state-B8sMe-TC.css +0 -1
  89. package/dist/assets/new-config-state.css-zldOiAQS.js +0 -1
  90. package/dist/assets/secrets-dM6OVjcE.js +0 -1
  91. package/dist/assets/settings-C74FVCr5.js +0 -1
  92. package/dist/assets/settings-D4fugFLx.js +0 -1
  93. package/dist/assets/update-or-create-name-Cd1zyalO.js +0 -4
  94. package/dist/assets/useButton-DfUAk02P.js +0 -1
  95. package/dist/assets/useRole-D2HhbdS4.js +0 -1
  96. package/dist/assets/useSetTargeting-C3RG0vdV.js +0 -1
  97. package/dist/assets/useValueChanged-2Bqw2vWT.js +0 -12
  98. package/dist/assets/variants.css-C4fJhzPe.js +0 -1
package/dist/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Hono } from "hono";
2
- import { SupportedProviders, createDataLayer, generateId, validateLLMOpsConfig, variantJsonDataSchema } from "@llmops/core";
2
+ import { SupportedProviders, createDataLayer, generateId, logger, validateLLMOpsConfig, variantJsonDataSchema } from "@llmops/core";
3
3
  import reactServer from "react-dom/server";
4
4
  import { Fragment, jsx, jsxs } from "react/jsx-runtime";
5
5
  import { fileURLToPath } from "node:url";
@@ -14,6 +14,7 @@ import { zValidator } from "@hono/zod-validator";
14
14
  import { prettyJSON } from "hono/pretty-json";
15
15
  import { HTTPException } from "hono/http-exception";
16
16
  import { cors } from "hono/cors";
17
+ import { randomUUID } from "node:crypto";
17
18
  import gateway from "@llmops/gateway";
18
19
  import { env } from "node:process";
19
20
  import { createDatabaseFromConnection } from "@llmops/core/db";
@@ -36,7 +37,7 @@ var __export = (all, symbols) => {
36
37
 
37
38
  //#endregion
38
39
  //#region src/client/index.tsx?url
39
- var client_default = "/assets/index-nHWjhrFW.js";
40
+ var client_default = "/assets/index-BCY9aD4r.js";
40
41
 
41
42
  //#endregion
42
43
  //#region src/client/styles/styles.css?url
@@ -12682,11 +12683,13 @@ var zod_default = external_exports;
12682
12683
  //#endregion
12683
12684
  //#region src/server/lib/zv.ts
12684
12685
  const zv = (target, schema) => zValidator(target, schema, (result, c) => {
12685
- if (!result.success)
12686
- /**
12687
- * @todo handle error properly
12688
- */
12689
- return c.json({ message: "Bad Request" }, 400);
12686
+ if (!result.success) return c.json({
12687
+ message: "Bad Request",
12688
+ errors: result.error.issues.map((issue$1) => ({
12689
+ path: issue$1.path.join("."),
12690
+ message: issue$1.message
12691
+ }))
12692
+ }, 400);
12690
12693
  });
12691
12694
 
12692
12695
  //#endregion
@@ -12713,6 +12716,225 @@ const internalServerError = (message, code) => {
12713
12716
  };
12714
12717
  };
12715
12718
 
12719
+ //#endregion
12720
+ //#region src/server/handlers/analytics/index.ts
/**
 * Render a micro-dollar amount as a dollar string.
 *
 * @param microDollars - Amount in micro-dollars (1 dollar = 1,000,000 micro-dollars)
 * @param decimals - Number of fraction digits to print (defaults to 6)
 * @returns The dollar amount prefixed with "$", e.g. "$1.500000"
 */
function formatCost(microDollars, decimals = 6) {
	const dollars = microDollars / 1e6;
	return "$" + dollars.toFixed(decimals);
}
/**
 * Turn a date string into a Date, rejecting anything the Date constructor cannot parse.
 * Accepts both ISO strings (2026-01-02T10:30:00.000Z) and date-only strings (2026-01-02).
 *
 * @param dateStr - Text to parse
 * @returns The parsed Date
 * @throws Error with message "Invalid date string: <input>" when parsing yields an invalid date
 */
function parseDate(dateStr) {
	const parsed = new Date(dateStr);
	const isValid = !Number.isNaN(parsed.getTime());
	if (!isValid) throw new Error(`Invalid date string: ${dateStr}`);
	return parsed;
}
/**
 * Parse the lower bound of a date range.
 *
 * The value is handed to parseDate unchanged, so whatever instant the Date
 * constructor produces for the string is the start of the range (for a
 * date-only string such as 2026-01-02 that is midnight UTC).
 *
 * @param dateStr - ISO 8601 or date-only (YYYY-MM-DD) string
 * @returns The parsed Date
 * @throws Error when parseDate rejects the string
 */
function parseStartDate(dateStr) {
	const rangeStart = parseDate(dateStr);
	return rangeStart;
}
/**
 * Parse the upper bound of a date range.
 * - Date-only strings (exactly YYYY-MM-DD) are moved to the end of that day (23:59:59.999 UTC)
 * - Every other parseable string, including full ISO timestamps, is used as-is
 *
 * @param dateStr - ISO 8601 or date-only (YYYY-MM-DD) string
 * @returns The parsed Date, adjusted to end of day for date-only input
 * @throws Error when parseDate rejects the string
 */
function parseEndDate(dateStr) {
	const date$4 = parseDate(dateStr);
	// Match the documented date-only shape explicitly. Testing for the absence of "T"
	// would also rewrite the time of T-less timestamps such as "2026-01-02 10:30:00Z".
	if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) date$4.setUTCHours(23, 59, 59, 999);
	return date$4;
}
/**
 * Zod schema for date strings.
 * A value passes when the Date constructor can turn it into a valid date;
 * the value itself stays a string (no transform is applied here).
 */
const isoDateString = zod_default.string().refine(
	(val) => {
		const timestamp = new Date(val).getTime();
		return !Number.isNaN(timestamp);
	},
	{ message: "Invalid date format. Expected ISO 8601 string (e.g., 2026-01-02T10:30:00.000Z) or date string (e.g., 2026-01-02)" }
);
/**
 * Query schema for a required date range.
 * Both fields are validated as date strings and then converted to Date objects:
 * - startDate goes through parseStartDate
 * - endDate goes through parseEndDate (date-only values become end of day, 23:59:59.999 UTC)
 */
const dateRangeSchema = zod_default.object({
	startDate: isoDateString.transform((raw) => parseStartDate(raw)),
	endDate: isoDateString.transform((raw) => parseEndDate(raw))
});
/**
 * Analytics API routes for cost and usage tracking.
 *
 * Routes (all GET):
 * - /requests              request log with optional paging and filters
 * - /requests/:requestId   a single request looked up by UUID
 * - /costs/total           aggregate cost for a date range
 * - /costs/by-model, /costs/by-provider, /costs/by-config, /costs/daily
 *                          grouped costs for a date range
 * - /costs/summary         cost summary with an optional groupBy
 * - /stats                 request statistics for a date range
 *
 * Every handler reads the data layer from c.get("db"), wraps results with
 * successResponse, and converts thrown errors into a 500 built by internalServerError.
 */
const app$10 = (() => {
	// Query-string numbers arrive as text. Require plain digits before converting so that
	// input such as "abc" or "-5" is rejected with a 400 instead of reaching the data
	// layer as NaN or a negative value.
	const nonNegativeInt = () => zod_default.string().regex(/^\d+$/, "Expected a non-negative integer").transform(Number);

	// Builds the handler shared by the routes that pass a validated date range straight
	// to one data-layer query and return its result untouched.
	const dateRangeHandler = (description, runQuery) => async (c) => {
		const db = c.get("db");
		const { startDate, endDate } = c.req.valid("query");
		try {
			const data = await runQuery(db, {
				startDate,
				endDate
			});
			return c.json(successResponse(data, 200));
		} catch (error$45) {
			console.error(`Error fetching ${description}:`, error$45);
			return c.json(internalServerError(`Failed to fetch ${description}`, 500), 500);
		}
	};

	return new Hono().get("/requests", zv("query", zod_default.object({
		limit: nonNegativeInt().optional(),
		offset: nonNegativeInt().optional(),
		configId: zod_default.string().uuid().optional(),
		provider: zod_default.string().optional(),
		model: zod_default.string().optional(),
		startDate: isoDateString.optional(),
		endDate: isoDateString.optional()
	})), async (c) => {
		const db = c.get("db");
		const query = c.req.valid("query");
		try {
			const requests = await db.listRequests({
				limit: query.limit,
				offset: query.offset,
				configId: query.configId,
				provider: query.provider,
				model: query.model,
				// Dates are optional here, so they are parsed in the handler rather than in the schema.
				startDate: query.startDate ? parseStartDate(query.startDate) : void 0,
				endDate: query.endDate ? parseEndDate(query.endDate) : void 0
			});
			return c.json(successResponse(requests, 200));
		} catch (error$45) {
			console.error("Error fetching requests:", error$45);
			return c.json(internalServerError("Failed to fetch requests", 500), 500);
		}
	}).get("/requests/:requestId", zv("param", zod_default.object({ requestId: zod_default.string().uuid() })), async (c) => {
		const db = c.get("db");
		const { requestId } = c.req.valid("param");
		try {
			const request = await db.getRequestByRequestId(requestId);
			if (!request) return c.json({ error: "Request not found" }, 404);
			return c.json(successResponse(request, 200));
		} catch (error$45) {
			console.error("Error fetching request:", error$45);
			return c.json(internalServerError("Failed to fetch request", 500), 500);
		}
	}).get("/costs/total", zv("query", dateRangeSchema), async (c) => {
		const db = c.get("db");
		const { startDate, endDate } = c.req.valid("query");
		try {
			const data = await db.getTotalCost({
				startDate,
				endDate
			});
			// No aggregate row: answer with an all-zero payload.
			if (!data) return c.json(successResponse({
				totalCost: 0,
				totalCostFormatted: "$0.000000",
				totalInputCost: 0,
				totalOutputCost: 0,
				totalPromptTokens: 0,
				totalCompletionTokens: 0,
				totalTokens: 0,
				requestCount: 0
			}, 200));
			return c.json(successResponse({
				...data,
				totalCostFormatted: formatCost(data.totalCost),
				totalInputCostFormatted: formatCost(data.totalInputCost),
				totalOutputCostFormatted: formatCost(data.totalOutputCost)
			}, 200));
		} catch (error$45) {
			console.error("Error fetching total costs:", error$45);
			return c.json(internalServerError("Failed to fetch total costs", 500), 500);
		}
	}).get("/costs/by-model", zv("query", dateRangeSchema), dateRangeHandler("costs by model", (db, range) => db.getCostByModel(range))).get("/costs/by-provider", zv("query", dateRangeSchema), dateRangeHandler("costs by provider", (db, range) => db.getCostByProvider(range))).get("/costs/by-config", zv("query", dateRangeSchema), dateRangeHandler("costs by config", (db, range) => db.getCostByConfig(range))).get("/costs/daily", zv("query", dateRangeSchema), dateRangeHandler("daily costs", (db, range) => db.getDailyCosts(range))).get("/costs/summary", zv("query", dateRangeSchema.extend({ groupBy: zod_default.enum([
		"day",
		"hour",
		"model",
		"provider",
		"config"
	]).optional() })), async (c) => {
		const db = c.get("db");
		const { startDate, endDate, groupBy } = c.req.valid("query");
		try {
			const data = await db.getCostSummary({
				startDate,
				endDate,
				groupBy
			});
			return c.json(successResponse(data, 200));
		} catch (error$45) {
			console.error("Error fetching cost summary:", error$45);
			return c.json(internalServerError("Failed to fetch cost summary", 500), 500);
		}
	}).get("/stats", zv("query", dateRangeSchema), async (c) => {
		const db = c.get("db");
		const { startDate, endDate } = c.req.valid("query");
		try {
			const data = await db.getRequestStats({
				startDate,
				endDate
			});
			if (!data) return c.json(successResponse({
				totalRequests: 0,
				successfulRequests: 0,
				failedRequests: 0,
				streamingRequests: 0,
				avgLatencyMs: 0,
				maxLatencyMs: 0,
				minLatencyMs: 0,
				successRate: 0
			}, 200));
			return c.json(successResponse({
				...data,
				// NOTE(review): toFixed returns a string, so successRate is a string such as
				// "97.50" here but the number 0 in the zero-request branches. Kept as-is
				// because changing the wire type could break existing consumers.
				successRate: data.totalRequests > 0 ? (data.successfulRequests / data.totalRequests * 100).toFixed(2) : 0
			}, 200));
		} catch (error$45) {
			console.error("Error fetching request stats:", error$45);
			return c.json(internalServerError("Failed to fetch request stats", 500), 500);
		}
	});
})();
var analytics_default = app$10;
12937
+
12716
12938
  //#endregion
12717
12939
  //#region src/server/handlers/configs/index.ts
12718
12940
  const app$9 = new Hono().post("/", zv("json", zod_default.object({ name: zod_default.string().min(1) })), async (c) => {
@@ -13370,7 +13592,7 @@ const app$3 = new Hono().use("*", async (c, next) => {
13370
13592
  error: "Auth middleware not configured",
13371
13593
  message: `Auth type "${config$1.auth.type}" requires @llmops/enterprise middleware. Either use basicAuth() from @llmops/sdk or install @llmops/enterprise and add the auth middleware.`
13372
13594
  }, 501);
13373
- }).route("/configs", configs_default).route("/environments", environments_default).route("/providers", providers_default).route("/targeting", targeting_default).route("/variants", variants_default).route("/workspace-settings", workspace_settings_default);
13595
+ }).route("/analytics", analytics_default).route("/configs", configs_default).route("/environments", environments_default).route("/providers", providers_default).route("/targeting", targeting_default).route("/variants", variants_default).route("/workspace-settings", workspace_settings_default);
13374
13596
  var v1_default = app$3;
13375
13597
 
13376
13598
  //#endregion
@@ -13540,7 +13762,7 @@ const createGatewayAdapterMiddleware = () => {
13540
13762
  if (method === "POST" && contentType === "application/json" && (path.endsWith("/chat/completions") || path.endsWith("/completions"))) {
13541
13763
  const mergedBody = mergeChatCompletionBody(await c.req.json(), variantConfig, data.modelName);
13542
13764
  const newHeaders = new Headers(c.req.raw.headers);
13543
- newHeaders.set("x-portkey-config", JSON.stringify(portkeyConfig));
13765
+ newHeaders.set("x-llmops-config", JSON.stringify(portkeyConfig));
13544
13766
  const newRequest = new Request(c.req.raw.url, {
13545
13767
  method: c.req.raw.method,
13546
13768
  headers: newHeaders,
@@ -13553,9 +13775,11 @@ const createGatewayAdapterMiddleware = () => {
13553
13775
  configurable: true
13554
13776
  });
13555
13777
  c.req.bodyCache = {};
13556
- } else c.req.raw.headers.set("x-portkey-config", JSON.stringify(portkeyConfig));
13778
+ } else c.req.raw.headers.set("x-llmops-config", JSON.stringify(portkeyConfig));
13557
13779
  c.set("variantConfig", variantConfig);
13558
13780
  c.set("variantModel", variantConfig.model || data.modelName);
13781
+ c.set("configId", data.configId);
13782
+ c.set("variantId", data.variantId);
13559
13783
  await next();
13560
13784
  } catch (error$45) {
13561
13785
  console.error("Gateway adapter error:", error$45);
@@ -13567,12 +13791,501 @@ const createGatewayAdapterMiddleware = () => {
13567
13791
  };
13568
13792
  };
13569
13793
 
13794
+ //#endregion
13795
+ //#region src/server/lib/streamingCostExtractor.ts
/**
 * Creates a TransformStream that passes SSE bytes through untouched while watching
 * the decoded text for a `usage` object in the event payloads.
 *
 * Takes no arguments. Every chunk is forwarded before it is inspected. The last event
 * that carries a truthy `usage` field wins.
 *
 * Event parsing:
 * - Events are separated by a blank line; both LF and CRLF line endings are accepted
 * - Inside an event only `data:` lines are read, so a leading `event:` or `id:` line
 *   does not hide the payload
 * - `[DONE]` markers and payloads that are not valid JSON are ignored
 *
 * @returns `stream`, the pass-through TransformStream, and `usagePromise`, which resolves
 * when the stream is flushed with the extracted usage or `null` if none was seen.
 * The promise is settled only from `flush`.
 *
 * @example
 * ```typescript
 * const { stream, usagePromise } = createStreamingCostExtractor();
 *
 * // Pipe the response through the extractor
 * const transformedResponse = originalResponse.body.pipeThrough(stream);
 *
 * // Later, get the usage
 * const usage = await usagePromise;
 * if (usage) {
 * console.log(`Tokens used: ${usage.totalTokens}`);
 * }
 * ```
 */
function createStreamingCostExtractor() {
	let extractedUsage = null;
	let buffer = "";
	let resolveUsage;
	const usagePromise = new Promise((resolve) => {
		resolveUsage = resolve;
	});
	const decoder = new TextDecoder();

	// Parses one payload; returns true when it was valid JSON (whether or not it had usage).
	const readUsage = (payload) => {
		if (!payload || payload === "[DONE]") return false;
		try {
			const parsed = JSON.parse(payload);
			if (parsed?.usage) extractedUsage = {
				promptTokens: parsed.usage.prompt_tokens ?? 0,
				completionTokens: parsed.usage.completion_tokens ?? 0,
				totalTokens: parsed.usage.total_tokens ?? 0,
				cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens
			};
			return true;
		} catch {
			// Best effort: a payload that is not JSON simply carries no usage.
			return false;
		}
	};

	// Looks for usage in a single SSE event (the text between two blank lines).
	const inspectEvent = (rawEvent) => {
		const trimmed = rawEvent.trim();
		if (!trimmed) return;
		const dataLines = [];
		for (const rawLine of trimmed.split("\n")) {
			const line = rawLine.trim();
			if (line.startsWith("data:")) dataLines.push(line.slice(5).trim());
		}
		if (dataLines.length === 0) return;
		// Multiple data lines of one event form a single payload joined by newlines.
		if (readUsage(dataLines.join("\n"))) return;
		// Fallback for a payload whose JSON itself spans raw lines after one "data:" prefix.
		if (trimmed.startsWith("data:")) readUsage(trimmed.slice(5).trim());
	};

	// Normalizes CRLF, inspects every complete event and keeps the trailing partial one.
	// Normalizing the whole buffer also repairs a CRLF pair split across two chunks.
	const drain = (text, isFinal) => {
		buffer = (buffer + text).replace(/\r\n/g, "\n");
		const events = buffer.split("\n\n");
		buffer = isFinal ? "" : events.pop() || "";
		for (const event of events) inspectEvent(event);
	};

	return {
		stream: new TransformStream({
			transform(chunk, controller) {
				// Forward first so inspection can never delay or alter the client's bytes.
				controller.enqueue(chunk);
				drain(decoder.decode(chunk, { stream: true }), false);
			},
			flush() {
				// decode() with no input releases any bytes the decoder was still holding.
				drain(decoder.decode(), true);
				resolveUsage(extractedUsage);
			}
		}),
		usagePromise
	};
}
/**
 * Wraps a Response so its body flows through a usage extractor.
 *
 * A response without a body is returned unchanged together with a promise that is
 * already resolved to `null`. Otherwise a new Response is built around the piped body,
 * carrying over the original status, statusText and headers.
 *
 * @param response - Original streaming Response
 * @returns `response` to hand to the client and `usagePromise` for the extracted usage
 *
 * @example
 * ```typescript
 * const result = wrapStreamingResponse(originalResponse);
 *
 * // Return the transformed response to the client
 * return result.response;
 *
 * // After response is sent, get usage for cost tracking
 * result.usagePromise.then((usage) => {
 * if (usage) {
 * trackCost(usage);
 * }
 * });
 * ```
 */
function wrapStreamingResponse(response) {
	const sourceBody = response.body;
	if (sourceBody) {
		const extractor = createStreamingCostExtractor();
		const pipedBody = sourceBody.pipeThrough(extractor.stream);
		const { status, statusText, headers } = response;
		return {
			response: new Response(pipedBody, {
				status,
				statusText,
				headers
			}),
			usagePromise: extractor.usagePromise
		};
	}
	return {
		response,
		usagePromise: Promise.resolve(null)
	};
}
/**
 * Turns on `stream_options.include_usage` for streaming requests.
 *
 * Only bodies whose `stream` is exactly `true` are touched. Their `stream_options`
 * is replaced by a copy of the existing options with `include_usage: true` added.
 * The body is mutated in place and also returned.
 *
 * @param body - Request body (will be modified)
 * @returns The same body object
 */
function ensureStreamUsageEnabled(body) {
	if (body.stream !== true) return body;
	const currentOptions = body.stream_options || {};
	body.stream_options = {
		...currentOptions,
		include_usage: true
	};
	return body;
}
13922
+
13923
+ //#endregion
13924
+ //#region src/server/services/batchWriter.ts
/**
 * Creates a BatchWriter that queues request records in memory and writes them
 * through `deps.batchInsertRequests` in batches.
 *
 * Behaviour:
 * - The first `enqueue` starts a periodic flush timer (`flushIntervalMs`, default 2000)
 * - Reaching `maxBatchSize` (default 100) queued records triggers an immediate flush
 * - Only one flush runs at a time; a `flush()` call made meanwhile returns the
 *   promise of the flush already in progress
 * - A failed write is logged and its records are put back at the front of the queue
 * - `stop()` clears the timer, waits for any in-progress flush and then writes
 *   whatever was queued while that flush was running
 *
 * @param deps - Object providing `batchInsertRequests(batch)`
 * @param config$1 - Optional `flushIntervalMs`, `maxBatchSize` and `debug`
 * @returns `enqueue`, `flush`, `stop`, `queueLength` and `isRunning`
 *
 * @example
 * ```typescript
 * const writer = createBatchWriter(
 * { batchInsertRequests: db.batchInsertRequests },
 * { flushIntervalMs: 2000 }
 * );
 *
 * // Enqueue a request
 * writer.enqueue({
 * requestId: 'req-123',
 * provider: 'openai',
 * model: 'gpt-4o',
 * // ... other fields
 * });
 *
 * // When shutting down
 * await writer.stop();
 * ```
 */
function createBatchWriter(deps, config$1 = {}) {
	const { flushIntervalMs = 2e3, maxBatchSize = 100, debug = false } = config$1;
	let queue = [];
	let flushTimer = null;
	let running = false;
	/** Promise of the flush currently in progress, or null when idle. */
	let activeFlush = null;
	const log = debug ? (msg) => logger.debug(msg) : () => {};
	/**
	 * Write the current queue contents as one batch. Never rejects for write errors:
	 * they are logged and the batch is re-queued ahead of newer records.
	 */
	async function writeBatch() {
		const batch = queue;
		queue = [];
		try {
			log(`[BatchWriter] Flushing ${batch.length} requests`);
			const result = await deps.batchInsertRequests(batch);
			// Tolerate a result without `count`: the write itself succeeded, so a missing
			// field must not throw here and send an already-written batch back to the queue.
			log(`[BatchWriter] Flushed ${result?.count ?? batch.length} requests successfully`);
		} catch (error$45) {
			const errorMsg = error$45 instanceof Error ? error$45.message : String(error$45);
			logger.error(`[BatchWriter] Flush failed, re-queuing requests: ${errorMsg}`);
			queue = [...batch, ...queue];
		}
	}
	/**
	 * Flush all queued requests to the database
	 */
	function flush() {
		if (activeFlush) return activeFlush;
		if (queue.length === 0) return Promise.resolve();
		activeFlush = writeBatch().finally(() => {
			activeFlush = null;
		});
		return activeFlush;
	}
	/**
	 * Start the periodic flush timer
	 */
	function start() {
		if (running) return;
		running = true;
		flushTimer = setInterval(() => {
			flush().catch((err) => {
				const errorMsg = err instanceof Error ? err.message : String(err);
				logger.error(`[BatchWriter] Periodic flush error: ${errorMsg}`);
			});
		}, flushIntervalMs);
		log(`[BatchWriter] Started with ${flushIntervalMs}ms flush interval`);
	}
	/**
	 * Stop the batch writer and flush remaining items
	 */
	async function stop() {
		if (!running) return;
		running = false;
		if (flushTimer) {
			clearInterval(flushTimer);
			flushTimer = null;
		}
		const joinedInFlight = activeFlush !== null;
		await flush();
		// When the call above only waited for a flush that was already running, records
		// enqueued during that flush are still queued. Write them before reporting stopped.
		if (joinedInFlight) await flush();
		log("[BatchWriter] Stopped");
	}
	/**
	 * Add a request to the batch queue
	 */
	function enqueue(request) {
		queue.push(request);
		log(`[BatchWriter] Enqueued request ${request.requestId}, queue size: ${queue.length}`);
		if (!running) start();
		if (queue.length >= maxBatchSize) {
			log(`[BatchWriter] Max batch size reached, forcing flush`);
			flush().catch((err) => {
				const errorMsg = err instanceof Error ? err.message : String(err);
				logger.error(`[BatchWriter] Forced flush error: ${errorMsg}`);
			});
		}
	}
	return {
		enqueue,
		flush,
		stop,
		queueLength: () => queue.length,
		isRunning: () => running
	};
}
/**
 * Module-wide BatchWriter instance.
 * Stays null until getGlobalBatchWriter creates it.
 */
let globalWriter = null;
/**
 * Return the shared BatchWriter, creating it on the first call.
 *
 * Once the instance exists, `deps` and `config` passed on later calls are ignored.
 *
 * @param deps - Database dependencies (required on first call)
 * @param config - Optional configuration
 * @returns The global BatchWriter instance
 * @throws Error when no instance exists yet and `deps` is missing
 */
function getGlobalBatchWriter(deps, config$1) {
	if (globalWriter) return globalWriter;
	if (!deps) throw new Error("BatchWriter dependencies required on first initialization");
	globalWriter = createBatchWriter(deps, config$1);
	return globalWriter;
}
14042
+
14043
+ //#endregion
14044
+ //#region src/server/middlewares/costTracking.ts
/**
 * Calculate cost in micro-dollars (1 dollar = 1,000,000 micro-dollars).
 *
 * Each side is token count multiplied by the per-million price and rounded to a
 * whole micro-dollar; the total is the sum of the two rounded parts.
 *
 * @param usage - Object with `promptTokens` and `completionTokens`
 * @param pricing - Object with `inputCostPer1M` and `outputCostPer1M`
 * @returns `inputCost`, `outputCost` and `totalCost`
 */
function calculateCost(usage, pricing) {
	const { promptTokens, completionTokens } = usage;
	const { inputCostPer1M, outputCostPer1M } = pricing;
	const inputCost = Math.round(promptTokens * inputCostPer1M);
	const outputCost = Math.round(completionTokens * outputCostPer1M);
	const totalCost = inputCost + outputCost;
	return {
		inputCost,
		outputCost,
		totalCost
	};
}
/**
 * Simple pricing provider that fetches from models.dev
 *
 * Prices are cached per lowercased "provider:model" key. The cache is refreshed when
 * it is empty or older than `cacheTTL`; concurrent callers share one in-flight fetch.
 * After a refresh attempt no new attempt starts for `retryDelay`, so a models.dev
 * outage does not add a network round trip to every lookup. A failed refresh keeps
 * whatever the cache held before.
 */
var PricingProvider = class {
	cache = /* @__PURE__ */ new Map();
	/** Time of the last successful refresh (ms since epoch, 0 = never). */
	lastFetch = 0;
	/** Time the last refresh attempt started, successful or not (ms since epoch, 0 = never). */
	lastAttempt = 0;
	cacheTTL = 300 * 1e3;
	/** Minimum wait between refresh attempts, in ms. */
	retryDelay = 30 * 1e3;
	fetchPromise = null;
	/** Build the case-insensitive cache key for a provider/model pair. */
	getCacheKey(provider, model) {
		return `${provider.toLowerCase()}:${model.toLowerCase()}`;
	}
	/**
	 * Download the pricing catalogue and replace the cache with it.
	 * Never throws: failures are logged and the previous cache is left untouched.
	 */
	async fetchPricingData() {
		this.lastAttempt = Date.now();
		try {
			const response = await fetch("https://models.dev/api.json");
			if (!response.ok) {
				logger.error(`[CostTracking] Pricing refresh failed with HTTP ${response.status}`);
				return;
			}
			const data = await response.json();
			// Build into a fresh map and swap at the end, so a malformed payload can
			// never leave the live cache cleared or half-filled.
			const fresh = /* @__PURE__ */ new Map();
			for (const [providerId, provider] of Object.entries(data ?? {})) {
				if (!provider?.models) continue;
				for (const model of Object.values(provider.models)) {
					if (!model?.cost || typeof model.id !== "string") continue;
					fresh.set(this.getCacheKey(providerId, model.id), {
						inputCostPer1M: model.cost.input ?? 0,
						outputCostPer1M: model.cost.output ?? 0
					});
				}
			}
			this.cache = fresh;
			this.lastFetch = Date.now();
		} catch (error$45) {
			const errorMsg = error$45 instanceof Error ? error$45.message : String(error$45);
			logger.error(`[CostTracking] Pricing refresh failed: ${errorMsg}`);
		}
	}
	/** Refresh the cache when it is empty or stale, unless an attempt was made too recently. */
	async ensureFreshCache() {
		const now = Date.now();
		if (now - this.lastFetch < this.cacheTTL && this.cache.size > 0) return;
		if (!this.fetchPromise) {
			if (now - this.lastAttempt < this.retryDelay) return;
			this.fetchPromise = this.fetchPricingData().finally(() => {
				this.fetchPromise = null;
			});
		}
		await this.fetchPromise;
	}
	/**
	 * Look up pricing for a model.
	 *
	 * @param provider - Provider id (case-insensitive)
	 * @param model - Model id (case-insensitive)
	 * @returns `{ inputCostPer1M, outputCostPer1M }`, or null when the model is not in the cache
	 */
	async getModelPricing(provider, model) {
		await this.ensureFreshCache();
		return this.cache.get(this.getCacheKey(provider, model)) || null;
	}
};
const pricingProvider = new PricingProvider();
14103
+ /**
14104
+ * Creates cost tracking middleware that logs LLM requests with usage and cost data.
14105
+ *
14106
+ * Features:
14107
+ * - Tracks both streaming and non-streaming requests
14108
+ * - Calculates costs using models.dev pricing data
14109
+ * - Batches database writes for performance
14110
+ * - Adds x-llmops-request-id header for tracing
14111
+ */
14112
+ function createCostTrackingMiddleware(config$1 = {}) {
14113
+ const { enabled = true, trackErrors = true, flushIntervalMs = 2e3, debug = false } = config$1;
14114
+ const log = debug ? (msg) => logger.debug(`[CostTracking] ${msg}`) : () => {};
14115
+ return async (c, next) => {
14116
+ if (!enabled) return next();
14117
+ const path = c.req.path;
14118
+ if (!path.endsWith("/chat/completions") && !path.endsWith("/completions")) return next();
14119
+ const requestId = randomUUID();
14120
+ const startTime = Date.now();
14121
+ c.header("x-llmops-request-id", requestId);
14122
+ let body = {};
14123
+ let isStreaming = false;
14124
+ try {
14125
+ body = await c.req.raw.clone().json();
14126
+ isStreaming = body.stream === true;
14127
+ if (isStreaming) {
14128
+ body = ensureStreamUsageEnabled(body);
14129
+ const newHeaders = new Headers(c.req.raw.headers);
14130
+ const newRequest = new Request(c.req.raw.url, {
14131
+ method: c.req.raw.method,
14132
+ headers: newHeaders,
14133
+ body: JSON.stringify(body),
14134
+ duplex: "half"
14135
+ });
14136
+ Object.defineProperty(c.req, "raw", {
14137
+ value: newRequest,
14138
+ writable: true,
14139
+ configurable: true
14140
+ });
14141
+ c.req.bodyCache = {};
14142
+ }
14143
+ } catch {
14144
+ log("Failed to parse request body");
14145
+ }
14146
+ const context = {
14147
+ requestId,
14148
+ startTime,
14149
+ provider: "",
14150
+ model: body.model || "",
14151
+ configId: c.get("configId"),
14152
+ endpoint: path,
14153
+ isStreaming
14154
+ };
14155
+ c.set("__costTrackingContext", context);
14156
+ await next();
14157
+ const response = c.res;
14158
+ const statusCode = response.status;
14159
+ const latencyMs = Date.now() - startTime;
14160
+ const variantModel = c.get("variantModel") || context.model;
14161
+ let provider = "unknown";
14162
+ const llmopsConfigHeader = c.req.header("x-llmops-config");
14163
+ if (llmopsConfigHeader) try {
14164
+ provider = JSON.parse(llmopsConfigHeader).provider || provider;
14165
+ } catch {}
14166
+ if (!variantModel) {
14167
+ log(`Skipping request tracking - no model info`);
14168
+ return;
14169
+ }
14170
+ const db = c.get("db");
14171
+ const batchWriter = getGlobalBatchWriter({ batchInsertRequests: (requests) => db.batchInsertRequests(requests) }, {
14172
+ flushIntervalMs,
14173
+ debug
14174
+ });
14175
+ if (isStreaming && response.body) {
14176
+ const { response: wrappedResponse, usagePromise } = wrapStreamingResponse(response);
14177
+ c.res = wrappedResponse;
14178
+ usagePromise.then(async (usage) => {
14179
+ await processUsageAndLog({
14180
+ requestId,
14181
+ provider,
14182
+ model: variantModel,
14183
+ configId: c.get("configId"),
14184
+ variantId: c.get("variantId"),
14185
+ endpoint: context.endpoint,
14186
+ statusCode,
14187
+ latencyMs,
14188
+ isStreaming: true,
14189
+ usage: usage ? {
14190
+ promptTokens: usage.promptTokens,
14191
+ completionTokens: usage.completionTokens,
14192
+ totalTokens: usage.totalTokens,
14193
+ cachedTokens: usage.cachedTokens
14194
+ } : null,
14195
+ batchWriter,
14196
+ trackErrors,
14197
+ log
14198
+ });
14199
+ }).catch((err) => {
14200
+ logger.error(`[CostTracking] Failed to process streaming usage: ${err}`);
14201
+ });
14202
+ } else {
14203
+ let usage = null;
14204
+ try {
14205
+ const responseBody = await response.clone().json();
14206
+ if (responseBody.usage) usage = {
14207
+ promptTokens: responseBody.usage.prompt_tokens || 0,
14208
+ completionTokens: responseBody.usage.completion_tokens || 0,
14209
+ totalTokens: responseBody.usage.total_tokens || 0,
14210
+ cachedTokens: responseBody.usage.prompt_tokens_details?.cached_tokens
14211
+ };
14212
+ } catch {
14213
+ log("Failed to parse response body for usage");
14214
+ }
14215
+ await processUsageAndLog({
14216
+ requestId,
14217
+ provider,
14218
+ model: variantModel,
14219
+ configId: c.get("configId"),
14220
+ variantId: c.get("variantId"),
14221
+ endpoint: context.endpoint,
14222
+ statusCode,
14223
+ latencyMs,
14224
+ isStreaming: false,
14225
+ usage,
14226
+ batchWriter,
14227
+ trackErrors,
14228
+ log
14229
+ });
14230
+ }
14231
+ };
14232
+ }
14233
+ /**
14234
+ * Process usage data and log to batch writer
14235
+ */
14236
+ async function processUsageAndLog(params) {
14237
+ const { requestId, provider, model, configId, variantId, endpoint, statusCode, latencyMs, isStreaming, usage, batchWriter, trackErrors, log } = params;
14238
+ if (!trackErrors && statusCode >= 400) {
14239
+ log(`Skipping error response (${statusCode})`);
14240
+ return;
14241
+ }
14242
+ let cost = 0;
14243
+ let inputCost = 0;
14244
+ let outputCost = 0;
14245
+ if (usage && usage.promptTokens + usage.completionTokens > 0) try {
14246
+ const pricing = await pricingProvider.getModelPricing(provider, model);
14247
+ if (pricing) {
14248
+ const costResult = calculateCost({
14249
+ promptTokens: usage.promptTokens,
14250
+ completionTokens: usage.completionTokens
14251
+ }, pricing);
14252
+ cost = costResult.totalCost;
14253
+ inputCost = costResult.inputCost;
14254
+ outputCost = costResult.outputCost;
14255
+ log(`Calculated cost: ${cost} micro-dollars for ${provider}/${model}`);
14256
+ } else log(`No pricing found for ${provider}/${model}`);
14257
+ } catch (err) {
14258
+ logger.error(`[CostTracking] Failed to calculate cost: ${err}`);
14259
+ }
14260
+ const requestData = {
14261
+ requestId,
14262
+ configId: configId || null,
14263
+ variantId: variantId || null,
14264
+ provider,
14265
+ model,
14266
+ promptTokens: usage?.promptTokens || 0,
14267
+ completionTokens: usage?.completionTokens || 0,
14268
+ totalTokens: usage?.totalTokens || 0,
14269
+ cachedTokens: usage?.cachedTokens || 0,
14270
+ cost,
14271
+ inputCost,
14272
+ outputCost,
14273
+ endpoint,
14274
+ statusCode,
14275
+ latencyMs,
14276
+ isStreaming,
14277
+ tags: {}
14278
+ };
14279
+ batchWriter.enqueue(requestData);
14280
+ log(`Enqueued request ${requestId} for logging`);
14281
+ }
14282
+
13570
14283
  //#endregion
13571
14284
  //#region src/server/handlers/genai/index.ts
13572
14285
  const app$2 = new Hono();
13573
14286
  app$2.use("*", prettyJSON()).get("/health", async (c) => {
13574
14287
  return c.json({ status: "healthy" });
13575
- }).use("*", requestValidator).use("*", createRequestGuardMiddleware()).use("*", createGatewayAdapterMiddleware()).route("/", gateway).notFound((c) => c.json({ error: {
14288
+ }).use("*", requestValidator).use("*", createRequestGuardMiddleware()).use("*", createCostTrackingMiddleware()).use("*", createGatewayAdapterMiddleware()).route("/", gateway).notFound((c) => c.json({ error: {
13576
14289
  message: "Not Found",
13577
14290
  type: "invalid_request_error"
13578
14291
  } }, 404)).onError((err, c) => {