@llmops/core 0.6.1 → 0.6.3-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,5 +1,5 @@
1
- const require_db = require('./db-C6ApWDjW.cjs');
2
- const require_neon_dialect = require('./neon-dialect-SqAJhPFS.cjs');
1
+ const require_db = require('./db-i0OOYxJm.cjs');
2
+ const require_neon_dialect = require('./neon-dialect-oh8u9vRy.cjs');
3
3
  let __llmops_gateway = require("@llmops/gateway");
4
4
  __llmops_gateway = require_db.__toESM(__llmops_gateway);
5
5
  let kysely = require("kysely");
@@ -2004,7 +2004,9 @@ const insertLLMRequestSchema = require_db.zod_default.object({
2004
2004
  completionTokens: require_db.zod_default.number().int().default(0),
2005
2005
  totalTokens: require_db.zod_default.number().int().default(0),
2006
2006
  cachedTokens: require_db.zod_default.number().int().default(0),
2007
+ cacheCreationTokens: require_db.zod_default.number().int().default(0),
2007
2008
  cost: require_db.zod_default.number().int().default(0),
2009
+ cacheSavings: require_db.zod_default.number().int().default(0),
2008
2010
  inputCost: require_db.zod_default.number().int().default(0),
2009
2011
  outputCost: require_db.zod_default.number().int().default(0),
2010
2012
  endpoint: require_db.zod_default.string(),
@@ -2099,7 +2101,9 @@ const createLLMRequestsDataLayer = (db) => {
2099
2101
  completionTokens: req.completionTokens,
2100
2102
  totalTokens: req.totalTokens,
2101
2103
  cachedTokens: req.cachedTokens,
2104
+ cacheCreationTokens: req.cacheCreationTokens,
2102
2105
  cost: req.cost,
2106
+ cacheSavings: req.cacheSavings,
2103
2107
  inputCost: req.inputCost,
2104
2108
  outputCost: req.outputCost,
2105
2109
  endpoint: req.endpoint,
@@ -2137,7 +2141,9 @@ const createLLMRequestsDataLayer = (db) => {
2137
2141
  completionTokens: req.completionTokens,
2138
2142
  totalTokens: req.totalTokens,
2139
2143
  cachedTokens: req.cachedTokens,
2144
+ cacheCreationTokens: req.cacheCreationTokens,
2140
2145
  cost: req.cost,
2146
+ cacheSavings: req.cacheSavings,
2141
2147
  inputCost: req.inputCost,
2142
2148
  outputCost: req.outputCost,
2143
2149
  endpoint: req.endpoint,
@@ -2199,6 +2205,8 @@ const createLLMRequestsDataLayer = (db) => {
2199
2205
  kysely.sql`COALESCE(SUM(${col$1("promptTokens")}), 0)`.as("totalPromptTokens"),
2200
2206
  kysely.sql`COALESCE(SUM(${col$1("completionTokens")}), 0)`.as("totalCompletionTokens"),
2201
2207
  kysely.sql`COALESCE(SUM(${col$1("totalTokens")}), 0)`.as("totalTokens"),
2208
+ kysely.sql`COALESCE(SUM(${col$1("cachedTokens")}), 0)`.as("totalCachedTokens"),
2209
+ kysely.sql`COALESCE(SUM(${col$1("cacheSavings")}), 0)`.as("totalCacheSavings"),
2202
2210
  kysely.sql`COUNT(*)`.as("requestCount")
2203
2211
  ]).where(kysely.sql`${col$1("createdAt")} >= ${startDate.toISOString()}`).where(kysely.sql`${col$1("createdAt")} <= ${endDate.toISOString()}`);
2204
2212
  if (configId) query = query.where("configId", "=", configId);
@@ -3712,7 +3720,64 @@ function calculateCost(usage, pricing) {
3712
3720
  return {
3713
3721
  inputCost,
3714
3722
  outputCost,
3715
- totalCost: inputCost + outputCost
3723
+ totalCost: inputCost + outputCost,
3724
+ cacheSavings: 0
3725
+ };
3726
+ }
3727
+ /**
3728
+ * Get default cache read rate as a fraction of input cost per provider.
3729
+ * Used when the pricing source doesn't provide cache pricing.
3730
+ */
3731
+ function getDefaultCacheReadRate(provider, inputCostPer1M) {
3732
+ switch (provider?.toLowerCase()) {
3733
+ case "anthropic": return inputCostPer1M * .1;
3734
+ case "openai":
3735
+ case "azure-openai": return inputCostPer1M * .5;
3736
+ case "google":
3737
+ case "gemini":
3738
+ case "vertex_ai": return inputCostPer1M * .25;
3739
+ default: return inputCostPer1M * .5;
3740
+ }
3741
+ }
3742
+ /**
3743
+ * Get default cache write/creation rate as a fraction of input cost per provider.
3744
+ * Used when the pricing source doesn't provide cache pricing.
3745
+ */
3746
+ function getDefaultCacheWriteRate(provider, inputCostPer1M) {
3747
+ switch (provider?.toLowerCase()) {
3748
+ case "anthropic": return inputCostPer1M * 1.25;
3749
+ default: return inputCostPer1M;
3750
+ }
3751
+ }
3752
+ /**
3753
+ * Calculate cache-aware cost of an LLM request in micro-dollars.
3754
+ *
3755
+ * Splits input tokens into uncached, cache-read, and cache-creation buckets,
3756
+ * each priced at different rates. Falls back to provider-specific multipliers
3757
+ * when the supplied pricing doesn't include cache rates.
3758
+ *
3759
+ * @param usage - Token usage data (with cachedTokens and cacheCreationTokens)
3760
+ * @param pricing - Model pricing (may include cacheReadCostPer1M / cacheWriteCostPer1M)
3761
+ * @param provider - Provider name for fallback rate selection
3762
+ * @returns Cost breakdown in micro-dollars
3763
+ */
3764
+ function calculateCacheAwareCost(usage, pricing, provider) {
3765
+ const cachedTokens = usage.cachedTokens ?? 0;
3766
+ const cacheCreationTokens = usage.cacheCreationTokens ?? 0;
3767
+ if (cachedTokens === 0 && cacheCreationTokens === 0) return calculateCost(usage, pricing);
3768
+ const cacheReadRate = pricing.cacheReadCostPer1M ?? getDefaultCacheReadRate(provider, pricing.inputCostPer1M);
3769
+ const cacheWriteRate = pricing.cacheWriteCostPer1M ?? getDefaultCacheWriteRate(provider, pricing.inputCostPer1M);
3770
+ const uncachedInputTokens = Math.max(0, usage.promptTokens - cachedTokens - cacheCreationTokens);
3771
+ const regularInputCost = Math.round(uncachedInputTokens * pricing.inputCostPer1M);
3772
+ const cacheReadCost = Math.round(cachedTokens * cacheReadRate);
3773
+ const cacheWriteCost = Math.round(cacheCreationTokens * cacheWriteRate);
3774
+ const outputCost = Math.round(usage.completionTokens * pricing.outputCostPer1M);
3775
+ const inputCost = regularInputCost + cacheReadCost + cacheWriteCost;
3776
+ return {
3777
+ inputCost,
3778
+ outputCost,
3779
+ totalCost: inputCost + outputCost,
3780
+ cacheSavings: Math.round((cachedTokens + cacheCreationTokens) * pricing.inputCostPer1M) - cacheReadCost - cacheWriteCost
3716
3781
  };
3717
3782
  }
3718
3783
  /**
@@ -3764,111 +3829,108 @@ function formatCost(microDollars, decimals = 6) {
3764
3829
 
3765
3830
  //#endregion
3766
3831
  //#region src/pricing/provider.ts
3767
- const MODELS_DEV_API = "https://models.dev/api.json";
3832
+ const LLMOPS_MODELS_API = "https://models.llmops.build";
3833
+ /**
3834
+ * Convert price from USD cents per token to dollars per 1M tokens.
3835
+ *
3836
+ * API returns cents/token. Our system uses dollars/1M tokens.
3837
+ * Formula: (centsPerToken / 100) * 1_000_000 = centsPerToken * 10_000
3838
+ */
3839
+ function centsPerTokenToCostPer1M(centsPerToken) {
3840
+ return centsPerToken * 1e4;
3841
+ }
3768
3842
  /**
3769
- * Pricing provider that fetches data from models.dev API
3843
+ * Pricing provider that fetches per-model data from the LLMOps Models API.
3770
3844
  *
3771
3845
  * Features:
3772
- * - Caches pricing data with configurable TTL (default 5 minutes)
3773
- * - Supports fallback to local cache on fetch failure
3774
- * - Thread-safe cache refresh
3846
+ * - Per-model in-memory cache with configurable TTL (default 5 minutes)
3847
+ * - Deduplicates concurrent fetches for the same model
3848
+ * - Caches null results (404s) to avoid repeated lookups
3849
+ * - Falls back to stale cache on fetch errors
3775
3850
  */
3776
- var ModelsDevPricingProvider = class {
3851
+ var LLMOpsPricingProvider = class {
3777
3852
  cache = /* @__PURE__ */ new Map();
3778
- lastFetch = 0;
3853
+ pendingFetches = /* @__PURE__ */ new Map();
3779
3854
  cacheTTL;
3780
- fetchPromise = null;
3781
- ready = false;
3782
- /**
3783
- * Create a new ModelsDevPricingProvider
3784
- *
3785
- * @param cacheTTL - Cache TTL in milliseconds (default: 5 minutes)
3786
- */
3787
- constructor(cacheTTL = 300 * 1e3) {
3788
- this.cacheTTL = cacheTTL;
3855
+ baseUrl;
3856
+ constructor(options) {
3857
+ this.cacheTTL = options?.cacheTTL ?? 300 * 1e3;
3858
+ this.baseUrl = options?.baseUrl ?? LLMOPS_MODELS_API;
3789
3859
  }
3790
- /**
3791
- * Generate a cache key for a provider/model combination
3792
- */
3793
3860
  getCacheKey(provider, model) {
3794
3861
  return `${provider.toLowerCase()}:${model.toLowerCase()}`;
3795
3862
  }
3796
3863
  /**
3797
- * Fetch pricing data from models.dev API
3864
+ * Fetch pricing for a single model from the API
3798
3865
  */
3799
- async fetchPricingData() {
3866
+ async fetchModelPricing(provider, model) {
3867
+ const url = `${this.baseUrl}/model-configs/pricing/${encodeURIComponent(provider)}/${model}`;
3800
3868
  try {
3801
- require_db.logger.debug("[Pricing] Fetching pricing data from models.dev");
3802
- const response = await fetch(MODELS_DEV_API);
3803
- if (!response.ok) throw new Error(`Failed to fetch models.dev API: ${response.status}`);
3804
- const data = await response.json();
3805
- this.cache.clear();
3806
- for (const [providerId, provider] of Object.entries(data)) {
3807
- if (!provider.models) continue;
3808
- for (const [_modelId, model] of Object.entries(provider.models)) {
3809
- if (!model.cost) continue;
3810
- const cacheKey = this.getCacheKey(providerId, model.id);
3811
- this.cache.set(cacheKey, {
3812
- inputCostPer1M: model.cost.input ?? 0,
3813
- outputCostPer1M: model.cost.output ?? 0,
3814
- cacheReadCostPer1M: model.cost.cache_read,
3815
- cacheWriteCostPer1M: model.cost.cache_write,
3816
- reasoningCostPer1M: model.cost.reasoning
3817
- });
3818
- const nameKey = this.getCacheKey(providerId, model.name);
3819
- if (nameKey !== cacheKey) this.cache.set(nameKey, this.cache.get(cacheKey));
3820
- }
3869
+ require_db.logger.debug(`[Pricing] GET ${url}`);
3870
+ const startTime = Date.now();
3871
+ const response = await fetch(url);
3872
+ const elapsed = Date.now() - startTime;
3873
+ require_db.logger.debug(`[Pricing] GET ${url} -> ${response.status} (${elapsed}ms)`);
3874
+ if (response.status === 404) {
3875
+ require_db.logger.debug(`[Pricing] No pricing found for ${provider}/${model}`);
3876
+ return null;
3821
3877
  }
3822
- this.lastFetch = Date.now();
3823
- this.ready = true;
3824
- require_db.logger.debug(`[Pricing] Cached pricing for ${this.cache.size} models from models.dev`);
3878
+ if (!response.ok) throw new Error(`API returned ${response.status}`);
3879
+ const data = await response.json();
3880
+ if (!data.pay_as_you_go) return null;
3881
+ const payg = data.pay_as_you_go;
3882
+ const pricing = {
3883
+ inputCostPer1M: centsPerTokenToCostPer1M(payg.request_token?.price ?? 0),
3884
+ outputCostPer1M: centsPerTokenToCostPer1M(payg.response_token?.price ?? 0),
3885
+ cacheReadCostPer1M: payg.cache_read_input_token?.price != null ? centsPerTokenToCostPer1M(payg.cache_read_input_token.price) : void 0,
3886
+ cacheWriteCostPer1M: payg.cache_write_input_token?.price != null ? centsPerTokenToCostPer1M(payg.cache_write_input_token.price) : void 0
3887
+ };
3888
+ require_db.logger.debug(`[Pricing] Cached pricing for ${provider}/${model}: input=$${pricing.inputCostPer1M}/1M, output=$${pricing.outputCostPer1M}/1M`);
3889
+ return pricing;
3825
3890
  } catch (error) {
3826
- require_db.logger.error(`[Pricing] Failed to fetch pricing data: ${error instanceof Error ? error.message : String(error)}`);
3827
- if (this.cache.size === 0) throw error;
3891
+ require_db.logger.error(`[Pricing] Failed to fetch pricing for ${provider}/${model}: ${error instanceof Error ? error.message : String(error)}`);
3892
+ const cacheKey = this.getCacheKey(provider, model);
3893
+ const stale = this.cache.get(cacheKey);
3894
+ if (stale) {
3895
+ require_db.logger.debug(`[Pricing] Using stale cache for ${provider}/${model}`);
3896
+ return stale.pricing;
3897
+ }
3898
+ return null;
3828
3899
  }
3829
3900
  }
3830
3901
  /**
3831
- * Ensure cache is fresh, fetching if necessary
3832
- */
3833
- async ensureFreshCache() {
3834
- if (!(Date.now() - this.lastFetch > this.cacheTTL) && this.cache.size > 0) return;
3835
- if (!this.fetchPromise) this.fetchPromise = this.fetchPricingData().finally(() => {
3836
- this.fetchPromise = null;
3837
- });
3838
- await this.fetchPromise;
3839
- }
3840
- /**
3841
3902
  * Get pricing for a specific model
3842
3903
  */
3843
3904
  async getModelPricing(provider, model) {
3844
- await this.ensureFreshCache();
3845
3905
  const cacheKey = this.getCacheKey(provider, model);
3846
- const pricing = this.cache.get(cacheKey);
3847
- if (!pricing) {
3848
- require_db.logger.debug(`[Pricing] No pricing found for ${provider}/${model}, trying partial match`);
3849
- for (const [key, value] of this.cache.entries()) if (key.startsWith(`${provider.toLowerCase()}:`)) {
3850
- const modelPart = key.split(":")[1];
3851
- if (model.toLowerCase().includes(modelPart)) {
3852
- require_db.logger.debug(`[Pricing] Found partial match: ${key}`);
3853
- return value;
3854
- }
3855
- }
3856
- return null;
3906
+ const cached = this.cache.get(cacheKey);
3907
+ if (cached && Date.now() - cached.fetchedAt < this.cacheTTL) return cached.pricing;
3908
+ let pending = this.pendingFetches.get(cacheKey);
3909
+ if (!pending) {
3910
+ pending = this.fetchModelPricing(provider, model).then((pricing) => {
3911
+ this.cache.set(cacheKey, {
3912
+ pricing,
3913
+ fetchedAt: Date.now()
3914
+ });
3915
+ return pricing;
3916
+ }).finally(() => {
3917
+ this.pendingFetches.delete(cacheKey);
3918
+ });
3919
+ this.pendingFetches.set(cacheKey, pending);
3857
3920
  }
3858
- return pricing;
3921
+ return pending;
3859
3922
  }
3860
3923
  /**
3861
- * Force refresh the pricing cache
3924
+ * Force refresh the pricing cache (clears all cached entries)
3862
3925
  */
3863
3926
  async refreshCache() {
3864
- this.lastFetch = 0;
3865
- await this.ensureFreshCache();
3927
+ this.cache.clear();
3866
3928
  }
3867
3929
  /**
3868
- * Check if the provider is ready
3930
+ * Always ready — no bulk pre-fetch needed
3869
3931
  */
3870
3932
  isReady() {
3871
- return this.ready;
3933
+ return true;
3872
3934
  }
3873
3935
  /**
3874
3936
  * Get the number of cached models (for debugging)
@@ -3882,7 +3944,7 @@ let defaultProvider = null;
3882
3944
  * Get the default pricing provider instance
3883
3945
  */
3884
3946
  function getDefaultPricingProvider() {
3885
- if (!defaultProvider) defaultProvider = new ModelsDevPricingProvider();
3947
+ if (!defaultProvider) defaultProvider = new LLMOpsPricingProvider();
3886
3948
  return defaultProvider;
3887
3949
  }
3888
3950
 
@@ -4267,14 +4329,15 @@ exports.LLMOPS_SPAN_NAME_HEADER = LLMOPS_SPAN_NAME_HEADER;
4267
4329
  exports.LLMOPS_TRACE_ID_HEADER = LLMOPS_TRACE_ID_HEADER;
4268
4330
  exports.LLMOPS_TRACE_NAME_HEADER = LLMOPS_TRACE_NAME_HEADER;
4269
4331
  exports.LLMOPS_USER_ID_HEADER = LLMOPS_USER_ID_HEADER;
4332
+ exports.LLMOpsPricingProvider = LLMOpsPricingProvider;
4270
4333
  exports.MS = MS;
4271
4334
  exports.ManifestBuilder = ManifestBuilder;
4272
4335
  exports.ManifestRouter = ManifestRouter;
4273
4336
  exports.ManifestService = ManifestService;
4274
4337
  exports.MemoryCacheBackend = MemoryCacheBackend;
4275
- exports.ModelsDevPricingProvider = ModelsDevPricingProvider;
4276
4338
  exports.SCHEMA_METADATA = require_db.SCHEMA_METADATA;
4277
4339
  exports.SupportedProviders = SupportedProviders;
4340
+ exports.calculateCacheAwareCost = calculateCacheAwareCost;
4278
4341
  exports.calculateCost = calculateCost;
4279
4342
  exports.chatCompletionCreateParamsBaseSchema = chatCompletionCreateParamsBaseSchema;
4280
4343
  exports.configVariantsSchema = require_db.configVariantsSchema;
package/dist/index.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as SCHEMA_METADATA, A as DatasetsTable, At as playgroundsSchema, B as LLMRequest, Bt as workspaceSettingsSchema, C as Dataset, Ct as environmentSecretsSchema, D as DatasetVersionRecord, Dt as playgroundColumnSchema, E as DatasetVersion, Et as llmRequestsSchema, F as GuardrailConfig, Ft as spansSchema, G as PlaygroundResultsTable, H as Playground, I as GuardrailConfigsTable, It as targetingRulesSchema, J as PlaygroundsTable, K as PlaygroundRun, L as GuardrailResult, Lt as tracesSchema, M as EnvironmentSecret, Mt as providerGuardrailOverridesSchema, N as EnvironmentSecretsTable, Nt as schemas, O as DatasetVersionRecordsTable, Ot as playgroundResultsSchema, P as EnvironmentsTable, Pt as spanEventsSchema, Q as ProviderGuardrailOverridesTable, R as GuardrailResults, Rt as variantVersionsSchema, S as Database, St as datasetsSchema, T as DatasetRecordsTable, Tt as guardrailConfigsSchema, U as PlaygroundColumn, V as LLMRequestsTable, W as PlaygroundResult, X as ProviderConfigsTable, Y as ProviderConfig, Z as ProviderGuardrailOverride, _ as validateTableData, _t as configVariantsSchema, a as createDatabaseFromConnection, at as TableName, b as ConfigVariantsTable, bt as datasetVersionRecordsSchema, c as executeWithSchema, ct as Trace, d as getMigrations, dt as Variant, et as Selectable, f as matchType, ft as VariantVersion, g as validatePartialTableData, gt as WorkspaceSettingsTable, h as parseTableData, ht as WorkspaceSettings, i as createDatabase, it as SpansTable, j as Environment, jt as providerConfigsSchema, k as DatasetVersionsTable, kt as playgroundRunsSchema, l as MigrationOptions, lt as TracesTable, m as parsePartialTableData, mt as VariantsTable, n as DatabaseOptions, nt as SpanEvent, o as detectDatabaseType, ot as TargetingRule, p as runAutoMigrations, pt as VariantVersionsTable, q as PlaygroundRunsTable, r as DatabaseType, rt as SpanEventsTable, s as createNeonDialect, st as TargetingRulesTable, t as DatabaseConnection, tt as Span, u as 
MigrationResult, ut as Updateable, v as Config, vt as configsSchema, w as DatasetRecord, wt as environmentsSchema, x as ConfigsTable, xt as datasetVersionsSchema, y as ConfigVariant, yt as datasetRecordsSchema, z as Insertable, zt as variantsSchema } from "./index-DdG7GtcE.cjs";
1
+ import { $ as SCHEMA_METADATA, A as DatasetsTable, At as playgroundsSchema, B as LLMRequest, Bt as workspaceSettingsSchema, C as Dataset, Ct as environmentSecretsSchema, D as DatasetVersionRecord, Dt as playgroundColumnSchema, E as DatasetVersion, Et as llmRequestsSchema, F as GuardrailConfig, Ft as spansSchema, G as PlaygroundResultsTable, H as Playground, I as GuardrailConfigsTable, It as targetingRulesSchema, J as PlaygroundsTable, K as PlaygroundRun, L as GuardrailResult, Lt as tracesSchema, M as EnvironmentSecret, Mt as providerGuardrailOverridesSchema, N as EnvironmentSecretsTable, Nt as schemas, O as DatasetVersionRecordsTable, Ot as playgroundResultsSchema, P as EnvironmentsTable, Pt as spanEventsSchema, Q as ProviderGuardrailOverridesTable, R as GuardrailResults, Rt as variantVersionsSchema, S as Database, St as datasetsSchema, T as DatasetRecordsTable, Tt as guardrailConfigsSchema, U as PlaygroundColumn, V as LLMRequestsTable, W as PlaygroundResult, X as ProviderConfigsTable, Y as ProviderConfig, Z as ProviderGuardrailOverride, _ as validateTableData, _t as configVariantsSchema, a as createDatabaseFromConnection, at as TableName, b as ConfigVariantsTable, bt as datasetVersionRecordsSchema, c as executeWithSchema, ct as Trace, d as getMigrations, dt as Variant, et as Selectable, f as matchType, ft as VariantVersion, g as validatePartialTableData, gt as WorkspaceSettingsTable, h as parseTableData, ht as WorkspaceSettings, i as createDatabase, it as SpansTable, j as Environment, jt as providerConfigsSchema, k as DatasetVersionsTable, kt as playgroundRunsSchema, l as MigrationOptions, lt as TracesTable, m as parsePartialTableData, mt as VariantsTable, n as DatabaseOptions, nt as SpanEvent, o as detectDatabaseType, ot as TargetingRule, p as runAutoMigrations, pt as VariantVersionsTable, q as PlaygroundRunsTable, r as DatabaseType, rt as SpanEventsTable, s as createNeonDialect, st as TargetingRulesTable, t as DatabaseConnection, tt as Span, u as 
MigrationResult, ut as Updateable, v as Config, vt as configsSchema, w as DatasetRecord, wt as environmentsSchema, x as ConfigsTable, xt as datasetVersionsSchema, y as ConfigVariant, yt as datasetRecordsSchema, z as Insertable, zt as variantsSchema } from "./index-CyVFWq76.cjs";
2
2
  import * as kysely0 from "kysely";
3
3
  import { Kysely } from "kysely";
4
4
  import z$1, { z } from "zod";
@@ -1956,7 +1956,9 @@ declare const insertLLMRequestSchema: z$1.ZodObject<{
1956
1956
  completionTokens: z$1.ZodDefault<z$1.ZodNumber>;
1957
1957
  totalTokens: z$1.ZodDefault<z$1.ZodNumber>;
1958
1958
  cachedTokens: z$1.ZodDefault<z$1.ZodNumber>;
1959
+ cacheCreationTokens: z$1.ZodDefault<z$1.ZodNumber>;
1959
1960
  cost: z$1.ZodDefault<z$1.ZodNumber>;
1961
+ cacheSavings: z$1.ZodDefault<z$1.ZodNumber>;
1960
1962
  inputCost: z$1.ZodDefault<z$1.ZodNumber>;
1961
1963
  outputCost: z$1.ZodDefault<z$1.ZodNumber>;
1962
1964
  endpoint: z$1.ZodString;
@@ -2069,6 +2071,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
2069
2071
  requestId: string;
2070
2072
  model: string;
2071
2073
  cachedTokens: number;
2074
+ cacheCreationTokens: number;
2075
+ cacheSavings: number;
2072
2076
  inputCost: number;
2073
2077
  outputCost: number;
2074
2078
  endpoint: string;
@@ -2114,6 +2118,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
2114
2118
  requestId: string;
2115
2119
  model: string;
2116
2120
  cachedTokens: number;
2121
+ cacheCreationTokens: number;
2122
+ cacheSavings: number;
2117
2123
  inputCost: number;
2118
2124
  outputCost: number;
2119
2125
  endpoint: string;
@@ -2161,6 +2167,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
2161
2167
  requestId: string;
2162
2168
  model: string;
2163
2169
  cachedTokens: number;
2170
+ cacheCreationTokens: number;
2171
+ cacheSavings: number;
2164
2172
  inputCost: number;
2165
2173
  outputCost: number;
2166
2174
  endpoint: string;
@@ -2194,6 +2202,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
2194
2202
  totalPromptTokens: number;
2195
2203
  totalCompletionTokens: number;
2196
2204
  totalTokens: number;
2205
+ totalCachedTokens: number;
2206
+ totalCacheSavings: number;
2197
2207
  requestCount: number;
2198
2208
  } | undefined>;
2199
2209
  /**
@@ -3787,8 +3797,10 @@ interface UsageData {
3787
3797
  completionTokens: number;
3788
3798
  /** Total tokens (prompt + completion) */
3789
3799
  totalTokens?: number;
3790
- /** Number of cached tokens (optional) */
3800
+ /** Number of cache read tokens (OpenAI cached_tokens / Anthropic cache_read_input_tokens) */
3791
3801
  cachedTokens?: number;
3802
+ /** Number of cache creation tokens (Anthropic cache_creation_input_tokens) */
3803
+ cacheCreationTokens?: number;
3792
3804
  /** Number of reasoning tokens (optional, for models like o1) */
3793
3805
  reasoningTokens?: number;
3794
3806
  }
@@ -3804,6 +3816,8 @@ interface CostResult {
3804
3816
  inputCost: number;
3805
3817
  /** Output/completion cost in micro-dollars */
3806
3818
  outputCost: number;
3819
+ /** Cost saved by cache hits in micro-dollars (negative means cache write premium exceeded savings) */
3820
+ cacheSavings: number;
3807
3821
  }
3808
3822
  /**
3809
3823
  * Provider for fetching model pricing data
@@ -3850,6 +3864,19 @@ interface PricingProvider {
3850
3864
  * ```
3851
3865
  */
3852
3866
  declare function calculateCost(usage: UsageData, pricing: ModelPricing): CostResult;
3867
+ /**
3868
+ * Calculate cache-aware cost of an LLM request in micro-dollars.
3869
+ *
3870
+ * Splits input tokens into uncached, cache-read, and cache-creation buckets,
3871
+ * each priced at different rates. Falls back to provider-specific multipliers
3872
+ * when the supplied pricing doesn't include cache rates.
3873
+ *
3874
+ * @param usage - Token usage data (with cachedTokens and cacheCreationTokens)
3875
+ * @param pricing - Model pricing (may include cacheReadCostPer1M / cacheWriteCostPer1M)
3876
+ * @param provider - Provider name for fallback rate selection
3877
+ * @returns Cost breakdown in micro-dollars
3878
+ */
3879
+ declare function calculateCacheAwareCost(usage: UsageData, pricing: ModelPricing, provider?: string): CostResult;
3853
3880
  /**
3854
3881
  * Convert micro-dollars to dollars
3855
3882
  *
@@ -3893,47 +3920,38 @@ declare function formatCost(microDollars: number, decimals?: number): string;
3893
3920
  //#endregion
3894
3921
  //#region src/pricing/provider.d.ts
3895
3922
  /**
3896
- * Pricing provider that fetches data from models.dev API
3923
+ * Pricing provider that fetches per-model data from the LLMOps Models API.
3897
3924
  *
3898
3925
  * Features:
3899
- * - Caches pricing data with configurable TTL (default 5 minutes)
3900
- * - Supports fallback to local cache on fetch failure
3901
- * - Thread-safe cache refresh
3926
+ * - Per-model in-memory cache with configurable TTL (default 5 minutes)
3927
+ * - Deduplicates concurrent fetches for the same model
3928
+ * - Caches null results (404s) to avoid repeated lookups
3929
+ * - Falls back to stale cache on fetch errors
3902
3930
  */
3903
- declare class ModelsDevPricingProvider implements PricingProvider {
3931
+ declare class LLMOpsPricingProvider implements PricingProvider {
3904
3932
  private cache;
3905
- private lastFetch;
3933
+ private pendingFetches;
3906
3934
  private cacheTTL;
3907
- private fetchPromise;
3908
- private ready;
3909
- /**
3910
- * Create a new ModelsDevPricingProvider
3911
- *
3912
- * @param cacheTTL - Cache TTL in milliseconds (default: 5 minutes)
3913
- */
3914
- constructor(cacheTTL?: number);
3915
- /**
3916
- * Generate a cache key for a provider/model combination
3917
- */
3935
+ private baseUrl;
3936
+ constructor(options?: {
3937
+ cacheTTL?: number;
3938
+ baseUrl?: string;
3939
+ });
3918
3940
  private getCacheKey;
3919
3941
  /**
3920
- * Fetch pricing data from models.dev API
3942
+ * Fetch pricing for a single model from the API
3921
3943
  */
3922
- private fetchPricingData;
3923
- /**
3924
- * Ensure cache is fresh, fetching if necessary
3925
- */
3926
- private ensureFreshCache;
3944
+ private fetchModelPricing;
3927
3945
  /**
3928
3946
  * Get pricing for a specific model
3929
3947
  */
3930
3948
  getModelPricing(provider: string, model: string): Promise<ModelPricing | null>;
3931
3949
  /**
3932
- * Force refresh the pricing cache
3950
+ * Force refresh the pricing cache (clears all cached entries)
3933
3951
  */
3934
3952
  refreshCache(): Promise<void>;
3935
3953
  /**
3936
- * Check if the provider is ready
3954
+ * Always ready — no bulk pre-fetch needed
3937
3955
  */
3938
3956
  isReady(): boolean;
3939
3957
  /**
@@ -3944,7 +3962,7 @@ declare class ModelsDevPricingProvider implements PricingProvider {
3944
3962
  /**
3945
3963
  * Get the default pricing provider instance
3946
3964
  */
3947
- declare function getDefaultPricingProvider(): ModelsDevPricingProvider;
3965
+ declare function getDefaultPricingProvider(): LLMOpsPricingProvider;
3948
3966
  //#endregion
3949
3967
  //#region src/auth/get-auth-client-options.d.ts
3950
3968
  interface AuthClientDatabaseConfig {
@@ -4174,4 +4192,4 @@ declare class ManifestRouter {
4174
4192
  routeWithWeights(configIdOrSlug: string, environmentId: string, context?: RoutingContext): RoutingResult | null;
4175
4193
  }
4176
4194
  //#endregion
4177
- export { type AnthropicProviderConfig, type AnyProviderConfig, AuthClientDatabaseConfig, AuthClientOptions, type AzureAIProviderConfig, type AzureOpenAIProviderConfig, BaseCacheConfig, type BaseProviderConfig, type BedrockProviderConfig, COST_SUMMARY_GROUP_BY, CacheBackend, CacheBackendType, CacheConfig, CacheEntry, CacheOptions, CacheService, CacheStats, ChatCompletionCreateParamsBase, Config, ConfigVariant, ConfigVariantsDataLayer, ConfigVariantsTable, ConfigsDataLayer, ConfigsTable, type CortexProviderConfig, CostResult, CostSummaryGroupBy, DEFAULT_PROVIDER_ENV_VARS, DataLayer, Database, DatabaseConnection, DatabaseOptions, DatabaseType, Dataset, DatasetRecord, DatasetRecordsTable, DatasetVersion, DatasetVersionRecord, DatasetVersionRecordsTable, DatasetVersionsTable, DatasetsDataLayer, DatasetsTable, Environment, EnvironmentSecret, EnvironmentSecretsDataLayer, EnvironmentSecretsTable, EnvironmentsDataLayer, EnvironmentsTable, FileCacheBackend, FileCacheConfig, type FireworksAIProviderConfig, GatewayManifest, type GoogleProviderConfig, GuardrailConfig, GuardrailConfigsDataLayer, GuardrailConfigsTable, GuardrailResult, GuardrailResults, type HuggingFaceProviderConfig, type InlineProviderConfig, type InlineProvidersConfig, Insertable, LLMOPS_INTERNAL_HEADER, LLMOPS_REQUEST_ID_HEADER, LLMOPS_SESSION_ID_HEADER, LLMOPS_SPAN_ID_HEADER, LLMOPS_SPAN_NAME_HEADER, LLMOPS_TRACE_ID_HEADER, LLMOPS_TRACE_NAME_HEADER, LLMOPS_USER_ID_HEADER, LLMOpsClient, LLMOpsConfig, type LLMOpsConfigInput, LLMRequest, LLMRequestInsert, LLMRequestsDataLayer, LLMRequestsTable, MS, ManifestBuilder, ManifestConfig, ManifestEnvironment, ManifestGuardrail, ManifestProviderGuardrailOverride, ManifestRouter, ManifestService, ManifestTargetingRule, ManifestVariantVersion, MemoryCacheBackend, MemoryCacheConfig, MigrationOptions, MigrationResult, type MistralAIProviderConfig, ModelPricing, ModelsDevPricingProvider, type OpenAIProviderConfig, type OracleProviderConfig, Playground, PlaygroundColumn, 
PlaygroundResult, PlaygroundResultsDataLayer, PlaygroundResultsTable, PlaygroundRun, PlaygroundRunsDataLayer, PlaygroundRunsTable, PlaygroundsDataLayer, PlaygroundsTable, Prettify, PricingProvider, ProviderConfig, type ProviderConfigMap, ProviderConfigsDataLayer, ProviderConfigsTable, ProviderGuardrailOverride, ProviderGuardrailOverridesDataLayer, ProviderGuardrailOverridesTable, type ProvidersConfig, RoutingContext, RoutingResult, SCHEMA_METADATA, type SagemakerProviderConfig, Selectable, Span, SpanEvent, SpanEventInsert, SpanEventsTable, SpanInsert, SpansTable, type StabilityAIProviderConfig, SupportedProviders, TableName, TargetingRule, TargetingRulesDataLayer, TargetingRulesTable, Trace, TraceUpsert, TracesDataLayer, TracesTable, Updateable, UsageData, type ValidatedLLMOpsConfig, Variant, VariantJsonData, VariantVersion, VariantVersionsDataLayer, VariantVersionsTable, VariantsDataLayer, VariantsTable, type VertexAIProviderConfig, type WorkersAIProviderConfig, WorkspaceSettings, WorkspaceSettingsDataLayer, WorkspaceSettingsTable, calculateCost, chatCompletionCreateParamsBaseSchema, configVariantsSchema, configsSchema, createConfigDataLayer, createConfigVariantDataLayer, createDataLayer, createDatabase, createDatabaseFromConnection, createDatasetsDataLayer, createEnvironmentDataLayer, createEnvironmentSecretDataLayer, createGuardrailConfigsDataLayer, createLLMRequestsDataLayer, createNeonDialect, createPlaygroundDataLayer, createPlaygroundResultsDataLayer, createPlaygroundRunsDataLayer, createProviderConfigsDataLayer, createProviderGuardrailOverridesDataLayer, createTargetingRulesDataLayer, createTracesDataLayer, createVariantDataLayer, createVariantVersionsDataLayer, createWorkspaceSettingsDataLayer, datasetRecordsSchema, datasetVersionRecordsSchema, datasetVersionsSchema, datasetsSchema, detectDatabaseType, dollarsToMicroDollars, environmentSecretsSchema, environmentsSchema, executeWithSchema, formatCost, gateway, generateId, getAuthClientOptions, 
getDefaultPricingProvider, getDefaultProviders, getMigrations, guardrailConfigsSchema, llmRequestsSchema, llmopsConfigSchema, logger, matchType, mergeWithDefaultProviders, microDollarsToDollars, parsePartialTableData, parseTableData, playgroundColumnSchema, playgroundResultsSchema, playgroundRunsSchema, playgroundsSchema, providerConfigsSchema, providerGuardrailOverridesSchema, runAutoMigrations, schemas, spanEventsSchema, spansSchema, targetingRulesSchema, tracesSchema, validateLLMOpsConfig, validatePartialTableData, validateTableData, variantJsonDataSchema, variantVersionsSchema, variantsSchema, workspaceSettingsSchema };
4195
+ export { type AnthropicProviderConfig, type AnyProviderConfig, AuthClientDatabaseConfig, AuthClientOptions, type AzureAIProviderConfig, type AzureOpenAIProviderConfig, BaseCacheConfig, type BaseProviderConfig, type BedrockProviderConfig, COST_SUMMARY_GROUP_BY, CacheBackend, CacheBackendType, CacheConfig, CacheEntry, CacheOptions, CacheService, CacheStats, ChatCompletionCreateParamsBase, Config, ConfigVariant, ConfigVariantsDataLayer, ConfigVariantsTable, ConfigsDataLayer, ConfigsTable, type CortexProviderConfig, CostResult, CostSummaryGroupBy, DEFAULT_PROVIDER_ENV_VARS, DataLayer, Database, DatabaseConnection, DatabaseOptions, DatabaseType, Dataset, DatasetRecord, DatasetRecordsTable, DatasetVersion, DatasetVersionRecord, DatasetVersionRecordsTable, DatasetVersionsTable, DatasetsDataLayer, DatasetsTable, Environment, EnvironmentSecret, EnvironmentSecretsDataLayer, EnvironmentSecretsTable, EnvironmentsDataLayer, EnvironmentsTable, FileCacheBackend, FileCacheConfig, type FireworksAIProviderConfig, GatewayManifest, type GoogleProviderConfig, GuardrailConfig, GuardrailConfigsDataLayer, GuardrailConfigsTable, GuardrailResult, GuardrailResults, type HuggingFaceProviderConfig, type InlineProviderConfig, type InlineProvidersConfig, Insertable, LLMOPS_INTERNAL_HEADER, LLMOPS_REQUEST_ID_HEADER, LLMOPS_SESSION_ID_HEADER, LLMOPS_SPAN_ID_HEADER, LLMOPS_SPAN_NAME_HEADER, LLMOPS_TRACE_ID_HEADER, LLMOPS_TRACE_NAME_HEADER, LLMOPS_USER_ID_HEADER, LLMOpsClient, LLMOpsConfig, type LLMOpsConfigInput, LLMOpsPricingProvider, LLMRequest, LLMRequestInsert, LLMRequestsDataLayer, LLMRequestsTable, MS, ManifestBuilder, ManifestConfig, ManifestEnvironment, ManifestGuardrail, ManifestProviderGuardrailOverride, ManifestRouter, ManifestService, ManifestTargetingRule, ManifestVariantVersion, MemoryCacheBackend, MemoryCacheConfig, MigrationOptions, MigrationResult, type MistralAIProviderConfig, ModelPricing, type OpenAIProviderConfig, type OracleProviderConfig, Playground, PlaygroundColumn, 
PlaygroundResult, PlaygroundResultsDataLayer, PlaygroundResultsTable, PlaygroundRun, PlaygroundRunsDataLayer, PlaygroundRunsTable, PlaygroundsDataLayer, PlaygroundsTable, Prettify, PricingProvider, ProviderConfig, type ProviderConfigMap, ProviderConfigsDataLayer, ProviderConfigsTable, ProviderGuardrailOverride, ProviderGuardrailOverridesDataLayer, ProviderGuardrailOverridesTable, type ProvidersConfig, RoutingContext, RoutingResult, SCHEMA_METADATA, type SagemakerProviderConfig, Selectable, Span, SpanEvent, SpanEventInsert, SpanEventsTable, SpanInsert, SpansTable, type StabilityAIProviderConfig, SupportedProviders, TableName, TargetingRule, TargetingRulesDataLayer, TargetingRulesTable, Trace, TraceUpsert, TracesDataLayer, TracesTable, Updateable, UsageData, type ValidatedLLMOpsConfig, Variant, VariantJsonData, VariantVersion, VariantVersionsDataLayer, VariantVersionsTable, VariantsDataLayer, VariantsTable, type VertexAIProviderConfig, type WorkersAIProviderConfig, WorkspaceSettings, WorkspaceSettingsDataLayer, WorkspaceSettingsTable, calculateCacheAwareCost, calculateCost, chatCompletionCreateParamsBaseSchema, configVariantsSchema, configsSchema, createConfigDataLayer, createConfigVariantDataLayer, createDataLayer, createDatabase, createDatabaseFromConnection, createDatasetsDataLayer, createEnvironmentDataLayer, createEnvironmentSecretDataLayer, createGuardrailConfigsDataLayer, createLLMRequestsDataLayer, createNeonDialect, createPlaygroundDataLayer, createPlaygroundResultsDataLayer, createPlaygroundRunsDataLayer, createProviderConfigsDataLayer, createProviderGuardrailOverridesDataLayer, createTargetingRulesDataLayer, createTracesDataLayer, createVariantDataLayer, createVariantVersionsDataLayer, createWorkspaceSettingsDataLayer, datasetRecordsSchema, datasetVersionRecordsSchema, datasetVersionsSchema, datasetsSchema, detectDatabaseType, dollarsToMicroDollars, environmentSecretsSchema, environmentsSchema, executeWithSchema, formatCost, gateway, generateId, 
getAuthClientOptions, getDefaultPricingProvider, getDefaultProviders, getMigrations, guardrailConfigsSchema, llmRequestsSchema, llmopsConfigSchema, logger, matchType, mergeWithDefaultProviders, microDollarsToDollars, parsePartialTableData, parseTableData, playgroundColumnSchema, playgroundResultsSchema, playgroundRunsSchema, playgroundsSchema, providerConfigsSchema, providerGuardrailOverridesSchema, runAutoMigrations, schemas, spanEventsSchema, spansSchema, targetingRulesSchema, tracesSchema, validateLLMOpsConfig, validatePartialTableData, validateTableData, variantJsonDataSchema, variantVersionsSchema, variantsSchema, workspaceSettingsSchema };