@llmops/core 0.6.1-beta.2 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{bun-sqlite-dialect-DFKD-iqI.cjs → bun-sqlite-dialect-BUnuGpx1.cjs} +1 -1
- package/dist/db/index.cjs +2 -2
- package/dist/db/index.d.cts +1 -1
- package/dist/db/index.d.mts +1 -1
- package/dist/db/index.mjs +1 -1
- package/dist/{db-CQvUnGBp.mjs → db-Cd1KR24Y.mjs} +12 -0
- package/dist/{db-C6ApWDjW.cjs → db-i0OOYxJm.cjs} +16 -4
- package/dist/{index-DdG7GtcE.d.cts → index-CyVFWq76.d.cts} +28 -0
- package/dist/{index-BosemZ_J.d.mts → index-D3onb7gK.d.mts} +28 -0
- package/dist/index.cjs +142 -79
- package/dist/index.d.cts +52 -34
- package/dist/index.d.mts +52 -34
- package/dist/index.mjs +140 -78
- package/dist/{neon-dialect-DNyVaL-1.cjs → neon-dialect-DavGzunb.cjs} +1 -1
- package/dist/{neon-dialect-SqAJhPFS.cjs → neon-dialect-oh8u9vRy.cjs} +1 -1
- package/dist/{node-sqlite-dialect-DI0PJyHV.cjs → node-sqlite-dialect-CbwETvHG.cjs} +1 -1
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
const require_db = require('./db-C6ApWDjW.cjs');
|
|
2
|
-
const require_neon_dialect = require('./neon-dialect-SqAJhPFS.cjs');
|
|
1
|
+
const require_db = require('./db-i0OOYxJm.cjs');
|
|
2
|
+
const require_neon_dialect = require('./neon-dialect-oh8u9vRy.cjs');
|
|
3
3
|
let __llmops_gateway = require("@llmops/gateway");
|
|
4
4
|
__llmops_gateway = require_db.__toESM(__llmops_gateway);
|
|
5
5
|
let kysely = require("kysely");
|
|
@@ -2004,7 +2004,9 @@ const insertLLMRequestSchema = require_db.zod_default.object({
|
|
|
2004
2004
|
completionTokens: require_db.zod_default.number().int().default(0),
|
|
2005
2005
|
totalTokens: require_db.zod_default.number().int().default(0),
|
|
2006
2006
|
cachedTokens: require_db.zod_default.number().int().default(0),
|
|
2007
|
+
cacheCreationTokens: require_db.zod_default.number().int().default(0),
|
|
2007
2008
|
cost: require_db.zod_default.number().int().default(0),
|
|
2009
|
+
cacheSavings: require_db.zod_default.number().int().default(0),
|
|
2008
2010
|
inputCost: require_db.zod_default.number().int().default(0),
|
|
2009
2011
|
outputCost: require_db.zod_default.number().int().default(0),
|
|
2010
2012
|
endpoint: require_db.zod_default.string(),
|
|
@@ -2099,7 +2101,9 @@ const createLLMRequestsDataLayer = (db) => {
|
|
|
2099
2101
|
completionTokens: req.completionTokens,
|
|
2100
2102
|
totalTokens: req.totalTokens,
|
|
2101
2103
|
cachedTokens: req.cachedTokens,
|
|
2104
|
+
cacheCreationTokens: req.cacheCreationTokens,
|
|
2102
2105
|
cost: req.cost,
|
|
2106
|
+
cacheSavings: req.cacheSavings,
|
|
2103
2107
|
inputCost: req.inputCost,
|
|
2104
2108
|
outputCost: req.outputCost,
|
|
2105
2109
|
endpoint: req.endpoint,
|
|
@@ -2137,7 +2141,9 @@ const createLLMRequestsDataLayer = (db) => {
|
|
|
2137
2141
|
completionTokens: req.completionTokens,
|
|
2138
2142
|
totalTokens: req.totalTokens,
|
|
2139
2143
|
cachedTokens: req.cachedTokens,
|
|
2144
|
+
cacheCreationTokens: req.cacheCreationTokens,
|
|
2140
2145
|
cost: req.cost,
|
|
2146
|
+
cacheSavings: req.cacheSavings,
|
|
2141
2147
|
inputCost: req.inputCost,
|
|
2142
2148
|
outputCost: req.outputCost,
|
|
2143
2149
|
endpoint: req.endpoint,
|
|
@@ -2199,6 +2205,8 @@ const createLLMRequestsDataLayer = (db) => {
|
|
|
2199
2205
|
kysely.sql`COALESCE(SUM(${col$1("promptTokens")}), 0)`.as("totalPromptTokens"),
|
|
2200
2206
|
kysely.sql`COALESCE(SUM(${col$1("completionTokens")}), 0)`.as("totalCompletionTokens"),
|
|
2201
2207
|
kysely.sql`COALESCE(SUM(${col$1("totalTokens")}), 0)`.as("totalTokens"),
|
|
2208
|
+
kysely.sql`COALESCE(SUM(${col$1("cachedTokens")}), 0)`.as("totalCachedTokens"),
|
|
2209
|
+
kysely.sql`COALESCE(SUM(${col$1("cacheSavings")}), 0)`.as("totalCacheSavings"),
|
|
2202
2210
|
kysely.sql`COUNT(*)`.as("requestCount")
|
|
2203
2211
|
]).where(kysely.sql`${col$1("createdAt")} >= ${startDate.toISOString()}`).where(kysely.sql`${col$1("createdAt")} <= ${endDate.toISOString()}`);
|
|
2204
2212
|
if (configId) query = query.where("configId", "=", configId);
|
|
@@ -3712,7 +3720,64 @@ function calculateCost(usage, pricing) {
|
|
|
3712
3720
|
return {
|
|
3713
3721
|
inputCost,
|
|
3714
3722
|
outputCost,
|
|
3715
|
-
totalCost: inputCost + outputCost
|
|
3723
|
+
totalCost: inputCost + outputCost,
|
|
3724
|
+
cacheSavings: 0
|
|
3725
|
+
};
|
|
3726
|
+
}
|
|
3727
|
+
/**
|
|
3728
|
+
* Get default cache read rate as a fraction of input cost per provider.
|
|
3729
|
+
* Used when models.dev doesn't provide cache pricing.
|
|
3730
|
+
*/
|
|
3731
|
+
function getDefaultCacheReadRate(provider, inputCostPer1M) {
|
|
3732
|
+
switch (provider?.toLowerCase()) {
|
|
3733
|
+
case "anthropic": return inputCostPer1M * .1;
|
|
3734
|
+
case "openai":
|
|
3735
|
+
case "azure-openai": return inputCostPer1M * .5;
|
|
3736
|
+
case "google":
|
|
3737
|
+
case "gemini":
|
|
3738
|
+
case "vertex_ai": return inputCostPer1M * .25;
|
|
3739
|
+
default: return inputCostPer1M * .5;
|
|
3740
|
+
}
|
|
3741
|
+
}
|
|
3742
|
+
/**
|
|
3743
|
+
* Get default cache write/creation rate as a fraction of input cost per provider.
|
|
3744
|
+
* Used when models.dev doesn't provide cache pricing.
|
|
3745
|
+
*/
|
|
3746
|
+
function getDefaultCacheWriteRate(provider, inputCostPer1M) {
|
|
3747
|
+
switch (provider?.toLowerCase()) {
|
|
3748
|
+
case "anthropic": return inputCostPer1M * 1.25;
|
|
3749
|
+
default: return inputCostPer1M;
|
|
3750
|
+
}
|
|
3751
|
+
}
|
|
3752
|
+
/**
|
|
3753
|
+
* Calculate cache-aware cost of an LLM request in micro-dollars.
|
|
3754
|
+
*
|
|
3755
|
+
* Splits input tokens into uncached, cache-read, and cache-creation buckets,
|
|
3756
|
+
* each priced at different rates. Falls back to provider-specific multipliers
|
|
3757
|
+
* when models.dev doesn't provide cache pricing.
|
|
3758
|
+
*
|
|
3759
|
+
* @param usage - Token usage data (with cachedTokens and cacheCreationTokens)
|
|
3760
|
+
* @param pricing - Model pricing (may include cacheReadCostPer1M / cacheWriteCostPer1M)
|
|
3761
|
+
* @param provider - Provider name for fallback rate selection
|
|
3762
|
+
* @returns Cost breakdown in micro-dollars
|
|
3763
|
+
*/
|
|
3764
|
+
function calculateCacheAwareCost(usage, pricing, provider) {
|
|
3765
|
+
const cachedTokens = usage.cachedTokens ?? 0;
|
|
3766
|
+
const cacheCreationTokens = usage.cacheCreationTokens ?? 0;
|
|
3767
|
+
if (cachedTokens === 0 && cacheCreationTokens === 0) return calculateCost(usage, pricing);
|
|
3768
|
+
const cacheReadRate = pricing.cacheReadCostPer1M ?? getDefaultCacheReadRate(provider, pricing.inputCostPer1M);
|
|
3769
|
+
const cacheWriteRate = pricing.cacheWriteCostPer1M ?? getDefaultCacheWriteRate(provider, pricing.inputCostPer1M);
|
|
3770
|
+
const uncachedInputTokens = Math.max(0, usage.promptTokens - cachedTokens - cacheCreationTokens);
|
|
3771
|
+
const regularInputCost = Math.round(uncachedInputTokens * pricing.inputCostPer1M);
|
|
3772
|
+
const cacheReadCost = Math.round(cachedTokens * cacheReadRate);
|
|
3773
|
+
const cacheWriteCost = Math.round(cacheCreationTokens * cacheWriteRate);
|
|
3774
|
+
const outputCost = Math.round(usage.completionTokens * pricing.outputCostPer1M);
|
|
3775
|
+
const inputCost = regularInputCost + cacheReadCost + cacheWriteCost;
|
|
3776
|
+
return {
|
|
3777
|
+
inputCost,
|
|
3778
|
+
outputCost,
|
|
3779
|
+
totalCost: inputCost + outputCost,
|
|
3780
|
+
cacheSavings: Math.round((cachedTokens + cacheCreationTokens) * pricing.inputCostPer1M) - cacheReadCost - cacheWriteCost
|
|
3716
3781
|
};
|
|
3717
3782
|
}
|
|
3718
3783
|
/**
|
|
@@ -3764,111 +3829,108 @@ function formatCost(microDollars, decimals = 6) {
|
|
|
3764
3829
|
|
|
3765
3830
|
//#endregion
|
|
3766
3831
|
//#region src/pricing/provider.ts
|
|
3767
|
-
const
|
|
3832
|
+
const LLMOPS_MODELS_API = "https://models.llmops.build";
|
|
3833
|
+
/**
|
|
3834
|
+
* Convert price from USD cents per token to dollars per 1M tokens.
|
|
3835
|
+
*
|
|
3836
|
+
* API returns cents/token. Our system uses dollars/1M tokens.
|
|
3837
|
+
* Formula: (centsPerToken / 100) * 1_000_000 = centsPerToken * 10_000
|
|
3838
|
+
*/
|
|
3839
|
+
function centsPerTokenToCostPer1M(centsPerToken) {
|
|
3840
|
+
return centsPerToken * 1e4;
|
|
3841
|
+
}
|
|
3768
3842
|
/**
|
|
3769
|
-
* Pricing provider that fetches data from
|
|
3843
|
+
* Pricing provider that fetches per-model data from the LLMOps Models API.
|
|
3770
3844
|
*
|
|
3771
3845
|
* Features:
|
|
3772
|
-
* -
|
|
3773
|
-
* -
|
|
3774
|
-
* -
|
|
3846
|
+
* - Per-model in-memory cache with configurable TTL (default 5 minutes)
|
|
3847
|
+
* - Deduplicates concurrent fetches for the same model
|
|
3848
|
+
* - Caches null results (404s) to avoid repeated lookups
|
|
3849
|
+
* - Falls back to stale cache on fetch errors
|
|
3775
3850
|
*/
|
|
3776
|
-
var ModelsDevPricingProvider = class {
|
|
3851
|
+
var LLMOpsPricingProvider = class {
|
|
3777
3852
|
cache = /* @__PURE__ */ new Map();
|
|
3778
|
-
|
|
3853
|
+
pendingFetches = /* @__PURE__ */ new Map();
|
|
3779
3854
|
cacheTTL;
|
|
3780
|
-
|
|
3781
|
-
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
*
|
|
3785
|
-
* @param cacheTTL - Cache TTL in milliseconds (default: 5 minutes)
|
|
3786
|
-
*/
|
|
3787
|
-
constructor(cacheTTL = 300 * 1e3) {
|
|
3788
|
-
this.cacheTTL = cacheTTL;
|
|
3855
|
+
baseUrl;
|
|
3856
|
+
constructor(options) {
|
|
3857
|
+
this.cacheTTL = options?.cacheTTL ?? 300 * 1e3;
|
|
3858
|
+
this.baseUrl = options?.baseUrl ?? LLMOPS_MODELS_API;
|
|
3789
3859
|
}
|
|
3790
|
-
/**
|
|
3791
|
-
* Generate a cache key for a provider/model combination
|
|
3792
|
-
*/
|
|
3793
3860
|
getCacheKey(provider, model) {
|
|
3794
3861
|
return `${provider.toLowerCase()}:${model.toLowerCase()}`;
|
|
3795
3862
|
}
|
|
3796
3863
|
/**
|
|
3797
|
-
* Fetch pricing
|
|
3864
|
+
* Fetch pricing for a single model from the API
|
|
3798
3865
|
*/
|
|
3799
|
-
async fetchPricingData() {
|
|
3866
|
+
async fetchModelPricing(provider, model) {
|
|
3867
|
+
const url = `${this.baseUrl}/model-configs/pricing/${encodeURIComponent(provider)}/${model}`;
|
|
3800
3868
|
try {
|
|
3801
|
-
require_db.logger.debug(
|
|
3802
|
-
const
|
|
3803
|
-
|
|
3804
|
-
const
|
|
3805
|
-
|
|
3806
|
-
|
|
3807
|
-
|
|
3808
|
-
|
|
3809
|
-
if (!model.cost) continue;
|
|
3810
|
-
const cacheKey = this.getCacheKey(providerId, model.id);
|
|
3811
|
-
this.cache.set(cacheKey, {
|
|
3812
|
-
inputCostPer1M: model.cost.input ?? 0,
|
|
3813
|
-
outputCostPer1M: model.cost.output ?? 0,
|
|
3814
|
-
cacheReadCostPer1M: model.cost.cache_read,
|
|
3815
|
-
cacheWriteCostPer1M: model.cost.cache_write,
|
|
3816
|
-
reasoningCostPer1M: model.cost.reasoning
|
|
3817
|
-
});
|
|
3818
|
-
const nameKey = this.getCacheKey(providerId, model.name);
|
|
3819
|
-
if (nameKey !== cacheKey) this.cache.set(nameKey, this.cache.get(cacheKey));
|
|
3820
|
-
}
|
|
3869
|
+
require_db.logger.debug(`[Pricing] GET ${url}`);
|
|
3870
|
+
const startTime = Date.now();
|
|
3871
|
+
const response = await fetch(url);
|
|
3872
|
+
const elapsed = Date.now() - startTime;
|
|
3873
|
+
require_db.logger.debug(`[Pricing] GET ${url} -> ${response.status} (${elapsed}ms)`);
|
|
3874
|
+
if (response.status === 404) {
|
|
3875
|
+
require_db.logger.debug(`[Pricing] No pricing found for ${provider}/${model}`);
|
|
3876
|
+
return null;
|
|
3821
3877
|
}
|
|
3822
|
-
|
|
3823
|
-
|
|
3824
|
-
|
|
3878
|
+
if (!response.ok) throw new Error(`API returned ${response.status}`);
|
|
3879
|
+
const data = await response.json();
|
|
3880
|
+
if (!data.pay_as_you_go) return null;
|
|
3881
|
+
const payg = data.pay_as_you_go;
|
|
3882
|
+
const pricing = {
|
|
3883
|
+
inputCostPer1M: centsPerTokenToCostPer1M(payg.request_token?.price ?? 0),
|
|
3884
|
+
outputCostPer1M: centsPerTokenToCostPer1M(payg.response_token?.price ?? 0),
|
|
3885
|
+
cacheReadCostPer1M: payg.cache_read_input_token?.price != null ? centsPerTokenToCostPer1M(payg.cache_read_input_token.price) : void 0,
|
|
3886
|
+
cacheWriteCostPer1M: payg.cache_write_input_token?.price != null ? centsPerTokenToCostPer1M(payg.cache_write_input_token.price) : void 0
|
|
3887
|
+
};
|
|
3888
|
+
require_db.logger.debug(`[Pricing] Cached pricing for ${provider}/${model}: input=$${pricing.inputCostPer1M}/1M, output=$${pricing.outputCostPer1M}/1M`);
|
|
3889
|
+
return pricing;
|
|
3825
3890
|
} catch (error) {
|
|
3826
|
-
require_db.logger.error(`[Pricing] Failed to fetch pricing
|
|
3827
|
-
|
|
3891
|
+
require_db.logger.error(`[Pricing] Failed to fetch pricing for ${provider}/${model}: ${error instanceof Error ? error.message : String(error)}`);
|
|
3892
|
+
const cacheKey = this.getCacheKey(provider, model);
|
|
3893
|
+
const stale = this.cache.get(cacheKey);
|
|
3894
|
+
if (stale) {
|
|
3895
|
+
require_db.logger.debug(`[Pricing] Using stale cache for ${provider}/${model}`);
|
|
3896
|
+
return stale.pricing;
|
|
3897
|
+
}
|
|
3898
|
+
return null;
|
|
3828
3899
|
}
|
|
3829
3900
|
}
|
|
3830
3901
|
/**
|
|
3831
|
-
* Ensure cache is fresh, fetching if necessary
|
|
3832
|
-
*/
|
|
3833
|
-
async ensureFreshCache() {
|
|
3834
|
-
if (!(Date.now() - this.lastFetch > this.cacheTTL) && this.cache.size > 0) return;
|
|
3835
|
-
if (!this.fetchPromise) this.fetchPromise = this.fetchPricingData().finally(() => {
|
|
3836
|
-
this.fetchPromise = null;
|
|
3837
|
-
});
|
|
3838
|
-
await this.fetchPromise;
|
|
3839
|
-
}
|
|
3840
|
-
/**
|
|
3841
3902
|
* Get pricing for a specific model
|
|
3842
3903
|
*/
|
|
3843
3904
|
async getModelPricing(provider, model) {
|
|
3844
|
-
await this.ensureFreshCache();
|
|
3845
3905
|
const cacheKey = this.getCacheKey(provider, model);
|
|
3846
|
-
const
|
|
3847
|
-
if (
|
|
3848
|
-
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
}
|
|
3855
|
-
|
|
3856
|
-
|
|
3906
|
+
const cached = this.cache.get(cacheKey);
|
|
3907
|
+
if (cached && Date.now() - cached.fetchedAt < this.cacheTTL) return cached.pricing;
|
|
3908
|
+
let pending = this.pendingFetches.get(cacheKey);
|
|
3909
|
+
if (!pending) {
|
|
3910
|
+
pending = this.fetchModelPricing(provider, model).then((pricing) => {
|
|
3911
|
+
this.cache.set(cacheKey, {
|
|
3912
|
+
pricing,
|
|
3913
|
+
fetchedAt: Date.now()
|
|
3914
|
+
});
|
|
3915
|
+
return pricing;
|
|
3916
|
+
}).finally(() => {
|
|
3917
|
+
this.pendingFetches.delete(cacheKey);
|
|
3918
|
+
});
|
|
3919
|
+
this.pendingFetches.set(cacheKey, pending);
|
|
3857
3920
|
}
|
|
3858
|
-
return
|
|
3921
|
+
return pending;
|
|
3859
3922
|
}
|
|
3860
3923
|
/**
|
|
3861
|
-
* Force refresh the pricing cache
|
|
3924
|
+
* Force refresh the pricing cache (clears all cached entries)
|
|
3862
3925
|
*/
|
|
3863
3926
|
async refreshCache() {
|
|
3864
|
-
this.
|
|
3865
|
-
await this.ensureFreshCache();
|
|
3927
|
+
this.cache.clear();
|
|
3866
3928
|
}
|
|
3867
3929
|
/**
|
|
3868
|
-
*
|
|
3930
|
+
* Always ready — no bulk pre-fetch needed
|
|
3869
3931
|
*/
|
|
3870
3932
|
isReady() {
|
|
3871
|
-
return
|
|
3933
|
+
return true;
|
|
3872
3934
|
}
|
|
3873
3935
|
/**
|
|
3874
3936
|
* Get the number of cached models (for debugging)
|
|
@@ -3882,7 +3944,7 @@ let defaultProvider = null;
|
|
|
3882
3944
|
* Get the default pricing provider instance
|
|
3883
3945
|
*/
|
|
3884
3946
|
function getDefaultPricingProvider() {
|
|
3885
|
-
if (!defaultProvider) defaultProvider = new ModelsDevPricingProvider();
|
|
3947
|
+
if (!defaultProvider) defaultProvider = new LLMOpsPricingProvider();
|
|
3886
3948
|
return defaultProvider;
|
|
3887
3949
|
}
|
|
3888
3950
|
|
|
@@ -4267,14 +4329,15 @@ exports.LLMOPS_SPAN_NAME_HEADER = LLMOPS_SPAN_NAME_HEADER;
|
|
|
4267
4329
|
exports.LLMOPS_TRACE_ID_HEADER = LLMOPS_TRACE_ID_HEADER;
|
|
4268
4330
|
exports.LLMOPS_TRACE_NAME_HEADER = LLMOPS_TRACE_NAME_HEADER;
|
|
4269
4331
|
exports.LLMOPS_USER_ID_HEADER = LLMOPS_USER_ID_HEADER;
|
|
4332
|
+
exports.LLMOpsPricingProvider = LLMOpsPricingProvider;
|
|
4270
4333
|
exports.MS = MS;
|
|
4271
4334
|
exports.ManifestBuilder = ManifestBuilder;
|
|
4272
4335
|
exports.ManifestRouter = ManifestRouter;
|
|
4273
4336
|
exports.ManifestService = ManifestService;
|
|
4274
4337
|
exports.MemoryCacheBackend = MemoryCacheBackend;
|
|
4275
|
-
exports.ModelsDevPricingProvider = ModelsDevPricingProvider;
|
|
4276
4338
|
exports.SCHEMA_METADATA = require_db.SCHEMA_METADATA;
|
|
4277
4339
|
exports.SupportedProviders = SupportedProviders;
|
|
4340
|
+
exports.calculateCacheAwareCost = calculateCacheAwareCost;
|
|
4278
4341
|
exports.calculateCost = calculateCost;
|
|
4279
4342
|
exports.chatCompletionCreateParamsBaseSchema = chatCompletionCreateParamsBaseSchema;
|
|
4280
4343
|
exports.configVariantsSchema = require_db.configVariantsSchema;
|
package/dist/index.d.cts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as SCHEMA_METADATA, A as DatasetsTable, At as playgroundsSchema, B as LLMRequest, Bt as workspaceSettingsSchema, C as Dataset, Ct as environmentSecretsSchema, D as DatasetVersionRecord, Dt as playgroundColumnSchema, E as DatasetVersion, Et as llmRequestsSchema, F as GuardrailConfig, Ft as spansSchema, G as PlaygroundResultsTable, H as Playground, I as GuardrailConfigsTable, It as targetingRulesSchema, J as PlaygroundsTable, K as PlaygroundRun, L as GuardrailResult, Lt as tracesSchema, M as EnvironmentSecret, Mt as providerGuardrailOverridesSchema, N as EnvironmentSecretsTable, Nt as schemas, O as DatasetVersionRecordsTable, Ot as playgroundResultsSchema, P as EnvironmentsTable, Pt as spanEventsSchema, Q as ProviderGuardrailOverridesTable, R as GuardrailResults, Rt as variantVersionsSchema, S as Database, St as datasetsSchema, T as DatasetRecordsTable, Tt as guardrailConfigsSchema, U as PlaygroundColumn, V as LLMRequestsTable, W as PlaygroundResult, X as ProviderConfigsTable, Y as ProviderConfig, Z as ProviderGuardrailOverride, _ as validateTableData, _t as configVariantsSchema, a as createDatabaseFromConnection, at as TableName, b as ConfigVariantsTable, bt as datasetVersionRecordsSchema, c as executeWithSchema, ct as Trace, d as getMigrations, dt as Variant, et as Selectable, f as matchType, ft as VariantVersion, g as validatePartialTableData, gt as WorkspaceSettingsTable, h as parseTableData, ht as WorkspaceSettings, i as createDatabase, it as SpansTable, j as Environment, jt as providerConfigsSchema, k as DatasetVersionsTable, kt as playgroundRunsSchema, l as MigrationOptions, lt as TracesTable, m as parsePartialTableData, mt as VariantsTable, n as DatabaseOptions, nt as SpanEvent, o as detectDatabaseType, ot as TargetingRule, p as runAutoMigrations, pt as VariantVersionsTable, q as PlaygroundRunsTable, r as DatabaseType, rt as SpanEventsTable, s as createNeonDialect, st as TargetingRulesTable, t as DatabaseConnection, tt as Span, u as 
MigrationResult, ut as Updateable, v as Config, vt as configsSchema, w as DatasetRecord, wt as environmentsSchema, x as ConfigsTable, xt as datasetVersionsSchema, y as ConfigVariant, yt as datasetRecordsSchema, z as Insertable, zt as variantsSchema } from "./index-DdG7GtcE.cjs";
|
|
1
|
+
import { $ as SCHEMA_METADATA, A as DatasetsTable, At as playgroundsSchema, B as LLMRequest, Bt as workspaceSettingsSchema, C as Dataset, Ct as environmentSecretsSchema, D as DatasetVersionRecord, Dt as playgroundColumnSchema, E as DatasetVersion, Et as llmRequestsSchema, F as GuardrailConfig, Ft as spansSchema, G as PlaygroundResultsTable, H as Playground, I as GuardrailConfigsTable, It as targetingRulesSchema, J as PlaygroundsTable, K as PlaygroundRun, L as GuardrailResult, Lt as tracesSchema, M as EnvironmentSecret, Mt as providerGuardrailOverridesSchema, N as EnvironmentSecretsTable, Nt as schemas, O as DatasetVersionRecordsTable, Ot as playgroundResultsSchema, P as EnvironmentsTable, Pt as spanEventsSchema, Q as ProviderGuardrailOverridesTable, R as GuardrailResults, Rt as variantVersionsSchema, S as Database, St as datasetsSchema, T as DatasetRecordsTable, Tt as guardrailConfigsSchema, U as PlaygroundColumn, V as LLMRequestsTable, W as PlaygroundResult, X as ProviderConfigsTable, Y as ProviderConfig, Z as ProviderGuardrailOverride, _ as validateTableData, _t as configVariantsSchema, a as createDatabaseFromConnection, at as TableName, b as ConfigVariantsTable, bt as datasetVersionRecordsSchema, c as executeWithSchema, ct as Trace, d as getMigrations, dt as Variant, et as Selectable, f as matchType, ft as VariantVersion, g as validatePartialTableData, gt as WorkspaceSettingsTable, h as parseTableData, ht as WorkspaceSettings, i as createDatabase, it as SpansTable, j as Environment, jt as providerConfigsSchema, k as DatasetVersionsTable, kt as playgroundRunsSchema, l as MigrationOptions, lt as TracesTable, m as parsePartialTableData, mt as VariantsTable, n as DatabaseOptions, nt as SpanEvent, o as detectDatabaseType, ot as TargetingRule, p as runAutoMigrations, pt as VariantVersionsTable, q as PlaygroundRunsTable, r as DatabaseType, rt as SpanEventsTable, s as createNeonDialect, st as TargetingRulesTable, t as DatabaseConnection, tt as Span, u as 
MigrationResult, ut as Updateable, v as Config, vt as configsSchema, w as DatasetRecord, wt as environmentsSchema, x as ConfigsTable, xt as datasetVersionsSchema, y as ConfigVariant, yt as datasetRecordsSchema, z as Insertable, zt as variantsSchema } from "./index-CyVFWq76.cjs";
|
|
2
2
|
import * as kysely0 from "kysely";
|
|
3
3
|
import { Kysely } from "kysely";
|
|
4
4
|
import z$1, { z } from "zod";
|
|
@@ -1259,13 +1259,13 @@ declare const createConfigDataLayer: (db: Kysely<Database>) => {
|
|
|
1259
1259
|
modelName: string | null;
|
|
1260
1260
|
jsonData: Record<string, unknown> | null;
|
|
1261
1261
|
variantVersionId: string | null;
|
|
1262
|
-
id: string;
|
|
1263
1262
|
slug: string;
|
|
1264
1263
|
name: string | undefined;
|
|
1264
|
+
id: string;
|
|
1265
1265
|
createdAt: Date;
|
|
1266
1266
|
updatedAt: Date;
|
|
1267
|
-
variantName: string | null;
|
|
1268
1267
|
variantId: string | null;
|
|
1268
|
+
variantName: string | null;
|
|
1269
1269
|
}[]>;
|
|
1270
1270
|
};
|
|
1271
1271
|
//#endregion
|
|
@@ -1370,13 +1370,13 @@ declare const createConfigVariantDataLayer: (db: Kysely<Database>) => {
|
|
|
1370
1370
|
modelName: string;
|
|
1371
1371
|
jsonData: Record<string, unknown>;
|
|
1372
1372
|
} | null;
|
|
1373
|
+
configId: string;
|
|
1373
1374
|
id: string;
|
|
1374
1375
|
createdAt: Date;
|
|
1375
1376
|
updatedAt: Date;
|
|
1376
1377
|
variantId: string;
|
|
1377
|
-
configId: string;
|
|
1378
|
-
configName: string | null | undefined;
|
|
1379
1378
|
variantName: string | null;
|
|
1379
|
+
configName: string | null | undefined;
|
|
1380
1380
|
} | undefined>;
|
|
1381
1381
|
/**
|
|
1382
1382
|
* Get config variants with details including latest version data
|
|
@@ -1395,11 +1395,11 @@ declare const createConfigVariantDataLayer: (db: Kysely<Database>) => {
|
|
|
1395
1395
|
modelName: string;
|
|
1396
1396
|
jsonData: Record<string, unknown>;
|
|
1397
1397
|
} | null;
|
|
1398
|
+
configId: string;
|
|
1398
1399
|
id: string;
|
|
1399
1400
|
createdAt: Date;
|
|
1400
1401
|
updatedAt: Date;
|
|
1401
1402
|
variantId: string;
|
|
1402
|
-
configId: string;
|
|
1403
1403
|
name: string | null;
|
|
1404
1404
|
}[]>;
|
|
1405
1405
|
/**
|
|
@@ -1956,7 +1956,9 @@ declare const insertLLMRequestSchema: z$1.ZodObject<{
|
|
|
1956
1956
|
completionTokens: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1957
1957
|
totalTokens: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1958
1958
|
cachedTokens: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1959
|
+
cacheCreationTokens: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1959
1960
|
cost: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1961
|
+
cacheSavings: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1960
1962
|
inputCost: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1961
1963
|
outputCost: z$1.ZodDefault<z$1.ZodNumber>;
|
|
1962
1964
|
endpoint: z$1.ZodString;
|
|
@@ -2069,6 +2071,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
|
|
|
2069
2071
|
requestId: string;
|
|
2070
2072
|
model: string;
|
|
2071
2073
|
cachedTokens: number;
|
|
2074
|
+
cacheCreationTokens: number;
|
|
2075
|
+
cacheSavings: number;
|
|
2072
2076
|
inputCost: number;
|
|
2073
2077
|
outputCost: number;
|
|
2074
2078
|
endpoint: string;
|
|
@@ -2114,6 +2118,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
|
|
|
2114
2118
|
requestId: string;
|
|
2115
2119
|
model: string;
|
|
2116
2120
|
cachedTokens: number;
|
|
2121
|
+
cacheCreationTokens: number;
|
|
2122
|
+
cacheSavings: number;
|
|
2117
2123
|
inputCost: number;
|
|
2118
2124
|
outputCost: number;
|
|
2119
2125
|
endpoint: string;
|
|
@@ -2161,6 +2167,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
|
|
|
2161
2167
|
requestId: string;
|
|
2162
2168
|
model: string;
|
|
2163
2169
|
cachedTokens: number;
|
|
2170
|
+
cacheCreationTokens: number;
|
|
2171
|
+
cacheSavings: number;
|
|
2164
2172
|
inputCost: number;
|
|
2165
2173
|
outputCost: number;
|
|
2166
2174
|
endpoint: string;
|
|
@@ -2194,6 +2202,8 @@ declare const createLLMRequestsDataLayer: (db: Kysely<Database>) => {
|
|
|
2194
2202
|
totalPromptTokens: number;
|
|
2195
2203
|
totalCompletionTokens: number;
|
|
2196
2204
|
totalTokens: number;
|
|
2205
|
+
totalCachedTokens: number;
|
|
2206
|
+
totalCacheSavings: number;
|
|
2197
2207
|
requestCount: number;
|
|
2198
2208
|
} | undefined>;
|
|
2199
2209
|
/**
|
|
@@ -3787,8 +3797,10 @@ interface UsageData {
|
|
|
3787
3797
|
completionTokens: number;
|
|
3788
3798
|
/** Total tokens (prompt + completion) */
|
|
3789
3799
|
totalTokens?: number;
|
|
3790
|
-
/** Number of
|
|
3800
|
+
/** Number of cache read tokens (OpenAI cached_tokens / Anthropic cache_read_input_tokens) */
|
|
3791
3801
|
cachedTokens?: number;
|
|
3802
|
+
/** Number of cache creation tokens (Anthropic cache_creation_input_tokens) */
|
|
3803
|
+
cacheCreationTokens?: number;
|
|
3792
3804
|
/** Number of reasoning tokens (optional, for models like o1) */
|
|
3793
3805
|
reasoningTokens?: number;
|
|
3794
3806
|
}
|
|
@@ -3804,6 +3816,8 @@ interface CostResult {
|
|
|
3804
3816
|
inputCost: number;
|
|
3805
3817
|
/** Output/completion cost in micro-dollars */
|
|
3806
3818
|
outputCost: number;
|
|
3819
|
+
/** Cost saved by cache hits in micro-dollars (negative means cache write premium exceeded savings) */
|
|
3820
|
+
cacheSavings: number;
|
|
3807
3821
|
}
|
|
3808
3822
|
/**
|
|
3809
3823
|
* Provider for fetching model pricing data
|
|
@@ -3850,6 +3864,19 @@ interface PricingProvider {
|
|
|
3850
3864
|
* ```
|
|
3851
3865
|
*/
|
|
3852
3866
|
declare function calculateCost(usage: UsageData, pricing: ModelPricing): CostResult;
|
|
3867
|
+
/**
|
|
3868
|
+
* Calculate cache-aware cost of an LLM request in micro-dollars.
|
|
3869
|
+
*
|
|
3870
|
+
* Splits input tokens into uncached, cache-read, and cache-creation buckets,
|
|
3871
|
+
* each priced at different rates. Falls back to provider-specific multipliers
|
|
3872
|
+
* when models.dev doesn't provide cache pricing.
|
|
3873
|
+
*
|
|
3874
|
+
* @param usage - Token usage data (with cachedTokens and cacheCreationTokens)
|
|
3875
|
+
* @param pricing - Model pricing (may include cacheReadCostPer1M / cacheWriteCostPer1M)
|
|
3876
|
+
* @param provider - Provider name for fallback rate selection
|
|
3877
|
+
* @returns Cost breakdown in micro-dollars
|
|
3878
|
+
*/
|
|
3879
|
+
declare function calculateCacheAwareCost(usage: UsageData, pricing: ModelPricing, provider?: string): CostResult;
|
|
3853
3880
|
/**
|
|
3854
3881
|
* Convert micro-dollars to dollars
|
|
3855
3882
|
*
|
|
@@ -3893,47 +3920,38 @@ declare function formatCost(microDollars: number, decimals?: number): string;
|
|
|
3893
3920
|
//#endregion
|
|
3894
3921
|
//#region src/pricing/provider.d.ts
|
|
3895
3922
|
/**
|
|
3896
|
-
* Pricing provider that fetches data from
|
|
3923
|
+
* Pricing provider that fetches per-model data from the LLMOps Models API.
|
|
3897
3924
|
*
|
|
3898
3925
|
* Features:
|
|
3899
|
-
* -
|
|
3900
|
-
* -
|
|
3901
|
-
* -
|
|
3926
|
+
* - Per-model in-memory cache with configurable TTL (default 5 minutes)
|
|
3927
|
+
* - Deduplicates concurrent fetches for the same model
|
|
3928
|
+
* - Caches null results (404s) to avoid repeated lookups
|
|
3929
|
+
* - Falls back to stale cache on fetch errors
|
|
3902
3930
|
*/
|
|
3903
|
-
declare class ModelsDevPricingProvider implements PricingProvider {
|
|
3931
|
+
declare class LLMOpsPricingProvider implements PricingProvider {
|
|
3904
3932
|
private cache;
|
|
3905
|
-
private
|
|
3933
|
+
private pendingFetches;
|
|
3906
3934
|
private cacheTTL;
|
|
3907
|
-
private
|
|
3908
|
-
|
|
3909
|
-
|
|
3910
|
-
|
|
3911
|
-
|
|
3912
|
-
* @param cacheTTL - Cache TTL in milliseconds (default: 5 minutes)
|
|
3913
|
-
*/
|
|
3914
|
-
constructor(cacheTTL?: number);
|
|
3915
|
-
/**
|
|
3916
|
-
* Generate a cache key for a provider/model combination
|
|
3917
|
-
*/
|
|
3935
|
+
private baseUrl;
|
|
3936
|
+
constructor(options?: {
|
|
3937
|
+
cacheTTL?: number;
|
|
3938
|
+
baseUrl?: string;
|
|
3939
|
+
});
|
|
3918
3940
|
private getCacheKey;
|
|
3919
3941
|
/**
|
|
3920
|
-
* Fetch pricing
|
|
3942
|
+
* Fetch pricing for a single model from the API
|
|
3921
3943
|
*/
|
|
3922
|
-
private fetchPricingData;
|
|
3923
|
-
/**
|
|
3924
|
-
* Ensure cache is fresh, fetching if necessary
|
|
3925
|
-
*/
|
|
3926
|
-
private ensureFreshCache;
|
|
3944
|
+
private fetchModelPricing;
|
|
3927
3945
|
/**
|
|
3928
3946
|
* Get pricing for a specific model
|
|
3929
3947
|
*/
|
|
3930
3948
|
getModelPricing(provider: string, model: string): Promise<ModelPricing | null>;
|
|
3931
3949
|
/**
|
|
3932
|
-
* Force refresh the pricing cache
|
|
3950
|
+
* Force refresh the pricing cache (clears all cached entries)
|
|
3933
3951
|
*/
|
|
3934
3952
|
refreshCache(): Promise<void>;
|
|
3935
3953
|
/**
|
|
3936
|
-
*
|
|
3954
|
+
* Always ready — no bulk pre-fetch needed
|
|
3937
3955
|
*/
|
|
3938
3956
|
isReady(): boolean;
|
|
3939
3957
|
/**
|
|
@@ -3944,7 +3962,7 @@ declare class ModelsDevPricingProvider implements PricingProvider {
|
|
|
3944
3962
|
/**
|
|
3945
3963
|
* Get the default pricing provider instance
|
|
3946
3964
|
*/
|
|
3947
|
-
declare function getDefaultPricingProvider(): ModelsDevPricingProvider;
|
|
3965
|
+
declare function getDefaultPricingProvider(): LLMOpsPricingProvider;
|
|
3948
3966
|
//#endregion
|
|
3949
3967
|
//#region src/auth/get-auth-client-options.d.ts
|
|
3950
3968
|
interface AuthClientDatabaseConfig {
|
|
@@ -4174,4 +4192,4 @@ declare class ManifestRouter {
|
|
|
4174
4192
|
routeWithWeights(configIdOrSlug: string, environmentId: string, context?: RoutingContext): RoutingResult | null;
|
|
4175
4193
|
}
|
|
4176
4194
|
//#endregion
|
|
4177
|
-
export { type AnthropicProviderConfig, type AnyProviderConfig, AuthClientDatabaseConfig, AuthClientOptions, type AzureAIProviderConfig, type AzureOpenAIProviderConfig, BaseCacheConfig, type BaseProviderConfig, type BedrockProviderConfig, COST_SUMMARY_GROUP_BY, CacheBackend, CacheBackendType, CacheConfig, CacheEntry, CacheOptions, CacheService, CacheStats, ChatCompletionCreateParamsBase, Config, ConfigVariant, ConfigVariantsDataLayer, ConfigVariantsTable, ConfigsDataLayer, ConfigsTable, type CortexProviderConfig, CostResult, CostSummaryGroupBy, DEFAULT_PROVIDER_ENV_VARS, DataLayer, Database, DatabaseConnection, DatabaseOptions, DatabaseType, Dataset, DatasetRecord, DatasetRecordsTable, DatasetVersion, DatasetVersionRecord, DatasetVersionRecordsTable, DatasetVersionsTable, DatasetsDataLayer, DatasetsTable, Environment, EnvironmentSecret, EnvironmentSecretsDataLayer, EnvironmentSecretsTable, EnvironmentsDataLayer, EnvironmentsTable, FileCacheBackend, FileCacheConfig, type FireworksAIProviderConfig, GatewayManifest, type GoogleProviderConfig, GuardrailConfig, GuardrailConfigsDataLayer, GuardrailConfigsTable, GuardrailResult, GuardrailResults, type HuggingFaceProviderConfig, type InlineProviderConfig, type InlineProvidersConfig, Insertable, LLMOPS_INTERNAL_HEADER, LLMOPS_REQUEST_ID_HEADER, LLMOPS_SESSION_ID_HEADER, LLMOPS_SPAN_ID_HEADER, LLMOPS_SPAN_NAME_HEADER, LLMOPS_TRACE_ID_HEADER, LLMOPS_TRACE_NAME_HEADER, LLMOPS_USER_ID_HEADER, LLMOpsClient, LLMOpsConfig, type LLMOpsConfigInput, LLMRequest, LLMRequestInsert, LLMRequestsDataLayer, LLMRequestsTable, MS, ManifestBuilder, ManifestConfig, ManifestEnvironment, ManifestGuardrail, ManifestProviderGuardrailOverride, ManifestRouter, ManifestService, ManifestTargetingRule, ManifestVariantVersion, MemoryCacheBackend, MemoryCacheConfig, MigrationOptions, MigrationResult, type MistralAIProviderConfig, ModelPricing,
|
|
4195
|
+
export { type AnthropicProviderConfig, type AnyProviderConfig, AuthClientDatabaseConfig, AuthClientOptions, type AzureAIProviderConfig, type AzureOpenAIProviderConfig, BaseCacheConfig, type BaseProviderConfig, type BedrockProviderConfig, COST_SUMMARY_GROUP_BY, CacheBackend, CacheBackendType, CacheConfig, CacheEntry, CacheOptions, CacheService, CacheStats, ChatCompletionCreateParamsBase, Config, ConfigVariant, ConfigVariantsDataLayer, ConfigVariantsTable, ConfigsDataLayer, ConfigsTable, type CortexProviderConfig, CostResult, CostSummaryGroupBy, DEFAULT_PROVIDER_ENV_VARS, DataLayer, Database, DatabaseConnection, DatabaseOptions, DatabaseType, Dataset, DatasetRecord, DatasetRecordsTable, DatasetVersion, DatasetVersionRecord, DatasetVersionRecordsTable, DatasetVersionsTable, DatasetsDataLayer, DatasetsTable, Environment, EnvironmentSecret, EnvironmentSecretsDataLayer, EnvironmentSecretsTable, EnvironmentsDataLayer, EnvironmentsTable, FileCacheBackend, FileCacheConfig, type FireworksAIProviderConfig, GatewayManifest, type GoogleProviderConfig, GuardrailConfig, GuardrailConfigsDataLayer, GuardrailConfigsTable, GuardrailResult, GuardrailResults, type HuggingFaceProviderConfig, type InlineProviderConfig, type InlineProvidersConfig, Insertable, LLMOPS_INTERNAL_HEADER, LLMOPS_REQUEST_ID_HEADER, LLMOPS_SESSION_ID_HEADER, LLMOPS_SPAN_ID_HEADER, LLMOPS_SPAN_NAME_HEADER, LLMOPS_TRACE_ID_HEADER, LLMOPS_TRACE_NAME_HEADER, LLMOPS_USER_ID_HEADER, LLMOpsClient, LLMOpsConfig, type LLMOpsConfigInput, LLMOpsPricingProvider, LLMRequest, LLMRequestInsert, LLMRequestsDataLayer, LLMRequestsTable, MS, ManifestBuilder, ManifestConfig, ManifestEnvironment, ManifestGuardrail, ManifestProviderGuardrailOverride, ManifestRouter, ManifestService, ManifestTargetingRule, ManifestVariantVersion, MemoryCacheBackend, MemoryCacheConfig, MigrationOptions, MigrationResult, type MistralAIProviderConfig, ModelPricing, type OpenAIProviderConfig, type OracleProviderConfig, Playground, PlaygroundColumn, 
PlaygroundResult, PlaygroundResultsDataLayer, PlaygroundResultsTable, PlaygroundRun, PlaygroundRunsDataLayer, PlaygroundRunsTable, PlaygroundsDataLayer, PlaygroundsTable, Prettify, PricingProvider, ProviderConfig, type ProviderConfigMap, ProviderConfigsDataLayer, ProviderConfigsTable, ProviderGuardrailOverride, ProviderGuardrailOverridesDataLayer, ProviderGuardrailOverridesTable, type ProvidersConfig, RoutingContext, RoutingResult, SCHEMA_METADATA, type SagemakerProviderConfig, Selectable, Span, SpanEvent, SpanEventInsert, SpanEventsTable, SpanInsert, SpansTable, type StabilityAIProviderConfig, SupportedProviders, TableName, TargetingRule, TargetingRulesDataLayer, TargetingRulesTable, Trace, TraceUpsert, TracesDataLayer, TracesTable, Updateable, UsageData, type ValidatedLLMOpsConfig, Variant, VariantJsonData, VariantVersion, VariantVersionsDataLayer, VariantVersionsTable, VariantsDataLayer, VariantsTable, type VertexAIProviderConfig, type WorkersAIProviderConfig, WorkspaceSettings, WorkspaceSettingsDataLayer, WorkspaceSettingsTable, calculateCacheAwareCost, calculateCost, chatCompletionCreateParamsBaseSchema, configVariantsSchema, configsSchema, createConfigDataLayer, createConfigVariantDataLayer, createDataLayer, createDatabase, createDatabaseFromConnection, createDatasetsDataLayer, createEnvironmentDataLayer, createEnvironmentSecretDataLayer, createGuardrailConfigsDataLayer, createLLMRequestsDataLayer, createNeonDialect, createPlaygroundDataLayer, createPlaygroundResultsDataLayer, createPlaygroundRunsDataLayer, createProviderConfigsDataLayer, createProviderGuardrailOverridesDataLayer, createTargetingRulesDataLayer, createTracesDataLayer, createVariantDataLayer, createVariantVersionsDataLayer, createWorkspaceSettingsDataLayer, datasetRecordsSchema, datasetVersionRecordsSchema, datasetVersionsSchema, datasetsSchema, detectDatabaseType, dollarsToMicroDollars, environmentSecretsSchema, environmentsSchema, executeWithSchema, formatCost, gateway, generateId, 
getAuthClientOptions, getDefaultPricingProvider, getDefaultProviders, getMigrations, guardrailConfigsSchema, llmRequestsSchema, llmopsConfigSchema, logger, matchType, mergeWithDefaultProviders, microDollarsToDollars, parsePartialTableData, parseTableData, playgroundColumnSchema, playgroundResultsSchema, playgroundRunsSchema, playgroundsSchema, providerConfigsSchema, providerGuardrailOverridesSchema, runAutoMigrations, schemas, spanEventsSchema, spansSchema, targetingRulesSchema, tracesSchema, validateLLMOpsConfig, validatePartialTableData, validateTableData, variantJsonDataSchema, variantVersionsSchema, variantsSchema, workspaceSettingsSchema };
|