@fallom/trace 0.2.6 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,305 @@
+ import {
+ __export
+ } from "./chunk-7P6ASYW6.mjs";
+
+ // src/models.ts
+ var models_exports = {};
+ __export(models_exports, {
+ get: () => get,
+ init: () => init
+ });
+ import { createHash } from "crypto";
+ var apiKey = null;
+ var baseUrl = "https://configs.fallom.com";
+ var initialized = false;
+ var syncInterval = null;
+ var debugMode = false;
+ var configCache = /* @__PURE__ */ new Map();
+ var SYNC_TIMEOUT = 2e3;
+ var RECORD_TIMEOUT = 1e3;
+ function log(msg) {
+ if (debugMode) {
+ console.log(`[Fallom] ${msg}`);
+ }
+ }
+ function evaluateTargeting(targeting, customerId, context) {
+ if (!targeting || targeting.enabled === false) {
+ return null;
+ }
+ const evalContext = {
+ ...context || {},
+ ...customerId ? { customerId } : {}
+ };
+ log(`Evaluating targeting with context: ${JSON.stringify(evalContext)}`);
+ if (targeting.individualTargets) {
+ for (const target of targeting.individualTargets) {
+ const fieldValue = evalContext[target.field];
+ if (fieldValue === target.value) {
+ log(`Individual target matched: ${target.field}=${target.value} -> variant ${target.variantIndex}`);
+ return target.variantIndex;
+ }
+ }
+ }
+ if (targeting.rules) {
+ for (const rule of targeting.rules) {
+ const allConditionsMatch = rule.conditions.every((condition) => {
+ const fieldValue = evalContext[condition.field];
+ if (fieldValue === void 0) return false;
+ switch (condition.operator) {
+ case "eq":
+ return fieldValue === condition.value;
+ case "neq":
+ return fieldValue !== condition.value;
+ case "in":
+ return Array.isArray(condition.value) && condition.value.includes(fieldValue);
+ case "nin":
+ return Array.isArray(condition.value) && !condition.value.includes(fieldValue);
+ case "contains":
+ return typeof condition.value === "string" && fieldValue.includes(condition.value);
+ case "startsWith":
+ return typeof condition.value === "string" && fieldValue.startsWith(condition.value);
+ case "endsWith":
+ return typeof condition.value === "string" && fieldValue.endsWith(condition.value);
+ default:
+ return false;
+ }
+ });
+ if (allConditionsMatch) {
+ log(`Rule matched: ${JSON.stringify(rule.conditions)} -> variant ${rule.variantIndex}`);
+ return rule.variantIndex;
+ }
+ }
+ }
+ log("No targeting rules matched, falling back to weighted random");
+ return null;
+ }
+ function init(options = {}) {
+ apiKey = options.apiKey || process.env.FALLOM_API_KEY || null;
+ baseUrl = options.baseUrl || process.env.FALLOM_CONFIGS_URL || process.env.FALLOM_BASE_URL || "https://configs.fallom.com";
+ initialized = true;
+ if (!apiKey) {
+ return;
+ }
+ fetchConfigs().catch(() => {
+ });
+ if (!syncInterval) {
+ syncInterval = setInterval(() => {
+ fetchConfigs().catch(() => {
+ });
+ }, 3e4);
+ syncInterval.unref();
+ }
+ }
+ function ensureInit() {
+ if (!initialized) {
+ try {
+ init();
+ } catch {
+ }
+ }
+ }
+ async function fetchConfigs(timeout = SYNC_TIMEOUT) {
+ if (!apiKey) {
+ log("_fetchConfigs: No API key, skipping");
+ return;
+ }
+ try {
+ log(`Fetching configs from ${baseUrl}/configs`);
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), timeout);
+ const resp = await fetch(`${baseUrl}/configs`, {
+ headers: { Authorization: `Bearer ${apiKey}` },
+ signal: controller.signal
+ });
+ clearTimeout(timeoutId);
+ log(`Response status: ${resp.status}`);
+ if (resp.ok) {
+ const data = await resp.json();
+ const configs = data.configs || [];
+ log(`Got ${configs.length} configs: ${configs.map((c) => c.key)}`);
+ for (const c of configs) {
+ const key = c.key;
+ const version = c.version || 1;
+ log(`Config '${key}' v${version}: ${JSON.stringify(c.variants)}`);
+ if (!configCache.has(key)) {
+ configCache.set(key, { versions: /* @__PURE__ */ new Map(), latest: null });
+ }
+ const cached = configCache.get(key);
+ cached.versions.set(version, c);
+ cached.latest = version;
+ }
+ } else {
+ log(`Fetch failed: ${resp.statusText}`);
+ }
+ } catch (e) {
+ log(`Fetch exception: ${e}`);
+ }
+ }
+ async function fetchSpecificVersion(configKey, version, timeout = SYNC_TIMEOUT) {
+ if (!apiKey) return null;
+ try {
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), timeout);
+ const resp = await fetch(
+ `${baseUrl}/configs/${configKey}/version/${version}`,
+ {
+ headers: { Authorization: `Bearer ${apiKey}` },
+ signal: controller.signal
+ }
+ );
+ clearTimeout(timeoutId);
+ if (resp.ok) {
+ const config = await resp.json();
+ if (!configCache.has(configKey)) {
+ configCache.set(configKey, { versions: /* @__PURE__ */ new Map(), latest: null });
+ }
+ configCache.get(configKey).versions.set(version, config);
+ return config;
+ }
+ } catch {
+ }
+ return null;
+ }
+ async function get(configKey, sessionId, options = {}) {
+ const { version, fallback, customerId, context, debug = false } = options;
+ debugMode = debug;
+ ensureInit();
+ log(
+ `get() called: configKey=${configKey}, sessionId=${sessionId}, fallback=${fallback}`
+ );
+ try {
+ let configData = configCache.get(configKey);
+ log(
+ `Cache lookup for '${configKey}': ${configData ? "found" : "not found"}`
+ );
+ if (!configData) {
+ log("Not in cache, fetching...");
+ await fetchConfigs(SYNC_TIMEOUT);
+ configData = configCache.get(configKey);
+ log(
+ `After fetch, cache lookup: ${configData ? "found" : "still not found"}`
+ );
+ }
+ if (!configData) {
+ log(`Config not found, using fallback: ${fallback}`);
+ if (fallback) {
+ console.warn(
+ `[Fallom WARNING] Config '${configKey}' not found, using fallback model: ${fallback}`
+ );
+ return returnModel(configKey, sessionId, fallback, 0);
+ }
+ throw new Error(
+ `Config '${configKey}' not found. Check that it exists in your Fallom dashboard.`
+ );
+ }
+ let config;
+ let targetVersion;
+ if (version !== void 0) {
+ config = configData.versions.get(version);
+ if (!config) {
+ config = await fetchSpecificVersion(configKey, version, SYNC_TIMEOUT) || void 0;
+ }
+ if (!config) {
+ if (fallback) {
+ console.warn(
+ `[Fallom WARNING] Config '${configKey}' version ${version} not found, using fallback: ${fallback}`
+ );
+ return returnModel(configKey, sessionId, fallback, 0);
+ }
+ throw new Error(`Config '${configKey}' version ${version} not found.`);
+ }
+ targetVersion = version;
+ } else {
+ targetVersion = configData.latest;
+ config = configData.versions.get(targetVersion);
+ if (!config) {
+ if (fallback) {
+ console.warn(
+ `[Fallom WARNING] Config '${configKey}' has no cached version, using fallback: ${fallback}`
+ );
+ return returnModel(configKey, sessionId, fallback, 0);
+ }
+ throw new Error(`Config '${configKey}' has no cached version.`);
+ }
+ }
+ const variantsRaw = config.variants;
+ const configVersion = config.version || targetVersion;
+ const variants = Array.isArray(variantsRaw) ? variantsRaw : Object.values(variantsRaw);
+ log(
+ `Config found! Version: ${configVersion}, Variants: ${JSON.stringify(
+ variants
+ )}`
+ );
+ const targetedVariantIndex = evaluateTargeting(config.targeting, customerId, context);
+ if (targetedVariantIndex !== null && variants[targetedVariantIndex]) {
+ const assignedModel2 = variants[targetedVariantIndex].model;
+ log(`\u2705 Assigned model via targeting: ${assignedModel2}`);
+ return returnModel(configKey, sessionId, assignedModel2, configVersion);
+ }
+ const hashBytes = createHash("md5").update(sessionId).digest();
+ const hashVal = hashBytes.readUInt32BE(0) % 1e6;
+ log(`Session hash: ${hashVal} (out of 1,000,000)`);
+ let cumulative = 0;
+ let assignedModel = variants[variants.length - 1].model;
+ for (const v of variants) {
+ const oldCumulative = cumulative;
+ cumulative += v.weight * 1e4;
+ log(
+ `Variant ${v.model}: weight=${v.weight}%, range=${oldCumulative}-${cumulative}, hash=${hashVal}, match=${hashVal < cumulative}`
+ );
+ if (hashVal < cumulative) {
+ assignedModel = v.model;
+ break;
+ }
+ }
+ log(`\u2705 Assigned model via weighted random: ${assignedModel}`);
+ return returnModel(configKey, sessionId, assignedModel, configVersion);
+ } catch (e) {
+ if (e instanceof Error && e.message.includes("not found")) {
+ throw e;
+ }
+ if (fallback) {
+ console.warn(
+ `[Fallom WARNING] Error getting model for '${configKey}': ${e}. Using fallback: ${fallback}`
+ );
+ return returnModel(configKey, sessionId, fallback, 0);
+ }
+ throw e;
+ }
+ }
+ function returnModel(configKey, sessionId, model, version) {
+ if (version > 0) {
+ recordSession(configKey, version, sessionId, model).catch(() => {
+ });
+ }
+ return model;
+ }
+ async function recordSession(configKey, version, sessionId, model) {
+ if (!apiKey) return;
+ try {
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), RECORD_TIMEOUT);
+ await fetch(`${baseUrl}/sessions`, {
+ method: "POST",
+ headers: {
+ Authorization: `Bearer ${apiKey}`,
+ "Content-Type": "application/json"
+ },
+ body: JSON.stringify({
+ config_key: configKey,
+ config_version: version,
+ session_id: sessionId,
+ assigned_model: model
+ }),
+ signal: controller.signal
+ });
+ clearTimeout(timeoutId);
+ } catch {
+ }
+ }
+
+ export {
+ init,
+ get,
+ models_exports
+ };
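The chunk above is the compiled `src/models.ts`: assignment first consults targeting rules, then falls back to deterministic weighted-random bucketing (MD5 of the session id, reduced modulo 1,000,000, compared against cumulative percent weights scaled by 10,000), so a given session id always lands on the same variant for a given config version. A minimal consumer-side sketch, assuming these functions are reached through the package's `models` namespace (as the `index.d.mts` declarations further down indicate); the config key and ids are illustrative:

```ts
import fallom from "@fallom/trace";

// Optional: start the background config sync early (get() auto-inits otherwise).
fallom.models.init({ apiKey: process.env.FALLOM_API_KEY });

// Deterministic per-session assignment: the same sessionId maps to the
// same variant for a given config version.
const model = await fallom.models.get("chat-model", "session-123", {
  fallback: "gpt-4o-mini",   // returned (with a warning) if the config is unavailable
  customerId: "customer-42", // checked against individual targets and rules first
  context: { plan: "pro" },  // extra fields available to rule conditions
});
console.log(`Assigned model: ${model}`);
```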
@@ -0,0 +1,21 @@
+ import {
+ DEFAULT_JUDGE_MODEL,
+ _apiKey,
+ _baseUrl,
+ _initialized,
+ compareModels,
+ evaluate,
+ init,
+ uploadResultsPublic
+ } from "./chunk-2NGJF2JZ.mjs";
+ import "./chunk-7P6ASYW6.mjs";
+ export {
+ DEFAULT_JUDGE_MODEL,
+ _apiKey,
+ _baseUrl,
+ _initialized,
+ compareModels,
+ evaluate,
+ init,
+ uploadResultsPublic
+ };
package/dist/index.d.mts CHANGED
@@ -101,7 +101,7 @@ interface WrapAISDKOptions {
  * await generateText({ model: openai("gpt-4o"), prompt: "Hello!" });
  * ```
  */
- declare function init$3(options?: {
+ declare function init$4(options?: {
  apiKey?: string;
  baseUrl?: string;
  captureContent?: boolean;
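The options shape is unchanged, so existing callers are unaffected; the `init$3` → `init$4` rename only reflects a new `init$1` being introduced for evals further down, which shifts the generated suffixes. A small sketch of the tracing init this declaration describes, assuming it is exposed as `trace.init` on the default export (per the `trace` namespace in the next hunk); whether `captureContent` governs recording of prompt/completion bodies is an assumption:

```ts
import fallom from "@fallom/trace";

fallom.trace.init({
  apiKey: process.env.FALLOM_API_KEY,
  captureContent: true, // assumption: opts in to capturing prompt/completion content
});
```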
@@ -223,7 +223,7 @@ type trace_WrapAISDKOptions = WrapAISDKOptions;
  declare const trace_session: typeof session;
  declare const trace_shutdown: typeof shutdown;
  declare namespace trace {
- export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$3 as init, trace_session as session, trace_shutdown as shutdown };
+ export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
  }

  /**
@@ -244,7 +244,7 @@ declare namespace trace {
  * This is optional - get() will auto-init if needed.
  * Non-blocking: starts background config fetch immediately.
  */
- declare function init$2(options?: {
+ declare function init$3(options?: {
  apiKey?: string;
  baseUrl?: string;
  }): void;
@@ -276,7 +276,7 @@ declare function get$1(configKey: string, sessionId: string, options?: {
  }): Promise<string>;

  declare namespace models {
- export { get$1 as get, init$2 as init };
+ export { get$1 as get, init$3 as init };
  }

  /**
@@ -313,7 +313,7 @@ interface PromptResult {
  * Initialize Fallom prompts.
  * This is called automatically by fallom.init().
  */
- declare function init$1(options?: {
+ declare function init$2(options?: {
  apiKey?: string;
  baseUrl?: string;
  }): void;
@@ -390,7 +390,263 @@ declare const prompts_get: typeof get;
  declare const prompts_getAB: typeof getAB;
  declare const prompts_getPromptContext: typeof getPromptContext;
  declare namespace prompts {
- export { type prompts_PromptResult as PromptResult, prompts_clearPromptContext as clearPromptContext, prompts_get as get, prompts_getAB as getAB, prompts_getPromptContext as getPromptContext, init$1 as init };
+ export { type prompts_PromptResult as PromptResult, prompts_clearPromptContext as clearPromptContext, prompts_get as get, prompts_getAB as getAB, prompts_getPromptContext as getPromptContext, init$2 as init };
+ }
+
+ /**
+ * Type definitions for Fallom Evals.
+ */
+ /** Built-in metric names */
+ type MetricName = "answer_relevancy" | "hallucination" | "toxicity" | "faithfulness" | "completeness";
+ /** List of all available built-in metrics */
+ declare const AVAILABLE_METRICS: MetricName[];
+ /**
+ * Define a custom evaluation metric using G-Eval.
+ */
+ interface CustomMetric {
+ /** Unique identifier for the metric (e.g., "brand_alignment") */
+ name: string;
+ /** Description of what the metric evaluates */
+ criteria: string;
+ /** List of evaluation steps for the LLM judge to follow */
+ steps: string[];
+ }
+ /** Metric can be a built-in name or a custom metric */
+ type MetricInput = MetricName | CustomMetric;
+ /** Dataset can be a list of items OR a string (dataset key to fetch from Fallom) */
+ type DatasetInput = DatasetItem[] | string;
+ /** A single item in an evaluation dataset */
+ interface DatasetItem {
+ input: string;
+ output: string;
+ systemMessage?: string;
+ metadata?: Record<string, unknown>;
+ }
+ /** Evaluation result for a single item */
+ interface EvalResult {
+ input: string;
+ output: string;
+ systemMessage?: string;
+ model: string;
+ isProduction: boolean;
+ answerRelevancy?: number;
+ hallucination?: number;
+ toxicity?: number;
+ faithfulness?: number;
+ completeness?: number;
+ reasoning: Record<string, string>;
+ latencyMs?: number;
+ tokensIn?: number;
+ tokensOut?: number;
+ cost?: number;
+ }
+ /** Response format from model calls */
+ interface ModelResponse {
+ content: string;
+ tokensIn?: number;
+ tokensOut?: number;
+ cost?: number;
+ }
+ /** Message format for model calls */
+ interface Message {
+ role: "system" | "user" | "assistant";
+ content: string;
+ }
+ /** Callable type for custom models */
+ type ModelCallable = (messages: Message[]) => Promise<ModelResponse>;
+ /**
+ * A model configuration for use in compareModels().
+ * Can represent either an OpenRouter model or a custom model (fine-tuned, self-hosted)
+ */
+ interface Model {
+ name: string;
+ callFn?: ModelCallable;
+ }
+ /** Options for init() */
+ interface InitOptions$1 {
+ apiKey?: string;
+ baseUrl?: string;
+ }
+ /** Options for evaluate() */
+ interface EvaluateOptions {
+ dataset: DatasetInput;
+ /** List of metrics to run (built-in or custom). Default: all built-in metrics */
+ metrics?: MetricInput[];
+ judgeModel?: string;
+ name?: string;
+ description?: string;
+ verbose?: boolean;
+ _skipUpload?: boolean;
+ }
+ /** Options for compareModels() */
+ interface CompareModelsOptions extends EvaluateOptions {
+ /**
+ * List of models to test. Each can be:
+ * - A string (model slug for OpenRouter, e.g., "anthropic/claude-3-5-sonnet")
+ * - A Model object (for custom/fine-tuned models)
+ */
+ models: Array<string | Model>;
+ includeProduction?: boolean;
+ modelKwargs?: Record<string, unknown>;
+ }
+ /** Type guard to check if a metric is a CustomMetric */
+ declare function isCustomMetric(metric: MetricInput): metric is CustomMetric;
+ /** Get the name of a metric (works for both built-in and custom) */
+ declare function getMetricName(metric: MetricInput): string;
+
+ /**
+ * G-Eval prompts for each metric.
+ */
+
+ /** G-Eval prompts for each built-in metric */
+ declare const METRIC_PROMPTS: Record<MetricName, {
+ criteria: string;
+ steps: string[];
+ }>;
+
+ /**
+ * Core evaluation functions.
+ */
+
+ declare const DEFAULT_JUDGE_MODEL = "openai/gpt-4o-mini";
+ /**
+ * Initialize Fallom evals.
+ */
+ declare function init$1(options?: InitOptions$1): void;
+ /**
+ * Evaluate production outputs against specified metrics using G-Eval.
+ *
+ * Results are automatically uploaded to Fallom dashboard.
+ */
+ declare function evaluate(options: EvaluateOptions): Promise<EvalResult[]>;
+ /**
+ * Compare multiple models on the same dataset.
+ *
+ * Results are automatically uploaded to Fallom dashboard.
+ */
+ declare function compareModels(options: CompareModelsOptions): Promise<Record<string, EvalResult[]>>;
+ /**
+ * Public function to upload results manually.
+ */
+ declare function uploadResultsPublic(results: EvalResult[] | Record<string, EvalResult[]>, options: {
+ name: string;
+ description?: string;
+ judgeModel?: string;
+ }): Promise<string>;
+
+ /**
+ * Helper functions for creating models and datasets.
+ */
+
+ /**
+ * Create a Model using OpenAI directly (for fine-tuned models or Azure OpenAI).
+ *
+ * @param modelId - The OpenAI model ID (e.g., "gpt-4o" or "ft:gpt-4o-2024-08-06:org::id")
+ * @param options - Configuration options
+ * @returns Model instance that can be used in compareModels()
+ */
+ declare function createOpenAIModel(modelId: string, options?: {
+ name?: string;
+ apiKey?: string;
+ baseUrl?: string;
+ temperature?: number;
+ maxTokens?: number;
+ }): Model;
+ /**
+ * Create a Model for any OpenAI-compatible API endpoint.
+ *
+ * Works with self-hosted models (vLLM, Ollama, LMStudio, etc.), custom endpoints,
+ * or any service that follows the OpenAI chat completions API format.
+ *
+ * @param name - Display name for the model
+ * @param options - Configuration options
+ * @returns A Model instance
+ */
+ declare function createCustomModel(name: string, options: {
+ endpoint: string;
+ apiKey?: string;
+ headers?: Record<string, string>;
+ modelField?: string;
+ modelValue?: string;
+ extraParams?: Record<string, unknown>;
+ }): Model;
+ /**
+ * Create a Model from any callable function.
+ *
+ * This is the most flexible option - you provide a function that takes
+ * messages and returns a response.
+ *
+ * @param name - Display name for the model
+ * @param callFn - Function that takes messages and returns a response
+ * @returns A Model instance
+ */
+ declare function createModelFromCallable(name: string, callFn: ModelCallable): Model;
+ /**
+ * Create a custom evaluation metric using G-Eval.
+ *
+ * @param name - Unique identifier for the metric (e.g., "brand_alignment")
+ * @param criteria - Description of what the metric evaluates
+ * @param steps - List of evaluation steps for the LLM judge to follow
+ * @returns A CustomMetric instance
+ */
+ declare function customMetric(name: string, criteria: string, steps: string[]): CustomMetric;
+ /**
+ * Create a dataset from Fallom trace data.
+ *
+ * @param traces - List of trace objects with attributes
+ * @returns List of DatasetItem ready for evaluation
+ */
+ declare function datasetFromTraces(traces: Array<{
+ attributes?: Record<string, unknown>;
+ }>): DatasetItem[];
+ /**
+ * Fetch a dataset stored in Fallom by its key.
+ *
+ * @param datasetKey - The unique key of the dataset (e.g., "customer-support-qa")
+ * @param version - Specific version number to fetch. If undefined, fetches latest.
+ * @param config - Internal config (api key, base url, initialized flag)
+ * @returns List of DatasetItem ready for evaluation
+ */
+ declare function datasetFromFallom(datasetKey: string, version?: number, config?: {
+ _apiKey?: string | null;
+ _baseUrl?: string;
+ _initialized?: boolean;
+ }): Promise<DatasetItem[]>;
+
+ /**
+ * Fallom Evals - Run LLM evaluations locally using G-Eval with LLM as a Judge.
+ *
+ * Evaluate production outputs or compare different models on your dataset.
+ * Results are uploaded to Fallom dashboard for visualization.
+ */
+
+ declare const evals_AVAILABLE_METRICS: typeof AVAILABLE_METRICS;
+ type evals_CompareModelsOptions = CompareModelsOptions;
+ type evals_CustomMetric = CustomMetric;
+ declare const evals_DEFAULT_JUDGE_MODEL: typeof DEFAULT_JUDGE_MODEL;
+ type evals_DatasetInput = DatasetInput;
+ type evals_DatasetItem = DatasetItem;
+ type evals_EvalResult = EvalResult;
+ type evals_EvaluateOptions = EvaluateOptions;
+ declare const evals_METRIC_PROMPTS: typeof METRIC_PROMPTS;
+ type evals_Message = Message;
+ type evals_MetricInput = MetricInput;
+ type evals_MetricName = MetricName;
+ type evals_Model = Model;
+ type evals_ModelCallable = ModelCallable;
+ type evals_ModelResponse = ModelResponse;
+ declare const evals_compareModels: typeof compareModels;
+ declare const evals_createCustomModel: typeof createCustomModel;
+ declare const evals_createModelFromCallable: typeof createModelFromCallable;
+ declare const evals_createOpenAIModel: typeof createOpenAIModel;
+ declare const evals_customMetric: typeof customMetric;
+ declare const evals_datasetFromFallom: typeof datasetFromFallom;
+ declare const evals_datasetFromTraces: typeof datasetFromTraces;
+ declare const evals_evaluate: typeof evaluate;
+ declare const evals_getMetricName: typeof getMetricName;
+ declare const evals_isCustomMetric: typeof isCustomMetric;
+ declare namespace evals {
+ export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_CustomMetric as CustomMetric, evals_DEFAULT_JUDGE_MODEL as DEFAULT_JUDGE_MODEL, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, type InitOptions$1 as InitOptions, evals_METRIC_PROMPTS as METRIC_PROMPTS, type evals_Message as Message, type evals_MetricInput as MetricInput, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_customMetric as customMetric, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, evals_evaluate as evaluate, evals_getMetricName as getMetricName, init$1 as init, evals_isCustomMetric as isCustomMetric, uploadResultsPublic as uploadResults };
  }

  /**
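These declarations define the eval surface added in this release: a dataset is either an inline `DatasetItem[]` array or a string key resolved from Fallom, and a metric is either a built-in name or a G-Eval `CustomMetric` object. A minimal sketch of that flow, assuming the `evals` namespace is reached through the package default export (the dataset contents and metric wording are illustrative):

```ts
import fallom from "@fallom/trace";

fallom.evals.init({ apiKey: process.env.FALLOM_API_KEY });

// A G-Eval metric beyond the five built-ins; name/criteria/steps are illustrative.
const brandAlignment = fallom.evals.customMetric(
  "brand_alignment",
  "Whether the response matches the brand voice",
  ["Read the output", "Check its tone against the brand voice", "Score from 0 to 1"]
);

const results = await fallom.evals.evaluate({
  dataset: [
    { input: "What is your refund policy?", output: "Refunds within 30 days." },
  ],
  metrics: ["answer_relevancy", brandAlignment],
  judgeModel: "openai/gpt-4o-mini", // DEFAULT_JUDGE_MODEL if omitted
});
```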
@@ -551,7 +807,7 @@ declare class FallomExporter implements SpanExporter {
  }

  /**
- * Fallom - Model A/B testing, prompt management, and tracing for LLM applications.
+ * Fallom - Model A/B testing, prompt management, tracing, and evals for LLM applications.
  *
  * @example
  * ```typescript
@@ -579,6 +835,14 @@ declare class FallomExporter implements SpanExporter {
  *
  * // Get A/B tested model within session
  * const modelName = await session.getModel({ fallback: "gpt-4o-mini" });
+ *
+ * // Run evaluations
+ * fallom.evals.init({ apiKey: "your-api-key" });
+ * const results = await fallom.evals.evaluate({
+ * dataset: [{ input: "...", output: "...", systemMessage: "..." }],
+ * metrics: ["answer_relevancy", "faithfulness"]
+ * });
+ * await fallom.evals.uploadResults(results, "My Eval Run");
  * ```
  */

@@ -587,7 +851,8 @@ declare const _default: {
  trace: typeof trace;
  models: typeof models;
  prompts: typeof prompts;
+ evals: typeof evals;
  session: typeof session;
  };

- export { FallomExporter, type FallomExporterOptions, FallomSession, type InitOptions, type PromptResult, type SessionContext, type SessionOptions, clearMastraPrompt, _default as default, init, models, prompts, session, setMastraPrompt, setMastraPromptAB, trace };
+ export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, clearMastraPrompt, _default as default, evals, init, models, prompts, session, setMastraPrompt, setMastraPromptAB, trace };
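The widened root export makes `evals` and its key types importable directly. A sketch of the model-comparison flow those types describe; the endpoint URL is a placeholder, `"customer-support-qa"` echoes the dataset-key example in the declarations above, and note that the declared `uploadResults` takes an options object with a required `name` (unlike the doc-comment example, which passes a bare string):

```ts
import fallom, { type EvalResult } from "@fallom/trace";

fallom.evals.init({ apiKey: process.env.FALLOM_API_KEY });

// A self-hosted OpenAI-compatible endpoint (vLLM, Ollama, ...); URL is a placeholder.
const local = fallom.evals.createCustomModel("local-llama", {
  endpoint: "http://localhost:8000/v1/chat/completions",
});

// Keyed by model name; each value holds that model's per-item results.
const byModel: Record<string, EvalResult[]> = await fallom.evals.compareModels({
  dataset: "customer-support-qa", // dataset key stored in Fallom
  models: ["anthropic/claude-3-5-sonnet", local],
  metrics: ["faithfulness", "completeness"],
});

// Manual upload, per the declared uploadResultsPublic signature.
await fallom.evals.uploadResults(byModel, { name: "sonnet-vs-local" });
```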