@fallom/trace 0.2.6 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +203 -8
- package/dist/index.d.ts +203 -8
- package/dist/index.js +1100 -349
- package/dist/index.mjs +1026 -286
- package/package.json +3 -2
package/dist/index.d.mts
CHANGED
````diff
@@ -101,7 +101,7 @@ interface WrapAISDKOptions {
  * await generateText({ model: openai("gpt-4o"), prompt: "Hello!" });
  * ```
  */
-declare function init$
+declare function init$4(options?: {
     apiKey?: string;
     baseUrl?: string;
     captureContent?: boolean;
````
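The `init$N` suffixes are just the declaration bundler's internal de-duplication names; consumers still call plain `init`. A minimal sketch of the tracing entry point from the consumer side, assuming the package's default export (the env-var name is illustrative):

```typescript
import fallom from "@fallom/trace";

// trace.init is the public alias for init$4 above; per the signature,
// apiKey, baseUrl, and captureContent are all optional.
fallom.trace.init({
  apiKey: process.env.FALLOM_API_KEY,
  captureContent: false, // opt out of recording prompt/completion text
});
```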
````diff
@@ -223,7 +223,7 @@ type trace_WrapAISDKOptions = WrapAISDKOptions;
 declare const trace_session: typeof session;
 declare const trace_shutdown: typeof shutdown;
 declare namespace trace {
-  export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$
+  export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
 }
 
 /**
@@ -244,7 +244,7 @@ declare namespace trace {
  * This is optional - get() will auto-init if needed.
  * Non-blocking: starts background config fetch immediately.
  */
-declare function init$
+declare function init$3(options?: {
     apiKey?: string;
     baseUrl?: string;
 }): void;
@@ -276,7 +276,7 @@ declare function get$1(configKey: string, sessionId: string, options?: {
 }): Promise<string>;
 
 declare namespace models {
-  export { get$1 as get, init$
+  export { get$1 as get, init$3 as init };
 }
 
 /**
````
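These hunks only rename internals, but they do pin down the public `models` surface: `models.get(configKey, sessionId, options?)` resolves to a `Promise<string>`, and `init` is optional because `get()` auto-inits. A small sketch under those signatures — the config key and session id are placeholders:

```typescript
import fallom from "@fallom/trace";

// Optional: per the docs above, get() auto-inits when init was never called.
fallom.models.init({ apiKey: process.env.FALLOM_API_KEY });

// Resolves the A/B-assigned model name for this session as a plain string.
const modelName: string = await fallom.models.get("chat-model", "session-123");
```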
````diff
@@ -313,7 +313,7 @@ interface PromptResult {
  * Initialize Fallom prompts.
  * This is called automatically by fallom.init().
  */
-declare function init$
+declare function init$2(options?: {
     apiKey?: string;
     baseUrl?: string;
 }): void;
````
````diff
@@ -390,7 +390,193 @@ declare const prompts_get: typeof get;
 declare const prompts_getAB: typeof getAB;
 declare const prompts_getPromptContext: typeof getPromptContext;
 declare namespace prompts {
-  export { type prompts_PromptResult as PromptResult, prompts_clearPromptContext as clearPromptContext, prompts_get as get, prompts_getAB as getAB, prompts_getPromptContext as getPromptContext, init$
+  export { type prompts_PromptResult as PromptResult, prompts_clearPromptContext as clearPromptContext, prompts_get as get, prompts_getAB as getAB, prompts_getPromptContext as getPromptContext, init$2 as init };
+}
+
+/**
+ * Fallom Evals - Run LLM evaluations locally using G-Eval with LLM as a Judge.
+ *
+ * Evaluate production outputs or compare different models on your dataset.
+ * Results are uploaded to Fallom dashboard for visualization.
+ *
+ */
+type MetricName = "answer_relevancy" | "hallucination" | "toxicity" | "faithfulness" | "completeness";
+declare const AVAILABLE_METRICS: MetricName[];
+/** Dataset can be a list of items OR a string (dataset key to fetch from Fallom) */
+type DatasetInput = DatasetItem[] | string;
+interface DatasetItem {
+    input: string;
+    output: string;
+    systemMessage?: string;
+    metadata?: Record<string, unknown>;
+}
+interface EvalResult {
+    input: string;
+    output: string;
+    systemMessage?: string;
+    model: string;
+    isProduction: boolean;
+    answerRelevancy?: number;
+    hallucination?: number;
+    toxicity?: number;
+    faithfulness?: number;
+    completeness?: number;
+    reasoning: Record<string, string>;
+    latencyMs?: number;
+    tokensIn?: number;
+    tokensOut?: number;
+    cost?: number;
+}
+/** Response format from model calls */
+interface ModelResponse {
+    content: string;
+    tokensIn?: number;
+    tokensOut?: number;
+    cost?: number;
+}
+/** Message format for model calls */
+interface Message {
+    role: "system" | "user" | "assistant";
+    content: string;
+}
+/** Callable type for custom models */
+type ModelCallable = (messages: Message[]) => Promise<ModelResponse>;
+/**
+ * A model configuration for use in compareModels().
+ * Can represent either an OpenRouter model or a custom model (fine-tuned, self-hosted)
+ */
+interface Model {
+    name: string;
+    callFn?: ModelCallable;
+}
+interface InitOptions$1 {
+    apiKey?: string;
+    baseUrl?: string;
+}
+interface EvaluateOptions {
+    dataset: DatasetInput;
+    metrics?: MetricName[];
+    judgeModel?: string;
+    name?: string;
+    description?: string;
+    verbose?: boolean;
+    _skipUpload?: boolean;
+}
+interface CompareModelsOptions extends EvaluateOptions {
+    /**
+     * List of models to test. Each can be:
+     * - A string (model slug for OpenRouter, e.g., "anthropic/claude-3-5-sonnet")
+     * - A Model object (for custom/fine-tuned models)
+     */
+    models: Array<string | Model>;
+    includeProduction?: boolean;
+    modelKwargs?: Record<string, unknown>;
+}
+/**
+ * Initialize Fallom evals.
+ */
+declare function init$1(options?: InitOptions$1): void;
+/**
+ * Evaluate production outputs against specified metrics using G-Eval.
+ * Results are automatically uploaded to Fallom dashboard.
+ */
+declare function evaluate(options: EvaluateOptions): Promise<EvalResult[]>;
+/**
+ * Create a Model using OpenAI directly (for fine-tuned models or direct API access).
+ *
+ * @param modelId - The OpenAI model ID (e.g., "gpt-4o" or "ft:gpt-4o-2024-08-06:org::id")
+ * @param options - Configuration options
+ * @returns A Model instance that can be used in compareModels()
+ */
+declare function createOpenAIModel(modelId: string, options?: {
+    name?: string;
+    apiKey?: string;
+    baseURL?: string;
+    temperature?: number;
+    maxTokens?: number;
+}): Model;
+/**
+ * Create a Model for any OpenAI-compatible API endpoint.
+ * Works with self-hosted models (vLLM, Ollama, LMStudio, etc.), custom endpoints,
+ * or any service that follows the OpenAI chat completions API format.
+ *
+ * @param name - Display name for the model
+ * @param options - Configuration options
+ * @returns A Model instance
+ */
+declare function createCustomModel(name: string, options: {
+    endpoint: string;
+    apiKey?: string;
+    headers?: Record<string, string>;
+    modelField?: string;
+    modelValue?: string;
+    temperature?: number;
+    maxTokens?: number;
+}): Model;
+/**
+ * Create a Model from any callable function.
+ * This is the most flexible option - you provide a function that handles the model call.
+ *
+ * @param name - Display name for the model
+ * @param callFn - Function that takes messages and returns a response
+ * @returns A Model instance
+ */
+declare function createModelFromCallable(name: string, callFn: ModelCallable): Model;
+/**
+ * Compare multiple models on the same dataset.
+ */
+declare function compareModels(options: CompareModelsOptions): Promise<Record<string, EvalResult[]>>;
+/**
+ * Manually upload evaluation results to Fallom dashboard.
+ * Note: Results are automatically uploaded after evaluate() and compareModels(),
+ * so this is only needed for custom scenarios.
+ */
+declare function uploadResults(results: EvalResult[] | Record<string, EvalResult[]>, name: string, description?: string, judgeModel?: string): Promise<string>;
+/**
+ * Create a dataset from Fallom trace data.
+ */
+declare function datasetFromTraces(traces: Array<{
+    attributes?: Record<string, unknown>;
+}>): DatasetItem[];
+/**
+ * Fetch a dataset stored in Fallom by its key.
+ *
+ * @param datasetKey - The unique key of the dataset (e.g., "customer-support-qa")
+ * @param version - Specific version number to fetch. If undefined, fetches the latest version.
+ * @returns List of DatasetItem ready for evaluation
+ */
+declare function datasetFromFallom(datasetKey: string, version?: number): Promise<DatasetItem[]>;
+declare const _default$1: {
+    init: typeof init$1;
+    evaluate: typeof evaluate;
+    compareModels: typeof compareModels;
+    uploadResults: typeof uploadResults;
+    datasetFromTraces: typeof datasetFromTraces;
+    datasetFromFallom: typeof datasetFromFallom;
+    AVAILABLE_METRICS: MetricName[];
+};
+
+declare const evals_AVAILABLE_METRICS: typeof AVAILABLE_METRICS;
+type evals_CompareModelsOptions = CompareModelsOptions;
+type evals_DatasetInput = DatasetInput;
+type evals_DatasetItem = DatasetItem;
+type evals_EvalResult = EvalResult;
+type evals_EvaluateOptions = EvaluateOptions;
+type evals_Message = Message;
+type evals_MetricName = MetricName;
+type evals_Model = Model;
+type evals_ModelCallable = ModelCallable;
+type evals_ModelResponse = ModelResponse;
+declare const evals_compareModels: typeof compareModels;
+declare const evals_createCustomModel: typeof createCustomModel;
+declare const evals_createModelFromCallable: typeof createModelFromCallable;
+declare const evals_createOpenAIModel: typeof createOpenAIModel;
+declare const evals_datasetFromFallom: typeof datasetFromFallom;
+declare const evals_datasetFromTraces: typeof datasetFromTraces;
+declare const evals_evaluate: typeof evaluate;
+declare const evals_uploadResults: typeof uploadResults;
+declare namespace evals {
+  export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, type InitOptions$1 as InitOptions, type evals_Message as Message, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, _default$1 as default, evals_evaluate as evaluate, init$1 as init, evals_uploadResults as uploadResults };
 }
 
 /**
````
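Taken together, the declarations above sketch the whole evals workflow: build a dataset, score it with `evaluate()`, and race candidate models with `compareModels()`. A minimal sketch based only on these type signatures — the dataset rows, model slug, and the `callMyEndpoint` helper are placeholders, not part of the package:

```typescript
import { evals, type DatasetItem, type EvalResult } from "@fallom/trace";

evals.init({ apiKey: process.env.FALLOM_API_KEY });

// Score existing production outputs with the LLM judge.
const dataset: DatasetItem[] = [
  {
    input: "What is your refund policy?",
    output: "Refunds are available within 30 days of purchase.",
    systemMessage: "You are a support agent.",
  },
];
const results: EvalResult[] = await evals.evaluate({
  dataset,
  metrics: ["answer_relevancy", "faithfulness"],
});

// Placeholder: stand-in for your own inference endpoint.
async function callMyEndpoint(messages: evals.Message[]): Promise<string> {
  return "Refunds are available within 30 days.";
}

// Compare candidates on the same dataset. Strings are OpenRouter slugs;
// createModelFromCallable wraps any async function you supply.
const finetune = evals.createModelFromCallable("my-finetune", async (messages) => {
  const content = await callMyEndpoint(messages);
  return { content };
});
const byModel = await evals.compareModels({
  dataset,
  models: ["anthropic/claude-3-5-sonnet", finetune],
  metrics: ["answer_relevancy"],
});
// byModel maps each model name to its EvalResult[], per the return type above.
```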
````diff
@@ -551,7 +737,7 @@ declare class FallomExporter implements SpanExporter {
 }
 
 /**
- * Fallom - Model A/B testing, prompt management, and
+ * Fallom - Model A/B testing, prompt management, tracing, and evals for LLM applications.
  *
  * @example
  * ```typescript
@@ -579,6 +765,14 @@ declare class FallomExporter implements SpanExporter {
  *
  * // Get A/B tested model within session
  * const modelName = await session.getModel({ fallback: "gpt-4o-mini" });
+ *
+ * // Run evaluations
+ * fallom.evals.init({ apiKey: "your-api-key" });
+ * const results = await fallom.evals.evaluate({
+ *   dataset: [{ input: "...", output: "...", systemMessage: "..." }],
+ *   metrics: ["answer_relevancy", "faithfulness"]
+ * });
+ * await fallom.evals.uploadResults(results, "My Eval Run");
  * ```
  */
 
@@ -587,7 +781,8 @@ declare const _default: {
     trace: typeof trace;
     models: typeof models;
     prompts: typeof prompts;
+    evals: typeof evals;
     session: typeof session;
 };
 
-export { FallomExporter, type FallomExporterOptions, FallomSession, type InitOptions, type PromptResult, type SessionContext, type SessionOptions, clearMastraPrompt, _default as default, init, models, prompts, session, setMastraPrompt, setMastraPromptAB, trace };
+export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, clearMastraPrompt, _default as default, evals, init, models, prompts, session, setMastraPrompt, setMastraPromptAB, trace };
````
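The widened root export list means the `evals` namespace and its option/result types are now importable straight from the package root. A short sketch; the dataset key reuses the placeholder from the declarations' own docs, and the latest-version behavior for a string key is inferred from `datasetFromFallom`'s documentation:

```typescript
import { evals, type DatasetItem, type MetricName } from "@fallom/trace";

evals.init({ apiKey: process.env.FALLOM_API_KEY });

// Fetch a stored dataset explicitly (latest version when none is given)...
const items: DatasetItem[] = await evals.datasetFromFallom("customer-support-qa");

// ...or pass the key directly, since DatasetInput accepts a string.
const metrics: MetricName[] = ["toxicity", "hallucination"];
await evals.evaluate({ dataset: "customer-support-qa", metrics });
```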
package/dist/index.d.ts
CHANGED
(The diff for package/dist/index.d.ts is byte-for-byte identical to the package/dist/index.d.mts diff reconstructed above — the CJS and ESM declaration bundles carry the same content — so the hunks are not repeated here.)