@fallom/trace 0.2.10 → 0.2.14

package/dist/index.d.ts CHANGED
@@ -11,6 +11,10 @@ interface SessionContext {
     configKey: string;
     sessionId: string;
     customerId?: string;
+    /** Custom key-value metadata for filtering/grouping */
+    metadata?: Record<string, string | number | boolean>;
+    /** Simple string tags for quick filtering */
+    tags?: string[];
 }
 /**
  * Trace context for linking spans together.
@@ -42,6 +46,8 @@ interface TraceData {
     error_message?: string;
     time_to_first_token_ms?: number;
     is_streaming?: boolean;
+    metadata?: Record<string, string | number | boolean>;
+    tags?: string[];
     attributes?: Record<string, unknown>;
     prompt_key?: string;
     prompt_version?: number;
@@ -58,6 +64,18 @@ interface SessionOptions {
     sessionId: string;
     /** Optional customer/user identifier for analytics */
     customerId?: string;
+    /**
+     * Custom key-value metadata for filtering/grouping traces.
+     * Use this for structured data like deployment type, environment, etc.
+     * @example { deployment: "dedicated", requestType: "transcript", provider: "novita" }
+     */
+    metadata?: Record<string, string | number | boolean>;
+    /**
+     * Simple string tags for quick filtering.
+     * Use this for simple labels/categories.
+     * @example ["production", "dedicated", "transcript-analysis"]
+     */
+    tags?: string[];
 }
 /**
  * Options for wrapAISDK.
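
The new `metadata` and `tags` fields appear in lock-step on `SessionContext`, `TraceData`, and `SessionOptions`, so values set when a session is opened flow onto every trace it records. A minimal sketch of tagging a session at creation time; the default import and the `session()` entry point are assumptions, since this diff only shows the options type:

import fallom from "@fallom/trace";

// Hypothetical entry point; only SessionOptions itself appears in this diff.
const session = fallom.session({
  configKey: "my-config",
  sessionId: crypto.randomUUID(),
  customerId: "cust_42",
  // Structured key-value pairs for filtering/grouping in the dashboard
  metadata: { deployment: "dedicated", requestType: "transcript", provider: "novita" },
  // Flat labels for quick filtering
  tags: ["production", "transcript-analysis"],
});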
@@ -394,22 +412,35 @@ declare namespace prompts {
 }

 /**
- * Fallom Evals - Run LLM evaluations locally using G-Eval with LLM as a Judge.
- *
- * Evaluate production outputs or compare different models on your dataset.
- * Results are uploaded to Fallom dashboard for visualization.
- *
+ * Type definitions for Fallom Evals.
  */
+/** Built-in metric names */
 type MetricName = "answer_relevancy" | "hallucination" | "toxicity" | "faithfulness" | "completeness";
+/** List of all available built-in metrics */
 declare const AVAILABLE_METRICS: MetricName[];
+/**
+ * Define a custom evaluation metric using G-Eval.
+ */
+interface CustomMetric {
+    /** Unique identifier for the metric (e.g., "brand_alignment") */
+    name: string;
+    /** Description of what the metric evaluates */
+    criteria: string;
+    /** List of evaluation steps for the LLM judge to follow */
+    steps: string[];
+}
+/** Metric can be a built-in name or a custom metric */
+type MetricInput = MetricName | CustomMetric;
 /** Dataset can be a list of items OR a string (dataset key to fetch from Fallom) */
 type DatasetInput = DatasetItem[] | string;
+/** A single item in an evaluation dataset */
 interface DatasetItem {
     input: string;
     output: string;
     systemMessage?: string;
     metadata?: Record<string, unknown>;
 }
+/** Evaluation result for a single item */
 interface EvalResult {
     input: string;
     output: string;
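
A `CustomMetric` is just a named G-Eval rubric: criteria plus steps for the judge. A hand-written literal matching the interface above (the `evals` namespace import path is an assumption, and the rubric text is illustrative):

import type { evals } from "@fallom/trace";

const brandAlignment: evals.CustomMetric = {
  name: "brand_alignment",
  criteria: "Does the output match the brand's tone and terminology?",
  steps: [
    "Read the output and note wording that conflicts with the brand guide.",
    "Check whether required terminology is used consistently.",
    "Penalize outputs that drift from the brand voice.",
  ],
};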
@@ -449,19 +480,23 @@ interface Model {
     name: string;
     callFn?: ModelCallable;
 }
+/** Options for init() */
 interface InitOptions$1 {
     apiKey?: string;
     baseUrl?: string;
 }
+/** Options for evaluate() */
 interface EvaluateOptions {
     dataset: DatasetInput;
-    metrics?: MetricName[];
+    /** List of metrics to run (built-in or custom). Default: all built-in metrics */
+    metrics?: MetricInput[];
     judgeModel?: string;
     name?: string;
     description?: string;
     verbose?: boolean;
     _skipUpload?: boolean;
 }
+/** Options for compareModels() */
 interface CompareModelsOptions extends EvaluateOptions {
     /**
      * List of models to test. Each can be:
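
Widening `metrics` from `MetricName[]` to `MetricInput[]` lets a single run mix built-in names with custom rubrics:

// brandAlignment is the CustomMetric from the previous sketch.
const metrics: evals.MetricInput[] = ["hallucination", "answer_relevancy", brandAlignment];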
@@ -472,31 +507,72 @@ interface CompareModelsOptions extends EvaluateOptions {
     includeProduction?: boolean;
     modelKwargs?: Record<string, unknown>;
 }
+/** Type guard to check if a metric is a CustomMetric */
+declare function isCustomMetric(metric: MetricInput): metric is CustomMetric;
+/** Get the name of a metric (works for both built-in and custom) */
+declare function getMetricName(metric: MetricInput): string;
+
+/**
+ * G-Eval prompts for each metric.
+ */
+
+/** G-Eval prompts for each built-in metric */
+declare const METRIC_PROMPTS: Record<MetricName, {
+    criteria: string;
+    steps: string[];
+}>;
+
+/**
+ * Core evaluation functions.
+ */
+
+declare const DEFAULT_JUDGE_MODEL = "openai/gpt-4o-mini";
 /**
  * Initialize Fallom evals.
  */
 declare function init$1(options?: InitOptions$1): void;
 /**
  * Evaluate production outputs against specified metrics using G-Eval.
+ *
  * Results are automatically uploaded to Fallom dashboard.
  */
 declare function evaluate(options: EvaluateOptions): Promise<EvalResult[]>;
 /**
- * Create a Model using OpenAI directly (for fine-tuned models or direct API access).
+ * Compare multiple models on the same dataset.
+ *
+ * Results are automatically uploaded to Fallom dashboard.
+ */
+declare function compareModels(options: CompareModelsOptions): Promise<Record<string, EvalResult[]>>;
+/**
+ * Public function to upload results manually.
+ */
+declare function uploadResultsPublic(results: EvalResult[] | Record<string, EvalResult[]>, options: {
+    name: string;
+    description?: string;
+    judgeModel?: string;
+}): Promise<string>;
+
+/**
+ * Helper functions for creating models and datasets.
+ */
+
+/**
+ * Create a Model using OpenAI directly (for fine-tuned models or Azure OpenAI).
  *
  * @param modelId - The OpenAI model ID (e.g., "gpt-4o" or "ft:gpt-4o-2024-08-06:org::id")
  * @param options - Configuration options
- * @returns A Model instance that can be used in compareModels()
+ * @returns Model instance that can be used in compareModels()
  */
 declare function createOpenAIModel(modelId: string, options?: {
     name?: string;
     apiKey?: string;
-    baseURL?: string;
+    baseUrl?: string;
     temperature?: number;
     maxTokens?: number;
 }): Model;
 /**
  * Create a Model for any OpenAI-compatible API endpoint.
+ *
  * Works with self-hosted models (vLLM, Ollama, LMStudio, etc.), custom endpoints,
  * or any service that follows the OpenAI chat completions API format.
  *
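
The core entry points line up as declared above: `init` for credentials, `evaluate` for a single run, `compareModels` for several models over one dataset. A sketch of a plain evaluation, assuming the `evals` namespace import; the dataset key is illustrative:

import { evals } from "@fallom/trace";

evals.init({ apiKey: process.env.FALLOM_API_KEY });

// DatasetInput accepts a dataset key string; items are fetched from Fallom.
const results = await evals.evaluate({
  dataset: "customer-support-qa",
  metrics,                          // MetricInput[] from the sketch above
  judgeModel: "openai/gpt-4o-mini", // same value as DEFAULT_JUDGE_MODEL
  name: "nightly-regression",
});
// Results are also uploaded to the dashboard automatically.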
@@ -510,12 +586,13 @@ declare function createCustomModel(name: string, options: {
     headers?: Record<string, string>;
     modelField?: string;
     modelValue?: string;
-    temperature?: number;
-    maxTokens?: number;
+    extraParams?: Record<string, unknown>;
 }): Model;
 /**
  * Create a Model from any callable function.
- * This is the most flexible option - you provide a function that handles the model call.
+ *
+ * This is the most flexible option - you provide a function that takes
+ * messages and returns a response.
  *
  * @param name - Display name for the model
  * @param callFn - Function that takes messages and returns a response
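
`createCustomModel` drops its dedicated `temperature`/`maxTokens` options for a generic `extraParams` bag, so sampling parameters now travel as raw request fields. When even that is too rigid, `createModelFromCallable` accepts any function. A sketch, assuming `ModelCallable` receives the chat messages and resolves to the completion text (its exact definition sits outside these hunks, and the endpoint and response shape below are invented):

const myModel = evals.createModelFromCallable("my-endpoint", async (messages) => {
  // Any transport works; this URL and response shape are assumptions.
  const res = await fetch("https://example.com/v1/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: "my-model", messages }),
  });
  const json = await res.json();
  return json.choices[0].message.content;
});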
@@ -523,17 +600,19 @@ declare function createCustomModel(name: string, options: {
  */
 declare function createModelFromCallable(name: string, callFn: ModelCallable): Model;
 /**
- * Compare multiple models on the same dataset.
- */
-declare function compareModels(options: CompareModelsOptions): Promise<Record<string, EvalResult[]>>;
-/**
- * Manually upload evaluation results to Fallom dashboard.
- * Note: Results are automatically uploaded after evaluate() and compareModels(),
- * so this is only needed for custom scenarios.
+ * Create a custom evaluation metric using G-Eval.
+ *
+ * @param name - Unique identifier for the metric (e.g., "brand_alignment")
+ * @param criteria - Description of what the metric evaluates
+ * @param steps - List of evaluation steps for the LLM judge to follow
+ * @returns A CustomMetric instance
  */
-declare function uploadResults(results: EvalResult[] | Record<string, EvalResult[]>, name: string, description?: string, judgeModel?: string): Promise<string>;
+declare function customMetric(name: string, criteria: string, steps: string[]): CustomMetric;
 /**
  * Create a dataset from Fallom trace data.
+ *
+ * @param traces - List of trace objects with attributes
+ * @returns List of DatasetItem ready for evaluation
  */
 declare function datasetFromTraces(traces: Array<{
     attributes?: Record<string, unknown>;
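
The new `customMetric` helper builds the same object as a hand-written `CustomMetric` literal, so the two forms are interchangeable:

// Equivalent to the literal in the earlier sketch.
const brandAlignment = evals.customMetric(
  "brand_alignment",
  "Does the output match the brand's tone and terminology?",
  [
    "Read the output and note wording that conflicts with the brand guide.",
    "Check whether required terminology is used consistently.",
  ],
);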
@@ -542,27 +621,34 @@ declare function datasetFromTraces(traces: Array<{
  * Fetch a dataset stored in Fallom by its key.
  *
  * @param datasetKey - The unique key of the dataset (e.g., "customer-support-qa")
- * @param version - Specific version number to fetch. If undefined, fetches the latest version.
+ * @param version - Specific version number to fetch. If undefined, fetches latest.
+ * @param config - Internal config (api key, base url, initialized flag)
  * @returns List of DatasetItem ready for evaluation
  */
-declare function datasetFromFallom(datasetKey: string, version?: number): Promise<DatasetItem[]>;
-declare const _default$1: {
-    init: typeof init$1;
-    evaluate: typeof evaluate;
-    compareModels: typeof compareModels;
-    uploadResults: typeof uploadResults;
-    datasetFromTraces: typeof datasetFromTraces;
-    datasetFromFallom: typeof datasetFromFallom;
-    AVAILABLE_METRICS: MetricName[];
-};
+declare function datasetFromFallom(datasetKey: string, version?: number, config?: {
+    _apiKey?: string | null;
+    _baseUrl?: string;
+    _initialized?: boolean;
+}): Promise<DatasetItem[]>;
+
+/**
+ * Fallom Evals - Run LLM evaluations locally using G-Eval with LLM as a Judge.
+ *
+ * Evaluate production outputs or compare different models on your dataset.
+ * Results are uploaded to Fallom dashboard for visualization.
+ */

 declare const evals_AVAILABLE_METRICS: typeof AVAILABLE_METRICS;
 type evals_CompareModelsOptions = CompareModelsOptions;
+type evals_CustomMetric = CustomMetric;
+declare const evals_DEFAULT_JUDGE_MODEL: typeof DEFAULT_JUDGE_MODEL;
 type evals_DatasetInput = DatasetInput;
 type evals_DatasetItem = DatasetItem;
 type evals_EvalResult = EvalResult;
 type evals_EvaluateOptions = EvaluateOptions;
+declare const evals_METRIC_PROMPTS: typeof METRIC_PROMPTS;
 type evals_Message = Message;
+type evals_MetricInput = MetricInput;
 type evals_MetricName = MetricName;
 type evals_Model = Model;
 type evals_ModelCallable = ModelCallable;
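
`datasetFromFallom` keeps its public shape (key plus optional version); the new third parameter is internal credential plumbing and can be ignored by callers. Typical use:

// Latest version of the stored dataset
const latest = await evals.datasetFromFallom("customer-support-qa");

// Pin a specific version for reproducible runs
const v3 = await evals.datasetFromFallom("customer-support-qa", 3);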
@@ -571,12 +657,14 @@ declare const evals_compareModels: typeof compareModels;
 declare const evals_createCustomModel: typeof createCustomModel;
 declare const evals_createModelFromCallable: typeof createModelFromCallable;
 declare const evals_createOpenAIModel: typeof createOpenAIModel;
+declare const evals_customMetric: typeof customMetric;
 declare const evals_datasetFromFallom: typeof datasetFromFallom;
 declare const evals_datasetFromTraces: typeof datasetFromTraces;
 declare const evals_evaluate: typeof evaluate;
-declare const evals_uploadResults: typeof uploadResults;
+declare const evals_getMetricName: typeof getMetricName;
+declare const evals_isCustomMetric: typeof isCustomMetric;
 declare namespace evals {
-    export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, type InitOptions$1 as InitOptions, type evals_Message as Message, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, _default$1 as default, evals_evaluate as evaluate, init$1 as init, evals_uploadResults as uploadResults };
+    export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_CustomMetric as CustomMetric, evals_DEFAULT_JUDGE_MODEL as DEFAULT_JUDGE_MODEL, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, type InitOptions$1 as InitOptions, evals_METRIC_PROMPTS as METRIC_PROMPTS, type evals_Message as Message, type evals_MetricInput as MetricInput, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_customMetric as customMetric, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, evals_evaluate as evaluate, evals_getMetricName as getMetricName, init$1 as init, evals_isCustomMetric as isCustomMetric, uploadResultsPublic as uploadResults };
 }

 /**
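
Note the export map: `uploadResultsPublic` now ships under the public name `uploadResults`, and its signature moves from positional arguments to an options object. A migration sketch:

// 0.2.10
// await evals.uploadResults(results, "nightly-regression", "desc", "openai/gpt-4o-mini");

// 0.2.14
await evals.uploadResults(results, {
  name: "nightly-regression",
  description: "desc",
  judgeModel: "openai/gpt-4o-mini",
});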